subagent thinking accordion; indicator fixes; script details & edit

This commit is contained in:
2026-04-20 15:09:37 -04:00
parent a188268906
commit b1d4459809
25 changed files with 2041 additions and 174 deletions

View File

@@ -381,9 +381,75 @@ export class WebSocketHandler {
// Workspace sync: patch message
logger.debug({ store: payload.store, seq: payload.seq }, 'Handling workspace patch');
await workspace!.handlePatch(payload.store, payload.seq, payload.patch || []);
} else if (payload.type === 'client_log') {
const level: string = payload.level ?? 'log';
const msg = `[client:${authContext.sessionId}] ${payload.message ?? ''}`;
const logMeta = { source: 'client', sessionId: authContext.sessionId };
if (level === 'error') logger.error(logMeta, msg);
else if (level === 'warn') logger.warn(logMeta, msg);
else if (level === 'debug') logger.debug(logMeta, msg);
else logger.info(logMeta, msg);
} else if (payload.type === 'agent_stop') {
logger.info('Agent stop requested');
harness?.interrupt();
} else if (payload.type === 'read_details') {
// Read the details field for a category item
const { category, name } = payload;
if (!harness) {
socket.send(JSON.stringify({ type: 'details_error', category, name, error: 'Session not ready' }));
} else {
try {
const details = await harness.readDetails(category, name);
if (details === null) {
socket.send(JSON.stringify({ type: 'details_error', category, name, error: 'Item not found or has no details' }));
} else {
socket.send(JSON.stringify({ type: 'details_data', category, name, details }));
}
} catch (error) {
logger.error({ error, category, name }, 'Error reading details');
socket.send(JSON.stringify({ type: 'details_error', category, name, error: 'Failed to read details' }));
}
}
} else if (payload.type === 'update_details') {
// User submitted a revised details string — diff and invoke the appropriate subagent
const { category, name, details: newDetails } = payload;
if (!harness) {
socket.send(JSON.stringify({ type: 'details_updated', category, name, success: false, error: 'Session not ready' }));
} else {
try {
let hadError = false;
for await (const event of harness.streamDetailsUpdate(category, name, newDetails)) {
const e = event as HarnessEvent;
switch (e.type) {
case 'chunk':
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: category, content: e.content }));
break;
case 'subagent_chunk':
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: e.agentName, content: e.content }));
break;
case 'subagent_tool_call':
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break;
case 'tool_call':
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break;
case 'image':
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break;
case 'error':
hadError = true;
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: category, content: `Error in ${e.source}` }));
break;
case 'done':
break;
}
}
socket.send(JSON.stringify({ type: 'details_updated', category, name, success: !hadError }));
} catch (error) {
logger.error({ error, category, name }, 'Error updating details');
socket.send(JSON.stringify({ type: 'details_updated', category, name, success: false, error: 'Failed to update details' }));
}
}
} else if (this.isDatafeedMessage(payload)) {
// Historical data request - send to OHLC service
logger.info({ type: payload.type }, 'Routing to datafeed handler');

View File

@@ -733,6 +733,106 @@ export class AgentHarness {
return this.mcpClient.callTool(name, args);
}
/**
 * Read the `details` field for a category item directly from the sandbox.
 *
 * Issues a `python_read` MCP call and scans the returned content blocks for a
 * text block whose JSON payload reports `exists` with a `metadata.details`
 * value. Returns null if the item doesn't exist or has no details; any
 * transport or parse failure is treated the same way (best-effort read).
 */
async readDetails(category: string, name: string): Promise<string | null> {
  try {
    const response = await this.mcpClient.callTool('python_read', { category, name });
    const blocks = (response as any)?.content;
    if (!Array.isArray(blocks)) return null;
    for (const block of blocks) {
      // Only non-empty text blocks can carry the JSON payload we want.
      if (block.type !== 'text' || !block.text) continue;
      try {
        const payload = JSON.parse(block.text);
        if (payload?.exists && payload?.metadata?.details !== undefined) {
          return payload.metadata.details as string;
        }
      } catch { /* malformed JSON in this block — try the next one */ }
    }
    return null;
  } catch {
    // Deliberate best-effort: callers treat null as "no details available".
    return null;
  }
}
/**
 * Stream a details-driven code update for a category item.
 *
 * Computes a unified diff between the stored details and `newDetails`,
 * then instructs the appropriate subagent to update the Python code to
 * match the revised specification. Yields HarnessEvents from the subagent
 * so the WebSocket handler can stream progress to the web client.
 *
 * @param category  Item category: 'indicator' | 'strategy' | 'research'.
 * @param name      Item name as passed to python_write.
 * @param newDetails Revised specification text supplied by the user.
 * @param signal    Optional abort signal forwarded to the subagent stream.
 */
async *streamDetailsUpdate(
  category: string,
  name: string,
  newDetails: string,
  signal?: AbortSignal,
): AsyncGenerator<HarnessEvent> {
  const logger = this.config.logger;
  // 1. Read current details (missing details are treated as empty).
  const oldDetails = (await this.readDetails(category, name)) ?? '';
  // 2. Compute a simple unified diff; an empty diff means nothing to do.
  const diff = buildUnifiedDiff(oldDetails, newDetails, `${category}/${name} details`);
  if (!diff.trim()) {
    yield { type: 'done', content: 'No changes detected in the details.' };
    return;
  }
  // 3. Build instruction for the subagent
  const instruction = buildDetailsUpdateInstruction(category, name, newDetails, diff);
  // 4. Build a minimal subagent context
  const context = {
    userContext: createUserContext({
      userId: this.config.userId,
      sessionId: this.config.sessionId,
      license: this.config.license,
      channelType: this.config.channelType ?? ChannelType.WEBSOCKET,
      channelUserId: this.config.channelUserId ?? this.config.userId,
    }),
  };
  // 5. Resolve (lazily initializing) the subagent for this category.
  //    The three categories share identical wiring, so handle them uniformly
  //    instead of repeating the init/check/log/yield* sequence three times.
  let subagent;
  switch (category) {
    case 'indicator':
      if (!this.indicatorSubagent) await this.initializeIndicatorSubagent();
      subagent = this.indicatorSubagent;
      break;
    case 'strategy':
      if (!this.strategySubagent) await this.initializeStrategySubagent();
      subagent = this.strategySubagent;
      break;
    case 'research':
      if (!this.researchSubagent) await this.initializeResearchSubagent();
      subagent = this.researchSubagent;
      break;
    default:
      // Unknown category — report as a harness-level error, not a subagent one.
      yield { type: 'error', source: 'harness', fatal: false };
      return;
  }
  if (!subagent) {
    // Initialization failed; surface a non-fatal error attributed to the category.
    yield { type: 'error', source: category, fatal: false };
    return;
  }
  logger.info({ category, name }, `Streaming ${category} details update`);
  yield* subagent.streamEvents(context, instruction, signal);
}
/**
* Expose MCP client so channel handlers can wire ContainerSync after harness init.
*/
@@ -1138,3 +1238,95 @@ export class AgentHarness {
await this.mcpClient.disconnect();
}
}
// =============================================================================
// Details update helpers (module-level, no class dependency)
// =============================================================================
/**
 * Produce a minimal unified diff between two strings, suitable for passing to
 * an LLM as a change description. Returns an empty string when there is no diff.
 *
 * Emits full context with `--- a/<label>` / `+++ b/<label>` headers but no
 * `@@` hunk markers — sufficient for an LLM to understand structural changes,
 * not for `patch`-style application. At a replacement point, deletions are
 * emitted before additions (the conventional unified-diff order).
 */
function buildUnifiedDiff(oldText: string, newText: string, label: string): string {
  // Direct comparison — equivalent to comparing the split/joined lines.
  if (oldText === newText) return '';
  const oldLines = oldText.split('\n');
  const newLines = newText.split('\n');
  const lines: string[] = [`--- a/${label}`, `+++ b/${label}`];
  // Walk both sides against their LCS: a line matching the current LCS anchor
  // on both sides is context; otherwise emit deletions, then additions.
  const lcs = computeLCS(oldLines, newLines);
  let oi = 0, ni = 0, li = 0;
  while (oi < oldLines.length || ni < newLines.length) {
    const anchor = li < lcs.length ? lcs[li] : undefined;
    if (
      oi < oldLines.length && ni < newLines.length &&
      anchor !== undefined && oldLines[oi] === anchor && newLines[ni] === anchor
    ) {
      lines.push(` ${oldLines[oi]}`);
      oi++; ni++; li++;
    } else if (oi < oldLines.length && (anchor === undefined || oldLines[oi] !== anchor)) {
      lines.push(`-${oldLines[oi]}`);
      oi++;
    } else {
      lines.push(`+${newLines[ni]}`);
      ni++;
    }
  }
  return lines.join('\n');
}
/**
 * Compute the longest common subsequence of two string arrays using the
 * standard O(m*n) dynamic-programming table, then backtrack to recover it.
 */
function computeLCS(a: string[], b: string[]): string[] {
  const m = a.length, n = b.length;
  const dp: number[][] = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      dp[i][j] = a[i - 1] === b[j - 1] ? dp[i - 1][j - 1] + 1 : Math.max(dp[i - 1][j], dp[i][j - 1]);
    }
  }
  const result: string[] = [];
  let i = m, j = n;
  while (i > 0 && j > 0) {
    if (a[i - 1] === b[j - 1]) { result.unshift(a[i - 1]); i--; j--; }
    else if (dp[i - 1][j] > dp[i][j - 1]) { i--; }
    else { j--; }
  }
  return result;
}
/**
 * Build the instruction string passed to the subagent when the user edits
 * the details field of a category item.
 */
function buildDetailsUpdateInstruction(
  category: string,
  name: string,
  newDetails: string,
  diff: string,
): string {
  // Human-readable label per category; anything unrecognized is treated as research.
  const categoryLabels: Record<string, string> = {
    indicator: 'custom indicator',
    strategy: 'trading strategy',
  };
  const categoryLabel = categoryLabels[category] ?? 'research script';
  return `The user has edited the specification (details) for the ${categoryLabel} named "${name}".
Your task: update the Python implementation to match the revised specification. Use \`python_edit\` with targeted patches — make only the changes implied by the diff below. Also update the \`details\` field via the \`details\` parameter on \`python_edit\` to store the new specification text.
## Revised specification
${newDetails}
## What changed (unified diff of the details text)
\`\`\`diff
${diff}
\`\`\`
Instructions:
- Read the current implementation first with \`python_read(category="${category}", name="${name}")\` to understand what exists.
- Apply only the changes described by the diff above — do not rewrite unrelated parts of the code.
- Pass \`details\` as the full revised specification text shown above.
- After editing, confirm the change was applied and validation passed.`;
}

View File

@@ -208,9 +208,13 @@ workspace_patch("indicators", [
Custom indicators are Python scripts in the `indicator` category. Use `python_write` / `python_edit` / `python_read` / `python_list` exactly as you would for research scripts, but with `category="indicator"`.
`python_write` requires `category`, `name`, `description`, `details`, and `code`. The `details` field must be a complete markdown description of the indicator formula, algorithm, all parameters and their semantics, input series, output columns, and any non-obvious implementation choices with enough detail that another agent could reproduce the code from it alone.
### Writing a Custom Indicator Script
A custom indicator must define a **top-level function whose name exactly matches the sanitized directory name** (the name you passed to `python_write`, after sanitization). It receives the OHLC columns it needs as positional arguments, matching `input_series` in the metadata. It must return a `pd.Series` (single output) or `pd.DataFrame` (multi-output, column names must match `output_columns`).
A custom indicator must define a **top-level function whose name is the lowercase, snake_case form of the `name` passed to `python_write`**: take `name`, lowercase it, replace spaces and hyphens with underscores. For example, `name="TrendFlex"` → function `def trendflex(...)`; `name="VW RSI"` → function `def vw_rsi(...)`.
The function receives the OHLC columns it needs as positional arguments, matching `input_series` in the metadata. It must return a `pd.Series` (single output) or `pd.DataFrame` (multi-output, column names must match `output_columns`).
```python
# Example: volume-weighted RSI (function name = "vw_rsi", directory name = "vw_rsi")
@@ -243,6 +247,15 @@ After writing a custom indicator with `python_write`, add it to the workspace us
When writing a custom indicator you **must** supply complete metadata so the web client can auto-construct the TradingView plotter. Pass these fields in the `metadata` argument to `python_write`:
**Top-level required fields** (not inside `metadata`):
| Field | Required | Description |
|---|---|---|
| `description` | yes | One-sentence summary |
| `details` | yes | Full markdown description — formula, algorithm, all parameters and their semantics, input series, output columns, and any non-obvious choices. Enough detail for another agent to reproduce the code. |
**`metadata` fields:**
| Field | Type | Required | Description |
|---|---|---|---|
| `parameters` | dict | yes | Parameter schema: `{param_name: {type, default, description?, min?, max?}}` |
@@ -322,6 +335,15 @@ python_write(
category="indicator",
name="vw_rsi",
description="RSI weighted by relative volume.",
details="""## Volume-Weighted RSI
Computes RSI(length) on close prices, then scales it by relative volume (current volume divided by its rolling mean over the same period), and applies a 3-bar smoothing average.
**Formula:** `(rsi * (volume / volume.rolling(length).mean())).rolling(3).mean()`
**Inputs:** close (Series), volume (Series)
**Output:** single Series named "value" — the smoothed volume-weighted RSI, plotted in a separate pane.
**Parameters:** length (int, default 14, range 2–200) — lookback period for both RSI and the volume mean.""",
code="""
import pandas as pd
import pandas_ta as ta
@@ -351,6 +373,15 @@ python_write(
category="indicator",
name="my_bbands",
description="Custom Bollinger Bands.",
details="""## Custom Bollinger Bands
Standard Bollinger Bands computed via pandas-ta on close prices.
**Formula:** upper = SMA(length) + std * σ(length); lower = SMA(length) - std * σ(length); mid = SMA(length)
**Inputs:** close (Series)
**Outputs:** upper, mid, lower — three Series plotted on the price pane with a shaded fill between upper and lower.
**Parameters:** length (int, default 20, range 5–500), std (float, default 2.0, range 0.5–5.0)""",
code="""
import pandas as pd
import pandas_ta as ta
@@ -439,13 +470,15 @@ Use `evaluate_indicator` to test any indicator (standard or custom) before addin
evaluate_indicator(
symbol="BTC/USDT.BINANCE",
from_time="30 days ago",
to_time="now",
to_time="0 minutes ago",
period_seconds=3600,
pandas_ta_name="custom_vw_rsi",
parameters={"length": 14}
)
```
**Time format for `from_time`/`to_time`**: Use a relative string like `"30 days ago"` / `"1 minute ago"` (format: `"N unit(s) ago"` where unit is second/minute/hour/day/week/month/year), an ISO date string like `"2024-04-20"`, or a Unix timestamp integer. Do **not** use `"now"` — it is not a valid value; use `"0 minutes ago"` instead.
Returns a structured array of `{timestamp, value}` (or multiple value columns for multi-output indicators like MACD, BBands). Use the results to confirm the indicator is computing as expected before patching the workspace.
---

View File

@@ -42,7 +42,7 @@ Quick reference — approximate bars per resolution at various windows:
You have direct access to these MCP tools:
- **python_write**: Create a new script (research, strategy, or indicator category)
- Required: category, name, description, code
- Required: category, name, description, details, code
- Optional: metadata (category-specific fields — see below)
- **For research**: fully executes the script and returns all output (stdout, stderr) and captured chart images. The response IS the execution result — **do not call `execute_research` afterward**.
- **For indicator/strategy**: runs against synthetic test data to catch compile/runtime errors; no chart images are generated.
@@ -50,7 +50,7 @@ You have direct access to these MCP tools:
- **python_edit**: Update an existing script
- Required: category, name
- Optional: code, description, metadata
- Optional: code, patches, description, details (full replacement), detail_patches (targeted text replacements in details), metadata
- **For research**: re-executes the script when code is changed and returns all output and images. **Do not call `execute_research` afterward**.
- **For indicator/strategy**: re-runs the validation test only.
- Returns validation results and execution output
@@ -100,6 +100,7 @@ When a user requests analysis:
- Write clean, well-commented Python code
- Include proper error handling
- Use appropriate ticker symbols, time ranges, and periods
- Always supply `details`: a complete markdown description of what the script does — algorithms, data sources, parameters, and any non-obvious implementation choices — with enough detail that another agent could reproduce the code from it alone
- The script will auto-execute after writing
4. **Check execution results**: The tool returns the execution result directly — this is the script's actual output:
@@ -164,6 +165,7 @@ You:
3. Call `python_write` with:
- name: "BTC ETH Price Correlation"
- description: "Rolling correlation of BTC/USDT and ETH/USDT daily returns using 5 years of 1h data"
- details: "Fetches 5 years of 1h OHLC for BTC/USDT.BINANCE and ETH/USDT.BINANCE. Computes log daily returns from close prices. Calculates a 30-day rolling Pearson correlation between the two return series. Plots the correlation over time with a horizontal zero line. Prints bar count and date range after each fetch."
- code: (Python script fetching 5yr of 1h OHLC for both tickers and plotting rolling correlation)
4. Check execution results
5. If successful, respond with a brief summary of what the script does

View File

@@ -103,6 +103,21 @@ python_write(
category="strategy",
name="RSI Mean Reversion",
description="Buy oversold, sell overbought based on RSI(14) on BTC/USDT 1h bars.",
details="""## RSI Mean Reversion
Trades BTC/USDT on 5-minute bars using RSI(14) as the signal.
**Entry logic:**
- Buy when RSI crosses below `oversold` (default 30) — mean-reversion long
- Sell when RSI crosses above `overbought` (default 70) — mean-reversion short
**Position sizing:** `trade_qty` (default 0.01 BTC) per trade, fixed quantity.
**Parameters:** rsi_length (14), oversold (30), overbought (70), trade_qty (0.01)
**Data:** BTC/USDT.BINANCE 5-minute OHLCV bars. Requires at least `rsi_length + 1` bars before trading.
**No stop-loss or take-profit** — exits only on the opposite RSI signal.""",
code="""...""",
metadata={
"data_feeds": [
@@ -113,12 +128,18 @@ python_write(
"oversold": {"default": 30, "description": "RSI oversold threshold"},
"overbought": {"default": 70, "description": "RSI overbought threshold"},
"trade_qty": {"default": 0.01, "description": "Trade quantity in BTC"}
},
"conda_packages": []
}
}
)
```
### Top-level fields
| Field | Required | Description |
|-------|----------|-------------|
| `description` | yes | One-sentence summary of the strategy |
| `details` | yes | Full markdown description — algorithm, entry/exit logic, parameters, data feeds, position sizing, and any non-obvious implementation choices. Must be detailed enough that another agent could reproduce the code from it alone. |
### Metadata fields
| Field | Required | Description |
@@ -297,8 +318,9 @@ class VolumeBreakout(PandasStrategy):
2. **Write the strategy**:
```
python_write(category="strategy", name="...", description="...", code="...", metadata={...})
python_write(category="strategy", name="...", description="...", details="...", code="...", metadata={...})
```
Always include `details`: a complete markdown description covering algorithm, entry/exit logic, all parameters, data feeds, and position sizing — enough detail for another agent to reproduce the code.
After writing, the system automatically runs the strategy against synthetic data. If validation fails, fix the reported error before proceeding.
3. **Run a backtest** — choose the window to target 100k–200k bars at the strategy's resolution (max 5 years):

View File

@@ -20,6 +20,31 @@ export interface MCPToolInfo {
};
}
/**
 * Strip the `details` field from all entries in a `_types` workspace store before
 * syncing to clients. `details` is a long markdown blob intended for agent consumption
 * only and should not be included in the compact workspace state sent to the web client.
 *
 * Non-`_types` stores, non-object states, and states without an object `types`
 * map are returned unchanged (same reference).
 */
function filterTypeStoreState(storeName: string, state: unknown): unknown {
  if (!storeName.endsWith('_types') || state === null || typeof state !== 'object') {
    return state;
  }
  const record = state as Record<string, unknown>;
  const types = record['types'];
  if (types === null || typeof types !== 'object') {
    return state;
  }
  // Rebuild the types map with `details` dropped from every object entry;
  // non-object entries pass through untouched.
  const sanitized = Object.fromEntries(
    Object.entries(types as Record<string, unknown>).map(([key, entry]) => {
      if (entry === null || typeof entry !== 'object') {
        return [key, entry] as const;
      }
      const { details: _omitted, ...rest } = entry as Record<string, unknown>;
      return [key, rest] as const;
    }),
  );
  return { ...record, types: sanitized };
}
/**
* Create a LangChain tool from an MCP tool definition
*/
@@ -57,12 +82,14 @@ export function createMCPToolWrapper(
(toolInfo.name === 'workspace_patch' || toolInfo.name === 'workspace_write') &&
parsed?.success && parsed?.data !== undefined
) {
onWorkspaceMutation((input as any).store_name as string, parsed.data);
const storeName = (input as any).store_name as string;
onWorkspaceMutation(storeName, filterTypeStoreState(storeName, parsed.data));
}
// python_write / python_edit / python_delete / python_revert:
// {"_workspace_sync": {"store": <name>, "data": <state>}}
if (parsed?._workspace_sync?.store && parsed._workspace_sync.data !== undefined) {
onWorkspaceMutation(parsed._workspace_sync.store, parsed._workspace_sync.data);
const storeName = parsed._workspace_sync.store as string;
onWorkspaceMutation(storeName, filterTypeStoreState(storeName, parsed._workspace_sync.data));
}
} catch { /* ignore parse errors */ }
}

View File

@@ -101,17 +101,17 @@ export const DEFAULT_STORES: StoreConfig[] = [
{
name: 'indicator_types',
persistent: true,
initialState: () => ({}),
initialState: () => ({ types: {} }),
},
{
name: 'strategy_types',
persistent: true,
initialState: () => ({}),
initialState: () => ({ types: {} }),
},
{
name: 'research_types',
persistent: true,
initialState: () => ({}),
initialState: () => ({ types: {} }),
},
{
name: 'channelState',

View File

@@ -97,6 +97,15 @@ export class WorkspaceManager {
this.dirtyStores.add(storeName); // persist migrated format immediately
this.logger.info({ store: storeName }, 'Migrated shapes store to wrapped format');
}
// Migrate *_types stores from old flat format { key: {...} } to wrapped { types: { key: {...} } }
if (
(storeName === 'indicator_types' || storeName === 'strategy_types' || storeName === 'research_types') &&
state && typeof state === 'object' && !('types' in (state as object))
) {
migratedState = { types: state };
this.dirtyStores.add(storeName); // persist migrated format immediately
this.logger.info({ store: storeName }, 'Migrated types store to wrapped format');
}
this.registry.setState(storeName, migratedState);
this.logger.debug({ store: storeName }, 'Loaded persistent store');
}