data fixes, partial custom indicator support

2026-04-08 21:28:31 -04:00
parent b701554996
commit a70dcd954f
81 changed files with 5438 additions and 1852 deletions
--- a/gateway/src/tools/mcp/mcp-tool-wrapper.ts
+++ b/gateway/src/tools/mcp/mcp-tool-wrapper.ts
@@ -27,7 +27,8 @@ export function createMCPToolWrapper(
  toolInfo: MCPToolInfo,
  mcpClient: MCPClientConnector,
  logger: FastifyBaseLogger,
-  onImage?: (image: { data: string; mimeType: string }) => void
+  onImage?: (image: { data: string; mimeType: string }) => void,
+  onWorkspaceMutation?: (storeName: string, newState: unknown) => void
 ): DynamicStructuredTool {
  // Convert MCP input schema to Zod schema
  const zodSchema = mcpInputSchemaToZod(toolInfo.inputSchema);
@@ -42,6 +43,28 @@ export function createMCPToolWrapper(

        logger.info({ tool: toolInfo.name }, 'MCP tool call completed');

+        // Fire workspace mutation callback when workspace_patch or workspace_write succeeds.
+        // The sandbox returns {"success": true, "data": <newState>} as a text content item.
+        if (
+          onWorkspaceMutation &&
+          (toolInfo.name === 'workspace_patch' || toolInfo.name === 'workspace_write')
+        ) {
+          const content = (result as any)?.content;
+          if (Array.isArray(content)) {
+            for (const item of content) {
+              if (item.type === 'text' && item.text) {
+                try {
+                  const parsed = JSON.parse(item.text);
+                  if (parsed?.success && parsed?.data !== undefined) {
+                    onWorkspaceMutation((input as any).store_name as string, parsed.data);
+                  }
+                } catch { /* ignore parse errors */ }
+                break; // only need first text item
+              }
+            }
+          }
+        }
+
        // Handle different MCP result formats
        if (typeof result === 'string') {
          return result;
@@ -180,7 +203,10 @@ export function createMCPToolWrappers(
  toolInfos: MCPToolInfo[],
  mcpClient: MCPClientConnector,
  logger: FastifyBaseLogger,
-  onImage?: (image: { data: string; mimeType: string }) => void
+  onImage?: (image: { data: string; mimeType: string }) => void,
+  onWorkspaceMutation?: (storeName: string, newState: unknown) => void
 ): DynamicStructuredTool[] {
-  return toolInfos.map(toolInfo => createMCPToolWrapper(toolInfo, mcpClient, logger, onImage));
+  return toolInfos.map(toolInfo =>
+    createMCPToolWrapper(toolInfo, mcpClient, logger, onImage, onWorkspaceMutation)
+  );
 }
--- a/gateway/src/tools/platform/arxiv-search.tool.ts
+++ b/gateway/src/tools/platform/arxiv-search.tool.ts
@@ -0,0 +1,65 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import { z } from 'zod';
+import type { FastifyBaseLogger } from 'fastify';
+
+/**
+ * ArXiv Search Tool
+ *
+ * Searches arXiv for academic papers using the LangChain ArxivRetriever.
+ * Free, no API key required.
+ */
+
+/** Dependencies needed to build the `arxiv_search` tool. */
+export interface ArxivSearchToolConfig {
+  /** Logger that receives the tool's debug, info, and error records. */
+  logger: FastifyBaseLogger;
+}
+
+/**
+ * Builds the `arxiv_search` tool.
+ *
+ * On each call the tool dynamically imports `ArxivRetriever`, runs the query with
+ * `getFullDocuments: false`, and maps every returned document to
+ * `{ title, authors, abstract, published, url, pdf_url }`.
+ *
+ * Errors raised while searching are caught: success yields the JSON string
+ * `{ query, results }`, failure yields the JSON string `{ error }`.
+ *
+ * @param config - Supplies the logger used for debug, info, and error records.
+ * @returns A `DynamicStructuredTool` named `arxiv_search`.
+ */
+export function createArxivSearchTool(config: ArxivSearchToolConfig): DynamicStructuredTool {
+  const { logger } = config;
+
+  return new DynamicStructuredTool({
+    name: 'arxiv_search',
+    description: 'Search arXiv for academic papers. Returns titles, authors, abstracts, and PDF links. Use this for scientific or technical research queries instead of web_search.',
+    schema: z.object({
+      query: z.string().describe('The research query'),
+      max_results: z.number().optional().default(5).describe('Maximum number of papers to return (default: 5)'),
+    }),
+    func: async ({ query, max_results }) => {
+      logger.debug({ query, max_results }, 'Executing arxiv_search tool');
+
+      try {
+        // Loaded lazily so the retriever module is only pulled in when the tool actually runs.
+        const { ArxivRetriever } = await import('@langchain/community/retrievers/arxiv');
+
+        const retriever = new ArxivRetriever({
+          getFullDocuments: false,
+          maxSearchResults: max_results,
+        });
+
+        const docs = await retriever.invoke(query);
+
+        const results = docs.map(doc => {
+          const meta = doc.metadata as Record<string, any>;
+          // Derive PDF URL from abstract URL: /abs/ID -> /pdf/ID
+          const pdfUrl = typeof meta.url === 'string'
+            ? meta.url.replace('/abs/', '/pdf/')
+            : undefined;
+
+          return {
+            title: meta.title,
+            // Fall back to an empty list when the metadata carries no author array.
+            authors: Array.isArray(meta.authors) ? meta.authors : [],
+            abstract: doc.pageContent,
+            published: meta.published,
+            url: meta.url,
+            pdf_url: pdfUrl,
+          };
+        });
+
+        logger.info({ query, resultCount: results.length }, 'arXiv search completed');
+
+        return JSON.stringify({ query, results });
+      } catch (error) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        // An Error's `message` is a non-enumerable property, so it may not survive
+        // serialization of the `error` field; log it explicitly (as web_search does).
+        logger.error({ error, query, errorMessage }, 'arxiv_search tool failed');
+        return JSON.stringify({ error: errorMessage });
+      }
+    },
+  });
+}
--- a/gateway/src/tools/platform/fetch-page.tool.ts
+++ b/gateway/src/tools/platform/fetch-page.tool.ts
@@ -0,0 +1,80 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import { z } from 'zod';
+import type { FastifyBaseLogger } from 'fastify';
+
+// Upper bound, in characters, on the page text returned by fetch_page; longer content is cut off.
+const MAX_CONTENT_LENGTH = 50_000;
+
+/**
+ * Fetch Page Tool
+ *
+ * Fetches a URL and returns its content as text/markdown.
+ * - PDFs are converted to text using pdf-parse
+ * - HTML pages are scraped with cheerio
+ * - Output is truncated to 50k characters
+ */
+
+/** Dependencies needed to build the `fetch_page` tool. */
+export interface FetchPageToolConfig {
+  /** Logger that receives the tool's debug and error records. */
+  logger: FastifyBaseLogger;
+}
+
+/**
+ * Builds the `fetch_page` tool.
+ *
+ * The tool downloads a URL (30 s timeout) and returns its text:
+ * - PDF responses (by content type or a `.pdf` path) are parsed with pdf-parse.
+ * - Everything else is treated as HTML and reduced to text with cheerio,
+ *   preferring `article` / `main` content over the whole body.
+ * - The text is cut to `MAX_CONTENT_LENGTH` characters and flagged as truncated.
+ *
+ * Errors are caught: success yields the JSON string `{ url, content, truncated }`,
+ * failure (including non-2xx responses and unsupported schemes) yields `{ error, url }`.
+ *
+ * @param config - Supplies the logger used for debug and error records.
+ * @returns A `DynamicStructuredTool` named `fetch_page`.
+ */
+export function createFetchPageTool(config: FetchPageToolConfig): DynamicStructuredTool {
+  const { logger } = config;
+
+  return new DynamicStructuredTool({
+    name: 'fetch_page',
+    description: 'Fetch a web page or PDF and return its text content. PDFs are automatically converted to markdown. Use this after web_search or arxiv_search to read the full content of a result.',
+    schema: z.object({
+      url: z.string().url().describe('The URL to fetch'),
+    }),
+    func: async ({ url }) => {
+      logger.debug({ url }, 'Executing fetch_page tool');
+
+      try {
+        // The URL is chosen by the model, so only plain web schemes are allowed.
+        // NOTE(review): this does not block private/internal hosts; add an allow/deny
+        // list if the gateway can reach sensitive internal endpoints.
+        const target = new URL(url);
+        if (target.protocol !== 'http:' && target.protocol !== 'https:') {
+          return JSON.stringify({ error: `Unsupported URL scheme: ${target.protocol}`, url });
+        }
+
+        const response = await fetch(url, {
+          headers: { 'User-Agent': 'Mozilla/5.0 (compatible; research-agent/1.0)' },
+          signal: AbortSignal.timeout(30_000),
+        });
+
+        if (!response.ok) {
+          return JSON.stringify({ error: `HTTP ${response.status}: ${response.statusText}`, url });
+        }
+
+        const contentType = response.headers.get('content-type') ?? '';
+        // Check the path only, so a query string or fragment after ".pdf" does not hide the extension.
+        const isPdf = contentType.includes('pdf') || target.pathname.toLowerCase().endsWith('.pdf');
+
+        let content: string;
+
+        if (isPdf) {
+          const { PDFParse } = await import('pdf-parse');
+          const parser = new PDFParse({ data: await response.arrayBuffer() });
+          try {
+            const result = await parser.getText();
+            content = result.text;
+            logger.debug({ url, chars: content.length, pages: result.pages.length }, 'PDF text extracted');
+          } finally {
+            // Release the parser's document resources whether or not extraction succeeded.
+            await parser.destroy();
+          }
+        } else {
+          const html = await response.text();
+          const { load } = await import('cheerio');
+          const $ = load(html);
+
+          // Remove non-content elements
+          $('script, style, nav, footer, header, aside, [role="navigation"]').remove();
+
+          // Prefer article/main content
+          const main = $('article, main, [role="main"]').first();
+          // Collapse runs of three or more whitespace characters into a paragraph break.
+          content = (main.length ? main : $('body')).text().replace(/\s{3,}/g, '\n\n').trim();
+
+          logger.debug({ url, chars: content.length }, 'HTML page scraped');
+        }
+
+        const truncated = content.length > MAX_CONTENT_LENGTH;
+        const output = truncated ? content.slice(0, MAX_CONTENT_LENGTH) + '\n\n[content truncated]' : content;
+
+        return JSON.stringify({ url, content: output, truncated });
+      } catch (error) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        // An Error's `message` is a non-enumerable property, so it may not survive
+        // serialization of the `error` field; log it explicitly (as web_search does).
+        logger.error({ error, url, errorMessage }, 'fetch_page tool failed');
+        return JSON.stringify({ error: errorMessage, url });
+      }
+    },
+  });
+}
--- a/gateway/src/tools/platform/indicator-agent.tool.ts
+++ b/gateway/src/tools/platform/indicator-agent.tool.ts
@@ -0,0 +1,53 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import { z } from 'zod';
+import type { FastifyBaseLogger } from 'fastify';
+import type { IndicatorSubagent } from '../../harness/subagents/indicator/index.js';
+import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
+
+/** Dependencies needed to build the `indicator` delegation tool. */
+export interface IndicatorAgentToolConfig {
+  /** Subagent whose `execute` method receives the delegated instruction. */
+  indicatorSubagent: IndicatorSubagent;
+  /** Context passed unchanged as the first argument of every `execute` call. */
+  context: SubagentContext;
+  /** Logger that receives the tool's info and error records. */
+  logger: FastifyBaseLogger;
+}
+
+/**
+ * Builds the `indicator` tool: a thin LangChain wrapper that hands an instruction
+ * to the indicator subagent and returns whatever the subagent replies.
+ * Follows the same pattern as research-agent.tool.ts.
+ */
+export function createIndicatorAgentTool(config: IndicatorAgentToolConfig): DynamicStructuredTool {
+  const { indicatorSubagent, context, logger } = config;
+
+  // Input contract: a single free-form instruction string.
+  const schema = z.object({
+    instruction: z.string().describe(
+      'The indicator task to perform. Be specific about which indicators, parameters, ' +
+      'and what changes are needed. Include relevant context like the current symbol ' +
+      'if the user mentioned it.'
+    ),
+  });
+
+  // Forwards the instruction to the subagent; failures are logged and rethrown to the caller.
+  const func = async ({ instruction }: { instruction: string }): Promise<string> => {
+    // Only the first 100 characters of the instruction are logged.
+    const preview = instruction.substring(0, 100);
+    logger.info({ instruction: preview }, 'Delegating to indicator subagent');
+
+    try {
+      const reply = await indicatorSubagent.execute(context, instruction);
+      return reply;
+    } catch (error) {
+      const errorMessage = (error as Error)?.message;
+      logger.error({ error, errorMessage }, 'Indicator subagent failed');
+      throw error;
+    }
+  };
+
+  return new DynamicStructuredTool({
+    name: 'indicator',
+    description: `Delegate to the indicator subagent for all indicator-related tasks on the chart.
+
+Use this tool for:
+- Reading which indicators are currently on the chart and explaining what they show
+- Adding indicators to the chart ("show RSI", "add Bollinger Bands with std=1.5")
+- Modifying indicator parameters ("change MACD fast to 8", "set RSI length to 21")
+- Removing indicators ("remove all moving averages", "clear the volume indicators")
+- Toggling indicator visibility
+- Creating custom indicators using Python scripts
+- Recommending indicators for a given strategy or analysis goal
+
+ALWAYS use this tool for any request about the chart's indicators.
+NEVER modify the indicators workspace store directly.`,
+    schema,
+    func,
+  });
+}
--- a/gateway/src/tools/platform/web-explore-agent.tool.ts
+++ b/gateway/src/tools/platform/web-explore-agent.tool.ts
@@ -0,0 +1,49 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import { z } from 'zod';
+import type { FastifyBaseLogger } from 'fastify';
+import type { WebExploreSubagent } from '../../harness/subagents/web-explore/index.js';
+import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
+
+/** Dependencies needed to build the `web_explore` delegation tool. */
+export interface WebExploreAgentToolConfig {
+  /** Subagent whose `execute` method receives the delegated instruction. */
+  webExploreSubagent: WebExploreSubagent;
+  /** Context passed unchanged as the first argument of every `execute` call. */
+  context: SubagentContext;
+  /** Logger that receives the tool's info and error records. */
+  logger: FastifyBaseLogger;
+}
+
+/**
+ * Builds the `web_explore` tool: a thin LangChain wrapper that hands an instruction
+ * to the web-explore subagent and returns whatever the subagent replies.
+ * Choosing between web search and arXiv is left to the subagent.
+ */
+export function createWebExploreAgentTool(config: WebExploreAgentToolConfig): DynamicStructuredTool {
+  const { webExploreSubagent, context, logger } = config;
+
+  // Input contract: a single free-form instruction string.
+  const schema = z.object({
+    instruction: z.string().describe(
+      'What to search for and summarize. Be specific — include the topic, what aspects matter, ' +
+      'and any context that helps narrow the search (e.g. "recent papers on momentum factor in equities" ' +
+      'or "how to configure rate limiting in Fastify").'
+    ),
+  });
+
+  // Forwards the instruction to the subagent; failures are logged and rethrown to the caller.
+  const func = async ({ instruction }: { instruction: string }): Promise<string> => {
+    // Only the first 100 characters of the instruction are logged.
+    const preview = instruction.substring(0, 100);
+    logger.info({ instruction: preview }, 'Delegating to web-explore subagent');
+
+    try {
+      const reply = await webExploreSubagent.execute(context, instruction);
+      return reply;
+    } catch (error) {
+      const errorMessage = (error as Error)?.message;
+      logger.error({ error, errorMessage }, 'Web explore subagent failed');
+      throw error;
+    }
+  };
+
+  return new DynamicStructuredTool({
+    name: 'web_explore',
+    description: `Search the web or academic databases and return a summarized answer.
+
+Use this tool when the user asks about:
+- Current events, news, or real-time information
+- Documentation, tutorials, or how-to guides
+- Academic papers, research findings, or scientific topics
+- Any topic that benefits from external sources
+
+The subagent will search the web (or arXiv for academic queries), fetch relevant content, and return a markdown summary with cited sources.`,
+    schema,
+    func,
+  });
+}
--- a/gateway/src/tools/platform/web-search.tool.ts
+++ b/gateway/src/tools/platform/web-search.tool.ts
@@ -0,0 +1,65 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import { z } from 'zod';
+import type { FastifyBaseLogger } from 'fastify';
+
+/**
+ * Web Search Tool
+ *
+ * Calls the Tavily REST API directly. The config interface is intentionally
+ * minimal so the underlying provider can be swapped without touching callers.
+ */
+
+/** Dependencies needed to build the `web_search` tool. */
+export interface WebSearchToolConfig {
+  /** API key sent as `api_key` in the body of each search request. */
+  apiKey: string;
+  /** Logger that receives the tool's debug, info, and error records. */
+  logger: FastifyBaseLogger;
+}
+
+/**
+ * Builds the `web_search` tool.
+ *
+ * Each call POSTs the query to the Tavily search endpoint (30 s timeout) and
+ * reshapes every hit to `{ title, url, snippet }`. Errors are caught: success
+ * yields the JSON string `{ query, results }`, failure yields `{ error }`.
+ *
+ * @param config - API key for the search provider and the logger to report to.
+ * @returns A `DynamicStructuredTool` named `web_search`.
+ */
+export function createWebSearchTool(config: WebSearchToolConfig): DynamicStructuredTool {
+  const { apiKey, logger } = config;
+
+  return new DynamicStructuredTool({
+    name: 'web_search',
+    description: 'Search the web. Returns titles, URLs, and content summaries. Use this for general web searches. For academic/scientific papers, prefer arxiv_search instead.',
+    schema: z.object({
+      query: z.string().describe('The search query'),
+      max_results: z.number().optional().default(8).describe('Maximum number of results to return (default: 8)'),
+    }),
+    func: async ({ query, max_results }) => {
+      logger.debug({ query, max_results }, 'Executing web_search tool');
+
+      try {
+        const requestBody = JSON.stringify({
+          api_key: apiKey,
+          query,
+          max_results,
+          search_depth: 'basic',
+        });
+
+        const response = await fetch('https://api.tavily.com/search', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: requestBody,
+          signal: AbortSignal.timeout(30_000),
+        });
+
+        // Surface non-2xx replies as errors, carrying the response body for diagnosis.
+        if (!response.ok) {
+          const text = await response.text();
+          throw new Error(`Tavily API error ${response.status}: ${text}`);
+        }
+
+        const payload = await response.json() as { results?: Array<{ title: string; url: string; content: string }> };
+
+        // A reply without `results` is treated as an empty result list.
+        const hits = payload.results ?? [];
+        const items = hits.map(hit => ({
+          title: hit.title,
+          url: hit.url,
+          snippet: hit.content,
+        }));
+
+        logger.info({ query, resultCount: items.length }, 'Web search completed');
+
+        return JSON.stringify({ query, results: items });
+      } catch (error) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        logger.error({ error, query, errorMessage }, 'web_search tool failed');
+        return JSON.stringify({ error: errorMessage });
+      }
+    },
+  });
+}
--- a/gateway/src/tools/tool-registry.ts
+++ b/gateway/src/tools/tool-registry.ts
@@ -6,6 +6,9 @@ import type { SymbolIndexService } from '../services/symbol-index-service.js';
 import type { WorkspaceManager } from '../workspace/workspace-manager.js';
 import { createSymbolLookupTool } from './platform/symbol-lookup.tool.js';
 import { createGetChartDataTool } from './platform/get-chart-data.tool.js';
+import { createWebSearchTool } from './platform/web-search.tool.js';
+import { createFetchPageTool } from './platform/fetch-page.tool.js';
+import { createArxivSearchTool } from './platform/arxiv-search.tool.js';
 import { createMCPToolWrappers, type MCPToolInfo } from './mcp/mcp-tool-wrapper.js';

 /**
@@ -13,13 +16,13 @@ import { createMCPToolWrappers, type MCPToolInfo } from './mcp/mcp-tool-wrapper.
 * Specifies which tools are available to which agent
 */
 export interface AgentToolConfig {
-  /** Agent name (e.g., 'main', 'research', 'code-reviewer') */
+  /** Agent name (e.g., 'main', 'research', 'web-explore') */
  agentName: string;

  /** Platform tool names to include */
  platformTools: string[];

-  /** MCP tool patterns/names to include (supports wildcards like 'category_*') */
+  /** MCP tool patterns/names to include (supports wildcards like 'python_*') */
  mcpTools: string[];
 }

@@ -31,6 +34,7 @@ export interface PlatformServices {
  ohlcService?: OHLCService | (() => OHLCService | undefined);
  symbolIndexService?: SymbolIndexService | (() => SymbolIndexService | undefined);
  workspaceManager?: WorkspaceManager | (() => WorkspaceManager | undefined);
+  tavilyApiKey?: string;
 }

 /**
@@ -81,7 +85,8 @@ export class ToolRegistry {
    mcpClient?: MCPClientConnector,
    availableMCPTools?: MCPToolInfo[],
    workspaceManager?: WorkspaceManager,
-    onImage?: (image: { data: string; mimeType: string }) => void
+    onImage?: (image: { data: string; mimeType: string }) => void,
+    onWorkspaceMutation?: (storeName: string, newState: unknown) => void
  ): Promise<DynamicStructuredTool[]> {
    const config = this.agentToolConfigs.get(agentName);

@@ -105,7 +110,7 @@ export class ToolRegistry {
    // Add MCP tools (if MCP client and tools are available)
    if (mcpClient && availableMCPTools && availableMCPTools.length > 0) {
      const filteredMCPTools = this.filterMCPTools(availableMCPTools, config.mcpTools);
-      const mcpToolInstances = createMCPToolWrappers(filteredMCPTools, mcpClient, this.logger, onImage);
+      const mcpToolInstances = createMCPToolWrappers(filteredMCPTools, mcpClient, this.logger, onImage, onWorkspaceMutation);
      tools.push(...mcpToolInstances);

      this.logger.debug(
@@ -180,6 +185,25 @@ export class ToolRegistry {
        break;
      }

+      case 'web_search': {
+        if (this.platformServices.tavilyApiKey) {
+          tool = createWebSearchTool({ apiKey: this.platformServices.tavilyApiKey, logger: this.logger });
+        } else {
+          this.logger.warn('TAVILY_API_KEY not configured — web_search tool unavailable');
+        }
+        break;
+      }
+
+      case 'fetch_page': {
+        tool = createFetchPageTool({ logger: this.logger });
+        break;
+      }
+
+      case 'arxiv_search': {
+        tool = createArxivSearchTool({ logger: this.logger });
+        break;
+      }
+
      default:
        this.logger.warn({ tool: toolName }, 'Unknown platform tool');
        return null;
@@ -202,7 +226,7 @@ export class ToolRegistry {

  /**
   * Filter MCP tools based on patterns/names
-   * Supports wildcards like 'category_*' or exact names like 'execute_research'
+   * Supports wildcards like 'python_*' or exact names like 'execute_research'
   */
  private filterMCPTools(availableTools: MCPToolInfo[], patterns: string[]): MCPToolInfo[] {
    if (patterns.length === 0) {
@@ -221,7 +245,7 @@ export class ToolRegistry {

  /**
   * Check if a tool name matches a pattern
-   * Supports wildcards: 'category_*' matches 'category_write', 'category_read', etc.
+   * Supports wildcards: 'python_*' matches 'python_write', 'python_read', etc.
   */
  private matchesPattern(toolName: string, pattern: string): boolean {
    if (pattern === toolName) {