feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions

View File

@@ -1,8 +1,5 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { SystemMessage, HumanMessage } from '@langchain/core/messages';
/** All platform tool names available to every subagent. */
const ALL_PLATFORM_TOOLS = ['SymbolLookup', 'GetChartData', 'GetTicker24h', 'WebSearch', 'FetchPage', 'ArxivSearch'];
import type { FastifyBaseLogger } from 'fastify';
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { HarnessEvent, SubagentChunkEvent, SubagentThinkingEvent } from '../harness-events.js';
@@ -13,6 +10,62 @@ import type { ToolRegistry } from '../../tools/tool-registry.js';
import type { MCPToolInfo } from '../../tools/mcp/mcp-tool-wrapper.js';
import { WikiLoader, type SpawnContext } from './wiki-loader.js';
/** All platform tool names available to every subagent. Order is not significant; consumers treat this as a set. */
const ALL_PLATFORM_TOOLS = ['SymbolLookup', 'GetChartData', 'GetTicker24h', 'WebSearch', 'FetchPage', 'ArxivSearch'];
/**
 * Streaming filter that removes triple-backtick fenced code blocks from text
 * arriving in chunks. It retains at most two characters of look-ahead in its
 * internal buffer, so a ``` marker split across chunk boundaries is still
 * detected while ordinary text flows through with negligible latency.
 */
class FenceFilter {
  private pending = '';
  private insideFence = false;

  /** Feed the next chunk; returns whatever text is now safe to emit. */
  write(chunk: string): string {
    this.pending += chunk;
    return this.flush(false);
  }

  /** Signal end of stream; returns any remaining held-back text. */
  end(): string {
    return this.flush(true);
  }

  /**
   * Scan the buffer, emitting plain text and discarding fenced regions.
   * When `isFinal` is false, up to two trailing characters are withheld in
   * case they are the prefix of a ``` marker completed by the next chunk.
   */
  private flush(isFinal: boolean): string {
    let emitted = '';
    let done = false;
    while (!done) {
      if (this.insideFence) {
        const close = this.pending.indexOf('```');
        if (close === -1) {
          // Fence body still open: drop it, keeping only a 2-char tail that
          // could be the start of a split closing marker.
          this.pending = isFinal ? '' : this.pending.slice(Math.max(0, this.pending.length - 2));
          done = true;
        } else {
          this.insideFence = false;
          // Skip through the end of the closing-fence line when present.
          const lineBreak = this.pending.indexOf('\n', close + 3);
          this.pending = lineBreak === -1 ? this.pending.slice(close + 3) : this.pending.slice(lineBreak + 1);
        }
      } else {
        const open = this.pending.indexOf('```');
        if (open === -1) {
          // No fence in sight: release everything except the look-ahead tail.
          const safe = isFinal ? this.pending.length : Math.max(0, this.pending.length - 2);
          emitted += this.pending.slice(0, safe);
          this.pending = this.pending.slice(safe);
          done = true;
        } else {
          emitted += this.pending.slice(0, open);
          const headerBreak = this.pending.indexOf('\n', open + 3);
          if (headerBreak === -1 && !isFinal) {
            // Opening fence seen but its header line isn't complete yet; wait.
            this.pending = this.pending.slice(open);
            done = true;
          } else {
            this.insideFence = true;
            this.pending = headerBreak === -1 ? '' : this.pending.slice(headerBreak + 1);
          }
        }
      }
    }
    // Collapse runs of blank lines left behind where code blocks were removed.
    return emitted.replace(/\n{3,}/g, '\n\n');
  }
}
export interface SpawnInput {
agentName: string;
instruction: string;
@@ -138,13 +191,15 @@ export class SpawnService {
);
let finalText = '';
const fenceFilter = new FenceFilter();
for await (const [mode, data] of await stream) {
if (signal?.aborted) break;
if (mode === 'messages') {
for (const chunk of SpawnService.extractStreamChunks(data, agentName)) {
yield chunk;
const filtered = fenceFilter.write(chunk.content);
if (filtered) yield { ...chunk, content: filtered };
}
} else if (mode === 'updates') {
if ((data as any).agent?.messages) {
@@ -167,6 +222,9 @@ export class SpawnService {
}
}
const tail = fenceFilter.end();
if (tail) yield { type: 'subagent_chunk', agentName, content: tail };
this.logger.info(
{ agentName, textLength: finalText.length, imageCount: imageCapture.length },
'SpawnService: finished'
@@ -182,12 +240,16 @@ export class SpawnService {
/**
* Extract subagent_chunk / subagent_thinking events from a LangGraph `messages` stream datum.
* Only processes AIMessageChunks — ToolMessages (identified by tool_call_id) are skipped
* because their content is raw tool result data, not agent narrative text.
*/
static extractStreamChunks(
data: unknown,
agentName: string,
): Array<SubagentChunkEvent | SubagentThinkingEvent> {
const msg = Array.isArray(data) ? (data as unknown[])[0] : data;
// ToolMessages have tool_call_id; AIMessageChunks don't — skip tool results
if ((msg as any)?.tool_call_id != null) return [];
const content = (msg as any)?.content;
if (typeof content === 'string') {
return content ? [{ type: 'subagent_chunk', agentName, content }] : [];