data fixes; indicator=>workspace sync
gateway/src/llm/middleware.ts (new file, 89 lines)
@@ -0,0 +1,89 @@
+import type { BaseMessage } from '@langchain/core/messages';
+import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
+import type { StructuredTool } from '@langchain/core/tools';
+
+/**
+ * Provider-agnostic hook to preprocess messages before an LLM call.
+ * Applied transparently by the harness; implementations are provider-specific.
+ */
+export interface ModelMiddleware {
+  processMessages(messages: BaseMessage[], tools: StructuredTool[]): BaseMessage[];
+}
+
+/**
+ * No-op implementation for providers that don't support prompt caching.
+ */
+export class NoopMiddleware implements ModelMiddleware {
+  processMessages(messages: BaseMessage[]): BaseMessage[] {
+    return messages;
+  }
+}
+
+/**
+ * Mirrors Python's AnthropicPromptCachingMiddleware logic.
+ *
+ * Tags with cache_control: { type: 'ephemeral' }:
+ * 1. The system message's last content block (stable prompt prefix; always a cache hit after turn 1)
+ * 2. The last non-current cacheable message (AIMessage or HumanMessage before the final user message),
+ *    so the full conversation prefix is cached on the next turn.
+ *
+ * Requires ChatAnthropic to be configured with:
+ * clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } }
+ */
+export class AnthropicCachingMiddleware implements ModelMiddleware {
+  processMessages(messages: BaseMessage[], _tools: StructuredTool[]): BaseMessage[] {
+    if (messages.length === 0) return messages;
+
+    const result = messages.map(msg => cloneMessage(msg));
+
+    // 1. Tag the system message.
+    const systemMsg = result.find(m => m._getType() === 'system');
+    if (systemMsg) {
+      addCacheControl(systemMsg);
+    }
+
+    // 2. Tag the last cacheable message that isn't the current user input.
+    //    The current user message is always the last element; we want the one before it,
+    //    so we scan backwards for the last AIMessage or HumanMessage (excluding the final message).
+    const candidates = result.slice(0, -1);
+    for (let i = candidates.length - 1; i >= 0; i--) {
+      const t = candidates[i]._getType();
+      if (t === 'ai' || t === 'human') {
+        addCacheControl(candidates[i]);
+        break;
+      }
+    }
+
+    return result;
+  }
+}
+
+/**
+ * Clone a message (deep-copying its content) so we don't mutate history objects.
+ */
+function cloneMessage(msg: BaseMessage): BaseMessage {
+  const type = msg._getType();
+  const content = typeof msg.content === 'string'
+    ? msg.content
+    : JSON.parse(JSON.stringify(msg.content));
+
+  if (type === 'system') return new SystemMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
+  if (type === 'human') return new HumanMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
+  if (type === 'ai') return new AIMessage({ content, additional_kwargs: { ...msg.additional_kwargs }, tool_calls: (msg as AIMessage).tool_calls });
+  // For other types (tool messages etc.), return as-is; we don't tag them.
+  return msg;
+}
+
+/**
+ * Add cache_control to the last content block of a message.
+ * Converts string content to a block array if needed.
+ */
+function addCacheControl(msg: BaseMessage): void {
+  if (typeof msg.content === 'string') {
+    // Convert to a block array so the tag has a block to attach to.
+    (msg as any).content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }];
+  } else if (Array.isArray(msg.content) && msg.content.length > 0) {
+    const last = msg.content[msg.content.length - 1] as any;
+    last.cache_control = { type: 'ephemeral' };
+  }
+}
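
Reviewer note (not part of the commit): a minimal sketch of what the new middleware does to a conversation. Given a system prompt and a multi-turn history, the system message's last content block and the last pre-current human/AI message get the ephemeral cache_control tag, while the current user turn is left alone. The example data below is illustrative only:

import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
import { AnthropicCachingMiddleware } from './middleware.js';

const mw = new AnthropicCachingMiddleware();
const tagged = mw.processMessages([
  new SystemMessage('You are a helpful assistant.'),
  new HumanMessage('What is prompt caching?'),
  new AIMessage('It lets the API reuse a previously processed prefix.'),
  new HumanMessage('Does it help latency?'), // current turn, left untagged
], []);

// tagged[0].content is now
//   [{ type: 'text', text: 'You are a helpful assistant.', cache_control: { type: 'ephemeral' } }]
// tagged[2] (the AIMessage) also carries cache_control on its last block;
// tagged[3] (the current user message) is untouched.
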
@@ -1,6 +1,10 @@
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { ChatAnthropic } from '@langchain/anthropic';
 import type { FastifyBaseLogger } from 'fastify';
+import { type ModelMiddleware, NoopMiddleware, AnthropicCachingMiddleware } from './middleware.js';
 
+export type { ModelMiddleware };
+export { NoopMiddleware, AnthropicCachingMiddleware };
+
 /**
  * Supported LLM providers
@@ -64,7 +68,7 @@ export class LLMProviderFactory {
   /**
    * Create a chat model instance
    */
-  createModel(modelConfig: ModelConfig): BaseChatModel {
+  createModel(modelConfig: ModelConfig): { model: BaseChatModel; middleware: ModelMiddleware } {
     this.logger.debug(
       { provider: modelConfig.provider, model: modelConfig.model },
       'Creating LLM model'
@@ -82,17 +86,20 @@ export class LLMProviderFactory {
   /**
    * Create Anthropic Claude model
    */
-  private createAnthropicModel(config: ModelConfig): ChatAnthropic {
+  private createAnthropicModel(config: ModelConfig): { model: ChatAnthropic; middleware: AnthropicCachingMiddleware } {
     if (!this.config.anthropicApiKey) {
       throw new Error('Anthropic API key not configured');
     }
 
-    return new ChatAnthropic({
+    const model = new ChatAnthropic({
       model: config.model,
       temperature: config.temperature ?? 0.7,
       maxTokens: config.maxTokens ?? 4096,
       anthropicApiKey: this.config.anthropicApiKey,
+      clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } },
     });
 
+    return { model, middleware: new AnthropicCachingMiddleware() };
   }
 
   /**
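
Reviewer note: call sites of createModel now receive a pair instead of a bare model. A minimal sketch of the updated consumption pattern; `factory`, `modelConfig`, `history`, and `tools` are illustrative names, not from this commit, and the non-Anthropic branches (presumably returning NoopMiddleware) are not shown in these hunks:

// Illustrative only: `factory` is an LLMProviderFactory instance.
const { model, middleware } = factory.createModel(modelConfig);
const prepared = middleware.processMessages(history, tools); // tags cache_control for Anthropic, pass-through otherwise
const response = await model.invoke(prepared);
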
@@ -1,6 +1,7 @@
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import type { FastifyBaseLogger } from 'fastify';
 import { LLMProviderFactory, type ModelConfig, LLMProvider, type LicenseModelsConfig } from './provider.js';
+import type { ModelMiddleware } from './middleware.js';
 import type { License } from '../types/user.js';
 
 /**
@@ -42,7 +43,7 @@ export class ModelRouter {
     license: License,
     strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
     userId?: string
-  ): Promise<BaseChatModel> {
+  ): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
     let modelConfig: ModelConfig;
 
     switch (strategy) {
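
Reviewer note: one level up, the router's selection method now resolves to the same pair, so a gateway handler can stay provider-agnostic. The method name is not visible in this hunk; `selectModel` below is a placeholder, and `router`, `messages`, and `tools` are likewise illustrative:

// Placeholder call site: the method name and surrounding variables are illustrative.
const { model, middleware } = await router.selectModel(license, RoutingStrategy.USER_PREFERENCE, userId);
const reply = await model.invoke(middleware.processMessages(messages, tools));
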