data fixes; indicator=>workspace sync

2026-03-31 20:29:12 -04:00
parent 998f69fa1a
commit cd28e18e52
45 changed files with 1324 additions and 1239 deletions

View File

@@ -0,0 +1,89 @@
import type { BaseMessage } from '@langchain/core/messages';
import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
import type { StructuredTool } from '@langchain/core/tools';
/**
* Provider-agnostic hook to preprocess messages before an LLM call.
* Applied transparently by the harness; implementations are provider-specific.
*/
export interface ModelMiddleware {
processMessages(messages: BaseMessage[], tools: StructuredTool[]): BaseMessage[];
}
/**
* No-op implementation for providers that don't support prompt caching.
*/
export class NoopMiddleware implements ModelMiddleware {
processMessages(messages: BaseMessage[]): BaseMessage[] {
return messages;
}
}
/**
* Mirrors Python's AnthropicPromptCachingMiddleware logic.
*
* Tags with cache_control: { type: 'ephemeral' }:
* 1. The system message's last content block (stable prompt prefix, always a cache hit after turn 1)
* 2. The last non-current cacheable message (AIMessage or HumanMessage before the final user message)
* so the full conversation prefix is cached on the next turn.
*
* Requires ChatAnthropic to be configured with:
* clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } }
*/
export class AnthropicCachingMiddleware implements ModelMiddleware {
processMessages(messages: BaseMessage[], _tools: StructuredTool[]): BaseMessage[] {
if (messages.length === 0) return messages;
const result = messages.map(msg => cloneMessage(msg));
// 1. Tag system message
const systemMsg = result.find(m => m._getType() === 'system');
if (systemMsg) {
addCacheControl(systemMsg);
}
// 2. Tag the last cacheable message that isn't the current user input.
// The current user message is always the last element; we want the one before it.
// We look backwards for the last AIMessage or HumanMessage (excluding the final message).
const candidates = result.slice(0, -1);
for (let i = candidates.length - 1; i >= 0; i--) {
const t = candidates[i]._getType();
if (t === 'ai' || t === 'human') {
addCacheControl(candidates[i]);
break;
}
}
return result;
}
}
/**
* Copy a message, deep-copying its content, so cache tags don't leak into the original history objects.
*/
function cloneMessage(msg: BaseMessage): BaseMessage {
const type = msg._getType();
const content = typeof msg.content === 'string'
? msg.content
: JSON.parse(JSON.stringify(msg.content));
if (type === 'system') return new SystemMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
if (type === 'human') return new HumanMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
if (type === 'ai') return new AIMessage({ content, additional_kwargs: { ...msg.additional_kwargs }, tool_calls: (msg as AIMessage).tool_calls });
// For other types (tool messages etc.), return as-is — we don't tag them
return msg;
}
/**
* Add cache_control to the last content block of a message.
* Converts string content to a block array if needed.
*/
function addCacheControl(msg: BaseMessage): void {
if (typeof msg.content === 'string') {
// Convert to block array
(msg as any).content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }];
} else if (Array.isArray(msg.content) && msg.content.length > 0) {
const last = msg.content[msg.content.length - 1] as any;
last.cache_control = { type: 'ephemeral' };
}
}
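
For context, a minimal sketch of how a harness might apply this middleware before a model call. It is not part of this commit: runTurn is a hypothetical helper and the history contents are illustrative; only AnthropicCachingMiddleware.processMessages comes from the file above.

import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
import { AnthropicCachingMiddleware } from './middleware.js';

// Hypothetical harness step: tag the history for prompt caching, then invoke the model.
// After processMessages, the system message content becomes a block array whose last
// block carries cache_control: { type: 'ephemeral' }, and the last prior turn
// (the AIMessage below) is tagged the same way.
async function runTurn(model: BaseChatModel) {
  const middleware = new AnthropicCachingMiddleware();
  const history = [
    new SystemMessage('You are a helpful assistant.'),
    new HumanMessage('Sync the workspace indicators.'),
    new AIMessage('Done. Anything else?'),
    new HumanMessage('Summarize what changed.'),
  ];
  const prepared = middleware.processMessages(history, []);
  return model.invoke(prepared);
}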

View File

@@ -1,6 +1,10 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatAnthropic } from '@langchain/anthropic';
import type { FastifyBaseLogger } from 'fastify';
import { type ModelMiddleware, NoopMiddleware, AnthropicCachingMiddleware } from './middleware.js';
export type { ModelMiddleware };
export { NoopMiddleware, AnthropicCachingMiddleware };
/**
* Supported LLM providers
@@ -64,7 +68,7 @@ export class LLMProviderFactory {
/**
* Create a chat model instance
*/
createModel(modelConfig: ModelConfig): BaseChatModel {
createModel(modelConfig: ModelConfig): { model: BaseChatModel; middleware: ModelMiddleware } {
this.logger.debug(
{ provider: modelConfig.provider, model: modelConfig.model },
'Creating LLM model'
@@ -82,17 +86,20 @@ export class LLMProviderFactory {
/**
* Create Anthropic Claude model
*/
private createAnthropicModel(config: ModelConfig): ChatAnthropic {
private createAnthropicModel(config: ModelConfig): { model: ChatAnthropic; middleware: AnthropicCachingMiddleware } {
if (!this.config.anthropicApiKey) {
throw new Error('Anthropic API key not configured');
}
return new ChatAnthropic({
const model = new ChatAnthropic({
model: config.model,
temperature: config.temperature ?? 0.7,
maxTokens: config.maxTokens ?? 4096,
anthropicApiKey: this.config.anthropicApiKey,
clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } },
});
return { model, middleware: new AnthropicCachingMiddleware() };
}
/**

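A hedged sketch of the new call-site shape, assuming a configured LLMProviderFactory and that ModelConfig only requires provider and model. The callModel helper, the provider enum member name, and the model id below are illustrative, not part of this diff.

import type { BaseMessage } from '@langchain/core/messages';
import type { StructuredTool } from '@langchain/core/tools';
import { LLMProviderFactory, LLMProvider } from './provider.js';

// Illustrative call site: createModel now returns the model together with its
// middleware, so callers destructure both instead of receiving a bare BaseChatModel.
async function callModel(
  factory: LLMProviderFactory,
  messages: BaseMessage[],
  tools: StructuredTool[]
) {
  const { model, middleware } = factory.createModel({
    provider: LLMProvider.ANTHROPIC, // assumed enum member name
    model: 'claude-3-5-sonnet-latest', // illustrative model id
  });
  // Anthropic models arrive with AnthropicCachingMiddleware; others with NoopMiddleware.
  const prepared = middleware.processMessages(messages, tools);
  return model.invoke(prepared);
}
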
View File

@@ -1,6 +1,7 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { FastifyBaseLogger } from 'fastify';
import { LLMProviderFactory, type ModelConfig, LLMProvider, type LicenseModelsConfig } from './provider.js';
import type { ModelMiddleware } from './middleware.js';
import type { License } from '../types/user.js';
/**
@@ -42,7 +43,7 @@ export class ModelRouter {
license: License,
strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
userId?: string
): Promise<BaseChatModel> {
): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
let modelConfig: ModelConfig;
switch (strategy) {
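
Finally, a sketch of how a ModelRouter consumer might adapt to the new return shape. The method name getModel and the module path are assumptions (the hunk above does not show them); only RoutingStrategy.USER_PREFERENCE and the { model, middleware } result come from this diff.

import type { BaseMessage } from '@langchain/core/messages';
import type { License } from '../types/user.js';
import { ModelRouter, RoutingStrategy } from './router.js'; // assumed module path

// Hypothetical consumer: the router resolves a model for the license and routing
// strategy, and returns the matching middleware so every turn is preprocessed the
// same way regardless of provider.
async function answer(router: ModelRouter, license: License, history: BaseMessage[]) {
  const { model, middleware } = await router.getModel( // method name assumed
    license,
    RoutingStrategy.USER_PREFERENCE
  );
  const prepared = middleware.processMessages(history, []);
  return model.invoke(prepared);
}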