data fixes; indicator=>workspace sync
gateway/src/llm/middleware.ts (new file, 89 lines)
@@ -0,0 +1,89 @@
+import type { BaseMessage } from '@langchain/core/messages';
+import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
+import type { StructuredTool } from '@langchain/core/tools';
+
+/**
+ * Provider-agnostic hook to preprocess messages before an LLM call.
+ * Applied transparently by the harness; implementations are provider-specific.
+ */
+export interface ModelMiddleware {
+  processMessages(messages: BaseMessage[], tools: StructuredTool[]): BaseMessage[];
+}
+
+/**
+ * No-op implementation for providers that don't support prompt caching.
+ */
+export class NoopMiddleware implements ModelMiddleware {
+  processMessages(messages: BaseMessage[]): BaseMessage[] {
+    return messages;
+  }
+}
+
+/**
+ * Mirrors Python's AnthropicPromptCachingMiddleware logic.
+ *
+ * Tags with cache_control: { type: 'ephemeral' }:
+ * 1. The system message's last content block (stable prompt prefix; always a cache hit after turn 1)
+ * 2. The last non-current cacheable message (AIMessage or HumanMessage before the final user message),
+ *    so the full conversation prefix is cached on the next turn.
+ *
+ * Requires ChatAnthropic to be configured with:
+ * clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } }
+ */
+export class AnthropicCachingMiddleware implements ModelMiddleware {
+  processMessages(messages: BaseMessage[], _tools: StructuredTool[]): BaseMessage[] {
+    if (messages.length === 0) return messages;
+
+    const result = messages.map(msg => cloneMessage(msg));
+
+    // 1. Tag the system message.
+    const systemMsg = result.find(m => m._getType() === 'system');
+    if (systemMsg) {
+      addCacheControl(systemMsg);
+    }
+
+    // 2. Tag the last cacheable message that isn't the current user input.
+    //    The current user message is always the last element; we want the one before it,
+    //    so we scan backwards for the last AIMessage or HumanMessage (excluding the final message).
+    const candidates = result.slice(0, -1);
+    for (let i = candidates.length - 1; i >= 0; i--) {
+      const t = candidates[i]._getType();
+      if (t === 'ai' || t === 'human') {
+        addCacheControl(candidates[i]);
+        break;
+      }
+    }
+
+    return result;
+  }
+}
+
+/**
+ * Clone a message (deep-copying its content) so we don't mutate history objects.
+ */
+function cloneMessage(msg: BaseMessage): BaseMessage {
+  const type = msg._getType();
+  const content = typeof msg.content === 'string'
+    ? msg.content
+    : JSON.parse(JSON.stringify(msg.content));
+
+  if (type === 'system') return new SystemMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
+  if (type === 'human') return new HumanMessage({ content, additional_kwargs: { ...msg.additional_kwargs } });
+  if (type === 'ai') return new AIMessage({ content, additional_kwargs: { ...msg.additional_kwargs }, tool_calls: (msg as AIMessage).tool_calls });
+  // For other types (tool messages etc.), return as-is; we don't tag them.
+  return msg;
+}
+
+/**
+ * Add cache_control to the last content block of a message.
+ * Converts string content to a block array if needed.
+ */
+function addCacheControl(msg: BaseMessage): void {
+  if (typeof msg.content === 'string') {
+    // Convert to a block array so the tag has a block to attach to.
+    (msg as any).content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }];
+  } else if (Array.isArray(msg.content) && msg.content.length > 0) {
+    const last = msg.content[msg.content.length - 1] as any;
+    last.cache_control = { type: 'ephemeral' };
+  }
+}
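
Reviewer note (not part of the commit): a minimal sketch of what the new middleware does to a conversation. Given a system prompt and a multi-turn history, the system message's last content block and the last pre-current human/AI message get the ephemeral cache_control tag, while the current user turn is left alone. The example data below is illustrative only:

import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
import { AnthropicCachingMiddleware } from './middleware.js';

const mw = new AnthropicCachingMiddleware();
const tagged = mw.processMessages([
  new SystemMessage('You are a helpful assistant.'),
  new HumanMessage('What is prompt caching?'),
  new AIMessage('It lets the API reuse a previously processed prefix.'),
  new HumanMessage('Does it help latency?'), // current turn, left untagged
], []);

// tagged[0].content is now
//   [{ type: 'text', text: 'You are a helpful assistant.', cache_control: { type: 'ephemeral' } }]
// tagged[2] (the AIMessage) also carries cache_control on its last block;
// tagged[3] (the current user message) is untouched.
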
@@ -1,6 +1,10 @@
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { ChatAnthropic } from '@langchain/anthropic';
 import type { FastifyBaseLogger } from 'fastify';
+import { type ModelMiddleware, NoopMiddleware, AnthropicCachingMiddleware } from './middleware.js';
 
+export type { ModelMiddleware };
+export { NoopMiddleware, AnthropicCachingMiddleware };
+
 /**
  * Supported LLM providers
@@ -64,7 +68,7 @@ export class LLMProviderFactory {
   /**
    * Create a chat model instance
    */
-  createModel(modelConfig: ModelConfig): BaseChatModel {
+  createModel(modelConfig: ModelConfig): { model: BaseChatModel; middleware: ModelMiddleware } {
     this.logger.debug(
       { provider: modelConfig.provider, model: modelConfig.model },
       'Creating LLM model'
@@ -82,17 +86,20 @@ export class LLMProviderFactory {
   /**
    * Create Anthropic Claude model
    */
-  private createAnthropicModel(config: ModelConfig): ChatAnthropic {
+  private createAnthropicModel(config: ModelConfig): { model: ChatAnthropic; middleware: AnthropicCachingMiddleware } {
     if (!this.config.anthropicApiKey) {
       throw new Error('Anthropic API key not configured');
     }
 
-    return new ChatAnthropic({
+    const model = new ChatAnthropic({
       model: config.model,
       temperature: config.temperature ?? 0.7,
       maxTokens: config.maxTokens ?? 4096,
       anthropicApiKey: this.config.anthropicApiKey,
+      clientOptions: { defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' } },
     });
 
+    return { model, middleware: new AnthropicCachingMiddleware() };
   }
 
   /**
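
Reviewer note: call sites of createModel now receive a pair instead of a bare model. A minimal sketch of the updated consumption pattern; `factory`, `modelConfig`, `history`, and `tools` are illustrative names, not from this commit, and the non-Anthropic branches (presumably returning NoopMiddleware) are not shown in these hunks:

// Illustrative only: `factory` is an LLMProviderFactory instance.
const { model, middleware } = factory.createModel(modelConfig);
const prepared = middleware.processMessages(history, tools); // tags cache_control for Anthropic, pass-through otherwise
const response = await model.invoke(prepared);
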
@@ -1,6 +1,7 @@
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import type { FastifyBaseLogger } from 'fastify';
 import { LLMProviderFactory, type ModelConfig, LLMProvider, type LicenseModelsConfig } from './provider.js';
+import type { ModelMiddleware } from './middleware.js';
 import type { License } from '../types/user.js';
 
 /**
@@ -42,7 +43,7 @@ export class ModelRouter {
     license: License,
     strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
     userId?: string
-  ): Promise<BaseChatModel> {
+  ): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
     let modelConfig: ModelConfig;
 
     switch (strategy) {
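
Reviewer note: one level up, the router's selection method now resolves to the same pair, so a gateway handler can stay provider-agnostic. The method name is not visible in this hunk; `selectModel` below is a placeholder, and `router`, `messages`, and `tools` are likewise illustrative:

// Placeholder call site: the method name and surrounding variables are illustrative.
const { model, middleware } = await router.selectModel(license, RoutingStrategy.USER_PREFERENCE, userId);
const reply = await model.invoke(middleware.processMessages(messages, tools));
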