container lifecycle management

New file in this commit: gateway/src/llm/provider.ts (216 lines added, @@ -0,0 +1,216 @@)
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { ChatAnthropic } from '@langchain/anthropic';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
|
||||
import { ChatOpenRouter } from '@langchain/openrouter';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
/**
 * Supported LLM providers.
 *
 * String-valued so members serialize/compare as plain provider identifiers
 * (e.g. in configuration and log output).
 */
export enum LLMProvider {
  ANTHROPIC = 'anthropic',
  OPENAI = 'openai',
  GOOGLE = 'google',
  OPENROUTER = 'openrouter',
}
/**
 * Model configuration: a concrete model on a provider plus optional
 * sampling overrides.
 */
export interface ModelConfig {
  /** Provider that hosts the model. */
  provider: LLMProvider;
  /** Provider-specific model identifier (e.g. 'gpt-4o'). */
  model: string;
  /** Sampling temperature; the factory defaults to 0.7 when omitted. */
  temperature?: number;
  /** Completion-token cap; the factory defaults to 4096 when omitted. */
  maxTokens?: number;
}
/**
 * Provider configuration with API keys.
 *
 * All keys are optional; the factory throws at model-creation time when the
 * key for the requested provider is absent.
 */
export interface ProviderConfig {
  anthropicApiKey?: string;
  openaiApiKey?: string;
  googleApiKey?: string;
  openrouterApiKey?: string;
}
/**
|
||||
* LLM Provider factory
|
||||
* Creates model instances with unified interface across providers
|
||||
*/
|
||||
export class LLMProviderFactory {
|
||||
private config: ProviderConfig;
|
||||
private logger: FastifyBaseLogger;
|
||||
|
||||
constructor(config: ProviderConfig, logger: FastifyBaseLogger) {
|
||||
this.config = config;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a chat model instance
|
||||
*/
|
||||
createModel(modelConfig: ModelConfig): BaseChatModel {
|
||||
this.logger.debug(
|
||||
{ provider: modelConfig.provider, model: modelConfig.model },
|
||||
'Creating LLM model'
|
||||
);
|
||||
|
||||
switch (modelConfig.provider) {
|
||||
case LLMProvider.ANTHROPIC:
|
||||
return this.createAnthropicModel(modelConfig);
|
||||
|
||||
case LLMProvider.OPENAI:
|
||||
return this.createOpenAIModel(modelConfig);
|
||||
|
||||
case LLMProvider.GOOGLE:
|
||||
return this.createGoogleModel(modelConfig);
|
||||
|
||||
case LLMProvider.OPENROUTER:
|
||||
return this.createOpenRouterModel(modelConfig);
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported provider: ${modelConfig.provider}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create Anthropic Claude model
|
||||
*/
|
||||
private createAnthropicModel(config: ModelConfig): ChatAnthropic {
|
||||
if (!this.config.anthropicApiKey) {
|
||||
throw new Error('Anthropic API key not configured');
|
||||
}
|
||||
|
||||
return new ChatAnthropic({
|
||||
model: config.model,
|
||||
temperature: config.temperature ?? 0.7,
|
||||
maxTokens: config.maxTokens ?? 4096,
|
||||
anthropicApiKey: this.config.anthropicApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create OpenAI GPT model
|
||||
*/
|
||||
private createOpenAIModel(config: ModelConfig): ChatOpenAI {
|
||||
if (!this.config.openaiApiKey) {
|
||||
throw new Error('OpenAI API key not configured');
|
||||
}
|
||||
|
||||
return new ChatOpenAI({
|
||||
model: config.model,
|
||||
temperature: config.temperature ?? 0.7,
|
||||
maxTokens: config.maxTokens ?? 4096,
|
||||
openAIApiKey: this.config.openaiApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create Google Gemini model
|
||||
*/
|
||||
private createGoogleModel(config: ModelConfig): ChatGoogleGenerativeAI {
|
||||
if (!this.config.googleApiKey) {
|
||||
throw new Error('Google API key not configured');
|
||||
}
|
||||
|
||||
return new ChatGoogleGenerativeAI({
|
||||
model: config.model,
|
||||
temperature: config.temperature ?? 0.7,
|
||||
maxOutputTokens: config.maxTokens ?? 4096,
|
||||
apiKey: this.config.googleApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create OpenRouter model (access to 300+ models)
|
||||
*/
|
||||
private createOpenRouterModel(config: ModelConfig): ChatOpenRouter {
|
||||
if (!this.config.openrouterApiKey) {
|
||||
throw new Error('OpenRouter API key not configured');
|
||||
}
|
||||
|
||||
return new ChatOpenRouter({
|
||||
model: config.model,
|
||||
temperature: config.temperature ?? 0.7,
|
||||
maxTokens: config.maxTokens ?? 4096,
|
||||
apiKey: this.config.openrouterApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default model based on environment
|
||||
*/
|
||||
getDefaultModel(): ModelConfig {
|
||||
// Check which API keys are available
|
||||
if (this.config.anthropicApiKey) {
|
||||
return {
|
||||
provider: LLMProvider.ANTHROPIC,
|
||||
model: 'claude-3-5-sonnet-20241022',
|
||||
};
|
||||
}
|
||||
|
||||
if (this.config.openaiApiKey) {
|
||||
return {
|
||||
provider: LLMProvider.OPENAI,
|
||||
model: 'gpt-4o',
|
||||
};
|
||||
}
|
||||
|
||||
if (this.config.googleApiKey) {
|
||||
return {
|
||||
provider: LLMProvider.GOOGLE,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
};
|
||||
}
|
||||
|
||||
if (this.config.openrouterApiKey) {
|
||||
return {
|
||||
provider: LLMProvider.OPENROUTER,
|
||||
model: 'anthropic/claude-3.5-sonnet',
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error('No LLM API keys configured');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Predefined model configurations.
 *
 * `as const` keeps each entry's provider/model as literal types while
 * `satisfies` validates every entry against ModelConfig without widening.
 */
export const MODELS = {
  // Anthropic
  CLAUDE_SONNET: {
    provider: LLMProvider.ANTHROPIC,
    model: 'claude-3-5-sonnet-20241022',
  },
  CLAUDE_HAIKU: {
    provider: LLMProvider.ANTHROPIC,
    model: 'claude-3-5-haiku-20241022',
  },
  CLAUDE_OPUS: {
    provider: LLMProvider.ANTHROPIC,
    model: 'claude-3-opus-20240229',
  },

  // OpenAI
  GPT4O: {
    provider: LLMProvider.OPENAI,
    model: 'gpt-4o',
  },
  GPT4O_MINI: {
    provider: LLMProvider.OPENAI,
    model: 'gpt-4o-mini',
  },

  // Google
  GEMINI_2_FLASH: {
    provider: LLMProvider.GOOGLE,
    model: 'gemini-2.0-flash-exp',
  },
  GEMINI_PRO: {
    provider: LLMProvider.GOOGLE,
    model: 'gemini-1.5-pro',
  },
} as const satisfies Record<string, ModelConfig>;
New file in this commit: gateway/src/llm/router.ts (202 lines added, @@ -0,0 +1,202 @@)
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import { LLMProviderFactory, type ModelConfig, LLMProvider } from './provider.js';
|
||||
import type { UserLicense } from '../types/user.js';
|
||||
|
||||
/**
 * Model routing strategies.
 */
export enum RoutingStrategy {
  /** Use the user's preferred model from the license (falls back to tier default). */
  USER_PREFERENCE = 'user_preference',
  /** Route based on query complexity (keyword and length heuristics). */
  COMPLEXITY = 'complexity',
  /** Route based on license tier alone. */
  LICENSE_TIER = 'license_tier',
  /** Use the cheapest model acceptable for the tier. */
  COST_OPTIMIZED = 'cost_optimized',
}
/**
|
||||
* Model router
|
||||
* Intelligently selects which model to use based on various factors
|
||||
*/
|
||||
export class ModelRouter {
|
||||
private factory: LLMProviderFactory;
|
||||
private logger: FastifyBaseLogger;
|
||||
private defaultModel: ModelConfig;
|
||||
|
||||
constructor(factory: LLMProviderFactory, logger: FastifyBaseLogger) {
|
||||
this.factory = factory;
|
||||
this.logger = logger;
|
||||
this.defaultModel = factory.getDefaultModel();
|
||||
}
|
||||
|
||||
/**
|
||||
* Route to appropriate model based on context
|
||||
*/
|
||||
async route(
|
||||
message: string,
|
||||
license: UserLicense,
|
||||
strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE
|
||||
): Promise<BaseChatModel> {
|
||||
let modelConfig: ModelConfig;
|
||||
|
||||
switch (strategy) {
|
||||
case RoutingStrategy.USER_PREFERENCE:
|
||||
modelConfig = this.routeByUserPreference(license);
|
||||
break;
|
||||
|
||||
case RoutingStrategy.COMPLEXITY:
|
||||
modelConfig = this.routeByComplexity(message, license);
|
||||
break;
|
||||
|
||||
case RoutingStrategy.LICENSE_TIER:
|
||||
modelConfig = this.routeByLicenseTier(license);
|
||||
break;
|
||||
|
||||
case RoutingStrategy.COST_OPTIMIZED:
|
||||
modelConfig = this.routeByCost(license);
|
||||
break;
|
||||
|
||||
default:
|
||||
modelConfig = this.defaultModel;
|
||||
}
|
||||
|
||||
this.logger.info(
|
||||
{
|
||||
userId: license.userId,
|
||||
strategy,
|
||||
provider: modelConfig.provider,
|
||||
model: modelConfig.model,
|
||||
},
|
||||
'Routing to model'
|
||||
);
|
||||
|
||||
return this.factory.createModel(modelConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Route based on user's preferred model (if set in license)
|
||||
*/
|
||||
private routeByUserPreference(license: UserLicense): ModelConfig {
|
||||
// Check if user has custom model preference
|
||||
const preferredModel = (license as any).preferredModel as ModelConfig | undefined;
|
||||
|
||||
if (preferredModel && this.isModelAllowed(preferredModel, license)) {
|
||||
return preferredModel;
|
||||
}
|
||||
|
||||
// Fall back to license tier default
|
||||
return this.routeByLicenseTier(license);
|
||||
}
|
||||
|
||||
/**
|
||||
* Route based on query complexity
|
||||
*/
|
||||
private routeByComplexity(message: string, license: UserLicense): ModelConfig {
|
||||
const isComplex = this.isComplexQuery(message);
|
||||
|
||||
if (license.licenseType === 'enterprise') {
|
||||
// Enterprise users get best models for complex queries
|
||||
return isComplex
|
||||
? { provider: LLMProvider.ANTHROPIC, model: 'claude-3-opus-20240229' }
|
||||
: { provider: LLMProvider.ANTHROPIC, model: 'claude-3-5-sonnet-20241022' };
|
||||
}
|
||||
|
||||
if (license.licenseType === 'pro') {
|
||||
// Pro users get good models
|
||||
return isComplex
|
||||
? { provider: LLMProvider.ANTHROPIC, model: 'claude-3-5-sonnet-20241022' }
|
||||
: { provider: LLMProvider.OPENAI, model: 'gpt-4o-mini' };
|
||||
}
|
||||
|
||||
// Free users get efficient models
|
||||
return { provider: LLMProvider.GOOGLE, model: 'gemini-2.0-flash-exp' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Route based on license tier
|
||||
*/
|
||||
private routeByLicenseTier(license: UserLicense): ModelConfig {
|
||||
switch (license.licenseType) {
|
||||
case 'enterprise':
|
||||
return { provider: LLMProvider.ANTHROPIC, model: 'claude-3-5-sonnet-20241022' };
|
||||
|
||||
case 'pro':
|
||||
return { provider: LLMProvider.OPENAI, model: 'gpt-4o' };
|
||||
|
||||
case 'free':
|
||||
return { provider: LLMProvider.GOOGLE, model: 'gemini-2.0-flash-exp' };
|
||||
|
||||
default:
|
||||
return this.defaultModel;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Route to cheapest available model
|
||||
*/
|
||||
private routeByCost(license: UserLicense): ModelConfig {
|
||||
// Free tier: use cheapest
|
||||
if (license.licenseType === 'free') {
|
||||
return { provider: LLMProvider.GOOGLE, model: 'gemini-2.0-flash-exp' };
|
||||
}
|
||||
|
||||
// Paid tiers: use GPT-4o-mini for cost efficiency
|
||||
return { provider: LLMProvider.OPENAI, model: 'gpt-4o-mini' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if model is allowed for user's license
|
||||
*/
|
||||
private isModelAllowed(model: ModelConfig, license: UserLicense): boolean {
|
||||
// Free tier: only cheap models
|
||||
if (license.licenseType === 'free') {
|
||||
const allowedModels = ['gemini-2.0-flash-exp', 'gpt-4o-mini', 'claude-3-5-haiku-20241022'];
|
||||
return allowedModels.includes(model.model);
|
||||
}
|
||||
|
||||
// Pro: all except Opus
|
||||
if (license.licenseType === 'pro') {
|
||||
const blockedModels = ['claude-3-opus-20240229'];
|
||||
return !blockedModels.includes(model.model);
|
||||
}
|
||||
|
||||
// Enterprise: all models allowed
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if query is complex
|
||||
*/
|
||||
private isComplexQuery(message: string): boolean {
|
||||
const complexityIndicators = [
|
||||
// Multi-step analysis
|
||||
'backtest',
|
||||
'analyze',
|
||||
'compare',
|
||||
'optimize',
|
||||
|
||||
// Code generation
|
||||
'write',
|
||||
'create',
|
||||
'implement',
|
||||
'build',
|
||||
|
||||
// Deep reasoning
|
||||
'explain why',
|
||||
'what if',
|
||||
'how would',
|
||||
|
||||
// Long messages (> 200 chars likely complex)
|
||||
message.length > 200,
|
||||
];
|
||||
|
||||
const messageLower = message.toLowerCase();
|
||||
|
||||
return complexityIndicators.some((indicator) =>
|
||||
typeof indicator === 'string' ? messageLower.includes(indicator) : indicator
|
||||
);
|
||||
}
|
||||
}
|
||||