feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions

View File

@@ -5,6 +5,8 @@ import type { AgentHarness, HarnessFactory } from '../harness/agent-harness.js';
import type { HarnessEvent } from '../harness/harness-events.js';
import type { InboundMessage } from '../types/messages.js';
import { randomUUID } from 'crypto';
import { parseModelTag, MODEL_TAGS } from '../llm/model-tags.js';
import type { LLMProvider } from '../llm/provider.js';
import type { SessionRegistry, EventSubscriber, Session } from '../events/index.js';
import type { OHLCService, BarUpdateCallback } from '../services/ohlc-service.js';
import type { SymbolIndexService } from '../services/symbol-index-service.js';
@@ -30,6 +32,24 @@ function jsonStringifySafe(obj: any): string {
);
}
/**
 * Coalesces streamed text chunks into batches before handing them to `send`.
 *
 * Chunks passed to `add` are appended to an internal buffer. The buffer is
 * delivered in a single `send` call either when the batch timer fires or when
 * `flush` is called explicitly (e.g. before emitting a non-chunk event so
 * that message ordering is preserved).
 *
 * The timer is armed by the first chunk of a batch and is NOT re-armed by
 * later chunks, so buffered text is delivered at most `delayMs` after the
 * first chunk arrived. Re-arming on every chunk would postpone delivery
 * indefinitely while chunks keep arriving faster than `delayMs`, which
 * defeats incremental streaming.
 *
 * @param send - Receives the concatenated buffered content; never called with an empty string.
 * @param delayMs - Maximum time, in milliseconds, that buffered content waits before being sent.
 * @returns `add` to buffer a chunk, and `flush` to send any buffered content immediately.
 */
function makeChunkDebouncer(send: (content: string) => void, delayMs = 200) {
  let buffer = '';
  let timer: ReturnType<typeof setTimeout> | null = null;

  function flush() {
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
    if (buffer.length > 0) {
      // Take the buffer before calling `send` so that a re-entrant `add` is
      // not wiped out afterwards and a throwing `send` cannot cause the same
      // content to be delivered twice on a later flush.
      const pending = buffer;
      buffer = '';
      send(pending);
    }
  }

  function add(content: string) {
    buffer += content;
    // Only arm the timer when no batch is pending; see the latency note above.
    if (timer === null) {
      timer = setTimeout(flush, delayMs);
    }
  }

  return { add, flush };
}
/** Status values accepted by `sendStatus` for its `status` argument. */
export type SessionStatus = 'authenticating' | 'spinning_up' | 'initializing' | 'ready' | 'error'
function sendStatus(socket: WebSocket, status: SessionStatus, message: string): void {
@@ -257,6 +277,7 @@ export class WebSocketHandler {
userId: authContext.userId,
licenseType: authContext.license.licenseType,
message: 'Connected to Dexorder AI',
modelTags: MODEL_TAGS.map(m => m.tag),
})
);
@@ -272,25 +293,32 @@ export class WebSocketHandler {
} else {
// First conversation — auto-send greeting prompt and stream the response
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false }));
const greetingDebouncer = makeChunkDebouncer(content =>
socket.send(JSON.stringify({ type: 'agent_chunk', content, done: false }))
);
for await (const event of harness!.streamGreeting()) {
const e = event as HarnessEvent;
switch (e.type) {
case 'chunk':
socket.send(JSON.stringify({ type: 'agent_chunk', content: e.content, done: false }));
greetingDebouncer.add(e.content);
break;
case 'tool_call':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break;
case 'image':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break;
case 'error':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'text', text: `An error occurred during greeting.` }));
break;
case 'done':
break;
}
}
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: true }));
}
}
@@ -304,47 +332,75 @@ export class WebSocketHandler {
// Route based on message type
if (payload.type === 'message' || payload.type === 'agent_user_message') {
// Chat message - send to agent harness with streaming
const inboundMessage: InboundMessage = {
messageId: randomUUID(),
userId: authContext.userId,
sessionId: authContext.sessionId,
content: payload.content,
attachments: payload.attachments,
timestamp: new Date(),
};
if (!harness) {
logger.error('Harness not initialized');
socket.send(JSON.stringify({ type: 'error', message: 'Session not ready' }));
return;
}
// Check for @ModelTag at the start of the message
const parsedTag = parseModelTag(payload.content ?? '');
let messageContent: string = payload.content ?? '';
let modelOverride: { modelId: string; provider?: LLMProvider } | undefined;
if (parsedTag) {
await harness.clearHistory();
socket.send(JSON.stringify({ type: 'model_switched', tag: parsedTag.tag, modelId: parsedTag.modelId, rest: parsedTag.rest }));
messageContent = parsedTag.rest;
modelOverride = { modelId: parsedTag.modelId, provider: parsedTag.provider };
logger.info({ tag: parsedTag.tag, modelId: parsedTag.modelId }, 'Model tag switch');
}
// Chat message - send to agent harness with streaming
const inboundMessage: InboundMessage = {
messageId: randomUUID(),
userId: authContext.userId,
sessionId: authContext.sessionId,
content: messageContent,
attachments: payload.attachments,
timestamp: new Date(),
};
try {
// Acknowledge receipt immediately so the client can show the seen indicator
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false }));
logger.info('Streaming harness response');
let fatalError = false;
for await (const event of harness.streamMessage(inboundMessage)) {
const msgDebouncer = makeChunkDebouncer(content =>
socket.send(JSON.stringify({ type: 'agent_chunk', content, done: false }))
);
const stream = (parsedTag && !messageContent)
? harness.streamGreeting(modelOverride)
: harness.streamMessage(inboundMessage, { modelOverride });
for await (const event of stream) {
const e = event as HarnessEvent;
switch (e.type) {
case 'chunk':
socket.send(JSON.stringify({ type: 'agent_chunk', content: e.content, done: false }));
msgDebouncer.add(e.content);
break;
case 'tool_call':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break;
case 'subagent_tool_call':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break;
case 'subagent_chunk':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: e.agentName, content: e.content }));
break;
case 'subagent_thinking':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_thinking', agentName: e.agentName, content: e.content }));
break;
case 'image':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break;
case 'error':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'text', text: `An unrecoverable error occurred in the ${e.source}.` }));
if (e.fatal) fatalError = true;
break;
@@ -352,6 +408,7 @@ export class WebSocketHandler {
break;
}
}
msgDebouncer.flush();
if (fatalError) {
socket.close(1011, 'Fatal error');
@@ -451,6 +508,9 @@ export class WebSocketHandler {
case 'subagent_tool_call':
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break;
case 'subagent_thinking':
socket.send(JSON.stringify({ type: 'subagent_thinking', agentName: e.agentName, content: e.content }));
break;
case 'tool_call':
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break;
@@ -730,6 +790,13 @@ export class WebSocketHandler {
// Create a per-subscription callback that forwards bars to this socket
const barCallback: BarUpdateCallback = (bar) => {
if (socket.readyState !== 1 /* OPEN */) return;
const symbolMeta = symbolIndexService?.getSymbolByTicker(bar.ticker);
const priceDivisor = (symbolMeta?.price_precision ?? 0) > 0
? Math.pow(10, symbolMeta!.price_precision!)
: 1;
const sizeDivisor = (symbolMeta?.size_precision ?? 0) > 0
? Math.pow(10, symbolMeta!.size_precision!)
: 1;
socket.send(JSON.stringify({
type: 'bar_update',
subscription_id: payload.subscription_id,
@@ -739,11 +806,11 @@ export class WebSocketHandler {
bar: {
// Convert nanoseconds → seconds for client compatibility
time: Number(bar.timestamp / 1_000_000_000n),
open: bar.open,
high: bar.high,
low: bar.low,
close: bar.close,
volume: bar.volume,
open: bar.open / priceDivisor,
high: bar.high / priceDivisor,
low: bar.low / priceDivisor,
close: bar.close / priceDivisor,
volume: bar.volume / sizeDivisor,
},
}));
};