feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions

6
.aiignore Normal file
View File

@@ -0,0 +1,6 @@
ingestor/protobuf
flink/protobuf
relay/protobuf
gateway/protobuf
deploy/k8s/dev/configs/gateway-config.yaml
deploy/k8s/prod/configs/gateway-config.yaml

5
.idea/ai.iml generated
View File

@@ -20,6 +20,11 @@
<excludeFolder url="file://$MODULE_DIR$/doc/competition" /> <excludeFolder url="file://$MODULE_DIR$/doc/competition" />
<excludeFolder url="file://$MODULE_DIR$/sandbox/dexorder_sandbox.egg-info" /> <excludeFolder url="file://$MODULE_DIR$/sandbox/dexorder_sandbox.egg-info" />
<excludeFolder url="file://$MODULE_DIR$/sandbox/protobuf" /> <excludeFolder url="file://$MODULE_DIR$/sandbox/protobuf" />
<excludeFolder url="file://$MODULE_DIR$/.idea/runConfigurations" />
<excludeFolder url="file://$MODULE_DIR$/chat" />
<excludeFolder url="file://$MODULE_DIR$/gateway/protobuf" />
<excludeFolder url="file://$MODULE_DIR$/gateway/src/generated" />
<excludeFolder url="file://$MODULE_DIR$/web/protobuf" />
</content> </content>
<orderEntry type="jdk" jdkName="Python 3.12 (ai)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.12 (ai)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />

23
bin/dev
View File

@@ -20,10 +20,10 @@ usage() {
echo " start Start minikube and deploy all services" echo " start Start minikube and deploy all services"
echo " stop [--keep-data] Stop minikube (deletes PVCs by default)" echo " stop [--keep-data] Stop minikube (deletes PVCs by default)"
echo " restart [svc] Rebuild and redeploy all services, or just one (relay|ingestor|flink|gateway|sidecar|web|sandbox)" echo " restart [svc] Rebuild and redeploy all services, or just one (relay|ingestor|flink|gateway|sidecar|web|sandbox)"
echo " deep-restart [svc] Restart StatefulSet(s) and delete their PVCs (kafka|postgres|minio|qdrant|all)" echo " deep-restart [svc] Restart StatefulSet(s) and delete their PVCs (kafka|postgres|minio|all)"
echo " rebuild [svc] Rebuild all custom images, or just one" echo " rebuild [svc] Rebuild all custom images, or just one"
echo " deploy [svc] Deploy/update all services, or just one" echo " deploy [svc] Deploy/update all services, or just one"
echo " delete-pvcs [svc] Delete PVCs for specific service or all (kafka|postgres|minio|qdrant|all)" echo " delete-pvcs [svc] Delete PVCs for specific service or all (kafka|postgres|minio|all)"
echo " status Show status of all services" echo " status Show status of all services"
echo " logs Tail logs for a service" echo " logs Tail logs for a service"
echo " shell Open a shell in a service pod" echo " shell Open a shell in a service pod"
@@ -446,19 +446,15 @@ delete_pvcs() {
minio) minio)
kubectl delete pvc -l app=minio || true kubectl delete pvc -l app=minio || true
;; ;;
qdrant)
kubectl delete pvc -l app=qdrant || true
;;
all) all)
echo -e "${YELLOW}Deleting all StatefulSet PVCs...${NC}" echo -e "${YELLOW}Deleting all StatefulSet PVCs...${NC}"
kubectl delete pvc -l app=kafka 2>/dev/null || true kubectl delete pvc -l app=kafka 2>/dev/null || true
kubectl delete pvc -l app=postgres 2>/dev/null || true kubectl delete pvc -l app=postgres 2>/dev/null || true
kubectl delete pvc -l app=minio 2>/dev/null || true kubectl delete pvc -l app=minio 2>/dev/null || true
kubectl delete pvc -l app=qdrant 2>/dev/null || true
;; ;;
*) *)
echo -e "${RED}Error: Unknown service '$service'${NC}" echo -e "${RED}Error: Unknown service '$service'${NC}"
echo "Valid services: kafka, postgres, minio, qdrant, all" echo "Valid services: kafka, postgres, minio, all"
exit 1 exit 1
;; ;;
esac esac
@@ -497,15 +493,9 @@ deep_restart() {
echo -e "${GREEN}→${NC} Force restarting iceberg-catalog (depends on minio)..." echo -e "${GREEN}→${NC} Force restarting iceberg-catalog (depends on minio)..."
kubectl delete pod -l app=iceberg-catalog 2>/dev/null || true kubectl delete pod -l app=iceberg-catalog 2>/dev/null || true
;; ;;
qdrant)
echo -e "${GREEN}→${NC} Deleting qdrant StatefulSet..."
kubectl delete statefulset qdrant || true
sleep 2
delete_pvcs qdrant
;;
all) all)
echo -e "${GREEN}→${NC} Deleting all StatefulSets..." echo -e "${GREEN}→${NC} Deleting all StatefulSets..."
kubectl delete statefulset kafka postgres minio qdrant || true kubectl delete statefulset kafka postgres minio || true
sleep 2 sleep 2
delete_pvcs all delete_pvcs all
# Force restart iceberg-catalog since it depends on postgres and minio # Force restart iceberg-catalog since it depends on postgres and minio
@@ -517,7 +507,7 @@ deep_restart() {
;; ;;
*) *)
echo -e "${RED}Error: Unknown service '$service'${NC}" echo -e "${RED}Error: Unknown service '$service'${NC}"
echo "Valid services: kafka, postgres, minio, qdrant, all" echo "Valid services: kafka, postgres, minio, all"
exit 1 exit 1
;; ;;
esac esac
@@ -642,13 +632,12 @@ case "$COMMAND" in
echo -e "${BLUE}Stopping minikube and deleting PVCs...${NC}" echo -e "${BLUE}Stopping minikube and deleting PVCs...${NC}"
# Scale down StatefulSets first to release PVCs # Scale down StatefulSets first to release PVCs
echo -e "${GREEN}→${NC} Scaling down StatefulSets..." echo -e "${GREEN}→${NC} Scaling down StatefulSets..."
kubectl scale statefulset kafka postgres minio qdrant --replicas=0 2>/dev/null || true kubectl scale statefulset kafka postgres minio --replicas=0 2>/dev/null || true
# Wait for pods to terminate # Wait for pods to terminate
echo -e "${GREEN}→${NC} Waiting for pods to terminate..." echo -e "${GREEN}→${NC} Waiting for pods to terminate..."
kubectl wait --for=delete pod -l app=kafka --timeout=60s 2>/dev/null || true kubectl wait --for=delete pod -l app=kafka --timeout=60s 2>/dev/null || true
kubectl wait --for=delete pod -l app=postgres --timeout=60s 2>/dev/null || true kubectl wait --for=delete pod -l app=postgres --timeout=60s 2>/dev/null || true
kubectl wait --for=delete pod -l app=minio --timeout=60s 2>/dev/null || true kubectl wait --for=delete pod -l app=minio --timeout=60s 2>/dev/null || true
kubectl wait --for=delete pod -l app=qdrant --timeout=60s 2>/dev/null || true
# Now delete PVCs # Now delete PVCs
delete_pvcs all delete_pvcs all
# Delete sandbox namespace # Delete sandbox namespace

View File

@@ -44,9 +44,6 @@ spec:
- name: wait-for-dragonfly - name: wait-for-dragonfly
image: busybox:1.36 image: busybox:1.36
command: ['sh', '-c', 'until nc -z dragonfly 6379; do echo waiting for dragonfly; sleep 2; done;'] command: ['sh', '-c', 'until nc -z dragonfly 6379; do echo waiting for dragonfly; sleep 2; done;']
- name: wait-for-qdrant
image: busybox:1.36
command: ['sh', '-c', 'until nc -z qdrant 6333; do echo waiting for qdrant; sleep 2; done;']
- name: wait-for-iceberg-catalog - name: wait-for-iceberg-catalog
image: busybox:1.36 image: busybox:1.36
command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;'] command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;']

View File

@@ -27,29 +27,22 @@ data:
model_provider: deepinfra model_provider: deepinfra
model: zai-org/GLM-5 model: zai-org/GLM-5
# License tier model configuration # License tier model configuration (null = fall back to defaults.model)
license_models: license_models:
# Free tier models
free: free:
default: zai-org/GLM-5 default: ~
cost_optimized: zai-org/GLM-5 cost_optimized: ~
complex: zai-org/GLM-5 complex: ~
allowed_models:
- zai-org/GLM-5
# Pro tier models
pro: pro:
default: zai-org/GLM-5 default: ~
cost_optimized: zai-org/GLM-5 cost_optimized: ~
complex: zai-org/GLM-5 complex: ~
blocked_models:
- Qwen/Qwen3-235B-A22B-Instruct-2507
# Enterprise tier models
enterprise: enterprise:
default: zai-org/GLM-5 default: ~
cost_optimized: zai-org/GLM-5 cost_optimized: ~
complex: Qwen/Qwen3-235B-A22B-Instruct-2507 complex: ~
# Kubernetes configuration # Kubernetes configuration
kubernetes: kubernetes:
@@ -70,11 +63,6 @@ data:
redis: redis:
url: redis://dragonfly:6379 url: redis://dragonfly:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://qdrant:6333
collection: gateway_memory
# Iceberg (for durable storage via REST catalog) # Iceberg (for durable storage via REST catalog)
iceberg: iceberg:
catalog_uri: http://iceberg-catalog:8181 catalog_uri: http://iceberg-catalog:8181

View File

@@ -45,68 +45,6 @@ spec:
memory: "512Mi" memory: "512Mi"
cpu: "500m" cpu: "500m"
--- ---
# Qdrant (Vector database for RAG)
apiVersion: v1
kind: Service
metadata:
name: qdrant
spec:
selector:
app: qdrant
ports:
- name: http
protocol: TCP
port: 6333
targetPort: 6333
- name: grpc
protocol: TCP
port: 6334
targetPort: 6334
type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: qdrant
spec:
serviceName: qdrant
replicas: 1
selector:
matchLabels:
app: qdrant
template:
metadata:
labels:
app: qdrant
spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
ports:
- containerPort: 6333
name: http
- containerPort: 6334
name: grpc
resources:
requests:
memory: "512Mi"
cpu: "200m"
limits:
memory: "1Gi"
cpu: "1000m"
volumeMounts:
- name: qdrant-data
mountPath: /qdrant/storage
volumeClaimTemplates:
- metadata:
name: qdrant-data
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: dev-ephemeral
resources:
requests:
storage: 10Gi
---
# Kafka (KRaft mode - no Zookeeper needed) # Kafka (KRaft mode - no Zookeeper needed)
# Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because: # Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because:
# - cp-kafka's entrypoint script has issues with KRaft configuration # - cp-kafka's entrypoint script has issues with KRaft configuration

View File

@@ -21,30 +21,6 @@ data:
model_provider: deepinfra model_provider: deepinfra
model: zai-org/GLM-5 model: zai-org/GLM-5
# License tier model configuration
license_models:
# Free tier models
free:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
allowed_models:
- zai-org/GLM-5
# Pro tier models
pro:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
blocked_models:
- Qwen/Qwen3-235B-A22B-Instruct-2507
# Enterprise tier models
enterprise:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
# Kubernetes configuration # Kubernetes configuration
kubernetes: kubernetes:
namespace: sandbox namespace: sandbox
@@ -59,11 +35,6 @@ data:
redis: redis:
url: redis://dragonfly:6379 url: redis://dragonfly:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://qdrant:6333
collection: gateway_memory
# Agent configuration # Agent configuration
agent: agent:
# Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end # Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end

View File

@@ -45,67 +45,6 @@ spec:
memory: "512Mi" memory: "512Mi"
cpu: "500m" cpu: "500m"
--- ---
# Qdrant (Vector database for RAG)
apiVersion: v1
kind: Service
metadata:
name: qdrant
spec:
selector:
app: qdrant
ports:
- name: http
protocol: TCP
port: 6333
targetPort: 6333
- name: grpc
protocol: TCP
port: 6334
targetPort: 6334
type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: qdrant
spec:
serviceName: qdrant
replicas: 1
selector:
matchLabels:
app: qdrant
template:
metadata:
labels:
app: qdrant
spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
ports:
- containerPort: 6333
name: http
- containerPort: 6334
name: grpc
resources:
requests:
memory: "512Mi"
cpu: "200m"
limits:
memory: "1Gi"
cpu: "1000m"
volumeMounts:
- name: qdrant-data
mountPath: /qdrant/storage
volumeClaimTemplates:
- metadata:
name: qdrant-data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 10Gi
---
# Kafka (KRaft mode - no Zookeeper needed) # Kafka (KRaft mode - no Zookeeper needed)
apiVersion: v1 apiVersion: v1
kind: Service kind: Service

View File

@@ -11,7 +11,7 @@ resources:
- ../base - ../base
# Add the 'ai' namespace (base only creates 'sandbox') # Add the 'ai' namespace (base only creates 'sandbox')
- namespaces.yaml - namespaces.yaml
# Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg) # Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, dragonfly, iceberg)
- infrastructure.yaml - infrastructure.yaml
# Sandbox namespace resources (go to sandbox namespace, not ai) # Sandbox namespace resources (go to sandbox namespace, not ai)
- sandbox-config.yaml - sandbox-config.yaml

View File

@@ -19,6 +19,7 @@ stringData:
# LLM Provider API Keys # LLM Provider API Keys
llm_providers: llm_providers:
deepinfra_api_key: "{{ op://AI Prod/Gateway/deepinfra_api_key }}" deepinfra_api_key: "{{ op://AI Prod/Gateway/deepinfra_api_key }}"
anthropic_api_key: "{{ op://AI Prod/Gateway/anthropic_api_key }}"
# Search API Keys # Search API Keys
search: search:
@@ -36,10 +37,6 @@ stringData:
push: push:
service_key: "" service_key: ""
# Qdrant API key (optional, for hosted Qdrant)
qdrant:
api_key: ""
# Iceberg S3 credentials (must match minio-secret) # Iceberg S3 credentials (must match minio-secret)
iceberg: iceberg:
s3_access_key: "{{ op://AI Prod/MinIO/access_key }}" s3_access_key: "{{ op://AI Prod/MinIO/access_key }}"

View File

@@ -10,7 +10,7 @@ The platform runs across two namespaces:
| Namespace | Contents | | Namespace | Contents |
|-----------|----------| |-----------|----------|
| `ai` | Gateway, web UI, all infrastructure services (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg-catalog) | | `ai` | Gateway, web UI, all infrastructure services (postgres, minio, kafka, flink, relay, ingestor, dragonfly, iceberg-catalog) |
| `sandbox` | Per-user sandbox containers (created dynamically by the gateway) | | `sandbox` | Per-user sandbox containers (created dynamically by the gateway) |
Secrets are managed via 1Password CLI (`op inject`). All `.tpl.yaml` files in `deploy/k8s/prod/secrets/` contain `op://` references and are safe to commit; actual values are never stored in git. Secrets are managed via 1Password CLI (`op inject`). All `.tpl.yaml` files in `deploy/k8s/prod/secrets/` contain `op://` references and are safe to commit; actual values are never stored in git.
@@ -217,7 +217,7 @@ kubectl --context=prod -n ai get configmaps
## Step 7 — Deploy Infrastructure ## Step 7 — Deploy Infrastructure
Infrastructure services (postgres, minio, kafka, iceberg-catalog, dragonfly, qdrant, relay, ingestor, flink) are defined in `deploy/k8s/prod/infrastructure.yaml` and were applied in Step 4. Infrastructure services (postgres, minio, kafka, iceberg-catalog, dragonfly, relay, ingestor, flink) are defined in `deploy/k8s/prod/infrastructure.yaml` and were applied in Step 4.
Wait for the StatefulSets and Deployments to become ready: Wait for the StatefulSets and Deployments to become ready:
@@ -225,7 +225,6 @@ Wait for the StatefulSets and Deployments to become ready:
kubectl --context=prod -n ai rollout status statefulset/postgres kubectl --context=prod -n ai rollout status statefulset/postgres
kubectl --context=prod -n ai rollout status statefulset/minio kubectl --context=prod -n ai rollout status statefulset/minio
kubectl --context=prod -n ai rollout status statefulset/kafka kubectl --context=prod -n ai rollout status statefulset/kafka
kubectl --context=prod -n ai rollout status statefulset/qdrant
kubectl --context=prod -n ai rollout status deployment/dragonfly kubectl --context=prod -n ai rollout status deployment/dragonfly
kubectl --context=prod -n ai rollout status deployment/iceberg-catalog kubectl --context=prod -n ai rollout status deployment/iceberg-catalog
kubectl --context=prod -n ai rollout status deployment/relay kubectl --context=prod -n ai rollout status deployment/relay

View File

@@ -22,20 +22,20 @@ The Agent Harness is the core orchestration layer for the Dexorder AI platform,
│ ┌──────────────────┼──────────────────┐ │ │ ┌──────────────────┼──────────────────┐ │
│ │ │ │ │ │ │ │ │ │
│ ┌────▼─────┐ ┌────▼─────┐ ┌────▼─────┐ │ │ ┌────▼─────┐ ┌────▼─────┐ ┌────▼─────┐ │
│ │ MCP │ │ LLM │ │ RAG │ │ │ │ MCP │ │ LLM │ │
│ │ Connector│ │ Router │ │ Retriever│ │ │ │ Connector│ │ Router │ │
│ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ └────┬─────┘ └────┬─────┘ │
│ │ │ │ │ │ │
└─────────┼──────────────────┼──────────────────┼───────────── └─────────┼──────────────────┼─────────────┘
│ │ │ │
▼ ▼ ▼ ▼
┌────────────┐ ┌───────────┐ ┌───────────┐ ┌────────────┐ ┌───────────┐
│ User's │ │ LLM │ │ Qdrant │ │ User's │ │ LLM │
│ MCP │ │ Providers │ │ (Vectors) │ │ MCP │ │ Providers │
│ Container │ │(Anthropic,│ │ │ │ Container │ │(Anthropic,│
│ (k8s pod) │ │ OpenAI, │ │ Global + │ │ (k8s pod) │ │ OpenAI, │
│ │ │ etc) │ │ User │ │ │ │ etc) │
└────────────┘ └───────────┘ └───────────┘ └────────────┘ └───────────┘
``` ```
## Message Processing Flow ## Message Processing Flow
@@ -57,17 +57,11 @@ When a user sends a message:
│ - context://workspace-state │ - context://workspace-state
│ - context://system-prompt │ - context://system-prompt
├─→ b. RAGRetriever searches for relevant memories: ├─→ b. Build system prompt:
│ - Embeds user query
│ - Searches Qdrant: user_id = current_user OR user_id = "0"
│ - Returns user-specific + global platform knowledge
├─→ c. Build system prompt:
│ - Base platform prompt │ - Base platform prompt
│ - User profile context │ - User profile context
│ - Workspace state │ - Workspace state
│ - Custom user instructions │ - Custom user instructions
│ - Relevant RAG memories
├─→ d. ModelRouter selects LLM: ├─→ d. ModelRouter selects LLM:
│ - Based on license tier │ - Based on license tier
@@ -92,11 +86,10 @@ When a user sends a message:
### 1. Agent Harness (`gateway/src/harness/agent-harness.ts`) ### 1. Agent Harness (`gateway/src/harness/agent-harness.ts`)
**Stateless orchestrator** - all state lives in user's MCP container or RAG. **Stateless orchestrator** - all state lives in user's MCP container.
**Responsibilities:** **Responsibilities:**
- Fetch context from user's MCP resources - Fetch context from user's MCP resources
- Query RAG for relevant memories
- Build prompts with full context - Build prompts with full context
- Route to appropriate LLM - Route to appropriate LLM
- Handle tool calls (platform vs user) - Handle tool calls (platform vs user)
@@ -141,40 +134,12 @@ Routes queries to appropriate LLM based on:
- LangGraph checkpoints (1 hour TTL) - LangGraph checkpoints (1 hour TTL)
- Fast reads for active conversations - Fast reads for active conversations
**Qdrant** (Vector Search)
- Conversation embeddings
- User-specific memories (user_id = actual user ID)
- **Global platform knowledge** (user_id = "0")
- RAG retrieval with cosine similarity
- GDPR-compliant (indexed by user_id for fast deletion)
**Iceberg** (Cold Storage) **Iceberg** (Cold Storage)
- Full conversation history (partitioned by user_id, session_id) - Full conversation history (partitioned by user_id, session_id)
- Checkpoint snapshots for replay - Checkpoint snapshots for replay
- Analytics and time-travel queries - Analytics and time-travel queries
- GDPR-compliant with compaction - GDPR-compliant with compaction
#### RAG System:
**Global Knowledge** (user_id="0"):
- Platform capabilities and architecture
- Trading concepts and fundamentals
- Indicator development guides
- Strategy patterns and examples
- Loaded from `gateway/knowledge/` markdown files
**User Knowledge** (user_id=specific user):
- Personal conversation history
- Trading preferences and style
- Custom indicators and strategies
- Workspace state and context
**Query Flow:**
1. User query is embedded using EmbeddingService
2. Qdrant searches: `user_id IN (current_user, "0")`
3. Top-K relevant chunks returned
4. Added to LLM context automatically
### 5. Skills vs Subagents ### 5. Skills vs Subagents
#### Skills (`gateway/src/harness/skills/`) #### Skills (`gateway/src/harness/skills/`)
@@ -290,44 +255,6 @@ User's MCP container provides access to:
- Tactical order generators (TWAP, iceberg, etc.) - Tactical order generators (TWAP, iceberg, etc.)
- Smart order routing - Smart order routing
## Global Knowledge Management
### Document Loading
At gateway startup:
1. DocumentLoader scans `gateway/knowledge/` directory
2. Markdown files chunked by headers (~1000 tokens/chunk)
3. Embeddings generated via EmbeddingService
4. Stored in Qdrant with user_id="0"
5. Content hashing enables incremental updates
### Directory Structure
```
gateway/knowledge/
├── platform/ # Platform capabilities
├── trading/ # Trading fundamentals
├── indicators/ # Indicator development
└── strategies/ # Strategy patterns
```
### Updating Knowledge
**Development:**
```bash
curl -X POST http://localhost:3000/admin/reload-knowledge
```
**Production:**
- Update markdown files
- Deploy new version
- Auto-loaded on startup
**Monitoring:**
```bash
curl http://localhost:3000/admin/knowledge-stats
```
## Container Lifecycle ## Container Lifecycle
### User Container Creation ### User Container Creation
@@ -362,7 +289,6 @@ When user connects:
### ✅ Completed ### ✅ Completed
- Agent Harness with MCP integration - Agent Harness with MCP integration
- Model routing with license tiers - Model routing with license tiers
- RAG retriever with Qdrant
- Document loader for global knowledge - Document loader for global knowledge
- EmbeddingService (Ollama/OpenAI) - EmbeddingService (Ollama/OpenAI)
- Skills and subagents framework - Skills and subagents framework
@@ -388,5 +314,4 @@ When user connects:
- Documentation: `gateway/src/harness/README.md` - Documentation: `gateway/src/harness/README.md`
- Knowledge base: `gateway/knowledge/` - Knowledge base: `gateway/knowledge/`
- LangGraph: https://langchain-ai.github.io/langgraphjs/ - LangGraph: https://langchain-ai.github.io/langgraphjs/
- Qdrant: https://qdrant.tech/documentation/
- MCP Spec: https://modelcontextprotocol.io/ - MCP Spec: https://modelcontextprotocol.io/

View File

@@ -19,7 +19,6 @@ Dexorder is an AI-powered trading platform that combines real-time market data p
│ • Authentication & session management │ │ • Authentication & session management │
│ • Agent Harness (LangChain/LangGraph orchestration) │ │ • Agent Harness (LangChain/LangGraph orchestration) │
│ - MCP client connector to user containers │ │ - MCP client connector to user containers │
│ - RAG retriever (Qdrant) │
│ - Model router (LLM selection) │ │ - Model router (LLM selection) │
│ - Skills & subagents framework │ │ - Skills & subagents framework │
│ • Dynamic user container provisioning │ │ • Dynamic user container provisioning │
@@ -30,8 +29,7 @@ Dexorder is an AI-powered trading platform that combines real-time market data p
┌──────────────────┐ ┌──────────────┐ ┌──────────────────────┐ ┌──────────────────┐ ┌──────────────┐ ┌──────────────────────┐
│ User Containers │ │ Relay │ │ Infrastructure │ │ User Containers │ │ Relay │ │ Infrastructure │
│ (per-user pods) │ │ (ZMQ Router) │ │ • DragonflyDB (cache)│ │ (per-user pods) │ │ (ZMQ Router) │ │ • DragonflyDB (cache)│
│ │ │ │ │ • Qdrant (vectors) │ │ │ │ • MCP Server │ │ • Market data│ │ • PostgreSQL (meta)
│ • MCP Server │ │ • Market data│ │ • PostgreSQL (meta) │
│ • User files: │ │ fanout │ │ • MinIO (S3) │ │ • User files: │ │ fanout │ │ • MinIO (S3) │
│ - Indicators │ │ • Work queue │ │ │ │ - Indicators │ │ • Work queue │ │ │
│ - Strategies │ │ • Stateless │ │ │ │ - Strategies │ │ • Stateless │ │ │
@@ -86,18 +84,16 @@ Dexorder is an AI-powered trading platform that combines real-time market data p
- **Agent Harness (LangChain/LangGraph):** ([[agent_harness]]) - **Agent Harness (LangChain/LangGraph):** ([[agent_harness]])
- Stateless LLM orchestration - Stateless LLM orchestration
- MCP client connector to user containers - MCP client connector to user containers
- RAG retrieval from Qdrant (global + user-specific knowledge)
- Model routing based on license tier and complexity - Model routing based on license tier and complexity
- Skills and subagents framework - Skills and subagents framework
- Workflow state machines with validation loops - Workflow state machines with validation loops
**Key Features:** **Key Features:**
- **Stateless design:** All conversation state lives in user containers or Qdrant - **Stateless design:** All conversation state lives in user containers
- **Multi-channel support:** WebSocket, Telegram (future: mobile, Discord, Slack) - **Multi-channel support:** WebSocket, Telegram (future: mobile, Discord, Slack)
- **Kubernetes-native:** Uses k8s API for container management - **Kubernetes-native:** Uses k8s API for container management
- **Three-tier memory:** - **Three-tier memory:**
- Redis: Hot storage, active sessions, LangGraph checkpoints (1 hour TTL) - Redis: Hot storage, active sessions, LangGraph checkpoints (1 hour TTL)
- Qdrant: Vector search, RAG, global + user knowledge, GDPR-compliant
- Iceberg: Cold storage, full history, analytics, time-travel queries - Iceberg: Cold storage, full history, analytics, time-travel queries
**Infrastructure:** **Infrastructure:**
@@ -270,12 +266,6 @@ Exchange API → Ingestor → Kafka → Flink → Iceberg
- Redis-compatible in-memory cache - Redis-compatible in-memory cache
- Session state, rate limiting, hot data - Session state, rate limiting, hot data
#### Qdrant
- Vector database for RAG
- **Global knowledge** (user_id="0"): Platform capabilities, trading concepts, strategy patterns
- **User knowledge** (user_id=specific): Personal conversations, preferences, strategies
- GDPR-compliant (indexed by user_id for fast deletion)
#### PostgreSQL #### PostgreSQL
- Iceberg catalog metadata - Iceberg catalog metadata
- User accounts and license info (gateway) - User accounts and license info (gateway)
@@ -458,17 +448,11 @@ The gateway's agent harness (LangChain/LangGraph) orchestrates LLM interactions
│ - context://workspace-state │ - context://workspace-state
│ - context://system-prompt │ - context://system-prompt
├─→ b. RAGRetriever searches Qdrant for relevant memories: ├─→ b. Build system prompt:
│ - Embeds user query
│ - Searches: user_id IN (current_user, "0")
│ - Returns user-specific + global platform knowledge
├─→ c. Build system prompt:
│ - Base platform prompt │ - Base platform prompt
│ - User profile context │ - User profile context
│ - Workspace state │ - Workspace state
│ - Custom user instructions │ - Custom user instructions
│ - Relevant RAG memories
├─→ d. ModelRouter selects LLM: ├─→ d. ModelRouter selects LLM:
│ - Based on license tier │ - Based on license tier
@@ -492,8 +476,6 @@ The gateway's agent harness (LangChain/LangGraph) orchestrates LLM interactions
**Key Architecture:** **Key Architecture:**
- **Gateway is stateless:** No conversation history stored in gateway - **Gateway is stateless:** No conversation history stored in gateway
- **User context in MCP:** All user-specific data lives in user's container - **User context in MCP:** All user-specific data lives in user's container
- **Global knowledge in Qdrant:** Platform documentation loaded from `gateway/knowledge/`
- **RAG at gateway level:** Semantic search combines global + user knowledge
- **Skills vs Subagents:** - **Skills vs Subagents:**
- Skills: Well-defined, single-purpose tasks - Skills: Well-defined, single-purpose tasks
- Subagents: Complex domain expertise with multi-file context - Subagents: Complex domain expertise with multi-file context
@@ -630,7 +612,6 @@ See [[backend_redesign]] for detailed notes.
- Historical backfill service - Historical backfill service
**Phase 3: Agent Features** **Phase 3: Agent Features**
- RAG integration (Qdrant)
- Strategy backtesting - Strategy backtesting
- Risk management tools - Risk management tools
- Portfolio analytics - Portfolio analytics

View File

@@ -14,3 +14,13 @@
* TradingView indicator import tool * TradingView indicator import tool
* Results persistence: ~~research analysis~~, backtests, strategy performance metrics, etc. * Results persistence: ~~research analysis~~, backtests, strategy performance metrics, etc.
* Free tier with token limits and sandbox shutdown * Free tier with token limits and sandbox shutdown
* Performance analysis
* Custom pre-session scanners / summaries
* Saved prompts (Create /presession prompt command for easy re-use)
https://github.com/wangzhe3224/awesome-systematic-trading
https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3247865 151 trading strategies
https://vectorbt.dev/
https://github.com/shiyu-coder/Kronos
https://x.com/RohOnChain/status/2041180375838498950?s=20 combining signals

View File

@@ -25,7 +25,12 @@ import org.slf4j.LoggerFactory;
* - Closed bar (isClosed=true): emitted once when a window boundary is crossed. * - Closed bar (isClosed=true): emitted once when a window boundary is crossed.
* Topic: "{ticker}|ohlc:{period}" — consumed by strategies/triggers. * Topic: "{ticker}|ohlc:{period}" — consumed by strategies/triggers.
* *
* Accumulator layout (long[7]): * Replay protection: ticks whose trade timestamp predates a period's current window start
* are discarded (prevents Kafka replay from contaminating current bars). Open bars are
* additionally suppressed until the first live tick (within LIVE_TICK_THRESHOLD_MS of now)
* is processed, so Kafka catch-up produces a single bar rather than a flood.
*
* Accumulator layout (long[8]):
* [0] open * [0] open
* [1] high * [1] high
* [2] low * [2] low
@@ -33,13 +38,18 @@ import org.slf4j.LoggerFactory;
* [4] volume (sum of base amount) * [4] volume (sum of base amount)
* [5] windowStartMs (epoch ms) * [5] windowStartMs (epoch ms)
* [6] tickCount * [6] tickCount
* [7] valid (1 = seeded or fresh window, 0 = mid-window cold start — open bars suppressed)
*/ */
public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, RealtimeBar> { public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, RealtimeBar> {
private static final Logger LOG = LoggerFactory.getLogger(RealtimeBarFunction.class); private static final Logger LOG = LoggerFactory.getLogger(RealtimeBarFunction.class);
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
// Ticks within this many ms of wall-clock time are considered live (vs. Kafka catch-up).
private static final long LIVE_TICK_THRESHOLD_MS = 30_000L;
private final int[] periods; private final int[] periods;
private transient MapState<Integer, long[]> accumState; private transient MapState<Integer, long[]> accumState;
// Suppresses open bar emissions during Kafka catch-up; set to true on first live tick.
private transient boolean caughtUp = false;
/** /**
* @param periods Period lengths in seconds (e.g., 60, 300, 900, 3600) * @param periods Period lengths in seconds (e.g., 60, 300, 900, 3600)
@@ -63,6 +73,20 @@ public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, Realti
if (tick == null) return; if (tick == null) return;
long nowMs = System.currentTimeMillis(); long nowMs = System.currentTimeMillis();
// Seeds use Long.MAX_VALUE so they always pass the per-period timestamp gate below.
long tickTimestampMs = tick.isSeed() ? Long.MAX_VALUE : (tick.getTimestamp() / 1_000_000L);
if (tick.isSeed()) {
LOG.info("Seed tick received: ticker={}, seedPeriod={}, seedWindowStart={}, seedHigh={}, nowMs={}",
tick.getTicker(), tick.getSeedPeriodSeconds(), tick.getSeedWindowStartMs(),
tick.getSeedHigh(), nowMs);
}
// Advance catch-up flag on the first live tick (within threshold of wall-clock time).
if (!caughtUp && !tick.isSeed() && (nowMs - tickTimestampMs) < LIVE_TICK_THRESHOLD_MS) {
caughtUp = true;
LOG.info("Caught up to live data: ticker={}", tick.getTicker());
}
for (int period : periods) { for (int period : periods) {
long periodMs = period * 1000L; long periodMs = period * 1000L;
@@ -70,37 +94,74 @@ public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, Realti
long[] accum = accumState.get(period); long[] accum = accumState.get(period);
// Seed ticks pre-populate the accumulator from historical OHLC.
// Only apply when the accumulator is absent and the seed targets this period's current window.
if (tick.isSeed()) {
if (tick.getSeedPeriodSeconds() == period && accum == null
&& tick.getSeedWindowStartMs() == windowStart) {
long[] seeded = {
tick.getPrice(), // open
tick.getSeedHigh(), // high
tick.getSeedLow(), // low
tick.getSeedClose(), // close
tick.getAmount(), // volume
windowStart,
0L, // tickCount (no live ticks yet)
1L // valid
};
accumState.put(period, seeded);
LOG.info("Applied seed: ticker={}, period={}s, windowStart={}", tick.getTicker(), period, windowStart);
} else if (tick.getSeedPeriodSeconds() == period) {
// Period matched but seed was not applied — log the mismatch reason
LOG.info("Seed not applied: ticker={}, period={}s, accumNull={}, seedWindow={}, currentWindow={}",
tick.getTicker(), period, accum == null, tick.getSeedWindowStartMs(), windowStart);
}
continue;
}
// Discard ticks whose trade timestamp predates this period's current window.
// Prevents Kafka replay of historical trades from contaminating current bars.
if (tickTimestampMs < windowStart) {
continue;
}
if (accum == null) { if (accum == null) {
// First tick for this period — open a new window and emit the first open bar // First live tick for this period, no seed — open mid-window, suppress open bars
long[] newAccum = openWindow(tick, windowStart); long[] newAccum = openWindow(tick, windowStart, false);
accumState.put(period, newAccum); accumState.put(period, newAccum);
out.collect(toBar(tick.getTicker(), period, newAccum, false)); LOG.info("Cold-start (no seed): ticker={}, period={}s, valid=0, open bars suppressed", tick.getTicker(), period);
} else if (accum[5] != windowStart) { } else if (accum[5] != windowStart) {
// Window boundary crossed — emit the closed bar, then start a fresh window // Window boundary crossed — emit closed bar, then start a fresh valid window
if (accum[6] > 0) { if (accum[6] > 0) {
out.collect(toBar(tick.getTicker(), period, accum, true)); out.collect(toBar(tick.getTicker(), period, accum, true));
LOG.debug("Emitted closed bar: ticker={}, period={}s, windowStart={}, ticks={}", LOG.debug("Emitted closed bar: ticker={}, period={}s, windowStart={}, ticks={}",
tick.getTicker(), period, accum[5], accum[6]); tick.getTicker(), period, accum[5], accum[6]);
} }
long[] newAccum = openWindow(tick, windowStart); long[] newAccum = openWindow(tick, windowStart, true);
accumState.put(period, newAccum); accumState.put(period, newAccum);
if (caughtUp) {
out.collect(toBar(tick.getTicker(), period, newAccum, false)); out.collect(toBar(tick.getTicker(), period, newAccum, false));
}
} else { } else {
// Same window — update accumulator and emit current open bar // Same window — update accumulator
accum[1] = Math.max(accum[1], tick.getPrice()); // high accum[1] = Math.max(accum[1], tick.getPrice()); // high
accum[2] = Math.min(accum[2], tick.getPrice()); // low accum[2] = Math.min(accum[2], tick.getPrice()); // low
accum[3] = tick.getPrice(); // close accum[3] = tick.getPrice(); // close
accum[4] += tick.getAmount(); // volume accum[4] += tick.getAmount(); // volume
accum[6]++; // tick count accum[6]++; // tick count
accumState.put(period, accum); accumState.put(period, accum);
if (accum[7] == 1 && caughtUp) {
out.collect(toBar(tick.getTicker(), period, accum, false)); out.collect(toBar(tick.getTicker(), period, accum, false));
} else if (accum[7] == 0 && caughtUp) {
LOG.debug("Open bar suppressed (valid=0, no seed): ticker={}, period={}s", tick.getTicker(), period);
}
} }
} }
} }
private static long[] openWindow(TickWrapper tick, long windowStart) { private static long[] openWindow(TickWrapper tick, long windowStart, boolean valid) {
return new long[]{ return new long[]{
tick.getPrice(), // open tick.getPrice(), // open
tick.getPrice(), // high tick.getPrice(), // high
@@ -108,7 +169,8 @@ public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, Realti
tick.getPrice(), // close tick.getPrice(), // close
tick.getAmount(), // volume tick.getAmount(), // volume
windowStart, windowStart,
1L // tickCount 1L, // tickCount
valid ? 1L : 0L // valid flag
}; };
} }

View File

@@ -40,7 +40,7 @@ public class TickDeserializer implements DeserializationSchema<TickWrapper> {
Tick tick = Tick.parseFrom(payload); Tick tick = Tick.parseFrom(payload);
return new TickWrapper( TickWrapper tw = new TickWrapper(
tick.getTicker(), tick.getTicker(),
tick.getTradeId(), tick.getTradeId(),
tick.getTimestamp(), tick.getTimestamp(),
@@ -49,6 +49,15 @@ public class TickDeserializer implements DeserializationSchema<TickWrapper> {
tick.getQuoteAmount(), tick.getQuoteAmount(),
tick.getTakerBuy() tick.getTakerBuy()
); );
if (tick.hasIsSeed() && tick.getIsSeed()) {
tw.setIsSeed(true);
tw.setSeedHigh(tick.getSeedHigh());
tw.setSeedLow(tick.getSeedLow());
tw.setSeedClose(tick.getSeedClose());
tw.setSeedWindowStartMs(tick.getSeedWindowStartMs());
tw.setSeedPeriodSeconds(tick.getSeedPeriodSeconds());
}
return tw;
} catch (Exception e) { } catch (Exception e) {
LOG.warn("Failed to deserialize Tick, skipping: {}", e.getMessage()); LOG.warn("Failed to deserialize Tick, skipping: {}", e.getMessage());

View File

@@ -20,6 +20,12 @@ public class TickWrapper implements Serializable {
/** Quote amount as scaled integer */ /** Quote amount as scaled integer */
private long quoteAmount; private long quoteAmount;
private boolean takerBuy; private boolean takerBuy;
private boolean isSeed;
private long seedHigh;
private long seedLow;
private long seedClose;
private long seedWindowStartMs;
private int seedPeriodSeconds;
public TickWrapper() {} public TickWrapper() {}
@@ -41,6 +47,12 @@ public class TickWrapper implements Serializable {
public long getAmount() { return amount; } public long getAmount() { return amount; }
public long getQuoteAmount() { return quoteAmount; } public long getQuoteAmount() { return quoteAmount; }
public boolean isTakerBuy() { return takerBuy; } public boolean isTakerBuy() { return takerBuy; }
public boolean isSeed() { return isSeed; }
public long getSeedHigh() { return seedHigh; }
public long getSeedLow() { return seedLow; }
public long getSeedClose() { return seedClose; }
public long getSeedWindowStartMs() { return seedWindowStartMs; }
public int getSeedPeriodSeconds() { return seedPeriodSeconds; }
public void setTicker(String ticker) { this.ticker = ticker; } public void setTicker(String ticker) { this.ticker = ticker; }
public void setTradeId(String tradeId) { this.tradeId = tradeId; } public void setTradeId(String tradeId) { this.tradeId = tradeId; }
@@ -49,6 +61,12 @@ public class TickWrapper implements Serializable {
public void setAmount(long amount) { this.amount = amount; } public void setAmount(long amount) { this.amount = amount; }
public void setQuoteAmount(long quoteAmount) { this.quoteAmount = quoteAmount; } public void setQuoteAmount(long quoteAmount) { this.quoteAmount = quoteAmount; }
public void setTakerBuy(boolean takerBuy) { this.takerBuy = takerBuy; } public void setTakerBuy(boolean takerBuy) { this.takerBuy = takerBuy; }
public void setIsSeed(boolean isSeed) { this.isSeed = isSeed; }
public void setSeedHigh(long seedHigh) { this.seedHigh = seedHigh; }
public void setSeedLow(long seedLow) { this.seedLow = seedLow; }
public void setSeedClose(long seedClose) { this.seedClose = seedClose; }
public void setSeedWindowStartMs(long seedWindowStartMs) { this.seedWindowStartMs = seedWindowStartMs; }
public void setSeedPeriodSeconds(int seedPeriodSeconds) { this.seedPeriodSeconds = seedPeriodSeconds; }
@Override @Override
public String toString() { public String toString() {

View File

@@ -38,10 +38,6 @@ SANDBOX_STORAGE_CLASS=standard
# Redis (for hot storage and session management) # Redis (for hot storage and session management)
REDIS_URL=redis://localhost:6379 REDIS_URL=redis://localhost:6379
# Qdrant (for RAG vector search)
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY= # optional, leave empty for local dev
# Iceberg (for durable storage via REST catalog) # Iceberg (for durable storage via REST catalog)
ICEBERG_CATALOG_URI=http://iceberg-catalog:8181 ICEBERG_CATALOG_URI=http://iceberg-catalog:8181
ICEBERG_NAMESPACE=gateway ICEBERG_NAMESPACE=gateway

View File

@@ -58,7 +58,6 @@ Multi-channel gateway with agent harness for the Dexorder AI platform.
- **Streaming responses**: Real-time chat with WebSocket and Telegram - **Streaming responses**: Real-time chat with WebSocket and Telegram
- **Complex workflows**: LangGraph for stateful trading analysis (backtest → risk → approval) - **Complex workflows**: LangGraph for stateful trading analysis (backtest → risk → approval)
- **Agent harness**: Stateless orchestrator (all context lives in user's MCP container) - **Agent harness**: Stateless orchestrator (all context lives in user's MCP container)
- **MCP resource integration**: User's RAG, conversation history, and preferences
## Container Management ## Container Management
@@ -91,9 +90,7 @@ Containers self-manage their lifecycle using the lifecycle sidecar (see `../life
- OpenAI GPT - OpenAI GPT
- Google Gemini - Google Gemini
- OpenRouter (one key for 300+ models) - OpenRouter (one key for 300+ models)
- Ollama (for embeddings): https://ollama.com/download
- Redis (for session/hot storage) - Redis (for session/hot storage)
- Qdrant (for RAG vector search)
- Kafka + Flink + Iceberg (for durable storage) - Kafka + Flink + Iceberg (for durable storage)
### Development ### Development
@@ -123,20 +120,7 @@ DEFAULT_MODEL_PROVIDER=anthropic
DEFAULT_MODEL=claude-sonnet-4-6 DEFAULT_MODEL=claude-sonnet-4-6
``` ```
4. Start Ollama and pull embedding model: 4. Run development server:
```bash
# Install Ollama (one-time): https://ollama.com/download
# Or with Docker: docker run -d -p 11434:11434 ollama/ollama
# Pull the all-minilm embedding model (90MB, CPU-friendly)
ollama pull all-minilm
# Alternative models:
# ollama pull nomic-embed-text # 8K context length
# ollama pull mxbai-embed-large # Higher accuracy, slower
```
5. Run development server:
```bash ```bash
npm run dev npm run dev
``` ```
@@ -217,138 +201,6 @@ ws.send(JSON.stringify({
**`GET /health`** **`GET /health`**
- Returns server health status - Returns server health status
## Ollama Deployment Options
The gateway requires Ollama for embedding generation in RAG queries. You have two deployment options:
### Option 1: Ollama in Gateway Container (Recommended for simplicity)
Install Ollama directly in the gateway container. This keeps all dependencies local and simplifies networking.
**Dockerfile additions:**
```dockerfile
FROM node:22-slim
# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
# Pull embedding model at build time
RUN ollama serve & \
sleep 5 && \
ollama pull all-minilm && \
pkill ollama
# ... rest of your gateway Dockerfile
```
**Start script (entrypoint.sh):**
```bash
#!/bin/bash
# Start Ollama in background
ollama serve &
# Start gateway
node dist/main.js
```
**Pros:**
- Simple networking (localhost:11434)
- No extra K8s resources
- Self-contained deployment
**Cons:**
- Larger container image (~200MB extra)
- CPU/memory shared with gateway process
**Resource requirements:**
- Add +200MB memory
- Add +0.2 CPU cores for embedding inference
### Option 2: Ollama as Separate Pod/Sidecar
Deploy Ollama as a separate container in the same pod (sidecar) or as its own deployment.
**K8s Deployment (sidecar pattern):**
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: gateway
spec:
template:
spec:
containers:
- name: gateway
image: ghcr.io/dexorder/gateway:latest
env:
- name: OLLAMA_URL
value: http://localhost:11434
- name: ollama
image: ollama/ollama:latest
command: ["/bin/sh", "-c"]
args:
- |
ollama serve &
sleep 5
ollama pull all-minilm
wait
resources:
requests:
memory: "512Mi"
cpu: "500m"
limits:
memory: "1Gi"
cpu: "1000m"
```
**K8s Deployment (separate service):**
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
spec:
replicas: 1
template:
spec:
containers:
- name: ollama
image: ollama/ollama:latest
# ... same as above
---
apiVersion: v1
kind: Service
metadata:
name: ollama
spec:
selector:
app: ollama
ports:
- port: 11434
```
Gateway `.env`:
```bash
OLLAMA_URL=http://ollama:11434
```
**Pros:**
- Isolated resource limits
- Can scale separately
- Easier to monitor/debug
**Cons:**
- More K8s resources
- Network hop (minimal latency)
- More complex deployment
### Recommendation
For most deployments: **Use Option 1 (in-container)** for simplicity, unless you need to:
- Share Ollama across multiple services
- Scale embedding inference independently
- Run Ollama on GPU nodes (gateway on CPU nodes)
## TODO ## TODO

View File

@@ -58,11 +58,6 @@ kubernetes:
redis: redis:
url: redis://localhost:6379 url: redis://localhost:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://localhost:6333
collection: gateway_memory
# Iceberg (for durable storage via REST catalog) # Iceberg (for durable storage via REST catalog)
iceberg: iceberg:
catalog_uri: http://iceberg-catalog:8181 catalog_uri: http://iceberg-catalog:8181

View File

@@ -1,6 +1,6 @@
# Dexorder Knowledge Base # Dexorder Knowledge Base
This directory contains global knowledge documents that are automatically loaded into the RAG system as platform-wide knowledge (user_id="0"). This directory contains global knowledge documents that are automatically loaded into the agent's context at startup.
## Structure ## Structure
@@ -40,9 +40,7 @@ Content here...
1. At gateway startup, the DocumentLoader scans this directory 1. At gateway startup, the DocumentLoader scans this directory
2. Each markdown file is chunked by headers (max ~1000 tokens per chunk) 2. Each markdown file is chunked by headers (max ~1000 tokens per chunk)
3. Chunks are embedded using the configured embedding service 3. Content hash tracking enables incremental updates
4. Embeddings are stored in Qdrant with user_id="0" (global namespace)
5. Content hash tracking enables incremental updates
## Updating Documents ## Updating Documents
@@ -55,14 +53,6 @@ Content here...
- Deploy new version - Deploy new version
- Gateway will detect changes and update vectors automatically - Gateway will detect changes and update vectors automatically
## RAG Integration
When users query the agent:
1. Their query is embedded
2. Qdrant searches both global (user_id="0") and user-specific vectors
3. Relevant chunks from these docs are included in context
4. LLM generates response with platform knowledge
## Adding New Documents ## Adding New Documents
1. Create markdown file in appropriate subdirectory 1. Create markdown file in appropriate subdirectory
@@ -90,12 +80,3 @@ Check logs for load statistics:
``` ```
Knowledge documents loaded: { loaded: 5, updated: 2, skipped: 3 } Knowledge documents loaded: { loaded: 5, updated: 2, skipped: 3 }
``` ```
Monitor Qdrant collection stats:
```
GET /health
{
"qdrantVectors": 1234,
"qdrantIndexed": 1234
}
```

View File

@@ -19,6 +19,7 @@
"@langchain/community": "^1.1.27", "@langchain/community": "^1.1.27",
"@langchain/core": "latest", "@langchain/core": "latest",
"@langchain/langgraph": "latest", "@langchain/langgraph": "latest",
"@langchain/anthropic": "latest",
"@langchain/openai": "^1.4.2", "@langchain/openai": "^1.4.2",
"@modelcontextprotocol/sdk": "^1.0.4", "@modelcontextprotocol/sdk": "^1.0.4",
"@types/pdf-parse": "^1.1.5", "@types/pdf-parse": "^1.1.5",

View File

@@ -21,6 +21,8 @@ Delegate specialized tasks to subagents using the `Spawn` tool. Each subagent ha
- Multi-symbol comparisons - Multi-symbol comparisons
- Custom calculations using Python (pandas, numpy, scipy, matplotlib, etc.) - Custom calculations using Python (pandas, numpy, scipy, matplotlib, etc.)
**Always begin the instruction with:** `Research script name: "<Descriptive Name>"` — e.g. `Research script name: "Monday Tuesday Session Overlap"`. The research agent uses this name when calling `PythonWrite` or `PythonEdit`.
Do **NOT** include time range, history length, bar count, period size, or resolution guidance in the instruction unless the user explicitly specifies such. The research agent selects its own optimal window and period otherwise. Do **NOT** include time range, history length, bar count, period size, or resolution guidance in the instruction unless the user explicitly specifies such. The research agent selects its own optimal window and period otherwise.
**`Spawn({agent: "indicator", instruction: "..."})`** — for ANYTHING indicator-related on the chart: **`Spawn({agent: "indicator", instruction: "..."})`** — for ANYTHING indicator-related on the chart:

View File

@@ -15,6 +15,10 @@ dynamic_imports:
You are a specialized assistant that creates Python research scripts for market data analysis and visualization. You are a specialized assistant that creates Python research scripts for market data analysis and visualization.
## CRITICAL RULE
**You MUST call `PythonWrite` (new script) or `PythonEdit` (existing script) as your FIRST tool call. NEVER write analysis text without first creating or updating a script.** If you find yourself about to generate analysis text without a tool call, stop and call `PythonWrite` or `PythonEdit` first. A text-only response is always wrong.
## Your Purpose ## Your Purpose
Create Python scripts that: Create Python scripts that:

View File

@@ -26,10 +26,6 @@ email:
push: push:
service_key: "" service_key: ""
# Qdrant API key (optional, for hosted Qdrant)
qdrant:
api_key: ""
# Iceberg S3 credentials # Iceberg S3 credentials
iceberg: iceberg:
s3_access_key: minioadmin s3_access_key: minioadmin

View File

@@ -105,36 +105,36 @@ export class AuthService {
asResponse: true, asResponse: true,
}); });
// Extract bearer token from response headers (set by bearer plugin)
const token = response.headers.get('set-auth-token');
if (!token) {
this.config.logger.error('Bearer token not found in response headers');
return {
token: '',
userId: '',
error: 'Authentication token not generated',
};
}
// Parse the response body to get user info // Parse the response body to get user info
const result = await response.json() as { const result = await response.json() as {
user?: { id: string; email: string; name: string }; user?: { id: string; email: string; name: string };
error?: string; error?: string;
}; };
if (!response.ok) {
this.config.logger.warn({ status: response.status }, 'Sign in rejected by auth provider');
return {
token: '',
userId: '',
error: 'Invalid email or password.',
};
}
// Extract bearer token from response headers (set by bearer plugin)
const token = response.headers.get('set-auth-token');
this.config.logger.debug({ this.config.logger.debug({
hasUser: !!result.user, hasUser: !!result.user,
userId: result.user?.id, userId: result.user?.id,
hasToken: !!token, hasToken: !!token,
}, 'Sign in result'); }, 'Sign in result');
if (!result.user) { if (!token || !result.user) {
this.config.logger.warn('Sign in failed: no user in result'); this.config.logger.error({ hasToken: !!token, hasUser: !!result.user }, 'Sign in succeeded but session data missing');
return { return {
token: '', token: '',
userId: '', userId: '',
error: 'Invalid credentials', error: 'Login failed. Please try again.',
}; };
} }
@@ -147,7 +147,7 @@ export class AuthService {
return { return {
token: '', token: '',
userId: '', userId: '',
error: error.message || 'Sign in failed', error: 'Login failed. Please try again.',
}; };
} }
} }

View File

@@ -5,6 +5,8 @@ import type { AgentHarness, HarnessFactory } from '../harness/agent-harness.js';
import type { HarnessEvent } from '../harness/harness-events.js'; import type { HarnessEvent } from '../harness/harness-events.js';
import type { InboundMessage } from '../types/messages.js'; import type { InboundMessage } from '../types/messages.js';
import { randomUUID } from 'crypto'; import { randomUUID } from 'crypto';
import { parseModelTag, MODEL_TAGS } from '../llm/model-tags.js';
import type { LLMProvider } from '../llm/provider.js';
import type { SessionRegistry, EventSubscriber, Session } from '../events/index.js'; import type { SessionRegistry, EventSubscriber, Session } from '../events/index.js';
import type { OHLCService, BarUpdateCallback } from '../services/ohlc-service.js'; import type { OHLCService, BarUpdateCallback } from '../services/ohlc-service.js';
import type { SymbolIndexService } from '../services/symbol-index-service.js'; import type { SymbolIndexService } from '../services/symbol-index-service.js';
@@ -30,6 +32,24 @@ function jsonStringifySafe(obj: any): string {
); );
} }
function makeChunkDebouncer(send: (content: string) => void, delayMs = 200) {
let buffer = '';
let timer: ReturnType<typeof setTimeout> | null = null;
function flush() {
if (timer !== null) { clearTimeout(timer); timer = null; }
if (buffer.length > 0) { send(buffer); buffer = ''; }
}
function add(content: string) {
buffer += content;
if (timer !== null) clearTimeout(timer);
timer = setTimeout(flush, delayMs);
}
return { add, flush };
}
export type SessionStatus = 'authenticating' | 'spinning_up' | 'initializing' | 'ready' | 'error' export type SessionStatus = 'authenticating' | 'spinning_up' | 'initializing' | 'ready' | 'error'
function sendStatus(socket: WebSocket, status: SessionStatus, message: string): void { function sendStatus(socket: WebSocket, status: SessionStatus, message: string): void {
@@ -257,6 +277,7 @@ export class WebSocketHandler {
userId: authContext.userId, userId: authContext.userId,
licenseType: authContext.license.licenseType, licenseType: authContext.license.licenseType,
message: 'Connected to Dexorder AI', message: 'Connected to Dexorder AI',
modelTags: MODEL_TAGS.map(m => m.tag),
}) })
); );
@@ -272,25 +293,32 @@ export class WebSocketHandler {
} else { } else {
// First conversation — auto-send greeting prompt and stream the response // First conversation — auto-send greeting prompt and stream the response
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false })); socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false }));
const greetingDebouncer = makeChunkDebouncer(content =>
socket.send(JSON.stringify({ type: 'agent_chunk', content, done: false }))
);
for await (const event of harness!.streamGreeting()) { for await (const event of harness!.streamGreeting()) {
const e = event as HarnessEvent; const e = event as HarnessEvent;
switch (e.type) { switch (e.type) {
case 'chunk': case 'chunk':
socket.send(JSON.stringify({ type: 'agent_chunk', content: e.content, done: false })); greetingDebouncer.add(e.content);
break; break;
case 'tool_call': case 'tool_call':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label })); socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break; break;
case 'image': case 'image':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption })); socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break; break;
case 'error': case 'error':
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'text', text: `An error occurred during greeting.` })); socket.send(JSON.stringify({ type: 'text', text: `An error occurred during greeting.` }));
break; break;
case 'done': case 'done':
break; break;
} }
} }
greetingDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: true })); socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: true }));
} }
} }
@@ -304,47 +332,75 @@ export class WebSocketHandler {
// Route based on message type // Route based on message type
if (payload.type === 'message' || payload.type === 'agent_user_message') { if (payload.type === 'message' || payload.type === 'agent_user_message') {
// Chat message - send to agent harness with streaming
const inboundMessage: InboundMessage = {
messageId: randomUUID(),
userId: authContext.userId,
sessionId: authContext.sessionId,
content: payload.content,
attachments: payload.attachments,
timestamp: new Date(),
};
if (!harness) { if (!harness) {
logger.error('Harness not initialized'); logger.error('Harness not initialized');
socket.send(JSON.stringify({ type: 'error', message: 'Session not ready' })); socket.send(JSON.stringify({ type: 'error', message: 'Session not ready' }));
return; return;
} }
// Check for @ModelTag at the start of the message
const parsedTag = parseModelTag(payload.content ?? '');
let messageContent: string = payload.content ?? '';
let modelOverride: { modelId: string; provider?: LLMProvider } | undefined;
if (parsedTag) {
await harness.clearHistory();
socket.send(JSON.stringify({ type: 'model_switched', tag: parsedTag.tag, modelId: parsedTag.modelId, rest: parsedTag.rest }));
messageContent = parsedTag.rest;
modelOverride = { modelId: parsedTag.modelId, provider: parsedTag.provider };
logger.info({ tag: parsedTag.tag, modelId: parsedTag.modelId }, 'Model tag switch');
}
// Chat message - send to agent harness with streaming
const inboundMessage: InboundMessage = {
messageId: randomUUID(),
userId: authContext.userId,
sessionId: authContext.sessionId,
content: messageContent,
attachments: payload.attachments,
timestamp: new Date(),
};
try { try {
// Acknowledge receipt immediately so the client can show the seen indicator // Acknowledge receipt immediately so the client can show the seen indicator
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false })); socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false }));
logger.info('Streaming harness response'); logger.info('Streaming harness response');
let fatalError = false; let fatalError = false;
for await (const event of harness.streamMessage(inboundMessage)) { const msgDebouncer = makeChunkDebouncer(content =>
socket.send(JSON.stringify({ type: 'agent_chunk', content, done: false }))
);
const stream = (parsedTag && !messageContent)
? harness.streamGreeting(modelOverride)
: harness.streamMessage(inboundMessage, { modelOverride });
for await (const event of stream) {
const e = event as HarnessEvent; const e = event as HarnessEvent;
switch (e.type) { switch (e.type) {
case 'chunk': case 'chunk':
socket.send(JSON.stringify({ type: 'agent_chunk', content: e.content, done: false })); msgDebouncer.add(e.content);
break; break;
case 'tool_call': case 'tool_call':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label })); socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break; break;
case 'subagent_tool_call': case 'subagent_tool_call':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label })); socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break; break;
case 'subagent_chunk': case 'subagent_chunk':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: e.agentName, content: e.content })); socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: e.agentName, content: e.content }));
break; break;
case 'subagent_thinking':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'subagent_thinking', agentName: e.agentName, content: e.content }));
break;
case 'image': case 'image':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption })); socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break; break;
case 'error': case 'error':
msgDebouncer.flush();
socket.send(JSON.stringify({ type: 'text', text: `An unrecoverable error occurred in the ${e.source}.` })); socket.send(JSON.stringify({ type: 'text', text: `An unrecoverable error occurred in the ${e.source}.` }));
if (e.fatal) fatalError = true; if (e.fatal) fatalError = true;
break; break;
@@ -352,6 +408,7 @@ export class WebSocketHandler {
break; break;
} }
} }
msgDebouncer.flush();
if (fatalError) { if (fatalError) {
socket.close(1011, 'Fatal error'); socket.close(1011, 'Fatal error');
@@ -451,6 +508,9 @@ export class WebSocketHandler {
case 'subagent_tool_call': case 'subagent_tool_call':
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label })); socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break; break;
case 'subagent_thinking':
socket.send(JSON.stringify({ type: 'subagent_thinking', agentName: e.agentName, content: e.content }));
break;
case 'tool_call': case 'tool_call':
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label })); socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break; break;
@@ -730,6 +790,13 @@ export class WebSocketHandler {
// Create a per-subscription callback that forwards bars to this socket // Create a per-subscription callback that forwards bars to this socket
const barCallback: BarUpdateCallback = (bar) => { const barCallback: BarUpdateCallback = (bar) => {
if (socket.readyState !== 1 /* OPEN */) return; if (socket.readyState !== 1 /* OPEN */) return;
const symbolMeta = symbolIndexService?.getSymbolByTicker(bar.ticker);
const priceDivisor = (symbolMeta?.price_precision ?? 0) > 0
? Math.pow(10, symbolMeta!.price_precision!)
: 1;
const sizeDivisor = (symbolMeta?.size_precision ?? 0) > 0
? Math.pow(10, symbolMeta!.size_precision!)
: 1;
socket.send(JSON.stringify({ socket.send(JSON.stringify({
type: 'bar_update', type: 'bar_update',
subscription_id: payload.subscription_id, subscription_id: payload.subscription_id,
@@ -739,11 +806,11 @@ export class WebSocketHandler {
bar: { bar: {
// Convert nanoseconds → seconds for client compatibility // Convert nanoseconds → seconds for client compatibility
time: Number(bar.timestamp / 1_000_000_000n), time: Number(bar.timestamp / 1_000_000_000n),
open: bar.open, open: bar.open / priceDivisor,
high: bar.high, high: bar.high / priceDivisor,
low: bar.low, low: bar.low / priceDivisor,
close: bar.close, close: bar.close / priceDivisor,
volume: bar.volume, volume: bar.volume / sizeDivisor,
}, },
})); }));
}; };

View File

@@ -151,7 +151,7 @@ export function deserializeUserEvent(data: Buffer): UserEvent {
eventId: json.event_id, eventId: json.event_id,
timestamp: json.timestamp, timestamp: json.timestamp,
eventType: json.event_type as EventType, eventType: json.event_type as EventType,
payload: Buffer.from(json.payload, 'base64'), payload: json.payload ? Buffer.from(json.payload, 'base64') : Buffer.alloc(0),
delivery: { delivery: {
priority: json.delivery.priority as Priority, priority: json.delivery.priority as Priority,
channels: json.delivery.channels.map( channels: json.delivery.channels.map(

View File

@@ -7,7 +7,7 @@ Comprehensive agent orchestration system for Dexorder AI platform, built on Lang
``` ```
gateway/src/ gateway/src/
├── harness/ ├── harness/
│ ├── memory/ # Storage layer (Redis + Iceberg + Qdrant) │ ├── memory/ # Storage layer (Redis + Iceberg)
│ ├── subagents/ # Specialized agents with multi-file memory │ ├── subagents/ # Specialized agents with multi-file memory
│ ├── workflows/ # LangGraph state machines │ ├── workflows/ # LangGraph state machines
│ ├── prompts/ # System prompts │ ├── prompts/ # System prompts
@@ -27,13 +27,10 @@ Tiered storage architecture:
- **Redis**: Hot state (active sessions, checkpoints) - **Redis**: Hot state (active sessions, checkpoints)
- **Iceberg**: Cold storage (durable conversations, analytics) - **Iceberg**: Cold storage (durable conversations, analytics)
- **Qdrant**: Vector search (RAG, semantic memory)
**Key Files:** **Key Files:**
- `checkpoint-saver.ts`: LangGraph checkpoint persistence - `checkpoint-saver.ts`: LangGraph checkpoint persistence
- `conversation-store.ts`: Message history management - `conversation-store.ts`: Message history management
- `rag-retriever.ts`: Vector similarity search
- `embedding-service.ts`: Text→vector conversion
- `session-context.ts`: User context with channel metadata - `session-context.ts`: User context with channel metadata
### 2. Tools (`../tools/`) ### 2. Tools (`../tools/`)
@@ -176,19 +173,11 @@ Based on [harness-rag.txt discussion](../../chat/harness-rag.txt):
- Time-travel queries - Time-travel queries
- GDPR-compliant deletion with compaction - GDPR-compliant deletion with compaction
### Vector Search (Qdrant)
- Conversation embeddings
- Long-term memory
- RAG retrieval
- Payload-indexed by user_id for fast GDPR deletion
- **Global knowledge base** (user_id="0") loaded from markdown files
### GDPR Compliance ### GDPR Compliance
```typescript ```typescript
// Delete user data across all stores // Delete user data across all stores
await conversationStore.deleteUserData(userId); await conversationStore.deleteUserData(userId);
await ragRetriever.deleteUserData(userId);
await checkpointSaver.delete(userId); await checkpointSaver.delete(userId);
await containerManager.deleteContainer(userId); await containerManager.deleteContainer(userId);
@@ -247,19 +236,13 @@ Already in `gateway/package.json`:
import Redis from 'ioredis'; import Redis from 'ioredis';
import { import {
TieredCheckpointSaver, TieredCheckpointSaver,
ConversationStore, ConversationStore
EmbeddingService,
RAGRetriever
} from './harness/memory'; } from './harness/memory';
const redis = new Redis(process.env.REDIS_URL); const redis = new Redis(process.env.REDIS_URL);
const checkpointSaver = new TieredCheckpointSaver(redis, logger); const checkpointSaver = new TieredCheckpointSaver(redis, logger);
const conversationStore = new ConversationStore(redis, logger); const conversationStore = new ConversationStore(redis, logger);
const embeddings = new EmbeddingService({ provider: 'openai', apiKey }, logger);
const ragRetriever = new RAGRetriever({ url: QDRANT_URL }, logger);
await ragRetriever.initialize();
``` ```
### 3. Create Subagents ### 3. Create Subagents
@@ -309,56 +292,6 @@ const analysis = await skill.execute({
}); });
``` ```
## Global Knowledge System
The harness includes a document loader that automatically loads markdown files from `gateway/knowledge/` into Qdrant as global knowledge (user_id="0").
### Directory Structure
```
gateway/knowledge/
├── platform/ # Platform capabilities and architecture
├── trading/ # Trading concepts and fundamentals
├── indicators/ # Indicator development guides
└── strategies/ # Strategy patterns and examples
```
### How It Works
1. **Startup**: Documents are loaded automatically when gateway starts
2. **Chunking**: Intelligent splitting by markdown headers (~1000 tokens/chunk)
3. **Embedding**: Chunks are embedded using configured embedding service
4. **Storage**: Stored in Qdrant with user_id="0" (global namespace)
5. **Updates**: Content hashing detects changes for incremental updates
### RAG Query Flow
When a user sends a message:
1. Query is embedded using same embedding service
2. Qdrant searches vectors with filter: `user_id = current_user OR user_id = "0"`
3. Results include both user-specific and global knowledge
4. Relevant chunks are added to LLM context
5. LLM generates response with platform knowledge
### Managing Knowledge
**Add new documents**:
```bash
# Create markdown file in appropriate directory
echo "# New Topic" > gateway/knowledge/platform/new-topic.md
# Reload knowledge (development)
curl -X POST http://localhost:3000/admin/reload-knowledge
```
**Check stats**:
```bash
curl http://localhost:3000/admin/knowledge-stats
```
**In production**: Just deploy updated markdown files - they'll be loaded on startup.
See [gateway/knowledge/README.md](../../knowledge/README.md) for detailed documentation.
## Next Steps ## Next Steps
1. **Implement Iceberg Integration**: Complete TODOs in checkpoint-saver.ts and conversation-store.ts 1. **Implement Iceberg Integration**: Complete TODOs in checkpoint-saver.ts and conversation-store.ts
@@ -371,5 +304,4 @@ See [gateway/knowledge/README.md](../../knowledge/README.md) for detailed docume
- Architecture discussion: [chat/harness-rag.txt](../../chat/harness-rag.txt) - Architecture discussion: [chat/harness-rag.txt](../../chat/harness-rag.txt)
- LangGraph docs: https://langchain-ai.github.io/langgraphjs/ - LangGraph docs: https://langchain-ai.github.io/langgraphjs/
- Qdrant docs: https://qdrant.tech/documentation/
- Apache Iceberg: https://iceberg.apache.org/docs/latest/ - Apache Iceberg: https://iceberg.apache.org/docs/latest/

View File

@@ -7,7 +7,7 @@ import type { ConversationStore } from './memory/conversation-store.js';
import type { BlobStore } from './memory/blob-store.js'; import type { BlobStore } from './memory/blob-store.js';
import type { InboundMessage, OutboundMessage } from '../types/messages.js'; import type { InboundMessage, OutboundMessage } from '../types/messages.js';
import { MCPClientConnector } from './mcp-client.js'; import { MCPClientConnector } from './mcp-client.js';
import { LLMProviderFactory, type ProviderConfig } from '../llm/provider.js'; import { LLMProvider, LLMProviderFactory, type ProviderConfig } from '../llm/provider.js';
import { ModelRouter, RoutingStrategy } from '../llm/router.js'; import { ModelRouter, RoutingStrategy } from '../llm/router.js';
import type { ModelMiddleware } from '../llm/middleware.js'; import type { ModelMiddleware } from '../llm/middleware.js';
import type { WorkspaceManager } from '../workspace/workspace-manager.js'; import type { WorkspaceManager } from '../workspace/workspace-manager.js';
@@ -107,13 +107,10 @@ export class AgentHarness {
this.wikiLoader, this.wikiLoader,
getToolRegistry(), getToolRegistry(),
async (maxTokens?: number) => { async (maxTokens?: number) => {
const { model } = await this.modelRouter.route( const { model } = this.modelRouter.createModel({
'analyze and backtest research data', ...this.modelFactory.getDefaultModel(),
this.config.license, ...(maxTokens !== undefined && { maxTokens }),
RoutingStrategy.COMPLEXITY, });
this.config.userId,
maxTokens,
);
return model; return model;
}, },
config.logger, config.logger,
@@ -363,6 +360,9 @@ export class AgentHarness {
this.config.logger.debug('Streaming model response...'); this.config.logger.debug('Streaming model response...');
let response: any = null; let response: any = null;
const MAX_STREAM_ATTEMPTS = 4;
for (let attempt = 1; attempt <= MAX_STREAM_ATTEMPTS; attempt++) {
response = null;
try { try {
const stream = await model.stream(messagesCopy, { signal }); const stream = await model.stream(messagesCopy, { signal });
for await (const chunk of stream) { for await (const chunk of stream) {
@@ -379,7 +379,19 @@ export class AgentHarness {
} }
response = response ? response.concat(chunk) : chunk; response = response ? response.concat(chunk) : chunk;
} }
break; // success — exit retry loop
} catch (invokeError: any) { } catch (invokeError: any) {
const is429 = invokeError?.status === 429 || invokeError?.lc_error_code === 'MODEL_RATE_LIMIT';
if (is429 && attempt < MAX_STREAM_ATTEMPTS) {
const delaySec = parseRetryAfter(invokeError?.headers);
const delayMs = delaySec != null ? delaySec * 1000 : Math.min(5000 * attempt, 30000);
this.config.logger.warn(
{ attempt, delayMs, iteration: iterations, messageCount: messagesCopy.length },
'Model rate limited (429), retrying after delay'
);
await new Promise(resolve => setTimeout(resolve, delayMs));
continue;
}
this.config.logger.error( this.config.logger.error(
{ {
error: invokeError, error: invokeError,
@@ -392,6 +404,7 @@ export class AgentHarness {
); );
throw invokeError; throw invokeError;
} }
}
this.config.logger.info( this.config.logger.info(
{ {
@@ -684,7 +697,7 @@ export class AgentHarness {
* Yields typed HarnessEvents (chunk, tool_call, image, done) and saves the * Yields typed HarnessEvents (chunk, tool_call, image, done) and saves the
* conversation to the store once the done event has been emitted. * conversation to the store once the done event has been emitted.
*/ */
async *streamMessage(message: InboundMessage, options?: { saveUserMessage?: boolean }): AsyncGenerator<HarnessEvent> { async *streamMessage(message: InboundMessage, options?: { saveUserMessage?: boolean; modelOverride?: { modelId: string; provider?: LLMProvider } }): AsyncGenerator<HarnessEvent> {
this.config.logger.info( this.config.logger.info(
{ messageId: message.messageId, userId: message.userId, content: message.content.substring(0, 100) }, { messageId: message.messageId, userId: message.userId, content: message.content.substring(0, 100) },
'Processing user message' 'Processing user message'
@@ -725,12 +738,24 @@ export class AgentHarness {
// 4. Get the configured model // 4. Get the configured model
this.config.logger.debug('Routing to model'); this.config.logger.debug('Routing to model');
const { model, middleware } = await this.modelRouter.route( let model, middleware;
if (options?.modelOverride) {
const defaultConfig = this.modelRouter.getDefaultModelConfig();
({ model, middleware } = this.modelRouter.createModel({
...defaultConfig,
model: options.modelOverride.modelId,
provider: options.modelOverride.provider ?? defaultConfig.provider,
thinking: undefined,
}));
this.config.logger.info({ modelId: options.modelOverride.modelId, provider: options.modelOverride.provider }, 'Using @tag model override');
} else {
({ model, middleware } = await this.modelRouter.route(
message.content, message.content,
this.config.license, this.config.license,
RoutingStrategy.COMPLEXITY, RoutingStrategy.COMPLEXITY,
this.config.userId this.config.userId
); ));
}
this.middleware = middleware; this.middleware = middleware;
this.config.logger.info({ modelName: model.constructor.name }, 'Model selected'); this.config.logger.info({ modelName: model.constructor.name }, 'Model selected');
@@ -837,11 +862,18 @@ export class AgentHarness {
} }
} }
async clearHistory(): Promise<void> {
if (this.conversationStore) {
const channelKey = this.config.channelType ?? ChannelType.WEBSOCKET;
await this.conversationStore.deleteSession(this.config.userId, this.config.sessionId, channelKey);
}
}
/** /**
* Stream a greeting response for first-time users. * Stream a greeting response for first-time users (or after a model switch).
* Sends "Who are you and what can you do?" through the normal message pipeline. * Sends "Who are you and what can you do?" through the normal message pipeline.
*/ */
async *streamGreeting(): AsyncGenerator<HarnessEvent> { async *streamGreeting(modelOverride?: { modelId: string; provider?: LLMProvider }): AsyncGenerator<HarnessEvent> {
const content = await AgentHarness.loadWelcomePrompt(); const content = await AgentHarness.loadWelcomePrompt();
const greetingMessage: InboundMessage = { const greetingMessage: InboundMessage = {
messageId: `greeting_${Date.now()}`, messageId: `greeting_${Date.now()}`,
@@ -850,7 +882,7 @@ export class AgentHarness {
content, content,
timestamp: new Date(), timestamp: new Date(),
}; };
yield* this.streamMessage(greetingMessage, { saveUserMessage: false }); yield* this.streamMessage(greetingMessage, { saveUserMessage: false, modelOverride });
} }
/** /**
@@ -1040,9 +1072,25 @@ export class AgentHarness {
} }
// ============================================================================= // =============================================================================
// Details update helpers (module-level, no class dependency) // Helpers (module-level, no class dependency)
// ============================================================================= // =============================================================================
/**
* Parse the Retry-After header value into seconds.
* Accepts both delta-seconds ("30") and HTTP-date ("Mon, 01 Jan 2026 00:00:00 GMT").
* Returns null if the header is absent or unparseable.
*/
function parseRetryAfter(headers: Record<string, string> | undefined): number | null {
if (!headers) return null;
const value = headers['retry-after'] ?? headers['Retry-After'];
if (!value) return null;
const num = parseFloat(value);
if (!isNaN(num)) return Math.max(0, num);
const date = new Date(value);
if (!isNaN(date.getTime())) return Math.max(0, (date.getTime() - Date.now()) / 1000);
return null;
}
/** /**
* Produce a minimal unified diff between two strings, suitable for passing to * Produce a minimal unified diff between two strings, suitable for passing to
* an LLM as a change description. Returns an empty string when there is no diff. * an LLM as a change description. Returns an empty string when there is no diff.

View File

@@ -48,4 +48,10 @@ export interface ErrorEvent {
fatal: boolean; fatal: boolean;
} }
export type HarnessEvent = ChunkEvent | ToolCallEvent | ImageEvent | DoneEvent | SubagentChunkEvent | SubagentThinkingEvent | SubagentToolCallEvent | ErrorEvent; export interface ModelSwitchedEvent {
type: 'model_switched';
tag: string;
modelId: string;
}
export type HarnessEvent = ChunkEvent | ToolCallEvent | ImageEvent | DoneEvent | SubagentChunkEvent | SubagentThinkingEvent | SubagentToolCallEvent | ErrorEvent | ModelSwitchedEvent;

View File

@@ -1,8 +1,5 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { SystemMessage, HumanMessage } from '@langchain/core/messages'; import { SystemMessage, HumanMessage } from '@langchain/core/messages';
/** All platform tool names available to every subagent. */
const ALL_PLATFORM_TOOLS = ['SymbolLookup', 'GetChartData', 'GetTicker24h', 'WebSearch', 'FetchPage', 'ArxivSearch'];
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { HarnessEvent, SubagentChunkEvent, SubagentThinkingEvent } from '../harness-events.js'; import type { HarnessEvent, SubagentChunkEvent, SubagentThinkingEvent } from '../harness-events.js';
@@ -13,6 +10,62 @@ import type { ToolRegistry } from '../../tools/tool-registry.js';
import type { MCPToolInfo } from '../../tools/mcp/mcp-tool-wrapper.js'; import type { MCPToolInfo } from '../../tools/mcp/mcp-tool-wrapper.js';
import { WikiLoader, type SpawnContext } from './wiki-loader.js'; import { WikiLoader, type SpawnContext } from './wiki-loader.js';
/** All platform tool names available to every subagent. */
const ALL_PLATFORM_TOOLS = ['SymbolLookup', 'GetChartData', 'GetTicker24h', 'WebSearch', 'FetchPage', 'ArxivSearch'];
/**
* Streaming filter that strips triple-backtick fenced code blocks from text as it
* arrives in chunks. Holds back at most 2 characters of look-ahead so normal text
* streams through with no perceptible delay.
*/
class FenceFilter {
private buf = '';
private inFence = false;
write(chunk: string): string {
this.buf += chunk;
return this.drain(false);
}
end(): string {
return this.drain(true);
}
private drain(final: boolean): string {
let out = '';
while (true) {
if (!this.inFence) {
const start = this.buf.indexOf('```');
if (start === -1) {
const keep = final ? this.buf.length : Math.max(0, this.buf.length - 2);
out += this.buf.slice(0, keep);
this.buf = this.buf.slice(keep);
break;
}
out += this.buf.slice(0, start);
const headerEnd = this.buf.indexOf('\n', start + 3);
if (headerEnd === -1 && !final) {
this.buf = this.buf.slice(start);
break;
}
this.inFence = true;
this.buf = headerEnd !== -1 ? this.buf.slice(headerEnd + 1) : '';
} else {
const end = this.buf.indexOf('```');
if (end === -1) {
this.buf = final ? '' : this.buf.slice(Math.max(0, this.buf.length - 2));
break;
}
this.inFence = false;
const closingEnd = this.buf.indexOf('\n', end + 3);
this.buf = closingEnd !== -1 ? this.buf.slice(closingEnd + 1) : this.buf.slice(end + 3);
}
}
// Collapse blank lines left where code blocks were removed
return out.replace(/\n{3,}/g, '\n\n');
}
}
export interface SpawnInput { export interface SpawnInput {
agentName: string; agentName: string;
instruction: string; instruction: string;
@@ -138,13 +191,15 @@ export class SpawnService {
); );
let finalText = ''; let finalText = '';
const fenceFilter = new FenceFilter();
for await (const [mode, data] of await stream) { for await (const [mode, data] of await stream) {
if (signal?.aborted) break; if (signal?.aborted) break;
if (mode === 'messages') { if (mode === 'messages') {
for (const chunk of SpawnService.extractStreamChunks(data, agentName)) { for (const chunk of SpawnService.extractStreamChunks(data, agentName)) {
yield chunk; const filtered = fenceFilter.write(chunk.content);
if (filtered) yield { ...chunk, content: filtered };
} }
} else if (mode === 'updates') { } else if (mode === 'updates') {
if ((data as any).agent?.messages) { if ((data as any).agent?.messages) {
@@ -167,6 +222,9 @@ export class SpawnService {
} }
} }
const tail = fenceFilter.end();
if (tail) yield { type: 'subagent_chunk', agentName, content: tail };
this.logger.info( this.logger.info(
{ agentName, textLength: finalText.length, imageCount: imageCapture.length }, { agentName, textLength: finalText.length, imageCount: imageCapture.length },
'SpawnService: finished' 'SpawnService: finished'
@@ -182,12 +240,16 @@ export class SpawnService {
/** /**
* Extract subagent_chunk / subagent_thinking events from a LangGraph `messages` stream datum. * Extract subagent_chunk / subagent_thinking events from a LangGraph `messages` stream datum.
* Only processes AIMessageChunks — ToolMessages (identified by tool_call_id) are skipped
* because their content is raw tool result data, not agent narrative text.
*/ */
static extractStreamChunks( static extractStreamChunks(
data: unknown, data: unknown,
agentName: string, agentName: string,
): Array<SubagentChunkEvent | SubagentThinkingEvent> { ): Array<SubagentChunkEvent | SubagentThinkingEvent> {
const msg = Array.isArray(data) ? (data as unknown[])[0] : data; const msg = Array.isArray(data) ? (data as unknown[])[0] : data;
// ToolMessages have tool_call_id; AIMessageChunks don't — skip tool results
if ((msg as any)?.tool_call_id != null) return [];
const content = (msg as any)?.content; const content = (msg as any)?.content;
if (typeof content === 'string') { if (typeof content === 'string') {
return content ? [{ type: 'subagent_chunk', agentName, content }] : []; return content ? [{ type: 'subagent_chunk', agentName, content }] : [];

View File

@@ -14,6 +14,4 @@ approvalNodes:
maxPositionPercent: 0.05 # 5% of portfolio max maxPositionPercent: 0.05 # 5% of portfolio max
minRiskRewardRatio: 2.0 # Minimum 2:1 risk/reward minRiskRewardRatio: 2.0 # Minimum 2:1 risk/reward
# Model override (optional)
model: claude-sonnet-4-6
temperature: 0.2 temperature: 0.2

View File

@@ -0,0 +1,30 @@
import { LLMProvider } from './provider.js';
export interface ModelTag {
tag: string;
modelId: string;
provider?: LLMProvider;
}
export const MODEL_TAGS: ModelTag[] = [
{ tag: 'DeepSeek-Flash', modelId: 'deepseek-ai/DeepSeek-V4-Flash' },
{ tag: 'DeepSeek-Pro', modelId: 'deepseek-ai/DeepSeek-V4-Pro' },
{ tag: 'Kimi', modelId: 'moonshotai/Kimi-K2.6' },
{ tag: 'GLM', modelId: 'zai-org/GLM-5' },
{ tag: 'Qwen', modelId: 'Qwen/Qwen3.5-27B' },
{ tag: 'MiniMax', modelId: 'MiniMaxAI/MiniMax-M2.5' },
{ tag: 'Sonnet', modelId: 'claude-sonnet-4-6', provider: LLMProvider.ANTHROPIC },
{ tag: 'Haiku', modelId: 'claude-haiku-4-5-20251001', provider: LLMProvider.ANTHROPIC },
{ tag: 'Opus', modelId: 'claude-opus-4-7', provider: LLMProvider.ANTHROPIC },
];
/** Parse a leading @Tag from message content. Case-insensitive. Returns null if not a known tag. */
export function parseModelTag(content: string): (ModelTag & { rest: string }) | null {
const trimmed = content.trimStart();
if (!trimmed.startsWith('@')) return null;
const spaceIdx = trimmed.indexOf(' ');
const tagName = spaceIdx === -1 ? trimmed.slice(1) : trimmed.slice(1, spaceIdx);
const rest = spaceIdx === -1 ? '' : trimmed.slice(spaceIdx + 1).trim();
const found = MODEL_TAGS.find(m => m.tag.toLowerCase() === tagName.toLowerCase());
return found ? { ...found, rest } : null;
}

View File

@@ -1,5 +1,6 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatOpenAI } from '@langchain/openai'; import { ChatOpenAI } from '@langchain/openai';
import { ChatAnthropic } from '@langchain/anthropic';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import { type ModelMiddleware, NoopMiddleware, AnthropicCachingMiddleware } from './middleware.js'; import { type ModelMiddleware, NoopMiddleware, AnthropicCachingMiddleware } from './middleware.js';
@@ -11,6 +12,8 @@ export { NoopMiddleware, AnthropicCachingMiddleware };
*/ */
export enum LLMProvider { export enum LLMProvider {
DEEP_INFRA = 'deepinfra', DEEP_INFRA = 'deepinfra',
DEEP_INFRA_ANTHROPIC = 'deepinfra_anthropic',
ANTHROPIC = 'anthropic',
} }
/** /**
@@ -21,15 +24,16 @@ export interface ModelConfig {
model: string; model: string;
temperature?: number; temperature?: number;
maxTokens?: number; maxTokens?: number;
thinking?: { budgetTokens: number };
} }
/** /**
* License tier model configuration * License tier model configuration
*/ */
export interface LicenseTierModels { export interface LicenseTierModels {
default: string; default: string | null;
cost_optimized: string; cost_optimized: string | null;
complex: string; complex: string | null;
allowed_models?: string[]; allowed_models?: string[];
blocked_models?: string[]; blocked_models?: string[];
} }
@@ -48,11 +52,13 @@ export interface LicenseModelsConfig {
*/ */
export interface ProviderConfig { export interface ProviderConfig {
deepinfraApiKey?: string; deepinfraApiKey?: string;
anthropicApiKey?: string;
defaultModel?: ModelConfig; defaultModel?: ModelConfig;
licenseModels?: LicenseModelsConfig; licenseModels?: LicenseModelsConfig;
} }
const DEEP_INFRA_BASE_URL = 'https://api.deepinfra.com/v1/openai'; const DEEP_INFRA_BASE_URL = 'https://api.deepinfra.com/v1/openai';
const DEEP_INFRA_ANTHROPIC_BASE_URL = 'https://api.deepinfra.com/anthropic';
/** /**
* LLM Provider factory * LLM Provider factory
@@ -80,6 +86,12 @@ export class LLMProviderFactory {
case LLMProvider.DEEP_INFRA: case LLMProvider.DEEP_INFRA:
return this.createDeepInfraModel(modelConfig); return this.createDeepInfraModel(modelConfig);
case LLMProvider.DEEP_INFRA_ANTHROPIC:
return this.createDeepInfraAnthropicModel(modelConfig);
case LLMProvider.ANTHROPIC:
return this.createAnthropicModel(modelConfig);
default: default:
throw new Error(`Unsupported provider: ${modelConfig.provider}`); throw new Error(`Unsupported provider: ${modelConfig.provider}`);
} }
@@ -106,6 +118,49 @@ export class LLMProviderFactory {
return { model, middleware: new NoopMiddleware() }; return { model, middleware: new NoopMiddleware() };
} }
/**
* Create Deep Infra model via Anthropic-compatible API (supports thinking)
*/
private createDeepInfraAnthropicModel(config: ModelConfig): { model: ChatAnthropic; middleware: AnthropicCachingMiddleware } {
if (!this.config.deepinfraApiKey) {
throw new Error('Deep Infra API key not configured');
}
const model = new ChatAnthropic({
model: config.model,
...(!config.thinking && { temperature: config.temperature ?? 0.7 }),
maxTokens: config.maxTokens ?? 8192,
anthropicApiKey: this.config.deepinfraApiKey,
clientOptions: { baseURL: DEEP_INFRA_ANTHROPIC_BASE_URL },
...(config.thinking && {
thinking: { type: 'enabled' as const, budget_tokens: config.thinking.budgetTokens },
}),
});
return { model, middleware: new AnthropicCachingMiddleware() };
}
/**
* Create model via native Anthropic API (not Deep Infra)
*/
private createAnthropicModel(config: ModelConfig): { model: ChatAnthropic; middleware: AnthropicCachingMiddleware } {
if (!this.config.anthropicApiKey) {
throw new Error('Anthropic API key not configured');
}
const model = new ChatAnthropic({
model: config.model,
...(!config.thinking && { temperature: config.temperature ?? 0.7 }),
maxTokens: config.maxTokens ?? 8192,
anthropicApiKey: this.config.anthropicApiKey,
...(config.thinking && {
thinking: { type: 'enabled' as const, budget_tokens: config.thinking.budgetTokens },
}),
});
return { model, middleware: new AnthropicCachingMiddleware() };
}
/** /**
* Get default model based on environment * Get default model based on environment
*/ */
@@ -118,10 +173,7 @@ export class LLMProviderFactory {
throw new Error('Deep Infra API key not configured'); throw new Error('Deep Infra API key not configured');
} }
return { throw new Error('Default model not configured — set defaults.model in gateway config');
provider: LLMProvider.DEEP_INFRA,
model: 'zai-org/GLM-5',
};
} }
/** /**
@@ -132,16 +184,3 @@ export class LLMProviderFactory {
} }
} }
/**
* Predefined model configurations
*/
export const MODELS = {
GLM_5: {
provider: LLMProvider.DEEP_INFRA,
model: 'zai-org/GLM-5',
},
QWEN_235B: {
provider: LLMProvider.DEEP_INFRA,
model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
},
} as const satisfies Record<string, ModelConfig>;

View File

@@ -1,6 +1,6 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import { LLMProviderFactory, type ModelConfig, LLMProvider, type LicenseModelsConfig } from './provider.js'; import { LLMProviderFactory, type ModelConfig, type LicenseModelsConfig } from './provider.js';
import type { ModelMiddleware } from './middleware.js'; import type { ModelMiddleware } from './middleware.js';
import type { License } from '../types/user.js'; import type { License } from '../types/user.js';
@@ -35,6 +35,17 @@ export class ModelRouter {
this.licenseModels = factory.getLicenseModelsConfig(); this.licenseModels = factory.getLicenseModelsConfig();
} }
/**
* Create a model directly from a config, bypassing routing logic.
*/
createModel(config: ModelConfig): { model: BaseChatModel; middleware: ModelMiddleware } {
return this.factory.createModel(config);
}
getDefaultModelConfig(): ModelConfig {
return this.defaultModel;
}
/** /**
* Route to appropriate model based on context * Route to appropriate model based on context
*/ */
@@ -107,73 +118,45 @@ export class ModelRouter {
private routeByComplexity(message: string, license: License): ModelConfig { private routeByComplexity(message: string, license: License): ModelConfig {
const isComplex = this.isComplexQuery(message); const isComplex = this.isComplexQuery(message);
// Use configuration if available
if (this.licenseModels) { if (this.licenseModels) {
const tierConfig = this.licenseModels[license.licenseType]; const tierConfig = this.licenseModels[license.licenseType];
if (tierConfig) { if (tierConfig) {
const model = isComplex ? tierConfig.complex : tierConfig.default; const model = isComplex ? tierConfig.complex : tierConfig.default;
return { provider: this.defaultModel.provider as LLMProvider, model }; return model ? { ...this.defaultModel, model } : this.defaultModel;
} }
} }
// Fallback to hardcoded defaults return this.defaultModel;
if (license.licenseType === 'enterprise') {
return isComplex
? { provider: LLMProvider.DEEP_INFRA, model: 'Qwen/Qwen3-235B-A22B-Instruct-2507' }
: { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
}
if (license.licenseType === 'pro') {
return isComplex
? { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' }
: { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
}
return { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
} }
/** /**
* Route based on license tier * Route based on license tier
*/ */
private routeByLicenseTier(license: License): ModelConfig { private routeByLicenseTier(license: License): ModelConfig {
// Use configuration if available
if (this.licenseModels) { if (this.licenseModels) {
const tierConfig = this.licenseModels[license.licenseType]; const tierConfig = this.licenseModels[license.licenseType];
if (tierConfig) { if (tierConfig) {
return { provider: this.defaultModel.provider as LLMProvider, model: tierConfig.default }; const model = tierConfig.default;
return model ? { ...this.defaultModel, model } : this.defaultModel;
} }
} }
// Fallback to hardcoded defaults
switch (license.licenseType) {
case 'enterprise':
return { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
case 'pro':
return { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
case 'free':
return { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
default:
return this.defaultModel; return this.defaultModel;
} }
}
/** /**
* Route to cheapest available model * Route to cheapest available model
*/ */
private routeByCost(license: License): ModelConfig { private routeByCost(license: License): ModelConfig {
// Use configuration if available
if (this.licenseModels) { if (this.licenseModels) {
const tierConfig = this.licenseModels[license.licenseType]; const tierConfig = this.licenseModels[license.licenseType];
if (tierConfig) { if (tierConfig) {
return { provider: this.defaultModel.provider as LLMProvider, model: tierConfig.cost_optimized }; const model = tierConfig.cost_optimized;
return model ? { ...this.defaultModel, model } : this.defaultModel;
} }
} }
// Fallback: use GLM-5 return this.defaultModel;
return { provider: LLMProvider.DEEP_INFRA, model: 'zai-org/GLM-5' };
} }
/** /**
@@ -199,18 +182,7 @@ export class ModelRouter {
} }
} }
// Fallback to hardcoded defaults // Without tier config, all models allowed
if (license.licenseType === 'free') {
const allowedModels = ['zai-org/GLM-5'];
return allowedModels.includes(model.model);
}
if (license.licenseType === 'pro') {
const blockedModels = ['Qwen/Qwen3-235B-A22B-Instruct-2507'];
return !blockedModels.includes(model.model);
}
// Enterprise: all models allowed
return true; return true;
} }

View File

@@ -86,27 +86,31 @@ function loadConfig() {
// LLM provider API keys and model configuration // LLM provider API keys and model configuration
providerConfig: { providerConfig: {
deepinfraApiKey: secretsData.llm_providers?.deepinfra_api_key || process.env.DEEPINFRA_API_KEY, deepinfraApiKey: secretsData.llm_providers?.deepinfra_api_key || process.env.DEEPINFRA_API_KEY,
defaultModel: { anthropicApiKey: secretsData.llm_providers?.anthropic_api_key || process.env.ANTHROPIC_API_KEY,
provider: configData.defaults?.model_provider || 'deepinfra', defaultModel: configData.defaults?.model ? {
model: configData.defaults?.model || 'zai-org/GLM-5', provider: configData.defaults.model_provider,
}, model: configData.defaults.model,
...(configData.defaults.thinking_budget_tokens && {
thinking: { budgetTokens: configData.defaults.thinking_budget_tokens },
}),
} : undefined,
licenseModels: { licenseModels: {
free: { free: {
default: configData.license_models?.free?.default || 'zai-org/GLM-5', default: configData.license_models?.free?.default || null,
cost_optimized: configData.license_models?.free?.cost_optimized || 'zai-org/GLM-5', cost_optimized: configData.license_models?.free?.cost_optimized || null,
complex: configData.license_models?.free?.complex || 'zai-org/GLM-5', complex: configData.license_models?.free?.complex || null,
allowed_models: configData.license_models?.free?.allowed_models || ['zai-org/GLM-5'], allowed_models: configData.license_models?.free?.allowed_models,
}, },
pro: { pro: {
default: configData.license_models?.pro?.default || 'zai-org/GLM-5', default: configData.license_models?.pro?.default || null,
cost_optimized: configData.license_models?.pro?.cost_optimized || 'zai-org/GLM-5', cost_optimized: configData.license_models?.pro?.cost_optimized || null,
complex: configData.license_models?.pro?.complex || 'zai-org/GLM-5', complex: configData.license_models?.pro?.complex || null,
blocked_models: configData.license_models?.pro?.blocked_models || ['Qwen/Qwen3-235B-A22B-Instruct-2507'], blocked_models: configData.license_models?.pro?.blocked_models,
}, },
enterprise: { enterprise: {
default: configData.license_models?.enterprise?.default || 'zai-org/GLM-5', default: configData.license_models?.enterprise?.default || null,
cost_optimized: configData.license_models?.enterprise?.cost_optimized || 'zai-org/GLM-5', cost_optimized: configData.license_models?.enterprise?.cost_optimized || null,
complex: configData.license_models?.enterprise?.complex || 'Qwen/Qwen3-235B-A22B-Instruct-2507', complex: configData.license_models?.enterprise?.complex || null,
}, },
}, },
}, },
@@ -354,6 +358,7 @@ try {
icebergClient, icebergClient,
relayClient: zmqRelayClient, relayClient: zmqRelayClient,
logger: app.log, logger: app.log,
getSymbolIndex: () => symbolIndexService,
}); });
app.log.info('OHLC service initialized'); app.log.info('OHLC service initialized');
} catch (error) { } catch (error) {

View File

@@ -28,12 +28,14 @@ import {
backendToTradingView, backendToTradingView,
DEFAULT_SUPPORTED_RESOLUTIONS, DEFAULT_SUPPORTED_RESOLUTIONS,
} from '../types/ohlc.js'; } from '../types/ohlc.js';
import type { SymbolIndexService } from './symbol-index-service.js';
export interface OHLCServiceConfig { export interface OHLCServiceConfig {
icebergClient: IcebergClient; icebergClient: IcebergClient;
relayClient: ZMQRelayClient; relayClient: ZMQRelayClient;
logger: FastifyBaseLogger; logger: FastifyBaseLogger;
requestTimeout?: number; // Request timeout in ms (default: 30000) requestTimeout?: number; // Request timeout in ms (default: 30000)
getSymbolIndex?: () => SymbolIndexService | undefined;
} }
/** /**
@@ -45,11 +47,13 @@ export class OHLCService {
private icebergClient: IcebergClient; private icebergClient: IcebergClient;
private relayClient: ZMQRelayClient; private relayClient: ZMQRelayClient;
private logger: FastifyBaseLogger; private logger: FastifyBaseLogger;
private getSymbolIndex?: () => SymbolIndexService | undefined;
constructor(config: OHLCServiceConfig) { constructor(config: OHLCServiceConfig) {
this.icebergClient = config.icebergClient; this.icebergClient = config.icebergClient;
this.relayClient = config.relayClient; this.relayClient = config.relayClient;
this.logger = config.logger; this.logger = config.logger;
this.getSymbolIndex = config.getSymbolIndex;
} }
/** /**
@@ -129,7 +133,7 @@ export class OHLCService {
if (missingRanges.length === 0 && data.length > 0) { if (missingRanges.length === 0 && data.length > 0) {
// All data exists in Iceberg // All data exists in Iceberg
this.logger.info({ ticker, period_seconds, cached: true }, 'OHLC data found in cache, returning immediately'); this.logger.info({ ticker, period_seconds, cached: true }, 'OHLC data found in cache, returning immediately');
return this.formatHistoryResult(data, start_time, end_time, period_seconds, countback); return this.formatHistoryResult(ticker, data, start_time, end_time, period_seconds, countback);
} }
// Step 3: Request each missing range from the relay individually so we // Step 3: Request each missing range from the relay individually so we
@@ -160,7 +164,7 @@ export class OHLCService {
data = await this.icebergClient.queryOHLC(ticker, period_seconds, start_time, end_time); data = await this.icebergClient.queryOHLC(ticker, period_seconds, start_time, end_time);
this.logger.info({ ticker, period_seconds, dataCount: data.length }, 'Final Iceberg query complete, returning result'); this.logger.info({ ticker, period_seconds, dataCount: data.length }, 'Final Iceberg query complete, returning result');
return this.formatHistoryResult(data, start_time, end_time, period_seconds, countback); return this.formatHistoryResult(ticker, data, start_time, end_time, period_seconds, countback);
} catch (error: any) { } catch (error: any) {
this.logger.error({ this.logger.error({
@@ -179,8 +183,12 @@ export class OHLCService {
* Interior gaps (confirmed trading periods with no trades) arrive as null-OHLC * Interior gaps (confirmed trading periods with no trades) arrive as null-OHLC
* rows from Iceberg. Edge gaps (data not yet ingested, in-progress candles) are * rows from Iceberg. Edge gaps (data not yet ingested, in-progress candles) are
* simply absent rows. Both are returned as-is; clients fill as appropriate. * simply absent rows. Both are returned as-is; clients fill as appropriate.
*
* Applies decimal correction: Nautilus stores prices/volumes as integers;
* divide by 10^price_precision and 10^size_precision to recover float values.
*/ */
private formatHistoryResult( private formatHistoryResult(
ticker: string,
data: any[], data: any[],
// @ts-ignore // @ts-ignore
start_time: bigint, start_time: bigint,
@@ -197,9 +205,33 @@ export class OHLCService {
}; };
} }
// Convert to TradingView format without null-filling missing slots. // Convert to TradingView format (also converts BigInt fields to Number).
let bars: TradingViewBar[] = data.map(backendToTradingView); let bars: TradingViewBar[] = data.map(backendToTradingView);
// Apply decimal correction using symbol metadata.
const symbolMeta = this.getSymbolIndex?.()?.getSymbolByTicker(ticker);
if (symbolMeta) {
const pricePrecision = symbolMeta.price_precision;
const sizePrecision = symbolMeta.size_precision;
if (pricePrecision != null && pricePrecision > 0) {
const priceDivisor = Math.pow(10, pricePrecision);
bars = bars.map(bar => ({
...bar,
open: bar.open / priceDivisor,
high: bar.high / priceDivisor,
low: bar.low / priceDivisor,
close: bar.close / priceDivisor,
}));
}
if (sizePrecision != null && sizePrecision > 0) {
const sizeDivisor = Math.pow(10, sizePrecision);
bars = bars.map(bar => ({
...bar,
volume: bar.volume != null ? bar.volume / sizeDivisor : bar.volume,
}));
}
}
bars.sort((a, b) => a.time - b.time); bars.sort((a, b) => a.time - b.time);
if (countback && bars.length > countback) { if (countback && bars.length > countback) {

View File

@@ -91,6 +91,13 @@ export class SymbolIndexService {
await this.initPromise; await this.initPromise;
} }
/**
* Look up symbol metadata by Nautilus ticker (e.g. "BTC/USDT.BINANCE")
*/
getSymbolByTicker(ticker: string): SymbolMetadata | undefined {
return this.symbols.get(ticker);
}
/** /**
* Update or add a symbol to the index * Update or add a symbol to the index
*/ */

View File

@@ -170,11 +170,11 @@ export function nanosToSeconds(nanos: bigint | number): number {
export function backendToTradingView(backend: BackendOHLC): TradingViewBar { export function backendToTradingView(backend: BackendOHLC): TradingViewBar {
return { return {
time: nanosToSeconds(backend.timestamp), time: nanosToSeconds(backend.timestamp),
open: backend.open, open: Number(backend.open),
high: backend.high, high: Number(backend.high),
low: backend.low, low: Number(backend.low),
close: backend.close, close: Number(backend.close),
volume: backend.volume ?? undefined, volume: backend.volume != null ? Number(backend.volume) : undefined,
}; };
} }

View File

@@ -421,18 +421,82 @@ export class CCXTFetcher {
const amount = Math.round(trade.amount * sizeMult); const amount = Math.round(trade.amount * sizeMult);
const quoteAmount = Math.round((trade.price * trade.amount) * priceMult); const quoteAmount = Math.round((trade.price * trade.amount) * priceMult);
// protobufjs v7 uses camelCase field names internally — must use camelCase here
return { return {
trade_id: trade.id || `${trade.timestamp}`, tradeId: trade.id || `${trade.timestamp}`,
ticker, ticker,
timestamp: (trade.timestamp * 1_000_000).toString(), // Convert ms to nanoseconds timestamp: (trade.timestamp * 1_000_000).toString(), // Convert ms to nanoseconds
price: price.toString(), price: price.toString(),
amount: amount.toString(), amount: amount.toString(),
quote_amount: quoteAmount.toString(), quoteAmount: quoteAmount.toString(),
taker_buy: trade.side === 'buy', takerBuy: trade.side === 'buy',
sequence: trade.order ? trade.order.toString() : undefined sequence: trade.order ? trade.order.toString() : undefined
}; };
} }
/**
* Fetch 1-minute bars covering the current open window for each configured period,
* rolling them up into a single aggregate per period for Flink accumulator seeding.
*
* Returns one seed object per period (or null for periods that just started with no
* completed 1m bars yet). Throws on exchange errors — caller handles retries.
*
* @param {string} ticker
* @param {number[]} periodSeconds - configured periods (e.g. [60, 300, 900, 3600, 14400, 86400])
* @returns {Promise<Array<{periodSeconds, open, high, low, close, volume, windowStartMs}|null>>}
*/
async fetchSeedCandles(ticker, periodSeconds) {
const nowMs = Date.now();
const maxPeriod = Math.max(...periodSeconds);
const longestWindowStart = Math.floor(nowMs / (maxPeriod * 1000)) * (maxPeriod * 1000);
// fetchHistoricalOHLC expects nanoseconds as strings
const startNs = (longestWindowStart * 1_000_000).toString();
const endNs = (nowMs * 1_000_000).toString();
const bars1m = await this.fetchHistoricalOHLC(ticker, startNs, endNs, 60, null);
return periodSeconds.map(period => {
const windowStart = Math.floor(nowMs / (period * 1000)) * (period * 1000);
const relevant = bars1m.filter(b => {
const tsMs = parseInt(b.timestamp) / 1_000_000;
return tsMs >= windowStart && tsMs < nowMs;
});
if (relevant.length === 0) return null;
const open = parseInt(relevant[0].open);
const high = Math.max(...relevant.map(b => parseInt(b.high)));
const low = Math.min(...relevant.map(b => parseInt(b.low)));
const close = parseInt(relevant[relevant.length - 1].close);
const volume = relevant.reduce((sum, b) => sum + parseInt(b.volume), 0);
return { periodSeconds: period, open, high, low, close, volume, windowStartMs: windowStart };
});
}
/**
* Convert a seed candle aggregate into a Tick-shaped object for Kafka.
* price = open (scaled int), amount = volume (scaled int); seed_* fields carry H/L/C/period.
*/
convertSeedToTick(seed, ticker) {
// protobufjs v7 uses camelCase field names internally — must use camelCase here
return {
tradeId: `seed-${ticker}-${seed.periodSeconds}-${seed.windowStartMs}`,
ticker,
timestamp: (seed.windowStartMs * 1_000_000).toString(),
price: seed.open,
amount: seed.volume,
quoteAmount: 0,
takerBuy: false,
isSeed: true,
seedHigh: seed.high,
seedLow: seed.low,
seedClose: seed.close,
seedWindowStartMs: seed.windowStartMs,
seedPeriodSeconds: seed.periodSeconds
};
}
/** /**
* Convert period seconds to CCXT timeframe string * Convert period seconds to CCXT timeframe string
*/ */

View File

@@ -332,7 +332,9 @@ class IngestorWorker {
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {}); this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
return; return;
} }
this.handleRealtimeRequest(request); this.handleRealtimeRequest(request).catch(err => {
this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in realtime handler');
});
} else if (isTickerSnapshot) { } else if (isTickerSnapshot) {
if (!this.pool.consumeSlot(jobId, exchange, 'HISTORICAL')) { if (!this.pool.consumeSlot(jobId, exchange, 'HISTORICAL')) {
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {}); this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
@@ -431,11 +433,40 @@ class IngestorWorker {
/** /**
* Start realtime tick polling for a job dispatched by Flink. * Start realtime tick polling for a job dispatched by Flink.
* Fetches seed candles first so Flink initializes the open-candle accumulator correctly.
*/ */
handleRealtimeRequest(request) { async handleRealtimeRequest(request) {
const { jobId, requestId, ticker } = request; const { jobId, requestId, ticker } = request;
this.logger.info({ jobId, requestId, ticker }, 'Processing realtime subscription request'); this.logger.info({ jobId, requestId, ticker }, 'Processing realtime subscription request');
const periods = [60, 300, 900, 3600, 14400, 86400];
const MAX_RETRIES = 3;
const RETRY_DELAY_MS = 5000;
let seeds = null;
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
try {
seeds = await this.ccxtFetcher.fetchSeedCandles(ticker, periods);
break;
} catch (err) {
this.logger.warn({ jobId, ticker, attempt, error: err.message }, 'Seed candle fetch failed');
if (attempt < MAX_RETRIES) await new Promise(r => setTimeout(r, RETRY_DELAY_MS * attempt));
}
}
if (seeds !== null) {
const seedTicks = seeds
.filter(s => s !== null)
.map(s => this.ccxtFetcher.convertSeedToTick(s, ticker));
if (seedTicks.length > 0) {
await this.kafkaProducer.writeTicks(this.config.kafka_tick_topic, seedTicks);
this.logger.info({ jobId, ticker, count: seedTicks.length }, 'Wrote seed ticks');
}
} else {
// All retries exhausted — open bars suppressed for current partial window until next candle boundary
this.logger.error({ jobId, ticker }, 'All seed retries failed — open bars suppressed until next candle');
}
this.activeRealtime.add(jobId); this.activeRealtime.add(jobId);
this.realtimePoller.startSubscription(jobId, requestId, ticker, this.config.kafka_tick_topic); this.realtimePoller.startSubscription(jobId, requestId, ticker, this.config.kafka_tick_topic);
} }

View File

@@ -34,6 +34,15 @@ message Tick {
// Additional flags for special trade types // Additional flags for special trade types
optional TradeFlags flags = 10; optional TradeFlags flags = 10;
// When true: synthetic seed record carrying pre-aggregated OHLC for accumulator init.
// price = open (scaled), amount = volume (scaled); seed_* fields carry H/L/C/period.
optional bool is_seed = 11;
optional int64 seed_high = 12;
optional int64 seed_low = 13;
optional int64 seed_close = 14;
optional uint64 seed_window_start_ms = 15;
optional uint32 seed_period_seconds = 16;
} }
message TradeFlags { message TradeFlags {

View File

@@ -48,6 +48,7 @@ COPY --from=builder /build/env /opt/conda/envs/dexorder
# Copy application code # Copy application code
COPY dexorder/ /app/dexorder/ COPY dexorder/ /app/dexorder/
COPY main.py /app/ COPY main.py /app/
COPY environment.yml /app/
# Copy generated protobuf code from builder # Copy generated protobuf code from builder
COPY --from=builder /build/dexorder/generated/ /app/dexorder/generated/ COPY --from=builder /build/dexorder/generated/ /app/dexorder/generated/

View File

@@ -1156,15 +1156,23 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
meta_parts.append(f"error: {result['error']}") meta_parts.append(f"error: {result['error']}")
if result.get("revision"): if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}") meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"): if result.get("validation"):
val = result["validation"] val = result["validation"]
if not val.get("success"):
error_detail = val.get('error') or '' error_detail = val.get('error') or ''
if val.get('output'): if val.get('output'):
error_detail = f"{error_detail}\n{val['output']}" if error_detail else val['output'] error_detail = f"{error_detail}\n{val['output']}" if error_detail else val['output']
meta_parts.append(f"validation error: {error_detail.strip()}") meta_parts.append(f"validation error: {error_detail.strip()}")
elif val.get("output"):
# Always show output — may contain ⚠ WARNING for all-NaN / all-zero results
meta_parts.append(f"validation output: {val['output']}")
content.append(TextContent(type="text", text="\n".join(meta_parts))) content.append(TextContent(type="text", text="\n".join(meta_parts)))
if result.get("execution"): if result.get("execution"):
exec_content = result["execution"].get("content", []) exec_result = result["execution"]
exec_content = exec_result.get("content", [])
if not exec_content and exec_result.get("output"):
# _execute_indicator returns plain {"output": str}, not MCP {"content": [...]}
exec_content = [TextContent(type="text", text=exec_result["output"])]
content.extend(exec_content) content.extend(exec_content)
image_count = sum(1 for item in exec_content if item.type == "image") image_count = sum(1 for item in exec_content if item.type == "image")
logging.info(f"PythonWrite '{arguments.get('name')}': returning {len(content)} items, {image_count} images") logging.info(f"PythonWrite '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
@@ -1208,15 +1216,23 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
meta_parts.append(f"error: {result['error']}") meta_parts.append(f"error: {result['error']}")
if result.get("revision"): if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}") meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"): if result.get("validation"):
val = result["validation"] val = result["validation"]
if not val.get("success"):
error_detail = val.get('error') or '' error_detail = val.get('error') or ''
if val.get('output'): if val.get('output'):
error_detail = f"{error_detail}\n{val['output']}" if error_detail else val['output'] error_detail = f"{error_detail}\n{val['output']}" if error_detail else val['output']
meta_parts.append(f"validation error: {error_detail.strip()}") meta_parts.append(f"validation error: {error_detail.strip()}")
elif val.get("output"):
# Always show output — may contain ⚠ WARNING for all-NaN / all-zero results
meta_parts.append(f"validation output: {val['output']}")
content.append(TextContent(type="text", text="\n".join(meta_parts))) content.append(TextContent(type="text", text="\n".join(meta_parts)))
if result.get("execution"): if result.get("execution"):
exec_content = result["execution"].get("content", []) exec_result = result["execution"]
exec_content = exec_result.get("content", [])
if not exec_content and exec_result.get("output"):
# _execute_indicator returns plain {"output": str}, not MCP {"content": [...]}
exec_content = [TextContent(type="text", text=exec_result["output"])]
content.extend(exec_content) content.extend(exec_content)
image_count = sum(1 for item in exec_content if item.type == "image") image_count = sum(1 for item in exec_content if item.type == "image")
logging.info(f"PythonEdit '{arguments.get('name')}': returning {len(content)} items, {image_count} images") logging.info(f"PythonEdit '{arguments.get('name')}': returning {len(content)} items, {image_count} images")

View File

@@ -1,9 +1,13 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref } from 'vue' import { ref } from 'vue'
import Dialog from 'primevue/dialog'
import Button from 'primevue/button'
import DetailsEditDialog from './DetailsEditDialog.vue' import DetailsEditDialog from './DetailsEditDialog.vue'
import ResearchViewDialog from './ResearchViewDialog.vue' import ResearchViewDialog from './ResearchViewDialog.vue'
import { useIndicatorStore } from '../stores/indicators' import { useIndicatorStore } from '../stores/indicators'
import { useIndicatorTypesStore } from '../stores/indicatorTypes' import { useIndicatorTypesStore } from '../stores/indicatorTypes'
import { useStrategyTypesStore } from '../stores/strategyTypes'
import { useResearchTypesStore } from '../stores/researchTypes'
const props = defineProps<{ const props = defineProps<{
category: 'indicator' | 'strategy' | 'research' category: 'indicator' | 'strategy' | 'research'
@@ -16,8 +20,13 @@ const editingName = ref('')
const viewDialogVisible = ref(false) const viewDialogVisible = ref(false)
const viewingName = ref('') const viewingName = ref('')
const deleteDialogVisible = ref(false)
const deletingRow = ref<{ id: string; display_name: string } | null>(null)
const indicatorStore = useIndicatorStore() const indicatorStore = useIndicatorStore()
const indicatorTypesStore = useIndicatorTypesStore() const indicatorTypesStore = useIndicatorTypesStore()
const strategyTypesStore = useStrategyTypesStore()
const researchTypesStore = useResearchTypesStore()
function openEdit(name: string) { function openEdit(name: string) {
editingName.value = name editingName.value = name
@@ -50,6 +59,25 @@ function addToChart(pandasTaName: string, displayName: string) {
}) })
} }
// Open the delete-confirmation dialog for the given table row; the actual
// store mutation is deferred until the user clicks Delete (confirmDelete).
function requestDelete(row: { id: string; display_name: string }) {
  deleteDialogVisible.value = true
  deletingRow.value = row
}
// Apply the pending deletion to the store matching the active category,
// then close the dialog and clear the stashed row. No-op if nothing is pending.
function confirmDelete() {
  const row = deletingRow.value
  if (!row) return
  switch (props.category) {
    case 'indicator':
      indicatorTypesStore.removeType(row.id)
      break
    case 'strategy':
      strategyTypesStore.removeType(row.id)
      break
    default:
      // 'research' is the only remaining category
      researchTypesStore.removeType(row.id)
  }
  deleteDialogVisible.value = false
  deletingRow.value = null
}
function onUpdated(_payload: { category: string; name: string; success: boolean; error?: string }) { function onUpdated(_payload: { category: string; name: string; success: boolean; error?: string }) {
} }
</script> </script>
@@ -63,6 +91,7 @@ function onUpdated(_payload: { category: string; name: string; success: boolean;
<button class="edit-btn" @click="openEdit(row.display_name)">Spec</button> <button class="edit-btn" @click="openEdit(row.display_name)">Spec</button>
<button v-if="category === 'research'" class="view-btn" @click="openView(row.display_name)">Result</button> <button v-if="category === 'research'" class="view-btn" @click="openView(row.display_name)">Result</button>
<button v-if="category === 'indicator'" class="use-btn" @click="addToChart(row.id, row.display_name)">Use</button> <button v-if="category === 'indicator'" class="use-btn" @click="addToChart(row.id, row.display_name)">Use</button>
<button class="delete-btn" title="Delete" @click.stop="requestDelete(row)"><i class="pi pi-trash" /></button>
</div> </div>
</div> </div>
@@ -79,6 +108,21 @@ function onUpdated(_payload: { category: string; name: string; success: boolean;
:name="editingName" :name="editingName"
@updated="onUpdated" @updated="onUpdated"
/> />
<Dialog
v-model:visible="deleteDialogVisible"
:modal="true"
:closable="true"
:header="`Delete ${category}?`"
:style="{ width: '360px' }"
class="delete-confirm-dialog"
>
<p class="delete-msg">Delete <strong>{{ deletingRow?.display_name }}</strong>? This cannot be undone.</p>
<template #footer>
<Button label="Cancel" size="small" outlined @click="deleteDialogVisible = false" />
<Button label="Delete" size="small" severity="danger" @click="confirmDelete" />
</template>
</Dialog>
</template> </template>
<style scoped> <style scoped>
@@ -181,4 +225,33 @@ function onUpdated(_payload: { category: string; name: string; success: boolean;
border-color: #4a9eca; border-color: #4a9eca;
color: #4a9eca; color: #4a9eca;
} }
.delete-btn {
flex-shrink: 0;
background: none;
border: none;
color: #555;
cursor: pointer;
font-size: 12px;
padding: 2px 4px;
border-radius: 3px;
line-height: 1;
display: flex;
align-items: center;
}
.delete-btn:hover {
color: #e06c6c;
}
.delete-msg {
color: #dbdbdb;
font-size: 13px;
margin: 0;
padding: 4px 0 8px;
}
.delete-msg strong {
color: #fff;
}
</style> </style>

View File

@@ -41,6 +41,12 @@ const messages = ref<any[]>([])
const messagesLoaded = ref(false) const messagesLoaded = ref(false)
const isConnected = wsManager.isConnected const isConnected = wsManager.isConnected
// Model tag state
const availableModelTags = ref<string[]>([])
const currentModelTag = ref<string | null>(null)
// Content typed after @Tag, held until model_switched arrives to re-insert as first user message
let pendingModelSwitchContent = ''
// Reactive rooms that update based on WebSocket connection and agent processing state // Reactive rooms that update based on WebSocket connection and agent processing state
const rooms = computed(() => [{ const rooms = computed(() => [{
roomId: SESSION_ID, roomId: SESSION_ID,
@@ -48,7 +54,8 @@ const rooms = computed(() => [{
avatar: null, avatar: null,
users: [ users: [
{ _id: CURRENT_USER_ID, username: 'You' }, { _id: CURRENT_USER_ID, username: 'You' },
{ _id: AGENT_ID, username: 'AI Agent' } { _id: AGENT_ID, username: 'AI Agent' },
...availableModelTags.value.map(tag => ({ _id: `model-${tag.toLowerCase().replace(/[^a-z0-9]/g, '-')}`, username: tag })),
], ],
unreadCount: 0, unreadCount: 0,
typingUsers: isAgentProcessing.value ? [AGENT_ID] : [] typingUsers: isAgentProcessing.value ? [AGENT_ID] : []
@@ -153,6 +160,63 @@ const streamingImages = ref<any[]>([])
const handleMessage = (data: WebSocketMessage) => { const handleMessage = (data: WebSocketMessage) => {
console.log('[ChatPanel] Received message:', data) console.log('[ChatPanel] Received message:', data)
if (data.type === 'connected') {
if (Array.isArray(data.modelTags)) {
availableModelTags.value = data.modelTags
}
return
}
if (data.type === 'model_switched') {
// Reset all streaming state from previous conversation
currentStreamingMessageId = null
toolCallMessageId = null
lastSentMessageId = null
streamingBuffer = ''
streamingImages.value = []
subagentContentMap = new Map()
currentModelTag.value = data.tag ?? null
const timestamp = new Date().toTimeString().split(' ')[0].slice(0, 5)
const date = new Date().toLocaleDateString()
// System notice at the top of the new conversation
const systemMsg = {
_id: generateMessageId(),
content: `Switched to ${data.tag} — conversation history cleared`,
system: true,
timestamp,
date,
}
const newMessages: any[] = [systemMsg]
// Re-add the user's first message (content after the @Tag)
const userContent: string = pendingModelSwitchContent || data.rest || ''
if (userContent) {
const userMsgId = generateMessageId()
lastSentMessageId = userMsgId
newMessages.push({
_id: userMsgId,
content: userContent,
senderId: CURRENT_USER_ID,
timestamp,
date,
saved: true,
distributed: true,
seen: false,
files: [],
})
}
messages.value = newMessages
pendingModelSwitchContent = ''
// Show thinking bubble now that the message list is set up
addToolCallBubble('Thinking...')
return
}
if (data.type === 'conversation_history') { if (data.type === 'conversation_history') {
messages.value = (data.messages as any[]).map((m: any) => { messages.value = (data.messages as any[]).map((m: any) => {
const ts = new Date(m.timestamp / 1000) // microseconds → ms const ts = new Date(m.timestamp / 1000) // microseconds → ms
@@ -189,7 +253,7 @@ const handleMessage = (data: WebSocketMessage) => {
return return
} }
if (data.type === 'subagent_chunk') { if (data.type === 'subagent_chunk' || data.type === 'subagent_thinking') {
appendSubagentChunk(data.agentName, data.content) appendSubagentChunk(data.agentName, data.content)
return return
} }
@@ -363,7 +427,17 @@ const sendMessage = async (event: any) => {
// Extract data from CustomEvent.detail[0] // Extract data from CustomEvent.detail[0]
const data = event.detail?.[0] || event const data = event.detail?.[0] || event
const content = data.content || '' // Convert any model @-tags from vue-advanced-chat's <usertag> encoding back to @TagName
const rawContent: string = data.content || ''
const usersTag: any[] = data.usersTag || []
let content = rawContent
let modelSwitchUser: any = null
for (const user of usersTag) {
if (typeof user._id === 'string' && user._id.startsWith('model-')) {
content = content.replace(`<usertag>${user._id}</usertag>`, `@${user.username}`)
if (!modelSwitchUser) modelSwitchUser = user
}
}
const files = data.files const files = data.files
const roomId = data.roomId const roomId = data.roomId
@@ -411,7 +485,24 @@ const sendMessage = async (event: any) => {
} }
} }
// Add user message to UI // Send to backend via WebSocket
const wsMessage = {
type: 'agent_user_message',
session_id: roomId || SESSION_ID,
content: content,
attachments: attachments
}
if (modelSwitchUser) {
// Model switch: store content-after-tag for model_switched handler to re-insert.
// Don't add a user message bubble now — model_switched will set up the full initial state.
pendingModelSwitchContent = content.replace(`@${modelSwitchUser.username}`, '').trim()
wsManager.send(wsMessage)
isAgentProcessing.value = true
return
}
// Normal message: add to UI immediately
const userMessage = { const userMessage = {
_id: messageId, _id: messageId,
content: content, content: content,
@@ -425,14 +516,6 @@ const sendMessage = async (event: any) => {
} }
messages.value = [...messages.value, userMessage] messages.value = [...messages.value, userMessage]
// Send to backend via WebSocket
const wsMessage = {
type: 'agent_user_message',
session_id: roomId || SESSION_ID,
content: content,
attachments: attachments
}
wsManager.send(wsMessage) wsManager.send(wsMessage)
// Track this message so the agent_chunk handler can mark it seen // Track this message so the agent_chunk handler can mark it seen
@@ -692,8 +775,12 @@ onUnmounted(() => {
<span class="workspace-loading-message">{{ channelStore.statusMessage || 'Connecting...' }}</span> <span class="workspace-loading-message">{{ channelStore.statusMessage || 'Connecting...' }}</span>
</div> </div>
<template v-else>
<div v-if="currentModelTag" class="model-tag-badge">
Model: {{ currentModelTag }}
</div>
<vue-advanced-chat <vue-advanced-chat
v-else
:height="chatHeight" :height="chatHeight"
:current-user-id="CURRENT_USER_ID" :current-user-id="CURRENT_USER_ID"
:rooms="JSON.stringify(rooms)" :rooms="JSON.stringify(rooms)"
@@ -751,6 +838,7 @@ onUnmounted(() => {
</svg> </svg>
</div> </div>
</vue-advanced-chat> </vue-advanced-chat>
</template>
</div> </div>
</template> </template>
@@ -765,6 +853,20 @@ onUnmounted(() => {
position: relative; position: relative;
} }
.model-tag-badge {
position: absolute;
top: 6px;
right: 12px;
z-index: 10;
font-size: 0.7rem;
color: #089981;
background: rgba(8, 153, 129, 0.12);
border: 1px solid rgba(8, 153, 129, 0.3);
border-radius: 4px;
padding: 2px 7px;
pointer-events: none;
}
.workspace-loading { .workspace-loading {
position: fixed; position: fixed;
inset: 0; inset: 0;

View File

@@ -13,5 +13,6 @@ export interface CustomIndicatorType {
export const useIndicatorTypesStore = defineStore('indicator_types', () => { export const useIndicatorTypesStore = defineStore('indicator_types', () => {
const types = ref<Record<string, CustomIndicatorType>>({}) const types = ref<Record<string, CustomIndicatorType>>({})
return { types } const removeType = (id: string) => { delete types.value[id] }
return { types, removeType }
}) })

View File

@@ -10,5 +10,6 @@ export interface ResearchType {
export const useResearchTypesStore = defineStore('research_types', () => { export const useResearchTypesStore = defineStore('research_types', () => {
const types = ref<Record<string, ResearchType>>({}) const types = ref<Record<string, ResearchType>>({})
return { types } const removeType = (id: string) => { delete types.value[id] }
return { types, removeType }
}) })

View File

@@ -10,5 +10,6 @@ export interface StrategyType {
export const useStrategyTypesStore = defineStore('strategy_types', () => { export const useStrategyTypesStore = defineStore('strategy_types', () => {
const types = ref<Record<string, StrategyType>>({}) const types = ref<Record<string, StrategyType>>({})
return { types } const removeType = (id: string) => { delete types.value[id] }
return { types, removeType }
}) })