bugfixes; research subproc; higher sandbox limits
This commit is contained in:
37
bin/init
37
bin/init
@@ -204,32 +204,17 @@ if [ -z "$USER_ID" ]; then
|
|||||||
fi
|
fi
|
||||||
echo -e "${GREEN}User ID: $USER_ID${NC}"
|
echo -e "${GREEN}User ID: $USER_ID${NC}"
|
||||||
|
|
||||||
# Build license JSON based on type
|
echo -e "${GREEN}→${NC} Setting $LICENSE_TYPE license..."
|
||||||
case "$LICENSE_TYPE" in
|
HTTP_CODE=$(curl -s -o /tmp/dexorder-set-tier-response.json -w "%{http_code}" \
|
||||||
enterprise)
|
-X POST "$BASE_URL/api/admin/users/$USER_ID/set-tier" \
|
||||||
LICENSE_JSON='{"licenseType":"enterprise","features":{"maxIndicators":200,"maxStrategies":100,"maxBacktestDays":1825,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":20,"maxMessagesPerDay":10000,"maxTokensPerMessage":32768,"rateLimitPerMinute":300},"k8sResources":{"memoryRequest":"1Gi","memoryLimit":"4Gi","cpuRequest":"500m","cpuLimit":"4000m","storage":"50Gi","tmpSizeLimit":"1Gi","enableIdleShutdown":true,"idleTimeoutMinutes":120},"preferredModel":{"provider":"anthropic","model":"claude-opus-4-6","temperature":0.7}}'
|
-H "Content-Type: application/json" \
|
||||||
;;
|
-d "{\"tier\": \"$LICENSE_TYPE\"}")
|
||||||
free)
|
if [[ "$HTTP_CODE" != "200" ]]; then
|
||||||
LICENSE_JSON='{"licenseType":"free","features":{"maxIndicators":10,"maxStrategies":3,"maxBacktestDays":30,"realtimeData":false,"customExecutors":false,"apiAccess":false},"resourceLimits":{"maxConcurrentSessions":1,"maxMessagesPerDay":100,"maxTokensPerMessage":4096,"rateLimitPerMinute":20},"k8sResources":{"memoryRequest":"256Mi","memoryLimit":"512Mi","cpuRequest":"100m","cpuLimit":"500m","storage":"2Gi","tmpSizeLimit":"128Mi","enableIdleShutdown":true,"idleTimeoutMinutes":30},"preferredModel":{"provider":"anthropic","model":"claude-haiku-4-5-20251001","temperature":0.7}}'
|
echo -e "${RED}✗ Failed to set license tier (HTTP $HTTP_CODE)${NC}"
|
||||||
;;
|
cat /tmp/dexorder-set-tier-response.json 2>/dev/null
|
||||||
pro|*)
|
exit 1
|
||||||
LICENSE_JSON='{"licenseType":"pro","features":{"maxIndicators":50,"maxStrategies":20,"maxBacktestDays":365,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":5,"maxMessagesPerDay":1000,"maxTokensPerMessage":8192,"rateLimitPerMinute":60},"k8sResources":{"memoryRequest":"512Mi","memoryLimit":"2Gi","cpuRequest":"250m","cpuLimit":"2000m","storage":"10Gi","tmpSizeLimit":"256Mi","enableIdleShutdown":true,"idleTimeoutMinutes":60},"preferredModel":{"provider":"anthropic","model":"claude-sonnet-4-6","temperature":0.7}}'
|
fi
|
||||||
;;
|
rm -f /tmp/dexorder-set-tier-response.json
|
||||||
esac
|
|
||||||
|
|
||||||
echo -e "${GREEN}→${NC} Creating $LICENSE_TYPE license..."
|
|
||||||
$KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -c "
|
|
||||||
INSERT INTO user_licenses (user_id, email, license, mcp_server_url)
|
|
||||||
VALUES (
|
|
||||||
'$USER_ID',
|
|
||||||
'$USER_EMAIL',
|
|
||||||
'$LICENSE_JSON',
|
|
||||||
'$MCP_URL'
|
|
||||||
)
|
|
||||||
ON CONFLICT (user_id) DO UPDATE SET
|
|
||||||
license = EXCLUDED.license,
|
|
||||||
updated_at = NOW();
|
|
||||||
" > /dev/null
|
|
||||||
|
|
||||||
echo -e "${GREEN}✓ User ready: $USER_EMAIL ($LICENSE_TYPE)${NC}"
|
echo -e "${GREEN}✓ User ready: $USER_EMAIL ($LICENSE_TYPE)${NC}"
|
||||||
echo ""
|
echo ""
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# RBAC for gateway to CREATE sandbox deployments only
|
# RBAC for gateway to manage sandbox deployments
|
||||||
# Principle of least privilege: gateway can ONLY create deployments/services/PVCs
|
# Principle of least privilege: gateway can create/delete deployments in the
|
||||||
# in the sandbox namespace. Deletion is handled by the lifecycle sidecar.
|
# sandbox namespace. PVC deletion is still handled by the lifecycle sidecar.
|
||||||
# No pods, secrets, exec, or cross-namespace access.
|
# No pods, secrets, exec, or cross-namespace access.
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -15,10 +15,10 @@ metadata:
|
|||||||
name: sandbox-creator
|
name: sandbox-creator
|
||||||
namespace: sandbox
|
namespace: sandbox
|
||||||
rules:
|
rules:
|
||||||
# Deployments: create and read only (deletion handled by sidecar)
|
# Deployments: full management (delete used for license tier changes; PVC deletion still via sidecar)
|
||||||
- apiGroups: ["apps"]
|
- apiGroups: ["apps"]
|
||||||
resources: ["deployments"]
|
resources: ["deployments"]
|
||||||
verbs: ["create", "get", "list", "watch", "patch", "update"]
|
verbs: ["create", "get", "list", "watch", "patch", "update", "delete"]
|
||||||
|
|
||||||
# PVCs: create and read (deletion handled by sidecar)
|
# PVCs: create and read (deletion handled by sidecar)
|
||||||
- apiGroups: [""]
|
- apiGroups: [""]
|
||||||
@@ -41,7 +41,6 @@ rules:
|
|||||||
verbs: ["get"]
|
verbs: ["get"]
|
||||||
|
|
||||||
# Explicitly NOT included:
|
# Explicitly NOT included:
|
||||||
# - deployments/delete - handled by lifecycle sidecar
|
|
||||||
# - pvc/delete - handled by lifecycle sidecar
|
# - pvc/delete - handled by lifecycle sidecar
|
||||||
# - services/delete - handled by lifecycle sidecar
|
# - services/delete - handled by lifecycle sidecar
|
||||||
# - pods (create/delete) - must go through deployments
|
# - pods (create/delete) - must go through deployments
|
||||||
|
|||||||
@@ -83,10 +83,10 @@ spec:
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "256Mi"
|
memory: "512Mi"
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
limits:
|
limits:
|
||||||
memory: "512Mi"
|
memory: "2Gi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
|
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ spec:
|
|||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
# Maximum any single container can request
|
# Maximum any single container can request
|
||||||
max:
|
max:
|
||||||
memory: "2Gi"
|
memory: "8Gi"
|
||||||
cpu: "2000m"
|
cpu: "4000m"
|
||||||
min:
|
min:
|
||||||
memory: "32Mi"
|
memory: "32Mi"
|
||||||
cpu: "10m"
|
cpu: "10m"
|
||||||
|
|||||||
@@ -4,18 +4,32 @@
|
|||||||
flink_hostname: flink-jobmanager
|
flink_hostname: flink-jobmanager
|
||||||
ingestor_broker_port: 5567
|
ingestor_broker_port: 5567
|
||||||
|
|
||||||
# Supported exchanges (subscribe to these prefixes)
|
# Supported exchanges (used for symbol metadata generation)
|
||||||
supported_exchanges:
|
supported_exchanges:
|
||||||
- BINANCE
|
- BINANCE
|
||||||
- COINBASE
|
- COINBASE
|
||||||
- KRAKEN
|
- KRAKEN
|
||||||
|
|
||||||
|
# Per-exchange work slot capacity.
|
||||||
|
# Each slot is one concurrent job. historical_slots limits parallel OHLC fetches;
|
||||||
|
# realtime_slots limits concurrent tick subscriptions. Set based on exchange rate
|
||||||
|
# limits and connection constraints — these are conservative starting values.
|
||||||
|
exchange_capacity:
|
||||||
|
BINANCE:
|
||||||
|
historical_slots: 1
|
||||||
|
realtime_slots: 5
|
||||||
|
COINBASE:
|
||||||
|
historical_slots: 1
|
||||||
|
realtime_slots: 4
|
||||||
|
KRAKEN:
|
||||||
|
historical_slots: 1
|
||||||
|
realtime_slots: 3
|
||||||
|
|
||||||
# Kafka configuration
|
# Kafka configuration
|
||||||
kafka_brokers:
|
kafka_brokers:
|
||||||
- kafka:9092
|
- kafka:9092
|
||||||
|
|
||||||
# Worker configuration
|
# Worker configuration
|
||||||
max_concurrent: 10
|
|
||||||
poll_interval_ms: 10000
|
poll_interval_ms: 10000
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
|
|||||||
@@ -46,6 +46,11 @@ data:
|
|||||||
alerts:
|
alerts:
|
||||||
max_active: 100
|
max_active: 100
|
||||||
|
|
||||||
|
# Memory guard: soft RLIMIT_AS limit as a fraction of the cgroup memory.max.
|
||||||
|
# Set below 1.0 so Python raises MemoryError before the kernel OOM-kills the pod.
|
||||||
|
memory:
|
||||||
|
limit_fraction: 0.85
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
logging:
|
logging:
|
||||||
level: "INFO"
|
level: "INFO"
|
||||||
|
|||||||
10
doc/plan.md
Normal file
10
doc/plan.md
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Development Plan
|
||||||
|
|
||||||
|
* Realtime data
|
||||||
|
* Triggers
|
||||||
|
* Strategy UI
|
||||||
|
* Backtesting TV integration
|
||||||
|
* Paper Trading
|
||||||
|
* User secrets
|
||||||
|
* Live Execution
|
||||||
|
* Sandbox <=> Dexorder auth
|
||||||
139
doc/prod_deployment.md
Normal file
139
doc/prod_deployment.md
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
# Production Deployment Guide
|
||||||
|
|
||||||
|
This document describes the full process for deploying the AI platform to the production Kubernetes cluster, including the special steps required when the Iceberg schema has changed.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The production cluster runs under `kubectl --context prod`, defaulting to the `ai` namespace. The `sandbox` namespace is shared between dev and prod.
|
||||||
|
|
||||||
|
Deployment consists of two parts:
|
||||||
|
|
||||||
|
1. **Standard deploy** — rebuild and push all images, apply k8s manifests, roll out services
|
||||||
|
2. **Iceberg schema wipe** *(when schema has changed)* — clear both the Iceberg REST catalog (postgres) and the MinIO data warehouse before deploying
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Standard Deployment (no schema changes)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/deploy-all --sandboxes
|
||||||
|
```
|
||||||
|
|
||||||
|
This script (hardcoded to `--context=prod`) performs:
|
||||||
|
|
||||||
|
1. Applies base kustomize manifests (`deploy/k8s/prod/`) — namespaces, RBAC, policies
|
||||||
|
2. Applies `deploy/k8s/prod/infrastructure.yaml` — statefulsets, deployments
|
||||||
|
3. Runs `bin/config-update prod` — updates ConfigMaps
|
||||||
|
4. Builds and pushes images for all 7 services: `gateway`, `web`, `sandbox`, `lifecycle-sidecar`, `flink`, `relay`, `ingestor`
|
||||||
|
5. *(with `--sandboxes`)* Deletes sandbox Deployments and Services in the `sandbox` namespace (PVCs are retained; gateway recreates them on next login)
|
||||||
|
6. Waits for rollouts on all 6 main deployments
|
||||||
|
|
||||||
|
> **Secrets are NOT updated by this script.** Run `bin/secret-update prod` separately if secrets have changed.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Full Deploy with Iceberg Schema Wipe
|
||||||
|
|
||||||
|
Use this when the Iceberg table schema has changed (e.g. protobuf/column changes in the `trading.ohlc` table).
|
||||||
|
|
||||||
|
### Architecture note
|
||||||
|
|
||||||
|
The Iceberg REST catalog uses **two storage layers** that must both be cleared:
|
||||||
|
|
||||||
|
| Layer | What it stores | How to clear |
|
||||||
|
|---|---|---|
|
||||||
|
| PostgreSQL `iceberg` database | Table/namespace metadata (catalog) | Drop and recreate the database |
|
||||||
|
| MinIO `warehouse` bucket | Parquet data files | `mc rm --recursive --force` |
|
||||||
|
|
||||||
|
**Important:** The gateway also uses the `iceberg` postgres database for its own auth tables (`user`, `user_licenses`, `session`, etc.). Wiping the database removes all user accounts. After the wipe, the schema must be re-applied and users recreated.
|
||||||
|
|
||||||
|
### Step-by-step
|
||||||
|
|
||||||
|
#### 1. Scale down Iceberg consumers
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai scale deployment iceberg-catalog flink-jobmanager flink-taskmanager --replicas=0
|
||||||
|
```
|
||||||
|
|
||||||
|
This prevents in-flight writes during the wipe.
|
||||||
|
|
||||||
|
#### 2. Wipe the Iceberg PostgreSQL catalog
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai exec postgres-0 -- psql -U postgres -c "DROP DATABASE iceberg;"
|
||||||
|
kubectl --context prod -n ai exec postgres-0 -- psql -U postgres -c "CREATE DATABASE iceberg;"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Wipe the MinIO warehouse bucket
|
||||||
|
|
||||||
|
Get MinIO credentials from the cluster secret:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai get secret minio-secret -o jsonpath='{.data.root-user}' | base64 -d
|
||||||
|
kubectl --context prod -n ai get secret minio-secret -o jsonpath='{.data.root-password}' | base64 -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Configure the `mc` client inside the MinIO pod and remove all objects:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai exec minio-0 -- mc alias set local http://localhost:9000 <user> <password>
|
||||||
|
kubectl --context prod -n ai exec minio-0 -- mc rm --recursive --force local/warehouse/
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 4. Run the full deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/deploy-all --sandboxes
|
||||||
|
```
|
||||||
|
|
||||||
|
This rebuilds and redeploys all services, including `iceberg-catalog`, `flink-jobmanager`, and `flink-taskmanager` (which were scaled to zero above — `deploy-all` will restore them to their manifest replica counts).
|
||||||
|
|
||||||
|
#### 5. Re-apply the gateway database schema
|
||||||
|
|
||||||
|
The gateway does **not** auto-migrate. After the `iceberg` database is recreated, the schema must be applied manually:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai exec -i postgres-0 -- psql -U postgres -d iceberg < gateway/schema.sql
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates the `user`, `session`, `user_licenses`, and related tables.
|
||||||
|
|
||||||
|
#### 6. Recreate all users
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/create-all-users prod
|
||||||
|
```
|
||||||
|
|
||||||
|
This registers all alpha test users via the gateway API and assigns their licenses. Users are defined in the script itself (`bin/create-all-users`).
|
||||||
|
|
||||||
|
To add or modify users, edit that file or run `bin/create-user prod` interactively.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -I https://dexorder.ai/api/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Check gateway logs for errors:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl --context prod -n ai logs deployment/gateway --tail=100
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
### Login fails after Iceberg wipe
|
||||||
|
|
||||||
|
**Symptom:** `Sign in failed` (401) or `User creation failed` (postgres error `42P01: undefined table`)
|
||||||
|
|
||||||
|
**Cause:** Dropping the `iceberg` database removes the gateway's auth tables along with the Iceberg catalog metadata — they share the same database.
|
||||||
|
|
||||||
|
**Fix:** Re-apply the schema and recreate users (steps 5 and 6 above).
|
||||||
|
|
||||||
|
### Gateway shows `42P01` errors but pod is running
|
||||||
|
|
||||||
|
The gateway does not auto-migrate on startup. The schema file must be applied manually after any database recreation. A gateway restart alone will not fix this.
|
||||||
@@ -81,18 +81,29 @@ All sockets bind on **Relay** (well-known endpoint). Components connect to relay
|
|||||||
- Relay publishes DataRequest to ingestor work queue
|
- Relay publishes DataRequest to ingestor work queue
|
||||||
- No request tracking - relay is stateless
|
- No request tracking - relay is stateless
|
||||||
|
|
||||||
### 2. Ingestor Work Queue (Relay → Ingestors)
|
### 2. Ingestor Work Queue (Flink ↔ Ingestors)
|
||||||
**Pattern**: PUB/SUB with exchange prefix filtering
|
**Pattern**: ROUTER/DEALER slot-based broker
|
||||||
- **Socket Type**: Relay uses PUB (bind), Ingestors use SUB (connect)
|
- **Socket Type**: Flink `IngestorBroker` uses ROUTER (bind), Ingestors use DEALER (connect)
|
||||||
- **Endpoint**: `tcp://*:5555` (Relay binds)
|
- **Endpoint**: `tcp://*:5567` (Flink binds)
|
||||||
- **Message Types**: `DataRequest` (historical or realtime)
|
- **Message Types**: `WorkerReady` (slot offer), `DataRequest` (work assignment), `WorkComplete`, `WorkHeartbeat`, `WorkReject`, `WorkStop`
|
||||||
- **Topic Prefix**: Market name (e.g., `BTC/USDT.`, `ETH/BTC.`)
|
- **Capacity model**:
|
||||||
- **Behavior**:
|
- Each `WorkerReady` (0x20) is ONE slot offer for one exchange and one job type (`SlotType`: `HISTORICAL=1`, `REALTIME=2`, `ANY=0`)
|
||||||
- Relay publishes work with exchange prefix from ticker
|
- Ingestors send N `WorkerReady` messages at startup — one per available slot per exchange per type
|
||||||
- Ingestors subscribe only to exchanges they support
|
- Flink dispatches a job by matching the slot's exchange and SlotType to the request
|
||||||
- Multiple ingestors can compete for same exchange
|
- The slot is consumed on dispatch; the ingestor re-offers it (new `WorkerReady`) when the job ends
|
||||||
- Ingestors write data to Kafka only (no direct response)
|
- Rate-limit backoff: if the exchange returns a 429, the ingestor delays the re-offer by the `Retry-After` duration from the response header
|
||||||
- Flink processes Kafka → Iceberg → notification
|
- **Historical job lifecycle**:
|
||||||
|
- Flink dispatches `DataRequest` (HISTORICAL_OHLC) → ingestor fetches and writes to Kafka → sends `WorkComplete` (0x21) → sends new `WorkerReady` for that slot
|
||||||
|
- **Realtime job lifecycle**:
|
||||||
|
- Flink dispatches `DataRequest` (REALTIME_TICKS) → ingestor polls exchange and writes ticks to Kafka → sends `WorkHeartbeat` (0x22) every 5 s → on `WorkStop` (0x25) from Flink: cancels and sends new `WorkerReady`
|
||||||
|
- **Slot configuration** (per ingestor, per exchange):
|
||||||
|
```yaml
|
||||||
|
exchange_capacity:
|
||||||
|
BINANCE: { historical_slots: 3, realtime_slots: 5 }
|
||||||
|
KRAKEN: { historical_slots: 2, realtime_slots: 3 }
|
||||||
|
COINBASE: { historical_slots: 2, realtime_slots: 4 }
|
||||||
|
```
|
||||||
|
- **Flink restart**: when Flink restarts its `freeSlots` deque is cleared; all in-flight jobs time out on the ingestor side, releasing their slots, which then re-offer via `WorkerReady`
|
||||||
|
|
||||||
### 3. Market Data Fanout (Relay ↔ Flink ↔ Clients)
|
### 3. Market Data Fanout (Relay ↔ Flink ↔ Clients)
|
||||||
**Pattern**: XPUB/XSUB proxy
|
**Pattern**: XPUB/XSUB proxy
|
||||||
|
|||||||
@@ -1,4 +1 @@
|
|||||||
what conclusions can you make by analyzing historical data on ETH price direction changes near market session overlaps and market sessions changes on monday and tuesday?
|
what conclusions can you make by analyzing historical data on ETH price direction changes near market session overlaps and market sessions changes on monday and tuesday?
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.dexorder.flink.ingestor;
|
|||||||
import com.dexorder.flink.zmq.ZmqChannelManager;
|
import com.dexorder.flink.zmq.ZmqChannelManager;
|
||||||
import com.dexorder.proto.DataRequest;
|
import com.dexorder.proto.DataRequest;
|
||||||
import com.dexorder.proto.RealtimeParams;
|
import com.dexorder.proto.RealtimeParams;
|
||||||
|
import com.dexorder.proto.SlotType;
|
||||||
import com.dexorder.proto.SubmitHistoricalRequest;
|
import com.dexorder.proto.SubmitHistoricalRequest;
|
||||||
import com.dexorder.proto.WorkComplete;
|
import com.dexorder.proto.WorkComplete;
|
||||||
import com.dexorder.proto.WorkHeartbeat;
|
import com.dexorder.proto.WorkHeartbeat;
|
||||||
@@ -17,27 +18,27 @@ import java.util.ArrayDeque;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Deque;
|
import java.util.Deque;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* LRU-style work broker for ingestors.
|
* Slot-based work broker for ingestors.
|
||||||
*
|
*
|
||||||
* Ingestors connect via DEALER to the ROUTER socket on port 5567. They register with READY,
|
* Each WorkerReady message from an ingestor represents ONE available slot for a
|
||||||
* are dispatched WORK messages, and respond with COMPLETE (historical) or HEARTBEAT (realtime).
|
* specific exchange and job type (HISTORICAL or REALTIME). Flink consumes the slot
|
||||||
* If a heartbeat times out the job is re-queued and dispatched to another available worker.
|
* by dispatching a DataRequest to it. The ingestor re-offers the slot (sends another
|
||||||
|
* WorkerReady) once the job completes, subject to any rate-limit backoff.
|
||||||
*
|
*
|
||||||
* Also receives SubmitHistoricalRequest messages forwarded by the relay on the PULL socket (5566).
|
* Also receives SubmitHistoricalRequest messages forwarded by the relay on the PULL
|
||||||
|
* socket (5566), and realtime job requests from RealtimeSubscriptionManager.
|
||||||
*
|
*
|
||||||
* Message type IDs (ZMQ framing, not Kafka):
|
* Message type IDs (ZMQ framing):
|
||||||
* 0x10 SubmitHistoricalRequest (relay → Flink via PULL, same as client wire type)
|
* 0x10 SubmitHistoricalRequest (relay → Flink via PULL)
|
||||||
* 0x20 WorkerReady (ingestor → Flink)
|
* 0x20 WorkerReady (ingestor → Flink: one slot offer)
|
||||||
* 0x21 WorkComplete (ingestor → Flink)
|
* 0x21 WorkComplete (ingestor → Flink)
|
||||||
* 0x22 WorkHeartbeat (ingestor → Flink)
|
* 0x22 WorkHeartbeat (ingestor → Flink)
|
||||||
* 0x23 WorkReject (ingestor → Flink)
|
* 0x23 WorkReject (ingestor → Flink)
|
||||||
@@ -53,7 +54,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
private static final byte MSG_TYPE_WORK_COMPLETE = 0x21;
|
private static final byte MSG_TYPE_WORK_COMPLETE = 0x21;
|
||||||
private static final byte MSG_TYPE_WORK_HEARTBEAT = 0x22;
|
private static final byte MSG_TYPE_WORK_HEARTBEAT = 0x22;
|
||||||
private static final byte MSG_TYPE_WORK_REJECT = 0x23;
|
private static final byte MSG_TYPE_WORK_REJECT = 0x23;
|
||||||
private static final byte MSG_TYPE_WORK_ASSIGN = 0x01; // DataRequest type on wire
|
private static final byte MSG_TYPE_WORK_ASSIGN = 0x01;
|
||||||
private static final byte MSG_TYPE_WORK_STOP = 0x25;
|
private static final byte MSG_TYPE_WORK_STOP = 0x25;
|
||||||
|
|
||||||
/** Re-queue realtime job if no heartbeat received within this window (ms) */
|
/** Re-queue realtime job if no heartbeat received within this window (ms) */
|
||||||
@@ -65,20 +66,20 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
private volatile boolean running;
|
private volatile boolean running;
|
||||||
private Thread brokerThread;
|
private Thread brokerThread;
|
||||||
|
|
||||||
// ── Worker tracking ──────────────────────────────────────────────────────
|
// ── Slot tracking ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/** Workers ready to accept a job, in LRU order (head = least recently used) */
|
/**
|
||||||
private final Deque<WorkerInfo> freeWorkers = new ArrayDeque<>();
|
* Available slots, in LRU order (head = least recently used).
|
||||||
|
* Each entry is one WorkerReady slot offer from an ingestor.
|
||||||
|
*/
|
||||||
|
private final Deque<WorkerSlot> freeSlots = new ArrayDeque<>();
|
||||||
|
|
||||||
/** Jobs waiting for a compatible free worker */
|
/** Jobs waiting for a compatible free slot */
|
||||||
private final Queue<DataRequest> pendingJobs = new ArrayDeque<>();
|
private final Queue<DataRequest> pendingJobs = new ArrayDeque<>();
|
||||||
|
|
||||||
/** Jobs currently executing on a worker */
|
/** Jobs currently executing on a slot */
|
||||||
private final Map<String, ActiveJob> activeJobs = new ConcurrentHashMap<>();
|
private final Map<String, ActiveJob> activeJobs = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
/** Worker identity → supported exchanges (set once on READY) */
|
|
||||||
private final Map<String, WorkerInfo> knownWorkers = new ConcurrentHashMap<>();
|
|
||||||
|
|
||||||
// ── Thread-safe inbound queue from RealtimeSubscriptionManager ───────────
|
// ── Thread-safe inbound queue from RealtimeSubscriptionManager ───────────
|
||||||
|
|
||||||
private final Queue<DataRequest> externalSubmissions = new ConcurrentLinkedQueue<>();
|
private final Queue<DataRequest> externalSubmissions = new ConcurrentLinkedQueue<>();
|
||||||
@@ -134,8 +135,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop all realtime jobs for a ticker (called when last subscriber leaves).
|
* Stop all realtime jobs for a ticker (called when last subscriber leaves).
|
||||||
* Thread-safe — posts a stop marker via externalSubmissions is complex; instead we
|
* Thread-safe via ConcurrentHashMap.
|
||||||
* directly find and stop active jobs. Protected by ConcurrentHashMap.
|
|
||||||
*/
|
*/
|
||||||
public void stopRealtimeJobsForTicker(String ticker) {
|
public void stopRealtimeJobsForTicker(String ticker) {
|
||||||
List<String> toStop = new ArrayList<>();
|
List<String> toStop = new ArrayList<>();
|
||||||
@@ -154,7 +154,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Broker loop ──────────────────────────────────────────────────────────
|
// ── Broker loop ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
private void brokerLoop() {
|
private void brokerLoop() {
|
||||||
ZMQ.Socket pullSocket = zmqManager.getSocket(ZmqChannelManager.Channel.CLIENT_REQUEST);
|
ZMQ.Socket pullSocket = zmqManager.getSocket(ZmqChannelManager.Channel.CLIENT_REQUEST);
|
||||||
@@ -174,18 +174,15 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
enqueueJob(ext);
|
enqueueJob(ext);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Poll sockets (100ms timeout)
|
|
||||||
poller.poll(100);
|
poller.poll(100);
|
||||||
|
|
||||||
if (poller.pollin(0)) {
|
if (poller.pollin(0)) {
|
||||||
handleClientRequest(pullSocket);
|
handleClientRequest(pullSocket);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (poller.pollin(1)) {
|
if (poller.pollin(1)) {
|
||||||
handleWorkerMessage(routerSocket);
|
handleWorkerMessage(routerSocket);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for heartbeat / completion timeouts
|
|
||||||
checkTimeouts();
|
checkTimeouts();
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@@ -235,7 +232,8 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
.setClientId(req.hasClientId() ? req.getClientId() : "")
|
.setClientId(req.hasClientId() ? req.getClientId() : "")
|
||||||
.build();
|
.build();
|
||||||
enqueueJob(dataRequest);
|
enqueueJob(dataRequest);
|
||||||
LOG.info("Received historical request from relay: request_id={}, ticker={}", req.getRequestId(), req.getTicker());
|
LOG.info("Received historical request from relay: request_id={}, ticker={}",
|
||||||
|
req.getRequestId(), req.getTicker());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Failed to parse SubmitHistoricalRequest from relay", e);
|
LOG.error("Failed to parse SubmitHistoricalRequest from relay", e);
|
||||||
}
|
}
|
||||||
@@ -277,23 +275,28 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A WorkerReady message represents ONE slot offer for one exchange and job type.
|
||||||
|
* Add it directly to freeSlots — no deduplication (multiple slots per ingestor are expected).
|
||||||
|
*/
|
||||||
private void handleWorkerReady(byte[] identity, String identityKey, byte[] payload) throws Exception {
|
private void handleWorkerReady(byte[] identity, String identityKey, byte[] payload) throws Exception {
|
||||||
WorkerReady ready = WorkerReady.parseFrom(payload);
|
WorkerReady ready = WorkerReady.parseFrom(payload);
|
||||||
Set<String> exchanges = new HashSet<>(ready.getExchangesList());
|
SlotType slotType = ready.getJobType();
|
||||||
|
|
||||||
WorkerInfo worker = knownWorkers.computeIfAbsent(identityKey,
|
for (String exchange : ready.getExchangesList()) {
|
||||||
k -> new WorkerInfo(identity, identityKey, exchanges));
|
WorkerSlot slot = new WorkerSlot(identity, identityKey, exchange.toUpperCase(), slotType);
|
||||||
worker.exchanges = exchanges; // update in case re-READY with different config
|
freeSlots.addLast(slot);
|
||||||
worker.identity = identity;
|
LOG.info("Worker slot READY: id={}, exchange={}, type={}, totalFreeSlots={}",
|
||||||
|
identityKey, exchange, slotType, freeSlots.size());
|
||||||
if (!freeWorkers.contains(worker)) {
|
|
||||||
freeWorkers.addLast(worker);
|
|
||||||
}
|
}
|
||||||
LOG.info("Ingestor READY: id={}, exchanges={}, freeWorkers={}", identityKey, exchanges, freeWorkers.size());
|
|
||||||
|
|
||||||
dispatchPending();
|
dispatchPending();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Historical job completed. Remove from activeJobs.
|
||||||
|
* The ingestor will send a new typed WorkerReady to re-offer the slot.
|
||||||
|
*/
|
||||||
private void handleWorkComplete(String identityKey, byte[] payload) throws Exception {
|
private void handleWorkComplete(String identityKey, byte[] payload) throws Exception {
|
||||||
WorkComplete complete = WorkComplete.parseFrom(payload);
|
WorkComplete complete = WorkComplete.parseFrom(payload);
|
||||||
String jobId = complete.getJobId();
|
String jobId = complete.getJobId();
|
||||||
@@ -304,13 +307,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
} else {
|
} else {
|
||||||
LOG.info("Job COMPLETE: jobId={}, ticker={}, success={}", jobId, job.ticker, complete.getSuccess());
|
LOG.info("Job COMPLETE: jobId={}, ticker={}, success={}", jobId, job.ticker, complete.getSuccess());
|
||||||
}
|
}
|
||||||
|
// Slot re-registration is driven by the ingestor via a new WorkerReady.
|
||||||
// Worker is free again
|
|
||||||
WorkerInfo worker = knownWorkers.get(identityKey);
|
|
||||||
if (worker != null) {
|
|
||||||
freeWorkers.addLast(worker);
|
|
||||||
dispatchPending();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleWorkHeartbeat(String identityKey, byte[] payload) throws Exception {
|
private void handleWorkHeartbeat(String identityKey, byte[] payload) throws Exception {
|
||||||
@@ -325,6 +322,10 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ingestor rejected the job. Re-queue it with a new ID.
|
||||||
|
* The ingestor will send a new typed WorkerReady when it's ready again.
|
||||||
|
*/
|
||||||
private void handleWorkReject(String identityKey, byte[] payload) throws Exception {
|
private void handleWorkReject(String identityKey, byte[] payload) throws Exception {
|
||||||
WorkReject reject = WorkReject.parseFrom(payload);
|
WorkReject reject = WorkReject.parseFrom(payload);
|
||||||
String jobId = reject.getJobId();
|
String jobId = reject.getJobId();
|
||||||
@@ -332,31 +333,23 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
|
|
||||||
ActiveJob job = activeJobs.remove(jobId);
|
ActiveJob job = activeJobs.remove(jobId);
|
||||||
if (job != null) {
|
if (job != null) {
|
||||||
// Re-queue with fresh job_id so a different ingestor may pick it up
|
|
||||||
DataRequest requeued = job.request.toBuilder()
|
DataRequest requeued = job.request.toBuilder()
|
||||||
.setJobId(UUID.randomUUID().toString())
|
.setJobId(UUID.randomUUID().toString())
|
||||||
.build();
|
.build();
|
||||||
pendingJobs.add(requeued);
|
pendingJobs.add(requeued);
|
||||||
}
|
}
|
||||||
|
// Slot re-registration is driven by the ingestor via a new WorkerReady.
|
||||||
// Worker is still free (it rejected, not crashed)
|
|
||||||
WorkerInfo worker = knownWorkers.get(identityKey);
|
|
||||||
if (worker != null) {
|
|
||||||
freeWorkers.addLast(worker);
|
|
||||||
dispatchPending();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Dispatch ─────────────────────────────────────────────────────────────
|
// ── Dispatch ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
private void enqueueJob(DataRequest request) {
|
private void enqueueJob(DataRequest request) {
|
||||||
// Check if we can immediately dispatch
|
WorkerSlot slot = findFreeSlot(exchangeOf(request.getTicker()), request.getType());
|
||||||
WorkerInfo worker = findFreeWorker(exchangeOf(request.getTicker()));
|
if (slot != null) {
|
||||||
if (worker != null) {
|
dispatch(slot, request);
|
||||||
dispatch(worker, request);
|
|
||||||
} else {
|
} else {
|
||||||
pendingJobs.add(request);
|
pendingJobs.add(request);
|
||||||
LOG.debug("No free worker for {}, queued (pendingJobs={})", request.getTicker(), pendingJobs.size());
|
LOG.debug("No free slot for {}, queued (pendingJobs={})", request.getTicker(), pendingJobs.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,9 +357,9 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
Queue<DataRequest> remaining = new ArrayDeque<>();
|
Queue<DataRequest> remaining = new ArrayDeque<>();
|
||||||
DataRequest job;
|
DataRequest job;
|
||||||
while ((job = pendingJobs.poll()) != null) {
|
while ((job = pendingJobs.poll()) != null) {
|
||||||
WorkerInfo worker = findFreeWorker(exchangeOf(job.getTicker()));
|
WorkerSlot slot = findFreeSlot(exchangeOf(job.getTicker()), job.getType());
|
||||||
if (worker != null) {
|
if (slot != null) {
|
||||||
dispatch(worker, job);
|
dispatch(slot, job);
|
||||||
} else {
|
} else {
|
||||||
remaining.add(job);
|
remaining.add(job);
|
||||||
}
|
}
|
||||||
@@ -374,28 +367,30 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
pendingJobs.addAll(remaining);
|
pendingJobs.addAll(remaining);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void dispatch(WorkerInfo worker, DataRequest request) {
|
private void dispatch(WorkerSlot slot, DataRequest request) {
|
||||||
freeWorkers.remove(worker);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
byte[] protoBytes = request.toByteArray();
|
byte[] protoBytes = request.toByteArray();
|
||||||
boolean sent = zmqManager.sendToWorker(worker.identity, PROTOCOL_VERSION, MSG_TYPE_WORK_ASSIGN, protoBytes);
|
boolean sent = zmqManager.sendToWorker(slot.identity, PROTOCOL_VERSION, MSG_TYPE_WORK_ASSIGN, protoBytes);
|
||||||
if (!sent) {
|
if (!sent) {
|
||||||
LOG.error("Failed to dispatch job to worker={}, re-queuing", worker.identityKey);
|
// ROUTER_MANDATORY: identity is disconnected — purge all stale slots for this
|
||||||
freeWorkers.addLast(worker);
|
// worker and re-queue the job so dispatchPending() can try a live slot.
|
||||||
|
int purged = purgeWorkerSlots(slot.identityKey);
|
||||||
|
LOG.warn("Worker {} unreachable, purged {} stale free slots, re-queuing job={}",
|
||||||
|
slot.identityKey, purged, request.getJobId());
|
||||||
pendingJobs.add(request);
|
pendingJobs.add(request);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ActiveJob active = new ActiveJob(worker.identity, worker.identityKey,
|
ActiveJob active = new ActiveJob(slot.identity, slot.identityKey,
|
||||||
request, request.getTicker(), request.getType());
|
request, request.getTicker(), request.getType());
|
||||||
activeJobs.put(request.getJobId(), active);
|
activeJobs.put(request.getJobId(), active);
|
||||||
|
|
||||||
LOG.info("Dispatched job: jobId={}, ticker={}, type={}, worker={}",
|
LOG.info("Dispatched job: jobId={}, ticker={}, type={}, worker={}, slotType={}",
|
||||||
request.getJobId(), request.getTicker(), request.getType(), worker.identityKey);
|
request.getJobId(), request.getTicker(), request.getType(),
|
||||||
|
slot.identityKey, slot.slotType);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Error dispatching job", e);
|
LOG.error("Error dispatching job", e);
|
||||||
freeWorkers.addLast(worker);
|
freeSlots.addLast(slot);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -408,7 +403,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Timeout checking ─────────────────────────────────────────────────────
|
// ── Timeout checking ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
private void checkTimeouts() {
|
private void checkTimeouts() {
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
@@ -426,10 +421,9 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
for (String jobId : timedOut) {
|
for (String jobId : timedOut) {
|
||||||
ActiveJob job = activeJobs.remove(jobId);
|
ActiveJob job = activeJobs.remove(jobId);
|
||||||
if (job == null) continue;
|
if (job == null) continue;
|
||||||
LOG.warn("Job timed out (no heartbeat/completion): jobId={}, ticker={}, type={}, worker={}",
|
LOG.warn("Job timed out: jobId={}, ticker={}, type={}, worker={}",
|
||||||
jobId, job.ticker, job.type, job.workerIdentityKey);
|
jobId, job.ticker, job.type, job.workerIdentityKey);
|
||||||
|
|
||||||
// Re-queue with a new job_id
|
|
||||||
DataRequest requeued = job.request.toBuilder()
|
DataRequest requeued = job.request.toBuilder()
|
||||||
.setJobId(UUID.randomUUID().toString())
|
.setJobId(UUID.randomUUID().toString())
|
||||||
.build();
|
.build();
|
||||||
@@ -438,7 +432,7 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/** Extract exchange name from ticker, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
|
/** Extract exchange name from ticker, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
|
||||||
private static String exchangeOf(String ticker) {
|
private static String exchangeOf(String ticker) {
|
||||||
@@ -446,12 +440,32 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
return dot >= 0 ? ticker.substring(dot + 1).toUpperCase() : "";
|
return dot >= 0 ? ticker.substring(dot + 1).toUpperCase() : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Find and remove a free worker that supports the given exchange. */
|
/**
|
||||||
private WorkerInfo findFreeWorker(String exchange) {
|
* Remove all free slots offered by a given worker identity.
|
||||||
for (WorkerInfo w : freeWorkers) {
|
* Called when a dispatch to that identity fails (ROUTER_MANDATORY unreachable).
|
||||||
if (exchange.isEmpty() || w.exchanges.contains(exchange)) {
|
* Returns the number of slots removed.
|
||||||
freeWorkers.remove(w);
|
*/
|
||||||
return w;
|
private int purgeWorkerSlots(String identityKey) {
|
||||||
|
int before = freeSlots.size();
|
||||||
|
freeSlots.removeIf(slot -> slot.identityKey.equals(identityKey));
|
||||||
|
return before - freeSlots.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find and remove a free slot that supports the given exchange and request type.
|
||||||
|
* A slot with SlotType.ANY matches any request type.
|
||||||
|
*/
|
||||||
|
private WorkerSlot findFreeSlot(String exchange, DataRequest.RequestType requestType) {
|
||||||
|
for (WorkerSlot slot : freeSlots) {
|
||||||
|
boolean exchangeMatch = exchange.isEmpty() || slot.exchange.equals(exchange);
|
||||||
|
boolean typeMatch = slot.slotType == SlotType.ANY
|
||||||
|
|| (slot.slotType == SlotType.HISTORICAL
|
||||||
|
&& requestType == DataRequest.RequestType.HISTORICAL_OHLC)
|
||||||
|
|| (slot.slotType == SlotType.REALTIME
|
||||||
|
&& requestType == DataRequest.RequestType.REALTIME_TICKS);
|
||||||
|
if (exchangeMatch && typeMatch) {
|
||||||
|
freeSlots.remove(slot);
|
||||||
|
return slot;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@@ -468,17 +482,20 @@ public class IngestorBroker implements AutoCloseable {
|
|||||||
stop();
|
stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Inner types ──────────────────────────────────────────────────────────
|
// ── Inner types ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
private static class WorkerInfo {
|
/** One available work slot offered by an ingestor via WorkerReady. */
|
||||||
byte[] identity;
|
private static class WorkerSlot {
|
||||||
|
final byte[] identity;
|
||||||
final String identityKey;
|
final String identityKey;
|
||||||
Set<String> exchanges;
|
final String exchange;
|
||||||
|
final SlotType slotType;
|
||||||
|
|
||||||
WorkerInfo(byte[] identity, String identityKey, Set<String> exchanges) {
|
WorkerSlot(byte[] identity, String identityKey, String exchange, SlotType slotType) {
|
||||||
this.identity = identity;
|
this.identity = identity;
|
||||||
this.identityKey = identityKey;
|
this.identityKey = identityKey;
|
||||||
this.exchanges = exchanges;
|
this.exchange = exchange;
|
||||||
|
this.slotType = slotType;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -87,6 +87,11 @@ public class ZmqChannelManager implements Closeable {
|
|||||||
socket.setLinger(1000);
|
socket.setLinger(1000);
|
||||||
socket.setSndHWM(10000);
|
socket.setSndHWM(10000);
|
||||||
socket.setRcvHWM(10000);
|
socket.setRcvHWM(10000);
|
||||||
|
if (socketType == SocketType.ROUTER) {
|
||||||
|
// Return false (EHOSTUNREACH) instead of silently dropping messages to
|
||||||
|
// unknown/disconnected peer identities. Enables immediate stale-slot detection.
|
||||||
|
socket.setRouterMandatory(true);
|
||||||
|
}
|
||||||
socket.bind(endpoint);
|
socket.bind(endpoint);
|
||||||
sockets.put(channel.name(), socket);
|
sockets.put(channel.name(), socket);
|
||||||
LOG.info("Bound {} to {}", description, endpoint);
|
LOG.info("Bound {} to {}", description, endpoint);
|
||||||
|
|||||||
@@ -595,12 +595,13 @@ export class WebSocketHandler {
|
|||||||
case 'get_bars': {
|
case 'get_bars': {
|
||||||
if (!ohlcService) {
|
if (!ohlcService) {
|
||||||
socket.send(JSON.stringify({
|
socket.send(JSON.stringify({
|
||||||
type: 'error',
|
type: 'get_bars_response',
|
||||||
request_id: requestId,
|
request_id: requestId,
|
||||||
error_message: 'OHLC service not available'
|
error: 'OHLC service not available',
|
||||||
}));
|
}));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
const history = await ohlcService.fetchOHLC(
|
const history = await ohlcService.fetchOHLC(
|
||||||
payload.symbol,
|
payload.symbol,
|
||||||
payload.period_seconds,
|
payload.period_seconds,
|
||||||
@@ -609,14 +610,13 @@ export class WebSocketHandler {
|
|||||||
payload.countback
|
payload.countback
|
||||||
);
|
);
|
||||||
logger.info({ requestId, barCount: history.bars?.length ?? 0, noData: history.noData, socketState: socket.readyState }, 'Sending get_bars_response');
|
logger.info({ requestId, barCount: history.bars?.length ?? 0, noData: history.noData, socketState: socket.readyState }, 'Sending get_bars_response');
|
||||||
socket.send(
|
socket.send(jsonStringifySafe({ type: 'get_bars_response', request_id: requestId, history }));
|
||||||
jsonStringifySafe({
|
|
||||||
type: 'get_bars_response',
|
|
||||||
request_id: requestId,
|
|
||||||
history,
|
|
||||||
})
|
|
||||||
);
|
|
||||||
logger.info({ requestId }, 'get_bars_response sent');
|
logger.info({ requestId }, 'get_bars_response sent');
|
||||||
|
} catch (err: any) {
|
||||||
|
const errorMessage = err?.message ?? String(err);
|
||||||
|
logger.error({ requestId, ticker: payload.symbol, errorMessage }, 'get_bars failed');
|
||||||
|
socket.send(JSON.stringify({ type: 'get_bars_response', request_id: requestId, error: errorMessage }));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import type { UserLicense } from '../types/user.js';
|
import type { UserLicense, License, LicenseTier } from '../types/user.js';
|
||||||
import { UserLicenseSchema } from '../types/user.js';
|
import { UserLicenseSchema, LICENSE_TIER_TEMPLATES } from '../types/user.js';
|
||||||
import type { AuthService } from '../auth/auth-service.js';
|
import type { AuthService } from '../auth/auth-service.js';
|
||||||
|
|
||||||
export class UserService {
|
export class UserService {
|
||||||
@@ -114,6 +114,54 @@ export class UserService {
|
|||||||
return await this.authService.verifyToken(token);
|
return await this.authService.verifyToken(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Re-apply the current canonical template for every user's declared licenseType.
|
||||||
|
* Updates only the DB — does not touch deployments, so running pods are unaffected
|
||||||
|
* until their next natural restart.
|
||||||
|
*/
|
||||||
|
async migrateAllLicenses(): Promise<{ updated: number }> {
|
||||||
|
const client = await this.pool.connect();
|
||||||
|
try {
|
||||||
|
const rows = await client.query(
|
||||||
|
`SELECT user_id, license->>'licenseType' AS tier FROM user_licenses`
|
||||||
|
);
|
||||||
|
let updated = 0;
|
||||||
|
for (const row of rows.rows) {
|
||||||
|
const tier = row.tier as LicenseTier;
|
||||||
|
if (!LICENSE_TIER_TEMPLATES[tier]) continue;
|
||||||
|
await client.query(
|
||||||
|
`UPDATE user_licenses SET license = $1::jsonb, updated_at = NOW() WHERE user_id = $2`,
|
||||||
|
[JSON.stringify(LICENSE_TIER_TEMPLATES[tier]), row.user_id]
|
||||||
|
);
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
return { updated };
|
||||||
|
} finally {
|
||||||
|
client.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a user's license to a canonical tier template.
|
||||||
|
* Overwrites the existing license with the current template for that tier.
|
||||||
|
*/
|
||||||
|
async setUserLicenseTier(userId: string, tier: LicenseTier): Promise<License> {
|
||||||
|
const license = LICENSE_TIER_TEMPLATES[tier];
|
||||||
|
const client = await this.pool.connect();
|
||||||
|
try {
|
||||||
|
await client.query(
|
||||||
|
`INSERT INTO user_licenses (user_id, license, mcp_server_url, updated_at)
|
||||||
|
VALUES ($1, $2::jsonb, 'pending', NOW())
|
||||||
|
ON CONFLICT (user_id) DO UPDATE
|
||||||
|
SET license = EXCLUDED.license, updated_at = NOW()`,
|
||||||
|
[userId, JSON.stringify(license)]
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
client.release();
|
||||||
|
}
|
||||||
|
return license;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Close database pool
|
* Close database pool
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import type { ResearchSubagent } from './subagents/research/index.js';
|
|||||||
import type { IndicatorSubagent } from './subagents/indicator/index.js';
|
import type { IndicatorSubagent } from './subagents/indicator/index.js';
|
||||||
import type { WebExploreSubagent } from './subagents/web-explore/index.js';
|
import type { WebExploreSubagent } from './subagents/web-explore/index.js';
|
||||||
import type { StrategySubagent } from './subagents/strategy/index.js';
|
import type { StrategySubagent } from './subagents/strategy/index.js';
|
||||||
|
import { BaseSubagent } from './subagents/base-subagent.js';
|
||||||
import type { DynamicStructuredTool } from '@langchain/core/tools';
|
import type { DynamicStructuredTool } from '@langchain/core/tools';
|
||||||
import { getToolRegistry } from '../tools/tool-registry.js';
|
import { getToolRegistry } from '../tools/tool-registry.js';
|
||||||
import type { MCPToolInfo } from '../tools/mcp/mcp-tool-wrapper.js';
|
import type { MCPToolInfo } from '../tools/mcp/mcp-tool-wrapper.js';
|
||||||
@@ -237,12 +238,22 @@ export class AgentHarness {
|
|||||||
try {
|
try {
|
||||||
const { createResearchSubagent } = await import('./subagents/research/index.js');
|
const { createResearchSubagent } = await import('./subagents/research/index.js');
|
||||||
|
|
||||||
// Create a model for the research subagent
|
// Path resolution: use the compiled output path
|
||||||
|
const researchSubagentPath = join(__dirname, 'subagents', 'research');
|
||||||
|
this.config.logger.debug({ researchSubagentPath }, 'Using research subagent path');
|
||||||
|
|
||||||
|
// Load the subagent config to get maxTokens — research scripts require more tokens
|
||||||
|
// than the provider default (4096) because python_write arguments include full code bodies
|
||||||
|
const researchSubagentConfig = await BaseSubagent.loadConfig(researchSubagentPath);
|
||||||
|
|
||||||
|
// Create a model for the research subagent — always use the complex model
|
||||||
|
// since research tasks involve data analysis, charting, and code generation
|
||||||
const { model } = await this.modelRouter.route(
|
const { model } = await this.modelRouter.route(
|
||||||
'research analysis', // dummy query
|
'analyze and backtest research data', // triggers complex routing
|
||||||
this.config.license,
|
this.config.license,
|
||||||
RoutingStrategy.COMPLEXITY,
|
RoutingStrategy.COMPLEXITY,
|
||||||
this.config.userId
|
this.config.userId,
|
||||||
|
researchSubagentConfig.maxTokens // honour the subagent's maxTokens (e.g. 8192)
|
||||||
);
|
);
|
||||||
|
|
||||||
// Get tools for research subagent from registry
|
// Get tools for research subagent from registry
|
||||||
@@ -274,10 +285,6 @@ export class AgentHarness {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Path resolution: use the compiled output path
|
|
||||||
const researchSubagentPath = join(__dirname, 'subagents', 'research');
|
|
||||||
this.config.logger.debug({ researchSubagentPath }, 'Using research subagent path');
|
|
||||||
|
|
||||||
this.researchSubagent = await createResearchSubagent(
|
this.researchSubagent = await createResearchSubagent(
|
||||||
model,
|
model,
|
||||||
this.config.logger,
|
this.config.logger,
|
||||||
@@ -535,10 +542,12 @@ export class AgentHarness {
|
|||||||
const stream = await model.stream(messagesCopy, { signal });
|
const stream = await model.stream(messagesCopy, { signal });
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
if (typeof chunk.content === 'string' && chunk.content.length > 0) {
|
if (typeof chunk.content === 'string' && chunk.content.length > 0) {
|
||||||
|
this.config.logger.trace({ content: chunk.content }, 'raw chunk');
|
||||||
yield { type: 'chunk', content: chunk.content };
|
yield { type: 'chunk', content: chunk.content };
|
||||||
} else if (Array.isArray(chunk.content)) {
|
} else if (Array.isArray(chunk.content)) {
|
||||||
for (const block of chunk.content) {
|
for (const block of chunk.content) {
|
||||||
if (block.type === 'text' && block.text) {
|
if (block.type === 'text' && block.text) {
|
||||||
|
this.config.logger.trace({ content: block.text }, 'raw chunk');
|
||||||
yield { type: 'chunk', content: block.text };
|
yield { type: 'chunk', content: block.text };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,8 +18,11 @@ Dexorder trading platform provides OHLC data at a 1-minute resolution and suppor
|
|||||||
|
|
||||||
Dexorder does not support:
|
Dexorder does not support:
|
||||||
* tick-by-tick trading or high-frequency strategies.
|
* tick-by-tick trading or high-frequency strategies.
|
||||||
* long-running computations like paramater optimizations or training machine learning models.
|
* long-running computations like parameter optimizations or training machine learning models during live execution.
|
||||||
* portfolio optimization or trading strategies that require a large number of symbols.
|
* portfolio optimization or trading strategies that require a large number of symbols.
|
||||||
|
* LLM calls inside strategy scripts — strategies must be deterministic and lightweight for backtesting to be reliable and repeatable. LLMs are slow, expensive, and introduce temperature-based non-determinism that breaks backtesting. (Walk-forward LLM integration via timer/data triggers is planned but not yet available.)
|
||||||
|
* TradFi data (equities, forex, bonds, options, etc.) — only crypto pricing data is available.
|
||||||
|
* Alternative data sources such as news feeds, Twitter/social sentiment, on-chain data, or economic calendars — these are not yet available.
|
||||||
|
|
||||||
Dexorder does support:
|
Dexorder does support:
|
||||||
* backtesting strategies against historical data.
|
* backtesting strategies against historical data.
|
||||||
@@ -33,6 +36,27 @@ If the user asks for a capability not provided by Dexorder, decline and explain
|
|||||||
|
|
||||||
# Important Instructions
|
# Important Instructions
|
||||||
|
|
||||||
|
## Switching Chart Symbol or Timeframe
|
||||||
|
|
||||||
|
**IMPORTANT: When the user asks to switch, change, or update the chart symbol or timeframe, you MUST call `workspace_patch` directly. Do NOT use web_explore, do NOT delegate to the indicator tool.**
|
||||||
|
|
||||||
|
Call `workspace_patch` with `store_name = "chartState"` and the appropriate JSON patch:
|
||||||
|
|
||||||
|
To switch symbol only:
|
||||||
|
```json
|
||||||
|
[{ "op": "replace", "path": "/symbol", "value": "SOL/USDT.BINANCE" }]
|
||||||
|
```
|
||||||
|
|
||||||
|
To switch symbol and period (period is seconds: 60=1m, 300=5m, 900=15m, 3600=1h, 86400=1D):
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{ "op": "replace", "path": "/symbol", "value": "SOL/USDT.BINANCE" },
|
||||||
|
{ "op": "replace", "path": "/period", "value": 900 }
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
You already know this format — do not search for it. After patching, confirm the change to the user.
|
||||||
|
|
||||||
## Investment Advice
|
## Investment Advice
|
||||||
**NEVER** recommend any specific ticker, trade, or position. You may suggest mechanical adjustments or improvements to strategies, but you must **NEVER** offer an opinion on a specific trade or position. You are **NOT** a registered investment advisor.
|
**NEVER** recommend any specific ticker, trade, or position. You may suggest mechanical adjustments or improvements to strategies, but you must **NEVER** offer an opinion on a specific trade or position. You are **NOT** a registered investment advisor.
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
This is your first chat with a new user. Welcome them to Dexorder and describe who are you and what can you do.
|
This is your first chat with a new user. Welcome them to Dexorder, and describe who you are and what can you do.
|
||||||
@@ -83,6 +83,15 @@ self.config.initial_capital # starting capital in quote currency
|
|||||||
| `sell_vol` | float | Sell-side volume (taker sells) |
|
| `sell_vol` | float | Sell-side volume (taker sells) |
|
||||||
| `open_interest` | float | Open interest (futures only; NaN for spot) |
|
| `open_interest` | float | Open interest (futures only; NaN for spot) |
|
||||||
|
|
||||||
|
### Available data — crypto only
|
||||||
|
|
||||||
|
Strategies have access **only** to crypto OHLC feeds with volume, buy/sell volume split, and open interest. The following are **not available** and must never be referenced in a strategy:
|
||||||
|
|
||||||
|
- **TradFi data** — equities, forex, bonds, futures spreads, options, macro indicators, interest rates, etc.
|
||||||
|
- **Alternative data** — news feeds, social sentiment (Twitter/Reddit), on-chain metrics, economic calendars, earnings, etc.
|
||||||
|
|
||||||
|
If a user requests a strategy that depends on unavailable data, explain the limitation and offer a crypto-native alternative (e.g. use order-flow imbalance instead of news sentiment).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Section B — Strategy Metadata
|
## Section B — Strategy Metadata
|
||||||
@@ -355,3 +364,16 @@ deactivate_strategy(strategy_name) # Stop and get final PnL
|
|||||||
- 4h bars: 100k bars ≈ 45 years → cap at 5 years (≈ 10,950 bars)
|
- 4h bars: 100k bars ≈ 45 years → cap at 5 years (≈ 10,950 bars)
|
||||||
|
|
||||||
7. **Never `import` from `dexorder` inside `evaluate()`** — the strategy file is exec'd in a sandbox with PandasStrategy and pandas_ta pre-loaded. Standard library and pandas/numpy/pandas_ta are available.
|
7. **Never `import` from `dexorder` inside `evaluate()`** — the strategy file is exec'd in a sandbox with PandasStrategy and pandas_ta pre-loaded. Standard library and pandas/numpy/pandas_ta are available.
|
||||||
|
|
||||||
|
8. **No LLM calls inside strategies** — strategies must be fully deterministic. LLM invocations are prohibited because:
|
||||||
|
- They are slow and expensive, making backtesting impractical.
|
||||||
|
- Any temperature > 0 produces non-repeatable outputs, breaking backtest reproducibility.
|
||||||
|
- The correct model is: the LLM *writes* the strategy; the strategy runs without LLM involvement.
|
||||||
|
- Walk-forward LLM integration (via timer or data triggers) is a planned feature but is **not yet implemented**. Do not attempt to approximate it now.
|
||||||
|
|
||||||
|
9. **`evaluate()` must be fast, lightweight, and deterministic** — it is called on every bar during backtesting across potentially hundreds of thousands of bars. Specifically:
|
||||||
|
- **No heavy computation at runtime**: model inference, large matrix operations, file I/O, network calls, or database queries are forbidden inside `evaluate()`.
|
||||||
|
- **ML is allowed with restrictions**: a model may be trained offline (e.g. in `__init__` using warm-up data), but inference in `evaluate()` must be fast (microseconds, not milliseconds). If training is compute-intensive, note this clearly in the strategy description.
|
||||||
|
- **No randomness**: do not use `random`, `np.random`, or any non-seeded stochastic operation. All outputs given the same data must be identical across runs.
|
||||||
|
|
||||||
|
10. **Data scope** — strategies may only use data available in the `dfs` feeds. Do not attempt to fetch external data, call APIs, read files, or access anything outside the provided DataFrames. Crypto OHLCV + buy/sell volume + open interest is what is available; nothing else.
|
||||||
|
|||||||
@@ -306,6 +306,25 @@ export class KubernetesClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete only the Deployment, preserving PVC (user data) and Service (stable DNS).
|
||||||
|
* Used when applying a license tier change — next ensureContainerRunning recreates
|
||||||
|
* the deployment with updated resource limits.
|
||||||
|
*/
|
||||||
|
async deleteDeploymentOnly(userId: string): Promise<void> {
|
||||||
|
const deploymentName = KubernetesClient.getDeploymentName(userId);
|
||||||
|
try {
|
||||||
|
await this.appsApi.deleteNamespacedDeployment({
|
||||||
|
name: deploymentName,
|
||||||
|
namespace: this.config.namespace
|
||||||
|
});
|
||||||
|
this.config.logger.info({ deploymentName }, 'Deleted deployment (tier change)');
|
||||||
|
} catch (error: any) {
|
||||||
|
const is404 = error.code === 404 || error.response?.statusCode === 404 || error.statusCode === 404;
|
||||||
|
if (!is404) throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete deployment and associated resources
|
* Delete deployment and associated resources
|
||||||
* (Used for cleanup/testing - normally handled by lifecycle sidecar)
|
* (Used for cleanup/testing - normally handled by lifecycle sidecar)
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
import type { FastifyBaseLogger } from 'fastify';
|
import type { FastifyBaseLogger } from 'fastify';
|
||||||
import { KubernetesClient, type DeploymentSpec } from './client.js';
|
import { KubernetesClient, type DeploymentSpec } from './client.js';
|
||||||
import type { License } from '../types/user.js';
|
import type { License, LicenseTier } from '../types/user.js';
|
||||||
|
import type { UserService } from '../db/user-service.js';
|
||||||
|
|
||||||
export interface ContainerManagerConfig {
|
export interface ContainerManagerConfig {
|
||||||
k8sClient: KubernetesClient;
|
k8sClient: KubernetesClient;
|
||||||
|
userService: UserService;
|
||||||
sandboxImage: string;
|
sandboxImage: string;
|
||||||
sidecarImage: string;
|
sidecarImage: string;
|
||||||
storageClass: string;
|
storageClass: string;
|
||||||
@@ -139,6 +141,17 @@ export class ContainerManager {
|
|||||||
return { exists: true, ready, mcpEndpoint };
|
return { exists: true, ready, mcpEndpoint };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply a canonical license tier to a user: updates DB and deletes the deployment
|
||||||
|
* so it is recreated with the new resource limits on next connect.
|
||||||
|
*/
|
||||||
|
async applyLicenseTier(userId: string, tier: LicenseTier): Promise<License> {
|
||||||
|
const license = await this.config.userService.setUserLicenseTier(userId, tier);
|
||||||
|
await this.config.k8sClient.deleteDeploymentOnly(userId);
|
||||||
|
this.config.logger.info({ userId, tier }, 'License tier applied; deployment will recreate on next connect');
|
||||||
|
return license;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete container (for cleanup/testing)
|
* Delete container (for cleanup/testing)
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -42,7 +42,8 @@ export class ModelRouter {
|
|||||||
message: string,
|
message: string,
|
||||||
license: License,
|
license: License,
|
||||||
strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
|
strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
|
||||||
userId?: string
|
userId?: string,
|
||||||
|
maxTokens?: number
|
||||||
): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
|
): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
|
||||||
let modelConfig: ModelConfig;
|
let modelConfig: ModelConfig;
|
||||||
|
|
||||||
@@ -67,12 +68,17 @@ export class ModelRouter {
|
|||||||
modelConfig = this.defaultModel;
|
modelConfig = this.defaultModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (maxTokens !== undefined) {
|
||||||
|
modelConfig = { ...modelConfig, maxTokens };
|
||||||
|
}
|
||||||
|
|
||||||
this.logger.info(
|
this.logger.info(
|
||||||
{
|
{
|
||||||
userId,
|
userId,
|
||||||
strategy,
|
strategy,
|
||||||
provider: modelConfig.provider,
|
provider: modelConfig.provider,
|
||||||
model: modelConfig.model,
|
model: modelConfig.model,
|
||||||
|
maxTokens: modelConfig.maxTokens,
|
||||||
},
|
},
|
||||||
'Routing to model'
|
'Routing to model'
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import { AgentHarness, type HarnessSessionConfig } from './harness/agent-harness
|
|||||||
import { OHLCService } from './services/ohlc-service.js';
|
import { OHLCService } from './services/ohlc-service.js';
|
||||||
import { SymbolIndexService } from './services/symbol-index-service.js';
|
import { SymbolIndexService } from './services/symbol-index-service.js';
|
||||||
import { SymbolRoutes } from './routes/symbol-routes.js';
|
import { SymbolRoutes } from './routes/symbol-routes.js';
|
||||||
|
import { AdminRoutes } from './routes/admin-routes.js';
|
||||||
|
|
||||||
// Catch unhandled promise rejections for better debugging
|
// Catch unhandled promise rejections for better debugging
|
||||||
process.on('unhandledRejection', (reason: any, promise) => {
|
process.on('unhandledRejection', (reason: any, promise) => {
|
||||||
@@ -309,6 +310,7 @@ const k8sClient = new KubernetesClient({
|
|||||||
|
|
||||||
const containerManager = new ContainerManager({
|
const containerManager = new ContainerManager({
|
||||||
k8sClient,
|
k8sClient,
|
||||||
|
userService,
|
||||||
sandboxImage: config.kubernetes.sandboxImage,
|
sandboxImage: config.kubernetes.sandboxImage,
|
||||||
sidecarImage: config.kubernetes.sidecarImage,
|
sidecarImage: config.kubernetes.sidecarImage,
|
||||||
storageClass: config.kubernetes.storageClass,
|
storageClass: config.kubernetes.storageClass,
|
||||||
@@ -439,6 +441,9 @@ const getSymbolService = () => symbolIndexService;
|
|||||||
const symbolRoutes = new SymbolRoutes({ getSymbolIndexService: getSymbolService });
|
const symbolRoutes = new SymbolRoutes({ getSymbolIndexService: getSymbolService });
|
||||||
symbolRoutes.register(app);
|
symbolRoutes.register(app);
|
||||||
|
|
||||||
|
// Register admin routes
|
||||||
|
new AdminRoutes(containerManager, userService).register(app);
|
||||||
|
|
||||||
app.log.debug('All routes registered');
|
app.log.debug('All routes registered');
|
||||||
|
|
||||||
// Health check
|
// Health check
|
||||||
@@ -715,7 +720,6 @@ try {
|
|||||||
icebergClient,
|
icebergClient,
|
||||||
logger: app.log,
|
logger: app.log,
|
||||||
});
|
});
|
||||||
await indexService.initialize();
|
|
||||||
|
|
||||||
// Assign to module-level variable so onMetadataUpdate callback can use it
|
// Assign to module-level variable so onMetadataUpdate callback can use it
|
||||||
symbolIndexService = indexService;
|
symbolIndexService = indexService;
|
||||||
@@ -723,7 +727,17 @@ try {
|
|||||||
// Update websocket handler's config so it can use the service
|
// Update websocket handler's config so it can use the service
|
||||||
(websocketHandler as any).config.symbolIndexService = indexService;
|
(websocketHandler as any).config.symbolIndexService = indexService;
|
||||||
|
|
||||||
app.log.info({ stats: symbolIndexService.getStats() }, 'Symbol index service initialized');
|
// Retry until we get at least some symbol metadata
|
||||||
|
while (true) {
|
||||||
|
await indexService.initialize();
|
||||||
|
const stats = indexService.getStats();
|
||||||
|
if (stats.symbolCount > 0) {
|
||||||
|
app.log.info({ stats }, 'Symbol index service initialized');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
app.log.warn('Symbol index has no metadata yet, retrying in 5 seconds...');
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
app.log.warn({ error }, 'Failed to initialize symbol index service - symbol search will not be available');
|
app.log.warn({ error }, 'Failed to initialize symbol index service - symbol search will not be available');
|
||||||
}
|
}
|
||||||
|
|||||||
35
gateway/src/routes/admin-routes.ts
Normal file
35
gateway/src/routes/admin-routes.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import type { ContainerManager } from '../k8s/container-manager.js';
|
||||||
|
import type { UserService } from '../db/user-service.js';
|
||||||
|
import type { LicenseTier } from '../types/user.js';
|
||||||
|
|
||||||
|
const VALID_TIERS: LicenseTier[] = ['free', 'pro', 'enterprise'];
|
||||||
|
|
||||||
|
export class AdminRoutes {
|
||||||
|
private containerManager: ContainerManager;
|
||||||
|
private userService: UserService;
|
||||||
|
|
||||||
|
constructor(containerManager: ContainerManager, userService: UserService) {
|
||||||
|
this.containerManager = containerManager;
|
||||||
|
this.userService = userService;
|
||||||
|
}
|
||||||
|
|
||||||
|
register(app: FastifyInstance): void {
|
||||||
|
app.post<{ Params: { userId: string }; Body: { tier: string } }>(
|
||||||
|
'/admin/users/:userId/set-tier',
|
||||||
|
async (req, reply) => {
|
||||||
|
const { userId } = req.params;
|
||||||
|
const { tier } = req.body;
|
||||||
|
if (!VALID_TIERS.includes(tier as LicenseTier)) {
|
||||||
|
return reply.code(400).send({ error: `Invalid tier. Must be one of: ${VALID_TIERS.join(', ')}` });
|
||||||
|
}
|
||||||
|
const license = await this.containerManager.applyLicenseTier(userId, tier as LicenseTier);
|
||||||
|
return { userId, tier, license };
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
app.post('/admin/migrate-licenses', async () => {
|
||||||
|
return await this.userService.migrateAllLicenses();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -167,11 +167,7 @@ export class OHLCService {
|
|||||||
period_seconds,
|
period_seconds,
|
||||||
}, 'Failed to fetch historical data');
|
}, 'Failed to fetch historical data');
|
||||||
|
|
||||||
// Return empty result on error
|
throw error;
|
||||||
return {
|
|
||||||
bars: [],
|
|
||||||
noData: true,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
87
gateway/src/test-deepinfra-chunks.ts
Normal file
87
gateway/src/test-deepinfra-chunks.ts
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
/**
|
||||||
|
* Direct DeepInfra streaming test — bypasses LangChain entirely.
|
||||||
|
* Logs each delta.content with JSON.stringify so spaces are unambiguous.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* DEEPINFRA_API_KEY=$(op read "op://Private/DeepInfra/credential") npx tsx src/test-deepinfra-chunks.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
export {};
|
||||||
|
|
||||||
|
const DEEP_INFRA_URL = 'https://api.deepinfra.com/v1/openai/chat/completions';
|
||||||
|
const MODEL = 'zai-org/GLM-5';
|
||||||
|
|
||||||
|
const apiKey = process.env.DEEPINFRA_API_KEY;
|
||||||
|
if (!apiKey) {
|
||||||
|
console.error('DEEPINFRA_API_KEY is not set');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const res = await fetch(DEEP_INFRA_URL, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: MODEL,
|
||||||
|
stream: true,
|
||||||
|
messages: [
|
||||||
|
{ role: 'user', content: 'Write two sentences about ETH price analysis.' },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!res.ok || !res.body) {
|
||||||
|
console.error(`HTTP ${res.status}: ${await res.text()}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const reader = res.body.getReader();
|
||||||
|
const decoder = new TextDecoder();
|
||||||
|
let chunkIndex = 0;
|
||||||
|
let assembled = '';
|
||||||
|
|
||||||
|
console.log(`Testing model: ${MODEL}`);
|
||||||
|
console.log('--- chunks ---');
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const { value, done } = await reader.read();
|
||||||
|
if (done) break;
|
||||||
|
|
||||||
|
const text = decoder.decode(value, { stream: true });
|
||||||
|
|
||||||
|
for (const line of text.split('\n')) {
|
||||||
|
const trimmed = line.trim();
|
||||||
|
if (!trimmed.startsWith('data:')) continue;
|
||||||
|
const data = trimmed.slice(5).trimStart();
|
||||||
|
if (data === '[DONE]') break;
|
||||||
|
|
||||||
|
let parsed: unknown;
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(data);
|
||||||
|
} catch {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const choice = (parsed as { choices?: Array<{ delta?: Record<string, unknown> }> })
|
||||||
|
?.choices?.[0];
|
||||||
|
const delta = choice?.delta;
|
||||||
|
const content = delta?.content as string | undefined;
|
||||||
|
|
||||||
|
if (content !== undefined) {
|
||||||
|
const endsSpace = content.endsWith(' ');
|
||||||
|
const startsSpace = content.startsWith(' ');
|
||||||
|
// Log full delta so we can see all available fields (logprobs, token_ids, etc.)
|
||||||
|
console.log(
|
||||||
|
`chunk[${chunkIndex++}]: ${JSON.stringify(content)} ` +
|
||||||
|
`(len=${content.length}, startsSpace=${startsSpace}, endsSpace=${endsSpace}) ` +
|
||||||
|
`delta=${JSON.stringify(delta)}`,
|
||||||
|
);
|
||||||
|
assembled += content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('--- assembled ---');
|
||||||
|
console.log(assembled);
|
||||||
@@ -42,7 +42,8 @@ Use this tool for:
|
|||||||
- Recommending indicators for a given strategy or analysis goal
|
- Recommending indicators for a given strategy or analysis goal
|
||||||
|
|
||||||
ALWAYS use this tool for any request about the chart's indicators.
|
ALWAYS use this tool for any request about the chart's indicators.
|
||||||
NEVER modify the indicators workspace store directly.`,
|
NEVER modify the indicators workspace store directly.
|
||||||
|
NEVER use this tool to switch the chart symbol or timeframe — that is done via workspace_patch on chartState.`,
|
||||||
schema: z.object({
|
schema: z.object({
|
||||||
instruction: z.string().describe(
|
instruction: z.string().describe(
|
||||||
'The indicator task to perform. Be specific about which indicators, parameters, ' +
|
'The indicator task to perform. Be specific about which indicators, parameters, ' +
|
||||||
|
|||||||
@@ -30,13 +30,18 @@ export function createWebExploreAgentTool(config: WebExploreAgentToolConfig): Dy
|
|||||||
|
|
||||||
const tool = new DynamicStructuredTool({
|
const tool = new DynamicStructuredTool({
|
||||||
name: 'web_explore',
|
name: 'web_explore',
|
||||||
description: `Search the web or academic databases and return a summarized answer.
|
description: `Search the EXTERNAL web or academic databases and return a summarized answer.
|
||||||
|
|
||||||
Use this tool when the user asks about:
|
Use this tool ONLY for external, public information:
|
||||||
- Current events, news, or real-time information
|
- Current events, news, or real-time information
|
||||||
- Documentation, tutorials, or how-to guides
|
- External documentation, tutorials, or how-to guides for third-party libraries/tools
|
||||||
- Academic papers, research findings, or scientific topics
|
- Academic papers, research findings, or scientific topics
|
||||||
- Any topic that benefits from external sources
|
- Any topic requiring external sources
|
||||||
|
|
||||||
|
NEVER use this tool for:
|
||||||
|
- Questions about the Dexorder platform itself (workspace tools, chartState, indicators, strategies)
|
||||||
|
- Internal API usage (workspace_patch, workspace_read, etc.) — consult the system prompt instead
|
||||||
|
- Anything that can be answered from the context already available
|
||||||
|
|
||||||
The subagent will search the web (or arXiv for academic queries), fetch relevant content, and return a markdown summary with cited sources.`,
|
The subagent will search the web (or arXiv for academic queries), fetch relevant content, and return a markdown summary with cited sources.`,
|
||||||
schema: z.object({
|
schema: z.object({
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
|||||||
maxTokensPerMessage: 4096, rateLimitPerMinute: 10,
|
maxTokensPerMessage: 4096, rateLimitPerMinute: 10,
|
||||||
},
|
},
|
||||||
k8sResources: {
|
k8sResources: {
|
||||||
memoryRequest: '256Mi', memoryLimit: '512Mi',
|
memoryRequest: '256Mi', memoryLimit: '8Gi',
|
||||||
cpuRequest: '100m', cpuLimit: '500m',
|
cpuRequest: '100m', cpuLimit: '500m',
|
||||||
storage: '1Gi', tmpSizeLimit: '128Mi',
|
storage: '1Gi', tmpSizeLimit: '128Mi',
|
||||||
enableIdleShutdown: true, idleTimeoutMinutes: 15,
|
enableIdleShutdown: true, idleTimeoutMinutes: 15,
|
||||||
@@ -93,7 +93,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
|||||||
maxTokensPerMessage: 8192, rateLimitPerMinute: 60,
|
maxTokensPerMessage: 8192, rateLimitPerMinute: 60,
|
||||||
},
|
},
|
||||||
k8sResources: {
|
k8sResources: {
|
||||||
memoryRequest: '512Mi', memoryLimit: '2Gi',
|
memoryRequest: '512Mi', memoryLimit: '8Gi',
|
||||||
cpuRequest: '250m', cpuLimit: '2000m',
|
cpuRequest: '250m', cpuLimit: '2000m',
|
||||||
storage: '10Gi', tmpSizeLimit: '256Mi',
|
storage: '10Gi', tmpSizeLimit: '256Mi',
|
||||||
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
||||||
@@ -110,7 +110,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
|||||||
maxTokensPerMessage: 32768, rateLimitPerMinute: 300,
|
maxTokensPerMessage: 32768, rateLimitPerMinute: 300,
|
||||||
},
|
},
|
||||||
k8sResources: {
|
k8sResources: {
|
||||||
memoryRequest: '1Gi', memoryLimit: '4Gi',
|
memoryRequest: '1Gi', memoryLimit: '8Gi',
|
||||||
cpuRequest: '500m', cpuLimit: '4000m',
|
cpuRequest: '500m', cpuLimit: '4000m',
|
||||||
storage: '50Gi', tmpSizeLimit: '512Mi',
|
storage: '50Gi', tmpSizeLimit: '512Mi',
|
||||||
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
||||||
|
|||||||
@@ -79,12 +79,12 @@ export interface StoreConfig {
|
|||||||
export const DEFAULT_STORES: StoreConfig[] = [
|
export const DEFAULT_STORES: StoreConfig[] = [
|
||||||
{
|
{
|
||||||
name: 'chartState',
|
name: 'chartState',
|
||||||
persistent: false,
|
persistent: true,
|
||||||
initialState: () => ({
|
initialState: () => ({
|
||||||
symbol: 'BTC/USDT.BINANCE',
|
symbol: 'BTC/USDT.BINANCE',
|
||||||
start_time: null,
|
start_time: null,
|
||||||
end_time: null,
|
end_time: null,
|
||||||
period: '15',
|
period: 900,
|
||||||
selected_shapes: [],
|
selected_shapes: [],
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,6 +1,37 @@
|
|||||||
// CCXT data fetcher for historical OHLC and realtime ticks
|
// CCXT data fetcher for historical OHLC and realtime ticks
|
||||||
import ccxt from 'ccxt';
|
import ccxt from 'ccxt';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown when an exchange returns a 429 rate-limit response.
|
||||||
|
* retryAfterMs is derived from the exchange's Retry-After header when available.
|
||||||
|
*/
|
||||||
|
export class ExchangeRateLimitError extends Error {
|
||||||
|
constructor(exchange, retryAfterMs, originalMessage) {
|
||||||
|
super(`Rate limit on ${exchange}: retry after ${retryAfterMs}ms (${originalMessage})`);
|
||||||
|
this.name = 'ExchangeRateLimitError';
|
||||||
|
this.exchange = exchange.toUpperCase();
|
||||||
|
this.retryAfterMs = retryAfterMs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract retry-after duration in milliseconds from a CCXT RateLimitExceeded error.
|
||||||
|
* Priority: Retry-After header → error message numeric → 30s fallback.
|
||||||
|
*/
|
||||||
|
function extractRetryAfterMs(exchange, error) {
|
||||||
|
const header = exchange.last_response_headers?.['retry-after'];
|
||||||
|
if (header) {
|
||||||
|
const secs = parseFloat(header);
|
||||||
|
if (!isNaN(secs)) return Math.ceil(secs * 1000);
|
||||||
|
}
|
||||||
|
// Some exchanges embed the delay in the message (e.g. "retry after 5000 ms")
|
||||||
|
const msMatch = error.message?.match(/(\d+)\s*ms/i);
|
||||||
|
if (msMatch) return parseInt(msMatch[1], 10);
|
||||||
|
const secMatch = error.message?.match(/(\d+(?:\.\d+)?)\s*s(?:ec|econds?)?/i);
|
||||||
|
if (secMatch) return Math.ceil(parseFloat(secMatch[1]) * 1000);
|
||||||
|
return 30_000;
|
||||||
|
}
|
||||||
|
|
||||||
export class CCXTFetcher {
|
export class CCXTFetcher {
|
||||||
constructor(config, logger, metadataGenerator = null) {
|
constructor(config, logger, metadataGenerator = null) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
@@ -135,9 +166,12 @@ export class CCXTFetcher {
|
|||||||
break;
|
break;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
lastError = error;
|
lastError = error;
|
||||||
const isRetryable = error.constructor?.name === 'NetworkError' ||
|
const isRateLimit = error.constructor?.name === 'RateLimitExceeded';
|
||||||
|
const isRetryable = !isRateLimit && (
|
||||||
|
error.constructor?.name === 'NetworkError' ||
|
||||||
error.constructor?.name === 'RequestTimeout' ||
|
error.constructor?.name === 'RequestTimeout' ||
|
||||||
error.constructor?.name === 'ExchangeNotAvailable';
|
error.constructor?.name === 'ExchangeNotAvailable'
|
||||||
|
);
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
{
|
{
|
||||||
errorType: error.constructor?.name,
|
errorType: error.constructor?.name,
|
||||||
@@ -146,15 +180,21 @@ export class CCXTFetcher {
|
|||||||
ticker,
|
ticker,
|
||||||
since,
|
since,
|
||||||
attempt,
|
attempt,
|
||||||
retryable: isRetryable
|
retryable: isRetryable,
|
||||||
|
rateLimit: isRateLimit
|
||||||
},
|
},
|
||||||
'OHLC fetch attempt failed'
|
'OHLC fetch attempt failed'
|
||||||
);
|
);
|
||||||
if (!isRetryable || attempt === FETCH_RETRIES) break;
|
if (isRateLimit || !isRetryable || attempt === FETCH_RETRIES) break;
|
||||||
await exchange.sleep(FETCH_RETRY_DELAY_MS * attempt);
|
await exchange.sleep(FETCH_RETRY_DELAY_MS * attempt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (lastError) {
|
if (lastError) {
|
||||||
|
if (lastError.constructor?.name === 'RateLimitExceeded') {
|
||||||
|
const retryAfterMs = extractRetryAfterMs(exchange, lastError);
|
||||||
|
this.logger.warn({ ticker, retryAfterMs }, 'OHLC fetch rate-limited by exchange');
|
||||||
|
throw new ExchangeRateLimitError(exchangeName, retryAfterMs, lastError.message);
|
||||||
|
}
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
{
|
{
|
||||||
errorType: lastError.constructor?.name,
|
errorType: lastError.constructor?.name,
|
||||||
@@ -278,6 +318,11 @@ export class CCXTFetcher {
|
|||||||
// Convert to our Tick format
|
// Convert to our Tick format
|
||||||
return trades.map(trade => this.convertToTick(trade, ticker, metadata));
|
return trades.map(trade => this.convertToTick(trade, ticker, metadata));
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error.constructor?.name === 'RateLimitExceeded') {
|
||||||
|
const retryAfterMs = extractRetryAfterMs(exchange, error);
|
||||||
|
this.logger.warn({ ticker, retryAfterMs }, 'Trades fetch rate-limited by exchange');
|
||||||
|
throw new ExchangeRateLimitError(exchangeName, retryAfterMs, error.message);
|
||||||
|
}
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
{ error: error.message, ticker },
|
{ error: error.message, ticker },
|
||||||
'Error fetching trades'
|
'Error fetching trades'
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ import { parse as parseYaml } from 'yaml';
|
|||||||
import pino from 'pino';
|
import pino from 'pino';
|
||||||
import { ZmqClient } from './zmq-client.js';
|
import { ZmqClient } from './zmq-client.js';
|
||||||
import { KafkaProducer } from './kafka-producer.js';
|
import { KafkaProducer } from './kafka-producer.js';
|
||||||
import { CCXTFetcher } from './ccxt-fetcher.js';
|
import { CCXTFetcher, ExchangeRateLimitError } from './ccxt-fetcher.js';
|
||||||
import { RealtimePoller } from './realtime-poller.js';
|
import { RealtimePoller } from './realtime-poller.js';
|
||||||
import { SymbolMetadataGenerator } from './symbol-metadata-generator.js';
|
import { SymbolMetadataGenerator } from './symbol-metadata-generator.js';
|
||||||
|
import { SlotType } from './proto/messages.js';
|
||||||
|
|
||||||
// Logger setup
|
// Logger setup
|
||||||
const logger = pino({
|
const logger = pino({
|
||||||
@@ -64,10 +65,162 @@ function loadConfig() {
|
|||||||
supported_exchanges: config.supported_exchanges || ['binance', 'coinbase', 'kraken'],
|
supported_exchanges: config.supported_exchanges || ['binance', 'coinbase', 'kraken'],
|
||||||
symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000,
|
symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000,
|
||||||
|
|
||||||
|
// Per-exchange slot capacity
|
||||||
|
exchange_capacity: config.exchange_capacity || {
|
||||||
|
BINANCE: { historical_slots: 3, realtime_slots: 5 },
|
||||||
|
KRAKEN: { historical_slots: 2, realtime_slots: 3 },
|
||||||
|
COINBASE: { historical_slots: 2, realtime_slots: 4 }
|
||||||
|
},
|
||||||
|
|
||||||
...secrets
|
...secrets
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Manages work slots per exchange per job type.
|
||||||
|
*
|
||||||
|
* Each slot corresponds to one WorkerReady message sent to Flink. Flink consumes
|
||||||
|
* a slot when it dispatches a job. The slot is re-offered (via another WorkerReady)
|
||||||
|
* once the job completes, subject to any rate-limit backoff dictated by the exchange.
|
||||||
|
*/
|
||||||
|
class SlotPool {
|
||||||
|
constructor(exchangeCapacity, zmqClient, logger) {
|
||||||
|
this.zmqClient = zmqClient;
|
||||||
|
this.logger = logger;
|
||||||
|
|
||||||
|
// Key: 'EXCHANGE|TYPE' (e.g. 'BINANCE|HISTORICAL')
|
||||||
|
// Value: { max, active: Set<jobId>, backoffUntil: ms timestamp }
|
||||||
|
this.slots = new Map();
|
||||||
|
|
||||||
|
for (const [exchange, cap] of Object.entries(exchangeCapacity)) {
|
||||||
|
const ex = exchange.toUpperCase();
|
||||||
|
this.slots.set(`${ex}|HISTORICAL`, {
|
||||||
|
max: cap.historical_slots ?? 2,
|
||||||
|
active: new Set(),
|
||||||
|
backoffUntil: 0
|
||||||
|
});
|
||||||
|
this.slots.set(`${ex}|REALTIME`, {
|
||||||
|
max: cap.realtime_slots ?? 3,
|
||||||
|
active: new Set(),
|
||||||
|
backoffUntil: 0
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// jobId → { exchange, type } for release tracking
|
||||||
|
this.jobMap = new Map();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register the onConnected callback so slot offers are sent on every
|
||||||
|
* TCP (re)connect rather than once at startup. Handles both the initial
|
||||||
|
* connection race (Flink ROUTER not yet ready) and Flink restarts.
|
||||||
|
*/
|
||||||
|
init() {
|
||||||
|
this.zmqClient.onConnected = () => this._offerAllFreeSlots();
|
||||||
|
this.logger.info(
|
||||||
|
{ slots: [...this.slots.entries()].map(([k, v]) => `${k}:${v.max}`) },
|
||||||
|
'Slot pool initialized — will offer slots on connect'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Re-offer all currently-free slots. Called on every TCP (re)connect.
|
||||||
|
* Sends (max - active) WorkerReady messages per exchange+type key.
|
||||||
|
*/
|
||||||
|
async _offerAllFreeSlots() {
|
||||||
|
const summary = [];
|
||||||
|
for (const [key, slot] of this.slots) {
|
||||||
|
const [exchange, type] = key.split('|');
|
||||||
|
const freeCount = slot.max - slot.active.size;
|
||||||
|
for (let i = 0; i < freeCount; i++) {
|
||||||
|
await this.zmqClient.sendTypedReady(exchange, SlotType[type]);
|
||||||
|
}
|
||||||
|
summary.push(`${key}:${freeCount}/${slot.max}`);
|
||||||
|
}
|
||||||
|
this.logger.info({ offered: summary }, 'Re-offered all free slots on connect');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a slot as occupied by jobId.
|
||||||
|
* @param {string} jobId
|
||||||
|
* @param {string} exchange - e.g. 'BINANCE'
|
||||||
|
* @param {string} type - 'HISTORICAL' | 'REALTIME'
|
||||||
|
*/
|
||||||
|
consumeSlot(jobId, exchange, type) {
|
||||||
|
const key = `${exchange.toUpperCase()}|${type}`;
|
||||||
|
const slot = this.slots.get(key);
|
||||||
|
if (slot) {
|
||||||
|
if (slot.active.size >= slot.max) {
|
||||||
|
this.logger.warn({ jobId, key, active: slot.active.size, max: slot.max }, 'Slot capacity exceeded — rejecting job');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
slot.active.add(jobId);
|
||||||
|
this.jobMap.set(jobId, { exchange: exchange.toUpperCase(), type });
|
||||||
|
this.logger.debug({ jobId, key, active: slot.active.size, max: slot.max }, 'Slot consumed');
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
this.logger.warn({ jobId, key }, 'No slot config for this exchange+type');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Release the slot occupied by jobId and re-offer it to Flink (after any backoff).
|
||||||
|
*/
|
||||||
|
async releaseSlot(jobId) {
|
||||||
|
const info = this.jobMap.get(jobId);
|
||||||
|
if (!info) {
|
||||||
|
this.logger.warn({ jobId }, 'releaseSlot called for unknown jobId');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.jobMap.delete(jobId);
|
||||||
|
const key = `${info.exchange}|${info.type}`;
|
||||||
|
const slot = this.slots.get(key);
|
||||||
|
if (slot) {
|
||||||
|
slot.active.delete(jobId);
|
||||||
|
await this._offerSlot(info.exchange, info.type, slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a rate limit from the exchange. Delays slot re-offer by retryAfterMs.
|
||||||
|
* @param {string} exchange
|
||||||
|
* @param {string} type - 'HISTORICAL' | 'REALTIME'
|
||||||
|
* @param {number} retryAfterMs
|
||||||
|
*/
|
||||||
|
reportRateLimit(exchange, type, retryAfterMs) {
|
||||||
|
const key = `${exchange.toUpperCase()}|${type}`;
|
||||||
|
const slot = this.slots.get(key);
|
||||||
|
if (slot) {
|
||||||
|
slot.backoffUntil = Math.max(slot.backoffUntil, Date.now() + retryAfterMs);
|
||||||
|
this.logger.warn({ exchange, type, retryAfterMs }, 'Rate limit backoff set for slot');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async _offerSlot(exchange, type, slot) {
|
||||||
|
const now = Date.now();
|
||||||
|
if (now < slot.backoffUntil) {
|
||||||
|
const delay = slot.backoffUntil - now;
|
||||||
|
this.logger.info({ exchange, type, delayMs: delay }, 'Slot in backoff — scheduling re-offer');
|
||||||
|
setTimeout(() => this._offerSlot(exchange, type, slot), delay);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
await this.zmqClient.sendTypedReady(exchange, SlotType[type]);
|
||||||
|
this.logger.debug({ exchange, type }, 'Slot re-offered to Flink');
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error({ exchange, type, error: err.message }, 'Failed to re-offer slot');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
shutdown() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Extract exchange name from ticker string, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
|
||||||
|
function exchangeOf(ticker) {
|
||||||
|
const lastDot = ticker?.lastIndexOf('.');
|
||||||
|
return (lastDot >= 0) ? ticker.slice(lastDot + 1).toUpperCase() : 'UNKNOWN';
|
||||||
|
}
|
||||||
|
|
||||||
class IngestorWorker {
|
class IngestorWorker {
|
||||||
constructor(config, logger) {
|
constructor(config, logger) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
@@ -92,7 +245,22 @@ class IngestorWorker {
|
|||||||
logger.child({ component: 'poller' })
|
logger.child({ component: 'poller' })
|
||||||
);
|
);
|
||||||
|
|
||||||
// jobId → active realtime subscription (for stop handling)
|
this.pool = new SlotPool(
|
||||||
|
config.exchange_capacity,
|
||||||
|
this.zmqClient,
|
||||||
|
logger.child({ component: 'pool' })
|
||||||
|
);
|
||||||
|
|
||||||
|
// When realtime poller terminates a subscription due to repeated errors, release its slot.
|
||||||
|
this.realtimePoller.onJobComplete = (jobId, error) => {
|
||||||
|
if (error instanceof ExchangeRateLimitError) {
|
||||||
|
this.pool.reportRateLimit(error.exchange, 'REALTIME', error.retryAfterMs);
|
||||||
|
}
|
||||||
|
this.pool.releaseSlot(jobId).catch(err =>
|
||||||
|
this.logger.error({ jobId, error: err.message }, 'Failed to release slot after realtime error'));
|
||||||
|
};
|
||||||
|
|
||||||
|
// jobId set for active realtime subscriptions
|
||||||
this.activeRealtime = new Set();
|
this.activeRealtime = new Set();
|
||||||
|
|
||||||
this.isShutdown = false;
|
this.isShutdown = false;
|
||||||
@@ -108,7 +276,10 @@ class IngestorWorker {
|
|||||||
this.zmqClient.onWorkAssign = req => this.handleWorkAssign(req);
|
this.zmqClient.onWorkAssign = req => this.handleWorkAssign(req);
|
||||||
this.zmqClient.onWorkStop = jobId => this.handleWorkStop(jobId);
|
this.zmqClient.onWorkStop = jobId => this.handleWorkStop(jobId);
|
||||||
|
|
||||||
await this.zmqClient.connect(); // also sends WorkerReady
|
// Register slot offer callback before connecting so we don't miss the event
|
||||||
|
this.pool.init();
|
||||||
|
|
||||||
|
await this.zmqClient.connect();
|
||||||
|
|
||||||
// Generate symbol metadata on startup
|
// Generate symbol metadata on startup
|
||||||
this.logger.info('Generating initial symbol metadata');
|
this.logger.info('Generating initial symbol metadata');
|
||||||
@@ -139,18 +310,26 @@ class IngestorWorker {
|
|||||||
*/
|
*/
|
||||||
handleWorkAssign(request) {
|
handleWorkAssign(request) {
|
||||||
const { jobId, requestId, type, ticker } = request;
|
const { jobId, requestId, type, ticker } = request;
|
||||||
|
const exchange = exchangeOf(ticker);
|
||||||
|
|
||||||
this.logger.info({ jobId, requestId, type, ticker }, 'Received WorkAssign');
|
this.logger.info({ jobId, requestId, type, ticker, exchange }, 'Received WorkAssign');
|
||||||
|
|
||||||
// HISTORICAL_OHLC = 0 (proto3 default, may appear as undefined or 'HISTORICAL_OHLC')
|
|
||||||
const isHistorical = !type || type === 'HISTORICAL_OHLC' || type === 0;
|
const isHistorical = !type || type === 'HISTORICAL_OHLC' || type === 0;
|
||||||
const isRealtime = type === 'REALTIME_TICKS' || type === 1;
|
const isRealtime = type === 'REALTIME_TICKS' || type === 1;
|
||||||
|
|
||||||
if (isHistorical) {
|
if (isHistorical) {
|
||||||
|
if (!this.pool.consumeSlot(jobId, exchange, 'HISTORICAL')) {
|
||||||
|
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
|
||||||
|
return;
|
||||||
|
}
|
||||||
this.handleHistoricalRequest(request).catch(err => {
|
this.handleHistoricalRequest(request).catch(err => {
|
||||||
this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in historical handler');
|
this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in historical handler');
|
||||||
});
|
});
|
||||||
} else if (isRealtime) {
|
} else if (isRealtime) {
|
||||||
|
if (!this.pool.consumeSlot(jobId, exchange, 'REALTIME')) {
|
||||||
|
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
|
||||||
|
return;
|
||||||
|
}
|
||||||
this.handleRealtimeRequest(request);
|
this.handleRealtimeRequest(request);
|
||||||
} else {
|
} else {
|
||||||
this.logger.warn({ jobId, type }, 'Unknown request type — rejecting');
|
this.logger.warn({ jobId, type }, 'Unknown request type — rejecting');
|
||||||
@@ -165,7 +344,9 @@ class IngestorWorker {
|
|||||||
this.logger.info({ jobId }, 'Received WorkStop — cancelling realtime subscription');
|
this.logger.info({ jobId }, 'Received WorkStop — cancelling realtime subscription');
|
||||||
this.realtimePoller.cancelSubscription(jobId);
|
this.realtimePoller.cancelSubscription(jobId);
|
||||||
this.activeRealtime.delete(jobId);
|
this.activeRealtime.delete(jobId);
|
||||||
// No WorkComplete needed — Flink sent the stop, it already knows
|
this.pool.releaseSlot(jobId).catch(err =>
|
||||||
|
this.logger.warn({ jobId, error: err.message }, 'Failed to release slot after WorkStop'));
|
||||||
|
// No WorkComplete needed — Flink sent the stop, it already knows.
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -174,10 +355,14 @@ class IngestorWorker {
|
|||||||
*/
|
*/
|
||||||
async handleHistoricalRequest(request) {
|
async handleHistoricalRequest(request) {
|
||||||
const { jobId, requestId, ticker, historical, clientId: client_id } = request;
|
const { jobId, requestId, ticker, historical, clientId: client_id } = request;
|
||||||
|
const exchange = exchangeOf(ticker);
|
||||||
const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical || {};
|
const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical || {};
|
||||||
|
|
||||||
this.logger.info({ jobId, requestId, ticker, period_seconds }, 'Processing historical OHLC request');
|
this.logger.info({ jobId, requestId, ticker, period_seconds }, 'Processing historical OHLC request');
|
||||||
|
|
||||||
|
// Immediately ack to reset Flink's dispatch-time timeout clock.
|
||||||
|
await this.zmqClient.sendHeartbeat(jobId);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
|
const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
|
||||||
ticker, start_time, end_time, period_seconds, limit
|
ticker, start_time, end_time, period_seconds, limit
|
||||||
@@ -193,7 +378,10 @@ class IngestorWorker {
|
|||||||
const isLastPage = (i + PAGE_SIZE) >= candles.length;
|
const isLastPage = (i + PAGE_SIZE) >= candles.length;
|
||||||
await this.kafkaProducer.writeOHLCs(this.config.kafka_ohlc_topic, page, metadata, isLastPage);
|
await this.kafkaProducer.writeOHLCs(this.config.kafka_ohlc_topic, page, metadata, isLastPage);
|
||||||
}
|
}
|
||||||
this.logger.info({ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) }, 'Wrote all pages to Kafka');
|
this.logger.info(
|
||||||
|
{ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) },
|
||||||
|
'Wrote all pages to Kafka'
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
||||||
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
||||||
@@ -207,6 +395,10 @@ class IngestorWorker {
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.error({ jobId, requestId, ticker, error: error.message }, 'Historical request failed');
|
this.logger.error({ jobId, requestId, ticker, error: error.message }, 'Historical request failed');
|
||||||
|
|
||||||
|
if (error instanceof ExchangeRateLimitError) {
|
||||||
|
this.pool.reportRateLimit(exchange, 'HISTORICAL', error.retryAfterMs);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
||||||
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
||||||
@@ -218,11 +410,14 @@ class IngestorWorker {
|
|||||||
|
|
||||||
await this.zmqClient.sendComplete(jobId, false, error.message);
|
await this.zmqClient.sendComplete(jobId, false, error.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Release slot regardless of success or failure
|
||||||
|
this.pool.releaseSlot(jobId).catch(err =>
|
||||||
|
this.logger.error({ jobId, error: err.message }, 'Failed to release historical slot'));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start realtime tick polling for a job dispatched by Flink.
|
* Start realtime tick polling for a job dispatched by Flink.
|
||||||
* Ticks flow: exchange → Kafka market-tick → Flink → OHLC bars → clients.
|
|
||||||
*/
|
*/
|
||||||
handleRealtimeRequest(request) {
|
handleRealtimeRequest(request) {
|
||||||
const { jobId, requestId, ticker } = request;
|
const { jobId, requestId, ticker } = request;
|
||||||
@@ -247,6 +442,7 @@ class IngestorWorker {
|
|||||||
|
|
||||||
if (this.metadataInterval) clearInterval(this.metadataInterval);
|
if (this.metadataInterval) clearInterval(this.metadataInterval);
|
||||||
|
|
||||||
|
this.pool.shutdown();
|
||||||
this.realtimePoller.shutdown();
|
this.realtimePoller.shutdown();
|
||||||
await this.ccxtFetcher.close();
|
await this.ccxtFetcher.close();
|
||||||
await this.metadataGenerator.close();
|
await this.metadataGenerator.close();
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ export class RealtimePoller {
|
|||||||
|
|
||||||
this.pollingLoop = null;
|
this.pollingLoop = null;
|
||||||
this.heartbeatLoop = null;
|
this.heartbeatLoop = null;
|
||||||
|
|
||||||
|
// Called with (jobId, error) when a subscription terminates abnormally.
|
||||||
|
// Set by IngestorWorker to release the slot in SlotPool.
|
||||||
|
this.onJobComplete = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -147,6 +151,7 @@ export class RealtimePoller {
|
|||||||
} catch (zmqErr) {
|
} catch (zmqErr) {
|
||||||
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
|
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
|
||||||
}
|
}
|
||||||
|
if (this.onJobComplete) this.onJobComplete(jobId, error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,63 +28,61 @@ export class ZmqClient {
|
|||||||
|
|
||||||
this.dealerSocket = null;
|
this.dealerSocket = null;
|
||||||
this.isShutdown = false;
|
this.isShutdown = false;
|
||||||
this.activeJobId = null;
|
|
||||||
this._idleHeartbeatInterval = null;
|
|
||||||
|
|
||||||
this.supportedExchanges = (config.supported_exchanges || ['BINANCE', 'COINBASE'])
|
// Callbacks set by IngestorWorker / SlotPool
|
||||||
.map(e => e.toUpperCase());
|
|
||||||
|
|
||||||
// Callbacks set by IngestorWorker
|
|
||||||
this.onWorkAssign = null; // (DataRequest) => void
|
this.onWorkAssign = null; // (DataRequest) => void
|
||||||
this.onWorkStop = null; // (jobId) => void
|
this.onWorkStop = null; // (jobId) => void
|
||||||
|
this.onConnected = null; // async () => void — fires on initial connect AND reconnect
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Connect DEALER socket to Flink IngestorBroker (ROUTER).
|
* Connect DEALER socket to Flink IngestorBroker (ROUTER).
|
||||||
* Sends WorkerReady immediately so Flink knows this worker is available.
|
* Fires onConnected on every TCP (re)connect so SlotPool can re-offer slots.
|
||||||
*/
|
*/
|
||||||
async connect() {
|
async connect() {
|
||||||
const { flink_hostname, ingestor_broker_port = 5567 } = this.config;
|
const { flink_hostname, ingestor_broker_port = 5567 } = this.config;
|
||||||
|
|
||||||
this.dealerSocket = new zmq.Dealer();
|
this.dealerSocket = new zmq.Dealer();
|
||||||
const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
|
|
||||||
await this.dealerSocket.connect(endpoint);
|
|
||||||
this.logger.info(`Connected DEALER to Flink IngestorBroker at ${endpoint}`);
|
|
||||||
|
|
||||||
// Register as available
|
// Subscribe to connection events BEFORE calling connect() so we catch the
|
||||||
await this.sendReady();
|
// initial establishment. The 'connect' event fires on initial TCP handshake
|
||||||
|
// and again after every ZMQ reconnect (e.g. Flink restart).
|
||||||
// Periodically re-send WorkerReady when idle, to recover from missed initial registration
|
this.dealerSocket.events.on('connect', ({ address }) => {
|
||||||
this._idleHeartbeatInterval = setInterval(() => {
|
this.logger.info({ address }, 'DEALER connected to broker');
|
||||||
if (this.activeJobId === null && !this.isShutdown) {
|
if (this.onConnected) {
|
||||||
this.sendReady().catch(err =>
|
this.onConnected().catch(err =>
|
||||||
this.logger.warn({ error: err.message }, 'Failed to re-send WorkerReady'));
|
this.logger.error({ error: err.message }, 'onConnected callback failed'));
|
||||||
}
|
}
|
||||||
}, 30_000);
|
});
|
||||||
|
|
||||||
|
const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
|
||||||
|
this.dealerSocket.connect(endpoint);
|
||||||
|
this.logger.info(`Connecting DEALER to Flink IngestorBroker at ${endpoint}`);
|
||||||
|
|
||||||
// Start receiving work in background
|
// Start receiving work in background
|
||||||
this._receiveLoop();
|
this._receiveLoop();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send WorkerReady — called on connect and after each COMPLETE.
|
* Send one typed WorkerReady slot offer.
|
||||||
|
* @param {string} exchange - Exchange name (e.g. 'BINANCE')
|
||||||
|
* @param {number} slotType - SlotType enum value (0=ANY, 1=HISTORICAL, 2=REALTIME)
|
||||||
*/
|
*/
|
||||||
async sendReady() {
|
async sendTypedReady(exchange, slotType) {
|
||||||
const frames = encodeBrokerMessage(
|
const frames = encodeBrokerMessage(
|
||||||
MessageTypeId.WORKER_READY,
|
MessageTypeId.WORKER_READY,
|
||||||
{ exchanges: this.supportedExchanges },
|
{ exchanges: [exchange], jobType: slotType },
|
||||||
WorkerReady
|
WorkerReady
|
||||||
);
|
);
|
||||||
await this.dealerSocket.send(frames);
|
await this.dealerSocket.send(frames);
|
||||||
this.logger.info({ exchanges: this.supportedExchanges }, 'Sent WorkerReady');
|
this.logger.debug({ exchange, slotType }, 'Sent WorkerReady slot offer');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send WorkComplete after a historical job finishes.
|
* Send WorkComplete after a historical job finishes.
|
||||||
* Automatically sends WorkerReady so Flink returns us to the free pool.
|
* Slot re-registration is handled by SlotPool after this call.
|
||||||
*/
|
*/
|
||||||
async sendComplete(jobId, success, errorMessage) {
|
async sendComplete(jobId, success, errorMessage) {
|
||||||
this.activeJobId = null;
|
|
||||||
const frames = encodeBrokerMessage(
|
const frames = encodeBrokerMessage(
|
||||||
MessageTypeId.WORK_COMPLETE,
|
MessageTypeId.WORK_COMPLETE,
|
||||||
{
|
{
|
||||||
@@ -96,9 +94,6 @@ export class ZmqClient {
|
|||||||
);
|
);
|
||||||
await this.dealerSocket.send(frames);
|
await this.dealerSocket.send(frames);
|
||||||
this.logger.info({ jobId, success }, 'Sent WorkComplete');
|
this.logger.info({ jobId, success }, 'Sent WorkComplete');
|
||||||
|
|
||||||
// Return to free pool
|
|
||||||
await this.sendReady();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -153,12 +148,10 @@ export class ZmqClient {
|
|||||||
const payload = frames[2].slice(1);
|
const payload = frames[2].slice(1);
|
||||||
|
|
||||||
if (typeId === MessageTypeId.WORK_ASSIGN) {
|
if (typeId === MessageTypeId.WORK_ASSIGN) {
|
||||||
// DataRequest protobuf
|
|
||||||
const request = DataRequest.decode(payload);
|
const request = DataRequest.decode(payload);
|
||||||
const req = DataRequest.toObject(request, {
|
const req = DataRequest.toObject(request, {
|
||||||
longs: String, enums: String, bytes: Buffer
|
longs: String, enums: String, bytes: Buffer
|
||||||
});
|
});
|
||||||
this.activeJobId = req.jobId;
|
|
||||||
this.logger.info(
|
this.logger.info(
|
||||||
{ jobId: req.jobId, requestId: req.requestId, type: req.type, ticker: req.ticker },
|
{ jobId: req.jobId, requestId: req.requestId, type: req.type, ticker: req.ticker },
|
||||||
'Received WorkAssign from broker'
|
'Received WorkAssign from broker'
|
||||||
@@ -192,10 +185,6 @@ export class ZmqClient {
|
|||||||
|
|
||||||
async shutdown() {
|
async shutdown() {
|
||||||
this.isShutdown = true;
|
this.isShutdown = true;
|
||||||
if (this._idleHeartbeatInterval) {
|
|
||||||
clearInterval(this._idleHeartbeatInterval);
|
|
||||||
this._idleHeartbeatInterval = null;
|
|
||||||
}
|
|
||||||
this.logger.info('Shutting down ZMQ DEALER connection');
|
this.logger.info('Shutting down ZMQ DEALER connection');
|
||||||
if (this.dealerSocket) {
|
if (this.dealerSocket) {
|
||||||
this.dealerSocket.close();
|
this.dealerSocket.close();
|
||||||
|
|||||||
@@ -333,12 +333,27 @@ message FieldValue {
|
|||||||
|
|
||||||
// ─── Ingestor Broker Protocol (Flink ROUTER ↔ Ingestor DEALER, port 5567) ───
|
// ─── Ingestor Broker Protocol (Flink ROUTER ↔ Ingestor DEALER, port 5567) ───
|
||||||
// Message type IDs 0x20–0x25
|
// Message type IDs 0x20–0x25
|
||||||
|
//
|
||||||
|
// Capacity model: each WorkerReady is ONE slot offer for a specific exchange
|
||||||
|
// and job type. The ingestor sends N WorkerReady messages at startup (one per
|
||||||
|
// available slot) and re-sends one after each job completes, subject to any
|
||||||
|
// rate-limit backoff.
|
||||||
|
|
||||||
// Ingestor → Flink: register as available (type 0x20)
|
// Job type for a slot offer or assignment.
|
||||||
// Sent on DEALER connect and after every COMPLETE.
|
enum SlotType {
|
||||||
|
ANY = 0; // accepts any job type
|
||||||
|
HISTORICAL = 1; // historical OHLC fetch slot
|
||||||
|
REALTIME = 2; // realtime tick subscription slot
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ingestor → Flink: offer one work slot (type 0x20)
|
||||||
|
// Sent once per available slot at startup and after each job completes.
|
||||||
|
// One WorkerReady = one slot for one exchange and one job type.
|
||||||
message WorkerReady {
|
message WorkerReady {
|
||||||
// Exchanges this ingestor supports (e.g. ["BINANCE", "COINBASE"])
|
// Exchange this slot handles (single entry, e.g. ["BINANCE"])
|
||||||
repeated string exchanges = 1;
|
repeated string exchanges = 1;
|
||||||
|
// Job type this slot accepts
|
||||||
|
SlotType job_type = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ingestor → Flink: historical job finished (type 0x21)
|
// Ingestor → Flink: historical job finished (type 0x21)
|
||||||
|
|||||||
@@ -510,3 +510,44 @@ def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dic
|
|||||||
log.info(f"Conda package sync complete: {len(result['removed'])} packages removed")
|
log.info(f"Conda package sync complete: {len(result['removed'])} packages removed")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Async wrappers — non-blocking equivalents for use from asyncio contexts
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
import asyncio as _asyncio
|
||||||
|
|
||||||
|
|
||||||
|
async def get_installed_packages_async() -> Set[str]:
|
||||||
|
"""Non-blocking wrapper around get_installed_packages()."""
|
||||||
|
return await _asyncio.to_thread(get_installed_packages)
|
||||||
|
|
||||||
|
|
||||||
|
async def install_packages_async(
|
||||||
|
packages: list[str],
|
||||||
|
data_dir: Optional[Path] = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Non-blocking wrapper around install_packages()."""
|
||||||
|
return await _asyncio.to_thread(install_packages, packages, data_dir)
|
||||||
|
|
||||||
|
|
||||||
|
async def remove_packages_async(packages: list[str]) -> dict:
|
||||||
|
"""Non-blocking wrapper around remove_packages()."""
|
||||||
|
return await _asyncio.to_thread(remove_packages, packages)
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_extra_packages_async(
|
||||||
|
data_dir: Path,
|
||||||
|
environment_yml: Optional[Path] = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Non-blocking wrapper around cleanup_extra_packages()."""
|
||||||
|
return await _asyncio.to_thread(cleanup_extra_packages, data_dir, environment_yml)
|
||||||
|
|
||||||
|
|
||||||
|
async def sync_packages_async(
|
||||||
|
data_dir: Path,
|
||||||
|
environment_yml: Optional[Path] = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Non-blocking wrapper around sync_packages()."""
|
||||||
|
return await _asyncio.to_thread(sync_packages, data_dir, environment_yml)
|
||||||
|
|||||||
54
sandbox/dexorder/event_loop.py
Normal file
54
sandbox/dexorder/event_loop.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
"""
|
||||||
|
Thread-safe asyncio.run() for the sandbox.
|
||||||
|
|
||||||
|
Installs a global replacement for asyncio.run() that, when called from a
|
||||||
|
non-async thread while uvicorn's event loop is running, dispatches the
|
||||||
|
coroutine to that loop via run_coroutine_threadsafe(). The calling thread
|
||||||
|
blocks on future.result() — releasing the GIL — so uvicorn's loop runs
|
||||||
|
freely (health checks, MCP requests, etc.).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from dexorder.event_loop import install_thread_safe_asyncio_run
|
||||||
|
install_thread_safe_asyncio_run(asyncio.get_running_loop()) # call once at startup
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_main_loop: asyncio.AbstractEventLoop | None = None
|
||||||
|
_original_asyncio_run = asyncio.run
|
||||||
|
|
||||||
|
|
||||||
|
def install_thread_safe_asyncio_run(loop: asyncio.AbstractEventLoop) -> None:
|
||||||
|
"""
|
||||||
|
Patch asyncio.run globally to cooperate with uvicorn's event loop.
|
||||||
|
Call once from the lifespan startup (main thread, loop already running).
|
||||||
|
"""
|
||||||
|
global _main_loop
|
||||||
|
_main_loop = loop
|
||||||
|
|
||||||
|
def _thread_safe_run(coro, *, debug=None):
|
||||||
|
# Detect if we're in a thread (no running loop in this thread)
|
||||||
|
try:
|
||||||
|
asyncio.get_running_loop()
|
||||||
|
# We're already inside an async context — asyncio.run() is not
|
||||||
|
# valid here regardless; let it raise the normal error.
|
||||||
|
raise RuntimeError(
|
||||||
|
"asyncio.run() cannot be called when another event loop is running "
|
||||||
|
"in the same thread."
|
||||||
|
)
|
||||||
|
except RuntimeError as exc:
|
||||||
|
if "cannot be called" in str(exc):
|
||||||
|
raise
|
||||||
|
# No running loop in this thread — safe to dispatch to main loop.
|
||||||
|
if _main_loop is not None and _main_loop.is_running():
|
||||||
|
log.debug("asyncio.run() from thread → run_coroutine_threadsafe")
|
||||||
|
return asyncio.run_coroutine_threadsafe(coro, _main_loop).result()
|
||||||
|
|
||||||
|
# Fallback: main loop not available (e.g., called before startup or in tests)
|
||||||
|
return _original_asyncio_run(coro, debug=debug)
|
||||||
|
|
||||||
|
asyncio.run = _thread_safe_run
|
||||||
|
log.info("Installed thread-safe asyncio.run()")
|
||||||
@@ -5,6 +5,8 @@ Tickers use Nautilus format: "BTC/USDT.BINANCE"
|
|||||||
All timestamps are nanoseconds since epoch.
|
All timestamps are nanoseconds since epoch.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import tracemalloc
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional, List, Tuple
|
from typing import Optional, List, Tuple
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import logging
|
import logging
|
||||||
@@ -19,6 +21,19 @@ from pyiceberg.expressions import (
|
|||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _rss_mb() -> str:
|
||||||
|
"""Return current VmRSS and VmPeak from /proc/self/status as a short string."""
|
||||||
|
try:
|
||||||
|
info = {}
|
||||||
|
for line in Path("/proc/self/status").read_text().splitlines():
|
||||||
|
for key in ("VmRSS", "VmPeak", "VmSize"):
|
||||||
|
if line.startswith(f"{key}:"):
|
||||||
|
info[key] = int(line.split()[1]) // 1024 # kB → MB
|
||||||
|
return f"RSS={info.get('VmRSS','?')}MB peak={info.get('VmPeak','?')}MB virt={info.get('VmSize','?')}MB"
|
||||||
|
except Exception:
|
||||||
|
return "?"
|
||||||
|
|
||||||
|
|
||||||
class IcebergClient:
|
class IcebergClient:
|
||||||
"""
|
"""
|
||||||
Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).
|
Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).
|
||||||
@@ -114,8 +129,21 @@ class IcebergClient:
|
|||||||
if fetch_columns is not None:
|
if fetch_columns is not None:
|
||||||
scan = scan.select(*fetch_columns)
|
scan = scan.select(*fetch_columns)
|
||||||
|
|
||||||
|
if not tracemalloc.is_tracing():
|
||||||
|
tracemalloc.start()
|
||||||
|
tm_before = tracemalloc.take_snapshot()
|
||||||
|
log.info("MEM before scan.to_pandas(): %s", _rss_mb())
|
||||||
|
|
||||||
df = scan.to_pandas()
|
df = scan.to_pandas()
|
||||||
|
|
||||||
|
log.info("MEM after scan.to_pandas(): %s | rows=%d cols=%s mem=%dMB",
|
||||||
|
_rss_mb(), len(df), list(df.columns),
|
||||||
|
df.memory_usage(deep=True).sum() // (1024 * 1024))
|
||||||
|
tm_after = tracemalloc.take_snapshot()
|
||||||
|
top = tm_after.compare_to(tm_before, "lineno")
|
||||||
|
for stat in top[:5]:
|
||||||
|
log.info("TRACEMALLOC: %s", stat)
|
||||||
|
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
# Deduplicate: keep the most-recently-ingested row per timestamp.
|
# Deduplicate: keep the most-recently-ingested row per timestamp.
|
||||||
if "ingested_at" in df.columns:
|
if "ingested_at" in df.columns:
|
||||||
@@ -123,6 +151,7 @@ class IcebergClient:
|
|||||||
df.sort_values("ingested_at", ascending=False)
|
df.sort_values("ingested_at", ascending=False)
|
||||||
.drop_duplicates(subset=["timestamp"])
|
.drop_duplicates(subset=["timestamp"])
|
||||||
)
|
)
|
||||||
|
log.info("MEM after dedup: %s | rows=%d", _rss_mb(), len(df))
|
||||||
# Drop ingested_at if the caller did not ask for it
|
# Drop ingested_at if the caller did not ask for it
|
||||||
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
|
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
|
||||||
df = df.drop(columns=["ingested_at"])
|
df = df.drop(columns=["ingested_at"])
|
||||||
|
|||||||
85
sandbox/dexorder/memory_guard.py
Normal file
85
sandbox/dexorder/memory_guard.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""
|
||||||
|
Memory guard for sandbox containers.
|
||||||
|
|
||||||
|
Sets a soft RLIMIT_AS limit derived from the cgroup memory limit at a
|
||||||
|
configurable fraction, so Python raises MemoryError before the kernel's
|
||||||
|
OOM killer fires. The MCP session survives; only the tool call fails.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import gc
|
||||||
|
import logging
|
||||||
|
import resource
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_cgroup_limit_bytes() -> int | None:
|
||||||
|
"""Read container memory.max from cgroup v2. Returns bytes or None."""
|
||||||
|
try:
|
||||||
|
val = Path("/sys/fs/cgroup/memory.max").read_text().strip()
|
||||||
|
if val == "max":
|
||||||
|
return None
|
||||||
|
return int(val)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def setup_memory_limit(fraction: float) -> None:
|
||||||
|
"""
|
||||||
|
Set RLIMIT_AS soft limit to baseline VmSize + allowed growth.
|
||||||
|
|
||||||
|
RLIMIT_AS caps total virtual address space, which includes shared libraries
|
||||||
|
and memory-mapped files that don't consume physical RAM. The baseline VmSize
|
||||||
|
at startup can be 3+ GB even when RSS is only ~200 MB. Setting the limit to
|
||||||
|
a flat cgroup fraction would crash immediately.
|
||||||
|
|
||||||
|
Instead: limit = current VmSize + (cgroup_limit * fraction)
|
||||||
|
This allows `fraction` worth of new allocations (numpy arrays, pandas
|
||||||
|
dataframes, etc.) above the startup baseline before raising MemoryError.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
fraction: Proportion of cgroup memory.max to allow as new growth, e.g. 0.85.
|
||||||
|
"""
|
||||||
|
cgroup_bytes = _read_cgroup_limit_bytes()
|
||||||
|
|
||||||
|
# Read baseline VmSize (total virtual address space at startup)
|
||||||
|
vmsize_bytes: int | None = None
|
||||||
|
try:
|
||||||
|
for line in Path("/proc/self/status").read_text().splitlines():
|
||||||
|
if line.startswith("VmSize:"):
|
||||||
|
vmsize_bytes = int(line.split()[1]) * 1024 # kB → bytes
|
||||||
|
log.info("Memory baseline: %s", line.strip())
|
||||||
|
elif line.startswith("VmRSS:"):
|
||||||
|
log.info("Memory baseline: %s", line.strip())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if cgroup_bytes is None:
|
||||||
|
log.warning("cgroup memory.max is unlimited; RLIMIT_AS not set")
|
||||||
|
return
|
||||||
|
|
||||||
|
allowed_growth_bytes = int(cgroup_bytes * fraction)
|
||||||
|
baseline = vmsize_bytes or 0
|
||||||
|
limit_bytes = baseline + allowed_growth_bytes
|
||||||
|
|
||||||
|
_, hard = resource.getrlimit(resource.RLIMIT_AS)
|
||||||
|
resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, hard))
|
||||||
|
log.info(
|
||||||
|
"RLIMIT_AS soft limit set to %d MB (baseline %d MB + allowed growth %d MB, %.0f%% of cgroup %d MB)",
|
||||||
|
limit_bytes // (1024 * 1024),
|
||||||
|
baseline // (1024 * 1024),
|
||||||
|
allowed_growth_bytes // (1024 * 1024),
|
||||||
|
fraction * 100,
|
||||||
|
cgroup_bytes // (1024 * 1024),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_memory() -> None:
|
||||||
|
"""
|
||||||
|
Called after a MemoryError is caught in a tool execution thread.
|
||||||
|
Runs gc.collect() to free objects held by the failed script.
|
||||||
|
Hook here for future recovery strategies (cache eviction, etc.).
|
||||||
|
"""
|
||||||
|
log.warning("MemoryError in tool thread — running gc.collect()")
|
||||||
|
gc.collect()
|
||||||
186
sandbox/dexorder/tools/backtest_harness.py
Normal file
186
sandbox/dexorder/tools/backtest_harness.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
backtest_harness — runs a strategy backtest as a subprocess.
|
||||||
|
|
||||||
|
Reads a JSON config from stdin:
|
||||||
|
{
|
||||||
|
"strategy_name": str,
|
||||||
|
"feeds": [{"symbol": str, "period_seconds": int}, ...],
|
||||||
|
"from_time": ...,
|
||||||
|
"to_time": ...,
|
||||||
|
"initial_capital": float,
|
||||||
|
"paper": bool
|
||||||
|
}
|
||||||
|
|
||||||
|
Outputs JSON to stdout on success:
|
||||||
|
{
|
||||||
|
"strategy_name": str,
|
||||||
|
"feeds": [...],
|
||||||
|
"initial_capital": float,
|
||||||
|
"paper": bool,
|
||||||
|
"total_candles": int,
|
||||||
|
... (metrics from run_backtest)
|
||||||
|
}
|
||||||
|
|
||||||
|
On error:
|
||||||
|
{"error": str}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ensure dexorder package is importable when run as a subprocess
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
|
||||||
|
|
||||||
|
_OHLC_EXTRA_COLUMNS = [
|
||||||
|
"volume", "buy_vol", "sell_vol",
|
||||||
|
"open_time", "high_time", "low_time", "close_time",
|
||||||
|
"open_interest",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
async def _run(cfg: dict) -> dict:
|
||||||
|
strategy_name = cfg["strategy_name"]
|
||||||
|
feeds = cfg["feeds"]
|
||||||
|
from_time = cfg.get("from_time")
|
||||||
|
to_time = cfg.get("to_time")
|
||||||
|
initial_capital = float(cfg.get("initial_capital", 10_000.0))
|
||||||
|
paper = bool(cfg.get("paper", True))
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Initialize API
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml")
|
||||||
|
secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml")
|
||||||
|
|
||||||
|
config_data = {}
|
||||||
|
secrets_data = {}
|
||||||
|
if Path(config_path).exists():
|
||||||
|
with open(config_path) as f:
|
||||||
|
config_data = yaml.safe_load(f) or {}
|
||||||
|
if Path(secrets_path).exists():
|
||||||
|
with open(secrets_path) as f:
|
||||||
|
secrets_data = yaml.safe_load(f) or {}
|
||||||
|
|
||||||
|
data_cfg = config_data.get("data", {})
|
||||||
|
iceberg_cfg = data_cfg.get("iceberg", {})
|
||||||
|
relay_cfg = data_cfg.get("relay", {})
|
||||||
|
|
||||||
|
from dexorder.api import set_api, API
|
||||||
|
from dexorder.impl.charting_api_impl import ChartingAPIImpl
|
||||||
|
from dexorder.impl.data_api_impl import DataAPIImpl
|
||||||
|
|
||||||
|
data_api = DataAPIImpl(
|
||||||
|
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
|
||||||
|
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
|
||||||
|
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
|
||||||
|
namespace=iceberg_cfg.get("namespace", "trading"),
|
||||||
|
s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
|
||||||
|
s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
|
||||||
|
s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
|
||||||
|
s3_region=iceberg_cfg.get("s3_region") or secrets_data.get("s3_region"),
|
||||||
|
request_timeout=240.0,
|
||||||
|
)
|
||||||
|
set_api(API(charting=ChartingAPIImpl(), data=data_api))
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"API initialization failed: {e}"}
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Locate strategy
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
|
||||||
|
try:
|
||||||
|
from dexorder.tools.python_tools import get_category_manager, sanitize_name
|
||||||
|
category_manager = get_category_manager(data_dir)
|
||||||
|
safe_name = sanitize_name(strategy_name)
|
||||||
|
impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
|
||||||
|
if not impl_path.exists():
|
||||||
|
return {"error": f"Strategy '{strategy_name}' not found (looked at {impl_path})"}
|
||||||
|
except Exception as exc:
|
||||||
|
return {"error": f"Failed to locate strategy: {exc}"}
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Register custom indicators and load strategy class
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
try:
|
||||||
|
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
|
||||||
|
_setup_custom_indicators(category_manager.src_dir)
|
||||||
|
except Exception as exc:
|
||||||
|
sys.stderr.write(f"WARNING: custom indicator setup failed: {exc}\n")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from dexorder.nautilus.backtest_runner import _load_strategy_class
|
||||||
|
strategy_class = _load_strategy_class(impl_path)
|
||||||
|
except Exception:
|
||||||
|
return {"error": f"Strategy load failed:\n{traceback.format_exc()}"}
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Fetch OHLC data
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
from dexorder.api import get_api
|
||||||
|
from dexorder.nautilus.pandas_strategy import make_feed_key
|
||||||
|
|
||||||
|
api = get_api()
|
||||||
|
parsed_feeds = [(f["symbol"], int(f["period_seconds"])) for f in feeds]
|
||||||
|
ohlc_dfs = {}
|
||||||
|
total_candles = 0
|
||||||
|
|
||||||
|
for ticker, period_seconds in parsed_feeds:
|
||||||
|
feed_key = make_feed_key(ticker, period_seconds)
|
||||||
|
try:
|
||||||
|
df = await api.data.historical_ohlc(
|
||||||
|
ticker=ticker,
|
||||||
|
period_seconds=period_seconds,
|
||||||
|
start_time=from_time,
|
||||||
|
end_time=to_time,
|
||||||
|
extra_columns=_OHLC_EXTRA_COLUMNS,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
return {"error": f"OHLC fetch failed for {feed_key}: {exc}"}
|
||||||
|
|
||||||
|
if df.empty:
|
||||||
|
return {"error": f"No OHLC data for {feed_key} in the requested range"}
|
||||||
|
|
||||||
|
ohlc_dfs[feed_key] = df
|
||||||
|
total_candles += len(df)
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Run backtest (synchronous)
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
try:
|
||||||
|
from dexorder.nautilus.backtest_runner import run_backtest
|
||||||
|
metrics = run_backtest(
|
||||||
|
strategy_class=strategy_class,
|
||||||
|
feeds=parsed_feeds,
|
||||||
|
ohlc_dfs=ohlc_dfs,
|
||||||
|
initial_capital=initial_capital,
|
||||||
|
paper=paper,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
return {"error": f"Backtest failed:\n{traceback.format_exc()}"}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"strategy_name": strategy_name,
|
||||||
|
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
|
||||||
|
"initial_capital": initial_capital,
|
||||||
|
"paper": paper,
|
||||||
|
"total_candles": total_candles,
|
||||||
|
**metrics,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
cfg = json.loads(sys.stdin.read())
|
||||||
|
result = asyncio.run(_run(cfg))
|
||||||
|
print(json.dumps(result))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,25 +1,21 @@
|
|||||||
"""
|
"""
|
||||||
backtest_strategy — run a PandasStrategy against historical OHLC data.
|
backtest_strategy — run a PandasStrategy against historical OHLC data.
|
||||||
|
|
||||||
Called directly from the MCP server's async handle_tool_call.
|
Spawns backtest_harness.py as a subprocess so user strategy code is isolated
|
||||||
|
from the MCP server process. The harness handles API init, data fetch, and
|
||||||
Returns a JSON payload with backtest metrics and equity curve, following the
|
the synchronous BacktestEngine internally.
|
||||||
same pattern as evaluate_indicator.py.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# All OHLC+ columns to request from the DataAPI
|
_BACKTEST_HARNESS = Path(__file__).parent / "backtest_harness.py"
|
||||||
_OHLC_EXTRA_COLUMNS = [
|
|
||||||
"volume", "buy_vol", "sell_vol",
|
|
||||||
"open_time", "high_time", "low_time", "close_time",
|
|
||||||
"open_interest",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
async def backtest_strategy(
|
async def backtest_strategy(
|
||||||
@@ -42,23 +38,8 @@ async def backtest_strategy(
|
|||||||
paper: Always True for historical backtest (flag reserved for forward testing)
|
paper: Always True for historical backtest (flag reserved for forward testing)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list[TextContent] with JSON payload:
|
list[TextContent] with JSON payload containing backtest metrics.
|
||||||
{
|
On error: [TextContent] with {"error": str}
|
||||||
"strategy_name": str,
|
|
||||||
"feeds": [...],
|
|
||||||
"initial_capital": float,
|
|
||||||
"paper": bool,
|
|
||||||
"total_candles": int,
|
|
||||||
"total_return": float, # fractional (0.15 = +15%)
|
|
||||||
"sharpe_ratio": float,
|
|
||||||
"max_drawdown": float, # fractional (0.10 = 10% drawdown)
|
|
||||||
"win_rate": float,
|
|
||||||
"trade_count": int,
|
|
||||||
"equity_curve": [{"timestamp": int, "equity": float}, ...]
|
|
||||||
}
|
|
||||||
|
|
||||||
On error:
|
|
||||||
{"error": str}
|
|
||||||
"""
|
"""
|
||||||
from mcp.types import TextContent
|
from mcp.types import TextContent
|
||||||
|
|
||||||
@@ -66,102 +47,52 @@ async def backtest_strategy(
|
|||||||
log.error("backtest_strategy '%s': %s", strategy_name, msg)
|
log.error("backtest_strategy '%s': %s", strategy_name, msg)
|
||||||
return [TextContent(type="text", text=json.dumps({"error": msg}))]
|
return [TextContent(type="text", text=json.dumps({"error": msg}))]
|
||||||
|
|
||||||
# --- 1. Validate feeds input ---
|
|
||||||
if not feeds:
|
if not feeds:
|
||||||
return _err("feeds list is empty — provide at least one {symbol, period_seconds} entry")
|
return _err("feeds list is empty — provide at least one {symbol, period_seconds} entry")
|
||||||
|
|
||||||
parsed_feeds: list[tuple[str, int]] = []
|
|
||||||
for f in feeds:
|
for f in feeds:
|
||||||
sym = f.get("symbol", "")
|
if not f.get("symbol"):
|
||||||
ps = f.get("period_seconds", 3600)
|
|
||||||
if not sym:
|
|
||||||
return _err(f"Feed entry missing 'symbol': {f}")
|
return _err(f"Feed entry missing 'symbol': {f}")
|
||||||
parsed_feeds.append((sym, int(ps)))
|
|
||||||
|
|
||||||
# --- 2. Resolve strategy implementation file ---
|
cfg = {
|
||||||
try:
|
|
||||||
from dexorder.tools.python_tools import get_category_manager, sanitize_name
|
|
||||||
category_manager = get_category_manager()
|
|
||||||
safe_name = sanitize_name(strategy_name)
|
|
||||||
impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
|
|
||||||
if not impl_path.exists():
|
|
||||||
return _err(f"Strategy '{strategy_name}' not found (looked at {impl_path})")
|
|
||||||
except Exception as exc:
|
|
||||||
return _err(f"Failed to locate strategy: {exc}")
|
|
||||||
|
|
||||||
# --- 3. Register custom indicators with pandas-ta ---
|
|
||||||
try:
|
|
||||||
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
|
|
||||||
_setup_custom_indicators(category_manager.src_dir)
|
|
||||||
except Exception as exc:
|
|
||||||
log.warning("backtest_strategy: custom indicator setup failed: %s", exc)
|
|
||||||
|
|
||||||
# --- 4. Load strategy class ---
|
|
||||||
try:
|
|
||||||
from dexorder.nautilus.backtest_runner import _load_strategy_class
|
|
||||||
strategy_class = _load_strategy_class(impl_path)
|
|
||||||
except Exception as exc:
|
|
||||||
log.exception("backtest_strategy: strategy load failed")
|
|
||||||
return _err(f"Strategy load failed: {exc}")
|
|
||||||
|
|
||||||
# --- 5. Fetch OHLC+ data for each feed ---
|
|
||||||
try:
|
|
||||||
from dexorder.api import get_api
|
|
||||||
api = get_api()
|
|
||||||
except Exception as exc:
|
|
||||||
return _err(f"API not available: {exc}")
|
|
||||||
|
|
||||||
ohlc_dfs: dict[str, Any] = {}
|
|
||||||
total_candles = 0
|
|
||||||
|
|
||||||
for ticker, period_seconds in parsed_feeds:
|
|
||||||
from dexorder.nautilus.pandas_strategy import make_feed_key
|
|
||||||
feed_key = make_feed_key(ticker, period_seconds)
|
|
||||||
try:
|
|
||||||
df = await api.data.historical_ohlc(
|
|
||||||
ticker=ticker,
|
|
||||||
period_seconds=period_seconds,
|
|
||||||
start_time=from_time,
|
|
||||||
end_time=to_time,
|
|
||||||
extra_columns=_OHLC_EXTRA_COLUMNS,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
log.exception("backtest_strategy: OHLC fetch failed for %s", feed_key)
|
|
||||||
return _err(f"OHLC fetch failed for {feed_key}: {exc}")
|
|
||||||
|
|
||||||
if df.empty:
|
|
||||||
return _err(f"No OHLC data for {feed_key} in the requested range")
|
|
||||||
|
|
||||||
ohlc_dfs[feed_key] = df
|
|
||||||
total_candles += len(df)
|
|
||||||
|
|
||||||
# --- 6. Run backtest in thread executor (BacktestEngine is synchronous) ---
|
|
||||||
try:
|
|
||||||
import asyncio
|
|
||||||
from dexorder.nautilus.backtest_runner import run_backtest
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
metrics = await loop.run_in_executor(
|
|
||||||
None,
|
|
||||||
lambda: run_backtest(
|
|
||||||
strategy_class=strategy_class,
|
|
||||||
feeds=parsed_feeds,
|
|
||||||
ohlc_dfs=ohlc_dfs,
|
|
||||||
initial_capital=initial_capital,
|
|
||||||
paper=paper,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
log.exception("backtest_strategy: backtest run failed")
|
|
||||||
return _err(f"Backtest failed: {exc}")
|
|
||||||
|
|
||||||
# --- 7. Return results ---
|
|
||||||
payload = {
|
|
||||||
"strategy_name": strategy_name,
|
"strategy_name": strategy_name,
|
||||||
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
|
"feeds": feeds,
|
||||||
|
"from_time": from_time,
|
||||||
|
"to_time": to_time,
|
||||||
"initial_capital": initial_capital,
|
"initial_capital": initial_capital,
|
||||||
"paper": paper,
|
"paper": paper,
|
||||||
"total_candles": total_candles,
|
|
||||||
**metrics, # keys: summary, statistics, trades, equity_curve
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
sys.executable, str(_BACKTEST_HARNESS),
|
||||||
|
stdin=asyncio.subprocess.PIPE,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
stdout, stderr = await asyncio.wait_for(
|
||||||
|
proc.communicate(json.dumps(cfg).encode()),
|
||||||
|
timeout=600,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return _err("Backtest timed out (10 minutes)")
|
||||||
|
except Exception as exc:
|
||||||
|
return _err(f"Failed to launch backtest harness: {exc}")
|
||||||
|
|
||||||
|
if proc.returncode != 0:
|
||||||
|
err_text = stderr.decode(errors="replace")
|
||||||
|
log.error("backtest_strategy '%s': harness exited %d: %s", strategy_name, proc.returncode, err_text[:500])
|
||||||
|
return _err(f"Backtest harness failed:\n{err_text}")
|
||||||
|
|
||||||
|
if stderr:
|
||||||
|
log.warning("backtest_strategy '%s' stderr: %s", strategy_name, stderr.decode(errors="replace")[:500])
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload = json.loads(stdout.decode())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return _err(f"Harness produced invalid JSON: {stdout.decode(errors='replace')[:200]}")
|
||||||
|
|
||||||
|
if "error" in payload:
|
||||||
|
return _err(payload["error"])
|
||||||
|
|
||||||
return [TextContent(type="text", text=json.dumps(payload))]
|
return [TextContent(type="text", text=json.dumps(payload))]
|
||||||
|
|||||||
@@ -18,51 +18,32 @@ After write/edit operations, a category-specific test harness runs to validate
|
|||||||
the code and capture errors/output for agent feedback.
|
the code and capture errors/output for agent feedback.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import concurrent.futures
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from dexorder.tools.subprocess_runner import run_subprocess_argv, run_in_thread
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Paths to harness scripts run as subprocesses
|
||||||
def _run_inprocess(fn, *args, timeout: int) -> dict:
|
_RESEARCH_HARNESS = Path(__file__).parent / "research_harness.py"
|
||||||
"""
|
_STRATEGY_HARNESS = Path(__file__).parent / "strategy_harness.py"
|
||||||
Run fn(*args) in a one-shot thread and return its result dict.
|
|
||||||
|
|
||||||
Uses a thread so the calling coroutine is not blocked and the calling
|
|
||||||
process does not fork a new Python interpreter. All already-loaded
|
|
||||||
libraries (numpy, pandas, matplotlib, etc.) are shared with the thread.
|
|
||||||
|
|
||||||
On timeout returns a dict with _timeout=True. On unexpected exception
|
|
||||||
returns a dict with error=True and the traceback in stderr.
|
|
||||||
"""
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
|
||||||
future = executor.submit(fn, *args)
|
|
||||||
try:
|
|
||||||
return future.result(timeout=timeout)
|
|
||||||
except concurrent.futures.TimeoutError:
|
|
||||||
return {"_timeout": True, "error": True,
|
|
||||||
"stdout": "", "stderr": "", "images": []}
|
|
||||||
except Exception:
|
|
||||||
return {"error": True, "stdout": "",
|
|
||||||
"stderr": traceback.format_exc(), "images": []}
|
|
||||||
|
|
||||||
|
|
||||||
# Import conda manager for package installation and tracking
|
# Import conda manager for package installation and tracking
|
||||||
try:
|
try:
|
||||||
from dexorder.conda_manager import install_packages, cleanup_extra_packages
|
from dexorder.conda_manager import install_packages_async, cleanup_extra_packages_async
|
||||||
except ImportError:
|
except ImportError:
|
||||||
log.warning("conda_manager not available - package installation disabled")
|
log.warning("conda_manager not available - package installation disabled")
|
||||||
install_packages = None
|
install_packages_async = None
|
||||||
cleanup_extra_packages = None
|
cleanup_extra_packages_async = None
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -355,6 +336,39 @@ class GitManager:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Async variants — delegates to sync methods via asyncio.to_thread
|
||||||
|
# so the event loop stays responsive during git operations.
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def commit_async(self, message: str) -> Optional[str]:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.commit, message)
|
||||||
|
|
||||||
|
async def log_async(self, path: Optional[Path] = None, n: int = 20) -> list[dict]:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.log, path, n)
|
||||||
|
|
||||||
|
async def restore_async(self, revision: str, path: Optional[Path] = None) -> Optional[str]:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.restore, revision, path)
|
||||||
|
|
||||||
|
async def head_short_hash_async(self) -> str:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.head_short_hash)
|
||||||
|
|
||||||
|
async def create_worktree_async(self, worktree_path: Path, revision: str = "HEAD") -> str:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.create_worktree, worktree_path, revision)
|
||||||
|
|
||||||
|
async def remove_worktree_async(self, worktree_path: Path) -> None:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.remove_worktree, worktree_path)
|
||||||
|
|
||||||
|
async def prune_worktrees_async(self) -> None:
|
||||||
|
import asyncio
|
||||||
|
return await asyncio.to_thread(self.prune_worktrees)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Custom Indicator Setup
|
# Custom Indicator Setup
|
||||||
@@ -484,7 +498,7 @@ class CategoryFileManager:
|
|||||||
"""Root of the versioned category code (git repo root)."""
|
"""Root of the versioned category code (git repo root)."""
|
||||||
return self.data_dir / "src"
|
return self.data_dir / "src"
|
||||||
|
|
||||||
def write(
|
async def write(
|
||||||
self,
|
self,
|
||||||
category: str,
|
category: str,
|
||||||
name: str,
|
name: str,
|
||||||
@@ -547,7 +561,7 @@ class CategoryFileManager:
|
|||||||
return {"success": False, "error": f"Failed to write metadata: {e}"}
|
return {"success": False, "error": f"Failed to write metadata: {e}"}
|
||||||
|
|
||||||
# Run validation harness
|
# Run validation harness
|
||||||
validation = self._validate(cat, item_dir)
|
validation = await self._validate(cat, item_dir)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"success": validation["success"],
|
"success": validation["success"],
|
||||||
@@ -559,19 +573,19 @@ class CategoryFileManager:
|
|||||||
if validation["success"]:
|
if validation["success"]:
|
||||||
if cat == Category.RESEARCH:
|
if cat == Category.RESEARCH:
|
||||||
log.info(f"Auto-executing research script: {name}")
|
log.info(f"Auto-executing research script: {name}")
|
||||||
result["execution"] = self.execute_research(name)
|
result["execution"] = await self.execute_research(name)
|
||||||
elif cat == Category.INDICATOR:
|
elif cat == Category.INDICATOR:
|
||||||
log.info(f"Auto-executing indicator test: {name}")
|
log.info(f"Auto-executing indicator test: {name}")
|
||||||
result["execution"] = self._execute_indicator(item_dir)
|
result["execution"] = await self._execute_indicator(item_dir)
|
||||||
|
|
||||||
# Commit to git
|
# Commit to git
|
||||||
commit_hash = self.git.commit(f"create({category}): {name}")
|
commit_hash = await self.git.commit_async(f"create({category}): {name}")
|
||||||
if commit_hash:
|
if commit_hash:
|
||||||
result["revision"] = commit_hash
|
result["revision"] = commit_hash
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def edit(
|
async def edit(
|
||||||
self,
|
self,
|
||||||
category: str,
|
category: str,
|
||||||
name: str,
|
name: str,
|
||||||
@@ -671,7 +685,7 @@ class CategoryFileManager:
|
|||||||
# Run validation harness if code was updated
|
# Run validation harness if code was updated
|
||||||
validation = None
|
validation = None
|
||||||
if code is not None:
|
if code is not None:
|
||||||
validation = self._validate(cat, item_dir)
|
validation = await self._validate(cat, item_dir)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"success": True,
|
"success": True,
|
||||||
@@ -685,15 +699,15 @@ class CategoryFileManager:
|
|||||||
if code is not None and result["success"]:
|
if code is not None and result["success"]:
|
||||||
if cat == Category.RESEARCH:
|
if cat == Category.RESEARCH:
|
||||||
log.info(f"Auto-executing research script after edit: {name}")
|
log.info(f"Auto-executing research script after edit: {name}")
|
||||||
result["execution"] = self.execute_research(name)
|
result["execution"] = await self.execute_research(name)
|
||||||
elif cat == Category.INDICATOR:
|
elif cat == Category.INDICATOR:
|
||||||
log.info(f"Auto-executing indicator test after edit: {name}")
|
log.info(f"Auto-executing indicator test after edit: {name}")
|
||||||
result["execution"] = self._execute_indicator(item_dir)
|
result["execution"] = await self._execute_indicator(item_dir)
|
||||||
|
|
||||||
# Commit to git if code changed
|
# Commit to git if code changed
|
||||||
if code is not None and result["success"]:
|
if code is not None and result["success"]:
|
||||||
action = "patch" if patches is not None else "edit"
|
action = "patch" if patches is not None else "edit"
|
||||||
commit_hash = self.git.commit(f"{action}({category}): {name}")
|
commit_hash = await self.git.commit_async(f"{action}({category}): {name}")
|
||||||
if commit_hash:
|
if commit_hash:
|
||||||
result["revision"] = commit_hash
|
result["revision"] = commit_hash
|
||||||
|
|
||||||
@@ -776,7 +790,7 @@ class CategoryFileManager:
|
|||||||
|
|
||||||
return {"items": items}
|
return {"items": items}
|
||||||
|
|
||||||
def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
|
async def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run category-specific validation harness.
|
Run category-specific validation harness.
|
||||||
|
|
||||||
@@ -793,13 +807,13 @@ class CategoryFileManager:
|
|||||||
|
|
||||||
# Install required packages before validation
|
# Install required packages before validation
|
||||||
packages_installed = []
|
packages_installed = []
|
||||||
if install_packages and meta_path.exists():
|
if install_packages_async and meta_path.exists():
|
||||||
try:
|
try:
|
||||||
metadata = json.loads(meta_path.read_text())
|
metadata = json.loads(meta_path.read_text())
|
||||||
conda_packages = metadata.get("conda_packages", [])
|
conda_packages = metadata.get("conda_packages", [])
|
||||||
if conda_packages:
|
if conda_packages:
|
||||||
log.info(f"Installing packages for validation: {conda_packages}")
|
log.info(f"Installing packages for validation: {conda_packages}")
|
||||||
install_result = install_packages(conda_packages, data_dir=self.data_dir)
|
install_result = await install_packages_async(conda_packages, data_dir=self.data_dir)
|
||||||
if install_result.get("success"):
|
if install_result.get("success"):
|
||||||
packages_installed = install_result.get("installed", [])
|
packages_installed = install_result.get("installed", [])
|
||||||
if packages_installed:
|
if packages_installed:
|
||||||
@@ -811,11 +825,11 @@ class CategoryFileManager:
|
|||||||
|
|
||||||
# Run validation
|
# Run validation
|
||||||
if category == Category.STRATEGY:
|
if category == Category.STRATEGY:
|
||||||
result = self._validate_strategy(impl_path)
|
result = await self._validate_strategy(impl_path)
|
||||||
elif category == Category.INDICATOR:
|
elif category == Category.INDICATOR:
|
||||||
result = self._validate_indicator(impl_path)
|
result = await self._validate_indicator(impl_path)
|
||||||
elif category == Category.RESEARCH:
|
elif category == Category.RESEARCH:
|
||||||
result = self._validate_research(impl_path, item_dir)
|
result = await self._validate_research(impl_path, item_dir)
|
||||||
else:
|
else:
|
||||||
result = {"success": False, "error": f"No validator for category {category}"}
|
result = {"success": False, "error": f"No validator for category {category}"}
|
||||||
|
|
||||||
@@ -825,19 +839,18 @@ class CategoryFileManager:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
|
async def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Validate a strategy by running it against synthetic OHLC data.
|
Validate a strategy by running it against synthetic OHLC data.
|
||||||
|
|
||||||
Runs strategy_harness.py in-process via a thread. Catches import errors,
|
Runs strategy_harness.py as a subprocess. Catches import errors,
|
||||||
runtime errors in evaluate(), and wrong class hierarchy — not just syntax.
|
runtime errors in evaluate(), and wrong class hierarchy — not just syntax.
|
||||||
"""
|
"""
|
||||||
meta_path = impl_path.parent / "metadata.json"
|
return await self._execute_strategy(impl_path.parent, timeout=45)
|
||||||
return self._execute_strategy(impl_path.parent, timeout=45)
|
|
||||||
|
|
||||||
def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
|
async def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run a strategy against synthetic OHLC data in-process via a thread.
|
Run a strategy against synthetic OHLC data via strategy_harness.py subprocess.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict with success, output (human-readable summary), trade_count, error
|
dict with success, output (human-readable summary), trade_count, error
|
||||||
@@ -850,24 +863,26 @@ class CategoryFileManager:
|
|||||||
if not meta_path.exists():
|
if not meta_path.exists():
|
||||||
return {"success": False, "error": "metadata.json not found"}
|
return {"success": False, "error": "metadata.json not found"}
|
||||||
|
|
||||||
from dexorder.tools.strategy_harness import run as _strategy_run
|
data = await run_subprocess_argv(
|
||||||
result = _run_inprocess(_strategy_run, impl_path, meta_path, timeout=timeout)
|
sys.executable, str(_STRATEGY_HARNESS), str(impl_path), str(meta_path),
|
||||||
|
timeout=timeout,
|
||||||
if result.get("_timeout"):
|
)
|
||||||
|
if data.get("_timeout"):
|
||||||
return {"success": False, "error": f"Strategy test timed out after {timeout}s"}
|
return {"success": False, "error": f"Strategy test timed out after {timeout}s"}
|
||||||
return result
|
if data.get("error") and not data.get("success"):
|
||||||
|
return {"success": False, "error": data.get("stderr") or "Harness failed"}
|
||||||
|
return data
|
||||||
|
|
||||||
def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
|
async def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Validate an indicator by running it against synthetic OHLC data.
|
Validate an indicator by running it against synthetic OHLC data.
|
||||||
|
|
||||||
Runs indicator_harness.py in-process via a thread. Catches import errors,
|
Runs indicator_harness.py in-process via a thread (main proc). Catches
|
||||||
runtime errors, and wrong return types — not just syntax.
|
import errors, runtime errors, and wrong return types — not just syntax.
|
||||||
"""
|
"""
|
||||||
meta_path = impl_path.parent / "metadata.json"
|
return await self._execute_indicator(impl_path.parent, timeout=30)
|
||||||
return self._execute_indicator(impl_path.parent, timeout=30)
|
|
||||||
|
|
||||||
def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
|
async def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run an indicator against synthetic OHLC data in-process via a thread.
|
Run an indicator against synthetic OHLC data in-process via a thread.
|
||||||
|
|
||||||
@@ -883,29 +898,32 @@ class CategoryFileManager:
|
|||||||
return {"success": False, "error": "metadata.json not found"}
|
return {"success": False, "error": "metadata.json not found"}
|
||||||
|
|
||||||
from dexorder.tools.indicator_harness import run as _indicator_run
|
from dexorder.tools.indicator_harness import run as _indicator_run
|
||||||
result = _run_inprocess(_indicator_run, impl_path, meta_path, timeout=timeout)
|
result = await run_in_thread(_indicator_run, impl_path, meta_path, timeout=timeout)
|
||||||
|
|
||||||
if result.get("_timeout"):
|
if result.get("_timeout"):
|
||||||
return {"success": False, "error": f"Indicator test timed out after {timeout}s"}
|
return {"success": False, "error": f"Indicator test timed out after {timeout}s"}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
|
async def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run a research script in-process via a thread and return captured results.
|
Run a research script via research_harness.py subprocess and return captured results.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict with stdout, stderr, images, error fields — or an error dict.
|
dict with stdout, stderr, images, error fields.
|
||||||
"""
|
"""
|
||||||
from dexorder.tools.research_harness import run as _research_run
|
return await run_subprocess_argv(
|
||||||
return _run_inprocess(_research_run, impl_path, item_dir, timeout=timeout)
|
sys.executable, str(_RESEARCH_HARNESS), str(impl_path),
|
||||||
|
timeout=timeout,
|
||||||
|
cwd=item_dir,
|
||||||
|
)
|
||||||
|
|
||||||
def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
|
async def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Validate a research script.
|
Validate a research script.
|
||||||
|
|
||||||
Runs the script via the harness and captures output + pyplot images.
|
Runs the script via the harness and captures output + pyplot images.
|
||||||
"""
|
"""
|
||||||
data = self._run_research_harness(impl_path, item_dir, timeout=300)
|
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||||
|
|
||||||
if data.get("_timeout"):
|
if data.get("_timeout"):
|
||||||
return {"success": False, "error": "Research script timeout"}
|
return {"success": False, "error": "Research script timeout"}
|
||||||
@@ -923,7 +941,7 @@ class CategoryFileManager:
|
|||||||
"images": data["images"],
|
"images": data["images"],
|
||||||
}
|
}
|
||||||
|
|
||||||
def execute_research(self, name: str) -> dict[str, Any]:
|
async def execute_research(self, name: str) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Execute a research script and return structured content with images.
|
Execute a research script and return structured content with images.
|
||||||
|
|
||||||
@@ -944,7 +962,7 @@ class CategoryFileManager:
|
|||||||
if not impl_path.exists():
|
if not impl_path.exists():
|
||||||
return {"error": f"Implementation file not found for '{name}'"}
|
return {"error": f"Implementation file not found for '{name}'"}
|
||||||
|
|
||||||
data = self._run_research_harness(impl_path, item_dir, timeout=300)
|
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||||
|
|
||||||
if data.get("_timeout"):
|
if data.get("_timeout"):
|
||||||
log.error(f"execute_research '{name}': timeout")
|
log.error(f"execute_research '{name}': timeout")
|
||||||
@@ -995,7 +1013,7 @@ class CategoryFileManager:
|
|||||||
return {"content": content}
|
return {"content": content}
|
||||||
|
|
||||||
|
|
||||||
def delete(self, category: str, name: str) -> dict[str, Any]:
|
async def delete(self, category: str, name: str) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Delete a category script directory and commit the removal to git.
|
Delete a category script directory and commit the removal to git.
|
||||||
|
|
||||||
@@ -1031,13 +1049,13 @@ class CategoryFileManager:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"success": False, "error": f"Failed to delete: {e}"}
|
return {"success": False, "error": f"Failed to delete: {e}"}
|
||||||
|
|
||||||
commit_hash = self.git.commit(f"delete({category}): {name}")
|
commit_hash = await self.git.commit_async(f"delete({category}): {name}")
|
||||||
result: dict[str, Any] = {"success": True, "category": category, "name": name}
|
result: dict[str, Any] = {"success": True, "category": category, "name": name}
|
||||||
if commit_hash:
|
if commit_hash:
|
||||||
result["revision"] = commit_hash
|
result["revision"] = commit_hash
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def git_log(
|
async def git_log(
|
||||||
self,
|
self,
|
||||||
category: Optional[str] = None,
|
category: Optional[str] = None,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
@@ -1061,10 +1079,10 @@ class CategoryFileManager:
|
|||||||
path = get_category_path(self.src_dir, cat, name)
|
path = get_category_path(self.src_dir, cat, name)
|
||||||
else:
|
else:
|
||||||
path = self.src_dir / cat.value
|
path = self.src_dir / cat.value
|
||||||
entries = self.git.log(path=path, n=limit)
|
entries = await self.git.log_async(path=path, n=limit)
|
||||||
return {"success": True, "commits": entries}
|
return {"success": True, "commits": entries}
|
||||||
|
|
||||||
def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
|
async def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Restore a category item to a previous git revision (creates a new commit).
|
Restore a category item to a previous git revision (creates a new commit).
|
||||||
|
|
||||||
@@ -1085,11 +1103,11 @@ class CategoryFileManager:
|
|||||||
return {"success": False, "error": f"Item '{name}' not found in '{category}'"}
|
return {"success": False, "error": f"Item '{name}' not found in '{category}'"}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
commit_hash = self.git.restore(revision, path=item_dir)
|
commit_hash = await self.git.restore_async(revision, path=item_dir)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
return {"success": False, "error": str(e)}
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
validation = self._validate(cat, item_dir)
|
validation = await self._validate(cat, item_dir)
|
||||||
return {
|
return {
|
||||||
"success": validation["success"],
|
"success": validation["success"],
|
||||||
"revision": commit_hash,
|
"revision": commit_hash,
|
||||||
|
|||||||
@@ -119,11 +119,39 @@ def run(impl_path: Path, item_dir: Path) -> dict:
|
|||||||
stdout_buf = io.StringIO()
|
stdout_buf = io.StringIO()
|
||||||
stderr_buf = io.StringIO()
|
stderr_buf = io.StringIO()
|
||||||
|
|
||||||
|
# Eagerly capture figures when user scripts call plt.close() so images are
|
||||||
|
# not lost even if the script closes figures immediately after savefig().
|
||||||
|
captured_images: list[dict] = []
|
||||||
|
|
||||||
|
def _capture_fig(fig) -> None:
|
||||||
|
buf = io.BytesIO()
|
||||||
|
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
|
||||||
|
buf.seek(0)
|
||||||
|
captured_images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
|
||||||
|
buf.close()
|
||||||
|
|
||||||
|
_orig_plt_close = plt.close
|
||||||
|
|
||||||
|
def _patched_close(fig=None):
|
||||||
|
if fig is None:
|
||||||
|
for fn in plt.get_fignums():
|
||||||
|
_capture_fig(plt.figure(fn))
|
||||||
|
elif fig == 'all':
|
||||||
|
for fn in plt.get_fignums():
|
||||||
|
_capture_fig(plt.figure(fn))
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
_capture_fig(fig if hasattr(fig, 'savefig') else plt.figure(fig))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
_orig_plt_close(fig)
|
||||||
|
|
||||||
error_occurred = False
|
error_occurred = False
|
||||||
old_stdout, old_stderr = sys.stdout, sys.stderr
|
old_stdout, old_stderr = sys.stdout, sys.stderr
|
||||||
old_cwd = os.getcwd()
|
old_cwd = os.getcwd()
|
||||||
sys.stdout = stdout_buf
|
sys.stdout = stdout_buf
|
||||||
sys.stderr = stderr_buf
|
sys.stderr = stderr_buf
|
||||||
|
plt.close = _patched_close
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.chdir(impl_path.parent)
|
os.chdir(impl_path.parent)
|
||||||
@@ -136,22 +164,26 @@ def run(impl_path: Path, item_dir: Path) -> dict:
|
|||||||
sys.stdout = old_stdout
|
sys.stdout = old_stdout
|
||||||
sys.stderr = old_stderr
|
sys.stderr = old_stderr
|
||||||
os.chdir(old_cwd)
|
os.chdir(old_cwd)
|
||||||
|
plt.close = _orig_plt_close
|
||||||
|
|
||||||
stdout_output = stdout_buf.getvalue()
|
stdout_output = stdout_buf.getvalue()
|
||||||
stderr_output = stderr_buf.getvalue()
|
stderr_output = stderr_buf.getvalue()
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Capture matplotlib figures
|
# Capture any figures still open after script completion
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
images = []
|
images = captured_images
|
||||||
if not error_occurred:
|
if not error_occurred:
|
||||||
|
already_seen = {img["data"] for img in images}
|
||||||
for fig_num in plt.get_fignums():
|
for fig_num in plt.get_fignums():
|
||||||
fig = plt.figure(fig_num)
|
fig = plt.figure(fig_num)
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
|
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
|
||||||
buf.seek(0)
|
buf.seek(0)
|
||||||
images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
|
data = base64.b64encode(buf.read()).decode('utf-8')
|
||||||
buf.close()
|
buf.close()
|
||||||
|
if data not in already_seen:
|
||||||
|
images.append({"format": "png", "data": data})
|
||||||
plt.close('all')
|
plt.close('all')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
182
sandbox/dexorder/tools/subprocess_runner.py
Normal file
182
sandbox/dexorder/tools/subprocess_runner.py
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
"""
|
||||||
|
subprocess_runner — non-blocking subprocess primitives for the MCP sandbox.
|
||||||
|
|
||||||
|
All three entrypoints return the same dict shape as the legacy _run_inprocess():
|
||||||
|
{
|
||||||
|
"error": bool,
|
||||||
|
"stdout": str,
|
||||||
|
"stderr": str,
|
||||||
|
"images": list, # always [] for non-research invocations
|
||||||
|
"_timeout": bool # present and True only on timeout
|
||||||
|
}
|
||||||
|
|
||||||
|
Callers can therefore pattern-match on {"_timeout", "error", "stdout", "stderr"}
|
||||||
|
uniformly regardless of whether the work ran in a subprocess or a thread.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _normalise(data: dict, stderr_fallback: str = "") -> dict:
|
||||||
|
"""Ensure the standard shape keys are present in a harness result dict."""
|
||||||
|
data.setdefault("error", False)
|
||||||
|
data.setdefault("stdout", "")
|
||||||
|
data.setdefault("stderr", stderr_fallback)
|
||||||
|
data.setdefault("images", [])
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def _err_dict(stderr: str = "", stdout: str = "") -> dict:
|
||||||
|
return {"error": True, "stdout": stdout, "stderr": stderr, "images": []}
|
||||||
|
|
||||||
|
|
||||||
|
def _timeout_dict() -> dict:
|
||||||
|
return {"_timeout": True, "error": True, "stdout": "", "stderr": "", "images": []}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Primitive 1: run_subprocess_argv
|
||||||
|
#
|
||||||
|
# Non-blocking equivalent of:
|
||||||
|
# subprocess.run([sys.executable, harness, arg1, arg2, ...],
|
||||||
|
# capture_output=True, text=True, timeout=N, cwd=cwd)
|
||||||
|
#
|
||||||
|
# Used by: _execute_strategy, _run_research_harness
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def run_subprocess_argv(
|
||||||
|
*cmd: str,
|
||||||
|
timeout: int,
|
||||||
|
cwd: Path | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Spawn cmd as a subprocess, await completion, and return a normalised result dict.
|
||||||
|
|
||||||
|
stdout is expected to contain a JSON object written by the harness. It is
|
||||||
|
decoded and normalised to the standard shape. On JSON decode failure the
|
||||||
|
raw stdout text is preserved in "stdout" and error is set to True.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
cwd=str(cwd) if cwd else None,
|
||||||
|
)
|
||||||
|
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||||
|
proc.communicate(), timeout=timeout
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return _timeout_dict()
|
||||||
|
except Exception as exc:
|
||||||
|
return _err_dict(stderr=f"Harness launch failed: {exc}")
|
||||||
|
|
||||||
|
stdout_text = stdout_bytes.decode(errors="replace")
|
||||||
|
stderr_text = stderr_bytes.decode(errors="replace")
|
||||||
|
|
||||||
|
if proc.returncode != 0:
|
||||||
|
return _err_dict(
|
||||||
|
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
|
||||||
|
stdout=stdout_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(stdout_text)
|
||||||
|
return _normalise(data, stderr_fallback=stderr_text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Primitive 2: run_subprocess_stdin
|
||||||
|
#
|
||||||
|
# Non-blocking equivalent of the backtest pattern — JSON config fed via stdin.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def run_subprocess_stdin(
|
||||||
|
*cmd: str,
|
||||||
|
stdin_data: bytes,
|
||||||
|
timeout: int,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Spawn cmd, write stdin_data to its stdin, await completion.
|
||||||
|
|
||||||
|
Returns the same normalised dict shape as run_subprocess_argv.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd,
|
||||||
|
stdin=asyncio.subprocess.PIPE,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||||
|
proc.communicate(stdin_data), timeout=timeout
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return _timeout_dict()
|
||||||
|
except Exception as exc:
|
||||||
|
return _err_dict(stderr=f"Harness launch failed: {exc}")
|
||||||
|
|
||||||
|
stdout_text = stdout_bytes.decode(errors="replace")
|
||||||
|
stderr_text = stderr_bytes.decode(errors="replace")
|
||||||
|
|
||||||
|
if proc.returncode != 0:
|
||||||
|
return _err_dict(
|
||||||
|
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
|
||||||
|
stdout=stdout_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(stdout_text)
|
||||||
|
return _normalise(data, stderr_fallback=stderr_text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Primitive 3: run_in_thread
|
||||||
|
#
|
||||||
|
# Async wrapper around asyncio.to_thread so the event loop stays responsive
|
||||||
|
# while CPU-bound or blocking-IO callables run in a worker thread.
|
||||||
|
#
|
||||||
|
# Used by: _execute_indicator (in-process indicator harness)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def run_in_thread(
|
||||||
|
fn: Callable,
|
||||||
|
*args: Any,
|
||||||
|
timeout: int,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Run fn(*args) in a thread pool worker and yield to the event loop while waiting.
|
||||||
|
|
||||||
|
On timeout the thread is abandoned (daemon) and _timeout_dict() is returned.
|
||||||
|
On MemoryError or unexpected exception a standard error dict is returned.
|
||||||
|
The returned dict is normalised to the standard shape.
|
||||||
|
"""
|
||||||
|
from dexorder.memory_guard import cleanup_memory
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await asyncio.wait_for(
|
||||||
|
asyncio.to_thread(fn, *args),
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
return _normalise(result)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return _timeout_dict()
|
||||||
|
except MemoryError:
|
||||||
|
cleanup_memory()
|
||||||
|
return _err_dict(
|
||||||
|
stderr="Script exceeded memory limit. Try reducing the data range or batch size."
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
return _err_dict(stderr=traceback.format_exc())
|
||||||
@@ -33,7 +33,7 @@ from starlette.routing import Route, Mount
|
|||||||
|
|
||||||
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
|
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
|
||||||
from dexorder.api import set_api, API
|
from dexorder.api import set_api, API
|
||||||
from dexorder.conda_manager import sync_packages, install_packages, cleanup_extra_packages
|
from dexorder.conda_manager import sync_packages_async, install_packages_async, cleanup_extra_packages_async
|
||||||
from dexorder.events import EventType, UserEvent, DeliverySpec
|
from dexorder.events import EventType, UserEvent, DeliverySpec
|
||||||
from dexorder.impl.charting_api_impl import ChartingAPIImpl
|
from dexorder.impl.charting_api_impl import ChartingAPIImpl
|
||||||
from dexorder.impl.data_api_impl import DataAPIImpl
|
from dexorder.impl.data_api_impl import DataAPIImpl
|
||||||
@@ -893,7 +893,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
arguments.get("patch", [])
|
arguments.get("patch", [])
|
||||||
)
|
)
|
||||||
elif name == "python_write":
|
elif name == "python_write":
|
||||||
result = category_manager.write(
|
result = await category_manager.write(
|
||||||
category=arguments.get("category", ""),
|
category=arguments.get("category", ""),
|
||||||
name=arguments.get("name", ""),
|
name=arguments.get("name", ""),
|
||||||
description=arguments.get("description", ""),
|
description=arguments.get("description", ""),
|
||||||
@@ -920,10 +920,10 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
|
logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
|
||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||||
cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||||
return content
|
return content
|
||||||
elif name == "python_edit":
|
elif name == "python_edit":
|
||||||
result = category_manager.edit(
|
result = await category_manager.edit(
|
||||||
category=arguments.get("category", ""),
|
category=arguments.get("category", ""),
|
||||||
name=arguments.get("name", ""),
|
name=arguments.get("name", ""),
|
||||||
code=arguments.get("code"),
|
code=arguments.get("code"),
|
||||||
@@ -951,7 +951,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
logging.info(f"python_edit '{arguments.get('name')}': no execution result")
|
logging.info(f"python_edit '{arguments.get('name')}': no execution result")
|
||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||||
cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||||
return content
|
return content
|
||||||
elif name == "python_read":
|
elif name == "python_read":
|
||||||
return category_manager.read(
|
return category_manager.read(
|
||||||
@@ -963,7 +963,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
category=arguments.get("category", "")
|
category=arguments.get("category", "")
|
||||||
)
|
)
|
||||||
elif name == "python_log":
|
elif name == "python_log":
|
||||||
result = category_manager.git_log(
|
result = await category_manager.git_log(
|
||||||
category=arguments.get("category"),
|
category=arguments.get("category"),
|
||||||
name=arguments.get("name"),
|
name=arguments.get("name"),
|
||||||
limit=int(arguments.get("limit", 20))
|
limit=int(arguments.get("limit", 20))
|
||||||
@@ -973,7 +973,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
lines.append(f"{c['short_hash']} {c['date'][:10]} {c['message']}")
|
lines.append(f"{c['short_hash']} {c['date'][:10]} {c['message']}")
|
||||||
return [TextContent(type="text", text="\n".join(lines))]
|
return [TextContent(type="text", text="\n".join(lines))]
|
||||||
elif name == "python_revert":
|
elif name == "python_revert":
|
||||||
result = category_manager.git_revert(
|
result = await category_manager.git_revert(
|
||||||
revision=arguments.get("revision", ""),
|
revision=arguments.get("revision", ""),
|
||||||
category=arguments.get("category", ""),
|
category=arguments.get("category", ""),
|
||||||
name=arguments.get("name", "")
|
name=arguments.get("name", "")
|
||||||
@@ -989,13 +989,13 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||||
return [TextContent(type="text", text="\n".join(meta_parts))]
|
return [TextContent(type="text", text="\n".join(meta_parts))]
|
||||||
elif name == "python_delete":
|
elif name == "python_delete":
|
||||||
result = category_manager.delete(
|
result = await category_manager.delete(
|
||||||
category=arguments.get("category", ""),
|
category=arguments.get("category", ""),
|
||||||
name=arguments.get("name", "")
|
name=arguments.get("name", "")
|
||||||
)
|
)
|
||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
_remove_type(workspace_store, arguments.get("category", ""), arguments.get("name", ""))
|
_remove_type(workspace_store, arguments.get("category", ""), arguments.get("name", ""))
|
||||||
cleanup_result = cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
cleanup_result = await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||||
if cleanup_result.get("removed"):
|
if cleanup_result.get("removed"):
|
||||||
result["packages_removed"] = cleanup_result["removed"]
|
result["packages_removed"] = cleanup_result["removed"]
|
||||||
parts = [f"success: {result['success']}"]
|
parts = [f"success: {result['success']}"]
|
||||||
@@ -1004,14 +1004,14 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
|||||||
parts.append(f"{k}: {result[k]}")
|
parts.append(f"{k}: {result[k]}")
|
||||||
return [TextContent(type="text", text="\n".join(parts))]
|
return [TextContent(type="text", text="\n".join(parts))]
|
||||||
elif name == "conda_sync":
|
elif name == "conda_sync":
|
||||||
return sync_packages(
|
return await sync_packages_async(
|
||||||
data_dir=get_data_dir(),
|
data_dir=get_data_dir(),
|
||||||
environment_yml=_get_env_yml()
|
environment_yml=_get_env_yml()
|
||||||
)
|
)
|
||||||
elif name == "conda_install":
|
elif name == "conda_install":
|
||||||
return install_packages(arguments.get("packages", []))
|
return await install_packages_async(arguments.get("packages", []))
|
||||||
elif name == "execute_research":
|
elif name == "execute_research":
|
||||||
result = category_manager.execute_research(name=arguments.get("name", ""))
|
result = await category_manager.execute_research(name=arguments.get("name", ""))
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logging.error(f"execute_research '{arguments.get('name')}': {result['error']}")
|
logging.error(f"execute_research '{arguments.get('name')}': {result['error']}")
|
||||||
return [TextContent(type="text", text=f"Error: {result['error']}")]
|
return [TextContent(type="text", text=f"Error: {result['error']}")]
|
||||||
@@ -1113,6 +1113,8 @@ def create_streamable_http_app(mcp_server: Server) -> Starlette:
|
|||||||
|
|
||||||
@contextlib.asynccontextmanager
|
@contextlib.asynccontextmanager
|
||||||
async def lifespan(app: Starlette):
|
async def lifespan(app: Starlette):
|
||||||
|
from dexorder.event_loop import install_thread_safe_asyncio_run
|
||||||
|
install_thread_safe_asyncio_run(asyncio.get_running_loop())
|
||||||
async with session_manager.run():
|
async with session_manager.run():
|
||||||
yield
|
yield
|
||||||
|
|
||||||
@@ -1156,6 +1158,14 @@ class UserContainer:
|
|||||||
# Load configuration
|
# Load configuration
|
||||||
self.config.load()
|
self.config.load()
|
||||||
|
|
||||||
|
# Python-level memory guard (RLIMIT_AS soft limit) — DISABLED.
|
||||||
|
# We assume nodes have ample memory (8Gi limits) and will revisit a
|
||||||
|
# proper RSS-based cgroup monitor later. The implementation is in
|
||||||
|
# dexorder/memory_guard.py if we want to re-enable.
|
||||||
|
# from dexorder.memory_guard import setup_memory_limit
|
||||||
|
# mem_cfg = self.config.config_data.get("memory", {})
|
||||||
|
# setup_memory_limit(fraction=float(mem_cfg.get("limit_fraction", 0.85)))
|
||||||
|
|
||||||
# Initialize data and charting API
|
# Initialize data and charting API
|
||||||
data_cfg = self.config.config_data.get("data", {})
|
data_cfg = self.config.config_data.get("data", {})
|
||||||
iceberg_cfg = data_cfg.get("iceberg", {})
|
iceberg_cfg = data_cfg.get("iceberg", {})
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ import { useShapeStore } from './stores/shapes'
|
|||||||
import { useIndicatorStore } from './stores/indicators'
|
import { useIndicatorStore } from './stores/indicators'
|
||||||
import { useIndicatorTypesStore } from './stores/indicatorTypes'
|
import { useIndicatorTypesStore } from './stores/indicatorTypes'
|
||||||
import { useChannelStore } from './stores/channel'
|
import { useChannelStore } from './stores/channel'
|
||||||
|
import { useResearchTypesStore } from './stores/researchTypes'
|
||||||
|
import { useStrategyTypesStore } from './stores/strategyTypes'
|
||||||
import { useStateSync } from './composables/useStateSync'
|
import { useStateSync } from './composables/useStateSync'
|
||||||
import { wsManager } from './composables/useWebSocket'
|
import { wsManager } from './composables/useWebSocket'
|
||||||
import { authService } from './composables/useAuth'
|
import { authService } from './composables/useAuth'
|
||||||
@@ -44,9 +46,18 @@ function onHDragMove(e: PointerEvent) {
|
|||||||
chartWidth.value = Math.max(CHART_MIN_PX, Math.min(maxWidth, hDragStartWidth + delta))
|
chartWidth.value = Math.max(CHART_MIN_PX, Math.min(maxWidth, hDragStartWidth + delta))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clamp chartWidth so chart + chat always fit within the window
|
||||||
|
function clampChartWidth() {
|
||||||
|
const maxWidth = window.innerWidth - CHAT_MIN_PX - 4
|
||||||
|
if (maxWidth >= CHART_MIN_PX) {
|
||||||
|
chartWidth.value = Math.max(CHART_MIN_PX, Math.min(maxWidth, chartWidth.value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check screen width for mobile layout
|
// Check screen width for mobile layout
|
||||||
const checkMobile = () => {
|
const checkMobile = () => {
|
||||||
isMobile.value = window.innerWidth < 768
|
isMobile.value = window.innerWidth < 768
|
||||||
|
if (!isMobile.value) clampChartWidth()
|
||||||
}
|
}
|
||||||
|
|
||||||
const chartStore = useChartStore()
|
const chartStore = useChartStore()
|
||||||
@@ -108,11 +119,15 @@ const initializeApp = async () => {
|
|||||||
const indicatorStore = useIndicatorStore()
|
const indicatorStore = useIndicatorStore()
|
||||||
const indicatorTypesStore = useIndicatorTypesStore()
|
const indicatorTypesStore = useIndicatorTypesStore()
|
||||||
const channelStore = useChannelStore()
|
const channelStore = useChannelStore()
|
||||||
|
const researchTypesStore = useResearchTypesStore()
|
||||||
|
const strategyTypesStore = useStrategyTypesStore()
|
||||||
const stateSync = useStateSync({
|
const stateSync = useStateSync({
|
||||||
chartState: chartStore,
|
chartState: chartStore,
|
||||||
shapes: shapeStore,
|
shapes: shapeStore,
|
||||||
indicators: indicatorStore,
|
indicators: indicatorStore,
|
||||||
indicator_types: indicatorTypesStore,
|
indicator_types: indicatorTypesStore,
|
||||||
|
research_types: researchTypesStore,
|
||||||
|
strategy_types: strategyTypesStore,
|
||||||
channelState: channelStore
|
channelState: channelStore
|
||||||
})
|
})
|
||||||
stateSyncCleanup = stateSync.cleanup
|
stateSyncCleanup = stateSync.cleanup
|
||||||
@@ -195,7 +210,7 @@ onBeforeUnmount(() => {
|
|||||||
|
|
||||||
.chat-panel {
|
.chat-panel {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
min-width: 0;
|
min-width: 240px;
|
||||||
height: 100%;
|
height: 100%;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
display: flex;
|
display: flex;
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import TabPanels from 'primevue/tabpanels'
|
|||||||
import TabPanel from 'primevue/tabpanel'
|
import TabPanel from 'primevue/tabpanel'
|
||||||
import OrdersTab from './tabs/OrdersTab.vue'
|
import OrdersTab from './tabs/OrdersTab.vue'
|
||||||
import PlaceholderTab from './tabs/PlaceholderTab.vue'
|
import PlaceholderTab from './tabs/PlaceholderTab.vue'
|
||||||
|
import ResearchTab from './tabs/ResearchTab.vue'
|
||||||
|
|
||||||
interface TempTab {
|
interface TempTab {
|
||||||
id: string
|
id: string
|
||||||
@@ -81,9 +82,10 @@ defineExpose({
|
|||||||
<template>
|
<template>
|
||||||
<div class="bottom-tray" :style="trayStyle">
|
<div class="bottom-tray" :style="trayStyle">
|
||||||
<div v-if="isExpanded" class="tray-resize-handle" @pointerdown="startResize" @pointermove="onResizeMove" />
|
<div v-if="isExpanded" class="tray-resize-handle" @pointerdown="startResize" @pointermove="onResizeMove" />
|
||||||
<Tabs :value="activeTab" class="tray-tabs">
|
<Tabs :value="isExpanded ? activeTab : null" class="tray-tabs">
|
||||||
<TabList class="tray-tab-list">
|
<TabList class="tray-tab-list">
|
||||||
<Tab value="orders" @click="onTabClick('orders')">Orders</Tab>
|
<Tab value="orders" @click="onTabClick('orders')">Orders</Tab>
|
||||||
|
<Tab value="research" @click="onTabClick('research')">Research</Tab>
|
||||||
<Tab value="strategies" @click="onTabClick('strategies')">Strategies</Tab>
|
<Tab value="strategies" @click="onTabClick('strategies')">Strategies</Tab>
|
||||||
<Tab value="positions" @click="onTabClick('positions')">Positions</Tab>
|
<Tab value="positions" @click="onTabClick('positions')">Positions</Tab>
|
||||||
<Tab
|
<Tab
|
||||||
@@ -102,9 +104,10 @@ defineExpose({
|
|||||||
</button>
|
</button>
|
||||||
</TabList>
|
</TabList>
|
||||||
<TabPanels v-if="isExpanded" class="tray-panels">
|
<TabPanels v-if="isExpanded" class="tray-panels">
|
||||||
|
<TabPanel value="positions" class="tray-panel"><PlaceholderTab label="Positions" /></TabPanel>
|
||||||
<TabPanel value="orders" class="tray-panel"><OrdersTab /></TabPanel>
|
<TabPanel value="orders" class="tray-panel"><OrdersTab /></TabPanel>
|
||||||
<TabPanel value="strategies" class="tray-panel"><PlaceholderTab label="Strategies" /></TabPanel>
|
<TabPanel value="strategies" class="tray-panel"><PlaceholderTab label="Strategies" /></TabPanel>
|
||||||
<TabPanel value="positions" class="tray-panel"><PlaceholderTab label="Positions" /></TabPanel>
|
<TabPanel value="research" class="tray-panel"><ResearchTab /></TabPanel>
|
||||||
<TabPanel
|
<TabPanel
|
||||||
v-for="tab in tempTabs"
|
v-for="tab in tempTabs"
|
||||||
:key="tab.id"
|
:key="tab.id"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted, onBeforeUnmount, watch } from 'vue'
|
import { ref, onMounted, onBeforeUnmount, watch, type WatchStopHandle } from 'vue'
|
||||||
import Card from 'primevue/card'
|
import Card from 'primevue/card'
|
||||||
import { createTradingViewDatafeed } from '../composables/useTradingViewDatafeed'
|
import { createTradingViewDatafeed } from '../composables/useTradingViewDatafeed'
|
||||||
import { useTradingViewShapes } from '../composables/useTradingViewShapes'
|
import { useTradingViewShapes } from '../composables/useTradingViewShapes'
|
||||||
@@ -11,10 +11,11 @@ import type { IChartingLibraryWidget } from '../types/tradingview'
|
|||||||
import { intervalToSeconds } from '../utils'
|
import { intervalToSeconds } from '../utils'
|
||||||
import { wsManager } from '../composables/useWebSocket'
|
import { wsManager } from '../composables/useWebSocket'
|
||||||
|
|
||||||
// Convert seconds to TradingView interval string
|
// Convert seconds to TradingView interval string.
|
||||||
|
// TradingView uses plain minute numbers ("60", "240") for intraday,
|
||||||
|
// and "1D", "2D" etc for daily. Never use "H" suffix — it's not in supported_resolutions.
|
||||||
function secondsToInterval(seconds: number): string {
|
function secondsToInterval(seconds: number): string {
|
||||||
if (seconds % 86400 === 0) return `${seconds / 86400}D`
|
if (seconds % 86400 === 0) return `${seconds / 86400}D`
|
||||||
if (seconds % 3600 === 0) return `${seconds / 3600}H`
|
|
||||||
return `${seconds / 60}` // plain number = minutes
|
return `${seconds / 60}` // plain number = minutes
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,9 +28,23 @@ let shapeCleanup: (() => void) | null = null // Cleanup function for shape sync
|
|||||||
let indicatorCleanup: (() => void) | null = null // Cleanup function for indicator sync
|
let indicatorCleanup: (() => void) | null = null // Cleanup function for indicator sync
|
||||||
let customIndicatorCleanup: (() => void) | null = null // Cleanup for custom TV studies
|
let customIndicatorCleanup: (() => void) | null = null // Cleanup for custom TV studies
|
||||||
let chartInitialized = false // Guard against double-init on reconnect
|
let chartInitialized = false // Guard against double-init on reconnect
|
||||||
|
let symbolWatcher: WatchStopHandle | null = null
|
||||||
|
|
||||||
const maybeInitChart = () => {
|
const maybeInitChart = () => {
|
||||||
if (chartInitialized || !chartContainer.value) return
|
if (chartInitialized || !chartContainer.value) return
|
||||||
|
if (!chartStore.symbol) {
|
||||||
|
// Defer until backend provides a symbol
|
||||||
|
if (!symbolWatcher) {
|
||||||
|
symbolWatcher = watch(() => chartStore.symbol, (sym) => {
|
||||||
|
if (sym) {
|
||||||
|
symbolWatcher?.()
|
||||||
|
symbolWatcher = null
|
||||||
|
maybeInitChart()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
chartInitialized = true
|
chartInitialized = true
|
||||||
initChart()
|
initChart()
|
||||||
}
|
}
|
||||||
@@ -205,6 +220,10 @@ function setupStoreWatchers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
onBeforeUnmount(() => {
|
onBeforeUnmount(() => {
|
||||||
|
if (symbolWatcher) {
|
||||||
|
symbolWatcher()
|
||||||
|
symbolWatcher = null
|
||||||
|
}
|
||||||
// Cleanup shape synchronization
|
// Cleanup shape synchronization
|
||||||
if (shapeCleanup) {
|
if (shapeCleanup) {
|
||||||
shapeCleanup()
|
shapeCleanup()
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
import { ref, onMounted, onUnmounted, computed, onBeforeUnmount, watch, nextTick } from 'vue'
|
import { ref, onMounted, onUnmounted, computed, onBeforeUnmount, watch, nextTick } from 'vue'
|
||||||
import { register } from 'vue-advanced-chat'
|
import { register } from 'vue-advanced-chat'
|
||||||
import Badge from 'primevue/badge'
|
import Badge from 'primevue/badge'
|
||||||
import Button from 'primevue/button'
|
|
||||||
import { wsManager } from '../composables/useWebSocket'
|
import { wsManager } from '../composables/useWebSocket'
|
||||||
import type { WebSocketMessage } from '../composables/useWebSocket'
|
import type { WebSocketMessage } from '../composables/useWebSocket'
|
||||||
import { useChannelStore } from '../stores/channel'
|
import { useChannelStore } from '../stores/channel'
|
||||||
@@ -188,12 +187,13 @@ const handleMessage = (data: WebSocketMessage) => {
|
|||||||
}
|
}
|
||||||
} else if (data.type === 'agent_chunk') {
|
} else if (data.type === 'agent_chunk') {
|
||||||
console.log('[ChatPanel] Processing agent_chunk, content:', data.content, 'done:', data.done)
|
console.log('[ChatPanel] Processing agent_chunk, content:', data.content, 'done:', data.done)
|
||||||
|
// Always remove any tool-call bubble when the agent sends text, whether this
|
||||||
|
// is a new message or a continuation of an existing one (e.g. after a retry).
|
||||||
|
removeToolCallBubble()
|
||||||
const timestamp = new Date().toTimeString().split(' ')[0].slice(0, 5)
|
const timestamp = new Date().toTimeString().split(' ')[0].slice(0, 5)
|
||||||
|
|
||||||
if (!currentStreamingMessageId) {
|
if (!currentStreamingMessageId) {
|
||||||
console.log('[ChatPanel] Starting new streaming message')
|
console.log('[ChatPanel] Starting new streaming message')
|
||||||
// Remove any ephemeral tool-call bubble before starting the real response
|
|
||||||
removeToolCallBubble()
|
|
||||||
// Set up streaming state and mark user message as seen
|
// Set up streaming state and mark user message as seen
|
||||||
isAgentProcessing.value = true
|
isAgentProcessing.value = true
|
||||||
currentStreamingMessageId = generateMessageId()
|
currentStreamingMessageId = generateMessageId()
|
||||||
@@ -314,6 +314,7 @@ const stopAgent = () => {
|
|||||||
|
|
||||||
// Send message handler
|
// Send message handler
|
||||||
const sendMessage = async (event: any) => {
|
const sendMessage = async (event: any) => {
|
||||||
|
if (isAgentProcessing.value) { stopAgent(); return }
|
||||||
// Extract data from CustomEvent.detail[0]
|
// Extract data from CustomEvent.detail[0]
|
||||||
const data = event.detail?.[0] || event
|
const data = event.detail?.[0] || event
|
||||||
|
|
||||||
@@ -617,7 +618,11 @@ onUnmounted(() => {
|
|||||||
|
|
||||||
<!-- Workspace loading overlay -->
|
<!-- Workspace loading overlay -->
|
||||||
<div v-if="!channelStore.isReady" class="workspace-loading">
|
<div v-if="!channelStore.isReady" class="workspace-loading">
|
||||||
<i class="pi pi-spin pi-spinner workspace-loading-spinner" />
|
<svg class="workspace-loading-spinner" viewBox="0 0 50 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<circle cx="25" cy="25" r="20" fill="none" stroke="rgba(8,153,129,0.2)" stroke-width="4"/>
|
||||||
|
<circle cx="25" cy="25" r="20" fill="none" stroke="#089981" stroke-width="4"
|
||||||
|
stroke-dasharray="80 200" stroke-linecap="round"/>
|
||||||
|
</svg>
|
||||||
<span class="workspace-loading-message">{{ channelStore.statusMessage || 'Connecting...' }}</span>
|
<span class="workspace-loading-message">{{ channelStore.statusMessage || 'Connecting...' }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -643,18 +648,18 @@ onUnmounted(() => {
|
|||||||
@send-message="sendMessage"
|
@send-message="sendMessage"
|
||||||
@fetch-messages="fetchMessages"
|
@fetch-messages="fetchMessages"
|
||||||
@open-file="openFile"
|
@open-file="openFile"
|
||||||
/>
|
>
|
||||||
|
<div
|
||||||
<!-- Stop button overlay -->
|
v-if="isAgentProcessing"
|
||||||
<div v-if="isAgentProcessing" class="stop-button-container">
|
slot="send-icon"
|
||||||
<Button
|
@click.stop="stopAgent"
|
||||||
icon="pi pi-stop-circle"
|
style="display:flex;align-items:center;justify-content:center;width:100%;height:100%"
|
||||||
label="Stop"
|
>
|
||||||
severity="danger"
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">
|
||||||
@click="stopAgent"
|
<rect x="4" y="4" width="16" height="16" rx="2" fill="#f23645"/>
|
||||||
class="stop-button"
|
</svg>
|
||||||
/>
|
|
||||||
</div>
|
</div>
|
||||||
|
</vue-advanced-chat>
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
@@ -682,8 +687,13 @@ onUnmounted(() => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.workspace-loading-spinner {
|
.workspace-loading-spinner {
|
||||||
font-size: 2rem;
|
width: 2rem;
|
||||||
color: #089981;
|
height: 2rem;
|
||||||
|
animation: workspace-spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes workspace-spin {
|
||||||
|
to { transform: rotate(360deg); }
|
||||||
}
|
}
|
||||||
|
|
||||||
.workspace-loading-message {
|
.workspace-loading-message {
|
||||||
@@ -721,24 +731,4 @@ onUnmounted(() => {
|
|||||||
color: var(--p-surface-900);
|
color: var(--p-surface-900);
|
||||||
}
|
}
|
||||||
|
|
||||||
.stop-button-container {
|
|
||||||
position: absolute;
|
|
||||||
bottom: 80px;
|
|
||||||
right: 20px;
|
|
||||||
z-index: 1000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.stop-button {
|
|
||||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
|
||||||
animation: pulse 2s infinite;
|
|
||||||
}
|
|
||||||
|
|
||||||
@keyframes pulse {
|
|
||||||
0%, 100% {
|
|
||||||
opacity: 1;
|
|
||||||
}
|
|
||||||
50% {
|
|
||||||
opacity: 0.8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
108
web/src/components/tabs/ResearchTab.vue
Normal file
108
web/src/components/tabs/ResearchTab.vue
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, computed } from 'vue'
|
||||||
|
import { storeToRefs } from 'pinia'
|
||||||
|
import { useResearchTypesStore } from '../../stores/researchTypes'
|
||||||
|
|
||||||
|
const store = useResearchTypesStore()
|
||||||
|
const { types } = storeToRefs(store)
|
||||||
|
|
||||||
|
const expanded = ref<Set<string>>(new Set())
|
||||||
|
|
||||||
|
const rows = computed(() =>
|
||||||
|
Object.entries(types.value).map(([id, t]) => ({ id, ...t }))
|
||||||
|
)
|
||||||
|
|
||||||
|
function toggle(id: string) {
|
||||||
|
if (expanded.value.has(id)) {
|
||||||
|
expanded.value.delete(id)
|
||||||
|
} else {
|
||||||
|
expanded.value.add(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<div class="research-tab">
|
||||||
|
<div v-if="rows.length === 0" class="empty">No research items</div>
|
||||||
|
<div v-for="row in rows" :key="row.id" class="research-row">
|
||||||
|
<button class="row-header" @click="toggle(row.id)">
|
||||||
|
<i class="pi" :class="expanded.has(row.id) ? 'pi-chevron-down' : 'pi-chevron-right'" />
|
||||||
|
<span class="row-name">{{ row.display_name }}</span>
|
||||||
|
<span class="row-id">{{ row.id }}</span>
|
||||||
|
</button>
|
||||||
|
<div v-if="expanded.has(row.id)" class="row-body">
|
||||||
|
<span v-if="row.description">{{ row.description }}</span>
|
||||||
|
<span v-else class="no-desc">No description</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<style scoped>
|
||||||
|
.research-tab {
|
||||||
|
flex: 1;
|
||||||
|
overflow-y: auto;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.empty {
|
||||||
|
color: #555;
|
||||||
|
text-align: center;
|
||||||
|
padding: 16px;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.research-row {
|
||||||
|
border-bottom: 1px solid #1e1e1e;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
width: 100%;
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
padding: 5px 10px;
|
||||||
|
cursor: pointer;
|
||||||
|
text-align: left;
|
||||||
|
color: #dbdbdb;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-header:hover {
|
||||||
|
background: #1a1a1a;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-header .pi {
|
||||||
|
color: #666;
|
||||||
|
font-size: 10px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-name {
|
||||||
|
flex: 1;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-id {
|
||||||
|
color: #555;
|
||||||
|
font-size: 11px;
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.row-body {
|
||||||
|
padding: 6px 26px 8px;
|
||||||
|
font-size: 12px;
|
||||||
|
color: #aaa;
|
||||||
|
line-height: 1.5;
|
||||||
|
background: #0d0d0d;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.no-desc {
|
||||||
|
color: #444;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -60,8 +60,6 @@ export function useStateSync(stores: Record<string, Store>) {
|
|||||||
currentSeqs[msg.store] = msg.seq;
|
currentSeqs[msg.store] = msg.seq;
|
||||||
saveStoredSeqs(currentSeqs);
|
saveStoredSeqs(currentSeqs);
|
||||||
console.log('[StateSync] Snapshot applied, new seq:', msg.seq);
|
console.log('[StateSync] Snapshot applied, new seq:', msg.seq);
|
||||||
} else {
|
|
||||||
console.warn('[StateSync] Store not found:', msg.store);
|
|
||||||
}
|
}
|
||||||
} else if (msg.type === 'patch') {
|
} else if (msg.type === 'patch') {
|
||||||
console.log('[StateSync] Processing patch for store:', msg.store, 'seq:', msg.seq);
|
console.log('[StateSync] Processing patch for store:', msg.store, 'seq:', msg.seq);
|
||||||
@@ -89,8 +87,6 @@ export function useStateSync(stores: Record<string, Store>) {
|
|||||||
currentSeqs[msg.store] = msg.seq;
|
currentSeqs[msg.store] = msg.seq;
|
||||||
saveStoredSeqs(currentSeqs);
|
saveStoredSeqs(currentSeqs);
|
||||||
console.log('[StateSync] Patch applied successfully, new seq:', msg.seq);
|
console.log('[StateSync] Patch applied successfully, new seq:', msg.seq);
|
||||||
} else {
|
|
||||||
console.warn('[StateSync] Store not found:', msg.store);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -263,7 +263,10 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
|||||||
throw err
|
throw err
|
||||||
})
|
})
|
||||||
.then((response) => {
|
.then((response) => {
|
||||||
if (response.history) {
|
if (response.error) {
|
||||||
|
console.error('[TradingView Datafeed] getBars server error:', response.error)
|
||||||
|
onError(response.error)
|
||||||
|
} else if (response.history) {
|
||||||
console.log('[TradingView Datafeed] Raw bar sample:', response.history.bars?.[0])
|
console.log('[TradingView Datafeed] Raw bar sample:', response.history.bars?.[0])
|
||||||
console.log('[TradingView Datafeed] Denominators:', denoms)
|
console.log('[TradingView Datafeed] Denominators:', denoms)
|
||||||
|
|
||||||
@@ -309,7 +312,7 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
|||||||
this.sendRequest<any>({
|
this.sendRequest<any>({
|
||||||
type: 'subscribe_bars',
|
type: 'subscribe_bars',
|
||||||
symbol: symbolInfo.ticker || symbolInfo.name,
|
symbol: symbolInfo.ticker || symbolInfo.name,
|
||||||
resolution: resolution,
|
period_seconds: intervalToSeconds(resolution),
|
||||||
subscription_id: listenerGuid
|
subscription_id: listenerGuid
|
||||||
})
|
})
|
||||||
.then((response) => {
|
.then((response) => {
|
||||||
@@ -328,8 +331,10 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsubscribeBars(listenerGuid: string): void {
|
unsubscribeBars(listenerGuid: string): void {
|
||||||
|
const sub = this.subscriptions.get(listenerGuid)
|
||||||
this.sendRequest<any>({
|
this.sendRequest<any>({
|
||||||
type: 'unsubscribe_bars',
|
type: 'unsubscribe_bars',
|
||||||
|
period_seconds: sub ? intervalToSeconds(sub.resolution) : 60,
|
||||||
subscription_id: listenerGuid
|
subscription_id: listenerGuid
|
||||||
})
|
})
|
||||||
.then(() => {
|
.then(() => {
|
||||||
|
|||||||
@@ -30,8 +30,14 @@ class WebSocketManager {
|
|||||||
async connect(token: string): Promise<void> {
|
async connect(token: string): Promise<void> {
|
||||||
this.token = token
|
this.token = token
|
||||||
|
|
||||||
// Close existing connection if any
|
// Close existing connection if any — null out handlers first so the async
|
||||||
|
// onclose event from the old socket cannot reset sessionStatus after the
|
||||||
|
// new socket has already reached 'ready'.
|
||||||
if (this.ws) {
|
if (this.ws) {
|
||||||
|
this.ws.onopen = null
|
||||||
|
this.ws.onmessage = null
|
||||||
|
this.ws.onerror = null
|
||||||
|
this.ws.onclose = null
|
||||||
this.ws.close()
|
this.ws.close()
|
||||||
this.ws = null
|
this.ws = null
|
||||||
}
|
}
|
||||||
|
|||||||
14
web/src/stores/researchTypes.ts
Normal file
14
web/src/stores/researchTypes.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { defineStore } from 'pinia'
|
||||||
|
import { ref } from 'vue'
|
||||||
|
|
||||||
|
export interface ResearchType {
|
||||||
|
display_name: string
|
||||||
|
description?: string
|
||||||
|
created_at: number
|
||||||
|
modified_at: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export const useResearchTypesStore = defineStore('research_types', () => {
|
||||||
|
const types = ref<Record<string, ResearchType>>({})
|
||||||
|
return { types }
|
||||||
|
})
|
||||||
14
web/src/stores/strategyTypes.ts
Normal file
14
web/src/stores/strategyTypes.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { defineStore } from 'pinia'
|
||||||
|
import { ref } from 'vue'
|
||||||
|
|
||||||
|
export interface StrategyType {
|
||||||
|
display_name: string
|
||||||
|
description?: string
|
||||||
|
created_at: number
|
||||||
|
modified_at: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export const useStrategyTypesStore = defineStore('strategy_types', () => {
|
||||||
|
const types = ref<Record<string, StrategyType>>({})
|
||||||
|
return { types }
|
||||||
|
})
|
||||||
Reference in New Issue
Block a user