bugfixes; research subproc; higher sandbox limits
This commit is contained in:
37
bin/init
37
bin/init
@@ -204,32 +204,17 @@ if [ -z "$USER_ID" ]; then
|
||||
fi
|
||||
echo -e "${GREEN}User ID: $USER_ID${NC}"
|
||||
|
||||
# Build license JSON based on type
|
||||
case "$LICENSE_TYPE" in
|
||||
enterprise)
|
||||
LICENSE_JSON='{"licenseType":"enterprise","features":{"maxIndicators":200,"maxStrategies":100,"maxBacktestDays":1825,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":20,"maxMessagesPerDay":10000,"maxTokensPerMessage":32768,"rateLimitPerMinute":300},"k8sResources":{"memoryRequest":"1Gi","memoryLimit":"4Gi","cpuRequest":"500m","cpuLimit":"4000m","storage":"50Gi","tmpSizeLimit":"1Gi","enableIdleShutdown":true,"idleTimeoutMinutes":120},"preferredModel":{"provider":"anthropic","model":"claude-opus-4-6","temperature":0.7}}'
|
||||
;;
|
||||
free)
|
||||
LICENSE_JSON='{"licenseType":"free","features":{"maxIndicators":10,"maxStrategies":3,"maxBacktestDays":30,"realtimeData":false,"customExecutors":false,"apiAccess":false},"resourceLimits":{"maxConcurrentSessions":1,"maxMessagesPerDay":100,"maxTokensPerMessage":4096,"rateLimitPerMinute":20},"k8sResources":{"memoryRequest":"256Mi","memoryLimit":"512Mi","cpuRequest":"100m","cpuLimit":"500m","storage":"2Gi","tmpSizeLimit":"128Mi","enableIdleShutdown":true,"idleTimeoutMinutes":30},"preferredModel":{"provider":"anthropic","model":"claude-haiku-4-5-20251001","temperature":0.7}}'
|
||||
;;
|
||||
pro|*)
|
||||
LICENSE_JSON='{"licenseType":"pro","features":{"maxIndicators":50,"maxStrategies":20,"maxBacktestDays":365,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":5,"maxMessagesPerDay":1000,"maxTokensPerMessage":8192,"rateLimitPerMinute":60},"k8sResources":{"memoryRequest":"512Mi","memoryLimit":"2Gi","cpuRequest":"250m","cpuLimit":"2000m","storage":"10Gi","tmpSizeLimit":"256Mi","enableIdleShutdown":true,"idleTimeoutMinutes":60},"preferredModel":{"provider":"anthropic","model":"claude-sonnet-4-6","temperature":0.7}}'
|
||||
;;
|
||||
esac
|
||||
|
||||
echo -e "${GREEN}→${NC} Creating $LICENSE_TYPE license..."
|
||||
$KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -c "
|
||||
INSERT INTO user_licenses (user_id, email, license, mcp_server_url)
|
||||
VALUES (
|
||||
'$USER_ID',
|
||||
'$USER_EMAIL',
|
||||
'$LICENSE_JSON',
|
||||
'$MCP_URL'
|
||||
)
|
||||
ON CONFLICT (user_id) DO UPDATE SET
|
||||
license = EXCLUDED.license,
|
||||
updated_at = NOW();
|
||||
" > /dev/null
|
||||
echo -e "${GREEN}→${NC} Setting $LICENSE_TYPE license..."
|
||||
HTTP_CODE=$(curl -s -o /tmp/dexorder-set-tier-response.json -w "%{http_code}" \
|
||||
-X POST "$BASE_URL/api/admin/users/$USER_ID/set-tier" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"tier\": \"$LICENSE_TYPE\"}")
|
||||
if [[ "$HTTP_CODE" != "200" ]]; then
|
||||
echo -e "${RED}✗ Failed to set license tier (HTTP $HTTP_CODE)${NC}"
|
||||
cat /tmp/dexorder-set-tier-response.json 2>/dev/null
|
||||
exit 1
|
||||
fi
|
||||
rm -f /tmp/dexorder-set-tier-response.json
|
||||
|
||||
echo -e "${GREEN}✓ User ready: $USER_EMAIL ($LICENSE_TYPE)${NC}"
|
||||
echo ""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# RBAC for gateway to CREATE sandbox deployments only
|
||||
# Principle of least privilege: gateway can ONLY create deployments/services/PVCs
|
||||
# in the sandbox namespace. Deletion is handled by the lifecycle sidecar.
|
||||
# RBAC for gateway to manage sandbox deployments
|
||||
# Principle of least privilege: gateway can create/delete deployments in the
|
||||
# sandbox namespace. PVC deletion is still handled by the lifecycle sidecar.
|
||||
# No pods, secrets, exec, or cross-namespace access.
|
||||
---
|
||||
apiVersion: v1
|
||||
@@ -15,10 +15,10 @@ metadata:
|
||||
name: sandbox-creator
|
||||
namespace: sandbox
|
||||
rules:
|
||||
# Deployments: create and read only (deletion handled by sidecar)
|
||||
# Deployments: full management (delete used for license tier changes; PVC deletion still via sidecar)
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["deployments"]
|
||||
verbs: ["create", "get", "list", "watch", "patch", "update"]
|
||||
verbs: ["create", "get", "list", "watch", "patch", "update", "delete"]
|
||||
|
||||
# PVCs: create and read (deletion handled by sidecar)
|
||||
- apiGroups: [""]
|
||||
@@ -41,7 +41,6 @@ rules:
|
||||
verbs: ["get"]
|
||||
|
||||
# Explicitly NOT included:
|
||||
# - deployments/delete - handled by lifecycle sidecar
|
||||
# - pvc/delete - handled by lifecycle sidecar
|
||||
# - services/delete - handled by lifecycle sidecar
|
||||
# - pods (create/delete) - must go through deployments
|
||||
|
||||
@@ -83,10 +83,10 @@ spec:
|
||||
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
memory: "512Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
memory: "2Gi"
|
||||
cpu: "500m"
|
||||
|
||||
livenessProbe:
|
||||
|
||||
@@ -19,8 +19,8 @@ spec:
|
||||
cpu: "100m"
|
||||
# Maximum any single container can request
|
||||
max:
|
||||
memory: "2Gi"
|
||||
cpu: "2000m"
|
||||
memory: "8Gi"
|
||||
cpu: "4000m"
|
||||
min:
|
||||
memory: "32Mi"
|
||||
cpu: "10m"
|
||||
|
||||
@@ -4,18 +4,32 @@
|
||||
flink_hostname: flink-jobmanager
|
||||
ingestor_broker_port: 5567
|
||||
|
||||
# Supported exchanges (subscribe to these prefixes)
|
||||
# Supported exchanges (used for symbol metadata generation)
|
||||
supported_exchanges:
|
||||
- BINANCE
|
||||
- COINBASE
|
||||
- KRAKEN
|
||||
|
||||
# Per-exchange work slot capacity.
|
||||
# Each slot is one concurrent job. historical_slots limits parallel OHLC fetches;
|
||||
# realtime_slots limits concurrent tick subscriptions. Set based on exchange rate
|
||||
# limits and connection constraints — these are conservative starting values.
|
||||
exchange_capacity:
|
||||
BINANCE:
|
||||
historical_slots: 1
|
||||
realtime_slots: 5
|
||||
COINBASE:
|
||||
historical_slots: 1
|
||||
realtime_slots: 4
|
||||
KRAKEN:
|
||||
historical_slots: 1
|
||||
realtime_slots: 3
|
||||
|
||||
# Kafka configuration
|
||||
kafka_brokers:
|
||||
- kafka:9092
|
||||
|
||||
# Worker configuration
|
||||
max_concurrent: 10
|
||||
poll_interval_ms: 10000
|
||||
|
||||
# Logging
|
||||
|
||||
@@ -46,6 +46,11 @@ data:
|
||||
alerts:
|
||||
max_active: 100
|
||||
|
||||
# Memory guard: soft RLIMIT_AS limit as a fraction of the cgroup memory.max.
|
||||
# Set below 1.0 so Python raises MemoryError before the kernel OOM-kills the pod.
|
||||
memory:
|
||||
limit_fraction: 0.85
|
||||
|
||||
# Logging
|
||||
logging:
|
||||
level: "INFO"
|
||||
|
||||
10
doc/plan.md
Normal file
10
doc/plan.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Development Plan
|
||||
|
||||
* Realtime data
|
||||
* Triggers
|
||||
* Strategy UI
|
||||
* Backtesting TV integration
|
||||
* Paper Trading
|
||||
* User secrets
|
||||
* Live Execution
|
||||
* Sandbox <=> Dexorder auth
|
||||
139
doc/prod_deployment.md
Normal file
139
doc/prod_deployment.md
Normal file
@@ -0,0 +1,139 @@
|
||||
# Production Deployment Guide
|
||||
|
||||
This document describes the full process for deploying the AI platform to the production Kubernetes cluster, including the special steps required when the Iceberg schema has changed.
|
||||
|
||||
## Overview
|
||||
|
||||
The production cluster runs under `kubectl --context prod`, defaulting to the `ai` namespace. The `sandbox` namespace is shared between dev and prod.
|
||||
|
||||
Deployment consists of two parts:
|
||||
|
||||
1. **Standard deploy** — rebuild and push all images, apply k8s manifests, roll out services
|
||||
2. **Iceberg schema wipe** *(when schema has changed)* — clear both the Iceberg REST catalog (postgres) and the MinIO data warehouse before deploying
|
||||
|
||||
---
|
||||
|
||||
## Standard Deployment (no schema changes)
|
||||
|
||||
```bash
|
||||
bin/deploy-all --sandboxes
|
||||
```
|
||||
|
||||
This script (hardcoded to `--context=prod`) performs:
|
||||
|
||||
1. Applies base kustomize manifests (`deploy/k8s/prod/`) — namespaces, RBAC, policies
|
||||
2. Applies `deploy/k8s/prod/infrastructure.yaml` — statefulsets, deployments
|
||||
3. Runs `bin/config-update prod` — updates ConfigMaps
|
||||
4. Builds and pushes images for all 7 services: `gateway`, `web`, `sandbox`, `lifecycle-sidecar`, `flink`, `relay`, `ingestor`
|
||||
5. *(with `--sandboxes`)* Deletes sandbox Deployments and Services in the `sandbox` namespace (PVCs are retained; gateway recreates them on next login)
|
||||
6. Waits for rollouts on all 6 main deployments
|
||||
|
||||
> **Secrets are NOT updated by this script.** Run `bin/secret-update prod` separately if secrets have changed.
|
||||
|
||||
---
|
||||
|
||||
## Full Deploy with Iceberg Schema Wipe
|
||||
|
||||
Use this when the Iceberg table schema has changed (e.g. protobuf/column changes in the `trading.ohlc` table).
|
||||
|
||||
### Architecture note
|
||||
|
||||
The Iceberg REST catalog uses **two storage layers** that must both be cleared:
|
||||
|
||||
| Layer | What it stores | How to clear |
|
||||
|---|---|---|
|
||||
| PostgreSQL `iceberg` database | Table/namespace metadata (catalog) | Drop and recreate the database |
|
||||
| MinIO `warehouse` bucket | Parquet data files | `mc rm --recursive --force` |
|
||||
|
||||
**Important:** The gateway also uses the `iceberg` postgres database for its own auth tables (`user`, `user_licenses`, `session`, etc.). Wiping the database removes all user accounts. After the wipe, the schema must be re-applied and users recreated.
|
||||
|
||||
### Step-by-step
|
||||
|
||||
#### 1. Scale down Iceberg consumers
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai scale deployment iceberg-catalog flink-jobmanager flink-taskmanager --replicas=0
|
||||
```
|
||||
|
||||
This prevents in-flight writes during the wipe.
|
||||
|
||||
#### 2. Wipe the Iceberg PostgreSQL catalog
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai exec postgres-0 -- psql -U postgres -c "DROP DATABASE iceberg;"
|
||||
kubectl --context prod -n ai exec postgres-0 -- psql -U postgres -c "CREATE DATABASE iceberg;"
|
||||
```
|
||||
|
||||
#### 3. Wipe the MinIO warehouse bucket
|
||||
|
||||
Get MinIO credentials from the cluster secret:
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai get secret minio-secret -o jsonpath='{.data.root-user}' | base64 -d
|
||||
kubectl --context prod -n ai get secret minio-secret -o jsonpath='{.data.root-password}' | base64 -d
|
||||
```
|
||||
|
||||
Configure the `mc` client inside the MinIO pod and remove all objects:
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai exec minio-0 -- mc alias set local http://localhost:9000 <user> <password>
|
||||
kubectl --context prod -n ai exec minio-0 -- mc rm --recursive --force local/warehouse/
|
||||
```
|
||||
|
||||
#### 4. Run the full deploy
|
||||
|
||||
```bash
|
||||
bin/deploy-all --sandboxes
|
||||
```
|
||||
|
||||
This rebuilds and redeploys all services, including `iceberg-catalog`, `flink-jobmanager`, and `flink-taskmanager` (which were scaled to zero above — `deploy-all` will restore them to their manifest replica counts).
|
||||
|
||||
#### 5. Re-apply the gateway database schema
|
||||
|
||||
The gateway does **not** auto-migrate. After the `iceberg` database is recreated, the schema must be applied manually:
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai exec -i postgres-0 -- psql -U postgres -d iceberg < gateway/schema.sql
|
||||
```
|
||||
|
||||
This creates the `user`, `session`, `user_licenses`, and related tables.
|
||||
|
||||
#### 6. Recreate all users
|
||||
|
||||
```bash
|
||||
bin/create-all-users prod
|
||||
```
|
||||
|
||||
This registers all alpha test users via the gateway API and assigns their licenses. Users are defined in the script itself (`bin/create-all-users`).
|
||||
|
||||
To add or modify users, edit that file or run `bin/create-user prod` interactively.
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
curl -I https://dexorder.ai/api/health
|
||||
```
|
||||
|
||||
Check gateway logs for errors:
|
||||
|
||||
```bash
|
||||
kubectl --context prod -n ai logs deployment/gateway --tail=100
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Login fails after Iceberg wipe
|
||||
|
||||
**Symptom:** `Sign in failed` (401) or `User creation failed` (postgres error `42P01: undefined table`)
|
||||
|
||||
**Cause:** Dropping the `iceberg` database removes the gateway's auth tables along with the Iceberg catalog metadata — they share the same database.
|
||||
|
||||
**Fix:** Re-apply the schema and recreate users (steps 5 and 6 above).
|
||||
|
||||
### Gateway shows `42P01` errors but pod is running
|
||||
|
||||
The gateway does not auto-migrate on startup. The schema file must be applied manually after any database recreation. A gateway restart alone will not fix this.
|
||||
@@ -81,18 +81,29 @@ All sockets bind on **Relay** (well-known endpoint). Components connect to relay
|
||||
- Relay publishes DataRequest to ingestor work queue
|
||||
- No request tracking - relay is stateless
|
||||
|
||||
### 2. Ingestor Work Queue (Relay → Ingestors)
|
||||
**Pattern**: PUB/SUB with exchange prefix filtering
|
||||
- **Socket Type**: Relay uses PUB (bind), Ingestors use SUB (connect)
|
||||
- **Endpoint**: `tcp://*:5555` (Relay binds)
|
||||
- **Message Types**: `DataRequest` (historical or realtime)
|
||||
- **Topic Prefix**: Market name (e.g., `BTC/USDT.`, `ETH/BTC.`)
|
||||
- **Behavior**:
|
||||
- Relay publishes work with exchange prefix from ticker
|
||||
- Ingestors subscribe only to exchanges they support
|
||||
- Multiple ingestors can compete for same exchange
|
||||
- Ingestors write data to Kafka only (no direct response)
|
||||
- Flink processes Kafka → Iceberg → notification
|
||||
### 2. Ingestor Work Queue (Flink ↔ Ingestors)
|
||||
**Pattern**: ROUTER/DEALER slot-based broker
|
||||
- **Socket Type**: Flink `IngestorBroker` uses ROUTER (bind), Ingestors use DEALER (connect)
|
||||
- **Endpoint**: `tcp://*:5567` (Flink binds)
|
||||
- **Message Types**: `WorkerReady` (slot offer), `DataRequest` (work assignment), `WorkComplete`, `WorkHeartbeat`, `WorkReject`, `WorkStop`
|
||||
- **Capacity model**:
|
||||
- Each `WorkerReady` (0x20) is ONE slot offer for one exchange and one job type (`SlotType`: `HISTORICAL=1`, `REALTIME=2`, `ANY=0`)
|
||||
- Ingestors send N `WorkerReady` messages at startup — one per available slot per exchange per type
|
||||
- Flink dispatches a job by matching the slot's exchange and SlotType to the request
|
||||
- The slot is consumed on dispatch; the ingestor re-offers it (new `WorkerReady`) when the job ends
|
||||
- Rate-limit backoff: if the exchange returns a 429, the ingestor delays the re-offer by the `Retry-After` duration from the response header
|
||||
- **Historical job lifecycle**:
|
||||
- Flink dispatches `DataRequest` (HISTORICAL_OHLC) → ingestor fetches and writes to Kafka → sends `WorkComplete` (0x21) → sends new `WorkerReady` for that slot
|
||||
- **Realtime job lifecycle**:
|
||||
- Flink dispatches `DataRequest` (REALTIME_TICKS) → ingestor polls exchange and writes ticks to Kafka → sends `WorkHeartbeat` (0x22) every 5 s → on `WorkStop` (0x25) from Flink: cancels and sends new `WorkerReady`
|
||||
- **Slot configuration** (per ingestor, per exchange):
|
||||
```yaml
|
||||
exchange_capacity:
|
||||
BINANCE: { historical_slots: 3, realtime_slots: 5 }
|
||||
KRAKEN: { historical_slots: 2, realtime_slots: 3 }
|
||||
COINBASE: { historical_slots: 2, realtime_slots: 4 }
|
||||
```
|
||||
- **Flink restart**: when Flink restarts its `freeSlots` deque is cleared; all in-flight jobs time out on the ingestor side, releasing their slots, which then re-offer via `WorkerReady`
|
||||
|
||||
### 3. Market Data Fanout (Relay ↔ Flink ↔ Clients)
|
||||
**Pattern**: XPUB/XSUB proxy
|
||||
|
||||
@@ -1,4 +1 @@
|
||||
what conclusions can you make by analyzing historical data on ETH price direction changes near market session overlaps and market sessions changes on monday and tuesday?
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.dexorder.flink.ingestor;
|
||||
import com.dexorder.flink.zmq.ZmqChannelManager;
|
||||
import com.dexorder.proto.DataRequest;
|
||||
import com.dexorder.proto.RealtimeParams;
|
||||
import com.dexorder.proto.SlotType;
|
||||
import com.dexorder.proto.SubmitHistoricalRequest;
|
||||
import com.dexorder.proto.WorkComplete;
|
||||
import com.dexorder.proto.WorkHeartbeat;
|
||||
@@ -17,27 +18,27 @@ import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Deque;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
/**
|
||||
* LRU-style work broker for ingestors.
|
||||
* Slot-based work broker for ingestors.
|
||||
*
|
||||
* Ingestors connect via DEALER to the ROUTER socket on port 5567. They register with READY,
|
||||
* are dispatched WORK messages, and respond with COMPLETE (historical) or HEARTBEAT (realtime).
|
||||
* If a heartbeat times out the job is re-queued and dispatched to another available worker.
|
||||
* Each WorkerReady message from an ingestor represents ONE available slot for a
|
||||
* specific exchange and job type (HISTORICAL or REALTIME). Flink consumes the slot
|
||||
* by dispatching a DataRequest to it. The ingestor re-offers the slot (sends another
|
||||
* WorkerReady) once the job completes, subject to any rate-limit backoff.
|
||||
*
|
||||
* Also receives SubmitHistoricalRequest messages forwarded by the relay on the PULL socket (5566).
|
||||
* Also receives SubmitHistoricalRequest messages forwarded by the relay on the PULL
|
||||
* socket (5566), and realtime job requests from RealtimeSubscriptionManager.
|
||||
*
|
||||
* Message type IDs (ZMQ framing, not Kafka):
|
||||
* 0x10 SubmitHistoricalRequest (relay → Flink via PULL, same as client wire type)
|
||||
* 0x20 WorkerReady (ingestor → Flink)
|
||||
* Message type IDs (ZMQ framing):
|
||||
* 0x10 SubmitHistoricalRequest (relay → Flink via PULL)
|
||||
* 0x20 WorkerReady (ingestor → Flink: one slot offer)
|
||||
* 0x21 WorkComplete (ingestor → Flink)
|
||||
* 0x22 WorkHeartbeat (ingestor → Flink)
|
||||
* 0x23 WorkReject (ingestor → Flink)
|
||||
@@ -53,7 +54,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
private static final byte MSG_TYPE_WORK_COMPLETE = 0x21;
|
||||
private static final byte MSG_TYPE_WORK_HEARTBEAT = 0x22;
|
||||
private static final byte MSG_TYPE_WORK_REJECT = 0x23;
|
||||
private static final byte MSG_TYPE_WORK_ASSIGN = 0x01; // DataRequest type on wire
|
||||
private static final byte MSG_TYPE_WORK_ASSIGN = 0x01;
|
||||
private static final byte MSG_TYPE_WORK_STOP = 0x25;
|
||||
|
||||
/** Re-queue realtime job if no heartbeat received within this window (ms) */
|
||||
@@ -65,20 +66,20 @@ public class IngestorBroker implements AutoCloseable {
|
||||
private volatile boolean running;
|
||||
private Thread brokerThread;
|
||||
|
||||
// ── Worker tracking ──────────────────────────────────────────────────────
|
||||
// ── Slot tracking ─────────────────────────────────────────────────────────
|
||||
|
||||
/** Workers ready to accept a job, in LRU order (head = least recently used) */
|
||||
private final Deque<WorkerInfo> freeWorkers = new ArrayDeque<>();
|
||||
/**
|
||||
* Available slots, in LRU order (head = least recently used).
|
||||
* Each entry is one WorkerReady slot offer from an ingestor.
|
||||
*/
|
||||
private final Deque<WorkerSlot> freeSlots = new ArrayDeque<>();
|
||||
|
||||
/** Jobs waiting for a compatible free worker */
|
||||
/** Jobs waiting for a compatible free slot */
|
||||
private final Queue<DataRequest> pendingJobs = new ArrayDeque<>();
|
||||
|
||||
/** Jobs currently executing on a worker */
|
||||
/** Jobs currently executing on a slot */
|
||||
private final Map<String, ActiveJob> activeJobs = new ConcurrentHashMap<>();
|
||||
|
||||
/** Worker identity → supported exchanges (set once on READY) */
|
||||
private final Map<String, WorkerInfo> knownWorkers = new ConcurrentHashMap<>();
|
||||
|
||||
// ── Thread-safe inbound queue from RealtimeSubscriptionManager ───────────
|
||||
|
||||
private final Queue<DataRequest> externalSubmissions = new ConcurrentLinkedQueue<>();
|
||||
@@ -134,8 +135,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
|
||||
/**
|
||||
* Stop all realtime jobs for a ticker (called when last subscriber leaves).
|
||||
* Thread-safe — posts a stop marker via externalSubmissions is complex; instead we
|
||||
* directly find and stop active jobs. Protected by ConcurrentHashMap.
|
||||
* Thread-safe via ConcurrentHashMap.
|
||||
*/
|
||||
public void stopRealtimeJobsForTicker(String ticker) {
|
||||
List<String> toStop = new ArrayList<>();
|
||||
@@ -154,7 +154,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Broker loop ──────────────────────────────────────────────────────────
|
||||
// ── Broker loop ───────────────────────────────────────────────────────────
|
||||
|
||||
private void brokerLoop() {
|
||||
ZMQ.Socket pullSocket = zmqManager.getSocket(ZmqChannelManager.Channel.CLIENT_REQUEST);
|
||||
@@ -174,18 +174,15 @@ public class IngestorBroker implements AutoCloseable {
|
||||
enqueueJob(ext);
|
||||
}
|
||||
|
||||
// Poll sockets (100ms timeout)
|
||||
poller.poll(100);
|
||||
|
||||
if (poller.pollin(0)) {
|
||||
handleClientRequest(pullSocket);
|
||||
}
|
||||
|
||||
if (poller.pollin(1)) {
|
||||
handleWorkerMessage(routerSocket);
|
||||
}
|
||||
|
||||
// Check for heartbeat / completion timeouts
|
||||
checkTimeouts();
|
||||
|
||||
} catch (Exception e) {
|
||||
@@ -235,7 +232,8 @@ public class IngestorBroker implements AutoCloseable {
|
||||
.setClientId(req.hasClientId() ? req.getClientId() : "")
|
||||
.build();
|
||||
enqueueJob(dataRequest);
|
||||
LOG.info("Received historical request from relay: request_id={}, ticker={}", req.getRequestId(), req.getTicker());
|
||||
LOG.info("Received historical request from relay: request_id={}, ticker={}",
|
||||
req.getRequestId(), req.getTicker());
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to parse SubmitHistoricalRequest from relay", e);
|
||||
}
|
||||
@@ -277,23 +275,28 @@ public class IngestorBroker implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A WorkerReady message represents ONE slot offer for one exchange and job type.
|
||||
* Add it directly to freeSlots — no deduplication (multiple slots per ingestor are expected).
|
||||
*/
|
||||
private void handleWorkerReady(byte[] identity, String identityKey, byte[] payload) throws Exception {
|
||||
WorkerReady ready = WorkerReady.parseFrom(payload);
|
||||
Set<String> exchanges = new HashSet<>(ready.getExchangesList());
|
||||
SlotType slotType = ready.getJobType();
|
||||
|
||||
WorkerInfo worker = knownWorkers.computeIfAbsent(identityKey,
|
||||
k -> new WorkerInfo(identity, identityKey, exchanges));
|
||||
worker.exchanges = exchanges; // update in case re-READY with different config
|
||||
worker.identity = identity;
|
||||
|
||||
if (!freeWorkers.contains(worker)) {
|
||||
freeWorkers.addLast(worker);
|
||||
for (String exchange : ready.getExchangesList()) {
|
||||
WorkerSlot slot = new WorkerSlot(identity, identityKey, exchange.toUpperCase(), slotType);
|
||||
freeSlots.addLast(slot);
|
||||
LOG.info("Worker slot READY: id={}, exchange={}, type={}, totalFreeSlots={}",
|
||||
identityKey, exchange, slotType, freeSlots.size());
|
||||
}
|
||||
LOG.info("Ingestor READY: id={}, exchanges={}, freeWorkers={}", identityKey, exchanges, freeWorkers.size());
|
||||
|
||||
dispatchPending();
|
||||
}
|
||||
|
||||
/**
|
||||
* Historical job completed. Remove from activeJobs.
|
||||
* The ingestor will send a new typed WorkerReady to re-offer the slot.
|
||||
*/
|
||||
private void handleWorkComplete(String identityKey, byte[] payload) throws Exception {
|
||||
WorkComplete complete = WorkComplete.parseFrom(payload);
|
||||
String jobId = complete.getJobId();
|
||||
@@ -304,13 +307,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
} else {
|
||||
LOG.info("Job COMPLETE: jobId={}, ticker={}, success={}", jobId, job.ticker, complete.getSuccess());
|
||||
}
|
||||
|
||||
// Worker is free again
|
||||
WorkerInfo worker = knownWorkers.get(identityKey);
|
||||
if (worker != null) {
|
||||
freeWorkers.addLast(worker);
|
||||
dispatchPending();
|
||||
}
|
||||
// Slot re-registration is driven by the ingestor via a new WorkerReady.
|
||||
}
|
||||
|
||||
private void handleWorkHeartbeat(String identityKey, byte[] payload) throws Exception {
|
||||
@@ -325,6 +322,10 @@ public class IngestorBroker implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ingestor rejected the job. Re-queue it with a new ID.
|
||||
* The ingestor will send a new typed WorkerReady when it's ready again.
|
||||
*/
|
||||
private void handleWorkReject(String identityKey, byte[] payload) throws Exception {
|
||||
WorkReject reject = WorkReject.parseFrom(payload);
|
||||
String jobId = reject.getJobId();
|
||||
@@ -332,31 +333,23 @@ public class IngestorBroker implements AutoCloseable {
|
||||
|
||||
ActiveJob job = activeJobs.remove(jobId);
|
||||
if (job != null) {
|
||||
// Re-queue with fresh job_id so a different ingestor may pick it up
|
||||
DataRequest requeued = job.request.toBuilder()
|
||||
.setJobId(UUID.randomUUID().toString())
|
||||
.build();
|
||||
pendingJobs.add(requeued);
|
||||
}
|
||||
|
||||
// Worker is still free (it rejected, not crashed)
|
||||
WorkerInfo worker = knownWorkers.get(identityKey);
|
||||
if (worker != null) {
|
||||
freeWorkers.addLast(worker);
|
||||
dispatchPending();
|
||||
}
|
||||
// Slot re-registration is driven by the ingestor via a new WorkerReady.
|
||||
}
|
||||
|
||||
// ── Dispatch ─────────────────────────────────────────────────────────────
|
||||
// ── Dispatch ──────────────────────────────────────────────────────────────
|
||||
|
||||
private void enqueueJob(DataRequest request) {
|
||||
// Check if we can immediately dispatch
|
||||
WorkerInfo worker = findFreeWorker(exchangeOf(request.getTicker()));
|
||||
if (worker != null) {
|
||||
dispatch(worker, request);
|
||||
WorkerSlot slot = findFreeSlot(exchangeOf(request.getTicker()), request.getType());
|
||||
if (slot != null) {
|
||||
dispatch(slot, request);
|
||||
} else {
|
||||
pendingJobs.add(request);
|
||||
LOG.debug("No free worker for {}, queued (pendingJobs={})", request.getTicker(), pendingJobs.size());
|
||||
LOG.debug("No free slot for {}, queued (pendingJobs={})", request.getTicker(), pendingJobs.size());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -364,9 +357,9 @@ public class IngestorBroker implements AutoCloseable {
|
||||
Queue<DataRequest> remaining = new ArrayDeque<>();
|
||||
DataRequest job;
|
||||
while ((job = pendingJobs.poll()) != null) {
|
||||
WorkerInfo worker = findFreeWorker(exchangeOf(job.getTicker()));
|
||||
if (worker != null) {
|
||||
dispatch(worker, job);
|
||||
WorkerSlot slot = findFreeSlot(exchangeOf(job.getTicker()), job.getType());
|
||||
if (slot != null) {
|
||||
dispatch(slot, job);
|
||||
} else {
|
||||
remaining.add(job);
|
||||
}
|
||||
@@ -374,28 +367,30 @@ public class IngestorBroker implements AutoCloseable {
|
||||
pendingJobs.addAll(remaining);
|
||||
}
|
||||
|
||||
private void dispatch(WorkerInfo worker, DataRequest request) {
|
||||
freeWorkers.remove(worker);
|
||||
|
||||
private void dispatch(WorkerSlot slot, DataRequest request) {
|
||||
try {
|
||||
byte[] protoBytes = request.toByteArray();
|
||||
boolean sent = zmqManager.sendToWorker(worker.identity, PROTOCOL_VERSION, MSG_TYPE_WORK_ASSIGN, protoBytes);
|
||||
boolean sent = zmqManager.sendToWorker(slot.identity, PROTOCOL_VERSION, MSG_TYPE_WORK_ASSIGN, protoBytes);
|
||||
if (!sent) {
|
||||
LOG.error("Failed to dispatch job to worker={}, re-queuing", worker.identityKey);
|
||||
freeWorkers.addLast(worker);
|
||||
// ROUTER_MANDATORY: identity is disconnected — purge all stale slots for this
|
||||
// worker and re-queue the job so dispatchPending() can try a live slot.
|
||||
int purged = purgeWorkerSlots(slot.identityKey);
|
||||
LOG.warn("Worker {} unreachable, purged {} stale free slots, re-queuing job={}",
|
||||
slot.identityKey, purged, request.getJobId());
|
||||
pendingJobs.add(request);
|
||||
return;
|
||||
}
|
||||
|
||||
ActiveJob active = new ActiveJob(worker.identity, worker.identityKey,
|
||||
ActiveJob active = new ActiveJob(slot.identity, slot.identityKey,
|
||||
request, request.getTicker(), request.getType());
|
||||
activeJobs.put(request.getJobId(), active);
|
||||
|
||||
LOG.info("Dispatched job: jobId={}, ticker={}, type={}, worker={}",
|
||||
request.getJobId(), request.getTicker(), request.getType(), worker.identityKey);
|
||||
LOG.info("Dispatched job: jobId={}, ticker={}, type={}, worker={}, slotType={}",
|
||||
request.getJobId(), request.getTicker(), request.getType(),
|
||||
slot.identityKey, slot.slotType);
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error dispatching job", e);
|
||||
freeWorkers.addLast(worker);
|
||||
freeSlots.addLast(slot);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -408,7 +403,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Timeout checking ─────────────────────────────────────────────────────
|
||||
// ── Timeout checking ──────────────────────────────────────────────────────
|
||||
|
||||
private void checkTimeouts() {
|
||||
long now = System.currentTimeMillis();
|
||||
@@ -426,10 +421,9 @@ public class IngestorBroker implements AutoCloseable {
|
||||
for (String jobId : timedOut) {
|
||||
ActiveJob job = activeJobs.remove(jobId);
|
||||
if (job == null) continue;
|
||||
LOG.warn("Job timed out (no heartbeat/completion): jobId={}, ticker={}, type={}, worker={}",
|
||||
LOG.warn("Job timed out: jobId={}, ticker={}, type={}, worker={}",
|
||||
jobId, job.ticker, job.type, job.workerIdentityKey);
|
||||
|
||||
// Re-queue with a new job_id
|
||||
DataRequest requeued = job.request.toBuilder()
|
||||
.setJobId(UUID.randomUUID().toString())
|
||||
.build();
|
||||
@@ -438,7 +432,7 @@ public class IngestorBroker implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Extract exchange name from ticker, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
|
||||
private static String exchangeOf(String ticker) {
|
||||
@@ -446,12 +440,32 @@ public class IngestorBroker implements AutoCloseable {
|
||||
return dot >= 0 ? ticker.substring(dot + 1).toUpperCase() : "";
|
||||
}
|
||||
|
||||
/** Find and remove a free worker that supports the given exchange. */
|
||||
private WorkerInfo findFreeWorker(String exchange) {
|
||||
for (WorkerInfo w : freeWorkers) {
|
||||
if (exchange.isEmpty() || w.exchanges.contains(exchange)) {
|
||||
freeWorkers.remove(w);
|
||||
return w;
|
||||
/**
|
||||
* Remove all free slots offered by a given worker identity.
|
||||
* Called when a dispatch to that identity fails (ROUTER_MANDATORY unreachable).
|
||||
* Returns the number of slots removed.
|
||||
*/
|
||||
private int purgeWorkerSlots(String identityKey) {
|
||||
int before = freeSlots.size();
|
||||
freeSlots.removeIf(slot -> slot.identityKey.equals(identityKey));
|
||||
return before - freeSlots.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find and remove a free slot that supports the given exchange and request type.
|
||||
* A slot with SlotType.ANY matches any request type.
|
||||
*/
|
||||
private WorkerSlot findFreeSlot(String exchange, DataRequest.RequestType requestType) {
|
||||
for (WorkerSlot slot : freeSlots) {
|
||||
boolean exchangeMatch = exchange.isEmpty() || slot.exchange.equals(exchange);
|
||||
boolean typeMatch = slot.slotType == SlotType.ANY
|
||||
|| (slot.slotType == SlotType.HISTORICAL
|
||||
&& requestType == DataRequest.RequestType.HISTORICAL_OHLC)
|
||||
|| (slot.slotType == SlotType.REALTIME
|
||||
&& requestType == DataRequest.RequestType.REALTIME_TICKS);
|
||||
if (exchangeMatch && typeMatch) {
|
||||
freeSlots.remove(slot);
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
@@ -468,17 +482,20 @@ public class IngestorBroker implements AutoCloseable {
|
||||
stop();
|
||||
}
|
||||
|
||||
// ── Inner types ──────────────────────────────────────────────────────────
|
||||
// ── Inner types ───────────────────────────────────────────────────────────
|
||||
|
||||
private static class WorkerInfo {
|
||||
byte[] identity;
|
||||
/** One available work slot offered by an ingestor via WorkerReady. */
|
||||
private static class WorkerSlot {
|
||||
final byte[] identity;
|
||||
final String identityKey;
|
||||
Set<String> exchanges;
|
||||
final String exchange;
|
||||
final SlotType slotType;
|
||||
|
||||
WorkerInfo(byte[] identity, String identityKey, Set<String> exchanges) {
|
||||
WorkerSlot(byte[] identity, String identityKey, String exchange, SlotType slotType) {
|
||||
this.identity = identity;
|
||||
this.identityKey = identityKey;
|
||||
this.exchanges = exchanges;
|
||||
this.exchange = exchange;
|
||||
this.slotType = slotType;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -87,6 +87,11 @@ public class ZmqChannelManager implements Closeable {
|
||||
socket.setLinger(1000);
|
||||
socket.setSndHWM(10000);
|
||||
socket.setRcvHWM(10000);
|
||||
if (socketType == SocketType.ROUTER) {
|
||||
// Return false (EHOSTUNREACH) instead of silently dropping messages to
|
||||
// unknown/disconnected peer identities. Enables immediate stale-slot detection.
|
||||
socket.setRouterMandatory(true);
|
||||
}
|
||||
socket.bind(endpoint);
|
||||
sockets.put(channel.name(), socket);
|
||||
LOG.info("Bound {} to {}", description, endpoint);
|
||||
|
||||
@@ -595,12 +595,13 @@ export class WebSocketHandler {
|
||||
case 'get_bars': {
|
||||
if (!ohlcService) {
|
||||
socket.send(JSON.stringify({
|
||||
type: 'error',
|
||||
type: 'get_bars_response',
|
||||
request_id: requestId,
|
||||
error_message: 'OHLC service not available'
|
||||
error: 'OHLC service not available',
|
||||
}));
|
||||
break;
|
||||
}
|
||||
try {
|
||||
const history = await ohlcService.fetchOHLC(
|
||||
payload.symbol,
|
||||
payload.period_seconds,
|
||||
@@ -609,14 +610,13 @@ export class WebSocketHandler {
|
||||
payload.countback
|
||||
);
|
||||
logger.info({ requestId, barCount: history.bars?.length ?? 0, noData: history.noData, socketState: socket.readyState }, 'Sending get_bars_response');
|
||||
socket.send(
|
||||
jsonStringifySafe({
|
||||
type: 'get_bars_response',
|
||||
request_id: requestId,
|
||||
history,
|
||||
})
|
||||
);
|
||||
socket.send(jsonStringifySafe({ type: 'get_bars_response', request_id: requestId, history }));
|
||||
logger.info({ requestId }, 'get_bars_response sent');
|
||||
} catch (err: any) {
|
||||
const errorMessage = err?.message ?? String(err);
|
||||
logger.error({ requestId, ticker: payload.symbol, errorMessage }, 'get_bars failed');
|
||||
socket.send(JSON.stringify({ type: 'get_bars_response', request_id: requestId, error: errorMessage }));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Pool } from 'pg';
|
||||
import type { UserLicense } from '../types/user.js';
|
||||
import { UserLicenseSchema } from '../types/user.js';
|
||||
import type { UserLicense, License, LicenseTier } from '../types/user.js';
|
||||
import { UserLicenseSchema, LICENSE_TIER_TEMPLATES } from '../types/user.js';
|
||||
import type { AuthService } from '../auth/auth-service.js';
|
||||
|
||||
export class UserService {
|
||||
@@ -114,6 +114,54 @@ export class UserService {
|
||||
return await this.authService.verifyToken(token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-apply the current canonical template for every user's declared licenseType.
|
||||
* Updates only the DB — does not touch deployments, so running pods are unaffected
|
||||
* until their next natural restart.
|
||||
*/
|
||||
async migrateAllLicenses(): Promise<{ updated: number }> {
|
||||
const client = await this.pool.connect();
|
||||
try {
|
||||
const rows = await client.query(
|
||||
`SELECT user_id, license->>'licenseType' AS tier FROM user_licenses`
|
||||
);
|
||||
let updated = 0;
|
||||
for (const row of rows.rows) {
|
||||
const tier = row.tier as LicenseTier;
|
||||
if (!LICENSE_TIER_TEMPLATES[tier]) continue;
|
||||
await client.query(
|
||||
`UPDATE user_licenses SET license = $1::jsonb, updated_at = NOW() WHERE user_id = $2`,
|
||||
[JSON.stringify(LICENSE_TIER_TEMPLATES[tier]), row.user_id]
|
||||
);
|
||||
updated++;
|
||||
}
|
||||
return { updated };
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a user's license to a canonical tier template.
|
||||
* Overwrites the existing license with the current template for that tier.
|
||||
*/
|
||||
async setUserLicenseTier(userId: string, tier: LicenseTier): Promise<License> {
|
||||
const license = LICENSE_TIER_TEMPLATES[tier];
|
||||
const client = await this.pool.connect();
|
||||
try {
|
||||
await client.query(
|
||||
`INSERT INTO user_licenses (user_id, license, mcp_server_url, updated_at)
|
||||
VALUES ($1, $2::jsonb, 'pending', NOW())
|
||||
ON CONFLICT (user_id) DO UPDATE
|
||||
SET license = EXCLUDED.license, updated_at = NOW()`,
|
||||
[userId, JSON.stringify(license)]
|
||||
);
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
return license;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close database pool
|
||||
*/
|
||||
|
||||
@@ -16,6 +16,7 @@ import type { ResearchSubagent } from './subagents/research/index.js';
|
||||
import type { IndicatorSubagent } from './subagents/indicator/index.js';
|
||||
import type { WebExploreSubagent } from './subagents/web-explore/index.js';
|
||||
import type { StrategySubagent } from './subagents/strategy/index.js';
|
||||
import { BaseSubagent } from './subagents/base-subagent.js';
|
||||
import type { DynamicStructuredTool } from '@langchain/core/tools';
|
||||
import { getToolRegistry } from '../tools/tool-registry.js';
|
||||
import type { MCPToolInfo } from '../tools/mcp/mcp-tool-wrapper.js';
|
||||
@@ -237,12 +238,22 @@ export class AgentHarness {
|
||||
try {
|
||||
const { createResearchSubagent } = await import('./subagents/research/index.js');
|
||||
|
||||
// Create a model for the research subagent
|
||||
// Path resolution: use the compiled output path
|
||||
const researchSubagentPath = join(__dirname, 'subagents', 'research');
|
||||
this.config.logger.debug({ researchSubagentPath }, 'Using research subagent path');
|
||||
|
||||
// Load the subagent config to get maxTokens — research scripts require more tokens
|
||||
// than the provider default (4096) because python_write arguments include full code bodies
|
||||
const researchSubagentConfig = await BaseSubagent.loadConfig(researchSubagentPath);
|
||||
|
||||
// Create a model for the research subagent — always use the complex model
|
||||
// since research tasks involve data analysis, charting, and code generation
|
||||
const { model } = await this.modelRouter.route(
|
||||
'research analysis', // dummy query
|
||||
'analyze and backtest research data', // triggers complex routing
|
||||
this.config.license,
|
||||
RoutingStrategy.COMPLEXITY,
|
||||
this.config.userId
|
||||
this.config.userId,
|
||||
researchSubagentConfig.maxTokens // honour the subagent's maxTokens (e.g. 8192)
|
||||
);
|
||||
|
||||
// Get tools for research subagent from registry
|
||||
@@ -274,10 +285,6 @@ export class AgentHarness {
|
||||
}));
|
||||
}
|
||||
|
||||
// Path resolution: use the compiled output path
|
||||
const researchSubagentPath = join(__dirname, 'subagents', 'research');
|
||||
this.config.logger.debug({ researchSubagentPath }, 'Using research subagent path');
|
||||
|
||||
this.researchSubagent = await createResearchSubagent(
|
||||
model,
|
||||
this.config.logger,
|
||||
@@ -535,10 +542,12 @@ export class AgentHarness {
|
||||
const stream = await model.stream(messagesCopy, { signal });
|
||||
for await (const chunk of stream) {
|
||||
if (typeof chunk.content === 'string' && chunk.content.length > 0) {
|
||||
this.config.logger.trace({ content: chunk.content }, 'raw chunk');
|
||||
yield { type: 'chunk', content: chunk.content };
|
||||
} else if (Array.isArray(chunk.content)) {
|
||||
for (const block of chunk.content) {
|
||||
if (block.type === 'text' && block.text) {
|
||||
this.config.logger.trace({ content: block.text }, 'raw chunk');
|
||||
yield { type: 'chunk', content: block.text };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,8 +18,11 @@ Dexorder trading platform provides OHLC data at a 1-minute resolution and suppor
|
||||
|
||||
Dexorder does not support:
|
||||
* tick-by-tick trading or high-frequency strategies.
|
||||
* long-running computations like paramater optimizations or training machine learning models.
|
||||
* long-running computations like parameter optimizations or training machine learning models during live execution.
|
||||
* portfolio optimization or trading strategies that require a large number of symbols.
|
||||
* LLM calls inside strategy scripts — strategies must be deterministic and lightweight for backtesting to be reliable and repeatable. LLMs are slow, expensive, and introduce temperature-based non-determinism that breaks backtesting. (Walk-forward LLM integration via timer/data triggers is planned but not yet available.)
|
||||
* TradFi data (equities, forex, bonds, options, etc.) — only crypto pricing data is available.
|
||||
* Alternative data sources such as news feeds, Twitter/social sentiment, on-chain data, or economic calendars — these are not yet available.
|
||||
|
||||
Dexorder does support:
|
||||
* backtesting strategies against historical data.
|
||||
@@ -33,6 +36,27 @@ If the user asks for a capability not provided by Dexorder, decline and explain
|
||||
|
||||
# Important Instructions
|
||||
|
||||
## Switching Chart Symbol or Timeframe
|
||||
|
||||
**IMPORTANT: When the user asks to switch, change, or update the chart symbol or timeframe, you MUST call `workspace_patch` directly. Do NOT use web_explore, do NOT delegate to the indicator tool.**
|
||||
|
||||
Call `workspace_patch` with `store_name = "chartState"` and the appropriate JSON patch:
|
||||
|
||||
To switch symbol only:
|
||||
```json
|
||||
[{ "op": "replace", "path": "/symbol", "value": "SOL/USDT.BINANCE" }]
|
||||
```
|
||||
|
||||
To switch symbol and period (period is seconds: 60=1m, 300=5m, 900=15m, 3600=1h, 86400=1D):
|
||||
```json
|
||||
[
|
||||
{ "op": "replace", "path": "/symbol", "value": "SOL/USDT.BINANCE" },
|
||||
{ "op": "replace", "path": "/period", "value": 900 }
|
||||
]
|
||||
```
|
||||
|
||||
You already know this format — do not search for it. After patching, confirm the change to the user.
|
||||
|
||||
## Investment Advice
|
||||
**NEVER** recommend any specific ticker, trade, or position. You may suggest mechanical adjustments or improvements to strategies, but you must **NEVER** offer an opinion on a specific trade or position. You are **NOT** a registered investment advisor.
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
This is your first chat with a new user. Welcome them to Dexorder and describe who are you and what can you do.
|
||||
This is your first chat with a new user. Welcome them to Dexorder, and describe who you are and what can you do.
|
||||
@@ -83,6 +83,15 @@ self.config.initial_capital # starting capital in quote currency
|
||||
| `sell_vol` | float | Sell-side volume (taker sells) |
|
||||
| `open_interest` | float | Open interest (futures only; NaN for spot) |
|
||||
|
||||
### Available data — crypto only
|
||||
|
||||
Strategies have access **only** to crypto OHLC feeds with volume, buy/sell volume split, and open interest. The following are **not available** and must never be referenced in a strategy:
|
||||
|
||||
- **TradFi data** — equities, forex, bonds, futures spreads, options, macro indicators, interest rates, etc.
|
||||
- **Alternative data** — news feeds, social sentiment (Twitter/Reddit), on-chain metrics, economic calendars, earnings, etc.
|
||||
|
||||
If a user requests a strategy that depends on unavailable data, explain the limitation and offer a crypto-native alternative (e.g. use order-flow imbalance instead of news sentiment).
|
||||
|
||||
---
|
||||
|
||||
## Section B — Strategy Metadata
|
||||
@@ -355,3 +364,16 @@ deactivate_strategy(strategy_name) # Stop and get final PnL
|
||||
- 4h bars: 100k bars ≈ 45 years → cap at 5 years (≈ 10,950 bars)
|
||||
|
||||
7. **Never `import` from `dexorder` inside `evaluate()`** — the strategy file is exec'd in a sandbox with PandasStrategy and pandas_ta pre-loaded. Standard library and pandas/numpy/pandas_ta are available.
|
||||
|
||||
8. **No LLM calls inside strategies** — strategies must be fully deterministic. LLM invocations are prohibited because:
|
||||
- They are slow and expensive, making backtesting impractical.
|
||||
- Any temperature > 0 produces non-repeatable outputs, breaking backtest reproducibility.
|
||||
- The correct model is: the LLM *writes* the strategy; the strategy runs without LLM involvement.
|
||||
- Walk-forward LLM integration (via timer or data triggers) is a planned feature but is **not yet implemented**. Do not attempt to approximate it now.
|
||||
|
||||
9. **`evaluate()` must be fast, lightweight, and deterministic** — it is called on every bar during backtesting across potentially hundreds of thousands of bars. Specifically:
|
||||
- **No heavy computation at runtime**: model inference, large matrix operations, file I/O, network calls, or database queries are forbidden inside `evaluate()`.
|
||||
- **ML is allowed with restrictions**: a model may be trained offline (e.g. in `__init__` using warm-up data), but inference in `evaluate()` must be fast (microseconds, not milliseconds). If training is compute-intensive, note this clearly in the strategy description.
|
||||
- **No randomness**: do not use `random`, `np.random`, or any non-seeded stochastic operation. All outputs given the same data must be identical across runs.
|
||||
|
||||
10. **Data scope** — strategies may only use data available in the `dfs` feeds. Do not attempt to fetch external data, call APIs, read files, or access anything outside the provided DataFrames. Crypto OHLCV + buy/sell volume + open interest is what is available; nothing else.
|
||||
|
||||
@@ -306,6 +306,25 @@ export class KubernetesClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete only the Deployment, preserving PVC (user data) and Service (stable DNS).
|
||||
* Used when applying a license tier change — next ensureContainerRunning recreates
|
||||
* the deployment with updated resource limits.
|
||||
*/
|
||||
async deleteDeploymentOnly(userId: string): Promise<void> {
|
||||
const deploymentName = KubernetesClient.getDeploymentName(userId);
|
||||
try {
|
||||
await this.appsApi.deleteNamespacedDeployment({
|
||||
name: deploymentName,
|
||||
namespace: this.config.namespace
|
||||
});
|
||||
this.config.logger.info({ deploymentName }, 'Deleted deployment (tier change)');
|
||||
} catch (error: any) {
|
||||
const is404 = error.code === 404 || error.response?.statusCode === 404 || error.statusCode === 404;
|
||||
if (!is404) throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete deployment and associated resources
|
||||
* (Used for cleanup/testing - normally handled by lifecycle sidecar)
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import { KubernetesClient, type DeploymentSpec } from './client.js';
|
||||
import type { License } from '../types/user.js';
|
||||
import type { License, LicenseTier } from '../types/user.js';
|
||||
import type { UserService } from '../db/user-service.js';
|
||||
|
||||
export interface ContainerManagerConfig {
|
||||
k8sClient: KubernetesClient;
|
||||
userService: UserService;
|
||||
sandboxImage: string;
|
||||
sidecarImage: string;
|
||||
storageClass: string;
|
||||
@@ -139,6 +141,17 @@ export class ContainerManager {
|
||||
return { exists: true, ready, mcpEndpoint };
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a canonical license tier to a user: updates DB and deletes the deployment
|
||||
* so it is recreated with the new resource limits on next connect.
|
||||
*/
|
||||
async applyLicenseTier(userId: string, tier: LicenseTier): Promise<License> {
|
||||
const license = await this.config.userService.setUserLicenseTier(userId, tier);
|
||||
await this.config.k8sClient.deleteDeploymentOnly(userId);
|
||||
this.config.logger.info({ userId, tier }, 'License tier applied; deployment will recreate on next connect');
|
||||
return license;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete container (for cleanup/testing)
|
||||
*/
|
||||
|
||||
@@ -42,7 +42,8 @@ export class ModelRouter {
|
||||
message: string,
|
||||
license: License,
|
||||
strategy: RoutingStrategy = RoutingStrategy.USER_PREFERENCE,
|
||||
userId?: string
|
||||
userId?: string,
|
||||
maxTokens?: number
|
||||
): Promise<{ model: BaseChatModel; middleware: ModelMiddleware }> {
|
||||
let modelConfig: ModelConfig;
|
||||
|
||||
@@ -67,12 +68,17 @@ export class ModelRouter {
|
||||
modelConfig = this.defaultModel;
|
||||
}
|
||||
|
||||
if (maxTokens !== undefined) {
|
||||
modelConfig = { ...modelConfig, maxTokens };
|
||||
}
|
||||
|
||||
this.logger.info(
|
||||
{
|
||||
userId,
|
||||
strategy,
|
||||
provider: modelConfig.provider,
|
||||
model: modelConfig.model,
|
||||
maxTokens: modelConfig.maxTokens,
|
||||
},
|
||||
'Routing to model'
|
||||
);
|
||||
|
||||
@@ -22,6 +22,7 @@ import { AgentHarness, type HarnessSessionConfig } from './harness/agent-harness
|
||||
import { OHLCService } from './services/ohlc-service.js';
|
||||
import { SymbolIndexService } from './services/symbol-index-service.js';
|
||||
import { SymbolRoutes } from './routes/symbol-routes.js';
|
||||
import { AdminRoutes } from './routes/admin-routes.js';
|
||||
|
||||
// Catch unhandled promise rejections for better debugging
|
||||
process.on('unhandledRejection', (reason: any, promise) => {
|
||||
@@ -309,6 +310,7 @@ const k8sClient = new KubernetesClient({
|
||||
|
||||
const containerManager = new ContainerManager({
|
||||
k8sClient,
|
||||
userService,
|
||||
sandboxImage: config.kubernetes.sandboxImage,
|
||||
sidecarImage: config.kubernetes.sidecarImage,
|
||||
storageClass: config.kubernetes.storageClass,
|
||||
@@ -439,6 +441,9 @@ const getSymbolService = () => symbolIndexService;
|
||||
const symbolRoutes = new SymbolRoutes({ getSymbolIndexService: getSymbolService });
|
||||
symbolRoutes.register(app);
|
||||
|
||||
// Register admin routes
|
||||
new AdminRoutes(containerManager, userService).register(app);
|
||||
|
||||
app.log.debug('All routes registered');
|
||||
|
||||
// Health check
|
||||
@@ -715,7 +720,6 @@ try {
|
||||
icebergClient,
|
||||
logger: app.log,
|
||||
});
|
||||
await indexService.initialize();
|
||||
|
||||
// Assign to module-level variable so onMetadataUpdate callback can use it
|
||||
symbolIndexService = indexService;
|
||||
@@ -723,7 +727,17 @@ try {
|
||||
// Update websocket handler's config so it can use the service
|
||||
(websocketHandler as any).config.symbolIndexService = indexService;
|
||||
|
||||
app.log.info({ stats: symbolIndexService.getStats() }, 'Symbol index service initialized');
|
||||
// Retry until we get at least some symbol metadata
|
||||
while (true) {
|
||||
await indexService.initialize();
|
||||
const stats = indexService.getStats();
|
||||
if (stats.symbolCount > 0) {
|
||||
app.log.info({ stats }, 'Symbol index service initialized');
|
||||
break;
|
||||
}
|
||||
app.log.warn('Symbol index has no metadata yet, retrying in 5 seconds...');
|
||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||
}
|
||||
} catch (error) {
|
||||
app.log.warn({ error }, 'Failed to initialize symbol index service - symbol search will not be available');
|
||||
}
|
||||
|
||||
35
gateway/src/routes/admin-routes.ts
Normal file
35
gateway/src/routes/admin-routes.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import type { ContainerManager } from '../k8s/container-manager.js';
|
||||
import type { UserService } from '../db/user-service.js';
|
||||
import type { LicenseTier } from '../types/user.js';
|
||||
|
||||
const VALID_TIERS: LicenseTier[] = ['free', 'pro', 'enterprise'];
|
||||
|
||||
export class AdminRoutes {
|
||||
private containerManager: ContainerManager;
|
||||
private userService: UserService;
|
||||
|
||||
constructor(containerManager: ContainerManager, userService: UserService) {
|
||||
this.containerManager = containerManager;
|
||||
this.userService = userService;
|
||||
}
|
||||
|
||||
register(app: FastifyInstance): void {
|
||||
app.post<{ Params: { userId: string }; Body: { tier: string } }>(
|
||||
'/admin/users/:userId/set-tier',
|
||||
async (req, reply) => {
|
||||
const { userId } = req.params;
|
||||
const { tier } = req.body;
|
||||
if (!VALID_TIERS.includes(tier as LicenseTier)) {
|
||||
return reply.code(400).send({ error: `Invalid tier. Must be one of: ${VALID_TIERS.join(', ')}` });
|
||||
}
|
||||
const license = await this.containerManager.applyLicenseTier(userId, tier as LicenseTier);
|
||||
return { userId, tier, license };
|
||||
}
|
||||
);
|
||||
|
||||
app.post('/admin/migrate-licenses', async () => {
|
||||
return await this.userService.migrateAllLicenses();
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -167,11 +167,7 @@ export class OHLCService {
|
||||
period_seconds,
|
||||
}, 'Failed to fetch historical data');
|
||||
|
||||
// Return empty result on error
|
||||
return {
|
||||
bars: [],
|
||||
noData: true,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
87
gateway/src/test-deepinfra-chunks.ts
Normal file
87
gateway/src/test-deepinfra-chunks.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Direct DeepInfra streaming test — bypasses LangChain entirely.
|
||||
* Logs each delta.content with JSON.stringify so spaces are unambiguous.
|
||||
*
|
||||
* Usage:
|
||||
* DEEPINFRA_API_KEY=$(op read "op://Private/DeepInfra/credential") npx tsx src/test-deepinfra-chunks.ts
|
||||
*/
|
||||
|
||||
export {};
|
||||
|
||||
const DEEP_INFRA_URL = 'https://api.deepinfra.com/v1/openai/chat/completions';
|
||||
const MODEL = 'zai-org/GLM-5';
|
||||
|
||||
const apiKey = process.env.DEEPINFRA_API_KEY;
|
||||
if (!apiKey) {
|
||||
console.error('DEEPINFRA_API_KEY is not set');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const res = await fetch(DEEP_INFRA_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: MODEL,
|
||||
stream: true,
|
||||
messages: [
|
||||
{ role: 'user', content: 'Write two sentences about ETH price analysis.' },
|
||||
],
|
||||
}),
|
||||
});
|
||||
|
||||
if (!res.ok || !res.body) {
|
||||
console.error(`HTTP ${res.status}: ${await res.text()}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const reader = res.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let chunkIndex = 0;
|
||||
let assembled = '';
|
||||
|
||||
console.log(`Testing model: ${MODEL}`);
|
||||
console.log('--- chunks ---');
|
||||
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const text = decoder.decode(value, { stream: true });
|
||||
|
||||
for (const line of text.split('\n')) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed.startsWith('data:')) continue;
|
||||
const data = trimmed.slice(5).trimStart();
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(data);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
const choice = (parsed as { choices?: Array<{ delta?: Record<string, unknown> }> })
|
||||
?.choices?.[0];
|
||||
const delta = choice?.delta;
|
||||
const content = delta?.content as string | undefined;
|
||||
|
||||
if (content !== undefined) {
|
||||
const endsSpace = content.endsWith(' ');
|
||||
const startsSpace = content.startsWith(' ');
|
||||
// Log full delta so we can see all available fields (logprobs, token_ids, etc.)
|
||||
console.log(
|
||||
`chunk[${chunkIndex++}]: ${JSON.stringify(content)} ` +
|
||||
`(len=${content.length}, startsSpace=${startsSpace}, endsSpace=${endsSpace}) ` +
|
||||
`delta=${JSON.stringify(delta)}`,
|
||||
);
|
||||
assembled += content;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log('--- assembled ---');
|
||||
console.log(assembled);
|
||||
@@ -42,7 +42,8 @@ Use this tool for:
|
||||
- Recommending indicators for a given strategy or analysis goal
|
||||
|
||||
ALWAYS use this tool for any request about the chart's indicators.
|
||||
NEVER modify the indicators workspace store directly.`,
|
||||
NEVER modify the indicators workspace store directly.
|
||||
NEVER use this tool to switch the chart symbol or timeframe — that is done via workspace_patch on chartState.`,
|
||||
schema: z.object({
|
||||
instruction: z.string().describe(
|
||||
'The indicator task to perform. Be specific about which indicators, parameters, ' +
|
||||
|
||||
@@ -30,13 +30,18 @@ export function createWebExploreAgentTool(config: WebExploreAgentToolConfig): Dy
|
||||
|
||||
const tool = new DynamicStructuredTool({
|
||||
name: 'web_explore',
|
||||
description: `Search the web or academic databases and return a summarized answer.
|
||||
description: `Search the EXTERNAL web or academic databases and return a summarized answer.
|
||||
|
||||
Use this tool when the user asks about:
|
||||
Use this tool ONLY for external, public information:
|
||||
- Current events, news, or real-time information
|
||||
- Documentation, tutorials, or how-to guides
|
||||
- External documentation, tutorials, or how-to guides for third-party libraries/tools
|
||||
- Academic papers, research findings, or scientific topics
|
||||
- Any topic that benefits from external sources
|
||||
- Any topic requiring external sources
|
||||
|
||||
NEVER use this tool for:
|
||||
- Questions about the Dexorder platform itself (workspace tools, chartState, indicators, strategies)
|
||||
- Internal API usage (workspace_patch, workspace_read, etc.) — consult the system prompt instead
|
||||
- Anything that can be answered from the context already available
|
||||
|
||||
The subagent will search the web (or arXiv for academic queries), fetch relevant content, and return a markdown summary with cited sources.`,
|
||||
schema: z.object({
|
||||
|
||||
@@ -76,7 +76,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
||||
maxTokensPerMessage: 4096, rateLimitPerMinute: 10,
|
||||
},
|
||||
k8sResources: {
|
||||
memoryRequest: '256Mi', memoryLimit: '512Mi',
|
||||
memoryRequest: '256Mi', memoryLimit: '8Gi',
|
||||
cpuRequest: '100m', cpuLimit: '500m',
|
||||
storage: '1Gi', tmpSizeLimit: '128Mi',
|
||||
enableIdleShutdown: true, idleTimeoutMinutes: 15,
|
||||
@@ -93,7 +93,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
||||
maxTokensPerMessage: 8192, rateLimitPerMinute: 60,
|
||||
},
|
||||
k8sResources: {
|
||||
memoryRequest: '512Mi', memoryLimit: '2Gi',
|
||||
memoryRequest: '512Mi', memoryLimit: '8Gi',
|
||||
cpuRequest: '250m', cpuLimit: '2000m',
|
||||
storage: '10Gi', tmpSizeLimit: '256Mi',
|
||||
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
||||
@@ -110,7 +110,7 @@ export const LICENSE_TIER_TEMPLATES: Record<LicenseTier, License> = {
|
||||
maxTokensPerMessage: 32768, rateLimitPerMinute: 300,
|
||||
},
|
||||
k8sResources: {
|
||||
memoryRequest: '1Gi', memoryLimit: '4Gi',
|
||||
memoryRequest: '1Gi', memoryLimit: '8Gi',
|
||||
cpuRequest: '500m', cpuLimit: '4000m',
|
||||
storage: '50Gi', tmpSizeLimit: '512Mi',
|
||||
enableIdleShutdown: false, idleTimeoutMinutes: 0,
|
||||
|
||||
@@ -79,12 +79,12 @@ export interface StoreConfig {
|
||||
export const DEFAULT_STORES: StoreConfig[] = [
|
||||
{
|
||||
name: 'chartState',
|
||||
persistent: false,
|
||||
persistent: true,
|
||||
initialState: () => ({
|
||||
symbol: 'BTC/USDT.BINANCE',
|
||||
start_time: null,
|
||||
end_time: null,
|
||||
period: '15',
|
||||
period: 900,
|
||||
selected_shapes: [],
|
||||
}),
|
||||
},
|
||||
|
||||
@@ -1,6 +1,37 @@
|
||||
// CCXT data fetcher for historical OHLC and realtime ticks
|
||||
import ccxt from 'ccxt';
|
||||
|
||||
/**
|
||||
* Thrown when an exchange returns a 429 rate-limit response.
|
||||
* retryAfterMs is derived from the exchange's Retry-After header when available.
|
||||
*/
|
||||
export class ExchangeRateLimitError extends Error {
|
||||
constructor(exchange, retryAfterMs, originalMessage) {
|
||||
super(`Rate limit on ${exchange}: retry after ${retryAfterMs}ms (${originalMessage})`);
|
||||
this.name = 'ExchangeRateLimitError';
|
||||
this.exchange = exchange.toUpperCase();
|
||||
this.retryAfterMs = retryAfterMs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract retry-after duration in milliseconds from a CCXT RateLimitExceeded error.
|
||||
* Priority: Retry-After header → error message numeric → 30s fallback.
|
||||
*/
|
||||
function extractRetryAfterMs(exchange, error) {
|
||||
const header = exchange.last_response_headers?.['retry-after'];
|
||||
if (header) {
|
||||
const secs = parseFloat(header);
|
||||
if (!isNaN(secs)) return Math.ceil(secs * 1000);
|
||||
}
|
||||
// Some exchanges embed the delay in the message (e.g. "retry after 5000 ms")
|
||||
const msMatch = error.message?.match(/(\d+)\s*ms/i);
|
||||
if (msMatch) return parseInt(msMatch[1], 10);
|
||||
const secMatch = error.message?.match(/(\d+(?:\.\d+)?)\s*s(?:ec|econds?)?/i);
|
||||
if (secMatch) return Math.ceil(parseFloat(secMatch[1]) * 1000);
|
||||
return 30_000;
|
||||
}
|
||||
|
||||
export class CCXTFetcher {
|
||||
constructor(config, logger, metadataGenerator = null) {
|
||||
this.config = config;
|
||||
@@ -135,9 +166,12 @@ export class CCXTFetcher {
|
||||
break;
|
||||
} catch (error) {
|
||||
lastError = error;
|
||||
const isRetryable = error.constructor?.name === 'NetworkError' ||
|
||||
const isRateLimit = error.constructor?.name === 'RateLimitExceeded';
|
||||
const isRetryable = !isRateLimit && (
|
||||
error.constructor?.name === 'NetworkError' ||
|
||||
error.constructor?.name === 'RequestTimeout' ||
|
||||
error.constructor?.name === 'ExchangeNotAvailable';
|
||||
error.constructor?.name === 'ExchangeNotAvailable'
|
||||
);
|
||||
this.logger.warn(
|
||||
{
|
||||
errorType: error.constructor?.name,
|
||||
@@ -146,15 +180,21 @@ export class CCXTFetcher {
|
||||
ticker,
|
||||
since,
|
||||
attempt,
|
||||
retryable: isRetryable
|
||||
retryable: isRetryable,
|
||||
rateLimit: isRateLimit
|
||||
},
|
||||
'OHLC fetch attempt failed'
|
||||
);
|
||||
if (!isRetryable || attempt === FETCH_RETRIES) break;
|
||||
if (isRateLimit || !isRetryable || attempt === FETCH_RETRIES) break;
|
||||
await exchange.sleep(FETCH_RETRY_DELAY_MS * attempt);
|
||||
}
|
||||
}
|
||||
if (lastError) {
|
||||
if (lastError.constructor?.name === 'RateLimitExceeded') {
|
||||
const retryAfterMs = extractRetryAfterMs(exchange, lastError);
|
||||
this.logger.warn({ ticker, retryAfterMs }, 'OHLC fetch rate-limited by exchange');
|
||||
throw new ExchangeRateLimitError(exchangeName, retryAfterMs, lastError.message);
|
||||
}
|
||||
this.logger.error(
|
||||
{
|
||||
errorType: lastError.constructor?.name,
|
||||
@@ -278,6 +318,11 @@ export class CCXTFetcher {
|
||||
// Convert to our Tick format
|
||||
return trades.map(trade => this.convertToTick(trade, ticker, metadata));
|
||||
} catch (error) {
|
||||
if (error.constructor?.name === 'RateLimitExceeded') {
|
||||
const retryAfterMs = extractRetryAfterMs(exchange, error);
|
||||
this.logger.warn({ ticker, retryAfterMs }, 'Trades fetch rate-limited by exchange');
|
||||
throw new ExchangeRateLimitError(exchangeName, retryAfterMs, error.message);
|
||||
}
|
||||
this.logger.error(
|
||||
{ error: error.message, ticker },
|
||||
'Error fetching trades'
|
||||
|
||||
@@ -6,9 +6,10 @@ import { parse as parseYaml } from 'yaml';
|
||||
import pino from 'pino';
|
||||
import { ZmqClient } from './zmq-client.js';
|
||||
import { KafkaProducer } from './kafka-producer.js';
|
||||
import { CCXTFetcher } from './ccxt-fetcher.js';
|
||||
import { CCXTFetcher, ExchangeRateLimitError } from './ccxt-fetcher.js';
|
||||
import { RealtimePoller } from './realtime-poller.js';
|
||||
import { SymbolMetadataGenerator } from './symbol-metadata-generator.js';
|
||||
import { SlotType } from './proto/messages.js';
|
||||
|
||||
// Logger setup
|
||||
const logger = pino({
|
||||
@@ -64,10 +65,162 @@ function loadConfig() {
|
||||
supported_exchanges: config.supported_exchanges || ['binance', 'coinbase', 'kraken'],
|
||||
symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000,
|
||||
|
||||
// Per-exchange slot capacity
|
||||
exchange_capacity: config.exchange_capacity || {
|
||||
BINANCE: { historical_slots: 3, realtime_slots: 5 },
|
||||
KRAKEN: { historical_slots: 2, realtime_slots: 3 },
|
||||
COINBASE: { historical_slots: 2, realtime_slots: 4 }
|
||||
},
|
||||
|
||||
...secrets
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Manages work slots per exchange per job type.
|
||||
*
|
||||
* Each slot corresponds to one WorkerReady message sent to Flink. Flink consumes
|
||||
* a slot when it dispatches a job. The slot is re-offered (via another WorkerReady)
|
||||
* once the job completes, subject to any rate-limit backoff dictated by the exchange.
|
||||
*/
|
||||
class SlotPool {
|
||||
constructor(exchangeCapacity, zmqClient, logger) {
|
||||
this.zmqClient = zmqClient;
|
||||
this.logger = logger;
|
||||
|
||||
// Key: 'EXCHANGE|TYPE' (e.g. 'BINANCE|HISTORICAL')
|
||||
// Value: { max, active: Set<jobId>, backoffUntil: ms timestamp }
|
||||
this.slots = new Map();
|
||||
|
||||
for (const [exchange, cap] of Object.entries(exchangeCapacity)) {
|
||||
const ex = exchange.toUpperCase();
|
||||
this.slots.set(`${ex}|HISTORICAL`, {
|
||||
max: cap.historical_slots ?? 2,
|
||||
active: new Set(),
|
||||
backoffUntil: 0
|
||||
});
|
||||
this.slots.set(`${ex}|REALTIME`, {
|
||||
max: cap.realtime_slots ?? 3,
|
||||
active: new Set(),
|
||||
backoffUntil: 0
|
||||
});
|
||||
}
|
||||
|
||||
// jobId → { exchange, type } for release tracking
|
||||
this.jobMap = new Map();
|
||||
}
|
||||
|
||||
/**
|
||||
* Register the onConnected callback so slot offers are sent on every
|
||||
* TCP (re)connect rather than once at startup. Handles both the initial
|
||||
* connection race (Flink ROUTER not yet ready) and Flink restarts.
|
||||
*/
|
||||
init() {
|
||||
this.zmqClient.onConnected = () => this._offerAllFreeSlots();
|
||||
this.logger.info(
|
||||
{ slots: [...this.slots.entries()].map(([k, v]) => `${k}:${v.max}`) },
|
||||
'Slot pool initialized — will offer slots on connect'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-offer all currently-free slots. Called on every TCP (re)connect.
|
||||
* Sends (max - active) WorkerReady messages per exchange+type key.
|
||||
*/
|
||||
async _offerAllFreeSlots() {
|
||||
const summary = [];
|
||||
for (const [key, slot] of this.slots) {
|
||||
const [exchange, type] = key.split('|');
|
||||
const freeCount = slot.max - slot.active.size;
|
||||
for (let i = 0; i < freeCount; i++) {
|
||||
await this.zmqClient.sendTypedReady(exchange, SlotType[type]);
|
||||
}
|
||||
summary.push(`${key}:${freeCount}/${slot.max}`);
|
||||
}
|
||||
this.logger.info({ offered: summary }, 'Re-offered all free slots on connect');
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a slot as occupied by jobId.
|
||||
* @param {string} jobId
|
||||
* @param {string} exchange - e.g. 'BINANCE'
|
||||
* @param {string} type - 'HISTORICAL' | 'REALTIME'
|
||||
*/
|
||||
consumeSlot(jobId, exchange, type) {
|
||||
const key = `${exchange.toUpperCase()}|${type}`;
|
||||
const slot = this.slots.get(key);
|
||||
if (slot) {
|
||||
if (slot.active.size >= slot.max) {
|
||||
this.logger.warn({ jobId, key, active: slot.active.size, max: slot.max }, 'Slot capacity exceeded — rejecting job');
|
||||
return false;
|
||||
}
|
||||
slot.active.add(jobId);
|
||||
this.jobMap.set(jobId, { exchange: exchange.toUpperCase(), type });
|
||||
this.logger.debug({ jobId, key, active: slot.active.size, max: slot.max }, 'Slot consumed');
|
||||
return true;
|
||||
}
|
||||
this.logger.warn({ jobId, key }, 'No slot config for this exchange+type');
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the slot occupied by jobId and re-offer it to Flink (after any backoff).
|
||||
*/
|
||||
async releaseSlot(jobId) {
|
||||
const info = this.jobMap.get(jobId);
|
||||
if (!info) {
|
||||
this.logger.warn({ jobId }, 'releaseSlot called for unknown jobId');
|
||||
return;
|
||||
}
|
||||
this.jobMap.delete(jobId);
|
||||
const key = `${info.exchange}|${info.type}`;
|
||||
const slot = this.slots.get(key);
|
||||
if (slot) {
|
||||
slot.active.delete(jobId);
|
||||
await this._offerSlot(info.exchange, info.type, slot);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a rate limit from the exchange. Delays slot re-offer by retryAfterMs.
|
||||
* @param {string} exchange
|
||||
* @param {string} type - 'HISTORICAL' | 'REALTIME'
|
||||
* @param {number} retryAfterMs
|
||||
*/
|
||||
reportRateLimit(exchange, type, retryAfterMs) {
|
||||
const key = `${exchange.toUpperCase()}|${type}`;
|
||||
const slot = this.slots.get(key);
|
||||
if (slot) {
|
||||
slot.backoffUntil = Math.max(slot.backoffUntil, Date.now() + retryAfterMs);
|
||||
this.logger.warn({ exchange, type, retryAfterMs }, 'Rate limit backoff set for slot');
|
||||
}
|
||||
}
|
||||
|
||||
async _offerSlot(exchange, type, slot) {
|
||||
const now = Date.now();
|
||||
if (now < slot.backoffUntil) {
|
||||
const delay = slot.backoffUntil - now;
|
||||
this.logger.info({ exchange, type, delayMs: delay }, 'Slot in backoff — scheduling re-offer');
|
||||
setTimeout(() => this._offerSlot(exchange, type, slot), delay);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await this.zmqClient.sendTypedReady(exchange, SlotType[type]);
|
||||
this.logger.debug({ exchange, type }, 'Slot re-offered to Flink');
|
||||
} catch (err) {
|
||||
this.logger.error({ exchange, type, error: err.message }, 'Failed to re-offer slot');
|
||||
}
|
||||
}
|
||||
|
||||
shutdown() {}
|
||||
}
|
||||
|
||||
/** Extract exchange name from ticker string, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
|
||||
function exchangeOf(ticker) {
|
||||
const lastDot = ticker?.lastIndexOf('.');
|
||||
return (lastDot >= 0) ? ticker.slice(lastDot + 1).toUpperCase() : 'UNKNOWN';
|
||||
}
|
||||
|
||||
class IngestorWorker {
|
||||
constructor(config, logger) {
|
||||
this.config = config;
|
||||
@@ -92,7 +245,22 @@ class IngestorWorker {
|
||||
logger.child({ component: 'poller' })
|
||||
);
|
||||
|
||||
// jobId → active realtime subscription (for stop handling)
|
||||
this.pool = new SlotPool(
|
||||
config.exchange_capacity,
|
||||
this.zmqClient,
|
||||
logger.child({ component: 'pool' })
|
||||
);
|
||||
|
||||
// When realtime poller terminates a subscription due to repeated errors, release its slot.
|
||||
this.realtimePoller.onJobComplete = (jobId, error) => {
|
||||
if (error instanceof ExchangeRateLimitError) {
|
||||
this.pool.reportRateLimit(error.exchange, 'REALTIME', error.retryAfterMs);
|
||||
}
|
||||
this.pool.releaseSlot(jobId).catch(err =>
|
||||
this.logger.error({ jobId, error: err.message }, 'Failed to release slot after realtime error'));
|
||||
};
|
||||
|
||||
// jobId set for active realtime subscriptions
|
||||
this.activeRealtime = new Set();
|
||||
|
||||
this.isShutdown = false;
|
||||
@@ -108,7 +276,10 @@ class IngestorWorker {
|
||||
this.zmqClient.onWorkAssign = req => this.handleWorkAssign(req);
|
||||
this.zmqClient.onWorkStop = jobId => this.handleWorkStop(jobId);
|
||||
|
||||
await this.zmqClient.connect(); // also sends WorkerReady
|
||||
// Register slot offer callback before connecting so we don't miss the event
|
||||
this.pool.init();
|
||||
|
||||
await this.zmqClient.connect();
|
||||
|
||||
// Generate symbol metadata on startup
|
||||
this.logger.info('Generating initial symbol metadata');
|
||||
@@ -139,18 +310,26 @@ class IngestorWorker {
|
||||
*/
|
||||
handleWorkAssign(request) {
|
||||
const { jobId, requestId, type, ticker } = request;
|
||||
const exchange = exchangeOf(ticker);
|
||||
|
||||
this.logger.info({ jobId, requestId, type, ticker }, 'Received WorkAssign');
|
||||
this.logger.info({ jobId, requestId, type, ticker, exchange }, 'Received WorkAssign');
|
||||
|
||||
// HISTORICAL_OHLC = 0 (proto3 default, may appear as undefined or 'HISTORICAL_OHLC')
|
||||
const isHistorical = !type || type === 'HISTORICAL_OHLC' || type === 0;
|
||||
const isRealtime = type === 'REALTIME_TICKS' || type === 1;
|
||||
|
||||
if (isHistorical) {
|
||||
if (!this.pool.consumeSlot(jobId, exchange, 'HISTORICAL')) {
|
||||
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
|
||||
return;
|
||||
}
|
||||
this.handleHistoricalRequest(request).catch(err => {
|
||||
this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in historical handler');
|
||||
});
|
||||
} else if (isRealtime) {
|
||||
if (!this.pool.consumeSlot(jobId, exchange, 'REALTIME')) {
|
||||
this.zmqClient.sendReject(jobId, 'Slot capacity exceeded').catch(() => {});
|
||||
return;
|
||||
}
|
||||
this.handleRealtimeRequest(request);
|
||||
} else {
|
||||
this.logger.warn({ jobId, type }, 'Unknown request type — rejecting');
|
||||
@@ -165,7 +344,9 @@ class IngestorWorker {
|
||||
this.logger.info({ jobId }, 'Received WorkStop — cancelling realtime subscription');
|
||||
this.realtimePoller.cancelSubscription(jobId);
|
||||
this.activeRealtime.delete(jobId);
|
||||
// No WorkComplete needed — Flink sent the stop, it already knows
|
||||
this.pool.releaseSlot(jobId).catch(err =>
|
||||
this.logger.warn({ jobId, error: err.message }, 'Failed to release slot after WorkStop'));
|
||||
// No WorkComplete needed — Flink sent the stop, it already knows.
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -174,10 +355,14 @@ class IngestorWorker {
|
||||
*/
|
||||
async handleHistoricalRequest(request) {
|
||||
const { jobId, requestId, ticker, historical, clientId: client_id } = request;
|
||||
const exchange = exchangeOf(ticker);
|
||||
const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical || {};
|
||||
|
||||
this.logger.info({ jobId, requestId, ticker, period_seconds }, 'Processing historical OHLC request');
|
||||
|
||||
// Immediately ack to reset Flink's dispatch-time timeout clock.
|
||||
await this.zmqClient.sendHeartbeat(jobId);
|
||||
|
||||
try {
|
||||
const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
|
||||
ticker, start_time, end_time, period_seconds, limit
|
||||
@@ -193,7 +378,10 @@ class IngestorWorker {
|
||||
const isLastPage = (i + PAGE_SIZE) >= candles.length;
|
||||
await this.kafkaProducer.writeOHLCs(this.config.kafka_ohlc_topic, page, metadata, isLastPage);
|
||||
}
|
||||
this.logger.info({ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) }, 'Wrote all pages to Kafka');
|
||||
this.logger.info(
|
||||
{ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) },
|
||||
'Wrote all pages to Kafka'
|
||||
);
|
||||
} else {
|
||||
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
||||
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
||||
@@ -207,6 +395,10 @@ class IngestorWorker {
|
||||
} catch (error) {
|
||||
this.logger.error({ jobId, requestId, ticker, error: error.message }, 'Historical request failed');
|
||||
|
||||
if (error instanceof ExchangeRateLimitError) {
|
||||
this.pool.reportRateLimit(exchange, 'HISTORICAL', error.retryAfterMs);
|
||||
}
|
||||
|
||||
try {
|
||||
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
|
||||
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
|
||||
@@ -218,11 +410,14 @@ class IngestorWorker {
|
||||
|
||||
await this.zmqClient.sendComplete(jobId, false, error.message);
|
||||
}
|
||||
|
||||
// Release slot regardless of success or failure
|
||||
this.pool.releaseSlot(jobId).catch(err =>
|
||||
this.logger.error({ jobId, error: err.message }, 'Failed to release historical slot'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Start realtime tick polling for a job dispatched by Flink.
|
||||
* Ticks flow: exchange → Kafka market-tick → Flink → OHLC bars → clients.
|
||||
*/
|
||||
handleRealtimeRequest(request) {
|
||||
const { jobId, requestId, ticker } = request;
|
||||
@@ -247,6 +442,7 @@ class IngestorWorker {
|
||||
|
||||
if (this.metadataInterval) clearInterval(this.metadataInterval);
|
||||
|
||||
this.pool.shutdown();
|
||||
this.realtimePoller.shutdown();
|
||||
await this.ccxtFetcher.close();
|
||||
await this.metadataGenerator.close();
|
||||
|
||||
@@ -18,6 +18,10 @@ export class RealtimePoller {
|
||||
|
||||
this.pollingLoop = null;
|
||||
this.heartbeatLoop = null;
|
||||
|
||||
// Called with (jobId, error) when a subscription terminates abnormally.
|
||||
// Set by IngestorWorker to release the slot in SlotPool.
|
||||
this.onJobComplete = null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -147,6 +151,7 @@ export class RealtimePoller {
|
||||
} catch (zmqErr) {
|
||||
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
|
||||
}
|
||||
if (this.onJobComplete) this.onJobComplete(jobId, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,63 +28,61 @@ export class ZmqClient {
|
||||
|
||||
this.dealerSocket = null;
|
||||
this.isShutdown = false;
|
||||
this.activeJobId = null;
|
||||
this._idleHeartbeatInterval = null;
|
||||
|
||||
this.supportedExchanges = (config.supported_exchanges || ['BINANCE', 'COINBASE'])
|
||||
.map(e => e.toUpperCase());
|
||||
|
||||
// Callbacks set by IngestorWorker
|
||||
// Callbacks set by IngestorWorker / SlotPool
|
||||
this.onWorkAssign = null; // (DataRequest) => void
|
||||
this.onWorkStop = null; // (jobId) => void
|
||||
this.onConnected = null; // async () => void — fires on initial connect AND reconnect
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect DEALER socket to Flink IngestorBroker (ROUTER).
|
||||
* Sends WorkerReady immediately so Flink knows this worker is available.
|
||||
* Fires onConnected on every TCP (re)connect so SlotPool can re-offer slots.
|
||||
*/
|
||||
async connect() {
|
||||
const { flink_hostname, ingestor_broker_port = 5567 } = this.config;
|
||||
|
||||
this.dealerSocket = new zmq.Dealer();
|
||||
const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
|
||||
await this.dealerSocket.connect(endpoint);
|
||||
this.logger.info(`Connected DEALER to Flink IngestorBroker at ${endpoint}`);
|
||||
|
||||
// Register as available
|
||||
await this.sendReady();
|
||||
|
||||
// Periodically re-send WorkerReady when idle, to recover from missed initial registration
|
||||
this._idleHeartbeatInterval = setInterval(() => {
|
||||
if (this.activeJobId === null && !this.isShutdown) {
|
||||
this.sendReady().catch(err =>
|
||||
this.logger.warn({ error: err.message }, 'Failed to re-send WorkerReady'));
|
||||
// Subscribe to connection events BEFORE calling connect() so we catch the
|
||||
// initial establishment. The 'connect' event fires on initial TCP handshake
|
||||
// and again after every ZMQ reconnect (e.g. Flink restart).
|
||||
this.dealerSocket.events.on('connect', ({ address }) => {
|
||||
this.logger.info({ address }, 'DEALER connected to broker');
|
||||
if (this.onConnected) {
|
||||
this.onConnected().catch(err =>
|
||||
this.logger.error({ error: err.message }, 'onConnected callback failed'));
|
||||
}
|
||||
}, 30_000);
|
||||
});
|
||||
|
||||
const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
|
||||
this.dealerSocket.connect(endpoint);
|
||||
this.logger.info(`Connecting DEALER to Flink IngestorBroker at ${endpoint}`);
|
||||
|
||||
// Start receiving work in background
|
||||
this._receiveLoop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Send WorkerReady — called on connect and after each COMPLETE.
|
||||
* Send one typed WorkerReady slot offer.
|
||||
* @param {string} exchange - Exchange name (e.g. 'BINANCE')
|
||||
* @param {number} slotType - SlotType enum value (0=ANY, 1=HISTORICAL, 2=REALTIME)
|
||||
*/
|
||||
async sendReady() {
|
||||
async sendTypedReady(exchange, slotType) {
|
||||
const frames = encodeBrokerMessage(
|
||||
MessageTypeId.WORKER_READY,
|
||||
{ exchanges: this.supportedExchanges },
|
||||
{ exchanges: [exchange], jobType: slotType },
|
||||
WorkerReady
|
||||
);
|
||||
await this.dealerSocket.send(frames);
|
||||
this.logger.info({ exchanges: this.supportedExchanges }, 'Sent WorkerReady');
|
||||
this.logger.debug({ exchange, slotType }, 'Sent WorkerReady slot offer');
|
||||
}
|
||||
|
||||
/**
|
||||
* Send WorkComplete after a historical job finishes.
|
||||
* Automatically sends WorkerReady so Flink returns us to the free pool.
|
||||
* Slot re-registration is handled by SlotPool after this call.
|
||||
*/
|
||||
async sendComplete(jobId, success, errorMessage) {
|
||||
this.activeJobId = null;
|
||||
const frames = encodeBrokerMessage(
|
||||
MessageTypeId.WORK_COMPLETE,
|
||||
{
|
||||
@@ -96,9 +94,6 @@ export class ZmqClient {
|
||||
);
|
||||
await this.dealerSocket.send(frames);
|
||||
this.logger.info({ jobId, success }, 'Sent WorkComplete');
|
||||
|
||||
// Return to free pool
|
||||
await this.sendReady();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -153,12 +148,10 @@ export class ZmqClient {
|
||||
const payload = frames[2].slice(1);
|
||||
|
||||
if (typeId === MessageTypeId.WORK_ASSIGN) {
|
||||
// DataRequest protobuf
|
||||
const request = DataRequest.decode(payload);
|
||||
const req = DataRequest.toObject(request, {
|
||||
longs: String, enums: String, bytes: Buffer
|
||||
});
|
||||
this.activeJobId = req.jobId;
|
||||
this.logger.info(
|
||||
{ jobId: req.jobId, requestId: req.requestId, type: req.type, ticker: req.ticker },
|
||||
'Received WorkAssign from broker'
|
||||
@@ -192,10 +185,6 @@ export class ZmqClient {
|
||||
|
||||
async shutdown() {
|
||||
this.isShutdown = true;
|
||||
if (this._idleHeartbeatInterval) {
|
||||
clearInterval(this._idleHeartbeatInterval);
|
||||
this._idleHeartbeatInterval = null;
|
||||
}
|
||||
this.logger.info('Shutting down ZMQ DEALER connection');
|
||||
if (this.dealerSocket) {
|
||||
this.dealerSocket.close();
|
||||
|
||||
@@ -333,12 +333,27 @@ message FieldValue {
|
||||
|
||||
// ─── Ingestor Broker Protocol (Flink ROUTER ↔ Ingestor DEALER, port 5567) ───
|
||||
// Message type IDs 0x20–0x25
|
||||
//
|
||||
// Capacity model: each WorkerReady is ONE slot offer for a specific exchange
|
||||
// and job type. The ingestor sends N WorkerReady messages at startup (one per
|
||||
// available slot) and re-sends one after each job completes, subject to any
|
||||
// rate-limit backoff.
|
||||
|
||||
// Ingestor → Flink: register as available (type 0x20)
|
||||
// Sent on DEALER connect and after every COMPLETE.
|
||||
// Job type for a slot offer or assignment.
|
||||
enum SlotType {
|
||||
ANY = 0; // accepts any job type
|
||||
HISTORICAL = 1; // historical OHLC fetch slot
|
||||
REALTIME = 2; // realtime tick subscription slot
|
||||
}
|
||||
|
||||
// Ingestor → Flink: offer one work slot (type 0x20)
|
||||
// Sent once per available slot at startup and after each job completes.
|
||||
// One WorkerReady = one slot for one exchange and one job type.
|
||||
message WorkerReady {
|
||||
// Exchanges this ingestor supports (e.g. ["BINANCE", "COINBASE"])
|
||||
// Exchange this slot handles (single entry, e.g. ["BINANCE"])
|
||||
repeated string exchanges = 1;
|
||||
// Job type this slot accepts
|
||||
SlotType job_type = 2;
|
||||
}
|
||||
|
||||
// Ingestor → Flink: historical job finished (type 0x21)
|
||||
|
||||
@@ -510,3 +510,44 @@ def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dic
|
||||
log.info(f"Conda package sync complete: {len(result['removed'])} packages removed")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Async wrappers — non-blocking equivalents for use from asyncio contexts
|
||||
# =============================================================================
|
||||
|
||||
import asyncio as _asyncio
|
||||
|
||||
|
||||
async def get_installed_packages_async() -> Set[str]:
|
||||
"""Non-blocking wrapper around get_installed_packages()."""
|
||||
return await _asyncio.to_thread(get_installed_packages)
|
||||
|
||||
|
||||
async def install_packages_async(
|
||||
packages: list[str],
|
||||
data_dir: Optional[Path] = None,
|
||||
) -> dict:
|
||||
"""Non-blocking wrapper around install_packages()."""
|
||||
return await _asyncio.to_thread(install_packages, packages, data_dir)
|
||||
|
||||
|
||||
async def remove_packages_async(packages: list[str]) -> dict:
|
||||
"""Non-blocking wrapper around remove_packages()."""
|
||||
return await _asyncio.to_thread(remove_packages, packages)
|
||||
|
||||
|
||||
async def cleanup_extra_packages_async(
|
||||
data_dir: Path,
|
||||
environment_yml: Optional[Path] = None,
|
||||
) -> dict:
|
||||
"""Non-blocking wrapper around cleanup_extra_packages()."""
|
||||
return await _asyncio.to_thread(cleanup_extra_packages, data_dir, environment_yml)
|
||||
|
||||
|
||||
async def sync_packages_async(
|
||||
data_dir: Path,
|
||||
environment_yml: Optional[Path] = None,
|
||||
) -> dict:
|
||||
"""Non-blocking wrapper around sync_packages()."""
|
||||
return await _asyncio.to_thread(sync_packages, data_dir, environment_yml)
|
||||
|
||||
54
sandbox/dexorder/event_loop.py
Normal file
54
sandbox/dexorder/event_loop.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
Thread-safe asyncio.run() for the sandbox.
|
||||
|
||||
Installs a global replacement for asyncio.run() that, when called from a
|
||||
non-async thread while uvicorn's event loop is running, dispatches the
|
||||
coroutine to that loop via run_coroutine_threadsafe(). The calling thread
|
||||
blocks on future.result() — releasing the GIL — so uvicorn's loop runs
|
||||
freely (health checks, MCP requests, etc.).
|
||||
|
||||
Usage:
|
||||
from dexorder.event_loop import install_thread_safe_asyncio_run
|
||||
install_thread_safe_asyncio_run(asyncio.get_running_loop()) # call once at startup
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_main_loop: asyncio.AbstractEventLoop | None = None
|
||||
_original_asyncio_run = asyncio.run
|
||||
|
||||
|
||||
def install_thread_safe_asyncio_run(loop: asyncio.AbstractEventLoop) -> None:
|
||||
"""
|
||||
Patch asyncio.run globally to cooperate with uvicorn's event loop.
|
||||
Call once from the lifespan startup (main thread, loop already running).
|
||||
"""
|
||||
global _main_loop
|
||||
_main_loop = loop
|
||||
|
||||
def _thread_safe_run(coro, *, debug=None):
|
||||
# Detect if we're in a thread (no running loop in this thread)
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
# We're already inside an async context — asyncio.run() is not
|
||||
# valid here regardless; let it raise the normal error.
|
||||
raise RuntimeError(
|
||||
"asyncio.run() cannot be called when another event loop is running "
|
||||
"in the same thread."
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
if "cannot be called" in str(exc):
|
||||
raise
|
||||
# No running loop in this thread — safe to dispatch to main loop.
|
||||
if _main_loop is not None and _main_loop.is_running():
|
||||
log.debug("asyncio.run() from thread → run_coroutine_threadsafe")
|
||||
return asyncio.run_coroutine_threadsafe(coro, _main_loop).result()
|
||||
|
||||
# Fallback: main loop not available (e.g., called before startup or in tests)
|
||||
return _original_asyncio_run(coro, debug=debug)
|
||||
|
||||
asyncio.run = _thread_safe_run
|
||||
log.info("Installed thread-safe asyncio.run()")
|
||||
@@ -5,6 +5,8 @@ Tickers use Nautilus format: "BTC/USDT.BINANCE"
|
||||
All timestamps are nanoseconds since epoch.
|
||||
"""
|
||||
|
||||
import tracemalloc
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Tuple
|
||||
import pandas as pd
|
||||
import logging
|
||||
@@ -19,6 +21,19 @@ from pyiceberg.expressions import (
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _rss_mb() -> str:
|
||||
"""Return current VmRSS and VmPeak from /proc/self/status as a short string."""
|
||||
try:
|
||||
info = {}
|
||||
for line in Path("/proc/self/status").read_text().splitlines():
|
||||
for key in ("VmRSS", "VmPeak", "VmSize"):
|
||||
if line.startswith(f"{key}:"):
|
||||
info[key] = int(line.split()[1]) // 1024 # kB → MB
|
||||
return f"RSS={info.get('VmRSS','?')}MB peak={info.get('VmPeak','?')}MB virt={info.get('VmSize','?')}MB"
|
||||
except Exception:
|
||||
return "?"
|
||||
|
||||
|
||||
class IcebergClient:
|
||||
"""
|
||||
Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).
|
||||
@@ -114,8 +129,21 @@ class IcebergClient:
|
||||
if fetch_columns is not None:
|
||||
scan = scan.select(*fetch_columns)
|
||||
|
||||
if not tracemalloc.is_tracing():
|
||||
tracemalloc.start()
|
||||
tm_before = tracemalloc.take_snapshot()
|
||||
log.info("MEM before scan.to_pandas(): %s", _rss_mb())
|
||||
|
||||
df = scan.to_pandas()
|
||||
|
||||
log.info("MEM after scan.to_pandas(): %s | rows=%d cols=%s mem=%dMB",
|
||||
_rss_mb(), len(df), list(df.columns),
|
||||
df.memory_usage(deep=True).sum() // (1024 * 1024))
|
||||
tm_after = tracemalloc.take_snapshot()
|
||||
top = tm_after.compare_to(tm_before, "lineno")
|
||||
for stat in top[:5]:
|
||||
log.info("TRACEMALLOC: %s", stat)
|
||||
|
||||
if not df.empty:
|
||||
# Deduplicate: keep the most-recently-ingested row per timestamp.
|
||||
if "ingested_at" in df.columns:
|
||||
@@ -123,6 +151,7 @@ class IcebergClient:
|
||||
df.sort_values("ingested_at", ascending=False)
|
||||
.drop_duplicates(subset=["timestamp"])
|
||||
)
|
||||
log.info("MEM after dedup: %s | rows=%d", _rss_mb(), len(df))
|
||||
# Drop ingested_at if the caller did not ask for it
|
||||
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
|
||||
df = df.drop(columns=["ingested_at"])
|
||||
|
||||
85
sandbox/dexorder/memory_guard.py
Normal file
85
sandbox/dexorder/memory_guard.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Memory guard for sandbox containers.
|
||||
|
||||
Sets a soft RLIMIT_AS limit derived from the cgroup memory limit at a
|
||||
configurable fraction, so Python raises MemoryError before the kernel's
|
||||
OOM killer fires. The MCP session survives; only the tool call fails.
|
||||
"""
|
||||
|
||||
import gc
|
||||
import logging
|
||||
import resource
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _read_cgroup_limit_bytes() -> int | None:
|
||||
"""Read container memory.max from cgroup v2. Returns bytes or None."""
|
||||
try:
|
||||
val = Path("/sys/fs/cgroup/memory.max").read_text().strip()
|
||||
if val == "max":
|
||||
return None
|
||||
return int(val)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def setup_memory_limit(fraction: float) -> None:
|
||||
"""
|
||||
Set RLIMIT_AS soft limit to baseline VmSize + allowed growth.
|
||||
|
||||
RLIMIT_AS caps total virtual address space, which includes shared libraries
|
||||
and memory-mapped files that don't consume physical RAM. The baseline VmSize
|
||||
at startup can be 3+ GB even when RSS is only ~200 MB. Setting the limit to
|
||||
a flat cgroup fraction would crash immediately.
|
||||
|
||||
Instead: limit = current VmSize + (cgroup_limit * fraction)
|
||||
This allows `fraction` worth of new allocations (numpy arrays, pandas
|
||||
dataframes, etc.) above the startup baseline before raising MemoryError.
|
||||
|
||||
Args:
|
||||
fraction: Proportion of cgroup memory.max to allow as new growth, e.g. 0.85.
|
||||
"""
|
||||
cgroup_bytes = _read_cgroup_limit_bytes()
|
||||
|
||||
# Read baseline VmSize (total virtual address space at startup)
|
||||
vmsize_bytes: int | None = None
|
||||
try:
|
||||
for line in Path("/proc/self/status").read_text().splitlines():
|
||||
if line.startswith("VmSize:"):
|
||||
vmsize_bytes = int(line.split()[1]) * 1024 # kB → bytes
|
||||
log.info("Memory baseline: %s", line.strip())
|
||||
elif line.startswith("VmRSS:"):
|
||||
log.info("Memory baseline: %s", line.strip())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if cgroup_bytes is None:
|
||||
log.warning("cgroup memory.max is unlimited; RLIMIT_AS not set")
|
||||
return
|
||||
|
||||
allowed_growth_bytes = int(cgroup_bytes * fraction)
|
||||
baseline = vmsize_bytes or 0
|
||||
limit_bytes = baseline + allowed_growth_bytes
|
||||
|
||||
_, hard = resource.getrlimit(resource.RLIMIT_AS)
|
||||
resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, hard))
|
||||
log.info(
|
||||
"RLIMIT_AS soft limit set to %d MB (baseline %d MB + allowed growth %d MB, %.0f%% of cgroup %d MB)",
|
||||
limit_bytes // (1024 * 1024),
|
||||
baseline // (1024 * 1024),
|
||||
allowed_growth_bytes // (1024 * 1024),
|
||||
fraction * 100,
|
||||
cgroup_bytes // (1024 * 1024),
|
||||
)
|
||||
|
||||
|
||||
def cleanup_memory() -> None:
|
||||
"""
|
||||
Called after a MemoryError is caught in a tool execution thread.
|
||||
Runs gc.collect() to free objects held by the failed script.
|
||||
Hook here for future recovery strategies (cache eviction, etc.).
|
||||
"""
|
||||
log.warning("MemoryError in tool thread — running gc.collect()")
|
||||
gc.collect()
|
||||
186
sandbox/dexorder/tools/backtest_harness.py
Normal file
186
sandbox/dexorder/tools/backtest_harness.py
Normal file
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
backtest_harness — runs a strategy backtest as a subprocess.
|
||||
|
||||
Reads a JSON config from stdin:
|
||||
{
|
||||
"strategy_name": str,
|
||||
"feeds": [{"symbol": str, "period_seconds": int}, ...],
|
||||
"from_time": ...,
|
||||
"to_time": ...,
|
||||
"initial_capital": float,
|
||||
"paper": bool
|
||||
}
|
||||
|
||||
Outputs JSON to stdout on success:
|
||||
{
|
||||
"strategy_name": str,
|
||||
"feeds": [...],
|
||||
"initial_capital": float,
|
||||
"paper": bool,
|
||||
"total_candles": int,
|
||||
... (metrics from run_backtest)
|
||||
}
|
||||
|
||||
On error:
|
||||
{"error": str}
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure dexorder package is importable when run as a subprocess
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
|
||||
|
||||
_OHLC_EXTRA_COLUMNS = [
|
||||
"volume", "buy_vol", "sell_vol",
|
||||
"open_time", "high_time", "low_time", "close_time",
|
||||
"open_interest",
|
||||
]
|
||||
|
||||
|
||||
async def _run(cfg: dict) -> dict:
    """
    Execute one backtest described by ``cfg`` (the JSON document that
    main() reads from stdin).

    Pipeline: initialize the dexorder API from config/secrets YAML,
    locate the named strategy under DATA_DIR, register custom
    indicators, load the strategy class, fetch OHLC history for every
    feed, then run the synchronous backtest engine.

    Returns:
        On success: a dict echoing the request parameters plus
        ``total_candles`` and the metrics returned by run_backtest().
        On failure: ``{"error": str}`` — each stage is individually
        guarded so failures are reported as data rather than raised.
    """
    strategy_name = cfg["strategy_name"]
    feeds = cfg["feeds"]
    from_time = cfg.get("from_time")
    to_time = cfg.get("to_time")
    initial_capital = float(cfg.get("initial_capital", 10_000.0))
    paper = bool(cfg.get("paper", True))

    # -------------------------------------------------------------------------
    # Initialize API
    # -------------------------------------------------------------------------
    try:
        import yaml

        config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml")
        secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml")

        # Both files are optional; missing files fall back to empty dicts
        # and the DataAPIImpl defaults below.
        config_data = {}
        secrets_data = {}
        if Path(config_path).exists():
            with open(config_path) as f:
                config_data = yaml.safe_load(f) or {}
        if Path(secrets_path).exists():
            with open(secrets_path) as f:
                secrets_data = yaml.safe_load(f) or {}

        data_cfg = config_data.get("data", {})
        iceberg_cfg = data_cfg.get("iceberg", {})
        relay_cfg = data_cfg.get("relay", {})

        from dexorder.api import set_api, API
        from dexorder.impl.charting_api_impl import ChartingAPIImpl
        from dexorder.impl.data_api_impl import DataAPIImpl

        # S3 credentials may live in either config or secrets; config wins.
        data_api = DataAPIImpl(
            iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
            relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
            notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
            namespace=iceberg_cfg.get("namespace", "trading"),
            s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
            s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
            s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
            s3_region=iceberg_cfg.get("s3_region") or secrets_data.get("s3_region"),
            request_timeout=240.0,
        )
        set_api(API(charting=ChartingAPIImpl(), data=data_api))
    except Exception as e:
        return {"error": f"API initialization failed: {e}"}

    # -------------------------------------------------------------------------
    # Locate strategy
    # -------------------------------------------------------------------------
    data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
    try:
        from dexorder.tools.python_tools import get_category_manager, sanitize_name
        category_manager = get_category_manager(data_dir)
        # sanitize_name maps the user-supplied name to the on-disk directory.
        safe_name = sanitize_name(strategy_name)
        impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
        if not impl_path.exists():
            return {"error": f"Strategy '{strategy_name}' not found (looked at {impl_path})"}
    except Exception as exc:
        return {"error": f"Failed to locate strategy: {exc}"}

    # -------------------------------------------------------------------------
    # Register custom indicators and load strategy class
    # -------------------------------------------------------------------------
    try:
        from dexorder.nautilus.backtest_runner import _setup_custom_indicators
        _setup_custom_indicators(category_manager.src_dir)
    except Exception as exc:
        # Non-fatal: warn on stderr and continue without custom indicators.
        sys.stderr.write(f"WARNING: custom indicator setup failed: {exc}\n")

    try:
        from dexorder.nautilus.backtest_runner import _load_strategy_class
        strategy_class = _load_strategy_class(impl_path)
    except Exception:
        # Full traceback is returned so the agent can see the user's code error.
        return {"error": f"Strategy load failed:\n{traceback.format_exc()}"}

    # -------------------------------------------------------------------------
    # Fetch OHLC data
    # -------------------------------------------------------------------------
    from dexorder.api import get_api
    from dexorder.nautilus.pandas_strategy import make_feed_key

    api = get_api()
    parsed_feeds = [(f["symbol"], int(f["period_seconds"])) for f in feeds]
    ohlc_dfs = {}
    total_candles = 0

    for ticker, period_seconds in parsed_feeds:
        feed_key = make_feed_key(ticker, period_seconds)
        try:
            df = await api.data.historical_ohlc(
                ticker=ticker,
                period_seconds=period_seconds,
                start_time=from_time,
                end_time=to_time,
                extra_columns=_OHLC_EXTRA_COLUMNS,
            )
        except Exception as exc:
            return {"error": f"OHLC fetch failed for {feed_key}: {exc}"}

        # An empty frame means the range has no data; abort rather than
        # backtesting against a partial universe.
        if df.empty:
            return {"error": f"No OHLC data for {feed_key} in the requested range"}

        ohlc_dfs[feed_key] = df
        total_candles += len(df)

    # -------------------------------------------------------------------------
    # Run backtest (synchronous)
    # -------------------------------------------------------------------------
    try:
        from dexorder.nautilus.backtest_runner import run_backtest
        metrics = run_backtest(
            strategy_class=strategy_class,
            feeds=parsed_feeds,
            ohlc_dfs=ohlc_dfs,
            initial_capital=initial_capital,
            paper=paper,
        )
    except Exception:
        return {"error": f"Backtest failed:\n{traceback.format_exc()}"}

    # Echo the request parameters alongside the metrics so the caller can
    # attribute results without keeping its own copy of the config.
    return {
        "strategy_name": strategy_name,
        "feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
        "initial_capital": initial_capital,
        "paper": paper,
        "total_candles": total_candles,
        **metrics,
    }
|
||||
|
||||
|
||||
def main():
    """
    CLI entrypoint: read the JSON backtest config from stdin, run it, and
    write exactly one JSON document to stdout.

    The module contract promises that errors surface as ``{"error": str}``
    on stdout. Previously a malformed stdin payload (or an exception
    escaping _run) crashed with a raw traceback, leaving the parent
    process with unparseable output; both stages are now guarded so the
    parent can always json-decode stdout.
    """
    try:
        cfg = json.loads(sys.stdin.read())
    except ValueError as exc:
        # json.JSONDecodeError subclasses ValueError.
        print(json.dumps({"error": f"Invalid JSON config on stdin: {exc}"}))
        return

    try:
        result = asyncio.run(_run(cfg))
    except Exception:
        # _run guards each stage internally; this catches anything that
        # still escapes (e.g. event-loop setup failures).
        print(json.dumps({"error": f"Unhandled harness failure:\n{traceback.format_exc()}"}))
        return

    print(json.dumps(result))


if __name__ == "__main__":
    main()
|
||||
@@ -1,25 +1,21 @@
|
||||
"""
|
||||
backtest_strategy — run a PandasStrategy against historical OHLC data.
|
||||
|
||||
Called directly from the MCP server's async handle_tool_call.
|
||||
|
||||
Returns a JSON payload with backtest metrics and equity curve, following the
|
||||
same pattern as evaluate_indicator.py.
|
||||
Spawns backtest_harness.py as a subprocess so user strategy code is isolated
|
||||
from the MCP server process. The harness handles API init, data fetch, and
|
||||
the synchronous BacktestEngine internally.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# All OHLC+ columns to request from the DataAPI
|
||||
_OHLC_EXTRA_COLUMNS = [
|
||||
"volume", "buy_vol", "sell_vol",
|
||||
"open_time", "high_time", "low_time", "close_time",
|
||||
"open_interest",
|
||||
]
|
||||
_BACKTEST_HARNESS = Path(__file__).parent / "backtest_harness.py"
|
||||
|
||||
|
||||
async def backtest_strategy(
|
||||
@@ -42,23 +38,8 @@ async def backtest_strategy(
|
||||
paper: Always True for historical backtest (flag reserved for forward testing)
|
||||
|
||||
Returns:
|
||||
list[TextContent] with JSON payload:
|
||||
{
|
||||
"strategy_name": str,
|
||||
"feeds": [...],
|
||||
"initial_capital": float,
|
||||
"paper": bool,
|
||||
"total_candles": int,
|
||||
"total_return": float, # fractional (0.15 = +15%)
|
||||
"sharpe_ratio": float,
|
||||
"max_drawdown": float, # fractional (0.10 = 10% drawdown)
|
||||
"win_rate": float,
|
||||
"trade_count": int,
|
||||
"equity_curve": [{"timestamp": int, "equity": float}, ...]
|
||||
}
|
||||
|
||||
On error:
|
||||
{"error": str}
|
||||
list[TextContent] with JSON payload containing backtest metrics.
|
||||
On error: [TextContent] with {"error": str}
|
||||
"""
|
||||
from mcp.types import TextContent
|
||||
|
||||
@@ -66,102 +47,52 @@ async def backtest_strategy(
|
||||
log.error("backtest_strategy '%s': %s", strategy_name, msg)
|
||||
return [TextContent(type="text", text=json.dumps({"error": msg}))]
|
||||
|
||||
# --- 1. Validate feeds input ---
|
||||
if not feeds:
|
||||
return _err("feeds list is empty — provide at least one {symbol, period_seconds} entry")
|
||||
|
||||
parsed_feeds: list[tuple[str, int]] = []
|
||||
for f in feeds:
|
||||
sym = f.get("symbol", "")
|
||||
ps = f.get("period_seconds", 3600)
|
||||
if not sym:
|
||||
if not f.get("symbol"):
|
||||
return _err(f"Feed entry missing 'symbol': {f}")
|
||||
parsed_feeds.append((sym, int(ps)))
|
||||
|
||||
# --- 2. Resolve strategy implementation file ---
|
||||
try:
|
||||
from dexorder.tools.python_tools import get_category_manager, sanitize_name
|
||||
category_manager = get_category_manager()
|
||||
safe_name = sanitize_name(strategy_name)
|
||||
impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
|
||||
if not impl_path.exists():
|
||||
return _err(f"Strategy '{strategy_name}' not found (looked at {impl_path})")
|
||||
except Exception as exc:
|
||||
return _err(f"Failed to locate strategy: {exc}")
|
||||
|
||||
# --- 3. Register custom indicators with pandas-ta ---
|
||||
try:
|
||||
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
|
||||
_setup_custom_indicators(category_manager.src_dir)
|
||||
except Exception as exc:
|
||||
log.warning("backtest_strategy: custom indicator setup failed: %s", exc)
|
||||
|
||||
# --- 4. Load strategy class ---
|
||||
try:
|
||||
from dexorder.nautilus.backtest_runner import _load_strategy_class
|
||||
strategy_class = _load_strategy_class(impl_path)
|
||||
except Exception as exc:
|
||||
log.exception("backtest_strategy: strategy load failed")
|
||||
return _err(f"Strategy load failed: {exc}")
|
||||
|
||||
# --- 5. Fetch OHLC+ data for each feed ---
|
||||
try:
|
||||
from dexorder.api import get_api
|
||||
api = get_api()
|
||||
except Exception as exc:
|
||||
return _err(f"API not available: {exc}")
|
||||
|
||||
ohlc_dfs: dict[str, Any] = {}
|
||||
total_candles = 0
|
||||
|
||||
for ticker, period_seconds in parsed_feeds:
|
||||
from dexorder.nautilus.pandas_strategy import make_feed_key
|
||||
feed_key = make_feed_key(ticker, period_seconds)
|
||||
try:
|
||||
df = await api.data.historical_ohlc(
|
||||
ticker=ticker,
|
||||
period_seconds=period_seconds,
|
||||
start_time=from_time,
|
||||
end_time=to_time,
|
||||
extra_columns=_OHLC_EXTRA_COLUMNS,
|
||||
)
|
||||
except Exception as exc:
|
||||
log.exception("backtest_strategy: OHLC fetch failed for %s", feed_key)
|
||||
return _err(f"OHLC fetch failed for {feed_key}: {exc}")
|
||||
|
||||
if df.empty:
|
||||
return _err(f"No OHLC data for {feed_key} in the requested range")
|
||||
|
||||
ohlc_dfs[feed_key] = df
|
||||
total_candles += len(df)
|
||||
|
||||
# --- 6. Run backtest in thread executor (BacktestEngine is synchronous) ---
|
||||
try:
|
||||
import asyncio
|
||||
from dexorder.nautilus.backtest_runner import run_backtest
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
metrics = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: run_backtest(
|
||||
strategy_class=strategy_class,
|
||||
feeds=parsed_feeds,
|
||||
ohlc_dfs=ohlc_dfs,
|
||||
initial_capital=initial_capital,
|
||||
paper=paper,
|
||||
),
|
||||
)
|
||||
except Exception as exc:
|
||||
log.exception("backtest_strategy: backtest run failed")
|
||||
return _err(f"Backtest failed: {exc}")
|
||||
|
||||
# --- 7. Return results ---
|
||||
payload = {
|
||||
cfg = {
|
||||
"strategy_name": strategy_name,
|
||||
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
|
||||
"feeds": feeds,
|
||||
"from_time": from_time,
|
||||
"to_time": to_time,
|
||||
"initial_capital": initial_capital,
|
||||
"paper": paper,
|
||||
"total_candles": total_candles,
|
||||
**metrics, # keys: summary, statistics, trades, equity_curve
|
||||
}
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
sys.executable, str(_BACKTEST_HARNESS),
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(json.dumps(cfg).encode()),
|
||||
timeout=600,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return _err("Backtest timed out (10 minutes)")
|
||||
except Exception as exc:
|
||||
return _err(f"Failed to launch backtest harness: {exc}")
|
||||
|
||||
if proc.returncode != 0:
|
||||
err_text = stderr.decode(errors="replace")
|
||||
log.error("backtest_strategy '%s': harness exited %d: %s", strategy_name, proc.returncode, err_text[:500])
|
||||
return _err(f"Backtest harness failed:\n{err_text}")
|
||||
|
||||
if stderr:
|
||||
log.warning("backtest_strategy '%s' stderr: %s", strategy_name, stderr.decode(errors="replace")[:500])
|
||||
|
||||
try:
|
||||
payload = json.loads(stdout.decode())
|
||||
except json.JSONDecodeError:
|
||||
return _err(f"Harness produced invalid JSON: {stdout.decode(errors='replace')[:200]}")
|
||||
|
||||
if "error" in payload:
|
||||
return _err(payload["error"])
|
||||
|
||||
return [TextContent(type="text", text=json.dumps(payload))]
|
||||
|
||||
@@ -18,51 +18,32 @@ After write/edit operations, a category-specific test harness runs to validate
|
||||
the code and capture errors/output for agent feedback.
|
||||
"""
|
||||
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from dexorder.tools.subprocess_runner import run_subprocess_argv, run_in_thread
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _run_inprocess(fn, *args, timeout: int) -> dict:
|
||||
"""
|
||||
Run fn(*args) in a one-shot thread and return its result dict.
|
||||
|
||||
Uses a thread so the calling coroutine is not blocked and the calling
|
||||
process does not fork a new Python interpreter. All already-loaded
|
||||
libraries (numpy, pandas, matplotlib, etc.) are shared with the thread.
|
||||
|
||||
On timeout returns a dict with _timeout=True. On unexpected exception
|
||||
returns a dict with error=True and the traceback in stderr.
|
||||
"""
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
future = executor.submit(fn, *args)
|
||||
try:
|
||||
return future.result(timeout=timeout)
|
||||
except concurrent.futures.TimeoutError:
|
||||
return {"_timeout": True, "error": True,
|
||||
"stdout": "", "stderr": "", "images": []}
|
||||
except Exception:
|
||||
return {"error": True, "stdout": "",
|
||||
"stderr": traceback.format_exc(), "images": []}
|
||||
# Paths to harness scripts run as subprocesses
|
||||
_RESEARCH_HARNESS = Path(__file__).parent / "research_harness.py"
|
||||
_STRATEGY_HARNESS = Path(__file__).parent / "strategy_harness.py"
|
||||
|
||||
|
||||
# Import conda manager for package installation and tracking
|
||||
try:
|
||||
from dexorder.conda_manager import install_packages, cleanup_extra_packages
|
||||
from dexorder.conda_manager import install_packages_async, cleanup_extra_packages_async
|
||||
except ImportError:
|
||||
log.warning("conda_manager not available - package installation disabled")
|
||||
install_packages = None
|
||||
cleanup_extra_packages = None
|
||||
install_packages_async = None
|
||||
cleanup_extra_packages_async = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -355,6 +336,39 @@ class GitManager:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
    # ------------------------------------------------------------------
    # Async variants — delegates to sync methods via asyncio.to_thread
    # so the event loop stays responsive during git operations.
    # ------------------------------------------------------------------

    async def commit_async(self, message: str) -> Optional[str]:
        """Async wrapper: run commit(message) on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.commit, message)

    async def log_async(self, path: Optional[Path] = None, n: int = 20) -> list[dict]:
        """Async wrapper: run log(path, n) on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.log, path, n)

    async def restore_async(self, revision: str, path: Optional[Path] = None) -> Optional[str]:
        """Async wrapper: run restore(revision, path) on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.restore, revision, path)

    async def head_short_hash_async(self) -> str:
        """Async wrapper: run head_short_hash() on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.head_short_hash)

    async def create_worktree_async(self, worktree_path: Path, revision: str = "HEAD") -> str:
        """Async wrapper: run create_worktree(worktree_path, revision) on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.create_worktree, worktree_path, revision)

    async def remove_worktree_async(self, worktree_path: Path) -> None:
        """Async wrapper: run remove_worktree(worktree_path) on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.remove_worktree, worktree_path)

    async def prune_worktrees_async(self) -> None:
        """Async wrapper: run prune_worktrees() on a worker thread."""
        import asyncio
        return await asyncio.to_thread(self.prune_worktrees)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Custom Indicator Setup
|
||||
@@ -484,7 +498,7 @@ class CategoryFileManager:
|
||||
"""Root of the versioned category code (git repo root)."""
|
||||
return self.data_dir / "src"
|
||||
|
||||
def write(
|
||||
async def write(
|
||||
self,
|
||||
category: str,
|
||||
name: str,
|
||||
@@ -547,7 +561,7 @@ class CategoryFileManager:
|
||||
return {"success": False, "error": f"Failed to write metadata: {e}"}
|
||||
|
||||
# Run validation harness
|
||||
validation = self._validate(cat, item_dir)
|
||||
validation = await self._validate(cat, item_dir)
|
||||
|
||||
result = {
|
||||
"success": validation["success"],
|
||||
@@ -559,19 +573,19 @@ class CategoryFileManager:
|
||||
if validation["success"]:
|
||||
if cat == Category.RESEARCH:
|
||||
log.info(f"Auto-executing research script: {name}")
|
||||
result["execution"] = self.execute_research(name)
|
||||
result["execution"] = await self.execute_research(name)
|
||||
elif cat == Category.INDICATOR:
|
||||
log.info(f"Auto-executing indicator test: {name}")
|
||||
result["execution"] = self._execute_indicator(item_dir)
|
||||
result["execution"] = await self._execute_indicator(item_dir)
|
||||
|
||||
# Commit to git
|
||||
commit_hash = self.git.commit(f"create({category}): {name}")
|
||||
commit_hash = await self.git.commit_async(f"create({category}): {name}")
|
||||
if commit_hash:
|
||||
result["revision"] = commit_hash
|
||||
|
||||
return result
|
||||
|
||||
def edit(
|
||||
async def edit(
|
||||
self,
|
||||
category: str,
|
||||
name: str,
|
||||
@@ -671,7 +685,7 @@ class CategoryFileManager:
|
||||
# Run validation harness if code was updated
|
||||
validation = None
|
||||
if code is not None:
|
||||
validation = self._validate(cat, item_dir)
|
||||
validation = await self._validate(cat, item_dir)
|
||||
|
||||
result = {
|
||||
"success": True,
|
||||
@@ -685,15 +699,15 @@ class CategoryFileManager:
|
||||
if code is not None and result["success"]:
|
||||
if cat == Category.RESEARCH:
|
||||
log.info(f"Auto-executing research script after edit: {name}")
|
||||
result["execution"] = self.execute_research(name)
|
||||
result["execution"] = await self.execute_research(name)
|
||||
elif cat == Category.INDICATOR:
|
||||
log.info(f"Auto-executing indicator test after edit: {name}")
|
||||
result["execution"] = self._execute_indicator(item_dir)
|
||||
result["execution"] = await self._execute_indicator(item_dir)
|
||||
|
||||
# Commit to git if code changed
|
||||
if code is not None and result["success"]:
|
||||
action = "patch" if patches is not None else "edit"
|
||||
commit_hash = self.git.commit(f"{action}({category}): {name}")
|
||||
commit_hash = await self.git.commit_async(f"{action}({category}): {name}")
|
||||
if commit_hash:
|
||||
result["revision"] = commit_hash
|
||||
|
||||
@@ -776,7 +790,7 @@ class CategoryFileManager:
|
||||
|
||||
return {"items": items}
|
||||
|
||||
def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
|
||||
async def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
|
||||
"""
|
||||
Run category-specific validation harness.
|
||||
|
||||
@@ -793,13 +807,13 @@ class CategoryFileManager:
|
||||
|
||||
# Install required packages before validation
|
||||
packages_installed = []
|
||||
if install_packages and meta_path.exists():
|
||||
if install_packages_async and meta_path.exists():
|
||||
try:
|
||||
metadata = json.loads(meta_path.read_text())
|
||||
conda_packages = metadata.get("conda_packages", [])
|
||||
if conda_packages:
|
||||
log.info(f"Installing packages for validation: {conda_packages}")
|
||||
install_result = install_packages(conda_packages, data_dir=self.data_dir)
|
||||
install_result = await install_packages_async(conda_packages, data_dir=self.data_dir)
|
||||
if install_result.get("success"):
|
||||
packages_installed = install_result.get("installed", [])
|
||||
if packages_installed:
|
||||
@@ -811,11 +825,11 @@ class CategoryFileManager:
|
||||
|
||||
# Run validation
|
||||
if category == Category.STRATEGY:
|
||||
result = self._validate_strategy(impl_path)
|
||||
result = await self._validate_strategy(impl_path)
|
||||
elif category == Category.INDICATOR:
|
||||
result = self._validate_indicator(impl_path)
|
||||
result = await self._validate_indicator(impl_path)
|
||||
elif category == Category.RESEARCH:
|
||||
result = self._validate_research(impl_path, item_dir)
|
||||
result = await self._validate_research(impl_path, item_dir)
|
||||
else:
|
||||
result = {"success": False, "error": f"No validator for category {category}"}
|
||||
|
||||
@@ -825,19 +839,18 @@ class CategoryFileManager:
|
||||
|
||||
return result
|
||||
|
||||
def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
|
||||
async def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
|
||||
"""
|
||||
Validate a strategy by running it against synthetic OHLC data.
|
||||
|
||||
Runs strategy_harness.py in-process via a thread. Catches import errors,
|
||||
Runs strategy_harness.py as a subprocess. Catches import errors,
|
||||
runtime errors in evaluate(), and wrong class hierarchy — not just syntax.
|
||||
"""
|
||||
meta_path = impl_path.parent / "metadata.json"
|
||||
return self._execute_strategy(impl_path.parent, timeout=45)
|
||||
return await self._execute_strategy(impl_path.parent, timeout=45)
|
||||
|
||||
def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
|
||||
async def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
|
||||
"""
|
||||
Run a strategy against synthetic OHLC data in-process via a thread.
|
||||
Run a strategy against synthetic OHLC data via strategy_harness.py subprocess.
|
||||
|
||||
Returns:
|
||||
dict with success, output (human-readable summary), trade_count, error
|
||||
@@ -850,24 +863,26 @@ class CategoryFileManager:
|
||||
if not meta_path.exists():
|
||||
return {"success": False, "error": "metadata.json not found"}
|
||||
|
||||
from dexorder.tools.strategy_harness import run as _strategy_run
|
||||
result = _run_inprocess(_strategy_run, impl_path, meta_path, timeout=timeout)
|
||||
|
||||
if result.get("_timeout"):
|
||||
data = await run_subprocess_argv(
|
||||
sys.executable, str(_STRATEGY_HARNESS), str(impl_path), str(meta_path),
|
||||
timeout=timeout,
|
||||
)
|
||||
if data.get("_timeout"):
|
||||
return {"success": False, "error": f"Strategy test timed out after {timeout}s"}
|
||||
return result
|
||||
if data.get("error") and not data.get("success"):
|
||||
return {"success": False, "error": data.get("stderr") or "Harness failed"}
|
||||
return data
|
||||
|
||||
def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
|
||||
async def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
|
||||
"""
|
||||
Validate an indicator by running it against synthetic OHLC data.
|
||||
|
||||
Runs indicator_harness.py in-process via a thread. Catches import errors,
|
||||
runtime errors, and wrong return types — not just syntax.
|
||||
Runs indicator_harness.py in-process via a thread (main proc). Catches
|
||||
import errors, runtime errors, and wrong return types — not just syntax.
|
||||
"""
|
||||
meta_path = impl_path.parent / "metadata.json"
|
||||
return self._execute_indicator(impl_path.parent, timeout=30)
|
||||
return await self._execute_indicator(impl_path.parent, timeout=30)
|
||||
|
||||
def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
|
||||
async def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
|
||||
"""
|
||||
Run an indicator against synthetic OHLC data in-process via a thread.
|
||||
|
||||
@@ -883,29 +898,32 @@ class CategoryFileManager:
|
||||
return {"success": False, "error": "metadata.json not found"}
|
||||
|
||||
from dexorder.tools.indicator_harness import run as _indicator_run
|
||||
result = _run_inprocess(_indicator_run, impl_path, meta_path, timeout=timeout)
|
||||
result = await run_in_thread(_indicator_run, impl_path, meta_path, timeout=timeout)
|
||||
|
||||
if result.get("_timeout"):
|
||||
return {"success": False, "error": f"Indicator test timed out after {timeout}s"}
|
||||
return result
|
||||
|
||||
def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
|
||||
async def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
|
||||
"""
|
||||
Run a research script in-process via a thread and return captured results.
|
||||
Run a research script via research_harness.py subprocess and return captured results.
|
||||
|
||||
Returns:
|
||||
dict with stdout, stderr, images, error fields — or an error dict.
|
||||
dict with stdout, stderr, images, error fields.
|
||||
"""
|
||||
from dexorder.tools.research_harness import run as _research_run
|
||||
return _run_inprocess(_research_run, impl_path, item_dir, timeout=timeout)
|
||||
return await run_subprocess_argv(
|
||||
sys.executable, str(_RESEARCH_HARNESS), str(impl_path),
|
||||
timeout=timeout,
|
||||
cwd=item_dir,
|
||||
)
|
||||
|
||||
def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
|
||||
async def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
|
||||
"""
|
||||
Validate a research script.
|
||||
|
||||
Runs the script via the harness and captures output + pyplot images.
|
||||
"""
|
||||
data = self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||
|
||||
if data.get("_timeout"):
|
||||
return {"success": False, "error": "Research script timeout"}
|
||||
@@ -923,7 +941,7 @@ class CategoryFileManager:
|
||||
"images": data["images"],
|
||||
}
|
||||
|
||||
def execute_research(self, name: str) -> dict[str, Any]:
|
||||
async def execute_research(self, name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Execute a research script and return structured content with images.
|
||||
|
||||
@@ -944,7 +962,7 @@ class CategoryFileManager:
|
||||
if not impl_path.exists():
|
||||
return {"error": f"Implementation file not found for '{name}'"}
|
||||
|
||||
data = self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
|
||||
|
||||
if data.get("_timeout"):
|
||||
log.error(f"execute_research '{name}': timeout")
|
||||
@@ -995,7 +1013,7 @@ class CategoryFileManager:
|
||||
return {"content": content}
|
||||
|
||||
|
||||
def delete(self, category: str, name: str) -> dict[str, Any]:
|
||||
async def delete(self, category: str, name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Delete a category script directory and commit the removal to git.
|
||||
|
||||
@@ -1031,13 +1049,13 @@ class CategoryFileManager:
|
||||
except Exception as e:
|
||||
return {"success": False, "error": f"Failed to delete: {e}"}
|
||||
|
||||
commit_hash = self.git.commit(f"delete({category}): {name}")
|
||||
commit_hash = await self.git.commit_async(f"delete({category}): {name}")
|
||||
result: dict[str, Any] = {"success": True, "category": category, "name": name}
|
||||
if commit_hash:
|
||||
result["revision"] = commit_hash
|
||||
return result
|
||||
|
||||
def git_log(
|
||||
async def git_log(
|
||||
self,
|
||||
category: Optional[str] = None,
|
||||
name: Optional[str] = None,
|
||||
@@ -1061,10 +1079,10 @@ class CategoryFileManager:
|
||||
path = get_category_path(self.src_dir, cat, name)
|
||||
else:
|
||||
path = self.src_dir / cat.value
|
||||
entries = self.git.log(path=path, n=limit)
|
||||
entries = await self.git.log_async(path=path, n=limit)
|
||||
return {"success": True, "commits": entries}
|
||||
|
||||
def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
|
||||
async def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Restore a category item to a previous git revision (creates a new commit).
|
||||
|
||||
@@ -1085,11 +1103,11 @@ class CategoryFileManager:
|
||||
return {"success": False, "error": f"Item '{name}' not found in '{category}'"}
|
||||
|
||||
try:
|
||||
commit_hash = self.git.restore(revision, path=item_dir)
|
||||
commit_hash = await self.git.restore_async(revision, path=item_dir)
|
||||
except RuntimeError as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
validation = self._validate(cat, item_dir)
|
||||
validation = await self._validate(cat, item_dir)
|
||||
return {
|
||||
"success": validation["success"],
|
||||
"revision": commit_hash,
|
||||
|
||||
@@ -119,11 +119,39 @@ def run(impl_path: Path, item_dir: Path) -> dict:
|
||||
stdout_buf = io.StringIO()
|
||||
stderr_buf = io.StringIO()
|
||||
|
||||
# Eagerly capture figures when user scripts call plt.close() so images are
|
||||
# not lost even if the script closes figures immediately after savefig().
|
||||
captured_images: list[dict] = []
|
||||
|
||||
def _capture_fig(fig) -> None:
|
||||
buf = io.BytesIO()
|
||||
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
|
||||
buf.seek(0)
|
||||
captured_images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
|
||||
buf.close()
|
||||
|
||||
_orig_plt_close = plt.close
|
||||
|
||||
def _patched_close(fig=None):
|
||||
if fig is None:
|
||||
for fn in plt.get_fignums():
|
||||
_capture_fig(plt.figure(fn))
|
||||
elif fig == 'all':
|
||||
for fn in plt.get_fignums():
|
||||
_capture_fig(plt.figure(fn))
|
||||
else:
|
||||
try:
|
||||
_capture_fig(fig if hasattr(fig, 'savefig') else plt.figure(fig))
|
||||
except Exception:
|
||||
pass
|
||||
_orig_plt_close(fig)
|
||||
|
||||
error_occurred = False
|
||||
old_stdout, old_stderr = sys.stdout, sys.stderr
|
||||
old_cwd = os.getcwd()
|
||||
sys.stdout = stdout_buf
|
||||
sys.stderr = stderr_buf
|
||||
plt.close = _patched_close
|
||||
|
||||
try:
|
||||
os.chdir(impl_path.parent)
|
||||
@@ -136,22 +164,26 @@ def run(impl_path: Path, item_dir: Path) -> dict:
|
||||
sys.stdout = old_stdout
|
||||
sys.stderr = old_stderr
|
||||
os.chdir(old_cwd)
|
||||
plt.close = _orig_plt_close
|
||||
|
||||
stdout_output = stdout_buf.getvalue()
|
||||
stderr_output = stderr_buf.getvalue()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Capture matplotlib figures
|
||||
# Capture any figures still open after script completion
|
||||
# ---------------------------------------------------------------------------
|
||||
images = []
|
||||
images = captured_images
|
||||
if not error_occurred:
|
||||
already_seen = {img["data"] for img in images}
|
||||
for fig_num in plt.get_fignums():
|
||||
fig = plt.figure(fig_num)
|
||||
buf = io.BytesIO()
|
||||
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
|
||||
buf.seek(0)
|
||||
images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
|
||||
data = base64.b64encode(buf.read()).decode('utf-8')
|
||||
buf.close()
|
||||
if data not in already_seen:
|
||||
images.append({"format": "png", "data": data})
|
||||
plt.close('all')
|
||||
|
||||
return {
|
||||
|
||||
182
sandbox/dexorder/tools/subprocess_runner.py
Normal file
182
sandbox/dexorder/tools/subprocess_runner.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
subprocess_runner — non-blocking subprocess primitives for the MCP sandbox.
|
||||
|
||||
All three entrypoints return the same dict shape as the legacy _run_inprocess():
|
||||
{
|
||||
"error": bool,
|
||||
"stdout": str,
|
||||
"stderr": str,
|
||||
"images": list, # always [] for non-research invocations
|
||||
"_timeout": bool # present and True only on timeout
|
||||
}
|
||||
|
||||
Callers can therefore pattern-match on {"_timeout", "error", "stdout", "stderr"}
|
||||
uniformly regardless of whether the work ran in a subprocess or a thread.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalise(data: dict, stderr_fallback: str = "") -> dict:
|
||||
"""Ensure the standard shape keys are present in a harness result dict."""
|
||||
data.setdefault("error", False)
|
||||
data.setdefault("stdout", "")
|
||||
data.setdefault("stderr", stderr_fallback)
|
||||
data.setdefault("images", [])
|
||||
return data
|
||||
|
||||
|
||||
def _err_dict(stderr: str = "", stdout: str = "") -> dict:
|
||||
return {"error": True, "stdout": stdout, "stderr": stderr, "images": []}
|
||||
|
||||
|
||||
def _timeout_dict() -> dict:
|
||||
return {"_timeout": True, "error": True, "stdout": "", "stderr": "", "images": []}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Primitive 1: run_subprocess_argv
|
||||
#
|
||||
# Non-blocking equivalent of:
|
||||
# subprocess.run([sys.executable, harness, arg1, arg2, ...],
|
||||
# capture_output=True, text=True, timeout=N, cwd=cwd)
|
||||
#
|
||||
# Used by: _execute_strategy, _run_research_harness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_subprocess_argv(
|
||||
*cmd: str,
|
||||
timeout: int,
|
||||
cwd: Path | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Spawn cmd as a subprocess, await completion, and return a normalised result dict.
|
||||
|
||||
stdout is expected to contain a JSON object written by the harness. It is
|
||||
decoded and normalised to the standard shape. On JSON decode failure the
|
||||
raw stdout text is preserved in "stdout" and error is set to True.
|
||||
"""
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=str(cwd) if cwd else None,
|
||||
)
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return _timeout_dict()
|
||||
except Exception as exc:
|
||||
return _err_dict(stderr=f"Harness launch failed: {exc}")
|
||||
|
||||
stdout_text = stdout_bytes.decode(errors="replace")
|
||||
stderr_text = stderr_bytes.decode(errors="replace")
|
||||
|
||||
if proc.returncode != 0:
|
||||
return _err_dict(
|
||||
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
|
||||
stdout=stdout_text,
|
||||
)
|
||||
|
||||
try:
|
||||
data = json.loads(stdout_text)
|
||||
return _normalise(data, stderr_fallback=stderr_text)
|
||||
except json.JSONDecodeError:
|
||||
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Primitive 2: run_subprocess_stdin
|
||||
#
|
||||
# Non-blocking equivalent of the backtest pattern — JSON config fed via stdin.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_subprocess_stdin(
|
||||
*cmd: str,
|
||||
stdin_data: bytes,
|
||||
timeout: int,
|
||||
) -> dict:
|
||||
"""
|
||||
Spawn cmd, write stdin_data to its stdin, await completion.
|
||||
|
||||
Returns the same normalised dict shape as run_subprocess_argv.
|
||||
"""
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(stdin_data), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return _timeout_dict()
|
||||
except Exception as exc:
|
||||
return _err_dict(stderr=f"Harness launch failed: {exc}")
|
||||
|
||||
stdout_text = stdout_bytes.decode(errors="replace")
|
||||
stderr_text = stderr_bytes.decode(errors="replace")
|
||||
|
||||
if proc.returncode != 0:
|
||||
return _err_dict(
|
||||
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
|
||||
stdout=stdout_text,
|
||||
)
|
||||
|
||||
try:
|
||||
data = json.loads(stdout_text)
|
||||
return _normalise(data, stderr_fallback=stderr_text)
|
||||
except json.JSONDecodeError:
|
||||
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Primitive 3: run_in_thread
|
||||
#
|
||||
# Async wrapper around asyncio.to_thread so the event loop stays responsive
|
||||
# while CPU-bound or blocking-IO callables run in a worker thread.
|
||||
#
|
||||
# Used by: _execute_indicator (in-process indicator harness)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def run_in_thread(
|
||||
fn: Callable,
|
||||
*args: Any,
|
||||
timeout: int,
|
||||
) -> dict:
|
||||
"""
|
||||
Run fn(*args) in a thread pool worker and yield to the event loop while waiting.
|
||||
|
||||
On timeout the thread is abandoned (daemon) and _timeout_dict() is returned.
|
||||
On MemoryError or unexpected exception a standard error dict is returned.
|
||||
The returned dict is normalised to the standard shape.
|
||||
"""
|
||||
from dexorder.memory_guard import cleanup_memory
|
||||
|
||||
try:
|
||||
result = await asyncio.wait_for(
|
||||
asyncio.to_thread(fn, *args),
|
||||
timeout=timeout,
|
||||
)
|
||||
return _normalise(result)
|
||||
except asyncio.TimeoutError:
|
||||
return _timeout_dict()
|
||||
except MemoryError:
|
||||
cleanup_memory()
|
||||
return _err_dict(
|
||||
stderr="Script exceeded memory limit. Try reducing the data range or batch size."
|
||||
)
|
||||
except Exception:
|
||||
return _err_dict(stderr=traceback.format_exc())
|
||||
@@ -33,7 +33,7 @@ from starlette.routing import Route, Mount
|
||||
|
||||
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
|
||||
from dexorder.api import set_api, API
|
||||
from dexorder.conda_manager import sync_packages, install_packages, cleanup_extra_packages
|
||||
from dexorder.conda_manager import sync_packages_async, install_packages_async, cleanup_extra_packages_async
|
||||
from dexorder.events import EventType, UserEvent, DeliverySpec
|
||||
from dexorder.impl.charting_api_impl import ChartingAPIImpl
|
||||
from dexorder.impl.data_api_impl import DataAPIImpl
|
||||
@@ -893,7 +893,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
arguments.get("patch", [])
|
||||
)
|
||||
elif name == "python_write":
|
||||
result = category_manager.write(
|
||||
result = await category_manager.write(
|
||||
category=arguments.get("category", ""),
|
||||
name=arguments.get("name", ""),
|
||||
description=arguments.get("description", ""),
|
||||
@@ -920,10 +920,10 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
|
||||
if result.get("success"):
|
||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||
cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
||||
await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||
return content
|
||||
elif name == "python_edit":
|
||||
result = category_manager.edit(
|
||||
result = await category_manager.edit(
|
||||
category=arguments.get("category", ""),
|
||||
name=arguments.get("name", ""),
|
||||
code=arguments.get("code"),
|
||||
@@ -951,7 +951,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
logging.info(f"python_edit '{arguments.get('name')}': no execution result")
|
||||
if result.get("success"):
|
||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||
cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
||||
await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||
return content
|
||||
elif name == "python_read":
|
||||
return category_manager.read(
|
||||
@@ -963,7 +963,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
category=arguments.get("category", "")
|
||||
)
|
||||
elif name == "python_log":
|
||||
result = category_manager.git_log(
|
||||
result = await category_manager.git_log(
|
||||
category=arguments.get("category"),
|
||||
name=arguments.get("name"),
|
||||
limit=int(arguments.get("limit", 20))
|
||||
@@ -973,7 +973,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
lines.append(f"{c['short_hash']} {c['date'][:10]} {c['message']}")
|
||||
return [TextContent(type="text", text="\n".join(lines))]
|
||||
elif name == "python_revert":
|
||||
result = category_manager.git_revert(
|
||||
result = await category_manager.git_revert(
|
||||
revision=arguments.get("revision", ""),
|
||||
category=arguments.get("category", ""),
|
||||
name=arguments.get("name", "")
|
||||
@@ -989,13 +989,13 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
|
||||
return [TextContent(type="text", text="\n".join(meta_parts))]
|
||||
elif name == "python_delete":
|
||||
result = category_manager.delete(
|
||||
result = await category_manager.delete(
|
||||
category=arguments.get("category", ""),
|
||||
name=arguments.get("name", "")
|
||||
)
|
||||
if result.get("success"):
|
||||
_remove_type(workspace_store, arguments.get("category", ""), arguments.get("name", ""))
|
||||
cleanup_result = cleanup_extra_packages(get_data_dir(), _get_env_yml())
|
||||
cleanup_result = await cleanup_extra_packages_async(get_data_dir(), _get_env_yml())
|
||||
if cleanup_result.get("removed"):
|
||||
result["packages_removed"] = cleanup_result["removed"]
|
||||
parts = [f"success: {result['success']}"]
|
||||
@@ -1004,14 +1004,14 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
|
||||
parts.append(f"{k}: {result[k]}")
|
||||
return [TextContent(type="text", text="\n".join(parts))]
|
||||
elif name == "conda_sync":
|
||||
return sync_packages(
|
||||
return await sync_packages_async(
|
||||
data_dir=get_data_dir(),
|
||||
environment_yml=_get_env_yml()
|
||||
)
|
||||
elif name == "conda_install":
|
||||
return install_packages(arguments.get("packages", []))
|
||||
return await install_packages_async(arguments.get("packages", []))
|
||||
elif name == "execute_research":
|
||||
result = category_manager.execute_research(name=arguments.get("name", ""))
|
||||
result = await category_manager.execute_research(name=arguments.get("name", ""))
|
||||
if "error" in result:
|
||||
logging.error(f"execute_research '{arguments.get('name')}': {result['error']}")
|
||||
return [TextContent(type="text", text=f"Error: {result['error']}")]
|
||||
@@ -1113,6 +1113,8 @@ def create_streamable_http_app(mcp_server: Server) -> Starlette:
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
async def lifespan(app: Starlette):
|
||||
from dexorder.event_loop import install_thread_safe_asyncio_run
|
||||
install_thread_safe_asyncio_run(asyncio.get_running_loop())
|
||||
async with session_manager.run():
|
||||
yield
|
||||
|
||||
@@ -1156,6 +1158,14 @@ class UserContainer:
|
||||
# Load configuration
|
||||
self.config.load()
|
||||
|
||||
# Python-level memory guard (RLIMIT_AS soft limit) — DISABLED.
|
||||
# We assume nodes have ample memory (8Gi limits) and will revisit a
|
||||
# proper RSS-based cgroup monitor later. The implementation is in
|
||||
# dexorder/memory_guard.py if we want to re-enable.
|
||||
# from dexorder.memory_guard import setup_memory_limit
|
||||
# mem_cfg = self.config.config_data.get("memory", {})
|
||||
# setup_memory_limit(fraction=float(mem_cfg.get("limit_fraction", 0.85)))
|
||||
|
||||
# Initialize data and charting API
|
||||
data_cfg = self.config.config_data.get("data", {})
|
||||
iceberg_cfg = data_cfg.get("iceberg", {})
|
||||
|
||||
@@ -9,6 +9,8 @@ import { useShapeStore } from './stores/shapes'
|
||||
import { useIndicatorStore } from './stores/indicators'
|
||||
import { useIndicatorTypesStore } from './stores/indicatorTypes'
|
||||
import { useChannelStore } from './stores/channel'
|
||||
import { useResearchTypesStore } from './stores/researchTypes'
|
||||
import { useStrategyTypesStore } from './stores/strategyTypes'
|
||||
import { useStateSync } from './composables/useStateSync'
|
||||
import { wsManager } from './composables/useWebSocket'
|
||||
import { authService } from './composables/useAuth'
|
||||
@@ -44,9 +46,18 @@ function onHDragMove(e: PointerEvent) {
|
||||
chartWidth.value = Math.max(CHART_MIN_PX, Math.min(maxWidth, hDragStartWidth + delta))
|
||||
}
|
||||
|
||||
// Clamp chartWidth so chart + chat always fit within the window
|
||||
function clampChartWidth() {
|
||||
const maxWidth = window.innerWidth - CHAT_MIN_PX - 4
|
||||
if (maxWidth >= CHART_MIN_PX) {
|
||||
chartWidth.value = Math.max(CHART_MIN_PX, Math.min(maxWidth, chartWidth.value))
|
||||
}
|
||||
}
|
||||
|
||||
// Check screen width for mobile layout
|
||||
const checkMobile = () => {
|
||||
isMobile.value = window.innerWidth < 768
|
||||
if (!isMobile.value) clampChartWidth()
|
||||
}
|
||||
|
||||
const chartStore = useChartStore()
|
||||
@@ -108,11 +119,15 @@ const initializeApp = async () => {
|
||||
const indicatorStore = useIndicatorStore()
|
||||
const indicatorTypesStore = useIndicatorTypesStore()
|
||||
const channelStore = useChannelStore()
|
||||
const researchTypesStore = useResearchTypesStore()
|
||||
const strategyTypesStore = useStrategyTypesStore()
|
||||
const stateSync = useStateSync({
|
||||
chartState: chartStore,
|
||||
shapes: shapeStore,
|
||||
indicators: indicatorStore,
|
||||
indicator_types: indicatorTypesStore,
|
||||
research_types: researchTypesStore,
|
||||
strategy_types: strategyTypesStore,
|
||||
channelState: channelStore
|
||||
})
|
||||
stateSyncCleanup = stateSync.cleanup
|
||||
@@ -195,7 +210,7 @@ onBeforeUnmount(() => {
|
||||
|
||||
.chat-panel {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
min-width: 240px;
|
||||
height: 100%;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
|
||||
@@ -7,6 +7,7 @@ import TabPanels from 'primevue/tabpanels'
|
||||
import TabPanel from 'primevue/tabpanel'
|
||||
import OrdersTab from './tabs/OrdersTab.vue'
|
||||
import PlaceholderTab from './tabs/PlaceholderTab.vue'
|
||||
import ResearchTab from './tabs/ResearchTab.vue'
|
||||
|
||||
interface TempTab {
|
||||
id: string
|
||||
@@ -81,9 +82,10 @@ defineExpose({
|
||||
<template>
|
||||
<div class="bottom-tray" :style="trayStyle">
|
||||
<div v-if="isExpanded" class="tray-resize-handle" @pointerdown="startResize" @pointermove="onResizeMove" />
|
||||
<Tabs :value="activeTab" class="tray-tabs">
|
||||
<Tabs :value="isExpanded ? activeTab : null" class="tray-tabs">
|
||||
<TabList class="tray-tab-list">
|
||||
<Tab value="orders" @click="onTabClick('orders')">Orders</Tab>
|
||||
<Tab value="research" @click="onTabClick('research')">Research</Tab>
|
||||
<Tab value="strategies" @click="onTabClick('strategies')">Strategies</Tab>
|
||||
<Tab value="positions" @click="onTabClick('positions')">Positions</Tab>
|
||||
<Tab
|
||||
@@ -102,9 +104,10 @@ defineExpose({
|
||||
</button>
|
||||
</TabList>
|
||||
<TabPanels v-if="isExpanded" class="tray-panels">
|
||||
<TabPanel value="positions" class="tray-panel"><PlaceholderTab label="Positions" /></TabPanel>
|
||||
<TabPanel value="orders" class="tray-panel"><OrdersTab /></TabPanel>
|
||||
<TabPanel value="strategies" class="tray-panel"><PlaceholderTab label="Strategies" /></TabPanel>
|
||||
<TabPanel value="positions" class="tray-panel"><PlaceholderTab label="Positions" /></TabPanel>
|
||||
<TabPanel value="research" class="tray-panel"><ResearchTab /></TabPanel>
|
||||
<TabPanel
|
||||
v-for="tab in tempTabs"
|
||||
:key="tab.id"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted, onBeforeUnmount, watch } from 'vue'
|
||||
import { ref, onMounted, onBeforeUnmount, watch, type WatchStopHandle } from 'vue'
|
||||
import Card from 'primevue/card'
|
||||
import { createTradingViewDatafeed } from '../composables/useTradingViewDatafeed'
|
||||
import { useTradingViewShapes } from '../composables/useTradingViewShapes'
|
||||
@@ -11,10 +11,11 @@ import type { IChartingLibraryWidget } from '../types/tradingview'
|
||||
import { intervalToSeconds } from '../utils'
|
||||
import { wsManager } from '../composables/useWebSocket'
|
||||
|
||||
// Convert seconds to TradingView interval string
|
||||
// Convert seconds to TradingView interval string.
|
||||
// TradingView uses plain minute numbers ("60", "240") for intraday,
|
||||
// and "1D", "2D" etc for daily. Never use "H" suffix — it's not in supported_resolutions.
|
||||
function secondsToInterval(seconds: number): string {
|
||||
if (seconds % 86400 === 0) return `${seconds / 86400}D`
|
||||
if (seconds % 3600 === 0) return `${seconds / 3600}H`
|
||||
return `${seconds / 60}` // plain number = minutes
|
||||
}
|
||||
|
||||
@@ -27,9 +28,23 @@ let shapeCleanup: (() => void) | null = null // Cleanup function for shape sync
|
||||
let indicatorCleanup: (() => void) | null = null // Cleanup function for indicator sync
|
||||
let customIndicatorCleanup: (() => void) | null = null // Cleanup for custom TV studies
|
||||
let chartInitialized = false // Guard against double-init on reconnect
|
||||
let symbolWatcher: WatchStopHandle | null = null
|
||||
|
||||
const maybeInitChart = () => {
|
||||
if (chartInitialized || !chartContainer.value) return
|
||||
if (!chartStore.symbol) {
|
||||
// Defer until backend provides a symbol
|
||||
if (!symbolWatcher) {
|
||||
symbolWatcher = watch(() => chartStore.symbol, (sym) => {
|
||||
if (sym) {
|
||||
symbolWatcher?.()
|
||||
symbolWatcher = null
|
||||
maybeInitChart()
|
||||
}
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
chartInitialized = true
|
||||
initChart()
|
||||
}
|
||||
@@ -205,6 +220,10 @@ function setupStoreWatchers() {
|
||||
}
|
||||
|
||||
onBeforeUnmount(() => {
|
||||
if (symbolWatcher) {
|
||||
symbolWatcher()
|
||||
symbolWatcher = null
|
||||
}
|
||||
// Cleanup shape synchronization
|
||||
if (shapeCleanup) {
|
||||
shapeCleanup()
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
import { ref, onMounted, onUnmounted, computed, onBeforeUnmount, watch, nextTick } from 'vue'
|
||||
import { register } from 'vue-advanced-chat'
|
||||
import Badge from 'primevue/badge'
|
||||
import Button from 'primevue/button'
|
||||
import { wsManager } from '../composables/useWebSocket'
|
||||
import type { WebSocketMessage } from '../composables/useWebSocket'
|
||||
import { useChannelStore } from '../stores/channel'
|
||||
@@ -188,12 +187,13 @@ const handleMessage = (data: WebSocketMessage) => {
|
||||
}
|
||||
} else if (data.type === 'agent_chunk') {
|
||||
console.log('[ChatPanel] Processing agent_chunk, content:', data.content, 'done:', data.done)
|
||||
// Always remove any tool-call bubble when the agent sends text, whether this
|
||||
// is a new message or a continuation of an existing one (e.g. after a retry).
|
||||
removeToolCallBubble()
|
||||
const timestamp = new Date().toTimeString().split(' ')[0].slice(0, 5)
|
||||
|
||||
if (!currentStreamingMessageId) {
|
||||
console.log('[ChatPanel] Starting new streaming message')
|
||||
// Remove any ephemeral tool-call bubble before starting the real response
|
||||
removeToolCallBubble()
|
||||
// Set up streaming state and mark user message as seen
|
||||
isAgentProcessing.value = true
|
||||
currentStreamingMessageId = generateMessageId()
|
||||
@@ -314,6 +314,7 @@ const stopAgent = () => {
|
||||
|
||||
// Send message handler
|
||||
const sendMessage = async (event: any) => {
|
||||
if (isAgentProcessing.value) { stopAgent(); return }
|
||||
// Extract data from CustomEvent.detail[0]
|
||||
const data = event.detail?.[0] || event
|
||||
|
||||
@@ -617,7 +618,11 @@ onUnmounted(() => {
|
||||
|
||||
<!-- Workspace loading overlay -->
|
||||
<div v-if="!channelStore.isReady" class="workspace-loading">
|
||||
<i class="pi pi-spin pi-spinner workspace-loading-spinner" />
|
||||
<svg class="workspace-loading-spinner" viewBox="0 0 50 50" xmlns="http://www.w3.org/2000/svg">
|
||||
<circle cx="25" cy="25" r="20" fill="none" stroke="rgba(8,153,129,0.2)" stroke-width="4"/>
|
||||
<circle cx="25" cy="25" r="20" fill="none" stroke="#089981" stroke-width="4"
|
||||
stroke-dasharray="80 200" stroke-linecap="round"/>
|
||||
</svg>
|
||||
<span class="workspace-loading-message">{{ channelStore.statusMessage || 'Connecting...' }}</span>
|
||||
</div>
|
||||
|
||||
@@ -643,18 +648,18 @@ onUnmounted(() => {
|
||||
@send-message="sendMessage"
|
||||
@fetch-messages="fetchMessages"
|
||||
@open-file="openFile"
|
||||
/>
|
||||
|
||||
<!-- Stop button overlay -->
|
||||
<div v-if="isAgentProcessing" class="stop-button-container">
|
||||
<Button
|
||||
icon="pi pi-stop-circle"
|
||||
label="Stop"
|
||||
severity="danger"
|
||||
@click="stopAgent"
|
||||
class="stop-button"
|
||||
/>
|
||||
>
|
||||
<div
|
||||
v-if="isAgentProcessing"
|
||||
slot="send-icon"
|
||||
@click.stop="stopAgent"
|
||||
style="display:flex;align-items:center;justify-content:center;width:100%;height:100%"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">
|
||||
<rect x="4" y="4" width="16" height="16" rx="2" fill="#f23645"/>
|
||||
</svg>
|
||||
</div>
|
||||
</vue-advanced-chat>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
@@ -682,8 +687,13 @@ onUnmounted(() => {
|
||||
}
|
||||
|
||||
.workspace-loading-spinner {
|
||||
font-size: 2rem;
|
||||
color: #089981;
|
||||
width: 2rem;
|
||||
height: 2rem;
|
||||
animation: workspace-spin 0.8s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes workspace-spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.workspace-loading-message {
|
||||
@@ -721,24 +731,4 @@ onUnmounted(() => {
|
||||
color: var(--p-surface-900);
|
||||
}
|
||||
|
||||
.stop-button-container {
|
||||
position: absolute;
|
||||
bottom: 80px;
|
||||
right: 20px;
|
||||
z-index: 1000;
|
||||
}
|
||||
|
||||
.stop-button {
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
||||
animation: pulse 2s infinite;
|
||||
}
|
||||
|
||||
@keyframes pulse {
|
||||
0%, 100% {
|
||||
opacity: 1;
|
||||
}
|
||||
50% {
|
||||
opacity: 0.8;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
108
web/src/components/tabs/ResearchTab.vue
Normal file
108
web/src/components/tabs/ResearchTab.vue
Normal file
@@ -0,0 +1,108 @@
|
||||
<script setup lang="ts">
|
||||
import { ref, computed } from 'vue'
|
||||
import { storeToRefs } from 'pinia'
|
||||
import { useResearchTypesStore } from '../../stores/researchTypes'
|
||||
|
||||
const store = useResearchTypesStore()
|
||||
const { types } = storeToRefs(store)
|
||||
|
||||
const expanded = ref<Set<string>>(new Set())
|
||||
|
||||
const rows = computed(() =>
|
||||
Object.entries(types.value).map(([id, t]) => ({ id, ...t }))
|
||||
)
|
||||
|
||||
function toggle(id: string) {
|
||||
if (expanded.value.has(id)) {
|
||||
expanded.value.delete(id)
|
||||
} else {
|
||||
expanded.value.add(id)
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div class="research-tab">
|
||||
<div v-if="rows.length === 0" class="empty">No research items</div>
|
||||
<div v-for="row in rows" :key="row.id" class="research-row">
|
||||
<button class="row-header" @click="toggle(row.id)">
|
||||
<i class="pi" :class="expanded.has(row.id) ? 'pi-chevron-down' : 'pi-chevron-right'" />
|
||||
<span class="row-name">{{ row.display_name }}</span>
|
||||
<span class="row-id">{{ row.id }}</span>
|
||||
</button>
|
||||
<div v-if="expanded.has(row.id)" class="row-body">
|
||||
<span v-if="row.description">{{ row.description }}</span>
|
||||
<span v-else class="no-desc">No description</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
.research-tab {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.empty {
|
||||
color: #555;
|
||||
text-align: center;
|
||||
padding: 16px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.research-row {
|
||||
border-bottom: 1px solid #1e1e1e;
|
||||
}
|
||||
|
||||
.row-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
width: 100%;
|
||||
background: none;
|
||||
border: none;
|
||||
padding: 5px 10px;
|
||||
cursor: pointer;
|
||||
text-align: left;
|
||||
color: #dbdbdb;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.row-header:hover {
|
||||
background: #1a1a1a;
|
||||
}
|
||||
|
||||
.row-header .pi {
|
||||
color: #666;
|
||||
font-size: 10px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.row-name {
|
||||
flex: 1;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.row-id {
|
||||
color: #555;
|
||||
font-size: 11px;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
.row-body {
|
||||
padding: 6px 26px 8px;
|
||||
font-size: 12px;
|
||||
color: #aaa;
|
||||
line-height: 1.5;
|
||||
background: #0d0d0d;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.no-desc {
|
||||
color: #444;
|
||||
font-style: italic;
|
||||
}
|
||||
</style>
|
||||
@@ -60,8 +60,6 @@ export function useStateSync(stores: Record<string, Store>) {
|
||||
currentSeqs[msg.store] = msg.seq;
|
||||
saveStoredSeqs(currentSeqs);
|
||||
console.log('[StateSync] Snapshot applied, new seq:', msg.seq);
|
||||
} else {
|
||||
console.warn('[StateSync] Store not found:', msg.store);
|
||||
}
|
||||
} else if (msg.type === 'patch') {
|
||||
console.log('[StateSync] Processing patch for store:', msg.store, 'seq:', msg.seq);
|
||||
@@ -89,8 +87,6 @@ export function useStateSync(stores: Record<string, Store>) {
|
||||
currentSeqs[msg.store] = msg.seq;
|
||||
saveStoredSeqs(currentSeqs);
|
||||
console.log('[StateSync] Patch applied successfully, new seq:', msg.seq);
|
||||
} else {
|
||||
console.warn('[StateSync] Store not found:', msg.store);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -263,7 +263,10 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
||||
throw err
|
||||
})
|
||||
.then((response) => {
|
||||
if (response.history) {
|
||||
if (response.error) {
|
||||
console.error('[TradingView Datafeed] getBars server error:', response.error)
|
||||
onError(response.error)
|
||||
} else if (response.history) {
|
||||
console.log('[TradingView Datafeed] Raw bar sample:', response.history.bars?.[0])
|
||||
console.log('[TradingView Datafeed] Denominators:', denoms)
|
||||
|
||||
@@ -309,7 +312,7 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
||||
this.sendRequest<any>({
|
||||
type: 'subscribe_bars',
|
||||
symbol: symbolInfo.ticker || symbolInfo.name,
|
||||
resolution: resolution,
|
||||
period_seconds: intervalToSeconds(resolution),
|
||||
subscription_id: listenerGuid
|
||||
})
|
||||
.then((response) => {
|
||||
@@ -328,8 +331,10 @@ export class WebSocketDatafeed implements IBasicDataFeed {
|
||||
}
|
||||
|
||||
unsubscribeBars(listenerGuid: string): void {
|
||||
const sub = this.subscriptions.get(listenerGuid)
|
||||
this.sendRequest<any>({
|
||||
type: 'unsubscribe_bars',
|
||||
period_seconds: sub ? intervalToSeconds(sub.resolution) : 60,
|
||||
subscription_id: listenerGuid
|
||||
})
|
||||
.then(() => {
|
||||
|
||||
@@ -30,8 +30,14 @@ class WebSocketManager {
|
||||
async connect(token: string): Promise<void> {
|
||||
this.token = token
|
||||
|
||||
// Close existing connection if any
|
||||
// Close existing connection if any — null out handlers first so the async
|
||||
// onclose event from the old socket cannot reset sessionStatus after the
|
||||
// new socket has already reached 'ready'.
|
||||
if (this.ws) {
|
||||
this.ws.onopen = null
|
||||
this.ws.onmessage = null
|
||||
this.ws.onerror = null
|
||||
this.ws.onclose = null
|
||||
this.ws.close()
|
||||
this.ws = null
|
||||
}
|
||||
|
||||
14
web/src/stores/researchTypes.ts
Normal file
14
web/src/stores/researchTypes.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref } from 'vue'
|
||||
|
||||
export interface ResearchType {
|
||||
display_name: string
|
||||
description?: string
|
||||
created_at: number
|
||||
modified_at: number
|
||||
}
|
||||
|
||||
export const useResearchTypesStore = defineStore('research_types', () => {
|
||||
const types = ref<Record<string, ResearchType>>({})
|
||||
return { types }
|
||||
})
|
||||
14
web/src/stores/strategyTypes.ts
Normal file
14
web/src/stores/strategyTypes.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref } from 'vue'
|
||||
|
||||
export interface StrategyType {
|
||||
display_name: string
|
||||
description?: string
|
||||
created_at: number
|
||||
modified_at: number
|
||||
}
|
||||
|
||||
export const useStrategyTypesStore = defineStore('strategy_types', () => {
|
||||
const types = ref<Record<string, StrategyType>>({})
|
||||
return { types }
|
||||
})
|
||||
Reference in New Issue
Block a user