data pipeline refactor and fix

2026-04-13 18:30:04 -04:00
parent 6418729b16
commit 326bf80846
96 changed files with 7107 additions and 1763 deletions

.gitignore

@@ -1,6 +1,7 @@
 /backend.old/data
 /backend.old/uploads/
 chat/
+bin/create-all-users

 # Environment variables
 .env
@@ -114,6 +115,9 @@ deploy/k8s/prod/secrets/*.yaml
 # Dev environment image tags
 .dev-image-tag

+# Dev gateway-config is generated from gateway-config.yaml.tpl by bin/dev
+deploy/k8s/dev/configs/gateway-config.yaml

 # Protobuf copies (canonical files are in /protobuf/)
 flink/protobuf/
 relay/protobuf/


@@ -21,6 +21,10 @@ usage() {
 }

 ENV="${1:-dev}"
+ARG_EMAIL="${2:-}"
+ARG_PASSWORD="${3:-}"
+ARG_NAME="${4:-}"
+ARG_LICENSE="${5:-}"

 if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then
   echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}"
@@ -44,16 +48,36 @@ if [ -z "$PG_POD" ]; then
   exit 1
 fi

-# Prompt for credentials
-read -p "Email: " USER_EMAIL
-read -rs -p "Password (min 8 chars): " USER_PASSWORD
-echo ""
+# Get credentials — from args or interactively
+if [[ -n "$ARG_EMAIL" ]]; then
+  USER_EMAIL="$ARG_EMAIL"
+else
+  read -p "Email: " USER_EMAIL
+fi
+
+if [[ -n "$ARG_PASSWORD" ]]; then
+  USER_PASSWORD="$ARG_PASSWORD"
+else
+  read -rs -p "Password (min 8 chars): " USER_PASSWORD
+  echo ""
+fi

 if [[ ${#USER_PASSWORD} -lt 8 ]]; then
   echo -e "${RED}✗ Password must be at least 8 characters${NC}"
   exit 1
 fi

-read -p "Display name: " USER_NAME
-read -p "License type [free|pro|enterprise] (default: pro): " LICENSE_TYPE
+if [[ -n "$ARG_NAME" ]]; then
+  USER_NAME="$ARG_NAME"
+else
+  read -p "Display name: " USER_NAME
+fi
+
+if [[ -n "$ARG_LICENSE" ]]; then
+  LICENSE_TYPE="$ARG_LICENSE"
+else
+  read -p "License type [free|pro|enterprise] (default: pro): " LICENSE_TYPE
+fi
 LICENSE_TYPE="${LICENSE_TYPE:-pro}"

 # Check if user already exists


@@ -43,7 +43,7 @@ if [ "$PROJECT" == "dev" ]; then
 fi

 if [ "$DEV" == "1" ]; then
-    TAG="dev`date +%Y%m%d%H%M%S`"
+    TAG="dev`date -u +%Y%m%d%H%M%S`"
     if [ "$1" != "" ]; then
         CONFIG=$1
         shift

bin/deploy-all (new executable file)

@@ -0,0 +1,158 @@
#!/usr/bin/env bash
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
KUBECTL="kubectl --context=prod"
CLEAR_SANDBOXES=0
usage() {
echo "Usage: $0 [--sandboxes]"
echo ""
echo "Deploy all services to production. Does NOT update secrets (use bin/secret-update)."
echo ""
echo "Steps performed:"
echo " 1. Apply base kustomize manifests (namespaces, RBAC, policies)"
echo " 2. Apply infrastructure.yaml (statefulsets, deployments)"
echo " 3. Run bin/config-update prod"
echo " 4. Build and deploy all application images"
echo " 5. Wait for rollouts"
echo ""
echo "Options:"
echo " --sandboxes Delete sandbox Deployments and Services (PVCs are retained)."
echo " The gateway will recreate sandboxes on next user login."
echo ""
exit 1
}
for arg in "$@"; do
case "$arg" in
--sandboxes)
CLEAR_SANDBOXES=1
;;
--help|-h)
usage
;;
*)
echo -e "${RED}Unknown argument: $arg${NC}"
usage
;;
esac
done
echo -e "${YELLOW}╔══════════════════════════════════════════╗${NC}"
echo -e "${YELLOW}║ PRODUCTION FULL DEPLOY ║${NC}"
echo -e "${YELLOW}╚══════════════════════════════════════════╝${NC}"
echo ""
echo -e "${YELLOW}⚠️ This will update ALL production services.${NC}"
echo -e "${YELLOW} Secrets are NOT updated (run bin/secret-update prod separately).${NC}"
if [ "$CLEAR_SANDBOXES" == "1" ]; then
echo -e "${YELLOW} Sandbox deployments will be DELETED (PVCs retained).${NC}"
fi
echo ""
read -p "Are you sure you want to continue? (yes/no): " confirm
if [[ "$confirm" != "yes" ]]; then
echo "Aborted."
exit 0
fi
step() {
echo ""
echo -e "${BLUE}━━━ $1 ━━━${NC}"
}
ok() {
echo -e "${GREEN}✓${NC} $1"
}
fail() {
echo -e "${RED}✗ $1${NC}"
exit 1
}
# ── Step 1: Base kustomize manifests ─────────────────────────────────────────
step "Step 1/5: Applying base kustomize manifests"
cd "$ROOT_DIR"
$KUBECTL apply -k deploy/k8s/prod/
ok "Base manifests applied (namespaces, RBAC, policies, quotas)"
# ── Step 2: Infrastructure ────────────────────────────────────────────────────
step "Step 2/5: Applying infrastructure.yaml"
$KUBECTL -n ai apply -f deploy/k8s/prod/infrastructure.yaml
ok "Infrastructure applied"
# ── Step 3: Configs ───────────────────────────────────────────────────────────
step "Step 3/5: Updating configs"
# config-update prod will prompt for confirmation; we already confirmed above,
# so feed "yes" automatically via stdin.
echo "yes" | "$SCRIPT_DIR/config-update" prod
ok "Configs updated"
# ── Step 4: Build and deploy all application images ───────────────────────────
step "Step 4/5: Building and deploying application images"
echo ""
SERVICES=(gateway web sandbox lifecycle-sidecar flink relay ingestor)
for service in "${SERVICES[@]}"; do
echo -e "${GREEN}→${NC} Deploying $service..."
"$SCRIPT_DIR/deploy" "$service" prod
ok "$service deployed"
echo ""
done
# ── Step 4b: Optionally clear sandbox deployments ─────────────────────────────
if [ "$CLEAR_SANDBOXES" == "1" ]; then
step "Step 4b: Clearing sandbox deployments"
SANDBOX_DEPLOYS=$($KUBECTL -n sandbox get deployments -o name 2>/dev/null || true)
SANDBOX_SVCS=$($KUBECTL -n sandbox get services -o name 2>/dev/null || true)
if [ -z "$SANDBOX_DEPLOYS" ]; then
echo " No sandbox deployments found."
else
echo " Deleting sandbox deployments..."
echo "$SANDBOX_DEPLOYS" | xargs $KUBECTL -n sandbox delete
ok "Sandbox deployments deleted"
fi
if [ -n "$SANDBOX_SVCS" ]; then
echo " Deleting sandbox services..."
echo "$SANDBOX_SVCS" | xargs $KUBECTL -n sandbox delete
ok "Sandbox services deleted"
fi
echo -e "${YELLOW} PVCs retained — gateway will recreate sandboxes on next login.${NC}"
fi
# ── Step 5: Wait for rollouts ─────────────────────────────────────────────────
step "Step 5/5: Waiting for rollouts"
ROLLOUTS=(
"deployment/gateway"
"deployment/ai-web"
"deployment/relay"
"deployment/ingestor"
"deployment/flink-jobmanager"
"deployment/flink-taskmanager"
)
for r in "${ROLLOUTS[@]}"; do
echo -e "${GREEN}→${NC} Waiting for $r..."
$KUBECTL -n ai rollout status "$r" --timeout=180s || echo -e "${YELLOW} ⚠ $r did not become ready within 3 minutes${NC}"
done
echo ""
echo -e "${GREEN}╔══════════════════════════════════════════╗${NC}"
echo -e "${GREEN}║ Deploy complete! ║${NC}"
echo -e "${GREEN}╚══════════════════════════════════════════╝${NC}"
echo ""
echo " Verify: curl -I https://dexorder.ai/api/health"
echo ""

bin/dev

@@ -99,6 +99,12 @@ start_minikube() {
     fi
 }

+generate_gateway_config_dev() {
+    sed "s|SANDBOX_IMAGE_TAG|dexorder/ai-sandbox:$SANDBOX_TAG|g; s|SIDECAR_IMAGE_TAG|dexorder/ai-lifecycle-sidecar:$SIDECAR_TAG|g" \
+        "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml.tpl" \
+        > "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
+}

 rebuild_images() {
     local service="${1:-all}"
     echo -e "${BLUE}Building custom images...${NC}"
@@ -221,12 +227,7 @@ deploy_services() {
     # Update configs
     echo -e "${GREEN}→${NC} Updating configs..."
-    # Template gateway-config.yaml with actual image tags (backup first for safe restore)
-    local _gw_bak
-    _gw_bak=$(mktemp)
-    cp "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml" "$_gw_bak"
-    sed -i "s|sandbox_image: dexorder/ai-sandbox:.*|sandbox_image: dexorder/ai-sandbox:$SANDBOX_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-    sed -i "s|sidecar_image: dexorder/ai-lifecycle-sidecar:.*|sidecar_image: dexorder/ai-lifecycle-sidecar:$SIDECAR_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
+    generate_gateway_config_dev
     "$SCRIPT_DIR/config-update" dev
@@ -264,10 +265,6 @@ EOF
     # Clean up the appended image tags from kustomization.yaml
     sed -i '/# Image tags (added by bin\/dev)/,$d' kustomization.yaml
-    # Restore gateway-config.yaml from backup
-    cp "$_gw_bak" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-    rm "$_gw_bak"

     echo -e "${GREEN}✓ Services deployed${NC}"
     echo ""
@@ -525,6 +522,9 @@ deep_restart() {
             ;;
     esac

+    echo -e "${GREEN}→${NC} Rebuilding application images..."
+    rebuild_images

     echo -e "${GREEN}→${NC} Redeploying services..."
     deploy_services
@@ -589,11 +589,7 @@ deploy_service() {
         gateway)
             image_name="dexorder/ai-gateway"
             image_tag="$GATEWAY_TAG"
-            # Also need to template gateway-config.yaml (backup for safe restore)
-            _gw_bak_single=$(mktemp)
-            cp "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml" "$_gw_bak_single"
-            sed -i "s|sandbox_image: dexorder/ai-sandbox:.*|sandbox_image: dexorder/ai-sandbox:$SANDBOX_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-            sed -i "s|sidecar_image: dexorder/ai-lifecycle-sidecar:.*|sidecar_image: dexorder/ai-lifecycle-sidecar:$SIDECAR_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
+            generate_gateway_config_dev
             "$SCRIPT_DIR/config-update" dev
             ;;
         web)
@@ -623,12 +619,6 @@ EOF
     # Clean up the appended image tags from kustomization.yaml
     sed -i '/# Image tags (added by bin\/dev)/,$d' kustomization.yaml
-    # Restore gateway-config.yaml from backup if we modified it
-    if [ "$service" == "gateway" ]; then
-        cp "$_gw_bak_single" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-        rm "$_gw_bak_single"
-    fi

     echo -e "${GREEN}✓ $service deployed${NC}"
 }
@@ -713,15 +703,10 @@ case "$COMMAND" in
         cd "$ROOT_DIR/deploy/k8s/dev"

-        # Template gateway-config if gateway is in the list (backup for safe restore)
-        _ms_gw_bak=""
+        # Regenerate gateway-config if gateway is in the list
         for svc in "${deploy_services_list[@]}"; do
             if [ "$svc" == "gateway" ]; then
-                _ms_gw_bak=$(mktemp)
-                cp "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml" "$_ms_gw_bak"
-                sed -i "s|sandbox_image: dexorder/ai-sandbox:.*|sandbox_image: dexorder/ai-sandbox:$SANDBOX_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-                sed -i "s|sidecar_image: dexorder/ai-lifecycle-sidecar:.*|sidecar_image: dexorder/ai-lifecycle-sidecar:$SIDECAR_TAG|g" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-                "$SCRIPT_DIR/config-update" dev
+                generate_gateway_config_dev
                 break
             fi
         done
@@ -744,11 +729,6 @@ case "$COMMAND" in
         sed -i '/# Image tags (added by bin\/dev)/,$d' kustomization.yaml

-        # Restore gateway-config from backup if we modified it
-        if [ -n "$_ms_gw_bak" ]; then
-            cp "$_ms_gw_bak" "$ROOT_DIR/deploy/k8s/dev/configs/gateway-config.yaml"
-            rm "$_ms_gw_bak"
-        fi
     fi

     # Handle sandbox separately


@@ -45,6 +45,29 @@ else
     MCP_URL="http://localhost:8080/mcp"
 fi

+# ---------- MinIO Bucket Initialization ----------
+echo ""
+echo -e "${BLUE}=== MinIO Storage Setup ===${NC}"
+echo ""
+echo -e "${BLUE}Waiting for MinIO pod...${NC}"
+$KUBECTL wait --for=condition=ready --timeout=120s pod -l app=minio 2>/dev/null || {
+    echo -e "${YELLOW}⚠️ MinIO not ready after 120s, skipping bucket setup${NC}"
+}
+
+MINIO_POD=$($KUBECTL get pods -l app=minio -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
+if [ -n "$MINIO_POD" ]; then
+    echo -e "${GREEN}→${NC} Ensuring warehouse bucket exists..."
+    MINIO_USER=$($KUBECTL exec "$MINIO_POD" -- sh -c 'echo $MINIO_ROOT_USER' 2>/dev/null | tr -d '\r')
+    MINIO_PASS=$($KUBECTL exec "$MINIO_POD" -- sh -c 'echo $MINIO_ROOT_PASSWORD' 2>/dev/null | tr -d '\r')
+    $KUBECTL exec "$MINIO_POD" -- mc alias set local http://localhost:9000 "$MINIO_USER" "$MINIO_PASS" > /dev/null 2>&1
+    $KUBECTL exec "$MINIO_POD" -- mc mb --ignore-existing local/warehouse > /dev/null 2>&1
+    echo -e "${GREEN}✓ Warehouse bucket ready${NC}"
+else
+    echo -e "${YELLOW}⚠️ MinIO pod not found, skipping bucket setup${NC}"
+fi

 # ---------- Schema Initialization ----------
 echo ""


@@ -56,8 +56,8 @@ data:
       namespace: sandbox
       service_namespace: default
       in_cluster: true
-      sandbox_image: dexorder/ai-sandbox:dev20260409143116
-      sidecar_image: dexorder/ai-lifecycle-sidecar:dev20260408103634
+      sandbox_image: SANDBOX_IMAGE_TAG
+      sidecar_image: SIDECAR_IMAGE_TAG
       storage_class: standard
       image_pull_policy: Never  # For minikube dev - use local images


@@ -1,9 +1,8 @@
 # CCXT Ingestor Configuration

-# Relay ZMQ endpoints (relay is the well-known gateway)
-flink_hostname: relay
-ingestor_work_port: 5555  # SUB - receives DataRequest with exchange prefix
-# Note: No response port needed - async architecture via Kafka!
+# Flink IngestorBroker (ROUTER) endpoint
+flink_hostname: flink-jobmanager
+ingestor_broker_port: 5567

 # Supported exchanges (subscribe to these prefixes)
 supported_exchanges:


@@ -455,6 +455,14 @@ spec:
       protocol: TCP
       port: 5561
       targetPort: 5561
+    - name: zmq-client-pull
+      protocol: TCP
+      port: 5566
+      targetPort: 5566
+    - name: zmq-ingestor-broker
+      protocol: TCP
+      port: 5567
+      targetPort: 5567
   type: ClusterIP
 ---
 apiVersion: apps/v1
@@ -583,14 +591,6 @@ spec:
   selector:
     app: relay
   ports:
-    - name: work-queue
-      protocol: TCP
-      port: 5555
-      targetPort: 5555
-    - name: responses
-      protocol: TCP
-      port: 5556
-      targetPort: 5556
     - name: market-data
       protocol: TCP
       port: 5558
@@ -620,10 +620,6 @@ spec:
         image: dexorder/ai-relay
         imagePullPolicy: Never
         ports:
-        - containerPort: 5555
-          name: work-queue
-        - containerPort: 5556
-          name: responses
         - containerPort: 5558
           name: market-data
         - containerPort: 5559
@@ -657,9 +653,9 @@ spec:
         app: ingestor
     spec:
       initContainers:
-        - name: wait-for-relay
+        - name: wait-for-flink
           image: busybox:1.36
-          command: ['sh', '-c', 'until nc -z relay 5555; do echo waiting for relay; sleep 2; done;']
+          command: ['sh', '-c', 'until nc -z flink-jobmanager 5567; do echo waiting for flink broker; sleep 2; done;']
         - name: wait-for-kafka
          image: busybox:1.36
          command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;']


@@ -30,6 +30,7 @@ data:
namespace: "trading" namespace: "trading"
# S3 endpoint for MinIO in default namespace # S3 endpoint for MinIO in default namespace
s3_endpoint: "http://minio.default.svc.cluster.local:9000" s3_endpoint: "http://minio.default.svc.cluster.local:9000"
s3_region: "us-east-1"
relay: relay:
endpoint: "tcp://relay.default.svc.cluster.local:5559" endpoint: "tcp://relay.default.svc.cluster.local:5559"


@@ -1,9 +1,8 @@
 # CCXT Ingestor Configuration

-# Relay ZMQ endpoints (relay is the well-known gateway)
-flink_hostname: relay
-ingestor_work_port: 5555  # SUB - receives DataRequest with exchange prefix
-# Note: No response port needed - async architecture via Kafka!
+# Flink IngestorBroker (ROUTER) endpoint
+flink_hostname: flink-jobmanager
+ingestor_broker_port: 5567

 # Supported exchanges (subscribe to these prefixes)
 supported_exchanges:


@@ -451,6 +451,14 @@ spec:
       protocol: TCP
       port: 5561
       targetPort: 5561
+    - name: zmq-client-pull
+      protocol: TCP
+      port: 5566
+      targetPort: 5566
+    - name: zmq-ingestor-broker
+      protocol: TCP
+      port: 5567
+      targetPort: 5567
   type: ClusterIP
 ---
 apiVersion: apps/v1
@@ -579,14 +587,6 @@ spec:
   selector:
     app: relay
   ports:
-    - name: work-queue
-      protocol: TCP
-      port: 5555
-      targetPort: 5555
-    - name: responses
-      protocol: TCP
-      port: 5556
-      targetPort: 5556
     - name: market-data
       protocol: TCP
       port: 5558
@@ -616,10 +616,6 @@ spec:
         image: dexorder/ai-relay
         imagePullPolicy: Always
         ports:
-        - containerPort: 5555
-          name: work-queue
-        - containerPort: 5556
-          name: responses
         - containerPort: 5558
           name: market-data
         - containerPort: 5559
@@ -653,9 +649,9 @@ spec:
        app: ingestor
     spec:
       initContainers:
-        - name: wait-for-relay
+        - name: wait-for-flink
          image: busybox:1.36
-          command: ['sh', '-c', 'until nc -z relay 5555; do echo waiting for relay; sleep 2; done;']
+          command: ['sh', '-c', 'until nc -z flink-jobmanager 5567; do echo waiting for flink broker; sleep 2; done;']
        - name: wait-for-kafka
          image: busybox:1.36
          command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;']


@@ -22,6 +22,7 @@ data:
       catalog_uri: "http://iceberg-catalog.ai.svc.cluster.local:8181"
       namespace: "trading"
       s3_endpoint: "http://minio.ai.svc.cluster.local:9000"
+      s3_region: "us-east-1"
     relay:
       endpoint: "tcp://relay.ai.svc.cluster.local:5559"


@@ -2,7 +2,8 @@ package com.dexorder.flink;
 import com.dexorder.flink.config.AppConfig;
 import com.dexorder.flink.iceberg.SchemaInitializer;
-import com.dexorder.flink.ingestor.IngestorWorkQueue;
+import com.dexorder.flink.ingestor.IngestorBroker;
+import com.dexorder.flink.ingestor.RealtimeSubscriptionManager;
 import com.dexorder.flink.kafka.TopicManager;
 import com.dexorder.flink.publisher.HistoryNotificationForwarder;
 import com.dexorder.flink.publisher.HistoryNotificationFunction;
@@ -10,6 +11,11 @@ import com.dexorder.flink.publisher.OHLCBatchWrapper;
 import com.dexorder.flink.publisher.OHLCBatchDeserializer;
 import com.dexorder.flink.publisher.MarketWrapper;
 import com.dexorder.flink.publisher.MarketDeserializer;
+import com.dexorder.flink.publisher.RealtimeBar;
+import com.dexorder.flink.publisher.RealtimeBarFunction;
+import com.dexorder.flink.publisher.RealtimeBarPublisher;
+import com.dexorder.flink.publisher.TickWrapper;
+import com.dexorder.flink.publisher.TickDeserializer;
 import com.dexorder.flink.sink.HistoricalBatchWriter;
 import com.dexorder.flink.sink.SymbolMetadataWriter;
 import com.dexorder.flink.zmq.ZmqChannelManager;
@@ -83,11 +89,16 @@ public class TradingFlinkApp {
             catalogProps
         );

+        String warehouse = config.getString("iceberg_warehouse", "s3://warehouse/");
+        String warehouseBucket = warehouse.replaceFirst("^s3://", "").split("/")[0];
+
         org.apache.iceberg.catalog.Catalog catalog = catalogLoader.loadCatalog();
         try {
             SchemaInitializer schemaInitializer = new SchemaInitializer(
                 catalog,
-                config.getIcebergNamespace()
+                config.getIcebergNamespace(),
+                config.getString("s3_endpoint", "http://minio:9000"),
+                warehouseBucket
             );
             schemaInitializer.initializeSchemas();
         } finally {
@@ -107,20 +118,28 @@ public class TradingFlinkApp {
         zmqManager.initializeChannels();
         LOG.info("ZeroMQ channels initialized");

-        // Initialize history notification forwarder (runs in job manager)
-        // Binds PULL socket to receive notifications from task managers, forwards to MARKET_DATA_PUB
+        // Initialize ingestor broker — manages ROUTER/DEALER work queue for all ingestors
+        IngestorBroker broker = new IngestorBroker(zmqManager);
+        broker.start();
+        LOG.info("IngestorBroker started");
+
+        // Initialize realtime subscription manager — owns MARKET_DATA_PUB socket exclusively,
+        // detects XPUB subscription events, and calls broker for realtime job lifecycle.
+        // Other components publish via subscriptionManager.enqueuePublish() (thread-safe).
+        RealtimeSubscriptionManager subscriptionManager = new RealtimeSubscriptionManager(zmqManager, broker);
+        subscriptionManager.start();
+        LOG.info("RealtimeSubscriptionManager started");
+
+        // Initialize history notification forwarder (runs in job manager).
+        // Binds PULL socket to receive notifications from task managers, enqueues them for
+        // publication via RealtimeSubscriptionManager (sole owner of MARKET_DATA_PUB).
         HistoryNotificationForwarder notificationForwarder = new HistoryNotificationForwarder(
             config.getNotificationPullPort(),
-            zmqManager.getSocket(ZmqChannelManager.Channel.MARKET_DATA_PUB)
+            subscriptionManager::enqueuePublish
         );
         notificationForwarder.start();
         LOG.info("History notification forwarder started on port {}", config.getNotificationPullPort());

-        // Initialize ingestor work queue
-        IngestorWorkQueue workQueue = new IngestorWorkQueue(zmqManager);
-        workQueue.start();
-        LOG.info("Ingestor work queue started");
-
         // Set up Flink streaming environment
         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
@@ -224,8 +243,37 @@ public class TradingFlinkApp {
         LOG.info("Symbol metadata pipeline configured: SymbolMetadataWriter -> Iceberg -> METADATA_UPDATE notification");

+        // Realtime tick pipeline: Kafka market-tick → OHLC bars → ZMQ notify → clients
+        KafkaSource<TickWrapper> tickSource = KafkaSource.<TickWrapper>builder()
+            .setBootstrapServers(config.getKafkaBootstrapServers())
+            .setTopics(config.getKafkaTickTopic())
+            .setGroupId("flink-tick-consumer")
+            .setStartingOffsets(OffsetsInitializer.latest())
+            .setValueOnlyDeserializer(new TickDeserializer())
+            .build();
+
+        DataStream<TickWrapper> tickStream = env
+            .fromSource(tickSource, WatermarkStrategy.noWatermarks(), "Tick Kafka Source")
+            .filter(t -> t != null)
+            .setParallelism(1);
+
+        // Aggregate ticks into OHLC bars for each configured period.
+        // keyBy ticker so all ticks for a ticker land on the same slot and accumulate together.
+        int[] periods = config.getRealtimePeriods();
+        DataStream<RealtimeBar> barStream = tickStream
+            .keyBy(TickWrapper::getTicker)
+            .flatMap(new RealtimeBarFunction(periods))
+            .setParallelism(1);
+
+        barStream.addSink(new RealtimeBarPublisher(notificationEndpoint))
+            .setParallelism(1)
+            .name("RealtimeBarPublisher");
+
+        LOG.info("Realtime tick pipeline configured: market-tick → OHLC bars → clients (periods={})",
+            java.util.Arrays.toString(periods));
+
         // TODO: Set up CEP patterns and triggers
-        // TODO: Set up realtime tick processing

         LOG.info("Flink job configured, starting execution");
@@ -233,15 +281,10 @@
         Runtime.getRuntime().addShutdownHook(new Thread(() -> {
             LOG.info("Shutting down Trading Flink Application");
             try {
-                // Stop work queue
-                workQueue.stop();
-                // Stop notification forwarder
                 notificationForwarder.close();
+                subscriptionManager.stop();
-                // Close ZMQ channels
+                broker.stop();
                 zmqManager.close();

                 LOG.info("Shutdown complete");
             } catch (Exception e) {
                 LOG.error("Error during shutdown", e);

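RealtimeBarFunction and RealtimeBar are referenced above but their sources are not part of this commit. As a rough illustration of the per-period accumulation the new pipeline depends on, the core fold step of such a keyed flatMap could look like the sketch below (hypothetical class and field names, not the repository's actual implementation):

    // Hypothetical sketch of window-aligned OHLC accumulation; illustrative only.
    final class OhlcAccumulatorSketch {
        long bucketStart = -1;          // epoch seconds, aligned to the period boundary
        double open, high, low, close;

        /** Folds one tick into the bar for periodSeconds; returns true when a new window opens. */
        boolean add(long tsSeconds, double price, int periodSeconds) {
            long bucket = tsSeconds - (tsSeconds % periodSeconds);
            boolean rolled = bucket != bucketStart;
            if (rolled) {               // first tick of a new window: reset the bar
                bucketStart = bucket;
                open = high = low = close = price;
            } else {                    // same window: extend high/low, move the close
                high = Math.max(high, price);
                low = Math.min(low, price);
                close = price;
            }
            return rolled;
        }
    }

A real RealtimeBarFunction would keep one such accumulator per configured period and ticker, emitting an updated bar downstream whenever a tick lands or a window rolls over.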

@@ -91,14 +91,20 @@ public class AppConfig {
     }

     // ZMQ port getters
-    public int getIngestorWorkQueuePort() {
-        return getInt("zmq_ingestor_work_queue_port", 5555);
-    }
     public int getMarketDataPubPort() {
         return getInt("zmq_market_data_pub_port", 5558);
     }

+    /** Port where Flink's IngestorBroker binds a PULL socket to receive requests from relay PUSH */
+    public int getFlinkRequestPullPort() {
+        return getInt("zmq_flink_request_pull_port", 5566);
+    }
+
+    /** Port where Flink's IngestorBroker binds a ROUTER for ingestor DEALER connections */
+    public int getIngestorBrokerPort() {
+        return getInt("zmq_ingestor_broker_port", 5567);
+    }
+
     public String getBindAddress() {
         return getString("zmq_bind_address", "tcp://*");
     }
@@ -112,6 +118,20 @@
         return getString("kafka_tick_topic", "market-tick");
     }

+    /**
+     * Comma-separated OHLC period lengths in seconds for realtime bar computation.
+     * Default covers common chart periods: 1m, 5m, 15m, 1h, 4h, 1d.
+     */
+    public int[] getRealtimePeriods() {
+        String raw = getString("realtime_periods", "60,300,900,3600,14400,86400");
+        String[] parts = raw.split(",");
+        int[] periods = new int[parts.length];
+        for (int i = 0; i < parts.length; i++) {
+            periods[i] = Integer.parseInt(parts[i].trim());
+        }
+        return periods;
+    }
+
     public String getKafkaOhlcTopic() {
         return getString("kafka_ohlc_topic", "market-ohlc");
     }

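As an example of the format getRealtimePeriods() accepts, a config entry of realtime_periods: "60,300,900" limits realtime bars to 1m, 5m and 15m; the parsing above is equivalent to:

    // Equivalent parsing of the comma-separated realtime_periods value (illustrative).
    int[] periods = java.util.Arrays.stream("60,300,900".split(","))
            .mapToInt(s -> Integer.parseInt(s.trim()))
            .toArray();   // -> {60, 300, 900}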

@@ -9,8 +9,16 @@ import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.types.Types;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.S3Configuration;
+import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.NoSuchBucketException;

 import java.io.IOException;
+import java.net.URI;

 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
@@ -26,10 +34,14 @@ public class SchemaInitializer {
     private final Catalog catalog;
     private final String namespace;
+    private final String s3Endpoint;
+    private final String warehouseBucket;

-    public SchemaInitializer(Catalog catalog, String namespace) {
+    public SchemaInitializer(Catalog catalog, String namespace, String s3Endpoint, String warehouseBucket) {
         this.catalog = catalog;
         this.namespace = namespace;
+        this.s3Endpoint = s3Endpoint;
+        this.warehouseBucket = warehouseBucket;
     }

     /**
@@ -40,6 +52,9 @@ public class SchemaInitializer {
     public void initializeSchemas() throws IOException {
         LOG.info("Initializing Iceberg schemas in namespace: {}", namespace);

+        // Ensure S3 bucket exists before attempting to create tables
+        ensureS3BucketExists();
+
         // Ensure namespace exists
         ensureNamespaceExists();
@@ -52,6 +67,36 @@ public class SchemaInitializer {
         LOG.info("Schema initialization completed successfully");
     }

+    /**
+     * Ensure the S3 warehouse bucket exists, creating it if necessary.
+     * Runs before any table creation so a fresh MinIO deployment doesn't crash Flink.
+     */
+    private void ensureS3BucketExists() {
+        if (s3Endpoint == null || warehouseBucket == null || warehouseBucket.isEmpty()) {
+            LOG.warn("S3 endpoint or warehouse bucket not configured, skipping bucket check");
+            return;
+        }
+        LOG.info("Ensuring S3 bucket '{}' exists at {}", warehouseBucket, s3Endpoint);
+        try (S3Client s3 = S3Client.builder()
+                .endpointOverride(URI.create(s3Endpoint))
+                .region(Region.of("us-east-1"))
+                .serviceConfiguration(S3Configuration.builder().pathStyleAccessEnabled(true).build())
+                .credentialsProvider(DefaultCredentialsProvider.create())
+                .build()) {
+            try {
+                s3.headBucket(HeadBucketRequest.builder().bucket(warehouseBucket).build());
+                LOG.info("S3 bucket '{}' already exists", warehouseBucket);
+            } catch (NoSuchBucketException e) {
+                LOG.warn("S3 bucket '{}' not found — creating it now", warehouseBucket);
+                s3.createBucket(CreateBucketRequest.builder().bucket(warehouseBucket).build());
+                LOG.info("Created S3 bucket '{}'", warehouseBucket);
+            }
+        } catch (Exception e) {
+            LOG.error("Failed to ensure S3 bucket '{}' exists at {}", warehouseBucket, s3Endpoint, e);
+            throw new RuntimeException("S3 bucket initialization failed for: " + warehouseBucket, e);
+        }
+    }
+
     /**
      * Ensure the namespace exists in the catalog.
      */


@@ -0,0 +1,503 @@
package com.dexorder.flink.ingestor;
import com.dexorder.flink.zmq.ZmqChannelManager;
import com.dexorder.proto.DataRequest;
import com.dexorder.proto.RealtimeParams;
import com.dexorder.proto.SubmitHistoricalRequest;
import com.dexorder.proto.WorkComplete;
import com.dexorder.proto.WorkHeartbeat;
import com.dexorder.proto.WorkReject;
import com.dexorder.proto.WorkStop;
import com.dexorder.proto.WorkerReady;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zeromq.ZMQ;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
* LRU-style work broker for ingestors.
*
* Ingestors connect via DEALER to the ROUTER socket on port 5567. They register with READY,
* are dispatched WORK messages, and respond with COMPLETE (historical) or HEARTBEAT (realtime).
* If a heartbeat times out the job is re-queued and dispatched to another available worker.
*
* Also receives SubmitHistoricalRequest messages forwarded by the relay on the PULL socket (5566).
*
* Message type IDs (ZMQ framing, not Kafka):
* 0x10 SubmitHistoricalRequest (relay → Flink via PULL, same as client wire type)
* 0x20 WorkerReady (ingestor → Flink)
* 0x21 WorkComplete (ingestor → Flink)
* 0x22 WorkHeartbeat (ingestor → Flink)
* 0x23 WorkReject (ingestor → Flink)
* 0x01 DataRequest/WorkAssign (Flink → ingestor via ROUTER)
* 0x25 WorkStop (Flink → ingestor via ROUTER)
*/
public class IngestorBroker implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(IngestorBroker.class);
private static final byte PROTOCOL_VERSION = 0x01;
private static final byte MSG_TYPE_SUBMIT_REQUEST = 0x10;
private static final byte MSG_TYPE_WORKER_READY = 0x20;
private static final byte MSG_TYPE_WORK_COMPLETE = 0x21;
private static final byte MSG_TYPE_WORK_HEARTBEAT = 0x22;
private static final byte MSG_TYPE_WORK_REJECT = 0x23;
private static final byte MSG_TYPE_WORK_ASSIGN = 0x01; // DataRequest type on wire
private static final byte MSG_TYPE_WORK_STOP = 0x25;
/** Re-queue realtime job if no heartbeat received within this window (ms) */
private static final long HEARTBEAT_TIMEOUT_MS = 25_000;
/** Re-queue historical job if not completed within this window (ms) */
private static final long HISTORICAL_TIMEOUT_MS = 60_000;
private final ZmqChannelManager zmqManager;
private volatile boolean running;
private Thread brokerThread;
// ── Worker tracking ──────────────────────────────────────────────────────
/** Workers ready to accept a job, in LRU order (head = least recently used) */
private final Deque<WorkerInfo> freeWorkers = new ArrayDeque<>();
/** Jobs waiting for a compatible free worker */
private final Queue<DataRequest> pendingJobs = new ArrayDeque<>();
/** Jobs currently executing on a worker */
private final Map<String, ActiveJob> activeJobs = new ConcurrentHashMap<>();
/** Worker identity → WorkerInfo (supported exchanges recorded when the worker sends READY) */
private final Map<String, WorkerInfo> knownWorkers = new ConcurrentHashMap<>();
// ── Thread-safe inbound queue from RealtimeSubscriptionManager ───────────
private final Queue<DataRequest> externalSubmissions = new ConcurrentLinkedQueue<>();
public IngestorBroker(ZmqChannelManager zmqManager) {
this.zmqManager = zmqManager;
}
public void start() {
if (running) {
LOG.warn("IngestorBroker already running");
return;
}
running = true;
brokerThread = new Thread(this::brokerLoop, "IngestorBroker-Thread");
brokerThread.setDaemon(false);
brokerThread.start();
LOG.info("IngestorBroker started");
}
public void stop() {
running = false;
if (brokerThread != null) {
brokerThread.interrupt();
try {
brokerThread.join(5000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
LOG.info("IngestorBroker stopped");
}
/**
* Submit a realtime data request from outside the broker thread (thread-safe).
* Called by RealtimeSubscriptionManager when subscription ref count goes 0→1.
*/
public void submitRealtimeRequest(String ticker) {
String jobId = UUID.randomUUID().toString();
DataRequest request = DataRequest.newBuilder()
.setRequestId(jobId)
.setJobId(jobId)
.setType(DataRequest.RequestType.REALTIME_TICKS)
.setTicker(ticker)
.setRealtime(RealtimeParams.newBuilder()
.setIncludeTicks(true)
.setIncludeOhlc(false)
.build())
.build();
externalSubmissions.add(request);
LOG.info("Enqueued realtime request: ticker={}, jobId={}", ticker, jobId);
}
/**
* Stop all realtime jobs for a ticker (called when last subscriber leaves).
* Thread-safe: posting a stop marker through externalSubmissions would add complexity, so we
* find and stop the active jobs directly here; the lookup is protected by ConcurrentHashMap.
*/
public void stopRealtimeJobsForTicker(String ticker) {
List<String> toStop = new ArrayList<>();
for (Map.Entry<String, ActiveJob> entry : activeJobs.entrySet()) {
if (entry.getValue().ticker.equals(ticker) &&
entry.getValue().type == DataRequest.RequestType.REALTIME_TICKS) {
toStop.add(entry.getKey());
}
}
for (String jobId : toStop) {
ActiveJob job = activeJobs.remove(jobId);
if (job != null) {
sendStop(job.workerIdentity, jobId);
LOG.info("Sent STOP to ingestor: ticker={}, jobId={}", ticker, jobId);
}
}
}
// ── Broker loop ──────────────────────────────────────────────────────────
private void brokerLoop() {
ZMQ.Socket pullSocket = zmqManager.getSocket(ZmqChannelManager.Channel.CLIENT_REQUEST);
ZMQ.Socket routerSocket = zmqManager.getSocket(ZmqChannelManager.Channel.INGESTOR_BROKER);
ZMQ.Poller poller = zmqManager.createPoller(2);
poller.register(pullSocket, ZMQ.Poller.POLLIN);
poller.register(routerSocket, ZMQ.Poller.POLLIN);
LOG.info("IngestorBroker loop running");
while (running) {
try {
// Drain external submissions (realtime requests from subscription manager)
DataRequest ext;
while ((ext = externalSubmissions.poll()) != null) {
enqueueJob(ext);
}
// Poll sockets (100ms timeout)
poller.poll(100);
if (poller.pollin(0)) {
handleClientRequest(pullSocket);
}
if (poller.pollin(1)) {
handleWorkerMessage(routerSocket);
}
// Check for heartbeat / completion timeouts
checkTimeouts();
} catch (Exception e) {
if (running) {
LOG.error("Error in broker loop", e);
}
}
}
LOG.info("IngestorBroker loop exited");
}
/** Receive a SubmitHistoricalRequest forwarded by relay and enqueue it. */
private void handleClientRequest(ZMQ.Socket pullSocket) {
byte[] versionFrame = pullSocket.recv(ZMQ.DONTWAIT);
if (versionFrame == null) return;
if (!pullSocket.hasReceiveMore()) return;
byte[] messageFrame = pullSocket.recv(0);
if (messageFrame == null || messageFrame.length < 2) return;
if (versionFrame.length != 1 || versionFrame[0] != PROTOCOL_VERSION) {
LOG.warn("Bad protocol version on PULL socket");
return;
}
byte msgType = messageFrame[0];
byte[] payload = Arrays.copyOfRange(messageFrame, 1, messageFrame.length);
if (msgType != MSG_TYPE_SUBMIT_REQUEST) {
LOG.warn("Unexpected message type on PULL socket: 0x{}", Integer.toHexString(msgType & 0xFF));
return;
}
try {
SubmitHistoricalRequest req = SubmitHistoricalRequest.parseFrom(payload);
String jobId = UUID.randomUUID().toString();
DataRequest dataRequest = DataRequest.newBuilder()
.setRequestId(req.getRequestId())
.setJobId(jobId)
.setType(DataRequest.RequestType.HISTORICAL_OHLC)
.setTicker(req.getTicker())
.setHistorical(com.dexorder.proto.HistoricalParams.newBuilder()
.setStartTime(req.getStartTime())
.setEndTime(req.getEndTime())
.setPeriodSeconds(req.getPeriodSeconds())
.build())
.setClientId(req.hasClientId() ? req.getClientId() : "")
.build();
enqueueJob(dataRequest);
LOG.info("Received historical request from relay: request_id={}, ticker={}", req.getRequestId(), req.getTicker());
} catch (Exception e) {
LOG.error("Failed to parse SubmitHistoricalRequest from relay", e);
}
}
/** Receive and dispatch a message from an ingestor DEALER. */
private void handleWorkerMessage(ZMQ.Socket routerSocket) {
// ROUTER frame layout: [identity][empty][version][type+payload]
byte[] identity = routerSocket.recv(ZMQ.DONTWAIT);
if (identity == null) return;
if (!routerSocket.hasReceiveMore()) return;
routerSocket.recv(0); // empty delimiter
if (!routerSocket.hasReceiveMore()) return;
byte[] versionFrame = routerSocket.recv(0);
if (!routerSocket.hasReceiveMore()) return;
byte[] messageFrame = routerSocket.recv(0);
if (versionFrame == null || versionFrame.length != 1 || versionFrame[0] != PROTOCOL_VERSION) {
LOG.warn("Bad protocol version from ingestor");
return;
}
if (messageFrame == null || messageFrame.length < 1) return;
byte msgType = messageFrame[0];
byte[] payload = Arrays.copyOfRange(messageFrame, 1, messageFrame.length);
String identityKey = bytesToHex(identity);
try {
switch (msgType & 0xFF) {
case 0x20: handleWorkerReady(identity, identityKey, payload); break;
case 0x21: handleWorkComplete(identityKey, payload); break;
case 0x22: handleWorkHeartbeat(identityKey, payload); break;
case 0x23: handleWorkReject(identityKey, payload); break;
default:
LOG.warn("Unknown message type from ingestor: 0x{}", Integer.toHexString(msgType & 0xFF));
}
} catch (Exception e) {
LOG.error("Error handling worker message type 0x{}", Integer.toHexString(msgType & 0xFF), e);
}
}
private void handleWorkerReady(byte[] identity, String identityKey, byte[] payload) throws Exception {
WorkerReady ready = WorkerReady.parseFrom(payload);
Set<String> exchanges = new HashSet<>(ready.getExchangesList());
WorkerInfo worker = knownWorkers.computeIfAbsent(identityKey,
k -> new WorkerInfo(identity, identityKey, exchanges));
worker.exchanges = exchanges; // update in case re-READY with different config
worker.identity = identity;
if (!freeWorkers.contains(worker)) {
freeWorkers.addLast(worker);
}
LOG.info("Ingestor READY: id={}, exchanges={}, freeWorkers={}", identityKey, exchanges, freeWorkers.size());
dispatchPending();
}
private void handleWorkComplete(String identityKey, byte[] payload) throws Exception {
WorkComplete complete = WorkComplete.parseFrom(payload);
String jobId = complete.getJobId();
ActiveJob job = activeJobs.remove(jobId);
if (job == null) {
LOG.warn("COMPLETE for unknown jobId={}", jobId);
} else {
LOG.info("Job COMPLETE: jobId={}, ticker={}, success={}", jobId, job.ticker, complete.getSuccess());
}
// Worker is free again
WorkerInfo worker = knownWorkers.get(identityKey);
if (worker != null) {
freeWorkers.addLast(worker);
dispatchPending();
}
}
private void handleWorkHeartbeat(String identityKey, byte[] payload) throws Exception {
WorkHeartbeat hb = WorkHeartbeat.parseFrom(payload);
String jobId = hb.getJobId();
ActiveJob job = activeJobs.get(jobId);
if (job != null) {
job.lastHeartbeat = System.currentTimeMillis();
} else {
LOG.warn("HEARTBEAT for unknown jobId={} from worker={}", jobId, identityKey);
}
}
private void handleWorkReject(String identityKey, byte[] payload) throws Exception {
WorkReject reject = WorkReject.parseFrom(payload);
String jobId = reject.getJobId();
LOG.warn("Job REJECTED by worker={}: jobId={}, reason={}", identityKey, jobId, reject.getReason());
ActiveJob job = activeJobs.remove(jobId);
if (job != null) {
// Re-queue with fresh job_id so a different ingestor may pick it up
DataRequest requeued = job.request.toBuilder()
.setJobId(UUID.randomUUID().toString())
.build();
pendingJobs.add(requeued);
}
// Worker is still free (it rejected, not crashed)
WorkerInfo worker = knownWorkers.get(identityKey);
if (worker != null) {
freeWorkers.addLast(worker);
dispatchPending();
}
}
// ── Dispatch ─────────────────────────────────────────────────────────────
private void enqueueJob(DataRequest request) {
// Check if we can immediately dispatch
WorkerInfo worker = findFreeWorker(exchangeOf(request.getTicker()));
if (worker != null) {
dispatch(worker, request);
} else {
pendingJobs.add(request);
LOG.debug("No free worker for {}, queued (pendingJobs={})", request.getTicker(), pendingJobs.size());
}
}
private void dispatchPending() {
Queue<DataRequest> remaining = new ArrayDeque<>();
DataRequest job;
while ((job = pendingJobs.poll()) != null) {
WorkerInfo worker = findFreeWorker(exchangeOf(job.getTicker()));
if (worker != null) {
dispatch(worker, job);
} else {
remaining.add(job);
}
}
pendingJobs.addAll(remaining);
}
private void dispatch(WorkerInfo worker, DataRequest request) {
freeWorkers.remove(worker);
try {
byte[] protoBytes = request.toByteArray();
boolean sent = zmqManager.sendToWorker(worker.identity, PROTOCOL_VERSION, MSG_TYPE_WORK_ASSIGN, protoBytes);
if (!sent) {
LOG.error("Failed to dispatch job to worker={}, re-queuing", worker.identityKey);
freeWorkers.addLast(worker);
pendingJobs.add(request);
return;
}
ActiveJob active = new ActiveJob(worker.identity, worker.identityKey,
request, request.getTicker(), request.getType());
activeJobs.put(request.getJobId(), active);
LOG.info("Dispatched job: jobId={}, ticker={}, type={}, worker={}",
request.getJobId(), request.getTicker(), request.getType(), worker.identityKey);
} catch (Exception e) {
LOG.error("Error dispatching job", e);
freeWorkers.addLast(worker);
}
}
private void sendStop(byte[] workerIdentity, String jobId) {
try {
WorkStop stop = WorkStop.newBuilder().setJobId(jobId).build();
zmqManager.sendToWorker(workerIdentity, PROTOCOL_VERSION, MSG_TYPE_WORK_STOP, stop.toByteArray());
} catch (Exception e) {
LOG.error("Error sending STOP for jobId={}", jobId, e);
}
}
// ── Timeout checking ─────────────────────────────────────────────────────
private void checkTimeouts() {
long now = System.currentTimeMillis();
List<String> timedOut = new ArrayList<>();
for (Map.Entry<String, ActiveJob> entry : activeJobs.entrySet()) {
ActiveJob job = entry.getValue();
long timeout = job.type == DataRequest.RequestType.REALTIME_TICKS
? HEARTBEAT_TIMEOUT_MS : HISTORICAL_TIMEOUT_MS;
if (now - job.lastHeartbeat > timeout) {
timedOut.add(entry.getKey());
}
}
for (String jobId : timedOut) {
ActiveJob job = activeJobs.remove(jobId);
if (job == null) continue;
LOG.warn("Job timed out (no heartbeat/completion): jobId={}, ticker={}, type={}, worker={}",
jobId, job.ticker, job.type, job.workerIdentityKey);
// Re-queue with a new job_id
DataRequest requeued = job.request.toBuilder()
.setJobId(UUID.randomUUID().toString())
.build();
pendingJobs.add(requeued);
dispatchPending();
}
}
// ── Helpers ──────────────────────────────────────────────────────────────
/** Extract exchange name from ticker, e.g. "BTC/USDT.BINANCE" → "BINANCE" */
private static String exchangeOf(String ticker) {
int dot = ticker.lastIndexOf('.');
return dot >= 0 ? ticker.substring(dot + 1).toUpperCase() : "";
}
/** Find and remove a free worker that supports the given exchange. */
private WorkerInfo findFreeWorker(String exchange) {
for (WorkerInfo w : freeWorkers) {
if (exchange.isEmpty() || w.exchanges.contains(exchange)) {
freeWorkers.remove(w);
return w;
}
}
return null;
}
private static String bytesToHex(byte[] bytes) {
StringBuilder sb = new StringBuilder();
for (byte b : bytes) sb.append(String.format("%02x", b));
return sb.toString();
}
@Override
public void close() {
stop();
}
// ── Inner types ──────────────────────────────────────────────────────────
private static class WorkerInfo {
byte[] identity;
final String identityKey;
Set<String> exchanges;
WorkerInfo(byte[] identity, String identityKey, Set<String> exchanges) {
this.identity = identity;
this.identityKey = identityKey;
this.exchanges = exchanges;
}
}
private static class ActiveJob {
final byte[] workerIdentity;
final String workerIdentityKey;
final DataRequest request;
final String ticker;
final DataRequest.RequestType type;
long lastHeartbeat;
ActiveJob(byte[] workerIdentity, String workerIdentityKey,
DataRequest request, String ticker, DataRequest.RequestType type) {
this.workerIdentity = workerIdentity;
this.workerIdentityKey = workerIdentityKey;
this.request = request;
this.ticker = ticker;
this.type = type;
this.lastHeartbeat = System.currentTimeMillis();
}
}
}

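For reference, an ingestor registering with this broker connects a DEALER to port 5567 and sends the [empty][version][type + payload] frames that handleWorkerMessage() expects behind the ROUTER's identity frame. A minimal JeroMQ sketch of that registration (the endpoint is an example; the real ingestor is a separate service and is not part of this commit):

    import org.zeromq.SocketType;
    import org.zeromq.ZContext;
    import org.zeromq.ZMQ;

    // Illustrative worker-side READY registration against IngestorBroker's ROUTER socket.
    public class WorkerReadySketch {
        public static void main(String[] args) {
            try (ZContext ctx = new ZContext()) {
                ZMQ.Socket dealer = ctx.createSocket(SocketType.DEALER);
                dealer.connect("tcp://flink-jobmanager:5567");     // example endpoint (zmq-ingestor-broker)

                byte[] payload = com.dexorder.proto.WorkerReady.newBuilder()
                        .addExchanges("BINANCE")                   // exchanges this worker can serve
                        .build()
                        .toByteArray();
                byte[] message = new byte[payload.length + 1];
                message[0] = 0x20;                                 // MSG_TYPE_WORKER_READY
                System.arraycopy(payload, 0, message, 1, payload.length);

                dealer.send(new byte[0], ZMQ.SNDMORE);             // empty delimiter frame
                dealer.send(new byte[]{0x01}, ZMQ.SNDMORE);        // protocol version frame
                dealer.send(message, 0);                           // type byte + WorkerReady protobuf
            }
        }
    }

After READY, the worker waits for a 0x01 WORK_ASSIGN frame and later reports COMPLETE, HEARTBEAT or REJECT using the same framing.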

@@ -119,7 +119,7 @@ public class IngestorWorkQueue {
             String exchangePrefix = extractExchangePrefix(ticker);
             boolean sent = zmqManager.sendTopicMessage(
-                ZmqChannelManager.Channel.INGESTOR_WORK_QUEUE,
+                ZmqChannelManager.Channel.INGESTOR_BROKER,
                 exchangePrefix,
                 PROTOCOL_VERSION,
                 MSG_TYPE_DATA_REQUEST,


@@ -0,0 +1,204 @@
package com.dexorder.flink.ingestor;
import com.dexorder.flink.zmq.ZmqChannelManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zeromq.ZMQ;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Monitors XPUB subscription events from the relay and manages realtime ingestor lifecycle.
*
* This class is the <em>sole owner</em> of the MARKET_DATA_PUB XPUB socket. All outbound
* publishes from other threads (e.g., HistoryNotificationForwarder, RealtimeOHLCPublisher)
* must go through {@link #enqueuePublish(byte[]...)} so they are sent from the single loop
* thread — ZMQ sockets are not thread-safe.
*
* Topic format: {@code {ticker}|ohlc:{period_seconds}}
* Example: {@code BTC/USDT.BINANCE|ohlc:60}
*
* Reference counting:
* tickerRefs — across all periods for a ticker; 0→1 triggers ingestor activation
* topicRefs — per (ticker, period); consulted by RealtimeOHLCPublisher to filter output
*/
public class RealtimeSubscriptionManager implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(RealtimeSubscriptionManager.class);
private static final Pattern TOPIC_PATTERN = Pattern.compile("^(.+)\\|ohlc:(\\d+)$");
private final ZmqChannelManager zmqManager;
private final ZMQ.Socket xpubSocket;
private final IngestorBroker broker;
/** Per-ticker reference count (across all subscribed periods for that ticker) */
private final Map<String, Integer> tickerRefs = new HashMap<>();
/** Per-topic reference count (ticker|ohlc:period → subscriber count) */
private final Map<String, Integer> topicRefs = new HashMap<>();
/**
* Thread-safe outbound publish queue.
* Each entry is one multi-frame message: {@code byte[][] frames}.
*/
private final ConcurrentLinkedQueue<byte[][]> publishQueue = new ConcurrentLinkedQueue<>();
private volatile boolean running;
private Thread thread;
public RealtimeSubscriptionManager(ZmqChannelManager zmqManager, IngestorBroker broker) {
this.zmqManager = zmqManager;
this.xpubSocket = zmqManager.getSocket(ZmqChannelManager.Channel.MARKET_DATA_PUB);
this.broker = broker;
}
/**
* Queue a multi-frame message for publication on MARKET_DATA_PUB.
* Thread-safe — may be called from any thread (HistoryNotificationForwarder,
* RealtimeOHLCPublisher, etc.).
*/
public void enqueuePublish(byte[]... frames) {
publishQueue.add(frames);
}
/**
* Returns the current subscriber count for a topic.
* Thread-safe for reads (value is written only from the loop thread but read from others).
*/
public int getTopicRefCount(String topic) {
return topicRefs.getOrDefault(topic, 0);
}
public void start() {
if (running) {
LOG.warn("RealtimeSubscriptionManager already running");
return;
}
running = true;
thread = new Thread(this::subscriptionLoop, "RealtimeSubscriptionManager");
thread.setDaemon(false);
thread.start();
LOG.info("RealtimeSubscriptionManager started");
}
public void stop() {
running = false;
if (thread != null) {
thread.interrupt();
try {
thread.join(5000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
LOG.info("RealtimeSubscriptionManager stopped");
}
private void subscriptionLoop() {
// Build a poller so we can block-wait rather than busy-spin
ZMQ.Poller poller = zmqManager.createPoller(1);
poller.register(xpubSocket, ZMQ.Poller.POLLIN);
LOG.info("RealtimeSubscriptionManager loop running");
while (running) {
try {
// 1. Flush any queued outbound messages before blocking
byte[][] frames;
while ((frames = publishQueue.poll()) != null) {
sendFrames(frames);
}
// 2. Wait up to 50ms for a subscription event
poller.poll(50);
// 3. Drain all available subscription events
if (poller.pollin(0)) {
byte[] event;
while ((event = xpubSocket.recv(ZMQ.DONTWAIT)) != null) {
if (event.length > 0) {
processSubscriptionEvent(event);
}
}
}
} catch (Exception e) {
if (running) {
LOG.error("Error in subscription loop", e);
}
}
}
LOG.info("RealtimeSubscriptionManager loop exited");
}
private void sendFrames(byte[][] frames) {
for (int i = 0; i < frames.length; i++) {
if (i < frames.length - 1) {
xpubSocket.sendMore(frames[i]);
} else {
xpubSocket.send(frames[i], 0);
}
}
}
private void processSubscriptionEvent(byte[] event) {
// XPUB subscription frame: first byte is 0x01 (subscribe) or 0x00 (unsubscribe);
// remaining bytes are the raw topic string.
boolean isSubscribe = event[0] == 0x01;
String topic = new String(event, 1, event.length - 1, ZMQ.CHARSET);
Matcher m = TOPIC_PATTERN.matcher(topic);
if (!m.matches()) {
// Not a realtime OHLC topic — e.g. RESPONSE: or HISTORY_READY: prefixes
LOG.debug("Ignoring subscription event for non-realtime topic: action={}, topic={}",
isSubscribe ? "subscribe" : "unsubscribe", topic);
return;
}
String ticker = m.group(1);
LOG.info("Subscription event: action={}, topic={}", isSubscribe ? "subscribe" : "unsubscribe", topic);
if (isSubscribe) {
handleSubscribe(ticker, topic);
} else {
handleUnsubscribe(ticker, topic);
}
}
private void handleSubscribe(String ticker, String topic) {
int newTopicRef = topicRefs.merge(topic, 1, Integer::sum);
LOG.debug("topicRefs[{}]={}", topic, newTopicRef);
int newTickerRef = tickerRefs.merge(ticker, 1, Integer::sum);
if (newTickerRef == 1) {
LOG.info("First subscriber for ticker={} — submitting realtime request", ticker);
broker.submitRealtimeRequest(ticker);
}
LOG.debug("tickerRefs[{}]={}", ticker, newTickerRef);
}
private void handleUnsubscribe(String ticker, String topic) {
int newTopicRef = topicRefs.merge(topic, -1, Integer::sum);
if (newTopicRef <= 0) {
topicRefs.remove(topic);
}
LOG.debug("topicRefs[{}]={}", topic, newTopicRef);
int newTickerRef = tickerRefs.merge(ticker, -1, Integer::sum);
if (newTickerRef <= 0) {
tickerRefs.remove(ticker);
LOG.info("Last subscriber for ticker={} left — stopping realtime jobs", ticker);
broker.stopRealtimeJobsForTicker(ticker);
}
LOG.debug("tickerRefs[{}]={}", ticker, newTickerRef);
}
@Override
public void close() {
stop();
}
}

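Seen from the subscriber side, triggering this lifecycle is just a plain ZMQ subscription to the {ticker}|ohlc:{period_seconds} topic. A minimal sketch that subscribes directly to Flink's MARKET_DATA_PUB XPUB (clients normally reach it through the relay; the endpoint below is an example using the default market-data port 5558):

    import org.zeromq.SocketType;
    import org.zeromq.ZContext;
    import org.zeromq.ZMQ;

    // Illustrative subscriber for the {ticker}|ohlc:{period_seconds} topic format.
    public class RealtimeBarSubscriberSketch {
        public static void main(String[] args) {
            try (ZContext ctx = new ZContext()) {
                ZMQ.Socket sub = ctx.createSocket(SocketType.SUB);
                sub.connect("tcp://flink-jobmanager:5558");        // example endpoint
                sub.subscribe("BTC/USDT.BINANCE|ohlc:60".getBytes(ZMQ.CHARSET));
                while (!Thread.currentThread().isInterrupted()) {
                    String topic = sub.recvStr();                  // topic frame
                    byte[] payload = sub.recv();                   // first payload frame (more may follow)
                    System.out.printf("bar update on %s (%d bytes)%n", topic, payload.length);
                }
            }
        }
    }

The act of subscribing is what RealtimeSubscriptionManager observes as an XPUB event: the first subscriber for a ticker starts a realtime ingestor job, and the last unsubscribe stops it.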

@@ -6,14 +6,24 @@ import org.zeromq.SocketType;
import org.zeromq.ZContext; import org.zeromq.ZContext;
import org.zeromq.ZMQ; import org.zeromq.ZMQ;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
/** /**
* Runs in the job manager. Pulls notifications from task managers (via PUSH/PULL) * Runs in the job manager. Pulls notifications from task managers (via PUSH/PULL)
* and republishes them on the MARKET_DATA_PUB socket that the relay subscribes to. * and enqueues them for publication on MARKET_DATA_PUB via the provided publish callback.
*
* The publish callback must be thread-safe (e.g., RealtimeSubscriptionManager.enqueuePublish).
* Direct socket access is avoided here because the MARKET_DATA_PUB XPUB socket is owned
* exclusively by RealtimeSubscriptionManager to satisfy ZMQ's single-thread-per-socket rule.
* *
* Flow: * Flow:
* Task manager HistoryNotificationPublisher → PUSH * Task manager HistoryNotificationPublisher → PUSH
* ↓ * ↓
* Job manager HistoryNotificationForwarder PULL → MARKET_DATA_PUB * Job manager HistoryNotificationForwarder PULL → publishCallback (queue)
* ↓ (RealtimeSubscriptionManager loop)
* MARKET_DATA_PUB
* ↓ * ↓
* Relay (XSUB) → Relay (XPUB) → Clients * Relay (XSUB) → Relay (XPUB) → Clients
*/ */
@@ -21,17 +31,17 @@ public class HistoryNotificationForwarder implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(HistoryNotificationForwarder.class); private static final Logger LOG = LoggerFactory.getLogger(HistoryNotificationForwarder.class);
private final ZMQ.Socket pullSocket; private final ZMQ.Socket pullSocket;
private final ZMQ.Socket pubSocket; private final Consumer<byte[][]> publishCallback;
private final ZContext context; private final ZContext context;
private volatile boolean running = true; private volatile boolean running = true;
private Thread thread; private Thread thread;
/** /**
* @param pullPort Port to bind PULL socket on (task managers connect PUSH here) * @param pullPort Port to bind PULL socket on (task managers connect PUSH here)
* @param pubSocket Existing MARKET_DATA_PUB socket from ZmqChannelManager * @param publishCallback Thread-safe callback to enqueue outbound multi-frame messages
*/ */
public HistoryNotificationForwarder(int pullPort, ZMQ.Socket pubSocket) { public HistoryNotificationForwarder(int pullPort, Consumer<byte[][]> publishCallback) {
this.pubSocket = pubSocket; this.publishCallback = publishCallback;
this.context = new ZContext(); this.context = new ZContext();
this.pullSocket = context.createSocket(SocketType.PULL); this.pullSocket = context.createSocket(SocketType.PULL);
this.pullSocket.setRcvHWM(10000); this.pullSocket.setRcvHWM(10000);
@@ -53,32 +63,24 @@ public class HistoryNotificationForwarder implements AutoCloseable {
pullSocket.setReceiveTimeOut(200); // ms, so we can check running flag pullSocket.setReceiveTimeOut(200); // ms, so we can check running flag
while (running) { while (running) {
// Receive all frames of a multi-part message and forward to PUB
byte[] frame = pullSocket.recv(0); byte[] frame = pullSocket.recv(0);
if (frame == null) { if (frame == null) {
continue; // timeout, check running flag continue; // timeout, check running flag
} }
boolean more = pullSocket.hasReceiveMore(); // Collect all frames of the multi-part message, then enqueue atomically
if (more) { List<byte[]> frames = new ArrayList<>();
pubSocket.sendMore(frame); frames.add(frame);
} else {
pubSocket.send(frame, 0);
continue;
}
// Receive remaining frames while (pullSocket.hasReceiveMore()) {
while (more) { byte[] next = pullSocket.recv(0);
frame = pullSocket.recv(0); if (next != null) {
more = pullSocket.hasReceiveMore(); frames.add(next);
if (more) {
pubSocket.sendMore(frame);
} else {
pubSocket.send(frame, 0);
} }
} }
LOG.debug("Forwarded notification to MARKET_DATA_PUB"); publishCallback.accept(frames.toArray(new byte[0][]));
LOG.debug("Enqueued notification ({} frames) for MARKET_DATA_PUB", frames.size());
} }
LOG.info("Notification forwarder loop stopped"); LOG.info("Notification forwarder loop stopped");

View File

@@ -64,8 +64,13 @@ public class HistoryNotificationFunction extends ProcessFunction<OHLCBatchWrappe
String status = batch.getStatus(); String status = batch.getStatus();
int rowCount = batch.getRowCount(); int rowCount = batch.getRowCount();
LOG.info("Processing OHLCBatch: request_id={}, status={}, rows={}", LOG.info("Processing OHLCBatch: request_id={}, status={}, rows={}, isLastPage={}",
requestId, status, rowCount); requestId, status, rowCount, batch.isLastPage());
// Intermediate pages: data is written to Iceberg but no notification yet
if (!batch.isLastPage()) {
return;
}
// Determine Iceberg table name based on period // Determine Iceberg table name based on period
String tableName = getIcebergTableName(ticker, periodSeconds); String tableName = getIcebergTableName(ticker, periodSeconds);

View File

@@ -87,7 +87,8 @@ public class OHLCBatchDeserializer implements DeserializationSchema<OHLCBatchWra
meta.getEndTime(), meta.getEndTime(),
status, status,
meta.hasErrorMessage() ? meta.getErrorMessage() : null, meta.hasErrorMessage() ? meta.getErrorMessage() : null,
rows rows,
meta.getIsLastPage()
); );
} }

View File

@@ -19,6 +19,7 @@ public class OHLCBatchWrapper implements Serializable {
private final String status; // OK, NOT_FOUND, ERROR private final String status; // OK, NOT_FOUND, ERROR
private final String errorMessage; private final String errorMessage;
private final List<OHLCRow> rows; private final List<OHLCRow> rows;
private final boolean isLastPage;
public OHLCBatchWrapper( public OHLCBatchWrapper(
String requestId, String requestId,
@@ -29,7 +30,8 @@ public class OHLCBatchWrapper implements Serializable {
long endTime, long endTime,
String status, String status,
String errorMessage, String errorMessage,
List<OHLCRow> rows List<OHLCRow> rows,
boolean isLastPage
) { ) {
this.requestId = requestId; this.requestId = requestId;
this.clientId = clientId; this.clientId = clientId;
@@ -40,6 +42,7 @@ public class OHLCBatchWrapper implements Serializable {
this.status = status; this.status = status;
this.errorMessage = errorMessage; this.errorMessage = errorMessage;
this.rows = rows; this.rows = rows;
this.isLastPage = isLastPage;
} }
public String getRequestId() { public String getRequestId() {
@@ -94,6 +97,10 @@ public class OHLCBatchWrapper implements Serializable {
return "OK".equals(status); return "OK".equals(status);
} }
public boolean isLastPage() {
return isLastPage;
}
@Override @Override
public String toString() { public String toString() {
return "OHLCBatchWrapper{" + return "OHLCBatchWrapper{" +
@@ -103,6 +110,7 @@ public class OHLCBatchWrapper implements Serializable {
", periodSeconds=" + periodSeconds + ", periodSeconds=" + periodSeconds +
", status='" + status + '\'' + ", status='" + status + '\'' +
", rowCount=" + getRowCount() + ", rowCount=" + getRowCount() +
", isLastPage=" + isLastPage +
'}'; '}';
} }

View File

@@ -0,0 +1,73 @@
package com.dexorder.flink.publisher;
import java.io.Serializable;
/**
* A single completed OHLC bar for a given ticker and period.
* Output type of RealtimeBarFunction, input type of RealtimeBarPublisher.
*/
public class RealtimeBar implements Serializable {
private static final long serialVersionUID = 1L;
private String ticker;
/** Period in seconds (e.g., 60, 300, 3600) */
private int periodSeconds;
/** Window start timestamp in milliseconds since epoch */
private long windowStartMs;
/** Scaled integer price values (same precision as source Tick) */
private long open;
private long high;
private long low;
private long close;
/** Summed base amount across ticks in this window */
private long volume;
/** Number of ticks in this window */
private int tickCount;
public RealtimeBar() {}
public RealtimeBar(String ticker, int periodSeconds, long windowStartMs,
long open, long high, long low, long close, long volume, int tickCount) {
this.ticker = ticker;
this.periodSeconds = periodSeconds;
this.windowStartMs = windowStartMs;
this.open = open;
this.high = high;
this.low = low;
this.close = close;
this.volume = volume;
this.tickCount = tickCount;
}
public String getTicker() { return ticker; }
public int getPeriodSeconds() { return periodSeconds; }
public long getWindowStartMs() { return windowStartMs; }
public long getOpen() { return open; }
public long getHigh() { return high; }
public long getLow() { return low; }
public long getClose() { return close; }
public long getVolume() { return volume; }
public int getTickCount() { return tickCount; }
public void setTicker(String ticker) { this.ticker = ticker; }
public void setPeriodSeconds(int periodSeconds) { this.periodSeconds = periodSeconds; }
public void setWindowStartMs(long windowStartMs) { this.windowStartMs = windowStartMs; }
public void setOpen(long open) { this.open = open; }
public void setHigh(long high) { this.high = high; }
public void setLow(long low) { this.low = low; }
public void setClose(long close) { this.close = close; }
public void setVolume(long volume) { this.volume = volume; }
public void setTickCount(int tickCount) { this.tickCount = tickCount; }
/** ZMQ topic for this bar: e.g., "BTC/USDT.BINANCE|ohlc:60" */
public String topic() {
return ticker + "|ohlc:" + periodSeconds;
}
@Override
public String toString() {
return "RealtimeBar{ticker='" + ticker + "', period=" + periodSeconds +
"s, windowStart=" + windowStartMs + ", O=" + open + " H=" + high +
" L=" + low + " C=" + close + ", ticks=" + tickCount + '}';
}
}

View File

@@ -0,0 +1,116 @@
package com.dexorder.flink.publisher;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Accumulates ticks into OHLC bars for each configured period.
*
* Keyed by ticker. Maintains per-period accumulators in MapState.
* Uses a "lazy boundary" approach: a new window is detected when a tick arrives after
* the previous window's end time (based on processing clock). The completed bar is
* emitted immediately when the boundary is crossed, so bars are delayed by at most
* one tick interval (~10s for realtime polling).
*
* Periods are configurable at construction time. All configured periods are computed
* for every ticker receiving ticks; the ZMQ publisher filters to active subscriptions.
*
* Accumulator layout (long[7]):
* [0] open
* [1] high
* [2] low
* [3] close
* [4] volume (sum of base amount)
* [5] windowStartMs (epoch ms)
* [6] tickCount
*/
public class RealtimeBarFunction extends RichFlatMapFunction<TickWrapper, RealtimeBar> {
private static final Logger LOG = LoggerFactory.getLogger(RealtimeBarFunction.class);
private static final long serialVersionUID = 1L;
private final int[] periods;
private transient MapState<Integer, long[]> accumState;
/**
* @param periods Period lengths in seconds (e.g., 60, 300, 900, 3600)
*/
public RealtimeBarFunction(int[] periods) {
this.periods = periods;
}
@Override
public void open(Configuration parameters) {
MapStateDescriptor<Integer, long[]> desc = new MapStateDescriptor<>(
"ohlcAccum",
BasicTypeInfo.INT_TYPE_INFO,
PrimitiveArrayTypeInfo.LONG_PRIMITIVE_ARRAY_TYPE_INFO
);
accumState = getRuntimeContext().getMapState(desc);
}
@Override
public void flatMap(TickWrapper tick, Collector<RealtimeBar> out) throws Exception {
if (tick == null) return;
long nowMs = System.currentTimeMillis();
for (int period : periods) {
long periodMs = period * 1000L;
long windowStart = (nowMs / periodMs) * periodMs;
long[] accum = accumState.get(period);
if (accum == null) {
// First tick for this period
accumState.put(period, openWindow(tick, windowStart));
} else if (accum[5] != windowStart) {
// Window boundary crossed — emit completed bar then start fresh
if (accum[6] > 0) {
out.collect(toBar(tick.getTicker(), period, accum));
LOG.debug("Emitted bar: ticker={}, period={}s, windowStart={}, ticks={}",
tick.getTicker(), period, accum[5], accum[6]);
}
accumState.put(period, openWindow(tick, windowStart));
} else {
// Same window — update
accum[1] = Math.max(accum[1], tick.getPrice()); // high
accum[2] = Math.min(accum[2], tick.getPrice()); // low
accum[3] = tick.getPrice(); // close
accum[4] += tick.getAmount(); // volume
accum[6]++; // tick count
accumState.put(period, accum);
}
}
}
private static long[] openWindow(TickWrapper tick, long windowStart) {
return new long[]{
tick.getPrice(), // open
tick.getPrice(), // high
tick.getPrice(), // low
tick.getPrice(), // close
tick.getAmount(), // volume
windowStart,
1L // tickCount
};
}
private static RealtimeBar toBar(String ticker, int period, long[] accum) {
return new RealtimeBar(
ticker, period,
accum[5], // windowStartMs
accum[0], accum[1], accum[2], accum[3], // O H L C
accum[4], // volume
(int) accum[6] // tickCount
);
}
}
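
The window arithmetic behind the lazy boundary is just integer division on the processing-time clock; the previous accumulator is emitted the moment the computed window start changes. A small worked sketch (TypeScript, values illustrative):

// Window start for a processing-time instant, for a given period in seconds.
function windowStartMs(nowMs: number, periodSeconds: number): number {
  const periodMs = periodSeconds * 1000;
  return Math.floor(nowMs / periodMs) * periodMs;
}

// Example with a 60s period:
//   tick at 12:00:59.500 -> window start 12:00:00
//   tick at 12:01:03.200 -> window start 12:01:00 (boundary crossed)
// The bar for 12:00:00 is emitted when the 12:01:03 tick arrives, so a bar can
// lag the wall-clock boundary by up to one tick interval.
const a = windowStartMs(Date.parse('2026-04-13T12:00:59.500Z'), 60);
const b = windowStartMs(Date.parse('2026-04-13T12:01:03.200Z'), 60);
console.log(new Date(a).toISOString(), new Date(b).toISOString(), a !== b);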

View File

@@ -0,0 +1,91 @@
package com.dexorder.flink.publisher;
import com.dexorder.proto.OHLC;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zeromq.SocketType;
import org.zeromq.ZContext;
import org.zeromq.ZMQ;
/**
* Flink sink that publishes completed realtime OHLC bars to clients.
*
* Connects a ZMQ PUSH socket to the job manager's notification PULL endpoint.
* The HistoryNotificationForwarder (already running on the job manager) receives these
* frames and enqueues them to RealtimeSubscriptionManager, which publishes them on
* the MARKET_DATA_PUB XPUB socket. Clients subscribed to the matching topic receive the bar.
*
* Wire format (matches HistoryNotificationPublisher):
* Frame 1: topic bytes (e.g., "BTC/USDT.BINANCE|ohlc:60")
* Frame 2: [0x01] (protocol version)
* Frame 3: [0x04][OHLC protobuf bytes] (type 0x04 = OHLC single bar)
*
* Parallelism MUST be 1 (same as the rest of the notification pipeline).
*/
public class RealtimeBarPublisher extends RichSinkFunction<RealtimeBar> {
private static final Logger LOG = LoggerFactory.getLogger(RealtimeBarPublisher.class);
private static final long serialVersionUID = 1L;
private static final byte PROTOCOL_VERSION = 0x01;
private static final byte MSG_TYPE_OHLC = 0x04;
private final String jobManagerPullEndpoint;
private transient ZContext context;
private transient ZMQ.Socket pushSocket;
public RealtimeBarPublisher(String jobManagerPullEndpoint) {
this.jobManagerPullEndpoint = jobManagerPullEndpoint;
}
@Override
public void open(Configuration parameters) {
context = new ZContext();
pushSocket = context.createSocket(SocketType.PUSH);
pushSocket.setLinger(1000);
pushSocket.setSndHWM(10000);
pushSocket.connect(jobManagerPullEndpoint);
LOG.info("RealtimeBarPublisher PUSH connected to {}", jobManagerPullEndpoint);
}
@Override
public void invoke(RealtimeBar bar, Context context) {
try {
// Build OHLC proto — timestamp in nanoseconds (bar uses ms, convert)
OHLC ohlc = OHLC.newBuilder()
.setTimestamp(bar.getWindowStartMs() * 1_000_000L) // ms → ns
.setTicker(bar.getTicker())
.setOpen(bar.getOpen())
.setHigh(bar.getHigh())
.setLow(bar.getLow())
.setClose(bar.getClose())
.setVolume(bar.getVolume())
.build();
byte[] protoBytes = ohlc.toByteArray();
byte[] messageFrame = new byte[1 + protoBytes.length];
messageFrame[0] = MSG_TYPE_OHLC;
System.arraycopy(protoBytes, 0, messageFrame, 1, protoBytes.length);
String topic = bar.topic();
pushSocket.sendMore(topic.getBytes(ZMQ.CHARSET));
pushSocket.sendMore(new byte[]{PROTOCOL_VERSION});
pushSocket.send(messageFrame, 0);
LOG.debug("Published realtime bar: topic={}, ticks={}", topic, bar.getTickCount());
} catch (Exception e) {
LOG.error("Failed to publish realtime bar: ticker={}, period={}",
bar.getTicker(), bar.getPeriodSeconds(), e);
}
}
@Override
public void close() {
if (pushSocket != null) pushSocket.close();
if (context != null) context.close();
LOG.info("RealtimeBarPublisher closed");
}
}
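
The three-frame layout matters because the relay and gateway route purely on the first frame and type-check on the first byte of the last frame, without parsing the protobuf. A hedged sketch of the same layout in TypeScript (the serialized OHLC payload is assumed to come from elsewhere):

const PROTOCOL_VERSION = 0x01;
const MSG_TYPE_OHLC = 0x04;

// Build the three ZMQ frames for one bar. `ohlcBytes` is assumed to be the
// OHLC protobuf serialized elsewhere.
function buildBarFrames(ticker: string, periodSeconds: number, ohlcBytes: Uint8Array): Buffer[] {
  const topic = `${ticker}|ohlc:${periodSeconds}`; // e.g. "BTC/USDT.BINANCE|ohlc:60"
  const message = Buffer.concat([Buffer.from([MSG_TYPE_OHLC]), Buffer.from(ohlcBytes)]);
  return [Buffer.from(topic), Buffer.from([PROTOCOL_VERSION]), message];
}

Putting the topic first is what lets ZMQ prefix subscriptions filter bars per ticker and period.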

View File

@@ -0,0 +1,69 @@
package com.dexorder.flink.publisher;
import com.dexorder.proto.Tick;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
* Kafka deserializer for Tick protobuf messages from the market-tick topic.
*
* Wire format: [0x01 version][0x03 TICK type][Tick protobuf bytes]
*/
public class TickDeserializer implements DeserializationSchema<TickWrapper> {
private static final Logger LOG = LoggerFactory.getLogger(TickDeserializer.class);
private static final long serialVersionUID = 1L;
private static final byte PROTOCOL_VERSION = 0x01;
private static final byte MSG_TYPE_TICK = 0x03;
@Override
public TickWrapper deserialize(byte[] message) throws IOException {
try {
if (message.length < 2) {
throw new IOException("Message too short: " + message.length + " bytes");
}
if (message[0] != PROTOCOL_VERSION) {
throw new IOException("Unsupported protocol version: 0x" + Integer.toHexString(message[0] & 0xFF));
}
if (message[1] != MSG_TYPE_TICK) {
throw new IOException("Unexpected message type: 0x" + Integer.toHexString(message[1] & 0xFF));
}
byte[] payload = new byte[message.length - 2];
System.arraycopy(message, 2, payload, 0, payload.length);
Tick tick = Tick.parseFrom(payload);
return new TickWrapper(
tick.getTicker(),
tick.getTradeId(),
tick.getTimestamp(),
tick.getPrice(),
tick.getAmount(),
tick.getQuoteAmount(),
tick.getTakerBuy()
);
} catch (Exception e) {
LOG.warn("Failed to deserialize Tick, skipping: {}", e.getMessage());
// Return null; Flink's KafkaSource skips nulls via filter
return null;
}
}
@Override
public boolean isEndOfStream(TickWrapper nextElement) {
return false;
}
@Override
public TypeInformation<TickWrapper> getProducedType() {
return TypeInformation.of(TickWrapper.class);
}
}

View File

@@ -0,0 +1,58 @@
package com.dexorder.flink.publisher;
import java.io.Serializable;
/**
* Flink-serializable wrapper for a single Tick.
* Fields mirror the Tick protobuf, using primitives to avoid proto-class serialization issues.
*/
public class TickWrapper implements Serializable {
private static final long serialVersionUID = 1L;
private String ticker;
private String tradeId;
/** Timestamp in nanoseconds since epoch */
private long timestamp;
/** Price as scaled integer */
private long price;
/** Base amount as scaled integer */
private long amount;
/** Quote amount as scaled integer */
private long quoteAmount;
private boolean takerBuy;
public TickWrapper() {}
public TickWrapper(String ticker, String tradeId, long timestamp,
long price, long amount, long quoteAmount, boolean takerBuy) {
this.ticker = ticker;
this.tradeId = tradeId;
this.timestamp = timestamp;
this.price = price;
this.amount = amount;
this.quoteAmount = quoteAmount;
this.takerBuy = takerBuy;
}
public String getTicker() { return ticker; }
public String getTradeId() { return tradeId; }
public long getTimestamp() { return timestamp; }
public long getPrice() { return price; }
public long getAmount() { return amount; }
public long getQuoteAmount() { return quoteAmount; }
public boolean isTakerBuy() { return takerBuy; }
public void setTicker(String ticker) { this.ticker = ticker; }
public void setTradeId(String tradeId) { this.tradeId = tradeId; }
public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
public void setPrice(long price) { this.price = price; }
public void setAmount(long amount) { this.amount = amount; }
public void setQuoteAmount(long quoteAmount) { this.quoteAmount = quoteAmount; }
public void setTakerBuy(boolean takerBuy) { this.takerBuy = takerBuy; }
@Override
public String toString() {
return "TickWrapper{ticker='" + ticker + "', tradeId='" + tradeId +
"', timestamp=" + timestamp + ", price=" + price + '}';
}
}

View File

@@ -13,7 +13,14 @@ import java.util.Map;
/** /**
* Manages all ZeroMQ channels for the Flink application. * Manages all ZeroMQ channels for the Flink application.
* Each channel is bound to a specific port and socket type. *
* Port layout:
* 5558 XPUB MARKET_DATA_PUB — market data + notifications to clients (via relay XSUB)
* XPUB exposes subscription frames so Flink can detect
* which realtime topics clients are interested in.
* 5561 PULL (internal) — task manager → job manager notifications (unchanged)
* 5566 PULL CLIENT_REQUEST — receives forwarded SubmitHistoricalRequest from relay PUSH
* 5567 ROUTER INGESTOR_BROKER — exclusive work queue; ingestors connect with DEALER
*/ */
public class ZmqChannelManager implements Closeable { public class ZmqChannelManager implements Closeable {
private static final Logger LOG = LoggerFactory.getLogger(ZmqChannelManager.class); private static final Logger LOG = LoggerFactory.getLogger(ZmqChannelManager.class);
@@ -23,8 +30,9 @@ public class ZmqChannelManager implements Closeable {
private final AppConfig config; private final AppConfig config;
public enum Channel { public enum Channel {
INGESTOR_WORK_QUEUE,
MARKET_DATA_PUB, MARKET_DATA_PUB,
CLIENT_REQUEST,
INGESTOR_BROKER,
} }
public ZmqChannelManager(AppConfig config) { public ZmqChannelManager(AppConfig config) {
@@ -41,20 +49,33 @@ public class ZmqChannelManager implements Closeable {
LOG.info("Initializing ZeroMQ channels on {}", bindAddress); LOG.info("Initializing ZeroMQ channels on {}", bindAddress);
// 1. Ingestor Work Queue - PUB socket for topic-based work distribution (exchange prefix filtering) // 1. Market Data Publication — XPUB so subscription events are visible to Flink
// Relay's XSUB connects here to proxy data to clients.
// Subscription frames from relay (forwarded from clients) arrive as readable messages.
ZMQ.Socket marketDataSocket = context.createSocket(SocketType.XPUB);
marketDataSocket.setXpubVerbose(true); // emit every sub/unsub, not just first/last
marketDataSocket.setLinger(1000);
marketDataSocket.setSndHWM(10000);
marketDataSocket.setRcvHWM(10000);
String marketDataEndpoint = bindAddress + ":" + config.getMarketDataPubPort();
marketDataSocket.bind(marketDataEndpoint);
sockets.put(Channel.MARKET_DATA_PUB.name(), marketDataSocket);
LOG.info("Bound Market Data Publication (XPUB) to {}", marketDataEndpoint);
// 2. Client Request Pull — receives SubmitHistoricalRequest forwarded by relay PUSH
createAndBind( createAndBind(
Channel.INGESTOR_WORK_QUEUE, Channel.CLIENT_REQUEST,
SocketType.PUB, SocketType.PULL,
bindAddress + ":" + config.getIngestorWorkQueuePort(), bindAddress + ":" + config.getFlinkRequestPullPort(),
"Ingestor Work Queue (PUB)" "Client Request (PULL)"
); );
// 2. Market Data Publication - PUB socket for market data streaming and HistoryReadyNotification // 3. Ingestor Broker — ROUTER for exclusive work dispatch to ingestor DEALER workers
createAndBind( createAndBind(
Channel.MARKET_DATA_PUB, Channel.INGESTOR_BROKER,
SocketType.PUB, SocketType.ROUTER,
bindAddress + ":" + config.getMarketDataPubPort(), bindAddress + ":" + config.getIngestorBrokerPort(),
"Market Data Publication (PUB)" "Ingestor Broker (ROUTER)"
); );
LOG.info("All ZeroMQ channels initialized successfully"); LOG.info("All ZeroMQ channels initialized successfully");
@@ -63,15 +84,10 @@ public class ZmqChannelManager implements Closeable {
private void createAndBind(Channel channel, SocketType socketType, String endpoint, String description) { private void createAndBind(Channel channel, SocketType socketType, String endpoint, String description) {
try { try {
ZMQ.Socket socket = context.createSocket(socketType); ZMQ.Socket socket = context.createSocket(socketType);
socket.setLinger(1000);
// Set socket options socket.setSndHWM(10000);
socket.setLinger(1000); // 1 second linger on close socket.setRcvHWM(10000);
socket.setSndHWM(10000); // High water mark for outbound messages
socket.setRcvHWM(10000); // High water mark for inbound messages
// Bind the socket
socket.bind(endpoint); socket.bind(endpoint);
sockets.put(channel.name(), socket); sockets.put(channel.name(), socket);
LOG.info("Bound {} to {}", description, endpoint); LOG.info("Bound {} to {}", description, endpoint);
} catch (Exception e) { } catch (Exception e) {
@@ -80,6 +96,13 @@ public class ZmqChannelManager implements Closeable {
} }
} }
/**
* Create a ZMQ Poller backed by this manager's context.
*/
public ZMQ.Poller createPoller(int size) {
return context.getContext().poller(size);
}
/** /**
* Get a socket by channel type. * Get a socket by channel type.
*/ */
@@ -92,18 +115,11 @@ public class ZmqChannelManager implements Closeable {
} }
/** /**
* Send a message on the specified channel. * Send a message on a channel (no topic prefix — for PULL/PUSH or direct sends).
*
* @param channel The channel to send on
* @param versionByte Protocol version byte
* @param messageTypeByte Message type ID byte
* @param protobufData Serialized protobuf message
* @return true if sent successfully
*/ */
public boolean sendMessage(Channel channel, byte versionByte, byte messageTypeByte, byte[] protobufData) { public boolean sendMessage(Channel channel, byte versionByte, byte messageTypeByte, byte[] protobufData) {
ZMQ.Socket socket = getSocket(channel); ZMQ.Socket socket = getSocket(channel);
// Send as two frames: [version byte] [type byte + protobuf data]
boolean sentFrame1 = socket.send(new byte[]{versionByte}, ZMQ.SNDMORE); boolean sentFrame1 = socket.send(new byte[]{versionByte}, ZMQ.SNDMORE);
if (!sentFrame1) { if (!sentFrame1) {
LOG.error("Failed to send version frame on channel {}", channel); LOG.error("Failed to send version frame on channel {}", channel);
@@ -124,27 +140,18 @@ public class ZmqChannelManager implements Closeable {
} }
/** /**
* Send a message with a topic prefix (for PUB sockets). * Send a topic-prefixed message (for XPUB market data publishing).
* * Frame layout: [topic][version][type+payload]
* @param channel The channel to send on
* @param topic Topic string for subscription filtering
* @param versionByte Protocol version byte
* @param messageTypeByte Message type ID byte
* @param protobufData Serialized protobuf message
* @return true if sent successfully
*/ */
public boolean sendTopicMessage(Channel channel, String topic, byte versionByte, byte messageTypeByte, byte[] protobufData) { public boolean sendTopicMessage(Channel channel, String topic, byte versionByte, byte messageTypeByte, byte[] protobufData) {
ZMQ.Socket socket = getSocket(channel); ZMQ.Socket socket = getSocket(channel);
// Send as three frames: [topic] [version byte] [type byte + protobuf data] if (!socket.send(topic.getBytes(ZMQ.CHARSET), ZMQ.SNDMORE)) {
boolean sentTopic = socket.send(topic.getBytes(ZMQ.CHARSET), ZMQ.SNDMORE);
if (!sentTopic) {
LOG.error("Failed to send topic frame on channel {}", channel); LOG.error("Failed to send topic frame on channel {}", channel);
return false; return false;
} }
boolean sentFrame1 = socket.send(new byte[]{versionByte}, ZMQ.SNDMORE); if (!socket.send(new byte[]{versionByte}, ZMQ.SNDMORE)) {
if (!sentFrame1) {
LOG.error("Failed to send version frame on channel {}", channel); LOG.error("Failed to send version frame on channel {}", channel);
return false; return false;
} }
@@ -153,8 +160,7 @@ public class ZmqChannelManager implements Closeable {
frame2[0] = messageTypeByte; frame2[0] = messageTypeByte;
System.arraycopy(protobufData, 0, frame2, 1, protobufData.length); System.arraycopy(protobufData, 0, frame2, 1, protobufData.length);
boolean sentFrame2 = socket.send(frame2, 0); if (!socket.send(frame2, 0)) {
if (!sentFrame2) {
LOG.error("Failed to send message frame on channel {}", channel); LOG.error("Failed to send message frame on channel {}", channel);
return false; return false;
} }
@@ -162,6 +168,24 @@ public class ZmqChannelManager implements Closeable {
return true; return true;
} }
/**
* Send a targeted message to a specific DEALER worker via ROUTER.
* Frame layout: [identity][empty][version][type+payload]
*/
public boolean sendToWorker(byte[] identity, byte versionByte, byte messageTypeByte, byte[] protobufData) {
ZMQ.Socket socket = getSocket(Channel.INGESTOR_BROKER);
if (!socket.send(identity, ZMQ.SNDMORE)) return false;
if (!socket.send(new byte[0], ZMQ.SNDMORE)) return false;
if (!socket.send(new byte[]{versionByte}, ZMQ.SNDMORE)) return false;
byte[] frame = new byte[1 + protobufData.length];
frame[0] = messageTypeByte;
System.arraycopy(protobufData, 0, frame, 1, protobufData.length);
return socket.send(frame, 0);
}
@Override @Override
public void close() { public void close() {
LOG.info("Closing ZeroMQ channels"); LOG.info("Closing ZeroMQ channels");

View File

@@ -72,7 +72,7 @@ export class Authenticator {
); );
} }
const sessionId = `ws_${userId}_${Date.now()}`; const sessionId = `ws_${userId}`;
return { return {
authContext: { authContext: {

View File

@@ -2,12 +2,14 @@ import type { FastifyInstance, FastifyRequest } from 'fastify';
import type { WebSocket } from '@fastify/websocket'; import type { WebSocket } from '@fastify/websocket';
import type { Authenticator } from '../auth/authenticator.js'; import type { Authenticator } from '../auth/authenticator.js';
import type { AgentHarness, HarnessFactory } from '../harness/agent-harness.js'; import type { AgentHarness, HarnessFactory } from '../harness/agent-harness.js';
import type { HarnessEvent } from '../harness/harness-events.js';
import type { InboundMessage } from '../types/messages.js'; import type { InboundMessage } from '../types/messages.js';
import { randomUUID } from 'crypto'; import { randomUUID } from 'crypto';
import type { SessionRegistry, EventSubscriber, Session } from '../events/index.js'; import type { SessionRegistry, EventSubscriber, Session } from '../events/index.js';
import type { OHLCService } from '../services/ohlc-service.js'; import type { OHLCService, BarUpdateCallback } from '../services/ohlc-service.js';
import type { SymbolIndexService } from '../services/symbol-index-service.js'; import type { SymbolIndexService } from '../services/symbol-index-service.js';
import type { ContainerManager } from '../k8s/container-manager.js'; import type { ContainerManager } from '../k8s/container-manager.js';
import type { ConversationService } from '../services/conversation-service.js';
import { import {
WorkspaceManager, WorkspaceManager,
ContainerSync, ContainerSync,
@@ -42,6 +44,7 @@ export interface WebSocketHandlerConfig {
createHarness: HarnessFactory; createHarness: HarnessFactory;
ohlcService?: OHLCService; // Optional for historical data support ohlcService?: OHLCService; // Optional for historical data support
symbolIndexService?: SymbolIndexService; // Optional for symbol search symbolIndexService?: SymbolIndexService; // Optional for symbol search
conversationService?: ConversationService; // Optional for history replay on reconnect
} }
/** /**
@@ -50,10 +53,18 @@ export interface WebSocketHandlerConfig {
* Handles WebSocket connections for chat and integrates with the event system * Handles WebSocket connections for chat and integrates with the event system
* for container-to-client notifications. * for container-to-client notifications.
*/ */
interface BarSubscription {
ticker: string;
periodSeconds: number;
callback: BarUpdateCallback;
}
export class WebSocketHandler { export class WebSocketHandler {
private config: WebSocketHandlerConfig; private config: WebSocketHandlerConfig;
private harnesses = new Map<string, AgentHarness>(); private harnesses = new Map<string, AgentHarness>();
private workspaces = new Map<string, WorkspaceManager>(); private workspaces = new Map<string, WorkspaceManager>();
/** Per-session realtime bar subscriptions for cleanup on disconnect */
private barSubscriptions = new Map<string, BarSubscription[]>();
constructor(config: WebSocketHandlerConfig) { constructor(config: WebSocketHandlerConfig) {
this.config = config; this.config = config;
@@ -106,17 +117,22 @@ export class WebSocketHandler {
// If container is spinning up, wait for it to be ready before continuing // If container is spinning up, wait for it to be ready before continuing
if (isSpinningUp) { if (isSpinningUp) {
sendStatus(socket, 'spinning_up', 'Your workspace is starting up, please wait...'); sendStatus(socket, 'spinning_up', 'Your personal agent is starting up, please wait...');
const startupPingInterval = setInterval(() => {
if (socket.readyState === 1) socket.ping();
}, 10000);
const ready = await this.config.containerManager.waitForContainerReady(authContext.userId, 120000); const ready = await this.config.containerManager.waitForContainerReady(authContext.userId, 120000);
clearInterval(startupPingInterval);
if (!ready) { if (!ready) {
logger.warn({ userId: authContext.userId }, 'Container failed to become ready within timeout'); logger.warn({ userId: authContext.userId }, 'Sandbox failed to become ready within timeout');
socket.send(JSON.stringify({ type: 'error', message: 'Workspace failed to start. Please try again later.' })); socket.send(JSON.stringify({ type: 'error', message: 'Agent workspace failed to start. Please try again later.' }));
socket.close(1011, 'Container startup timeout'); socket.close(1011, 'Container startup timeout');
return; return;
} }
logger.info({ userId: authContext.userId }, 'Container is ready, proceeding with session setup'); logger.info({ userId: authContext.userId }, 'Sandbox is ready, proceeding with session setup');
} }
sendStatus(socket, 'initializing', 'Starting your workspace...'); sendStatus(socket, 'initializing', 'Starting your workspace...');
@@ -241,6 +257,17 @@ export class WebSocketHandler {
}) })
); );
// Replay conversation history so the UI pre-populates on reconnect
if (this.config.conversationService) {
const history = await this.config.conversationService.getHistory(
authContext.userId,
authContext.sessionId
);
if (history.length > 0) {
socket.send(JSON.stringify({ type: 'conversation_history', messages: history }));
}
}
// Handle messages // Handle messages
socket.on('message', async (data: Buffer) => { socket.on('message', async (data: Buffer) => {
try { try {
@@ -266,15 +293,45 @@ export class WebSocketHandler {
return; return;
} }
// Chunks are streamed via channelAdapter.sendChunk() during handleMessage
try { try {
// Acknowledge receipt immediately so the client can show the seen indicator // Acknowledge receipt immediately so the client can show the seen indicator
socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false })); socket.send(JSON.stringify({ type: 'agent_chunk', content: '', done: false }));
logger.info('Calling harness.handleMessage'); logger.info('Streaming harness response');
await harness.handleMessage(inboundMessage); let fatalError = false;
for await (const event of harness.streamMessage(inboundMessage)) {
const e = event as HarnessEvent;
switch (e.type) {
case 'chunk':
socket.send(JSON.stringify({ type: 'agent_chunk', content: e.content, done: false }));
break;
case 'tool_call':
socket.send(JSON.stringify({ type: 'agent_tool_call', toolName: e.toolName, label: e.label }));
break;
case 'subagent_tool_call':
socket.send(JSON.stringify({ type: 'subagent_tool_call', agentName: e.agentName, toolName: e.toolName, label: e.label }));
break;
case 'subagent_chunk':
socket.send(JSON.stringify({ type: 'subagent_chunk', agentName: e.agentName, content: e.content }));
break;
case 'image':
socket.send(JSON.stringify({ type: 'image', data: e.data, mimeType: e.mimeType, caption: e.caption }));
break;
case 'error':
socket.send(JSON.stringify({ type: 'text', text: `An unrecoverable error occurred in the ${e.source}.` }));
if (e.fatal) fatalError = true;
break;
case 'done':
break;
}
}
// Send done marker after all chunks have been streamed if (fatalError) {
socket.close(1011, 'Fatal error');
return;
}
// Send done marker after all events have been streamed
logger.debug('Sending done marker to client'); logger.debug('Sending done marker to client');
socket.send( socket.send(
JSON.stringify({ JSON.stringify({
@@ -332,6 +389,17 @@ export class WebSocketHandler {
await this.config.eventSubscriber.onSessionDisconnect(removedSession); await this.config.eventSubscriber.onSessionDisconnect(removedSession);
} }
// Cleanup realtime bar subscriptions
const sessionId = authContext.sessionId;
const subs = this.barSubscriptions.get(sessionId);
if (subs && this.config.ohlcService) {
for (const { ticker, periodSeconds, callback } of subs) {
this.config.ohlcService.unsubscribeFromTicker(ticker, periodSeconds, callback);
}
this.barSubscriptions.delete(sessionId);
logger.info({ sessionId, count: subs.length }, 'Cleaned up realtime bar subscriptions');
}
// Cleanup workspace // Cleanup workspace
await workspace!.shutdown(); await workspace!.shutdown();
this.workspaces.delete(authContext.sessionId); this.workspaces.delete(authContext.sessionId);
@@ -356,6 +424,7 @@ export class WebSocketHandler {
}, 30000); }, 30000);
} catch (error) { } catch (error) {
logger.error({ error }, 'Failed to initialize session'); logger.error({ error }, 'Failed to initialize session');
socket.send(JSON.stringify({ type: 'text', text: 'An unrecoverable error occurred in the agent harness.' }));
socket.close(1011, 'Internal server error'); socket.close(1011, 'Internal server error');
if (workspace) { if (workspace) {
await workspace.shutdown(); await workspace.shutdown();
@@ -527,19 +596,92 @@ export class WebSocketHandler {
break; break;
} }
case 'subscribe_bars': case 'subscribe_bars': {
case 'unsubscribe_bars': if (!ohlcService || !authContext) {
// TODO: Implement real-time subscriptions socket.send(JSON.stringify({
socket.send( type: 'subscribe_bars_response',
JSON.stringify({
type: `${payload.type}_response`,
request_id: requestId, request_id: requestId,
subscription_id: payload.subscription_id, subscription_id: payload.subscription_id,
success: false, success: false,
message: 'Real-time subscriptions not yet implemented', message: 'Realtime service not available',
}) }));
); break;
}
const subTicker: string = payload.symbol;
const subPeriod: number = payload.period_seconds ?? payload.resolution ?? 60;
const sessionId = authContext.sessionId;
// Create a per-subscription callback that forwards bars to this socket
const barCallback: BarUpdateCallback = (bar) => {
if (socket.readyState !== 1 /* OPEN */) return;
socket.send(JSON.stringify({
type: 'bar_update',
subscription_id: payload.subscription_id,
ticker: bar.ticker,
period_seconds: bar.periodSeconds,
bar: {
// Convert nanoseconds → seconds for client compatibility
time: Number(bar.timestamp / 1_000_000_000n),
open: bar.open,
high: bar.high,
low: bar.low,
close: bar.close,
volume: bar.volume,
},
}));
};
ohlcService.subscribeToTicker(subTicker, subPeriod, barCallback);
// Track for cleanup on disconnect
if (!this.barSubscriptions.has(sessionId)) {
this.barSubscriptions.set(sessionId, []);
}
this.barSubscriptions.get(sessionId)!.push({
ticker: subTicker,
periodSeconds: subPeriod,
callback: barCallback,
});
logger.info({ sessionId, ticker: subTicker, period: subPeriod }, 'Subscribed to realtime bars');
socket.send(JSON.stringify({
type: 'subscribe_bars_response',
request_id: requestId,
subscription_id: payload.subscription_id,
success: true,
}));
break; break;
}
case 'unsubscribe_bars': {
if (!ohlcService || !authContext) break;
const unsubTicker: string = payload.symbol;
const unsubPeriod: number = payload.period_seconds ?? payload.resolution ?? 60;
const sessionId = authContext.sessionId;
const subs = this.barSubscriptions.get(sessionId);
if (subs) {
const idx = subs.findIndex(
s => s.ticker === unsubTicker && s.periodSeconds === unsubPeriod
);
if (idx >= 0) {
const [removed] = subs.splice(idx, 1);
ohlcService.unsubscribeFromTicker(unsubTicker, unsubPeriod, removed.callback);
logger.info({ sessionId, ticker: unsubTicker, period: unsubPeriod }, 'Unsubscribed from realtime bars');
}
}
socket.send(JSON.stringify({
type: 'unsubscribe_bars_response',
request_id: requestId,
subscription_id: payload.subscription_id,
success: true,
}));
break;
}
case 'evaluate_indicator': { case 'evaluate_indicator': {
// Direct MCP call — bypasses the agent/LLM for performance // Direct MCP call — bypasses the agent/LLM for performance
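
From the client's side, the realtime path is plain JSON over the existing WebSocket: subscribe_bars opens a subscription, bar_update messages stream in, and unsubscribe_bars tears it down. A hedged browser-side sketch using the field names from the handler above (the endpoint URL is a placeholder):

// Browser-side sketch; the gateway URL is a placeholder.
const ws = new WebSocket('wss://gateway.example/ws');

ws.onopen = () => {
  ws.send(JSON.stringify({
    type: 'subscribe_bars',
    request_id: 'req-1',      // echoed back in subscribe_bars_response
    subscription_id: 'sub-1',
    symbol: 'BTC/USDT.BINANCE',
    period_seconds: 60,
  }));
};

ws.onmessage = (ev) => {
  const msg = JSON.parse(ev.data as string);
  if (msg.type === 'bar_update' && msg.subscription_id === 'sub-1') {
    // bar.time is in seconds; the gateway converts from nanoseconds before sending
    console.log(msg.ticker, msg.bar.time, msg.bar.close, msg.bar.volume);
  }
};

// Tear down when the chart closes:
// ws.send(JSON.stringify({ type: 'unsubscribe_bars', request_id: 'req-2',
//   subscription_id: 'sub-1', symbol: 'BTC/USDT.BINANCE', period_seconds: 60 }));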

View File

@@ -632,6 +632,118 @@ export class DuckDBClient {
} }
} }
/**
* Append a batch of image/audio blobs as a Parquet file in S3.
* Called once per assistant turn that produces binary output.
*/
async appendBlobs(
userId: string,
sessionId: string,
messageId: string,
blobs: Array<{
id: string;
user_id: string;
session_id: string;
message_id: string;
blob_type: string;
mime_type: string;
data: string;
caption: string | null;
timestamp: number;
}>
): Promise<void> {
await this.initialize();
if (!this.conversationsBucket || blobs.length === 0) {
return;
}
const now = new Date();
const year = now.getUTCFullYear();
const month = String(now.getUTCMonth() + 1).padStart(2, '0');
const s3Path = `s3://${this.conversationsBucket}/gateway/blobs/year=${year}/month=${month}/user_id=${userId}/${sessionId}_${messageId}.parquet`;
const tempTable = `blob_flush_${Date.now()}`;
try {
await this.query(`
CREATE TEMP TABLE ${tempTable} (
id VARCHAR,
user_id VARCHAR,
session_id VARCHAR,
message_id VARCHAR,
blob_type VARCHAR,
mime_type VARCHAR,
data VARCHAR,
caption VARCHAR,
timestamp BIGINT
)
`);
for (const blob of blobs) {
await this.query(
`INSERT INTO ${tempTable} VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
[blob.id, blob.user_id, blob.session_id, blob.message_id, blob.blob_type, blob.mime_type, blob.data, blob.caption, blob.timestamp]
);
}
await this.query(`COPY ${tempTable} TO '${s3Path}' (FORMAT PARQUET)`);
this.logger.info({ userId, sessionId, messageId, count: blobs.length, s3Path }, 'Blobs flushed to Parquet');
} finally {
await this.query(`DROP TABLE IF EXISTS ${tempTable}`).catch(() => {});
}
}
/**
* Query blobs from S3 by userId/sessionId, optionally filtered to specific blob IDs.
*/
async queryBlobs(
userId: string,
sessionId: string,
blobIds?: string[]
): Promise<any[]> {
await this.initialize();
try {
const tablePath = await this.getTablePath(this.namespace, 'blobs', this.catalogUri);
if (!tablePath) {
// Fallback: scan per-turn Parquet files written directly to S3
if (this.conversationsBucket) {
this.logger.debug({ userId, sessionId }, 'REST catalog miss, scanning blob Parquet files');
const parquetPath = `s3://${this.conversationsBucket}/gateway/blobs/**/user_id=${userId}/${sessionId}_*.parquet`;
const idClause = blobIds?.length
? `WHERE id IN (${blobIds.map(id => `'${id.replace(/'/g, "''")}'`).join(', ')})`
: '';
try {
return await this.query(`SELECT * FROM read_parquet('${parquetPath}') ${idClause} ORDER BY timestamp ASC`);
} catch {
// No blobs yet for this session
}
}
return [];
}
const idFilter = blobIds?.length
? `AND id IN (${blobIds.map(() => '?').join(', ')})`
: '';
const params: any[] = [userId, sessionId, ...(blobIds ?? [])];
const sql = `
SELECT id, user_id, session_id, message_id, blob_type, mime_type, data, caption, timestamp
FROM iceberg_scan('${tablePath}')
WHERE user_id = ? AND session_id = ? ${idFilter}
ORDER BY timestamp ASC
`;
const rows = await this.query(sql, params);
this.logger.info({ userId, sessionId, count: rows.length }, 'Loaded blobs from Iceberg');
return rows.map((row: any) => ({ ...row, timestamp: Number(row.timestamp) }));
} catch (error: any) {
this.logger.error({ error: error.message, userId, sessionId }, 'Failed to query blobs');
return [];
}
}
/** /**
* Close the DuckDB connection * Close the DuckDB connection
*/ */
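
Each assistant turn lands in one Parquet object under a year/month/user partitioned prefix, and the fallback reader globs that same layout when the Iceberg table is not registered in the catalog. A small sketch showing how the write path and read glob line up (bucket and IDs are placeholders):

// Write path for one turn (shape produced by appendBlobs):
//   s3://<bucket>/gateway/blobs/year=2026/month=04/user_id=u123/sess42_msg7.parquet
// Fallback read glob for the whole session (shape used by queryBlobs):
//   s3://<bucket>/gateway/blobs/**/user_id=u123/sess42_*.parquet
function blobWritePath(bucket: string, userId: string, sessionId: string, messageId: string, now = new Date()): string {
  const year = now.getUTCFullYear();
  const month = String(now.getUTCMonth() + 1).padStart(2, '0');
  return `s3://${bucket}/gateway/blobs/year=${year}/month=${month}/user_id=${userId}/${sessionId}_${messageId}.parquet`;
}

function blobReadGlob(bucket: string, userId: string, sessionId: string): string {
  return `s3://${bucket}/gateway/blobs/**/user_id=${userId}/${sessionId}_*.parquet`;
}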

View File

@@ -45,6 +45,21 @@ export interface IcebergMessage {
timestamp: number; // nanoseconds timestamp: number; // nanoseconds
} }
/**
* Blob record for Iceberg storage (images, audio, etc.)
*/
export interface IcebergBlob {
id: string;
user_id: string;
session_id: string;
message_id: string;
blob_type: string;
mime_type: string;
data: string; // base64
caption: string | null;
timestamp: number; // microseconds
}
/** /**
* Checkpoint record for Iceberg storage * Checkpoint record for Iceberg storage
*/ */
@@ -153,6 +168,25 @@ export class IcebergClient {
return this.duckdb.appendMessages(userId, sessionId, messages); return this.duckdb.appendMessages(userId, sessionId, messages);
} }
/**
* Append blobs for one assistant turn as a Parquet file in S3.
*/
async appendBlobs(
userId: string,
sessionId: string,
messageId: string,
blobs: IcebergBlob[]
): Promise<void> {
return this.duckdb.appendBlobs(userId, sessionId, messageId, blobs);
}
/**
* Query blobs from S3/Iceberg, optionally filtered to specific blob IDs.
*/
async queryBlobs(userId: string, sessionId: string, blobIds?: string[]): Promise<IcebergBlob[]> {
return this.duckdb.queryBlobs(userId, sessionId, blobIds);
}
/** /**
* Get table metadata * Get table metadata
*/ */

View File

@@ -298,6 +298,13 @@ export class QdrantClient {
pointsCount: info.points_count || 0, pointsCount: info.points_count || 0,
}; };
} catch (error) { } catch (error) {
// If the collection was lost (e.g. Qdrant restarted without the gateway restarting),
// recreate it and return zeroed stats rather than propagating the error.
if ((error as any)?.status === 404) {
this.logger.warn({ collection: this.collectionName }, 'Collection missing, recreating...');
await this.initialize();
return { vectorsCount: 0, indexedVectorsCount: 0, pointsCount: 0 };
}
this.logger.error({ error }, 'Failed to get collection info'); this.logger.error({ error }, 'Failed to get collection info');
throw error; throw error;
} }

View File

@@ -20,6 +20,22 @@ import type {
NotificationStatus, NotificationStatus,
} from '../types/ohlc.js'; } from '../types/ohlc.js';
export const OHLC_BAR_TOPIC_PATTERN = /^(.+)\|ohlc:(\d+)$/;
/** Decoded realtime OHLC bar received from the XPUB market data stream */
export interface RealtimeBar {
topic: string; // e.g., "BTC/USDT.BINANCE|ohlc:60"
ticker: string; // e.g., "BTC/USDT.BINANCE"
periodSeconds: number;
/** Window open time in nanoseconds since epoch */
timestamp: bigint;
open: number;
high: number;
low: number;
close: number;
volume: number;
}
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename); const __dirname = dirname(__filename);
@@ -39,14 +55,17 @@ export enum MessageType {
const protoDir = join(__dirname, '../..', 'protobuf'); const protoDir = join(__dirname, '../..', 'protobuf');
const root = new protobuf.Root(); const root = new protobuf.Root();
// Load proto file and parse it // Load proto files
const ingestorProto = readFileSync(join(protoDir, 'ingestor.proto'), 'utf8'); const ingestorProto = readFileSync(join(protoDir, 'ingestor.proto'), 'utf8');
const ohlcProto = readFileSync(join(protoDir, 'ohlc.proto'), 'utf8');
protobuf.parse(ingestorProto, root); protobuf.parse(ingestorProto, root);
protobuf.parse(ohlcProto, root);
// Export message types // Export message types
const SubmitHistoricalRequestType = root.lookupType('SubmitHistoricalRequest'); const SubmitHistoricalRequestType = root.lookupType('SubmitHistoricalRequest');
const SubmitResponseType = root.lookupType('SubmitResponse'); const SubmitResponseType = root.lookupType('SubmitResponse');
const HistoryReadyNotificationType = root.lookupType('HistoryReadyNotification'); const HistoryReadyNotificationType = root.lookupType('HistoryReadyNotification');
const OHLCType = root.lookupType('OHLC');
/** /**
* Encode SubmitHistoricalRequest to ZMQ frames * Encode SubmitHistoricalRequest to ZMQ frames
@@ -178,3 +197,39 @@ export function decodeHistoryReadyNotification(frames: Buffer[]): HistoryReadyNo
completed_at: BigInt(payload.completedAt), completed_at: BigInt(payload.completedAt),
}; };
} }
/**
* Decode a realtime OHLC bar from ZMQ SUB frames.
* Frame layout: [topic][version][0x04 OHLC type + OHLC protobuf bytes]
*
* Returns null if the topic doesn't match the realtime bar pattern or decoding fails.
*/
export function decodeRealtimeBar(frames: Buffer[]): RealtimeBar | null {
if (frames.length < 3) return null;
const topic = frames[0].toString();
const match = OHLC_BAR_TOPIC_PATTERN.exec(topic);
if (!match) return null;
const ticker = match[1];
const periodSeconds = parseInt(match[2], 10);
const messageFrame = frames[2];
if (messageFrame[0] !== 0x04) return null; // Must be OHLC type
const payloadBuffer = messageFrame.slice(1);
const decoded = OHLCType.decode(payloadBuffer);
const ohlc = OHLCType.toObject(decoded, { longs: String, defaults: true });
return {
topic,
ticker,
periodSeconds,
timestamp: BigInt(ohlc.timestamp ?? '0'),
open: Number(ohlc.open ?? 0),
high: Number(ohlc.high ?? 0),
low: Number(ohlc.low ?? 0),
close: Number(ohlc.close ?? 0),
volume: Number(ohlc.volume ?? 0),
};
}
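
A hedged usage sketch of the decoder as it would run in the gateway's notification listener; frames are the Buffers of one multipart SUB message, and the nanosecond window-open time is usually converted to seconds for charting:

import { decodeRealtimeBar } from './zmq-protocol.js';

// `frames` stands in for the Buffer[] of one multipart message from the SUB socket.
declare const frames: Buffer[];

const bar = decodeRealtimeBar(frames);
if (bar) {
  // Window-open time arrives in nanoseconds; most charting libraries want seconds.
  const timeSec = Number(bar.timestamp / 1_000_000_000n);
  console.log(`${bar.ticker} ${bar.periodSeconds}s bar @ ${timeSec}: close=${bar.close}`);
}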

View File

@@ -17,6 +17,9 @@ import {
encodeSubmitHistoricalRequest, encodeSubmitHistoricalRequest,
decodeSubmitResponse, decodeSubmitResponse,
decodeHistoryReadyNotification, decodeHistoryReadyNotification,
decodeRealtimeBar,
OHLC_BAR_TOPIC_PATTERN,
type RealtimeBar,
} from './zmq-protocol.js'; } from './zmq-protocol.js';
import type { import type {
SubmitHistoricalRequest, SubmitHistoricalRequest,
@@ -27,6 +30,9 @@ import {
NotificationStatus, NotificationStatus,
} from '../types/ohlc.js'; } from '../types/ohlc.js';
export type BarUpdateCallback = (bar: RealtimeBar) => void;
export type { RealtimeBar };
export interface ZMQRelayConfig { export interface ZMQRelayConfig {
relayRequestEndpoint: string; // e.g., "tcp://relay:5559" relayRequestEndpoint: string; // e.g., "tcp://relay:5559"
relayNotificationEndpoint: string; // e.g., "tcp://relay:5558" relayNotificationEndpoint: string; // e.g., "tcp://relay:5558"
@@ -57,6 +63,12 @@ export class ZMQRelayClient {
private notificationTopic: string; private notificationTopic: string;
private pendingRequests: Map<string, PendingRequest> = new Map(); private pendingRequests: Map<string, PendingRequest> = new Map();
/** Ref count per ZMQ topic (gateway-level dedup before ZMQ subscribe/unsubscribe) */
private topicRefs: Map<string, number> = new Map();
/** Callbacks registered by WebSocket sessions for realtime bar updates */
private barCallbacks: Map<string, Set<BarUpdateCallback>> = new Map();
private connected = false; private connected = false;
private notificationListenerRunning = false; private notificationListenerRunning = false;
@@ -253,8 +265,6 @@ export class ZMQRelayClient {
// Handle metadata update notifications // Handle metadata update notifications
if (topic === 'METADATA_UPDATE') { if (topic === 'METADATA_UPDATE') {
this.logger.info('Received METADATA_UPDATE notification'); this.logger.info('Received METADATA_UPDATE notification');
// Call the onMetadataUpdate callback if configured
if (this.config.onMetadataUpdate) { if (this.config.onMetadataUpdate) {
try { try {
await this.config.onMetadataUpdate(); await this.config.onMetadataUpdate();
@@ -265,6 +275,20 @@ export class ZMQRelayClient {
continue; continue;
} }
// Handle realtime OHLC bar updates (topic pattern: "{ticker}|ohlc:{period}")
if (OHLC_BAR_TOPIC_PATTERN.test(topic)) {
const bar = decodeRealtimeBar(Array.from(frames));
if (bar) {
const callbacks = this.barCallbacks.get(topic);
if (callbacks) {
for (const cb of callbacks) {
try { cb(bar); } catch (e) { /* ignore callback errors */ }
}
}
}
continue;
}
// Handle history ready notifications // Handle history ready notifications
const notification = decodeHistoryReadyNotification(Array.from(frames)); const notification = decodeHistoryReadyNotification(Array.from(frames));
@@ -308,6 +332,69 @@ export class ZMQRelayClient {
this.logger.debug('Notification listener started'); this.logger.debug('Notification listener started');
} }
/**
* Subscribe to realtime OHLC bars for a ticker+period.
*
* ZMQ subscribe is only called on the 0→1 transition (first subscriber).
* This triggers the relay XPUB → Flink subscription detection → ingestor activation.
*
* @param callback Called whenever a new bar arrives for this topic
*/
subscribeToTicker(ticker: string, periodSeconds: number, callback: BarUpdateCallback): void {
const topic = `${ticker}|ohlc:${periodSeconds}`;
// Register callback
if (!this.barCallbacks.has(topic)) {
this.barCallbacks.set(topic, new Set());
}
this.barCallbacks.get(topic)!.add(callback);
// ZMQ subscribe on first ref
const prev = this.topicRefs.get(topic) ?? 0;
this.topicRefs.set(topic, prev + 1);
if (prev === 0 && this.subSocket) {
this.subSocket.subscribe(topic);
this.logger.info({ topic }, 'ZMQ subscribed to realtime topic');
}
}
/**
* Unsubscribe a callback from realtime OHLC bars.
* ZMQ unsubscribe is only called on the 1→0 transition (last subscriber).
*/
unsubscribeFromTicker(ticker: string, periodSeconds: number, callback: BarUpdateCallback): void {
const topic = `${ticker}|ohlc:${periodSeconds}`;
const callbacks = this.barCallbacks.get(topic);
if (callbacks) {
callbacks.delete(callback);
if (callbacks.size === 0) {
this.barCallbacks.delete(topic);
}
}
const prev = this.topicRefs.get(topic) ?? 0;
if (prev <= 1) {
this.topicRefs.delete(topic);
if (this.subSocket) {
this.subSocket.unsubscribe(topic);
this.logger.info({ topic }, 'ZMQ unsubscribed from realtime topic');
}
} else {
this.topicRefs.set(topic, prev - 1);
}
}
/**
* Remove all subscriptions for a set of (topic, callback) pairs.
* Convenience method for WebSocket disconnect cleanup.
*/
cleanupSubscriptions(subscriptions: Array<{ ticker: string; periodSeconds: number; callback: BarUpdateCallback }>): void {
for (const { ticker, periodSeconds, callback } of subscriptions) {
this.unsubscribeFromTicker(ticker, periodSeconds, callback);
}
}
/** /**
* Close the client and cleanup resources * Close the client and cleanup resources
*/ */
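
Because topics are ref-counted, many WebSocket sessions can watch the same ticker and period while only one subscription exists on the ZMQ socket, and the upstream realtime jobs are started and stopped only on the first and last subscriber. A hedged usage sketch (import path and construction are assumptions):

import type { ZMQRelayClient, BarUpdateCallback } from './zmq-relay-client.js'; // path assumed

declare const relay: ZMQRelayClient; // constructed and connected elsewhere
const onBarA: BarUpdateCallback = (bar) => console.log('chart A', bar.close);
const onBarB: BarUpdateCallback = (bar) => console.log('chart B', bar.close);

relay.subscribeToTicker('BTC/USDT.BINANCE', 60, onBarA);     // refs 0 -> 1: ZMQ subscribe sent
relay.subscribeToTicker('BTC/USDT.BINANCE', 60, onBarB);     // refs 1 -> 2: no socket call

relay.unsubscribeFromTicker('BTC/USDT.BINANCE', 60, onBarA); // refs 2 -> 1
relay.unsubscribeFromTicker('BTC/USDT.BINANCE', 60, onBarB); // refs 1 -> 0: ZMQ unsubscribe sent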

View File

@@ -4,6 +4,7 @@ import type { FastifyBaseLogger } from 'fastify';
import type { License } from '../types/user.js'; import type { License } from '../types/user.js';
import { ChannelType } from '../types/user.js'; import { ChannelType } from '../types/user.js';
import type { ConversationStore } from './memory/conversation-store.js'; import type { ConversationStore } from './memory/conversation-store.js';
import type { BlobStore } from './memory/blob-store.js';
import type { InboundMessage, OutboundMessage } from '../types/messages.js'; import type { InboundMessage, OutboundMessage } from '../types/messages.js';
import { MCPClientConnector } from './mcp-client.js'; import { MCPClientConnector } from './mcp-client.js';
import { LLMProviderFactory, type ProviderConfig } from '../llm/provider.js'; import { LLMProviderFactory, type ProviderConfig } from '../llm/provider.js';
@@ -14,13 +15,16 @@ import type { ChannelAdapter, PathTriggerContext } from '../workspace/index.js';
import type { ResearchSubagent } from './subagents/research/index.js'; import type { ResearchSubagent } from './subagents/research/index.js';
import type { IndicatorSubagent } from './subagents/indicator/index.js'; import type { IndicatorSubagent } from './subagents/indicator/index.js';
import type { WebExploreSubagent } from './subagents/web-explore/index.js'; import type { WebExploreSubagent } from './subagents/web-explore/index.js';
import type { StrategySubagent } from './subagents/strategy/index.js';
import type { DynamicStructuredTool } from '@langchain/core/tools'; import type { DynamicStructuredTool } from '@langchain/core/tools';
import { getToolRegistry } from '../tools/tool-registry.js'; import { getToolRegistry } from '../tools/tool-registry.js';
import type { MCPToolInfo } from '../tools/mcp/mcp-tool-wrapper.js'; import type { MCPToolInfo } from '../tools/mcp/mcp-tool-wrapper.js';
import { createResearchAgentTool } from '../tools/platform/research-agent.tool.js'; import { createResearchAgentTool } from '../tools/platform/research-agent.tool.js';
import { createIndicatorAgentTool } from '../tools/platform/indicator-agent.tool.js'; import { createIndicatorAgentTool } from '../tools/platform/indicator-agent.tool.js';
import { createWebExploreAgentTool } from '../tools/platform/web-explore-agent.tool.js'; import { createWebExploreAgentTool } from '../tools/platform/web-explore-agent.tool.js';
import { createStrategyAgentTool } from '../tools/platform/strategy-agent.tool.js';
import { createUserContext } from './memory/session-context.js'; import { createUserContext } from './memory/session-context.js';
import type { HarnessEvent } from './harness-events.js';
import { readFile } from 'fs/promises'; import { readFile } from 'fs/promises';
import { join, dirname } from 'path'; import { join, dirname } from 'path';
import { fileURLToPath } from 'url'; import { fileURLToPath } from 'url';
@@ -54,10 +58,12 @@ export type HarnessFactory = (sessionConfig: HarnessSessionConfig) => AgentHarne
export interface AgentHarnessConfig extends HarnessSessionConfig { export interface AgentHarnessConfig extends HarnessSessionConfig {
providerConfig: ProviderConfig; providerConfig: ProviderConfig;
conversationStore?: ConversationStore; conversationStore?: ConversationStore;
blobStore?: BlobStore;
historyLimit: number; historyLimit: number;
researchSubagent?: ResearchSubagent; researchSubagent?: ResearchSubagent;
indicatorSubagent?: IndicatorSubagent; indicatorSubagent?: IndicatorSubagent;
webExploreSubagent?: WebExploreSubagent; webExploreSubagent?: WebExploreSubagent;
strategySubagent?: StrategySubagent;
} }
/** /**
@@ -87,6 +93,8 @@ export class AgentHarness {
private conversationStore?: ConversationStore; private conversationStore?: ConversationStore;
private indicatorSubagent?: IndicatorSubagent; private indicatorSubagent?: IndicatorSubagent;
private webExploreSubagent?: WebExploreSubagent; private webExploreSubagent?: WebExploreSubagent;
private strategySubagent?: StrategySubagent;
private blobStore?: BlobStore;
private abortController: AbortController | null = null; private abortController: AbortController | null = null;
constructor(config: AgentHarnessConfig) { constructor(config: AgentHarnessConfig) {
@@ -96,10 +104,12 @@ export class AgentHarness {
this.researchSubagent = config.researchSubagent; this.researchSubagent = config.researchSubagent;
this.indicatorSubagent = config.indicatorSubagent; this.indicatorSubagent = config.indicatorSubagent;
this.webExploreSubagent = config.webExploreSubagent; this.webExploreSubagent = config.webExploreSubagent;
this.strategySubagent = config.strategySubagent;
this.modelFactory = new LLMProviderFactory(config.providerConfig, config.logger); this.modelFactory = new LLMProviderFactory(config.providerConfig, config.logger);
this.modelRouter = new ModelRouter(this.modelFactory, config.logger); this.modelRouter = new ModelRouter(this.modelFactory, config.logger);
this.conversationStore = config.conversationStore; this.conversationStore = config.conversationStore;
this.blobStore = config.blobStore;
this.mcpClient = new MCPClientConnector({ this.mcpClient = new MCPClientConnector({
userId: config.userId, userId: config.userId,
@@ -419,17 +429,75 @@ export class AgentHarness {
} }
} }
/**
* Initialize strategy subagent
*/
private async initializeStrategySubagent(): Promise<void> {
if (this.strategySubagent) {
this.config.logger.debug('Strategy subagent already provided');
return;
}
this.config.logger.debug('Creating strategy subagent for session');
try {
const { createStrategySubagent } = await import('./subagents/strategy/index.js');
const { model } = await this.modelRouter.route(
'trading strategy writing and backtesting',
this.config.license,
RoutingStrategy.COMPLEXITY,
this.config.userId
);
const toolRegistry = getToolRegistry();
const strategyTools = await toolRegistry.getToolsForAgent(
'strategy',
this.mcpClient,
this.availableMCPTools,
this.workspaceManager,
undefined,
undefined
);
const strategySubagentPath = join(__dirname, 'subagents', 'strategy');
this.config.logger.debug({ strategySubagentPath }, 'Using strategy subagent path');
this.strategySubagent = await createStrategySubagent(
model,
this.config.logger,
strategySubagentPath,
this.mcpClient,
strategyTools
);
this.config.logger.info(
{
toolCount: strategyTools.length,
toolNames: strategyTools.map(t => t.name),
},
'Strategy subagent created successfully'
);
} catch (error) {
this.config.logger.error(
{ error, errorMessage: (error as Error).message, stack: (error as Error).stack },
'Failed to create strategy subagent'
);
// Don't throw — strategy subagent is optional
}
}
/** /**
* Execute model with tool calling loop * Execute model with tool calling loop
* Handles multi-turn tool calls until the model produces a final text response * Handles multi-turn tool calls until the model produces a final text response
*/ */
private async executeWithToolCalling( private async *executeWithToolCalling(
model: any, model: any,
messages: BaseMessage[], messages: BaseMessage[],
tools: DynamicStructuredTool[], tools: DynamicStructuredTool[],
maxIterations: number = 2, maxIterations: number = 2,
signal?: AbortSignal signal?: AbortSignal
): Promise<string> { ): AsyncGenerator<HarnessEvent> {
this.config.logger.info( this.config.logger.info(
{ toolCount: tools.length, maxIterations }, { toolCount: tools.length, maxIterations },
'Starting tool calling loop' 'Starting tool calling loop'
@@ -437,6 +505,8 @@ export class AgentHarness {
const messagesCopy = [...messages]; const messagesCopy = [...messages];
let iterations = 0; let iterations = 0;
// Track last char of last yielded text chunk to detect missing spaces between tokens
let lastChunkTail = '';
while (iterations < maxIterations) { while (iterations < maxIterations) {
if (signal?.aborted) break; if (signal?.aborted) break;
@@ -455,15 +525,24 @@ export class AgentHarness {
try { try {
const stream = await model.stream(messagesCopy, { signal }); const stream = await model.stream(messagesCopy, { signal });
for await (const chunk of stream) { for await (const chunk of stream) {
const contents: string[] = [];
if (typeof chunk.content === 'string' && chunk.content.length > 0) { if (typeof chunk.content === 'string' && chunk.content.length > 0) {
this.channelAdapter?.sendChunk(chunk.content); contents.push(chunk.content);
} else if (Array.isArray(chunk.content)) { } else if (Array.isArray(chunk.content)) {
for (const block of chunk.content) { for (const block of chunk.content) {
if (block.type === 'text' && block.text) { if (block.type === 'text' && block.text) contents.push(block.text);
this.channelAdapter?.sendChunk(block.text);
}
} }
} }
for (const content of contents) {
// DeepInfra/GLM streams tokens without leading spaces; inject one when
// both the tail of the previous chunk and the head of this chunk are
// word characters (\w), which would otherwise merge two words.
if (lastChunkTail && /\w/.test(lastChunkTail) && /\w/.test(content[0])) {
yield { type: 'chunk', content: ' ' };
}
lastChunkTail = content[content.length - 1];
yield { type: 'chunk', content };
}
response = response ? response.concat(chunk) : chunk; response = response ? response.concat(chunk) : chunk;
} }
} catch (invokeError: any) { } catch (invokeError: any) {
@@ -486,6 +565,8 @@ export class AgentHarness {
contentLength: typeof response.content === 'string' ? response.content.length : 0, contentLength: typeof response.content === 'string' ? response.content.length : 0,
hasToolCalls: !!response.tool_calls, hasToolCalls: !!response.tool_calls,
toolCallCount: response.tool_calls?.length || 0, toolCallCount: response.tool_calls?.length || 0,
usageMetadata: (response as any).usage_metadata,
finishReason: (response as any).response_metadata?.finish_reason,
}, },
'Model response received' 'Model response received'
); );
@@ -508,7 +589,8 @@ export class AgentHarness {
{ finalContentLength: finalContent.length, iterations }, { finalContentLength: finalContent.length, iterations },
'Tool calling loop complete - no more tool calls' 'Tool calling loop complete - no more tool calls'
); );
return finalContent; yield { type: 'done', content: finalContent };
return;
} }
this.config.logger.info( this.config.logger.info(
@@ -540,11 +622,32 @@ export class AgentHarness {
} }
try { try {
this.channelAdapter?.sendToolCall?.(toolCall.name, this.getToolLabel(toolCall.name)); yield { type: 'tool_call', toolName: toolCall.name, label: this.getToolLabel(toolCall.name) };
const result = await tool.func(toolCall.args);
// Process result to extract images and send them via channel adapter // Use streamFunc when available (subagent tools) to forward intermediate events inline
const processedResult = this.processToolResult(result, toolCall.name); let result: string;
const streamFunc = (tool as any).streamFunc as ((args: any, signal?: AbortSignal) => AsyncGenerator<import('./harness-events.js').HarnessEvent, string>) | undefined;
if (streamFunc) {
const gen = streamFunc(toolCall.args, signal);
let next = await gen.next();
while (!next.done) {
if (signal?.aborted) {
gen.return?.('');
break;
}
yield next.value;
next = await gen.next();
}
result = next.done ? next.value : '';
} else {
result = await tool.func(toolCall.args);
}
// Extract images from result and yield them; get text-only version for LLM
const { cleanedResult: processedResult, images } = this.extractImagesFromToolResult(result, toolCall.name);
for (const img of images) {
yield { type: 'image', data: img.data, mimeType: img.mimeType, caption: img.caption };
}
this.config.logger.debug( this.config.logger.debug(
{ {
@@ -567,6 +670,12 @@ export class AgentHarness {
'Tool execution completed' 'Tool execution completed'
); );
} catch (error) { } catch (error) {
// Clean stop — abort signal fired during tool execution; exit without error message
if (signal?.aborted || (error as Error)?.name === 'AbortError') {
this.config.logger.info({ tool: toolCall.name }, 'Tool execution aborted by stop signal');
return;
}
this.config.logger.error( this.config.logger.error(
{ {
error, error,
@@ -578,6 +687,8 @@ export class AgentHarness {
'Tool execution failed' 'Tool execution failed'
); );
yield { type: 'error' as const, source: toolCall.name, fatal: false };
messagesCopy.push( messagesCopy.push(
new ToolMessage({ new ToolMessage({
content: `Error: ${error}`, content: `Error: ${error}`,
@@ -586,11 +697,15 @@ export class AgentHarness {
); );
} }
} }
// After all tool calls complete, emit a space separator before the next LLM streaming pass
yield { type: 'chunk', content: ' ' };
lastChunkTail = ' ';
} }
// Max iterations reached - return what we have // Max iterations reached - yield done with apology
this.config.logger.warn('Max tool calling iterations reached'); this.config.logger.warn('Max tool calling iterations reached');
return 'I apologize, but I encountered an issue processing your request. Please try rephrasing your question.'; yield { type: 'done', content: 'I apologize, but I encountered an issue processing your request. Please try rephrasing your question.' };
} }
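The loop above prefers a `streamFunc` generator when a tool exposes one, so subagent events reach the client while the tool is still running instead of only after `func` resolves. Below is a minimal sketch of how a subagent wrapper tool might attach such a generator, assuming a simplified subagent interface; the tool name and schema are hypothetical, and only the `streamFunc` shape (yields `HarnessEvent`, returns the final string) is taken from how the loop consumes it.

```typescript
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import type { HarnessEvent } from './harness-events.js';

// Hypothetical subagent handle: stands in for the real subagent classes.
interface StreamingSubagent {
  streamEvents(input: string, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string>;
}

export function createStreamingSubagentTool(subagent: StreamingSubagent): DynamicStructuredTool {
  const tool = new DynamicStructuredTool({
    name: 'example_subagent', // hypothetical name
    description: 'Delegates work to a streaming subagent.',
    schema: z.object({ instruction: z.string() }),
    func: async ({ instruction }) => {
      // Non-streaming fallback: drain the generator and return its final text.
      const gen = subagent.streamEvents(instruction);
      let next = await gen.next();
      while (!next.done) next = await gen.next();
      return next.value;
    },
  });

  // The harness checks for this property and, when present, forwards every
  // yielded event before using the returned string as the ToolMessage content.
  (tool as any).streamFunc = async function* (
    args: { instruction: string },
    signal?: AbortSignal,
  ): AsyncGenerator<HarnessEvent, string> {
    const gen = subagent.streamEvents(args.instruction, signal);
    let next = await gen.next();
    while (!next.done) {
      yield next.value;
      next = await gen.next();
    }
    return next.value; // final text handed back to the LLM context
  };

  return tool;
}
```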
/** /**
@@ -617,162 +732,222 @@ export class AgentHarness {
} }
/** /**
* Handle incoming message from user * Stream events for an incoming user message.
* Yields typed HarnessEvents (chunk, tool_call, image, done) and saves the
* conversation to the store once the done event has been emitted.
*/ */
async handleMessage(message: InboundMessage): Promise<OutboundMessage> { async *streamMessage(message: InboundMessage): AsyncGenerator<HarnessEvent> {
this.config.logger.info( this.config.logger.info(
{ messageId: message.messageId, userId: message.userId, content: message.content.substring(0, 100) }, { messageId: message.messageId, userId: message.userId, content: message.content.substring(0, 100) },
'Processing user message' 'Processing user message'
); );
try { // 1. Build system prompt from template
// 1. Build system prompt from template this.config.logger.debug('Building system prompt');
this.config.logger.debug('Building system prompt'); const systemPrompt = await this.buildSystemPrompt();
const systemPrompt = await this.buildSystemPrompt(); this.config.logger.debug({ systemPromptLength: systemPrompt.length }, 'System prompt built');
this.config.logger.debug({ systemPromptLength: systemPrompt.length }, 'System prompt built');
// 2. Load recent conversation history // 2. Load recent conversation history
const channelKey = this.config.channelType ?? ChannelType.WEBSOCKET; const channelKey = this.config.channelType ?? ChannelType.WEBSOCKET;
let storedMessages = this.conversationStore let storedMessages = this.conversationStore
? await this.conversationStore.getRecentMessages( ? await this.conversationStore.getRecentMessages(
this.config.userId, this.config.sessionId, this.config.historyLimit, channelKey
)
: [];
// First turn: seed conversation history with current workspace state
if (storedMessages.length === 0 && this.workspaceManager && this.conversationStore) {
const workspaceJSON = this.workspaceManager.serializeState();
const content = `[Workspace State]\n\`\`\`json\n${workspaceJSON}\n\`\`\``;
await this.conversationStore.saveMessage(
this.config.userId, this.config.sessionId,
'workspace', content, { isWorkspaceContext: true }, channelKey
);
storedMessages = await this.conversationStore.getRecentMessages(
this.config.userId, this.config.sessionId, this.config.historyLimit, channelKey this.config.userId, this.config.sessionId, this.config.historyLimit, channelKey
); )
} : [];
const history = this.conversationStore // First turn: seed conversation history with current workspace state
? this.conversationStore.toLangChainMessages(storedMessages) if (storedMessages.length === 0 && this.workspaceManager && this.conversationStore) {
: []; const workspaceJSON = this.workspaceManager.serializeState();
this.config.logger.debug({ historyLength: history.length }, 'Conversation history loaded'); const content = `[Workspace State]\n\`\`\`json\n${workspaceJSON}\n\`\`\``;
await this.conversationStore.saveMessage(
// 4. Get the configured model this.config.userId, this.config.sessionId,
this.config.logger.debug('Routing to model'); 'workspace', content, { isWorkspaceContext: true }, channelKey
const { model, middleware } = await this.modelRouter.route(
message.content,
this.config.license,
RoutingStrategy.COMPLEXITY,
this.config.userId
); );
this.middleware = middleware; storedMessages = await this.conversationStore.getRecentMessages(
this.config.logger.info({ modelName: model.constructor.name }, 'Model selected'); this.config.userId, this.config.sessionId, this.config.historyLimit, channelKey
// 5. Build LangChain messages
const langchainMessages = this.buildLangChainMessages(systemPrompt, history, message.content);
this.config.logger.debug({ messageCount: langchainMessages.length }, 'LangChain messages built');
// 6. Get tools for main agent from registry
const toolRegistry = getToolRegistry();
const tools = await toolRegistry.getToolsForAgent(
'main',
this.mcpClient,
this.availableMCPTools,
this.workspaceManager // Pass session workspace manager
); );
}
// Build shared subagent context const history = this.conversationStore
const subagentContext = { ? this.conversationStore.toLangChainMessages(storedMessages)
userContext: createUserContext({ : [];
userId: this.config.userId, this.config.logger.debug({ historyLength: history.length }, 'Conversation history loaded');
sessionId: this.config.sessionId,
license: this.config.license,
channelType: this.config.channelType ?? ChannelType.WEBSOCKET,
channelUserId: this.config.channelUserId ?? this.config.userId,
}),
};
// Add research subagent as a tool if available // 4. Get the configured model
if (this.researchSubagent) { this.config.logger.debug('Routing to model');
tools.push(createResearchAgentTool({ const { model, middleware } = await this.modelRouter.route(
researchSubagent: this.researchSubagent, message.content,
context: subagentContext, this.config.license,
logger: this.config.logger, RoutingStrategy.COMPLEXITY,
})); this.config.userId
} );
this.middleware = middleware;
this.config.logger.info({ modelName: model.constructor.name }, 'Model selected');
// Add indicator subagent as a tool if available // 5. Build LangChain messages
if (this.indicatorSubagent) { const langchainMessages = this.buildLangChainMessages(systemPrompt, history, message.content);
tools.push(createIndicatorAgentTool({ this.config.logger.debug({ messageCount: langchainMessages.length }, 'LangChain messages built');
indicatorSubagent: this.indicatorSubagent,
context: subagentContext,
logger: this.config.logger,
}));
}
// Add web explore subagent as a tool if available // 6. Get tools for main agent from registry
if (this.webExploreSubagent) { const toolRegistry = getToolRegistry();
tools.push(createWebExploreAgentTool({ const tools = await toolRegistry.getToolsForAgent(
webExploreSubagent: this.webExploreSubagent, 'main',
context: subagentContext, this.mcpClient,
logger: this.config.logger, this.availableMCPTools,
})); this.workspaceManager
} );
// Build shared subagent context
const subagentContext = {
userContext: createUserContext({
userId: this.config.userId,
sessionId: this.config.sessionId,
license: this.config.license,
channelType: this.config.channelType ?? ChannelType.WEBSOCKET,
channelUserId: this.config.channelUserId ?? this.config.userId,
}),
};
if (this.researchSubagent) {
tools.push(createResearchAgentTool({
researchSubagent: this.researchSubagent,
context: subagentContext,
logger: this.config.logger,
}));
}
if (this.indicatorSubagent) {
tools.push(createIndicatorAgentTool({
indicatorSubagent: this.indicatorSubagent,
context: subagentContext,
logger: this.config.logger,
}));
}
if (this.webExploreSubagent) {
tools.push(createWebExploreAgentTool({
webExploreSubagent: this.webExploreSubagent,
context: subagentContext,
logger: this.config.logger,
}));
}
if (!this.strategySubagent) {
await this.initializeStrategySubagent();
}
if (this.strategySubagent) {
tools.push(createStrategyAgentTool({
strategySubagent: this.strategySubagent,
context: subagentContext,
logger: this.config.logger,
}));
}
this.config.logger.info(
{ toolCount: tools.length, toolNames: tools.map(t => t.name) },
'Tools loaded for main agent'
);
// Apply middleware (e.g. Anthropic prompt caching)
const processedMessages = this.middleware
? this.middleware.processMessages(langchainMessages, tools)
: langchainMessages;
// 7. Bind tools to model
const modelWithTools = tools.length > 0 && model.bindTools ? model.bindTools(tools) : model;
if (tools.length > 0) {
this.config.logger.info( this.config.logger.info(
{ { modelType: modelWithTools.constructor.name, toolsBound: tools.length > 0 && !!model.bindTools },
toolCount: tools.length, 'Model bound with tools'
toolNames: tools.map(t => t.name),
},
'Tools loaded for main agent'
); );
}
// Apply middleware (e.g. Anthropic prompt caching) // 8. Stream tool calling loop and save conversation on completion
const processedMessages = this.middleware this.config.logger.info('Invoking LLM with tool support');
? this.middleware.processMessages(langchainMessages, tools) this.abortController = new AbortController();
: langchainMessages; let finalContent = '';
const collectedImages: Array<{ data: string; mimeType: string; caption?: string }> = [];
// 7. Bind tools to model try {
const modelWithTools = tools.length > 0 && model.bindTools ? model.bindTools(tools) : model; for await (const event of this.executeWithToolCalling(modelWithTools, processedMessages, tools, 10, this.abortController.signal)) {
if (event.type === 'done') {
if (tools.length > 0) { finalContent = event.content;
this.config.logger.info( this.config.logger.info({ responseLength: finalContent.length }, 'LLM response received');
{ modelType: modelWithTools.constructor.name, toolsBound: tools.length > 0 && !!model.bindTools }, } else if (event.type === 'image') {
'Model bound with tools' collectedImages.push({ data: event.data, mimeType: event.mimeType, caption: event.caption });
); }
yield event;
} }
} catch (error) {
// 8. Call LLM with tool calling loop if ((error as Error)?.name === 'AbortError') {
this.config.logger.info('Invoking LLM with tool support'); this.config.logger.info('Agent harness interrupted by stop signal');
this.abortController = new AbortController(); } else {
const assistantMessage = await this.executeWithToolCalling(modelWithTools, processedMessages, tools, 10, this.abortController.signal); this.config.logger.error({ error }, 'Fatal error in agent harness');
yield { type: 'error' as const, source: 'agent harness', fatal: true };
}
} finally {
this.abortController = null; this.abortController = null;
if (finalContent && this.conversationStore) {
// Write blobs to S3 and capture their IDs for message metadata
let blobRefs: Array<{ id: string; mimeType: string; caption?: string }> = [];
if (collectedImages.length > 0 && this.blobStore) {
const assistantMsgId = `${this.config.userId}:${this.config.sessionId}:${Date.now()}`;
const blobIds = await this.blobStore.writeBlobs(
this.config.userId, this.config.sessionId, assistantMsgId,
collectedImages.map(img => ({ blobType: 'image' as const, mimeType: img.mimeType, data: img.data, caption: img.caption }))
);
blobRefs = blobIds.map((id, i) => ({ id, mimeType: collectedImages[i].mimeType, caption: collectedImages[i].caption }));
}
this.config.logger.info(
{ responseLength: assistantMessage.length },
'LLM response received'
);
// Save user message and assistant response to conversation store
if (this.conversationStore) {
await this.conversationStore.saveMessage( await this.conversationStore.saveMessage(
this.config.userId, this.config.sessionId, 'user', message.content, undefined, channelKey this.config.userId, this.config.sessionId, 'user', message.content, undefined, channelKey
); );
await this.conversationStore.saveMessage( await this.conversationStore.saveMessage(
this.config.userId, this.config.sessionId, 'assistant', assistantMessage, undefined, channelKey this.config.userId, this.config.sessionId, 'assistant', finalContent,
blobRefs.length > 0 ? { blobs: blobRefs } : undefined,
channelKey
); );
} }
}
}
return { /**
messageId: `msg_${Date.now()}`, * Handle incoming message from user.
sessionId: message.sessionId, * Consumes streamMessage and dispatches events to the channel adapter for
content: assistantMessage, * backward compatibility with Telegram and other non-streaming callers.
timestamp: new Date(), */
}; async handleMessage(message: InboundMessage): Promise<OutboundMessage> {
let finalContent = '';
try {
for await (const event of this.streamMessage(message)) {
switch (event.type) {
case 'chunk':
this.channelAdapter?.sendChunk(event.content);
break;
case 'tool_call':
this.channelAdapter?.sendToolCall?.(event.toolName, event.label);
break;
case 'image':
this.channelAdapter?.sendImage({ data: event.data, mimeType: event.mimeType, caption: event.caption });
break;
case 'error':
this.channelAdapter?.sendText?.({ text: `An unrecoverable error occurred in the ${event.source}.` });
break;
case 'done':
finalContent = event.content;
break;
}
}
} catch (error) { } catch (error) {
this.config.logger.error({ error }, 'Error processing message'); this.config.logger.error({ error }, 'Error processing message');
throw error; throw error;
} }
return {
messageId: `msg_${Date.now()}`,
sessionId: message.sessionId,
content: finalContent,
timestamp: new Date(),
};
} }
/** /**
@@ -817,21 +992,27 @@ export class AgentHarness {
python_write: 'Coding...', python_write: 'Coding...',
python_read: 'Inspecting...', python_read: 'Inspecting...',
execute_research: 'Running script...', execute_research: 'Running script...',
backtest_strategy: 'Running backtest...', backtest_strategy: 'Backtesting...',
list_active_strategies: 'Checking active strategies...', list_active_strategies: 'Checking active strategies...',
web_explore: 'Searching the web...', web_explore: 'Searching the web...',
strategy: 'Coding a strategy...',
}; };
return labels[toolName] ?? `Running ${toolName}...`; return labels[toolName] ?? `Running ${toolName} tool...`;
} }
/** /**
* Process tool result to extract images and send via channel adapter. * Process tool result to extract images and send via channel adapter.
* Returns text-only version for LLM context (no base64 image data). * Returns text-only version for LLM context (no base64 image data).
*/ */
private processToolResult(result: string, toolName: string): string { private extractImagesFromToolResult(
result: string,
toolName: string
): { cleanedResult: string; images: Array<{ data: string; mimeType: string; caption?: string }> } {
const noImages = { cleanedResult: String(result || ''), images: [] };
// Most tools return plain strings - only process JSON results // Most tools return plain strings - only process JSON results
if (!result || typeof result !== 'string') { if (!result || typeof result !== 'string') {
return String(result || ''); return noImages;
} }
// Try to parse as JSON // Try to parse as JSON
@@ -840,7 +1021,7 @@ export class AgentHarness {
parsedResult = JSON.parse(result); parsedResult = JSON.parse(result);
} catch { } catch {
// Not JSON, return as-is // Not JSON, return as-is
return result; return noImages;
} }
// Check if result has images array (from ResearchSubagent) // Check if result has images array (from ResearchSubagent)
@@ -850,19 +1031,11 @@ export class AgentHarness {
'Extracting images from tool result' 'Extracting images from tool result'
); );
// Send each image via channel adapter const images: Array<{ data: string; mimeType: string; caption?: string }> = [];
for (const image of parsedResult.images) { for (const image of parsedResult.images) {
if (image.data && image.mimeType) { if (image.data && image.mimeType) {
if (this.channelAdapter) { this.config.logger.debug({ mimeType: image.mimeType }, 'Extracted image from tool result');
this.config.logger.debug({ mimeType: image.mimeType }, 'Sending image to channel'); images.push({ data: image.data, mimeType: image.mimeType, caption: undefined });
this.channelAdapter.sendImage({
data: image.data,
mimeType: image.mimeType,
caption: undefined,
});
} else {
this.config.logger.warn('No channel adapter set, cannot send image');
}
} }
} }
@@ -872,15 +1045,13 @@ export class AgentHarness {
images: undefined, images: undefined,
imageCount: parsedResult.images.length, imageCount: parsedResult.images.length,
}; };
// Clean up undefined values
Object.keys(textOnlyResult).forEach(key => { Object.keys(textOnlyResult).forEach(key => {
if (textOnlyResult[key] === undefined) { if (textOnlyResult[key] === undefined) {
delete textOnlyResult[key]; delete textOnlyResult[key];
} }
}); });
return JSON.stringify(textOnlyResult); return { cleanedResult: JSON.stringify(textOnlyResult), images };
} }
// Check for nested chart_images object // Check for nested chart_images object
@@ -890,20 +1061,12 @@ export class AgentHarness {
'Extracting chart images from tool result' 'Extracting chart images from tool result'
); );
// Send each chart image via channel adapter const images: Array<{ data: string; mimeType: string; caption?: string }> = [];
for (const [chartId, chartData] of Object.entries(parsedResult.chart_images)) { for (const [chartId, chartData] of Object.entries(parsedResult.chart_images)) {
const chart = chartData as any; const chart = chartData as any;
if (chart.type === 'image' && chart.data) { if (chart.type === 'image' && chart.data) {
if (this.channelAdapter) { this.config.logger.debug({ chartId }, 'Extracted chart image from tool result');
this.config.logger.debug({ chartId }, 'Sending chart image to channel'); images.push({ data: chart.data, mimeType: 'image/png', caption: undefined });
this.channelAdapter.sendImage({
data: chart.data,
mimeType: 'image/png',
caption: undefined,
});
} else {
this.config.logger.warn('No channel adapter set, cannot send chart image');
}
} }
} }
@@ -913,19 +1076,17 @@ export class AgentHarness {
chart_images: undefined, chart_images: undefined,
chartCount: Object.keys(parsedResult.chart_images).length, chartCount: Object.keys(parsedResult.chart_images).length,
}; };
// Clean up undefined values
Object.keys(textOnlyResult).forEach(key => { Object.keys(textOnlyResult).forEach(key => {
if (textOnlyResult[key] === undefined) { if (textOnlyResult[key] === undefined) {
delete textOnlyResult[key]; delete textOnlyResult[key];
} }
}); });
return JSON.stringify(textOnlyResult); return { cleanedResult: JSON.stringify(textOnlyResult), images };
} }
// No images found, return stringified result // No images found, return as-is
return result; return { cleanedResult: result, images: [] };
} }
/** /**
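The consumer-facing change in this file is that callers can now iterate `streamMessage` directly rather than going through `handleMessage` and a channel adapter. Here is a sketch of a streaming caller, assuming a generic `send` callback for the transport (WebSocket, SSE, or similar); the import paths and the `InboundMessage` module location are assumptions, while the event names come from the harness events module shown next.

```typescript
import type { AgentHarness } from './agent-harness.js'; // path assumed
import type { InboundMessage } from './types.js';       // hypothetical module

// Forward harness events to a transport as they arrive; `send` is a stand-in
// for whatever the gateway actually uses to push frames to the client.
export async function streamToClient(
  harness: AgentHarness,
  message: InboundMessage,
  send: (payload: unknown) => void,
): Promise<string> {
  let finalContent = '';
  for await (const event of harness.streamMessage(message)) {
    switch (event.type) {
      case 'chunk':
        send({ kind: 'text', text: event.content });
        break;
      case 'tool_call':
        send({ kind: 'status', text: event.label });
        break;
      case 'image':
        send({ kind: 'image', mimeType: event.mimeType, data: event.data });
        break;
      case 'error':
        send({ kind: 'error', source: event.source, fatal: event.fatal });
        break;
      case 'done':
        finalContent = event.content;
        break;
      default:
        // subagent_* events: surface as status updates for now
        send({ kind: 'status', text: JSON.stringify(event) });
    }
  }
  return finalContent;
}
```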

View File

@@ -0,0 +1,51 @@
export interface ChunkEvent {
type: 'chunk';
content: string;
}
export interface ToolCallEvent {
type: 'tool_call';
toolName: string;
label: string;
}
export interface ImageEvent {
type: 'image';
data: string;
mimeType: string;
caption?: string;
}
export interface DoneEvent {
type: 'done';
content: string;
}
export interface SubagentChunkEvent {
type: 'subagent_chunk';
agentName: string;
content: string;
}
export interface SubagentThinkingEvent {
type: 'subagent_thinking';
agentName: string;
content: string;
}
export interface SubagentToolCallEvent {
type: 'subagent_tool_call';
agentName: string;
toolName: string;
label: string;
}
export interface ErrorEvent {
type: 'error';
/** Name of the agent or tool where the error occurred */
source: string;
/** True if the error is unrecoverable and the chat session should end */
fatal: boolean;
}
export type HarnessEvent = ChunkEvent | ToolCallEvent | ImageEvent | DoneEvent | SubagentChunkEvent | SubagentThinkingEvent | SubagentToolCallEvent | ErrorEvent;

View File

@@ -57,57 +57,74 @@ export class MCPClientConnector {
this.client = null; this.client = null;
} }
try { const maxAttempts = 5;
this.config.logger.info( const retryDelayMs = 1500;
{ userId: this.config.userId, url: this.config.mcpServerUrl },
'Connecting to user MCP server'
);
this.client = new Client( this.config.logger.info(
{ { userId: this.config.userId, url: this.config.mcpServerUrl },
name: 'dexorder-gateway', 'Connecting to user MCP server'
version: '0.1.0', );
},
{ let lastError: unknown;
capabilities: { for (let attempt = 1; attempt <= maxAttempts; attempt++) {
sampling: {}, try {
this.client = new Client(
{
name: 'dexorder-gateway',
version: '0.1.0',
}, },
} {
); capabilities: {
sampling: {},
},
}
);
// Streamable HTTP: single /mcp endpoint, session tracked via mcp-session-id header // Streamable HTTP: single /mcp endpoint, session tracked via mcp-session-id header
const transport = new StreamableHTTPClientTransport( const transport = new StreamableHTTPClientTransport(
new URL(`${this.config.mcpServerUrl}/mcp`) new URL(`${this.config.mcpServerUrl}/mcp`)
); );
await this.client.connect(transport); await this.client.connect(transport);
// Hook client.onerror to detect transport failures (e.g. sandbox restart returning // Hook client.onerror to detect transport failures (e.g. sandbox restart returning
// 404 "session not found"). When fired, mark disconnected so the next callTool / // 404 "session not found"). When fired, mark disconnected so the next callTool /
// listTools call triggers a full reconnect + initialize handshake. // listTools call triggers a full reconnect + initialize handshake.
const connectedClient = this.client; const connectedClient = this.client;
const origOnError = this.client.onerror; const origOnError = this.client.onerror;
this.client.onerror = (error) => { this.client.onerror = (error) => {
origOnError?.(error); origOnError?.(error);
// Only act on the currently-active client (ignore stale closures after reconnect) // Only act on the currently-active client (ignore stale closures after reconnect)
if (this.client === connectedClient && this.connected) { if (this.client === connectedClient && this.connected) {
this.config.logger.warn(
{ error },
'MCP transport error — marking disconnected for lazy reconnect'
);
this.connected = false;
}
};
this.connected = true;
this.config.logger.info('Connected to user MCP server');
return;
} catch (error) {
lastError = error;
this.client = null;
if (attempt < maxAttempts) {
this.config.logger.warn( this.config.logger.warn(
{ error }, { error, userId: this.config.userId, attempt, maxAttempts },
'MCP transport error — marking disconnected for lazy reconnect' 'MCP connect attempt failed, retrying...'
); );
this.connected = false; await new Promise(resolve => setTimeout(resolve, retryDelayMs));
} }
}; }
this.connected = true;
this.config.logger.info('Connected to user MCP server');
} catch (error) {
this.config.logger.error(
{ error, userId: this.config.userId },
'Failed to connect to user MCP server'
);
throw error;
} }
this.config.logger.error(
{ error: lastError, userId: this.config.userId },
'Failed to connect to user MCP server'
);
throw lastError;
} }
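The onerror hook only flips `connected` to false; the reconnect itself happens lazily on the next tool or list call, and that guard is not part of this hunk. A hedged sketch of what such a guard could look like, written as a standalone wrapper over an assumed connector surface rather than the real class:

```typescript
// Assumed shape of the connector surface used below: only the pieces relevant
// to lazy reconnection, not the actual MCPClientConnector definition.
interface ReconnectingConnector {
  connected: boolean;
  connect(): Promise<void>;
  callTool(name: string, args: Record<string, unknown>): Promise<unknown>;
}

// Wrap a tool call so a transport error flagged earlier (connected === false)
// triggers a fresh connect + initialize handshake before the call goes out.
export async function callToolWithReconnect(
  connector: ReconnectingConnector,
  name: string,
  args: Record<string, unknown>,
): Promise<unknown> {
  if (!connector.connected) {
    await connector.connect(); // retries internally, throws after maxAttempts
  }
  return connector.callTool(name, args);
}
```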
/** /**
@@ -134,7 +151,9 @@ export class MCPClientConnector {
try { try {
this.config.logger.debug({ tool: name, args }, 'Calling MCP tool'); this.config.logger.debug({ tool: name, args }, 'Calling MCP tool');
const result = await this.client!.callTool({ name, arguments: args }); // Use a generous timeout: execute_research runs a subprocess with a 300s limit,
// so the default 60s MCP SDK timeout would fire before the script completes.
const result = await this.client!.callTool({ name, arguments: args }, undefined, { timeout: 330000 });
return result; return result;
} catch (error) { } catch (error) {
this.config.logger.error({ error, tool: name }, 'MCP tool call failed'); this.config.logger.error({ error, tool: name }, 'MCP tool call failed');

View File

@@ -0,0 +1,93 @@
import type { FastifyBaseLogger } from 'fastify';
import type { IcebergClient } from '../../clients/iceberg-client.js';
export interface StoredBlob {
id: string;
userId: string;
sessionId: string;
messageId: string;
blobType: 'image' | 'audio';
mimeType: string;
data: string; // base64
caption?: string;
timestamp: number; // microseconds
}
/**
* Blob store for binary attachments (images, audio) referenced by conversation messages.
*
* Unlike text messages (Redis hot + Iceberg cold), blobs write directly to S3 Parquet
* on each turn — they're infrequent enough that per-turn files don't cause fragmentation.
* Blob IDs are stored in the parent message's metadata field for later retrieval.
*/
export class BlobStore {
constructor(
private icebergClient: IcebergClient | undefined,
private logger: FastifyBaseLogger
) {}
/**
* Write all blobs for one assistant turn to a single S3 Parquet file.
* Returns the blob IDs assigned. Failures are logged but do not throw.
*/
async writeBlobs(
userId: string,
sessionId: string,
messageId: string,
blobs: Array<{ blobType: 'image' | 'audio'; mimeType: string; data: string; caption?: string }>
): Promise<string[]> {
if (!this.icebergClient || blobs.length === 0) {
return [];
}
const now = Date.now();
const stored = blobs.map((b, i) => ({
id: `blob_${userId}_${now}_${i}`,
user_id: userId,
session_id: sessionId,
message_id: messageId,
blob_type: b.blobType,
mime_type: b.mimeType,
data: b.data,
caption: b.caption ?? null,
timestamp: now * 1000, // microseconds
}));
try {
await this.icebergClient.appendBlobs(userId, sessionId, messageId, stored);
this.logger.info({ userId, sessionId, count: stored.length }, 'Blobs written to S3');
} catch (error) {
this.logger.error({ error }, 'Failed to write blobs to S3');
// Don't throw — blob failure should not break the conversation turn
}
return stored.map(b => b.id);
}
/**
* Retrieve blobs by their IDs from S3/Iceberg cold storage.
*/
async getBlobsByIds(userId: string, sessionId: string, blobIds: string[]): Promise<StoredBlob[]> {
if (!this.icebergClient || blobIds.length === 0) {
return [];
}
try {
const rows = await this.icebergClient.queryBlobs(userId, sessionId, blobIds);
return rows.map(r => ({
id: r.id,
userId: r.user_id,
sessionId: r.session_id,
messageId: r.message_id,
blobType: r.blob_type as 'image' | 'audio',
mimeType: r.mime_type,
data: r.data,
caption: r.caption ?? undefined,
timestamp: r.timestamp,
}));
} catch (error) {
this.logger.error({ error, blobIds }, 'Failed to retrieve blobs');
return [];
}
}
}
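Below is a sketch of the write/read round trip the harness performs with this store. The helper name, import paths, and the already-constructed `IcebergClient` are assumptions; the metadata shape (`{ blobs: [...] }`) mirrors what `streamMessage` attaches to the assistant message above.

```typescript
import type { FastifyBaseLogger } from 'fastify';
import { BlobStore } from './blob-store.js';                        // path assumed
import type { IcebergClient } from '../../clients/iceberg-client.js';

// Assumes an already-constructed IcebergClient and a pino-compatible logger.
export async function saveTurnImages(
  icebergClient: IcebergClient,
  logger: FastifyBaseLogger,
  userId: string,
  sessionId: string,
  images: Array<{ data: string; mimeType: string; caption?: string }>,
) {
  const blobStore = new BlobStore(icebergClient, logger);
  const messageId = `${userId}:${sessionId}:${Date.now()}`;

  // One Parquet file per assistant turn; failures are logged, not thrown.
  const blobIds = await blobStore.writeBlobs(
    userId, sessionId, messageId,
    images.map(img => ({ blobType: 'image' as const, ...img })),
  );

  // Blob IDs go into the parent message's metadata so history rehydration
  // can look the attachments up later.
  const metadata = { blobs: blobIds.map((id, i) => ({ id, mimeType: images[i].mimeType })) };

  // Cold-path read, e.g. when replaying conversation history:
  const blobs = await blobStore.getBlobsByIds(userId, sessionId, blobIds);
  return { metadata, blobs };
}
```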

View File

@@ -39,9 +39,9 @@ If the user asks for a capability not provided by Dexorder, decline and explain
## Task Delegation ## Task Delegation
- For ANY research questions, deep analysis, statistical analysis, charting requests, or market data queries that require computation, you MUST use the 'research' tool - For ANY research questions, deep analysis, statistical analysis, charting requests, or market data queries that require computation, you MUST use the 'research' tool
- For ANYTHING related to indicators on the chart — reading, adding, removing, modifying, or creating custom indicators — you MUST use the 'indicator' tool - For ANYTHING related to indicators on the chart — reading, adding, removing, modifying, or creating custom indicators — you MUST use the 'indicator' tool
- For ANY backtesting request — running a strategy against historical data — you MUST use the 'backtest_strategy' tool directly; NEVER use the research tool for backtesting - For ANY request about trading strategies — writing, editing, backtesting, interpreting results, activating, deactivating, or monitoring — you MUST use the 'strategy' tool; NEVER write strategy Python code yourself
- NEVER write Python code directly in your responses to the user - NEVER write Python code directly in your responses to the user
- NEVER show code to the user — delegate to the research or indicator tool instead - NEVER show code to the user — delegate to the research, indicator, or strategy tool instead
- NEVER attempt to do analysis yourself — let the subagents handle it - NEVER attempt to do analysis yourself — let the subagents handle it
## Available Tools ## Available Tools
@@ -110,46 +110,54 @@ Parameters:
- instruction: Natural language description of the analysis to perform (be specific!) - instruction: Natural language description of the analysis to perform (be specific!)
- name: A unique name for the research script (e.g., "BTC Weekly Analysis") - name: A unique name for the research script (e.g., "BTC Weekly Analysis")
**Do NOT include any time range, history length, bar count, period size, resolution, or timestamp guidance in the instruction** — not as numbers, not as natural language ("3-6 months", "1 year", "sufficient data"), not at all. The research subagent has its own rules for selecting resolution and history window. If you add time guidance, the subagent will follow yours instead of its own (which uses much more data). Only pass time constraints if the user explicitly asked for a specific period (e.g. "last week", "show me 2023").
Example usage: Example usage:
- User: "Does Friday price action correlate with Monday?" - User: "Does Friday price action correlate with Monday?"
- You: Call research tool with instruction="Analyze correlation between Friday and Monday price action during NY trading hours (9:30-4:00 ET)", name="Friday-Monday Correlation" - You: Call research tool with instruction="Analyze correlation between Friday and Monday price action during NY trading hours (9:30-4:00 ET)", name="Friday-Monday Correlation"
- WRONG: "...use hourly data and at least 3-6 months..." ← never add this
### strategy
**Use this tool for ALL trading strategy requests without exception.**
The strategy subagent handles the complete strategy lifecycle: writing PandasStrategy classes, running backtests, interpreting results, and activating/deactivating paper trading.
**ALWAYS use strategy for:**
- "Create a strategy that buys when RSI < 30" write a new strategy
- "Edit my momentum strategy to use a tighter stop" modify existing strategy
- "Backtest my RSI strategy over the last year" run backtest
- "How did this strategy perform on BTC?" interpret results
- "Activate my strategy for paper trading" start paper trading
- "What strategies are running?" list active strategies
- "Stop my momentum strategy" deactivate a strategy
- Any question about a strategy's PnL, trades, or performance
**NEVER call `backtest_strategy`, `activate_strategy`, `deactivate_strategy`, or `list_active_strategies` directly.** Always go through the strategy tool.
**Custom indicators in strategies:**
When writing a new strategy, the strategy subagent will first check for existing custom indicators via `python_list(category="indicator")`. Prefer using custom indicators (via `ta.custom_*`) over computing signals inline; this promotes reuse and gives users better visibility into strategy components. If a needed indicator doesn't exist yet, the strategy subagent will create it first via the indicator workflow.
### backtest_strategy ### backtest_strategy
**ALWAYS use this tool — and ONLY this tool — for any backtesting request.** *(Called internally by the strategy tool; do not call this directly.)*
Runs a saved trading strategy against historical OHLC data using the Nautilus Trader backtesting engine. Runs a saved trading strategy against historical OHLC data using the Nautilus Trader backtesting engine.
Returns structured performance metrics and an equity curve. Any charts generated are automatically sent to the user. Returns structured performance metrics including trade list, Sortino/Calmar ratios, and equity curve.
**ALWAYS use backtest_strategy for:**
- "Backtest my RSI strategy over the last year"
- "How did this strategy perform on BTC?"
- "Run a backtest from January to June"
- Any request to test or evaluate a strategy on historical data
**NEVER use research for backtesting** — the research tool cannot run strategies through the backtesting engine.
After the tool returns, summarize the results clearly: total return, Sharpe ratio, max drawdown, win rate, and trade count. Present the equity curve description in plain language.
Parameters:
- strategy_name: Display name of the saved strategy (use python_list with category="strategy" to check existing strategies)
- feeds: Array of `{symbol, period_seconds}` feed objects (e.g. `[{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}]`)
- from_time / to_time: Date strings ("2024-01-01", "90 days ago", "now") or Unix timestamps
- initial_capital: Starting balance in quote currency (default 10,000)
### list_active_strategies ### list_active_strategies
*(Called internally by the strategy tool; do not call this directly.)*
Lists all currently active (live or paper) strategies and their status. Lists all currently active (live or paper) strategies and their status.
Use this when the user asks what strategies are running.
### python_list ### python_list
List existing scripts in a category ("strategy", "indicator", or "research"). List existing scripts in a category ("strategy", "indicator", or "research").
Use this before calling the research tool to check whether a relevant script already exists. Use this before calling the research tool to check whether a relevant script already exists.
If one does, pass its exact name to the research tool so the subagent updates it rather than creating a new one. If one does, pass its exact name to the research tool so the subagent updates it rather than creating a new one.
Also use before calling backtest_strategy to confirm the strategy name. The strategy tool uses this internally to check strategy names before backtesting.
### symbol-lookup ### symbol-lookup
Look up trading symbols and get metadata. Look up trading symbols and get metadata.
Use this when users mention tickers or need symbol information. Use this when users mention tickers or need symbol information.
**Always use symbol_lookup to resolve a proper ticker before passing it to the research or get-chart-data tools.** Symbols must be in `SYMBOL.EXCHANGE` format (e.g., `BTC/USDT.BINANCE`). If the user says "ETHUSDT", "ETH", or any ambiguous ticker, resolve it first with symbol_lookup so the correct formatted ticker is passed downstream.
### get-chart-data ### get-chart-data
**IMPORTANT: This is for QUICK, CASUAL information ONLY. This tool just returns raw data - it does NOT create charts or plots.** **IMPORTANT: This is for QUICK, CASUAL information ONLY. This tool just returns raw data - it does NOT create charts or plots.**

View File

@@ -7,6 +7,7 @@ import type { MCPClientConnector } from '../mcp-client.js';
import type { DynamicStructuredTool } from '@langchain/core/tools'; import type { DynamicStructuredTool } from '@langchain/core/tools';
import { readFile } from 'fs/promises'; import { readFile } from 'fs/promises';
import { join } from 'path'; import { join } from 'path';
import type { HarnessEvent, SubagentChunkEvent, SubagentThinkingEvent } from '../harness-events.js';
/** /**
* Subagent configuration (loaded from config.yaml) * Subagent configuration (loaded from config.yaml)
@@ -122,6 +123,65 @@ export abstract class BaseSubagent {
yield result; yield result;
} }
/**
* Extract subagent_chunk / subagent_thinking events from a LangGraph `messages` stream datum.
*
* LangGraph emits `[message_chunk, metadata]` tuples in `messages` mode. The message content
* can be a plain string (normal text token) or an array of content blocks (extended thinking
* responses with `{type:"thinking", thinking:"..."}` and `{type:"text", text:"..."}`).
*/
static extractStreamChunks(
data: unknown,
agentName: string,
): Array<SubagentChunkEvent | SubagentThinkingEvent> {
const msg = Array.isArray(data) ? (data as unknown[])[0] : data;
const content = (msg as any)?.content;
if (typeof content === 'string') {
return content ? [{ type: 'subagent_chunk', agentName, content }] : [];
}
if (Array.isArray(content)) {
const chunks: Array<SubagentChunkEvent | SubagentThinkingEvent> = [];
for (const block of content as any[]) {
if (block?.type === 'thinking' && typeof block.thinking === 'string' && block.thinking) {
chunks.push({ type: 'subagent_thinking', agentName, content: block.thinking });
} else if (block?.type === 'text' && typeof block.text === 'string' && block.text) {
chunks.push({ type: 'subagent_chunk', agentName, content: block.text });
}
}
return chunks;
}
return [];
}
/**
* Extract the final text from an `updates`-mode agent message.
* Handles both plain string content and array content blocks (extended thinking).
*/
static extractFinalText(msg: any): string {
if (typeof msg?.content === 'string') return msg.content;
if (Array.isArray(msg?.content)) {
return (msg.content as any[])
.filter((b: any) => b?.type === 'text' && typeof b.text === 'string')
.map((b: any) => b.text as string)
.join('');
}
return '';
}
/**
* Stream typed HarnessEvents during execution.
* Subclasses override this to emit subagent_chunk / subagent_tool_call events
* using agent.stream() from LangGraph. Default falls back to execute().
*/
async *streamEvents(
context: SubagentContext,
input: string,
_signal?: AbortSignal,
): AsyncGenerator<HarnessEvent, string> {
const result = await this.execute(context, input);
return result;
}
/** /**
* Build messages with system prompt and memory context * Build messages with system prompt and memory context
*/ */
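To make the two content shapes concrete, here is an illustrative sketch of `messages`-mode tuples and what `extractStreamChunks` yields for each; the payloads are made up rather than captured from a real run, and the import path is assumed.

```typescript
import { BaseSubagent } from './base.js'; // path assumed

// Plain-text token: content is a string.
const plainChunk = [{ content: 'Computing RSI' }, { langgraph_node: 'agent' }];
// → [{ type: 'subagent_chunk', agentName: 'research', content: 'Computing RSI' }]

// Extended-thinking response: content is an array of blocks.
const thinkingChunk = [
  {
    content: [
      { type: 'thinking', thinking: 'Need 1h bars first.' },
      { type: 'text', text: 'Fetching data...' },
    ],
  },
  { langgraph_node: 'agent' },
];
// → [{ type: 'subagent_thinking', agentName: 'research', content: 'Need 1h bars first.' },
//    { type: 'subagent_chunk',    agentName: 'research', content: 'Fetching data...' }]

console.log(BaseSubagent.extractStreamChunks(plainChunk, 'research'));
console.log(BaseSubagent.extractStreamChunks(thinkingChunk, 'research'));
```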

View File

@@ -11,3 +11,8 @@ export {
createResearchSubagent, createResearchSubagent,
type ResearchResult, type ResearchResult,
} from './research/index.js'; } from './research/index.js';
export {
StrategySubagent,
createStrategySubagent,
} from './strategy/index.js';

View File

@@ -4,6 +4,7 @@ import { SystemMessage } from '@langchain/core/messages';
import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import type { MCPClientConnector } from '../../mcp-client.js'; import type { MCPClientConnector } from '../../mcp-client.js';
import type { HarnessEvent } from '../../harness-events.js';
/** /**
* Indicator Subagent * Indicator Subagent
@@ -84,6 +85,56 @@ export class IndicatorSubagent extends BaseSubagent {
return finalText; return finalText;
} }
async *streamEvents(context: SubagentContext, instruction: string, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
this.logger.info({ subagent: this.getName() }, 'streamEvents starting');
if (!this.hasMCPClient()) {
throw new Error('MCP client not available for indicator subagent');
}
const initialMessages = this.buildMessages(context, instruction);
const systemMessage = initialMessages[0];
const humanMessage = initialMessages[initialMessages.length - 1];
const agent = createReactAgent({
llm: this.model,
tools: this.tools,
prompt: systemMessage as SystemMessage,
});
const stream = agent.stream(
{ messages: [humanMessage] },
{ streamMode: ['messages', 'updates'], recursionLimit: 25, signal }
);
let finalText = '';
for await (const [mode, data] of await stream) {
if (signal?.aborted) break;
if (mode === 'messages') {
for (const chunk of IndicatorSubagent.extractStreamChunks(data, this.config.name)) {
yield chunk;
}
} else if (mode === 'updates') {
if ((data as any).agent?.messages) {
for (const msg of (data as any).agent.messages as any[]) {
if (msg.tool_calls?.length) {
for (const tc of msg.tool_calls) {
yield { type: 'subagent_tool_call', agentName: this.config.name, toolName: tc.name, label: tc.name };
}
} else {
const content = IndicatorSubagent.extractFinalText(msg);
if (content) finalText = content;
}
}
}
}
}
this.logger.info({ textLength: finalText.length }, 'streamEvents finished');
return finalText;
}
} }
/** /**

View File

@@ -4,6 +4,7 @@ import { SystemMessage } from '@langchain/core/messages';
import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import type { MCPClientConnector } from '../../mcp-client.js'; import type { MCPClientConnector } from '../../mcp-client.js';
import type { HarnessEvent } from '../../harness-events.js';
/** /**
* Result from research subagent execution * Result from research subagent execution
@@ -50,6 +51,58 @@ export class ResearchSubagent extends BaseSubagent {
this.imageCapture = capture; this.imageCapture = capture;
} }
/**
* Fetch custom indicators from the sandbox and return a formatted system prompt section.
* Returns empty string if there are no custom indicators or the call fails.
*/
private async fetchCustomIndicatorsSection(): Promise<string> {
try {
const raw = await this.callMCPTool('python_list', { category: 'indicator' });
const r = raw as any;
const text = r?.content?.[0]?.text ?? r?.[0]?.text;
const parsed = typeof text === 'string' ? JSON.parse(text) : raw;
const items: any[] = parsed?.items ?? [];
if (items.length === 0) return '';
const lines: string[] = ['\n\n## Custom Indicators\n'];
lines.push('The user has defined the following custom indicators. Use `ta.custom_<name>` where `<name>` is the lowercase sanitized function name shown below.\n');
for (const item of items) {
const displayName: string = item.name ?? 'unknown';
const description: string = item.description ?? '';
const meta: any = item.metadata ?? {};
// Derive the ta attribute name: sanitize display name to lowercase + underscores
const taAttr = `custom_${displayName.toLowerCase().replace(/[^\w]/g, '_').replace(/_+/g, '_').replace(/^_+|_+$/g, '')}`;
const inputSeries: string[] = meta.input_series ?? ['close'];
const params: Record<string, any> = meta.parameters ?? {};
const pane: string = meta.pane ?? 'separate';
const inputStr = inputSeries.map((s: string) => `df['${s}']`).join(', ');
const paramStr = Object.entries(params)
.map(([k, v]: [string, any]) => `${k}=${JSON.stringify(v?.default ?? null)}`)
.join(', ');
const callExample = paramStr
? `ta.${taAttr}(${inputStr}, ${paramStr})`
: `ta.${taAttr}(${inputStr})`;
const outputNames = (meta.output_columns ?? [{ name: 'value' }])
.map((c: any) => c.name)
.join(', ');
lines.push(`### ${displayName}`);
if (description) lines.push(description);
lines.push(`- **Call**: \`${callExample}\``);
lines.push(`- **Outputs**: ${outputNames} | **Pane**: ${pane}`);
lines.push('');
}
return lines.join('\n');
} catch (err) {
this.logger.warn({ err }, 'Failed to fetch custom indicators for prompt injection');
return '';
}
}
/** /**
* Execute research request using LangGraph's createReactAgent. * Execute research request using LangGraph's createReactAgent.
* This is the standard LangChain pattern for agents with tool access — * This is the standard LangChain pattern for agents with tool access —
@@ -79,11 +132,17 @@ export class ResearchSubagent extends BaseSubagent {
this.imageCapture.length = 0; this.imageCapture.length = 0;
this.lastImages = []; this.lastImages = [];
const customIndicatorsSection = await this.fetchCustomIndicatorsSection();
// Build system prompt (with memory context appended) // Build system prompt (with memory context appended)
const initialMessages = this.buildMessages(context, instruction); const initialMessages = this.buildMessages(context, instruction);
// buildMessages returns [SystemMessage, ...history, HumanMessage] // buildMessages returns [SystemMessage, ...history, HumanMessage]
// Extract system content for createReactAgent's prompt parameter // Extract system content for createReactAgent's prompt parameter
const systemMessage = initialMessages[0]; let systemMessage = initialMessages[0] as SystemMessage;
if (customIndicatorsSection) {
const base = typeof systemMessage.content === 'string' ? systemMessage.content : JSON.stringify(systemMessage.content);
systemMessage = new SystemMessage(base + customIndicatorsSection);
}
const humanMessage = initialMessages[initialMessages.length - 1]; const humanMessage = initialMessages[initialMessages.length - 1];
// createReactAgent is the standard LangChain/LangGraph pattern for tool-using agents. // createReactAgent is the standard LangChain/LangGraph pattern for tool-using agents.
@@ -91,12 +150,12 @@ export class ResearchSubagent extends BaseSubagent {
const agent = createReactAgent({ const agent = createReactAgent({
llm: this.model, llm: this.model,
tools: this.tools, tools: this.tools,
prompt: systemMessage as SystemMessage, prompt: systemMessage,
}); });
const result = await agent.invoke( const result = await agent.invoke(
{ messages: [humanMessage] }, { messages: [humanMessage] },
{ recursionLimit: 20 } { recursionLimit: 40 }
); );
// The final message in the graph output is the agent's last AIMessage // The final message in the graph output is the agent's last AIMessage
@@ -146,6 +205,109 @@ export class ResearchSubagent extends BaseSubagent {
return this.lastImages; return this.lastImages;
} }
/**
* Stream typed HarnessEvents using LangGraph's agent.stream().
* Emits subagent_tool_call when tools fire, subagent_chunk for the final AI response.
* Returns the final text string as the generator return value.
*/
async *streamEvents(context: SubagentContext, instruction: string, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
this.logger.info({ subagent: this.getName() }, 'streamEvents starting');
if (!this.hasMCPClient()) {
throw new Error('MCP client not available for research subagent');
}
this.imageCapture.length = 0;
this.lastImages = [];
// Emit immediately so the UI shows the subagent has started — LLM generation
// can take minutes with non-streaming models and nothing else reaches the UI until
// the first `updates` event fires (after the LLM finishes its first response).
yield { type: 'subagent_tool_call', agentName: this.config.name, toolName: 'Thinking...', label: 'Thinking...' };
const customIndicatorsSection = await this.fetchCustomIndicatorsSection();
const initialMessages = this.buildMessages(context, instruction);
let systemMessage = initialMessages[0] as SystemMessage;
if (customIndicatorsSection) {
const base = typeof systemMessage.content === 'string' ? systemMessage.content : JSON.stringify(systemMessage.content);
systemMessage = new SystemMessage(base + customIndicatorsSection);
}
const humanMessage = initialMessages[initialMessages.length - 1];
const agent = createReactAgent({
llm: this.model,
tools: this.tools,
prompt: systemMessage,
});
this.logger.debug(
{ toolCount: this.tools.length, toolNames: this.tools.map(t => t.name) },
'Research subagent: starting stream with tools'
);
const systemChars = typeof systemMessage.content === 'string'
? systemMessage.content.length
: JSON.stringify(systemMessage.content).length;
const humanChars = typeof humanMessage.content === 'string'
? humanMessage.content.length
: JSON.stringify(humanMessage.content).length;
this.logger.info(
{ systemChars, humanChars, approxInputKB: Math.round((systemChars + humanChars) / 1024) },
'Research subagent: input context size'
);
const stream = agent.stream(
{ messages: [humanMessage] },
{ streamMode: ['messages', 'updates'], recursionLimit: 40, signal }
);
let finalText = '';
let updateCount = 0;
for await (const [mode, data] of await stream) {
if (signal?.aborted) break;
if (mode === 'messages') {
// Real-time token streaming from the LLM — data is [BaseMessage, metadata]
for (const chunk of ResearchSubagent.extractStreamChunks(data, this.config.name)) {
yield chunk;
}
} else if (mode === 'updates') {
updateCount++;
const updateKeys = Object.keys(data as any);
this.logger.debug({ updateCount, updateKeys }, 'Research subagent: graph update');
// Agent node fired — yield tool call decisions before tools run
if ((data as any).agent?.messages) {
for (const msg of (data as any).agent.messages as any[]) {
if (msg.tool_calls?.length) {
for (const tc of msg.tool_calls) {
yield { type: 'subagent_tool_call', agentName: this.config.name, toolName: tc.name, label: tc.name };
}
} else {
// Capture final text for return value (already streamed via messages above)
const content = ResearchSubagent.extractFinalText(msg);
if (content) finalText = content;
}
}
}
}
}
this.lastImages = [...this.imageCapture];
if (!finalText) {
this.logger.warn(
{ imageCount: this.lastImages.length },
'Research subagent: model returned empty output'
);
} else {
this.logger.info(
{ textLength: finalText.length, imageCount: this.lastImages.length },
'streamEvents finished'
);
}
return finalText;
}
/** /**
* Stream research execution * Stream research execution
*/ */

View File

@@ -421,6 +421,7 @@ For research scripts, import and use get_api() to access the API:
""" """
import logging import logging
import threading
from typing import Optional from typing import Optional
from dexorder.api.api import API from dexorder.api.api import API
@@ -432,10 +433,13 @@ log = logging.getLogger(__name__)
# Global API instance - managed by main.py # Global API instance - managed by main.py
_global_api: Optional[API] = None _global_api: Optional[API] = None
# Thread-local API — used by harness threads so they don't overwrite the global
_thread_local = threading.local()
def get_api() -> API: def get_api() -> API:
""" """
Get the global API instance for accessing market data and charts. Get the API instance for accessing market data and charts.
Use this in research scripts to access the data and charting APIs. Use this in research scripts to access the data and charting APIs.
@@ -462,15 +466,27 @@ def get_api() -> API:
# Create chart # Create chart
fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT") fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT")
""" """
# Thread-local takes priority (set by harness threads)
api = getattr(_thread_local, 'api', None)
if api is not None:
return api
if _global_api is None: if _global_api is None:
raise RuntimeError("API not initialized") raise RuntimeError("API not initialized")
return _global_api return _global_api
def set_api(api: API) -> None: def set_api(api: API) -> None:
"""Set the global API instance. Internal use only.""" """Set the API instance.
global _global_api
_global_api = api When called from the main thread, sets the global API used by all threads.
When called from a non-main thread (e.g. harness threads), sets a thread-local
API so the global is not overwritten.
"""
if threading.current_thread() is threading.main_thread():
global _global_api
_global_api = api
else:
_thread_local.api = api
__all__ = ['API', 'ChartingAPI', 'DataAPI', 'get_api', 'set_api'] __all__ = ['API', 'ChartingAPI', 'DataAPI', 'get_api', 'set_api']

View File

@@ -28,11 +28,12 @@ from datetime import datetime
api = get_api() api = get_api()
# Method 1: Using Unix timestamps (seconds) # Method 1: Using Unix timestamps (seconds)
# 1609459200 = 2021-01-01, 1735689600 = 2025-01-01
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, # 1 hour candles period_seconds=3600, # 1 hour candles
start_time=1640000000, # Unix timestamp in seconds start_time=1609459200, # 2021-01-01
end_time=1640086400, end_time=1735689600, # 2025-01-01 (~4 years, ~35,000 bars)
extra_columns=["volume"] extra_columns=["volume"]
)) ))
@@ -40,8 +41,8 @@ df = asyncio.run(api.data.historical_ohlc(
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, period_seconds=3600,
start_time="2021-12-20", # Simple date string start_time="2021-01-01",
end_time="2021-12-21", end_time="2025-01-01", # ~4 years of 1h bars ≈ 35,000 bars
extra_columns=["volume"] extra_columns=["volume"]
)) ))
@@ -49,21 +50,24 @@ df = asyncio.run(api.data.historical_ohlc(
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, period_seconds=3600,
start_time="2021-12-20 00:00:00", start_time="2021-01-01 00:00:00",
end_time="2021-12-20 23:59:59", end_time="2025-01-01 00:00:00",
extra_columns=["volume"] extra_columns=["volume"]
)) ))
# Method 4: Using datetime objects # Method 4: Using datetime objects
from datetime import datetime, timedelta
end_time = datetime.now()
start_time = end_time - timedelta(days=4*365) # 4 years back
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, period_seconds=3600,
start_time=datetime(2021, 12, 20), start_time=start_time,
end_time=datetime(2021, 12, 21), end_time=end_time,
extra_columns=["volume"] extra_columns=["volume"]
)) ))
print(f"Loaded {len(df)} candles") print(f"Loaded {len(df)} candles from {df.index[0]} to {df.index[-1]}")
print(df.head()) print(df.head())
``` ```
@@ -94,8 +98,8 @@ api = get_api()
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, period_seconds=3600,
start_time="2021-12-20", start_time="2021-01-01",
end_time="2021-12-21", end_time="2025-01-01", # ~4 years of 1h bars
extra_columns=["volume"] extra_columns=["volume"]
)) ))
@@ -125,8 +129,8 @@ api = get_api()
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, period_seconds=3600,
start_time="2021-12-20", start_time="2021-01-01",
end_time="2021-12-21" end_time="2025-01-01"
)) ))
# Calculate indicators using pandas-ta # Calculate indicators using pandas-ta
@@ -190,14 +194,19 @@ import pandas_ta as ta
# Get API instance # Get API instance
api = get_api() api = get_api()
# Fetch historical data using date strings (easiest for research) # Fetch historical data — use max history for research (target 100k-200k bars)
from datetime import datetime, timedelta
end_time = datetime.now()
start_time = end_time - timedelta(days=3*365) # 3 years of 1h bars ≈ 26,000 bars
df = asyncio.run(api.data.historical_ohlc( df = asyncio.run(api.data.historical_ohlc(
ticker="BTC/USDT.BINANCE", ticker="BTC/USDT.BINANCE",
period_seconds=3600, # 1 hour period_seconds=3600, # 1 hour
start_time="2021-12-20", start_time=start_time,
end_time="2021-12-21", end_time=end_time,
extra_columns=["volume"] extra_columns=["volume"]
)) ))
print(f"[Data] {len(df)} bars | {df.index[0]}{df.index[-1]} | period=3600s")
# Add moving averages using pandas-ta # Add moving averages using pandas-ta
df['sma_20'] = ta.sma(df['close'], length=20) df['sma_20'] = ta.sma(df['close'], length=20)
@@ -218,7 +227,7 @@ ax.plot(range(len(df)), df['ema_50'], label="EMA 50", color="red", linewidth=1.5
ax.legend() ax.legend()
# Print summary statistics # Print summary statistics
print(f"Period: {len(df)} candles") print(f"[Data] {len(df)} bars | {df.index[0]}{df.index[-1]} | period=3600s")
print(f"High: {df['high'].max()}") print(f"High: {df['high'].max()}")
print(f"Low: {df['low'].min()}") print(f"Low: {df['low'].min()}")
print(f"Mean Volume: {df['volume'].mean():.2f}") print(f"Mean Volume: {df['volume'].mean():.2f}")

View File

@@ -10,6 +10,33 @@ Create Python scripts that:
- Generate professional charts using matplotlib via the ChartingAPI - Generate professional charts using matplotlib via the ChartingAPI
- All matplotlib figures are automatically captured and sent to the user as images - All matplotlib figures are automatically captured and sent to the user as images
## Data Selection: Resolution and Time Window
> **Rule**: Every research script must fetch the maximum useful history — target 100,000–200,000 bars, hard cap at 5 years. **Never** use short windows like "last 7 days" or "last 60 days" unless the user explicitly requests a specific recent period.
Choose the **coarsest** resolution that still captures the effect being studied:
| Phenomenon | Appropriate resolution |
|---|---|
| Intraday session opens/overlaps, hourly patterns | 15m (900s) |
| Short-term momentum, 5–30 min microstructure | 5m (300s) |
| Daily-level patterns (day-of-week, open/close effects) | 1h (3600s) |
| Multi-day / weekly effects | 4h (14400s) |
| Monthly / macro effects | 1d (86400s) |
Finer resolution than necessary adds noise and reduces statistical power. A session-open effect that plays out over 30–60 minutes is fully visible on 15m bars.
Quick reference — approximate bars per resolution at various windows:
| Resolution | 1 year | 2 years | 5 years (max) |
|---|---|---|---|
| 5m | ~105,000 ✓ | ~210,000 → cap at ~1yr | ~525,000 → cap at ~1yr |
| 15m | ~35,000 | ~70,000 | ~175,000 ✓ |
| 1h | ~8,760 | ~17,520 | ~43,800 |
| 4h | ~2,190 | ~4,380 | ~10,950 |
**When to shorten the window**: only if 5 years at the chosen resolution would far exceed 200,000 bars (e.g., 5m over 5 years ≈ 525k → shorten to ~1–2 years). Otherwise always use the full 5 years.
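If it helps, the sizing arithmetic can be folded into a small helper at the top of the research script. This is a minimal sketch assuming only the standard library; the `pick_window` name and its defaults are illustrative, not platform API:
```python
from datetime import datetime, timedelta

def pick_window(period_seconds: int, target_bars: int = 200_000, max_years: float = 5.0):
    """Illustrative sketch: choose start/end times for historical_ohlc.

    Uses the smaller of (target_bars at this resolution) and the 5-year cap.
    """
    end_time = datetime.now()
    span = timedelta(seconds=period_seconds * target_bars)
    start_time = end_time - min(span, timedelta(days=max_years * 365))
    return start_time, end_time

# 15m bars (900s): 200k bars ≈ 5.7 years → capped at 5 years (~175k bars)
# 5m bars (300s):  200k bars ≈ 1.9 years → no cap needed
start_time, end_time = pick_window(900)
```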
## Available Tools ## Available Tools
You have direct access to these MCP tools: You have direct access to these MCP tools:
@@ -17,13 +44,15 @@ You have direct access to these MCP tools:
- **python_write**: Create a new script (research, strategy, or indicator category) - **python_write**: Create a new script (research, strategy, or indicator category)
- Required: category, name, description, code - Required: category, name, description, code
- Optional: metadata (category-specific fields — see below) - Optional: metadata (category-specific fields — see below)
- For research: automatically executes the script after writing - **For research**: fully executes the script and returns all output (stdout, stderr) and captured chart images. The response IS the execution result — **do not call `execute_research` afterward**.
- Returns validation results and execution output (text + images) - **For indicator/strategy**: runs against synthetic test data to catch compile/runtime errors; no chart images are generated.
- Returns validation results and execution output (text + images for research)
- **python_edit**: Update an existing script - **python_edit**: Update an existing script
- Required: category, name - Required: category, name
- Optional: code, description, metadata - Optional: code, description, metadata
- For research: automatically re-executes if code is updated - **For research**: re-executes the script when code is changed and returns all output and images. **Do not call `execute_research` afterward**.
- **For indicator/strategy**: re-runs the validation test only.
- Returns validation results and execution output - Returns validation results and execution output
- **python_read**: Read an existing research script - **python_read**: Read an existing research script
@@ -32,8 +61,9 @@ You have direct access to these MCP tools:
- **python_list**: List all research scripts - **python_list**: List all research scripts
- Returns: array of {name, description, metadata} - Returns: array of {name, description, metadata}
- **execute_research**: Manually run a research script - **execute_research**: Run a research script that already exists on disk
- Note: Usually not needed since write/edit auto-execute - Use this **only** when the user explicitly asks to re-run a script, or to run a script that was written in a previous session and already exists
- **Do not call this after `python_write` or `python_edit`** — those tools already executed the script and returned its output
- Returns: text output and images - Returns: text output and images
## Research Script API ## Research Script API
@@ -55,180 +85,8 @@ See your knowledge base for complete API documentation, examples, and the full p
## Technical Indicators — pandas-ta ## Technical Indicators — pandas-ta
The sandbox environment uses **pandas-ta** as the standard indicator library. Always use it for technical indicator calculations; do not write manual rolling/ewm implementations. Use `import pandas_ta as ta` for all indicator calculations. Never write manual rolling/ewm implementations. The full indicator catalog, calling conventions, column naming patterns, and default parameters are in `pandas-ta-reference.md` in your knowledge base.
```python
import pandas_ta as ta
```
### Calling Convention
pandas-ta functions accept a Series (or OHLCV columns) plus keyword parameters that match pandas-ta's documented argument names:
```python
# Single-series indicator
rsi = ta.rsi(df['close'], length=14) # returns Series
# OHLCV indicator
atr = ta.atr(df['high'], df['low'], df['close'], length=14)
# Multi-output indicator (returns DataFrame)
macd_df = ta.macd(df['close'], fast=12, slow=26, signal=9)
# columns: MACD_12_26_9, MACDh_12_26_9, MACDs_12_26_9
bbands_df = ta.bbands(df['close'], length=20, std=2.0)
# columns: BBL_20_2.0, BBM_20_2.0, BBU_20_2.0, BBB_20_2.0, BBP_20_2.0
```
### Available Indicators (canonical list)
These match the indicators supported by the TradingView web client. Use the pandas-ta function name shown here (lowercase):
**Overlap / Moving Averages** — plotted on the price pane
| Function | Description |
|----------|-------------|
| `sma` | Simple Moving Average — plain arithmetic mean over `length` periods |
| `ema` | Exponential Moving Average — more weight on recent prices |
| `wma` | Weighted Moving Average — linearly increasing weights |
| `dema` | Double EMA — two layers of EMA to reduce lag |
| `tema` | Triple EMA — three layers of EMA, even less lag than DEMA |
| `trima` | Triangular MA — double-smoothed SMA, very smooth |
| `kama` | Kaufman Adaptive MA — adapts speed to market noise/trending conditions |
| `t3` | T3 Moving Average — Tillson's smooth, low-lag MA using six EMAs |
| `hma` | Hull MA — very low-lag MA using WMAs |
| `alma` | Arnaud Legoux MA — Gaussian-weighted MA with reduced lag and noise |
| `midpoint` | Midpoint of close over `length` periods: (highest + lowest) / 2 |
| `midprice` | Midpoint of high/low over `length` periods |
| `supertrend` | Trend-following band (ATR-based) that flips above/below price |
| `ichimoku` | Ichimoku Cloud — multi-line Japanese trend/support/resistance system |
| `vwap` | Volume-Weighted Average Price — average price weighted by volume, resets on `anchor` |
| `vwma` | Volume-Weighted MA — like SMA but candles weighted by volume |
| `bbands` | Bollinger Bands — SMA ± N standard deviations; returns upper, mid, lower bands |
**Momentum** — typically plotted in a separate pane
| Function | Description |
|----------|-------------|
| `rsi` | Relative Strength Index — 0–100 oscillator measuring speed of price changes |
| `macd` | MACD — difference of two EMAs plus signal line and histogram |
| `stoch` | Stochastic Oscillator — %K/%D, measures close vs recent high/low range |
| `stochrsi` | Stochastic RSI — applies stochastic formula to RSI values |
| `cci` | Commodity Channel Index — deviation of price from its statistical mean |
| `willr` | Williams %R — inverse stochastic, -100 to 0 oscillator |
| `mom` | Momentum — raw price change over `length` periods |
| `roc` | Rate of Change — percentage price change over `length` periods |
| `trix` | TRIX — 1-period % change of a triple-smoothed EMA |
| `cmo` | Chande Momentum Oscillator — ratio of up/down momentum, -100 to +100 |
| `adx` | Average Directional Index — strength of trend (0–100, direction-agnostic) |
| `aroon` | Aroon — measures how recently the highest/lowest price occurred; returns Up, Down, Oscillator |
| `ao` | Awesome Oscillator — difference of 5- and 34-period simple MAs of midprice |
| `bop` | Balance of Power — measures buying vs selling pressure: (close - open)/(high - low) |
| `uo` | Ultimate Oscillator — weighted combo of buying-pressure ratios over three periods (fast/medium/slow) |
| `apo` | Absolute Price Oscillator — difference between two EMAs (like MACD without signal line) |
| `mfi` | Money Flow Index — RSI-like oscillator using price × volume |
| `coppock` | Coppock Curve — long-term momentum oscillator based on rate-of-change |
| `dpo` | Detrended Price Oscillator — removes trend to show cycle oscillations |
| `fisher` | Fisher Transform — converts price into a Gaussian normal distribution |
| `rvgi` | Relative Vigor Index — compares (close - open) to (high - low) to measure trend vigor |
| `kst` | Know Sure Thing — momentum oscillator from four ROC periods, smoothed |
**Volatility** — plotted on price pane or separate
| Function | Description |
|----------|-------------|
| `atr` | Average True Range — average of true range (greatest of H-L, H-prevC, L-prevC) |
| `kc` | Keltner Channels — EMA ± N × ATR bands around price |
| `donchian` | Donchian Channels — highest high / lowest low over `length` periods |
**Volume** — plotted in separate pane
| Function | Description |
|----------|-------------|
| `obv` | On Balance Volume — cumulative volume, added on up days, subtracted on down days |
| `ad` | Accumulation/Distribution — running total of the money flow multiplier × volume |
| `adosc` | Chaikin Oscillator — EMA difference of the A/D line |
| `cmf` | Chaikin Money Flow — sum of (money flow volume) / sum of volume over `length` |
| `eom` | Ease of Movement — relates price change to volume; high = price moves easily |
| `efi` | Elder's Force Index — combines price change direction with volume magnitude |
| `kvo` | Klinger Volume Oscillator — EMA difference of volume force |
| `pvt` | Price Volume Trend — cumulative: volume × percentage price change |
**Statistics / Price Transforms**
| Function | Description |
|----------|-------------|
| `stdev` | Standard Deviation of close over `length` periods |
| `linreg` | Linear Regression Curve — least-squares line endpoint value over `length` periods |
| `slope` | Linear Regression Slope — gradient of the regression line |
| `hl2` | Median Price — (high + low) / 2 |
| `hlc3` | Typical Price — (high + low + close) / 3 |
| `ohlc4` | Average Price — (open + high + low + close) / 4 |
**Trend**
| Function | Description |
|----------|-------------|
| `psar` | Parabolic SAR — trailing stop-and-reverse dots that follow price |
| `vortex` | Vortex Indicator — VI+ / VI lines measuring upward vs downward trend movement |
| `chop` | Choppiness Index — 0–100, high = choppy/sideways, low = strong trend |
### Default Parameters
Key defaults to keep in mind:
- Most period/length indicators: `length=14` (use `length=` not `timeperiod=`)
- `bbands`: `length=20, std=2.0` (note: single `std`, not separate upper/lower)
- `macd`: `fast=12, slow=26, signal=9`
- `stoch`: `k=14, d=3, smooth_k=3`
- `psar`: `af0=0.02, af=0.02, max_af=0.2`
- `vwap`: `anchor='D'` (requires DatetimeIndex)
- `ichimoku`: `tenkan=9, kijun=26, senkou=52`
For multi-output indicator column extraction patterns and complete charting examples, fetch `pandas-ta-reference.md` from your knowledge base.
## Strategy Metadata Format
When writing or editing a strategy (`category="strategy"`), always include a `metadata` object with:
- **`data_feeds`** — list of feed descriptors the strategy requires:
```json
[
{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600, "description": "Primary BTC/USDT hourly feed"},
{"symbol": "ETH/USDT.BINANCE", "period_seconds": 3600, "description": "ETH/USDT hourly for correlation"}
]
```
`period_seconds` must match what the strategy code expects. Use the same values when calling `backtest_strategy`.
- **`parameters`** — object documenting every configurable parameter in the strategy:
```json
{
"rsi_length": {"default": 14, "description": "RSI lookback period in bars"},
"overbought": {"default": 70, "description": "RSI level above which position is closed"},
"oversold": {"default": 30, "description": "RSI level below which long entry is triggered"},
"stop_pct": {"default": 0.02, "description": "Stop-loss as a fraction of entry price (e.g. 0.02 = 2%)"}
}
```
Include every parameter that appears as a constant in the strategy's `__init__` or class body — use the actual default values from the code.
Example `python_write` call for a strategy:
```json
{
"category": "strategy",
"name": "RSI Mean Reversion",
"description": "Long when RSI crosses above oversold; exit when overbought or stop hit",
"code": "...",
"metadata": {
"data_feeds": [
{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600, "description": "BTC/USDT hourly OHLCV + order flow"}
],
"parameters": {
"rsi_length": {"default": 14, "description": "RSI lookback period"},
"overbought": {"default": 70, "description": "Exit long above this RSI level"},
"oversold": {"default": 30, "description": "Enter long below this RSI level"}
}
}
}
```
## Coding Loop Pattern ## Coding Loop Pattern
@@ -244,11 +102,11 @@ When a user requests analysis:
- Use appropriate ticker symbols, time ranges, and periods - Use appropriate ticker symbols, time ranges, and periods
- The script will auto-execute after writing - The script will auto-execute after writing
4. **Check execution results**: The tool returns: 4. **Check execution results**: The tool returns the execution result directly — this is the script's actual output:
- `validation.success`: Whether script ran without errors - `success`: Whether the script ran without errors
- `validation.output`: Any stdout/stderr text output - Text output from stdout/stderr is visible to you
- `execution.content`: Array of text and image results - Chart images are captured and sent to the user (you cannot see them)
- Note: Images are NOT included in your context - only text output is visible to you - **Do NOT call `execute_research` after this step** — the script has already run and the results are in the response above
5. **Iterate if needed**: If there are errors: 5. **Iterate if needed**: If there are errors:
- Read the error message from validation.output or execution text - Read the error message from validation.output or execution text
@@ -259,8 +117,28 @@ When a user requests analysis:
- The user will receive both your text response AND the chart images - The user will receive both your text response AND the chart images
- Don't try to describe the images in detail - the user can see them - Don't try to describe the images in detail - the user can see them
## Ticker Format
All tickers passed to `api.data.historical_ohlc()` and other data methods **must** use the `SYMBOL.EXCHANGE` format, e.g.:
- `BTC/USDT.BINANCE`
- `ETH/USDT.BINANCE`
- `SOL/USDT.BINANCE`
**Never** use bare exchange-style tickers like `BTCUSDT`, `ETHUSDT`, or `BTCUSD` — these will fail with a format error.
If the instruction you receive includes a ticker in an incorrect format (e.g., `ETHUSDT`), convert it to the proper format (`ETH/USDT.BINANCE`) before writing the script. When in doubt about which exchange to use, default to `BINANCE`.
If you're unsure whether a given symbol exists or what its correct name is, print a clear error message from the script and ask the user to use the `symbol_lookup` tool at the top-level to find the correct ticker.
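If the conversion has to happen inside the script itself, here is a minimal sketch of such a normalizer (the `normalize_ticker` helper and its quote-currency list are assumptions for illustration, not part of the platform API):
```python
def normalize_ticker(raw: str, default_exchange: str = "BINANCE") -> str:
    """Illustrative sketch: convert bare tickers like 'ETHUSDT' to 'ETH/USDT.BINANCE'."""
    if "/" in raw and "." in raw:
        return raw  # already in SYMBOL.EXCHANGE format
    base = raw.upper().replace("/", "")
    for quote in ("USDT", "USDC", "USD", "BTC", "ETH"):
        if base.endswith(quote) and len(base) > len(quote):
            return f"{base[:-len(quote)]}/{quote}.{default_exchange}"
    raise ValueError(f"Cannot normalize ticker {raw!r}; ask the user to run symbol_lookup")

# normalize_ticker("ETHUSDT")          -> "ETH/USDT.BINANCE"
# normalize_ticker("BTC/USDT.BINANCE") -> unchanged
```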
## Important Guidelines ## Important Guidelines
- **Always print data stats after fetching**: Immediately after every `historical_ohlc` call, print the bar count and date range so it appears in the output:
```python
print(f"[Data] {len(df)} bars | {df.index[0]} → {df.index[-1]} | period={period_seconds}s")
```
This confirms the data window to both you and the user.
- **Images are pass-through only**: Chart images go directly to the user. You only see text output (print statements, errors). Don't try to analyze or describe images you can't see. - **Images are pass-through only**: Chart images go directly to the user. You only see text output (print statements, errors). Don't try to analyze or describe images you can't see.
- **Async data fetching**: All `api.data` methods are async. Always use `asyncio.run()`: - **Async data fetching**: All `api.data` methods are async. Always use `asyncio.run()`:
@@ -268,15 +146,6 @@ When a user requests analysis:
df = asyncio.run(api.data.historical_ohlc(...)) df = asyncio.run(api.data.historical_ohlc(...))
``` ```
- **Charting is sync**: All `api.charting` methods are synchronous:
```python
fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT")
```
- **Automatic figure capture**: All matplotlib figures are automatically captured. Don't save manually.
- **Print for debugging**: Use `print()` statements for debugging - you'll see this output.
- **Package management**: If script needs packages beyond base environment (pandas, numpy, matplotlib): - **Package management**: If script needs packages beyond base environment (pandas, numpy, matplotlib):
- Add `conda_packages: ["package-name"]` to metadata - Add `conda_packages: ["package-name"]` to metadata
- Packages are auto-installed during validation - Packages are auto-installed during validation
@@ -287,16 +156,18 @@ When a user requests analysis:
## Example Workflow ## Example Workflow
User: "Show me BTC price action for the last 7 days with volume" User: "Show me BTC/ETH price correlation over time"
You: You:
1. Call `python_write` with: 1. Identify timescale: daily return correlation → 1h bars are sufficient
- name: "BTC 7-Day Price Action" 2. Compute window: 1h bars × 5 years ≈ 43,800 bars (under 100k, but 5yr is the hard max — use it)
- description: "BTC/USDT price and volume analysis for the last 7 days" 3. Call `python_write` with:
- code: (Python script that fetches data and creates chart) - name: "BTC ETH Price Correlation"
2. Check execution results - description: "Rolling correlation of BTC/USDT and ETH/USDT daily returns using 5 years of 1h data"
3. If successful, respond: "I've created a 7-day BTC price chart with volume analysis. The chart shows [brief summary of what the script does]." - code: (Python script fetching 5yr of 1h OHLC for both tickers and plotting rolling correlation)
4. User receives: Your text response + the actual chart image 4. Check execution results
5. If successful, respond with a brief summary of what the script does
6. User receives: Your text response + the chart image
## Response Format ## Response Format

View File

@@ -0,0 +1,37 @@
name: strategy
description: Writes and manages PandasStrategy classes, runs backtests, and manages strategy activation
# Model configuration
model: claude-sonnet-4-6
temperature: 0.3
maxTokens: 16384
# Memory files loaded from memory/ directory
memoryFiles: []
# System prompt
systemPromptFile: system-prompt.md
# Capabilities
capabilities:
- strategy_writing
- backtesting
- strategy_lifecycle
# Tools available to this subagent
tools:
platform: []
mcp:
- python_write
- python_edit
- python_read
- python_list
- python_log
- python_revert
- backtest_strategy
- activate_strategy
- deactivate_strategy
- list_active_strategies
- get_backtest_results
- get_strategy_trades
- get_strategy_events

View File

@@ -0,0 +1,159 @@
import { BaseSubagent, type SubagentConfig, type SubagentContext } from '../base-subagent.js';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { SystemMessage } from '@langchain/core/messages';
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { FastifyBaseLogger } from 'fastify';
import type { MCPClientConnector } from '../../mcp-client.js';
import type { HarnessEvent } from '../../harness-events.js';
/**
* Strategy Subagent
*
* Specialized agent for writing PandasStrategy classes, running backtests,
* and managing strategy activation/deactivation.
*
* Mirrors the pattern of IndicatorSubagent in indicator/index.ts.
*/
export class StrategySubagent extends BaseSubagent {
constructor(
config: SubagentConfig,
model: BaseChatModel,
logger: FastifyBaseLogger,
mcpClient?: MCPClientConnector,
tools?: any[]
) {
super(config, model, logger, mcpClient, tools);
}
/**
* Execute a strategy request using LangGraph's createReactAgent.
*/
async execute(context: SubagentContext, instruction: string): Promise<string> {
this.logger.info(
{
subagent: this.getName(),
userId: context.userContext.userId,
instruction: instruction.substring(0, 200),
toolCount: this.tools.length,
toolNames: this.tools.map(t => t.name),
},
'Strategy subagent starting'
);
if (!this.hasMCPClient()) {
throw new Error('MCP client not available for strategy subagent');
}
if (this.tools.length === 0) {
this.logger.warn('Strategy subagent has no tools');
}
const initialMessages = this.buildMessages(context, instruction);
const systemMessage = initialMessages[0];
const humanMessage = initialMessages[initialMessages.length - 1];
const agent = createReactAgent({
llm: this.model,
tools: this.tools,
prompt: systemMessage as SystemMessage,
});
const result = await agent.invoke(
{ messages: [humanMessage] },
{ recursionLimit: 30 }
);
const allMessages: any[] = result.messages ?? [];
this.logger.info(
{ messageCount: allMessages.length },
'Strategy subagent graph completed'
);
const lastAI = [...allMessages].reverse().find(
(m: any) => m.constructor?.name === 'AIMessage' || m._getType?.() === 'ai'
);
const finalText = lastAI
? (typeof lastAI.content === 'string' ? lastAI.content : JSON.stringify(lastAI.content))
: 'Strategy task completed.';
this.logger.info({ textLength: finalText.length }, 'Strategy subagent finished');
return finalText;
}
async *streamEvents(context: SubagentContext, instruction: string, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
this.logger.info({ subagent: this.getName() }, 'streamEvents starting');
if (!this.hasMCPClient()) {
throw new Error('MCP client not available for strategy subagent');
}
const initialMessages = this.buildMessages(context, instruction);
const systemMessage = initialMessages[0];
const humanMessage = initialMessages[initialMessages.length - 1];
const agent = createReactAgent({
llm: this.model,
tools: this.tools,
prompt: systemMessage as SystemMessage,
});
const stream = agent.stream(
{ messages: [humanMessage] },
{ streamMode: ['messages', 'updates'], recursionLimit: 30, signal }
);
let finalText = '';
for await (const [mode, data] of await stream) {
if (signal?.aborted) break;
if (mode === 'messages') {
for (const chunk of StrategySubagent.extractStreamChunks(data, this.config.name)) {
yield chunk;
}
} else if (mode === 'updates') {
if ((data as any).agent?.messages) {
for (const msg of (data as any).agent.messages as any[]) {
if (msg.tool_calls?.length) {
for (const tc of msg.tool_calls) {
yield { type: 'subagent_tool_call', agentName: this.config.name, toolName: tc.name, label: tc.name };
}
} else {
const content = StrategySubagent.extractFinalText(msg);
if (content) finalText = content;
}
}
}
}
}
this.logger.info({ textLength: finalText.length }, 'streamEvents finished');
return finalText;
}
}
/**
* Factory function to create and initialize StrategySubagent
*/
export async function createStrategySubagent(
model: BaseChatModel,
logger: FastifyBaseLogger,
basePath: string,
mcpClient?: MCPClientConnector,
tools?: any[]
): Promise<StrategySubagent> {
const { readFile } = await import('fs/promises');
const { join } = await import('path');
const yaml = await import('js-yaml');
const configPath = join(basePath, 'config.yaml');
const configContent = await readFile(configPath, 'utf-8');
const config = yaml.load(configContent) as SubagentConfig;
const subagent = new StrategySubagent(config, model, logger, mcpClient, tools);
await subagent.initialize(basePath);
return subagent;
}

View File

@@ -0,0 +1,357 @@
# Strategy Subagent
You are a specialized assistant for writing, testing, and managing trading strategies on the Dexorder platform. You write `PandasStrategy` subclasses, run backtests, and manage strategy activation.
---
## Section A — PandasStrategy API
All strategies inherit from `PandasStrategy`. Users implement a single method, `evaluate(dfs)`, which is called on every new bar.
### Class structure
```python
from dexorder.nautilus.pandas_strategy import PandasStrategy, PandasStrategyConfig
class MyStrategy(PandasStrategy):
def evaluate(self, dfs: dict[str, pd.DataFrame]) -> None:
"""
Called after every new bar across all feeds.
Args:
dfs: dict mapping feed_key → pd.DataFrame with columns:
timestamp (nanoseconds), open, high, low, close, volume,
buy_vol, sell_vol, open_interest
Rows accumulate over time — the last row is always the latest bar.
"""
df = dfs.get("BTC/USDT.BINANCE:300")
if df is None or len(df) < 20:
return # Not enough data yet
close = df["close"]
# ... compute signals ...
if buy_signal:
self.buy(quantity=0.1)
elif sell_signal:
self.sell(quantity=0.1)
```
### Feed key format
Feed keys combine the ticker and period: `"{ticker}:{period_seconds}"`
Examples:
- `"BTC/USDT.BINANCE:300"` — BTC/USDT on Binance, 5-minute bars
- `"BTC/USDT.BINANCE:900"` — BTC/USDT on Binance, 15-minute bars
- `"BTC/USDT.BINANCE:3600"` — BTC/USDT on Binance, 1-hour bars
- `"ETH/USDT.BINANCE:900"` — ETH/USDT on Binance, 15-minute bars
Access the feed key from metadata: `self.config.feed_keys` is a tuple of all feed keys.
### Order API
```python
self.buy(quantity: float, feed_key: str = None)
self.sell(quantity: float, feed_key: str = None)
self.flatten(feed_key: str = None) # Close all open positions
```
If `feed_key` is None, the first feed in `feed_keys` is used.
`quantity` is in base currency units (e.g. 0.1 BTC). Use `self.config.initial_capital` to size appropriately.
### Configuration available inside evaluate()
```python
self.config.feed_keys # tuple of feed key strings
self.config.initial_capital # starting capital in quote currency
```
### DataFrame columns
| Column | Type | Description |
|--------|------|-------------|
| `timestamp` | int64 (ns) | Bar open time in nanoseconds |
| `open` | float | Open price |
| `high` | float | High price |
| `low` | float | Low price |
| `close` | float | Close price |
| `volume` | float | Total volume |
| `buy_vol` | float | Buy-side volume (taker buys) |
| `sell_vol` | float | Sell-side volume (taker sells) |
| `open_interest` | float | Open interest (futures only; NaN for spot) |
---
## Section B — Strategy Metadata
When writing a strategy with `python_write(category="strategy", ...)`, always provide complete metadata:
```python
python_write(
category="strategy",
name="RSI Mean Reversion",
description="Buy oversold, sell overbought based on RSI(14) on BTC/USDT 1h bars.",
code="""...""",
metadata={
"data_feeds": [
{"symbol": "BTC/USDT.BINANCE", "period_seconds": 300, "description": "Primary BTC/USDT 5m feed"}
],
"parameters": {
"rsi_length": {"default": 14, "description": "RSI lookback period"},
"oversold": {"default": 30, "description": "RSI oversold threshold"},
"overbought": {"default": 70, "description": "RSI overbought threshold"},
"trade_qty": {"default": 0.01, "description": "Trade quantity in BTC"}
},
"conda_packages": []
}
)
```
### Metadata fields
| Field | Required | Description |
|-------|----------|-------------|
| `data_feeds` | yes | List of `{symbol, period_seconds, description}` — one per feed the strategy needs |
| `parameters` | yes | Dict of `{param_name: {default, description}}` for user-configurable values |
| `conda_packages` | no | Extra Python packages to install |
---
## Section C — Custom Indicators in Strategies
**Prefer using custom indicators defined in the `indicator` category rather than computing signals inline.**
Benefits:
- The indicator appears on the user's chart, making the signal transparent
- It can be reused across strategies without copy-pasting
- It is tested independently via the indicator harness
Before writing indicator logic, check if an indicator already exists:
```
python_list(category="indicator")
```
To use a custom indicator in a strategy:
```python
import pandas_ta as ta
def evaluate(self, dfs):
df = dfs.get("BTC/USDT.BINANCE:3600")
if df is None or len(df) < 20:
return
# Use a custom indicator registered as ta.custom_vw_rsi
vw_rsi = ta.custom_vw_rsi(df["close"], df["volume"], length=14)
if vw_rsi.iloc[-1] < 30:
self.buy(0.01)
elif vw_rsi.iloc[-1] > 70:
self.sell(0.01)
```
Custom indicator names follow the pattern `ta.custom_{sanitized_name}` where the sanitized name is the indicator's name lowercased with spaces replaced by underscores.
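A minimal sketch of that naming rule (the helper is purely illustrative; in a strategy you simply call the `ta.custom_*` attribute directly):
```python
def custom_indicator_attr(indicator_name: str) -> str:
    """Illustrative sketch of the ta.custom_* naming rule described above."""
    return "custom_" + indicator_name.lower().replace(" ", "_")

# custom_indicator_attr("VW RSI")       -> "custom_vw_rsi"       (use as ta.custom_vw_rsi)
# custom_indicator_attr("Volume Delta") -> "custom_volume_delta" (use as ta.custom_volume_delta)
```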
**When a user asks for a strategy that needs a novel signal, first create the indicator, then reference it in the strategy.**
---
## Section D — Complete Strategy Examples
### Example 1: RSI Mean Reversion (simple, single feed)
```python
import pandas as pd
import pandas_ta as ta
class RSIMeanReversion(PandasStrategy):
def evaluate(self, dfs: dict[str, pd.DataFrame]) -> None:
df = dfs.get("BTC/USDT.BINANCE:300")
if df is None or len(df) < 30:
return
rsi = ta.rsi(df["close"], length=14)
if rsi is None or rsi.isna().all():
return
last_rsi = rsi.iloc[-1]
trade_qty = 0.001 * self.config.initial_capital / df["close"].iloc[-1]
if last_rsi < 30:
self.buy(trade_qty)
elif last_rsi > 70:
self.sell(trade_qty)
```
Metadata:
```python
{
"data_feeds": [{"symbol": "BTC/USDT.BINANCE", "period_seconds": 300, "description": "BTC/USDT 5m"}],
"parameters": {
"rsi_length": {"default": 14, "description": "RSI period"},
"oversold": {"default": 30, "description": "Buy threshold"},
"overbought": {"default": 70, "description": "Sell threshold"}
},
"conda_packages": []
}
```
### Example 2: MACD Momentum (multi-feed dual timeframe)
```python
import pandas as pd
import pandas_ta as ta
class MACDMomentum(PandasStrategy):
def evaluate(self, dfs: dict[str, pd.DataFrame]) -> None:
df_15m = dfs.get("BTC/USDT.BINANCE:900")
df_4h = dfs.get("BTC/USDT.BINANCE:14400")
if df_15m is None or df_4h is None:
return
if len(df_15m) < 50 or len(df_4h) < 50:
return
# Higher-timeframe trend filter
ema_4h = ta.ema(df_4h["close"], length=20)
bullish_trend = df_4h["close"].iloc[-1] > ema_4h.iloc[-1]
# Entry signal on 15m
macd_df = ta.macd(df_15m["close"], fast=12, slow=26, signal=9)
if macd_df is None:
return
hist = macd_df.iloc[:, 1] # histogram column (MACDh; columns are MACD, MACDh, MACDs)
trade_qty = 0.002 * self.config.initial_capital / df_15m["close"].iloc[-1]
if bullish_trend and hist.iloc[-1] > 0 and hist.iloc[-2] <= 0:
self.buy(trade_qty, feed_key="BTC/USDT.BINANCE:900")
elif hist.iloc[-1] < 0 and hist.iloc[-2] >= 0:
self.flatten()
```
Metadata:
```python
{
"data_feeds": [
{"symbol": "BTC/USDT.BINANCE", "period_seconds": 900, "description": "BTC/USDT 15m entry"},
{"symbol": "BTC/USDT.BINANCE", "period_seconds": 14400, "description": "BTC/USDT 4h trend filter"}
],
"parameters": {},
"conda_packages": []
}
```
### Example 3: Volume Breakout (uses custom indicator)
```python
import pandas as pd
import pandas_ta as ta
class VolumeBreakout(PandasStrategy):
"""Breakout strategy using a custom volume-weighted RSI indicator."""
def evaluate(self, dfs: dict[str, pd.DataFrame]) -> None:
df = dfs.get("ETH/USDT.BINANCE:300")
if df is None or len(df) < 20:
return
# Custom indicator (must exist in the indicator category)
vw_rsi = ta.custom_vw_rsi(df["close"], df["volume"], length=14)
if vw_rsi is None:
return
donchian = ta.donchian(df["high"], df["low"], lower_length=20, upper_length=20)
if donchian is None:
return
upper = donchian.iloc[:, 2] # upper band (donchian columns: DCL, DCM, DCU)
close = df["close"]
qty = 0.01 * self.config.initial_capital / close.iloc[-1]
if close.iloc[-1] > upper.iloc[-2] and vw_rsi.iloc[-1] > 60:
self.buy(qty)
elif close.iloc[-1] < donchian.iloc[:, 1].iloc[-1]: # exit when close drops below the mid band (DCM)
self.flatten()
```
---
## Section E — Workflow
### Writing and validating a strategy
1. **Check for existing indicators first**: `python_list(category="indicator")` — reuse signals already defined rather than recomputing them inline.
2. **Write the strategy**:
```
python_write(category="strategy", name="...", description="...", code="...", metadata={...})
```
After writing, the system automatically runs the strategy against synthetic data. If validation fails, fix the reported error before proceeding.
3. **Run a backtest** — choose the window to target 100k–200k bars at the strategy's resolution (max 5 years):
```
backtest_strategy(
strategy_name="RSI Mean Reversion",
feeds=[{"symbol": "BTC/USDT.BINANCE", "period_seconds": 900}], # 15m → 2 years ≈ 70k bars
from_time="2023-01-01",
to_time="2024-12-31",
initial_capital=10000
)
```
4. **Interpret results**:
- `summary.total_return` — total fractional return (0.15 = +15%)
- `summary.sharpe_ratio` — annualized Sharpe (>1.0 good, >2.0 excellent)
- `summary.max_drawdown` — maximum peak-to-trough loss (0.20 = 20%)
- `summary.win_rate` — fraction of trades profitable
- `statistics.profit_factor` — gross profit / gross loss (>1.5 good)
- `statistics.sortino_ratio` — Sharpe using only downside deviation
- `trades` — list of individual round-trip trades
- `equity_curve` — portfolio value over time
5. **Iterate**: edit with `python_edit`, re-run backtest, compare results. Use `get_backtest_results` to compare multiple runs.
6. **Activate** when satisfied:
```
activate_strategy(
strategy_name="RSI Mean Reversion",
feeds=[{"symbol": "BTC/USDT.BINANCE", "period_seconds": 900}],
allocation=5000.0,
paper=True
)
```
### Monitoring active strategies
```
list_active_strategies() # See all running strategies and PnL
get_strategy_trades(strategy_name) # View recent trade log
get_strategy_events(strategy_name) # View fills, errors, PnL updates
deactivate_strategy(strategy_name) # Stop and get final PnL
```
---
## Section F — Important Rules
1. **Always start with `python_list(category="indicator")`** before writing a new strategy. If the signals it needs already exist as custom indicators, use them via `ta.custom_*` rather than duplicating the computation.
2. **Wait for validation output** after `python_write` or `python_edit`. If the harness reports an error, fix it before running a backtest.
3. **Size positions conservatively** based on `self.config.initial_capital`. A typical trade quantity is `0.001–0.01 * initial_capital / price`.
4. **Guard for insufficient data**: always check `len(df) >= min_required` before computing indicators that need a lookback period.
5. **Multi-feed strategies**: access each feed by its exact feed key. Missing feeds (not yet warmed up) will be absent from `dfs` — always use `.get()` and check for `None`.
6. **Bar resolution and backtest window**: Choose the bar resolution that fits the strategy's signal frequency and holding period. Once resolution is chosen, set the date window to target **100,000–200,000 bars** (see the sizing sketch at the end of this section). **Never request more than 5 years of data.** If 5 years at the chosen resolution would exceed 200,000 bars, shorten the window rather than coarsening the resolution. Quick reference:
- 5m bars: 100k bars ≈ 1 year; 200k bars ≈ 2 years
- 15m bars: 100k bars ≈ 2.9 years; 200k bars ≈ 5 years (at limit)
- 1h bars: 100k bars ≈ 11.4 years → cap at 5 years (≈ 43,800 bars)
- 4h bars: 100k bars ≈ 45 years → cap at 5 years (≈ 10,950 bars)
7. **Never `import` from `dexorder` inside `evaluate()`** — the strategy file is exec'd in a sandbox with PandasStrategy and pandas_ta pre-loaded. Standard library and pandas/numpy/pandas_ta are available.
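A minimal sizing sketch for rule 6, assuming only the standard library (the `backtest_window` helper and its defaults are illustrative, not platform API):
```python
from datetime import datetime, timedelta

def backtest_window(period_seconds: int, target_bars: int = 150_000, max_years: float = 5.0):
    """Illustrative sketch: derive from_time/to_time strings for backtest_strategy."""
    to_time = datetime.now()
    span = min(timedelta(seconds=period_seconds * target_bars),
               timedelta(days=max_years * 365))
    return (to_time - span).strftime("%Y-%m-%d"), to_time.strftime("%Y-%m-%d")

# backtest_window(900)  -> ~4.3 years of 15m bars (~150k bars)
# backtest_window(3600) -> capped at 5 years (~43,800 bars)
```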

View File

@@ -3,6 +3,7 @@ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
import { SystemMessage } from '@langchain/core/messages'; import { SystemMessage } from '@langchain/core/messages';
import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import type { HarnessEvent } from '../../harness-events.js';
/** /**
* Web Explore Subagent * Web Explore Subagent
@@ -66,6 +67,52 @@ export class WebExploreSubagent extends BaseSubagent {
return finalText; return finalText;
} }
async *streamEvents(context: SubagentContext, instruction: string, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
this.logger.info({ subagent: this.getName() }, 'streamEvents starting');
const initialMessages = this.buildMessages(context, instruction);
const systemMessage = initialMessages[0];
const humanMessage = initialMessages[initialMessages.length - 1];
const agent = createReactAgent({
llm: this.model,
tools: this.tools,
prompt: systemMessage as SystemMessage,
});
const stream = agent.stream(
{ messages: [humanMessage] },
{ streamMode: ['messages', 'updates'], recursionLimit: 15, signal }
);
let finalText = '';
for await (const [mode, data] of await stream) {
if (signal?.aborted) break;
if (mode === 'messages') {
for (const chunk of WebExploreSubagent.extractStreamChunks(data, this.config.name)) {
yield chunk;
}
} else if (mode === 'updates') {
if ((data as any).agent?.messages) {
for (const msg of (data as any).agent.messages as any[]) {
if (msg.tool_calls?.length) {
for (const tc of msg.tool_calls) {
yield { type: 'subagent_tool_call', agentName: this.config.name, toolName: tc.name, label: tc.name };
}
} else {
const content = WebExploreSubagent.extractFinalText(msg);
if (content) finalText = content;
}
}
}
}
}
this.logger.info({ textLength: finalText.length }, 'streamEvents finished');
return finalText;
}
} }
/** /**

View File

@@ -16,6 +16,8 @@ import { ContainerManager } from './k8s/container-manager.js';
import { ZMQRelayClient } from './clients/zmq-relay-client.js'; import { ZMQRelayClient } from './clients/zmq-relay-client.js';
import { IcebergClient } from './clients/iceberg-client.js'; import { IcebergClient } from './clients/iceberg-client.js';
import { ConversationStore } from './harness/memory/conversation-store.js'; import { ConversationStore } from './harness/memory/conversation-store.js';
import { BlobStore } from './harness/memory/blob-store.js';
import { ConversationService } from './services/conversation-service.js';
import { AgentHarness, type HarnessSessionConfig } from './harness/agent-harness.js'; import { AgentHarness, type HarnessSessionConfig } from './harness/agent-harness.js';
import { OHLCService } from './services/ohlc-service.js'; import { OHLCService } from './services/ohlc-service.js';
import { SymbolIndexService } from './services/symbol-index-service.js'; import { SymbolIndexService } from './services/symbol-index-service.js';
@@ -369,12 +371,17 @@ try {
const conversationStore = new ConversationStore(redis, app.log, icebergClient); const conversationStore = new ConversationStore(redis, app.log, icebergClient);
app.log.debug('Conversation store initialized'); app.log.debug('Conversation store initialized');
const blobStore = new BlobStore(icebergClient, app.log);
const conversationService = new ConversationService(conversationStore, blobStore, app.log);
app.log.debug('Blob store and conversation service initialized');
// Harness factory: captures infrastructure deps; channel handlers stay infrastructure-free // Harness factory: captures infrastructure deps; channel handlers stay infrastructure-free
function createHarness(sessionConfig: HarnessSessionConfig): AgentHarness { function createHarness(sessionConfig: HarnessSessionConfig): AgentHarness {
return new AgentHarness({ return new AgentHarness({
...sessionConfig, ...sessionConfig,
providerConfig: config.providerConfig, providerConfig: config.providerConfig,
conversationStore, conversationStore,
blobStore,
historyLimit: config.conversationHistoryLimit, historyLimit: config.conversationHistoryLimit,
}); });
} }
@@ -391,6 +398,7 @@ const websocketHandler = new WebSocketHandler({
createHarness, createHarness,
ohlcService, // Optional ohlcService, // Optional
symbolIndexService, // Optional symbolIndexService, // Optional
conversationService, // Optional - for history replay on reconnect
}); });
app.log.debug('WebSocket handler initialized'); app.log.debug('WebSocket handler initialized');
@@ -614,6 +622,19 @@ try {
mcpTools: [], mcpTools: [],
}); });
// Strategy subagent: all strategy-related MCP tools
toolRegistry.registerAgentTools({
agentName: 'strategy',
platformTools: [],
mcpTools: [
'python_write', 'python_edit', 'python_read', 'python_list',
'python_log', 'python_revert',
'backtest_strategy', 'activate_strategy', 'deactivate_strategy',
'list_active_strategies', 'get_backtest_results',
'get_strategy_trades', 'get_strategy_events',
],
});
app.log.info( app.log.info(
{ {
agents: toolRegistry.getRegisteredAgents(), agents: toolRegistry.getRegisteredAgents(),

View File

@@ -0,0 +1,59 @@
import type { FastifyBaseLogger } from 'fastify';
import type { ConversationStore } from '../harness/memory/conversation-store.js';
import type { BlobStore, StoredBlob } from '../harness/memory/blob-store.js';
export interface EnrichedMessage {
id: string;
userId: string;
sessionId: string;
role: 'user' | 'assistant';
content: string;
timestamp: number; // microseconds
files: StoredBlob[];
}
/**
* Generic conversation history service.
*
* Combines text messages (ConversationStore) with binary blobs (BlobStore)
* into enriched message records. Used by:
* - WebSocket handler: replay history on reconnect
* - Future admin panel: conversation browser
*/
export class ConversationService {
constructor(
private conversationStore: ConversationStore,
private blobStore: BlobStore,
// eslint-disable-next-line @typescript-eslint/no-unused-vars
_logger: FastifyBaseLogger
) {}
async getHistory(
userId: string,
sessionId: string,
limit = 50,
channelType = 'websocket'
): Promise<EnrichedMessage[]> {
const messages = await this.conversationStore.getFullHistory(userId, sessionId, limit, channelType);
const chatMessages = messages.filter(m => m.role === 'user' || m.role === 'assistant');
return Promise.all(
chatMessages.map(async (m) => {
const blobRefs = m.metadata?.blobs as Array<{ id: string; mimeType: string; caption?: string }> | undefined;
const files = blobRefs?.length
? await this.blobStore.getBlobsByIds(userId, sessionId, blobRefs.map(b => b.id))
: [];
return {
id: m.id,
userId: m.userId,
sessionId: m.sessionId,
role: m.role as 'user' | 'assistant',
content: m.content,
timestamp: m.timestamp,
files,
};
})
);
}
}

View File

@@ -16,7 +16,8 @@
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import type { IcebergClient } from '../clients/iceberg-client.js'; import type { IcebergClient } from '../clients/iceberg-client.js';
import type { ZMQRelayClient } from '../clients/zmq-relay-client.js'; import type { ZMQRelayClient, BarUpdateCallback } from '../clients/zmq-relay-client.js';
export type { BarUpdateCallback } from '../clients/zmq-relay-client.js';
import type { import type {
HistoryResult, HistoryResult,
SymbolInfo, SymbolInfo,
@@ -53,6 +54,23 @@ export class OHLCService {
this.logger = config.logger; this.logger = config.logger;
} }
/**
* Subscribe to realtime OHLC bar updates for a ticker+period.
* ZMQ subscribe is issued on the first call for a given topic; subsequent calls
* for the same topic only add the callback (no extra ZMQ events).
*/
subscribeToTicker(ticker: string, periodSeconds: number, callback: BarUpdateCallback): void {
this.relayClient.subscribeToTicker(ticker, periodSeconds, callback);
}
/**
* Unsubscribe a callback from realtime OHLC bar updates.
* ZMQ unsubscribe is issued when the last callback for a topic is removed.
*/
unsubscribeFromTicker(ticker: string, periodSeconds: number, callback: BarUpdateCallback): void {
this.relayClient.unsubscribeFromTicker(ticker, periodSeconds, callback);
}
/** /**
* Fetch OHLC data with smart caching * Fetch OHLC data with smart caching
* *

View File

@@ -28,23 +28,29 @@ export function createGetChartDataTool(config: GetChartDataToolConfig): DynamicS
**IMPORTANT: Use this tool ONLY for quick, casual data viewing. For any analysis, plotting, statistics, or deep research, use the 'research' tool instead.** **IMPORTANT: Use this tool ONLY for quick, casual data viewing. For any analysis, plotting, statistics, or deep research, use the 'research' tool instead.**
**Hard limit: returns at most 500 bars (the most recent 500). This tool is not suitable for analysis requiring longer sequences — use the 'research' tool for that.**
Parameters: Parameters:
- ticker (optional): Market symbol (defaults to workspace chartState.symbol) - ticker (optional): Market symbol in SYMBOL.EXCHANGE format, e.g. "BTC/USDT.BINANCE" (defaults to workspace chartState.symbol)
- period (optional): OHLC period in seconds (defaults to workspace chartState.period) - period (optional): OHLC period in seconds (defaults to workspace chartState.period)
- from_time (optional): Start time as Unix timestamp (number or string like "1774126800") OR date string like "2 days ago", "2024-01-01" (defaults to workspace chartState.start_time) - from_time (optional): Start time as Unix timestamp (number or string like "1774126800") OR date string like "2 days ago", "2024-01-01" (defaults to workspace chartState.start_time)
- to_time (optional): End time as Unix timestamp (number or string like "1774732500") OR date string like "now", "yesterday" (defaults to workspace chartState.end_time) - to_time (optional): End time as Unix timestamp (number or string like "1774732500") OR date string like "now", "yesterday" (defaults to workspace chartState.end_time)
- countback (optional): Limit number of bars returned - countback (optional): Limit number of bars returned (max 500)
- columns (optional): Extra columns beyond OHLC: ["volume", "buy_vol", "sell_vol", "open_time", "high_time", "low_time", "close_time", "open_interest"]`, - columns (optional): Extra columns beyond OHLC: ["volume", "buy_vol", "sell_vol", "open_time", "high_time", "low_time", "close_time", "open_interest"]`,
schema: z.object({ schema: z.object({
ticker: z.string().optional().describe('Market symbol (defaults to workspace chartState.symbol)'), ticker: z.string().optional().describe('Market symbol (defaults to workspace chartState.symbol)'),
period: z.number().optional().describe('OHLC period in seconds (defaults to workspace chartState.period)'), period: z.number().optional().describe('OHLC period in seconds (defaults to workspace chartState.period)'),
from_time: z.union([z.number(), z.string()]).optional().describe('Start time: Unix seconds OR date string (defaults to workspace chartState.start_time)'), from_time: z.union([z.number(), z.string()]).optional().describe('Start time: Unix seconds OR date string (defaults to workspace chartState.start_time)'),
to_time: z.union([z.number(), z.string()]).optional().describe('End time: Unix seconds OR date string (defaults to workspace chartState.end_time)'), to_time: z.union([z.number(), z.string()]).optional().describe('End time: Unix seconds OR date string (defaults to workspace chartState.end_time)'),
countback: z.number().optional().describe('Limit number of bars returned'), countback: z.number().optional().describe('Limit number of bars returned (max 500)'),
columns: z.array(z.enum(['volume', 'buy_vol', 'sell_vol', 'open_time', 'high_time', 'low_time', 'close_time', 'open_interest'])).optional().describe('Extra columns beyond OHLC'), columns: z.array(z.enum(['volume', 'buy_vol', 'sell_vol', 'open_time', 'high_time', 'low_time', 'close_time', 'open_interest'])).optional().describe('Extra columns beyond OHLC'),
}), }),
func: async ({ ticker, period, from_time, to_time, countback, columns }) => { func: async ({ ticker, period, from_time, to_time, countback, columns }) => {
logger.debug({ ticker, period, from_time, to_time, countback, columns }, 'Executing get_chart_data tool'); const MAX_BARS = 500;
// Enforce hard cap — never return more than MAX_BARS bars
const effectiveCountback = countback !== undefined ? Math.min(countback, MAX_BARS) : MAX_BARS;
logger.debug({ ticker, period, from_time, to_time, countback: effectiveCountback, columns }, 'Executing get_chart_data tool');
try { try {
// Get workspace chart state // Get workspace chart state
@@ -86,7 +92,7 @@ Parameters:
finalPeriod, finalPeriod,
finalFromTime, finalFromTime,
finalToTime, finalToTime,
countback effectiveCountback
); );
if (historyResult.noData || !historyResult.bars || historyResult.bars.length === 0) { if (historyResult.noData || !historyResult.bars || historyResult.bars.length === 0) {
@@ -98,8 +104,13 @@ Parameters:
}); });
} }
// Enforce hard cap — keep the most recent bars
const sourceBars = historyResult.bars.length > MAX_BARS
? historyResult.bars.slice(-MAX_BARS)
: historyResult.bars;
// Filter/format bars with requested columns // Filter/format bars with requested columns
const bars = historyResult.bars.map(bar => { const bars = sourceBars.map(bar => {
const result: any = { const result: any = {
time: bar.time, time: bar.time,
open: bar.open, open: bar.open,

View File

@@ -3,6 +3,7 @@ import { z } from 'zod';
import type { FastifyBaseLogger } from 'fastify'; import type { FastifyBaseLogger } from 'fastify';
import type { IndicatorSubagent } from '../../harness/subagents/indicator/index.js'; import type { IndicatorSubagent } from '../../harness/subagents/indicator/index.js';
import type { SubagentContext } from '../../harness/subagents/base-subagent.js'; import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
import type { HarnessEvent } from '../../harness/harness-events.js';
export interface IndicatorAgentToolConfig { export interface IndicatorAgentToolConfig {
indicatorSubagent: IndicatorSubagent; indicatorSubagent: IndicatorSubagent;
@@ -14,10 +15,20 @@ export interface IndicatorAgentToolConfig {
* Creates a LangChain tool that delegates to the indicator subagent. * Creates a LangChain tool that delegates to the indicator subagent.
* Mirrors the pattern of research-agent.tool.ts. * Mirrors the pattern of research-agent.tool.ts.
*/ */
export function createIndicatorAgentTool(config: IndicatorAgentToolConfig): DynamicStructuredTool { export function createIndicatorAgentTool(config: IndicatorAgentToolConfig): DynamicStructuredTool & { streamFunc: (args: { instruction: string }) => AsyncGenerator<HarnessEvent, string> } {
const { indicatorSubagent, context, logger } = config; const { indicatorSubagent, context, logger } = config;
return new DynamicStructuredTool({ async function* streamFunc({ instruction }: { instruction: string }, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
logger.info({ instruction: instruction.substring(0, 100) }, 'Streaming indicator subagent');
const gen = indicatorSubagent.streamEvents(context, instruction, signal);
let step: IteratorResult<HarnessEvent, string>;
while (!(step = await gen.next()).done) {
yield step.value;
}
return step.value;
}
const tool = new DynamicStructuredTool({
name: 'indicator', name: 'indicator',
description: `Delegate to the indicator subagent for all indicator-related tasks on the chart. description: `Delegate to the indicator subagent for all indicator-related tasks on the chart.
@@ -50,4 +61,6 @@ NEVER modify the indicators workspace store directly.`,
} }
}, },
}); });
return Object.assign(tool, { streamFunc });
} }

View File

@@ -3,6 +3,7 @@ import { z } from 'zod';
import type { FastifyBaseLogger } from 'fastify';
import type { ResearchSubagent } from '../../harness/subagents/research/index.js';
import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
import type { HarnessEvent } from '../../harness/harness-events.js';

export interface ResearchAgentToolConfig {
  researchSubagent: ResearchSubagent;
@@ -15,10 +16,24 @@ export interface ResearchAgentToolConfig {
 * This is the standard LangChain pattern for exposing a subagent as a tool
 * to a parent agent.
 */
export function createResearchAgentTool(config: ResearchAgentToolConfig): DynamicStructuredTool & { streamFunc: (args: { name: string; instruction: string }) => AsyncGenerator<HarnessEvent, string> } {
  const { researchSubagent, context, logger } = config;

  const prompt = (name: string, instruction: string) => `Research script name: "${name}"\n\n${instruction}`;

  async function* streamFunc({ name, instruction }: { name: string; instruction: string }, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
    logger.info({ name, instruction: instruction.substring(0, 100) }, 'Streaming research subagent');
    const gen = researchSubagent.streamEvents(context, prompt(name, instruction), signal);
    let step: IteratorResult<HarnessEvent, string>;
    while (!(step = await gen.next()).done) {
      yield step.value;
    }
    const finalText = step.value;
    const images = researchSubagent.getLastImages();
    return JSON.stringify({ text: finalText, images });
  }

  const tool = new DynamicStructuredTool({
    name: 'research',
    description: `Delegate to the research subagent for data analysis, charting, statistics, and Python script execution.
@@ -36,21 +51,15 @@ The research subagent will write and execute Python scripts, capture output and
    func: async ({ name, instruction }: { name: string; instruction: string }): Promise<string> => {
      logger.info({ name, instruction: instruction.substring(0, 100) }, 'Delegating to research subagent');
      try {
        const result = await researchSubagent.executeWithImages(context, prompt(name, instruction));
        return JSON.stringify({ text: result.text, images: result.images });
      } catch (error) {
        logger.error({ error, errorMessage: (error as Error)?.message }, 'Research subagent failed');
        throw error;
      }
    },
  });
  return Object.assign(tool, { streamFunc });
}
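For illustration, a hedged sketch of parsing the JSON result returned by func/streamFunc above ({ text, images }). The ResearchToolResult shape and parseResearchResult helper are hypothetical; the real AgentHarness.processToolResult() may handle this differently.

// Hypothetical consumer-side parsing of the stringified research result.
interface ResearchToolResult {
  text: string;
  images?: string[]; // e.g. base64-encoded images captured by the subagent
}

function parseResearchResult(raw: string): ResearchToolResult {
  try {
    const parsed = JSON.parse(raw) as ResearchToolResult;
    return { text: parsed.text ?? raw, images: parsed.images ?? [] };
  } catch {
    // Not JSON: treat the whole string as plain text output
    return { text: raw, images: [] };
  }
}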

View File

@@ -0,0 +1,66 @@
import { DynamicStructuredTool } from '@langchain/core/tools';
import { z } from 'zod';
import type { FastifyBaseLogger } from 'fastify';
import type { StrategySubagent } from '../../harness/subagents/strategy/index.js';
import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
import type { HarnessEvent } from '../../harness/harness-events.js';
export interface StrategyAgentToolConfig {
strategySubagent: StrategySubagent;
context: SubagentContext;
logger: FastifyBaseLogger;
}
/**
* Creates a LangChain tool that delegates to the strategy subagent.
* Mirrors the pattern of indicator-agent.tool.ts.
*/
export function createStrategyAgentTool(config: StrategyAgentToolConfig): DynamicStructuredTool & { streamFunc: (args: { instruction: string }, signal?: AbortSignal) => AsyncGenerator<HarnessEvent, string> } {
const { strategySubagent, context, logger } = config;
async function* streamFunc({ instruction }: { instruction: string }, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
logger.info({ instruction: instruction.substring(0, 100) }, 'Streaming strategy subagent');
const gen = strategySubagent.streamEvents(context, instruction, signal);
let step: IteratorResult<HarnessEvent, string>;
while (!(step = await gen.next()).done) {
yield step.value;
}
return step.value;
}
const tool = new DynamicStructuredTool({
name: 'strategy',
description: `Delegate to the strategy subagent for all trading strategy tasks.
Use this tool for:
- Writing new PandasStrategy classes ("create a strategy that...")
- Editing or improving existing strategies
- Running backtests on a strategy
- Interpreting backtest results (Sharpe ratio, drawdown, trade list)
- Activating or deactivating strategies for paper trading
- Monitoring running strategy PnL and trade logs
- Checking which strategies already exist
ALWAYS use this tool for any request about trading strategies, backtesting, or strategy activation.
NEVER write strategy Python code or call backtest_strategy directly — delegate here instead.`,
schema: z.object({
instruction: z.string().describe(
'The strategy task to perform. Be specific: include the strategy name, ' +
'desired signals (e.g. RSI < 30 = buy), timeframe, and symbol if known. ' +
'For backtest requests include the date range and starting capital.'
),
}),
func: async ({ instruction }: { instruction: string }): Promise<string> => {
logger.info({ instruction: instruction.substring(0, 100) }, 'Delegating to strategy subagent');
try {
return await strategySubagent.execute(context, instruction);
} catch (error) {
logger.error({ error, errorMessage: (error as Error)?.message }, 'Strategy subagent failed');
throw error;
}
},
});
return Object.assign(tool, { streamFunc });
}

View File

@@ -3,6 +3,7 @@ import { z } from 'zod';
import type { FastifyBaseLogger } from 'fastify';
import type { WebExploreSubagent } from '../../harness/subagents/web-explore/index.js';
import type { SubagentContext } from '../../harness/subagents/base-subagent.js';
import type { HarnessEvent } from '../../harness/harness-events.js';

export interface WebExploreAgentToolConfig {
  webExploreSubagent: WebExploreSubagent;
@@ -14,10 +15,20 @@ export interface WebExploreAgentToolConfig {
 * Creates a LangChain tool that delegates to the web-explore subagent.
 * The subagent decides whether to use web search or arXiv based on the instruction.
 */
export function createWebExploreAgentTool(config: WebExploreAgentToolConfig): DynamicStructuredTool & { streamFunc: (args: { instruction: string }, signal?: AbortSignal) => AsyncGenerator<HarnessEvent, string> } {
  const { webExploreSubagent, context, logger } = config;

  async function* streamFunc({ instruction }: { instruction: string }, signal?: AbortSignal): AsyncGenerator<HarnessEvent, string> {
    logger.info({ instruction: instruction.substring(0, 100) }, 'Streaming web-explore subagent');
    const gen = webExploreSubagent.streamEvents(context, instruction, signal);
    let step: IteratorResult<HarnessEvent, string>;
    while (!(step = await gen.next()).done) {
      yield step.value;
    }
    return step.value;
  }

  const tool = new DynamicStructuredTool({
    name: 'web_explore',
    description: `Search the web or academic databases and return a summarized answer.
@@ -46,4 +57,6 @@ The subagent will search the web (or arXiv for academic queries), fetch relevant
      }
    },
  });
  return Object.assign(tool, { streamFunc });
}

View File

@@ -103,6 +103,16 @@ export const DEFAULT_STORES: StoreConfig[] = [
    persistent: true,
    initialState: () => ({}),
  },
  {
    name: 'strategy_types',
    persistent: true,
    initialState: () => ({}),
  },
  {
    name: 'research_types',
    persistent: true,
    initialState: () => ({}),
  },
  {
    name: 'channelState',
    persistent: false,

View File

@@ -47,24 +47,22 @@ function loadConfig() {
    logger.warn({ error: error.message }, 'Could not load secrets');
  }

  return {
    // Flink ZMQ endpoints
    flink_hostname: config.flink_hostname || 'localhost',
    ingestor_broker_port: config.ingestor_broker_port || 5567,

    // Kafka configuration
    kafka_brokers: config.kafka_brokers || ['localhost:9092'],
    kafka_ohlc_topic: config.kafka_ohlc_topic || 'market-ohlc',
    kafka_tick_topic: config.kafka_tick_topic || 'market-tick',

    // Worker configuration
    poll_interval_ms: config.poll_interval_ms || 10000,

    // Symbol metadata configuration
    supported_exchanges: config.supported_exchanges || ['binance', 'coinbase', 'kraken'],
    symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000,

    ...secrets
  };
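For reference, a hypothetical config object matching the keys loadConfig() falls back on above; the hostname, ports and topic names are illustrative defaults only, not the deployed values.

// Illustrative local config; key names mirror loadConfig() above.
const exampleIngestorConfig = {
  flink_hostname: 'flink-jobmanager',
  ingestor_broker_port: 5567,              // Flink IngestorBroker ROUTER
  kafka_brokers: ['kafka:9092'],
  kafka_ohlc_topic: 'market-ohlc',         // historical OHLC batches
  kafka_tick_topic: 'market-tick',         // realtime ticks
  poll_interval_ms: 10_000,
  supported_exchanges: ['binance', 'coinbase', 'kraken'],
  symbol_metadata_interval_ms: 6 * 60 * 60 * 1000,
};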
@@ -76,11 +74,7 @@ class IngestorWorker {
    this.logger = logger;
    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(config, logger.child({ component: 'kafka' }));

    this.metadataGenerator = new SymbolMetadataGenerator(
      config,
      this.kafkaProducer,
@@ -94,33 +88,27 @@ class IngestorWorker {
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      this.zmqClient,
      logger.child({ component: 'poller' })
    );

    // jobId → active realtime subscription (for stop handling)
    this.activeRealtime = new Set();

    this.isShutdown = false;
    this.metadataInterval = null;
  }

  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    await this.kafkaProducer.connect();

    // Wire event callbacks before connecting so we don't miss early messages
    this.zmqClient.onWorkAssign = req => this.handleWorkAssign(req);
    this.zmqClient.onWorkStop = jobId => this.handleWorkStop(jobId);
    await this.zmqClient.connect(); // also sends WorkerReady

    // Generate symbol metadata on startup
    this.logger.info('Generating initial symbol metadata');
@@ -140,281 +128,126 @@ class IngestorWorker {
      } catch (error) {
        this.logger.error({ error: error.message }, 'Failed to generate periodic symbol metadata');
      }
    }, this.config.symbol_metadata_interval_ms);

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Handle a WorkAssign message dispatched by Flink IngestorBroker.
   * Called from the ZmqClient receive loop — do not block.
   */
  handleWorkAssign(request) {
    const { jobId, requestId, type, ticker } = request;

    this.logger.info({ jobId, requestId, type, ticker }, 'Received WorkAssign');

    // HISTORICAL_OHLC = 0 (proto3 default, may appear as undefined or 'HISTORICAL_OHLC')
    const isHistorical = !type || type === 'HISTORICAL_OHLC' || type === 0;
    const isRealtime = type === 'REALTIME_TICKS' || type === 1;

    if (isHistorical) {
      this.handleHistoricalRequest(request).catch(err => {
        this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in historical handler');
      });
    } else if (isRealtime) {
      this.handleRealtimeRequest(request);
    } else {
      this.logger.warn({ jobId, type }, 'Unknown request type — rejecting');
      this.zmqClient.sendReject(jobId, `Unknown request type: ${type}`).catch(() => {});
    }
  }

  /**
   * Handle WorkStop sent by Flink (e.g., all subscribers left).
   */
  handleWorkStop(jobId) {
    this.logger.info({ jobId }, 'Received WorkStop — cancelling realtime subscription');
    this.realtimePoller.cancelSubscription(jobId);
    this.activeRealtime.delete(jobId);
    // No WorkComplete needed — Flink sent the stop, it already knows
  }

  /**
   * Fetch historical OHLC data and write to Kafka.
   * Sends WorkComplete when done (success or error).
   */
  async handleHistoricalRequest(request) {
    const { jobId, requestId, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical || {};

    this.logger.info({ jobId, requestId, ticker, period_seconds }, 'Processing historical OHLC request');

    try {
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker, start_time, end_time, period_seconds, limit
      );

      this.logger.info({ jobId, requestId, ticker, count: candles.length }, 'Fetched from exchange');

      if (candles.length > 0) {
        const metadata = { request_id: requestId, client_id, ticker, period_seconds, start_time, end_time };
        const PAGE_SIZE = 1000;
        for (let i = 0; i < candles.length; i += PAGE_SIZE) {
          const page = candles.slice(i, i + PAGE_SIZE);
          const isLastPage = (i + PAGE_SIZE) >= candles.length;
          await this.kafkaProducer.writeOHLCs(this.config.kafka_ohlc_topic, page, metadata, isLastPage);
        }
        this.logger.info({ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) }, 'Wrote all pages to Kafka');
      } else {
        await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
          request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
          status: 'NOT_FOUND', message: 'No data available for requested period'
        });
      }

      this.logger.info({ jobId, requestId, ticker }, 'Historical request complete — sending WorkComplete');
      await this.zmqClient.sendComplete(jobId, true);

    } catch (error) {
      this.logger.error({ jobId, requestId, ticker, error: error.message }, 'Historical request failed');

      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
          request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
          status: 'ERROR', error_message: error.message
        });
      } catch (kafkaErr) {
        this.logger.error({ jobId, error: kafkaErr.message }, 'Failed to write error marker to Kafka');
      }

      await this.zmqClient.sendComplete(jobId, false, error.message);
    }
  }

  /**
   * Start realtime tick polling for a job dispatched by Flink.
   * Ticks flow: exchange → Kafka market-tick → Flink → OHLC bars → clients.
   */
  handleRealtimeRequest(request) {
    const { jobId, requestId, ticker } = request;
    this.logger.info({ jobId, requestId, ticker }, 'Processing realtime subscription request');
    this.activeRealtime.add(jobId);
    this.realtimePoller.startSubscription(jobId, requestId, ticker, this.config.kafka_tick_topic);
  }

  getStatus() {
    return {
      activeRealtime: this.activeRealtime.size,
      pollerStats: this.realtimePoller.getStats(),
      metadataStatus: this.metadataGenerator.getStatus()
    };
  }

  async shutdown() {
    if (this.isShutdown) return;
    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    if (this.metadataInterval) clearInterval(this.metadataInterval);
    this.realtimePoller.shutdown();

    await this.ccxtFetcher.close();
    await this.metadataGenerator.close();
    await this.kafkaProducer.disconnect();
@@ -430,31 +263,23 @@ async function main() {
  const config = loadConfig();
  const worker = new IngestorWorker(config, logger);

  process.on('SIGINT', () => worker.shutdown());
  process.on('SIGTERM', () => worker.shutdown());

  process.on('uncaughtException', error => {
    logger.error({ error }, 'Uncaught exception');
    worker.shutdown();
  });

  process.on('unhandledRejection', reason => {
    logger.error({ reason }, 'Unhandled rejection');
  });

  await worker.start();

  setInterval(() => {
    logger.info({ status: worker.getStatus() }, 'Worker status');
  }, 60000);
}

main().catch(error => {
  logger.error({ error }, 'Fatal error');
  process.exit(1);
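A small sketch of the proto3 enum-default handling the worker relies on in handleWorkAssign(): HISTORICAL_OHLC = 0 is omitted on the wire, so protobufjs can surface it as undefined, a string name, or a number depending on decode options. normalizeRequestType is a hypothetical helper, not part of this commit.

// Sketch only: maps the possible wire representations onto a stable label.
type RequestTypeWire = undefined | 0 | 1 | 'HISTORICAL_OHLC' | 'REALTIME_TICKS';

function normalizeRequestType(type: RequestTypeWire): 'historical' | 'realtime' | 'unknown' {
  if (!type || type === 'HISTORICAL_OHLC') return 'historical'; // undefined, 0, or enum name
  if (type === 1 || type === 'REALTIME_TICKS') return 'realtime';
  return 'unknown';
}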

View File

@@ -116,12 +116,17 @@ export class KafkaProducer {
  }

  /**
   * Write multiple OHLC candles to Kafka as an OHLCBatch message.
   *
   * Historical mode: pass explicit metadata and isLastPage flag.
   * Realtime mode: omit metadata (null/undefined) — writes individual OHLC messages instead.
   *
   * @param {string} topic - Kafka topic name
   * @param {Array<object>} ohlcData - Array of OHLC candle objects
   * @param {object|null} metadata - Request metadata for historical batches; null for realtime
   * @param {boolean} isLastPage - True if this is the final page of a historical query
   */
  async writeOHLCs(topic, ohlcData, metadata = null, isLastPage = false) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }
@@ -130,12 +135,8 @@ export class KafkaProducer {
      return;
    }

    if (!metadata) {
      // Realtime mode — write individual OHLC messages (no batch wrapper)
      const messages = ohlcData.map(candle => {
        const protoCandle = {
          timestamp: candle.timestamp,
@@ -156,10 +157,7 @@ export class KafkaProducer {
        };
      });

      await this.producer.send({ topic, messages });

      this.logger.debug(
        { count: ohlcData.length, topic, type: 'individual' },
@@ -168,7 +166,7 @@ export class KafkaProducer {
      return;
    }

    // Historical mode — write as OHLCBatch with metadata
    const batch = {
      metadata: {
        requestId: metadata.request_id,
@@ -178,7 +176,8 @@ export class KafkaProducer {
        startTime: metadata.start_time,
        endTime: metadata.end_time,
        status: metadata.status || 'OK',
        errorMessage: metadata.error_message,
        isLastPage
      },
      rows: ohlcData.map(candle => {
        const row = {
@@ -194,22 +193,16 @@ export class KafkaProducer {
      })
    };

    const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
    const value = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [{ key: metadata.ticker, value }]
    });

    this.logger.debug(
      { request_id: metadata.request_id, count: ohlcData.length, isLastPage, topic },
      'Wrote OHLCBatch to Kafka'
    );
  }
@@ -225,7 +218,8 @@ export class KafkaProducer {
      throw new Error('Kafka producer not connected');
    }

    // Create an empty OHLCBatch with status in metadata.
    // Markers are always the terminal message for a request (is_last_page = true).
    const batch = {
      metadata: {
        requestId: marker.request_id,
@@ -235,7 +229,8 @@ export class KafkaProducer {
        startTime: marker.start_time,
        endTime: marker.end_time,
        status: marker.status, // 'NOT_FOUND' or 'ERROR'
        errorMessage: marker.error_message || marker.message,
        isLastPage: true
      },
      rows: [] // Empty rows array indicates marker message
    };
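A usage sketch of the two writeOHLCs() calling modes described above; the producer and page declarations and the metadata values are placeholders, assuming the signature introduced in this commit.

// Sketch: placeholders for the producer instance and candle data.
declare const producer: {
  writeOHLCs(topic: string, rows: object[], metadata?: object | null, isLastPage?: boolean): Promise<void>;
};
declare const page: object[];

// Realtime mode: no metadata, so each candle is written as an individual message.
await producer.writeOHLCs('market-ohlc', page);

// Historical mode: batch wrapper with request metadata; only the final page (or a marker) sets isLastPage.
await producer.writeOHLCs(
  'market-ohlc',
  page,
  { request_id: 'req-7', client_id: 'client-1', ticker: 'BTC/USDT.BINANCE', period_seconds: 60 },
  true
);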

View File

@@ -1,33 +1,40 @@
// Realtime tick data poller — polls exchange every 10s, writes ticks to market-tick Kafka topic.
// Heartbeats every 5s so Flink IngestorBroker knows the job is alive.

export class RealtimePoller {
  constructor(ccxtFetcher, kafkaProducer, zmqClient, logger) {
    this.ccxtFetcher = ccxtFetcher;
    this.kafkaProducer = kafkaProducer;
    this.zmqClient = zmqClient;
    this.logger = logger;

    // Active subscriptions: jobId -> subscription info
    this.subscriptions = new Map();

    // Poll interval in milliseconds (10 seconds)
    this.pollInterval = 10000;
    // Heartbeat interval (5 seconds)
    this.heartbeatInterval = 5000;

    this.pollingLoop = null;
    this.heartbeatLoop = null;
  }

  /**
   * Start a realtime subscription for a job dispatched by IngestorBroker.
   * @param {string} jobId - Broker-assigned job ID (for heartbeats and COMPLETE)
   * @param {string} requestId - Original request ID (for metadata)
   * @param {string} ticker - Ticker to subscribe to
   * @param {string} kafkaTopic - Kafka topic to write ticks to (market-tick)
   */
  startSubscription(jobId, requestId, ticker, kafkaTopic) {
    if (this.subscriptions.has(jobId)) {
      this.logger.warn({ jobId }, 'Subscription already exists');
      return;
    }

    const subscription = {
      jobId,
      requestId,
      ticker,
      kafkaTopic,
@@ -36,93 +43,81 @@ export class RealtimePoller {
      errorCount: 0
    };

    this.subscriptions.set(jobId, subscription);
    this.logger.info({ jobId, requestId, ticker, kafkaTopic }, 'Started realtime subscription');

    if (!this.pollingLoop) {
      this.startPollingLoop();
    }
    if (!this.heartbeatLoop) {
      this.startHeartbeatLoop();
    }
  }

  /**
   * Stop a realtime subscription. Called when Flink sends WorkStop or on error.
   * Does NOT send WorkComplete — caller is responsible for that.
   */
  cancelSubscription(jobId) {
    const subscription = this.subscriptions.get(jobId);
    if (subscription) {
      subscription.isActive = false;
      this.subscriptions.delete(jobId);
      this.logger.info({ jobId, ticker: subscription.ticker }, 'Cancelled realtime subscription');
    }

    if (this.subscriptions.size === 0) {
      if (this.pollingLoop) {
        clearInterval(this.pollingLoop);
        this.pollingLoop = null;
      }
      if (this.heartbeatLoop) {
        clearInterval(this.heartbeatLoop);
        this.heartbeatLoop = null;
      }
      this.logger.info('Stopped polling/heartbeat loops — no active subscriptions');
    }
  }

  startPollingLoop() {
    this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');
    this.pollingLoop = setInterval(() => this.pollAllSubscriptions(), this.pollInterval);
    // Immediate first poll
    this.pollAllSubscriptions();
  }

  startHeartbeatLoop() {
    this.logger.info({ interval: this.heartbeatInterval }, 'Starting heartbeat loop');
    this.heartbeatLoop = setInterval(async () => {
      for (const { jobId } of this.subscriptions.values()) {
        try {
          await this.zmqClient.sendHeartbeat(jobId);
        } catch (err) {
          this.logger.error({ jobId, error: err.message }, 'Failed to send heartbeat');
        }
      }
    }, this.heartbeatInterval);
  }

  async pollAllSubscriptions() {
    const subscriptions = Array.from(this.subscriptions.values());
    await Promise.allSettled(subscriptions.map(sub => this.pollSubscription(sub)));
  }

  async pollSubscription(subscription) {
    if (!subscription.isActive) return;

    const { jobId, requestId, ticker, kafkaTopic, lastTimestamp } = subscription;

    try {
      const trades = await this.ccxtFetcher.fetchRecentTrades(ticker, lastTimestamp);

      if (trades.length === 0) {
        this.logger.debug({ jobId, ticker }, 'No new trades');
        return;
      }

      // Skip trades we've already seen (timestamp-based dedup)
      let newTrades = trades;
      if (lastTimestamp) {
        const lastTs = BigInt(lastTimestamp);
@@ -130,88 +125,59 @@ export class RealtimePoller {
      }

      if (newTrades.length > 0) {
        await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);
        subscription.lastTimestamp = newTrades[newTrades.length - 1].timestamp;
        this.logger.info({ jobId, ticker, count: newTrades.length, kafkaTopic }, 'Wrote ticks to Kafka');
      }

      subscription.errorCount = 0;
    } catch (error) {
      subscription.errorCount++;
      this.logger.error(
        { error: error.message, jobId, ticker, errorCount: subscription.errorCount },
        'Error polling subscription'
      );

      // After 5 consecutive errors, give up and notify Flink
      if (subscription.errorCount >= 5) {
        this.logger.error({ jobId, ticker }, 'Cancelling subscription due to repeated errors');
        this.cancelSubscription(jobId);
        try {
          await this.zmqClient.sendComplete(jobId, false, `Polling failed after 5 errors: ${error.message}`);
        } catch (zmqErr) {
          this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
        }
      }
    }
  }

  getStats() {
    return {
      totalSubscriptions: this.subscriptions.size,
      subscriptions: Array.from(this.subscriptions.values()).map(sub => ({
        jobId: sub.jobId,
        requestId: sub.requestId,
        ticker: sub.ticker,
        isActive: sub.isActive,
        errorCount: sub.errorCount,
        lastTimestamp: sub.lastTimestamp
      }))
    };
  }

  shutdown() {
    this.logger.info('Shutting down realtime poller');

    if (this.pollingLoop) {
      clearInterval(this.pollingLoop);
      this.pollingLoop = null;
    }
    if (this.heartbeatLoop) {
      clearInterval(this.heartbeatLoop);
      this.heartbeatLoop = null;
    }

    for (const subscription of this.subscriptions.values()) {
      subscription.isActive = false;
    }
    this.subscriptions.clear();
  }
}
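A lifecycle sketch of the poller under the new broker protocol, assuming already-constructed ccxtFetcher, kafkaProducer, zmqClient and logger instances; the import path, job id and request id are made up for illustration.

// Sketch only: dependencies are declared as placeholders, path assumed.
import { RealtimePoller } from './realtime-poller.js';
declare const ccxtFetcher: any, kafkaProducer: any, zmqClient: any, logger: any;

const poller = new RealtimePoller(ccxtFetcher, kafkaProducer, zmqClient, logger);
// WorkAssign for a realtime job: start 10s polling and 5s heartbeats for job-42
poller.startSubscription('job-42', 'req-7', 'BTC/USDT.BINANCE', 'market-tick');
// WorkStop from Flink: stop the job; loops shut down once no subscriptions remain
poller.cancelSubscription('job-42');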

View File

@@ -1,116 +1,204 @@
// ZeroMQ DEALER client connecting to Flink IngestorBroker (ROUTER, port 5567)
import * as zmq from 'zeromq';
import {
  DataRequest,
  WorkerReady, WorkComplete, WorkHeartbeat, WorkReject, WorkStop,
  MessageTypeId, PROTOCOL_VERSION
} from './proto/messages.js';

const PROTOCOL_VERSION_BUF = Buffer.from([PROTOCOL_VERSION]);

/**
 * Encodes a broker protocol message for sending over DEALER.
 * Frame layout (DEALER → ROUTER):
 *   Frame 0: empty delimiter (required for ROUTER peering)
 *   Frame 1: [0x01] version
 *   Frame 2: [typeId][protobuf bytes]
 */
function encodeBrokerMessage(typeId, messageData, MessageType) {
  const protoBytes = MessageType.encode(MessageType.create(messageData)).finish();
  const frame2 = Buffer.concat([Buffer.from([typeId]), Buffer.from(protoBytes)]);
  return [Buffer.alloc(0), PROTOCOL_VERSION_BUF, frame2];
}

export class ZmqClient {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.dealerSocket = null;
    this.isShutdown = false;
    this.activeJobId = null;
    this._idleHeartbeatInterval = null;
    this.supportedExchanges = (config.supported_exchanges || ['BINANCE', 'COINBASE'])
      .map(e => e.toUpperCase());

    // Callbacks set by IngestorWorker
    this.onWorkAssign = null; // (DataRequest) => void
    this.onWorkStop = null;   // (jobId) => void
  }

  /**
   * Connect DEALER socket to Flink IngestorBroker (ROUTER).
   * Sends WorkerReady immediately so Flink knows this worker is available.
   */
  async connect() {
    const { flink_hostname, ingestor_broker_port = 5567 } = this.config;

    this.dealerSocket = new zmq.Dealer();
    const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
    await this.dealerSocket.connect(endpoint);
    this.logger.info(`Connected DEALER to Flink IngestorBroker at ${endpoint}`);

    // Register as available
    await this.sendReady();

    // Periodically re-send WorkerReady when idle, to recover from missed initial registration
    this._idleHeartbeatInterval = setInterval(() => {
      if (this.activeJobId === null && !this.isShutdown) {
        this.sendReady().catch(err =>
          this.logger.warn({ error: err.message }, 'Failed to re-send WorkerReady'));
      }
    }, 30_000);

    // Start receiving work in background
    this._receiveLoop();
  }

  /**
   * Send WorkerReady — called on connect and after each COMPLETE.
   */
  async sendReady() {
    const frames = encodeBrokerMessage(
      MessageTypeId.WORKER_READY,
      { exchanges: this.supportedExchanges },
      WorkerReady
    );
    await this.dealerSocket.send(frames);
    this.logger.info({ exchanges: this.supportedExchanges }, 'Sent WorkerReady');
  }

  /**
   * Send WorkComplete after a historical job finishes.
   * Automatically sends WorkerReady so Flink returns us to the free pool.
   */
  async sendComplete(jobId, success, errorMessage) {
    this.activeJobId = null;
    const frames = encodeBrokerMessage(
      MessageTypeId.WORK_COMPLETE,
      {
        jobId,
        success,
        ...(errorMessage ? { errorMessage } : {})
      },
      WorkComplete
    );
    await this.dealerSocket.send(frames);
    this.logger.info({ jobId, success }, 'Sent WorkComplete');

    // Return to free pool
    await this.sendReady();
  }

  /**
   * Send WorkHeartbeat for an active realtime job.
   */
  async sendHeartbeat(jobId) {
    const frames = encodeBrokerMessage(
      MessageTypeId.WORK_HEARTBEAT,
      { jobId },
      WorkHeartbeat
    );
    await this.dealerSocket.send(frames);
    this.logger.debug({ jobId }, 'Sent WorkHeartbeat');
  }

  /**
   * Send WorkReject if we cannot handle the dispatched job.
   */
  async sendReject(jobId, reason) {
    const frames = encodeBrokerMessage(
      MessageTypeId.WORK_REJECT,
      { jobId, reason },
      WorkReject
    );
    await this.dealerSocket.send(frames);
    this.logger.warn({ jobId, reason }, 'Sent WorkReject');
  }

  /**
   * Background loop: receive WorkAssign (DataRequest) or WorkStop from Flink.
   * ROUTER→DEALER frame layout: [empty][version][typeId+payload]
   */
  async _receiveLoop() {
    try {
      for await (const frames of this.dealerSocket) {
        if (this.isShutdown) break;

        try {
          // frames[0] = empty delimiter, frames[1] = version, frames[2] = type+payload
          if (frames.length < 3) {
            this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count from broker');
            continue;
          }

          const versionByte = frames[1][0];
          if (versionByte !== PROTOCOL_VERSION) {
            this.logger.warn({ versionByte }, 'Unexpected protocol version from broker');
            continue;
          }

          const typeId = frames[2][0];
          const payload = frames[2].slice(1);

          if (typeId === MessageTypeId.WORK_ASSIGN) {
            // DataRequest protobuf
            const request = DataRequest.decode(payload);
            const req = DataRequest.toObject(request, {
              longs: String, enums: String, bytes: Buffer
            });
            this.activeJobId = req.jobId;
            this.logger.info(
              { jobId: req.jobId, requestId: req.requestId, type: req.type, ticker: req.ticker },
              'Received WorkAssign from broker'
            );
            if (this.onWorkAssign) {
              this.onWorkAssign(req);
            }
          } else if (typeId === MessageTypeId.WORK_STOP) {
            const stop = WorkStop.decode(payload);
            const { jobId } = WorkStop.toObject(stop);
            this.logger.info({ jobId }, 'Received WorkStop from broker');
            if (this.onWorkStop) {
              this.onWorkStop(jobId);
            }
          } else {
            this.logger.warn({ typeId: `0x${typeId.toString(16)}` }, 'Unknown message type from broker');
          }
        } catch (err) {
          this.logger.error({ error: err.message }, 'Error processing broker message');
        }
      }
    } catch (err) {
      if (!this.isShutdown) {
        this.logger.error({ error: err.message }, 'DEALER receive loop error');
      }
    }
  }

  async shutdown() {
    this.isShutdown = true;
    if (this._idleHeartbeatInterval) {
      clearInterval(this._idleHeartbeatInterval);
      this._idleHeartbeatInterval = null;
    }
    this.logger.info('Shutting down ZMQ DEALER connection');
    if (this.dealerSocket) {
      this.dealerSocket.close();
    }
  }
}
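A minimal sketch of the ROUTER→DEALER frame handling mirrored by _receiveLoop() above; classifyBrokerFrames is a hypothetical helper and assumes the [empty][version][typeId+payload] layout documented in encodeBrokerMessage().

// Hypothetical helper; Buffer is the Node.js global.
function classifyBrokerFrames(frames: Buffer[]): { typeId: number; payload: Buffer } | null {
  // Expected layout: [empty delimiter][0x01 version][typeId + protobuf payload]
  if (frames.length < 3) return null;
  if (frames[1][0] !== 0x01) return null; // unexpected protocol version
  return { typeId: frames[2][0], payload: frames[2].subarray(1) };
}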

View File

@@ -24,6 +24,9 @@ message DataRequest {
  // Flink uses this to determine notification topic
  optional string client_id = 6;

  // Job ID assigned by the IngestorBroker for work tracking and heartbeating
  optional string job_id = 7;

  enum RequestType {
    HISTORICAL_OHLC = 0;
    REALTIME_TICKS = 1;
@@ -327,3 +330,40 @@ message FieldValue {
    uint64 timestamp_val = 6;
  }
}
// ─── Ingestor Broker Protocol (Flink ROUTER ↔ Ingestor DEALER, port 5567) ───
// Message type IDs 0x20–0x25
// Ingestor → Flink: register as available (type 0x20)
// Sent on DEALER connect and after every COMPLETE.
message WorkerReady {
// Exchanges this ingestor supports (e.g. ["BINANCE", "COINBASE"])
repeated string exchanges = 1;
}
// Ingestor → Flink: historical job finished (type 0x21)
message WorkComplete {
string job_id = 1;
bool success = 2;
optional string error_message = 3;
}
// Ingestor → Flink: realtime job still alive — sent every 5s (type 0x22)
message WorkHeartbeat {
string job_id = 1;
}
// Ingestor → Flink: unable to handle this job (type 0x23)
message WorkReject {
string job_id = 1;
string reason = 2;
}
// Flink → Ingestor: dispatch a job — wraps DataRequest (type 0x24)
// DataRequest.job_id is populated by IngestorBroker
// (DataRequest itself is type 0x01; this is the framing type for broker dispatch)
// Flink → Ingestor: stop a realtime job (type 0x25)
message WorkStop {
string job_id = 1;
}
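For quick reference, the type-id bytes named in the comments above, spelled out as a constant map (a sketch; the authoritative MessageTypeId values live in the workers' proto/messages.js).

// Type-id bytes for the Ingestor Broker protocol, per the comments above.
const BrokerMessageTypeId = {
  WORKER_READY:   0x20, // Ingestor → Flink: register as available
  WORK_COMPLETE:  0x21, // Ingestor → Flink: historical job finished
  WORK_HEARTBEAT: 0x22, // Ingestor → Flink: realtime job alive (every 5s)
  WORK_REJECT:    0x23, // Ingestor → Flink: cannot handle the job
  WORK_ASSIGN:    0x24, // Flink → Ingestor: dispatch (wraps DataRequest)
  WORK_STOP:      0x25, // Flink → Ingestor: stop a realtime job
} as const;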

View File

@@ -58,4 +58,8 @@ message OHLCBatchMetadata {
  // Error message if status is ERROR
  optional string error_message = 8;

  // True on the final page of a historical query (including error/not-found markers).
  // Flink publishes HistoryReadyNotification only when this is true.
  bool is_last_page = 9;
}

View File

@@ -16,14 +16,15 @@ pub struct Config {
    #[serde(default = "default_market_data_pub_port")]
    pub market_data_pub_port: u16,

    /// Flink market data endpoint (XSUB - relay subscribes to Flink XPUB)
    #[serde(default = "default_flink_market_data_endpoint")]
    pub flink_market_data_endpoint: String,

    /// Flink request endpoint (PUSH - relay forwards client requests to Flink PULL)
    /// Flink's IngestorBroker binds a PULL socket on port 5566
    #[serde(default = "default_flink_request_endpoint")]
    pub flink_request_endpoint: String,

    /// Request timeout in seconds
    #[serde(default = "default_request_timeout_secs")]
    pub request_timeout_secs: u64,
@@ -45,14 +46,14 @@ fn default_market_data_pub_port() -> u16 {
    5558
}

fn default_flink_market_data_endpoint() -> String {
    "tcp://flink-jobmanager:5558".to_string()
}

fn default_flink_request_endpoint() -> String {
    "tcp://flink-jobmanager:5566".to_string()
}

fn default_request_timeout_secs() -> u64 {
    30
}
@@ -67,8 +68,8 @@ impl Default for Config {
            bind_address: default_bind_address(),
            client_request_port: default_client_request_port(),
            market_data_pub_port: default_market_data_pub_port(),
            flink_market_data_endpoint: default_flink_market_data_endpoint(),
            flink_request_endpoint: default_flink_request_endpoint(),
            request_timeout_secs: default_request_timeout_secs(),
            high_water_mark: default_hwm(),
        }

View File

@@ -7,8 +7,6 @@ use tracing::{debug, error, info, warn};
const PROTOCOL_VERSION: u8 = 0x01;
const MSG_TYPE_SUBMIT_REQUEST: u8 = 0x10;
const MSG_TYPE_SUBMIT_RESPONSE: u8 = 0x11;

pub struct Relay {
    config: Config,
@@ -26,24 +24,21 @@ impl Relay {
    }

    pub async fn run(self) -> Result<()> {
        info!("Initializing ZMQ Relay");

        let client_request_socket = self.create_client_request_socket()?;
        let market_data_frontend = self.create_market_data_frontend()?;
        let market_data_backend = self.create_market_data_backend()?;
        let flink_request_socket = self.create_flink_request_socket()?;

        info!("All sockets initialized — relay forwards requests to Flink");

        tokio::task::spawn_blocking(move || {
            Self::proxy_loop(
                client_request_socket,
                market_data_frontend,
                market_data_backend,
                flink_request_socket,
            )
        })
        .await?
@@ -58,7 +53,6 @@ impl Relay {
        let endpoint = format!("{}:{}", self.config.bind_address, self.config.client_request_port);
        socket.bind(&endpoint)?;
        info!("Client request socket (ROUTER) bound to {}", endpoint);
        Ok(socket)
    }
@@ -71,7 +65,7 @@ impl Relay {
        let endpoint = format!("{}:{}", self.config.bind_address, self.config.market_data_pub_port);
        socket.bind(&endpoint)?;
        info!("Market data frontend (XPUB) bound to {}", endpoint);
        info!("  → Clients subscribe here; subscription events forwarded to Flink for realtime activation");
        Ok(socket)
    }
@@ -82,20 +76,19 @@ impl Relay {
        socket.connect(&self.config.flink_market_data_endpoint)?;
        info!("Market data backend (XSUB) connected to {}", self.config.flink_market_data_endpoint);
        info!("  → Receives market data and notifications from Flink");
        Ok(socket)
    }

    fn create_flink_request_socket(&self) -> Result<zmq::Socket> {
        let socket = self.context.socket(zmq::PUSH)?;
        socket.set_sndhwm(self.config.high_water_mark)?;
        socket.set_linger(1000)?;
        socket.connect(&self.config.flink_request_endpoint)?;
        info!("Flink request socket (PUSH) connected to {}", self.config.flink_request_endpoint);
        info!("  → Forwards SubmitHistoricalRequest to Flink for dispatch to ingestors");
        Ok(socket)
    }
@@ -104,7 +97,7 @@ impl Relay {
        client_request_socket: zmq::Socket,
        market_data_frontend: zmq::Socket,
        market_data_backend: zmq::Socket,
        flink_request_socket: zmq::Socket,
    ) -> Result<()> {
        let mut items = [
            client_request_socket.as_poll_item(zmq::POLLIN),
@@ -112,10 +105,9 @@ impl Relay {
            market_data_backend.as_poll_item(zmq::POLLIN),
        ];

        info!("Entering relay proxy loop");

        loop {
            zmq::poll(&mut items, 100)
                .context("Failed to poll sockets")?;
@@ -123,21 +115,20 @@ impl Relay {
            if items[0].is_readable() {
                if let Err(e) = Self::handle_client_submission(
                    &client_request_socket,
                    &flink_request_socket,
                ) {
                    error!("Error handling client submission: {}", e);
                }
            }

            // Proxy client subscription events → Flink (XPUB → XSUB)
            if items[1].is_readable() {
                if let Err(e) = Self::proxy_subscription(&market_data_frontend, &market_data_backend) {
                    error!("Error proxying subscription: {}", e);
                }
            }

            // Proxy market data from Flink → clients (XSUB → XPUB)
            if items[2].is_readable() {
                if let Err(e) = Self::proxy_market_data(&market_data_backend, &market_data_frontend) {
                    error!("Error proxying market data: {}", e);
@@ -148,7 +139,7 @@ impl Relay {
    fn handle_client_submission(
        client_socket: &zmq::Socket,
        flink_socket: &zmq::Socket,
    ) -> Result<()> {
        // Receive from client: [identity][empty][version][message]
        let identity = client_socket.recv_bytes(0)?;
@@ -177,7 +168,7 @@ impl Relay {
                    identity,
                    payload,
                    client_socket,
                    flink_socket,
                )?;
            }
            _ => {
@@ -192,61 +183,27 @@ impl Relay {
        client_identity: Vec<u8>,
        payload: &[u8],
        client_socket: &zmq::Socket,
        flink_socket: &zmq::Socket,
    ) -> Result<()> {
        // Parse just enough to build the SubmitResponse — relay stays thin
        let request = proto::SubmitHistoricalRequest::decode(payload)
            .context("Failed to parse SubmitHistoricalRequest")?;

        let request_id = request.request_id.clone();
        let client_id = request.client_id.clone();

        info!("Forwarding request to Flink: request_id={}, ticker={}", request_id, request.ticker);

        // Forward the raw request to Flink via PUSH
        // Flink builds DataRequest and dispatches to ingestors via IngestorBroker
        let version_frame = vec![PROTOCOL_VERSION];
        let mut message_frame = vec![MSG_TYPE_SUBMIT_REQUEST];
        message_frame.extend_from_slice(payload);

        flink_socket.send(&version_frame, zmq::SNDMORE)?;
        flink_socket.send(&message_frame, 0)?;

        // Build SubmitResponse — relay still acks the client immediately
// Build SubmitResponse protobuf
// NOTE: This topic is DETERMINISTIC based on client-generated values.
// Client should have already subscribed to this topic BEFORE sending the request
// to prevent race condition where notification arrives before client subscribes.
let notification_topic = if let Some(cid) = &client_id { let notification_topic = if let Some(cid) = &client_id {
format!("RESPONSE:{}", cid) format!("RESPONSE:{}", cid)
} else { } else {
@@ -263,20 +220,16 @@ impl Relay {
let mut response_bytes = Vec::new(); let mut response_bytes = Vec::new();
response.encode(&mut response_bytes)?; response.encode(&mut response_bytes)?;
// Send immediate response to client
let version_frame = vec![PROTOCOL_VERSION]; let version_frame = vec![PROTOCOL_VERSION];
let mut message_frame = vec![MSG_TYPE_SUBMIT_RESPONSE]; let mut resp_message_frame = vec![MSG_TYPE_SUBMIT_RESPONSE];
message_frame.extend_from_slice(&response_bytes); resp_message_frame.extend_from_slice(&response_bytes);
client_socket.send(&client_identity, zmq::SNDMORE)?; client_socket.send(&client_identity, zmq::SNDMORE)?;
client_socket.send(&[] as &[u8], zmq::SNDMORE)?; client_socket.send(&[] as &[u8], zmq::SNDMORE)?;
client_socket.send(&version_frame, zmq::SNDMORE)?; client_socket.send(&version_frame, zmq::SNDMORE)?;
client_socket.send(&message_frame, 0)?; client_socket.send(&resp_message_frame, 0)?;
info!("Sent SubmitResponse to client: request_id={}, topic={}", request_id, notification_topic); info!("Acked client and forwarded to Flink: request_id={}, notification_topic={}", request_id, notification_topic);
// Relay is now DONE with this request - completely stateless!
// Client will receive notification via pub/sub when Flink publishes HistoryReadyNotification
Ok(()) Ok(())
} }
@@ -285,7 +238,7 @@ impl Relay {
frontend: &zmq::Socket, frontend: &zmq::Socket,
backend: &zmq::Socket, backend: &zmq::Socket,
) -> Result<()> { ) -> Result<()> {
// Forward subscription message from XPUB to XSUB // Forward subscription event from XPUB to XSUB so Flink can detect realtime interest
let msg = frontend.recv_bytes(0)?; let msg = frontend.recv_bytes(0)?;
backend.send(&msg, 0)?; backend.send(&msg, 0)?;
@@ -302,10 +255,7 @@ impl Relay {
backend: &zmq::Socket, backend: &zmq::Socket,
frontend: &zmq::Socket, frontend: &zmq::Socket,
) -> Result<()> { ) -> Result<()> {
// Forward all messages from XSUB to XPUB (zero-copy proxy) // Zero-copy proxy: XSUB (Flink) → XPUB (clients)
// This includes:
// - Regular market data (ticks, OHLC)
// - HistoryReadyNotification from Flink
loop { loop {
let msg = backend.recv_bytes(0)?; let msg = backend.recv_bytes(0)?;
let more = backend.get_rcvmore()?; let more = backend.get_rcvmore()?;
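The removed comments spell out a client contract that still holds after this refactor: the RESPONSE:{client_id} notification topic is deterministic, so a client should subscribe before it submits or it can miss the HistoryReadyNotification. A rough pyzmq sketch of that ordering, assuming nothing beyond what the relay code above shows — the endpoints, the protocol-version byte, and the submit message-type byte are placeholders that must match the relay's real constants:

    import uuid
    import zmq

    PROTOCOL_VERSION = 1          # placeholder; must match the relay's constant
    MSG_TYPE_SUBMIT = 0x01        # placeholder; the client->relay submit type byte is not shown in this diff

    ctx = zmq.Context.instance()
    client_id = str(uuid.uuid4())

    # 1. Subscribe to the deterministic notification topic BEFORE submitting,
    #    so HistoryReadyNotification cannot race past the subscription.
    sub = ctx.socket(zmq.SUB)
    sub.connect("tcp://relay:5556")                    # market-data XPUB endpoint (placeholder)
    sub.setsockopt_string(zmq.SUBSCRIBE, f"RESPONSE:{client_id}")

    # 2. Submit over DEALER; the relay acks with SubmitResponse immediately and
    #    forwards the raw SubmitHistoricalRequest to Flink via PUSH.
    req = ctx.socket(zmq.DEALER)
    req.connect("tcp://relay:5555")                    # client request endpoint (placeholder)
    payload = b"..."                                   # serialized SubmitHistoricalRequest proto
    req.send_multipart([b"", bytes([PROTOCOL_VERSION]), bytes([MSG_TYPE_SUBMIT]) + payload])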

View File

@@ -11,7 +11,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy dependency specifications # Copy dependency specifications
COPY setup.py .
COPY environment.yml . COPY environment.yml .
COPY dexorder/ dexorder/ COPY dexorder/ dexorder/
@@ -27,9 +26,6 @@ RUN mkdir -p dexorder/generated && \
RUN conda env create -f environment.yml -p /build/env && \ RUN conda env create -f environment.yml -p /build/env && \
conda clean -afy conda clean -afy
# Install the local package into the conda environment
RUN /build/env/bin/pip install --no-cache-dir .
# ============================================================================= # =============================================================================
# Runtime stage # Runtime stage
# ============================================================================= # =============================================================================
@@ -75,7 +71,8 @@ RUN chmod 755 /app/entrypoint.sh && chown root:root /app/entrypoint.sh
USER dexorder USER dexorder
# Environment variables (can be overridden in k8s) # Environment variables (can be overridden in k8s)
ENV PYTHONUNBUFFERED=1 \ ENV PYTHONPATH=/app \
PYTHONUNBUFFERED=1 \
MPLCONFIGDIR=/tmp \ MPLCONFIGDIR=/tmp \
NUMBA_CACHE_DIR=/tmp/numba_cache \ NUMBA_CACHE_DIR=/tmp/numba_cache \
LOG_LEVEL=INFO \ LOG_LEVEL=INFO \

View File

@@ -12,6 +12,7 @@ For research scripts, import and use get_api() to access the API:
""" """
import logging import logging
import threading
from typing import Optional from typing import Optional
from dexorder.api.api import API from dexorder.api.api import API
@@ -23,10 +24,13 @@ log = logging.getLogger(__name__)
# Global API instance - managed by main.py # Global API instance - managed by main.py
_global_api: Optional[API] = None _global_api: Optional[API] = None
# Thread-local API — used by harness threads so they don't overwrite the global
_thread_local = threading.local()
def get_api() -> API: def get_api() -> API:
""" """
Get the global API instance for accessing market data and charts. Get the API instance for accessing market data and charts.
Use this in research scripts to access the data and charting APIs. Use this in research scripts to access the data and charting APIs.
@@ -53,15 +57,27 @@ def get_api() -> API:
# Create chart # Create chart
fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT") fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT")
""" """
# Thread-local takes priority (set by harness threads)
api = getattr(_thread_local, 'api', None)
if api is not None:
return api
if _global_api is None: if _global_api is None:
raise RuntimeError("API not initialized") raise RuntimeError("API not initialized")
return _global_api return _global_api
def set_api(api: API) -> None: def set_api(api: API) -> None:
"""Set the global API instance. Internal use only.""" """Set the API instance.
global _global_api
_global_api = api When called from the main thread, sets the global API used by all threads.
When called from a non-main thread (e.g. harness threads), sets a thread-local
API so the global is not overwritten.
"""
if threading.current_thread() is threading.main_thread():
global _global_api
_global_api = api
else:
_thread_local.api = api
__all__ = ['API', 'ChartingAPI', 'DataAPI', 'get_api', 'set_api'] __all__ = ['API', 'ChartingAPI', 'DataAPI', 'get_api', 'set_api']
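A small usage sketch of the split behaviour, assuming the exports shown in __all__ and an already-constructed API instance for the harness thread (the harness_api name is illustrative):

    import threading
    from dexorder.api import get_api, set_api   # import path assumed from this module's __all__

    def harness_worker(api_instance):
        # Non-main thread: set_api() writes the thread-local slot, not the global.
        set_api(api_instance)
        assert get_api() is api_instance         # this thread now sees its own API
        # ... run the harness workload against get_api() ...

    # `harness_api` stands in for a separately constructed API(...) instance.
    threading.Thread(target=harness_worker, args=(harness_api,), daemon=True).start()
    # Meanwhile get_api() on the main thread still returns the global set by main.py.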

View File

@@ -3,6 +3,12 @@ Conda Package Manager
Manages dynamic installation and cleanup of conda packages for user components. Manages dynamic installation and cleanup of conda packages for user components.
Scans metadata files to determine required packages and syncs the conda environment. Scans metadata files to determine required packages and syncs the conda environment.
Extra packages (user-installed beyond the base container) are tracked in
``extra_packages.json`` under ``data_dir`` so they can be removed when no
script references them. Packages that are later promoted into the base image
(i.e. appear in ``environment.yml``) are silently evicted from tracking
rather than uninstalled.
""" """
import json import json
@@ -12,6 +18,10 @@ import sys
from pathlib import Path from pathlib import Path
from typing import Optional, Set from typing import Optional, Set
# Filename (stored under data_dir, outside the git repo) for tracking
# user-installed extra packages.
EXTRA_PACKAGES_FILENAME = "extra_packages.json"
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -102,12 +112,35 @@ def get_installed_packages() -> Set[str]:
return set() return set()
def install_packages(packages: list[str]) -> dict: def load_extra_packages(data_dir: Path) -> Set[str]:
"""Load the set of user-installed extra packages (beyond the base container)."""
path = data_dir / EXTRA_PACKAGES_FILENAME
if path.exists():
try:
return set(json.loads(path.read_text()))
except Exception as e:
log.error(f"Failed to load extra packages: {e}")
return set()
def save_extra_packages(data_dir: Path, packages: Set[str]) -> None:
"""Persist the set of user-installed extra packages."""
path = data_dir / EXTRA_PACKAGES_FILENAME
try:
path.write_text(json.dumps(sorted(packages)))
except Exception as e:
log.error(f"Failed to save extra packages: {e}")
def install_packages(packages: list[str], data_dir: Optional[Path] = None) -> dict:
""" """
Install conda packages if not already installed. Install conda packages if not already installed.
Args: Args:
packages: List of package names to install packages: List of package names to install
data_dir: If provided, newly installed packages are added to the extra
package tracking file (``extra_packages.json``) so they can
be cleaned up when no longer needed.
Returns: Returns:
dict with: dict with:
@@ -154,6 +187,10 @@ def install_packages(packages: list[str]) -> dict:
if result.returncode == 0: if result.returncode == 0:
log.info(f"Successfully installed packages: {to_install}") log.info(f"Successfully installed packages: {to_install}")
if data_dir:
extras = load_extra_packages(data_dir)
extras.update(to_install)
save_extra_packages(data_dir, extras)
return { return {
"success": True, "success": True,
"installed": to_install, "installed": to_install,
@@ -324,9 +361,59 @@ def get_base_packages(environment_yml: Path) -> Set[str]:
# ============================================================================= # =============================================================================
# Sync Operation # Cleanup and Sync Operations
# ============================================================================= # =============================================================================
def cleanup_extra_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dict:
"""
Remove tracked extra packages that are no longer needed by any script.
Only packages previously recorded in ``extra_packages.json`` are ever
considered for removal — base container packages are never touched.
Packages that have since been promoted into the base container image
(i.e. now appear in ``environment.yml``) are quietly evicted from the
tracking file without being uninstalled.
Args:
data_dir: Base data directory (tracking file lives here)
environment_yml: Path to environment.yml for base package reconciliation
Returns:
dict with:
- success: bool
- to_remove: list[str] - packages identified for removal
- removed: list[str] - packages actually removed
- error: str (if any)
"""
src_dir = data_dir / "src"
required = scan_metadata_packages(src_dir)
base = get_base_packages(environment_yml) if environment_yml and environment_yml.exists() else set()
extras = load_extra_packages(data_dir)
# Packages promoted into the base image are no longer "extra" — evict from tracking
now_base = extras & base
if now_base:
log.info(f"Packages promoted to base image, evicting from extra tracking: {now_base}")
extras -= now_base
# Only remove packages that are tracked as extras and no longer referenced by any script
to_remove = sorted(extras - required)
result: dict = {"success": True, "to_remove": to_remove, "removed": []}
if to_remove:
remove_result = remove_packages(to_remove)
result["success"] = remove_result["success"]
result["removed"] = remove_result.get("removed", [])
if remove_result["success"]:
extras -= set(to_remove)
else:
result["error"] = remove_result.get("error")
save_extra_packages(data_dir, extras)
return result
def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dict: def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dict:
""" """
Sync conda packages with metadata requirements. Sync conda packages with metadata requirements.
@@ -350,8 +437,8 @@ def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dic
""" """
log.info("Starting conda package sync") log.info("Starting conda package sync")
# Get required packages from metadata # Metadata lives under data_dir/src/category/item/metadata.json
required_packages = scan_metadata_packages(data_dir) required_packages = scan_metadata_packages(data_dir / "src")
log.info(f"Required packages from metadata: {required_packages}") log.info(f"Required packages from metadata: {required_packages}")
# Get base packages from environment.yml # Get base packages from environment.yml

View File

@@ -42,6 +42,7 @@ class IcebergClient:
s3_endpoint: Optional[str] = None, s3_endpoint: Optional[str] = None,
s3_access_key: Optional[str] = None, s3_access_key: Optional[str] = None,
s3_secret_key: Optional[str] = None, s3_secret_key: Optional[str] = None,
s3_region: Optional[str] = None,
): ):
""" """
Initialize Iceberg client. Initialize Iceberg client.
@@ -52,6 +53,7 @@ class IcebergClient:
s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000") s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
s3_access_key: S3/MinIO access key s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key s3_secret_key: S3/MinIO secret key
s3_region: S3/MinIO region (e.g., "us-east-1")
""" """
self.catalog_uri = catalog_uri self.catalog_uri = catalog_uri
self.namespace = namespace self.namespace = namespace
@@ -64,6 +66,8 @@ class IcebergClient:
catalog_props["s3.access-key-id"] = s3_access_key catalog_props["s3.access-key-id"] = s3_access_key
if s3_secret_key: if s3_secret_key:
catalog_props["s3.secret-access-key"] = s3_secret_key catalog_props["s3.secret-access-key"] = s3_secret_key
if s3_region:
catalog_props["s3.region"] = s3_region
self.catalog = load_catalog("trading", **catalog_props) self.catalog = load_catalog("trading", **catalog_props)
self.table = self.catalog.load_table(f"{namespace}.ohlc") self.table = self.catalog.load_table(f"{namespace}.ohlc")
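A minimal construction sketch with the new parameter; endpoint, credential, and namespace values are placeholders, and s3_region simply flows into the catalog's "s3.region" property:

    client = IcebergClient(
        "http://iceberg-rest:8181",      # catalog_uri (placeholder)
        "market_data",                   # namespace; tables resolve as "<namespace>.ohlc"
        s3_endpoint="http://minio:9000",
        s3_access_key="minio",
        s3_secret_key="minio123",
        s3_region="us-east-1",           # new: sets catalog property "s3.region"
    )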

View File

@@ -15,6 +15,13 @@ log = logging.getLogger(__name__)
# Standard OHLC columns always returned # Standard OHLC columns always returned
STANDARD_COLUMNS = ["timestamp", "open", "high", "low", "close"] STANDARD_COLUMNS = ["timestamp", "open", "high", "low", "close"]
# All optional columns from the OHLC proto spec, returned by default when extra_columns=None
OHLC_OPTIONAL_COLUMNS = [
"volume", "buy_vol", "sell_vol",
"open_time", "high_time", "low_time", "close_time",
"open_interest",
]
# All valid extra columns available in the Iceberg schema # All valid extra columns available in the Iceberg schema
VALID_EXTRA_COLUMNS = { VALID_EXTRA_COLUMNS = {
"volume", "buy_vol", "sell_vol", "volume", "buy_vol", "sell_vol",
@@ -43,6 +50,7 @@ class DataAPIImpl(DataAPI):
s3_endpoint: Optional[str] = None, s3_endpoint: Optional[str] = None,
s3_access_key: Optional[str] = None, s3_access_key: Optional[str] = None,
s3_secret_key: Optional[str] = None, s3_secret_key: Optional[str] = None,
s3_region: Optional[str] = None,
request_timeout: float = 30.0, request_timeout: float = 30.0,
): ):
""" """
@@ -56,6 +64,7 @@ class DataAPIImpl(DataAPI):
s3_endpoint: S3/MinIO endpoint URL (e.g., "http://minio:9000") s3_endpoint: S3/MinIO endpoint URL (e.g., "http://minio:9000")
s3_access_key: S3/MinIO access key s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key s3_secret_key: S3/MinIO secret key
s3_region: S3/MinIO region (e.g., "us-east-1")
request_timeout: Default timeout for historical data requests in seconds (default: 30) request_timeout: Default timeout for historical data requests in seconds (default: 30)
""" """
self.ohlc_client = OHLCClient( self.ohlc_client = OHLCClient(
@@ -66,6 +75,7 @@ class DataAPIImpl(DataAPI):
s3_endpoint=s3_endpoint, s3_endpoint=s3_endpoint,
s3_access_key=s3_access_key, s3_access_key=s3_access_key,
s3_secret_key=s3_secret_key, s3_secret_key=s3_secret_key,
s3_region=s3_region,
) )
self.request_timeout = request_timeout self.request_timeout = request_timeout
self._started = False self._started = False
@@ -120,7 +130,9 @@ class DataAPIImpl(DataAPI):
# Determine which columns to fetch # Determine which columns to fetch
columns_to_fetch = STANDARD_COLUMNS.copy() columns_to_fetch = STANDARD_COLUMNS.copy()
if extra_columns: if extra_columns is None:
columns_to_fetch.extend(OHLC_OPTIONAL_COLUMNS)
elif extra_columns:
columns_to_fetch.extend(extra_columns) columns_to_fetch.extend(extra_columns)
# Use OHLCClient which handles smart caching: # Use OHLCClient which handles smart caching:
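The practical effect of the default change, sketched against an assumed fetch method (the method name, ticker, and time arguments are illustrative — only the column-selection behaviour comes from this hunk):

    # `api` stands for a started DataAPIImpl instance.
    start, end = 1_700_000_000, 1_700_086_400      # illustrative Unix-second range

    # extra_columns=None (the default) now pulls every optional OHLC column:
    # volume, buy_vol, sell_vol, open/high/low/close_time, open_interest.
    df_full = api.get_ohlc("BTC/USDT.BINANCE", start, end, period_seconds=3600)

    # An explicit list still narrows the fetch ...
    df_vol = api.get_ohlc("BTC/USDT.BINANCE", start, end, period_seconds=3600,
                          extra_columns=["volume"])

    # ... and an empty list keeps only the standard timestamp/open/high/low/close.
    df_bare = api.get_ohlc("BTC/USDT.BINANCE", start, end, period_seconds=3600,
                           extra_columns=[])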

View File

@@ -93,6 +93,82 @@ def _load_strategy_class(impl_path: Path) -> type:
# Metrics extraction # Metrics extraction
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _money_to_float(val) -> float | None:
"""Convert a Nautilus Money object or string like '15.32 USDT' to float."""
if val is None:
return None
try:
if hasattr(val, "as_decimal"):
return float(val.as_decimal())
s = str(val).strip()
if s and s.lower() not in ("none", "nan"):
return float(s.split()[0])
except (ValueError, TypeError, IndexError):
pass
return None
def _ts_to_s(raw) -> int | None:
"""Convert a Nautilus nanosecond timestamp to Unix seconds."""
try:
return int(raw) // 1_000_000_000
except (TypeError, ValueError):
return None
def _extract_fills(engine) -> pd.DataFrame:
"""Return a sorted fills DataFrame from BacktestEngine, or empty DataFrame."""
try:
df = engine.trader.generate_order_fills_report()
if df is not None and len(df) > 0:
if "ts_event" in df.columns:
df = df.sort_values("ts_event")
return df
except Exception as exc:
log.debug("generate_order_fills_report() failed: %s", exc)
return pd.DataFrame()
def _extract_trades(fills_df: pd.DataFrame, initial_capital: float) -> list[dict]:
"""
Pair fills into round-trip trades: buy → sell or sell → buy.
Returns a list of trade dicts (capped at 500 for large backtests).
"""
if fills_df.empty:
return []
trades: list[dict] = []
open_positions: dict[str, dict] = {} # instrument_id -> pending entry
for _, fill in fills_df.iterrows():
instrument = str(fill.get("instrument_id", ""))
side = str(fill.get("order_side", "")).upper()
qty = _money_to_float(fill.get("last_qty")) or 0.0
price = _money_to_float(fill.get("last_px")) or 0.0
ts_s = _ts_to_s(fill.get("ts_event"))
rpnl = _money_to_float(fill.get("realized_pnl"))
if rpnl is not None and rpnl != 0.0:
# This fill closes a position — record as a completed trade
entry = open_positions.pop(instrument, None)
trade = {
"instrument": instrument,
"side": side,
"quantity": round(qty, 8),
"entry_price": round(entry["price"], 8) if entry else None,
"exit_price": round(price, 8),
"entry_time": entry["ts_s"] if entry else None,
"exit_time": ts_s,
"pnl": round(rpnl, 6),
}
trades.append(trade)
else:
# Opening fill — store for pairing
open_positions[instrument] = {"price": price, "ts_s": ts_s, "side": side}
return trades[:500] # cap for large backtests
def _compute_metrics( def _compute_metrics(
engine, engine,
venue_strs: list[str], venue_strs: list[str],
@@ -100,17 +176,18 @@ def _compute_metrics(
all_bars: list, all_bars: list,
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Extract performance metrics from a completed BacktestEngine. Extract structured performance metrics from a completed BacktestEngine.
Returns dict with: Returns dict with:
total_return float — fractional (0.15 = +15%) summary dict — core metrics (total_return, sharpe, drawdown, etc.)
sharpe_ratio float — annualized; 0.0 if no trades or constant equity statistics dict — extended stats (sortino, calmar, profit_factor, etc.)
max_drawdown float — max peak-to-trough as fraction (0.10 = 10% drawdown) trades list — individual round-trip trades (capped at 500)
win_rate float — fraction of trades with positive realized PnL equity_curve list[{timestamp: int_unix_s, equity: float}]
trade_count int
equity_curve list[{timestamp: int_unix_s, equity: float}]
""" """
# Reconstruct equity curve from fills fills_df = _extract_fills(engine)
trades = _extract_trades(fills_df, initial_capital)
# --- Equity curve reconstruction ---
equity_points: list[dict] = [] equity_points: list[dict] = []
if all_bars: if all_bars:
equity_points.append({ equity_points.append({
@@ -121,51 +198,24 @@ def _compute_metrics(
running_equity = initial_capital running_equity = initial_capital
trade_count = 0 trade_count = 0
winning_trades = 0 winning_trades = 0
total_profit = 0.0
total_loss = 0.0
try: if not fills_df.empty:
fills_df = engine.trader.generate_order_fills_report()
except Exception as exc:
log.debug("generate_order_fills_report() failed: %s", exc)
fills_df = None
if fills_df is not None and len(fills_df) > 0:
# Sort by event time
if "ts_event" in fills_df.columns:
fills_df = fills_df.sort_values("ts_event")
for _, fill in fills_df.iterrows(): for _, fill in fills_df.iterrows():
rpnl = fill.get("realized_pnl") if hasattr(fill, "get") else None rpnl = _money_to_float(fill.get("realized_pnl"))
if rpnl is None: if rpnl is None or rpnl == 0.0:
continue continue
ts_s = _ts_to_s(fill.get("ts_event"))
# Nautilus Money objects: str form is "15.32 USDT" running_equity += rpnl
rpnl_float: float | None = None trade_count += 1
try: if rpnl > 0:
if hasattr(rpnl, "as_decimal"): winning_trades += 1
rpnl_float = float(rpnl.as_decimal()) total_profit += rpnl
elif rpnl is not None: else:
rpnl_str = str(rpnl).strip() total_loss += abs(rpnl)
if rpnl_str and rpnl_str.lower() not in ("none", "nan"): if ts_s is not None:
rpnl_float = float(rpnl_str.split()[0]) equity_points.append({"timestamp": ts_s, "equity": running_equity})
except (ValueError, TypeError, IndexError):
pass
if rpnl_float is not None and rpnl_float != 0.0:
ts_s: int | None = None
raw_ts = fill.get("ts_event") if hasattr(fill, "get") else None
if raw_ts is not None:
try:
ts_s = int(raw_ts) // 1_000_000_000
except (TypeError, ValueError):
pass
running_equity += rpnl_float
trade_count += 1
if rpnl_float > 0:
winning_trades += 1
if ts_s is not None:
equity_points.append({"timestamp": ts_s, "equity": running_equity})
if all_bars: if all_bars:
equity_points.append({ equity_points.append({
@@ -173,19 +223,16 @@ def _compute_metrics(
"equity": running_equity, "equity": running_equity,
}) })
# Try to get actual final balance from the account (more accurate than fill reconstruction) # Prefer definitive final balance from account cache
try: try:
from nautilus_trader.model.identifiers import Venue from nautilus_trader.model.identifiers import Venue
for venue_str in venue_strs: for venue_str in venue_strs:
account = engine.cache.account_for_venue(Venue(venue_str)) account = engine.cache.account_for_venue(Venue(venue_str))
if account is None: if account is None:
continue continue
# Sum all balances (quote currency is what we started with)
for bal in account.balances().values(): for bal in account.balances().values():
total = getattr(bal, "total", None) final_val = _money_to_float(getattr(bal, "total", None))
if total is not None: if final_val is not None:
final_val = float(str(total).split()[0]) if not hasattr(total, "as_decimal") else float(total.as_decimal())
# Use the account balance as the definitive final equity
running_equity = final_val running_equity = final_val
if equity_points: if equity_points:
equity_points[-1]["equity"] = running_equity equity_points[-1]["equity"] = running_equity
@@ -193,36 +240,71 @@ def _compute_metrics(
except Exception as exc: except Exception as exc:
log.debug("Account balance extraction failed: %s", exc) log.debug("Account balance extraction failed: %s", exc)
# Core metrics # --- Core metrics ---
total_return = (running_equity - initial_capital) / initial_capital if initial_capital else 0.0 total_return = (running_equity - initial_capital) / initial_capital if initial_capital else 0.0
win_rate = winning_trades / trade_count if trade_count > 0 else 0.0 win_rate = winning_trades / trade_count if trade_count > 0 else 0.0
profit_factor = (total_profit / total_loss) if total_loss > 0 else (float("inf") if total_profit > 0 else 0.0)
# Determine bar duration for annualisation
bar_duration_ns = 0.0
if all_bars and len(all_bars) > 1:
bar_duration_ns = (all_bars[-1].ts_event - all_bars[0].ts_event) / max(len(all_bars) - 1, 1)
bars_per_year = (365 * 24 * 3600 * 1e9) / bar_duration_ns if bar_duration_ns > 0 else 0.0
equity_series = pd.Series([p["equity"] for p in equity_points]) if len(equity_points) > 2 else pd.Series([initial_capital, running_equity])
returns = equity_series.pct_change().dropna()
# Sharpe ratio (annualized) from equity curve returns
sharpe = 0.0 sharpe = 0.0
if len(equity_points) > 2 and all_bars and len(all_bars) > 1: sortino = 0.0
equity_series = pd.Series([p["equity"] for p in equity_points]) if len(returns) > 1 and bars_per_year > 0:
returns = equity_series.pct_change().dropna() mean_r = returns.mean()
if len(returns) > 1 and returns.std() > 0: std_r = returns.std()
bar_duration_ns = (all_bars[-1].ts_event - all_bars[0].ts_event) / max(len(all_bars) - 1, 1) if std_r > 0:
if bar_duration_ns > 0: sharpe = float((mean_r / std_r) * (bars_per_year ** 0.5))
bars_per_year = (365 * 24 * 3600 * 1e9) / bar_duration_ns downside = returns[returns < 0]
sharpe = float((returns.mean() / returns.std()) * (bars_per_year ** 0.5)) downside_std = downside.std() if len(downside) > 1 else 0.0
if downside_std > 0:
sortino = float((mean_r / downside_std) * (bars_per_year ** 0.5))
# Max drawdown # Max drawdown
max_drawdown = 0.0 max_drawdown = 0.0
if len(equity_points) > 1: if len(equity_series) > 1:
equity_arr = pd.Series([p["equity"] for p in equity_points]) rolling_max = equity_series.cummax()
rolling_max = equity_arr.cummax() drawdowns = (equity_series - rolling_max) / rolling_max.replace(0, float("nan"))
drawdowns = (equity_arr - rolling_max) / rolling_max.replace(0, float("nan"))
max_drawdown = float(abs(drawdowns.min())) if len(drawdowns) > 0 else 0.0 max_drawdown = float(abs(drawdowns.min())) if len(drawdowns) > 0 else 0.0
# Calmar ratio
annualized_return = 0.0
if bars_per_year > 0 and len(all_bars) > 1:
years = (all_bars[-1].ts_event - all_bars[0].ts_event) / (365 * 24 * 3600 * 1e9)
if years > 0:
annualized_return = (running_equity / initial_capital) ** (1.0 / years) - 1 if initial_capital else 0.0
calmar = annualized_return / max_drawdown if max_drawdown > 0 else 0.0
# Average win / average loss
avg_win = total_profit / winning_trades if winning_trades > 0 else 0.0
avg_loss = total_loss / (trade_count - winning_trades) if (trade_count - winning_trades) > 0 else 0.0
return { return {
"total_return": round(total_return, 6), "summary": {
"sharpe_ratio": round(sharpe, 4), "total_return": round(total_return, 6),
"max_drawdown": round(max_drawdown, 6), "sharpe_ratio": round(sharpe, 4),
"win_rate": round(win_rate, 4), "max_drawdown": round(max_drawdown, 6),
"trade_count": trade_count, "win_rate": round(win_rate, 4),
"equity_curve": equity_points, "trade_count": trade_count,
"total_trades": len(trades),
},
"statistics": {
"sortino_ratio": round(sortino, 4),
"calmar_ratio": round(calmar, 4),
"profit_factor": round(profit_factor, 4) if profit_factor != float("inf") else None,
"avg_win": round(avg_win, 4),
"avg_loss": round(avg_loss, 4),
"total_profit": round(total_profit, 4),
"total_loss": round(total_loss, 4),
},
"trades": trades,
"equity_curve": equity_points,
} }
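A short sketch of consuming the restructured result (the engine/venue/capital/bars inputs are whatever the surrounding runner already has in scope; only the key layout comes from the return value above):

    metrics = _compute_metrics(engine, venue_strs, initial_capital, all_bars)  # argument order assumed

    summary, stats = metrics["summary"], metrics["statistics"]
    print(f"return={summary['total_return']:+.2%}  sharpe={summary['sharpe_ratio']}  "
          f"maxDD={summary['max_drawdown']:.2%}  trades={summary['trade_count']}")
    print(f"sortino={stats['sortino_ratio']}  calmar={stats['calmar_ratio']}  "
          f"profit_factor={stats['profit_factor']}")

    # Round-trip trades (capped at 500) and the equity curve are now returned directly:
    for trade in metrics["trades"][:5]:
        print(trade["instrument"], trade["side"], trade["pnl"])
    curve = [(p["timestamp"], p["equity"]) for p in metrics["equity_curve"]]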

View File

@@ -13,6 +13,7 @@ make_instrument_from_metadata — instrument with best-effort precision
from __future__ import annotations from __future__ import annotations
import logging import logging
from decimal import Decimal
from typing import Optional from typing import Optional
import pandas as pd import pandas as pd
@@ -71,8 +72,8 @@ def make_instrument(
size_precision: int = 8, size_precision: int = 8,
tick_size: Optional[float] = None, tick_size: Optional[float] = None,
lot_size: Optional[float] = None, lot_size: Optional[float] = None,
maker_fee: float = 0.001, maker_fee: float = 0.0,
taker_fee: float = 0.001, taker_fee: float = 0.0,
margin_init: float = 0.0, margin_init: float = 0.0,
margin_maint: float = 0.0, margin_maint: float = 0.0,
) -> CurrencyPair: ) -> CurrencyPair:
@@ -118,8 +119,8 @@ def make_instrument(
min_price=None, min_price=None,
margin_init=margin_init, margin_init=margin_init,
margin_maint=margin_maint, margin_maint=margin_maint,
maker_fee=maker_fee, maker_fee=Decimal(str(maker_fee)),
taker_fee=taker_fee, taker_fee=Decimal(str(taker_fee)),
ts_event=ts_now, ts_event=ts_now,
ts_init=ts_now, ts_init=ts_now,
) )
@@ -154,8 +155,8 @@ def make_instrument_from_metadata(ticker: str) -> tuple[CurrencyPair, int, int]:
size_precision=sp, size_precision=sp,
tick_size=meta.tick_size, tick_size=meta.tick_size,
lot_size=meta.lot_size, lot_size=meta.lot_size,
maker_fee=meta.maker_fee or 0.001, maker_fee=meta.maker_fee or 0.0,
taker_fee=meta.taker_fee or 0.001, taker_fee=meta.taker_fee or 0.0,
margin_init=meta.margin_init or 0.0, margin_init=meta.margin_init or 0.0,
margin_maint=meta.margin_maint or 0.0, margin_maint=meta.margin_maint or 0.0,
) )

View File

@@ -39,6 +39,7 @@ class OHLCClient:
s3_endpoint: str = None, s3_endpoint: str = None,
s3_access_key: str = None, s3_access_key: str = None,
s3_secret_key: str = None, s3_secret_key: str = None,
s3_region: str = None,
): ):
""" """
Initialize OHLC client. Initialize OHLC client.
@@ -51,12 +52,14 @@ class OHLCClient:
s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000") s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
s3_access_key: S3/MinIO access key s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key s3_secret_key: S3/MinIO secret key
s3_region: S3/MinIO region (e.g., "us-east-1")
""" """
self.iceberg = IcebergClient( self.iceberg = IcebergClient(
iceberg_catalog_uri, namespace, iceberg_catalog_uri, namespace,
s3_endpoint=s3_endpoint, s3_endpoint=s3_endpoint,
s3_access_key=s3_access_key, s3_access_key=s3_access_key,
s3_secret_key=s3_secret_key, s3_secret_key=s3_secret_key,
s3_region=s3_region,
) )
self.history = HistoryClient(relay_endpoint, notification_endpoint) self.history = HistoryClient(relay_endpoint, notification_endpoint)
log.info("OHLCClient initialized") log.info("OHLCClient initialized")
@@ -122,7 +125,7 @@ class OHLCClient:
if not missing_ranges: if not missing_ranges:
# All data exists in Iceberg # All data exists in Iceberg
return self._forward_fill_gaps(df, period_seconds) return df
# Step 3: Request missing data for each range # Step 3: Request missing data for each range
# For simplicity, request entire range (relay can merge adjacent requests) # For simplicity, request entire range (relay can merge adjacent requests)

View File

@@ -0,0 +1 @@
# Strategy runtime package

View File

@@ -0,0 +1,361 @@
"""
SQLite database for strategy execution state, trade logs, and backtest history.
All data is stored under DATA_DIR/dexorder.db.
Uses aiosqlite for async compatibility with the MCP server's event loop.
"""
from __future__ import annotations
import json
import logging
import time
from pathlib import Path
from typing import Any, Optional
log = logging.getLogger(__name__)
_SCHEMA = """
CREATE TABLE IF NOT EXISTS strategies (
name TEXT PRIMARY KEY,
status TEXT NOT NULL DEFAULT 'stopped',
git_rev TEXT,
worktree_path TEXT,
started_at REAL,
stopped_at REAL,
allocation REAL NOT NULL DEFAULT 0,
paper INTEGER NOT NULL DEFAULT 1,
feeds_json TEXT,
config_json TEXT
);
CREATE TABLE IF NOT EXISTS strategy_state (
name TEXT PRIMARY KEY,
realized_pnl REAL NOT NULL DEFAULT 0,
unrealized_pnl REAL NOT NULL DEFAULT 0,
trade_count INTEGER NOT NULL DEFAULT 0,
positions_json TEXT,
updated_at REAL NOT NULL DEFAULT 0
);
CREATE TABLE IF NOT EXISTS trades (
id INTEGER PRIMARY KEY AUTOINCREMENT,
strategy_name TEXT NOT NULL,
instrument TEXT NOT NULL,
side TEXT NOT NULL,
quantity REAL NOT NULL,
entry_price REAL,
exit_price REAL NOT NULL,
entry_time REAL,
exit_time REAL NOT NULL,
pnl REAL NOT NULL,
recorded_at REAL NOT NULL DEFAULT (unixepoch())
);
CREATE TABLE IF NOT EXISTS backtest_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
strategy_name TEXT NOT NULL,
ran_at REAL NOT NULL DEFAULT (unixepoch()),
from_time REAL,
to_time REAL,
initial_capital REAL,
feeds_json TEXT,
summary_json TEXT,
statistics_json TEXT,
trades_json TEXT,
equity_curve_json TEXT
);
CREATE TABLE IF NOT EXISTS strategy_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
strategy_name TEXT NOT NULL,
event_type TEXT NOT NULL,
payload_json TEXT,
recorded_at REAL NOT NULL DEFAULT (unixepoch())
);
CREATE INDEX IF NOT EXISTS idx_trades_strategy ON trades(strategy_name);
CREATE INDEX IF NOT EXISTS idx_backtest_strategy ON backtest_runs(strategy_name);
CREATE INDEX IF NOT EXISTS idx_events_strategy ON strategy_events(strategy_name);
"""
class StrategyDB:
"""Async SQLite interface for strategy persistence."""
def __init__(self, db_path: Path):
self.db_path = db_path
async def initialize(self) -> None:
"""Create tables if they don't exist."""
import aiosqlite
self.db_path.parent.mkdir(parents=True, exist_ok=True)
async with aiosqlite.connect(self.db_path) as db:
await db.executescript(_SCHEMA)
await db.commit()
log.info("StrategyDB initialized at %s", self.db_path)
# ------------------------------------------------------------------
# Strategy lifecycle
# ------------------------------------------------------------------
async def upsert_strategy(
self,
name: str,
status: str,
allocation: float,
paper: bool,
feeds: list[dict],
git_rev: Optional[str] = None,
worktree_path: Optional[str] = None,
config: Optional[dict] = None,
) -> None:
import aiosqlite
now = time.time()
async with aiosqlite.connect(self.db_path) as db:
await db.execute("""
INSERT INTO strategies
(name, status, git_rev, worktree_path, started_at, allocation, paper, feeds_json, config_json)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(name) DO UPDATE SET
status=excluded.status,
git_rev=excluded.git_rev,
worktree_path=excluded.worktree_path,
started_at=excluded.started_at,
allocation=excluded.allocation,
paper=excluded.paper,
feeds_json=excluded.feeds_json,
config_json=excluded.config_json
""", (
name, status, git_rev, worktree_path, now,
allocation, int(paper),
json.dumps(feeds),
json.dumps(config or {}),
))
await db.commit()
async def update_strategy_status(self, name: str, status: str) -> None:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
if status == "stopped":
await db.execute(
"UPDATE strategies SET status=?, stopped_at=? WHERE name=?",
(status, time.time(), name)
)
else:
await db.execute("UPDATE strategies SET status=? WHERE name=?", (status, name))
await db.commit()
async def get_strategy(self, name: str) -> Optional[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute("SELECT * FROM strategies WHERE name=?", (name,)) as cur:
row = await cur.fetchone()
return dict(row) if row else None
async def get_all_strategies(self) -> list[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute("SELECT * FROM strategies ORDER BY started_at DESC") as cur:
rows = await cur.fetchall()
return [dict(r) for r in rows]
async def get_running_strategies(self) -> list[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
"SELECT * FROM strategies WHERE status='running' OR status='starting'",
) as cur:
rows = await cur.fetchall()
return [dict(r) for r in rows]
# ------------------------------------------------------------------
# PnL state
# ------------------------------------------------------------------
async def update_pnl_state(
self,
name: str,
realized_pnl: float,
unrealized_pnl: float,
trade_count: int,
positions: Optional[dict] = None,
) -> None:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
await db.execute("""
INSERT INTO strategy_state
(name, realized_pnl, unrealized_pnl, trade_count, positions_json, updated_at)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(name) DO UPDATE SET
realized_pnl=excluded.realized_pnl,
unrealized_pnl=excluded.unrealized_pnl,
trade_count=excluded.trade_count,
positions_json=excluded.positions_json,
updated_at=excluded.updated_at
""", (name, realized_pnl, unrealized_pnl, trade_count,
json.dumps(positions or {}), time.time()))
await db.commit()
async def get_pnl_state(self, name: str) -> Optional[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute("SELECT * FROM strategy_state WHERE name=?", (name,)) as cur:
row = await cur.fetchone()
return dict(row) if row else None
# ------------------------------------------------------------------
# Trades
# ------------------------------------------------------------------
async def insert_trade(self, strategy_name: str, trade: dict) -> None:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
await db.execute("""
INSERT INTO trades
(strategy_name, instrument, side, quantity, entry_price,
exit_price, entry_time, exit_time, pnl)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
strategy_name,
trade.get("instrument", ""),
trade.get("side", ""),
trade.get("quantity", 0),
trade.get("entry_price"),
trade.get("exit_price", 0),
trade.get("entry_time"),
trade.get("exit_time", time.time()),
trade.get("pnl", 0),
))
await db.commit()
async def get_trades(self, strategy_name: str, limit: int = 200) -> list[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
"SELECT * FROM trades WHERE strategy_name=? ORDER BY exit_time DESC LIMIT ?",
(strategy_name, limit),
) as cur:
rows = await cur.fetchall()
return [dict(r) for r in rows]
# ------------------------------------------------------------------
# Backtest runs
# ------------------------------------------------------------------
async def insert_backtest(
self,
strategy_name: str,
from_time: Any,
to_time: Any,
initial_capital: float,
feeds: list[dict],
summary: dict,
statistics: dict,
trades: list[dict],
equity_curve: list[dict],
) -> int:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
cur = await db.execute("""
INSERT INTO backtest_runs
(strategy_name, from_time, to_time, initial_capital, feeds_json,
summary_json, statistics_json, trades_json, equity_curve_json)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
strategy_name,
float(from_time) if from_time else None,
float(to_time) if to_time else None,
initial_capital,
json.dumps(feeds),
json.dumps(summary),
json.dumps(statistics),
json.dumps(trades[:500]), # cap
json.dumps(equity_curve),
))
await db.commit()
return cur.lastrowid
async def get_backtests(self, strategy_name: str, limit: int = 10) -> list[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
"SELECT * FROM backtest_runs WHERE strategy_name=? ORDER BY ran_at DESC LIMIT ?",
(strategy_name, limit),
) as cur:
rows = await cur.fetchall()
result = []
for r in rows:
d = dict(r)
for key in ("feeds_json", "summary_json", "statistics_json",
"trades_json", "equity_curve_json"):
if d.get(key):
plain = key.replace("_json", "")
d[plain] = json.loads(d.pop(key))
else:
d.pop(key, None)
result.append(d)
return result
# ------------------------------------------------------------------
# Events
# ------------------------------------------------------------------
async def insert_event(self, strategy_name: str, event_type: str, payload: dict) -> None:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
await db.execute(
"INSERT INTO strategy_events (strategy_name, event_type, payload_json) VALUES (?, ?, ?)",
(strategy_name, event_type, json.dumps(payload)),
)
await db.commit()
async def get_events(
self,
strategy_name: str,
event_type: Optional[str] = None,
limit: int = 100,
) -> list[dict]:
import aiosqlite
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
if event_type:
async with db.execute(
"SELECT * FROM strategy_events WHERE strategy_name=? AND event_type=? "
"ORDER BY recorded_at DESC LIMIT ?",
(strategy_name, event_type, limit),
) as cur:
rows = await cur.fetchall()
else:
async with db.execute(
"SELECT * FROM strategy_events WHERE strategy_name=? "
"ORDER BY recorded_at DESC LIMIT ?",
(strategy_name, limit),
) as cur:
rows = await cur.fetchall()
result = []
for r in rows:
d = dict(r)
if d.get("payload_json"):
d["payload"] = json.loads(d.pop("payload_json"))
result.append(d)
return result
# Singleton
_db: Optional[StrategyDB] = None
def get_strategy_db(data_dir: Optional[Path] = None) -> StrategyDB:
global _db
if _db is None:
if data_dir is None:
import os
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
_db = StrategyDB(data_dir / "dexorder.db")
return _db
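A minimal async sketch of the persistence layer above; paths and values are illustrative:

    import asyncio
    from pathlib import Path

    async def demo():
        db = get_strategy_db(Path("/app/data"))          # singleton accessor defined above
        await db.initialize()

        await db.upsert_strategy(
            name="mean_reversion",
            status="running",
            allocation=10_000.0,
            paper=True,
            feeds=[{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}],
        )
        await db.update_pnl_state("mean_reversion",
                                  realized_pnl=12.5, unrealized_pnl=-3.1, trade_count=4)

        print(await db.get_strategy("mean_reversion"))
        print(await db.get_trades("mean_reversion", limit=10))

    asyncio.run(demo())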

View File

@@ -0,0 +1,152 @@
"""
StrategyEventBridge — receives internal strategy events from subprocesses
and forwards them to the user-facing EventPublisher.
Architecture:
Strategy subprocess ──PUSH──> [IPC socket] ──PULL──> StrategyEventBridge
└─> EventPublisher
├── XPUB (informational)
└── DEALER (critical)
"""
from __future__ import annotations
import asyncio
import logging
import time
from typing import Optional
import zmq
import zmq.asyncio
from .events import StrategyEvent, StrategyEventType, IPC_ENDPOINT
log = logging.getLogger(__name__)
# How long without a heartbeat before a strategy is considered dead (seconds)
HEARTBEAT_TIMEOUT = 60.0
class StrategyEventBridge:
"""
Binds a ZMQ PULL socket and relays strategy events to EventPublisher.
Also monitors heartbeats to detect crashed strategy subprocesses.
"""
def __init__(self, event_publisher, strategy_lifecycle=None):
"""
Args:
event_publisher: dexorder.events.publisher.EventPublisher instance
strategy_lifecycle: StrategyLifecycleManager (optional) for marking crashed strategies
"""
self._publisher = event_publisher
self._lifecycle = strategy_lifecycle
self._ctx: Optional[zmq.asyncio.Context] = None
self._socket: Optional[zmq.asyncio.Socket] = None
self._task: Optional[asyncio.Task] = None
self._heartbeat_task: Optional[asyncio.Task] = None
self._last_heartbeat: dict[str, float] = {} # strategy_name -> timestamp
self._running = False
async def start(self) -> None:
"""Bind PULL socket and start receive loop."""
self._ctx = zmq.asyncio.Context.instance()
self._socket = self._ctx.socket(zmq.PULL)
self._socket.bind(IPC_ENDPOINT)
self._running = True
self._task = asyncio.create_task(self._receive_loop())
self._heartbeat_task = asyncio.create_task(self._heartbeat_monitor())
log.info("StrategyEventBridge started on %s", IPC_ENDPOINT)
async def stop(self) -> None:
"""Stop receive loop and close socket."""
self._running = False
for task in [self._task, self._heartbeat_task]:
if task:
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
if self._socket:
self._socket.close()
log.info("StrategyEventBridge stopped")
def notify_strategy_started(self, strategy_name: str) -> None:
"""Called by lifecycle manager when a strategy subprocess starts."""
self._last_heartbeat[strategy_name] = time.time()
def notify_strategy_stopped(self, strategy_name: str) -> None:
"""Called by lifecycle manager when a strategy is deactivated."""
self._last_heartbeat.pop(strategy_name, None)
async def _receive_loop(self) -> None:
while self._running:
try:
raw = await asyncio.wait_for(self._socket.recv(), timeout=1.0)
event = StrategyEvent.deserialize(raw)
await self._handle_event(event)
except asyncio.TimeoutError:
continue
except asyncio.CancelledError:
raise
except Exception as e:
log.error("Error receiving strategy event: %s", e)
async def _handle_event(self, event: StrategyEvent) -> None:
"""Translate internal StrategyEvent to UserEvent and publish."""
from dexorder.events.types import EventType, Priority
from dexorder.events.publisher import DeliverySpec
name = event.strategy_name
if event.event_type == StrategyEventType.HEARTBEAT:
self._last_heartbeat[name] = time.time()
return # heartbeats are not forwarded to the user
# Map to UserEvent types
type_map = {
StrategyEventType.STARTED: (EventType.STRATEGY_STARTED, Priority.INFORMATIONAL),
StrategyEventType.STOPPED: (EventType.STRATEGY_STOPPED, Priority.INFORMATIONAL),
StrategyEventType.ORDER_SUBMITTED: (EventType.ORDER_PLACED, Priority.NORMAL),
StrategyEventType.ORDER_FILLED: (EventType.ORDER_FILLED, Priority.CRITICAL),
StrategyEventType.POSITION_UPDATE: (EventType.POSITION_UPDATED, Priority.INFORMATIONAL),
StrategyEventType.PNL_UPDATE: (EventType.STRATEGY_LOG, Priority.INFORMATIONAL),
StrategyEventType.ERROR: (EventType.STRATEGY_ERROR, Priority.CRITICAL),
StrategyEventType.LOG: (EventType.STRATEGY_LOG, Priority.INFORMATIONAL),
}
et, priority = type_map.get(event.event_type, (EventType.STRATEGY_LOG, Priority.INFORMATIONAL))
payload = {"strategy_name": name, **event.payload}
delivery = (
DeliverySpec.critical() if priority == Priority.CRITICAL
else DeliverySpec.informational()
)
try:
from dexorder.events.types import UserEvent as UE
await self._publisher.publish(UE(
event_type=et,
payload=payload,
delivery=delivery,
))
except Exception as e:
log.error("Failed to publish strategy event %s: %s", event.event_type, e)
async def _heartbeat_monitor(self) -> None:
"""Periodically check for strategies that stopped sending heartbeats."""
while self._running:
try:
await asyncio.sleep(30)
now = time.time()
for name, last_seen in list(self._last_heartbeat.items()):
if now - last_seen > HEARTBEAT_TIMEOUT:
log.warning("Strategy '%s' missed heartbeat, marking as crashed", name)
self._last_heartbeat.pop(name, None)
if self._lifecycle:
await self._lifecycle.mark_crashed(name)
except asyncio.CancelledError:
raise
except Exception as e:
log.error("Heartbeat monitor error: %s", e)

View File

@@ -0,0 +1,61 @@
"""
Internal strategy event types for subprocess → main-process communication.
Strategy subprocesses push StrategyEvents via ZMQ PUSH socket.
The main process's StrategyEventBridge receives them via PULL and forwards
them to the user-facing EventPublisher (dexorder/events/publisher.py).
"""
from __future__ import annotations
import json
import time
import uuid
from dataclasses import dataclass, field
from enum import IntEnum
class StrategyEventType(IntEnum):
"""Internal event types produced by strategy subprocesses."""
STARTED = 1
STOPPED = 2
HEARTBEAT = 3
ORDER_SUBMITTED = 10
ORDER_FILLED = 11
POSITION_UPDATE = 20
PNL_UPDATE = 21
ERROR = 30
LOG = 31
@dataclass
class StrategyEvent:
"""Internal event envelope sent from strategy subprocess to main process."""
event_type: StrategyEventType
strategy_name: str
payload: dict
timestamp: float = field(default_factory=time.time)
event_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
def serialize(self) -> bytes:
return json.dumps({
"event_type": int(self.event_type),
"strategy_name": self.strategy_name,
"payload": self.payload,
"timestamp": self.timestamp,
"event_id": self.event_id,
}).encode()
@classmethod
def deserialize(cls, data: bytes) -> "StrategyEvent":
d = json.loads(data.decode())
return cls(
event_type=StrategyEventType(d["event_type"]),
strategy_name=d["strategy_name"],
payload=d.get("payload", {}),
timestamp=d.get("timestamp", time.time()),
event_id=d.get("event_id", ""),
)
# IPC endpoint used for strategy subprocess → main process communication
IPC_ENDPOINT = "ipc:///tmp/dexorder-strategy-events.sock"

View File

@@ -0,0 +1,322 @@
"""
StrategyLifecycleManager — manages running strategy subprocesses.
Responsibilities:
- Starting strategy subprocesses from git worktrees
- Stopping subprocesses on deactivation
- Persisting state to SQLite for crash recovery
- Registering strategies as LifecycleManager triggers (prevents idle shutdown)
- Enforcing max concurrent strategy limit
"""
from __future__ import annotations
import asyncio
import json
import logging
import threading
import time
from pathlib import Path
from typing import Optional
log = logging.getLogger(__name__)
MAX_CONCURRENT_STRATEGIES = 5
DEFAULT_POLL_INTERVAL = 60 # seconds between bar checks
class StrategyLifecycleManager:
def __init__(self, data_dir: Path, event_bridge=None, lifecycle_manager=None):
self.data_dir = data_dir
self.worktrees_dir = data_dir / "worktrees"
self.configs_dir = data_dir / "strategy_configs"
self._bridge = event_bridge
self._lifecycle = lifecycle_manager # dexorder LifecycleManager
self._runners: dict[str, tuple[threading.Thread, threading.Event]] = {} # name -> (thread, stop_event)
self._db: Optional["StrategyDB"] = None
async def initialize(self) -> None:
"""Initialize DB and prune stale worktrees."""
from dexorder.strategy.db import get_strategy_db
from dexorder.tools.python_tools import get_category_manager
self._db = get_strategy_db(self.data_dir)
await self._db.initialize()
self.worktrees_dir.mkdir(parents=True, exist_ok=True)
self.configs_dir.mkdir(parents=True, exist_ok=True)
# Prune any git worktrees that are no longer registered
try:
mgr = get_category_manager(self.data_dir)
mgr.git.prune_worktrees()
except Exception as e:
log.warning("git worktree prune failed: %s", e)
async def resume_running(self) -> None:
"""On container restart, re-launch strategies that were 'running' at shutdown."""
if self._db is None:
return
try:
running = await self._db.get_running_strategies()
for row in running:
name = row["name"]
log.info("Resuming strategy '%s' after container restart", name)
feeds = json.loads(row.get("feeds_json") or "[]")
await self.activate(
strategy_name=name,
feeds=feeds,
allocation=row.get("allocation", 10_000.0),
paper=bool(row.get("paper", 1)),
_resume=True,
)
except Exception as e:
log.error("Failed to resume strategies: %s", e)
# ------------------------------------------------------------------
# Activate / Deactivate
# ------------------------------------------------------------------
async def activate(
self,
strategy_name: str,
feeds: list[dict],
allocation: float,
paper: bool = True,
git_revision: str = "HEAD",
_resume: bool = False,
) -> dict:
"""
Activate a strategy.
Creates a git worktree at the given revision, writes a config file,
and spawns a subprocess running runner.py.
Returns a dict with status and details.
"""
if strategy_name in self._runners:
return {"error": f"Strategy '{strategy_name}' is already running"}
if len(self._runners) >= MAX_CONCURRENT_STRATEGIES:
return {
"error": f"Maximum concurrent strategies ({MAX_CONCURRENT_STRATEGIES}) reached. "
"Deactivate a running strategy first."
}
# Build worktree
from dexorder.tools.python_tools import get_category_manager, sanitize_name
mgr = get_category_manager(self.data_dir)
safe_name = sanitize_name(strategy_name)
impl_path = self.data_dir / "src" / "strategy" / safe_name / "implementation.py"
if not impl_path.exists():
return {"error": f"Strategy '{strategy_name}' not found at {impl_path}"}
try:
short_hash = mgr.git.head_short_hash() if git_revision == "HEAD" else git_revision[:7]
worktree_name = f"{safe_name}_{short_hash}"
worktree_path = self.worktrees_dir / worktree_name
if not worktree_path.exists():
actual_hash = mgr.git.create_worktree(worktree_path, git_revision)
else:
actual_hash = short_hash
except Exception as e:
return {"error": f"Failed to create git worktree: {e}"}
worktree_impl = worktree_path / "src" / "strategy" / safe_name / "implementation.py"
if not worktree_impl.exists():
# Fall back to live impl (worktree may not include subdirs on first use)
worktree_impl = impl_path
# Feed configs as list of [ticker, period_seconds]
feed_configs = [[f.get("symbol", ""), int(f.get("period_seconds", 3600))] for f in feeds]
# Write runner config to a temp file under DATA_DIR
runner_config = {
"strategy_name": strategy_name,
"impl_path": str(worktree_impl),
"feed_configs": feed_configs,
"allocation": allocation,
"ipc_endpoint": "ipc:///tmp/dexorder-strategy-events.sock",
"data_dir": str(self.data_dir),
"poll_interval": DEFAULT_POLL_INTERVAL,
}
config_file = self.configs_dir / f"{safe_name}.json"
config_file.write_text(json.dumps(runner_config, indent=2))
# Launch strategy in a daemon thread
try:
from dexorder.strategy.runner import run_thread
stop_event = threading.Event()
thread = threading.Thread(
target=run_thread,
args=(runner_config, stop_event),
daemon=True,
name=f"strategy-{safe_name}",
)
thread.start()
except Exception as e:
return {"error": f"Failed to start strategy thread: {e}"}
self._runners[strategy_name] = (thread, stop_event)
# Register as lifecycle trigger
if self._lifecycle:
self._lifecycle.add_trigger(f"strategy:{strategy_name}")
# Notify event bridge
if self._bridge:
self._bridge.notify_strategy_started(strategy_name)
# Persist to DB
if self._db:
await self._db.upsert_strategy(
name=strategy_name,
status="running",
allocation=allocation,
paper=paper,
feeds=feeds,
git_rev=actual_hash,
worktree_path=str(worktree_path),
config=runner_config,
)
log.info("Strategy '%s' activated (thread=%d, rev=%s)", strategy_name, thread.ident, actual_hash)
return {
"status": "activated",
"strategy_name": strategy_name,
"paper": paper,
"allocation": allocation,
"git_revision": actual_hash,
"thread_id": thread.ident,
}
async def deactivate(self, strategy_name: str) -> dict:
"""Stop a running strategy and clean up its worktree."""
entry = self._runners.pop(strategy_name, None)
if entry is None:
return {"error": f"Strategy '{strategy_name}' is not running"}
thread, stop_event = entry
# Signal the runner to stop and wait for the thread to exit
stop_event.set()
await asyncio.get_running_loop().run_in_executor(
None, lambda: thread.join(timeout=15)
)
if thread.is_alive():
log.warning("Strategy '%s' thread did not exit within timeout", strategy_name)
# Remove lifecycle trigger
if self._lifecycle:
self._lifecycle.remove_trigger(f"strategy:{strategy_name}")
# Notify bridge
if self._bridge:
self._bridge.notify_strategy_stopped(strategy_name)
# Get final PnL from DB
final_pnl = 0.0
if self._db:
state = await self._db.get_pnl_state(strategy_name)
if state:
final_pnl = state.get("realized_pnl", 0.0)
await self._db.update_strategy_status(strategy_name, "stopped")
# Clean up worktree
await self._cleanup_worktree(strategy_name)
log.info("Strategy '%s' deactivated, final_pnl=%.4f", strategy_name, final_pnl)
return {
"status": "deactivated",
"strategy_name": strategy_name,
"final_pnl": final_pnl,
}
async def mark_crashed(self, strategy_name: str) -> None:
"""Mark a strategy as crashed (called by heartbeat monitor)."""
entry = self._runners.pop(strategy_name, None)
if entry:
    # Signal the (possibly hung) runner thread to stop, in case it is still alive
    entry[1].set()
if self._lifecycle:
self._lifecycle.remove_trigger(f"strategy:{strategy_name}")
if self._db:
await self._db.update_strategy_status(strategy_name, "error")
log.error("Strategy '%s' marked as crashed (heartbeat timeout)", strategy_name)
async def update_pnl(self, strategy_name: str, payload: dict) -> None:
"""Called by event bridge when a PNL_UPDATE event arrives."""
if self._db:
await self._db.update_pnl_state(
name=strategy_name,
realized_pnl=payload.get("realized_pnl", 0.0),
unrealized_pnl=payload.get("unrealized_pnl", 0.0),
trade_count=payload.get("trade_count", 0),
)
# ------------------------------------------------------------------
# Listing
# ------------------------------------------------------------------
async def list_active(self) -> list[dict]:
"""Return currently running strategies with PnL state."""
if self._db is None:
return []
strategies = await self._db.get_running_strategies()
result = []
for s in strategies:
name = s["name"]
state = await self._db.get_pnl_state(name)
entry = {
"strategy_name": name,
"status": s.get("status", "unknown"),
"paper": bool(s.get("paper", 1)),
"allocation": s.get("allocation", 0),
"git_revision": s.get("git_rev"),
"started_at": s.get("started_at"),
"feeds": json.loads(s.get("feeds_json") or "[]"),
"realized_pnl": state.get("realized_pnl", 0.0) if state else 0.0,
"unrealized_pnl": state.get("unrealized_pnl", 0.0) if state else 0.0,
"trade_count": state.get("trade_count", 0) if state else 0,
}
result.append(entry)
return result
# ------------------------------------------------------------------
# Shutdown
# ------------------------------------------------------------------
async def shutdown(self) -> None:
"""Stop all running strategies on container shutdown."""
names = list(self._runners.keys())
for name in names:
await self.deactivate(name)
# ------------------------------------------------------------------
# Internal
# ------------------------------------------------------------------
async def _cleanup_worktree(self, strategy_name: str) -> None:
if self._db is None:
return
try:
row = await self._db.get_strategy(strategy_name)
wt = row.get("worktree_path") if row else None
if wt:
from dexorder.tools.python_tools import get_category_manager
mgr = get_category_manager(self.data_dir)
mgr.git.remove_worktree(Path(wt))
except Exception as e:
log.warning("Worktree cleanup failed for '%s': %s", strategy_name, e)
# Singleton
_lifecycle_manager: Optional[StrategyLifecycleManager] = None
def get_strategy_lifecycle(data_dir: Optional[Path] = None) -> StrategyLifecycleManager:
global _lifecycle_manager
if _lifecycle_manager is None:
if data_dir is None:
import os
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
_lifecycle_manager = StrategyLifecycleManager(data_dir)
return _lifecycle_manager
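# ---------------------------------------------------------------------------
# Illustrative usage sketch (assumption: not part of the original module).
# Shows how a caller could drive the singleton to activate and then deactivate
# a paper strategy. The strategy name, feed, and data_dir are hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo() -> None:
        lifecycle = get_strategy_lifecycle(Path("/tmp/dexorder-demo"))
        result = await lifecycle.activate(
            strategy_name="Example RSI Strategy",  # hypothetical name
            feeds=[{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}],
            allocation=5_000.0,
            paper=True,
        )
        print(result)
        if result.get("status") == "activated":
            print(await lifecycle.deactivate("Example RSI Strategy"))

    asyncio.run(_demo())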

View File

@@ -0,0 +1,196 @@
"""
Lightweight paper trading account for strategy subprocesses.
Simulates order execution at bar-close prices without requiring a Nautilus TradingNode.
Tracks positions, PnL, and trade history. All amounts are in the quote currency.
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass, field
from typing import Optional
log = logging.getLogger(__name__)
@dataclass
class Position:
"""An open position."""
instrument: str
side: str # "long" or "short"
quantity: float
entry_price: float
entry_time: float # Unix timestamp
@dataclass
class Trade:
"""A completed round-trip trade."""
instrument: str
side: str # direction of the entry
quantity: float
entry_price: float
exit_price: float
entry_time: float
exit_time: float
pnl: float
class PaperAccount:
"""
Simulates a cash paper account for a single strategy.
Positions are opened/closed by calling buy(), sell(), and flatten().
Fills execute at the provided price (e.g. bar close).
"""
def __init__(self, initial_capital: float, feed_key: Optional[str] = None):
self.initial_capital = initial_capital
self.balance = initial_capital
self._positions: dict[str, Position] = {} # feed_key → Position
self._trades: list[Trade] = []
self._default_feed_key = feed_key
# ------------------------------------------------------------------
# Order API (mirrors PandasStrategy's order API)
# ------------------------------------------------------------------
def buy(self, quantity: float, price: float, feed_key: Optional[str] = None) -> None:
"""Open a long or close a short at price."""
fk = feed_key or self._default_feed_key or "default"
existing = self._positions.get(fk)
if existing and existing.side == "short":
# Close short
pnl = (existing.entry_price - price) * existing.quantity
self._close_position(fk, price, pnl)
elif not existing:
# Open long
cost = price * quantity
if cost > self.balance:
quantity = self.balance / price # size down to available capital
if quantity > 0:
self._positions[fk] = Position(
instrument=fk, side="long", quantity=quantity,
entry_price=price, entry_time=time.time(),
)
log.debug("Paper BUY %.6f @ %.2f (%s)", quantity, price, fk)
def sell(self, quantity: float, price: float, feed_key: Optional[str] = None) -> None:
"""Open a short or close a long at price."""
fk = feed_key or self._default_feed_key or "default"
existing = self._positions.get(fk)
if existing and existing.side == "long":
# Close long
pnl = (price - existing.entry_price) * existing.quantity
self._close_position(fk, price, pnl)
elif not existing:
# Open short (using margin — simplified: require 2x capital)
cost = price * quantity * 2
if cost > self.balance:
quantity = self.balance / (price * 2)
if quantity > 0:
self._positions[fk] = Position(
instrument=fk, side="short", quantity=quantity,
entry_price=price, entry_time=time.time(),
)
log.debug("Paper SELL %.6f @ %.2f (%s)", quantity, price, fk)
def flatten(self, price: float, feed_key: Optional[str] = None) -> None:
"""Close any open position at price."""
if feed_key:
keys = [feed_key]
else:
keys = list(self._positions.keys())
for fk in keys:
pos = self._positions.get(fk)
if pos is None:
continue
if pos.side == "long":
pnl = (price - pos.entry_price) * pos.quantity
else:
pnl = (pos.entry_price - price) * pos.quantity
self._close_position(fk, price, pnl)
# ------------------------------------------------------------------
# Reporting
# ------------------------------------------------------------------
def unrealized_pnl(self, current_prices: dict[str, float]) -> float:
"""Compute unrealized PnL using current prices."""
total = 0.0
for fk, pos in self._positions.items():
price = current_prices.get(fk)
if price is None:
continue
if pos.side == "long":
total += (price - pos.entry_price) * pos.quantity
else:
total += (pos.entry_price - price) * pos.quantity
return total
def realized_pnl(self) -> float:
return sum(t.pnl for t in self._trades)
def total_pnl(self, current_prices: dict[str, float] | None = None) -> float:
rpnl = self.realized_pnl()
upnl = self.unrealized_pnl(current_prices) if current_prices else 0.0
return rpnl + upnl
def trade_count(self) -> int:
return len(self._trades)
def win_rate(self) -> float:
if not self._trades:
return 0.0
wins = sum(1 for t in self._trades if t.pnl > 0)
return wins / len(self._trades)
def positions(self) -> dict[str, dict]:
return {
fk: {
"side": p.side,
"quantity": p.quantity,
"entry_price": p.entry_price,
}
for fk, p in self._positions.items()
}
def recent_trades(self, n: int = 50) -> list[dict]:
return [
{
"instrument": t.instrument,
"side": t.side,
"quantity": round(t.quantity, 8),
"entry_price": round(t.entry_price, 8),
"exit_price": round(t.exit_price, 8),
"entry_time": t.entry_time,
"exit_time": t.exit_time,
"pnl": round(t.pnl, 6),
}
for t in self._trades[-n:]
]
# ------------------------------------------------------------------
# Internal
# ------------------------------------------------------------------
def _close_position(self, fk: str, price: float, pnl: float) -> None:
pos = self._positions.pop(fk, None)
if pos is None:
return
self.balance += pnl
self._trades.append(Trade(
instrument=fk,
side=pos.side,
quantity=pos.quantity,
entry_price=pos.entry_price,
exit_price=price,
entry_time=pos.entry_time,
exit_time=time.time(),
pnl=pnl,
))
log.debug("Paper trade closed: pnl=%.4f balance=%.2f (%s)", pnl, self.balance, fk)

View File

@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""
Strategy runner.
Loads a PandasStrategy from a git worktree path, subscribes to live bar data
(polling DataAPI), runs the paper trading loop, and pushes events to the main
MCP process via ZMQ PUSH. Runs in a daemon thread via run_thread() (preferred)
or as a standalone subprocess (see main() below).
Usage:
python -m dexorder.strategy.runner --config <json_config_path>
Config JSON:
{
"strategy_name": "My RSI Strategy",
"impl_path": "/app/data/worktrees/my_rsi_strategy_abc1234/strategy/my_rsi_strategy/implementation.py",
"feed_configs": [["BTC/USDT.BINANCE", 3600]],
"allocation": 5000.0,
"ipc_endpoint": "ipc:///tmp/dexorder-strategy-events.sock",
"data_dir": "/app/data",
"poll_interval": 60
}
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import sys
import threading
import time
from pathlib import Path
# Ensure the directory containing the dexorder package (repo root or worktree
# root, depending on where this file was loaded from) is importable.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
log = logging.getLogger(__name__)
class StrategyRunner:
"""Runs a PandasStrategy in paper trading mode using DataAPI polling."""
def __init__(self, config: dict, stop_event: threading.Event | None = None):
self.strategy_name = config["strategy_name"]
self.impl_path = Path(config["impl_path"])
self.feed_configs: list[tuple[str, int]] = [
(f[0], int(f[1])) for f in config["feed_configs"]
]
self.allocation = float(config.get("allocation", 10_000.0))
self.ipc_endpoint = config.get("ipc_endpoint", "ipc:///tmp/dexorder-strategy-events.sock")
self.data_dir = Path(config.get("data_dir", "/app/data"))
self.poll_interval = int(config.get("poll_interval", 60)) # seconds
self._stop_event = stop_event or threading.Event()
self._running = False
self._push_socket = None
self._strategy = None
self._paper: "PaperAccount | None" = None
self._last_timestamps: dict[str, int] = {} # feed_key -> last seen timestamp_ns
async def run(self) -> None:
"""Main async entry point."""
self._setup_zmq()
await self._push_event("STARTED", {})
try:
await self._setup_strategy()
await self._trading_loop()
except asyncio.CancelledError:
pass
except Exception as e:
log.exception("Strategy runner fatal error")
await self._push_event("ERROR", {"message": str(e)})
finally:
await self._push_event("STOPPED", {
"pnl": self._paper.realized_pnl() if self._paper else 0.0,
"trade_count": self._paper.trade_count() if self._paper else 0,
})
self._cleanup_zmq()
# ------------------------------------------------------------------
# Setup
# ------------------------------------------------------------------
def _setup_zmq(self) -> None:
import zmq
ctx = zmq.Context.instance()
self._push_socket = ctx.socket(zmq.PUSH)
self._push_socket.connect(self.ipc_endpoint)
log.info("Connected PUSH socket to %s", self.ipc_endpoint)
def _cleanup_zmq(self) -> None:
if self._push_socket:
self._push_socket.close()
async def _setup_strategy(self) -> None:
from dexorder.nautilus.backtest_runner import _load_strategy_class, _setup_custom_indicators
from dexorder.nautilus.pandas_strategy import PandasStrategyConfig, make_feed_key
from dexorder.strategy.paper_account import PaperAccount
# Register custom indicators
try:
_setup_custom_indicators(self.data_dir)
except Exception as e:
log.warning("Custom indicator setup failed: %s", e)
# Load strategy class from worktree impl path
strategy_class = _load_strategy_class(self.impl_path)
log.info("Loaded strategy class: %s", strategy_class.__name__)
feed_keys = tuple(make_feed_key(t, p) for t, p in self.feed_configs)
config = PandasStrategyConfig(
strategy_id=f"{strategy_class.__name__}-PAPER",
feed_keys=feed_keys,
initial_capital=self.allocation,
)
self._strategy = strategy_class(config=config)
self._paper = PaperAccount(self.allocation, feed_keys[0] if feed_keys else None)
# Wire paper account into strategy's order methods
self._wire_paper_account(feed_keys)
log.info("Strategy '%s' initialized with %d feed(s)", self.strategy_name, len(feed_keys))
def _wire_paper_account(self, feed_keys: tuple) -> None:
"""Replace strategy's order methods with paper account calls."""
paper = self._paper
from dexorder.nautilus.pandas_strategy import make_feed_key
def paper_buy(quantity, feed_key=None):
fk = feed_key or (feed_keys[0] if feed_keys else "default")
# Get current close price from last seen bars
price = self._current_price(fk)
if price:
paper.buy(quantity, price, fk)
asyncio.create_task(self._push_event("ORDER_FILLED", {
"side": "buy", "quantity": quantity,
"price": price, "feed_key": fk,
"pnl": paper.realized_pnl(),
}))
def paper_sell(quantity, feed_key=None):
fk = feed_key or (feed_keys[0] if feed_keys else "default")
price = self._current_price(fk)
if price:
paper.sell(quantity, price, fk)
asyncio.create_task(self._push_event("ORDER_FILLED", {
"side": "sell", "quantity": quantity,
"price": price, "feed_key": fk,
"pnl": paper.realized_pnl(),
}))
def paper_flatten(feed_key=None):
if feed_key:
fk_list = [feed_key]
else:
fk_list = list(feed_keys)
for fk in fk_list:
price = self._current_price(fk)
if price:
paper.flatten(price, fk)
self._strategy.buy = paper_buy
self._strategy.sell = paper_sell
self._strategy.flatten = paper_flatten
# ------------------------------------------------------------------
# Trading loop
# ------------------------------------------------------------------
async def _trading_loop(self) -> None:
"""Poll DataAPI for new bars and call strategy.evaluate() on each update."""
import pandas as pd
from dexorder.api import get_api
from dexorder.nautilus.pandas_strategy import make_feed_key
api = get_api()
accumulated: dict[str, list[dict]] = {
make_feed_key(t, p): [] for t, p in self.feed_configs
}
self._current_prices: dict[str, float] = {}
heartbeat_task = asyncio.create_task(self._heartbeat_loop())
self._running = True
try:
while self._running and not self._stop_event.is_set():
now = int(time.time())
updated_any = False
for ticker, period_seconds in self.feed_configs:
fk = make_feed_key(ticker, period_seconds)
last_ts_ns = self._last_timestamps.get(fk, 0)
# First poll looks back 7 days of history; later polls resume from the last seen bar
from_time = (now - 7 * 24 * 3600) if last_ts_ns == 0 else (last_ts_ns // 1_000_000_000)
try:
df = await api.data.historical_ohlc(
ticker=ticker,
period_seconds=period_seconds,
start_time=from_time,
end_time=now,
extra_columns=["volume", "buy_vol", "sell_vol",
"open_time", "high_time", "low_time", "close_time",
"open_interest"],
)
except Exception as e:
log.warning("OHLC fetch failed for %s: %s", fk, e)
continue
if df.empty:
continue
# Find new bars
ts_col = "timestamp" if "timestamp" in df.columns else df.columns[0]
new_bars = df[df[ts_col] > last_ts_ns] if last_ts_ns else df
for _, row in new_bars.iterrows():
ts_ns = int(row.get(ts_col, 0))
entry = {
"timestamp": ts_ns,
"open": float(row.get("open", 0)),
"high": float(row.get("high", 0)),
"low": float(row.get("low", 0)),
"close": float(row.get("close", 0)),
"volume": float(row.get("volume", 0)),
"buy_vol": float(row.get("buy_vol", 0)) if "buy_vol" in row else None,
"sell_vol": float(row.get("sell_vol", 0)) if "sell_vol" in row else None,
"open_interest": float(row.get("open_interest", 0)) if "open_interest" in row else None,
}
accumulated[fk].append(entry)
self._last_timestamps[fk] = max(self._last_timestamps.get(fk, 0), ts_ns)
self._current_prices[fk] = entry["close"]
updated_any = True
if updated_any:
# Build DataFrames and call evaluate
dfs = {fk: pd.DataFrame(rows) for fk, rows in accumulated.items() if rows}
try:
self._strategy.evaluate(dfs)
except Exception as e:
log.error("evaluate() error: %s", e)
await self._push_event("ERROR", {"message": f"evaluate() error: {e}"})
# Push PnL update
rpnl = self._paper.realized_pnl() if self._paper else 0.0
upnl = self._paper.unrealized_pnl(self._current_prices) if self._paper else 0.0
await self._push_event("PNL_UPDATE", {
"realized_pnl": rpnl,
"unrealized_pnl": upnl,
"total_pnl": rpnl + upnl,
"trade_count": self._paper.trade_count() if self._paper else 0,
})
# Sleep in 1s increments so stop_event is checked promptly
for _ in range(self.poll_interval):
if self._stop_event.is_set():
self._running = False
break
await asyncio.sleep(1)
finally:
heartbeat_task.cancel()
try:
await heartbeat_task
except asyncio.CancelledError:
pass
async def _heartbeat_loop(self) -> None:
while True:
await asyncio.sleep(10)
await self._push_event("HEARTBEAT", {})
def _current_price(self, feed_key: str) -> float | None:
return getattr(self, "_current_prices", {}).get(feed_key)
# ------------------------------------------------------------------
# Event publishing
# ------------------------------------------------------------------
async def _push_event(self, event_type: str, payload: dict) -> None:
from dexorder.strategy.events import StrategyEvent, StrategyEventType
type_map = {
"STARTED": StrategyEventType.STARTED,
"STOPPED": StrategyEventType.STOPPED,
"HEARTBEAT": StrategyEventType.HEARTBEAT,
"ORDER_FILLED": StrategyEventType.ORDER_FILLED,
"POSITION_UPDATE": StrategyEventType.POSITION_UPDATE,
"PNL_UPDATE": StrategyEventType.PNL_UPDATE,
"ERROR": StrategyEventType.ERROR,
"LOG": StrategyEventType.LOG,
}
et = type_map.get(event_type, StrategyEventType.LOG)
event = StrategyEvent(
event_type=et,
strategy_name=self.strategy_name,
payload=payload,
)
try:
if self._push_socket:
self._push_socket.send(event.serialize(), flags=1) # NOBLOCK
except Exception as e:
log.debug("Failed to push event %s: %s", event_type, e)
def _init_api() -> None:
"""Initialize thread-local API from environment config. Non-fatal on error."""
try:
import yaml
config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml")
secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml")
config_data, secrets_data = {}, {}
if Path(config_path).exists():
with open(config_path) as f:
config_data = yaml.safe_load(f) or {}
if Path(secrets_path).exists():
with open(secrets_path) as f:
secrets_data = yaml.safe_load(f) or {}
data_cfg = config_data.get("data", {})
iceberg_cfg = data_cfg.get("iceberg", {})
relay_cfg = data_cfg.get("relay", {})
from dexorder.api import set_api, API
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
data_api = DataAPIImpl(
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
namespace=iceberg_cfg.get("namespace", "trading"),
s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
)
set_api(API(charting=ChartingAPIImpl(), data=data_api))
except Exception as e:
log.warning("API initialization failed: %s", e)
def run_thread(config: dict, stop_event: threading.Event) -> None:
"""
Entry point for running a strategy in a daemon thread.
Initializes a thread-local API, creates a StrategyRunner with the given
stop_event, and runs the async trading loop until stop_event is set.
"""
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
_init_api()
runner = StrategyRunner(config, stop_event=stop_event)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(runner.run())
finally:
loop.close()
def main():
"""Subprocess entry point (backward compatibility)."""
import signal
parser = argparse.ArgumentParser(description="Dexorder strategy subprocess runner")
parser.add_argument("--config", required=True, help="Path to JSON config file")
args = parser.parse_args()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
with open(args.config) as f:
config = json.load(f)
stop_event = threading.Event()
def _shutdown(signum, frame):
log.info("Received signal %d, stopping runner", signum)
stop_event.set()
signal.signal(signal.SIGTERM, _shutdown)
signal.signal(signal.SIGINT, _shutdown)
run_thread(config, stop_event)
if __name__ == "__main__":
main()

View File

@@ -1,15 +1,14 @@
""" """
activate_strategy / deactivate_strategy — start and stop live or paper trading. activate_strategy / deactivate_strategy / list_active_strategies
paper=True (default): forward paper trading — strategy runs on live data with paper=True (default): forward paper trading — strategy runs on live data with
simulated fills. No API keys required. simulated fills via PaperAccount.
paper=False: live trading — real order execution via user's exchange API keys, paper=False: live trading — not yet implemented (requires secrets vault).
retrieved from the user secrets vault. Currently raises
NotImplementedError until the vault is implemented.
Full live-data feed streaming for forward testing is TBD (requires a live bar Each activated strategy runs in its own subprocess from a git worktree,
source). This module establishes the interface and stubs the runtime loop. ensuring the production version is isolated from edits in the working tree.
Events (fills, PnL updates, errors) flow via ZMQ PUSH/PULL to EventPublisher.
""" """
import json import json
@@ -18,10 +17,6 @@ from typing import Any
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Registry of active strategies: {strategy_name → runtime state dict}
# In a future implementation this will hold live strategy runners.
_active_strategies: dict[str, dict] = {}
async def activate_strategy( async def activate_strategy(
strategy_name: str, strategy_name: str,
@@ -34,16 +29,14 @@ async def activate_strategy(
Args: Args:
strategy_name: Display name as saved via python_write("strategy", ...) strategy_name: Display name as saved via python_write("strategy", ...)
feeds: List of feed dicts, e.g. [{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}] feeds: List of feed dicts: [{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}]
allocation: Capital allocated in quote currency (e.g. 5000.0 USDT) allocation: Capital allocated in quote currency (e.g. 5000.0 USDT)
paper: True = paper/simulated fills (default); False = live execution paper: True = paper/simulated fills (default); False = live (not yet implemented)
Returns: Returns:
list[TextContent] with JSON: list[TextContent] with JSON:
{"status": "activated", "strategy_name": str, "paper": bool, "allocation": float} {"status": "activated", "strategy_name": str, "paper": bool, "allocation": float,
"git_revision": str, "pid": int}
On error:
{"error": str}
""" """
from mcp.types import TextContent from mcp.types import TextContent
@@ -51,87 +44,45 @@ async def activate_strategy(
log.error("activate_strategy '%s': %s", strategy_name, msg) log.error("activate_strategy '%s': %s", strategy_name, msg)
return [TextContent(type="text", text=json.dumps({"error": msg}))] return [TextContent(type="text", text=json.dumps({"error": msg}))]
if strategy_name in _active_strategies: if not paper:
return _err( return _err(
f"Strategy '{strategy_name}' is already active. " "Live trading (paper=False) requires the user secrets vault, "
"Call deactivate_strategy first." "which is not yet implemented. Use paper=True for paper forward testing."
) )
if not paper:
# Live execution requires the user secrets vault for API keys.
# The vault is not yet implemented.
try:
from dexorder.secrets_vault import SecretsVault
_vault = SecretsVault()
_vault.get_secret("__probe__") # will raise NotImplementedError
except NotImplementedError:
return _err(
"Live trading (paper=False) requires the user secrets vault, "
"which is not yet implemented. Use paper=True for paper forward testing."
)
# Validate feeds
if not feeds: if not feeds:
return _err("feeds list is empty") return _err("feeds list is empty")
parsed_feeds: list[tuple[str, int]] = []
for f in feeds: for f in feeds:
sym = f.get("symbol", "") if not f.get("symbol"):
ps = f.get("period_seconds", 3600)
if not sym:
return _err(f"Feed entry missing 'symbol': {f}") return _err(f"Feed entry missing 'symbol': {f}")
parsed_feeds.append((sym, int(ps)))
# TODO: Full implementation — start a live/paper trading loop: try:
# 1. Load strategy class from category files from dexorder.strategy.lifecycle import get_strategy_lifecycle
# 2. Set up custom indicators via _setup_custom_indicators() lifecycle = get_strategy_lifecycle()
# 3. Subscribe to live bar stream for each feed result = await lifecycle.activate(
# 4. Initialize paper account (Nautilus SimulatedExchange) or live account strategy_name=strategy_name,
# 5. Run strategy event loop (on_bar → evaluate → submit orders) feeds=feeds,
# This requires a live data feed adapter (TBD). allocation=allocation,
paper=paper,
)
except Exception as exc:
log.exception("activate_strategy: lifecycle activation failed")
return _err(f"Activation failed: {exc}")
log.info( if "error" in result:
"activate_strategy: registering '%s' (paper=%s, allocation=%.2f) — " return _err(result["error"])
"live feed loop is TBD",
strategy_name, paper, allocation,
)
_active_strategies[strategy_name] = { return [TextContent(type="text", text=json.dumps(result))]
"strategy_name": strategy_name,
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
"allocation": allocation,
"paper": paper,
"status": "registered",
"pnl": 0.0,
}
payload = {
"status": "activated",
"strategy_name": strategy_name,
"paper": paper,
"allocation": allocation,
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
"note": (
"Strategy registered. Live data feed streaming is not yet implemented — "
"forward trading will begin when the live feed adapter is available."
),
}
return [TextContent(type="text", text=json.dumps(payload))]
async def deactivate_strategy(strategy_name: str) -> list: async def deactivate_strategy(strategy_name: str) -> list:
""" """
Deactivate a running strategy and return its final P&L summary. Deactivate a running strategy and return its final P&L summary.
Args:
strategy_name: Display name of the active strategy
Returns: Returns:
list[TextContent] with JSON: list[TextContent] with JSON:
{"status": "deactivated", "strategy_name": str, "final_pnl": float} {"status": "deactivated", "strategy_name": str, "final_pnl": float}
On error:
{"error": str}
""" """
from mcp.types import TextContent from mcp.types import TextContent
@@ -139,35 +90,36 @@ async def deactivate_strategy(strategy_name: str) -> list:
log.error("deactivate_strategy '%s': %s", strategy_name, msg) log.error("deactivate_strategy '%s': %s", strategy_name, msg)
return [TextContent(type="text", text=json.dumps({"error": msg}))] return [TextContent(type="text", text=json.dumps({"error": msg}))]
if strategy_name not in _active_strategies: try:
return _err(f"Strategy '{strategy_name}' is not active") from dexorder.strategy.lifecycle import get_strategy_lifecycle
lifecycle = get_strategy_lifecycle()
result = await lifecycle.deactivate(strategy_name)
except Exception as exc:
log.exception("deactivate_strategy: failed")
return _err(f"Deactivation failed: {exc}")
state = _active_strategies.pop(strategy_name) if "error" in result:
return _err(result["error"])
# TODO: Stop the live feed loop and collect final P&L from the running engine. return [TextContent(type="text", text=json.dumps(result))]
final_pnl = state.get("pnl", 0.0)
log.info("deactivate_strategy: stopped '%s', final_pnl=%.4f", strategy_name, final_pnl)
payload = {
"status": "deactivated",
"strategy_name": strategy_name,
"final_pnl": final_pnl,
}
return [TextContent(type="text", text=json.dumps(payload))]
async def list_active_strategies() -> list: async def list_active_strategies() -> list:
""" """
Return a list of currently active strategies and their status. Return a list of currently active strategies with PnL state.
Returns: Returns:
list[TextContent] with JSON: list[TextContent] with JSON:
{"active_strategies": [{strategy_name, paper, allocation, feeds, pnl}, ...]} {"active_strategies": [{strategy_name, paper, allocation, feeds, realized_pnl, ...}]}
""" """
from mcp.types import TextContent from mcp.types import TextContent
payload = { try:
"active_strategies": list(_active_strategies.values()), from dexorder.strategy.lifecycle import get_strategy_lifecycle
} lifecycle = get_strategy_lifecycle()
return [TextContent(type="text", text=json.dumps(payload))] active = await lifecycle.list_active()
except Exception as exc:
log.exception("list_active_strategies: failed")
active = []
return [TextContent(type="text", text=json.dumps({"active_strategies": active}))]

View File

@@ -15,7 +15,11 @@ from typing import Any
 log = logging.getLogger(__name__)
 # All OHLC+ columns to request from the DataAPI
-_OHLC_EXTRA_COLUMNS = ["volume", "buy_vol", "sell_vol", "open_interest"]
+_OHLC_EXTRA_COLUMNS = [
+    "volume", "buy_vol", "sell_vol",
+    "open_time", "high_time", "low_time", "close_time",
+    "open_interest",
+]
 async def backtest_strategy(
@@ -153,11 +157,11 @@ async def backtest_strategy(
     # --- 7. Return results ---
     payload = {
         "strategy_name": strategy_name,
         "feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
         "initial_capital": initial_capital,
         "paper": paper,
         "total_candles": total_candles,
-        **metrics,
+        **metrics,  # keys: summary, statistics, trades, equity_curve
     }
     return [TextContent(type="text", text=json.dumps(payload))]

View File

@@ -2,9 +2,10 @@
""" """
Indicator harness — tests a custom indicator against synthetic OHLC data. Indicator harness — tests a custom indicator against synthetic OHLC data.
Runs in a subprocess so the indicator code is isolated from the MCP server process. Can be called in-process (preferred) via run() or as a subprocess for backward
compatibility.
Usage: python indicator_harness.py <impl_path> <metadata_path> Usage (subprocess): python indicator_harness.py <impl_path> <metadata_path>
Outputs JSON to stdout: Outputs JSON to stdout:
{ {
@@ -21,7 +22,7 @@ import traceback
import types import types
from pathlib import Path from pathlib import Path
# Ensure dexorder package is importable (same as research_harness.py) # Ensure dexorder package is importable when run as a subprocess
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
@@ -84,13 +85,15 @@ def summarize(result, n: int) -> str:
return f"Unexpected return type: {type(result).__name__}" return f"Unexpected return type: {type(result).__name__}"
def main(): def run(impl_path: Path, metadata_path: Path) -> dict:
if len(sys.argv) < 3: """
print(json.dumps({"success": False, "error": "Usage: indicator_harness.py <impl_path> <metadata_path>"})) Run an indicator against synthetic OHLC data and return results.
sys.exit(1)
impl_path = sys.argv[1] Returns:
metadata_path = sys.argv[2] dict with success, output, error fields
"""
impl_path = Path(impl_path)
metadata_path = Path(metadata_path)
# --- Load metadata --- # --- Load metadata ---
input_series = ["close"] input_series = ["close"]
@@ -107,34 +110,32 @@ def main():
# bare value (legacy) # bare value (legacy)
parameters[pname] = pinfo parameters[pname] = pinfo
except Exception as e: except Exception as e:
print(json.dumps({"success": False, "error": f"Failed to read metadata: {e}"})) return {"success": False, "error": f"Failed to read metadata: {e}"}
sys.exit(0)
# --- Generate synthetic data --- # --- Generate synthetic data ---
try: try:
import numpy # noqa: F401 — verify numpy available import numpy # noqa: F401 — verify numpy available
import pandas as pd import pandas as pd
except ImportError as e: except ImportError as e:
print(json.dumps({"success": False, "error": f"Missing required package: {e}"})) return {"success": False, "error": f"Missing required package: {e}"}
sys.exit(0)
df = make_synthetic_ohlcv(n=200) df = make_synthetic_ohlcv(n=200)
n = len(df) n = len(df)
# --- Load implementation --- # --- Load implementation ---
# Clear from sys.modules first so edits are picked up
module_name = f"_dexorder_indicator_{impl_path.parent.name}"
sys.modules.pop(module_name, None)
try: try:
spec = importlib.util.spec_from_file_location("_indicator_impl", impl_path) spec = importlib.util.spec_from_file_location(module_name, impl_path)
module = importlib.util.module_from_spec(spec) module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module) # type: ignore[union-attr] spec.loader.exec_module(module) # type: ignore[union-attr]
except Exception: except Exception:
tb = traceback.format_exc() tb = traceback.format_exc()
print(json.dumps({"success": False, "error": f"Import failed:\n{tb}"})) return {"success": False, "error": f"Import failed:\n{tb}"}
sys.exit(0)
# --- Find the indicator function --- # --- Find the indicator function ---
# Prefer a function whose name matches the sanitized directory name, fn_name = impl_path.parent.name.lower()
# fall back to the first public function in the module.
fn_name = os.path.basename(os.path.dirname(impl_path)).lower()
fn = getattr(module, fn_name, None) fn = getattr(module, fn_name, None)
if fn is None: if fn is None:
candidates = [ candidates = [
@@ -144,15 +145,13 @@ def main():
fn = candidates[0] if candidates else None fn = candidates[0] if candidates else None
if fn is None: if fn is None:
print(json.dumps({"success": False, "error": "No callable function found in implementation.py"})) return {"success": False, "error": "No callable function found in implementation.py"}
sys.exit(0)
# --- Build positional args from input_series --- # --- Build positional args from input_series ---
args = [] args = []
for col in input_series: for col in input_series:
if col not in df.columns: if col not in df.columns:
print(json.dumps({"success": False, "error": f"input_series '{col}' not in synthetic df columns {list(df.columns)}"})) return {"success": False, "error": f"input_series '{col}' not in synthetic df columns {list(df.columns)}"}
sys.exit(0)
args.append(df[col]) args.append(df[col])
# --- Execute --- # --- Execute ---
@@ -160,22 +159,29 @@ def main():
result = fn(*args, **parameters) result = fn(*args, **parameters)
except Exception: except Exception:
tb = traceback.format_exc() tb = traceback.format_exc()
print(json.dumps({"success": False, "error": f"Execution failed:\n{tb}"})) return {"success": False, "error": f"Execution failed:\n{tb}"}
sys.exit(0)
# --- Validate output type --- # --- Validate output type ---
if not isinstance(result, (pd.Series, pd.DataFrame)): if not isinstance(result, (pd.Series, pd.DataFrame)):
print(json.dumps({ return {
"success": False, "success": False,
"error": ( "error": (
f"Indicator must return pd.Series or pd.DataFrame, " f"Indicator must return pd.Series or pd.DataFrame, "
f"got {type(result).__name__}. " f"got {type(result).__name__}. "
"Wrap the output if using pandas-ta internally." "Wrap the output if using pandas-ta internally."
), ),
})) }
sys.exit(0)
print(json.dumps({"success": True, "output": summarize(result, n)})) return {"success": True, "output": summarize(result, n)}
def main():
if len(sys.argv) < 3:
print(json.dumps({"success": False, "error": "Usage: indicator_harness.py <impl_path> <metadata_path>"}))
sys.exit(1)
result = run(Path(sys.argv[1]), Path(sys.argv[2]))
print(json.dumps(result))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -18,11 +18,13 @@ After write/edit operations, a category-specific test harness runs to validate
the code and capture errors/output for agent feedback. the code and capture errors/output for agent feedback.
""" """
import concurrent.futures
import json import json
import logging import logging
import re import re
import subprocess import subprocess
import sys import sys
import traceback
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
@@ -30,16 +32,37 @@ from typing import Any, Optional
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Path to the harness scripts (written to disk, not inline)
_RESEARCH_HARNESS = Path(__file__).parent / "research_harness.py"
_INDICATOR_HARNESS = Path(__file__).parent / "indicator_harness.py"
# Import conda manager for package installation def _run_inprocess(fn, *args, timeout: int) -> dict:
"""
Run fn(*args) in a one-shot thread and return its result dict.
Uses a thread so the calling coroutine is not blocked and the calling
process does not fork a new Python interpreter. All already-loaded
libraries (numpy, pandas, matplotlib, etc.) are shared with the thread.
On timeout returns a dict with _timeout=True. On unexpected exception
returns a dict with error=True and the traceback in stderr.
"""
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(fn, *args)
try:
return future.result(timeout=timeout)
except concurrent.futures.TimeoutError:
return {"_timeout": True, "error": True,
"stdout": "", "stderr": "", "images": []}
except Exception:
return {"error": True, "stdout": "",
"stderr": traceback.format_exc(), "images": []}
# Import conda manager for package installation and tracking
try: try:
from dexorder.conda_manager import install_packages from dexorder.conda_manager import install_packages, cleanup_extra_packages
except ImportError: except ImportError:
log.warning("conda_manager not available - package installation disabled") log.warning("conda_manager not available - package installation disabled")
install_packages = None install_packages = None
cleanup_extra_packages = None
# ============================================================================= # =============================================================================
@@ -289,6 +312,49 @@ class GitManager:
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise RuntimeError(e.stderr.strip()) from e raise RuntimeError(e.stderr.strip()) from e
def head_short_hash(self) -> str:
"""Return the short hash of HEAD, or 'unknown' on error."""
try:
result = self._run("rev-parse", "--short", "HEAD")
return result.stdout.strip()
except Exception:
return "unknown"
def create_worktree(self, worktree_path: Path, revision: str = "HEAD") -> str:
"""
Create a git worktree at worktree_path pinned to revision.
Returns the short hash of the checked-out commit.
"""
worktree_path.parent.mkdir(parents=True, exist_ok=True)
try:
self._run("worktree", "add", "--detach", str(worktree_path), revision)
# Get short hash of the worktree's HEAD
result = subprocess.run(
["git", "rev-parse", "--short", "HEAD"],
cwd=str(worktree_path),
capture_output=True,
text=True,
check=True,
)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
raise RuntimeError(f"git worktree add failed: {e.stderr.strip()}") from e
def remove_worktree(self, worktree_path: Path) -> None:
"""Remove a git worktree, silently ignoring errors if it no longer exists."""
try:
self._run("worktree", "remove", "--force", str(worktree_path), check=False)
except Exception as e:
log.warning("git worktree remove failed (non-fatal): %s", e)
def prune_worktrees(self) -> None:
"""Prune stale worktree references."""
try:
self._run("worktree", "prune", check=False)
except Exception:
pass
# ============================================================================= # =============================================================================
# Custom Indicator Setup # Custom Indicator Setup
@@ -733,7 +799,7 @@ class CategoryFileManager:
conda_packages = metadata.get("conda_packages", []) conda_packages = metadata.get("conda_packages", [])
if conda_packages: if conda_packages:
log.info(f"Installing packages for validation: {conda_packages}") log.info(f"Installing packages for validation: {conda_packages}")
install_result = install_packages(conda_packages) install_result = install_packages(conda_packages, data_dir=self.data_dir)
if install_result.get("success"): if install_result.get("success"):
packages_installed = install_result.get("installed", []) packages_installed = install_result.get("installed", [])
if packages_installed: if packages_installed:
@@ -761,48 +827,49 @@ class CategoryFileManager:
def _validate_strategy(self, impl_path: Path) -> dict[str, Any]: def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
""" """
Validate a strategy implementation. Validate a strategy by running it against synthetic OHLC data.
Runs basic syntax check and imports. Runs strategy_harness.py in-process via a thread. Catches import errors,
runtime errors in evaluate(), and wrong class hierarchy — not just syntax.
""" """
try: meta_path = impl_path.parent / "metadata.json"
result = subprocess.run( return self._execute_strategy(impl_path.parent, timeout=45)
[sys.executable, "-m", "py_compile", str(impl_path)],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode == 0: def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
return { """
"success": True, Run a strategy against synthetic OHLC data in-process via a thread.
"output": "Strategy syntax valid",
} Returns:
else: dict with success, output (human-readable summary), trade_count, error
return { """
"success": False, impl_path = item_dir / "implementation.py"
"output": result.stderr, meta_path = item_dir / "metadata.json"
"error": "Syntax error in strategy",
} if not impl_path.exists():
except subprocess.TimeoutExpired: return {"success": False, "error": "implementation.py not found"}
return {"success": False, "error": "Validation timeout"} if not meta_path.exists():
except Exception as e: return {"success": False, "error": "metadata.json not found"}
return {"success": False, "error": f"Validation failed: {e}"}
from dexorder.tools.strategy_harness import run as _strategy_run
result = _run_inprocess(_strategy_run, impl_path, meta_path, timeout=timeout)
if result.get("_timeout"):
return {"success": False, "error": f"Strategy test timed out after {timeout}s"}
return result
def _validate_indicator(self, impl_path: Path) -> dict[str, Any]: def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
""" """
Validate an indicator by running it against synthetic OHLC data. Validate an indicator by running it against synthetic OHLC data.
Uses indicator_harness.py in a subprocess so the indicator code is Runs indicator_harness.py in-process via a thread. Catches import errors,
isolated from the MCP server process. Catches import errors, runtime runtime errors, and wrong return types — not just syntax.
errors, and wrong return types — not just syntax.
""" """
meta_path = impl_path.parent / "metadata.json" meta_path = impl_path.parent / "metadata.json"
return self._execute_indicator(impl_path.parent, timeout=30) return self._execute_indicator(impl_path.parent, timeout=30)
def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]: def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
""" """
Run an indicator against synthetic OHLC data via indicator_harness.py. Run an indicator against synthetic OHLC data in-process via a thread.
Returns: Returns:
dict with success, output (human-readable summary), error dict with success, output (human-readable summary), error
@@ -815,77 +882,22 @@ class CategoryFileManager:
if not meta_path.exists(): if not meta_path.exists():
return {"success": False, "error": "metadata.json not found"} return {"success": False, "error": "metadata.json not found"}
try: from dexorder.tools.indicator_harness import run as _indicator_run
result = subprocess.run( result = _run_inprocess(_indicator_run, impl_path, meta_path, timeout=timeout)
[sys.executable, str(_INDICATOR_HARNESS), str(impl_path), str(meta_path)],
capture_output=True, if result.get("_timeout"):
text=True,
timeout=timeout,
cwd=str(item_dir),
)
except subprocess.TimeoutExpired:
return {"success": False, "error": f"Indicator test timed out after {timeout}s"} return {"success": False, "error": f"Indicator test timed out after {timeout}s"}
except Exception as e: return result
return {"success": False, "error": f"Harness launch failed: {e}"}
if result.returncode != 0: def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
return {
"success": False,
"error": f"Harness process failed:\n{result.stderr}",
}
try:
data = json.loads(result.stdout)
except json.JSONDecodeError:
return {
"success": False,
"error": f"Harness produced invalid JSON:\n{result.stdout[:500]}",
}
return data
def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
""" """
Run a research script via the on-disk harness and return parsed results. Run a research script in-process via a thread and return captured results.
The harness (research_harness.py) handles API initialization, stdout/stderr
capture, matplotlib figure capture, and outputs JSON to stdout.
Returns: Returns:
dict with stdout, stderr, images, error fields — or an error dict. dict with stdout, stderr, images, error fields — or an error dict.
""" """
try: from dexorder.tools.research_harness import run as _research_run
result = subprocess.run( return _run_inprocess(_research_run, impl_path, item_dir, timeout=timeout)
[sys.executable, str(_RESEARCH_HARNESS), str(impl_path)],
capture_output=True,
text=True,
timeout=timeout,
cwd=str(item_dir),
)
if result.returncode == 0:
try:
return json.loads(result.stdout)
except json.JSONDecodeError:
return {
"stdout": result.stdout,
"stderr": result.stderr,
"images": [],
"error": True,
}
else:
# Harness itself failed (import error, bad args, etc.)
return {
"stdout": "",
"stderr": result.stderr,
"images": [],
"error": True,
}
except subprocess.TimeoutExpired:
return {"stdout": "", "stderr": "", "images": [], "error": True,
"_timeout": True}
except Exception as e:
return {"stdout": "", "stderr": str(e), "images": [], "error": True}
def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]: def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
""" """
@@ -893,7 +905,7 @@ class CategoryFileManager:
Runs the script via the harness and captures output + pyplot images. Runs the script via the harness and captures output + pyplot images.
""" """
data = self._run_research_harness(impl_path, item_dir, timeout=30) data = self._run_research_harness(impl_path, item_dir, timeout=300)
if data.get("_timeout"): if data.get("_timeout"):
return {"success": False, "error": "Research script timeout"} return {"success": False, "error": "Research script timeout"}
@@ -983,6 +995,48 @@ class CategoryFileManager:
return {"content": content} return {"content": content}
def delete(self, category: str, name: str) -> dict[str, Any]:
"""
Delete a category script directory and commit the removal to git.
Args:
category: Category name (strategy, indicator, research)
name: Display name of the item to delete
Returns:
dict with:
- success: bool
- category: str
- name: str
- revision: str - git commit hash of the deletion commit
- error: str (if any)
"""
import shutil
try:
cat = Category(category)
except ValueError:
return {
"success": False,
"error": f"Invalid category '{category}'. Must be one of: {', '.join(c.value for c in Category)}"
}
item_dir = get_category_path(self.src_dir, cat, name)
if not item_dir.exists():
return {"success": False, "error": f"{category} '{name}' not found"}
try:
shutil.rmtree(item_dir)
log.info(f"Deleted {cat.value}: {item_dir}")
except Exception as e:
return {"success": False, "error": f"Failed to delete: {e}"}
commit_hash = self.git.commit(f"delete({category}): {name}")
result: dict[str, Any] = {"success": True, "category": category, "name": name}
if commit_hash:
result["revision"] = commit_hash
return result
def git_log( def git_log(
self, self,
category: Optional[str] = None, category: Optional[str] = None,

View File

@@ -1,13 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Research script harness - runs implementation.py in a subprocess with API Research script harness - runs implementation.py with API initialization,
initialization, stdout/stderr capture, and matplotlib figure capture. stdout/stderr capture, and matplotlib figure capture.
This file is written to disk and invoked by python_tools.py rather than Can be called in-process (preferred) via run() or as a subprocess for backward
being passed inline via `python -c`, so the harness code is inspectable and compatibility.
not regenerated on every call.
Usage: Usage (subprocess):
python -m dexorder.tools.research_harness <implementation_path> python -m dexorder.tools.research_harness <implementation_path>
Output (JSON to stdout): Output (JSON to stdout):
@@ -19,73 +18,148 @@ Output (JSON to stdout):
} }
""" """
import sys
import io import io
import os import os
import base64 import base64
import json import json
import sys
import traceback
from pathlib import Path from pathlib import Path
# Non-interactive matplotlib backend (must be set before importing pyplot) # Non-interactive matplotlib backend (must be set before importing pyplot).
# Idempotent — safe to call multiple times.
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
# Ensure dexorder package is importable # Ensure dexorder package is importable when run as a subprocess
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
# ---------------------------------------------------------------------------
# Initialize API from config files so research scripts can call get_api()
# ---------------------------------------------------------------------------
try:
import yaml
config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml") def run(impl_path: Path, item_dir: Path) -> dict:
secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml") """
Run a research script in-process and return captured results.
config_data = {} Creates a fresh DataAPIImpl per call (thread-safe: API stored in thread-local
secrets_data = {} via set_api() so the global API is not overwritten).
if Path(config_path).exists():
with open(config_path) as f:
config_data = yaml.safe_load(f) or {}
if Path(secrets_path).exists():
with open(secrets_path) as f:
secrets_data = yaml.safe_load(f) or {}
data_cfg = config_data.get("data", {}) Returns:
iceberg_cfg = data_cfg.get("iceberg", {}) dict with stdout, stderr, images, error fields
relay_cfg = data_cfg.get("relay", {}) """
impl_path = Path(impl_path)
from dexorder.api import set_api, API if not impl_path.exists():
from dexorder.impl.charting_api_impl import ChartingAPIImpl return {
from dexorder.impl.data_api_impl import DataAPIImpl "stdout": "",
"stderr": f"Implementation file not found: {impl_path}",
"images": [],
"error": True,
}
_data_api = DataAPIImpl( # ---------------------------------------------------------------------------
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"), # Initialize a fresh API instance for this execution (thread-local)
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"), # ---------------------------------------------------------------------------
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"), try:
namespace=iceberg_cfg.get("namespace", "trading"), import yaml
s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
)
# NOTE: We intentionally do NOT call asyncio.run(_data_api.start()) here.
# DataAPIImpl.historical_ohlc() auto-starts on first use, which ensures the
# ZMQ context and notification listener are created inside the user's own
# asyncio.run() event loop — avoiding cross-loop lifecycle issues.
set_api(API(charting=ChartingAPIImpl(), data=_data_api))
except Exception as e:
print(f"WARNING: API initialization failed: {e}", file=sys.stderr)
# --------------------------------------------------------------------------- config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml")
# Register custom indicators so research scripts can use df.ta.my_indicator() secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml")
# ---------------------------------------------------------------------------
try: config_data = {}
from dexorder.tools.python_tools import setup_custom_indicators secrets_data = {}
_data_dir = Path(os.environ.get("DATA_DIR", "/app/data")) if Path(config_path).exists():
setup_custom_indicators(_data_dir) with open(config_path) as f:
except Exception as e: config_data = yaml.safe_load(f) or {}
print(f"WARNING: Custom indicator registration failed: {e}", file=sys.stderr) if Path(secrets_path).exists():
with open(secrets_path) as f:
secrets_data = yaml.safe_load(f) or {}
data_cfg = config_data.get("data", {})
iceberg_cfg = data_cfg.get("iceberg", {})
relay_cfg = data_cfg.get("relay", {})
from dexorder.api import set_api, API
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
_data_api = DataAPIImpl(
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
namespace=iceberg_cfg.get("namespace", "trading"),
s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
s3_region=iceberg_cfg.get("s3_region") or secrets_data.get("s3_region"),
request_timeout=240.0,
)
# NOTE: We intentionally do NOT call asyncio.run(_data_api.start()) here.
# DataAPIImpl.historical_ohlc() auto-starts on first use, which ensures the
# ZMQ context and notification listener are created inside the user's own
# asyncio.run() event loop — avoiding cross-loop lifecycle issues.
# In a harness thread, set_api() stores to thread-local (not the global).
set_api(API(charting=ChartingAPIImpl(), data=_data_api))
except Exception as e:
# Non-fatal — script may not use the API
sys.stderr.write(f"WARNING: API initialization failed: {e}\n")
# ---------------------------------------------------------------------------
# Register custom indicators
# ---------------------------------------------------------------------------
try:
from dexorder.tools.python_tools import setup_custom_indicators
_data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
setup_custom_indicators(_data_dir)
except Exception as e:
sys.stderr.write(f"WARNING: Custom indicator registration failed: {e}\n")
# ---------------------------------------------------------------------------
# Execute user script with captured stdout/stderr
# ---------------------------------------------------------------------------
stdout_buf = io.StringIO()
stderr_buf = io.StringIO()
error_occurred = False
old_stdout, old_stderr = sys.stdout, sys.stderr
old_cwd = os.getcwd()
sys.stdout = stdout_buf
sys.stderr = stderr_buf
try:
os.chdir(impl_path.parent)
exec(compile(impl_path.read_text(), str(impl_path), 'exec'), {})
except Exception as e:
print(f"ERROR: {e}", file=sys.stderr)
traceback.print_exc(file=sys.stderr)
error_occurred = True
finally:
sys.stdout = old_stdout
sys.stderr = old_stderr
os.chdir(old_cwd)
stdout_output = stdout_buf.getvalue()
stderr_output = stderr_buf.getvalue()
# ---------------------------------------------------------------------------
# Capture matplotlib figures
# ---------------------------------------------------------------------------
images = []
if not error_occurred:
for fig_num in plt.get_fignums():
fig = plt.figure(fig_num)
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
buf.seek(0)
images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
buf.close()
plt.close('all')
return {
"stdout": stdout_output,
"stderr": stderr_output,
"images": images,
"error": error_occurred,
}
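# Example of calling run() in-process (a sketch — the paths are illustrative):
#
#     result = run(Path("/app/data/research/my_script.py"), Path("/app/data/research"))
#     if result["error"]:
#         print(result["stderr"])
#     for img in result["images"]:  # each img is {"format": "png", "data": <base64 str>}
#         ...  # decode img["data"] and save or display it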
def main():
@@ -94,55 +168,8 @@ def main():
sys.exit(2)
impl_path = Path(sys.argv[1])
if not impl_path.exists():
print(json.dumps({
"stdout": "",
"stderr": f"Implementation file not found: {impl_path}",
"images": [],
"error": True,
}))
sys.exit(0)
# Capture stdout and stderr
old_stdout = sys.stdout
old_stderr = sys.stderr
sys.stdout = io.StringIO()
sys.stderr = io.StringIO()
error_occurred = False
try:
exec(compile(impl_path.read_text(), str(impl_path), 'exec'), {})
except Exception as e:
print(f"ERROR: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
error_occurred = True
# Restore stdout/stderr
stdout_output = sys.stdout.getvalue()
stderr_output = sys.stderr.getvalue()
sys.stdout = old_stdout
sys.stderr = old_stderr
# Capture all matplotlib figures as base64 PNGs
images = []
for fig_num in plt.get_fignums():
fig = plt.figure(fig_num)
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
buf.seek(0)
img_b64 = base64.b64encode(buf.read()).decode('utf-8')
images.append({"format": "png", "data": img_b64})
buf.close()
plt.close('all')
# Output results as JSON to real stdout
result = {
"stdout": stdout_output,
"stderr": stderr_output,
"images": images,
"error": error_occurred,
}
item_dir = impl_path.parent
result = run(impl_path, item_dir)
print(json.dumps(result))

View File

@@ -0,0 +1,228 @@
#!/usr/bin/env python3
"""
Strategy harness — validates a PandasStrategy against synthetic OHLC data.
Can be called in-process (preferred) via run() or as a subprocess for backward
compatibility.
Usage (subprocess): python strategy_harness.py <impl_path> <metadata_path>
Outputs JSON to stdout:
{
"success": bool,
"output": str, # human-readable summary on success
"trade_count": int, # number of trades executed in the mini-backtest
"error": str | null # error message / traceback if failed
}
"""
import json
import os
import sys
import traceback
from pathlib import Path
# Ensure dexorder package is importable when run as a subprocess
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
# ---------------------------------------------------------------------------
# Synthetic OHLCV data — 100 deterministic bars, no network required
# ---------------------------------------------------------------------------
def make_synthetic_ohlcv(n: int = 100):
import numpy as np
import pandas as pd
rng = np.random.default_rng(42)
returns = rng.normal(0, 0.015, n)
closes = 40_000.0 * np.cumprod(1.0 + returns)
opens = np.empty(n)
opens[0] = closes[0]
opens[1:] = closes[:-1]
noise = np.abs(rng.normal(0, 0.005, n))
highs = np.maximum(opens, closes) * (1.0 + noise)
lows = np.minimum(opens, closes) * (1.0 - noise)
volumes = rng.uniform(1e6, 1e8, n)
buy_vols = volumes * rng.uniform(0.4, 0.6, n)
now_ns = 1_700_000_000_000_000_000 # arbitrary epoch in nanoseconds
step_ns = 3_600_000_000_000 # 1 hour in nanoseconds
timestamps = [now_ns + i * step_ns for i in range(n)]
return pd.DataFrame({
"timestamp": timestamps,
"open": opens,
"high": highs,
"low": lows,
"close": closes,
"volume": volumes,
"buy_vol": buy_vols,
"sell_vol": volumes - buy_vols,
"open_interest": rng.uniform(1e8, 1e9, n),
})
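# Quick sanity check of the synthetic frame (illustrative only):
#
#     df = make_synthetic_ohlcv(100)
#     assert len(df) == 100
#     assert (df["high"] >= df[["open", "close"]].max(axis=1)).all()
#     assert (df["low"] <= df[["open", "close"]].min(axis=1)).all()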
def run(impl_path: Path, metadata_path: Path) -> dict:
"""
Validate a strategy against synthetic OHLC data and return results.
Returns:
dict with success, output, trade_count, error fields
"""
impl_path = Path(impl_path)
metadata_path = Path(metadata_path)
# --- Load metadata (feeds, parameters) ---
data_feeds: list[dict] = []
parameters: dict = {}
try:
with open(metadata_path) as f:
meta = json.load(f)
data_feeds = meta.get("data_feeds") or []
param_schema = meta.get("parameters") or {}
for pname, pinfo in param_schema.items():
if isinstance(pinfo, dict) and "default" in pinfo:
parameters[pname] = pinfo["default"]
elif not isinstance(pinfo, dict):
parameters[pname] = pinfo
except Exception as e:
return {"success": False, "output": "", "trade_count": 0, "error": f"Failed to read metadata: {e}"}
# --- Build synthetic feed keys ---
if data_feeds:
feed_configs = [(f.get("symbol", "BTC/USDT.SYNTH"), int(f.get("period_seconds", 3600)))
for f in data_feeds]
else:
feed_configs = [("BTC/USDT.SYNTH", 3600)]
# --- Register custom indicators ---
try:
from dexorder.tools.python_tools import setup_custom_indicators
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
setup_custom_indicators(data_dir)
except Exception:
pass
# --- Load strategy class ---
try:
from dexorder.nautilus.backtest_runner import _load_strategy_class
strategy_class = _load_strategy_class(impl_path)
except Exception:
tb = traceback.format_exc()
return {"success": False, "output": "", "trade_count": 0, "error": f"Strategy load failed:\n{tb}"}
# --- Run a minimal backtest with synthetic data ---
try:
import pandas as pd
from dexorder.nautilus.pandas_strategy import PandasStrategyConfig, make_feed_key
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
try:
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
_setup_custom_indicators(data_dir)
except Exception:
pass
# Build one synthetic DataFrame per feed
feed_dfs: dict[str, pd.DataFrame] = {}
for ticker, period_seconds in feed_configs:
fk = make_feed_key(ticker, period_seconds)
feed_dfs[fk] = make_synthetic_ohlcv(100)
feed_keys = tuple(make_feed_key(t, p) for t, p in feed_configs)
config = PandasStrategyConfig(
strategy_id=f"{strategy_class.__name__}-HARNESS",
feed_keys=feed_keys,
initial_capital=10_000.0,
)
strat = strategy_class(config=config)
for pname, pval in parameters.items():
if hasattr(strat, pname):
setattr(strat, pname, pval)
# Replay bars: accumulate rows and call evaluate()
buy_count = 0
sell_count = 0
evaluate_errors: list[str] = []
rows_by_feed: dict[str, list] = {fk: [] for fk in feed_keys}
for i in range(len(next(iter(feed_dfs.values())))):
for fk, df in feed_dfs.items():
row = df.iloc[i].to_dict()
rows_by_feed[fk].append(row)
current_dfs = {k: pd.DataFrame(v) for k, v in rows_by_feed.items()}
_orig_buy = strat.buy
_orig_sell = strat.sell
_orig_flatten = strat.flatten
class _BuyCounter:
def __call__(inner_self, *a, **kw):
nonlocal buy_count
buy_count += 1
class _SellCounter:
def __call__(inner_self, *a, **kw):
nonlocal sell_count
sell_count += 1
strat.buy = _BuyCounter()
strat.sell = _SellCounter()
strat.flatten = lambda *a, **kw: None
try:
strat.evaluate(current_dfs)
except Exception as e:
evaluate_errors.append(f"Bar {i}: {e}")
if len(evaluate_errors) > 3:
break
finally:
strat.buy = _orig_buy
strat.sell = _orig_sell
strat.flatten = _orig_flatten
if evaluate_errors and len(evaluate_errors) > 3:
break
if evaluate_errors:
return {
"success": False,
"output": "",
"trade_count": 0,
"error": "evaluate() raised errors:\n" + "\n".join(evaluate_errors[:3]),
}
trade_count = buy_count + sell_count
n_bars = len(next(iter(feed_dfs.values())))
n_feeds = len(feed_dfs)
output = (
f"Strategy validated OK: {n_bars} bars × {n_feeds} feed(s), "
f"buy_signals={buy_count}, sell_signals={sell_count}"
)
return {"success": True, "output": output, "trade_count": trade_count, "error": None}
except Exception:
tb = traceback.format_exc()
return {"success": False, "output": "", "trade_count": 0, "error": f"Harness execution failed:\n{tb}"}
def main():
if len(sys.argv) < 3:
print(json.dumps({
"success": False,
"output": "",
"trade_count": 0,
"error": "Usage: strategy_harness.py <impl_path> <metadata_path>",
}))
sys.exit(1)
result = run(Path(sys.argv[1]), Path(sys.argv[2]))
print(json.dumps(result))
if __name__ == "__main__":
main()
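# Example invocation in subprocess mode (a sketch — file names are illustrative):
#
#     python strategy_harness.py strategies/my_strategy.py strategies/my_strategy.json
#
# where the metadata JSON could look like (keys follow what run() reads above):
#
#     {"data_feeds": [{"symbol": "BTC/USDT", "period_seconds": 3600}],
#      "parameters": {"fast": {"default": 12}, "slow": {"default": 26}}}
#
# The process prints a single JSON object with success / output / trade_count / error.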

View File

@@ -51,3 +51,4 @@ dependencies:
- uvicorn>=0.27.0
- sse-starlette>=1.6.0
- nautilus_trader>=1.200.0
- aiosqlite>=0.19.0

View File

@@ -11,6 +11,7 @@ Brings together:
import asyncio
import contextlib
import json
import logging
import os
import signal
@@ -32,7 +33,7 @@ from starlette.routing import Route, Mount
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
from dexorder.api import set_api, API
from dexorder.conda_manager import sync_packages, install_packages, cleanup_extra_packages
from dexorder.events import EventType, UserEvent, DeliverySpec
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
@@ -41,6 +42,8 @@ from dexorder.tools.workspace_tools import get_workspace_store
from dexorder.tools.evaluate_indicator import evaluate_indicator
from dexorder.tools.backtest_strategy import backtest_strategy
from dexorder.tools.activate_strategy import activate_strategy, deactivate_strategy, list_active_strategies
from dexorder.strategy.event_bridge import StrategyEventBridge
from dexorder.strategy.lifecycle import get_strategy_lifecycle
# =============================================================================
# Global Data Directory
@@ -59,19 +62,34 @@ def get_data_dir() -> Path:
# =============================================================================
# Category Types Helpers
# =============================================================================
def _type_store_name(category: str) -> str:
return f"{category}_types"
def _type_store_key(category: str, name: str) -> str:
sanitized = sanitize_name(name).lower()
return f"custom_{sanitized}" if category == "indicator" else sanitized
def _build_type_entry(category: str, meta: dict) -> dict:
"""Build a {category}_types workspace entry from a metadata dict."""
name = meta.get('name', '')
key = _type_store_key(category, name)
now = int(time.time())
entry = {
'key': key,
'display_name': name,
'description': meta.get('description', ''),
'metadata': {},
'created_at': now,
'modified_at': now,
}
if category == "indicator":
entry['pandas_ta_name'] = key
entry['metadata'] = {
'display_name': name,
'parameters': meta.get('parameters') or {},
'input_series': meta.get('input_series') or ['close'],
@@ -79,31 +97,89 @@ def _build_indicator_type_entry(meta: dict) -> dict:
'pane': meta.get('pane', 'separate'),
'filled_areas': meta.get('filled_areas') or [],
'bands': meta.get('bands') or [],
}
elif category == "strategy":
entry['metadata'] = {
'data_feeds': meta.get('data_feeds') or [],
'parameters': meta.get('parameters') or {},
}
# research: metadata stays empty (no fields beyond base)
return entry
def _upsert_type(workspace_store, category_manager, category: str, name: str) -> None:
"""Read category metadata from disk and upsert into the {category}_types workspace store."""
read_result = category_manager.read(category, name)
if not read_result.get('exists') or not read_result.get('metadata'):
return
entry = _build_type_entry(category, read_result['metadata'])
key = entry['key']
store = _type_store_name(category)
# Preserve original created_at if already present
existing = workspace_store.read(store)
existing_types = (existing.get('data') or {}).get('types') or {}
if key in existing_types:
entry['created_at'] = existing_types[key].get('created_at', entry['created_at'])
workspace_store.patch(store, [{'op': 'add', 'path': f'/types/{key}', 'value': entry}])
logging.info(f"Upserted {store}/{key} for '{name}'")
def _remove_type(workspace_store, category: str, name: str) -> None:
"""Remove a category item from the {category}_types workspace store."""
key = _type_store_key(category, name)
store = _type_store_name(category)
try:
workspace_store.patch(store, [{'op': 'remove', 'path': f'/types/{key}'}])
logging.info(f"Removed {store}/{key} for '{name}'")
except Exception:
pass # entry may not exist; that's fine
if category == "indicator":
_remove_indicator_instances(workspace_store, key)
def _remove_indicator_instances(workspace_store, pandas_ta_name: str) -> None:
"""Remove all instances of a custom indicator from the indicators workspace store."""
existing = workspace_store.read('indicators')
instances = (existing.get('data') or {}).get('indicators') or {}
to_remove = [inst_id for inst_id, inst in instances.items()
if inst.get('pandas_ta_name') == pandas_ta_name]
if not to_remove:
return
patches = [{'op': 'remove', 'path': f'/indicators/{inst_id}'} for inst_id in to_remove]
try:
workspace_store.patch('indicators', patches)
logging.info(f"Removed {len(to_remove)} instance(s) of {pandas_ta_name} from indicators store")
except Exception:
logging.warning(f"Failed to remove indicator instances for {pandas_ta_name}", exc_info=True)
def _populate_types_from_disk(workspace_store, category_manager, category: str) -> None:
"""Scan existing category items and add any missing entries to the {category}_types store."""
store = _type_store_name(category)
existing = workspace_store.read(store)
existing_types = (existing.get('data') or {}).get('types') or {}
items = category_manager.list_items(category).get('items', [])
added = 0
for item in items:
item_name = item.get('name', '')
if not item_name:
continue
key = _type_store_key(category, item_name)
if key not in existing_types:
_upsert_type(workspace_store, category_manager, category, item_name)
added += 1
if added > 0:
logging.info(f"Populated {added} {category} type(s) from disk into {store}")
def _get_env_yml() -> Optional[Path]:
"""Return the path to environment.yml if it exists alongside main.py."""
p = Path(__file__).parent / "environment.yml"
return p if p.exists() else None
def _populate_indicator_types_from_disk(workspace_store, category_manager) -> None:
@@ -226,8 +302,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
category_manager = get_category_manager(config.data_dir)
logging.info(f"Category manager initialized at {config.data_dir}")
# Populate {category}_types stores from existing items on disk (migration/startup sync)
for _cat in ("indicator", "strategy", "research"):
_populate_types_from_disk(workspace_store, category_manager, _cat)
@server.list_resources()
async def list_resources():
@@ -503,6 +580,25 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
"required": ["revision", "category", "name"] "required": ["revision", "category", "name"]
} }
), ),
Tool(
name="python_delete",
description="Delete a category script permanently. Commits removal to git history and removes any conda packages that are no longer needed.",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category of the script"
},
"name": {
"type": "string",
"description": "Display name of the item to delete"
}
},
"required": ["category", "name"]
}
),
Tool(
name="conda_sync",
description="Sync conda packages: scan all metadata, remove unused packages (excluding base environment)",
@@ -699,6 +795,77 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
"required": [] "required": []
} }
), ),
Tool(
name="get_backtest_results",
description=(
"Retrieve stored backtest results for a strategy. "
"Returns the most recent backtest runs with summary stats, "
"extended statistics, trade list, and equity curve."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"limit": {
"type": "integer",
"description": "Maximum number of backtest runs to return (default 5)",
"default": 5
}
},
"required": ["strategy_name"]
}
),
Tool(
name="get_strategy_trades",
description=(
"Retrieve the trade log for a strategy (live/paper or backtest). "
"Returns individual round-trip trades with entry/exit prices and PnL."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"limit": {
"type": "integer",
"description": "Maximum number of trades to return (default 100)",
"default": 100
}
},
"required": ["strategy_name"]
}
),
Tool(
name="get_strategy_events",
description=(
"Retrieve the event log for a strategy "
"(PnL updates, fills, errors, status changes)."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"event_type": {
"type": "string",
"description": "Filter by event type (optional): PNL_UPDATE, ORDER_FILLED, ERROR, etc."
},
"limit": {
"type": "integer",
"description": "Maximum number of events to return (default 50)",
"default": 50
}
},
"required": ["strategy_name"]
}
),
]
@@ -734,7 +901,11 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}"]
if result.get('path'):
meta_parts.append(f"path: {result['path']}")
if result.get('error'):
meta_parts.append(f"error: {result['error']}")
if result.get("revision"): if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}") meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"): if result.get("validation") and not result["validation"].get("success"):
@@ -747,8 +918,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
logging.info(f"python_write '{arguments.get('name')}': returning {len(content)} items, {image_count} images") logging.info(f"python_write '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else: else:
logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})") logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
if result.get("success") and arguments.get("category") == "indicator": if result.get("success"):
_upsert_indicator_type(workspace_store, category_manager, arguments.get("name", "")) _upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
cleanup_extra_packages(get_data_dir(), _get_env_yml())
return content
elif name == "python_edit":
result = category_manager.edit(
@@ -760,7 +932,11 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}"]
if result.get('path'):
meta_parts.append(f"path: {result['path']}")
if result.get('error'):
meta_parts.append(f"error: {result['error']}")
if result.get("revision"): if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}") meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"): if result.get("validation") and not result["validation"].get("success"):
@@ -773,8 +949,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
logging.info(f"python_edit '{arguments.get('name')}': returning {len(content)} items, {image_count} images") logging.info(f"python_edit '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else: else:
logging.info(f"python_edit '{arguments.get('name')}': no execution result") logging.info(f"python_edit '{arguments.get('name')}': no execution result")
if result.get("success") and arguments.get("category") == "indicator": if result.get("success"):
_upsert_indicator_type(workspace_store, category_manager, arguments.get("name", "")) _upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
cleanup_extra_packages(get_data_dir(), _get_env_yml())
return content
elif name == "python_read":
return category_manager.read(
@@ -808,13 +985,28 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
meta_parts.append(f"error: {result['error']}") meta_parts.append(f"error: {result['error']}")
if result.get("validation") and not result["validation"].get("success"): if result.get("validation") and not result["validation"].get("success"):
meta_parts.append(f"validation errors: {result['validation'].get('errors', [])}") meta_parts.append(f"validation errors: {result['validation'].get('errors', [])}")
if result.get("success"):
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
return [TextContent(type="text", text="\n".join(meta_parts))]
elif name == "python_delete":
result = category_manager.delete(
category=arguments.get("category", ""),
name=arguments.get("name", "")
)
if result.get("success"):
_remove_type(workspace_store, arguments.get("category", ""), arguments.get("name", ""))
cleanup_result = cleanup_extra_packages(get_data_dir(), _get_env_yml())
if cleanup_result.get("removed"):
result["packages_removed"] = cleanup_result["removed"]
parts = [f"success: {result['success']}"]
for k in ("category", "name", "revision", "packages_removed", "error"):
if result.get(k):
parts.append(f"{k}: {result[k]}")
return [TextContent(type="text", text="\n".join(parts))]
elif name == "conda_sync": elif name == "conda_sync":
# Get environment.yml path relative to main.py
env_yml = Path(__file__).parent / "environment.yml"
return sync_packages(
data_dir=get_data_dir(),
environment_yml=_get_env_yml()
)
elif name == "conda_install":
return install_packages(arguments.get("packages", []))
@@ -837,7 +1029,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
parameters=arguments.get("parameters") or {},
)
elif name == "backtest_strategy":
result = await backtest_strategy(
strategy_name=arguments.get("strategy_name", ""),
feeds=arguments.get("feeds", []),
from_time=arguments.get("from_time"),
@@ -845,6 +1037,26 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
initial_capital=float(arguments.get("initial_capital", 10_000.0)),
paper=bool(arguments.get("paper", True)),
)
# Persist backtest to DB (non-fatal)
try:
payload = json.loads(result[0].text) if result and isinstance(result[0], TextContent) else {}
if payload and "summary" in payload:
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
await db.insert_backtest(
strategy_name=arguments.get("strategy_name", ""),
from_time=arguments.get("from_time"),
to_time=arguments.get("to_time"),
initial_capital=float(arguments.get("initial_capital", 10_000.0)),
feeds=arguments.get("feeds", []),
summary=payload.get("summary", {}),
statistics=payload.get("statistics", {}),
trades=payload.get("trades", []),
equity_curve=payload.get("equity_curve", []),
)
except Exception as _e:
logging.debug("Failed to persist backtest results: %s", _e)
return result
elif name == "activate_strategy": elif name == "activate_strategy":
return await activate_strategy( return await activate_strategy(
strategy_name=arguments.get("strategy_name", ""), strategy_name=arguments.get("strategy_name", ""),
@@ -858,6 +1070,31 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
)
elif name == "list_active_strategies":
return await list_active_strategies()
elif name == "get_backtest_results":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
results = await db.get_backtests(
strategy_name=arguments.get("strategy_name", ""),
limit=int(arguments.get("limit", 5)),
)
return [TextContent(type="text", text=json.dumps({"backtest_runs": results}))]
elif name == "get_strategy_trades":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
trades = await db.get_trades(
strategy_name=arguments.get("strategy_name", ""),
limit=int(arguments.get("limit", 100)),
)
return [TextContent(type="text", text=json.dumps({"trades": trades}))]
elif name == "get_strategy_events":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
events = await db.get_events(
strategy_name=arguments.get("strategy_name", ""),
event_type=arguments.get("event_type"),
limit=int(arguments.get("limit", 50)),
)
return [TextContent(type="text", text=json.dumps({"events": events}))]
else:
raise ValueError(f"Unknown tool: {name}")
@@ -909,6 +1146,7 @@ class UserContainer:
self.event_publisher: Optional[EventPublisher] = None
self.mcp_server: Optional[Server] = None
self.data_api: Optional[DataAPIImpl] = None
self.event_bridge: Optional[StrategyEventBridge] = None
self.running = False
async def start(self) -> None:
@@ -933,6 +1171,7 @@ class UserContainer:
s3_endpoint=s3_cfg.get("s3_endpoint") or secrets.get("s3_endpoint"),
s3_access_key=s3_cfg.get("s3_access_key") or secrets.get("s3_access_key"),
s3_secret_key=s3_cfg.get("s3_secret_key") or secrets.get("s3_secret_key"),
s3_region=s3_cfg.get("s3_region") or secrets.get("s3_region"),
)
await self.data_api.start()
set_api(API(charting=ChartingAPIImpl(), data=self.data_api))
@@ -965,6 +1204,23 @@ class UserContainer:
delivery=DeliverySpec.active_or_telegram(),
))
# Initialize strategy lifecycle manager (sets up DB + worktrees dir)
strategy_lifecycle = get_strategy_lifecycle(self.config.data_dir)
await strategy_lifecycle.initialize()
# Start strategy event bridge (PULL socket for subprocess events)
self.event_bridge = StrategyEventBridge(
event_publisher=self.event_publisher,
strategy_lifecycle=strategy_lifecycle,
)
await self.event_bridge.start()
strategy_lifecycle._bridge = self.event_bridge
strategy_lifecycle._lifecycle = get_lifecycle_manager()
logging.info("Strategy event bridge started")
# Resume any strategies that were running before container restart
await strategy_lifecycle.resume_running()
# Create MCP server
self.mcp_server = create_mcp_server(self.config, self.event_publisher)
@@ -998,6 +1254,20 @@ class UserContainer:
delivery=DeliverySpec.active_or_telegram(),
))
# Stop running strategies gracefully
try:
from dexorder.strategy.lifecycle import get_strategy_lifecycle
strategy_lifecycle = get_strategy_lifecycle()
await strategy_lifecycle.shutdown()
logging.info("Strategy lifecycle manager stopped")
except Exception as e:
logging.warning("Error stopping strategy lifecycle: %s", e)
# Stop event bridge
if self.event_bridge:
await self.event_bridge.stop()
logging.info("Strategy event bridge stopped")
# Stop subsystems
if self.data_api:
await self.data_api.stop()

View File

@@ -1,30 +0,0 @@
from setuptools import setup, find_packages
setup(
name="dexorder-sandbox",
version="0.1.0",
description="Dexorder Trading Platform Sandbox",
packages=find_packages(),
python_requires=">=3.9",
install_requires=[
"pyiceberg>=0.6.0",
"pyarrow>=14.0.0",
"pandas>=2.0.0",
"pyzmq>=25.0.0",
"protobuf>=4.25.0",
"pyyaml>=6.0",
"aiofiles>=23.0.0",
"mcp>=1.0.0",
"jsonpatch>=1.33",
"starlette>=0.27.0",
"uvicorn>=0.27.0",
"sse-starlette>=1.6.0",
"matplotlib>=3.7.0",
],
extras_require={
"dev": [
"pytest>=7.0.0",
"pytest-asyncio>=0.21.0",
]
},
)

View File

@@ -59,6 +59,18 @@ const addToolCallBubble = (label: string) => {
}]
}
const appendToolCallStatus = (status: string) => {
if (!toolCallMessageId) return
const idx = messages.value.findIndex(m => m._id === toolCallMessageId)
if (idx !== -1) {
messages.value[idx] = {
...messages.value[idx],
content: messages.value[idx].content + `\n↳ ${status}`
}
messages.value = [...messages.value]
}
}
const removeToolCallBubble = () => {
if (toolCallMessageId) {
messages.value = messages.value.filter(m => m._id !== toolCallMessageId)
@@ -76,11 +88,47 @@ const streamingImages = ref<any[]>([])
const handleMessage = (data: WebSocketMessage) => {
console.log('[ChatPanel] Received message:', data)
if (data.type === 'conversation_history') {
messages.value = (data.messages as any[]).map((m: any) => {
const ts = new Date(m.timestamp / 1000) // microseconds → ms
const files = (m.files ?? []).map((b: any) => ({
name: `image_${b.id}.png`,
size: 0,
type: b.mimeType.split('/')[1] ?? 'png',
url: `data:${b.mimeType};base64,${b.data}`,
preview: `data:${b.mimeType};base64,${b.data}`,
}))
return {
_id: m.id,
content: m.content,
senderId: m.role === 'user' ? CURRENT_USER_ID : AGENT_ID,
timestamp: ts.toTimeString().split(' ')[0].slice(0, 5),
date: ts.toLocaleDateString(),
saved: true,
distributed: true,
seen: true,
files,
}
})
messagesLoaded.value = true
return
}
if (data.type === 'agent_tool_call') {
addToolCallBubble(data.label ?? data.toolName ?? 'Tool call...')
return
}
if (data.type === 'subagent_tool_call') {
appendToolCallStatus(data.toolName ?? data.label ?? 'tool')
return
}
if (data.type === 'subagent_chunk') {
// Subagent final text — not shown separately; the main agent will incorporate it in its response
return
}
if (data.type === 'image') {
// Handle image message - attach to current streaming message or create standalone
console.log('[ChatPanel] Processing image message')

View File

@@ -3,6 +3,24 @@ import * as jsonpatch from 'fast-json-patch';
import type { BackendMessage, FrontendMessage, HelloMessage, PatchMessage } from '../types/sync';
import { wsManager } from './useWebSocket';
function deepReplace(target: Record<string, any>, source: Record<string, any>) {
for (const key of Object.keys(target)) {
if (!(key in source)) {
delete target[key]
}
}
for (const [key, value] of Object.entries(source)) {
if (
value !== null && typeof value === 'object' && !Array.isArray(value) &&
target[key] !== null && typeof target[key] === 'object' && !Array.isArray(target[key])
) {
deepReplace(target[key], value)
} else {
target[key] = value
}
}
}
export function useStateSync(stores: Record<string, Store>) {
console.log('[StateSync] Initializing with stores:', Object.keys(stores));
@@ -35,7 +53,7 @@ export function useStateSync(stores: Record<string, Store>) {
if (store) {
console.log('[StateSync] Applying snapshot state:', msg.state);
isApplyingBackendPatch[msg.store] = true;
store.$patch((state) => deepReplace(state as Record<string, any>, msg.state as Record<string, any>));
// Update previousState to stay in sync
previousStates[msg.store] = JSON.parse(JSON.stringify(store.$state));
isApplyingBackendPatch[msg.store] = false;
@@ -64,7 +82,7 @@ export function useStateSync(stores: Record<string, Store>) {
const newState = jsonpatch.applyPatch(currentState, msg.patch, false, false).newDocument;
console.log('[StateSync] New state after patch:', newState);
isApplyingBackendPatch[msg.store] = true;
store.$patch((state) => deepReplace(state as Record<string, any>, newState as Record<string, any>));
// Update previousState to stay in sync
previousStates[msg.store] = JSON.parse(JSON.stringify(store.$state));
isApplyingBackendPatch[msg.store] = false;

View File

@@ -123,8 +123,9 @@ class WebSocketManager {
this.statusMessage.value = ''
console.log('WebSocket disconnected:', event.code, event.reason)
// Attempt to reconnect if we have a token and it wasn't an intentional close.
// Check code instead of wasClean: code 1005 has wasClean=true but still needs retry.
if (this.token && event.code !== 1000 && event.code !== 1001) {
this.scheduleReconnect()
}
}