diff --git a/bin/create-user b/bin/create-user index 236eec27..39203cd0 100755 --- a/bin/create-user +++ b/bin/create-user @@ -137,13 +137,13 @@ echo -e "${GREEN}User ID: $USER_ID${NC}" # Build license JSON case "$LICENSE_TYPE" in enterprise) - LICENSE_JSON='{"licenseType":"enterprise","features":{"maxIndicators":200,"maxStrategies":100,"maxBacktestDays":1825,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":20,"maxMessagesPerDay":10000,"maxTokensPerMessage":32768,"rateLimitPerMinute":300},"k8sResources":{"memoryRequest":"1Gi","memoryLimit":"4Gi","cpuRequest":"500m","cpuLimit":"4000m","storage":"50Gi","tmpSizeLimit":"1Gi","enableIdleShutdown":true,"idleTimeoutMinutes":120},"preferredModel":{"provider":"anthropic","model":"claude-opus-4-6","temperature":0.7}}' + LICENSE_JSON='{"licenseType":"enterprise","features":{"maxIndicators":999,"maxStrategies":999,"maxBacktestDays":3650,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":20,"maxMessagesPerDay":10000,"maxTokensPerMessage":32768,"rateLimitPerMinute":300},"k8sResources":{"memoryRequest":"1Gi","memoryLimit":"8Gi","cpuRequest":"500m","cpuLimit":"4000m","storage":"50Gi","tmpSizeLimit":"512Mi","enableIdleShutdown":false,"idleTimeoutMinutes":0}}' ;; free) - LICENSE_JSON='{"licenseType":"free","features":{"maxIndicators":10,"maxStrategies":3,"maxBacktestDays":30,"realtimeData":false,"customExecutors":false,"apiAccess":false},"resourceLimits":{"maxConcurrentSessions":1,"maxMessagesPerDay":100,"maxTokensPerMessage":4096,"rateLimitPerMinute":20},"k8sResources":{"memoryRequest":"256Mi","memoryLimit":"512Mi","cpuRequest":"100m","cpuLimit":"500m","storage":"2Gi","tmpSizeLimit":"128Mi","enableIdleShutdown":true,"idleTimeoutMinutes":30},"preferredModel":{"provider":"anthropic","model":"claude-haiku-4-5-20251001","temperature":0.7}}' + LICENSE_JSON='{"licenseType":"free","features":{"maxIndicators":5,"maxStrategies":3,"maxBacktestDays":30,"realtimeData":false,"customExecutors":false,"apiAccess":false},"resourceLimits":{"maxConcurrentSessions":1,"maxMessagesPerDay":100,"maxTokensPerMessage":4096,"rateLimitPerMinute":10},"k8sResources":{"memoryRequest":"256Mi","memoryLimit":"8Gi","cpuRequest":"100m","cpuLimit":"500m","storage":"1Gi","tmpSizeLimit":"128Mi","enableIdleShutdown":true,"idleTimeoutMinutes":15}}' ;; pro|*) - LICENSE_JSON='{"licenseType":"pro","features":{"maxIndicators":50,"maxStrategies":20,"maxBacktestDays":365,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":5,"maxMessagesPerDay":1000,"maxTokensPerMessage":8192,"rateLimitPerMinute":60},"k8sResources":{"memoryRequest":"512Mi","memoryLimit":"2Gi","cpuRequest":"250m","cpuLimit":"2000m","storage":"10Gi","tmpSizeLimit":"256Mi","enableIdleShutdown":true,"idleTimeoutMinutes":60},"preferredModel":{"provider":"anthropic","model":"claude-sonnet-4-6","temperature":0.7}}' + LICENSE_JSON='{"licenseType":"pro","features":{"maxIndicators":50,"maxStrategies":20,"maxBacktestDays":365,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":5,"maxMessagesPerDay":1000,"maxTokensPerMessage":8192,"rateLimitPerMinute":60},"k8sResources":{"memoryRequest":"512Mi","memoryLimit":"8Gi","cpuRequest":"250m","cpuLimit":"2000m","storage":"10Gi","tmpSizeLimit":"256Mi","enableIdleShutdown":false,"idleTimeoutMinutes":0}}' ;; esac diff --git a/deploy/k8s/dev/configs/flink-config.yaml b/deploy/k8s/dev/configs/flink-config.yaml index 374601a4..d5f17956 100644 --- a/deploy/k8s/dev/configs/flink-config.yaml +++ b/deploy/k8s/dev/configs/flink-config.yaml @@ -2,7 +2,7 @@ # ZeroMQ bind address and ports zmq_bind_address: "tcp://*" -zmq_ingestor_work_queue_port: 5555 +zmq_ingestor_work_queue_port: 5567 zmq_market_data_pub_port: 5558 # Notification endpoints diff --git a/deploy/k8s/prod/configs/flink-config.yaml b/deploy/k8s/prod/configs/flink-config.yaml index 73f27e27..ab561b50 100644 --- a/deploy/k8s/prod/configs/flink-config.yaml +++ b/deploy/k8s/prod/configs/flink-config.yaml @@ -2,7 +2,7 @@ # ZeroMQ bind address and ports zmq_bind_address: "tcp://*" -zmq_ingestor_work_queue_port: 5555 +zmq_ingestor_work_queue_port: 5567 zmq_market_data_pub_port: 5558 # Notification endpoints (internal Flink task manager → job manager path) diff --git a/deploy/k8s/prod/configs/gateway-config.yaml b/deploy/k8s/prod/configs/gateway-config.yaml index e9bc6d11..77edef4a 100644 --- a/deploy/k8s/prod/configs/gateway-config.yaml +++ b/deploy/k8s/prod/configs/gateway-config.yaml @@ -21,6 +21,30 @@ data: model_provider: deepinfra model: zai-org/GLM-5 + # License tier model configuration + license_models: + # Free tier models + free: + default: zai-org/GLM-5 + cost_optimized: zai-org/GLM-5 + complex: zai-org/GLM-5 + allowed_models: + - zai-org/GLM-5 + + # Pro tier models + pro: + default: zai-org/GLM-5 + cost_optimized: zai-org/GLM-5 + complex: zai-org/GLM-5 + blocked_models: + - Qwen/Qwen3-235B-A22B-Instruct-2507 + + # Enterprise tier models + enterprise: + default: zai-org/GLM-5 + cost_optimized: zai-org/GLM-5 + complex: Qwen/Qwen3-235B-A22B-Instruct-2507 + # Kubernetes configuration kubernetes: namespace: sandbox @@ -40,10 +64,17 @@ data: url: http://qdrant:6333 collection: gateway_memory + # Agent configuration + agent: + # Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end + conversation_history_limit: 20 + # Iceberg (for durable storage via REST catalog) iceberg: catalog_uri: http://iceberg-catalog:8181 namespace: gateway + ohlc_catalog_uri: http://iceberg-catalog:8181 + ohlc_namespace: trading s3_endpoint: http://minio:9000 conversations_bucket: warehouse diff --git a/deploy/k8s/prod/configs/ingestor-config.yaml b/deploy/k8s/prod/configs/ingestor-config.yaml index 6d4bfd9c..57b3541d 100644 --- a/deploy/k8s/prod/configs/ingestor-config.yaml +++ b/deploy/k8s/prod/configs/ingestor-config.yaml @@ -10,12 +10,25 @@ supported_exchanges: - COINBASE - KRAKEN +# Per-exchange work slot capacity. +# Each slot is one concurrent job. historical_slots limits parallel OHLC fetches; +# realtime_slots limits concurrent tick subscriptions. +exchange_capacity: + BINANCE: + historical_slots: 1 + realtime_slots: 5 + COINBASE: + historical_slots: 1 + realtime_slots: 4 + KRAKEN: + historical_slots: 1 + realtime_slots: 3 + # Kafka configuration kafka_brokers: - kafka:9092 # Worker configuration -max_concurrent: 10 poll_interval_ms: 10000 # Logging diff --git a/deploy/k8s/prod/kustomization.yaml b/deploy/k8s/prod/kustomization.yaml index c6c70bac..c31152aa 100644 --- a/deploy/k8s/prod/kustomization.yaml +++ b/deploy/k8s/prod/kustomization.yaml @@ -15,9 +15,8 @@ resources: - infrastructure.yaml # Sandbox namespace resources (go to sandbox namespace, not ai) - sandbox-config.yaml - # gateway-config ConfigMap is intentionally excluded from kustomize. - # It contains an op:// reference for the DB password. Apply via: - # bin/config-update prod gateway-config + # gateway-config ConfigMap (database URL is in secrets, not here) + - configs/gateway-config.yaml patches: - path: patch-gateway-rbac-subject.yaml diff --git a/doc/plan.md b/doc/plan.md index 3102409a..8808c718 100644 --- a/doc/plan.md +++ b/doc/plan.md @@ -1,5 +1,6 @@ # Development Plan +* Single conversation in gateway * Realtime data * Triggers * Strategy UI @@ -8,3 +9,8 @@ * User secrets * Live Execution * Sandbox <=> Dexorder auth +* Chat channels +* MCP channel (with or without images) +* TradingView indicator import tool +* Trader preferences tool +* \ No newline at end of file diff --git a/doc/prod_deployment.md b/doc/prod_deployment.md index 29f9d9d7..0c256154 100644 --- a/doc/prod_deployment.md +++ b/doc/prod_deployment.md @@ -30,6 +30,16 @@ This script (hardcoded to `--context=prod`) performs: > **Secrets are NOT updated by this script.** Run `bin/secret-update prod` separately if secrets have changed. +### Post-deploy: refresh user licenses + +After any deploy that changes license tier templates (`gateway/src/types/user.ts`), run: + +```bash +bin/create-all-users prod +``` + +This upserts all alpha users and re-applies the current tier template to their `user_licenses` row. Safe to run on an existing database — it will not delete users or lose data. New sandbox deployments will pick up the updated resource limits on next login. + --- ## Full Deploy with Iceberg Schema Wipe @@ -137,3 +147,20 @@ kubectl --context prod -n ai logs deployment/gateway --tail=100 ### Gateway shows `42P01` errors but pod is running The gateway does not auto-migrate on startup. The schema file must be applied manually after any database recreation. A gateway restart alone will not fix this. + +### Gateway CrashLoopBackOff — `ECONNREFUSED postgresql://localhost/dexorder` + +**Symptom:** New gateway pod crashes immediately with `Database connection failed` and logs show `databaseUrl: "postgresql://localhost/dexorder"`. + +**Cause:** The gateway reads `database.url` from `config.yaml` (via `configData`). If that key is absent, it falls back to the localhost default — even if `secrets.yaml` has `database.url`. The code checks `configData.database?.url || secretsData.database?.url || ...` (as of `c8fa99c`), so both sources work, but both files must be present and correctly mounted. + +**What to check:** +1. Does the `gateway-config` ConfigMap have a `database:` section? (It should not — credentials belong in secrets as of the nautilus branch.) +2. Does `gateway-secrets` have `database.url`? Verify: `kubectl --context prod -n ai get secret gateway-secrets -o jsonpath='{.data.secrets\.yaml}' | base64 -d` +3. If the secret is missing the database section, run `bin/secret-update prod` (requires 1Password desktop to be unlocked — must run interactively, not via pipe). + +### `bin/secret-update prod` fails with "authorization prompt dismissed" + +1Password's `op inject` requires interactive desktop authentication. Running it via `echo "yes" | bin/secret-update prod` or any background/piped invocation will fail silently (the script prints `✓` even though `kubectl apply` received empty input). + +**Fix:** Run `bin/secret-update prod` in an interactive terminal with 1Password unlocked. diff --git a/gateway/src/auth/authenticator.ts b/gateway/src/auth/authenticator.ts index ee53f38f..86cad79f 100644 --- a/gateway/src/auth/authenticator.ts +++ b/gateway/src/auth/authenticator.ts @@ -129,7 +129,7 @@ export class Authenticator { 'Container is ready' ); - const sessionId = `tg_${telegramUserId}_${Date.now()}`; + const sessionId = `tg_${telegramUserId}`; return { userId, diff --git a/gateway/src/channels/telegram-handler.ts b/gateway/src/channels/telegram-handler.ts index e393ea42..0197e93c 100644 --- a/gateway/src/channels/telegram-handler.ts +++ b/gateway/src/channels/telegram-handler.ts @@ -248,7 +248,7 @@ export class TelegramHandler { * Clean up sessions that have been idle longer than maxAgeMs. * Triggers Iceberg flush for each expired session via harness.cleanup(). */ - async cleanupSessions(maxAgeMs = 30 * 60 * 1000): Promise { + async cleanupSessions(maxAgeMs = 2 * 60 * 60 * 1000): Promise { const now = Date.now(); const expired: string[] = []; diff --git a/gateway/src/channels/websocket-handler.ts b/gateway/src/channels/websocket-handler.ts index 8d4c449f..a3d9f2f2 100644 --- a/gateway/src/channels/websocket-handler.ts +++ b/gateway/src/channels/websocket-handler.ts @@ -511,13 +511,18 @@ export class WebSocketHandler { hasSymbolIndexService: !!symbolIndexService }, 'Service availability'); + const requestId = payload.request_id || randomUUID(); + if (!ohlcService && !symbolIndexService) { - logger.warn('No datafeed services available'); + logger.warn({ requestId }, 'No datafeed services available yet'); + socket.send(JSON.stringify({ + type: 'error', + request_id: requestId, + error_message: 'Services initializing, please retry', + })); return; } - const requestId = payload.request_id || randomUUID(); - try { switch (payload.type) { case 'get_config': { diff --git a/gateway/src/harness/memory/checkpoint-saver.ts b/gateway/src/harness/memory/checkpoint-saver.ts index 7e05df4b..35e19fc5 100644 --- a/gateway/src/harness/memory/checkpoint-saver.ts +++ b/gateway/src/harness/memory/checkpoint-saver.ts @@ -14,7 +14,7 @@ import type { FastifyBaseLogger } from 'fastify'; * Iceberg for durable storage with time-travel capabilities. */ export class TieredCheckpointSaver extends BaseCheckpointSaver { - private readonly HOT_TTL_SECONDS = 3600; // 1 hour + private readonly HOT_TTL_SECONDS = 86400; // 24 hours private readonly KEY_PREFIX = 'ckpt:'; constructor( diff --git a/gateway/src/harness/memory/conversation-store.ts b/gateway/src/harness/memory/conversation-store.ts index 1e04fd4f..d96e7a59 100644 --- a/gateway/src/harness/memory/conversation-store.ts +++ b/gateway/src/harness/memory/conversation-store.ts @@ -30,7 +30,7 @@ export interface StoredMessage { */ export class ConversationStore { private readonly HOT_MESSAGE_LIMIT = 50; // Redis buffer ceiling - private readonly HOT_TTL_SECONDS = 3600; // 1 hour + private readonly HOT_TTL_SECONDS = 86400; // 24 hours constructor( private redis: Redis, diff --git a/ingestor/src/index.js b/ingestor/src/index.js index 5bbb8efb..bdb86900 100644 --- a/ingestor/src/index.js +++ b/ingestor/src/index.js @@ -372,7 +372,10 @@ class IngestorWorker { if (candles.length > 0) { const metadata = { request_id: requestId, client_id, ticker, period_seconds, start_time, end_time }; - const PAGE_SIZE = 1000; + // 8000 rows/page: each OHLC row is ~77 bytes typical (9 populated fields as + // protobuf varints + ticker string). Worst-case is ~124 bytes, so 8000 rows + // stays safely under Kafka's 1MB message limit in all realistic scenarios. + const PAGE_SIZE = 8000; for (let i = 0; i < candles.length; i += PAGE_SIZE) { const page = candles.slice(i, i + PAGE_SIZE); const isLastPage = (i + PAGE_SIZE) >= candles.length; diff --git a/ingestor/src/symbol-metadata-generator.js b/ingestor/src/symbol-metadata-generator.js index 1c2cb9a3..eda81e68 100644 --- a/ingestor/src/symbol-metadata-generator.js +++ b/ingestor/src/symbol-metadata-generator.js @@ -249,7 +249,6 @@ export class SymbolMetadataGenerator { if (!this.publishedSymbols.has(key)) { uniqueMetadata.push(metadata); - this.publishedSymbols.add(key); } else { duplicateCount++; } @@ -275,6 +274,12 @@ export class SymbolMetadataGenerator { await this.kafkaProducer.writeMarketMetadata(topic, messages); + // Mark as published only after successful Kafka write + for (const metadata of uniqueMetadata) { + const key = `${metadata.marketId}.${metadata.exchangeId}`; + this.publishedSymbols.add(key); + } + this.logger.info( { count: messages.length, duplicateCount, topic }, 'Wrote symbol metadata to Kafka' diff --git a/web/src/composables/useTradingViewDatafeed.ts b/web/src/composables/useTradingViewDatafeed.ts index 41c0c171..4c9660f8 100644 --- a/web/src/composables/useTradingViewDatafeed.ts +++ b/web/src/composables/useTradingViewDatafeed.ts @@ -45,6 +45,9 @@ export class WebSocketDatafeed implements IBasicDataFeed { private configuration: DatafeedConfiguration | null = null private messageHandler: MessageHandler private symbolDenominators: Map = new Map() // Track denominators per symbol + // Tracks the last bar time (ms) returned by getBars per "symbolKey_periodSeconds". + // bar_updates with time < this watermark are stale and already covered by history. + private lastBarTimes: Map = new Map() constructor() { // Use the shared WebSocket connection (managed by App.vue authentication) @@ -118,6 +121,14 @@ export class WebSocketDatafeed implements IBasicDataFeed { const symbolKey = subscription.symbolInfo.ticker || subscription.symbolInfo.name const denoms = this.symbolDenominators.get(symbolKey) || { tick: 1, base: 1 } + // Drop bars already covered by getBars history to prevent time-order violations + const barTimeMs = message.bar.time * 1000 + const barKey = `${symbolKey}_${message.period_seconds}` + const watermark = this.lastBarTimes.get(barKey) ?? 0 + if (barTimeMs < watermark) { + return + } + const bar: Bar = { time: message.bar.time * 1000, // Convert to milliseconds open: parseFloat(message.bar.open) / denoms.tick, @@ -284,6 +295,16 @@ export class WebSocketDatafeed implements IBasicDataFeed { bars.sort((a, b) => a.time - b.time) + // Update last-bar watermark so bar_update handler can drop stale replays + if (bars.length > 0) { + const barKey = `${symbolKey}_${intervalToSeconds(resolution)}` + const newLast = bars[bars.length - 1].time + const prevLast = this.lastBarTimes.get(barKey) ?? 0 + if (newLast > prevLast) { + this.lastBarTimes.set(barKey, newLast) + } + } + console.log('[TradingView Datafeed] Scaled bar sample:', bars[0]) const meta: HistoryMetadata = {