From e99ef5d2dd3ac03edfefed6b78879d464f273ee5 Mon Sep 17 00:00:00 2001 From: Tim Olson Date: Wed, 11 Mar 2026 18:47:11 -0400 Subject: [PATCH] backend redesign --- .gitignore | 13 +- .idea/ai.iml | 8 +- {backend => backend.old}/config.yaml | 0 .../examples/ccxt_integration_example.py | 0 .../memory/chart_context.md | 0 .../memory/python_analysis.md | 0 .../memory/tradingview_shapes.md | 0 {backend => backend.old}/requirements-pre.txt | 0 {backend => backend.old}/requirements.txt | 0 .../soul/automation_agent.md | 0 {backend => backend.old}/soul/chart_agent.md | 0 {backend => backend.old}/soul/data_agent.md | 0 {backend => backend.old}/soul/main_agent.md | 0 .../soul/research_agent.md | 0 .../src/agent/__init__.py | 0 {backend => backend.old}/src/agent/core.py | 0 {backend => backend.old}/src/agent/memory.py | 0 {backend => backend.old}/src/agent/prompts.py | 0 {backend => backend.old}/src/agent/routers.py | 0 {backend => backend.old}/src/agent/session.py | 0 .../src/agent/subagent.py | 0 .../src/agent/tools/CHART_UTILS_README.md | 0 .../src/agent/tools/TRIGGER_TOOLS.md | 0 .../src/agent/tools/__init__.py | 0 .../src/agent/tools/chart_tools.py | 0 .../src/agent/tools/chart_utils.py | 0 .../src/agent/tools/chart_utils_example.py | 0 .../src/agent/tools/datasource_tools.py | 0 .../src/agent/tools/indicator_tools.py | 0 .../src/agent/tools/research_tools.py | 0 .../src/agent/tools/shape_tools.py | 0 .../src/agent/tools/sync_tools.py | 0 .../src/agent/tools/trigger_tools.py | 0 .../src/datasource/__init__.py | 0 .../src/datasource/adapters/__init__.py | 0 .../src/datasource/adapters/ccxt_adapter.py | 0 .../src/datasource/adapters/demo.py | 0 .../src/datasource/base.py | 0 .../src/datasource/registry.py | 0 .../src/datasource/schema.py | 0 .../src/datasource/subscription_manager.py | 0 .../src/datasource/websocket_handler.py | 0 .../src/datasource/websocket_protocol.py | 0 .../src/exchange_kernel/README.md | 0 .../src/exchange_kernel/__init__.py | 0 
.../src/exchange_kernel/base.py | 0 .../src/exchange_kernel/events.py | 0 .../src/exchange_kernel/models.py | 0 .../src/exchange_kernel/state.py | 0 .../src/gateway/__init__.py | 0 .../src/gateway/channels/__init__.py | 0 .../src/gateway/channels/base.py | 0 .../src/gateway/channels/websocket.py | 0 {backend => backend.old}/src/gateway/hub.py | 0 .../src/gateway/protocol.py | 0 .../src/gateway/user_session.py | 0 .../src/indicator/__init__.py | 0 .../src/indicator/base.py | 0 .../src/indicator/custom_indicators.py | 0 .../src/indicator/pipeline.py | 0 .../src/indicator/registry.py | 0 .../src/indicator/schema.py | 0 .../src/indicator/talib_adapter.py | 0 .../src/indicator/tv_mapping.py | 0 {backend => backend.old}/src/main.py | 0 .../src/schema/chart_state.py | 0 .../src/schema/indicator.py | 0 .../src/schema/order_spec.py | 0 {backend => backend.old}/src/schema/shape.py | 0 .../src/secrets_manager/__init__.py | 0 .../src/secrets_manager/cli.py | 0 .../src/secrets_manager/crypto.py | 0 .../src/secrets_manager/store.py | 0 {backend => backend.old}/src/sync/protocol.py | 0 {backend => backend.old}/src/sync/registry.py | 0 .../src/trigger/PRIORITIES.md | 0 .../src/trigger/README.md | 0 .../src/trigger/__init__.py | 0 .../src/trigger/context.py | 0 .../src/trigger/coordinator.py | 0 .../src/trigger/handlers.py | 0 {backend => backend.old}/src/trigger/queue.py | 0 .../src/trigger/scheduler.py | 0 {backend => backend.old}/src/trigger/store.py | 0 {backend => backend.old}/src/trigger/types.py | 0 {backend => backend.old}/tests/__init__.py | 0 .../tests/datafeed_client_example.py | 0 .../tests/test_ccxt_datasource.py | 0 .../tests/test_datafeed_websocket.py | 0 .../tests/test_websocket.py | 0 bin/build-all | 17 + bin/config-update | 128 ++ bin/deploy | 19 +- bin/dev | 364 ++++ bin/secret-update | 117 ++ client-py/README.md | 259 +++ client-py/__init__.py | 3 + client-py/dexorder/__init__.py | 16 + client-py/dexorder/history_client.py | 296 ++++ 
client-py/dexorder/iceberg_client.py | 179 ++ client-py/dexorder/ohlc_client.py | 142 ++ client-py/setup.py | 23 + deploy/Dockerfile-alpine-backend | 38 - deploy/Dockerfile-backend | 65 - deploy/ingress.yaml | 38 - deploy/k8s/README.md | 287 ++++ deploy/{ => k8s/base}/backend.yaml | 0 deploy/k8s/base/ingress.yaml | 17 + deploy/k8s/base/init.yaml | 9 + deploy/k8s/base/kustomization.yaml | 5 + deploy/{ => k8s/base}/web.yaml | 0 deploy/k8s/dev/configs/flink-config.yaml | 40 + deploy/k8s/dev/configs/ingestor-config.yaml | 24 + deploy/k8s/dev/configs/relay-config.yaml | 19 + deploy/k8s/dev/infrastructure.yaml | 519 ++++++ deploy/k8s/dev/ingress-dev.yaml | 11 + deploy/k8s/dev/kustomization.yaml | 32 + .../k8s/dev/secrets/ai-secrets.yaml.example | 7 + .../dev/secrets/flink-secrets.yaml.example | 9 + .../dev/secrets/ingestor-secrets.yaml.example | 13 + .../k8s/dev/secrets/minio-secret.yaml.example | 8 + .../dev/secrets/postgres-secret.yaml.example | 7 + deploy/k8s/prod/configs/flink-config.yaml | 30 + deploy/k8s/prod/configs/ingestor-config.yaml | 24 + deploy/k8s/prod/configs/relay-config.yaml | 19 + deploy/k8s/prod/kustomization.yaml | 40 + deploy/k8s/prod/patches.yaml | 52 + .../k8s/prod/secrets/ai-secrets.yaml.example | 7 + .../secrets/ingestor-secrets.yaml.example | 13 + .../prod/secrets/minio-secret.yaml.example | 8 + .../prod/secrets/postgres-secret.yaml.example | 7 + {doc => doc.old}/auth.md | 0 {doc => doc.old}/data.md | 0 {doc => doc.old}/design.md | 0 {doc => doc.old}/libraries.md | 0 {doc => doc.old}/mvp.md | 2 +- doc.old/trendspider.md | 38 + doc/agent_redesign.md | 9 + doc/backend_redesign.md | 110 ++ doc/config.md | 18 + docker-compose.yaml | 139 ++ flink/.gitignore | 13 + flink/Dockerfile | 37 + flink/README.md | 77 + flink/config.example.yaml | 31 + flink/flink-cluster.yaml | 42 + flink/pom.xml | 250 +++ flink/secrets.example.yaml | 8 + .../com/dexorder/flink/TradingFlinkApp.java | 253 +++ .../com/dexorder/flink/config/AppConfig.java | 154 ++ 
.../flink/iceberg/SchemaInitializer.java | 155 ++ .../flink/ingestor/DataRequestMessage.java | 111 ++ .../flink/ingestor/DataResponseMessage.java | 91 + .../ingestor/IngestorControlChannel.java | 165 ++ .../ingestor/IngestorResponseListener.java | 172 ++ .../flink/ingestor/IngestorWorkQueue.java | 164 ++ .../com/dexorder/flink/kafka/TopicConfig.java | 60 + .../dexorder/flink/kafka/TopicManager.java | 224 +++ .../HistoryNotificationForwarder.java | 100 ++ .../HistoryNotificationFunction.java | 137 ++ .../HistoryNotificationPublisher.java | 130 ++ .../publisher/OHLCBatchDeserializer.java | 103 ++ .../flink/publisher/OHLCBatchWrapper.java | 175 ++ .../flink/sink/HistoricalBatchWriter.java | 117 ++ .../dexorder/flink/sink/IcebergOHLCSink.java | 121 ++ .../dexorder/flink/zmq/ZmqChannelManager.java | 286 ++++ .../resources/iceberg-schemas/ohlc_schema.sql | 54 + flink/src/main/resources/topics-dev.yaml | 29 + flink/src/main/resources/topics.yaml | 29 + flink/values.yaml | 8 + iceberg/README.md | 138 ++ iceberg/ohlc_schema.sql | 53 + ingestor/.gitignore | 8 + ingestor/Dockerfile | 30 + ingestor/README.md | 226 +++ ingestor/config.example.yaml | 24 + ingestor/package.json | 33 + ingestor/src/ccxt-fetcher.js | 248 +++ ingestor/src/index.js | 411 +++++ ingestor/src/kafka-producer.js | 270 +++ ingestor/src/realtime-poller.js | 217 +++ ingestor/src/zmq-client.js | 116 ++ kafka/README.md | 74 + kafka/kafka-cluster.yaml | 85 + kafka/kafka-metrics-config.yaml | 44 + kafka/values.yaml | 9 + protobuf/ingestor.proto | 329 ++++ protobuf/market.proto | 19 + protobuf/ohlc.proto | 61 + protobuf/protocol.md | 168 ++ protobuf/tick.proto | 48 + relay/.gitignore | 7 + relay/Cargo.lock | 1466 +++++++++++++++++ relay/Cargo.toml | 27 + relay/Dockerfile | 52 + relay/README.md | 238 +++ relay/build.rs | 16 + relay/config.example.yaml | 19 + relay/src/config.rs | 104 ++ relay/src/main.rs | 47 + relay/src/proto.rs | 3 + relay/src/relay.rs | 323 ++++ test/README.md | 109 ++ 
test/history_client/Dockerfile | 23 + test/history_client/README.md | 46 + test/history_client/client.py | 200 +++ test/history_client/client_async.py | 308 ++++ test/history_client/client_ohlc_api.py | 126 ++ test/history_client/run-test.sh | 29 + deploy/Dockerfile-web => web/Dockerfile | 4 +- 210 files changed, 12147 insertions(+), 155 deletions(-) rename {backend => backend.old}/config.yaml (100%) rename {backend => backend.old}/examples/ccxt_integration_example.py (100%) rename {backend => backend.old}/memory/chart_context.md (100%) rename {backend => backend.old}/memory/python_analysis.md (100%) rename {backend => backend.old}/memory/tradingview_shapes.md (100%) rename {backend => backend.old}/requirements-pre.txt (100%) rename {backend => backend.old}/requirements.txt (100%) rename {backend => backend.old}/soul/automation_agent.md (100%) rename {backend => backend.old}/soul/chart_agent.md (100%) rename {backend => backend.old}/soul/data_agent.md (100%) rename {backend => backend.old}/soul/main_agent.md (100%) rename {backend => backend.old}/soul/research_agent.md (100%) rename {backend => backend.old}/src/agent/__init__.py (100%) rename {backend => backend.old}/src/agent/core.py (100%) rename {backend => backend.old}/src/agent/memory.py (100%) rename {backend => backend.old}/src/agent/prompts.py (100%) rename {backend => backend.old}/src/agent/routers.py (100%) rename {backend => backend.old}/src/agent/session.py (100%) rename {backend => backend.old}/src/agent/subagent.py (100%) rename {backend => backend.old}/src/agent/tools/CHART_UTILS_README.md (100%) rename {backend => backend.old}/src/agent/tools/TRIGGER_TOOLS.md (100%) rename {backend => backend.old}/src/agent/tools/__init__.py (100%) rename {backend => backend.old}/src/agent/tools/chart_tools.py (100%) rename {backend => backend.old}/src/agent/tools/chart_utils.py (100%) rename {backend => backend.old}/src/agent/tools/chart_utils_example.py (100%) rename {backend => 
backend.old}/src/agent/tools/datasource_tools.py (100%) rename {backend => backend.old}/src/agent/tools/indicator_tools.py (100%) rename {backend => backend.old}/src/agent/tools/research_tools.py (100%) rename {backend => backend.old}/src/agent/tools/shape_tools.py (100%) rename {backend => backend.old}/src/agent/tools/sync_tools.py (100%) rename {backend => backend.old}/src/agent/tools/trigger_tools.py (100%) rename {backend => backend.old}/src/datasource/__init__.py (100%) rename {backend => backend.old}/src/datasource/adapters/__init__.py (100%) rename {backend => backend.old}/src/datasource/adapters/ccxt_adapter.py (100%) rename {backend => backend.old}/src/datasource/adapters/demo.py (100%) rename {backend => backend.old}/src/datasource/base.py (100%) rename {backend => backend.old}/src/datasource/registry.py (100%) rename {backend => backend.old}/src/datasource/schema.py (100%) rename {backend => backend.old}/src/datasource/subscription_manager.py (100%) rename {backend => backend.old}/src/datasource/websocket_handler.py (100%) rename {backend => backend.old}/src/datasource/websocket_protocol.py (100%) rename {backend => backend.old}/src/exchange_kernel/README.md (100%) rename {backend => backend.old}/src/exchange_kernel/__init__.py (100%) rename {backend => backend.old}/src/exchange_kernel/base.py (100%) rename {backend => backend.old}/src/exchange_kernel/events.py (100%) rename {backend => backend.old}/src/exchange_kernel/models.py (100%) rename {backend => backend.old}/src/exchange_kernel/state.py (100%) rename {backend => backend.old}/src/gateway/__init__.py (100%) rename {backend => backend.old}/src/gateway/channels/__init__.py (100%) rename {backend => backend.old}/src/gateway/channels/base.py (100%) rename {backend => backend.old}/src/gateway/channels/websocket.py (100%) rename {backend => backend.old}/src/gateway/hub.py (100%) rename {backend => backend.old}/src/gateway/protocol.py (100%) rename {backend => backend.old}/src/gateway/user_session.py 
(100%) rename {backend => backend.old}/src/indicator/__init__.py (100%) rename {backend => backend.old}/src/indicator/base.py (100%) rename {backend => backend.old}/src/indicator/custom_indicators.py (100%) rename {backend => backend.old}/src/indicator/pipeline.py (100%) rename {backend => backend.old}/src/indicator/registry.py (100%) rename {backend => backend.old}/src/indicator/schema.py (100%) rename {backend => backend.old}/src/indicator/talib_adapter.py (100%) rename {backend => backend.old}/src/indicator/tv_mapping.py (100%) rename {backend => backend.old}/src/main.py (100%) rename {backend => backend.old}/src/schema/chart_state.py (100%) rename {backend => backend.old}/src/schema/indicator.py (100%) rename {backend => backend.old}/src/schema/order_spec.py (100%) rename {backend => backend.old}/src/schema/shape.py (100%) rename {backend => backend.old}/src/secrets_manager/__init__.py (100%) rename {backend => backend.old}/src/secrets_manager/cli.py (100%) rename {backend => backend.old}/src/secrets_manager/crypto.py (100%) rename {backend => backend.old}/src/secrets_manager/store.py (100%) rename {backend => backend.old}/src/sync/protocol.py (100%) rename {backend => backend.old}/src/sync/registry.py (100%) rename {backend => backend.old}/src/trigger/PRIORITIES.md (100%) rename {backend => backend.old}/src/trigger/README.md (100%) rename {backend => backend.old}/src/trigger/__init__.py (100%) rename {backend => backend.old}/src/trigger/context.py (100%) rename {backend => backend.old}/src/trigger/coordinator.py (100%) rename {backend => backend.old}/src/trigger/handlers.py (100%) rename {backend => backend.old}/src/trigger/queue.py (100%) rename {backend => backend.old}/src/trigger/scheduler.py (100%) rename {backend => backend.old}/src/trigger/store.py (100%) rename {backend => backend.old}/src/trigger/types.py (100%) rename {backend => backend.old}/tests/__init__.py (100%) rename {backend => backend.old}/tests/datafeed_client_example.py (100%) rename 
{backend => backend.old}/tests/test_ccxt_datasource.py (100%) rename {backend => backend.old}/tests/test_datafeed_websocket.py (100%) rename {backend => backend.old}/tests/test_websocket.py (100%) create mode 100755 bin/build-all create mode 100755 bin/config-update create mode 100755 bin/dev create mode 100755 bin/secret-update create mode 100644 client-py/README.md create mode 100644 client-py/__init__.py create mode 100644 client-py/dexorder/__init__.py create mode 100644 client-py/dexorder/history_client.py create mode 100644 client-py/dexorder/iceberg_client.py create mode 100644 client-py/dexorder/ohlc_client.py create mode 100644 client-py/setup.py delete mode 100644 deploy/Dockerfile-alpine-backend delete mode 100644 deploy/Dockerfile-backend delete mode 100644 deploy/ingress.yaml create mode 100644 deploy/k8s/README.md rename deploy/{ => k8s/base}/backend.yaml (100%) create mode 100644 deploy/k8s/base/ingress.yaml create mode 100644 deploy/k8s/base/init.yaml create mode 100644 deploy/k8s/base/kustomization.yaml rename deploy/{ => k8s/base}/web.yaml (100%) create mode 100644 deploy/k8s/dev/configs/flink-config.yaml create mode 100644 deploy/k8s/dev/configs/ingestor-config.yaml create mode 100644 deploy/k8s/dev/configs/relay-config.yaml create mode 100644 deploy/k8s/dev/infrastructure.yaml create mode 100644 deploy/k8s/dev/ingress-dev.yaml create mode 100644 deploy/k8s/dev/kustomization.yaml create mode 100644 deploy/k8s/dev/secrets/ai-secrets.yaml.example create mode 100644 deploy/k8s/dev/secrets/flink-secrets.yaml.example create mode 100644 deploy/k8s/dev/secrets/ingestor-secrets.yaml.example create mode 100644 deploy/k8s/dev/secrets/minio-secret.yaml.example create mode 100644 deploy/k8s/dev/secrets/postgres-secret.yaml.example create mode 100644 deploy/k8s/prod/configs/flink-config.yaml create mode 100644 deploy/k8s/prod/configs/ingestor-config.yaml create mode 100644 deploy/k8s/prod/configs/relay-config.yaml create mode 100644 
deploy/k8s/prod/kustomization.yaml create mode 100644 deploy/k8s/prod/patches.yaml create mode 100644 deploy/k8s/prod/secrets/ai-secrets.yaml.example create mode 100644 deploy/k8s/prod/secrets/ingestor-secrets.yaml.example create mode 100644 deploy/k8s/prod/secrets/minio-secret.yaml.example create mode 100644 deploy/k8s/prod/secrets/postgres-secret.yaml.example rename {doc => doc.old}/auth.md (100%) rename {doc => doc.old}/data.md (100%) rename {doc => doc.old}/design.md (100%) rename {doc => doc.old}/libraries.md (100%) rename {doc => doc.old}/mvp.md (93%) create mode 100644 doc.old/trendspider.md create mode 100644 doc/agent_redesign.md create mode 100644 doc/backend_redesign.md create mode 100644 doc/config.md create mode 100644 docker-compose.yaml create mode 100644 flink/.gitignore create mode 100644 flink/Dockerfile create mode 100644 flink/README.md create mode 100644 flink/config.example.yaml create mode 100644 flink/flink-cluster.yaml create mode 100644 flink/pom.xml create mode 100644 flink/secrets.example.yaml create mode 100644 flink/src/main/java/com/dexorder/flink/TradingFlinkApp.java create mode 100644 flink/src/main/java/com/dexorder/flink/config/AppConfig.java create mode 100644 flink/src/main/java/com/dexorder/flink/iceberg/SchemaInitializer.java create mode 100644 flink/src/main/java/com/dexorder/flink/ingestor/DataRequestMessage.java create mode 100644 flink/src/main/java/com/dexorder/flink/ingestor/DataResponseMessage.java create mode 100644 flink/src/main/java/com/dexorder/flink/ingestor/IngestorControlChannel.java create mode 100644 flink/src/main/java/com/dexorder/flink/ingestor/IngestorResponseListener.java create mode 100644 flink/src/main/java/com/dexorder/flink/ingestor/IngestorWorkQueue.java create mode 100644 flink/src/main/java/com/dexorder/flink/kafka/TopicConfig.java create mode 100644 flink/src/main/java/com/dexorder/flink/kafka/TopicManager.java create mode 100644 
flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationForwarder.java create mode 100644 flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationFunction.java create mode 100644 flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationPublisher.java create mode 100644 flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchDeserializer.java create mode 100644 flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchWrapper.java create mode 100644 flink/src/main/java/com/dexorder/flink/sink/HistoricalBatchWriter.java create mode 100644 flink/src/main/java/com/dexorder/flink/sink/IcebergOHLCSink.java create mode 100644 flink/src/main/java/com/dexorder/flink/zmq/ZmqChannelManager.java create mode 100644 flink/src/main/resources/iceberg-schemas/ohlc_schema.sql create mode 100644 flink/src/main/resources/topics-dev.yaml create mode 100644 flink/src/main/resources/topics.yaml create mode 100644 flink/values.yaml create mode 100644 iceberg/README.md create mode 100644 iceberg/ohlc_schema.sql create mode 100644 ingestor/.gitignore create mode 100644 ingestor/Dockerfile create mode 100644 ingestor/README.md create mode 100644 ingestor/config.example.yaml create mode 100644 ingestor/package.json create mode 100644 ingestor/src/ccxt-fetcher.js create mode 100644 ingestor/src/index.js create mode 100644 ingestor/src/kafka-producer.js create mode 100644 ingestor/src/realtime-poller.js create mode 100644 ingestor/src/zmq-client.js create mode 100644 kafka/README.md create mode 100644 kafka/kafka-cluster.yaml create mode 100644 kafka/kafka-metrics-config.yaml create mode 100644 kafka/values.yaml create mode 100644 protobuf/ingestor.proto create mode 100644 protobuf/market.proto create mode 100644 protobuf/ohlc.proto create mode 100644 protobuf/protocol.md create mode 100644 protobuf/tick.proto create mode 100644 relay/.gitignore create mode 100644 relay/Cargo.lock create mode 100644 relay/Cargo.toml create mode 100644 relay/Dockerfile create 
mode 100644 relay/README.md create mode 100644 relay/build.rs create mode 100644 relay/config.example.yaml create mode 100644 relay/src/config.rs create mode 100644 relay/src/main.rs create mode 100644 relay/src/proto.rs create mode 100644 relay/src/relay.rs create mode 100644 test/README.md create mode 100644 test/history_client/Dockerfile create mode 100644 test/history_client/README.md create mode 100644 test/history_client/client.py create mode 100644 test/history_client/client_async.py create mode 100755 test/history_client/client_ohlc_api.py create mode 100755 test/history_client/run-test.sh rename deploy/Dockerfile-web => web/Dockerfile (86%) diff --git a/.gitignore b/.gitignore index a8baa26..4540092 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -/backend/data -/backend/uploads/ +/backend.old/data +/backend.old/uploads/ # Environment variables .env @@ -101,3 +101,12 @@ Thumbs.db *.swp *.swo *.bak + +# Kubernetes secrets (never commit actual secrets!) +deploy/k8s/dev/secrets/*.yaml +deploy/k8s/prod/secrets/*.yaml +!deploy/k8s/dev/secrets/*.yaml.example +!deploy/k8s/prod/secrets/*.yaml.example + +# Dev environment image tags +.dev-image-tag diff --git a/.idea/ai.iml b/.idea/ai.iml index f13d735..1394582 100644 --- a/.idea/ai.iml +++ b/.idea/ai.iml @@ -2,10 +2,12 @@ - - + + + - + + diff --git a/backend/config.yaml b/backend.old/config.yaml similarity index 100% rename from backend/config.yaml rename to backend.old/config.yaml diff --git a/backend/examples/ccxt_integration_example.py b/backend.old/examples/ccxt_integration_example.py similarity index 100% rename from backend/examples/ccxt_integration_example.py rename to backend.old/examples/ccxt_integration_example.py diff --git a/backend/memory/chart_context.md b/backend.old/memory/chart_context.md similarity index 100% rename from backend/memory/chart_context.md rename to backend.old/memory/chart_context.md diff --git a/backend/memory/python_analysis.md b/backend.old/memory/python_analysis.md 
similarity index 100% rename from backend/memory/python_analysis.md rename to backend.old/memory/python_analysis.md diff --git a/backend/memory/tradingview_shapes.md b/backend.old/memory/tradingview_shapes.md similarity index 100% rename from backend/memory/tradingview_shapes.md rename to backend.old/memory/tradingview_shapes.md diff --git a/backend/requirements-pre.txt b/backend.old/requirements-pre.txt similarity index 100% rename from backend/requirements-pre.txt rename to backend.old/requirements-pre.txt diff --git a/backend/requirements.txt b/backend.old/requirements.txt similarity index 100% rename from backend/requirements.txt rename to backend.old/requirements.txt diff --git a/backend/soul/automation_agent.md b/backend.old/soul/automation_agent.md similarity index 100% rename from backend/soul/automation_agent.md rename to backend.old/soul/automation_agent.md diff --git a/backend/soul/chart_agent.md b/backend.old/soul/chart_agent.md similarity index 100% rename from backend/soul/chart_agent.md rename to backend.old/soul/chart_agent.md diff --git a/backend/soul/data_agent.md b/backend.old/soul/data_agent.md similarity index 100% rename from backend/soul/data_agent.md rename to backend.old/soul/data_agent.md diff --git a/backend/soul/main_agent.md b/backend.old/soul/main_agent.md similarity index 100% rename from backend/soul/main_agent.md rename to backend.old/soul/main_agent.md diff --git a/backend/soul/research_agent.md b/backend.old/soul/research_agent.md similarity index 100% rename from backend/soul/research_agent.md rename to backend.old/soul/research_agent.md diff --git a/backend/src/agent/__init__.py b/backend.old/src/agent/__init__.py similarity index 100% rename from backend/src/agent/__init__.py rename to backend.old/src/agent/__init__.py diff --git a/backend/src/agent/core.py b/backend.old/src/agent/core.py similarity index 100% rename from backend/src/agent/core.py rename to backend.old/src/agent/core.py diff --git a/backend/src/agent/memory.py 
b/backend.old/src/agent/memory.py similarity index 100% rename from backend/src/agent/memory.py rename to backend.old/src/agent/memory.py diff --git a/backend/src/agent/prompts.py b/backend.old/src/agent/prompts.py similarity index 100% rename from backend/src/agent/prompts.py rename to backend.old/src/agent/prompts.py diff --git a/backend/src/agent/routers.py b/backend.old/src/agent/routers.py similarity index 100% rename from backend/src/agent/routers.py rename to backend.old/src/agent/routers.py diff --git a/backend/src/agent/session.py b/backend.old/src/agent/session.py similarity index 100% rename from backend/src/agent/session.py rename to backend.old/src/agent/session.py diff --git a/backend/src/agent/subagent.py b/backend.old/src/agent/subagent.py similarity index 100% rename from backend/src/agent/subagent.py rename to backend.old/src/agent/subagent.py diff --git a/backend/src/agent/tools/CHART_UTILS_README.md b/backend.old/src/agent/tools/CHART_UTILS_README.md similarity index 100% rename from backend/src/agent/tools/CHART_UTILS_README.md rename to backend.old/src/agent/tools/CHART_UTILS_README.md diff --git a/backend/src/agent/tools/TRIGGER_TOOLS.md b/backend.old/src/agent/tools/TRIGGER_TOOLS.md similarity index 100% rename from backend/src/agent/tools/TRIGGER_TOOLS.md rename to backend.old/src/agent/tools/TRIGGER_TOOLS.md diff --git a/backend/src/agent/tools/__init__.py b/backend.old/src/agent/tools/__init__.py similarity index 100% rename from backend/src/agent/tools/__init__.py rename to backend.old/src/agent/tools/__init__.py diff --git a/backend/src/agent/tools/chart_tools.py b/backend.old/src/agent/tools/chart_tools.py similarity index 100% rename from backend/src/agent/tools/chart_tools.py rename to backend.old/src/agent/tools/chart_tools.py diff --git a/backend/src/agent/tools/chart_utils.py b/backend.old/src/agent/tools/chart_utils.py similarity index 100% rename from backend/src/agent/tools/chart_utils.py rename to 
backend.old/src/agent/tools/chart_utils.py diff --git a/backend/src/agent/tools/chart_utils_example.py b/backend.old/src/agent/tools/chart_utils_example.py similarity index 100% rename from backend/src/agent/tools/chart_utils_example.py rename to backend.old/src/agent/tools/chart_utils_example.py diff --git a/backend/src/agent/tools/datasource_tools.py b/backend.old/src/agent/tools/datasource_tools.py similarity index 100% rename from backend/src/agent/tools/datasource_tools.py rename to backend.old/src/agent/tools/datasource_tools.py diff --git a/backend/src/agent/tools/indicator_tools.py b/backend.old/src/agent/tools/indicator_tools.py similarity index 100% rename from backend/src/agent/tools/indicator_tools.py rename to backend.old/src/agent/tools/indicator_tools.py diff --git a/backend/src/agent/tools/research_tools.py b/backend.old/src/agent/tools/research_tools.py similarity index 100% rename from backend/src/agent/tools/research_tools.py rename to backend.old/src/agent/tools/research_tools.py diff --git a/backend/src/agent/tools/shape_tools.py b/backend.old/src/agent/tools/shape_tools.py similarity index 100% rename from backend/src/agent/tools/shape_tools.py rename to backend.old/src/agent/tools/shape_tools.py diff --git a/backend/src/agent/tools/sync_tools.py b/backend.old/src/agent/tools/sync_tools.py similarity index 100% rename from backend/src/agent/tools/sync_tools.py rename to backend.old/src/agent/tools/sync_tools.py diff --git a/backend/src/agent/tools/trigger_tools.py b/backend.old/src/agent/tools/trigger_tools.py similarity index 100% rename from backend/src/agent/tools/trigger_tools.py rename to backend.old/src/agent/tools/trigger_tools.py diff --git a/backend/src/datasource/__init__.py b/backend.old/src/datasource/__init__.py similarity index 100% rename from backend/src/datasource/__init__.py rename to backend.old/src/datasource/__init__.py diff --git a/backend/src/datasource/adapters/__init__.py 
b/backend.old/src/datasource/adapters/__init__.py similarity index 100% rename from backend/src/datasource/adapters/__init__.py rename to backend.old/src/datasource/adapters/__init__.py diff --git a/backend/src/datasource/adapters/ccxt_adapter.py b/backend.old/src/datasource/adapters/ccxt_adapter.py similarity index 100% rename from backend/src/datasource/adapters/ccxt_adapter.py rename to backend.old/src/datasource/adapters/ccxt_adapter.py diff --git a/backend/src/datasource/adapters/demo.py b/backend.old/src/datasource/adapters/demo.py similarity index 100% rename from backend/src/datasource/adapters/demo.py rename to backend.old/src/datasource/adapters/demo.py diff --git a/backend/src/datasource/base.py b/backend.old/src/datasource/base.py similarity index 100% rename from backend/src/datasource/base.py rename to backend.old/src/datasource/base.py diff --git a/backend/src/datasource/registry.py b/backend.old/src/datasource/registry.py similarity index 100% rename from backend/src/datasource/registry.py rename to backend.old/src/datasource/registry.py diff --git a/backend/src/datasource/schema.py b/backend.old/src/datasource/schema.py similarity index 100% rename from backend/src/datasource/schema.py rename to backend.old/src/datasource/schema.py diff --git a/backend/src/datasource/subscription_manager.py b/backend.old/src/datasource/subscription_manager.py similarity index 100% rename from backend/src/datasource/subscription_manager.py rename to backend.old/src/datasource/subscription_manager.py diff --git a/backend/src/datasource/websocket_handler.py b/backend.old/src/datasource/websocket_handler.py similarity index 100% rename from backend/src/datasource/websocket_handler.py rename to backend.old/src/datasource/websocket_handler.py diff --git a/backend/src/datasource/websocket_protocol.py b/backend.old/src/datasource/websocket_protocol.py similarity index 100% rename from backend/src/datasource/websocket_protocol.py rename to 
backend.old/src/datasource/websocket_protocol.py diff --git a/backend/src/exchange_kernel/README.md b/backend.old/src/exchange_kernel/README.md similarity index 100% rename from backend/src/exchange_kernel/README.md rename to backend.old/src/exchange_kernel/README.md diff --git a/backend/src/exchange_kernel/__init__.py b/backend.old/src/exchange_kernel/__init__.py similarity index 100% rename from backend/src/exchange_kernel/__init__.py rename to backend.old/src/exchange_kernel/__init__.py diff --git a/backend/src/exchange_kernel/base.py b/backend.old/src/exchange_kernel/base.py similarity index 100% rename from backend/src/exchange_kernel/base.py rename to backend.old/src/exchange_kernel/base.py diff --git a/backend/src/exchange_kernel/events.py b/backend.old/src/exchange_kernel/events.py similarity index 100% rename from backend/src/exchange_kernel/events.py rename to backend.old/src/exchange_kernel/events.py diff --git a/backend/src/exchange_kernel/models.py b/backend.old/src/exchange_kernel/models.py similarity index 100% rename from backend/src/exchange_kernel/models.py rename to backend.old/src/exchange_kernel/models.py diff --git a/backend/src/exchange_kernel/state.py b/backend.old/src/exchange_kernel/state.py similarity index 100% rename from backend/src/exchange_kernel/state.py rename to backend.old/src/exchange_kernel/state.py diff --git a/backend/src/gateway/__init__.py b/backend.old/src/gateway/__init__.py similarity index 100% rename from backend/src/gateway/__init__.py rename to backend.old/src/gateway/__init__.py diff --git a/backend/src/gateway/channels/__init__.py b/backend.old/src/gateway/channels/__init__.py similarity index 100% rename from backend/src/gateway/channels/__init__.py rename to backend.old/src/gateway/channels/__init__.py diff --git a/backend/src/gateway/channels/base.py b/backend.old/src/gateway/channels/base.py similarity index 100% rename from backend/src/gateway/channels/base.py rename to backend.old/src/gateway/channels/base.py 
diff --git a/backend/src/gateway/channels/websocket.py b/backend.old/src/gateway/channels/websocket.py similarity index 100% rename from backend/src/gateway/channels/websocket.py rename to backend.old/src/gateway/channels/websocket.py diff --git a/backend/src/gateway/hub.py b/backend.old/src/gateway/hub.py similarity index 100% rename from backend/src/gateway/hub.py rename to backend.old/src/gateway/hub.py diff --git a/backend/src/gateway/protocol.py b/backend.old/src/gateway/protocol.py similarity index 100% rename from backend/src/gateway/protocol.py rename to backend.old/src/gateway/protocol.py diff --git a/backend/src/gateway/user_session.py b/backend.old/src/gateway/user_session.py similarity index 100% rename from backend/src/gateway/user_session.py rename to backend.old/src/gateway/user_session.py diff --git a/backend/src/indicator/__init__.py b/backend.old/src/indicator/__init__.py similarity index 100% rename from backend/src/indicator/__init__.py rename to backend.old/src/indicator/__init__.py diff --git a/backend/src/indicator/base.py b/backend.old/src/indicator/base.py similarity index 100% rename from backend/src/indicator/base.py rename to backend.old/src/indicator/base.py diff --git a/backend/src/indicator/custom_indicators.py b/backend.old/src/indicator/custom_indicators.py similarity index 100% rename from backend/src/indicator/custom_indicators.py rename to backend.old/src/indicator/custom_indicators.py diff --git a/backend/src/indicator/pipeline.py b/backend.old/src/indicator/pipeline.py similarity index 100% rename from backend/src/indicator/pipeline.py rename to backend.old/src/indicator/pipeline.py diff --git a/backend/src/indicator/registry.py b/backend.old/src/indicator/registry.py similarity index 100% rename from backend/src/indicator/registry.py rename to backend.old/src/indicator/registry.py diff --git a/backend/src/indicator/schema.py b/backend.old/src/indicator/schema.py similarity index 100% rename from backend/src/indicator/schema.py 
rename to backend.old/src/indicator/schema.py diff --git a/backend/src/indicator/talib_adapter.py b/backend.old/src/indicator/talib_adapter.py similarity index 100% rename from backend/src/indicator/talib_adapter.py rename to backend.old/src/indicator/talib_adapter.py diff --git a/backend/src/indicator/tv_mapping.py b/backend.old/src/indicator/tv_mapping.py similarity index 100% rename from backend/src/indicator/tv_mapping.py rename to backend.old/src/indicator/tv_mapping.py diff --git a/backend/src/main.py b/backend.old/src/main.py similarity index 100% rename from backend/src/main.py rename to backend.old/src/main.py diff --git a/backend/src/schema/chart_state.py b/backend.old/src/schema/chart_state.py similarity index 100% rename from backend/src/schema/chart_state.py rename to backend.old/src/schema/chart_state.py diff --git a/backend/src/schema/indicator.py b/backend.old/src/schema/indicator.py similarity index 100% rename from backend/src/schema/indicator.py rename to backend.old/src/schema/indicator.py diff --git a/backend/src/schema/order_spec.py b/backend.old/src/schema/order_spec.py similarity index 100% rename from backend/src/schema/order_spec.py rename to backend.old/src/schema/order_spec.py diff --git a/backend/src/schema/shape.py b/backend.old/src/schema/shape.py similarity index 100% rename from backend/src/schema/shape.py rename to backend.old/src/schema/shape.py diff --git a/backend/src/secrets_manager/__init__.py b/backend.old/src/secrets_manager/__init__.py similarity index 100% rename from backend/src/secrets_manager/__init__.py rename to backend.old/src/secrets_manager/__init__.py diff --git a/backend/src/secrets_manager/cli.py b/backend.old/src/secrets_manager/cli.py similarity index 100% rename from backend/src/secrets_manager/cli.py rename to backend.old/src/secrets_manager/cli.py diff --git a/backend/src/secrets_manager/crypto.py b/backend.old/src/secrets_manager/crypto.py similarity index 100% rename from 
backend/src/secrets_manager/crypto.py rename to backend.old/src/secrets_manager/crypto.py diff --git a/backend/src/secrets_manager/store.py b/backend.old/src/secrets_manager/store.py similarity index 100% rename from backend/src/secrets_manager/store.py rename to backend.old/src/secrets_manager/store.py diff --git a/backend/src/sync/protocol.py b/backend.old/src/sync/protocol.py similarity index 100% rename from backend/src/sync/protocol.py rename to backend.old/src/sync/protocol.py diff --git a/backend/src/sync/registry.py b/backend.old/src/sync/registry.py similarity index 100% rename from backend/src/sync/registry.py rename to backend.old/src/sync/registry.py diff --git a/backend/src/trigger/PRIORITIES.md b/backend.old/src/trigger/PRIORITIES.md similarity index 100% rename from backend/src/trigger/PRIORITIES.md rename to backend.old/src/trigger/PRIORITIES.md diff --git a/backend/src/trigger/README.md b/backend.old/src/trigger/README.md similarity index 100% rename from backend/src/trigger/README.md rename to backend.old/src/trigger/README.md diff --git a/backend/src/trigger/__init__.py b/backend.old/src/trigger/__init__.py similarity index 100% rename from backend/src/trigger/__init__.py rename to backend.old/src/trigger/__init__.py diff --git a/backend/src/trigger/context.py b/backend.old/src/trigger/context.py similarity index 100% rename from backend/src/trigger/context.py rename to backend.old/src/trigger/context.py diff --git a/backend/src/trigger/coordinator.py b/backend.old/src/trigger/coordinator.py similarity index 100% rename from backend/src/trigger/coordinator.py rename to backend.old/src/trigger/coordinator.py diff --git a/backend/src/trigger/handlers.py b/backend.old/src/trigger/handlers.py similarity index 100% rename from backend/src/trigger/handlers.py rename to backend.old/src/trigger/handlers.py diff --git a/backend/src/trigger/queue.py b/backend.old/src/trigger/queue.py similarity index 100% rename from backend/src/trigger/queue.py rename to 
backend.old/src/trigger/queue.py diff --git a/backend/src/trigger/scheduler.py b/backend.old/src/trigger/scheduler.py similarity index 100% rename from backend/src/trigger/scheduler.py rename to backend.old/src/trigger/scheduler.py diff --git a/backend/src/trigger/store.py b/backend.old/src/trigger/store.py similarity index 100% rename from backend/src/trigger/store.py rename to backend.old/src/trigger/store.py diff --git a/backend/src/trigger/types.py b/backend.old/src/trigger/types.py similarity index 100% rename from backend/src/trigger/types.py rename to backend.old/src/trigger/types.py diff --git a/backend/tests/__init__.py b/backend.old/tests/__init__.py similarity index 100% rename from backend/tests/__init__.py rename to backend.old/tests/__init__.py diff --git a/backend/tests/datafeed_client_example.py b/backend.old/tests/datafeed_client_example.py similarity index 100% rename from backend/tests/datafeed_client_example.py rename to backend.old/tests/datafeed_client_example.py diff --git a/backend/tests/test_ccxt_datasource.py b/backend.old/tests/test_ccxt_datasource.py similarity index 100% rename from backend/tests/test_ccxt_datasource.py rename to backend.old/tests/test_ccxt_datasource.py diff --git a/backend/tests/test_datafeed_websocket.py b/backend.old/tests/test_datafeed_websocket.py similarity index 100% rename from backend/tests/test_datafeed_websocket.py rename to backend.old/tests/test_datafeed_websocket.py diff --git a/backend/tests/test_websocket.py b/backend.old/tests/test_websocket.py similarity index 100% rename from backend/tests/test_websocket.py rename to backend.old/tests/test_websocket.py diff --git a/bin/build-all b/bin/build-all new file mode 100755 index 0000000..4ff59e0 --- /dev/null +++ b/bin/build-all @@ -0,0 +1,17 @@ +#!/bin/bash + +# Build all container images +set -e + +DIR="$(cd "$(dirname "$0")" && pwd)" + +echo "Building all container images..." 
+echo + +"$DIR/build" flink "$@" +"$DIR/build" relay "$@" +"$DIR/build" ingestor "$@" +"$DIR/build" web "$@" + +echo +echo "All images built successfully!" diff --git a/bin/config-update b/bin/config-update new file mode 100755 index 0000000..120efbf --- /dev/null +++ b/bin/config-update @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +usage() { + echo "Usage: $0 [ENVIRONMENT] [CONFIG_NAME]" + echo "" + echo "Update Kubernetes ConfigMaps from YAML files" + echo "" + echo "Arguments:" + echo " ENVIRONMENT Target environment: dev or prod (default: dev)" + echo " CONFIG_NAME Specific config to update (optional, updates all if not specified)" + echo "" + echo "Available configs:" + echo " relay-config - ZMQ relay configuration" + echo " ingestor-config - CCXT ingestor configuration" + echo " flink-config - Flink job configuration" + echo "" + echo "Examples:" + echo " $0 # Update all dev configs" + echo " $0 dev # Update all dev configs" + echo " $0 dev relay-config # Update only relay-config in dev" + echo " $0 prod # Update all prod configs" + echo " $0 prod flink-config # Update only flink-config in prod" + exit 1 +} + +# Parse arguments +ENV="${1:-dev}" +CONFIG_NAME="${2:-}" + +if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then + echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}" + usage +fi + +CONFIG_DIR="$ROOT_DIR/deploy/k8s/$ENV/configs" + +if [ ! -d "$CONFIG_DIR" ]; then + echo -e "${RED}Error: Config directory not found: $CONFIG_DIR${NC}" + exit 1 +fi + +# Get kubectl context +if [[ "$ENV" == "prod" ]]; then + CONTEXT=$(kubectl config current-context) + echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION configs!${NC}" + echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}" + read -p "Are you sure you want to continue? 
(yes/no): " confirm + if [[ "$confirm" != "yes" ]]; then + echo "Aborted." + exit 0 + fi +fi + +apply_config() { + local config_name="$1" + local config_file="$CONFIG_DIR/$config_name.yaml" + + if [ ! -f "$config_file" ]; then + echo -e "${RED}✗ Config file not found: $config_file${NC}" + return 1 + fi + + echo -e "${GREEN}→${NC} Creating/updating ConfigMap $config_name..." + kubectl create configmap "$config_name" \ + --from-file=config.yaml="$config_file" \ + --dry-run=client -o yaml | kubectl apply -f - + echo -e "${GREEN}✓${NC} $config_name updated" + + # Optionally restart pods that use this config + local restart_pods="" + case "$config_name" in + relay-config) + restart_pods="deployment/relay" + ;; + ingestor-config) + restart_pods="deployment/ingestor" + ;; + flink-config) + restart_pods="deployment/flink-jobmanager deployment/flink-taskmanager" + ;; + esac + + if [ -n "$restart_pods" ]; then + echo -e "${YELLOW} Restarting pods...${NC}" + kubectl rollout restart $restart_pods 2>/dev/null || echo -e "${YELLOW} (No pods found to restart)${NC}" + fi +} + +# Update specific config or all configs +if [ -n "$CONFIG_NAME" ]; then + # Update single config + apply_config "$CONFIG_NAME" +else + # Update all configs + echo -e "${GREEN}Updating all $ENV configs...${NC}" + echo "" + + CONFIGS=( + "relay-config" + "ingestor-config" + "flink-config" + ) + + FAILED=0 + for config in "${CONFIGS[@]}"; do + if ! 
apply_config "$config"; then + FAILED=$((FAILED + 1)) + fi + done + + echo "" + if [ $FAILED -gt 0 ]; then + echo -e "${YELLOW}⚠️ $FAILED config(s) failed to apply${NC}" + exit 1 + else + echo -e "${GREEN}✓ All configs updated successfully${NC}" + fi +fi diff --git a/bin/deploy b/bin/deploy index 6b1b7b4..c6bbf93 100755 --- a/bin/deploy +++ b/bin/deploy @@ -1,11 +1,11 @@ #!/bin/bash #REMOTE=northamerica-northeast2-docker.pkg.dev/dexorder-430504/dexorder -REMOTE=git.dxod.org/dexorder/dexorder +REMOTE=${REMOTE:-git.dxod.org/dexorder/dexorder} -if [ "$1" != "backend" ] && [ "$1" != "web" ]; then +if [ "$1" != "flink" ] && [ "$1" != "relay" ] && [ "$1" != "ingestor" ] && [ "$1" != "web" ]; then echo - echo usage: "$0 "'{backend|web} [''dev''] [config] [deployment] [kubernetes] [image_tag]' + echo usage: "$0 "'{flink|relay|ingestor|web} [''dev''] [config] [deployment] [kubernetes] [image_tag]' echo echo ' [''dev''] if the literal string ''dev'' is not the second argument, then the build refuses to run if source code is not checked in. Otherwise, the git revision numbers are used in the image tag.' echo @@ -86,14 +86,21 @@ fi if [ "$DEPLOY" == "0" ]; then ACTION=Building - NO_CACHE=--no-cache + #NO_CACHE=--no-cache else ACTION=Making fi echo $ACTION $PROJECT config=$CONFIG deployment=$DEPLOYMENT '=>' $TAG -docker build $NO_CACHE -f deploy/Dockerfile-$PROJECT --build-arg="CONFIG=$CONFIG" --build-arg="DEPLOYMENT=$DEPLOYMENT" -t dexorder/ai-$PROJECT:latest . 
|| exit 1 + +# Copy protobuf definitions into project directory for Docker build +# Using rsync --checksum so unchanged files keep their timestamps (preserves docker layer cache) +rsync -a --checksum --delete protobuf/ $PROJECT/protobuf/ + +docker build $NO_CACHE -f $PROJECT/Dockerfile --build-arg="CONFIG=$CONFIG" --build-arg="DEPLOYMENT=$DEPLOYMENT" -t dexorder/ai-$PROJECT:latest $PROJECT || exit 1 + +# Cleanup is handled by trap docker tag dexorder/ai-$PROJECT:latest dexorder/ai-$PROJECT:$TAG docker tag dexorder/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:$TAG docker tag $REMOTE/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:latest @@ -105,7 +112,7 @@ echo "$(date)" built $REMOTE/ai-$PROJECT:$TAG if [ "$DEPLOY" == "1" ]; then docker push $REMOTE/ai-$PROJECT:$TAG - YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" deploy/$KUBERNETES.yaml) + YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" deploy/k8s/$KUBERNETES.yaml) echo "$YAML" | kubectl apply -f - || echo "$YAML" "\nkubectl apply failed" && exit 1 echo deployed $KUBERNETES.yaml $REMOTE/ai-$PROJECT:$TAG fi diff --git a/bin/dev b/bin/dev new file mode 100755 index 0000000..5d84c91 --- /dev/null +++ b/bin/dev @@ -0,0 +1,364 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +usage() { + echo "Usage: $0 [COMMAND]" + echo "" + echo "Manage the minikube development environment" + echo "" + echo "Commands:" + echo " start Start minikube and deploy all services" + echo " stop Stop minikube" + echo " restart [svc] Rebuild and redeploy all services, or just one (relay|ingestor|flink)" + echo " rebuild [svc] Rebuild all custom images, or just one" + echo " deploy [svc] Deploy/update all services, or just one" + echo " status Show status of all services" + echo " logs Tail logs for a service" + echo " shell Open a shell in a service pod" + echo " clean Delete all resources and volumes" + echo " tunnel Start minikube tunnel (for LoadBalancer access)" + echo "" + echo "Examples:" + echo " $0 start # Start minikube and deploy everything" + echo " $0 rebuild # Rebuild all custom images" + echo " $0 logs relay # Tail logs for relay service" + echo " $0 shell ingestor # Open shell in ingestor pod" + exit 1 +} + +COMMAND="${1:-start}" + +check_minikube() { + if ! command -v minikube &> /dev/null; then + echo -e "${RED}Error: minikube not found. Please install minikube first.${NC}" + echo "https://minikube.sigs.k8s.io/docs/start/" + exit 1 + fi +} + +check_kubectl() { + if ! command -v kubectl &> /dev/null; then + echo -e "${RED}Error: kubectl not found. 
Please install kubectl first.${NC}" + exit 1 + fi +} + +start_minikube() { + echo -e "${BLUE}Starting minikube...${NC}" + + if minikube status &> /dev/null; then + echo -e "${GREEN}✓ Minikube already running${NC}" + else + minikube start --cpus=6 --memory=12g --driver=docker + echo -e "${GREEN}✓ Minikube started${NC}" + fi + + # Enable ingress addon + echo -e "${BLUE}Enabling ingress addon...${NC}" + minikube addons enable ingress + echo -e "${GREEN}✓ Ingress enabled${NC}" + + # Set docker environment + echo -e "${YELLOW}Setting docker environment to minikube...${NC}" + eval $(minikube docker-env) + echo -e "${GREEN}✓ Docker environment set${NC}" + + # Add /etc/hosts entry + MINIKUBE_IP=$(minikube ip) + if ! grep -q "dexorder.local" /etc/hosts; then + echo -e "${YELLOW}Adding dexorder.local to /etc/hosts (requires sudo)...${NC}" + echo "$MINIKUBE_IP dexorder.local" | sudo tee -a /etc/hosts + else + echo -e "${GREEN}✓ /etc/hosts entry exists${NC}" + fi +} + +rebuild_images() { + local service="${1:-all}" + echo -e "${BLUE}Building custom images...${NC}" + + # Use minikube's docker daemon + eval $(minikube docker-env) + + # Build images using the standard bin/build script with dev flag + cd "$ROOT_DIR" + + # Load existing tags so we preserve whichever services we're not rebuilding + if [ -f "$ROOT_DIR/.dev-image-tag" ]; then + source "$ROOT_DIR/.dev-image-tag" + fi + + # Helper: run build, show output, and return just the dev tag via stdout + # Build output goes to stderr so the caller can capture only the tag via $() + build_and_get_tag() { + local svc="$1" + local output + output=$("$SCRIPT_DIR/build" "$svc" dev 2>&1) || { echo "$output" >&2; return 1; } + echo "$output" >&2 + # Extract tag from "built /ai-:" line + echo "$output" | grep -oE "ai-${svc}:dev[0-9]+" | tail -1 | cut -d: -f2 + } + + if [ "$service" == "all" ] || [ "$service" == "relay" ]; then + echo -e "${GREEN}→${NC} Building relay..." 
+ RELAY_TAG=$(build_and_get_tag relay) || exit 1 + docker tag "dexorder/ai-relay:$RELAY_TAG" "dexorder/relay:$RELAY_TAG" + fi + + if [ "$service" == "all" ] || [ "$service" == "ingestor" ]; then + echo -e "${GREEN}→${NC} Building ingestor..." + INGEST_TAG=$(build_and_get_tag ingestor) || exit 1 + docker tag "dexorder/ai-ingestor:$INGEST_TAG" "dexorder/ingestor:$INGEST_TAG" + fi + + if [ "$service" == "all" ] || [ "$service" == "flink" ]; then + echo -e "${GREEN}→${NC} Building flink..." + FLINK_TAG=$(build_and_get_tag flink) || exit 1 + docker tag "dexorder/ai-flink:$FLINK_TAG" "dexorder/flink:$FLINK_TAG" + fi + + # Save the tags for deployment (all three, preserving any we didn't rebuild) + echo "RELAY_TAG=$RELAY_TAG" > "$ROOT_DIR/.dev-image-tag" + echo "INGEST_TAG=$INGEST_TAG" >> "$ROOT_DIR/.dev-image-tag" + echo "FLINK_TAG=$FLINK_TAG" >> "$ROOT_DIR/.dev-image-tag" + + echo -e "${GREEN}✓ Images built: relay=$RELAY_TAG, ingestor=$INGEST_TAG, flink=$FLINK_TAG${NC}" +} + +deploy_services() { + echo -e "${BLUE}Deploying services to minikube...${NC}" + + cd "$ROOT_DIR" + + # Get the dev image tags + if [ -f "$ROOT_DIR/.dev-image-tag" ]; then + source "$ROOT_DIR/.dev-image-tag" + echo -e "${BLUE}Using image tags:${NC}" + echo -e " Relay: $RELAY_TAG" + echo -e " Ingestor: $INGEST_TAG" + echo -e " Flink: $FLINK_TAG" + else + echo -e "${YELLOW}⚠️ No dev tags found. Using 'latest'. Run rebuild first.${NC}" + RELAY_TAG="latest" + INGEST_TAG="latest" + FLINK_TAG="latest" + fi + + # Create secrets first (if they exist) + echo -e "${GREEN}→${NC} Checking secrets..." + if ls deploy/k8s/dev/secrets/*.yaml &> /dev/null; then + "$SCRIPT_DIR/secret-update" dev || echo -e "${YELLOW} (Some secrets missing - copy from .example files)${NC}" + else + echo -e "${YELLOW}⚠️ No secrets found. 
Copy from .example files:${NC}" + echo -e "${YELLOW} cd deploy/k8s/dev/secrets${NC}" + echo -e "${YELLOW} cp ai-secrets.yaml.example ai-secrets.yaml${NC}" + echo -e "${YELLOW} # Edit with actual values, then run: bin/secret-update dev${NC}" + fi + + # Update configs + echo -e "${GREEN}→${NC} Updating configs..." + "$SCRIPT_DIR/config-update" dev + + # Apply kustomize with image tag substitution + echo -e "${GREEN}→${NC} Applying Kubernetes manifests..." + kubectl kustomize deploy/k8s/dev/ | \ + sed "s|image: dexorder/flink:latest|image: dexorder/flink:$FLINK_TAG|g" | \ + sed "s|image: dexorder/relay:latest|image: dexorder/relay:$RELAY_TAG|g" | \ + sed "s|image: dexorder/ingestor:latest|image: dexorder/ingestor:$INGEST_TAG|g" | \ + kubectl apply -f - + + echo -e "${GREEN}✓ Services deployed${NC}" + + echo "" + echo -e "${BLUE}Waiting for deployments to be ready...${NC}" + kubectl wait --for=condition=available --timeout=300s \ + deployment/relay \ + deployment/ingestor \ + deployment/iceberg-catalog \ + deployment/flink-jobmanager \ + deployment/flink-taskmanager \ + 2>/dev/null || echo -e "${YELLOW}(Some deployments not ready yet)${NC}" + + echo "" + echo -e "${GREEN}✓ Dev environment ready!${NC}" + echo "" + echo -e "${BLUE}Access the application:${NC}" + echo -e " Web UI: http://dexorder.local/cryptochimp/" + echo -e " Backend WS: ws://dexorder.local/ws" + echo "" + echo -e "${BLUE}Admin UIs (use port-forward):${NC}" + echo -e " Flink UI: kubectl port-forward svc/flink-jobmanager 8081:8081" + echo -e " Then open http://localhost:8081" + echo -e " MinIO Console: kubectl port-forward svc/minio 9001:9001" + echo -e " Then open http://localhost:9001" + echo "" + echo -e "${YELLOW}Note: Run 'minikube tunnel' in another terminal for dexorder.local ingress to work${NC}" +} + +show_status() { + echo -e "${BLUE}Kubernetes Resources:${NC}" + echo "" + kubectl get pods,svc,ingress +} + +show_logs() { + local service="$1" + if [ -z "$service" ]; then + echo -e "${RED}Error: 
Please specify a service name${NC}" + echo "Available services: relay, ingestor, flink-jobmanager, flink-taskmanager, kafka, postgres, minio, iceberg-catalog" + exit 1 + fi + + # Try to find pod by label or name + local pod=$(kubectl get pods -l app="$service" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + if [ -z "$pod" ]; then + pod=$(kubectl get pods | grep "$service" | head -n1 | awk '{print $1}') + fi + + if [ -z "$pod" ]; then + echo -e "${RED}Error: No pod found for service '$service'${NC}" + exit 1 + fi + + echo -e "${BLUE}Tailing logs for $pod...${NC}" + kubectl logs -f "$pod" +} + +open_shell() { + local service="$1" + if [ -z "$service" ]; then + echo -e "${RED}Error: Please specify a service name${NC}" + exit 1 + fi + + local pod=$(kubectl get pods -l app="$service" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + if [ -z "$pod" ]; then + pod=$(kubectl get pods | grep "$service" | head -n1 | awk '{print $1}') + fi + + if [ -z "$pod" ]; then + echo -e "${RED}Error: No pod found for service '$service'${NC}" + exit 1 + fi + + echo -e "${BLUE}Opening shell in $pod...${NC}" + kubectl exec -it "$pod" -- /bin/sh || kubectl exec -it "$pod" -- /bin/bash +} + +clean_all() { + echo -e "${RED}⚠️ WARNING: This will delete all resources and volumes!${NC}" + read -p "Are you sure? (yes/no): " confirm + if [[ "$confirm" != "yes" ]]; then + echo "Aborted." 
+ exit 0 + fi + + echo -e "${BLUE}Deleting all resources...${NC}" + kubectl delete -k deploy/k8s/dev/ || true + kubectl delete pvc --all || true + echo -e "${GREEN}✓ Resources deleted${NC}" +} + +start_tunnel() { + echo -e "${BLUE}Starting minikube tunnel...${NC}" + echo -e "${YELLOW}This requires sudo and will run in the foreground.${NC}" + echo -e "${YELLOW}Press Ctrl+C to stop.${NC}" + echo "" + minikube tunnel +} + +# Deploy a single service using kubectl set image with the dev tag (never uses 'latest') +deploy_service() { + local service="$1" + + if [ -f "$ROOT_DIR/.dev-image-tag" ]; then + source "$ROOT_DIR/.dev-image-tag" + fi + + local image + case "$service" in + relay) image="dexorder/relay:$RELAY_TAG" ;; + ingestor) image="dexorder/ingestor:$INGEST_TAG" ;; + flink) image="dexorder/flink:$FLINK_TAG" ;; + *) + echo -e "${RED}Unknown service: $service. Use relay, ingestor, or flink.${NC}" + exit 1 + ;; + esac + + echo -e "${GREEN}→${NC} Deploying $service with image $image..." + case "$service" in + flink) + kubectl set image deployment/flink-jobmanager flink-jobmanager=$image + kubectl set image deployment/flink-taskmanager flink-taskmanager=$image + ;; + *) + kubectl set image deployment/$service $service=$image + ;; + esac + echo -e "${GREEN}✓ $service updated to $image${NC}" +} + +# Main command routing +check_minikube +check_kubectl + +case "$COMMAND" in + start) + start_minikube + rebuild_images + deploy_services + ;; + stop) + echo -e "${BLUE}Stopping minikube...${NC}" + minikube stop + echo -e "${GREEN}✓ Minikube stopped${NC}" + ;; + restart) + if [ -n "$2" ]; then + rebuild_images "$2" + deploy_service "$2" + else + rebuild_images + deploy_services + fi + ;; + rebuild) + rebuild_images "${2:-}" + ;; + deploy) + if [ -n "$2" ]; then + deploy_service "$2" + else + deploy_services + fi + ;; + status) + show_status + ;; + logs) + show_logs "$2" + ;; + shell) + open_shell "$2" + ;; + clean) + clean_all + ;; + tunnel) + start_tunnel + ;; + *) + usage + 
;; +esac diff --git a/bin/secret-update b/bin/secret-update new file mode 100755 index 0000000..cdf6d06 --- /dev/null +++ b/bin/secret-update @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +usage() { + echo "Usage: $0 [ENVIRONMENT] [SECRET_NAME]" + echo "" + echo "Update Kubernetes secrets from YAML files" + echo "" + echo "Arguments:" + echo " ENVIRONMENT Target environment: dev or prod (default: dev)" + echo " SECRET_NAME Specific secret to update (optional, updates all if not specified)" + echo "" + echo "Available secrets:" + echo " ai-secrets - AI backend API keys" + echo " postgres-secret - PostgreSQL password" + echo " minio-secret - MinIO credentials" + echo " ingestor-secrets - Exchange API keys" + echo "" + echo "Examples:" + echo " $0 # Update all dev secrets" + echo " $0 dev # Update all dev secrets" + echo " $0 dev ai-secrets # Update only ai-secrets in dev" + echo " $0 prod # Update all prod secrets" + echo " $0 prod minio-secret # Update only minio-secret in prod" + exit 1 +} + +# Parse arguments +ENV="${1:-dev}" +SECRET_NAME="${2:-}" + +if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then + echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}" + usage +fi + +SECRETS_DIR="$ROOT_DIR/deploy/k8s/$ENV/secrets" + +if [ ! -d "$SECRETS_DIR" ]; then + echo -e "${RED}Error: Secrets directory not found: $SECRETS_DIR${NC}" + exit 1 +fi + +# Get kubectl context +if [[ "$ENV" == "prod" ]]; then + CONTEXT=$(kubectl config current-context) + echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION secrets!${NC}" + echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}" + read -p "Are you sure you want to continue? (yes/no): " confirm + if [[ "$confirm" != "yes" ]]; then + echo "Aborted." 
+ exit 0 + fi +fi + +apply_secret() { + local secret_file="$1" + local secret_basename=$(basename "$secret_file" .yaml) + + if [ ! -f "$secret_file" ]; then + echo -e "${RED}✗ Secret file not found: $secret_file${NC}" + echo -e "${YELLOW} Copy from ${secret_basename}.yaml.example and fill in values${NC}" + return 1 + fi + + echo -e "${GREEN}→${NC} Applying $secret_basename..." + kubectl apply -f "$secret_file" + echo -e "${GREEN}✓${NC} $secret_basename updated" +} + +# Update specific secret or all secrets +if [ -n "$SECRET_NAME" ]; then + # Update single secret + SECRET_FILE="$SECRETS_DIR/$SECRET_NAME.yaml" + apply_secret "$SECRET_FILE" +else + # Update all secrets + echo -e "${GREEN}Updating all $ENV secrets...${NC}" + echo "" + + SECRETS=( + "ai-secrets" + "postgres-secret" + "minio-secret" + "ingestor-secrets" + "flink-secrets" + ) + + FAILED=0 + for secret in "${SECRETS[@]}"; do + SECRET_FILE="$SECRETS_DIR/$secret.yaml" + if ! apply_secret "$SECRET_FILE"; then + FAILED=$((FAILED + 1)) + fi + done + + echo "" + if [ $FAILED -gt 0 ]; then + echo -e "${YELLOW}⚠️ $FAILED secret(s) failed to apply${NC}" + echo -e "${YELLOW}Create missing secret files by copying from .example templates:${NC}" + echo -e "${YELLOW} cd $SECRETS_DIR${NC}" + echo -e "${YELLOW} cp SECRET_NAME.yaml.example SECRET_NAME.yaml${NC}" + echo -e "${YELLOW} # Edit SECRET_NAME.yaml with actual values${NC}" + exit 1 + else + echo -e "${GREEN}✓ All secrets updated successfully${NC}" + fi +fi diff --git a/client-py/README.md b/client-py/README.md new file mode 100644 index 0000000..883bdb1 --- /dev/null +++ b/client-py/README.md @@ -0,0 +1,259 @@ +# DexOrder Python Client Library + +High-level Python API for accessing historical OHLC data from the DexOrder trading platform. 
+ +## Features + +- **Smart Caching**: Automatically checks Iceberg warehouse before requesting new data +- **Async Request/Response**: Non-blocking historical data requests via relay +- **Gap Detection**: Identifies and requests only missing data ranges +- **Transparent Access**: Single API for both cached and on-demand data + +## Installation + +```bash +cd redesign/client-py +pip install -e . +``` + +## Quick Start + +```python +import asyncio +from dexorder import OHLCClient + +async def main(): + # Initialize client + client = OHLCClient( + iceberg_catalog_uri="http://iceberg-catalog:8181", + relay_endpoint="tcp://relay:5555", + notification_endpoint="tcp://flink:5557" + ) + + # Start background notification listener + await client.start() + + try: + # Fetch OHLC data (automatically checks cache and requests missing data) + df = await client.fetch_ohlc( + ticker="BINANCE:BTC/USDT", + period_seconds=3600, # 1-hour candles + start_time=1735689600000000, # microseconds + end_time=1736294399000000 + ) + + print(f"Fetched {len(df)} candles") + print(df.head()) + + finally: + await client.stop() + +# Run +asyncio.run(main()) +``` + +## Using Context Manager + +```python +async def main(): + async with OHLCClient(...) as client: + df = await client.fetch_ohlc(...) +``` + +## Architecture + +### Components + +1. **OHLCClient**: High-level API with smart caching +2. **IcebergClient**: Direct queries to Iceberg warehouse +3. **HistoryClient**: Submit requests via relay and wait for notifications + +### Data Flow + +``` +┌─────────┐ +│ Client │ +└────┬────┘ + │ 1. fetch_ohlc() + ▼ +┌─────────────────┐ +│ OHLCClient │ +└────┬────────────┘ + │ 2. Check Iceberg + ▼ +┌─────────────────┐ ┌──────────┐ +│ IcebergClient │─────▶│ Iceberg │ +└─────────────────┘ └──────────┘ + │ 3. Missing data? + ▼ +┌─────────────────┐ ┌──────────┐ +│ HistoryClient │─────▶│ Relay │ +└────┬────────────┘ └──────────┘ + │ │ + │ 4. Wait for notification │ + │◀─────────────────────────┘ + │ 5. 
Query Iceberg again + ▼ +┌─────────────────┐ +│ Return data │ +└─────────────────┘ +``` + +## API Reference + +### OHLCClient + +#### `__init__(iceberg_catalog_uri, relay_endpoint, notification_endpoint, namespace="trading")` + +Initialize the client with connection parameters. + +#### `async fetch_ohlc(ticker, period_seconds, start_time, end_time, request_timeout=30.0)` + +Fetch OHLC data with smart caching. + +**Parameters:** +- `ticker` (str): Market identifier (e.g., "BINANCE:BTC/USDT") +- `period_seconds` (int): OHLC period in seconds (60, 300, 3600, 86400, etc.) +- `start_time` (int): Start timestamp in microseconds +- `end_time` (int): End timestamp in microseconds +- `request_timeout` (float): Timeout for historical requests in seconds + +**Returns:** `pd.DataFrame` with columns: +- `ticker`: Market identifier +- `period_seconds`: Period in seconds +- `timestamp`: Candle timestamp (microseconds) +- `open`, `high`, `low`, `close`: Prices (integer format) +- `volume`: Trading volume +- Additional fields: `buy_vol`, `sell_vol`, `open_interest`, etc. + +### IcebergClient + +Direct access to Iceberg warehouse. + +#### `query_ohlc(ticker, period_seconds, start_time, end_time)` + +Query OHLC data directly from Iceberg. + +#### `find_missing_ranges(ticker, period_seconds, start_time, end_time)` + +Identify missing data ranges. Returns list of `(start_time, end_time)` tuples. + +#### `has_data(ticker, period_seconds, start_time, end_time)` + +Check if any data exists for the given parameters. + +### HistoryClient + +Low-level client for submitting historical data requests. + +**IMPORTANT**: Always call `connect()` before making requests to prevent race condition. + +#### `async connect()` + +Connect to relay and start notification listener. **MUST be called before making any requests.** + +This subscribes to the notification topic `RESPONSE:{client_id}` BEFORE any requests are sent, +preventing the race condition where notifications arrive before subscription. 
+ +#### `async request_historical_ohlc(ticker, period_seconds, start_time, end_time, timeout=30.0, limit=None)` + +Submit historical data request and wait for completion notification. + +**Returns:** dict with keys: +- `request_id`: The request ID +- `status`: 'OK', 'NOT_FOUND', or 'ERROR' +- `error_message`: Error message if status is 'ERROR' +- `iceberg_namespace`, `iceberg_table`, `row_count`: Available when status is 'OK' + +**Example:** +```python +from dexorder import HistoryClient + +client = HistoryClient( + relay_endpoint="tcp://relay:5559", + notification_endpoint="tcp://relay:5558" +) + +# CRITICAL: Connect first to prevent race condition +await client.connect() + +# Now safe to make requests +result = await client.request_historical_ohlc( + ticker="BINANCE:BTC/USDT", + period_seconds=3600, + start_time=1735689600000000, + end_time=1736294399000000 +) + +await client.close() +``` + +## Configuration + +The client requires the following endpoints: + +- **Iceberg Catalog URI**: REST API endpoint for Iceberg metadata (default: `http://iceberg-catalog:8181`) +- **Relay Endpoint**: ZMQ REQ/REP endpoint for submitting requests (default: `tcp://relay:5555`) +- **Notification Endpoint**: ZMQ PUB/SUB endpoint for receiving notifications (default: `tcp://flink:5557`) + +## Development + +### Generate Protobuf Files + +```bash +cd redesign/protobuf +protoc -I . --python_out=../client-py/dexorder ingestor.proto ohlc.proto +``` + +### Run Tests + +```bash +pytest tests/ +``` + +## Examples + +See `../relay/test/async_client.py` for a complete example. 
+ +## Timestamp Format + +All timestamps are in **microseconds since epoch**: + +```python +# Convert from datetime +from datetime import datetime, timezone + +dt = datetime(2024, 1, 1, tzinfo=timezone.utc) +timestamp_micros = int(dt.timestamp() * 1_000_000) + +# Convert to datetime +dt = datetime.fromtimestamp(timestamp_micros / 1_000_000, tz=timezone.utc) +``` + +## Period Seconds + +Common period values: +- `60` - 1 minute +- `300` - 5 minutes +- `900` - 15 minutes +- `3600` - 1 hour +- `14400` - 4 hours +- `86400` - 1 day +- `604800` - 1 week + +## Error Handling + +```python +try: + df = await client.fetch_ohlc(...) +except TimeoutError: + print("Request timed out") +except ValueError as e: + print(f"Request failed: {e}") +except ConnectionError: + print("Unable to connect to relay") +``` + +## License + +Internal use only. diff --git a/client-py/__init__.py b/client-py/__init__.py new file mode 100644 index 0000000..3988bf1 --- /dev/null +++ b/client-py/__init__.py @@ -0,0 +1,3 @@ +import logging + +log = logging.getLogger(__name__) diff --git a/client-py/dexorder/__init__.py b/client-py/dexorder/__init__.py new file mode 100644 index 0000000..ad3524d --- /dev/null +++ b/client-py/dexorder/__init__.py @@ -0,0 +1,16 @@ +""" +DexOrder Trading Platform Python Client + +Provides high-level APIs for: +- Historical OHLC data retrieval with smart caching +- Async request/response via relay +- Iceberg data warehouse queries +""" + +__version__ = "0.1.0" + +from .ohlc_client import OHLCClient +from .iceberg_client import IcebergClient +from .history_client import HistoryClient + +__all__ = ['OHLCClient', 'IcebergClient', 'HistoryClient'] diff --git a/client-py/dexorder/history_client.py b/client-py/dexorder/history_client.py new file mode 100644 index 0000000..5c30ba6 --- /dev/null +++ b/client-py/dexorder/history_client.py @@ -0,0 +1,296 @@ +""" +HistoryClient - Submit historical data requests via relay and wait for notifications + +RACE CONDITION PREVENTION: +The 
"""
HistoryClient - Submit historical data requests via relay and wait for notifications.

RACE CONDITION PREVENTION:
The client must subscribe to notification topics BEFORE submitting requests.
Notification topics are deterministic: RESPONSE:{client_id} or HISTORY_READY:{request_id}.
Since both are client-generated, we can subscribe before sending the request.
"""

import asyncio
import logging
import os
import struct
import sys
import uuid
from typing import Optional

import zmq
import zmq.asyncio

# Import protobuf messages (assuming they're generated in ../protobuf)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../protobuf'))
try:
    from ingestor_pb2 import SubmitHistoricalRequest, SubmitResponse, HistoryReadyNotification
except ImportError:
    print("Warning: Protobuf files not found. Run: protoc -I ../protobuf --python_out=../protobuf ../protobuf/*.proto")
    raise

log = logging.getLogger(__name__)

# ZMQ envelope layout: 1-byte protocol version + 1-byte message type + protobuf payload.
PROTOCOL_VERSION = 0x01
MESSAGE_TYPE_SUBMIT_HISTORICAL = 0x10
MESSAGE_TYPE_HISTORY_READY = 0x12

# How long to wait for the relay's immediate (synchronous) SubmitResponse.
RELAY_REPLY_TIMEOUT_SECS = 5.0


class HistoryClient:
    """
    Client for submitting historical data requests via relay.

    IMPORTANT: Call connect() before making any requests. This ensures the
    notification listener is running and subscribed BEFORE any requests are
    submitted, preventing the race condition where notifications arrive
    before subscription.

    Provides:
    - Submit historical OHLC data requests
    - Wait for completion notifications
    - Handle request timeouts and errors
    """

    def __init__(self, relay_endpoint: str, notification_endpoint: str, client_id: Optional[str] = None):
        """
        Initialize history client.

        Args:
            relay_endpoint: ZMQ endpoint for relay client requests (e.g., "tcp://relay:5559")
            notification_endpoint: ZMQ endpoint for notifications (e.g., "tcp://relay:5558")
            client_id: Optional client ID for notification routing. If not provided,
                generates one. All notifications for this client are published on
                topic RESPONSE:{client_id}.
        """
        self.relay_endpoint = relay_endpoint
        self.notification_endpoint = notification_endpoint
        self.client_id = client_id or f"client-{uuid.uuid4().hex[:8]}"
        self.context = zmq.asyncio.Context()
        # request_id -> {'event': asyncio.Event, 'result': Optional[dict]}
        self.pending_requests = {}
        self.notification_task = None
        self.connected = False

    async def connect(self):
        """
        Connect to relay and start the notification listener.

        CRITICAL: This MUST be called before making any requests to prevent a
        race condition. The listener subscribes to the deterministic topic
        RESPONSE:{client_id} BEFORE any requests are sent, so notifications
        can never be missed.
        """
        if self.connected:
            return

        # Start the notification listener FIRST, before any request is sent.
        self.notification_task = asyncio.create_task(self._notification_listener())

        # Give the listener a moment to connect and subscribe.
        # NOTE(review): a fixed sleep is a heuristic; ZMQ offers no portable
        # "subscription active" signal on a plain SUB socket.
        await asyncio.sleep(0.1)

        self.connected = True

    async def request_historical_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        timeout: float = 30.0,
        limit: Optional[int] = None
    ) -> dict:
        """
        Request historical OHLC data and wait for the completion notification.

        IMPORTANT: Call connect() before using this method.

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds
            timeout: Request timeout in seconds (default: 30)
            limit: Optional limit on number of candles

        Returns:
            dict with keys:
            - request_id: The request ID
            - status: 'OK', 'NOT_FOUND', 'ERROR', or 'TIMEOUT'
            - error_message: Error message if status is 'ERROR'
            - iceberg_namespace / iceberg_table / row_count: present when status is 'OK'

        Raises:
            TimeoutError: If the completion notification does not arrive in time
            ConnectionError: If not connected, or the relay rejects / does not answer
        """
        if not self.connected:
            raise ConnectionError("Client not connected. Call connect() first to prevent race condition.")

        request_id = str(uuid.uuid4())

        # Register the pending request BEFORE sending to eliminate any race.
        # The notification topic is deterministic (RESPONSE:{client_id}) and the
        # listener is already subscribed, so pending_requests just has to be
        # populated before Flink could possibly publish the notification.
        event = asyncio.Event()
        self.pending_requests[request_id] = {'event': event, 'result': None}

        try:
            # client_id enables the deterministic notification topic.
            request = SubmitHistoricalRequest(
                request_id=request_id,
                ticker=ticker,
                period_seconds=period_seconds,
                start_time=start_time,
                end_time=end_time,
                client_id=self.client_id
            )
            if limit is not None:
                request.limit = limit

            # Encode the ZMQ envelope: [version][type + payload].
            version_frame = struct.pack('B', PROTOCOL_VERSION)
            message_frame = struct.pack('B', MESSAGE_TYPE_SUBMIT_HISTORICAL) + request.SerializeToString()

            socket = self.context.socket(zmq.REQ)
            # LINGER=0 so close() never blocks on undelivered data, which would
            # otherwise stall context.term() in close().
            socket.setsockopt(zmq.LINGER, 0)
            socket.connect(self.relay_endpoint)

            try:
                # Send two frames: version, then message.
                await socket.send(version_frame, zmq.SNDMORE)
                await socket.send(message_frame)

                # Wait for the relay's immediate response.
                response_frames = []
                while True:
                    try:
                        frame = await asyncio.wait_for(socket.recv(), timeout=RELAY_REPLY_TIMEOUT_SECS)
                    except asyncio.TimeoutError:
                        # Surface as ConnectionError per the documented contract
                        # (previously leaked a raw asyncio.TimeoutError).
                        raise ConnectionError(
                            f"No response from relay within {RELAY_REPLY_TIMEOUT_SECS}s"
                        ) from None
                    response_frames.append(frame)
                    if not socket.get(zmq.RCVMORE):
                        break

                # Expect 2 frames: [version][type + payload].
                if len(response_frames) < 2:
                    raise ConnectionError(f"Expected 2 frames, got {len(response_frames)}")

                response = SubmitResponse()
                response.ParseFromString(response_frames[1][1:])  # skip 1-byte message type

                if response.status != 0:
                    raise ConnectionError(f"Request failed: {response.error_message}")
            finally:
                socket.close()

            # Wait for the Flink completion notification.
            try:
                await asyncio.wait_for(event.wait(), timeout=timeout)
            except asyncio.TimeoutError:
                raise TimeoutError(f"Request {request_id} timed out after {timeout}s") from None
            return self.pending_requests[request_id]['result']

        finally:
            self.pending_requests.pop(request_id, None)

    async def _notification_listener(self):
        """
        Internal notification listener subscribed to RESPONSE:{client_id}.

        CRITICAL: This runs BEFORE any requests are submitted to prevent a race
        condition; the topic is deterministic based on our client_id.
        """
        socket = self.context.socket(zmq.SUB)
        socket.connect(self.notification_endpoint)

        # Subscribe to our client-specific topic, known before any request is sent.
        notification_topic = f"RESPONSE:{self.client_id}"
        socket.setsockopt_string(zmq.SUBSCRIBE, notification_topic)

        try:
            while True:
                # Receive a multi-frame message: [topic][version][message].
                frames = []
                while True:
                    frame = await socket.recv()
                    frames.append(frame)
                    if not socket.get(zmq.RCVMORE):
                        break

                if len(frames) < 3:
                    continue

                version_frame = frames[1]
                message_frame = frames[2]

                # Validate protocol version.
                if len(version_frame) != 1 or version_frame[0] != PROTOCOL_VERSION:
                    continue

                # Validate message type.
                if len(message_frame) < 1 or message_frame[0] != MESSAGE_TYPE_HISTORY_READY:
                    continue

                try:
                    notification = HistoryReadyNotification()
                    notification.ParseFromString(message_frame[1:])
                except Exception:
                    # Malformed payload: log (not print) and keep listening.
                    log.warning("failed to parse notification payload", exc_info=True)
                    continue

                request_id = notification.request_id

                pending = self.pending_requests.get(request_id)
                if pending is None:
                    continue  # not one of ours (or already timed out)

                # Map protobuf enum to string status.
                # NotificationStatus: OK=0, NOT_FOUND=1, ERROR=2, TIMEOUT=3
                status_map = {0: 'OK', 1: 'NOT_FOUND', 2: 'ERROR', 3: 'TIMEOUT'}
                status = status_map.get(notification.status, 'ERROR')

                result = {
                    'request_id': request_id,
                    'status': status,
                    'error_message': notification.error_message if notification.error_message else None
                }

                # Add Iceberg details when the data landed successfully.
                if status == 'OK':
                    result.update({
                        'iceberg_namespace': notification.iceberg_namespace,
                        'iceberg_table': notification.iceberg_table,
                        'row_count': notification.row_count,
                        'ticker': notification.ticker,
                        'period_seconds': notification.period_seconds,
                        'start_time': notification.start_time,
                        'end_time': notification.end_time,
                    })

                pending['result'] = result
                pending['event'].set()

        except asyncio.CancelledError:
            pass
        finally:
            socket.close()

    async def close(self):
        """
        Close the client and clean up resources (listener task and ZMQ context).
        """
        if self.notification_task:
            self.notification_task.cancel()
            try:
                await self.notification_task
            except asyncio.CancelledError:
                pass

        self.context.term()
        self.connected = False
"""
IcebergClient - Query OHLC data from Iceberg warehouse (Iceberg 1.10.1)
"""

from typing import Optional, List, Tuple

import pandas as pd
from pyiceberg.catalog import load_catalog
from pyiceberg.expressions import (
    And,
    EqualTo,
    GreaterThanOrEqual,
    LessThanOrEqual
)


class IcebergClient:
    """
    Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).

    Note: Iceberg 1.x does not enforce primary keys at the table level.
    Deduplication is handled by:
    - Flink upsert mode with equality delete files
    - PyIceberg automatically filters deleted rows during queries
    - Last-write-wins semantics for duplicates

    Provides:
    - Query OHLC data by ticker, period, and time range
    - Identify missing data gaps
    - Efficient partition pruning for large datasets
    """

    def __init__(
        self,
        catalog_uri: str,
        namespace: str = "trading",
        s3_endpoint: Optional[str] = None,
        s3_access_key: Optional[str] = None,
        s3_secret_key: Optional[str] = None,
    ):
        """
        Initialize Iceberg client.

        Args:
            catalog_uri: URI of the Iceberg catalog (e.g., "http://iceberg-catalog:8181")
            namespace: Iceberg namespace (default: "trading")
            s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
            s3_access_key: S3/MinIO access key
            s3_secret_key: S3/MinIO secret key
        """
        self.catalog_uri = catalog_uri
        self.namespace = namespace

        catalog_props = {"uri": catalog_uri}
        if s3_endpoint:
            catalog_props["s3.endpoint"] = s3_endpoint
            # MinIO requires path-style addressing.
            catalog_props["s3.path-style-access"] = "true"
        if s3_access_key:
            catalog_props["s3.access-key-id"] = s3_access_key
        if s3_secret_key:
            catalog_props["s3.secret-access-key"] = s3_secret_key

        self.catalog = load_catalog("trading", **catalog_props)
        self.table = self.catalog.load_table(f"{namespace}.ohlc")

    def _time_range_filter(self, ticker: str, period_seconds: int, start_time: int, end_time: int):
        """Build the standard ticker/period/time-range row filter (inclusive bounds)."""
        return And(
            EqualTo("ticker", ticker),
            EqualTo("period_seconds", period_seconds),
            GreaterThanOrEqual("timestamp", start_time),
            LessThanOrEqual("timestamp", end_time)
        )

    def _refresh_table(self):
        """Reload table metadata to pick up snapshots committed after client init."""
        self.table = self.catalog.load_table(f"{self.namespace}.ohlc")

    def query_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> pd.DataFrame:
        """
        Query OHLC data for a specific ticker, period, and time range.

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds

        Returns:
            DataFrame with OHLC data sorted by timestamp
        """
        self._refresh_table()

        df = self.table.scan(
            row_filter=self._time_range_filter(ticker, period_seconds, start_time, end_time)
        ).to_pandas()

        if not df.empty:
            df = df.sort_values("timestamp")

        return df

    def find_missing_ranges(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> List[Tuple[int, int]]:
        """
        Identify missing data ranges in the requested time period.

        Expected candle timestamps are generated on a period_seconds grid
        anchored at start_time, then compared against stored timestamps.
        NOTE(review): assumes stored candles are aligned to that grid —
        unaligned data would all appear "missing"; confirm against the writer.

        Args:
            ticker: Market identifier
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds

        Returns:
            List of (start_time, end_time) tuples for missing ranges
        """
        df = self.query_ohlc(ticker, period_seconds, start_time, end_time)

        if df.empty:
            # Nothing stored at all: the whole requested window is missing.
            return [(start_time, end_time)]

        period_micros = period_seconds * 1_000_000

        # Expected timestamps on the period grid (inclusive of end_time).
        expected_timestamps = list(range(start_time, end_time + 1, period_micros))
        actual_timestamps = set(df['timestamp'].values)

        missing = sorted(set(expected_timestamps) - actual_timestamps)
        if not missing:
            return []

        # Consolidate consecutive missing timestamps into (start, end) ranges.
        ranges = []
        range_start = missing[0]
        prev_ts = missing[0]

        for ts in missing[1:]:
            if ts > prev_ts + period_micros:
                # Gap in the missing sequence: close the previous range.
                ranges.append((range_start, prev_ts))
                range_start = ts
            prev_ts = ts

        ranges.append((range_start, prev_ts))
        return ranges

    def has_data(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> bool:
        """
        Check if any data exists for the given parameters.

        Uses a limit-1 scan instead of materializing the full result set,
        so existence checks stay cheap on large ranges.

        Args:
            ticker: Market identifier
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds

        Returns:
            True if at least one candle exists, False otherwise
        """
        self._refresh_table()
        df = self.table.scan(
            row_filter=self._time_range_filter(ticker, period_seconds, start_time, end_time),
            limit=1
        ).to_pandas()
        return not df.empty
"""
OHLCClient - High-level API for fetching OHLC data with smart caching
"""

import asyncio
import pandas as pd
from typing import Optional
from .iceberg_client import IcebergClient
from .history_client import HistoryClient


class OHLCClient:
    """
    High-level client for fetching OHLC data.

    Workflow:
    1. Check Iceberg for existing data
    2. Identify missing ranges
    3. Request missing data via relay
    4. Wait for notification
    5. Query Iceberg for complete dataset
    6. Return combined results

    This provides transparent caching - clients don't need to know
    whether data came from cache or was fetched on-demand.
    """

    def __init__(
        self,
        iceberg_catalog_uri: str,
        relay_endpoint: str,
        notification_endpoint: str,
        namespace: str = "trading",
        s3_endpoint: Optional[str] = None,
        s3_access_key: Optional[str] = None,
        s3_secret_key: Optional[str] = None,
    ):
        """
        Initialize OHLC client.

        Args:
            iceberg_catalog_uri: URI of Iceberg catalog
            relay_endpoint: ZMQ endpoint for relay requests
            notification_endpoint: ZMQ endpoint for notifications
            namespace: Iceberg namespace (default: "trading")
            s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
            s3_access_key: S3/MinIO access key
            s3_secret_key: S3/MinIO secret key
        """
        self.iceberg = IcebergClient(
            iceberg_catalog_uri, namespace,
            s3_endpoint=s3_endpoint,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
        )
        self.history = HistoryClient(relay_endpoint, notification_endpoint)

    async def start(self):
        """
        Start the client. Must be called before making requests.
        Starts the background notification listener (see HistoryClient.connect).
        """
        await self.history.connect()

    async def stop(self):
        """
        Stop the client and clean up resources.
        """
        await self.history.close()

    async def fetch_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        request_timeout: float = 30.0
    ) -> pd.DataFrame:
        """
        Fetch OHLC data with smart caching.

        Steps:
        1. Query Iceberg for existing data
        2. If complete, return immediately
        3. If missing data, request via relay
        4. Wait for completion notification
        5. Query Iceberg again for complete dataset
        6. Return results

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds
            request_timeout: Timeout for historical data requests (default: 30s)

        Returns:
            DataFrame with OHLC data sorted by timestamp

        Raises:
            TimeoutError: If the historical data request times out
            ValueError: If the request fails
        """
        # Step 1: Check Iceberg for existing data.
        df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)

        # Step 2: Identify missing ranges.
        missing_ranges = self.iceberg.find_missing_ranges(
            ticker, period_seconds, start_time, end_time
        )

        if not missing_ranges:
            # All data already present in Iceberg - serve from cache.
            return df

        # Step 3: Request missing data.
        # For simplicity, request the entire range (relay can merge adjacent requests).
        result = await self.history.request_historical_ohlc(
            ticker=ticker,
            period_seconds=period_seconds,
            start_time=start_time,
            end_time=end_time,
            timeout=request_timeout
        )

        # Step 4: Check result status.
        # NOTE(review): 'NOT_FOUND' currently falls through and returns whatever
        # Iceberg has (possibly incomplete) - confirm whether it should raise.
        if result['status'] == 'ERROR':
            raise ValueError(f"Historical data request failed: {result['error_message']}")

        # Step 5: Query Iceberg again for the now-complete dataset.
        df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)

        return df

    async def __aenter__(self):
        """Support async context manager: starts the client."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Support async context manager: stops the client."""
        await self.stop()
extras_require={ + "dev": [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", + ] + }, +) diff --git a/deploy/Dockerfile-alpine-backend b/deploy/Dockerfile-alpine-backend deleted file mode 100644 index 02835a9..0000000 --- a/deploy/Dockerfile-alpine-backend +++ /dev/null @@ -1,38 +0,0 @@ -FROM python:3.14-alpine - -# Set working directory -WORKDIR /app - -# Copy requirements first for better caching -COPY backend/requirements.txt /app/requirements.txt - -# Install TA-Lib C library and build dependencies, then install Python dependencies and clean up -RUN apk add --no-cache --virtual .build-deps \ - gcc \ - g++ \ - make \ - musl-dev \ - wget \ - tar \ - cargo \ - rust \ - && wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz \ - && tar -xzf ta-lib-0.4.0-src.tar.gz \ - && cd ta-lib/ \ - && ./configure --prefix=/usr \ - && make \ - && make install \ - && cd .. \ - && rm -rf ta-lib ta-lib-0.4.0-src.tar.gz \ - && pip install --no-cache-dir -r requirements.txt \ - && apk del .build-deps \ - && rm -rf /var/cache/apk/* /root/.cache /root/.cargo /root/.rustup - -# Copy application code -COPY backend/src /app/src - -# Expose port -EXPOSE 8000 - -# Run the application -CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/deploy/Dockerfile-backend b/deploy/Dockerfile-backend deleted file mode 100644 index 7f1f720..0000000 --- a/deploy/Dockerfile-backend +++ /dev/null @@ -1,65 +0,0 @@ -FROM python:3.12-slim - -ARG CONFIG=production - -# Install TA-Lib C library early for better layer caching -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - gcc \ - g++ \ - make \ - wget \ - ca-certificates \ - && wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz \ - && tar -xzf ta-lib-0.4.0-src.tar.gz \ - && cd ta-lib/ \ - && ./configure --prefix=/usr \ - && make \ - && make install \ - && cd .. 
\ - && rm -rf ta-lib ta-lib-0.4.0-src.tar.gz \ - && apt-get purge -y --auto-remove gcc g++ make wget \ - && rm -rf /var/lib/apt/lists/* - -# Install Python build dependencies early for better layer caching -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - gcc \ - g++ \ - cargo \ - rustc - -# Install compiled packages - separate layer so requirements.txt changes don't trigger recompilation -COPY backend/requirements-pre.txt /app/requirements-pre.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=cache,target=/root/.cargo \ - pip install --no-cache-dir -r /app/requirements-pre.txt \ - && apt-get purge -y --auto-remove gcc g++ cargo rustc \ - && rm -rf /var/lib/apt/lists/* /root/.rustup /tmp/* - -# Set working directory -WORKDIR /app - -# Copy and install remaining requirements -COPY backend/requirements.txt /app/requirements.txt - -# Install Python dependencies and clean up -RUN pip install --no-cache-dir -r requirements.txt - -# Copy application code -COPY backend/src /app/src -COPY backend/config*.yaml /tmp/ -RUN if [ -f /tmp/config-${CONFIG}.yaml ]; then \ - cp /tmp/config-${CONFIG}.yaml /app/config.yaml; \ - else \ - cp /tmp/config.yaml /app/config.yaml; \ - fi && rm -rf /tmp/config*.yaml - -# Add src to PYTHONPATH for correct module resolution -ENV PYTHONPATH=/app/src - -# Expose port -EXPOSE 8000 - -# Run the application -CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/deploy/ingress.yaml b/deploy/ingress.yaml deleted file mode 100644 index 888b7db..0000000 --- a/deploy/ingress.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: ai-ingress - annotations: - cert-manager.io/cluster-issuer: "letsencrypt-prod" -spec: - ingressClassName: nginx - tls: - - hosts: - - dexorder.ai - secretName: dexorder-ai-tls - rules: - - host: dexorder.ai - http: - paths: - - path: /charting_library - pathType: Prefix - backend: - 
service: - name: ai-web - port: - number: 5173 - - path: /cryptochimp - pathType: Prefix - backend: - service: - name: ai-web - port: - number: 5173 - - path: /ws - pathType: Prefix - backend: - service: - name: ai-backend - port: - number: 8000 diff --git a/deploy/k8s/README.md b/deploy/k8s/README.md new file mode 100644 index 0000000..e9b267a --- /dev/null +++ b/deploy/k8s/README.md @@ -0,0 +1,287 @@ +# Kubernetes Deployment + +This directory contains Kubernetes manifests using [Kustomize](https://kustomize.io/) for managing dev and production environments. + +## Structure + +``` +deploy/k8s/ +├── base/ # Base manifests (shared) +│ ├── backend.yaml +│ ├── web.yaml +│ ├── ingress.yaml +│ ├── init.yaml +│ └── kustomization.yaml +├── dev/ # Dev overlay (minikube) +│ ├── infrastructure.yaml # Kafka, Postgres, MinIO, Flink, Relay, Ingestor +│ ├── ingress-dev.yaml # Dev ingress (dexorder.local) +│ ├── patches.yaml # Dev-specific patches +│ ├── kustomization.yaml +│ └── secrets/ +│ ├── *.yaml # Actual secrets (gitignored) +│ └── *.yaml.example # Templates +├── prod/ # Production overlay +│ ├── patches.yaml # Prod patches (replicas, resources, gVisor) +│ ├── kustomization.yaml +│ └── secrets/ +│ ├── *.yaml # Actual secrets (gitignored) +│ └── *.yaml.example # Templates +└── configmaps/ # Shared ConfigMaps + ├── relay-config.yaml + ├── ingestor-config.yaml + └── flink-config.yaml +``` + +## Dev Environment (Minikube) + +### Prerequisites + +- [minikube](https://minikube.sigs.k8s.io/docs/start/) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- Docker + +### Quick Start + +```bash +# Start everything +bin/dev start + +# Access the application +# Web UI: http://dexorder.local/cryptochimp/ +# Backend: ws://dexorder.local/ws + +# In another terminal, start tunnel for ingress +bin/dev tunnel +``` + +### Managing Dev Environment + +```bash +# Rebuild images after code changes +bin/dev rebuild + +# Redeploy services +bin/dev deploy + +# Full restart (rebuild + redeploy) 
+bin/dev restart + +# View status +bin/dev status + +# View logs +bin/dev logs relay +bin/dev logs ingestor +bin/dev logs flink-jobmanager + +# Open shell in pod +bin/dev shell relay + +# Clean everything +bin/dev clean + +# Stop minikube +bin/dev stop +``` + +### Setting Up Secrets (Dev) + +```bash +# Copy example secrets +cd deploy/k8s/dev/secrets/ +cp ai-secrets.yaml.example ai-secrets.yaml +cp postgres-secret.yaml.example postgres-secret.yaml +cp minio-secret.yaml.example minio-secret.yaml +cp ingestor-secrets.yaml.example ingestor-secrets.yaml + +# Edit with actual values +vim ai-secrets.yaml # Add your Anthropic API key + +# Apply to cluster +bin/secret-update dev + +# Or update a specific secret +bin/secret-update dev ai-secrets +``` + +### Updating Configs (Dev) + +```bash +# Edit config files +vim deploy/configmaps/relay-config.yaml + +# Apply changes +bin/config-update dev + +# Or update specific config +bin/config-update dev relay-config +``` + +### Dev vs Docker Compose + +The minikube dev environment mirrors production more closely than docker-compose: + +| Feature | docker-compose | minikube | +|---------|---------------|----------| +| Environment parity | ❌ Different from prod | ✅ Same as prod | +| Secrets management | `.env` files | K8s Secrets | +| Configuration | Volume mounts | ConfigMaps | +| Service discovery | DNS by service name | K8s Services | +| Ingress/routing | Port mapping | nginx-ingress | +| Resource limits | Limited support | Full K8s resources | +| Init containers | No | Yes | +| Readiness probes | No | Yes | + +## Production Environment + +### Prerequisites + +- Access to production Kubernetes cluster +- `kubectl` configured with production context +- Production secrets prepared + +### Setting Up Secrets (Prod) + +```bash +# Copy example secrets +cd deploy/k8s/prod/secrets/ +cp ai-secrets.yaml.example ai-secrets.yaml +cp postgres-secret.yaml.example postgres-secret.yaml +# ... 
etc + +# Edit with production values +vim ai-secrets.yaml + +# Apply to cluster (will prompt for confirmation) +bin/secret-update prod + +# Or update specific secret +bin/secret-update prod ai-secrets +``` + +### Updating Configs (Prod) + +```bash +# Edit production configs if needed +vim deploy/configmaps/relay-config.yaml + +# Apply changes (will prompt for confirmation) +bin/config-update prod +``` + +### Deploying to Production + +```bash +# Verify kubectl context +kubectl config current-context + +# Apply manifests +kubectl apply -k deploy/k8s/prod/ + +# Check rollout status +kubectl rollout status statefulset/ai-backend +kubectl rollout status deployment/ai-web + +# View status +kubectl get pods,svc,ingress +``` + +## Kustomize Overlays + +### Dev Overlay + +- **imagePullPolicy: Never** - Uses locally built images +- **Infrastructure services** - Kafka, Postgres, MinIO, Flink, Relay, Ingestor +- **Local ingress** - `dexorder.local` (requires `/etc/hosts` entry) +- **No gVisor** - RuntimeClass removed (not available in minikube) +- **Single replicas** - Minimal resource usage + +### Prod Overlay + +- **imagePullPolicy: Always** - Pulls from registry +- **Multiple replicas** - HA configuration +- **Resource limits** - CPU/memory constraints +- **gVisor** - Security sandbox via RuntimeClass +- **Production ingress** - `dexorder.ai` with TLS + +## Infrastructure Services (Dev Only) + +These services are included in the dev environment but are expected to be managed separately in production: + +- **Kafka** - KRaft mode (no Zookeeper), single broker +- **PostgreSQL** - Iceberg catalog metadata +- **MinIO** - S3-compatible object storage +- **Iceberg REST Catalog** - Table metadata +- **Flink** - JobManager + TaskManager +- **Relay** - ZMQ message router +- **Ingestor** - CCXT data fetcher + +In production, you would typically use: +- Managed Kafka (Confluent Cloud, MSK, etc.) +- Managed PostgreSQL (RDS, Cloud SQL, etc.) 
+- Object storage (S3, GCS, Azure Blob) +- Flink Kubernetes Operator or managed Flink + +## Troubleshooting + +### Minikube not starting + +```bash +minikube delete +minikube start --cpus=6 --memory=12g --driver=docker +``` + +### Images not found + +Make sure you're using minikube's docker daemon: + +```bash +eval $(minikube docker-env) +bin/dev rebuild +``` + +### Ingress not working + +Start minikube tunnel in another terminal: + +```bash +bin/dev tunnel +``` + +### Secrets not found + +Create secrets from examples: + +```bash +cd deploy/k8s/dev/secrets/ +cp *.example *.yaml +vim ai-secrets.yaml # Edit with actual values +bin/secret-update dev +``` + +### Pods not starting + +Check events and logs: + +```bash +kubectl get events --sort-by=.metadata.creationTimestamp +kubectl describe pod +kubectl logs +``` + +## CI/CD Integration + +For automated deployments, you can use: + +```bash +# Build and push images +docker build -t registry.example.com/dexorder/ai-web:$TAG . +docker push registry.example.com/dexorder/ai-web:$TAG + +# Update kustomization with new tag +cd deploy/k8s/prod +kustomize edit set image dexorder/ai-web=registry.example.com/dexorder/ai-web:$TAG + +# Deploy +kubectl apply -k deploy/k8s/prod/ +``` diff --git a/deploy/backend.yaml b/deploy/k8s/base/backend.yaml similarity index 100% rename from deploy/backend.yaml rename to deploy/k8s/base/backend.yaml diff --git a/deploy/k8s/base/ingress.yaml b/deploy/k8s/base/ingress.yaml new file mode 100644 index 0000000..9004040 --- /dev/null +++ b/deploy/k8s/base/ingress.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ai-ingress + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" +spec: + ingressClassName: nginx + tls: + - hosts: + - dexorder.ai + secretName: dexorder-ai-tls + rules: + - host: dexorder.ai + http: + paths: [] diff --git a/deploy/k8s/base/init.yaml b/deploy/k8s/base/init.yaml new file mode 100644 index 0000000..54d5370 --- /dev/null 
+++ b/deploy/k8s/base/init.yaml @@ -0,0 +1,9 @@ +apiVersion: node.k8s.io/v1 +kind: RuntimeClass +metadata: + name: gvisor +handler: gvisor +overhead: + podFixed: + memory: "64Mi" + cpu: "0m" diff --git a/deploy/k8s/base/kustomization.yaml b/deploy/k8s/base/kustomization.yaml new file mode 100644 index 0000000..bae21bc --- /dev/null +++ b/deploy/k8s/base/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: [] + # ingress.yaml - removed until we have services to expose diff --git a/deploy/web.yaml b/deploy/k8s/base/web.yaml similarity index 100% rename from deploy/web.yaml rename to deploy/k8s/base/web.yaml diff --git a/deploy/k8s/dev/configs/flink-config.yaml b/deploy/k8s/dev/configs/flink-config.yaml new file mode 100644 index 0000000..458f318 --- /dev/null +++ b/deploy/k8s/dev/configs/flink-config.yaml @@ -0,0 +1,40 @@ +# Flink Job Configuration + +# ZeroMQ bind address and ports +zmq_bind_address: "tcp://*" +zmq_ingestor_work_queue_port: 5555 +zmq_ingestor_response_port: 5556 +zmq_ingestor_control_port: 5557 +zmq_market_data_pub_port: 5558 +zmq_client_request_port: 5559 +zmq_cep_webhook_port: 5560 + +# Notification endpoints +# Task managers PUSH to job manager PULL socket at this address +notification_publish_endpoint: "tcp://flink-jobmanager:5561" +# Job manager binds PULL socket on this port to receive from task managers +notification_pull_port: 5561 + +# Kafka configuration +kafka_bootstrap_servers: "kafka:9092" +kafka_tick_topic: "market-tick" +kafka_ohlc_topic: "market-ohlc" +kafka_topics_file: "/topics-dev.yaml" # Use topics-dev.yaml for single broker dev environment + +# Iceberg catalog +iceberg_catalog_uri: "http://iceberg-catalog:8181" +iceberg_warehouse: "s3://trading-warehouse/" +iceberg_namespace: "trading" +iceberg_table_prefix: "market" +hadoop_conf_dir: "/etc/hadoop/conf" + +# Flink configuration +flink_parallelism: 1 +flink_checkpoint_interval_ms: 60000 + +# Flink memory configuration 
(required) +jobmanager.memory.process.size: 1600m +taskmanager.memory.process.size: 1728m +taskmanager.numberOfTaskSlots: 2 +jobmanager.rpc.address: flink-jobmanager +jobmanager.rpc.port: 6123 diff --git a/deploy/k8s/dev/configs/ingestor-config.yaml b/deploy/k8s/dev/configs/ingestor-config.yaml new file mode 100644 index 0000000..7828d10 --- /dev/null +++ b/deploy/k8s/dev/configs/ingestor-config.yaml @@ -0,0 +1,24 @@ +# CCXT Ingestor Configuration + +# Relay ZMQ endpoints (relay is the well-known gateway) +flink_hostname: relay +ingestor_work_port: 5555 # SUB - receives DataRequest with exchange prefix +# Note: No response port needed - async architecture via Kafka! + +# Supported exchanges (subscribe to these prefixes) +supported_exchanges: + - BINANCE + - COINBASE + - KRAKEN + +# Kafka configuration +kafka_brokers: + - kafka:9092 +kafka_topic: market-ohlc + +# Worker configuration +max_concurrent: 10 +poll_interval_ms: 10000 + +# Logging +log_level: info diff --git a/deploy/k8s/dev/configs/relay-config.yaml b/deploy/k8s/dev/configs/relay-config.yaml new file mode 100644 index 0000000..6d8eb93 --- /dev/null +++ b/deploy/k8s/dev/configs/relay-config.yaml @@ -0,0 +1,19 @@ +# ZMQ Relay Configuration + +# Bind address for all relay sockets +bind_address: "tcp://*" + +# Client-facing ports +client_request_port: 5559 # ROUTER - Client historical data requests +market_data_pub_port: 5558 # XPUB - Market data fanout to clients + +# Ingestor-facing ports +ingestor_work_port: 5555 # PUB - Distribute work with exchange prefix +ingestor_response_port: 5556 # ROUTER - Receive responses from ingestors + +# Flink connection +flink_market_data_endpoint: "tcp://flink-jobmanager:5558" # XSUB - Subscribe to Flink market data (MARKET_DATA_PUB) + +# Timeouts and limits +request_timeout_secs: 30 # Timeout for pending client requests +high_water_mark: 10000 # ZMQ high water mark for all sockets diff --git a/deploy/k8s/dev/infrastructure.yaml b/deploy/k8s/dev/infrastructure.yaml new file 
mode 100644 index 0000000..6dad241 --- /dev/null +++ b/deploy/k8s/dev/infrastructure.yaml @@ -0,0 +1,519 @@ +--- +# Kafka (KRaft mode - no Zookeeper needed) +# Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because: +# - cp-kafka's entrypoint script has issues with KRaft configuration +# - apache/kafka allows explicit command configuration +# - For production, use Strimzi operator (see kafka/ directory) +apiVersion: v1 +kind: Service +metadata: + name: kafka +spec: + selector: + app: kafka + ports: + - name: broker + protocol: TCP + port: 9092 + targetPort: 9092 + - name: controller + protocol: TCP + port: 9093 + targetPort: 9093 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: kafka +spec: + serviceName: kafka + replicas: 1 + selector: + matchLabels: + app: kafka + template: + metadata: + labels: + app: kafka + spec: + containers: + - name: kafka + image: apache/kafka:3.9.0 + ports: + - containerPort: 9092 + name: broker + - containerPort: 9093 + name: controller + command: + - sh + - -c + - | + CLUSTER_ID="dexorder-dev-cluster" + if [ ! 
-f /var/lib/kafka/data/meta.properties ]; then + /opt/kafka/bin/kafka-storage.sh format -t $CLUSTER_ID -c /opt/kafka/config/kraft/server.properties + fi + /opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/kraft/server.properties \ + --override node.id=1 \ + --override process.roles=broker,controller \ + --override listeners=PLAINTEXT://:9092,CONTROLLER://:9093 \ + --override advertised.listeners=PLAINTEXT://kafka:9092 \ + --override controller.quorum.voters=1@kafka:9093 \ + --override controller.listener.names=CONTROLLER \ + --override listener.security.protocol.map=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT \ + --override log.dirs=/var/lib/kafka/data \ + --override offsets.topic.replication.factor=1 \ + --override transaction.state.log.replication.factor=1 \ + --override transaction.state.log.min.isr=1 + env: [] + volumeMounts: + - name: kafka-data + mountPath: /var/lib/kafka/data + volumeClaimTemplates: + - metadata: + name: kafka-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 5Gi +--- +# PostgreSQL (for Iceberg catalog metadata) +apiVersion: v1 +kind: Service +metadata: + name: postgres +spec: + selector: + app: postgres + ports: + - protocol: TCP + port: 5432 + targetPort: 5432 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres +spec: + serviceName: postgres + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:15 + ports: + - containerPort: 5432 + env: + - name: POSTGRES_USER + value: postgres + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret + key: password + - name: POSTGRES_DB + value: iceberg + volumeMounts: + - name: postgres-data + mountPath: /var/lib/postgresql/data + volumeClaimTemplates: + - metadata: + name: postgres-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Gi +--- +# MinIO (S3-compatible 
object storage) +apiVersion: v1 +kind: Service +metadata: + name: minio +spec: + selector: + app: minio + ports: + - name: api + protocol: TCP + port: 9000 + targetPort: 9000 + - name: console + protocol: TCP + port: 9001 + targetPort: 9001 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: minio +spec: + serviceName: minio + replicas: 1 + selector: + matchLabels: + app: minio + template: + metadata: + labels: + app: minio + spec: + containers: + - name: minio + image: minio/minio:latest + args: + - server + - /data + - --console-address + - ":9001" + ports: + - containerPort: 9000 + name: api + - containerPort: 9001 + name: console + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: minio-secret + key: root-user + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: minio-secret + key: root-password + volumeMounts: + - name: minio-data + mountPath: /data + volumeClaimTemplates: + - metadata: + name: minio-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +--- +# Iceberg REST Catalog +apiVersion: v1 +kind: Service +metadata: + name: iceberg-catalog +spec: + selector: + app: iceberg-catalog + ports: + - protocol: TCP + port: 8181 + targetPort: 8181 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: iceberg-catalog +spec: + replicas: 1 + selector: + matchLabels: + app: iceberg-catalog + template: + metadata: + labels: + app: iceberg-catalog + spec: + initContainers: + - name: wait-for-postgres + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z postgres 5432; do echo waiting for postgres; sleep 2; done;'] + - name: wait-for-minio + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z minio 9000; do echo waiting for minio; sleep 2; done;'] + containers: + - name: iceberg-catalog + image: tabulario/iceberg-rest:latest + ports: + - containerPort: 8181 + env: + - name: CATALOG_WAREHOUSE + value: "s3://warehouse/" + - name: 
CATALOG_IO__IMPL + value: "org.apache.iceberg.aws.s3.S3FileIO" + - name: CATALOG_S3_ENDPOINT + value: "http://minio:9000" + - name: CATALOG_S3_ACCESS__KEY__ID + valueFrom: + secretKeyRef: + name: minio-secret + key: root-user + - name: CATALOG_S3_SECRET__ACCESS__KEY + valueFrom: + secretKeyRef: + name: minio-secret + key: root-password + - name: CATALOG_S3_PATH__STYLE__ACCESS + value: "true" + - name: AWS_REGION + value: "us-east-1" +--- +# Flink JobManager +apiVersion: v1 +kind: Service +metadata: + name: flink-jobmanager +spec: + selector: + app: flink-jobmanager + ports: + - name: rpc + protocol: TCP + port: 6123 + targetPort: 6123 + - name: ui + protocol: TCP + port: 8081 + targetPort: 8081 + - name: zmq-market-data + protocol: TCP + port: 5558 + targetPort: 5558 + - name: zmq-notif-pull + protocol: TCP + port: 5561 + targetPort: 5561 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flink-jobmanager +spec: + replicas: 1 + selector: + matchLabels: + app: flink-jobmanager + template: + metadata: + labels: + app: flink-jobmanager + spec: + initContainers: + - name: wait-for-kafka + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;'] + containers: + - name: flink-jobmanager + image: dexorder/flink:latest + imagePullPolicy: Never + args: ["standalone-job", "--job-classname", "com.dexorder.flink.TradingFlinkApp"] + ports: + - containerPort: 6123 + name: rpc + - containerPort: 8081 + name: ui + - containerPort: 5558 + name: zmq-market-data + - containerPort: 5561 + name: zmq-notif-pull + env: + - name: JOB_MANAGER_RPC_ADDRESS + value: flink-jobmanager + - name: AWS_REGION + value: us-east-1 + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-secret-key + volumeMounts: + - name: flink-config + mountPath: /etc/config/config.yaml + 
subPath: config.yaml + - name: flink-secrets + mountPath: /etc/secrets + volumes: + - name: flink-config + configMap: + name: flink-config + - name: flink-secrets + secret: + secretName: flink-secrets +--- +# Flink TaskManager +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flink-taskmanager +spec: + replicas: 1 + selector: + matchLabels: + app: flink-taskmanager + template: + metadata: + labels: + app: flink-taskmanager + spec: + initContainers: + - name: wait-for-jobmanager + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z flink-jobmanager 6123; do echo waiting for jobmanager; sleep 2; done;'] + containers: + - name: flink-taskmanager + image: dexorder/flink:latest + imagePullPolicy: Never + args: ["taskmanager"] + env: + - name: JOB_MANAGER_RPC_ADDRESS + value: flink-jobmanager + - name: AWS_REGION + value: us-east-1 + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-secret-key + volumeMounts: + - name: flink-config + mountPath: /etc/config/config.yaml + subPath: config.yaml + - name: flink-secrets + mountPath: /etc/secrets + volumes: + - name: flink-config + configMap: + name: flink-config + - name: flink-secrets + secret: + secretName: flink-secrets +--- +# Relay (ZMQ router) +apiVersion: v1 +kind: Service +metadata: + name: relay +spec: + selector: + app: relay + ports: + - name: work-queue + protocol: TCP + port: 5555 + targetPort: 5555 + - name: responses + protocol: TCP + port: 5556 + targetPort: 5556 + - name: market-data + protocol: TCP + port: 5558 + targetPort: 5558 + - name: client-requests + protocol: TCP + port: 5559 + targetPort: 5559 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: relay +spec: + replicas: 1 + selector: + matchLabels: + app: relay + template: + metadata: + labels: + app: relay + spec: + containers: + - name: relay + image: 
dexorder/relay:latest + imagePullPolicy: Never + ports: + - containerPort: 5555 + name: work-queue + - containerPort: 5556 + name: responses + - containerPort: 5558 + name: market-data + - containerPort: 5559 + name: client-requests + env: + - name: RUST_LOG + value: relay=info + - name: CONFIG_PATH + value: /config/config.yaml + volumeMounts: + - name: relay-config + mountPath: /config + volumes: + - name: relay-config + configMap: + name: relay-config +--- +# Ingestor (CCXT data fetcher) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ingestor +spec: + replicas: 1 + selector: + matchLabels: + app: ingestor + template: + metadata: + labels: + app: ingestor + spec: + initContainers: + - name: wait-for-relay + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z relay 5555; do echo waiting for relay; sleep 2; done;'] + - name: wait-for-kafka + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;'] + containers: + - name: ingestor + image: dexorder/ingestor:latest + imagePullPolicy: Never + env: + - name: LOG_LEVEL + value: info + - name: CONFIG_PATH + value: /config/config.yaml + volumeMounts: + - name: ingestor-config + mountPath: /config + - name: ingestor-secrets + mountPath: /secrets + volumes: + - name: ingestor-config + configMap: + name: ingestor-config + - name: ingestor-secrets + secret: + secretName: ingestor-secrets diff --git a/deploy/k8s/dev/ingress-dev.yaml b/deploy/k8s/dev/ingress-dev.yaml new file mode 100644 index 0000000..8f97035 --- /dev/null +++ b/deploy/k8s/dev/ingress-dev.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ai-ingress +spec: + ingressClassName: nginx + rules: + - host: dexorder.local + http: + paths: [] diff --git a/deploy/k8s/dev/kustomization.yaml b/deploy/k8s/dev/kustomization.yaml new file mode 100644 index 0000000..14168e5 --- /dev/null +++ b/deploy/k8s/dev/kustomization.yaml @@ -0,0 +1,32 @@ +apiVersion: 
kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: default + +# Base resources +resources: + - ../base + - infrastructure.yaml + +# No patches needed currently +patches: [] + # ingress-dev.yaml - removed until we have services to expose + +# ConfigMaps for service configs +configMapGenerator: + - name: relay-config + files: + - config.yaml=configs/relay-config.yaml + - name: ingestor-config + files: + - config.yaml=configs/ingestor-config.yaml + - name: flink-config + files: + - config.yaml=configs/flink-config.yaml + +# Secrets (managed via kubectl, not committed) +# These are created by bin/secret-update +secretGenerator: [] + +generatorOptions: + disableNameSuffixHash: true diff --git a/deploy/k8s/dev/secrets/ai-secrets.yaml.example b/deploy/k8s/dev/secrets/ai-secrets.yaml.example new file mode 100644 index 0000000..0b770b7 --- /dev/null +++ b/deploy/k8s/dev/secrets/ai-secrets.yaml.example @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ai-secrets +type: Opaque +stringData: + anthropic-api-key: "sk-ant-YOUR_KEY_HERE" diff --git a/deploy/k8s/dev/secrets/flink-secrets.yaml.example b/deploy/k8s/dev/secrets/flink-secrets.yaml.example new file mode 100644 index 0000000..4756f29 --- /dev/null +++ b/deploy/k8s/dev/secrets/flink-secrets.yaml.example @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: flink-secrets +type: Opaque +stringData: + # MinIO/S3 credentials for Iceberg S3FileIO + minio-access-key: "minio" + minio-secret-key: "minio123" diff --git a/deploy/k8s/dev/secrets/ingestor-secrets.yaml.example b/deploy/k8s/dev/secrets/ingestor-secrets.yaml.example new file mode 100644 index 0000000..7b98475 --- /dev/null +++ b/deploy/k8s/dev/secrets/ingestor-secrets.yaml.example @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ingestor-secrets +type: Opaque +stringData: + # Exchange API keys (if needed for authenticated endpoints) + binance-api-key: "" + binance-api-secret: "" + coinbase-api-key: "" + 
coinbase-api-secret: "" + kraken-api-key: "" + kraken-api-secret: "" diff --git a/deploy/k8s/dev/secrets/minio-secret.yaml.example b/deploy/k8s/dev/secrets/minio-secret.yaml.example new file mode 100644 index 0000000..cc3584e --- /dev/null +++ b/deploy/k8s/dev/secrets/minio-secret.yaml.example @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret +type: Opaque +stringData: + root-user: "minio" + root-password: "minio123" diff --git a/deploy/k8s/dev/secrets/postgres-secret.yaml.example b/deploy/k8s/dev/secrets/postgres-secret.yaml.example new file mode 100644 index 0000000..5705978 --- /dev/null +++ b/deploy/k8s/dev/secrets/postgres-secret.yaml.example @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: postgres-secret +type: Opaque +stringData: + password: "password" diff --git a/deploy/k8s/prod/configs/flink-config.yaml b/deploy/k8s/prod/configs/flink-config.yaml new file mode 100644 index 0000000..34b525f --- /dev/null +++ b/deploy/k8s/prod/configs/flink-config.yaml @@ -0,0 +1,30 @@ +# Flink Job Configuration + +# ZeroMQ bind address and ports +zmq_bind_address: "tcp://*" +zmq_ingestor_work_queue_port: 5555 +zmq_ingestor_response_port: 5556 +zmq_ingestor_control_port: 5557 +zmq_market_data_pub_port: 5558 +zmq_client_request_port: 5559 +zmq_cep_webhook_port: 5560 + +# Notification publisher endpoint (Flink → Relay) +# Relay connects XSUB to this endpoint and proxies to clients +notification_publish_endpoint: "tcp://*:5557" + +# Kafka configuration +kafka_bootstrap_servers: "kafka:9092" +kafka_tick_topic: "market-tick" +kafka_ohlc_topic: "market-ohlc" + +# Iceberg catalog +iceberg_catalog_uri: "http://iceberg-catalog:8181" +iceberg_warehouse: "s3://trading-warehouse/" +iceberg_namespace: "trading" +iceberg_table_prefix: "market" +hadoop_conf_dir: "/etc/hadoop/conf" + +# Flink configuration +flink_parallelism: 2 +flink_checkpoint_interval_ms: 60000 diff --git a/deploy/k8s/prod/configs/ingestor-config.yaml 
b/deploy/k8s/prod/configs/ingestor-config.yaml new file mode 100644 index 0000000..b8ffd16 --- /dev/null +++ b/deploy/k8s/prod/configs/ingestor-config.yaml @@ -0,0 +1,24 @@ +# CCXT Ingestor Configuration + +# Relay ZMQ endpoints (relay is the well-known gateway) +flink_hostname: relay +ingestor_work_port: 5555 # SUB - receives DataRequest with exchange prefix +# Note: No response port needed - async architecture via Kafka! + +# Supported exchanges (subscribe to these prefixes) +supported_exchanges: + - BINANCE + - COINBASE + - KRAKEN + +# Kafka configuration +kafka_brokers: + - kafka:9092 +kafka_topic: market-0 + +# Worker configuration +max_concurrent: 10 +poll_interval_ms: 10000 + +# Logging +log_level: info diff --git a/deploy/k8s/prod/configs/relay-config.yaml b/deploy/k8s/prod/configs/relay-config.yaml new file mode 100644 index 0000000..53e3b22 --- /dev/null +++ b/deploy/k8s/prod/configs/relay-config.yaml @@ -0,0 +1,19 @@ +# ZMQ Relay Configuration + +# Bind address for all relay sockets +bind_address: "tcp://*" + +# Client-facing ports +client_request_port: 5559 # ROUTER - Client historical data requests +market_data_pub_port: 5558 # XPUB - Market data fanout to clients + +# Ingestor-facing ports +ingestor_work_port: 5555 # PUB - Distribute work with exchange prefix +ingestor_response_port: 5556 # ROUTER - Receive responses from ingestors + +# Flink connection +flink_market_data_endpoint: "tcp://flink-jobmanager:5557" # XSUB - Subscribe to Flink market data + +# Timeouts and limits +request_timeout_secs: 30 # Timeout for pending client requests +high_water_mark: 10000 # ZMQ high water mark for all sockets diff --git a/deploy/k8s/prod/kustomization.yaml b/deploy/k8s/prod/kustomization.yaml new file mode 100644 index 0000000..6bd96fb --- /dev/null +++ b/deploy/k8s/prod/kustomization.yaml @@ -0,0 +1,40 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: default + +# Base resources (backend, web, ingress, init/gVisor) +resources: + 
- ../base + +# Production patches +patches: + - path: patches.yaml + +# ConfigMaps for service configs +# In production, these might come from external sources +# or be managed separately, but we'll include them here for consistency +configMapGenerator: + - name: relay-config + files: + - config.yaml=../../configmaps/relay-config.yaml + - name: ingestor-config + files: + - config.yaml=../../configmaps/ingestor-config.yaml + - name: flink-config + files: + - config.yaml=../../configmaps/flink-config.yaml + +# Secrets (managed via kubectl, not committed) +# These are created by bin/secret-update prod +secretGenerator: [] + +generatorOptions: + disableNameSuffixHash: true + +# Images +images: + - name: dexorder/ai-backend + newTag: latest + - name: dexorder/ai-web + newTag: latest diff --git a/deploy/k8s/prod/patches.yaml b/deploy/k8s/prod/patches.yaml new file mode 100644 index 0000000..55ffac9 --- /dev/null +++ b/deploy/k8s/prod/patches.yaml @@ -0,0 +1,52 @@ +--- +# Production backend patches +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: ai-backend +spec: + replicas: 2 + template: + spec: + runtimeClassName: gvisor + containers: + - name: ai-backend + image: dexorder/ai-backend:latest + imagePullPolicy: Always + env: + - name: CONFIG + value: "prod" + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" +--- +# Production web patches +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ai-web +spec: + replicas: 2 + template: + spec: + runtimeClassName: gvisor + containers: + - name: ai-web + image: dexorder/ai-web:latest + imagePullPolicy: Always + env: + - name: VITE_BASE_PATH + value: "/cryptochimp/" + - name: VITE_WS_URL + value: "wss://dexorder.ai/ws" + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" diff --git a/deploy/k8s/prod/secrets/ai-secrets.yaml.example b/deploy/k8s/prod/secrets/ai-secrets.yaml.example new file mode 100644 index 0000000..0b770b7 --- 
/dev/null +++ b/deploy/k8s/prod/secrets/ai-secrets.yaml.example @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ai-secrets +type: Opaque +stringData: + anthropic-api-key: "sk-ant-YOUR_KEY_HERE" diff --git a/deploy/k8s/prod/secrets/ingestor-secrets.yaml.example b/deploy/k8s/prod/secrets/ingestor-secrets.yaml.example new file mode 100644 index 0000000..7b98475 --- /dev/null +++ b/deploy/k8s/prod/secrets/ingestor-secrets.yaml.example @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ingestor-secrets +type: Opaque +stringData: + # Exchange API keys (if needed for authenticated endpoints) + binance-api-key: "" + binance-api-secret: "" + coinbase-api-key: "" + coinbase-api-secret: "" + kraken-api-key: "" + kraken-api-secret: "" diff --git a/deploy/k8s/prod/secrets/minio-secret.yaml.example b/deploy/k8s/prod/secrets/minio-secret.yaml.example new file mode 100644 index 0000000..0ecd74e --- /dev/null +++ b/deploy/k8s/prod/secrets/minio-secret.yaml.example @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret +type: Opaque +stringData: + root-user: "CHANGE_THIS_IN_PRODUCTION" + root-password: "CHANGE_THIS_IN_PRODUCTION" diff --git a/deploy/k8s/prod/secrets/postgres-secret.yaml.example b/deploy/k8s/prod/secrets/postgres-secret.yaml.example new file mode 100644 index 0000000..4ac8d27 --- /dev/null +++ b/deploy/k8s/prod/secrets/postgres-secret.yaml.example @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: postgres-secret +type: Opaque +stringData: + password: "CHANGE_THIS_IN_PRODUCTION" diff --git a/doc/auth.md b/doc.old/auth.md similarity index 100% rename from doc/auth.md rename to doc.old/auth.md diff --git a/doc/data.md b/doc.old/data.md similarity index 100% rename from doc/data.md rename to doc.old/data.md diff --git a/doc/design.md b/doc.old/design.md similarity index 100% rename from doc/design.md rename to doc.old/design.md diff --git a/doc/libraries.md b/doc.old/libraries.md similarity index 100% 
rename from doc/libraries.md rename to doc.old/libraries.md diff --git a/doc/mvp.md b/doc.old/mvp.md similarity index 93% rename from doc/mvp.md rename to doc.old/mvp.md index 2e375b9..7d56213 100644 --- a/doc/mvp.md +++ b/doc.old/mvp.md @@ -1,6 +1,6 @@ # Minimum Viable Prototype -* Chat interface: slack/telegram/whatsapp etc +* Chat interface: web+telegram * Agent loop * Memory * RAG diff --git a/doc.old/trendspider.md b/doc.old/trendspider.md new file mode 100644 index 0000000..9b1c4ba --- /dev/null +++ b/doc.old/trendspider.md @@ -0,0 +1,38 @@ +Four Main Functions +* Charting and Analysis +* Strategy Development and Testing +* Idea Generation +* Trade Timing and Execution + +# Charting and Analysis +* "Smart Charts" autodetects chart patterns, trendlines, and plots them +* Multi-timeframe plots on same chart (e.g. 200 SMA from the daily) +* Multi-symbol view +* Sidebar data +* Mobile app +* Browser extension detects symbols across pages + +# Strategy Development and Testing +* Strategies seem to be lego components: + * "Price Close" "Less than" "Wedge Falling (Top)" + * "Any of the following" "happened within" "15 candles" on "Current symbol" +* The rest is obvious + +# Idea Generation +* Scanners provided filtered symbol lists according to whatever conditions +* News feeds like recent halts, WallStBets symbols, retail flow alerts, social media chatter + +# Trade Timing and Execution +* Alerts on chart elements have "cadence": + * "Break Through" Trigger when price opens & closes, or closes & opens on different sides of the buffer area. + * "Touch" Trigger when price touches or trades through the buffer area, but does not Break Through it. + * "Bounce" Triggers when price Touches the buffer area but does not Break Through on the next candle. 
+* Alerts on any visual element +* Bots can either trade or send alerts +* Condition tree structure of strategies allows a quick red/green view of what sections are currently true/false, allowing a discretionary trader to maybe make an exceptional decision + +# Assistant +* "Quick Commands" finds UI elements + * Quick search box basically with minor AI assistance for "add to chat" etc + * Maybe use lightweight model agent to choose from a range of UI actions like "add indicator" etc. +* \ No newline at end of file diff --git a/doc/agent_redesign.md b/doc/agent_redesign.md new file mode 100644 index 0000000..159244d --- /dev/null +++ b/doc/agent_redesign.md @@ -0,0 +1,9 @@ +Generally use skills instead of subagents, except for the analysis subagent. + +## User-specific files +* Indicators +* Strategies +* Watchlists +* Preferences + * Trading style + * Charting / colors diff --git a/doc/backend_redesign.md b/doc/backend_redesign.md new file mode 100644 index 0000000..3a9f830 --- /dev/null +++ b/doc/backend_redesign.md @@ -0,0 +1,110 @@ +# aiignore +# This is not implemented yet; these are just notes for Tim + +# Overview +We need a realtime data system that is scalable and durable, so we have the following architecture: +* Protobufs over ZeroMQ for data streaming +* Ingestors + * Realtime data subscriptions (tick data) + * Historical data queries (OHLC) + * Everything pushes to Kafka topics +* Kafka + * Durable append logs for incoming and in-process data + * Topics maintained by Flink in redesign/flink/src/main/resources/topics.yaml +* Flink + * Raw ingestor streams are read from Kafka + * Deduplication + * Builds OHLCs +* Apache Iceberg + * Historical data storage + +# Configuration +All systems should use two YAML configuration files that are mounted by k8s from a ConfigMap and / or Secrets. Keep secrets separate from config.
+ +When a configuration or secrets item is needed, describe it in redesign/doc/config.md + +# Ingest +Ingestion API +* all symbols + * exchange id (BINANCE) + * market_id (BTC/USDT) + * market_type + * Spot + * description (Bitcoin/Tether on Binance) + * column names ( ['open', 'high', 'low', 'close', 'volume', 'taker_vol', 'maker_vol']) + * name + * exchange + * base asset + * quote asset + * earliest time + * tick size + * supported periods + +* Centralized data streaming backend + * Ingestion of tick, ohlc, news, etc. into Kafka by worker gatherers + * Flink with: + * zmq pubsub + * (seq, time) key for every row in a tick series + * every series also has seq->time and time->seq indexes + * Sequence tickers with strict seq's AND time index (seq can just be row counter autoincrement) +* Historical data + * Apache Iceberg + * Clients query here first + * Backfill service +* Quote Server + * Realtime current prices for selected quote currencies +* Workspace + * Current chart, indicators, drawings, etc. + * Always in context, must be brief. Data series are a reference not the actual data. +* Analysis + * Analysis engines are short-running and always tied to a user + * Free users lose pod and data when session times out + * Conda available with many preinstalled packages + * Pip & Conda configured to install + * Src dir r/w with git + * Indicators + * Strategies + * Analysis +* Request Context + * User ID + * Workspace ID + * Channel + * Telegram + * Web +* Website + * Current vue site +* Gateway + * Websocket gateway + * Authentication + * User Featureset / License Info added to requests/headers + * Relays data pub/sub to web/mobile clients + * Routes agent chat to/from user container + * Active channel features + * TV Chart + * Text chat + * Plot out + * Voice/Audio + * Static file server + * Kafka + * Temp Gateway files (image responses, etc.)
+* Logs + * Kafka + * Strategy Logs + * Order/Execution Logs + * Chat Logs + * User ID Topic has TTL based on license +* Agent Framework + * Soul file + * Tool set (incl subagents) + * LLM choice + * RAG namespace + * Agents + * Top-level coordinator + * TradingView agent + * Indicators, Drawings, Annotations + * Research Agent + * Pandas/Polars analysis + * Plot generation +* License Manager +* Kafka Topics Doc w/ schemas +* \ No newline at end of file diff --git a/doc/config.md b/doc/config.md new file mode 100644 index 0000000..b520ac7 --- /dev/null +++ b/doc/config.md @@ -0,0 +1,18 @@ +This file describes all the configuration options used by all components. All configuration is divided into regular config and secrets, and k8s will mount either or both as a yaml file accessible to the process. + +# Configuration + +* `flink_hostname` +* ... various zmq ports for flink ... +* `iceberg_catalog_hostname` +* `iceberg_catalog_port` +* `iceberg_catalog_database` +* etc + + +# Secrets + +* `iceberg_catalog_username` +* `iceberg_catalog_password` +* etc. 
+ diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..02cd7bc --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,139 @@ +version: '3.8' +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.7.0 + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ports: + - "2181:2181" + + kafka: + image: confluentinc/cp-kafka:7.7.0 + depends_on: + - zookeeper + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + ports: + - "9092:9092" + + postgres: + image: postgres:15 + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: password + POSTGRES_DB: iceberg + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + + # MinIO for S3-compatible storage (Iceberg warehouse) + minio: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: minio123 + ports: + - "9000:9000" + - "9001:9001" + volumes: + - minio_data:/data + + # Iceberg REST Catalog + iceberg-catalog: + image: tabulario/iceberg-rest:latest + environment: + - CATALOG_WAREHOUSE=s3://warehouse/ + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio:9000 + - CATALOG_S3_ACCESS__KEY__ID=minio + - CATALOG_S3_SECRET__ACCESS__KEY=minio123 + - CATALOG_S3_PATH__STYLE__ACCESS=true + ports: + - "8181:8181" + depends_on: + - postgres + - minio + + flink-jobmanager: + image: flink:1.20-scala_2.12 + command: jobmanager + environment: + - JOB_MANAGER_RPC_ADDRESS=flink-jobmanager + ports: + - "6123:6123" + - "8081:8081" + depends_on: + - kafka + - postgres + + flink-taskmanager: + image: flink:1.20-scala_2.12 + command: taskmanager + environment: + - 
JOB_MANAGER_RPC_ADDRESS=flink-jobmanager + depends_on: + - flink-jobmanager + - kafka + + relay: + build: + context: relay + dockerfile: relay/Dockerfile + ports: + - "5555:5555" # Ingestor work queue + - "5556:5556" # Ingestor responses + - "5558:5558" # Market data (clients) + - "5559:5559" # Client requests + environment: + - RUST_LOG=relay=info + - CONFIG_PATH=/config/config.yaml + volumes: + - ./relay/config.example.yaml:/config/config.yaml:ro + depends_on: + - flink-jobmanager + restart: unless-stopped + + ingestor: + build: + context: ingestor + dockerfile: ingestor/Dockerfile + environment: + - LOG_LEVEL=info + - CONFIG_PATH=/config/config.yaml + volumes: + - ./ingestor/config.example.yaml:/config/config.yaml:ro + depends_on: + - kafka + - relay + restart: unless-stopped + + history-test-client: + build: + context: test/history_client + dockerfile: test/history_client/Dockerfile + depends_on: + - relay + - ingestor + - flink-jobmanager + - iceberg-catalog + environment: + - ICEBERG_CATALOG_URI=http://iceberg-catalog:8181 + - RELAY_ENDPOINT=tcp://relay:5555 + - NOTIFICATION_ENDPOINT=tcp://flink:5557 + volumes: + - ./client-py:/client-py:ro + profiles: + - test + # Wait for services to start up, then run new OHLCClient-based test + command: sh -c "sleep 10 && pip install -e /client-py && python client_ohlc_api.py" + +volumes: + postgres_data: + minio_data: diff --git a/flink/.gitignore b/flink/.gitignore new file mode 100644 index 0000000..f2fd999 --- /dev/null +++ b/flink/.gitignore @@ -0,0 +1,13 @@ +target/ +*.class +*.jar +*.war +*.ear +*.iml +.idea/ +.vscode/ +.settings/ +.project +.classpath +dependency-reduced-pom.xml +protobuf/ diff --git a/flink/Dockerfile b/flink/Dockerfile new file mode 100644 index 0000000..fbc97ff --- /dev/null +++ b/flink/Dockerfile @@ -0,0 +1,37 @@ +# Stage 1: Build the JAR +FROM maven:3.9-eclipse-temurin-11 AS builder + +WORKDIR /build + +# Copy pom.xml and protobuf definitions first for better caching +COPY pom.xml . 
+COPY protobuf ../protobuf/ + +# Download dependencies (cached if pom.xml doesn't change) +RUN mvn dependency:go-offline + +# Copy source code +COPY src ./src + +# Build the JAR +RUN mvn clean package -DskipTests + +# For dev environment: replace topics.yaml with topics-dev.yaml in the JAR +# This avoids replication factor errors with only 1 Kafka broker +RUN mkdir -p /tmp/jar-overlay && \ + cp /build/src/main/resources/topics-dev.yaml /tmp/jar-overlay/topics.yaml && \ + cd /tmp/jar-overlay && \ + jar uf /build/target/trading-flink-1.0-SNAPSHOT.jar topics.yaml + +# Stage 2: Create the Flink runtime image +FROM flink:1.20.0-java11 + +# Copy the built JAR to the Flink lib directory +COPY --from=builder /build/target/trading-flink-1.0-SNAPSHOT.jar /opt/flink/usrlib/trading-flink.jar + +# Copy configuration files +COPY config.example.yaml /opt/flink/conf/app-config.yaml + +# Set the entrypoint to use Application mode +# The job will auto-submit on startup +USER flink diff --git a/flink/README.md b/flink/README.md new file mode 100644 index 0000000..d4389b0 --- /dev/null +++ b/flink/README.md @@ -0,0 +1,77 @@ +# Flink Deployment for K8s Cluster + +## Install Flink Kubernetes Operator + +```bash +# Add the Flink Helm repository +helm repo add flink-operator-repo https://downloads.apache.org/flink/flink-kubernetes-operator-1.9.0/ +helm repo update + +# Install the operator +helm install flink-kubernetes-operator flink-operator-repo/flink-kubernetes-operator \ + -f values.yaml \ + --namespace flink --create-namespace + +# Wait for operator to be ready +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=flink-kubernetes-operator -n flink --timeout=300s +``` + +## Create Service Account + +```bash +kubectl create serviceaccount flink -n default +kubectl create clusterrolebinding flink-role-binding-default \ + --clusterrole=edit \ + --serviceaccount=default:flink +``` + +## Deploy Flink Cluster + +```bash +# Apply the Flink cluster manifest +kubectl apply -f 
flink-cluster.yaml + +# Check cluster status +kubectl get flinkdeployment -n default + +# Check pods +kubectl get pods -n default | grep flink +``` + +## Access Flink Web UI + +```bash +# Port forward to access the UI locally +kubectl port-forward svc/trading-flink-rest 8081:8081 -n default + +# Open browser to http://localhost:8081 +``` + +## Prometheus Metrics + +Flink exposes metrics on port 9249. Prometheus will automatically discover and scrape these metrics via pod annotations: +- `prometheus.io/scrape: "true"` +- `prometheus.io/port: "9249"` +- `prometheus.io/path: "/metrics"` + +To verify metrics are being exported: +```bash +kubectl exec -it -n default -- curl localhost:9249/metrics +``` + +## Submit a Job + +```bash +# Example: Submit a jar file +kubectl exec -it -- flink run /path/to/job.jar +``` + +## Uninstall + +```bash +# Delete Flink cluster +kubectl delete flinkdeployment trading-flink -n default + +# Delete operator +helm uninstall flink-kubernetes-operator -n flink +``` diff --git a/flink/config.example.yaml b/flink/config.example.yaml new file mode 100644 index 0000000..2777511 --- /dev/null +++ b/flink/config.example.yaml @@ -0,0 +1,31 @@ +# Example configuration file +# This should be mounted at /etc/config/config.yaml in the Flink container + +# ZeroMQ bind address and ports +zmq_bind_address: "tcp://*" +zmq_ingestor_work_queue_port: 5555 +zmq_ingestor_response_port: 5556 +zmq_ingestor_control_port: 5557 +zmq_market_data_pub_port: 5558 +zmq_client_request_port: 5559 +zmq_cep_webhook_port: 5560 + +# Notification publisher endpoint (Flink → Relay) +# Relay connects XSUB to this endpoint and proxies to clients +notification_publish_endpoint: "tcp://*:5557" + +# Kafka configuration +kafka_bootstrap_servers: "kafka:9092" +kafka_tick_topic: "market-tick" +kafka_ohlc_topic: "market-ohlc" +kafka_topics_file: "/topics-dev.yaml" # Use topics-dev.yaml for single broker dev environment + +# Iceberg catalog +iceberg_catalog_uri: 
+iceberg_catalog_uri: "http://iceberg-catalog:8181"
+iceberg_warehouse: "s3://trading-warehouse/"
+iceberg_namespace: "trading"
+iceberg_table_prefix: "market"
+
+# Flink configuration
+flink_parallelism: 4
+flink_checkpoint_interval_ms: 60000
diff --git a/flink/flink-cluster.yaml b/flink/flink-cluster.yaml
new file mode 100644
index 0000000..ea16967
--- /dev/null
+++ b/flink/flink-cluster.yaml
@@ -0,0 +1,42 @@
+apiVersion: flink.apache.org/v1beta1
+kind: FlinkDeployment
+metadata:
+  name: trading-flink
+  namespace: default
+  labels:
+    app: flink
+spec:
+  podTemplate:
+    metadata:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9249"
+        prometheus.io/path: "/metrics"
+  image: dexorder/ai-flink:latest
+  imagePullPolicy: Always
+  # FIX: was v1_19 — the image is built FROM flink:1.20.0-java11 and the pom
+  # sets flink.version to 1.20.0, so the operator must target Flink 1.20.
+  flinkVersion: v1_20
+  flinkConfiguration:
+    taskmanager.numberOfTaskSlots: "2"
+    state.backend: filesystem
+    state.checkpoints.dir: file:///flink-data/checkpoints
+    state.savepoints.dir: file:///flink-data/savepoints
+    execution.checkpointing.interval: 60s
+    metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
+    metrics.reporter.prom.port: "9249"
+  serviceAccount: flink
+  jobManager:
+    resource:
+      memory: "1024Mi"
+      cpu: 0.5
+    replicas: 1
+  taskManager:
+    resource:
+      memory: "1024Mi"
+      cpu: 0.5
+    replicas: 1
+  job:
+    jarURI: local:///opt/flink/usrlib/trading-flink.jar
+    entryClass: com.dexorder.flink.TradingFlinkApp
+    parallelism: 1
+    upgradeMode: stateless
+    state: running
diff --git a/flink/pom.xml b/flink/pom.xml
new file mode 100644
index 0000000..45e2bee
--- /dev/null
+++ b/flink/pom.xml
@@ -0,0 +1,250 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- NOTE(review): the XML element tags of this pom were lost in transcription;
+     element names below are reconstructed from the surviving values — verify
+     against the original file before relying on them. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.dexorder</groupId>
+    <artifactId>trading-flink</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <flink.version>1.20.0</flink.version>
+        <iceberg.version>1.10.1</iceberg.version>
+        <java.version>11</java.version>
+        <scala.binary.version>2.12</scala.binary.version>
+        <maven.compiler.source>${java.version}</maven.compiler.source>
+        <maven.compiler.target>${java.version}</maven.compiler.target>
+        <jeromq.version>0.6.0</jeromq.version>
+        <protobuf.version>3.24.0</protobuf.version>
+        <snakeyaml.version>2.2</snakeyaml.version>
+    </properties>
+
+    <dependencies>
+        <!-- Flink (provided by the runtime image) -->
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-clients</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-kafka</artifactId>
+            <version>3.3.0-1.20</version>
+        </dependency>
org.apache.flink + flink-table-api-java + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + + org.apache.kafka + kafka-clients + 3.6.0 + + + + + org.apache.iceberg + iceberg-flink-runtime-1.20 + ${iceberg.version} + + + org.apache.iceberg + iceberg-core + ${iceberg.version} + + + + org.apache.iceberg + iceberg-aws-bundle + ${iceberg.version} + + + + + org.apache.hadoop + hadoop-common + 3.3.6 + + + org.apache.hadoop + hadoop-aws + 3.3.6 + + + + + org.zeromq + jeromq + ${jeromq.version} + + + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + + + com.google.code.gson + gson + 2.10.1 + + + + + org.yaml + snakeyaml + ${snakeyaml.version} + + + + + org.slf4j + slf4j-api + 1.7.36 + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.17.1 + provided + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + ${java.version} + ${java.version} + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.6.1 + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + + ${project.basedir}/../protobuf + + ${project.build.directory}/generated-sources/protobuf/java + false + + + + generate-sources + + compile + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + add-source + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/protobuf/java + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.0 + + + package + + shade + + + + + org.apache.flink:flink-shaded-force-shading + com.google.code.findbugs:jsr305 + org.slf4j:* + org.apache.logging.log4j:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com.dexorder.flink.TradingFlinkApp + + + + + + + + + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + + initialize + + detect + + + + + + + diff --git a/flink/secrets.example.yaml b/flink/secrets.example.yaml new file mode 100644 index 0000000..c5c895e --- 
/dev/null +++ b/flink/secrets.example.yaml @@ -0,0 +1,8 @@ +# Example secrets file +# This should be mounted at /etc/secrets/secrets.yaml in the Flink container + +# Iceberg catalog credentials +iceberg_catalog_username: "admin" +iceberg_catalog_password: "changeme" + +# Additional secrets as needed diff --git a/flink/src/main/java/com/dexorder/flink/TradingFlinkApp.java b/flink/src/main/java/com/dexorder/flink/TradingFlinkApp.java new file mode 100644 index 0000000..676d0dd --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/TradingFlinkApp.java @@ -0,0 +1,253 @@ +package com.dexorder.flink; + +import com.dexorder.flink.config.AppConfig; +import com.dexorder.flink.iceberg.SchemaInitializer; +import com.dexorder.flink.ingestor.IngestorControlChannel; +import com.dexorder.flink.ingestor.IngestorWorkQueue; +import com.dexorder.flink.kafka.TopicManager; +import com.dexorder.flink.publisher.HistoryNotificationForwarder; +import com.dexorder.flink.publisher.HistoryNotificationFunction; +import com.dexorder.flink.publisher.OHLCBatchWrapper; +import com.dexorder.flink.publisher.OHLCBatchDeserializer; +import com.dexorder.flink.sink.HistoricalBatchWriter; +import com.dexorder.flink.zmq.ZmqChannelManager; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.streaming.api.functions.sink.DiscardingSink; +import org.apache.flink.connector.kafka.source.KafkaSource; +import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.flink.CatalogLoader; +import org.apache.iceberg.flink.TableLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Main 
entry point for the Trading Flink application. + * + * Responsibilities: + * - Load configuration and secrets + * - Initialize ZMQ channels for ingestor communication, market data pub/sub, and client requests + * - Set up Kafka connectors for data ingestion + * - Bootstrap the Flink streaming job + */ +public class TradingFlinkApp { + private static final Logger LOG = LoggerFactory.getLogger(TradingFlinkApp.class); + + private static final String DEFAULT_CONFIG_PATH = "/etc/config/config.yaml"; + private static final String DEFAULT_SECRETS_PATH = "/etc/secrets/secrets.yaml"; + + public static void main(String[] args) throws Exception { + LOG.info("Starting Trading Flink Application"); + + // Load configuration + String configPath = System.getenv().getOrDefault("CONFIG_PATH", DEFAULT_CONFIG_PATH); + String secretsPath = System.getenv().getOrDefault("SECRETS_PATH", DEFAULT_SECRETS_PATH); + + AppConfig config = loadConfig(configPath, secretsPath); + LOG.info("Configuration loaded successfully"); + + // Initialize Kafka topics + try (TopicManager topicManager = new TopicManager(config.getKafkaBootstrapServers())) { + topicManager.ensureTopicsExist(); + LOG.info("Kafka topics initialized: {}", topicManager.getTopicNames()); + } catch (Exception e) { + LOG.error("Failed to initialize Kafka topics", e); + throw e; + } + + // Initialize Iceberg schemas + try { + Map catalogProps = new HashMap<>(); + catalogProps.put("type", "rest"); + catalogProps.put("uri", config.getString("iceberg_catalog_uri", "http://iceberg-catalog:8181")); + catalogProps.put("warehouse", config.getString("iceberg_warehouse", "s3://warehouse/")); + + // Configure S3 for MinIO + catalogProps.put("s3.endpoint", config.getString("s3_endpoint", "http://minio:9000")); + catalogProps.put("s3.path-style-access", "true"); + catalogProps.put("client.region", "us-east-1"); + + // Use CatalogLoader.rest() for REST catalog instead of custom loader + CatalogLoader catalogLoader = CatalogLoader.rest( + "trading", 
+ new Configuration(), + catalogProps + ); + + org.apache.iceberg.catalog.Catalog catalog = catalogLoader.loadCatalog(); + try { + SchemaInitializer schemaInitializer = new SchemaInitializer( + catalog, + config.getIcebergNamespace() + ); + schemaInitializer.initializeSchemas(); + } finally { + if (catalog instanceof java.io.Closeable) { + ((java.io.Closeable) catalog).close(); + } + } + LOG.info("Iceberg schemas initialized"); + } catch (Exception e) { + LOG.error("Failed to initialize Iceberg schemas", e); + throw e; + } + + // Initialize ZeroMQ channels + ZmqChannelManager zmqManager = new ZmqChannelManager(config); + try { + zmqManager.initializeChannels(); + LOG.info("ZeroMQ channels initialized"); + + // Initialize history notification forwarder (runs in job manager) + // Binds PULL socket to receive notifications from task managers, forwards to MARKET_DATA_PUB + HistoryNotificationForwarder notificationForwarder = new HistoryNotificationForwarder( + config.getNotificationPullPort(), + zmqManager.getSocket(ZmqChannelManager.Channel.MARKET_DATA_PUB) + ); + notificationForwarder.start(); + LOG.info("History notification forwarder started on port {}", config.getNotificationPullPort()); + + // Initialize ingestor components + IngestorWorkQueue workQueue = new IngestorWorkQueue(zmqManager); + IngestorControlChannel controlChannel = new IngestorControlChannel(zmqManager); + + // Start the work queue processor + workQueue.start(); + LOG.info("Ingestor work queue started"); + + // Set up Flink streaming environment + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + // Configure Flink + configureFlinkEnvironment(env, config); + + // Set up Kafka source for OHLCBatch data + KafkaSource ohlcSource = KafkaSource.builder() + .setBootstrapServers(config.getKafkaBootstrapServers()) + .setTopics(config.getKafkaOhlcTopic()) + .setGroupId("flink-ohlc-consumer") + .setStartingOffsets(OffsetsInitializer.earliest()) + 
.setValueOnlyDeserializer(new OHLCBatchDeserializer()) + .build(); + + // Create OHLCBatch data stream + DataStream ohlcStream = env + .fromSource(ohlcSource, WatermarkStrategy.noWatermarks(), "OHLCBatch Kafka Source"); + + LOG.info("OHLCBatch Kafka source configured"); + + // Set up Iceberg catalog and table loader + Map catalogProps2 = new HashMap<>(); + catalogProps2.put("type", "rest"); + catalogProps2.put("uri", config.getString("iceberg_catalog_uri", "http://iceberg-catalog:8181")); + catalogProps2.put("warehouse", config.getString("iceberg_warehouse", "s3://warehouse/")); + + // Configure S3 for MinIO + catalogProps2.put("s3.endpoint", config.getString("s3_endpoint", "http://minio:9000")); + catalogProps2.put("s3.path-style-access", "true"); + catalogProps2.put("client.region", "us-east-1"); + + CatalogLoader catalogLoader2 = CatalogLoader.rest( + "trading", + new Configuration(), + catalogProps2 + ); + + TableLoader tableLoader = TableLoader.fromCatalog( + catalogLoader2, + TableIdentifier.of(config.getIcebergNamespace(), "ohlc") + ); + + LOG.info("Iceberg table loader configured: {}.ohlc", config.getIcebergNamespace()); + + // Historical pipeline: write to Iceberg first, then notify. + // HistoricalBatchWriter uses direct catalog API (table.newAppend().commit()), + // which commits synchronously — no checkpoint dependency. Batches are emitted + // downstream only after commit returns, so notifications are guaranteed to fire + // after data is visible to readers. + // Parallelism MUST be 1: HistoryNotificationPublisher binds a ZMQ PUB socket, + // and only one instance can bind the same port. 
+ DataStream processedStream = ohlcStream + .flatMap(new HistoricalBatchWriter(tableLoader)) + .setParallelism(1) + .process(new HistoryNotificationFunction( + config.getNotificationPublishEndpoint(), + config.getIcebergNamespace(), + config.getIcebergTablePrefix() + )) + .setParallelism(1); + + // Add a discard sink to force Flink to include the pipeline in the execution graph. + processedStream.addSink(new DiscardingSink<>()).setParallelism(1); + + LOG.info("Historical pipeline configured: HistoricalBatchWriter -> HistoryNotificationFunction"); + + // TODO: Set up CEP patterns and triggers + // TODO: Set up realtime tick processing + + LOG.info("Flink job configured, starting execution"); + + // Register shutdown hook for cleanup + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + LOG.info("Shutting down Trading Flink Application"); + try { + // Send shutdown signal to ingestors + controlChannel.shutdown(); + + // Stop work queue + workQueue.stop(); + + // Stop notification forwarder + notificationForwarder.close(); + + // Close ZMQ channels + zmqManager.close(); + + LOG.info("Shutdown complete"); + } catch (Exception e) { + LOG.error("Error during shutdown", e); + } + })); + + // Execute the Flink job + env.execute("Trading Flink Application"); + + } catch (Exception e) { + LOG.error("Fatal error in Trading Flink Application", e); + zmqManager.close(); + throw e; + } + } + + private static AppConfig loadConfig(String configPath, String secretsPath) throws IOException { + LOG.info("Loading config from: {}", configPath); + LOG.info("Loading secrets from: {}", secretsPath); + + try { + return new AppConfig(configPath, secretsPath); + } catch (IOException e) { + LOG.error("Failed to load configuration", e); + throw e; + } + } + + private static void configureFlinkEnvironment(StreamExecutionEnvironment env, AppConfig config) { + // Set parallelism - defaults to 1; increase when more task slots are available + 
env.setParallelism(config.getInt("flink_parallelism", 1)); + + // Enable checkpointing for fault tolerance + env.enableCheckpointing(60000); // Checkpoint every 60 seconds + + // Set time characteristic + // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // Deprecated in newer Flink versions + + LOG.info("Flink environment configured"); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/config/AppConfig.java b/flink/src/main/java/com/dexorder/flink/config/AppConfig.java new file mode 100644 index 0000000..2eb83ba --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/config/AppConfig.java @@ -0,0 +1,154 @@ +package com.dexorder.flink.config; + +import org.yaml.snakeyaml.Yaml; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +/** + * Application configuration loader. + * Loads config and secrets from YAML files mounted by Kubernetes. + */ +public class AppConfig { + private final Map config; + private final Map secrets; + + public AppConfig(String configPath, String secretsPath) throws IOException { + this.config = loadYaml(configPath); + Map loadedSecrets; + try { + loadedSecrets = loadYaml(secretsPath); + } catch (IOException e) { + // Secrets are optional - use empty map if not found + loadedSecrets = new HashMap<>(); + } + this.secrets = loadedSecrets; + } + + private Map loadYaml(String path) throws IOException { + Yaml yaml = new Yaml(); + try (InputStream inputStream = new FileInputStream(path)) { + Map data = yaml.load(inputStream); + return data != null ? 
data : new HashMap<>(); + } catch (IOException e) { + throw new IOException("Failed to load YAML from: " + path, e); + } + } + + public String getString(String key) { + return getString(key, null); + } + + public String getString(String key, String defaultValue) { + Object value = config.get(key); + if (value != null) { + return value.toString(); + } + value = secrets.get(key); + return value != null ? value.toString() : defaultValue; + } + + public int getInt(String key) { + return getInt(key, 0); + } + + public int getInt(String key, int defaultValue) { + Object value = config.get(key); + if (value == null) { + value = secrets.get(key); + } + if (value instanceof Number) { + return ((Number) value).intValue(); + } + if (value instanceof String) { + try { + return Integer.parseInt((String) value); + } catch (NumberFormatException e) { + return defaultValue; + } + } + return defaultValue; + } + + public boolean getBoolean(String key) { + return getBoolean(key, false); + } + + public boolean getBoolean(String key, boolean defaultValue) { + Object value = config.get(key); + if (value == null) { + value = secrets.get(key); + } + if (value instanceof Boolean) { + return (Boolean) value; + } + if (value instanceof String) { + return Boolean.parseBoolean((String) value); + } + return defaultValue; + } + + // ZMQ port getters + public int getIngestorWorkQueuePort() { + return getInt("zmq_ingestor_work_queue_port", 5555); + } + + public int getIngestorResponsePort() { + return getInt("zmq_ingestor_response_port", 5556); + } + + public int getIngestorControlPort() { + return getInt("zmq_ingestor_control_port", 5557); + } + + public int getMarketDataPubPort() { + return getInt("zmq_market_data_pub_port", 5558); + } + + public int getClientRequestPort() { + return getInt("zmq_client_request_port", 5559); + } + + public int getCepWebhookPort() { + return getInt("zmq_cep_webhook_port", 5560); + } + + public String getBindAddress() { + return getString("zmq_bind_address", 
"tcp://*"); + } + + // Kafka config + public String getKafkaBootstrapServers() { + return getString("kafka_bootstrap_servers", "localhost:9092"); + } + + public String getKafkaTickTopic() { + return getString("kafka_tick_topic", "market-tick"); + } + + public String getKafkaOhlcTopic() { + return getString("kafka_ohlc_topic", "market-ohlc"); + } + + // Notification config: + // Task managers PUSH notifications to this endpoint (job manager PULL address) + public String getNotificationPublishEndpoint() { + return getString("notification_publish_endpoint", "tcp://flink-jobmanager:5561"); + } + + // Job manager binds PULL on this port to receive notifications from task managers + public int getNotificationPullPort() { + return getInt("notification_pull_port", 5561); + } + + // Iceberg config + public String getIcebergNamespace() { + return getString("iceberg_namespace", "trading"); + } + + public String getIcebergTablePrefix() { + return getString("iceberg_table_prefix", "market"); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/iceberg/SchemaInitializer.java b/flink/src/main/java/com/dexorder/flink/iceberg/SchemaInitializer.java new file mode 100644 index 0000000..ebe6af5 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/iceberg/SchemaInitializer.java @@ -0,0 +1,155 @@ +package com.dexorder.flink.iceberg; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.types.Types; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; + +/** + * Initializes Iceberg tables directly via the Iceberg Catalog API. 
+ * + * Creates tables in Iceberg if they don't already exist. This ensures all tables + * are properly initialized before Flink starts writing data. + */ +public class SchemaInitializer { + private static final Logger LOG = LoggerFactory.getLogger(SchemaInitializer.class); + + private final Catalog catalog; + private final String namespace; + + public SchemaInitializer(Catalog catalog, String namespace) { + this.catalog = catalog; + this.namespace = namespace; + } + + /** + * Initialize all schemas by creating tables from definitions. + * + * @throws IOException if initialization fails + */ + public void initializeSchemas() throws IOException { + LOG.info("Initializing Iceberg schemas in namespace: {}", namespace); + + // Ensure namespace exists + ensureNamespaceExists(); + + // Initialize each table + initializeOhlcTable(); + + // Add more table initializations here as needed + + LOG.info("Schema initialization completed successfully"); + } + + /** + * Ensure the namespace exists in the catalog. + */ + private void ensureNamespaceExists() { + Namespace ns = Namespace.of(namespace); + try { + if (catalog instanceof SupportsNamespaces) { + SupportsNamespaces nsCatalog = (SupportsNamespaces) catalog; + if (!nsCatalog.namespaceExists(ns)) { + nsCatalog.createNamespace(ns); + LOG.info("Created namespace: {}", namespace); + } else { + LOG.info("Namespace already exists: {}", namespace); + } + } else { + LOG.warn("Catalog does not support namespaces, skipping namespace creation"); + } + } catch (Exception e) { + LOG.error("Failed to create namespace: {}", namespace, e); + throw new RuntimeException("Namespace creation failed", e); + } + } + + /** + * Initialize the OHLC table if it doesn't exist. + */ + // Bump this when the schema changes. Tables with a different (or missing) version + // will be dropped and recreated. Increment by 1 for each incompatible change. 
+ private static final String OHLC_SCHEMA_VERSION = "1"; + private static final String SCHEMA_VERSION_PROP = "app.schema.version"; + + private void initializeOhlcTable() { + TableIdentifier tableId = TableIdentifier.of(namespace, "ohlc"); + + try { + if (catalog.tableExists(tableId)) { + Table existing = catalog.loadTable(tableId); + String existingVersion = existing.properties().get(SCHEMA_VERSION_PROP); + if (!OHLC_SCHEMA_VERSION.equals(existingVersion)) { + LOG.warn("Table {} has schema version '{}', expected '{}' — manual migration required", + tableId, existingVersion, OHLC_SCHEMA_VERSION); + } + LOG.info("Table {} already exists at schema version {} — skipping creation", tableId, existingVersion); + return; + } + + LOG.info("Creating OHLC table: {}", tableId); + + // Define the OHLC schema. + // timestamp is stored as BIGINT (microseconds since epoch), not a TIMESTAMP type, + // so that GenericRowData.setField() accepts a plain Long value. + Schema schema = new Schema( + // Primary key fields + required(1, "ticker", Types.StringType.get(), "Market identifier (e.g., BINANCE:BTC/USDT)"), + required(2, "period_seconds", Types.IntegerType.get(), "OHLC period in seconds"), + required(3, "timestamp", Types.LongType.get(), "Candle timestamp in microseconds since epoch"), + + // OHLC price data + required(4, "open", Types.LongType.get(), "Opening price"), + required(5, "high", Types.LongType.get(), "Highest price"), + required(6, "low", Types.LongType.get(), "Lowest price"), + required(7, "close", Types.LongType.get(), "Closing price"), + + // Volume data + optional(8, "volume", Types.LongType.get(), "Total volume"), + optional(9, "buy_vol", Types.LongType.get(), "Buy volume"), + optional(10, "sell_vol", Types.LongType.get(), "Sell volume"), + + // Timing data + optional(11, "open_time", Types.LongType.get(), "Timestamp when open price occurred"), + optional(12, "high_time", Types.LongType.get(), "Timestamp when high price occurred"), + optional(13, "low_time", 
Types.LongType.get(), "Timestamp when low price occurred"), + optional(14, "close_time", Types.LongType.get(), "Timestamp when close price occurred"), + + // Additional fields + optional(15, "open_interest", Types.LongType.get(), "Open interest for futures"), + + // Metadata fields + optional(16, "request_id", Types.StringType.get(), "Request ID that generated this data"), + required(17, "ingested_at", Types.LongType.get(), "Timestamp when data was ingested by Flink") + ); + + // Create the table with partitioning and properties + Table table = catalog.buildTable(tableId, schema) + .withPartitionSpec(org.apache.iceberg.PartitionSpec.builderFor(schema) + .identity("ticker") + .build()) + .withProperty("write.format.default", "parquet") + .withProperty("write.parquet.compression-codec", "snappy") + .withProperty("write.metadata.compression-codec", "gzip") + .withProperty("format-version", "2") + .withProperty(SCHEMA_VERSION_PROP, OHLC_SCHEMA_VERSION) + .create(); + + LOG.info("Successfully created OHLC table: {}", tableId); + + } catch (Exception e) { + LOG.error("Failed to initialize OHLC table: {}", tableId, e); + throw new RuntimeException("OHLC table initialization failed", e); + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/ingestor/DataRequestMessage.java b/flink/src/main/java/com/dexorder/flink/ingestor/DataRequestMessage.java new file mode 100644 index 0000000..b9f8b29 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/ingestor/DataRequestMessage.java @@ -0,0 +1,111 @@ +package com.dexorder.flink.ingestor; + +/** + * Represents a data request message to be sent to ingestors. + * This is a simplified wrapper until we generate the actual protobuf classes. 
+ */ +public class DataRequestMessage { + private final String requestId; + private final RequestType requestType; + private final String ticker; + private final HistoricalParams historicalParams; + private final RealtimeParams realtimeParams; + + public enum RequestType { + HISTORICAL_OHLC, + REALTIME_TICKS + } + + public DataRequestMessage(String requestId, RequestType requestType, String ticker, + HistoricalParams historicalParams, RealtimeParams realtimeParams) { + this.requestId = requestId; + this.requestType = requestType; + this.ticker = ticker; + this.historicalParams = historicalParams; + this.realtimeParams = realtimeParams; + } + + public String getRequestId() { + return requestId; + } + + public RequestType getRequestType() { + return requestType; + } + + public String getTicker() { + return ticker; + } + + public HistoricalParams getHistoricalParams() { + return historicalParams; + } + + public RealtimeParams getRealtimeParams() { + return realtimeParams; + } + + /** + * Serialize to protobuf bytes. 
+ * TODO: Replace with actual generated protobuf serialization + */ + public byte[] toProtobuf() { + // For now, return a placeholder + // This will be replaced with actual protobuf serialization once we compile the .proto files + return new byte[0]; + } + + public static class HistoricalParams { + private final long startTime; + private final long endTime; + private final int periodSeconds; + private final Integer limit; + + public HistoricalParams(long startTime, long endTime, int periodSeconds, Integer limit) { + this.startTime = startTime; + this.endTime = endTime; + this.periodSeconds = periodSeconds; + this.limit = limit; + } + + public long getStartTime() { + return startTime; + } + + public long getEndTime() { + return endTime; + } + + public int getPeriodSeconds() { + return periodSeconds; + } + + public Integer getLimit() { + return limit; + } + } + + public static class RealtimeParams { + private final boolean includeTicks; + private final boolean includeOhlc; + private final int[] ohlcPeriodSeconds; + + public RealtimeParams(boolean includeTicks, boolean includeOhlc, int[] ohlcPeriodSeconds) { + this.includeTicks = includeTicks; + this.includeOhlc = includeOhlc; + this.ohlcPeriodSeconds = ohlcPeriodSeconds; + } + + public boolean isIncludeTicks() { + return includeTicks; + } + + public boolean isIncludeOhlc() { + return includeOhlc; + } + + public int[] getOhlcPeriodSeconds() { + return ohlcPeriodSeconds; + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/ingestor/DataResponseMessage.java b/flink/src/main/java/com/dexorder/flink/ingestor/DataResponseMessage.java new file mode 100644 index 0000000..59148ab --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/ingestor/DataResponseMessage.java @@ -0,0 +1,91 @@ +package com.dexorder.flink.ingestor; + +import java.util.ArrayList; +import java.util.List; + +/** + * Represents a DataResponse message from an ingestor. + * Contains the results of a historical data request. 
/**
 * Represents a DataResponse message from an ingestor.
 * Carries the outcome of a historical data request: a status, an optional
 * error message, the fetched OHLC rows, and a total record count.
 *
 * NOTE(review): the element type of the row list was lost in source
 * extraction (generics stripped) — presumably the generated protobuf OHLC
 * row type; confirm against the .proto definitions.
 */
public class DataResponseMessage {
    private final String requestId;
    private final ResponseStatus status;
    private final String errorMessage;   // null unless the ingestor reported a problem
    private final List ohlcData;         // never null; empty when there are no rows
    private final int totalRecords;

    /** Outcome of the request as reported by the ingestor. */
    public enum ResponseStatus {
        OK,
        NOT_FOUND,
        ERROR
    }

    public DataResponseMessage(String requestId, ResponseStatus status, String errorMessage,
                               List ohlcData, int totalRecords) {
        this.requestId = requestId;
        this.status = status;
        this.errorMessage = errorMessage;
        // Normalize null to an empty list so callers never need a null check.
        this.ohlcData = ohlcData != null ? ohlcData : new ArrayList<>();
        this.totalRecords = totalRecords;
    }

    public String getRequestId() {
        return requestId;
    }

    public ResponseStatus getStatus() {
        return status;
    }

    public String getErrorMessage() {
        return errorMessage;
    }

    public List getOhlcData() {
        return ohlcData;
    }

    public int getTotalRecords() {
        return totalRecords;
    }

    /**
     * Deserialize from protobuf bytes.
     * TODO: Replace with actual generated protobuf deserialization
     */
    public static DataResponseMessage fromProtobuf(byte[] protobufData) {
        // Placeholder - will be replaced with actual protobuf deserialization
        // For now, return a dummy response
        return new DataResponseMessage("", ResponseStatus.ERROR, "Not implemented", null, 0);
    }

    /**
     * Serialize to protobuf bytes.
     * TODO: Replace with actual generated protobuf serialization
     */
    public byte[] toProtobuf() {
        // Placeholder - will be replaced with actual protobuf serialization
        return new byte[0];
    }

    /**
     * Create a successful response.
     * FIX: previously called {@code ohlcData.size()} unconditionally, throwing
     * an NPE for a null list even though the constructor explicitly tolerates
     * null; now null-safe.
     */
    public static DataResponseMessage success(String requestId, List ohlcData) {
        int recordCount = ohlcData != null ? ohlcData.size() : 0;
        return new DataResponseMessage(requestId, ResponseStatus.OK, null, ohlcData, recordCount);
    }

    /** Create an error response carrying the given diagnostic message. */
    public static DataResponseMessage error(String requestId, String errorMessage) {
        return new DataResponseMessage(requestId, ResponseStatus.ERROR, errorMessage, null, 0);
    }

    /** Create a not-found response (no data available for the requested range). */
    public static DataResponseMessage notFound(String requestId) {
        return new DataResponseMessage(requestId, ResponseStatus.NOT_FOUND, "Data not found", null, 0);
    }
}
+ */ + public void updateConfig(IngestorConfig config) { + IngestorControlMessage msg = IngestorControlMessage.configUpdate(config); + broadcastControlMessage(msg); + LOG.info("Sent CONFIG_UPDATE control message to all ingestors"); + } + + /** + * Send heartbeat to ingestors. + */ + public void sendHeartbeat() { + IngestorControlMessage msg = IngestorControlMessage.heartbeat(); + broadcastControlMessage(msg); + LOG.debug("Sent HEARTBEAT control message to all ingestors"); + } + + /** + * Broadcast a control message to all ingestors. + */ + private void broadcastControlMessage(IngestorControlMessage message) { + try { + byte[] protobufData = message.toProtobuf(); + + boolean sent = zmqManager.sendMessage( + ZmqChannelManager.Channel.INGESTOR_CONTROL, + PROTOCOL_VERSION, + MSG_TYPE_INGESTOR_CONTROL, + protobufData + ); + + if (!sent) { + LOG.error("Failed to send control message: action={}", message.getAction()); + } + } catch (Exception e) { + LOG.error("Error broadcasting control message: action={}", message.getAction(), e); + } + } + + /** + * Control message wrapper. 
+ */ + public static class IngestorControlMessage { + private final ControlAction action; + private final String requestId; + private final IngestorConfig config; + + public enum ControlAction { + CANCEL, + SHUTDOWN, + CONFIG_UPDATE, + HEARTBEAT + } + + private IngestorControlMessage(ControlAction action, String requestId, IngestorConfig config) { + this.action = action; + this.requestId = requestId; + this.config = config; + } + + public static IngestorControlMessage cancel(String requestId) { + return new IngestorControlMessage(ControlAction.CANCEL, requestId, null); + } + + public static IngestorControlMessage shutdown() { + return new IngestorControlMessage(ControlAction.SHUTDOWN, null, null); + } + + public static IngestorControlMessage configUpdate(IngestorConfig config) { + return new IngestorControlMessage(ControlAction.CONFIG_UPDATE, null, config); + } + + public static IngestorControlMessage heartbeat() { + return new IngestorControlMessage(ControlAction.HEARTBEAT, null, null); + } + + public ControlAction getAction() { + return action; + } + + public String getRequestId() { + return requestId; + } + + public IngestorConfig getConfig() { + return config; + } + + /** + * Serialize to protobuf bytes. + * TODO: Replace with actual generated protobuf serialization + */ + public byte[] toProtobuf() { + // Placeholder - will be replaced with actual protobuf serialization + return new byte[0]; + } + } + + /** + * Ingestor configuration. 
+ */ + public static class IngestorConfig { + private final Integer maxConcurrent; + private final Integer timeoutSeconds; + private final String kafkaTopic; + + public IngestorConfig(Integer maxConcurrent, Integer timeoutSeconds, String kafkaTopic) { + this.maxConcurrent = maxConcurrent; + this.timeoutSeconds = timeoutSeconds; + this.kafkaTopic = kafkaTopic; + } + + public Integer getMaxConcurrent() { + return maxConcurrent; + } + + public Integer getTimeoutSeconds() { + return timeoutSeconds; + } + + public String getKafkaTopic() { + return kafkaTopic; + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/ingestor/IngestorResponseListener.java b/flink/src/main/java/com/dexorder/flink/ingestor/IngestorResponseListener.java new file mode 100644 index 0000000..4f3c12b --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/ingestor/IngestorResponseListener.java @@ -0,0 +1,172 @@ +package com.dexorder.flink.ingestor; + +import com.dexorder.flink.zmq.ZmqChannelManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +/** + * Listens for DataResponse messages from ingestors on the ROUTER socket. + * Matches responses to pending requests and delivers them to waiting handlers. 
+ */ +public class IngestorResponseListener { + private static final Logger LOG = LoggerFactory.getLogger(IngestorResponseListener.class); + private static final byte PROTOCOL_VERSION = 0x01; + private static final byte MSG_TYPE_DATA_RESPONSE = 0x02; + + private final ZmqChannelManager zmqManager; + private final Map> pendingRequests; + private volatile boolean running; + private Thread listenerThread; + + public IngestorResponseListener(ZmqChannelManager zmqManager) { + this.zmqManager = zmqManager; + this.pendingRequests = new ConcurrentHashMap<>(); + this.running = false; + } + + /** + * Start the response listener thread. + */ + public void start() { + if (running) { + LOG.warn("IngestorResponseListener already running"); + return; + } + + running = true; + listenerThread = new Thread(this::listenLoop, "IngestorResponseListener-Thread"); + listenerThread.setDaemon(false); + listenerThread.start(); + LOG.info("IngestorResponseListener started"); + } + + /** + * Stop the response listener. + */ + public void stop() { + if (!running) { + return; + } + + running = false; + if (listenerThread != null) { + listenerThread.interrupt(); + try { + listenerThread.join(5000); + } catch (InterruptedException e) { + LOG.warn("Interrupted while waiting for listener thread to stop", e); + Thread.currentThread().interrupt(); + } + } + + // Cancel all pending requests + pendingRequests.values().forEach(future -> + future.completeExceptionally(new Exception("Listener stopped")) + ); + pendingRequests.clear(); + + LOG.info("IngestorResponseListener stopped"); + } + + /** + * Register a request and return a CompletableFuture that will be completed + * when the response arrives. + */ + public CompletableFuture registerRequest(String requestId) { + CompletableFuture future = new CompletableFuture<>(); + pendingRequests.put(requestId, future); + LOG.debug("Registered pending request: {}", requestId); + return future; + } + + /** + * Cancel a pending request. 
+ */ + public void cancelRequest(String requestId) { + CompletableFuture future = pendingRequests.remove(requestId); + if (future != null) { + future.completeExceptionally(new Exception("Request cancelled")); + LOG.debug("Cancelled pending request: {}", requestId); + } + } + + /** + * Main listener loop - receives and processes DataResponse messages. + */ + private void listenLoop() { + LOG.info("IngestorResponseListener loop started"); + + while (running) { + try { + // Receive message from ROUTER socket with 1 second timeout + ZmqChannelManager.ReceivedMessage receivedMsg = zmqManager.receiveRouterMessage( + ZmqChannelManager.Channel.INGESTOR_RESPONSE, + 1000 + ); + + if (receivedMsg == null) { + continue; + } + + // Verify protocol version and message type + if (receivedMsg.getVersion() != PROTOCOL_VERSION) { + LOG.warn("Received message with unsupported protocol version: {}", + receivedMsg.getVersion()); + continue; + } + + if (receivedMsg.getMessageType() != MSG_TYPE_DATA_RESPONSE) { + LOG.warn("Received unexpected message type: {}", + receivedMsg.getMessageType()); + continue; + } + + // Parse the DataResponse + DataResponseMessage response = DataResponseMessage.fromProtobuf( + receivedMsg.getProtobufData() + ); + + processResponse(response); + + } catch (Exception e) { + if (running) { + LOG.error("Error in listener loop", e); + } + } + } + + LOG.info("IngestorResponseListener loop stopped"); + } + + /** + * Process a received DataResponse message. 
+ */ + private void processResponse(DataResponseMessage response) { + String requestId = response.getRequestId(); + + CompletableFuture future = pendingRequests.remove(requestId); + if (future == null) { + LOG.warn("Received response for unknown request: {}", requestId); + return; + } + + LOG.info("Received response for request: {}, status={}, records={}", + requestId, response.getStatus(), response.getTotalRecords()); + + // Complete the future with the response + future.complete(response); + } + + public boolean isRunning() { + return running; + } + + public int getPendingRequestCount() { + return pendingRequests.size(); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/ingestor/IngestorWorkQueue.java b/flink/src/main/java/com/dexorder/flink/ingestor/IngestorWorkQueue.java new file mode 100644 index 0000000..d8e1df9 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/ingestor/IngestorWorkQueue.java @@ -0,0 +1,164 @@ +package com.dexorder.flink.ingestor; + +import com.dexorder.flink.zmq.ZmqChannelManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * Manages the ingestor work queue. + * Sends DataRequest messages to ingestor workers via ZMQ PUB socket with exchange prefix filtering. + */ +public class IngestorWorkQueue { + private static final Logger LOG = LoggerFactory.getLogger(IngestorWorkQueue.class); + private static final byte PROTOCOL_VERSION = 0x01; + private static final byte MSG_TYPE_DATA_REQUEST = 0x01; + + private final ZmqChannelManager zmqManager; + private final BlockingQueue requestQueue; + private volatile boolean running; + private Thread workerThread; + + public IngestorWorkQueue(ZmqChannelManager zmqManager) { + this.zmqManager = zmqManager; + this.requestQueue = new LinkedBlockingQueue<>(); + this.running = false; + } + + /** + * Start the work queue processor thread. 
+ */ + public void start() { + if (running) { + LOG.warn("IngestorWorkQueue already running"); + return; + } + + running = true; + workerThread = new Thread(this::processQueue, "IngestorWorkQueue-Thread"); + workerThread.setDaemon(false); + workerThread.start(); + LOG.info("IngestorWorkQueue started"); + } + + /** + * Stop the work queue processor. + */ + public void stop() { + if (!running) { + return; + } + + running = false; + if (workerThread != null) { + workerThread.interrupt(); + try { + workerThread.join(5000); + } catch (InterruptedException e) { + LOG.warn("Interrupted while waiting for worker thread to stop", e); + Thread.currentThread().interrupt(); + } + } + LOG.info("IngestorWorkQueue stopped"); + } + + /** + * Submit a data request to the queue. + */ + public void submitRequest(DataRequestMessage request) { + if (!running) { + LOG.warn("Cannot submit request - work queue not running"); + return; + } + + try { + requestQueue.put(request); + LOG.debug("Submitted data request: {}", request.getRequestId()); + } catch (InterruptedException e) { + LOG.error("Interrupted while submitting request", e); + Thread.currentThread().interrupt(); + } + } + + /** + * Process the request queue and send to ingestors. + */ + private void processQueue() { + LOG.info("IngestorWorkQueue processor started"); + + while (running) { + try { + DataRequestMessage request = requestQueue.take(); + sendToIngestors(request); + } catch (InterruptedException e) { + if (running) { + LOG.error("Queue processing interrupted", e); + } + Thread.currentThread().interrupt(); + break; + } catch (Exception e) { + LOG.error("Error processing request", e); + } + } + + LOG.info("IngestorWorkQueue processor stopped"); + } + + /** + * Send a data request to ingestors via PUB socket with exchange prefix. 
+ * The topic prefix is extracted from the ticker (e.g., "BINANCE:BTC/USDT" -> "BINANCE:") + */ + private void sendToIngestors(DataRequestMessage request) { + try { + byte[] protobufData = request.toProtobuf(); + + // Extract exchange prefix from ticker (e.g., "BINANCE:BTC/USDT" -> "BINANCE:") + String ticker = request.getTicker(); + String exchangePrefix = extractExchangePrefix(ticker); + + boolean sent = zmqManager.sendTopicMessage( + ZmqChannelManager.Channel.INGESTOR_WORK_QUEUE, + exchangePrefix, + PROTOCOL_VERSION, + MSG_TYPE_DATA_REQUEST, + protobufData + ); + + if (sent) { + LOG.info("Sent DataRequest to ingestors: requestId={}, type={}, ticker={}, prefix={}", + request.getRequestId(), request.getRequestType(), request.getTicker(), exchangePrefix); + } else { + LOG.error("Failed to send DataRequest: {}", request.getRequestId()); + // Re-queue the request + requestQueue.offer(request); + } + } catch (Exception e) { + LOG.error("Error sending request to ingestors: {}", request.getRequestId(), e); + // Re-queue the request + requestQueue.offer(request); + } + } + + /** + * Extract exchange prefix from ticker string. + * E.g., "BINANCE:BTC/USDT" -> "BINANCE:" + */ + private String extractExchangePrefix(String ticker) { + int colonIndex = ticker.indexOf(':'); + if (colonIndex > 0) { + return ticker.substring(0, colonIndex + 1); + } + LOG.warn("Ticker '{}' does not contain exchange prefix, using empty prefix", ticker); + return ""; + } + + public int getQueueSize() { + return requestQueue.size(); + } + + public boolean isRunning() { + return running; + } +} diff --git a/flink/src/main/java/com/dexorder/flink/kafka/TopicConfig.java b/flink/src/main/java/com/dexorder/flink/kafka/TopicConfig.java new file mode 100644 index 0000000..6a7dc34 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/kafka/TopicConfig.java @@ -0,0 +1,60 @@ +package com.dexorder.flink.kafka; + +import java.util.HashMap; +import java.util.Map; + +/** + * Configuration for a Kafka topic. 
/**
 * Configuration for a single Kafka topic: name, partition count, replication
 * factor, and optional per-topic config overrides. Mutable JavaBean so it can
 * be populated directly by SnakeYAML.
 */
public class TopicConfig {
    private String name;
    private int partitions;
    private int replication;
    private Map<String, String> config;

    public TopicConfig() {
        // YAML deserialization needs a no-arg constructor; start with no overrides.
        this.config = new HashMap<>();
    }

    public String getName() {
        return name;
    }

    public int getPartitions() {
        return partitions;
    }

    public int getReplication() {
        return replication;
    }

    public Map<String, String> getConfig() {
        return config;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setPartitions(int partitions) {
        this.partitions = partitions;
    }

    public void setReplication(int replication) {
        this.replication = replication;
    }

    public void setConfig(Map<String, String> config) {
        this.config = config;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("TopicConfig{");
        sb.append("name='").append(name).append('\'');
        sb.append(", partitions=").append(partitions);
        sb.append(", replication=").append(replication);
        sb.append(", config=").append(config);
        sb.append('}');
        return sb.toString();
    }
}
+ */ +public class TopicManager implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(TopicManager.class); + + private final AdminClient adminClient; + private final List topicConfigs; + + /** + * Creates a TopicManager with the specified Kafka bootstrap servers. + * Loads topic configuration from the classpath resource "topics.yaml". + * + * @param bootstrapServers Kafka bootstrap servers + */ + public TopicManager(String bootstrapServers) { + this(bootstrapServers, "/topics.yaml"); + } + + /** + * Creates a TopicManager with the specified Kafka bootstrap servers and config file. + * + * @param bootstrapServers Kafka bootstrap servers + * @param configResourcePath Path to the topics YAML file in classpath + */ + public TopicManager(String bootstrapServers, String configResourcePath) { + Properties props = new Properties(); + props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + props.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "10000"); + props.put(AdminClientConfig.DEFAULT_API_TIMEOUT_MS_CONFIG, "10000"); + + this.adminClient = AdminClient.create(props); + this.topicConfigs = loadTopicConfigs(configResourcePath); + } + + /** + * Loads topic configurations from a YAML file. 
+ */ + private List loadTopicConfigs(String resourcePath) { + try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) { + if (inputStream == null) { + LOG.error("Topic configuration file not found: {}", resourcePath); + return Collections.emptyList(); + } + + Yaml yaml = new Yaml(new Constructor(TopicsWrapper.class, new org.yaml.snakeyaml.LoaderOptions())); + TopicsWrapper wrapper = yaml.load(inputStream); + + if (wrapper == null || wrapper.getTopics() == null) { + LOG.warn("No topics defined in configuration file"); + return Collections.emptyList(); + } + + LOG.info("Loaded {} topic configurations from {}", wrapper.getTopics().size(), resourcePath); + return wrapper.getTopics(); + + } catch (Exception e) { + LOG.error("Failed to load topic configurations from {}", resourcePath, e); + return Collections.emptyList(); + } + } + + /** + * Ensures all configured topics exist with the correct configuration. + * Creates topics that don't exist, logs warnings for topics that exist with different config. 
+ */ + public void ensureTopicsExist() throws ExecutionException, InterruptedException { + if (topicConfigs.isEmpty()) { + LOG.warn("No topics to create"); + return; + } + + // Get existing topics + Set existingTopics = adminClient.listTopics().names().get(); + LOG.info("Found {} existing topics in Kafka", existingTopics.size()); + + // Separate topics to create vs topics to check + List topicsToCreate = topicConfigs.stream() + .filter(tc -> !existingTopics.contains(tc.getName())) + .collect(Collectors.toList()); + + List existingConfiguredTopics = topicConfigs.stream() + .filter(tc -> existingTopics.contains(tc.getName())) + .collect(Collectors.toList()); + + // Create new topics + if (!topicsToCreate.isEmpty()) { + createTopics(topicsToCreate); + } + + // Verify existing topics + if (!existingConfiguredTopics.isEmpty()) { + verifyTopicConfigurations(existingConfiguredTopics); + } + + LOG.info("Topic management complete"); + } + + /** + * Creates the specified topics. + */ + private void createTopics(List topics) + throws ExecutionException, InterruptedException { + + List newTopics = topics.stream() + .map(tc -> { + NewTopic newTopic = new NewTopic( + tc.getName(), + tc.getPartitions(), + (short) tc.getReplication() + ); + + if (tc.getConfig() != null && !tc.getConfig().isEmpty()) { + newTopic.configs(tc.getConfig()); + } + + return newTopic; + }) + .collect(Collectors.toList()); + + LOG.info("Creating {} topics", newTopics.size()); + + CreateTopicsResult result = adminClient.createTopics(newTopics); + + // Wait for all topics to be created and log results + for (Map.Entry> entry : result.values().entrySet()) { + try { + entry.getValue().get(); + LOG.info("Successfully created topic: {}", entry.getKey()); + } catch (ExecutionException e) { + LOG.error("Failed to create topic: {}", entry.getKey(), e); + throw e; + } + } + } + + /** + * Verifies that existing topics have the expected configuration. + * Logs warnings if configuration differs. 
+ */ + private void verifyTopicConfigurations(List topics) + throws ExecutionException, InterruptedException { + + List topicNames = topics.stream() + .map(com.dexorder.flink.kafka.TopicConfig::getName) + .collect(Collectors.toList()); + + // Describe topics to get their configurations + DescribeTopicsResult describeResult = adminClient.describeTopics(topicNames); + Map descriptions = describeResult.all().get(); + + for (com.dexorder.flink.kafka.TopicConfig tc : topics) { + TopicDescription desc = descriptions.get(tc.getName()); + if (desc == null) { + continue; + } + + // Check partition count + if (desc.partitions().size() != tc.getPartitions()) { + LOG.warn("Topic {} has {} partitions, expected {}. " + + "Partition count cannot be changed automatically.", + tc.getName(), desc.partitions().size(), tc.getPartitions()); + } + + // Check replication factor + if (!desc.partitions().isEmpty()) { + int actualReplication = desc.partitions().get(0).replicas().size(); + if (actualReplication != tc.getReplication()) { + LOG.warn("Topic {} has replication factor {}, expected {}. " + + "Replication factor cannot be changed automatically.", + tc.getName(), actualReplication, tc.getReplication()); + } + } + + LOG.info("Verified existing topic: {}", tc.getName()); + } + } + + /** + * Gets the list of configured topic names. + */ + public List getTopicNames() { + return topicConfigs.stream() + .map(com.dexorder.flink.kafka.TopicConfig::getName) + .collect(Collectors.toList()); + } + + @Override + public void close() { + if (adminClient != null) { + adminClient.close(); + } + } + + /** + * Wrapper class for YAML deserialization. 
+ */ + public static class TopicsWrapper { + private List topics; + + public List getTopics() { + return topics; + } + + public void setTopics(List topics) { + this.topics = topics; + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationForwarder.java b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationForwarder.java new file mode 100644 index 0000000..af085b5 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationForwarder.java @@ -0,0 +1,100 @@ +package com.dexorder.flink.publisher; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.zeromq.SocketType; +import org.zeromq.ZContext; +import org.zeromq.ZMQ; + +/** + * Runs in the job manager. Pulls notifications from task managers (via PUSH/PULL) + * and republishes them on the MARKET_DATA_PUB socket that the relay subscribes to. + * + * Flow: + * Task manager HistoryNotificationPublisher → PUSH + * ↓ + * Job manager HistoryNotificationForwarder PULL → MARKET_DATA_PUB + * ↓ + * Relay (XSUB) → Relay (XPUB) → Clients + */ +public class HistoryNotificationForwarder implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(HistoryNotificationForwarder.class); + + private final ZMQ.Socket pullSocket; + private final ZMQ.Socket pubSocket; + private final ZContext context; + private volatile boolean running = true; + private Thread thread; + + /** + * @param pullPort Port to bind PULL socket on (task managers connect PUSH here) + * @param pubSocket Existing MARKET_DATA_PUB socket from ZmqChannelManager + */ + public HistoryNotificationForwarder(int pullPort, ZMQ.Socket pubSocket) { + this.pubSocket = pubSocket; + this.context = new ZContext(); + this.pullSocket = context.createSocket(SocketType.PULL); + this.pullSocket.setRcvHWM(10000); + + String endpoint = "tcp://*:" + pullPort; + this.pullSocket.bind(endpoint); + LOG.info("HistoryNotificationForwarder PULL socket bound to {}", 
endpoint); + } + + public void start() { + thread = new Thread(this::forwardLoop, "notification-forwarder"); + thread.setDaemon(true); + thread.start(); + LOG.info("HistoryNotificationForwarder started"); + } + + private void forwardLoop() { + LOG.info("Notification forwarder loop running"); + pullSocket.setReceiveTimeOut(200); // ms, so we can check running flag + + while (running) { + // Receive all frames of a multi-part message and forward to PUB + byte[] frame = pullSocket.recv(0); + if (frame == null) { + continue; // timeout, check running flag + } + + boolean more = pullSocket.hasReceiveMore(); + if (more) { + pubSocket.sendMore(frame); + } else { + pubSocket.send(frame, 0); + continue; + } + + // Receive remaining frames + while (more) { + frame = pullSocket.recv(0); + more = pullSocket.hasReceiveMore(); + if (more) { + pubSocket.sendMore(frame); + } else { + pubSocket.send(frame, 0); + } + } + + LOG.debug("Forwarded notification to MARKET_DATA_PUB"); + } + + LOG.info("Notification forwarder loop stopped"); + } + + @Override + public void close() { + running = false; + if (thread != null) { + try { + thread.join(2000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + pullSocket.close(); + context.close(); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationFunction.java b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationFunction.java new file mode 100644 index 0000000..0a356cd --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationFunction.java @@ -0,0 +1,137 @@ +package com.dexorder.flink.publisher; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Flink function that processes OHLCBatch messages and publishes notifications + * after data is written to 
Iceberg. + * + * This function: + * 1. Receives OHLCBatch messages with metadata and rows + * 2. Writes rows to Iceberg (pass through to sink) + * 3. Publishes HistoryReadyNotification immediately after batch completes + * + * Note: Each OHLCBatch is a complete unit - one batch = one notification + */ +public class HistoryNotificationFunction extends ProcessFunction { + private static final Logger LOG = LoggerFactory.getLogger(HistoryNotificationFunction.class); + + private final String notificationEndpoint; + private final String icebergNamespace; + private final String icebergTablePrefix; + + private transient HistoryNotificationPublisher publisher; + + public HistoryNotificationFunction( + String notificationEndpoint, + String icebergNamespace, + String icebergTablePrefix + ) { + this.notificationEndpoint = notificationEndpoint; + this.icebergNamespace = icebergNamespace; + this.icebergTablePrefix = icebergTablePrefix; + } + + @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + + // Initialize ZMQ publisher + publisher = new HistoryNotificationPublisher(notificationEndpoint); + LOG.info("Initialized HistoryNotificationPublisher on {}", notificationEndpoint); + } + + @Override + public void processElement( + OHLCBatchWrapper batch, + Context context, + Collector out + ) throws Exception { + + // Pass through the batch for Iceberg sink + out.collect(batch); + + String requestId = batch.getRequestId(); + String clientId = batch.getClientId(); + String ticker = batch.getTicker(); + int periodSeconds = batch.getPeriodSeconds(); + long startTime = batch.getStartTime(); + long endTime = batch.getEndTime(); + String status = batch.getStatus(); + int rowCount = batch.getRowCount(); + + LOG.info("Processing OHLCBatch: request_id={}, status={}, rows={}", + requestId, status, rowCount); + + // Determine Iceberg table name based on period + String tableName = getIcebergTableName(ticker, periodSeconds); + + // Publish 
notification based on status + if ("ERROR".equals(status)) { + // Error during fetch + publisher.publishError( + requestId, + clientId, + ticker, + periodSeconds, + startTime, + endTime, + batch.getErrorMessage() + ); + } else if ("NOT_FOUND".equals(status) || rowCount == 0) { + // No data available + publisher.publishNotFound( + requestId, + clientId, + ticker, + periodSeconds, + startTime, + endTime + ); + } else { + // Success - data available + publisher.publishHistoryReady( + requestId, + clientId, + ticker, + periodSeconds, + startTime, + endTime, + icebergNamespace, + tableName, + rowCount + ); + } + + LOG.info("Published notification for request_id={}", requestId); + } + + @Override + public void close() throws Exception { + super.close(); + if (publisher != null) { + publisher.close(); + } + } + + private String getIcebergTableName(String ticker, int periodSeconds) { + // Extract exchange from ticker (e.g., "BINANCE:BTC/USDT" -> "binance") + String exchange = ticker.split(":")[0].toLowerCase(); + + // Convert period to human-readable format + String period; + if (periodSeconds < 3600) { + period = (periodSeconds / 60) + "m"; + } else if (periodSeconds < 86400) { + period = (periodSeconds / 3600) + "h"; + } else { + period = (periodSeconds / 86400) + "d"; + } + + return String.format("%s_ohlc_%s_%s", icebergTablePrefix, exchange, period); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationPublisher.java b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationPublisher.java new file mode 100644 index 0000000..c4ab408 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/publisher/HistoryNotificationPublisher.java @@ -0,0 +1,130 @@ +package com.dexorder.flink.publisher; + +import com.dexorder.proto.HistoryReadyNotification; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.zeromq.SocketType; +import org.zeromq.ZContext; +import org.zeromq.ZMQ; + +/** + * Pushes 
HistoryReadyNotification protobuf messages to the job manager's notification forwarder. + * + * Uses ZMQ PUSH socket connecting to the job manager's PULL socket. + * The job manager's HistoryNotificationForwarder receives and republishes via MARKET_DATA_PUB. + * Topic format: "RESPONSE:{client_id}" or "HISTORY_READY:{request_id}" + */ +public class HistoryNotificationPublisher implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(HistoryNotificationPublisher.class); + private static final byte PROTOCOL_VERSION = 0x01; + private static final byte MSG_TYPE_HISTORY_READY = 0x12; + + private final ZContext context; + private final ZMQ.Socket publishSocket; + + public HistoryNotificationPublisher(String jobManagerPullEndpoint) { + this.context = new ZContext(); + this.publishSocket = context.createSocket(SocketType.PUSH); + + publishSocket.setLinger(1000); + publishSocket.setSndHWM(10000); + publishSocket.connect(jobManagerPullEndpoint); + + LOG.info("HistoryNotificationPublisher connected PUSH to {}", jobManagerPullEndpoint); + } + + public void publishHistoryReady( + String requestId, + String clientId, + String ticker, + int periodSeconds, + long startTime, + long endTime, + String icebergNamespace, + String icebergTable, + int rowCount + ) { + String topic = clientId != null ? 
"RESPONSE:" + clientId : "HISTORY_READY:" + requestId; + + HistoryReadyNotification notification = HistoryReadyNotification.newBuilder() + .setRequestId(requestId) + .setTicker(ticker) + .setPeriodSeconds(periodSeconds) + .setStartTime(startTime) + .setEndTime(endTime) + .setStatus(HistoryReadyNotification.NotificationStatus.OK) + .setIcebergNamespace(icebergNamespace) + .setIcebergTable(icebergTable) + .setRowCount(rowCount) + .setCompletedAt(System.currentTimeMillis() * 1000) + .build(); + + publish(topic, notification.toByteArray()); + LOG.info("Published HistoryReadyNotification: topic={}, request_id={}, rows={}", topic, requestId, rowCount); + } + + public void publishError( + String requestId, + String clientId, + String ticker, + int periodSeconds, + long startTime, + long endTime, + String errorMessage + ) { + String topic = clientId != null ? "RESPONSE:" + clientId : "HISTORY_READY:" + requestId; + + HistoryReadyNotification.Builder builder = HistoryReadyNotification.newBuilder() + .setRequestId(requestId) + .setTicker(ticker) + .setPeriodSeconds(periodSeconds) + .setStartTime(startTime) + .setEndTime(endTime) + .setStatus(HistoryReadyNotification.NotificationStatus.ERROR); + if (errorMessage != null) { + builder.setErrorMessage(errorMessage); + } + + publish(topic, builder.build().toByteArray()); + LOG.error("Published error notification: topic={}, request_id={}, error={}", topic, requestId, errorMessage); + } + + public void publishNotFound( + String requestId, + String clientId, + String ticker, + int periodSeconds, + long startTime, + long endTime + ) { + String topic = clientId != null ? 
"RESPONSE:" + clientId : "HISTORY_READY:" + requestId; + + HistoryReadyNotification notification = HistoryReadyNotification.newBuilder() + .setRequestId(requestId) + .setTicker(ticker) + .setPeriodSeconds(periodSeconds) + .setStartTime(startTime) + .setEndTime(endTime) + .setStatus(HistoryReadyNotification.NotificationStatus.NOT_FOUND) + .build(); + + publish(topic, notification.toByteArray()); + LOG.info("Published not-found notification: topic={}, request_id={}", topic, requestId); + } + + private void publish(String topic, byte[] protoPayload) { + byte[] messageFrame = new byte[1 + protoPayload.length]; + messageFrame[0] = MSG_TYPE_HISTORY_READY; + System.arraycopy(protoPayload, 0, messageFrame, 1, protoPayload.length); + + publishSocket.sendMore(topic); + publishSocket.sendMore(new byte[]{PROTOCOL_VERSION}); + publishSocket.send(messageFrame, 0); + } + + @Override + public void close() { + if (publishSocket != null) publishSocket.close(); + if (context != null) context.close(); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchDeserializer.java b/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchDeserializer.java new file mode 100644 index 0000000..bce8a74 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchDeserializer.java @@ -0,0 +1,103 @@ +package com.dexorder.flink.publisher; + +import com.dexorder.proto.OHLC; +import com.dexorder.proto.OHLCBatch; +import com.dexorder.proto.OHLCBatchMetadata; +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Kafka deserializer for OHLCBatch protobuf messages. + * Handles messages from ingestors with metadata and OHLC rows. 
+ */ +public class OHLCBatchDeserializer implements DeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(OHLCBatchDeserializer.class); + private static final long serialVersionUID = 1L; + + private static final byte PROTOCOL_VERSION = 0x01; + private static final byte MSG_TYPE_OHLC_BATCH = 0x0B; + + @Override + public OHLCBatchWrapper deserialize(byte[] message) throws IOException { + try { + if (message.length < 2) { + throw new IOException("Message too short: " + message.length + " bytes"); + } + + byte version = message[0]; + if (version != PROTOCOL_VERSION) { + throw new IOException("Unsupported protocol version: " + version); + } + + byte messageType = message[1]; + if (messageType != MSG_TYPE_OHLC_BATCH) { + throw new IOException("Unexpected message type: 0x" + Integer.toHexString(messageType)); + } + + byte[] protoPayload = new byte[message.length - 2]; + System.arraycopy(message, 2, protoPayload, 0, protoPayload.length); + + OHLCBatchWrapper wrapper = parseOHLCBatch(protoPayload); + + LOG.debug("Deserialized OHLCBatch: request_id={}, rows={}", + wrapper.getRequestId(), wrapper.getRowCount()); + + return wrapper; + + } catch (Exception e) { + LOG.error("Failed to deserialize OHLCBatch", e); + throw new IOException("Failed to deserialize OHLCBatch", e); + } + } + + private OHLCBatchWrapper parseOHLCBatch(byte[] payload) throws IOException { + OHLCBatch batch = OHLCBatch.parseFrom(payload); + OHLCBatchMetadata meta = batch.getMetadata(); + + List rows = new ArrayList<>(batch.getRowsCount()); + for (OHLC row : batch.getRowsList()) { + rows.add(new OHLCBatchWrapper.OHLCRow( + row.getTimestamp(), + row.getTicker(), + row.getOpen(), + row.getHigh(), + row.getLow(), + row.getClose(), + row.hasVolume() ? row.getVolume() : 0 + )); + } + + String status = meta.getStatus(); + if (status == null || status.isEmpty()) { + status = "OK"; + } + + return new OHLCBatchWrapper( + meta.getRequestId(), + meta.hasClientId() ? 
meta.getClientId() : null, + meta.getTicker(), + meta.getPeriodSeconds(), + meta.getStartTime(), + meta.getEndTime(), + status, + meta.hasErrorMessage() ? meta.getErrorMessage() : null, + rows + ); + } + + @Override + public boolean isEndOfStream(OHLCBatchWrapper nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + return TypeInformation.of(OHLCBatchWrapper.class); + } +} diff --git a/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchWrapper.java b/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchWrapper.java new file mode 100644 index 0000000..81d1505 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/publisher/OHLCBatchWrapper.java @@ -0,0 +1,175 @@ +package com.dexorder.flink.publisher; + +import java.io.Serializable; +import java.util.List; + +/** + * Wrapper for OHLCBatch protobuf message. + * Contains metadata and OHLC rows from ingestor. + */ +public class OHLCBatchWrapper implements Serializable { + private static final long serialVersionUID = 1L; + + private final String requestId; + private final String clientId; + private final String ticker; + private final int periodSeconds; + private final long startTime; + private final long endTime; + private final String status; // OK, NOT_FOUND, ERROR + private final String errorMessage; + private final List rows; + + public OHLCBatchWrapper( + String requestId, + String clientId, + String ticker, + int periodSeconds, + long startTime, + long endTime, + String status, + String errorMessage, + List rows + ) { + this.requestId = requestId; + this.clientId = clientId; + this.ticker = ticker; + this.periodSeconds = periodSeconds; + this.startTime = startTime; + this.endTime = endTime; + this.status = status; + this.errorMessage = errorMessage; + this.rows = rows; + } + + public String getRequestId() { + return requestId; + } + + public String getClientId() { + return clientId; + } + + public String getTicker() { + return ticker; + } + + public int 
getPeriodSeconds() { + return periodSeconds; + } + + public long getStartTime() { + return startTime; + } + + public long getEndTime() { + return endTime; + } + + public String getStatus() { + return status; + } + + public String getErrorMessage() { + return errorMessage; + } + + public List getRows() { + return rows; + } + + public int getRowCount() { + return rows != null ? rows.size() : 0; + } + + public boolean hasError() { + return "ERROR".equals(status); + } + + public boolean isNotFound() { + return "NOT_FOUND".equals(status); + } + + public boolean isOk() { + return "OK".equals(status); + } + + @Override + public String toString() { + return "OHLCBatchWrapper{" + + "requestId='" + requestId + '\'' + + ", clientId='" + clientId + '\'' + + ", ticker='" + ticker + '\'' + + ", periodSeconds=" + periodSeconds + + ", status='" + status + '\'' + + ", rowCount=" + getRowCount() + + '}'; + } + + /** + * Single OHLC row + */ + public static class OHLCRow implements Serializable { + private static final long serialVersionUID = 1L; + + private final long timestamp; + private final String ticker; + private final long open; + private final long high; + private final long low; + private final long close; + private final long volume; + + public OHLCRow(long timestamp, String ticker, long open, long high, + long low, long close, long volume) { + this.timestamp = timestamp; + this.ticker = ticker; + this.open = open; + this.high = high; + this.low = low; + this.close = close; + this.volume = volume; + } + + public long getTimestamp() { + return timestamp; + } + + public String getTicker() { + return ticker; + } + + public long getOpen() { + return open; + } + + public long getHigh() { + return high; + } + + public long getLow() { + return low; + } + + public long getClose() { + return close; + } + + public long getVolume() { + return volume; + } + + @Override + public String toString() { + return "OHLCRow{" + + "timestamp=" + timestamp + + ", ticker='" + ticker + '\'' + + ", 
open=" + open + + ", high=" + high + + ", low=" + low + + ", close=" + close + + ", volume=" + volume + + '}'; + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/sink/HistoricalBatchWriter.java b/flink/src/main/java/com/dexorder/flink/sink/HistoricalBatchWriter.java new file mode 100644 index 0000000..45256ed --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/sink/HistoricalBatchWriter.java @@ -0,0 +1,117 @@ +package com.dexorder.flink.sink; + +import com.dexorder.flink.publisher.OHLCBatchWrapper; +import org.apache.flink.api.common.functions.RichFlatMapFunction; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.util.Collector; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.PartitionKey; +import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericAppenderFactory; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.flink.TableLoader; +import org.apache.iceberg.io.DataWriter; +import org.apache.iceberg.io.OutputFileFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Writes historical OHLC batches directly to Iceberg using the catalog API. + * + * Unlike the streaming sink (FlinkSink), this uses table.newAppend().commit() which + * commits synchronously and immediately — no checkpoint dependency. Batches are emitted + * downstream only after the commit returns, guaranteeing HistoryNotificationFunction + * fires after data is visible to readers. 
+ */ +public class HistoricalBatchWriter extends RichFlatMapFunction { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(HistoricalBatchWriter.class); + + private final TableLoader tableLoader; + private transient Table table; + + public HistoricalBatchWriter(TableLoader tableLoader) { + this.tableLoader = tableLoader; + } + + @Override + public void open(Configuration parameters) throws Exception { + tableLoader.open(); + table = tableLoader.loadTable(); + LOG.info("HistoricalBatchWriter opened, table loaded: {}", table.name()); + } + + @Override + public void flatMap(OHLCBatchWrapper batch, Collector out) throws Exception { + // Empty batches (NOT_FOUND/ERROR markers): emit immediately without writing + if (batch.getRows() == null || batch.getRows().isEmpty()) { + LOG.debug("Passing through empty batch (marker): request_id={}, status={}", + batch.getRequestId(), batch.getStatus()); + out.collect(batch); + return; + } + + GenericAppenderFactory appenderFactory = new GenericAppenderFactory(table.schema(), table.spec()); + OutputFileFactory fileFactory = OutputFileFactory + .builderFor(table, getRuntimeContext().getIndexOfThisSubtask(), System.nanoTime()) + .format(FileFormat.PARQUET) + .build(); + + // Compute partition key from ticker (all rows in a batch share one ticker) + GenericRecord partitionRecord = GenericRecord.create(table.schema()); + partitionRecord.setField("ticker", batch.getTicker()); + PartitionKey partitionKey = new PartitionKey(table.spec(), table.schema()); + partitionKey.partition(partitionRecord); + + // Write all rows to one data file + EncryptedOutputFile encryptedFile = fileFactory.newOutputFile(partitionKey); + DataWriter writer = appenderFactory.newDataWriter( + encryptedFile, FileFormat.PARQUET, partitionKey); + + long ingestedAt = System.currentTimeMillis() * 1000; + try { + for (OHLCBatchWrapper.OHLCRow row : batch.getRows()) { + GenericRecord record = 
GenericRecord.create(table.schema()); + record.setField("ticker", batch.getTicker()); + record.setField("period_seconds", batch.getPeriodSeconds()); + record.setField("timestamp", row.getTimestamp()); + record.setField("open", row.getOpen()); + record.setField("high", row.getHigh()); + record.setField("low", row.getLow()); + record.setField("close", row.getClose()); + record.setField("volume", row.getVolume() != 0 ? row.getVolume() : null); + record.setField("buy_vol", null); + record.setField("sell_vol", null); + record.setField("open_time", null); + record.setField("high_time", null); + record.setField("low_time", null); + record.setField("close_time", null); + record.setField("open_interest", null); + record.setField("request_id", batch.getRequestId()); + record.setField("ingested_at", ingestedAt); + writer.write(record); + } + } finally { + writer.close(); + } + + // Immediate commit — no checkpoint needed + table.newAppend() + .appendFile(writer.toDataFile()) + .commit(); + + LOG.info("Committed {} rows to Iceberg for request_id={}", batch.getRowCount(), batch.getRequestId()); + + // Emit batch downstream only after successful commit + out.collect(batch); + } + + @Override + public void close() throws Exception { + if (tableLoader != null) { + tableLoader.close(); + } + } +} diff --git a/flink/src/main/java/com/dexorder/flink/sink/IcebergOHLCSink.java b/flink/src/main/java/com/dexorder/flink/sink/IcebergOHLCSink.java new file mode 100644 index 0000000..bf980e8 --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/sink/IcebergOHLCSink.java @@ -0,0 +1,121 @@ +package com.dexorder.flink.sink; + +import com.dexorder.flink.publisher.OHLCBatchWrapper; +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.types.RowKind; 
+import org.apache.flink.util.Collector; +import org.apache.iceberg.flink.TableLoader; +import org.apache.iceberg.flink.sink.FlinkSink; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Iceberg sink for OHLC data (Iceberg 1.10.1). + * Converts OHLCBatchWrapper to Flink RowData and writes to single Iceberg table. + * + * Deduplication Strategy: + * - Uses Flink's upsert mode with equality delete files + * - Natural key: (ticker, period_seconds, timestamp) + * - Last-write-wins semantics for duplicates + * - Copy-on-write mode for better query performance + */ +public class IcebergOHLCSink { + private static final Logger LOG = LoggerFactory.getLogger(IcebergOHLCSink.class); + + /** + * Create an Iceberg sink for OHLC data with upsert behavior. + * + * @param stream Input stream of OHLCBatchWrapper + * @param tableLoader Iceberg table loader for trading.ohlc + * @return DataStream with sink applied + */ + public static DataStream createSink( + DataStream stream, + TableLoader tableLoader + ) { + // Convert OHLCBatchWrapper to RowData + DataStream rowStream = stream + .flatMap(new OHLCBatchToRowDataMapper()) + .name("OHLCBatch to RowData"); + + // Apply Iceberg sink with upsert mode + // Upsert uses equality delete files to handle duplicates + // Natural key is (ticker, period_seconds, timestamp) + FlinkSink.forRowData(rowStream) + .tableLoader(tableLoader) + .upsert(true) // Enables equality delete file generation + .equalityFieldColumns(java.util.Arrays.asList("ticker", "period_seconds", "timestamp")) + .append(); + + LOG.info("Iceberg OHLC sink configured with upsert mode (equality deletes on ticker, period_seconds, timestamp)"); + + return rowStream; + } + + /** + * Mapper that converts OHLCBatchWrapper to Flink RowData. + * Flattens the batch into individual rows for Iceberg. 
+ */ + private static class OHLCBatchToRowDataMapper implements FlatMapFunction { + private static final long serialVersionUID = 1L; + + @Override + public void flatMap(OHLCBatchWrapper batch, Collector out) throws Exception { + // Skip empty batches (marker messages) + if (batch.getRows() == null || batch.getRows().isEmpty()) { + LOG.debug("Skipping empty batch (marker): request_id={}, status={}", + batch.getRequestId(), batch.getStatus()); + return; + } + + String requestId = batch.getRequestId(); + String ticker = batch.getTicker(); + int periodSeconds = batch.getPeriodSeconds(); + long ingestedAt = System.currentTimeMillis() * 1000; + + // Emit one RowData for each OHLC row in the batch + for (OHLCBatchWrapper.OHLCRow row : batch.getRows()) { + GenericRowData rowData = new GenericRowData(RowKind.INSERT, 17); + + // Natural key fields (ticker, period_seconds, timestamp) + // Used by equality delete files for deduplication + rowData.setField(0, StringData.fromString(ticker)); + rowData.setField(1, periodSeconds); + rowData.setField(2, row.getTimestamp()); + + // OHLC price data + rowData.setField(3, row.getOpen()); + rowData.setField(4, row.getHigh()); + rowData.setField(5, row.getLow()); + rowData.setField(6, row.getClose()); + + // Volume data + rowData.setField(7, row.getVolume()); + rowData.setField(8, null); // buy_vol (TODO: extract from protobuf) + rowData.setField(9, null); // sell_vol + + // Timing data + rowData.setField(10, null); // open_time + rowData.setField(11, null); // high_time + rowData.setField(12, null); // low_time + rowData.setField(13, null); // close_time + + // Additional fields + rowData.setField(14, null); // open_interest + + // Metadata fields + rowData.setField(15, StringData.fromString(requestId)); + rowData.setField(16, ingestedAt); + + out.collect(rowData); + } + + LOG.debug("Converted batch to {} RowData records: request_id={}", + batch.getRowCount(), requestId); + } + } +} diff --git 
a/flink/src/main/java/com/dexorder/flink/zmq/ZmqChannelManager.java b/flink/src/main/java/com/dexorder/flink/zmq/ZmqChannelManager.java new file mode 100644 index 0000000..05d007d --- /dev/null +++ b/flink/src/main/java/com/dexorder/flink/zmq/ZmqChannelManager.java @@ -0,0 +1,286 @@ +package com.dexorder.flink.zmq; + +import com.dexorder.flink.config.AppConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.zeromq.SocketType; +import org.zeromq.ZContext; +import org.zeromq.ZMQ; + +import java.io.Closeable; +import java.util.HashMap; +import java.util.Map; + +/** + * Manages all ZeroMQ channels for the Flink application. + * Each channel is bound to a specific port and socket type. + */ +public class ZmqChannelManager implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(ZmqChannelManager.class); + + private final ZContext context; + private final Map sockets; + private final AppConfig config; + + public enum Channel { + INGESTOR_WORK_QUEUE, + INGESTOR_RESPONSE, + INGESTOR_CONTROL, + MARKET_DATA_PUB, + CLIENT_REQUEST, + CEP_WEBHOOK + } + + public ZmqChannelManager(AppConfig config) { + this.config = config; + this.context = new ZContext(); + this.sockets = new HashMap<>(); + } + + /** + * Initialize and bind all ZMQ channels. + */ + public void initializeChannels() { + String bindAddress = config.getBindAddress(); + + LOG.info("Initializing ZeroMQ channels on {}", bindAddress); + + // 1. Ingestor Work Queue - PUB socket for topic-based work distribution (exchange prefix filtering) + createAndBind( + Channel.INGESTOR_WORK_QUEUE, + SocketType.PUB, + bindAddress + ":" + config.getIngestorWorkQueuePort(), + "Ingestor Work Queue (PUB)" + ); + + // 2. Ingestor Response - ROUTER socket for receiving historical data responses + createAndBind( + Channel.INGESTOR_RESPONSE, + SocketType.ROUTER, + bindAddress + ":" + config.getIngestorResponsePort(), + "Ingestor Response (ROUTER)" + ); + + // 3. 
Ingestor Control - PUB socket for broadcast control messages + createAndBind( + Channel.INGESTOR_CONTROL, + SocketType.PUB, + bindAddress + ":" + config.getIngestorControlPort(), + "Ingestor Control (PUB)" + ); + + // 4. Market Data Publication - PUB socket for market data streaming + createAndBind( + Channel.MARKET_DATA_PUB, + SocketType.PUB, + bindAddress + ":" + config.getMarketDataPubPort(), + "Market Data Publication (PUB)" + ); + + // 5. Client Request - REP socket for request-response + createAndBind( + Channel.CLIENT_REQUEST, + SocketType.REP, + bindAddress + ":" + config.getClientRequestPort(), + "Client Request (REP)" + ); + + // 6. CEP Webhook - ROUTER socket for async callbacks + createAndBind( + Channel.CEP_WEBHOOK, + SocketType.ROUTER, + bindAddress + ":" + config.getCepWebhookPort(), + "CEP Webhook (ROUTER)" + ); + + LOG.info("All ZeroMQ channels initialized successfully"); + } + + private void createAndBind(Channel channel, SocketType socketType, String endpoint, String description) { + try { + ZMQ.Socket socket = context.createSocket(socketType); + + // Set socket options + socket.setLinger(1000); // 1 second linger on close + socket.setSndHWM(10000); // High water mark for outbound messages + socket.setRcvHWM(10000); // High water mark for inbound messages + + // Bind the socket + socket.bind(endpoint); + + sockets.put(channel.name(), socket); + LOG.info("Bound {} to {}", description, endpoint); + } catch (Exception e) { + LOG.error("Failed to bind {} to {}", description, endpoint, e); + throw new RuntimeException("Failed to initialize ZMQ channel: " + channel, e); + } + } + + /** + * Get a socket by channel type. + */ + public ZMQ.Socket getSocket(Channel channel) { + ZMQ.Socket socket = sockets.get(channel.name()); + if (socket == null) { + throw new IllegalStateException("Socket not initialized: " + channel); + } + return socket; + } + + /** + * Send a message on the specified channel. 
+ * + * @param channel The channel to send on + * @param versionByte Protocol version byte + * @param messageTypeByte Message type ID byte + * @param protobufData Serialized protobuf message + * @return true if sent successfully + */ + public boolean sendMessage(Channel channel, byte versionByte, byte messageTypeByte, byte[] protobufData) { + ZMQ.Socket socket = getSocket(channel); + + // Send as two frames: [version byte] [type byte + protobuf data] + boolean sentFrame1 = socket.send(new byte[]{versionByte}, ZMQ.SNDMORE); + if (!sentFrame1) { + LOG.error("Failed to send version frame on channel {}", channel); + return false; + } + + byte[] frame2 = new byte[1 + protobufData.length]; + frame2[0] = messageTypeByte; + System.arraycopy(protobufData, 0, frame2, 1, protobufData.length); + + boolean sentFrame2 = socket.send(frame2, 0); + if (!sentFrame2) { + LOG.error("Failed to send message frame on channel {}", channel); + return false; + } + + return true; + } + + /** + * Send a message with a topic prefix (for PUB sockets). 
+ * + * @param channel The channel to send on + * @param topic Topic string for subscription filtering + * @param versionByte Protocol version byte + * @param messageTypeByte Message type ID byte + * @param protobufData Serialized protobuf message + * @return true if sent successfully + */ + public boolean sendTopicMessage(Channel channel, String topic, byte versionByte, byte messageTypeByte, byte[] protobufData) { + ZMQ.Socket socket = getSocket(channel); + + // Send as three frames: [topic] [version byte] [type byte + protobuf data] + boolean sentTopic = socket.send(topic.getBytes(ZMQ.CHARSET), ZMQ.SNDMORE); + if (!sentTopic) { + LOG.error("Failed to send topic frame on channel {}", channel); + return false; + } + + boolean sentFrame1 = socket.send(new byte[]{versionByte}, ZMQ.SNDMORE); + if (!sentFrame1) { + LOG.error("Failed to send version frame on channel {}", channel); + return false; + } + + byte[] frame2 = new byte[1 + protobufData.length]; + frame2[0] = messageTypeByte; + System.arraycopy(protobufData, 0, frame2, 1, protobufData.length); + + boolean sentFrame2 = socket.send(frame2, 0); + if (!sentFrame2) { + LOG.error("Failed to send message frame on channel {}", channel); + return false; + } + + return true; + } + + /** + * Receive a message from a ROUTER socket. + * Returns a ReceivedMessage containing the identity, version, type, and payload. 
+ * + * @param channel The channel to receive from (must be ROUTER) + * @param timeout Timeout in milliseconds (0 for non-blocking, -1 for blocking) + * @return ReceivedMessage or null if no message available + */ + public ReceivedMessage receiveRouterMessage(Channel channel, int timeout) { + ZMQ.Socket socket = getSocket(channel); + + // Set receive timeout + if (timeout >= 0) { + socket.setReceiveTimeOut(timeout); + } + + // Receive identity frame + byte[] identity = socket.recv(0); + if (identity == null) { + return null; + } + + // Receive version frame + byte[] versionFrame = socket.recv(0); + if (versionFrame == null || versionFrame.length != 1) { + LOG.error("Invalid version frame received on channel {}", channel); + return null; + } + + // Receive message frame (type byte + protobuf data) + byte[] messageFrame = socket.recv(0); + if (messageFrame == null || messageFrame.length < 1) { + LOG.error("Invalid message frame received on channel {}", channel); + return null; + } + + byte versionByte = versionFrame[0]; + byte messageTypeByte = messageFrame[0]; + byte[] protobufData = new byte[messageFrame.length - 1]; + System.arraycopy(messageFrame, 1, protobufData, 0, protobufData.length); + + return new ReceivedMessage(identity, versionByte, messageTypeByte, protobufData); + } + + /** + * Represents a received message from a ROUTER socket. 
+ */ + public static class ReceivedMessage { + private final byte[] identity; + private final byte version; + private final byte messageType; + private final byte[] protobufData; + + public ReceivedMessage(byte[] identity, byte version, byte messageType, byte[] protobufData) { + this.identity = identity; + this.version = version; + this.messageType = messageType; + this.protobufData = protobufData; + } + + public byte[] getIdentity() { + return identity; + } + + public byte getVersion() { + return version; + } + + public byte getMessageType() { + return messageType; + } + + public byte[] getProtobufData() { + return protobufData; + } + } + + @Override + public void close() { + LOG.info("Closing ZeroMQ channels"); + sockets.values().forEach(ZMQ.Socket::close); + sockets.clear(); + context.close(); + LOG.info("ZeroMQ context closed"); + } +} diff --git a/flink/src/main/resources/iceberg-schemas/ohlc_schema.sql b/flink/src/main/resources/iceberg-schemas/ohlc_schema.sql new file mode 100644 index 0000000..3acd6ca --- /dev/null +++ b/flink/src/main/resources/iceberg-schemas/ohlc_schema.sql @@ -0,0 +1,54 @@ +-- Iceberg OHLC Table Schema (Documentation) +-- +-- NOTE: This file is kept for documentation purposes only. +-- The actual table is created by SchemaInitializer.java using the Iceberg API. 
+-- +-- Single table for all periods with Iceberg v2 primary key enforcement +-- Primary key: (ticker, period_seconds, timestamp) +-- Partition by: (ticker, days(timestamp)) + +CREATE TABLE IF NOT EXISTS trading.ohlc ( + -- Primary key fields + ticker STRING NOT NULL COMMENT 'Market identifier (e.g., BINANCE:BTC/USDT)', + period_seconds INT NOT NULL COMMENT 'OHLC period in seconds (60, 300, 900, 3600, 14400, 86400, 604800, etc.)', + timestamp BIGINT NOT NULL COMMENT 'Candle timestamp in microseconds since epoch', + + -- OHLC price data (stored as integers, divide by rational denominator from market metadata) + open BIGINT NOT NULL COMMENT 'Opening price', + high BIGINT NOT NULL COMMENT 'Highest price', + low BIGINT NOT NULL COMMENT 'Lowest price', + close BIGINT NOT NULL COMMENT 'Closing price', + + -- Volume data + volume BIGINT COMMENT 'Total volume', + buy_vol BIGINT COMMENT 'Buy volume', + sell_vol BIGINT COMMENT 'Sell volume', + + -- Timing data + open_time BIGINT COMMENT 'Timestamp when open price occurred', + high_time BIGINT COMMENT 'Timestamp when high price occurred', + low_time BIGINT COMMENT 'Timestamp when low price occurred', + close_time BIGINT COMMENT 'Timestamp when close price occurred', + + -- Additional fields + open_interest BIGINT COMMENT 'Open interest for futures', + + -- Metadata fields for tracking + request_id STRING COMMENT 'Request ID that generated this data (for historical requests)', + ingested_at BIGINT NOT NULL COMMENT 'Timestamp when data was ingested by Flink' +) +USING iceberg +PARTITIONED BY (ticker, days(timestamp)) +TBLPROPERTIES ( + 'write.format.default' = 'parquet', + 'write.parquet.compression-codec' = 'snappy', + 'write.metadata.compression-codec' = 'gzip', + 'format-version' = '2', + 'write.upsert.enabled' = 'true' +); + +-- Primary key constraint (enforced by Iceberg v2) +-- Uniqueness enforced on (ticker, period_seconds, timestamp) +-- Upserts will replace existing rows with same primary key + +COMMENT ON TABLE 
trading.ohlc IS 'Historical OHLC candle data from exchanges. Single table for all periods with primary key enforcement.'; diff --git a/flink/src/main/resources/topics-dev.yaml b/flink/src/main/resources/topics-dev.yaml new file mode 100644 index 0000000..13ab735 --- /dev/null +++ b/flink/src/main/resources/topics-dev.yaml @@ -0,0 +1,29 @@ +topics: + # Realtime and historical OHLC data (protobuf encoded) + # Individual OHLC messages for realtime data + # OHLCBatch messages for historical data (with metadata) + - name: market-ohlc + partitions: 3 + replication: 1 + config: + retention.ms: 86400000 # 24 hours + compression.type: snappy + cleanup.policy: delete + + # Realtime tick data (protobuf encoded) + - name: market-tick + partitions: 3 + replication: 1 + config: + retention.ms: 3600000 # 1 hour + compression.type: snappy + cleanup.policy: delete + + # Order execution events + - name: order-event + partitions: 2 + replication: 1 + config: + retention.ms: 2592000000 # 30 days + compression.type: snappy + cleanup.policy: delete diff --git a/flink/src/main/resources/topics.yaml b/flink/src/main/resources/topics.yaml new file mode 100644 index 0000000..855e694 --- /dev/null +++ b/flink/src/main/resources/topics.yaml @@ -0,0 +1,29 @@ +topics: + # Realtime and historical OHLC data (protobuf encoded) + # Individual OHLC messages for realtime data + # OHLCBatch messages for historical data (with metadata) + - name: market-ohlc + partitions: 6 + replication: 2 + config: + retention.ms: 86400000 # 24 hours + compression.type: snappy + cleanup.policy: delete + + # Realtime tick data (protobuf encoded) + - name: market-tick + partitions: 6 + replication: 2 + config: + retention.ms: 3600000 # 1 hour + compression.type: snappy + cleanup.policy: delete + + # Order execution events + - name: order-event + partitions: 3 + replication: 2 + config: + retention.ms: 2592000000 # 30 days + compression.type: snappy + cleanup.policy: delete diff --git a/flink/values.yaml 
b/flink/values.yaml new file mode 100644 index 0000000..b7d302f --- /dev/null +++ b/flink/values.yaml @@ -0,0 +1,8 @@ +# Strimzi Kafka Operator Helm Values +# Install with: helm install strimzi-kafka-operator oci://quay.io/strimzi-helm/strimzi-kafka-operator + +# This values file is for the operator installation +#watchNamespaces: [] # Empty = watch all namespaces + +watchNamespaces: + - default diff --git a/iceberg/README.md b/iceberg/README.md new file mode 100644 index 0000000..4d70fed --- /dev/null +++ b/iceberg/README.md @@ -0,0 +1,138 @@ +# Iceberg Schema Definitions + +We use Apache Iceberg for historical data storage. The metadata server is a PostgreSQL database. + +This directory stores schema files and database setup. + +## Tables + +### trading.ohlc +Historical OHLC (Open, High, Low, Close, Volume) candle data for all periods in a single table. + +**Schema**: `ohlc_schema.sql` + +**Natural Key**: `(ticker, period_seconds, timestamp)` - uniqueness enforced by application + +**Partitioning**: `(ticker, days(timestamp))` +- Partition by ticker to isolate different markets +- Partition by days for efficient time-range queries +- Hidden partitioning - not exposed in queries + +**Iceberg Version**: Format v2 (equality delete files require table format v2; Iceberg library 1.10.1) +- Uses equality delete files for deduplication +- Flink upsert mode generates equality deletes +- Last-write-wins semantics for duplicates +- Copy-on-write mode for better query performance + +**Deduplication**: +- Flink Iceberg sink with upsert mode +- Equality delete files on (ticker, period_seconds, timestamp) +- PyIceberg automatically filters deleted rows during queries + +**Storage Format**: Parquet with Snappy compression + +**Supported Periods**: Any period in seconds (60, 300, 900, 3600, 14400, 86400, 604800, etc.)
+ +**Usage**: +```sql +-- Query 1-hour candles for specific ticker and time range +SELECT * FROM trading.ohlc +WHERE ticker = 'BINANCE:BTC/USDT' + AND period_seconds = 3600 + AND timestamp BETWEEN 1735689600000000 AND 1736294399000000 +ORDER BY timestamp; + +-- Query most recent 1-minute candles +SELECT * FROM trading.ohlc +WHERE ticker = 'BINANCE:BTC/USDT' + AND period_seconds = 60 + AND timestamp > (UNIX_MICROS(CURRENT_TIMESTAMP()) - 3600000000) +ORDER BY timestamp DESC +LIMIT 60; + +-- Query all periods for a ticker +SELECT period_seconds, COUNT(*) as candle_count +FROM trading.ohlc +WHERE ticker = 'BINANCE:BTC/USDT' +GROUP BY period_seconds; +``` + +## Access Patterns + +### Flink (Write) +- Reads OHLCBatch from Kafka +- Writes rows to Iceberg table +- Uses Iceberg Flink connector +- Upsert mode to handle duplicates + +### Client-Py (Read) +- Queries historical data after receiving HistoryReadyNotification +- Uses PyIceberg or Iceberg REST API +- Read-only access via Iceberg catalog + +### Web UI (Read) +- Queries for chart display +- Time-series queries with partition pruning +- Read-only access + +## Catalog Configuration + +The Iceberg catalog is accessed via REST API: + +```yaml +catalog: + type: rest + uri: http://iceberg-catalog:8181 + warehouse: s3://trading-warehouse/ + s3: + endpoint: http://minio:9000 + access-key-id: ${S3_ACCESS_KEY} + secret-access-key: ${S3_SECRET_KEY} +``` + +## Table Naming Convention + +`{namespace}.ohlc` where: +- `namespace`: Trading namespace (default: "trading") +- All OHLC data is stored in a single table +- Partitioned by ticker and date for efficient queries + +## Integration Examples + +### Flink Write +```java +TableLoader tableLoader = TableLoader.fromCatalog( + CatalogLoader.rest("trading", catalogUri), + TableIdentifier.of("trading", "ohlc") +); + +DataStream ohlcRows = // ... 
-- Iceberg OHLC Table Schema (Iceberg library 1.10.1)
-- Single table for all periods with hidden partitioning.
-- Natural key: (ticker, period_seconds, timestamp) - enforced by application logic.
-- Partition spec: (ticker, days(timestamp)) - hidden partitioning, matching the README.
--
-- NOTE(review): equality delete files (the mechanism behind Flink's upsert mode)
-- require Iceberg table FORMAT v2 -- format v1 tables are append/overwrite only.
-- The table is therefore declared with 'format-version' = '2'. Upserts that emit
-- equality delete files are merge-on-read writes, so the merge mode is set to
-- 'merge-on-read' for consistency (the previous 'copy-on-write' contradicted the
-- delete-file design described below).

CREATE TABLE IF NOT EXISTS trading.ohlc (
    -- Natural key fields (uniqueness enforced by Flink upsert logic)
    ticker          STRING NOT NULL COMMENT 'Market identifier (e.g., BINANCE:BTC/USDT)',
    period_seconds  INT    NOT NULL COMMENT 'OHLC period in seconds (60, 300, 900, 3600, 14400, 86400, 604800, etc.)',
    timestamp       BIGINT NOT NULL COMMENT 'Candle timestamp in microseconds since epoch',

    -- OHLC price data (stored as integers, divide by rational denominator from market metadata)
    open  BIGINT NOT NULL COMMENT 'Opening price',
    high  BIGINT NOT NULL COMMENT 'Highest price',
    low   BIGINT NOT NULL COMMENT 'Lowest price',
    close BIGINT NOT NULL COMMENT 'Closing price',

    -- Volume data
    volume   BIGINT COMMENT 'Total volume',
    buy_vol  BIGINT COMMENT 'Buy volume',
    sell_vol BIGINT COMMENT 'Sell volume',

    -- Timing data (microseconds, same epoch convention as `timestamp`)
    open_time  BIGINT COMMENT 'Timestamp when open price occurred',
    high_time  BIGINT COMMENT 'Timestamp when high price occurred',
    low_time   BIGINT COMMENT 'Timestamp when low price occurred',
    close_time BIGINT COMMENT 'Timestamp when close price occurred',

    -- Additional fields
    open_interest BIGINT COMMENT 'Open interest for futures',

    -- Metadata fields for tracking
    request_id  STRING COMMENT 'Request ID that generated this data (for historical requests)',
    ingested_at BIGINT NOT NULL COMMENT 'Timestamp when data was ingested by Flink'
)
USING iceberg
-- Partition by ticker AND day so time-range queries get partition pruning.
-- (The previous DDL partitioned by ticker only, contradicting the documented
-- spec "(ticker, days(timestamp))".)
PARTITIONED BY (ticker, days(timestamp))
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'snappy',
    'write.metadata.compression-codec' = 'gzip',
    'format-version' = '2',               -- v2 is required for equality/position delete files
    'write.upsert.enabled' = 'true',      -- Flink Iceberg sink upsert mode
    'write.merge.mode' = 'merge-on-read'  -- consistent with delete-file-based upserts
);

-- Note: Iceberg does not support primary key constraints.
-- Uniqueness of (ticker, period_seconds, timestamp) is enforced at the application layer:
--   - Flink Iceberg sink uses equality delete files for upsert behavior
--   - Last-write-wins semantics for duplicate rows
--   - Queries automatically filter deleted rows

COMMENT ON TABLE trading.ohlc IS 'Historical OHLC candle data from exchanges. Single table for all periods. Uniqueness enforced by Flink upsert mode with equality deletes.';
Uniqueness enforced by Flink upsert mode with equality deletes.'; diff --git a/ingestor/.gitignore b/ingestor/.gitignore new file mode 100644 index 0000000..a3ffd39 --- /dev/null +++ b/ingestor/.gitignore @@ -0,0 +1,8 @@ +node_modules/ +config.yaml +secrets.yaml +*.log +.env +.DS_Store +src/proto/ +protobuf/ diff --git a/ingestor/Dockerfile b/ingestor/Dockerfile new file mode 100644 index 0000000..4e7a81a --- /dev/null +++ b/ingestor/Dockerfile @@ -0,0 +1,30 @@ +FROM node:20-alpine + +# Install protobuf compiler +RUN apk add --no-cache protobuf protobuf-dev build-base python3 + +WORKDIR /app + +# Copy package files +COPY package*.json ./ + +# Install dependencies +RUN npm install + +# Copy source code +COPY src ./src/ +COPY protobuf ./protobuf/ + +# Compile protobufs (if using proto:compile script) +# RUN npm run proto:compile + +# Create config directory +RUN mkdir -p /config + +# Set environment variables +ENV CONFIG_PATH=/config/config.yaml +ENV SECRETS_PATH=/config/secrets.yaml +ENV NODE_ENV=production + +# Run the ingestor +CMD ["node", "src/index.js"] diff --git a/ingestor/README.md b/ingestor/README.md new file mode 100644 index 0000000..b31dca0 --- /dev/null +++ b/ingestor/README.md @@ -0,0 +1,226 @@ +# CCXT Market Data Ingestor + +A NodeJS-based market data ingestor that uses CCXT to fetch historical OHLC data and realtime tick data from cryptocurrency exchanges. Integrates with Apache Flink via ZeroMQ for work distribution and writes data to Kafka. + +## Architecture + +The ingestor is a worker process that: +1. Connects to Flink's ZMQ work queue (PULL socket) to receive data requests +2. Connects to Flink's ZMQ control channel (SUB socket) to receive control messages +3. Fetches market data from exchanges using CCXT +4. 
Writes data to Kafka using the protobuf protocol + +### Data Request Types + +#### Historical OHLC +- Fetches historical candlestick data for a specified time range +- Uses CCXT's `fetchOHLCV` method +- Writes OHLC messages to Kafka +- Request is completed and removed from queue after processing + +#### Realtime Ticks +- Subscribes to realtime trade data +- Uses 10-second polling to fetch recent trades via `fetchTrades` +- Writes Tick messages to Kafka `market-0` topic +- Subscription persists until cancelled by Flink control message + +## Installation + +```bash +npm install +``` + +## Configuration + +Create `config.yaml` based on `config.example.yaml`: + +```yaml +# Flink ZMQ endpoints +flink_hostname: localhost +ingestor_work_port: 5555 +ingestor_control_port: 5556 + +# Kafka configuration +kafka_brokers: + - localhost:9092 +kafka_topic: market-0 + +# Worker configuration +max_concurrent: 10 +poll_interval_ms: 10000 +``` + +Optional `secrets.yaml` for sensitive configuration. + +## Usage + +### Development +```bash +npm run dev +``` + +### Production +```bash +npm start +``` + +### Docker +```bash +docker build -t ccxt-ingestor . 
+docker run -v /path/to/config:/config ccxt-ingestor +``` + +## Ticker Format + +Tickers must be in the format: `EXCHANGE:SYMBOL` + +Examples: +- `BINANCE:BTC/USDT` +- `COINBASE:ETH/USD` +- `KRAKEN:XRP/EUR` + +## Protocol + +### ZeroMQ Message Format + +All messages use a two-frame envelope: +``` +Frame 1: [1 byte: protocol version = 0x01] +Frame 2: [1 byte: message type ID][N bytes: protobuf message] +``` + +### Message Type IDs +- `0x01`: DataRequest +- `0x02`: IngestorControl +- `0x03`: Tick +- `0x04`: OHLC + +### DataRequest (from Flink) + +```protobuf +message DataRequest { + string request_id = 1; + RequestType type = 2; // HISTORICAL_OHLC or REALTIME_TICKS + string ticker = 3; + optional HistoricalParams historical = 4; + optional RealtimeParams realtime = 5; +} +``` + +### IngestorControl (from Flink) + +```protobuf +message IngestorControl { + ControlAction action = 1; // CANCEL, SHUTDOWN, CONFIG_UPDATE, HEARTBEAT + optional string request_id = 2; + optional IngestorConfig config = 3; +} +``` + +### Tick (to Kafka) + +```protobuf +message Tick { + string trade_id = 1; + string ticker = 2; + uint64 timestamp = 3; // microseconds + int64 price = 4; // fixed-point (10^8) + int64 amount = 5; // fixed-point (10^8) + int64 quote_amount = 6; // fixed-point (10^8) + bool taker_buy = 7; +} +``` + +### OHLC (to Kafka) + +```protobuf +message OHLC { + int64 open = 2; // fixed-point (10^8) + int64 high = 3; + int64 low = 4; + int64 close = 5; + optional int64 volume = 6; + optional int64 open_time = 9; // microseconds + optional int64 close_time = 12; + string ticker = 14; +} +``` + +## Fixed-Point Encoding + +All prices and amounts are encoded as fixed-point integers using 8 decimal places (denominator = 10^8): +- Example: 123.45678901 → 12345678901 +- This provides precision while avoiding floating-point errors + +## Components + +### `src/index.js` +Main worker process that coordinates all components and handles the work loop. 
+ +### `src/zmq-client.js` +ZeroMQ client for connecting to Flink's work queue and control channel. + +### `src/kafka-producer.js` +Kafka producer for writing protobuf-encoded messages to Kafka topics. + +### `src/ccxt-fetcher.js` +CCXT wrapper for fetching historical OHLC and recent trades from exchanges. + +### `src/realtime-poller.js` +Manages realtime subscriptions with 10-second polling for trade updates. + +### `src/proto/messages.js` +Protobuf message definitions and encoding/decoding utilities. + +## Error Handling + +- Failed requests automatically return to the Flink work queue +- Realtime subscriptions are cancelled after 5 consecutive errors +- Worker logs all errors with context for debugging +- Graceful shutdown on SIGINT/SIGTERM + +## Monitoring + +The worker logs status information every 60 seconds including: +- Number of active requests +- Realtime subscription statistics +- Error counts + +## Environment Variables + +- `CONFIG_PATH`: Path to config.yaml (default: `/config/config.yaml`) +- `SECRETS_PATH`: Path to secrets.yaml (default: `/config/secrets.yaml`) +- `LOG_LEVEL`: Log level (default: `info`) + +## Supported Exchanges + +All exchanges supported by CCXT can be used. 
Popular exchanges include: +- Binance +- Coinbase +- Kraken +- Bitfinex +- Huobi +- And 100+ more + +## Development + +### Project Structure +``` +redesign/ingestor/ +├── src/ +│ ├── index.js # Main worker +│ ├── zmq-client.js # ZMQ client +│ ├── kafka-producer.js # Kafka producer +│ ├── ccxt-fetcher.js # CCXT wrapper +│ ├── realtime-poller.js # Realtime poller +│ └── proto/ +│ └── messages.js # Protobuf definitions +├── config.example.yaml +├── Dockerfile +├── package.json +└── README.md +``` + +## License + +ISC diff --git a/ingestor/config.example.yaml b/ingestor/config.example.yaml new file mode 100644 index 0000000..fda1211 --- /dev/null +++ b/ingestor/config.example.yaml @@ -0,0 +1,24 @@ +# CCXT Ingestor Configuration + +# Relay ZMQ endpoints (relay is the well-known gateway) +flink_hostname: relay +ingestor_work_port: 5555 # SUB - receives DataRequest with exchange prefix +# Note: No response port needed - async architecture via Kafka! + +# Supported exchanges (subscribe to these prefixes) +supported_exchanges: + - BINANCE + - COINBASE + - KRAKEN + +# Kafka configuration +kafka_brokers: + - kafka:29092 +kafka_topic: market-0 + +# Worker configuration +max_concurrent: 10 +poll_interval_ms: 10000 + +# Logging +log_level: info diff --git a/ingestor/package.json b/ingestor/package.json new file mode 100644 index 0000000..fb4d26c --- /dev/null +++ b/ingestor/package.json @@ -0,0 +1,33 @@ +{ + "name": "@dexorder/ccxt-ingestor", + "version": "1.0.0", + "description": "CCXT-based market data ingestor for Flink processing pipeline", + "main": "src/index.js", + "type": "module", + "scripts": { + "start": "node src/index.js", + "dev": "node --watch src/index.js", + "proto:compile": "mkdir -p src/proto && protoc --js_out=import_style=commonjs,binary:src/proto --proto_path=../protobuf ../protobuf/*.proto" + }, + "keywords": [ + "ccxt", + "kafka", + "zeromq", + "market-data", + "ingestor" + ], + "author": "", + "license": "ISC", + "dependencies": { + "ccxt": "^4.4.0", + 
"google-protobuf": "^3.21.4", + "kafkajs": "^2.2.4", + "pino": "^9.0.0", + "pino-pretty": "^13.0.0", + "yaml": "^2.5.0", + "zeromq": "^6.0.0" + }, + "devDependencies": { + "protobufjs": "^7.4.0" + } +} diff --git a/ingestor/src/ccxt-fetcher.js b/ingestor/src/ccxt-fetcher.js new file mode 100644 index 0000000..d37d348 --- /dev/null +++ b/ingestor/src/ccxt-fetcher.js @@ -0,0 +1,248 @@ +// CCXT data fetcher for historical OHLC and realtime ticks +import ccxt from 'ccxt'; + +export class CCXTFetcher { + constructor(config, logger) { + this.config = config; + this.logger = logger; + this.exchanges = new Map(); + } + + /** + * Parse ticker string to exchange and symbol + * Expected format: "EXCHANGE:SYMBOL" (e.g., "BINANCE:BTC/USDT") + */ + parseTicker(ticker) { + const parts = ticker.split(':'); + if (parts.length !== 2) { + throw new Error(`Invalid ticker format: ${ticker}. Expected "EXCHANGE:SYMBOL"`); + } + + return { + exchange: parts[0].toLowerCase(), + symbol: parts[1] + }; + } + + /** + * Get or create CCXT exchange instance + */ + getExchange(exchangeName) { + if (this.exchanges.has(exchangeName)) { + return this.exchanges.get(exchangeName); + } + + // Create exchange instance + const ExchangeClass = ccxt[exchangeName]; + if (!ExchangeClass) { + throw new Error(`Unsupported exchange: ${exchangeName}`); + } + + const exchange = new ExchangeClass({ + enableRateLimit: true, + options: { + defaultType: 'spot' + } + }); + + this.exchanges.set(exchangeName, exchange); + this.logger.info({ exchange: exchangeName }, 'Created CCXT exchange instance'); + + return exchange; + } + + /** + * Fetch historical OHLC data + * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL" + * @param {string} startTime - Start time in microseconds + * @param {string} endTime - End time in microseconds + * @param {number} periodSeconds - OHLC period in seconds + * @param {number} limit - Optional limit on number of candles + * @returns {Promise} Array of OHLC candles + */ + async 
fetchHistoricalOHLC(ticker, startTime, endTime, periodSeconds, limit) { + const { exchange: exchangeName, symbol } = this.parseTicker(ticker); + const exchange = this.getExchange(exchangeName); + + // Convert microseconds to milliseconds + const startMs = Math.floor(parseInt(startTime) / 1000); + const endMs = Math.floor(parseInt(endTime) / 1000); + + // Map period seconds to CCXT timeframe + const timeframe = this.secondsToTimeframe(periodSeconds); + + this.logger.info( + { ticker, timeframe, startMs, endMs, limit }, + 'Fetching historical OHLC' + ); + + const allCandles = []; + let since = startMs; + + // CCXT typically limits to 1000 candles per request + const batchSize = limit || 1000; + + while (since < endMs) { + try { + const candles = await exchange.fetchOHLCV( + symbol, + timeframe, + since, + batchSize + ); + + if (candles.length === 0) { + break; + } + + // Filter candles within the time range + const filteredCandles = candles.filter(c => { + const timestamp = c[0]; + return timestamp >= startMs && timestamp <= endMs; + }); + + allCandles.push(...filteredCandles); + + // Move to next batch + const lastTimestamp = candles[candles.length - 1][0]; + since = lastTimestamp + (periodSeconds * 1000); + + // Break if we've reached the end time or limit + if (since >= endMs || (limit && allCandles.length >= limit)) { + break; + } + + // Apply rate limiting + await exchange.sleep(exchange.rateLimit); + } catch (error) { + this.logger.error( + { error: error.message, ticker, since }, + 'Error fetching OHLC' + ); + throw error; + } + } + + // Convert to our OHLC format + return allCandles.map(candle => this.convertToOHLC(candle, ticker, periodSeconds)); + } + + /** + * Fetch recent trades for realtime tick data + * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL" + * @param {string} since - Optional timestamp in microseconds to fetch from + * @returns {Promise} Array of trade ticks + */ + async fetchRecentTrades(ticker, since = null) { + const { 
exchange: exchangeName, symbol } = this.parseTicker(ticker); + const exchange = this.getExchange(exchangeName); + + try { + // Convert microseconds to milliseconds if provided + const sinceMs = since ? Math.floor(parseInt(since) / 1000) : undefined; + + const trades = await exchange.fetchTrades(symbol, sinceMs, 1000); + + this.logger.debug( + { ticker, count: trades.length }, + 'Fetched recent trades' + ); + + // Convert to our Tick format + return trades.map(trade => this.convertToTick(trade, ticker)); + } catch (error) { + this.logger.error( + { error: error.message, ticker }, + 'Error fetching trades' + ); + throw error; + } + } + + /** + * Convert CCXT OHLCV array to our OHLC format + * CCXT format: [timestamp, open, high, low, close, volume] + */ + convertToOHLC(candle, ticker, periodSeconds) { + const [timestamp, open, high, low, close, volume] = candle; + + // Convert to fixed-point integers (using 8 decimal places = 10^8) + const DENOM = 100000000; + + return { + ticker, + timestamp: (timestamp * 1000).toString(), // Convert ms to microseconds + open: Math.round(open * DENOM).toString(), + high: Math.round(high * DENOM).toString(), + low: Math.round(low * DENOM).toString(), + close: Math.round(close * DENOM).toString(), + volume: Math.round(volume * DENOM).toString(), + open_time: (timestamp * 1000).toString(), + close_time: ((timestamp + periodSeconds * 1000) * 1000).toString() + }; + } + + /** + * Convert CCXT trade to our Tick format + */ + convertToTick(trade, ticker) { + // Convert to fixed-point integers (using 8 decimal places = 10^8) + const DENOM = 100000000; + + const price = Math.round(trade.price * DENOM); + const amount = Math.round(trade.amount * DENOM); + const quoteAmount = Math.round((trade.price * trade.amount) * DENOM); + + return { + trade_id: trade.id || `${trade.timestamp}`, + ticker, + timestamp: (trade.timestamp * 1000).toString(), // Convert ms to microseconds + price: price.toString(), + amount: amount.toString(), + quote_amount: 
quoteAmount.toString(), + taker_buy: trade.side === 'buy', + sequence: trade.order ? trade.order.toString() : undefined + }; + } + + /** + * Convert period seconds to CCXT timeframe string + */ + secondsToTimeframe(seconds) { + const timeframes = { + 60: '1m', + 300: '5m', + 900: '15m', + 1800: '30m', + 3600: '1h', + 7200: '2h', + 14400: '4h', + 21600: '6h', + 28800: '8h', + 43200: '12h', + 86400: '1d', + 259200: '3d', + 604800: '1w', + 2592000: '1M' + }; + + const timeframe = timeframes[seconds]; + if (!timeframe) { + throw new Error(`Unsupported period: ${seconds} seconds`); + } + + return timeframe; + } + + /** + * Close all exchange connections + */ + async close() { + for (const [name, exchange] of this.exchanges) { + if (exchange.close) { + await exchange.close(); + } + } + this.exchanges.clear(); + } +} diff --git a/ingestor/src/index.js b/ingestor/src/index.js new file mode 100644 index 0000000..198b98f --- /dev/null +++ b/ingestor/src/index.js @@ -0,0 +1,411 @@ +#!/usr/bin/env node + +// Main ingestor worker process +import { readFileSync } from 'fs'; +import { parse as parseYaml } from 'yaml'; +import pino from 'pino'; +import { ZmqClient } from './zmq-client.js'; +import { KafkaProducer } from './kafka-producer.js'; +import { CCXTFetcher } from './ccxt-fetcher.js'; +import { RealtimePoller } from './realtime-poller.js'; + +// Logger setup +const logger = pino({ + level: process.env.LOG_LEVEL || 'info', + transport: { + target: 'pino-pretty', + options: { + colorize: true, + translateTime: 'SYS:standard', + ignore: 'pid,hostname' + } + } +}); + +// Load configuration +function loadConfig() { + const configPath = process.env.CONFIG_PATH || '/config/config.yaml'; + const secretsPath = process.env.SECRETS_PATH || '/config/secrets.yaml'; + + let config = {}; + let secrets = {}; + + try { + const configFile = readFileSync(configPath, 'utf8'); + config = parseYaml(configFile); + logger.info({ path: configPath }, 'Loaded configuration'); + } catch (error) { + 
/**
 * Worker process that pulls DataRequests from Flink over ZMQ, fetches
 * market data via CCXT, and writes the results to Kafka.
 *
 * Async architecture: no responses are sent back over ZMQ. Flink consumes
 * the Kafka output, writes to Iceberg, and publishes notifications that
 * clients receive via the relay pub/sub channel.
 */
class IngestorWorker {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(
      config,
      logger.child({ component: 'kafka' })
    );
    this.ccxtFetcher = new CCXTFetcher(
      config,
      logger.child({ component: 'ccxt' })
    );
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      logger.child({ component: 'poller' })
    );

    // request_id -> DataRequest for everything currently being processed.
    // Historical requests are removed on completion; realtime subscriptions
    // stay until cancelled via a control message.
    this.activeRequests = new Map();
    this.isShutdown = false;
  }

  /**
   * True when `type` denotes a historical OHLC request.
   *
   * HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
   * so protobufjs decodes it as undefined. Treat undefined as HISTORICAL_OHLC.
   * (Previously this rule was duplicated in two places in handleDataRequest.)
   */
  _isHistoricalType(type) {
    return type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
  }

  /** True when `type` denotes a realtime tick subscription request. */
  _isRealtimeType(type) {
    return type === 'REALTIME_TICKS' || type === 1;
  }

  /**
   * Start the ingestor worker: connect Kafka and ZMQ, begin listening for
   * control messages, and enter the work loop.
   */
  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    // Connect to services
    await this.kafkaProducer.connect();
    await this.zmqClient.connect();

    // Start control message listener
    this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));

    // Start work loop (intentionally not awaited - runs for the process lifetime)
    this.workLoop();

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Main work loop - pull data requests from ZMQ and dispatch them.
   * Backs off for 1s when at max concurrency or after a loop-level error.
   */
  async workLoop() {
    while (!this.isShutdown) {
      try {
        // Check if we can handle more requests
        if (this.activeRequests.size >= this.config.max_concurrent) {
          await new Promise(resolve => setTimeout(resolve, 1000));
          continue;
        }

        // Pull next data request
        const request = await this.zmqClient.pullDataRequest();
        if (!request) {
          continue;
        }

        // Handle request asynchronously so the loop can keep pulling.
        this.handleDataRequest(request).catch(error => {
          this.logger.error(
            { error: error.message, requestId: request.requestId },
            'Error handling data request'
          );
        });
      } catch (error) {
        if (!this.isShutdown) {
          this.logger.error({ error: error.message }, 'Error in work loop');
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    }
  }

  /**
   * Dispatch a data request to the historical or realtime handler.
   * Historical requests are tracked only while in flight; realtime
   * subscriptions remain in activeRequests until cancelled.
   */
  async handleDataRequest(request) {
    const { requestId: request_id, type, ticker } = request;

    this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');

    this.activeRequests.set(request_id, request);

    try {
      if (this._isHistoricalType(type)) {
        await this.handleHistoricalRequest(request);
      } else if (this._isRealtimeType(type)) {
        await this.handleRealtimeRequest(request);
      } else {
        this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
      }
    } finally {
      // For historical requests, remove from active requests when done.
      if (this._isHistoricalType(type)) {
        this.activeRequests.delete(request_id);
      }
    }
  }

  /**
   * Handle a historical OHLC request.
   *
   * ASYNC ARCHITECTURE: No response is sent back. Data is written to Kafka
   * only; Flink processes it, writes to Iceberg, and publishes the
   * notification the client is waiting for. Errors are reported the same
   * way, via an error marker message - this method never rethrows.
   */
  async handleHistoricalRequest(request) {
    const { requestId: request_id, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;

    this.logger.info(
      { request_id, ticker, period_seconds, client_id },
      'Processing historical OHLC request (async mode - write to Kafka only)'
    );

    try {
      // Fetch historical data from exchange
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker,
        start_time,
        end_time,
        period_seconds,
        limit
      );

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Fetched data from exchange'
      );

      // Write to Kafka - THIS IS THE ONLY OUTPUT.
      // Flink will: read from Kafka -> write to Iceberg -> publish
      // HistoryReadyNotification -> client receives it via relay pub/sub.
      if (candles.length > 0) {
        // Attach tracking metadata to the first candle only, so Flink can
        // correlate the batch with the originating request.
        const enrichedCandles = candles.map((candle, idx) => ({
          ...candle,
          __metadata: idx === 0 ? {
            request_id,
            client_id,
            ticker,
            period_seconds,
            start_time,
            end_time
          } : undefined
        }));

        await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
      } else {
        // Write a marker message even if no data was found; Flink will see
        // it and publish a NOT_FOUND notification to the client.
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'NOT_FOUND',
          message: 'No data available for requested period'
        });
      }

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Completed historical OHLC request - data written to Kafka'
      );

      // NO RESPONSE SENT - Relay is stateless, client waits for pub/sub notification

    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to process historical request'
      );

      // Write error marker to Kafka so Flink can notify the client.
      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'ERROR',
          error_message: error.message
        });
      } catch (kafkaError) {
        this.logger.error(
          { error: kafkaError.message, request_id },
          'Failed to write error marker to Kafka'
        );
      }

      // Do not throw - request is handled, Flink will notify client of error
    }
  }

  /**
   * Handle a realtime tick subscription request. The subscription is
   * managed by the poller and persists until a CANCEL control message;
   * on startup failure the request is untracked and the error rethrown.
   */
  async handleRealtimeRequest(request) {
    const { requestId: request_id, ticker } = request;

    this.logger.info(
      { request_id, ticker },
      'Processing realtime subscription request'
    );

    try {
      // Start realtime polling
      this.realtimePoller.startSubscription(
        request_id,
        ticker,
        this.config.kafka_topic
      );
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to start realtime subscription'
      );
      this.activeRequests.delete(request_id);
      throw error;
    }
  }

  /**
   * Handle control messages from Flink (CANCEL / SHUTDOWN / CONFIG_UPDATE /
   * HEARTBEAT).
   */
  async handleControlMessage(message) {
    const { action, requestId: request_id } = message;

    this.logger.info({ action, request_id }, 'Received control message');

    switch (action) {
      case 'CANCEL':
        if (request_id) {
          // Cancel specific request
          this.realtimePoller.cancelSubscription(request_id);
          this.activeRequests.delete(request_id);
        }
        break;

      case 'SHUTDOWN':
        this.logger.info('Received shutdown signal');
        await this.shutdown();
        break;

      case 'CONFIG_UPDATE':
        // Handle config update if needed
        this.logger.info('Received config update');
        break;

      case 'HEARTBEAT':
        // Just acknowledge heartbeat
        break;

      default:
        this.logger.warn({ action }, 'Unknown control action');
    }
  }

  /**
   * Get a snapshot of worker status for periodic logging.
   */
  getStatus() {
    return {
      activeRequests: this.activeRequests.size,
      maxConcurrent: this.config.max_concurrent,
      pollerStats: this.realtimePoller.getStats()
    };
  }

  /**
   * Shutdown worker gracefully: stop polling, close all connections,
   * then exit the process. Idempotent.
   */
  async shutdown() {
    if (this.isShutdown) {
      return;
    }

    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    // Stop polling
    this.realtimePoller.shutdown();

    // Close connections
    await this.ccxtFetcher.close();
    await this.kafkaProducer.disconnect();
    await this.zmqClient.shutdown();

    this.logger.info('Ingestor worker shutdown complete');
    process.exit(0);
  }
}
// Log status periodically + setInterval(() => { + const status = worker.getStatus(); + logger.info({ status }, 'Worker status'); + }, 60000); +} + +// Run +main().catch(error => { + logger.error({ error }, 'Fatal error'); + process.exit(1); +}); diff --git a/ingestor/src/kafka-producer.js b/ingestor/src/kafka-producer.js new file mode 100644 index 0000000..745b496 --- /dev/null +++ b/ingestor/src/kafka-producer.js @@ -0,0 +1,270 @@ +// Kafka producer for writing market data +import { Kafka } from 'kafkajs'; +import { encodeMessage, MessageTypeId, Tick, OHLC, OHLCBatch } from './proto/messages.js'; + +export class KafkaProducer { + constructor(config, logger) { + this.config = config; + this.logger = logger; + + this.kafka = new Kafka({ + clientId: 'ccxt-ingestor', + brokers: config.kafka_brokers || ['localhost:9092'], + logLevel: 0 // Error only + }); + + this.producer = this.kafka.producer(); + this.isConnected = false; + } + + /** + * Connect to Kafka + */ + async connect() { + await this.producer.connect(); + this.isConnected = true; + this.logger.info('Connected to Kafka'); + } + + /** + * Write a tick message to Kafka + * @param {string} topic - Kafka topic name + * @param {object} tickData - Tick data object + */ + async writeTick(topic, tickData) { + if (!this.isConnected) { + throw new Error('Kafka producer not connected'); + } + + const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick); + const message = Buffer.concat([frame1, frame2]); + + await this.producer.send({ + topic, + messages: [ + { + key: tickData.ticker, + value: message, + timestamp: tickData.timestamp.toString() + } + ] + }); + + this.logger.debug({ ticker: tickData.ticker, topic }, 'Wrote tick to Kafka'); + } + + /** + * Write multiple ticks to Kafka in batch + * @param {string} topic - Kafka topic name + * @param {Array} ticksData - Array of tick data objects + */ + async writeTicks(topic, ticksData) { + if (!this.isConnected) { + throw new Error('Kafka producer not 
connected'); + } + + if (ticksData.length === 0) { + return; + } + + const messages = ticksData.map(tickData => { + const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick); + const message = Buffer.concat([frame1, frame2]); + + return { + key: tickData.ticker, + value: message, + timestamp: tickData.timestamp.toString() + }; + }); + + await this.producer.send({ + topic, + messages + }); + + this.logger.debug( + { count: ticksData.length, topic }, + 'Wrote ticks batch to Kafka' + ); + } + + /** + * Write an OHLC message to Kafka + * @param {string} topic - Kafka topic name + * @param {object} ohlcData - OHLC data object + */ + async writeOHLC(topic, ohlcData) { + if (!this.isConnected) { + throw new Error('Kafka producer not connected'); + } + + const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, ohlcData, OHLC); + const message = Buffer.concat([frame1, frame2]); + + await this.producer.send({ + topic, + messages: [ + { + key: ohlcData.ticker, + value: message + } + ] + }); + + this.logger.debug({ ticker: ohlcData.ticker, topic }, 'Wrote OHLC to Kafka'); + } + + /** + * Write multiple OHLC candles to Kafka as an OHLCBatch message + * Uses protobuf encoding with metadata in batch wrapper + * @param {string} topic - Kafka topic name + * @param {Array} ohlcData - Array of OHLC data objects (may include __metadata in first record) + */ + async writeOHLCs(topic, ohlcData) { + if (!this.isConnected) { + throw new Error('Kafka producer not connected'); + } + + if (ohlcData.length === 0) { + return; + } + + // Extract metadata from first record if present + const firstCandle = ohlcData[0]; + const metadata = firstCandle.__metadata; + + if (!metadata) { + // No metadata - write individual OHLC messages (realtime mode) + const messages = ohlcData.map(candle => { + const protoCandle = { + timestamp: candle.timestamp, + ticker: candle.ticker, + open: candle.open, + high: candle.high, + low: candle.low, + close: candle.close, + volume: candle.volume + 
}; + + const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, protoCandle, OHLC); + const value = Buffer.concat([frame1, frame2]); + + return { + key: candle.ticker, + value + }; + }); + + await this.producer.send({ + topic, + messages + }); + + this.logger.debug( + { count: ohlcData.length, topic, type: 'individual' }, + 'Wrote OHLC messages to Kafka' + ); + return; + } + + // Historical mode - write as OHLCBatch with metadata + const batch = { + metadata: { + requestId: metadata.request_id, + clientId: metadata.client_id, + ticker: metadata.ticker, + periodSeconds: metadata.period_seconds, + startTime: metadata.start_time, + endTime: metadata.end_time, + status: metadata.status || 'OK', + errorMessage: metadata.error_message + }, + rows: ohlcData.map(candle => ({ + timestamp: candle.timestamp, + ticker: candle.ticker, + open: candle.open, + high: candle.high, + low: candle.low, + close: candle.close, + volume: candle.volume + })) + }; + + // Encode as protobuf OHLCBatch with ZMQ envelope + const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch); + const value = Buffer.concat([frame1, frame2]); + + await this.producer.send({ + topic, + messages: [ + { + key: metadata.ticker, + value + } + ] + }); + + this.logger.debug( + { request_id: metadata.request_id, count: ohlcData.length, topic, type: 'batch' }, + 'Wrote OHLCBatch to Kafka' + ); + } + + /** + * Write a marker message to Kafka for NOT_FOUND or ERROR cases + * This allows Flink to publish notifications even when no data is available + * @param {string} topic - Kafka topic name + * @param {object} marker - Marker object with request metadata and status + */ + async writeMarker(topic, marker) { + if (!this.isConnected) { + throw new Error('Kafka producer not connected'); + } + + // Create an empty OHLCBatch with status in metadata + const batch = { + metadata: { + requestId: marker.request_id, + clientId: marker.client_id, + ticker: marker.ticker, + periodSeconds: 
marker.period_seconds, + startTime: marker.start_time, + endTime: marker.end_time, + status: marker.status, // 'NOT_FOUND' or 'ERROR' + errorMessage: marker.error_message || marker.message + }, + rows: [] // Empty rows array indicates marker message + }; + + // Encode as protobuf OHLCBatch with ZMQ envelope + const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch); + const value = Buffer.concat([frame1, frame2]); + + await this.producer.send({ + topic, + messages: [ + { + key: marker.ticker, + value + } + ] + }); + + this.logger.info( + { request_id: marker.request_id, status: marker.status, topic }, + 'Wrote marker to Kafka' + ); + } + + /** + * Disconnect from Kafka + */ + async disconnect() { + if (this.isConnected) { + await this.producer.disconnect(); + this.isConnected = false; + this.logger.info('Disconnected from Kafka'); + } + } +} diff --git a/ingestor/src/realtime-poller.js b/ingestor/src/realtime-poller.js new file mode 100644 index 0000000..4275dc6 --- /dev/null +++ b/ingestor/src/realtime-poller.js @@ -0,0 +1,217 @@ +// Realtime tick data poller using 10-second polling +export class RealtimePoller { + constructor(ccxtFetcher, kafkaProducer, logger) { + this.ccxtFetcher = ccxtFetcher; + this.kafkaProducer = kafkaProducer; + this.logger = logger; + + // Active subscriptions: requestId -> subscription info + this.subscriptions = new Map(); + + // Poll interval in milliseconds (10 seconds) + this.pollInterval = 10000; + + // Main polling loop + this.pollingLoop = null; + } + + /** + * Start a realtime subscription + * @param {string} requestId - Unique request ID + * @param {string} ticker - Ticker to subscribe to + * @param {string} kafkaTopic - Kafka topic to write to + */ + startSubscription(requestId, ticker, kafkaTopic) { + if (this.subscriptions.has(requestId)) { + this.logger.warn({ requestId }, 'Subscription already exists'); + return; + } + + const subscription = { + requestId, + ticker, + kafkaTopic, + lastTimestamp: 
null, + isActive: true, + errorCount: 0 + }; + + this.subscriptions.set(requestId, subscription); + + this.logger.info( + { requestId, ticker, kafkaTopic }, + 'Started realtime subscription' + ); + + // Start polling loop if not already running + if (!this.pollingLoop) { + this.startPollingLoop(); + } + } + + /** + * Cancel a realtime subscription + * @param {string} requestId - Request ID to cancel + */ + cancelSubscription(requestId) { + const subscription = this.subscriptions.get(requestId); + if (subscription) { + subscription.isActive = false; + this.subscriptions.delete(requestId); + + this.logger.info( + { requestId, ticker: subscription.ticker }, + 'Cancelled realtime subscription' + ); + } + + // Stop polling loop if no active subscriptions + if (this.subscriptions.size === 0 && this.pollingLoop) { + clearInterval(this.pollingLoop); + this.pollingLoop = null; + this.logger.info('Stopped polling loop - no active subscriptions'); + } + } + + /** + * Start the main polling loop + */ + startPollingLoop() { + this.logger.info({ interval: this.pollInterval }, 'Starting polling loop'); + + this.pollingLoop = setInterval(async () => { + await this.pollAllSubscriptions(); + }, this.pollInterval); + + // Do an immediate poll + this.pollAllSubscriptions(); + } + + /** + * Poll all active subscriptions + */ + async pollAllSubscriptions() { + const subscriptions = Array.from(this.subscriptions.values()); + + // Poll subscriptions in parallel + await Promise.allSettled( + subscriptions.map(sub => this.pollSubscription(sub)) + ); + } + + /** + * Poll a single subscription + * @param {object} subscription - Subscription object + */ + async pollSubscription(subscription) { + if (!subscription.isActive) { + return; + } + + const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription; + + try { + // Fetch trades since last timestamp + const trades = await this.ccxtFetcher.fetchRecentTrades( + ticker, + lastTimestamp + ); + + if (trades.length === 0) { + 
this.logger.debug({ requestId, ticker }, 'No new trades'); + return; + } + + // Filter out trades we've already seen + let newTrades = trades; + if (lastTimestamp) { + const lastTs = BigInt(lastTimestamp); + newTrades = trades.filter(t => BigInt(t.timestamp) > lastTs); + } + + if (newTrades.length > 0) { + // Write trades to Kafka + await this.kafkaProducer.writeTicks(kafkaTopic, newTrades); + + // Update last timestamp + const latestTrade = newTrades[newTrades.length - 1]; + subscription.lastTimestamp = latestTrade.timestamp; + + this.logger.info( + { + requestId, + ticker, + count: newTrades.length, + kafkaTopic + }, + 'Wrote new trades to Kafka' + ); + } + + // Reset error count on success + subscription.errorCount = 0; + } catch (error) { + subscription.errorCount++; + + this.logger.error( + { + error: error.message, + requestId, + ticker, + errorCount: subscription.errorCount + }, + 'Error polling subscription' + ); + + // Cancel subscription after too many errors + if (subscription.errorCount >= 5) { + this.logger.error( + { requestId, ticker }, + 'Cancelling subscription due to repeated errors' + ); + this.cancelSubscription(requestId); + } + } + } + + /** + * Get subscription statistics + */ + getStats() { + const stats = { + totalSubscriptions: this.subscriptions.size, + subscriptions: [] + }; + + for (const [requestId, sub] of this.subscriptions) { + stats.subscriptions.push({ + requestId, + ticker: sub.ticker, + isActive: sub.isActive, + errorCount: sub.errorCount, + lastTimestamp: sub.lastTimestamp + }); + } + + return stats; + } + + /** + * Shutdown poller and cancel all subscriptions + */ + shutdown() { + this.logger.info('Shutting down realtime poller'); + + if (this.pollingLoop) { + clearInterval(this.pollingLoop); + this.pollingLoop = null; + } + + // Mark all subscriptions as inactive + for (const subscription of this.subscriptions.values()) { + subscription.isActive = false; + } + + this.subscriptions.clear(); + } +} diff --git 
a/ingestor/src/zmq-client.js b/ingestor/src/zmq-client.js new file mode 100644 index 0000000..e3eaa6d --- /dev/null +++ b/ingestor/src/zmq-client.js @@ -0,0 +1,116 @@ +// ZeroMQ client for connecting to Flink control channels +import * as zmq from 'zeromq'; +import { decodeMessage } from './proto/messages.js'; + +export class ZmqClient { + constructor(config, logger) { + this.config = config; + this.logger = logger; + + // Work queue - SUB socket to receive data requests with exchange prefix filtering + this.workSocket = null; + + // NOTE: NO RESPONSE SOCKET - Async architecture via Kafka! + // Ingestors write data to Kafka only + // Flink processes and publishes notifications + + this.isShutdown = false; + this.supportedExchanges = config.supported_exchanges || ['BINANCE', 'COINBASE']; + } + + /** + * Connect to Relay ZMQ endpoints + */ + async connect() { + const { flink_hostname, ingestor_work_port } = this.config; + + // Connect to work queue (SUB with exchange prefix filtering) + this.workSocket = new zmq.Subscriber(); + const workEndpoint = `tcp://${flink_hostname}:${ingestor_work_port}`; + await this.workSocket.connect(workEndpoint); + + // Subscribe to each supported exchange prefix + for (const exchange of this.supportedExchanges) { + const prefix = `${exchange}:`; + this.workSocket.subscribe(prefix); + this.logger.info(`Subscribed to exchange prefix: ${prefix}`); + } + this.logger.info(`Connected to work queue at ${workEndpoint}`); + this.logger.info('ASYNC MODE: No response socket - data flows via Kafka → Flink → pub/sub notification'); + } + + /** + * Pull a data request from the work queue + * @returns {Promise} Decoded DataRequest message + */ + async pullDataRequest() { + if (this.isShutdown) { + return null; + } + + try { + const frames = await this.workSocket.receive(); + this.logger.info({ + frameCount: frames.length, + frame0Len: frames[0]?.length, + frame1Len: frames[1]?.length, + frame2Len: frames[2]?.length, + frame0: 
frames[0]?.toString('utf8').substring(0, 50), + frame1Hex: frames[1]?.toString('hex').substring(0, 20), + frame2Hex: frames[2]?.toString('hex').substring(0, 20) + }, 'Received raw ZMQ frames'); + + // First frame is the topic (exchange prefix), skip it + // Remaining frames are: [version_frame, message_frame] + if (frames.length < 3) { + this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count'); + return null; + } + const messageFrames = frames.slice(1); // Skip topic, keep version + message + const { version, typeId, message } = decodeMessage(messageFrames); + this.logger.info({ + version, + typeId: `0x${typeId.toString(16)}`, + requestId: message.requestId, + type: message.type, + typeOf: typeof message.type, + ticker: message.ticker + }, 'Decoded data request'); + return message; + } catch (error) { + if (!this.isShutdown) { + this.logger.error({ error: error.message, stack: error.stack }, 'Error receiving data request'); + } + return null; + } + } + + + /** + * Start listening for control messages in the background + * @param {Function} handler - Callback function to handle control messages + * + * NOTE: Control channel not implemented yet. This is a stub for future use. + * For now, just log and ignore. 
+ */ + startControlListener(handler) { + this.logger.info('Control channel listener stub - not implemented yet'); + // TODO: Implement control channel when needed + // Control messages would be used for: + // - Canceling realtime subscriptions + // - Graceful shutdown signals + // - Configuration updates + } + + /** + * Shutdown and close connections + */ + async shutdown() { + this.isShutdown = true; + this.logger.info('Shutting down ZMQ connections'); + + if (this.workSocket) { + await this.workSocket.close(); + } + } +} diff --git a/kafka/README.md b/kafka/README.md new file mode 100644 index 0000000..ecf1063 --- /dev/null +++ b/kafka/README.md @@ -0,0 +1,74 @@ +# Kafka Deployment for K8s Cluster + +## Install Strimzi Operator + +```bash +# Add the Strimzi Helm repository (if using Helm 3.6+) +helm install strimzi-kafka-operator oci://quay.io/strimzi-helm/strimzi-kafka-operator \ + -f values.yaml \ + --namespace kafka --create-namespace + +# Wait for operator to be ready +kubectl wait --for=condition=ready pod -l name=strimzi-cluster-operator -n kafka --timeout=300s +``` + +## Deploy Kafka Cluster + +```bash +# Apply the metrics ConfigMap first +kubectl apply -f kafka-metrics-config.yaml + +# Apply the Kafka cluster manifest +kubectl apply -f kafka-cluster.yaml + +# Wait for Kafka to be ready (this may take a few minutes) +kubectl wait kafka/trading-cluster --for=condition=Ready --timeout=300s -n default +``` + +## Verify Installation + +```bash +# Check Kafka cluster status +kubectl get kafka -n default + +# Check all pods +kubectl get pods -n default | grep trading-cluster + +# Check Kafka cluster details +kubectl describe kafka trading-cluster -n default +``` + +## Connect to Kafka + +Internal connection string (from within cluster): +- **Plaintext**: `trading-cluster-kafka-bootstrap.default.svc:9092` +- **TLS**: `trading-cluster-kafka-bootstrap.default.svc:9093` + +## Prometheus Metrics + +Kafka and ZooKeeper expose metrics on port 9404. 
Prometheus will automatically discover and scrape these metrics via pod annotations: +- `prometheus.io/scrape: "true"` +- `prometheus.io/port: "9404"` +- `prometheus.io/path: "/metrics"` + +To verify metrics are being exported: +```bash +kubectl exec -it trading-cluster-kafka-0 -n default -- curl localhost:9404/metrics +``` + +## Create a Test Topic + +```bash +kubectl run kafka-producer -ti --image=quay.io/strimzi/kafka:0.43.0-kafka-3.7.0 --rm=true --restart=Never -- \ + bin/kafka-topics.sh --create --topic test-topic --bootstrap-server trading-cluster-kafka-bootstrap:9092 --partitions 3 --replication-factor 3 +``` + +## Uninstall + +```bash +# Delete Kafka cluster +kubectl delete kafka trading-cluster -n default + +# Delete operator +helm uninstall strimzi-kafka-operator -n kafka +``` diff --git a/kafka/kafka-cluster.yaml b/kafka/kafka-cluster.yaml new file mode 100644 index 0000000..50178e4 --- /dev/null +++ b/kafka/kafka-cluster.yaml @@ -0,0 +1,85 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: Kafka +metadata: + name: trading-cluster + namespace: default + labels: + app: kafka +spec: + kafka: + version: 3.7.0 + replicas: 1 + metricsConfig: + type: jmxPrometheusExporter + valueFrom: + configMapKeyRef: + name: kafka-metrics + key: kafka-metrics-config.yml + template: + pod: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9404" + prometheus.io/path: "/metrics" + listeners: + - name: plain + port: 9092 + type: internal + tls: false + - name: tls + port: 9093 + type: internal + tls: true + config: + offsets.topic.replication.factor: 1 + transaction.state.log.replication.factor: 1 + transaction.state.log.min.isr: 1 + default.replication.factor: 1 + min.insync.replicas: 1 + log.retention.hours: 168 + log.segment.bytes: 1073741824 + log.retention.check.interval.ms: 300000 + storage: + type: jbod + volumes: + - id: 0 + type: persistent-claim + size: 5Gi + deleteClaim: false + resources: + requests: + memory: 512Mi + cpu: 250m + limits: 
+ memory: 1Gi + cpu: 500m + zookeeper: + replicas: 1 + metricsConfig: + type: jmxPrometheusExporter + valueFrom: + configMapKeyRef: + name: kafka-metrics + key: zookeeper-metrics-config.yml + template: + pod: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9404" + prometheus.io/path: "/metrics" + storage: + type: persistent-claim + size: 2Gi + deleteClaim: false + resources: + requests: + memory: 256Mi + cpu: 100m + limits: + memory: 512Mi + cpu: 250m + entityOperator: + topicOperator: {} + userOperator: {} diff --git a/kafka/kafka-metrics-config.yaml b/kafka/kafka-metrics-config.yaml new file mode 100644 index 0000000..84ef3a4 --- /dev/null +++ b/kafka/kafka-metrics-config.yaml @@ -0,0 +1,44 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kafka-metrics + namespace: default +data: + kafka-metrics-config.yml: | + lowercaseOutputName: true + rules: + # Kafka broker metrics + - pattern: kafka.server<>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + topic: "$4" + partition: "$5" + - pattern: kafka.server<>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + broker: "$4:$5" + - pattern: kafka.server<>Value + name: kafka_server_$1_$2 + type: GAUGE + # Kafka network metrics + - pattern: kafka.network<>Value + name: kafka_network_$1_$2 + type: GAUGE + # Kafka log metrics + - pattern: kafka.log<>Value + name: kafka_log_$1_$2 + type: GAUGE + labels: + topic: "$3" + partition: "$4" + + zookeeper-metrics-config.yml: | + lowercaseOutputName: true + rules: + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: zookeeper_$2 + type: GAUGE diff --git a/kafka/values.yaml b/kafka/values.yaml new file mode 100644 index 0000000..d2c7d5e --- /dev/null +++ b/kafka/values.yaml @@ -0,0 +1,9 @@ +# Strimzi Kafka Operator Helm Values +# Install with: helm install strimzi-kafka-operator oci://quay.io/strimzi-helm/strimzi-kafka-operator + +# This values file is for the operator installation +# The 
operator itself is lightweight +#watchNamespaces: [] # Empty = watch all namespaces + +watchNamespaces: + - default diff --git a/protobuf/ingestor.proto b/protobuf/ingestor.proto new file mode 100644 index 0000000..43e82c3 --- /dev/null +++ b/protobuf/ingestor.proto @@ -0,0 +1,329 @@ +syntax = "proto3"; + +option java_multiple_files = true; +option java_package = "com.dexorder.proto"; + +// Request for data ingestion (used in Relay → Ingestor work queue) +message DataRequest { + // Unique request ID for tracking + string request_id = 1; + + // Type of request + RequestType type = 2; + + // Market identifier + string ticker = 3; + + // For historical requests + optional HistoricalParams historical = 4; + + // For realtime requests + optional RealtimeParams realtime = 5; + + // Optional client ID for notification routing (async architecture) + // Flink uses this to determine notification topic + optional string client_id = 6; + + enum RequestType { + HISTORICAL_OHLC = 0; + REALTIME_TICKS = 1; + } +} + +message HistoricalParams { + // Start time (microseconds since epoch) + uint64 start_time = 1; + + // End time (microseconds since epoch) + uint64 end_time = 2; + + // OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h, 86400 = 1d) + uint32 period_seconds = 3; + + // Maximum number of candles to return (optional limit) + optional uint32 limit = 4; +} + +message RealtimeParams { + // Whether to include tick data + bool include_ticks = 1; + + // Whether to include aggregated OHLC + bool include_ohlc = 2; + + // OHLC periods to generate in seconds (e.g., [60, 300, 900] for 1m, 5m, 15m) + repeated uint32 ohlc_period_seconds = 3; +} + +// Control messages for ingestors (Flink → Ingestor control channel) +message IngestorControl { + // Control action type + ControlAction action = 1; + + // Request ID to cancel (for CANCEL action) + optional string request_id = 2; + + // Configuration updates (for CONFIG_UPDATE action) + optional IngestorConfig config = 3; + + enum 
ControlAction { + CANCEL = 0; // Cancel a specific request + SHUTDOWN = 1; // Graceful shutdown signal + CONFIG_UPDATE = 2; // Update ingestor configuration + HEARTBEAT = 3; // Keep-alive signal + } +} + +message IngestorConfig { + // Maximum concurrent requests per ingestor + optional uint32 max_concurrent = 1; + + // Request timeout in seconds + optional uint32 timeout_seconds = 2; + + // Kafka topic for output + optional string kafka_topic = 3; +} + +// Historical data response from ingestor to Flink (Ingestor → Flink response channel) +message DataResponse { + // Request ID this is responding to + string request_id = 1; + + // Status of the request + ResponseStatus status = 2; + + // Error message if status is not OK + optional string error_message = 3; + + // Serialized OHLC data (repeated OHLCV protobuf messages) + repeated bytes ohlc_data = 4; + + // Total number of candles returned + uint32 total_records = 5; + + enum ResponseStatus { + OK = 0; + NOT_FOUND = 1; + ERROR = 2; + } +} + +// Client request submission for historical OHLC data (Client → Relay) +// Relay immediately responds with SubmitResponse containing request_id +message SubmitHistoricalRequest { + // Client-generated request ID for tracking + string request_id = 1; + + // Market identifier (e.g., "BINANCE:BTC/USDT") + string ticker = 2; + + // Start time (microseconds since epoch) + uint64 start_time = 3; + + // End time (microseconds since epoch) + uint64 end_time = 4; + + // OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h) + uint32 period_seconds = 5; + + // Optional limit on number of candles + optional uint32 limit = 6; + + // Optional client ID for notification routing (e.g., "client-abc-123") + // Notifications will be published to topic: "RESPONSE:{client_id}" + optional string client_id = 7; +} + +// Immediate response to SubmitHistoricalRequest (Relay → Client) +message SubmitResponse { + // Request ID (echoed from request) + string request_id = 1; + + // Status of 
submission + SubmitStatus status = 2; + + // Error message if status is not QUEUED + optional string error_message = 3; + + // Topic to subscribe to for result notification + // e.g., "RESPONSE:client-abc-123" or "HISTORY_READY:{request_id}" + string notification_topic = 4; + + enum SubmitStatus { + QUEUED = 0; // Request queued successfully + DUPLICATE = 1; // Request ID already exists + INVALID = 2; // Invalid parameters + ERROR = 3; // Internal error + } +} + +// Historical data ready notification (Flink → Relay → Client via pub/sub) +// Published after Flink writes data to Iceberg +message HistoryReadyNotification { + // Request ID + string request_id = 1; + + // Market identifier + string ticker = 2; + + // OHLC period in seconds + uint32 period_seconds = 3; + + // Start time (microseconds since epoch) + uint64 start_time = 4; + + // End time (microseconds since epoch) + uint64 end_time = 5; + + // Status of the data fetch + NotificationStatus status = 6; + + // Error message if status is not OK + optional string error_message = 7; + + // Iceberg table information for client queries + string iceberg_namespace = 10; + string iceberg_table = 11; + + // Number of records written + uint32 row_count = 12; + + // Timestamp when data was written (microseconds since epoch) + uint64 completed_at = 13; + + enum NotificationStatus { + OK = 0; // Data successfully written to Iceberg + NOT_FOUND = 1; // No data found for the requested period + ERROR = 2; // Error during fetch or processing + TIMEOUT = 3; // Request timed out + } +} + +// Legacy message for backward compatibility (Client → Relay) +message OHLCRequest { + // Request ID for tracking + string request_id = 1; + + // Market identifier + string ticker = 2; + + // Start time (microseconds since epoch) + uint64 start_time = 3; + + // End time (microseconds since epoch) + uint64 end_time = 4; + + // OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h) + uint32 period_seconds = 5; + + // Optional limit on 
number of candles + optional uint32 limit = 6; +} + +// Generic response for any request (Flink → Client) +message Response { + // Request ID this is responding to + string request_id = 1; + + // Status of the request + ResponseStatus status = 2; + + // Error message if status is not OK + optional string error_message = 3; + + // Generic payload data (serialized protobuf messages) + repeated bytes data = 4; + + // Total number of records + optional uint32 total_records = 5; + + // Whether this is the final response (for paginated results) + bool is_final = 6; + + enum ResponseStatus { + OK = 0; + NOT_FOUND = 1; + ERROR = 2; + } +} + +// CEP trigger registration (Client → Flink) +message CEPTriggerRequest { + // Unique trigger ID + string trigger_id = 1; + + // Flink SQL CEP pattern/condition + string sql_pattern = 2; + + // Markets to monitor + repeated string tickers = 3; + + // Callback endpoint (for DEALER/ROUTER routing) + optional string callback_id = 4; + + // Optional parameters for the CEP query + map parameters = 5; +} + +// CEP trigger acknowledgment (Flink → Client) +message CEPTriggerAck { + // Trigger ID being acknowledged + string trigger_id = 1; + + // Status of registration + TriggerStatus status = 2; + + // Error message if status is not OK + optional string error_message = 3; + + enum TriggerStatus { + REGISTERED = 0; + ALREADY_REGISTERED = 1; + INVALID_SQL = 2; + ERROR = 3; + } +} + +// CEP trigger event callback (Flink → Client) +message CEPTriggerEvent { + // Trigger ID that fired + string trigger_id = 1; + + // Timestamp when trigger fired (microseconds since epoch) + uint64 timestamp = 2; + + // Schema information for the result rows + ResultSchema schema = 3; + + // Result rows from the Flink SQL query + repeated ResultRow rows = 4; + + // Additional context from the CEP pattern + map context = 5; +} + +message ResultSchema { + // Column names in order + repeated string column_names = 1; + + // Column types (using Flink SQL type names) + 
repeated string column_types = 2; +} + +message ResultRow { + // Encoded row data (one bytes field per column, in schema order) + // Each value is encoded as a protobuf-serialized FieldValue + repeated bytes values = 1; +} + +message FieldValue { + oneof value { + string string_val = 1; + int64 int_val = 2; + double double_val = 3; + bool bool_val = 4; + bytes bytes_val = 5; + uint64 timestamp_val = 6; + } +} \ No newline at end of file diff --git a/protobuf/market.proto b/protobuf/market.proto new file mode 100644 index 0000000..2901714 --- /dev/null +++ b/protobuf/market.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; + +message Market { + // The prices and volumes must be adjusted by the rational denominator provided + // by the market metadata + string exchange_id = 2; // e.g., BINANCE + string market_id = 3; // e.g., BTC/USDT + string market_type = 4; // e.g., Spot + string description = 5; // e.g., Bitcoin/Tether on Binance + repeated string column_names = 6; // e.g., ['open', 'high', 'low', 'close', 'volume', 'taker_vol', 'maker_vol'] + string base_asset = 9; + string quote_asset = 10; + uint64 earliest_time = 11; + uint64 tick_denom = 12; // denominator applied to all OHLC price data + uint64 base_denom = 13; // denominator applied to base asset units + uint64 quote_denom = 14; // denominator applied to quote asset units + repeated uint32 supported_period_seconds = 15; + +} diff --git a/protobuf/ohlc.proto b/protobuf/ohlc.proto new file mode 100644 index 0000000..3093fe2 --- /dev/null +++ b/protobuf/ohlc.proto @@ -0,0 +1,61 @@ +syntax = "proto3"; + +option java_multiple_files = true; +option java_package = "com.dexorder.proto"; + +// Single OHLC row +message OHLC { + // Timestamp in microseconds since epoch + uint64 timestamp = 1; + + // The prices and volumes must be adjusted by the rational denominator provided + // by the market metadata + int64 open = 2; + int64 high = 3; + int64 low = 4; + int64 close = 5; + optional int64 volume = 6; + optional int64 
buy_vol = 7; + optional int64 sell_vol = 8; + optional int64 open_time = 9; + optional int64 high_time = 10; + optional int64 low_time = 11; + optional int64 close_time = 12; + optional int64 open_interest = 13; + string ticker = 14; +} + +// Batch of OHLC rows with metadata for historical request tracking +// Used for Kafka messages from ingestor → Flink +message OHLCBatch { + // Metadata for tracking this request through the pipeline + OHLCBatchMetadata metadata = 1; + + // OHLC rows in this batch + repeated OHLC rows = 2; +} + +// Metadata for tracking historical data requests through the pipeline +message OHLCBatchMetadata { + // Request ID from client + string request_id = 1; + + // Optional client ID for notification routing + optional string client_id = 2; + + // Market identifier + string ticker = 3; + + // OHLC period in seconds + uint32 period_seconds = 4; + + // Time range requested (microseconds since epoch) + uint64 start_time = 5; + uint64 end_time = 6; + + // Status for marker messages (OK, NOT_FOUND, ERROR) + string status = 7; + + // Error message if status is ERROR + optional string error_message = 8; +} diff --git a/protobuf/protocol.md b/protobuf/protocol.md new file mode 100644 index 0000000..a193c96 --- /dev/null +++ b/protobuf/protocol.md @@ -0,0 +1,168 @@ +# ZeroMQ Protocol Architecture + +Our data transfer protocol uses ZeroMQ with Protobufs. We send a small envelope with a protocol version byte as the first frame, then a type ID as the first byte of the second frame, followed by the protobuf payload also in the second frame. + +OHLC periods are represented as seconds. + +## Data Flow Overview + +**Relay as Gateway**: The Relay is a well-known bind point that all components connect to. It routes messages between clients, ingestors, and Flink. 
+ +### Historical Data Query Flow (Async Event-Driven Architecture) +* Client generates request_id and/or client_id (both are client-generated) +* Client computes notification topic: `RESPONSE:{client_id}` or `HISTORY_READY:{request_id}` +* **Client subscribes to notification topic BEFORE sending request (prevents race condition)** +* Client sends SubmitHistoricalRequest to Relay (REQ/REP) +* Relay returns immediate SubmitResponse with request_id and notification_topic (for confirmation) +* Relay publishes DataRequest to ingestor work queue with exchange prefix (PUB/SUB) +* Ingestor receives request, fetches data from exchange +* Ingestor writes OHLC data to Kafka with __metadata in first record +* Flink reads from Kafka, processes data, writes to Iceberg +* Flink publishes HistoryReadyNotification to ZMQ PUB socket (port 5557) with deterministic topic +* Relay proxies notification via XSUB → XPUB to clients +* Client receives notification (already subscribed) and queries Iceberg for data + +**Key Architectural Change**: Relay is completely stateless. No request/response correlation needed. All notification routing is topic-based (e.g., "RESPONSE:{client_id}"). + +**Race Condition Prevention**: Notification topics are deterministic based on client-generated values (request_id or client_id). Clients MUST subscribe to the notification topic BEFORE submitting the request to avoid missing notifications. + +**Two Notification Patterns**: +1. **Per-client topic** (`RESPONSE:{client_id}`): Subscribe once during connection, reuse for all requests from this client. Recommended for most clients. +2. **Per-request topic** (`HISTORY_READY:{request_id}`): Subscribe immediately before each request. Use when you need per-request isolation or don't have a persistent client_id. 
+ +### Realtime Data Flow (Flink → Relay → Clients) +* Ingestors write realtime ticks to Kafka +* Flink reads from Kafka, processes OHLC aggregations, CEP triggers +* Flink publishes market data via ZMQ PUB +* Relay subscribes to Flink (XSUB) and fanouts to clients (XPUB) +* Clients subscribe to specific tickers + +### Data Processing (Kafka → Flink → Iceberg) +* All market data flows through Kafka (durable event log) +* Flink processes streams for aggregations and CEP +* Flink writes historical data to Apache Iceberg tables +* Clients can query Iceberg for historical data (alternative to ingestor backfill) + +**Key Design Principles**: +* Relay is the well-known bind point - all other components connect to it +* Relay is completely stateless - no request tracking, only topic-based routing +* Exchange prefix filtering allows ingestor specialization (e.g., only BINANCE ingestors) +* Historical data flows through Kafka (durable processing) only - no direct response +* Async event-driven notifications via pub/sub (Flink → Relay → Clients) +* Protobufs over ZMQ for all inter-service communication +* Kafka for durability and Flink stream processing +* Iceberg for long-term historical storage and client queries + +## ZeroMQ Channels and Patterns + +All sockets bind on **Relay** (well-known endpoint). Components connect to relay. + +### 1. 
Client Request Channel (Clients → Relay) +**Pattern**: ROUTER (Relay binds, Clients use REQ) +- **Socket Type**: Relay uses ROUTER (bind), Clients use REQ (connect) +- **Endpoint**: `tcp://*:5559` (Relay binds) +- **Message Types**: `SubmitHistoricalRequest` → `SubmitResponse` +- **Behavior**: + - Client generates request_id and/or client_id + - Client computes notification topic deterministically + - **Client subscribes to notification topic FIRST (prevents race)** + - Client sends REQ for historical OHLC data + - Relay validates request and returns immediate acknowledgment + - Response includes notification_topic for client confirmation + - Relay publishes DataRequest to ingestor work queue + - No request tracking - relay is stateless + +### 2. Ingestor Work Queue (Relay → Ingestors) +**Pattern**: PUB/SUB with exchange prefix filtering +- **Socket Type**: Relay uses PUB (bind), Ingestors use SUB (connect) +- **Endpoint**: `tcp://*:5555` (Relay binds) +- **Message Types**: `DataRequest` (historical or realtime) +- **Topic Prefix**: Exchange name (e.g., `BINANCE:`, `COINBASE:`) +- **Behavior**: + - Relay publishes work with exchange prefix from ticker + - Ingestors subscribe only to exchanges they support + - Multiple ingestors can compete for same exchange + - Ingestors write data to Kafka only (no direct response) + - Flink processes Kafka → Iceberg → notification + +### 3. 
Market Data Fanout (Relay ↔ Flink ↔ Clients) +**Pattern**: XPUB/XSUB proxy +- **Socket Type**: + - Relay XPUB (bind) ← Clients SUB (connect) - Port 5558 + - Relay XSUB (connect) → Flink PUB (bind) - Port 5557 +- **Message Types**: `Tick`, `OHLC`, `HistoryReadyNotification` +- **Topic Formats**: + - Market data: `{ticker}|{data_type}` (e.g., `BINANCE:BTC/USDT|tick`) + - Notifications: `RESPONSE:{client_id}` or `HISTORY_READY:{request_id}` +- **Behavior**: + - Clients subscribe to ticker topics and notification topics via Relay XPUB + - Relay forwards subscriptions to Flink via XSUB + - Flink publishes processed market data and notifications + - Relay proxies data to subscribed clients (stateless forwarding) + - Dynamic subscription management (no pre-registration) + +### 4. Ingestor Control Channel (Optional - Future Use) +**Pattern**: PUB/SUB (Broadcast control) +- **Socket Type**: Relay uses PUB, Ingestors use SUB +- **Endpoint**: `tcp://*:5557` (Relay binds) +- **Message Types**: `IngestorControl` (cancel, config updates) +- **Behavior**: + - Broadcast control messages to all ingestors + - Used for realtime subscription cancellation + - Configuration updates + +## Message Envelope Format + +The core protocol uses two ZeroMQ frames: +``` +Frame 1: [1 byte: protocol version] +Frame 2: [1 byte: message type ID][N bytes: protobuf message] +``` + +This two-frame approach allows receivers to check the protocol version before parsing the message type and protobuf payload. + +**Important**: Some ZeroMQ socket patterns (PUB/SUB, XPUB/XSUB) may prepend additional frames for routing purposes. 
For example: +- **PUB/SUB with topic filtering**: SUB sockets receive `[topic frame][version frame][message frame]` +- **ROUTER sockets**: Prepend identity frames before the message + +Components must handle these additional frames appropriately: +- SUB sockets: Skip the first frame (topic), then parse the remaining frames as the standard 2-frame envelope +- ROUTER sockets: Extract identity frames, then parse the standard 2-frame envelope + +The two-frame envelope is the **logical protocol format**, but physical transmission may include additional ZeroMQ transport frames. + +## Message Type IDs + +| Type ID | Message Type | Description | +|---------|---------------------------|------------------------------------------------| +| 0x01 | DataRequest | Request for historical or realtime data | +| 0x02 | DataResponse (deprecated) | Historical data response (no longer used) | +| 0x03 | IngestorControl | Control messages for ingestors | +| 0x04 | Tick | Individual trade tick data | +| 0x05 | OHLC | Single OHLC candle with volume | +| 0x06 | Market | Market metadata | +| 0x07 | OHLCRequest (deprecated) | Client request (replaced by SubmitHistorical) | +| 0x08 | Response (deprecated) | Generic response (replaced by SubmitResponse) | +| 0x09 | CEPTriggerRequest | Register CEP trigger | +| 0x0A | CEPTriggerAck | CEP trigger acknowledgment | +| 0x0B | CEPTriggerEvent | CEP trigger fired callback | +| 0x0C | OHLCBatch | Batch of OHLC rows with metadata (Kafka) | +| 0x10 | SubmitHistoricalRequest | Client request for historical data (async) | +| 0x11 | SubmitResponse | Immediate ack with notification topic | +| 0x12 | HistoryReadyNotification | Notification that data is ready in Iceberg | + +## Error Handling + +**Async Architecture Error Handling**: +- Failed historical requests: ingestor writes error marker to Kafka +- Flink reads error marker and publishes HistoryReadyNotification with ERROR status +- Client timeout: if no notification received within timeout, assume failure 
+- Realtime requests cancelled via control channel if ingestor fails +- REQ/REP timeouts: 30 seconds default for client request submission +- PUB/SUB has no delivery guarantees (Kafka provides durability) +- No response routing needed - all notifications via topic-based pub/sub + +**Durability**: +- All data flows through Kafka for durability +- Flink checkpointing ensures exactly-once processing +- Client can retry request with new request_id if notification not received diff --git a/protobuf/tick.proto b/protobuf/tick.proto new file mode 100644 index 0000000..5618c3f --- /dev/null +++ b/protobuf/tick.proto @@ -0,0 +1,48 @@ +syntax = "proto3"; + +message Tick { + // Unique identifier for the trade + string trade_id = 1; + + // Market identifier (matches Market.market_id) + string ticker = 2; + + // Timestamp in microseconds since epoch + uint64 timestamp = 3; + + // Price (must be adjusted by tick_denom from Market metadata) + int64 price = 4; + + // Base asset amount (must be adjusted by base_denom from Market metadata) + int64 amount = 5; + + // Quote asset amount (must be adjusted by quote_denom from Market metadata) + int64 quote_amount = 6; + + // Side: true = taker buy (market buy), false = taker sell (market sell) + bool taker_buy = 7; + + // Position effect: true = close position, false = open position + // Only relevant for derivatives/futures markets + optional bool to_close = 8; + + // Sequence number for ordering (if provided by exchange) + optional uint64 sequence = 9; + + // Additional flags for special trade types + optional TradeFlags flags = 10; +} + +message TradeFlags { + // Liquidation trade + bool is_liquidation = 1; + + // Block trade (large OTC trade) + bool is_block_trade = 2; + + // Maker side was a post-only order + bool maker_post_only = 3; + + // Trade occurred during auction + bool is_auction = 4; +} diff --git a/relay/.gitignore b/relay/.gitignore new file mode 100644 index 0000000..9c18d20 --- /dev/null +++ b/relay/.gitignore @@ -0,0 
+1,7 @@ +target/ +config.yaml +secrets.yaml +*.log +.env +.DS_Store +protobuf/ diff --git a/relay/Cargo.lock b/relay/Cargo.lock new file mode 100644 index 0000000..3b2ea50 --- /dev/null +++ b/relay/Cargo.lock @@ -0,0 +1,1466 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "async-zmq-derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d138404687de04720fab6eac87d15a129babb1a30d116492849dbf8a914261" +dependencies = [ + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "async-zmq-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96829756bd48b04a930982268f973d13c534ef54f2f83bec6e9ffd94fe90a0b8" +dependencies = [ + "failure", + "futures", + "zmq", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.76" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if 1.0.4", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" +dependencies = [ + "byteorder", + "iovec", +] + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +dependencies = [ + "autocfg", + "cfg-if 0.1.10", + "lazy_static", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if 1.0.4", + "hashbrown 0.14.5", + "lock_api 0.4.14", + "once_cell", + "parking_lot_core 0.9.12", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "error-chain" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9435d864e017c3c6afeac1654189b06cdb491cf2ff73dbf0d73b0f292f42ff8" + +[[package]] +name = "failure" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" +dependencies = [ + "backtrace", + "failure_derive", +] + +[[package]] +name = "failure_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 1.0.109", + "synstructure", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +dependencies = [ + "bitflags 1.3.2", + "fuchsia-zircon-sys", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" + +[[package]] +name = "futures" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "lock_api" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "metadeps" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b122901b3a675fac8cecf68dcb2f0d3036193bc861d1ac0e1c337f7d5254c2" +dependencies = [ + "error-chain", + "pkg-config", + "toml", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4" +dependencies = [ + "cfg-if 0.1.10", + "fuchsia-zircon", + "fuchsia-zircon-sys", + "iovec", + "kernel32-sys", + "libc", + "log", + "miow", + "net2", + "slab", + "winapi 0.2.8", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys", +] + +[[package]] +name = "miow" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d" +dependencies = [ + "kernel32-sys", + "net2", + "winapi 0.2.8", + "ws2_32-sys", +] + +[[package]] +name = "multimap" +version = "0.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + +[[package]] +name = "net2" +version = "0.2.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13b648036a2339d06de780866fbdfda0dde886de7b3af2ddeba8b14f4ee34ac" +dependencies = [ + "cfg-if 0.1.10", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f842b1982eb6c2fe34036a4fbfb06dd185a3f5c8edfaacdf7d1ea10b07de6252" +dependencies = [ + "lock_api 0.3.4", + "parking_lot_core 0.6.3", + "rustc_version", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api 0.4.14", + "parking_lot_core 0.9.12", +] + +[[package]] +name = "parking_lot_core" +version = "0.6.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66b810a62be75176a80873726630147a5ca780cd33921e0b5709033e66b0a" +dependencies = [ + "cfg-if 0.1.10", + "cloudabi", + "libc", + "redox_syscall 0.1.57", + "rustc_version", + "smallvec 0.6.14", + "winapi 0.3.9", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "redox_syscall 0.5.18", + "smallvec 1.15.1", + "windows-link", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2 1.0.106", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid 0.1.0", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes 1.11.1", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +dependencies = [ + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2 1.0.106", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "relay" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes 1.11.1", + "dashmap", + "prost", + "prost-build", + "serde", + "serde_json", + "serde_yaml", + "thiserror", + "tokio", + "tokio-zmq", + "tracing", + "tracing-subscriber", + "zmq", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = 
"serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97fcaeba89edba30f044a10c6a3cc39df9c3f17d7cd829dd1446cab35f890e0" +dependencies = [ + "maybe-uninit", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid 0.1.0", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 1.0.109", + "unicode-xid 0.2.6", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if 1.0.4", +] + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes 1.11.1", + "libc", + "mio 1.1.1", + "parking_lot 0.12.5", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys", +] + +[[package]] +name = "tokio-executor" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb2d1b8f4548dbf5e1f7818512e9c406860678f29c300cdf0ebac72d1a3a1671" +dependencies = [ + "crossbeam-utils", + "futures", +] + +[[package]] +name = "tokio-io" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674" +dependencies = [ + "bytes 0.4.12", + "futures", + "log", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "tokio-reactor" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09bc590ec4ba8ba87652da2068d150dcada2cfa2e07faae270a5e0409aa51351" +dependencies = [ + "crossbeam-utils", + "futures", + "lazy_static", + "log", + "mio 0.6.23", + "num_cpus", + "parking_lot 0.9.0", + "slab", + "tokio-executor", + "tokio-io", + "tokio-sync", +] + +[[package]] +name = "tokio-sync" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfe50152bc8164fcc456dab7891fa9bf8beaf01c5ee7e1dd43a397c3cf87dee" +dependencies = [ + "fnv", + "futures", +] + 
+[[package]] +name = "tokio-timer" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93044f2d313c95ff1cb7809ce9a7a05735b012288a888b62d4434fd58c94f296" +dependencies = [ + "crossbeam-utils", + "futures", + "slab", + "tokio-executor", +] + +[[package]] +name = "tokio-zmq" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f878b4c7295df021caa123f48849f51ca858dd9ae04d7d2192f3e62307fd19a9" +dependencies = [ + "async-zmq-derive", + "async-zmq-types", + "failure", + "futures", + "log", + "mio 0.6.23", + "tokio-reactor", + "tokio-timer", + "zmq", +] + +[[package]] +name = "toml" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736b60249cb25337bc196faa43ee12c705e426f3d55c214d73a4e7be06f92cb4" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec 1.15.1", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver 1.0.27", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + 
"bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver 1.0.27", + "serde", + "serde_derive", + "serde_json", + "unicode-xid 0.2.6", + "wasmparser", +] + +[[package]] +name = "ws2_32-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zmq" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aad98a7a617d608cd9e1127147f630d24af07c7cd95ba1533246d96cbdd76c66" +dependencies = [ + "bitflags 1.3.2", + "libc", + "log", + "zmq-sys", +] + +[[package]] +name = "zmq-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d33a2c51dde24d5b451a2ed4b488266df221a5eaee2ee519933dc46b9a9b3648" +dependencies = [ + "libc", + "metadeps", +] diff --git a/relay/Cargo.toml b/relay/Cargo.toml new file mode 100644 index 0000000..dc15308 --- /dev/null +++ b/relay/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "relay" +version = "0.1.0" +edition = "2021" + +[dependencies] +zmq = "0.9" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" +tokio = { version = "1.0", features = ["full"] } +tokio-zmq = "0.10" +anyhow = "1.0" +thiserror = "1.0" +tracing = "0.1" +tracing-subscriber = { version = "0.3", 
features = ["env-filter"] } +bytes = "1.0" +dashmap = "5.5" +prost = "0.13" + +[build-dependencies] +prost-build = "0.13" + +[profile.release] +opt-level = 3 +lto = true +codegen-units = 1 diff --git a/relay/Dockerfile b/relay/Dockerfile new file mode 100644 index 0000000..3d2113b --- /dev/null +++ b/relay/Dockerfile @@ -0,0 +1,52 @@ +FROM rust:latest AS builder + +WORKDIR /app + +# Install ZMQ and protobuf dependencies +RUN apt-get update && apt-get install -y \ + libzmq3-dev \ + pkg-config \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +# Copy manifests +COPY Cargo.toml Cargo.lock* ./ + +# Copy build script and protobuf files (required for build) +COPY build.rs ./ +COPY protobuf ./protobuf + +# Copy source code +COPY src ./src + +# Build application (includes dependencies) +RUN cargo build --release + +# Runtime stage +FROM debian:bookworm-slim + +# Install runtime dependencies +RUN apt-get update && apt-get install -y \ + libzmq5 \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy binary from builder +COPY --from=builder /app/target/release/relay /app/relay + +# Create config directory +RUN mkdir -p /config + +# Set environment +ENV RUST_LOG=relay=info + +# Expose ports +# 5555: Ingestor work queue (PUB) +# 5556: Ingestor response (ROUTER) +# 5558: Market data publication (XPUB) +# 5559: Client requests (ROUTER) +EXPOSE 5555 5556 5558 5559 + +CMD ["/app/relay"] diff --git a/relay/README.md b/relay/README.md new file mode 100644 index 0000000..a78246b --- /dev/null +++ b/relay/README.md @@ -0,0 +1,238 @@ +# ZMQ Relay Gateway + +High-performance ZMQ relay/gateway that routes messages between clients, Flink, and ingestors. 
+ +## Architecture + +The relay acts as a well-known bind point for all components: + +``` +┌─────────┐ ┌───────┐ ┌──────────┐ +│ Clients │◄──────────────────►│ Relay │◄──────────────────►│ Ingestors│ +└─────────┘ └───┬───┘ └──────────┘ + │ + │ + ▼ + ┌────────┐ + │ Flink │ + └────────┘ +``` + +## Responsibilities + +### 1. Client Request Routing +- **Socket**: ROUTER (bind on port 5559) +- **Flow**: Client REQ → Relay ROUTER → Ingestor PUB +- Receives OHLC requests from clients +- Routes to appropriate ingestors using exchange prefix filtering +- Tracks pending requests and matches responses + +### 2. Ingestor Work Distribution +- **Socket**: PUB (bind on port 5555) +- **Pattern**: Topic-based distribution with exchange prefixes +- Publishes work requests with exchange prefix (e.g., `BINANCE:`) +- Ingestors subscribe to exchanges they support + +### 3. Response Routing +- **Socket**: ROUTER (bind on port 5556) +- **Flow**: Ingestor DEALER → Relay ROUTER → Client REQ +- Receives responses from ingestors +- Matches responses to pending client requests by request_id +- Returns data to waiting clients + +### 4. Market Data Fanout +- **Sockets**: XPUB (bind on 5558) + XSUB (connect to Flink:5557) +- **Pattern**: XPUB/XSUB proxy +- Relays market data from Flink to multiple clients +- Manages subscriptions dynamically +- Forwards subscription messages upstream to Flink + +## Message Flows + +### Historical Data Request + +``` +1. Client → Relay + Socket: REQ → ROUTER (5559) + Message: OHLCRequest (0x07) + +2. Relay → Ingestor + Socket: PUB (5555) + Topic: Exchange prefix (e.g., "BINANCE:") + Message: DataRequest (0x01) + +3. Ingestor fetches data from exchange + +4. Ingestor → Relay + Socket: DEALER → ROUTER (5556) + Message: DataResponse (0x02) + +5. Relay → Client + Socket: ROUTER → REQ + Message: Response (0x08) +``` + +### Market Data Subscription + +``` +1. Client subscribes to ticker + Socket: SUB → XPUB (5558) + Topic: "BINANCE:BTC/USDT|tick" + +2. 
Relay forwards subscription + Socket: XSUB → Flink PUB (5557) + +3. Flink publishes data + Socket: PUB (5557) → XSUB + +4. Relay fanout to clients + Socket: XPUB (5558) → SUB +``` + +## Configuration + +Edit `config.yaml`: + +```yaml +bind_address: "tcp://*" +client_request_port: 5559 +market_data_pub_port: 5558 +ingestor_work_port: 5555 +ingestor_response_port: 5556 +flink_market_data_endpoint: "tcp://flink-jobmanager:5557" +request_timeout_secs: 30 +high_water_mark: 10000 +``` + +## Building + +```bash +cargo build --release +``` + +## Running + +```bash +# With default config +./target/release/relay + +# With custom config +CONFIG_PATH=/path/to/config.yaml ./target/release/relay + +# With Docker +docker build -t relay . +docker run -p 5555-5559:5555-5559 relay +``` + +## Environment Variables + +- `CONFIG_PATH`: Path to config file (default: `/config/config.yaml`) +- `RUST_LOG`: Log level (default: `relay=info`) + +## Ports + +| Port | Socket Type | Direction | Purpose | +|------|------------|-----------|---------| +| 5555 | PUB | → Ingestors | Work distribution with exchange prefix | +| 5556 | ROUTER | ← Ingestors | Response collection | +| 5557 | - | (Flink) | Flink market data publication | +| 5558 | XPUB | → Clients | Market data fanout | +| 5559 | ROUTER | ← Clients | Client request handling | + +## Monitoring + +The relay logs all major events: + +``` +INFO relay: Client request routing +INFO relay: Forwarded request to ingestors: prefix=BINANCE:, request_id=... +INFO relay: Received response from ingestor: request_id=..., status=OK +INFO relay: Sent response to client: request_id=... +WARN relay: Request timed out: request_id=... 
+``` + +## Performance + +- **High water mark**: Configurable per socket (default: 10,000 messages) +- **Request timeout**: Automatic cleanup of expired requests (default: 30s) +- **Zero-copy proxying**: XPUB/XSUB market data forwarding +- **Async cleanup**: Background task for timeout management + +## Design Decisions + +### Why Rust? + +- **Performance**: Zero-cost abstractions, minimal overhead +- **Safety**: Memory safety without garbage collection +- **Concurrency**: Fearless concurrency with strong type system +- **ZMQ Integration**: Excellent ZMQ bindings + +### Why ROUTER for clients? + +- Preserves client identity for request/response matching +- Allows async responses (no blocking) +- Handles multiple concurrent clients efficiently + +### Why PUB for ingestor work? + +- Topic-based filtering by exchange +- Multiple ingestors can compete for same exchange +- Scales horizontally with ingestor count +- No single point of failure + +### Why XPUB/XSUB for market data? + +- Dynamic subscription management +- Efficient fanout to many clients +- Upstream subscription control +- Standard ZMQ proxy pattern + +## Troubleshooting + +### No response from ingestors + +Check: +- Ingestors are connected to port 5555 +- Ingestors have subscribed to exchange prefix +- Topic format: `EXCHANGE:` (e.g., `BINANCE:`) + +### Client timeout + +Check: +- Request timeout configuration +- Ingestor availability +- Network connectivity +- Pending requests map (logged on timeout) + +### Market data not flowing + +Check: +- Flink is publishing on port 5557 +- Relay XSUB is connected to Flink +- Clients have subscribed to correct topics +- Topic format: `{ticker}|{data_type}` + +## Testing + +Run the test client: + +```bash +cd ../test/history_client +python client.py +``` + +Expected flow: +1. Client sends request to relay:5559 +2. Relay publishes to ingestors:5555 +3. Ingestor fetches and responds to relay:5556 +4. 
Relay returns to client + +## Future Enhancements + +- [ ] Metrics collection (Prometheus) +- [ ] Health check endpoint +- [ ] Request rate limiting +- [ ] Circuit breaker for failed ingestors +- [ ] Request deduplication +- [ ] Response caching +- [ ] Multi-part response support for large datasets diff --git a/relay/build.rs b/relay/build.rs new file mode 100644 index 0000000..461c590 --- /dev/null +++ b/relay/build.rs @@ -0,0 +1,16 @@ +fn main() { + // Use Config to compile all protos together + // Since the proto files don't have package declarations, + // they'll all be generated into a single _.rs file + prost_build::Config::new() + .compile_protos( + &[ + "protobuf/ingestor.proto", + "protobuf/market.proto", + "protobuf/ohlc.proto", + "protobuf/tick.proto", + ], + &["protobuf/"], + ) + .unwrap_or_else(|e| panic!("Failed to compile protos: {}", e)); +} diff --git a/relay/config.example.yaml b/relay/config.example.yaml new file mode 100644 index 0000000..53e3b22 --- /dev/null +++ b/relay/config.example.yaml @@ -0,0 +1,19 @@ +# ZMQ Relay Configuration + +# Bind address for all relay sockets +bind_address: "tcp://*" + +# Client-facing ports +client_request_port: 5559 # ROUTER - Client historical data requests +market_data_pub_port: 5558 # XPUB - Market data fanout to clients + +# Ingestor-facing ports +ingestor_work_port: 5555 # PUB - Distribute work with exchange prefix +ingestor_response_port: 5556 # ROUTER - Receive responses from ingestors + +# Flink connection +flink_market_data_endpoint: "tcp://flink-jobmanager:5557" # XSUB - Subscribe to Flink market data + +# Timeouts and limits +request_timeout_secs: 30 # Timeout for pending client requests +high_water_mark: 10000 # ZMQ high water mark for all sockets diff --git a/relay/src/config.rs b/relay/src/config.rs new file mode 100644 index 0000000..14b6fb4 --- /dev/null +++ b/relay/src/config.rs @@ -0,0 +1,104 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::fs; + +#[derive(Debug, Clone, 
Serialize, Deserialize)] +pub struct Config { + /// Bind address for client-facing sockets + #[serde(default = "default_bind_address")] + pub bind_address: String, + + /// Client request port (ROUTER - receives client requests) + #[serde(default = "default_client_request_port")] + pub client_request_port: u16, + + /// Market data publication port (XPUB - clients subscribe here) + #[serde(default = "default_market_data_pub_port")] + pub market_data_pub_port: u16, + + /// Ingestor work queue port (PUB - publish work with exchange prefix) + #[serde(default = "default_ingestor_work_port")] + pub ingestor_work_port: u16, + + /// Ingestor response port (ROUTER - receives responses from ingestors) + #[serde(default = "default_ingestor_response_port")] + pub ingestor_response_port: u16, + + /// Flink market data endpoint (XSUB - relay subscribes to Flink) + #[serde(default = "default_flink_market_data_endpoint")] + pub flink_market_data_endpoint: String, + + /// Request timeout in seconds + #[serde(default = "default_request_timeout_secs")] + pub request_timeout_secs: u64, + + /// High water mark for sockets + #[serde(default = "default_hwm")] + pub high_water_mark: i32, +} + +fn default_bind_address() -> String { + "tcp://*".to_string() +} + +fn default_client_request_port() -> u16 { + 5559 +} + +fn default_market_data_pub_port() -> u16 { + 5558 +} + +fn default_ingestor_work_port() -> u16 { + 5555 +} + +fn default_ingestor_response_port() -> u16 { + 5556 +} + +fn default_flink_market_data_endpoint() -> String { + "tcp://flink-jobmanager:5557".to_string() +} + +fn default_request_timeout_secs() -> u64 { + 30 +} + +fn default_hwm() -> i32 { + 10000 +} + +impl Default for Config { + fn default() -> Self { + Self { + bind_address: default_bind_address(), + client_request_port: default_client_request_port(), + market_data_pub_port: default_market_data_pub_port(), + ingestor_work_port: default_ingestor_work_port(), + ingestor_response_port: default_ingestor_response_port(), + 
flink_market_data_endpoint: default_flink_market_data_endpoint(), + request_timeout_secs: default_request_timeout_secs(), + high_water_mark: default_hwm(), + } + } +} + +impl Config { + pub fn from_file(path: &str) -> Result { + let contents = fs::read_to_string(path)?; + let config: Config = serde_yaml::from_str(&contents)?; + Ok(config) + } + + pub fn from_env() -> Result { + let config_path = std::env::var("CONFIG_PATH") + .unwrap_or_else(|_| "/config/config.yaml".to_string()); + + if std::path::Path::new(&config_path).exists() { + Self::from_file(&config_path) + } else { + Ok(Self::default()) + } + } +} diff --git a/relay/src/main.rs b/relay/src/main.rs new file mode 100644 index 0000000..3cd9279 --- /dev/null +++ b/relay/src/main.rs @@ -0,0 +1,47 @@ +mod config; +mod relay; +mod proto; + +use anyhow::Result; +use config::Config; +use relay::Relay; +use tracing::{info, error}; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + +#[tokio::main] +async fn main() -> Result<()> { + // Initialize tracing + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "relay=info".into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + info!("Starting Stateless ZMQ Relay Gateway"); + info!("Architecture: Async event-driven with pub/sub notifications"); + + // Load configuration + let config = Config::from_env()?; + info!("Configuration loaded: {:?}", config); + + // Create and run stateless relay + let relay = Relay::new(config)?; + + // Handle shutdown signals + tokio::select! 
{ + result = relay.run() => { + match result { + Ok(_) => info!("Relay stopped gracefully"), + Err(e) => error!("Relay error: {}", e), + } + } + _ = tokio::signal::ctrl_c() => { + info!("Received shutdown signal"); + } + } + + info!("ZMQ Relay Gateway stopped"); + Ok(()) +} diff --git a/relay/src/proto.rs b/relay/src/proto.rs new file mode 100644 index 0000000..52fdf31 --- /dev/null +++ b/relay/src/proto.rs @@ -0,0 +1,3 @@ +// Include generated protobuf code from build.rs +// Since proto files have no package declaration, they're all in _.rs +include!(concat!(env!("OUT_DIR"), "/_.rs")); diff --git a/relay/src/relay.rs b/relay/src/relay.rs new file mode 100644 index 0000000..c9437b6 --- /dev/null +++ b/relay/src/relay.rs @@ -0,0 +1,323 @@ +use crate::config::Config; +use crate::proto; +use anyhow::{Context, Result}; +use prost::Message; +use tracing::{debug, error, info, warn}; + +const PROTOCOL_VERSION: u8 = 0x01; +const MSG_TYPE_SUBMIT_REQUEST: u8 = 0x10; +const MSG_TYPE_SUBMIT_RESPONSE: u8 = 0x11; +const MSG_TYPE_DATA_REQUEST: u8 = 0x01; +const MSG_TYPE_HISTORY_READY: u8 = 0x12; + +pub struct Relay { + config: Config, + context: zmq::Context, +} + +impl Relay { + pub fn new(config: Config) -> Result { + let context = zmq::Context::new(); + + Ok(Self { + config, + context, + }) + } + + pub async fn run(self) -> Result<()> { + info!("Initializing Stateless ZMQ Relay"); + + // Bind sockets + let client_request_socket = self.create_client_request_socket()?; + let market_data_frontend = self.create_market_data_frontend()?; + let market_data_backend = self.create_market_data_backend()?; + let ingestor_work_socket = self.create_ingestor_work_socket()?; + + info!("All sockets initialized successfully - relay is STATELESS"); + info!("No pending requests tracked - all async via pub/sub"); + + // Run main loop + tokio::task::spawn_blocking(move || { + Self::proxy_loop( + client_request_socket, + market_data_frontend, + market_data_backend, + ingestor_work_socket, + ) + }) + 
.await? + } + + fn create_client_request_socket(&self) -> Result { + let socket = self.context.socket(zmq::ROUTER)?; + socket.set_sndhwm(self.config.high_water_mark)?; + socket.set_rcvhwm(self.config.high_water_mark)?; + socket.set_linger(1000)?; + + let endpoint = format!("{}:{}", self.config.bind_address, self.config.client_request_port); + socket.bind(&endpoint)?; + info!("Client request socket (ROUTER) bound to {}", endpoint); + info!(" → Accepts SubmitHistoricalRequest, returns SubmitResponse immediately"); + + Ok(socket) + } + + fn create_market_data_frontend(&self) -> Result { + let socket = self.context.socket(zmq::XPUB)?; + socket.set_sndhwm(self.config.high_water_mark)?; + socket.set_xpub_verbose(true)?; + + let endpoint = format!("{}:{}", self.config.bind_address, self.config.market_data_pub_port); + socket.bind(&endpoint)?; + info!("Market data frontend (XPUB) bound to {}", endpoint); + info!(" → Clients subscribe here for HistoryReadyNotification and market data"); + + Ok(socket) + } + + fn create_market_data_backend(&self) -> Result { + let socket = self.context.socket(zmq::XSUB)?; + socket.set_rcvhwm(self.config.high_water_mark)?; + + socket.connect(&self.config.flink_market_data_endpoint)?; + info!("Market data backend (XSUB) connected to {}", self.config.flink_market_data_endpoint); + info!(" → Receives HistoryReadyNotification and market data from Flink"); + + Ok(socket) + } + + fn create_ingestor_work_socket(&self) -> Result { + let socket = self.context.socket(zmq::PUB)?; + socket.set_sndhwm(self.config.high_water_mark)?; + socket.set_linger(1000)?; + + let endpoint = format!("{}:{}", self.config.bind_address, self.config.ingestor_work_port); + socket.bind(&endpoint)?; + info!("Ingestor work queue (PUB) bound to {}", endpoint); + info!(" → Publishes DataRequest with exchange prefix"); + + Ok(socket) + } + + fn proxy_loop( + client_request_socket: zmq::Socket, + market_data_frontend: zmq::Socket, + market_data_backend: zmq::Socket, + 
ingestor_work_socket: zmq::Socket, + ) -> Result<()> { + let mut items = [ + client_request_socket.as_poll_item(zmq::POLLIN), + market_data_frontend.as_poll_item(zmq::POLLIN), + market_data_backend.as_poll_item(zmq::POLLIN), + ]; + + info!("Entering stateless proxy loop"); + + loop { + // Poll with 100ms timeout + zmq::poll(&mut items, 100) + .context("Failed to poll sockets")?; + + // Handle client request submissions + if items[0].is_readable() { + if let Err(e) = Self::handle_client_submission( + &client_request_socket, + &ingestor_work_socket, + ) { + error!("Error handling client submission: {}", e); + } + } + + // Handle market data subscriptions from clients (XPUB → XSUB) + if items[1].is_readable() { + if let Err(e) = Self::proxy_subscription(&market_data_frontend, &market_data_backend) { + error!("Error proxying subscription: {}", e); + } + } + + // Handle market data from Flink (XSUB → XPUB) + // This includes HistoryReadyNotification and regular market data + if items[2].is_readable() { + if let Err(e) = Self::proxy_market_data(&market_data_backend, &market_data_frontend) { + error!("Error proxying market data: {}", e); + } + } + } + } + + fn handle_client_submission( + client_socket: &zmq::Socket, + ingestor_socket: &zmq::Socket, + ) -> Result<()> { + // Receive from client: [identity][empty][version][message] + let identity = client_socket.recv_bytes(0)?; + let _empty = client_socket.recv_bytes(0)?; + let version_frame = client_socket.recv_bytes(0)?; + let message_frame = client_socket.recv_bytes(0)?; + + if version_frame.len() != 1 || version_frame[0] != PROTOCOL_VERSION { + warn!("Invalid protocol version from client"); + return Ok(()); + } + + if message_frame.is_empty() { + warn!("Empty message frame from client"); + return Ok(()); + } + + let msg_type = message_frame[0]; + let payload = &message_frame[1..]; + + debug!("Received client submission: type=0x{:02x}, payload_len={}", msg_type, payload.len()); + + match msg_type { + 
MSG_TYPE_SUBMIT_REQUEST => { + Self::handle_submit_request( + identity, + payload, + client_socket, + ingestor_socket, + )?; + } + _ => { + warn!("Unknown message type from client: 0x{:02x}", msg_type); + } + } + + Ok(()) + } + + fn handle_submit_request( + client_identity: Vec, + payload: &[u8], + client_socket: &zmq::Socket, + ingestor_socket: &zmq::Socket, + ) -> Result<()> { + // Parse protobuf request + let request = proto::SubmitHistoricalRequest::decode(payload) + .context("Failed to parse SubmitHistoricalRequest")?; + + let request_id = request.request_id.clone(); + let ticker = request.ticker.clone(); + let client_id = request.client_id.clone(); + + info!("Handling request submission: request_id={}, ticker={}, client_id={:?}", + request_id, ticker, client_id); + + // Extract exchange prefix from ticker + let exchange_prefix = ticker.split(':').next() + .map(|s| format!("{}:", s)) + .unwrap_or_else(|| String::from("")); + + if exchange_prefix.is_empty() { + warn!("Ticker '{}' missing exchange prefix", ticker); + } + + // Build DataRequest protobuf for ingestors + let data_request = proto::DataRequest { + request_id: request_id.clone(), + r#type: proto::data_request::RequestType::HistoricalOhlc as i32, + ticker: ticker.clone(), + historical: Some(proto::HistoricalParams { + start_time: request.start_time, + end_time: request.end_time, + period_seconds: request.period_seconds, + limit: request.limit, + }), + realtime: None, + client_id: client_id.clone(), + }; + + let mut data_request_bytes = Vec::new(); + data_request.encode(&mut data_request_bytes)?; + + // Publish to ingestors with exchange prefix + let version_frame = vec![PROTOCOL_VERSION]; + let mut message_frame = vec![MSG_TYPE_DATA_REQUEST]; + message_frame.extend_from_slice(&data_request_bytes); + + ingestor_socket.send(&exchange_prefix, zmq::SNDMORE)?; + ingestor_socket.send(&version_frame, zmq::SNDMORE)?; + ingestor_socket.send(&message_frame, 0)?; + + info!("Published to ingestors: prefix={}, 
request_id={}", exchange_prefix, request_id); + + // Build SubmitResponse protobuf + // NOTE: This topic is DETERMINISTIC based on client-generated values. + // Client should have already subscribed to this topic BEFORE sending the request + // to prevent race condition where notification arrives before client subscribes. + let notification_topic = if let Some(cid) = &client_id { + format!("RESPONSE:{}", cid) + } else { + format!("HISTORY_READY:{}", request_id) + }; + + let response = proto::SubmitResponse { + request_id: request_id.clone(), + status: proto::submit_response::SubmitStatus::Queued as i32, + error_message: None, + notification_topic: notification_topic.clone(), + }; + + let mut response_bytes = Vec::new(); + response.encode(&mut response_bytes)?; + + // Send immediate response to client + let version_frame = vec![PROTOCOL_VERSION]; + let mut message_frame = vec![MSG_TYPE_SUBMIT_RESPONSE]; + message_frame.extend_from_slice(&response_bytes); + + client_socket.send(&client_identity, zmq::SNDMORE)?; + client_socket.send(&[] as &[u8], zmq::SNDMORE)?; + client_socket.send(&version_frame, zmq::SNDMORE)?; + client_socket.send(&message_frame, 0)?; + + info!("Sent SubmitResponse to client: request_id={}, topic={}", request_id, notification_topic); + + // Relay is now DONE with this request - completely stateless! 
+ // Client will receive notification via pub/sub when Flink publishes HistoryReadyNotification + + Ok(()) + } + + fn proxy_subscription( + frontend: &zmq::Socket, + backend: &zmq::Socket, + ) -> Result<()> { + // Forward subscription message from XPUB to XSUB + let msg = frontend.recv_bytes(0)?; + backend.send(&msg, 0)?; + + if !msg.is_empty() { + let action = if msg[0] == 1 { "subscribe" } else { "unsubscribe" }; + let topic = String::from_utf8_lossy(&msg[1..]); + debug!("Client {} to topic: {}", action, topic); + } + + Ok(()) + } + + fn proxy_market_data( + backend: &zmq::Socket, + frontend: &zmq::Socket, + ) -> Result<()> { + // Forward all messages from XSUB to XPUB (zero-copy proxy) + // This includes: + // - Regular market data (ticks, OHLC) + // - HistoryReadyNotification from Flink + loop { + let msg = backend.recv_bytes(0)?; + let more = backend.get_rcvmore()?; + + if more { + frontend.send(&msg, zmq::SNDMORE)?; + } else { + frontend.send(&msg, 0)?; + break; + } + } + + Ok(()) + } +} diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..90029b5 --- /dev/null +++ b/test/README.md @@ -0,0 +1,109 @@ +# Test Clients + +Test clients for the DexOrder trading system. + +## History Client + +Tests the historical OHLC data request/response pattern between clients, Flink, and ingestors. + +### Quick Start + +```bash +cd history_client +./run-test.sh +``` + +This will: +1. Start all required services (Kafka, Flink, Ingestor) +2. Wait for services to initialize +3. Run the test client to query historical data +4. Display the results + +### What it tests + +- **ZMQ Communication**: Client → Flink REQ/REP pattern (port 5559) +- **Work Distribution**: Flink → Ingestor PUB/SUB with exchange prefix filtering (port 5555) +- **Response Channel**: Ingestor → Flink DEALER/ROUTER pattern (port 5556) +- **Data Flow**: Request → Ingestor fetches data → Response back to Flink → Response to client + +### Expected Flow + +1. 
**Client** sends OHLCRequest to Flink (REQ/REP) + - Ticker: `BINANCE:BTC/USDT` + - Period: 3600s (1 hour) + - Range: Jan 1-7, 2026 + +2. **Flink** publishes DataRequest to ingestor work queue (PUB/SUB) + - Topic prefix: `BINANCE:` + - Any ingestor subscribed to BINANCE can respond + +3. **Ingestor** receives request, fetches data, sends back response + - Uses CCXT to fetch from exchange + - Sends DataResponse via DEALER socket + - Also writes to Kafka for Flink processing + +4. **Flink** receives response, sends back to client + - Matches response by request_id + - Returns data or error to waiting client + +### Manual Testing + +Run the Python client directly: + +```bash +cd history_client +pip install pyzmq +python client.py +``` + +Edit `client.py` to customize: +- Flink hostname and port +- Ticker symbol +- Time range +- Period (e.g., 3600 for 1h, 86400 for 1d) + +## Docker Compose Profiles + +The test client uses a Docker Compose profile to avoid starting automatically: + +```bash +# Start all services +docker-compose up -d + +# Run test client +docker-compose --profile test up history-test-client + +# Or start everything including test +docker-compose --profile test up +``` + +## Troubleshooting + +### Service logs + +```bash +docker-compose logs -f ingestor +docker-compose logs -f flink-jobmanager +``` + +### Check ZMQ ports + +```bash +# From inside Flink container +netstat -tlnp | grep 555 +``` + +### Verify ingestor subscriptions + +Check ingestor logs for: +``` +Subscribed to exchange prefix: BINANCE: +Subscribed to exchange prefix: COINBASE: +``` + +### Test without Docker + +1. Start Kafka: `docker-compose up -d kafka` +2. Build and run Flink app locally +3. Run ingestor: `cd ingestor && npm start` +4. 
Run test: `cd test/history_client && python client.py` diff --git a/test/history_client/Dockerfile b/test/history_client/Dockerfile new file mode 100644 index 0000000..d1ed774 --- /dev/null +++ b/test/history_client/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies for the OHLCClient library +RUN pip install --no-cache-dir \ + pyzmq \ + protobuf>=4.25.0 \ + pyiceberg>=0.6.0 \ + pyarrow>=14.0.0 \ + pandas>=2.0.0 \ + pyyaml>=6.0 + +# Copy test scripts +COPY client.py . +COPY client_async.py . +COPY client_ohlc_api.py . + +# Make them executable +RUN chmod +x *.py + +# Default command uses the new OHLCClient-based test +CMD ["python", "client_ohlc_api.py"] diff --git a/test/history_client/README.md b/test/history_client/README.md new file mode 100644 index 0000000..6fc4415 --- /dev/null +++ b/test/history_client/README.md @@ -0,0 +1,46 @@ +# Historical Data Test Client + +Simple ZMQ client to test historical OHLC data retrieval from Flink. + +## Usage + +### Run with Docker Compose + +The client is included in the docker-compose.yml. To run it: + +```bash +cd redesign +docker-compose up history-test-client +``` + +### Run locally + +```bash +pip install pyzmq +python client.py +``` + +## What it does + +1. Connects to Flink's client request endpoint (REQ/REP on port 5559) +2. Requests 1-hour OHLC candles for BINANCE:BTC/USDT +3. Time range: January 1-7, 2026 (168 candles) +4. Waits for Flink to respond (up to 30 seconds) +5. 
Displays the response status and sample data + +## Protocol + +Uses the ZMQ message format: +- Frame 1: Protocol version byte (0x01) +- Frame 2: Message type (0x07 = OHLCRequest) + protobuf payload + +Expected response: +- Frame 1: Protocol version byte (0x01) +- Frame 2: Message type (0x08 = Response) + protobuf payload + +## Configuration + +Edit `client.py` to change: +- `flink_host`: Flink hostname (default: 'localhost') +- `client_request_port`: Port number (default: 5559) +- Query parameters: ticker, time range, period, limit diff --git a/test/history_client/client.py b/test/history_client/client.py new file mode 100644 index 0000000..3b3c2bf --- /dev/null +++ b/test/history_client/client.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Simple ZMQ client to query historical OHLC data via the Relay gateway. +Tests the request-response pattern for historical data retrieval. +""" + +import zmq +import struct +import json +import time +from datetime import datetime, timezone + +# Protocol constants +PROTOCOL_VERSION = 0x01 +MSG_TYPE_OHLC_REQUEST = 0x07 +MSG_TYPE_RESPONSE = 0x08 + +class HistoryClient: + def __init__(self, relay_host='relay', client_request_port=5559): + self.context = zmq.Context() + self.socket = None + self.relay_endpoint = f"tcp://{relay_host}:{client_request_port}" + + def connect(self): + """Connect to Relay's client request endpoint (REQ/REP)""" + self.socket = self.context.socket(zmq.REQ) + self.socket.connect(self.relay_endpoint) + print(f"Connected to Relay at {self.relay_endpoint}") + + def request_historical_ohlc(self, ticker, start_time, end_time, period_seconds, limit=None): + """ + Request historical OHLC data via Relay. 
+ + Args: + ticker: Market identifier (e.g., "BINANCE:BTC/USDT") + start_time: Start timestamp in microseconds since epoch + end_time: End timestamp in microseconds since epoch + period_seconds: OHLC period in seconds (e.g., 3600 for 1h) + limit: Optional limit on number of candles + + Returns: + Response dict with status, data, etc. + """ + request_id = f"test-{int(time.time() * 1000)}" + + # Build OHLCRequest message (simplified - would use protobuf in production) + request = { + 'request_id': request_id, + 'ticker': ticker, + 'start_time': start_time, + 'end_time': end_time, + 'period_seconds': period_seconds + } + + if limit: + request['limit'] = limit + + print(f"\n=== Sending OHLCRequest ===") + print(f"Request ID: {request_id}") + print(f"Ticker: {ticker}") + print(f"Period: {period_seconds}s ({period_seconds // 3600}h)") + print(f"Start: {datetime.fromtimestamp(start_time / 1_000_000, tz=timezone.utc).isoformat()}") + print(f"End: {datetime.fromtimestamp(end_time / 1_000_000, tz=timezone.utc).isoformat()}") + if limit: + print(f"Limit: {limit}") + + # Encode request (placeholder - would use actual protobuf) + request_data = json.dumps(request).encode('utf-8') + + # Send message: [version byte] [type byte + data] + version_frame = struct.pack('B', PROTOCOL_VERSION) + message_frame = struct.pack('B', MSG_TYPE_OHLC_REQUEST) + request_data + + self.socket.send_multipart([version_frame, message_frame]) + print("\n⏳ Waiting for response via Relay...") + + # Receive response with timeout + if self.socket.poll(30000): # 30 second timeout + response_frames = self.socket.recv_multipart() + return self._parse_response(response_frames) + else: + print("❌ Request timed out (30s)") + return None + + def _parse_response(self, frames): + """Parse response frames via Relay""" + if len(frames) != 2: + print(f"❌ Invalid response: expected 2 frames, got {len(frames)}") + return None + + version_frame = frames[0] + message_frame = frames[1] + + if len(version_frame) != 1: + 
print(f"❌ Invalid version frame length: {len(version_frame)}") + return None + + version = struct.unpack('B', version_frame)[0] + if version != PROTOCOL_VERSION: + print(f"❌ Unsupported protocol version: {version}") + return None + + if len(message_frame) < 1: + print(f"❌ Invalid message frame length: {len(message_frame)}") + return None + + msg_type = message_frame[0] + msg_data = message_frame[1:] + + print(f"\n=== Received Response ===") + print(f"Protocol version: {version}") + print(f"Message type: 0x{msg_type:02x}") + + if msg_type != MSG_TYPE_RESPONSE: + print(f"❌ Unexpected message type: expected 0x{MSG_TYPE_RESPONSE:02x}, got 0x{msg_type:02x}") + return None + + # Parse response (placeholder - would use actual protobuf) + try: + response = json.loads(msg_data.decode('utf-8')) + + print(f"Request ID: {response.get('request_id', 'N/A')}") + print(f"Status: {response.get('status', 'UNKNOWN')}") + + if response.get('error_message'): + print(f"Error: {response['error_message']}") + + data = response.get('data', []) + total_records = response.get('total_records', len(data)) + + print(f"Total records: {total_records}") + print(f"Is final: {response.get('is_final', True)}") + + if data and len(data) > 0: + print(f"\n📊 Sample data (first 3 records):") + for i, record in enumerate(data[:3]): + print(f" {i+1}. 
{record}") + + return response + + except json.JSONDecodeError as e: + print(f"❌ Failed to parse response JSON: {e}") + print(f"Raw data: {msg_data[:100]}...") + return None + + def close(self): + """Close the connection""" + if self.socket: + self.socket.close() + self.context.term() + print("\n🔌 Connection closed") + + +def main(): + """Test the historical data request""" + + # Create client + client = HistoryClient(relay_host='relay', client_request_port=5559) + + try: + # Connect to Relay + client.connect() + + # Request BINANCE:BTC/USDT 1h candles for first 7 days of January 2026 + # January 1, 2026 00:00:00 UTC = 1735689600 seconds = 1735689600000000 microseconds + # January 7, 2026 23:59:59 UTC = 1736294399 seconds = 1736294399000000 microseconds + + start_time_us = 1735689600 * 1_000_000 # Jan 1, 2026 00:00:00 UTC + end_time_us = 1736294399 * 1_000_000 # Jan 7, 2026 23:59:59 UTC + + response = client.request_historical_ohlc( + ticker='BINANCE:BTC/USDT', + start_time=start_time_us, + end_time=end_time_us, + period_seconds=3600, # 1 hour + limit=168 # 7 days * 24 hours = 168 candles + ) + + if response: + print("\n✅ Request completed successfully!") + status = response.get('status', 'UNKNOWN') + if status == 'OK': + print(f"📈 Received {response.get('total_records', 0)} candles") + else: + print(f"⚠️ Request status: {status}") + else: + print("\n❌ Request failed!") + + except KeyboardInterrupt: + print("\n\n⚠️ Interrupted by user") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() + finally: + client.close() + + +if __name__ == '__main__': + main() diff --git a/test/history_client/client_async.py b/test/history_client/client_async.py new file mode 100644 index 0000000..1dfd60d --- /dev/null +++ b/test/history_client/client_async.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python3 +""" +Async ZMQ client for historical OHLC data requests via Relay gateway. 
+Uses async pub/sub pattern: submit request → wait for notification → query Iceberg +""" + +import zmq +import struct +import json +import time +import uuid +from datetime import datetime, timezone + +# Protocol constants +PROTOCOL_VERSION = 0x01 +MSG_TYPE_SUBMIT_REQUEST = 0x10 +MSG_TYPE_SUBMIT_RESPONSE = 0x11 +MSG_TYPE_HISTORY_READY = 0x12 + +class AsyncHistoryClient: + def __init__(self, relay_host='relay', request_port=5559, data_port=5558): + self.context = zmq.Context() + self.request_socket = None + self.subscribe_socket = None + self.relay_endpoint_req = f"tcp://{relay_host}:{request_port}" + self.relay_endpoint_sub = f"tcp://{relay_host}:{data_port}" + self.client_id = f"client-{uuid.uuid4().hex[:8]}" + + def connect(self): + """Connect to Relay endpoints""" + # REQ socket for submitting requests (gets immediate ack) + self.request_socket = self.context.socket(zmq.REQ) + self.request_socket.connect(self.relay_endpoint_req) + print(f"Connected REQ socket to Relay at {self.relay_endpoint_req}") + + # SUB socket for receiving notifications + self.subscribe_socket = self.context.socket(zmq.SUB) + self.subscribe_socket.connect(self.relay_endpoint_sub) + + # CRITICAL: Subscribe to our client-specific response topic BEFORE submitting any requests + # This prevents race condition where notification arrives before we subscribe. + # The notification topic is deterministic: RESPONSE:{client_id} (we generate client_id) + response_topic = f"RESPONSE:{self.client_id}" + self.subscribe_socket.subscribe(response_topic.encode()) + print(f"Connected SUB socket to Relay at {self.relay_endpoint_sub}") + print(f"Subscribed to topic: {response_topic}") + print(f"✓ Safe to submit requests - already subscribed to notifications") + + def request_historical_ohlc(self, ticker, start_time, end_time, period_seconds, limit=None, timeout_secs=60): + """ + Request historical OHLC data (async pattern). + + Flow: + 1. Submit request → get immediate ack with request_id + 2. 
Wait for HistoryReadyNotification on pub/sub + 3. Query Iceberg with the table information (or notification includes data) + + Args: + ticker: Market identifier (e.g., "BINANCE:BTC/USDT") + start_time: Start timestamp in microseconds since epoch + end_time: End timestamp in microseconds since epoch + period_seconds: OHLC period in seconds (e.g., 3600 for 1h) + limit: Optional limit on number of candles + timeout_secs: How long to wait for notification (default 60s) + + Returns: + Notification dict or None on timeout + """ + # Generate request ID + request_id = f"{self.client_id}-{int(time.time() * 1000)}" + + # Build SubmitHistoricalRequest + request = { + 'request_id': request_id, + 'ticker': ticker, + 'start_time': start_time, + 'end_time': end_time, + 'period_seconds': period_seconds, + 'client_id': self.client_id, # For response routing + } + + if limit: + request['limit'] = limit + + print(f"\n=== Step 1: Submitting Request ===") + print(f"Request ID: {request_id}") + print(f"Ticker: {ticker}") + print(f"Period: {period_seconds}s ({period_seconds // 3600}h)") + print(f"Start: {datetime.fromtimestamp(start_time / 1_000_000, tz=timezone.utc).isoformat()}") + print(f"End: {datetime.fromtimestamp(end_time / 1_000_000, tz=timezone.utc).isoformat()}") + print(f"Client ID: {self.client_id}") + if limit: + print(f"Limit: {limit}") + + # Encode request + request_data = json.dumps(request).encode('utf-8') + + # Send: [version byte] [type byte + data] + version_frame = struct.pack('B', PROTOCOL_VERSION) + message_frame = struct.pack('B', MSG_TYPE_SUBMIT_REQUEST) + request_data + + self.request_socket.send_multipart([version_frame, message_frame]) + + # Receive immediate SubmitResponse + if self.request_socket.poll(5000): # 5 second timeout for ack + response_frames = self.request_socket.recv_multipart() + submit_response = self._parse_submit_response(response_frames) + + if not submit_response or submit_response.get('status') != 'QUEUED': + print(f"❌ Request submission 
failed: {submit_response}") + return None + + print(f"\n✅ Request queued successfully") + print(f"Notification topic: {submit_response.get('notification_topic')}") + else: + print("❌ Timeout waiting for submit response") + return None + + # Step 2: Wait for HistoryReadyNotification + print(f"\n=== Step 2: Waiting for Notification ===") + print(f"⏳ Waiting up to {timeout_secs}s for HistoryReadyNotification...") + print(f" (Ingestor fetches → Kafka → Flink → Iceberg → Notification)") + + if self.subscribe_socket.poll(timeout_secs * 1000): + notification_frames = self.subscribe_socket.recv_multipart() + notification = self._parse_history_ready(notification_frames) + + if notification: + print(f"\n=== Step 3: Notification Received ===") + return notification + else: + print("❌ Failed to parse notification") + return None + else: + print(f"\n❌ Timeout waiting for notification ({timeout_secs}s)") + print(" Possible reasons:") + print(" - Ingestor still fetching data from exchange") + print(" - Flink still processing Kafka stream") + print(" - Flink writing to Iceberg") + return None + + def _parse_submit_response(self, frames): + """Parse SubmitResponse from relay""" + if len(frames) != 2: + print(f"❌ Invalid submit response: expected 2 frames, got {len(frames)}") + return None + + version_frame = frames[0] + message_frame = frames[1] + + if len(version_frame) != 1: + return None + + version = struct.unpack('B', version_frame)[0] + if version != PROTOCOL_VERSION: + print(f"❌ Unsupported protocol version: {version}") + return None + + if len(message_frame) < 1: + return None + + msg_type = message_frame[0] + msg_data = message_frame[1:] + + if msg_type != MSG_TYPE_SUBMIT_RESPONSE: + print(f"❌ Unexpected message type: 0x{msg_type:02x}") + return None + + try: + response = json.loads(msg_data.decode('utf-8')) + return response + except json.JSONDecodeError as e: + print(f"❌ Failed to parse response: {e}") + return None + + def _parse_history_ready(self, frames): + """Parse 
HistoryReadyNotification from Flink via relay""" + # Topic frame + message frames + if len(frames) < 2: + print(f"❌ Invalid notification: expected at least 2 frames, got {len(frames)}") + return None + + topic_frame = frames[0] + + # Find version and message frames (may have multiple frames) + # Typically: [topic][version][message] + if len(frames) == 3: + version_frame = frames[1] + message_frame = frames[2] + else: + # Handle multi-part message + version_frame = frames[1] + message_frame = frames[2] + + topic = topic_frame.decode('utf-8') + print(f"📬 Received on topic: {topic}") + + if len(version_frame) != 1: + print(f"❌ Invalid version frame") + return None + + version = struct.unpack('B', version_frame)[0] + if version != PROTOCOL_VERSION: + print(f"❌ Unsupported protocol version: {version}") + return None + + if len(message_frame) < 1: + print(f"❌ Empty message frame") + return None + + msg_type = message_frame[0] + msg_data = message_frame[1:] + + print(f"Message type: 0x{msg_type:02x}") + + if msg_type != MSG_TYPE_HISTORY_READY: + print(f"⚠️ Unexpected message type: expected 0x{MSG_TYPE_HISTORY_READY:02x}, got 0x{msg_type:02x}") + + try: + notification = json.loads(msg_data.decode('utf-8')) + + print(f"\nRequest ID: {notification.get('request_id')}") + print(f"Status: {notification.get('status')}") + print(f"Ticker: {notification.get('ticker')}") + print(f"Period: {notification.get('period_seconds')}s") + + if notification.get('error_message'): + print(f"❌ Error: {notification['error_message']}") + + if notification.get('status') == 'OK': + print(f"✅ Data ready in Iceberg") + print(f" Namespace: {notification.get('iceberg_namespace', 'N/A')}") + print(f" Table: {notification.get('iceberg_table', 'N/A')}") + print(f" Row count: {notification.get('row_count', 0)}") + completed_at = notification.get('completed_at') + if completed_at: + ts = datetime.fromtimestamp(completed_at / 1_000_000, tz=timezone.utc) + print(f" Completed at: {ts.isoformat()}") + + return 
notification + + except json.JSONDecodeError as e: + print(f"❌ Failed to parse notification: {e}") + print(f"Raw data: {msg_data[:200]}...") + return None + + def close(self): + """Close connections""" + if self.request_socket: + self.request_socket.close() + if self.subscribe_socket: + self.subscribe_socket.close() + self.context.term() + print("\n🔌 Connection closed") + + +def main(): + """Test the async historical data request pattern""" + + client = AsyncHistoryClient(relay_host='relay', request_port=5559, data_port=5558) + + try: + # Connect + client.connect() + + # Request BINANCE:BTC/USDT 1h candles for first 7 days of January 2026 + start_time_us = 1735689600 * 1_000_000 # Jan 1, 2026 00:00:00 UTC + end_time_us = 1736294399 * 1_000_000 # Jan 7, 2026 23:59:59 UTC + + notification = client.request_historical_ohlc( + ticker='BINANCE:BTC/USDT', + start_time=start_time_us, + end_time=end_time_us, + period_seconds=3600, # 1 hour + limit=168, # 7 days * 24 hours + timeout_secs=60 + ) + + if notification: + status = notification.get('status') + if status == 'OK': + print(f"\n🎉 Success! Data is ready in Iceberg") + print(f"📊 Query Iceberg to retrieve {notification.get('row_count', 0)} records") + print(f"\nNext steps:") + print(f" 1. Connect to Iceberg") + print(f" 2. Query table: {notification.get('iceberg_table')}") + print(f" 3. 
Filter by time range and ticker") + elif status == 'NOT_FOUND': + print(f"\n⚠️ No data found for the requested period") + elif status == 'ERROR': + print(f"\n❌ Error: {notification.get('error_message')}") + elif status == 'TIMEOUT': + print(f"\n⏱️ Request timed out on server side") + else: + print("\n❌ Request failed or timed out") + + except KeyboardInterrupt: + print("\n\n⚠️ Interrupted by user") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() + finally: + client.close() + + +if __name__ == '__main__': + main() diff --git a/test/history_client/client_ohlc_api.py b/test/history_client/client_ohlc_api.py new file mode 100755 index 0000000..0a30929 --- /dev/null +++ b/test/history_client/client_ohlc_api.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +""" +Simple test client using the high-level OHLCClient API. +Demonstrates smart caching - checks Iceberg first, requests missing data automatically. +""" + +import asyncio +import sys +import os +from datetime import datetime, timezone + +# Add client library to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../client-py')) + +from dexorder import OHLCClient + + +async def main(): + """ + Test the high-level OHLC client API with smart caching. 
+ """ + + print("=== DexOrder OHLC Client Test ===\n") + + # Initialize client + client = OHLCClient( + iceberg_catalog_uri="http://localhost:8181", + relay_endpoint="tcp://localhost:5559", # Client request port + notification_endpoint="tcp://localhost:5558", # Market data pub port + namespace="trading", + s3_endpoint="http://localhost:9000", # Port-forwarded MinIO + s3_access_key="minio", + s3_secret_key="minio123", + ) + + try: + # Start background notification listener + await client.start() + print("✅ Client started\n") + + # Request parameters + ticker = "BINANCE:BTC/USDT" + period_seconds = 3600 # 1-hour candles + + # Request 7 days of data (Jan 1-7, 2026) + start_time_us = 1735689600 * 1_000_000 # Jan 1, 2026 00:00:00 UTC + end_time_us = 1736294399 * 1_000_000 # Jan 7, 2026 23:59:59 UTC + + start_dt = datetime.fromtimestamp(start_time_us / 1_000_000, tz=timezone.utc) + end_dt = datetime.fromtimestamp(end_time_us / 1_000_000, tz=timezone.utc) + + print(f"Requesting data:") + print(f" Ticker: {ticker}") + print(f" Period: {period_seconds}s ({period_seconds // 3600}h)") + print(f" Start: {start_dt.isoformat()}") + print(f" End: {end_dt.isoformat()}") + print(f" Expected candles: ~{(end_time_us - start_time_us) // (period_seconds * 1_000_000)}") + print() + + # Fetch OHLC data (automatically handles caching) + print("⏳ Fetching data (checking cache, requesting if needed)...\n") + + df = await client.fetch_ohlc( + ticker=ticker, + period_seconds=period_seconds, + start_time=start_time_us, + end_time=end_time_us, + request_timeout=60.0 + ) + + # Display results + print(f"✅ Success! 
Fetched {len(df)} candles\n") + + if not df.empty: + print("First 5 candles:") + print(df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].head()) + print() + + print("Last 5 candles:") + print(df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].tail()) + print() + + # Data quality check + expected_count = (end_time_us - start_time_us) // (period_seconds * 1_000_000) + actual_count = len(df) + coverage = (actual_count / expected_count) * 100 if expected_count > 0 else 0 + + print(f"Data coverage: {coverage:.1f}% ({actual_count}/{expected_count} candles)") + + if coverage < 100: + print(f"⚠️ Missing {expected_count - actual_count} candles") + else: + print("✅ Complete data coverage") + + else: + print("⚠️ No data returned") + + except asyncio.TimeoutError: + print("\n❌ Request timed out") + print("Possible reasons:") + print(" - Ingestor still fetching from exchange") + print(" - Flink processing backlog") + print(" - Network issues") + + except ValueError as e: + print(f"\n❌ Request failed: {e}") + + except ConnectionError as e: + print(f"\n❌ Connection error: {e}") + print("Make sure relay and Flink are running") + + except KeyboardInterrupt: + print("\n\n⚠️ Interrupted by user") + + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + + finally: + await client.stop() + print("\n🔌 Client stopped") + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/test/history_client/run-test.sh b/test/history_client/run-test.sh new file mode 100755 index 0000000..93cdacd --- /dev/null +++ b/test/history_client/run-test.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Script to run the historical data test + +echo "Starting test environment..." +echo "This will start Kafka, Flink, and the Ingestor services" +echo "" + +cd ../.. + +echo "Step 1: Starting core services (Kafka, Flink, Ingestor)..." 
+docker-compose up -d zookeeper kafka postgres flink-jobmanager flink-taskmanager ingestor + +echo "" +echo "Step 2: Waiting for services to be ready (30 seconds)..." +sleep 30 + +echo "" +echo "Step 3: Running test client..." +docker-compose --profile test up history-test-client + +echo "" +echo "Test complete!" +echo "" +echo "To view logs:" +echo " docker-compose logs ingestor" +echo " docker-compose logs flink-jobmanager" +echo "" +echo "To stop all services:" +echo " docker-compose down" diff --git a/deploy/Dockerfile-web b/web/Dockerfile similarity index 86% rename from deploy/Dockerfile-web rename to web/Dockerfile index e7b4fda..fdd96a7 100644 --- a/deploy/Dockerfile-web +++ b/web/Dockerfile @@ -4,13 +4,13 @@ FROM node:20-alpine WORKDIR /app # Copy package files first for better caching -COPY web/package*.json /app/ +COPY package*.json /app/ # Install dependencies RUN npm install # Copy application code -COPY web /app/ +COPY . /app/ # Expose port EXPOSE 5173