backend redesign

This commit is contained in:
2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

329
protobuf/ingestor.proto Normal file
View File

@@ -0,0 +1,329 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
// Work item describing the data an ingestor should fetch.
// Carried on the Relay → Ingestor work queue.
message DataRequest {
  // Kind of data being requested.
  // HISTORICAL_OHLC is the zero value, so it is what a reader sees when
  // `type` was never set.
  enum RequestType {
    HISTORICAL_OHLC = 0;
    REALTIME_TICKS = 1;
  }

  // Unique ID used to track this request.
  string request_id = 1;

  // Which kind of request this is.
  RequestType type = 2;

  // Market identifier.
  string ticker = 3;

  // Parameters used by historical requests.
  optional HistoricalParams historical = 4;

  // Parameters used by realtime requests.
  optional RealtimeParams realtime = 5;

  // Optional client ID for notification routing (async architecture).
  // Flink uses it to determine the notification topic.
  optional string client_id = 6;
}
// Time range and candle period for a historical OHLC request.
message HistoricalParams {
  // Start of the range, in microseconds since epoch.
  uint64 start_time = 1;

  // End of the range, in microseconds since epoch.
  uint64 end_time = 2;

  // OHLC period in seconds (60 = 1m, 300 = 5m, 3600 = 1h, 86400 = 1d).
  uint32 period_seconds = 3;

  // Optional cap on the number of candles to return.
  optional uint32 limit = 4;
}
// Selects which realtime outputs a realtime request should produce.
message RealtimeParams {
  // True to include tick data.
  bool include_ticks = 1;

  // True to include aggregated OHLC.
  bool include_ohlc = 2;

  // OHLC periods to generate, in seconds
  // (e.g., [60, 300, 900] for 1m, 5m, 15m).
  repeated uint32 ohlc_period_seconds = 3;
}
// Control message sent to ingestors over the Flink → Ingestor control
// channel.
message IngestorControl {
  // Available control actions.
  // NOTE(review): CANCEL is the zero value, so a message whose `action` was
  // never set reads as CANCEL — receivers may want to require `request_id`
  // before acting on it.
  enum ControlAction {
    // Cancel a specific request.
    CANCEL = 0;
    // Graceful shutdown signal.
    SHUTDOWN = 1;
    // Update ingestor configuration.
    CONFIG_UPDATE = 2;
    // Keep-alive signal.
    HEARTBEAT = 3;
  }

  // The action to perform.
  ControlAction action = 1;

  // ID of the request to cancel (used with CANCEL).
  optional string request_id = 2;

  // New configuration values (used with CONFIG_UPDATE).
  optional IngestorConfig config = 3;
}
// Ingestor settings delivered via IngestorControl (CONFIG_UPDATE).
// Every field is optional, so presence of each one can be checked
// individually.
message IngestorConfig {
  // Maximum number of concurrent requests per ingestor.
  optional uint32 max_concurrent = 1;

  // Request timeout, in seconds.
  optional uint32 timeout_seconds = 2;

  // Kafka topic that output is written to.
  optional string kafka_topic = 3;
}
// Historical data response from ingestor to Flink
// (Ingestor → Flink response channel).
//
// Deprecated: the protocol document lists this message (type ID 0x02) as no
// longer used; ingestors write data to Kafka only and do not send a direct
// response. The definition is kept so the field numbers stay reserved for
// existing readers.
message DataResponse {
  option deprecated = true;

  // Outcome of the request.
  // OK is the zero value, so an unset `status` reads as OK.
  enum ResponseStatus {
    OK = 0;
    NOT_FOUND = 1;
    ERROR = 2;
  }

  // ID of the request this message responds to.
  string request_id = 1;

  // Status of the request.
  ResponseStatus status = 2;

  // Error message, set when status is not OK.
  optional string error_message = 3;

  // Serialized OHLC data (one serialized OHLCV protobuf message per entry).
  repeated bytes ohlc_data = 4;

  // Total number of candles returned.
  uint32 total_records = 5;
}
// Submission of a historical OHLC request (Client → Relay).
// The Relay answers immediately with a SubmitResponse that carries the
// request_id.
message SubmitHistoricalRequest {
  // Request ID generated by the client, used for tracking.
  string request_id = 1;

  // Market identifier (e.g., "BINANCE:BTC/USDT").
  string ticker = 2;

  // Start of the range, in microseconds since epoch.
  uint64 start_time = 3;

  // End of the range, in microseconds since epoch.
  uint64 end_time = 4;

  // OHLC period in seconds (60 = 1m, 300 = 5m, 3600 = 1h).
  uint32 period_seconds = 5;

  // Optional cap on the number of candles.
  optional uint32 limit = 6;

  // Optional client ID for notification routing (e.g., "client-abc-123").
  // When set, notifications are published to topic "RESPONSE:{client_id}".
  optional string client_id = 7;
}
// Immediate acknowledgment of a SubmitHistoricalRequest (Relay → Client).
message SubmitResponse {
  // Outcome of the submission.
  // QUEUED is the zero value, so an unset `status` reads as QUEUED.
  enum SubmitStatus {
    // Request queued successfully.
    QUEUED = 0;
    // Request ID already exists.
    DUPLICATE = 1;
    // Invalid parameters.
    INVALID = 2;
    // Internal error.
    ERROR = 3;
  }

  // Request ID, echoed from the request.
  string request_id = 1;

  // Status of the submission.
  SubmitStatus status = 2;

  // Error message, set when status is not QUEUED.
  optional string error_message = 3;

  // Topic to subscribe to for the result notification,
  // e.g., "RESPONSE:client-abc-123" or "HISTORY_READY:{request_id}".
  string notification_topic = 4;
}
// Notification that historical data is ready
// (Flink → Relay → Client via pub/sub).
// Published after Flink writes the data to Iceberg.
message HistoryReadyNotification {
  // Outcome of the data fetch.
  // OK is the zero value, so an unset `status` reads as OK.
  enum NotificationStatus {
    // Data successfully written to Iceberg.
    OK = 0;
    // No data found for the requested period.
    NOT_FOUND = 1;
    // Error during fetch or processing.
    ERROR = 2;
    // Request timed out.
    TIMEOUT = 3;
  }

  // Request ID.
  string request_id = 1;

  // Market identifier.
  string ticker = 2;

  // OHLC period in seconds.
  uint32 period_seconds = 3;

  // Start of the range, in microseconds since epoch.
  uint64 start_time = 4;

  // End of the range, in microseconds since epoch.
  uint64 end_time = 5;

  // Status of the data fetch.
  NotificationStatus status = 6;

  // Error message, set when status is not OK.
  optional string error_message = 7;

  // Field numbers 8 and 9 are not used by this message.

  // Iceberg namespace and table that clients query for the data.
  string iceberg_namespace = 10;
  string iceberg_table = 11;

  // Number of records written.
  uint32 row_count = 12;

  // When the data was written, in microseconds since epoch.
  uint64 completed_at = 13;
}
// Legacy client request for OHLC data (Client → Relay), kept for backward
// compatibility.
//
// Deprecated: the protocol document lists this message (type ID 0x07) as
// replaced by SubmitHistoricalRequest, which carries the same fields plus an
// optional client_id.
message OHLCRequest {
  option deprecated = true;

  // Request ID used for tracking.
  string request_id = 1;

  // Market identifier.
  string ticker = 2;

  // Start of the range, in microseconds since epoch.
  uint64 start_time = 3;

  // End of the range, in microseconds since epoch.
  uint64 end_time = 4;

  // OHLC period in seconds (60 = 1m, 300 = 5m, 3600 = 1h).
  uint32 period_seconds = 5;

  // Optional cap on the number of candles.
  optional uint32 limit = 6;
}
// Generic response for any request (Flink → Client).
//
// Deprecated: the protocol document lists this message (type ID 0x08) as
// replaced by SubmitResponse; results are now announced through topic-based
// notifications, not a routed response.
message Response {
  option deprecated = true;

  // Outcome of the request.
  // OK is the zero value, so an unset `status` reads as OK.
  enum ResponseStatus {
    OK = 0;
    NOT_FOUND = 1;
    ERROR = 2;
  }

  // ID of the request this message responds to.
  string request_id = 1;

  // Status of the request.
  ResponseStatus status = 2;

  // Error message, set when status is not OK.
  optional string error_message = 3;

  // Generic payload (each entry is a serialized protobuf message).
  repeated bytes data = 4;

  // Total number of records.
  optional uint32 total_records = 5;

  // True when this is the final response of a paginated result.
  bool is_final = 6;
}
// Registers a CEP trigger (Client → Flink).
message CEPTriggerRequest {
  // Unique ID of the trigger.
  string trigger_id = 1;

  // Flink SQL CEP pattern/condition.
  string sql_pattern = 2;

  // Markets to monitor.
  repeated string tickers = 3;

  // Callback endpoint (for DEALER/ROUTER routing).
  optional string callback_id = 4;

  // Optional parameters for the CEP query.
  map<string, string> parameters = 5;
}
// Acknowledges a CEP trigger registration (Flink → Client).
message CEPTriggerAck {
  // Outcome of the registration.
  // REGISTERED is the zero value, so an unset `status` reads as REGISTERED.
  enum TriggerStatus {
    REGISTERED = 0;
    ALREADY_REGISTERED = 1;
    INVALID_SQL = 2;
    ERROR = 3;
  }

  // ID of the trigger being acknowledged.
  string trigger_id = 1;

  // Status of the registration.
  TriggerStatus status = 2;

  // Error message, set when the status is not OK.
  optional string error_message = 3;
}
// Callback sent when a CEP trigger fires (Flink → Client).
message CEPTriggerEvent {
  // ID of the trigger that fired.
  string trigger_id = 1;

  // When the trigger fired, in microseconds since epoch.
  uint64 timestamp = 2;

  // Schema describing the result rows.
  ResultSchema schema = 3;

  // Result rows produced by the Flink SQL query.
  repeated ResultRow rows = 4;

  // Additional context from the CEP pattern.
  map<string, string> context = 5;
}
// Column layout of the rows carried by a CEPTriggerEvent.
message ResultSchema {
  // Column names, in order.
  repeated string column_names = 1;

  // Column types, using Flink SQL type names.
  // NOTE(review): presumably index-aligned with column_names — confirm.
  repeated string column_types = 2;
}
// One result row of a CEPTriggerEvent.
message ResultRow {
  // Encoded row data: one bytes entry per column, in schema order.
  // Each entry is a protobuf-serialized FieldValue.
  repeated bytes values = 1;
}
// A single column value; ResultRow.values entries are serialized instances
// of this message. At most one member of the oneof is set.
message FieldValue {
  oneof value {
    // Text value.
    string string_val = 1;
    // Signed 64-bit integer value.
    int64 int_val = 2;
    // Double-precision floating-point value.
    double double_val = 3;
    // Boolean value.
    bool bool_val = 4;
    // Raw bytes value.
    bytes bytes_val = 5;
    // Timestamp value.
    // NOTE(review): presumably microseconds since epoch like the other
    // timestamps in this protocol — confirm.
    uint64 timestamp_val = 6;
  }
}

19
protobuf/market.proto Normal file
View File

@@ -0,0 +1,19 @@
syntax = "proto3";
// Market metadata.
// Carries the denominators by which prices and volumes must be adjusted.
// Field numbers 1, 7 and 8 are not used by this message.
message Market {
  // Exchange identifier, e.g., BINANCE.
  string exchange_id = 2;

  // Market identifier on that exchange, e.g., BTC/USDT.
  string market_id = 3;

  // Market type, e.g., Spot.
  string market_type = 4;

  // Human-readable description, e.g., Bitcoin/Tether on Binance.
  string description = 5;

  // Column names, e.g.,
  // ['open', 'high', 'low', 'close', 'volume', 'taker_vol', 'maker_vol'].
  repeated string column_names = 6;

  // Base asset of the market.
  string base_asset = 9;

  // Quote asset of the market.
  string quote_asset = 10;

  // NOTE(review): presumably the earliest time for which data exists, in
  // microseconds since epoch — confirm.
  uint64 earliest_time = 11;

  // Denominator applied to all OHLC price data.
  uint64 tick_denom = 12;

  // Denominator applied to base asset units.
  uint64 base_denom = 13;

  // Denominator applied to quote asset units.
  uint64 quote_denom = 14;

  // OHLC periods, in seconds, supported for this market.
  repeated uint32 supported_period_seconds = 15;
}

61
protobuf/ohlc.proto Normal file
View File

@@ -0,0 +1,61 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
// One OHLC row (a single candle).
message OHLC {
  // Timestamp in microseconds since epoch.
  uint64 timestamp = 1;

  // Prices and volumes are integers that must be adjusted by the rational
  // denominator provided by the market metadata.
  int64 open = 2;
  int64 high = 3;
  int64 low = 4;
  int64 close = 5;

  // Optional volume figures.
  optional int64 volume = 6;
  optional int64 buy_vol = 7;
  optional int64 sell_vol = 8;

  // NOTE(review): presumably the times at which the open/high/low/close
  // prices occurred; units are not stated here — confirm.
  optional int64 open_time = 9;
  optional int64 high_time = 10;
  optional int64 low_time = 11;
  optional int64 close_time = 12;

  // Optional open interest.
  optional int64 open_interest = 13;

  // Market identifier.
  string ticker = 14;
}
// A batch of OHLC rows plus the metadata used to track the originating
// historical request. Used for Kafka messages from ingestor → Flink.
message OHLCBatch {
  // Tracking metadata for the request this batch belongs to.
  OHLCBatchMetadata metadata = 1;

  // The OHLC rows in this batch.
  repeated OHLC rows = 2;
}
// Tracking metadata that follows a historical data request through the
// pipeline.
message OHLCBatchMetadata {
  // Request ID supplied by the client.
  string request_id = 1;

  // Optional client ID for notification routing.
  optional string client_id = 2;

  // Market identifier.
  string ticker = 3;

  // OHLC period in seconds.
  uint32 period_seconds = 4;

  // Requested time range, in microseconds since epoch.
  uint64 start_time = 5;
  uint64 end_time = 6;

  // Status for marker messages, as a string: OK, NOT_FOUND, or ERROR.
  string status = 7;

  // Error message, set when status is ERROR.
  optional string error_message = 8;
}

168
protobuf/protocol.md Normal file
View File

@@ -0,0 +1,168 @@
# ZeroMQ Protocol Architecture
Our data transfer protocol uses ZeroMQ with Protobufs. We send a small envelope with a protocol version byte as the first frame, then a type ID as the first byte of the second frame, followed by the protobuf payload also in the second frame.
OHLC periods are represented as seconds.
## Data Flow Overview
**Relay as Gateway**: The Relay is a well-known bind point that all components connect to. It routes messages between clients, ingestors, and Flink.
### Historical Data Query Flow (Async Event-Driven Architecture)
* Client generates request_id and/or client_id (both are client-generated)
* Client computes notification topic: `RESPONSE:{client_id}` or `HISTORY_READY:{request_id}`
* **Client subscribes to notification topic BEFORE sending request (prevents race condition)**
* Client sends SubmitHistoricalRequest to Relay (REQ/REP)
* Relay returns immediate SubmitResponse with request_id and notification_topic (for confirmation)
* Relay publishes DataRequest to ingestor work queue with exchange prefix (PUB/SUB)
* Ingestor receives request, fetches data from exchange
* Ingestor writes OHLC data to Kafka with __metadata in first record
* Flink reads from Kafka, processes data, writes to Iceberg
* Flink publishes HistoryReadyNotification to ZMQ PUB socket (port 5557) with deterministic topic
* Relay proxies notification via XSUB → XPUB to clients
* Client receives notification (already subscribed) and queries Iceberg for data
**Key Architectural Change**: Relay is completely stateless. No request/response correlation needed. All notification routing is topic-based (e.g., "RESPONSE:{client_id}").
**Race Condition Prevention**: Notification topics are deterministic based on client-generated values (request_id or client_id). Clients MUST subscribe to the notification topic BEFORE submitting the request to avoid missing notifications.
**Two Notification Patterns**:
1. **Per-client topic** (`RESPONSE:{client_id}`): Subscribe once during connection, reuse for all requests from this client. Recommended for most clients.
2. **Per-request topic** (`HISTORY_READY:{request_id}`): Subscribe immediately before each request. Use when you need per-request isolation or don't have a persistent client_id.
### Realtime Data Flow (Flink → Relay → Clients)
* Ingestors write realtime ticks to Kafka
* Flink reads from Kafka, processes OHLC aggregations, CEP triggers
* Flink publishes market data via ZMQ PUB
* Relay subscribes to Flink (XSUB) and fans out to clients (XPUB)
* Clients subscribe to specific tickers
### Data Processing (Kafka → Flink → Iceberg)
* All market data flows through Kafka (durable event log)
* Flink processes streams for aggregations and CEP
* Flink writes historical data to Apache Iceberg tables
* Clients can query Iceberg for historical data (alternative to ingestor backfill)
**Key Design Principles**:
* Relay is the well-known bind point - all other components connect to it
* Relay is completely stateless - no request tracking, only topic-based routing
* Exchange prefix filtering allows ingestor specialization (e.g., only BINANCE ingestors)
* Historical data flows through Kafka (durable processing) only - no direct response
* Async event-driven notifications via pub/sub (Flink → Relay → Clients)
* Protobufs over ZMQ for all inter-service communication
* Kafka for durability and Flink stream processing
* Iceberg for long-term historical storage and client queries
## ZeroMQ Channels and Patterns
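Client- and ingestor-facing sockets bind on **Relay** (well-known endpoint), and those components connect to Relay. The exception is the Flink PUB socket on port 5557 (channel 3 below), which Flink binds and Relay connects to.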
### 1. Client Request Channel (Clients → Relay)
**Pattern**: ROUTER (Relay binds, Clients use REQ)
- **Socket Type**: Relay uses ROUTER (bind), Clients use REQ (connect)
- **Endpoint**: `tcp://*:5559` (Relay binds)
- **Message Types**: `SubmitHistoricalRequest` → `SubmitResponse`
- **Behavior**:
- Client generates request_id and/or client_id
- Client computes notification topic deterministically
- **Client subscribes to notification topic FIRST (prevents race)**
- Client sends REQ for historical OHLC data
- Relay validates request and returns immediate acknowledgment
- Response includes notification_topic for client confirmation
- Relay publishes DataRequest to ingestor work queue
- No request tracking - relay is stateless
### 2. Ingestor Work Queue (Relay → Ingestors)
**Pattern**: PUB/SUB with exchange prefix filtering
- **Socket Type**: Relay uses PUB (bind), Ingestors use SUB (connect)
- **Endpoint**: `tcp://*:5555` (Relay binds)
- **Message Types**: `DataRequest` (historical or realtime)
- **Topic Prefix**: Exchange name (e.g., `BINANCE:`, `COINBASE:`)
- **Behavior**:
- Relay publishes work with exchange prefix from ticker
- Ingestors subscribe only to exchanges they support
- Multiple ingestors can compete for same exchange
- Ingestors write data to Kafka only (no direct response)
- Flink processes Kafka → Iceberg → notification
### 3. Market Data Fanout (Flink ↔ Relay ↔ Clients)
**Pattern**: XPUB/XSUB proxy
- **Socket Type**:
- Relay XPUB (bind) ← Clients SUB (connect) - Port 5558
- Relay XSUB (connect) → Flink PUB (bind) - Port 5557
- **Message Types**: `Tick`, `OHLC`, `HistoryReadyNotification`
- **Topic Formats**:
- Market data: `{ticker}|{data_type}` (e.g., `BINANCE:BTC/USDT|tick`)
- Notifications: `RESPONSE:{client_id}` or `HISTORY_READY:{request_id}`
- **Behavior**:
- Clients subscribe to ticker topics and notification topics via Relay XPUB
- Relay forwards subscriptions to Flink via XSUB
- Flink publishes processed market data and notifications
- Relay proxies data to subscribed clients (stateless forwarding)
- Dynamic subscription management (no pre-registration)
### 4. Ingestor Control Channel (Optional - Future Use)
**Pattern**: PUB/SUB (Broadcast control)
- **Socket Type**: Relay uses PUB, Ingestors use SUB
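- **Endpoint**: `tcp://*:5557` (Relay binds) — NOTE: port 5557 is also listed as the Flink PUB port for the Market Data Fanout channel; confirm the intended port before enabling this channel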
- **Message Types**: `IngestorControl` (cancel, config updates)
- **Behavior**:
- Broadcast control messages to all ingestors
- Used for realtime subscription cancellation
- Configuration updates
## Message Envelope Format
The core protocol uses two ZeroMQ frames:
```
Frame 1: [1 byte: protocol version]
Frame 2: [1 byte: message type ID][N bytes: protobuf message]
```
This two-frame approach allows receivers to check the protocol version before parsing the message type and protobuf payload.
**Important**: Some ZeroMQ socket patterns (PUB/SUB, XPUB/XSUB) may prepend additional frames for routing purposes. For example:
- **PUB/SUB with topic filtering**: SUB sockets receive `[topic frame][version frame][message frame]`
- **ROUTER sockets**: Prepend identity frames before the message
Components must handle these additional frames appropriately:
- SUB sockets: Skip the first frame (topic), then parse the remaining frames as the standard 2-frame envelope
- ROUTER sockets: Extract identity frames, then parse the standard 2-frame envelope
The two-frame envelope is the **logical protocol format**, but physical transmission may include additional ZeroMQ transport frames.
## Message Type IDs
| Type ID | Message Type | Description |
|---------|---------------------------|------------------------------------------------|
| 0x01 | DataRequest | Request for historical or realtime data |
| 0x02 | DataResponse (deprecated) | Historical data response (no longer used) |
| 0x03 | IngestorControl | Control messages for ingestors |
| 0x04 | Tick | Individual trade tick data |
| 0x05 | OHLC | Single OHLC candle with volume |
| 0x06 | Market | Market metadata |
| 0x07 | OHLCRequest (deprecated) | Client request (replaced by SubmitHistorical) |
| 0x08 | Response (deprecated) | Generic response (replaced by SubmitResponse) |
| 0x09 | CEPTriggerRequest | Register CEP trigger |
| 0x0A | CEPTriggerAck | CEP trigger acknowledgment |
| 0x0B | CEPTriggerEvent | CEP trigger fired callback |
| 0x0C | OHLCBatch | Batch of OHLC rows with metadata (Kafka) |
| 0x10 | SubmitHistoricalRequest | Client request for historical data (async) |
| 0x11 | SubmitResponse | Immediate ack with notification topic |
| 0x12 | HistoryReadyNotification | Notification that data is ready in Iceberg |
## Error Handling
**Async Architecture Error Handling**:
- Failed historical requests: ingestor writes error marker to Kafka
- Flink reads error marker and publishes HistoryReadyNotification with ERROR status
- Client timeout: if no notification received within timeout, assume failure
- Realtime requests cancelled via control channel if ingestor fails
- REQ/REP timeouts: 30 seconds default for client request submission
- PUB/SUB has no delivery guarantees (Kafka provides durability)
- No response routing needed - all notifications via topic-based pub/sub
**Durability**:
- All data flows through Kafka for durability
- Flink checkpointing ensures exactly-once processing
- Client can retry request with new request_id if notification not received

48
protobuf/tick.proto Normal file
View File

@@ -0,0 +1,48 @@
syntax = "proto3";
// A single trade tick.
message Tick {
  // Unique identifier of the trade.
  string trade_id = 1;

  // Market identifier.
  // NOTE(review): the original comment says this matches Market.market_id,
  // whose example is "BTC/USDT", while tickers elsewhere in the protocol
  // carry an exchange prefix (e.g., "BINANCE:BTC/USDT") — confirm which
  // form is used here.
  string ticker = 2;

  // Timestamp in microseconds since epoch.
  uint64 timestamp = 3;

  // Price; must be adjusted by tick_denom from Market metadata.
  int64 price = 4;

  // Base asset amount; must be adjusted by base_denom from Market metadata.
  int64 amount = 5;

  // Quote asset amount; must be adjusted by quote_denom from Market
  // metadata.
  int64 quote_amount = 6;

  // Side: true = taker buy (market buy), false = taker sell (market sell).
  bool taker_buy = 7;

  // Position effect: true = close position, false = open position.
  // Only relevant for derivatives/futures markets.
  optional bool to_close = 8;

  // Sequence number for ordering, if the exchange provides one.
  optional uint64 sequence = 9;

  // Additional flags for special trade types.
  optional TradeFlags flags = 10;
}
// Flags marking special trade types on a Tick.
message TradeFlags {
  // True for a liquidation trade.
  bool is_liquidation = 1;

  // True for a block trade (large OTC trade).
  bool is_block_trade = 2;

  // True when the maker side was a post-only order.
  bool maker_post_only = 3;

  // True when the trade occurred during an auction.
  bool is_auction = 4;
}