data fixes; indicator=>workspace sync

This commit is contained in:
2026-03-31 20:29:12 -04:00
parent 998f69fa1a
commit cd28e18e52
45 changed files with 1324 additions and 1239 deletions

View File

@@ -10,7 +10,7 @@ from pyiceberg.expressions import (
And,
EqualTo,
GreaterThanOrEqual,
LessThanOrEqual
LessThan,
)
log = logging.getLogger(__name__)
@@ -98,7 +98,7 @@ class IcebergClient:
EqualTo("ticker", ticker),
EqualTo("period_seconds", period_seconds),
GreaterThanOrEqual("timestamp", start_time),
LessThanOrEqual("timestamp", end_time)
LessThan("timestamp", end_time) # end_time is exclusive
)
)
@@ -110,6 +110,10 @@ class IcebergClient:
if not df.empty:
df = df.sort_values("timestamp")
# Convert integer microsecond timestamps to DatetimeIndex
df.index = pd.to_datetime(df["timestamp"], unit="us", utc=True)
df.index.name = "datetime"
df = df.drop(columns=["timestamp"])
# Apply price/volume conversion if metadata client available
if self.metadata_client is not None:
df = self._apply_denominators(df, ticker)
@@ -186,9 +190,9 @@ class IcebergClient:
# Convert period to microseconds
period_micros = period_seconds * 1_000_000
# Generate expected timestamps
expected_timestamps = list(range(start_time, end_time + 1, period_micros))
actual_timestamps = set(df['timestamp'].values)
# Generate expected timestamps — end_time is exclusive
expected_timestamps = list(range(start_time, end_time, period_micros))
actual_timestamps = set(df.index.view('int64') // 1000)
# Find gaps
missing = sorted(set(expected_timestamps) - actual_timestamps)

View File

@@ -12,6 +12,8 @@ from .symbol_metadata_client import SymbolMetadataClient
log = logging.getLogger(__name__)
log = logging.getLogger(__name__)
class OHLCClient:
"""
@@ -118,6 +120,11 @@ class OHLCClient:
TimeoutError: If historical data request times out
ValueError: If request fails
"""
# Align times to period boundaries: [ceil(start), ceil(end)) exclusive
period_micros = period_seconds * 1_000_000
start_time = ((start_time + period_micros - 1) // period_micros) * period_micros
end_time = ((end_time + period_micros - 1) // period_micros) * period_micros # exclusive
# Step 1: Check Iceberg for existing data
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
@@ -128,7 +135,7 @@ class OHLCClient:
if not missing_ranges:
# All data exists in Iceberg
return df
return self._forward_fill_gaps(df, period_seconds)
# Step 3: Request missing data for each range
# For simplicity, request entire range (relay can merge adjacent requests)
@@ -147,6 +154,39 @@ class OHLCClient:
# Step 5: Query Iceberg again for complete dataset
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
return self._forward_fill_gaps(df, period_seconds)
def _forward_fill_gaps(self, df: pd.DataFrame, period_seconds: int) -> pd.DataFrame:
"""
Forward-fill interior missing bars by carrying the last known close into
open, high, low, and close of any gap bar.
Only interior gaps (rows already present with null OHLC from the ingestor,
or timestamp slots missing between real bars) are filled. Edge gaps (before
the first real bar or after the last real bar) are left as-is.
"""
if df.empty:
return df
df = df.sort_index()
# Identify rows that are gap bars (null close)
is_gap = df['close'].isna()
if not is_gap.any():
return df
# Forward-fill close across gap rows, then copy into open/high/low
df['close'] = df['close'].ffill()
price_cols = ['open', 'high', 'low']
for col in price_cols:
if col in df.columns:
df[col] = df[col].where(~is_gap, df['close'])
# Zero out volume for filled gap rows
if 'volume' in df.columns:
df['volume'] = df['volume'].where(~is_gap, 0.0)
return df
async def __aenter__(self):

View File

@@ -1,329 +0,0 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
// Request for data ingestion (used in Relay → Ingestor work queue)
message DataRequest {
// Unique request ID for tracking
string request_id = 1;
// Type of request
RequestType type = 2;
// Market identifier
string ticker = 3;
// For historical requests
optional HistoricalParams historical = 4;
// For realtime requests
optional RealtimeParams realtime = 5;
// Optional client ID for notification routing (async architecture)
// Flink uses this to determine notification topic
optional string client_id = 6;
enum RequestType {
HISTORICAL_OHLC = 0;
REALTIME_TICKS = 1;
}
}
message HistoricalParams {
// Start time (microseconds since epoch)
uint64 start_time = 1;
// End time (microseconds since epoch)
uint64 end_time = 2;
// OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h, 86400 = 1d)
uint32 period_seconds = 3;
// Maximum number of candles to return (optional limit)
optional uint32 limit = 4;
}
message RealtimeParams {
// Whether to include tick data
bool include_ticks = 1;
// Whether to include aggregated OHLC
bool include_ohlc = 2;
// OHLC periods to generate in seconds (e.g., [60, 300, 900] for 1m, 5m, 15m)
repeated uint32 ohlc_period_seconds = 3;
}
// Control messages for ingestors (Flink → Ingestor control channel)
message IngestorControl {
// Control action type
ControlAction action = 1;
// Request ID to cancel (for CANCEL action)
optional string request_id = 2;
// Configuration updates (for CONFIG_UPDATE action)
optional IngestorConfig config = 3;
enum ControlAction {
CANCEL = 0; // Cancel a specific request
SHUTDOWN = 1; // Graceful shutdown signal
CONFIG_UPDATE = 2; // Update ingestor configuration
HEARTBEAT = 3; // Keep-alive signal
}
}
message IngestorConfig {
// Maximum concurrent requests per ingestor
optional uint32 max_concurrent = 1;
// Request timeout in seconds
optional uint32 timeout_seconds = 2;
// Kafka topic for output
optional string kafka_topic = 3;
}
// Historical data response from ingestor to Flink (Ingestor → Flink response channel)
message DataResponse {
// Request ID this is responding to
string request_id = 1;
// Status of the request
ResponseStatus status = 2;
// Error message if status is not OK
optional string error_message = 3;
// Serialized OHLC data (repeated OHLCV protobuf messages)
repeated bytes ohlc_data = 4;
// Total number of candles returned
uint32 total_records = 5;
enum ResponseStatus {
OK = 0;
NOT_FOUND = 1;
ERROR = 2;
}
}
// Client request submission for historical OHLC data (Client → Relay)
// Relay immediately responds with SubmitResponse containing request_id
message SubmitHistoricalRequest {
// Client-generated request ID for tracking
string request_id = 1;
// Market identifier (e.g., "BINANCE:BTC/USDT")
string ticker = 2;
// Start time (microseconds since epoch)
uint64 start_time = 3;
// End time (microseconds since epoch)
uint64 end_time = 4;
// OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h)
uint32 period_seconds = 5;
// Optional limit on number of candles
optional uint32 limit = 6;
// Optional client ID for notification routing (e.g., "client-abc-123")
// Notifications will be published to topic: "RESPONSE:{client_id}"
optional string client_id = 7;
}
// Immediate response to SubmitHistoricalRequest (Relay → Client)
message SubmitResponse {
// Request ID (echoed from request)
string request_id = 1;
// Status of submission
SubmitStatus status = 2;
// Error message if status is not QUEUED
optional string error_message = 3;
// Topic to subscribe to for result notification
// e.g., "RESPONSE:client-abc-123" or "HISTORY_READY:{request_id}"
string notification_topic = 4;
enum SubmitStatus {
QUEUED = 0; // Request queued successfully
DUPLICATE = 1; // Request ID already exists
INVALID = 2; // Invalid parameters
ERROR = 3; // Internal error
}
}
// Historical data ready notification (Flink → Relay → Client via pub/sub)
// Published after Flink writes data to Iceberg
message HistoryReadyNotification {
// Request ID
string request_id = 1;
// Market identifier
string ticker = 2;
// OHLC period in seconds
uint32 period_seconds = 3;
// Start time (microseconds since epoch)
uint64 start_time = 4;
// End time (microseconds since epoch)
uint64 end_time = 5;
// Status of the data fetch
NotificationStatus status = 6;
// Error message if status is not OK
optional string error_message = 7;
// Iceberg table information for client queries
string iceberg_namespace = 10;
string iceberg_table = 11;
// Number of records written
uint32 row_count = 12;
// Timestamp when data was written (microseconds since epoch)
uint64 completed_at = 13;
enum NotificationStatus {
OK = 0; // Data successfully written to Iceberg
NOT_FOUND = 1; // No data found for the requested period
ERROR = 2; // Error during fetch or processing
TIMEOUT = 3; // Request timed out
}
}
// Legacy message for backward compatibility (Client → Relay)
message OHLCRequest {
// Request ID for tracking
string request_id = 1;
// Market identifier
string ticker = 2;
// Start time (microseconds since epoch)
uint64 start_time = 3;
// End time (microseconds since epoch)
uint64 end_time = 4;
// OHLC period in seconds (e.g., 60 = 1m, 300 = 5m, 3600 = 1h)
uint32 period_seconds = 5;
// Optional limit on number of candles
optional uint32 limit = 6;
}
// Generic response for any request (Flink → Client)
message Response {
// Request ID this is responding to
string request_id = 1;
// Status of the request
ResponseStatus status = 2;
// Error message if status is not OK
optional string error_message = 3;
// Generic payload data (serialized protobuf messages)
repeated bytes data = 4;
// Total number of records
optional uint32 total_records = 5;
// Whether this is the final response (for paginated results)
bool is_final = 6;
enum ResponseStatus {
OK = 0;
NOT_FOUND = 1;
ERROR = 2;
}
}
// CEP trigger registration (Client → Flink)
message CEPTriggerRequest {
// Unique trigger ID
string trigger_id = 1;
// Flink SQL CEP pattern/condition
string sql_pattern = 2;
// Markets to monitor
repeated string tickers = 3;
// Callback endpoint (for DEALER/ROUTER routing)
optional string callback_id = 4;
// Optional parameters for the CEP query
map<string, string> parameters = 5;
}
// CEP trigger acknowledgment (Flink → Client)
message CEPTriggerAck {
// Trigger ID being acknowledged
string trigger_id = 1;
// Status of registration
TriggerStatus status = 2;
// Error message if status is not OK
optional string error_message = 3;
enum TriggerStatus {
REGISTERED = 0;
ALREADY_REGISTERED = 1;
INVALID_SQL = 2;
ERROR = 3;
}
}
// CEP trigger event callback (Flink → Client)
message CEPTriggerEvent {
// Trigger ID that fired
string trigger_id = 1;
// Timestamp when trigger fired (microseconds since epoch)
uint64 timestamp = 2;
// Schema information for the result rows
ResultSchema schema = 3;
// Result rows from the Flink SQL query
repeated ResultRow rows = 4;
// Additional context from the CEP pattern
map<string, string> context = 5;
}
message ResultSchema {
// Column names in order
repeated string column_names = 1;
// Column types (using Flink SQL type names)
repeated string column_types = 2;
}
message ResultRow {
// Encoded row data (one bytes field per column, in schema order)
// Each value is encoded as a protobuf-serialized FieldValue
repeated bytes values = 1;
}
message FieldValue {
oneof value {
string string_val = 1;
int64 int_val = 2;
double double_val = 3;
bool bool_val = 4;
bytes bytes_val = 5;
uint64 timestamp_val = 6;
}
}

View File

@@ -1,22 +0,0 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
message Market {
// The prices and volumes must be adjusted by the rational denominator provided
// by the market metadata
string exchange_id = 2; // e.g., BINANCE
string market_id = 3; // e.g., BTC/USDT
string market_type = 4; // e.g., Spot
string description = 5; // e.g., Bitcoin/Tether on Binance
repeated string column_names = 6; // e.g., ['open', 'high', 'low', 'close', 'volume', 'taker_vol', 'maker_vol']
string base_asset = 9;
string quote_asset = 10;
uint64 earliest_time = 11;
uint64 tick_denom = 12; // denominator applied to all OHLC price data
uint64 base_denom = 13; // denominator applied to base asset units
uint64 quote_denom = 14; // denominator applied to quote asset units
repeated uint32 supported_period_seconds = 15;
}

View File

@@ -1,61 +0,0 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
// Single OHLC row
message OHLC {
// Timestamp in microseconds since epoch
uint64 timestamp = 1;
// The prices and volumes must be adjusted by the rational denominator provided
// by the market metadata
int64 open = 2;
int64 high = 3;
int64 low = 4;
int64 close = 5;
optional int64 volume = 6;
optional int64 buy_vol = 7;
optional int64 sell_vol = 8;
optional int64 open_time = 9;
optional int64 high_time = 10;
optional int64 low_time = 11;
optional int64 close_time = 12;
optional int64 open_interest = 13;
string ticker = 14;
}
// Batch of OHLC rows with metadata for historical request tracking
// Used for Kafka messages from ingestor → Flink
message OHLCBatch {
// Metadata for tracking this request through the pipeline
OHLCBatchMetadata metadata = 1;
// OHLC rows in this batch
repeated OHLC rows = 2;
}
// Metadata for tracking historical data requests through the pipeline
message OHLCBatchMetadata {
// Request ID from client
string request_id = 1;
// Optional client ID for notification routing
optional string client_id = 2;
// Market identifier
string ticker = 3;
// OHLC period in seconds
uint32 period_seconds = 4;
// Time range requested (microseconds since epoch)
uint64 start_time = 5;
uint64 end_time = 6;
// Status for marker messages (OK, NOT_FOUND, ERROR)
string status = 7;
// Error message if status is ERROR
optional string error_message = 8;
}

View File

@@ -1,51 +0,0 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
message Tick {
// Unique identifier for the trade
string trade_id = 1;
// Market identifier (matches Market.market_id)
string ticker = 2;
// Timestamp in microseconds since epoch
uint64 timestamp = 3;
// Price (must be adjusted by tick_denom from Market metadata)
int64 price = 4;
// Base asset amount (must be adjusted by base_denom from Market metadata)
int64 amount = 5;
// Quote asset amount (must be adjusted by quote_denom from Market metadata)
int64 quote_amount = 6;
// Side: true = taker buy (market buy), false = taker sell (market sell)
bool taker_buy = 7;
// Position effect: true = close position, false = open position
// Only relevant for derivatives/futures markets
optional bool to_close = 8;
// Sequence number for ordering (if provided by exchange)
optional uint64 sequence = 9;
// Additional flags for special trade types
optional TradeFlags flags = 10;
}
message TradeFlags {
// Liquidation trade
bool is_liquidation = 1;
// Block trade (large OTC trade)
bool is_block_trade = 2;
// Maker side was a post-only order
bool maker_post_only = 3;
// Trade occurred during auction
bool is_auction = 4;
}

View File

@@ -1,258 +0,0 @@
syntax = "proto3";
option java_multiple_files = true;
option java_package = "com.dexorder.proto";
// User container event system for delivering notifications to users
// via active sessions or external channels (Telegram, email, push).
//
// Two ZMQ patterns:
// - XPUB/SUB (port 5570): Fast path for informational events to active sessions
// - DEALER/ROUTER (port 5571): Guaranteed delivery for critical events with ack
//
// See doc/protocol.md and doc/user_container_events.md for details.
// =============================================================================
// User Event (Container → Gateway)
// Message Type ID: 0x20
// =============================================================================
message UserEvent {
// User ID this event belongs to
string user_id = 1;
// Unique event ID for deduplication and ack tracking (UUID)
string event_id = 2;
// Timestamp when event was generated (Unix milliseconds)
int64 timestamp = 3;
// Type of event
EventType event_type = 4;
// Event payload (JSON or nested protobuf, depending on event_type)
bytes payload = 5;
// Delivery specification (priority and channel preferences)
DeliverySpec delivery = 6;
}
enum EventType {
// Trading events
ORDER_PLACED = 0;
ORDER_FILLED = 1;
ORDER_CANCELLED = 2;
ORDER_REJECTED = 3;
ORDER_EXPIRED = 4;
// Alert events
ALERT_TRIGGERED = 10;
ALERT_CREATED = 11;
ALERT_DELETED = 12;
// Position events
POSITION_OPENED = 20;
POSITION_CLOSED = 21;
POSITION_UPDATED = 22;
POSITION_LIQUIDATED = 23;
// Workspace/chart events
WORKSPACE_CHANGED = 30;
CHART_ANNOTATION_ADDED = 31;
CHART_ANNOTATION_REMOVED = 32;
INDICATOR_UPDATED = 33;
// Strategy events
STRATEGY_STARTED = 40;
STRATEGY_STOPPED = 41;
STRATEGY_LOG = 42;
STRATEGY_ERROR = 43;
BACKTEST_COMPLETED = 44;
// System events
CONTAINER_STARTING = 50;
CONTAINER_READY = 51;
CONTAINER_SHUTTING_DOWN = 52;
EVENT_ERROR = 53;
}
// =============================================================================
// Delivery Specification
// =============================================================================
message DeliverySpec {
// Priority determines routing behavior
Priority priority = 1;
// Ordered list of channel preferences (try first, then second, etc.)
repeated ChannelPreference channels = 2;
}
enum Priority {
// Drop if no active session (fire-and-forget via XPUB)
// Use for: indicator updates, chart syncs, strategy logs when watching
INFORMATIONAL = 0;
// Best effort delivery - queue briefly, deliver when possible
// Uses XPUB if subscribed, otherwise DEALER
// Use for: alerts, position updates
NORMAL = 1;
// Must deliver - retry until acked, escalate channels
// Always uses DEALER for guaranteed delivery
// Use for: order fills, liquidations, critical errors
CRITICAL = 2;
}
message ChannelPreference {
// Channel to deliver to
ChannelType channel = 1;
// If true, skip this channel if user is not connected to it
// If false, deliver even if user is not actively connected
// (e.g., send Telegram message even if user isn't in Telegram chat)
bool only_if_active = 2;
}
enum ChannelType {
// Whatever channel the user currently has open (WebSocket, Telegram session)
ACTIVE_SESSION = 0;
// Specific channels
WEB = 1; // WebSocket to web UI
TELEGRAM = 2; // Telegram bot message
EMAIL = 3; // Email notification
PUSH = 4; // Mobile push notification (iOS/Android)
DISCORD = 5; // Discord webhook (future)
SLACK = 6; // Slack webhook (future)
}
// =============================================================================
// Event Acknowledgment (Gateway → Container)
// Message Type ID: 0x21
// =============================================================================
message EventAck {
// Event ID being acknowledged
string event_id = 1;
// Delivery status
AckStatus status = 2;
// Error message if status is ERROR
string error_message = 3;
// Channel that successfully delivered (for logging/debugging)
ChannelType delivered_via = 4;
}
enum AckStatus {
// Successfully delivered to at least one channel
DELIVERED = 0;
// Accepted and queued for delivery (e.g., rate limited, will retry)
QUEUED = 1;
// Permanent failure - all channels failed
ACK_ERROR = 2;
}
// =============================================================================
// Event Payloads
// These are JSON-encoded in the UserEvent.payload field.
// Defined here for documentation; actual encoding is JSON for flexibility.
// =============================================================================
// Payload for ORDER_PLACED, ORDER_FILLED, ORDER_CANCELLED, etc.
message OrderEventPayload {
string order_id = 1;
string symbol = 2;
string side = 3; // "buy" or "sell"
string order_type = 4; // "market", "limit", "stop_limit", etc.
string quantity = 5; // Decimal string
string price = 6; // Decimal string (for limit orders)
string fill_price = 7; // Decimal string (for fills)
string fill_quantity = 8; // Decimal string (for partial fills)
string status = 9; // "open", "filled", "cancelled", etc.
string exchange = 10;
int64 timestamp = 11; // Unix milliseconds
string strategy_id = 12; // If order was placed by a strategy
string error_message = 13; // If rejected/failed
}
// Payload for ALERT_TRIGGERED
message AlertEventPayload {
string alert_id = 1;
string symbol = 2;
string condition = 3; // Human-readable condition (e.g., "BTC > 50000")
string triggered_price = 4; // Decimal string
int64 timestamp = 5;
}
// Payload for POSITION_OPENED, POSITION_CLOSED, POSITION_UPDATED
message PositionEventPayload {
string position_id = 1;
string symbol = 2;
string side = 3; // "long" or "short"
string size = 4; // Decimal string
string entry_price = 5; // Decimal string
string current_price = 6; // Decimal string
string unrealized_pnl = 7; // Decimal string
string realized_pnl = 8; // Decimal string (for closed positions)
string leverage = 9; // Decimal string (for margin)
string liquidation_price = 10;
string exchange = 11;
int64 timestamp = 12;
}
// Payload for WORKSPACE_CHANGED, CHART_ANNOTATION_*, INDICATOR_UPDATED
message WorkspaceEventPayload {
string workspace_id = 1;
string change_type = 2; // "symbol_changed", "timeframe_changed", "annotation_added", etc.
string symbol = 3;
string timeframe = 4;
// For annotations
string annotation_id = 5;
string annotation_type = 6; // "trendline", "horizontal", "rectangle", "text", etc.
string annotation_data = 7; // JSON string with coordinates, style, etc.
// For indicators
string indicator_name = 8;
string indicator_params = 9; // JSON string with indicator parameters
int64 timestamp = 10;
}
// Payload for STRATEGY_LOG, STRATEGY_ERROR
message StrategyEventPayload {
string strategy_id = 1;
string strategy_name = 2;
string log_level = 3; // "debug", "info", "warn", "error"
string message = 4;
string details = 5; // JSON string with additional context
int64 timestamp = 6;
}
// Payload for BACKTEST_COMPLETED
message BacktestEventPayload {
string backtest_id = 1;
string strategy_id = 2;
string strategy_name = 3;
string symbol = 4;
string timeframe = 5;
int64 start_time = 6;
int64 end_time = 7;
// Results summary
int32 total_trades = 8;
int32 winning_trades = 9;
int32 losing_trades = 10;
string total_pnl = 11; // Decimal string
string win_rate = 12; // Decimal string (0-1)
string sharpe_ratio = 13; // Decimal string
string max_drawdown = 14; // Decimal string (0-1)
string results_path = 15; // Path to full results file
int64 completed_at = 16;
}