"""
|
|
HistoryClient - Submit historical data requests via relay and wait for notifications
|
|
|
|
RACE CONDITION PREVENTION:
|
|
The client must subscribe to notification topics BEFORE submitting requests.
|
|
Notification topics are deterministic: RESPONSE:{client_id} or HISTORY_READY:{request_id}
|
|
Since both are client-generated, we can subscribe before sending the request.
|
|
"""
|
|
|
|
import asyncio
|
|
import uuid
|
|
import zmq
|
|
import zmq.asyncio
|
|
from typing import Optional
|
|
import struct
|
|
import sys
|
|
import os
|
|
|
|
# Import protobuf messages (assuming they're generated in ../protobuf)
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../protobuf'))
|
|
try:
|
|
from ingestor_pb2 import SubmitHistoricalRequest, SubmitResponse, HistoryReadyNotification
|
|
except ImportError:
|
|
print("Warning: Protobuf files not found. Run: protoc -I ../protobuf --python_out=../protobuf ../protobuf/*.proto")
|
|
raise
|
|
|
|
|
|
class HistoryClient:
    """
    Client for submitting historical data requests via relay.

    IMPORTANT: Call connect() before making any requests. This ensures the
    notification listener is running and subscribed BEFORE any requests are
    submitted, preventing the race condition where notifications arrive before
    subscription.

    Provides:
    - Submit historical OHLC data requests
    - Wait for completion notifications
    - Handle request timeouts and errors
    """

    # Wire-envelope constants, shared by the submit path and the notification
    # parser (previously duplicated as inline literals in both code paths).
    ENVELOPE_VERSION = 0x01
    MESSAGE_TYPE_SUBMIT_HISTORICAL = 0x10
    MESSAGE_TYPE_HISTORY_READY = 0x12

    def __init__(self, relay_endpoint: str, notification_endpoint: str, client_id: Optional[str] = None):
        """
        Initialize history client.

        Args:
            relay_endpoint: ZMQ endpoint for relay client requests (e.g., "tcp://relay:5559")
            notification_endpoint: ZMQ endpoint for notifications (e.g., "tcp://relay:5558")
            client_id: Optional client ID for notification routing. If not provided, generates one.
                All notifications for this client will be sent to topic RESPONSE:{client_id}
        """
        self.relay_endpoint = relay_endpoint
        self.notification_endpoint = notification_endpoint
        self.client_id = client_id or f"client-{uuid.uuid4().hex[:8]}"
        self.context = zmq.asyncio.Context()
        # request_id -> {'event': asyncio.Event, 'result': dict | None}
        self.pending_requests = {}
        self.notification_task = None
        self.connected = False
        # Set by _notification_listener once its SUBSCRIBE has been issued,
        # so connect() can wait on it instead of relying on a blind sleep.
        self._listener_ready = asyncio.Event()

    async def connect(self):
        """
        Connect to relay and start notification listener.

        CRITICAL: This MUST be called before making any requests to prevent
        race condition. The notification listener subscribes to the
        deterministic topic RESPONSE:{client_id} BEFORE any requests are sent,
        ensuring we never miss notifications.

        Raises:
            ConnectionError: If the notification listener fails to start.
        """
        if self.connected:
            return

        # Start notification listener FIRST
        self.notification_task = asyncio.create_task(self._notification_listener())

        # Wait until the listener has actually issued its SUBSCRIBE, rather
        # than sleeping a fixed interval and hoping the task has started.
        try:
            await asyncio.wait_for(self._listener_ready.wait(), timeout=5.0)
        except asyncio.TimeoutError:
            self.notification_task.cancel()
            raise ConnectionError("Notification listener failed to start within 5s") from None

        # Short grace period for PUB/SUB slow-joiner: the SUBSCRIBE still has
        # to propagate to the publisher after the socket connects.
        await asyncio.sleep(0.1)

        self.connected = True

    async def request_historical_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        timeout: float = 30.0,
        limit: Optional[int] = None
    ) -> dict:
        """
        Request historical OHLC data and wait for completion notification.

        IMPORTANT: Call connect() before using this method.

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds
            timeout: Request timeout in seconds (default: 30)
            limit: Optional limit on number of candles

        Returns:
            dict with keys:
            - request_id: The request ID
            - status: 'OK', 'NOT_FOUND', or 'ERROR'
            - error_message: Error message if status is 'ERROR'
            - iceberg_namespace: Iceberg namespace (if status is 'OK')
            - iceberg_table: Iceberg table name (if status is 'OK')
            - row_count: Number of rows written (if status is 'OK')

        Raises:
            TimeoutError: If the completion notification does not arrive in time
            ConnectionError: If unable to connect to relay or not connected
        """
        if not self.connected:
            raise ConnectionError("Client not connected. Call connect() first to prevent race condition.")

        request_id = str(uuid.uuid4())

        # Register the pending request BEFORE sending to eliminate any race
        # condition. The notification topic is deterministic
        # (RESPONSE:{client_id}) and the listener is already subscribed, so we
        # just need pending_requests populated before Flink could possibly
        # publish the notification.
        event = asyncio.Event()
        self.pending_requests[request_id] = {'event': event, 'result': None}

        try:
            await self._submit_request(request_id, ticker, period_seconds,
                                       start_time, end_time, limit)

            # Wait for Flink notification with timeout
            try:
                await asyncio.wait_for(event.wait(), timeout=timeout)
            except asyncio.TimeoutError:
                raise TimeoutError(f"Request {request_id} timed out after {timeout}s")
            return self.pending_requests[request_id]['result']
        finally:
            self.pending_requests.pop(request_id, None)

    async def _submit_request(self, request_id, ticker, period_seconds,
                              start_time, end_time, limit):
        """Send one SubmitHistoricalRequest over a REQ socket and check the relay's immediate ack.

        Raises ConnectionError on any relay-level failure (no ack within 5s,
        malformed reply, or non-zero ack status).
        """
        # client_id is CRITICAL: it enables the deterministic notification topic
        request = SubmitHistoricalRequest(
            request_id=request_id,
            ticker=ticker,
            period_seconds=period_seconds,
            start_time=start_time,
            end_time=end_time,
            client_id=self.client_id
        )
        if limit is not None:
            request.limit = limit

        # ZMQ envelope: version (1 byte) frame, then message type (1 byte) + protobuf payload
        version_frame = struct.pack('B', self.ENVELOPE_VERSION)
        message_frame = struct.pack('B', self.MESSAGE_TYPE_SUBMIT_HISTORICAL) + request.SerializeToString()

        socket = self.context.socket(zmq.REQ)
        # LINGER=0 so close()/context.term() never blocks on undelivered
        # frames if the relay is unreachable.
        socket.setsockopt(zmq.LINGER, 0)
        socket.connect(self.relay_endpoint)
        try:
            await socket.send(version_frame, zmq.SNDMORE)
            await socket.send(message_frame)

            # Wait for the relay's immediate ack; a silent relay is a
            # connectivity problem, not a request timeout.
            try:
                frames = await self._recv_all_frames(socket, timeout=5.0)
            except asyncio.TimeoutError:
                raise ConnectionError("Relay did not acknowledge request within 5s") from None

            # Expect 2 frames: version, message
            if len(frames) < 2:
                raise ConnectionError(f"Expected 2 frames, got {len(frames)}")

            # NOTE(review): frames[1][0] is the message type; like the original
            # implementation we do not validate it here — confirm the relay's
            # ack type before tightening this.
            response = SubmitResponse()
            response.ParseFromString(frames[1][1:])
            if response.status != 0:
                raise ConnectionError(f"Request failed: {response.error_message}")
        finally:
            socket.close()

    @staticmethod
    async def _recv_all_frames(socket, timeout=None):
        """Receive one complete multi-part ZMQ message as a list of frames.

        If ``timeout`` is given it applies per frame; asyncio.TimeoutError
        propagates to the caller.
        """
        frames = []
        while True:
            if timeout is not None:
                frame = await asyncio.wait_for(socket.recv(), timeout=timeout)
            else:
                frame = await socket.recv()
            frames.append(frame)
            if not socket.get(zmq.RCVMORE):
                break
        return frames

    def _parse_notification(self, frames):
        """Validate the envelope of a notification and decode its payload.

        Expects [topic][version][message] frames. Returns
        (request_id, result_dict) for a valid HistoryReadyNotification, or
        None when the frames should be skipped (wrong shape/version/type, or
        an unparseable payload).
        """
        if len(frames) < 3:
            return None

        version_frame = frames[1]
        message_frame = frames[2]

        if len(version_frame) != 1 or version_frame[0] != self.ENVELOPE_VERSION:
            return None
        if len(message_frame) < 1 or message_frame[0] != self.MESSAGE_TYPE_HISTORY_READY:
            return None

        try:
            notification = HistoryReadyNotification()
            notification.ParseFromString(message_frame[1:])
        except Exception as e:
            print(f"Warning: failed to parse notification payload: {e}")
            return None

        # Map protobuf enum to string status.
        # NotificationStatus: OK=0, NOT_FOUND=1, ERROR=2, TIMEOUT=3
        status_map = {0: 'OK', 1: 'NOT_FOUND', 2: 'ERROR', 3: 'TIMEOUT'}
        status = status_map.get(notification.status, 'ERROR')

        result = {
            'request_id': notification.request_id,
            'status': status,
            'error_message': notification.error_message if notification.error_message else None
        }

        # Add Iceberg details if available
        if status == 'OK':
            result.update({
                'iceberg_namespace': notification.iceberg_namespace,
                'iceberg_table': notification.iceberg_table,
                'row_count': notification.row_count,
                'ticker': notification.ticker,
                'period_seconds': notification.period_seconds,
                'start_time': notification.start_time,
                'end_time': notification.end_time,
            })

        return notification.request_id, result

    async def _notification_listener(self):
        """
        Internal notification listener that subscribes to RESPONSE:{client_id} topic.

        CRITICAL: This runs BEFORE any requests are submitted to prevent race
        condition. The notification topic is deterministic based on our
        client_id, and _listener_ready is set once the subscription is issued
        so connect() can block until then.
        """
        socket = self.context.socket(zmq.SUB)
        socket.setsockopt(zmq.LINGER, 0)
        socket.connect(self.notification_endpoint)

        # Subscribe to our client-specific topic. CRITICAL: the topic is
        # deterministic (RESPONSE:{client_id}) and known before any request is
        # sent, so subscribing here closes the race window.
        notification_topic = f"RESPONSE:{self.client_id}"
        socket.setsockopt_string(zmq.SUBSCRIBE, notification_topic)

        # Tell connect() the subscription is in place.
        self._listener_ready.set()

        try:
            while True:
                # Receive multi-frame message: [topic][version][message]
                frames = await self._recv_all_frames(socket)

                parsed = self._parse_notification(frames)
                if parsed is None:
                    continue
                request_id, result = parsed

                # Deliver only if a caller is still waiting for this request.
                pending = self.pending_requests.get(request_id)
                if pending is not None:
                    pending['result'] = result
                    pending['event'].set()
        except asyncio.CancelledError:
            pass
        finally:
            socket.close()

    async def close(self):
        """
        Close the client and cleanup resources.

        Cancels the notification listener (whose finally-block closes the SUB
        socket) and terminates the ZMQ context.
        """
        if self.notification_task:
            self.notification_task.cancel()
            try:
                await self.notification_task
            except asyncio.CancelledError:
                pass
            self.notification_task = None

        self.context.term()
        self.connected = False
|