# Source: ai/client-py/dexorder/history_client.py
# Retrieved: 2026-03-11 18:47:11 -04:00 (297 lines, 11 KiB, Python)
"""
HistoryClient - Submit historical data requests via relay and wait for notifications
RACE CONDITION PREVENTION:
The client must subscribe to notification topics BEFORE submitting requests.
Notification topics are deterministic: RESPONSE:{client_id} or HISTORY_READY:{request_id}
Since both are client-generated, we can subscribe before sending the request.
"""
import asyncio
import uuid
import zmq
import zmq.asyncio
from typing import Optional
import struct
import sys
import os
# Import protobuf messages (assuming they're generated in ../protobuf)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../protobuf'))
try:
from ingestor_pb2 import SubmitHistoricalRequest, SubmitResponse, HistoryReadyNotification
except ImportError:
print("Warning: Protobuf files not found. Run: protoc -I ../protobuf --python_out=../protobuf ../protobuf/*.proto")
raise
class HistoryClient:
    """
    Client for submitting historical data requests via relay.

    IMPORTANT: Call connect() before making any requests. This ensures the
    notification listener is running and subscribed BEFORE any requests are
    submitted, preventing the race condition where notifications arrive
    before subscription.

    Provides:
    - Submit historical OHLC data requests
    - Wait for completion notifications
    - Handle request timeouts and errors
    """

    # ZMQ envelope constants shared with the relay / Flink side.
    PROTOCOL_VERSION = 0x01
    MESSAGE_TYPE_SUBMIT_HISTORICAL = 0x10
    MESSAGE_TYPE_HISTORY_READY = 0x12

    # How long to wait for the relay's immediate (synchronous) SubmitResponse.
    RELAY_RESPONSE_TIMEOUT = 5.0

    # NotificationStatus protobuf enum -> string status.
    # NotificationStatus: OK=0, NOT_FOUND=1, ERROR=2, TIMEOUT=3
    _STATUS_NAMES = {0: 'OK', 1: 'NOT_FOUND', 2: 'ERROR', 3: 'TIMEOUT'}

    def __init__(self, relay_endpoint: str, notification_endpoint: str, client_id: Optional[str] = None):
        """
        Initialize history client.

        Args:
            relay_endpoint: ZMQ endpoint for relay client requests (e.g., "tcp://relay:5559")
            notification_endpoint: ZMQ endpoint for notifications (e.g., "tcp://relay:5558")
            client_id: Optional client ID for notification routing. If not provided, generates one.
                All notifications for this client will be sent to topic RESPONSE:{client_id}
        """
        self.relay_endpoint = relay_endpoint
        self.notification_endpoint = notification_endpoint
        self.client_id = client_id or f"client-{uuid.uuid4().hex[:8]}"
        self.context = zmq.asyncio.Context()
        # request_id -> {'event': asyncio.Event, 'result': Optional[dict]}
        self.pending_requests: dict = {}
        self.notification_task: Optional[asyncio.Task] = None
        self.connected = False
        # Set by the listener once it has issued its SUBSCRIBE. Created lazily
        # in connect() so the Event binds to the running loop on older Pythons.
        self._subscribed: Optional[asyncio.Event] = None

    async def connect(self):
        """
        Connect to relay and start notification listener.

        CRITICAL: This MUST be called before making any requests to prevent a
        race condition. The notification listener subscribes to the
        deterministic topic RESPONSE:{client_id} BEFORE any requests are sent,
        ensuring we never miss notifications.

        Raises:
            ConnectionError: If the notification listener does not come up.
        """
        if self.connected:
            return
        self._subscribed = asyncio.Event()
        # Start notification listener FIRST.
        self.notification_task = asyncio.create_task(self._notification_listener())
        # Wait until the listener has actually connected and subscribed rather
        # than sleeping blindly and hoping it got there in time.
        try:
            await asyncio.wait_for(self._subscribed.wait(), timeout=5.0)
        except asyncio.TimeoutError:
            self.notification_task.cancel()
            raise ConnectionError("Notification listener failed to subscribe") from None
        # Brief pause so the SUBSCRIBE frame can propagate to the publisher
        # over TCP (ZMQ "slow joiner"); the local subscribe alone is not enough.
        await asyncio.sleep(0.1)
        self.connected = True

    async def request_historical_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        timeout: float = 30.0,
        limit: Optional[int] = None
    ) -> dict:
        """
        Request historical OHLC data and wait for completion notification.

        IMPORTANT: Call connect() before using this method.

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds
            timeout: Request timeout in seconds (default: 30)
            limit: Optional limit on number of candles

        Returns:
            dict with keys:
                - request_id: The request ID
                - status: 'OK', 'NOT_FOUND', 'ERROR', or 'TIMEOUT'
                - error_message: Error message if status is 'ERROR'
                - iceberg_namespace: Iceberg namespace (if status is 'OK')
                - iceberg_table: Iceberg table name (if status is 'OK')
                - row_count: Number of rows written (if status is 'OK')

        Raises:
            TimeoutError: If the completion notification does not arrive in time
            ConnectionError: If not connected, or the relay rejects / fails to
                acknowledge the request
        """
        if not self.connected:
            raise ConnectionError("Client not connected. Call connect() first to prevent race condition.")
        request_id = str(uuid.uuid4())
        # Register the pending request BEFORE sending to eliminate any race
        # condition. The notification topic is deterministic
        # (RESPONSE:{client_id}) and the listener is already subscribed, so we
        # just need pending_requests populated before Flink could possibly
        # publish the notification.
        event = asyncio.Event()
        self.pending_requests[request_id] = {'event': event, 'result': None}
        try:
            await self._submit_request(request_id, ticker, period_seconds,
                                       start_time, end_time, limit)
            # Wait for Flink notification with timeout.
            try:
                await asyncio.wait_for(event.wait(), timeout=timeout)
            except asyncio.TimeoutError:
                # Re-raise as the builtin TimeoutError the docstring promises
                # (asyncio.TimeoutError is a distinct type before Python 3.11).
                raise TimeoutError(f"Request {request_id} timed out after {timeout}s") from None
            return self.pending_requests[request_id]['result']
        finally:
            self.pending_requests.pop(request_id, None)

    async def _submit_request(self, request_id: str, ticker: str, period_seconds: int,
                              start_time: int, end_time: int, limit: Optional[int]):
        """Send one SubmitHistoricalRequest over a fresh REQ socket and verify the relay's ack."""
        # client_id is CRITICAL: it enables the deterministic notification topic.
        request = SubmitHistoricalRequest(
            request_id=request_id,
            ticker=ticker,
            period_seconds=period_seconds,
            start_time=start_time,
            end_time=end_time,
            client_id=self.client_id
        )
        if limit is not None:
            request.limit = limit
        # ZMQ envelope: version (1 byte) frame + [message type (1 byte) + protobuf payload] frame.
        version_frame = struct.pack('B', self.PROTOCOL_VERSION)
        message_frame = struct.pack('B', self.MESSAGE_TYPE_SUBMIT_HISTORICAL) + request.SerializeToString()
        socket = self.context.socket(zmq.REQ)
        socket.connect(self.relay_endpoint)
        try:
            await socket.send(version_frame, zmq.SNDMORE)
            await socket.send(message_frame)
            # Wait for the relay's immediate response; a silent relay is a
            # connectivity problem, so surface it as ConnectionError instead of
            # letting asyncio.TimeoutError escape undocumented.
            try:
                frames = await self._recv_all_frames(socket, timeout=self.RELAY_RESPONSE_TIMEOUT)
            except asyncio.TimeoutError:
                raise ConnectionError(
                    f"Relay did not respond within {self.RELAY_RESPONSE_TIMEOUT}s") from None
            # Expect 2 frames: version, message.
            if len(frames) < 2:
                raise ConnectionError(f"Expected 2 frames, got {len(frames)}")
            if len(frames[0]) != 1 or frames[0][0] != self.PROTOCOL_VERSION:
                raise ConnectionError(f"Unexpected protocol version in relay response: {frames[0]!r}")
            response = SubmitResponse()
            response.ParseFromString(frames[1][1:])  # strip the message-type byte
            if response.status != 0:
                raise ConnectionError(f"Request failed: {response.error_message}")
        finally:
            # linger=0 so close() / context.term() can never block on this socket.
            socket.close(linger=0)

    @staticmethod
    async def _recv_all_frames(socket, timeout: Optional[float] = None) -> list:
        """Drain one complete multi-frame ZMQ message; optional per-frame timeout."""
        frames = []
        while True:
            if timeout is None:
                frame = await socket.recv()
            else:
                frame = await asyncio.wait_for(socket.recv(), timeout=timeout)
            frames.append(frame)
            if not socket.get(zmq.RCVMORE):
                return frames

    async def _notification_listener(self):
        """
        Internal notification listener that subscribes to RESPONSE:{client_id} topic.

        CRITICAL: This runs BEFORE any requests are submitted to prevent the
        race condition. The notification topic is deterministic based on our
        client_id, so we can subscribe before the first request is ever sent.
        """
        socket = self.context.socket(zmq.SUB)
        socket.connect(self.notification_endpoint)
        socket.setsockopt_string(zmq.SUBSCRIBE, f"RESPONSE:{self.client_id}")
        # Tell connect() the subscription has been issued.
        if self._subscribed is not None:
            self._subscribed.set()
        try:
            while True:
                frames = await self._recv_all_frames(socket)
                self._handle_notification_frames(frames)
        except asyncio.CancelledError:
            pass
        finally:
            # linger=0 so context.term() in close() cannot hang on this socket.
            socket.close(linger=0)

    def _handle_notification_frames(self, frames: list):
        """Validate one [topic][version][message] notification and resolve the matching pending request."""
        if len(frames) < 3:
            return
        version_frame, message_frame = frames[1], frames[2]
        if len(version_frame) != 1 or version_frame[0] != self.PROTOCOL_VERSION:
            return
        if len(message_frame) < 1 or message_frame[0] != self.MESSAGE_TYPE_HISTORY_READY:
            return
        try:
            notification = HistoryReadyNotification()
            notification.ParseFromString(message_frame[1:])
        except Exception as e:
            # Best-effort: a malformed notification must not kill the listener.
            print(f"Warning: failed to parse notification payload: {e}")
            return
        pending = self.pending_requests.get(notification.request_id)
        if pending is None:
            # Not a request we are waiting on (e.g. already timed out).
            return
        pending['result'] = self._build_result(notification)
        pending['event'].set()

    def _build_result(self, notification) -> dict:
        """Translate a HistoryReadyNotification into the result dict returned to callers."""
        status = self._STATUS_NAMES.get(notification.status, 'ERROR')
        result = {
            'request_id': notification.request_id,
            'status': status,
            'error_message': notification.error_message if notification.error_message else None
        }
        # Add Iceberg details only on success.
        if status == 'OK':
            result.update({
                'iceberg_namespace': notification.iceberg_namespace,
                'iceberg_table': notification.iceberg_table,
                'row_count': notification.row_count,
                'ticker': notification.ticker,
                'period_seconds': notification.period_seconds,
                'start_time': notification.start_time,
                'end_time': notification.end_time,
            })
        return result

    async def close(self):
        """
        Close the client and cleanup resources.

        Cancels the notification listener (whose socket closes with linger=0)
        and terminates the ZMQ context; with linger=0 the term() call cannot
        block on undelivered messages.
        """
        if self.notification_task:
            self.notification_task.cancel()
            try:
                await self.notification_task
            except asyncio.CancelledError:
                pass
            self.notification_task = None
        self.context.term()
        self.connected = False