# ai/sandbox/dexorder/ohlc_client.py
"""
OHLCClient - High-level API for fetching OHLC data with smart caching
"""
import asyncio
import pandas as pd
import logging
from typing import Optional
from .iceberg_client import IcebergClient
from .history_client import HistoryClient
from .symbol_metadata_client import SymbolMetadataClient
# Module-level logger (the original assigned this twice; once is enough).
log = logging.getLogger(__name__)
class OHLCClient:
    """
    High-level client for fetching OHLC data with transparent caching.

    Workflow:
        1. Check Iceberg for existing data
        2. Identify missing ranges
        3. Request missing data via relay
        4. Wait for notification
        5. Query Iceberg for complete dataset
        6. Return combined results

    This provides transparent caching - clients don't need to know
    whether data came from cache or was fetched on-demand.
    """

    def __init__(
        self,
        iceberg_catalog_uri: str,
        relay_endpoint: str,
        notification_endpoint: str,
        namespace: str = "trading",
        s3_endpoint: Optional[str] = None,
        s3_access_key: Optional[str] = None,
        s3_secret_key: Optional[str] = None,
        s3_region: Optional[str] = None,
    ):
        """
        Initialize OHLC client.

        Args:
            iceberg_catalog_uri: URI of Iceberg catalog
            relay_endpoint: ZMQ endpoint for relay requests
            notification_endpoint: ZMQ endpoint for notifications
            namespace: Iceberg namespace (default: "trading")
            s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
            s3_access_key: S3/MinIO access key
            s3_secret_key: S3/MinIO secret key
            s3_region: S3/MinIO region (e.g., "us-east-1")
        """
        self.iceberg = IcebergClient(
            iceberg_catalog_uri, namespace,
            s3_endpoint=s3_endpoint,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region=s3_region,
        )
        # NOTE(review): s3_region is forwarded to IcebergClient above but not
        # here -- confirm SymbolMetadataClient does not accept/need it.
        self.symbol_metadata = SymbolMetadataClient(
            iceberg_catalog_uri, namespace,
            s3_endpoint=s3_endpoint,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
        )
        self.history = HistoryClient(relay_endpoint, notification_endpoint)
        log.info("OHLCClient initialized")

    async def start(self):
        """
        Start the client. Must be called before making requests.

        Starts the background notification listener via the history client.
        """
        await self.history.connect()

    async def stop(self):
        """Stop the client and clean up the history client's resources."""
        await self.history.close()

    async def fetch_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        request_timeout: float = 120.0,
    ) -> pd.DataFrame:
        """
        Fetch OHLC data with smart caching.

        Steps:
            1. Identify missing ranges in Iceberg
            2. If nothing is missing, query Iceberg and return immediately
            3. Otherwise request the data via relay and wait for the
               completion notification
            4. Query Iceberg for the now-complete dataset and return it

        Args:
            ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
            period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
            start_time: Start timestamp in nanoseconds
            end_time: End timestamp in nanoseconds
            request_timeout: Timeout for historical data requests (default: 120s)

        Returns:
            DataFrame with OHLC data sorted by timestamp.

        Raises:
            TimeoutError: If the historical data request times out.
            ValueError: If the request fails.
        """
        # Align times to period boundaries: [ceil(start), ceil(end)) exclusive.
        period_nanos = period_seconds * 1_000_000_000
        start_time = ((start_time + period_nanos - 1) // period_nanos) * period_nanos
        end_time = ((end_time + period_nanos - 1) // period_nanos) * period_nanos  # exclusive

        # Step 1: Identify missing ranges. Checking this *before* querying
        # avoids a wasted Iceberg query whose result would be discarded when
        # data has to be fetched anyway.
        missing_ranges = self.iceberg.find_missing_ranges(
            ticker, period_seconds, start_time, end_time
        )
        if not missing_ranges:
            # All data already exists in Iceberg.
            df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
            return self._apply_decimal_correction(ticker, df)

        # Step 2: Request missing data. For simplicity, request the entire
        # range (the relay can merge adjacent requests).
        result = await self.history.request_historical_ohlc(
            ticker=ticker,
            period_seconds=period_seconds,
            start_time=start_time,
            end_time=end_time,
            timeout=request_timeout,
        )

        # Step 3: Check result status.
        if result['status'] == 'ERROR':
            raise ValueError(f"Historical data request failed: {result['error_message']}")

        # Step 4: Query Iceberg again for the complete dataset.
        df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
        return self._apply_decimal_correction(ticker, df)

    def _apply_decimal_correction(self, ticker: str, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert raw integer OHLC columns to float prices/volumes.

        Iceberg stores prices and volumes as integers (Nautilus internal
        units). Divide price-denominated columns (open/high/low/close and
        quote_volume) by 10**price_precision and size-denominated columns
        (volume, buy_vol, sell_vol) by 10**size_precision to recover actual
        floating-point values. A precision of 0 (divisor 1) is skipped as a
        no-op. Note: columns are modified in place on the passed DataFrame.
        """
        if df.empty:
            return df
        meta = self.symbol_metadata.get_metadata(ticker)
        price_precision = meta.price_precision
        size_precision = meta.size_precision
        # Single price-precision pass covers quote_volume too (the original
        # recomputed the same divisor in a second, identical branch).
        if price_precision is not None and price_precision > 0:
            price_divisor = 10 ** price_precision
            for col in ("open", "high", "low", "close", "quote_volume"):
                if col in df.columns:
                    df[col] = df[col] / price_divisor
        if size_precision is not None and size_precision > 0:
            size_divisor = 10 ** size_precision
            for col in ("volume", "buy_vol", "sell_vol"):
                if col in df.columns:
                    df[col] = df[col] / size_divisor
        return df

    async def __aenter__(self):
        """Support async context manager: starts the client."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Support async context manager: stops the client."""
        await self.stop()