# 199 lines · 6.7 KiB · Python
"""
|
|
OHLCClient - High-level API for fetching OHLC data with smart caching
|
|
"""
|
|
|
|
import asyncio
import pandas as pd
import logging
from typing import Optional

from .iceberg_client import IcebergClient
from .history_client import HistoryClient
from .symbol_metadata_client import SymbolMetadataClient

# Module-level logger (was assigned twice in the original; once is enough).
log = logging.getLogger(__name__)


class OHLCClient:
    """
    High-level client for fetching OHLC data.

    Workflow:
    1. Check Iceberg for existing data
    2. Identify missing ranges
    3. Request missing data via relay
    4. Wait for notification
    5. Query Iceberg for complete dataset
    6. Return combined results

    This provides transparent caching - clients don't need to know
    whether data came from cache or was fetched on-demand.
    """

    def __init__(
        self,
        iceberg_catalog_uri: str,
        relay_endpoint: str,
        notification_endpoint: str,
        namespace: str = "trading",
        s3_endpoint: Optional[str] = None,
        s3_access_key: Optional[str] = None,
        s3_secret_key: Optional[str] = None,
        s3_region: Optional[str] = None,
    ):
        """
        Initialize OHLC client.

        Args:
            iceberg_catalog_uri: URI of Iceberg catalog
            relay_endpoint: ZMQ endpoint for relay requests
            notification_endpoint: ZMQ endpoint for notifications
            namespace: Iceberg namespace (default: "trading")
            s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
            s3_access_key: S3/MinIO access key
            s3_secret_key: S3/MinIO secret key
            s3_region: S3/MinIO region (e.g., "us-east-1")
        """
        self.iceberg = IcebergClient(
            iceberg_catalog_uri, namespace,
            s3_endpoint=s3_endpoint,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region=s3_region,
        )
        # NOTE(review): unlike IcebergClient above, SymbolMetadataClient is not
        # passed s3_region — confirm its constructor does not accept one.
        self.symbol_metadata = SymbolMetadataClient(
            iceberg_catalog_uri, namespace,
            s3_endpoint=s3_endpoint,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
        )
        self.history = HistoryClient(relay_endpoint, notification_endpoint)
        log.info("OHLCClient initialized")

    async def start(self):
        """
        Start the client. Must be called before making requests.
        Starts background notification listener.
        """
        await self.history.connect()

    async def stop(self):
        """
        Stop the client and cleanup resources.
        """
        await self.history.close()

    async def fetch_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int,
        request_timeout: float = 120.0
    ) -> pd.DataFrame:
        """
        Fetch OHLC data with smart caching.

        Steps:
        1. Query Iceberg for existing data
        2. If complete, return immediately
        3. If missing data, request via relay
        4. Wait for completion notification
        5. Query Iceberg again for complete dataset
        6. Return results

        Args:
            ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
            period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
            start_time: Start timestamp in nanoseconds
            end_time: End timestamp in nanoseconds
            request_timeout: Timeout for historical data requests (default: 120s)

        Returns:
            DataFrame with OHLC data sorted by timestamp

        Raises:
            TimeoutError: If historical data request times out
            ValueError: If request fails
        """
        # Align times to period boundaries: [ceil(start), ceil(end)) exclusive.
        # Both bounds are rounded UP to the next period multiple.
        period_nanos = period_seconds * 1_000_000_000
        start_time = ((start_time + period_nanos - 1) // period_nanos) * period_nanos
        end_time = ((end_time + period_nanos - 1) // period_nanos) * period_nanos  # exclusive

        # Step 1: Check Iceberg for existing data
        df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)

        # Step 2: Identify missing ranges
        missing_ranges = self.iceberg.find_missing_ranges(
            ticker, period_seconds, start_time, end_time
        )

        if not missing_ranges:
            # All data exists in Iceberg — serve entirely from cache.
            return self._apply_decimal_correction(ticker, df)

        # Step 3: Request missing data for each range.
        # For simplicity, request entire range (relay can merge adjacent requests)
        result = await self.history.request_historical_ohlc(
            ticker=ticker,
            period_seconds=period_seconds,
            start_time=start_time,
            end_time=end_time,
            timeout=request_timeout
        )

        # Step 4: Check result status
        if result['status'] == 'ERROR':
            raise ValueError(f"Historical data request failed: {result['error_message']}")

        # Step 5: Query Iceberg again for complete dataset
        df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)

        return self._apply_decimal_correction(ticker, df)

    def _apply_decimal_correction(self, ticker: str, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert raw integer OHLC columns to float prices/volumes.

        Iceberg stores prices and volumes as integers (Nautilus internal units).
        Divide by 10^price_precision for price-denominated columns (OHLC and
        quote_volume) and by 10^size_precision for base-volume columns to
        recover actual floating-point values.

        Args:
            ticker: Market identifier used to look up precision metadata
            df: Raw integer-valued OHLC frame; modified in place and returned

        Returns:
            The same DataFrame with scaled float columns (unchanged if empty
            or if precisions are None/0).
        """
        if df.empty:
            return df

        meta = self.symbol_metadata.get_metadata(ticker)
        price_precision = meta.price_precision
        size_precision = meta.size_precision

        # Price-denominated columns (OHLC and quote_volume) share one divisor.
        # (The original computed this divisor in two separate identical blocks.)
        if price_precision is not None and price_precision > 0:
            price_divisor = 10 ** price_precision
            for col in ("open", "high", "low", "close", "quote_volume"):
                if col in df.columns:
                    df[col] = df[col] / price_divisor

        # Base-asset volume columns use the size precision.
        if size_precision is not None and size_precision > 0:
            size_divisor = 10 ** size_precision
            for col in ("volume", "buy_vol", "sell_vol"):
                if col in df.columns:
                    df[col] = df[col] / size_divisor

        return df

    async def __aenter__(self):
        """Support async context manager."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Support async context manager."""
        await self.stop()