Support custom column selection in OHLC queries and extend CCXT with configurable exchange-specific fields

- Add `columns` parameter to `get_ohlc_async` and pass it through to Iceberg queries
- Replace hardcoded Binance field extraction with declarative `EXCHANGE_OHLCV_EXTENSIONS` config
- Add `applyScale` helper for field-specific transformations (ms_to_ns, price, size, int)
- Support `complementOf` spec for derived fields (e.g., sell_vol computed as total - buy_vol)
- Apply extensions dynamically in `convertToOHLC` and gap-filling logic
- Remove redundant post-fetch column filtering in DataAPI (column selection is now handled by the Iceberg query via `columns`)
This commit is contained in:
2026-04-28 20:00:10 -04:00
parent 77e9ad7f68
commit b4e99744d8
3 changed files with 50 additions and 28 deletions

View File

@@ -166,13 +166,11 @@ class DataAPIImpl(DataAPI):
period_seconds=period_seconds,
start_time=start_nanos,
end_time=end_nanos,
request_timeout=self.request_timeout
columns=columns_to_fetch,
request_timeout=self.request_timeout,
)
# Select only requested columns (filter out metadata and unrequested fields)
if not df.empty:
available_cols = [col for col in columns_to_fetch if col in df.columns]
df = df[available_cols]
self._bars_fetched += len(df)
return df

View File

@@ -5,7 +5,7 @@ OHLCClient - High-level API for fetching OHLC data with smart caching
import asyncio
import pandas as pd
import logging
from typing import Optional
from typing import List, Optional
from .iceberg_client import IcebergClient
from .history_client import HistoryClient
from .symbol_metadata_client import SymbolMetadataClient
@@ -90,6 +90,7 @@ class OHLCClient:
period_seconds: int,
start_time: int,
end_time: int,
columns: Optional[List[str]] = None,
request_timeout: float = 120.0
) -> pd.DataFrame:
"""
@@ -123,7 +124,7 @@ class OHLCClient:
end_time = ((end_time + period_nanos - 1) // period_nanos) * period_nanos # exclusive
# Step 1: Check Iceberg for existing data (run in thread — scan.to_pandas() blocks ~3-5s)
df = await asyncio.to_thread(self.iceberg.query_ohlc, ticker, period_seconds, start_time, end_time)
df = await asyncio.to_thread(self.iceberg.query_ohlc, ticker, period_seconds, start_time, end_time, columns)
# Step 2: Identify missing ranges — pass df to avoid a redundant Iceberg scan
missing_ranges = self.iceberg.find_missing_ranges(
@@ -149,7 +150,7 @@ class OHLCClient:
raise ValueError(f"Historical data request failed: {result['error_message']}")
# Step 5: Query Iceberg again for complete dataset (run in thread)
df = await asyncio.to_thread(self.iceberg.query_ohlc, ticker, period_seconds, start_time, end_time)
df = await asyncio.to_thread(self.iceberg.query_ohlc, ticker, period_seconds, start_time, end_time, columns)
return self._apply_decimal_correction(ticker, df)