Files
ai/backend.old/src/datasource/adapters/ccxt_adapter.py
2026-03-11 18:47:11 -04:00

569 lines
20 KiB
Python

"""
CCXT DataSource adapter for accessing cryptocurrency exchange data.
This adapter provides access to hundreds of cryptocurrency exchanges through
the free CCXT library (not ccxt.pro), supporting both historical data and
polling-based subscriptions.
Numerical Precision:
- OHLCV data uses native floats for optimal DataFrame/analysis performance
- Account balances and order data should use Decimal (via _to_decimal method)
- CCXT returns numeric values as strings or floats depending on configuration
- Price data converted to float (_to_float), financial data to Decimal (_to_decimal)
Real-time Updates:
- Uses polling instead of WebSocket (free CCXT doesn't have WebSocket support)
- Default polling interval: 60 seconds (configurable)
- Simulates real-time subscriptions by periodically fetching latest bars
"""
import asyncio
import logging
import time
from datetime import datetime, timezone
from decimal import Decimal
from typing import Callable, Dict, List, Optional, Set, Union
import ccxt.async_support as ccxt
from ..base import DataSource
logger = logging.getLogger(__name__)
from ..schema import (
Bar,
ColumnInfo,
DatafeedConfig,
HistoryResult,
Resolution,
SearchResult,
SymbolInfo,
)
class CCXTDataSource(DataSource):
    """
    DataSource adapter for CCXT cryptocurrency exchanges (free version).

    Provides access to:
    - Multiple cryptocurrency exchanges (Binance, Coinbase, Kraken, etc.)
    - Historical OHLCV data via REST API
    - Polling-based real-time updates (configurable interval)
    - Symbol search and metadata

    Args:
        exchange_id: CCXT exchange identifier (e.g., 'binance', 'coinbase', 'kraken').
            It is looked up as an attribute of ``ccxt.async_support``.
        config: Optional exchange-specific configuration (API keys, options),
            passed as-is to the exchange constructor.
        sandbox: Whether to use sandbox/testnet mode (default: False). Only
            applied when the exchange object exposes ``set_sandbox_mode``.
        poll_interval: Interval in seconds for polling updates (default: 60)
    """
def __init__(
    self,
    exchange_id: str = "binance",
    config: Optional[Dict] = None,
    sandbox: bool = False,
    poll_interval: int = 60,
):
    """
    Build the exchange client and initialise subscription bookkeeping.

    Args:
        exchange_id: Name of the exchange class to look up on the CCXT
            async module.
        config: Configuration dict handed to the exchange constructor;
            an empty dict is used when omitted.
        sandbox: Request sandbox mode when the exchange supports it.
        poll_interval: Seconds to wait between polling requests.
    """
    self.exchange_id = exchange_id
    self._config = config if config else {}
    self._sandbox = sandbox
    self._poll_interval = poll_interval

    # Free async_support client (not ccxt.pro): look the class up by name.
    self.exchange = getattr(ccxt, exchange_id)(self._config)

    # Ask CCXT to produce Decimal numbers. OHLCV values are converted to
    # float later on; balances and order sizes can keep Decimal precision.
    self.exchange.number = Decimal

    # Report which precision mode the exchange declares.
    precision_mode = getattr(self.exchange, 'precisionMode', 'UNKNOWN')
    logger.info(
        f"CCXT {exchange_id}: Configured with Decimal mode. "
        f"Exchange precision mode: {precision_mode}"
    )

    if sandbox:
        if hasattr(self.exchange, 'set_sandbox_mode'):
            self.exchange.set_sandbox_mode(True)

    # Market cache, filled lazily on first use.
    self._markets: Optional[Dict] = None
    self._markets_loaded = False

    # Polling subscriptions: task, callback and last seen bar timestamp,
    # each keyed by subscription id.
    self._subscriptions: Dict[str, asyncio.Task] = {}
    self._subscription_callbacks: Dict[str, Callable] = {}
    self._last_bars: Dict[str, int] = {}
@staticmethod
def _to_decimal(value: Union[str, int, float, Decimal, None]) -> Optional[Decimal]:
"""
Convert a value to Decimal for numerical precision.
Handles CCXT's mixed output (strings, floats, ints, None).
Converts floats by converting to string first to avoid precision loss.
"""
if value is None:
return None
if isinstance(value, Decimal):
return value
if isinstance(value, str):
return Decimal(value)
if isinstance(value, (int, float)):
# Convert to string first to avoid float precision issues
return Decimal(str(value))
return None
@staticmethod
def _to_float(value: Union[str, int, float, Decimal, None]) -> Optional[float]:
"""
Convert a value to float for OHLCV data.
OHLCV data is used for charting and DataFrame analysis, where native
floats provide better performance and compatibility with pandas/numpy.
For financial precision (balances, order sizes), use _to_decimal() instead.
When CCXT is in Decimal mode (exchange.number = Decimal), it returns
Decimal objects. This method converts them to float for performance.
Handles CCXT's output in both modes:
- Decimal mode: receives Decimal objects
- Default mode: receives strings, floats, or ints
"""
if value is None:
return None
if isinstance(value, float):
return value
if isinstance(value, Decimal):
# CCXT in Decimal mode - convert to float for OHLCV
return float(value)
if isinstance(value, (str, int)):
return float(value)
return None
async def _ensure_markets_loaded(self):
"""Ensure markets are loaded from exchange"""
if not self._markets_loaded:
self._markets = await self.exchange.load_markets()
self._markets_loaded = True
async def get_config(self) -> DatafeedConfig:
    """
    Describe this datafeed: name, description and capabilities.

    The list of supported resolutions is fixed in this method; it is not
    derived from the exchange. The description mentions the number of
    loaded markets, or the word 'many' when none are available.
    """
    await self._ensure_markets_loaded()

    exchange_label = self.exchange_id.upper()
    pair_count = len(self._markets) if self._markets else 'many'
    description = (
        f"Live and historical cryptocurrency data from {self.exchange_id} via CCXT library. "
        f"Supports OHLCV data for {pair_count} trading pairs."
    )

    return DatafeedConfig(
        name=f"CCXT {self.exchange_id.title()}",
        description=description,
        supported_resolutions=[
            Resolution.M1,
            Resolution.M5,
            Resolution.M15,
            Resolution.M30,
            Resolution.H1,
            Resolution.H4,
            Resolution.D1,
        ],
        supports_search=True,
        supports_time=True,
        # A CCXT client talks to exactly one exchange.
        exchanges=[exchange_label],
        symbols_types=["crypto", "spot", "futures", "swap"],
    )
async def search_symbols(
    self,
    query: str,
    type: Optional[str] = None,
    exchange: Optional[str] = None,
    limit: int = 30,
) -> List[SearchResult]:
    """
    Search the exchange's markets for symbols matching ``query``.

    A market matches when the upper-cased query is a substring of its
    CCXT symbol, its base currency or its quote currency.

    Args:
        query: Text to look for; compared in upper case.
        type: When given, only markets whose ``type`` equals this value
            are returned (markets without a type count as 'spot').
        exchange: Accepted for interface compatibility; this adapter
            serves a single exchange and does not filter on it.
        limit: Maximum number of results. A non-positive limit returns
            an empty list.

    Returns:
        Up to ``limit`` search results, in market iteration order.
    """
    await self._ensure_markets_loaded()

    # Previously the limit was only checked after appending, so a limit
    # of 0 (or less) still produced one result.
    if limit <= 0:
        return []

    needle = query.upper()
    exchange_label = self.exchange_id.upper()
    results = []

    for symbol, market in self._markets.items():
        # A market may carry the key with a None value; `.get(key, '')`
        # would hand that None to the `in` test and raise TypeError.
        base = market.get('base') or ''
        quote = market.get('quote') or ''

        if not (needle in symbol or needle in base or needle in quote):
            continue

        market_type = market.get('type', 'spot')
        if type and market_type != type:
            continue

        results.append(
            SearchResult(
                symbol=f"{base}/{quote}",  # Clean user-facing format
                ticker=f"{exchange_label}:{symbol}",  # Exchange prefix for routing
                full_name=f"{base}/{quote} ({exchange_label})",
                description=f"{base}/{quote} {market_type} trading pair on {self.exchange_id}",
                exchange=exchange_label,
                type=market_type,
            )
        )
        if len(results) >= limit:
            break

    return results
async def resolve_symbol(self, symbol: str) -> SymbolInfo:
    """
    Build the full metadata record for one market.

    Args:
        symbol: Market symbol exactly as keyed in the loaded markets.

    Returns:
        Symbol metadata including OHLCV column descriptions and the
        price scale derived from the market's price precision.

    Raises:
        ValueError: If the symbol is not among the loaded markets.
    """
    await self._ensure_markets_loaded()
    if symbol not in self._markets:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    market = self._markets[symbol]
    base = market.get('base', '')
    quote = market.get('quote', '')
    market_type = market.get('type', 'spot')
    exchange_label = self.exchange_id.upper()

    # Derive pricescale (10^n, n = number of decimal places) from the
    # market's price precision. CCXT reports precision either as:
    # - an int: number of decimal places (e.g. 2 -> 0.01), or
    # - a tick size (e.g. 0.01, 0.00001). Because the exchange is
    #   configured with `number = Decimal`, the tick size may arrive as a
    #   Decimal rather than a float, so every non-int value is treated
    #   as a tick size.
    price_precision = (market.get('precision') or {}).get('price')
    if price_precision is None:
        # Same default as a missing key: two decimal places.
        decimal_places = 2
    elif isinstance(price_precision, int):
        # Integer precision is assumed to be a decimal-place count.
        decimal_places = price_precision
    else:
        tick = Decimal(str(price_precision))
        if tick.is_finite():
            # Read the exponent instead of parsing the printed form:
            # small ticks print in scientific notation ('1E-8'), which
            # has no '.' to split on.
            decimal_places = -tick.normalize().as_tuple().exponent
        else:
            decimal_places = 0
    # Ticks >= 10 (positive exponent) and negative precisions map to 1.
    pricescale = 10 ** max(int(decimal_places), 0)

    min_amount = market.get('limits', {}).get('amount', {}).get('min', 'N/A')

    return SymbolInfo(
        symbol=f"{base}/{quote}",  # Clean user-facing format
        ticker=f"{exchange_label}:{symbol}",  # Exchange prefix for routing
        name=f"{base}/{quote}",
        description=f"{base}/{quote} {market_type} pair on {self.exchange_id}. "
                    f"Minimum order: {min_amount} {base}",
        type=market_type,
        exchange=exchange_label,
        timezone="Etc/UTC",
        session="24x7",
        supported_resolutions=[
            Resolution.M1,
            Resolution.M5,
            Resolution.M15,
            Resolution.M30,
            Resolution.H1,
            Resolution.H4,
            Resolution.D1,
        ],
        has_intraday=True,
        has_daily=True,
        has_weekly_and_monthly=False,
        columns=[
            ColumnInfo(
                name="open",
                type="float",
                description=f"Opening price in {quote}",
                unit=quote,
            ),
            ColumnInfo(
                name="high",
                type="float",
                description=f"Highest price in {quote}",
                unit=quote,
            ),
            ColumnInfo(
                name="low",
                type="float",
                description=f"Lowest price in {quote}",
                unit=quote,
            ),
            ColumnInfo(
                name="close",
                type="float",
                description=f"Closing price in {quote}",
                unit=quote,
            ),
            ColumnInfo(
                name="volume",
                type="float",
                description=f"Trading volume in {base}",
                unit=base,
            ),
        ],
        time_column="time",
        has_ohlcv=True,
        pricescale=pricescale,
        minmov=1,
        base_currency=base,
        quote_currency=quote,
    )
def _resolution_to_timeframe(self, resolution: str) -> str:
"""Convert our resolution format to CCXT timeframe format"""
# Map our resolutions to CCXT timeframes
mapping = {
"1": "1m",
"5": "5m",
"15": "15m",
"30": "30m",
"60": "1h",
"120": "2h",
"240": "4h",
"360": "6h",
"720": "12h",
"1D": "1d",
"1W": "1w",
"1M": "1M",
}
return mapping.get(resolution, "1m")
def _timeframe_to_milliseconds(self, timeframe: str) -> int:
"""Convert CCXT timeframe to milliseconds"""
unit = timeframe[-1]
amount = int(timeframe[:-1]) if len(timeframe) > 1 else 1
units = {
's': 1000,
'm': 60 * 1000,
'h': 60 * 60 * 1000,
'd': 24 * 60 * 60 * 1000,
'w': 7 * 24 * 60 * 60 * 1000,
'M': 30 * 24 * 60 * 60 * 1000, # Approximate
}
return amount * units.get(unit, 60000)
async def get_bars(
    self,
    symbol: str,
    resolution: str,
    from_time: int,
    to_time: int,
    countback: Optional[int] = None,
) -> HistoryResult:
    """
    Fetch historical OHLCV bars for a symbol.

    Bars are requested from the exchange in batches starting at
    ``from_time`` until a batch reaches ``to_time``, the exchange returns
    nothing, no progress is made, or ``countback`` bars were collected.

    Args:
        symbol: Market symbol exactly as keyed in the loaded markets.
        resolution: Datafeed resolution string, converted to a CCXT
            timeframe.
        from_time: Start of the range in seconds (inclusive).
        to_time: End of the range in seconds (exclusive).
        countback: Optional cap on the number of bars fetched; also used
            as the per-request limit (1000 when not given).

    Returns:
        The bars inside ``[from_time, to_time)`` with OHLCV values as
        floats, the symbol's column metadata, and ``nextTime`` set only
        when at least ``countback`` bars are returned.

    Raises:
        ValueError: If the symbol is unknown, or if fetching/converting
            fails; in the latter case the original exception is attached
            as ``__cause__``.
    """
    logger.info(
        f"CCXTDataSource({self.exchange_id}).get_bars: symbol={symbol}, resolution={resolution}, "
        f"from_time={from_time}, to_time={to_time}, countback={countback}"
    )
    await self._ensure_markets_loaded()
    if symbol not in self._markets:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    timeframe = self._resolution_to_timeframe(resolution)

    # CCXT uses milliseconds for timestamps
    since = from_time * 1000
    until = to_time * 1000

    limit = countback if countback else 1000

    try:
        # Fetch in batches if needed
        all_ohlcv = []
        current_since = since
        while current_since < until:
            ohlcv = await self.exchange.fetch_ohlcv(
                symbol,
                timeframe=timeframe,
                since=current_since,
                limit=limit,
            )
            if not ohlcv:
                break
            all_ohlcv.extend(ohlcv)

            # Continue right after the last bar received
            last_timestamp = ohlcv[-1][0]
            if last_timestamp <= current_since:
                break  # No progress, avoid infinite loop
            current_since = last_timestamp + 1

            # Stop if we have enough bars
            if countback and len(all_ohlcv) >= countback:
                all_ohlcv = all_ohlcv[:countback]
                break

        # Convert to our Bar format with float for OHLCV (used in DataFrames)
        bars = []
        for candle in all_ohlcv:
            timestamp_ms, open_price, high, low, close, volume = candle
            timestamp = timestamp_ms // 1000  # Convert to seconds

            # Only include bars within requested range
            if timestamp < from_time or timestamp >= to_time:
                continue

            bars.append(
                Bar(
                    time=timestamp,
                    data={
                        "open": self._to_float(open_price),
                        "high": self._to_float(high),
                        "low": self._to_float(low),
                        "close": self._to_float(close),
                        "volume": self._to_float(volume),
                    },
                )
            )

        # Get symbol info for column metadata
        symbol_info = await self.resolve_symbol(symbol)

        logger.info(
            f"CCXTDataSource({self.exchange_id}).get_bars: Returning {len(bars)} bars. "
            f"First: {bars[0].time if bars else 'N/A'}, Last: {bars[-1].time if bars else 'N/A'}"
        )

        # Signal that more data may follow only when the request was
        # filled; the step is the spacing of the last two bars (60s if
        # only one bar is available).
        next_time = None
        if bars and countback and len(bars) >= countback:
            next_time = bars[-1].time + (bars[-1].time - bars[-2].time if len(bars) > 1 else 60)

        return HistoryResult(
            symbol=symbol,
            resolution=resolution,
            bars=bars,
            columns=symbol_info.columns,
            nextTime=next_time,
        )
    except Exception as e:
        # Chain explicitly so the original error and traceback stay
        # attached to the ValueError callers receive.
        raise ValueError(f"Failed to fetch bars for {symbol}: {str(e)}") from e
async def subscribe_bars(
    self,
    symbol: str,
    resolution: str,
    on_tick: Callable[[dict], None],
) -> str:
    """
    Start a polling subscription for bar updates on one symbol.

    The free CCXT library is used, so updates come from a background
    polling task rather than a WebSocket; it polls at the configured
    interval (default: 60 seconds).

    Args:
        symbol: Market symbol exactly as keyed in the loaded markets.
        resolution: Datafeed resolution string.
        on_tick: Callback invoked with a dict for each new bar.

    Returns:
        The subscription id, built from the symbol, the resolution and
        the current time.

    Raises:
        ValueError: If the symbol is not among the loaded markets.
    """
    await self._ensure_markets_loaded()
    symbol_known = symbol in self._markets
    if not symbol_known:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    subscription_id = f"{symbol}:{resolution}:{time.time()}"

    # Register the callback before the poller starts so its loop
    # condition sees the subscription as active.
    self._subscription_callbacks[subscription_id] = on_tick

    poller = asyncio.create_task(
        self._poll_ohlcv(
            symbol,
            self._resolution_to_timeframe(resolution),
            subscription_id,
        )
    )
    self._subscriptions[subscription_id] = poller
    return subscription_id
async def _poll_ohlcv(self, symbol: str, timeframe: str, subscription_id: str):
"""
Poll for OHLCV updates at regular intervals.
This simulates real-time updates by fetching the latest bars periodically.
Only sends updates when new bars are detected.
"""
try:
while subscription_id in self._subscription_callbacks:
try:
# Fetch latest bars
ohlcv = await self.exchange.fetch_ohlcv(
symbol,
timeframe=timeframe,
limit=2, # Get last 2 bars to detect new ones
)
if ohlcv and len(ohlcv) > 0:
# Get the latest candle
latest = ohlcv[-1]
timestamp_ms, open_price, high, low, close, volume = latest
timestamp = timestamp_ms // 1000
# Only send update if this is a new bar
last_timestamp = self._last_bars.get(subscription_id, 0)
if timestamp > last_timestamp:
self._last_bars[subscription_id] = timestamp
# Convert to our format with float for OHLCV (used in DataFrames)
tick_data = {
"time": timestamp,
"open": self._to_float(open_price),
"high": self._to_float(high),
"low": self._to_float(low),
"close": self._to_float(close),
"volume": self._to_float(volume),
}
# Call the callback
callback = self._subscription_callbacks.get(subscription_id)
if callback:
callback(tick_data)
except Exception as e:
print(f"Error polling OHLCV for {symbol}: {e}")
# Wait for next poll interval
await asyncio.sleep(self._poll_interval)
except asyncio.CancelledError:
pass
async def unsubscribe_bars(self, subscription_id: str) -> None:
    """
    Stop a polling subscription and drop its state.

    Removes the callback and the last-bar marker, then cancels the
    polling task and waits for it to finish. Unknown ids are ignored.
    """
    for registry in (self._subscription_callbacks, self._last_bars):
        registry.pop(subscription_id, None)

    poller = self._subscriptions.pop(subscription_id, None)
    if not poller:
        return

    poller.cancel()
    try:
        # Wait until the task has actually wound down.
        await poller
    except asyncio.CancelledError:
        pass
async def close(self):
    """
    Shut the adapter down.

    Unsubscribes every active subscription, then closes the exchange
    client if it provides a ``close`` method.
    """
    # Iterate over a snapshot: unsubscribing mutates the dict.
    for active_id in tuple(self._subscriptions):
        await self.unsubscribe_bars(active_id)

    if not hasattr(self.exchange, 'close'):
        return
    await self.exchange.close()