""" CCXT DataSource adapter for accessing cryptocurrency exchange data. This adapter provides access to hundreds of cryptocurrency exchanges through the free CCXT library (not ccxt.pro), supporting both historical data and polling-based subscriptions. Numerical Precision: - OHLCV data uses native floats for optimal DataFrame/analysis performance - Account balances and order data should use Decimal (via _to_decimal method) - CCXT returns numeric values as strings or floats depending on configuration - Price data converted to float (_to_float), financial data to Decimal (_to_decimal) Real-time Updates: - Uses polling instead of WebSocket (free CCXT doesn't have WebSocket support) - Default polling interval: 60 seconds (configurable) - Simulates real-time subscriptions by periodically fetching latest bars """ import asyncio import logging import time from datetime import datetime, timezone from decimal import Decimal from typing import Callable, Dict, List, Optional, Set, Union import ccxt.async_support as ccxt from ..base import DataSource logger = logging.getLogger(__name__) from ..schema import ( Bar, ColumnInfo, DatafeedConfig, HistoryResult, Resolution, SearchResult, SymbolInfo, ) class CCXTDataSource(DataSource): """ DataSource adapter for CCXT cryptocurrency exchanges (free version). Provides access to: - Multiple cryptocurrency exchanges (Binance, Coinbase, Kraken, etc.) - Historical OHLCV data via REST API - Polling-based real-time updates (configurable interval) - Symbol search and metadata Args: exchange_id: CCXT exchange identifier (e.g., 'binance', 'coinbase', 'kraken') config: Optional exchange-specific configuration (API keys, options) sandbox: Whether to use sandbox/testnet mode (default: False) poll_interval: Interval in seconds for polling updates (default: 60) """ def __init__( self, exchange_id: str = "binance", config: Optional[Dict] = None, sandbox: bool = False, poll_interval: int = 60, ): self.exchange_id = exchange_id self._config = config or {} self._sandbox = sandbox self._poll_interval = poll_interval # Initialize exchange (using free async_support, not pro) exchange_class = getattr(ccxt, exchange_id) self.exchange = exchange_class(self._config) # Configure CCXT to use Decimal mode for precise financial calculations # This ensures all numeric values from the exchange use Decimal internally # We then convert OHLCV to float for DataFrame performance, but keep # Decimal precision for account balances, order sizes, etc. from decimal import Decimal as PythonDecimal self.exchange.number = PythonDecimal # Log the precision mode being used by this exchange precision_mode = getattr(self.exchange, 'precisionMode', 'UNKNOWN') logger.info( f"CCXT {exchange_id}: Configured with Decimal mode. " f"Exchange precision mode: {precision_mode}" ) if sandbox and hasattr(self.exchange, 'set_sandbox_mode'): self.exchange.set_sandbox_mode(True) # Cache for markets self._markets: Optional[Dict] = None self._markets_loaded = False # Active subscriptions (polling-based) self._subscriptions: Dict[str, asyncio.Task] = {} self._subscription_callbacks: Dict[str, Callable] = {} self._last_bars: Dict[str, int] = {} # Track last bar timestamp per subscription @staticmethod def _to_decimal(value: Union[str, int, float, Decimal, None]) -> Optional[Decimal]: """ Convert a value to Decimal for numerical precision. Handles CCXT's mixed output (strings, floats, ints, None). Converts floats by converting to string first to avoid precision loss. """ if value is None: return None if isinstance(value, Decimal): return value if isinstance(value, str): return Decimal(value) if isinstance(value, (int, float)): # Convert to string first to avoid float precision issues return Decimal(str(value)) return None @staticmethod def _to_float(value: Union[str, int, float, Decimal, None]) -> Optional[float]: """ Convert a value to float for OHLCV data. OHLCV data is used for charting and DataFrame analysis, where native floats provide better performance and compatibility with pandas/numpy. For financial precision (balances, order sizes), use _to_decimal() instead. When CCXT is in Decimal mode (exchange.number = Decimal), it returns Decimal objects. This method converts them to float for performance. Handles CCXT's output in both modes: - Decimal mode: receives Decimal objects - Default mode: receives strings, floats, or ints """ if value is None: return None if isinstance(value, float): return value if isinstance(value, Decimal): # CCXT in Decimal mode - convert to float for OHLCV return float(value) if isinstance(value, (str, int)): return float(value) return None async def _ensure_markets_loaded(self): """Ensure markets are loaded from exchange""" if not self._markets_loaded: self._markets = await self.exchange.load_markets() self._markets_loaded = True async def get_config(self) -> DatafeedConfig: """Get datafeed configuration""" await self._ensure_markets_loaded() # Determine supported resolutions based on exchange capabilities supported_resolutions = [ Resolution.M1, Resolution.M5, Resolution.M15, Resolution.M30, Resolution.H1, Resolution.H4, Resolution.D1, ] # Get unique exchange names (most CCXT exchanges are just one) exchanges = [self.exchange_id.upper()] return DatafeedConfig( name=f"CCXT {self.exchange_id.title()}", description=f"Live and historical cryptocurrency data from {self.exchange_id} via CCXT library. " f"Supports OHLCV data for {len(self._markets) if self._markets else 'many'} trading pairs.", supported_resolutions=supported_resolutions, supports_search=True, supports_time=True, exchanges=exchanges, symbols_types=["crypto", "spot", "futures", "swap"], ) async def search_symbols( self, query: str, type: Optional[str] = None, exchange: Optional[str] = None, limit: int = 30, ) -> List[SearchResult]: """Search for symbols on the exchange""" await self._ensure_markets_loaded() query_upper = query.upper() results = [] for symbol, market in self._markets.items(): # Match query against symbol or base/quote currencies if (query_upper in symbol or query_upper in market.get('base', '') or query_upper in market.get('quote', '')): # Filter by type if specified market_type = market.get('type', 'spot') if type and market_type != type: continue # Create search result base = market.get('base', '') quote = market.get('quote', '') results.append( SearchResult( symbol=f"{base}/{quote}", # Clean user-facing format ticker=f"{self.exchange_id.upper()}:{symbol}", # Ticker with exchange prefix for routing full_name=f"{base}/{quote} ({self.exchange_id.upper()})", description=f"{base}/{quote} {market_type} trading pair on {self.exchange_id}", exchange=self.exchange_id.upper(), type=market_type, ) ) if len(results) >= limit: break return results async def resolve_symbol(self, symbol: str) -> SymbolInfo: """Get complete metadata for a symbol""" await self._ensure_markets_loaded() if symbol not in self._markets: raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}") market = self._markets[symbol] base = market.get('base', '') quote = market.get('quote', '') market_type = market.get('type', 'spot') # Determine price scale from market precision # CCXT precision can be in different modes: # - DECIMAL_PLACES (int): number of decimal places (e.g., 2 = 0.01) # - TICK_SIZE (float): actual tick size (e.g., 0.01, 0.00001) # We need to convert to pricescale (10^n where n is decimal places) price_precision = market.get('precision', {}).get('price', 2) if isinstance(price_precision, float): # TICK_SIZE mode: precision is the actual tick size (e.g., 0.01, 0.00001) # Convert tick size to decimal places # For 0.01 -> 2 decimal places, 0.00001 -> 5 decimal places tick_str = str(Decimal(str(price_precision))) if '.' in tick_str: decimal_places = len(tick_str.split('.')[1].rstrip('0')) else: decimal_places = 0 pricescale = 10 ** decimal_places else: # DECIMAL_PLACES or SIGNIFICANT_DIGITS mode: precision is an integer # Assume DECIMAL_PLACES mode (most common for price) pricescale = 10 ** int(price_precision) return SymbolInfo( symbol=f"{base}/{quote}", # Clean user-facing format ticker=f"{self.exchange_id.upper()}:{symbol}", # Ticker with exchange prefix for routing name=f"{base}/{quote}", description=f"{base}/{quote} {market_type} pair on {self.exchange_id}. " f"Minimum order: {market.get('limits', {}).get('amount', {}).get('min', 'N/A')} {base}", type=market_type, exchange=self.exchange_id.upper(), timezone="Etc/UTC", session="24x7", supported_resolutions=[ Resolution.M1, Resolution.M5, Resolution.M15, Resolution.M30, Resolution.H1, Resolution.H4, Resolution.D1, ], has_intraday=True, has_daily=True, has_weekly_and_monthly=False, columns=[ ColumnInfo( name="open", type="float", description=f"Opening price in {quote}", unit=quote, ), ColumnInfo( name="high", type="float", description=f"Highest price in {quote}", unit=quote, ), ColumnInfo( name="low", type="float", description=f"Lowest price in {quote}", unit=quote, ), ColumnInfo( name="close", type="float", description=f"Closing price in {quote}", unit=quote, ), ColumnInfo( name="volume", type="float", description=f"Trading volume in {base}", unit=base, ), ], time_column="time", has_ohlcv=True, pricescale=pricescale, minmov=1, base_currency=base, quote_currency=quote, ) def _resolution_to_timeframe(self, resolution: str) -> str: """Convert our resolution format to CCXT timeframe format""" # Map our resolutions to CCXT timeframes mapping = { "1": "1m", "5": "5m", "15": "15m", "30": "30m", "60": "1h", "120": "2h", "240": "4h", "360": "6h", "720": "12h", "1D": "1d", "1W": "1w", "1M": "1M", } return mapping.get(resolution, "1m") def _timeframe_to_milliseconds(self, timeframe: str) -> int: """Convert CCXT timeframe to milliseconds""" unit = timeframe[-1] amount = int(timeframe[:-1]) if len(timeframe) > 1 else 1 units = { 's': 1000, 'm': 60 * 1000, 'h': 60 * 60 * 1000, 'd': 24 * 60 * 60 * 1000, 'w': 7 * 24 * 60 * 60 * 1000, 'M': 30 * 24 * 60 * 60 * 1000, # Approximate } return amount * units.get(unit, 60000) async def get_bars( self, symbol: str, resolution: str, from_time: int, to_time: int, countback: Optional[int] = None, ) -> HistoryResult: """Get historical bars from the exchange""" logger.info( f"CCXTDataSource({self.exchange_id}).get_bars: symbol={symbol}, resolution={resolution}, " f"from_time={from_time}, to_time={to_time}, countback={countback}" ) await self._ensure_markets_loaded() if symbol not in self._markets: raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}") timeframe = self._resolution_to_timeframe(resolution) # CCXT uses milliseconds for timestamps since = from_time * 1000 until = to_time * 1000 # Fetch OHLCV data limit = countback if countback else 1000 try: # Fetch in batches if needed all_ohlcv = [] current_since = since while current_since < until: ohlcv = await self.exchange.fetch_ohlcv( symbol, timeframe=timeframe, since=current_since, limit=limit, ) if not ohlcv: break all_ohlcv.extend(ohlcv) # Update since for next batch last_timestamp = ohlcv[-1][0] if last_timestamp <= current_since: break # No progress, avoid infinite loop current_since = last_timestamp + 1 # Stop if we have enough bars if countback and len(all_ohlcv) >= countback: all_ohlcv = all_ohlcv[:countback] break # Convert to our Bar format with float for OHLCV (used in DataFrames) bars = [] for candle in all_ohlcv: timestamp_ms, open_price, high, low, close, volume = candle timestamp = timestamp_ms // 1000 # Convert to seconds # Only include bars within requested range if timestamp < from_time or timestamp >= to_time: continue bars.append( Bar( time=timestamp, data={ "open": self._to_float(open_price), "high": self._to_float(high), "low": self._to_float(low), "close": self._to_float(close), "volume": self._to_float(volume), }, ) ) # Get symbol info for column metadata symbol_info = await self.resolve_symbol(symbol) logger.info( f"CCXTDataSource({self.exchange_id}).get_bars: Returning {len(bars)} bars. " f"First: {bars[0].time if bars else 'N/A'}, Last: {bars[-1].time if bars else 'N/A'}" ) # Determine if more data is available next_time = None if bars and countback and len(bars) >= countback: next_time = bars[-1].time + (bars[-1].time - bars[-2].time if len(bars) > 1 else 60) return HistoryResult( symbol=symbol, resolution=resolution, bars=bars, columns=symbol_info.columns, nextTime=next_time, ) except Exception as e: raise ValueError(f"Failed to fetch bars for {symbol}: {str(e)}") async def subscribe_bars( self, symbol: str, resolution: str, on_tick: Callable[[dict], None], ) -> str: """ Subscribe to bar updates via polling. Note: Uses polling instead of WebSocket since we're using free CCXT. Polls at the configured interval (default: 60 seconds). """ await self._ensure_markets_loaded() if symbol not in self._markets: raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}") subscription_id = f"{symbol}:{resolution}:{time.time()}" # Store callback self._subscription_callbacks[subscription_id] = on_tick # Start polling task timeframe = self._resolution_to_timeframe(resolution) task = asyncio.create_task( self._poll_ohlcv(symbol, timeframe, subscription_id) ) self._subscriptions[subscription_id] = task return subscription_id async def _poll_ohlcv(self, symbol: str, timeframe: str, subscription_id: str): """ Poll for OHLCV updates at regular intervals. This simulates real-time updates by fetching the latest bars periodically. Only sends updates when new bars are detected. """ try: while subscription_id in self._subscription_callbacks: try: # Fetch latest bars ohlcv = await self.exchange.fetch_ohlcv( symbol, timeframe=timeframe, limit=2, # Get last 2 bars to detect new ones ) if ohlcv and len(ohlcv) > 0: # Get the latest candle latest = ohlcv[-1] timestamp_ms, open_price, high, low, close, volume = latest timestamp = timestamp_ms // 1000 # Only send update if this is a new bar last_timestamp = self._last_bars.get(subscription_id, 0) if timestamp > last_timestamp: self._last_bars[subscription_id] = timestamp # Convert to our format with float for OHLCV (used in DataFrames) tick_data = { "time": timestamp, "open": self._to_float(open_price), "high": self._to_float(high), "low": self._to_float(low), "close": self._to_float(close), "volume": self._to_float(volume), } # Call the callback callback = self._subscription_callbacks.get(subscription_id) if callback: callback(tick_data) except Exception as e: print(f"Error polling OHLCV for {symbol}: {e}") # Wait for next poll interval await asyncio.sleep(self._poll_interval) except asyncio.CancelledError: pass async def unsubscribe_bars(self, subscription_id: str) -> None: """Unsubscribe from polling updates""" # Remove callback and tracking self._subscription_callbacks.pop(subscription_id, None) self._last_bars.pop(subscription_id, None) # Cancel polling task task = self._subscriptions.pop(subscription_id, None) if task: task.cancel() try: await task except asyncio.CancelledError: pass async def close(self): """Close exchange connection and cleanup""" # Cancel all subscriptions for subscription_id in list(self._subscriptions.keys()): await self.unsubscribe_bars(subscription_id) # Close exchange if hasattr(self.exchange, 'close'): await self.exchange.close()