569 lines
20 KiB
Python
569 lines
20 KiB
Python
"""
|
|
CCXT DataSource adapter for accessing cryptocurrency exchange data.

This adapter provides access to hundreds of cryptocurrency exchanges through
the free CCXT library (not ccxt.pro), supporting both historical data and
polling-based subscriptions.

Numerical Precision:
- OHLCV data uses native floats for optimal DataFrame/analysis performance
- Account balances and order data should use Decimal (via _to_decimal method)
- CCXT returns numeric values as strings or floats depending on configuration
- Price data converted to float (_to_float), financial data to Decimal (_to_decimal)

Real-time Updates:
- Uses polling instead of WebSocket (free CCXT doesn't have WebSocket support)
- Default polling interval: 60 seconds (configurable)
- Simulates real-time subscriptions by periodically fetching latest bars
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from decimal import Decimal
|
|
from typing import Callable, Dict, List, Optional, Set, Union
|
|
|
|
import ccxt.async_support as ccxt
|
|
|
|
from ..base import DataSource
|
|
|
|
logger = logging.getLogger(__name__)
|
|
from ..schema import (
|
|
Bar,
|
|
ColumnInfo,
|
|
DatafeedConfig,
|
|
HistoryResult,
|
|
Resolution,
|
|
SearchResult,
|
|
SymbolInfo,
|
|
)
|
|
|
|
|
|
class CCXTDataSource(DataSource):
    """
    DataSource adapter for CCXT cryptocurrency exchanges (free version).

    Provides access to:
    - Multiple cryptocurrency exchanges (Binance, Coinbase, Kraken, etc.)
    - Historical OHLCV data via REST API
    - Polling-based real-time updates (configurable interval)
    - Symbol search and metadata

    The underlying exchange client is created from ``ccxt.async_support`` and
    has its ``number`` attribute set to ``Decimal``; OHLCV values are converted
    to ``float`` before they are handed to callers.

    Args:
        exchange_id: CCXT exchange identifier (e.g., 'binance', 'coinbase', 'kraken')
        config: Optional exchange-specific configuration (API keys, options)
        sandbox: Whether to use sandbox/testnet mode (default: False)
        poll_interval: Interval in seconds for polling updates (default: 60)
    """
|
|
|
|
def __init__(
    self,
    exchange_id: str = "binance",
    config: Optional[Dict] = None,
    sandbox: bool = False,
    poll_interval: int = 60,
):
    """
    Create the adapter and its underlying CCXT exchange client.

    Args:
        exchange_id: Name of the exchange class looked up on ``ccxt.async_support``.
        config: Dict passed to the exchange constructor; an empty dict is used
            when omitted or falsy.
        sandbox: When true and the client exposes ``set_sandbox_mode``, sandbox
            mode is switched on.
        poll_interval: Seconds slept between polls by the subscription tasks.
    """
    # Plain settings.
    self.exchange_id = exchange_id
    self._config = config if config else {}
    self._sandbox = sandbox
    self._poll_interval = poll_interval

    # Market cache, filled lazily by _ensure_markets_loaded().
    self._markets: Optional[Dict] = None
    self._markets_loaded = False

    # Polling-subscription bookkeeping, all keyed by subscription id.
    self._subscriptions: Dict[str, asyncio.Task] = {}
    self._subscription_callbacks: Dict[str, Callable] = {}
    self._last_bars: Dict[str, int] = {}  # last bar timestamp seen per subscription

    # Build the exchange client from the free async_support package (not pro).
    client = getattr(ccxt, exchange_id)(self._config)

    # Ask CCXT to produce Decimal numbers so balances / order sizes keep full
    # precision; OHLCV values are converted to float later for DataFrame use.
    client.number = Decimal
    self.exchange = client

    # Record which precision mode this exchange reports.
    precision_mode = getattr(client, 'precisionMode', 'UNKNOWN')
    logger.info(
        f"CCXT {exchange_id}: Configured with Decimal mode. "
        f"Exchange precision mode: {precision_mode}"
    )

    if sandbox and hasattr(client, 'set_sandbox_mode'):
        client.set_sandbox_mode(True)
|
|
|
|
@staticmethod
|
|
def _to_decimal(value: Union[str, int, float, Decimal, None]) -> Optional[Decimal]:
|
|
"""
|
|
Convert a value to Decimal for numerical precision.
|
|
|
|
Handles CCXT's mixed output (strings, floats, ints, None).
|
|
Converts floats by converting to string first to avoid precision loss.
|
|
"""
|
|
if value is None:
|
|
return None
|
|
if isinstance(value, Decimal):
|
|
return value
|
|
if isinstance(value, str):
|
|
return Decimal(value)
|
|
if isinstance(value, (int, float)):
|
|
# Convert to string first to avoid float precision issues
|
|
return Decimal(str(value))
|
|
return None
|
|
|
|
@staticmethod
|
|
def _to_float(value: Union[str, int, float, Decimal, None]) -> Optional[float]:
|
|
"""
|
|
Convert a value to float for OHLCV data.
|
|
|
|
OHLCV data is used for charting and DataFrame analysis, where native
|
|
floats provide better performance and compatibility with pandas/numpy.
|
|
For financial precision (balances, order sizes), use _to_decimal() instead.
|
|
|
|
When CCXT is in Decimal mode (exchange.number = Decimal), it returns
|
|
Decimal objects. This method converts them to float for performance.
|
|
|
|
Handles CCXT's output in both modes:
|
|
- Decimal mode: receives Decimal objects
|
|
- Default mode: receives strings, floats, or ints
|
|
"""
|
|
if value is None:
|
|
return None
|
|
if isinstance(value, float):
|
|
return value
|
|
if isinstance(value, Decimal):
|
|
# CCXT in Decimal mode - convert to float for OHLCV
|
|
return float(value)
|
|
if isinstance(value, (str, int)):
|
|
return float(value)
|
|
return None
|
|
|
|
async def _ensure_markets_loaded(self):
    """Load the exchange's markets once and cache them on the instance."""
    if self._markets_loaded:
        return
    # First call: fetch from the exchange, then mark the cache as populated.
    self._markets = await self.exchange.load_markets()
    self._markets_loaded = True
|
|
|
|
async def get_config(self) -> DatafeedConfig:
    """
    Build the datafeed configuration for this exchange.

    Markets are loaded first so the description can report how many trading
    pairs are available.
    """
    await self._ensure_markets_loaded()

    # Fixed list of resolutions advertised by this adapter.
    resolutions = [
        Resolution.M1,
        Resolution.M5,
        Resolution.M15,
        Resolution.M30,
        Resolution.H1,
        Resolution.H4,
        Resolution.D1,
    ]

    summary = (
        f"Live and historical cryptocurrency data from {self.exchange_id} via CCXT library. "
        f"Supports OHLCV data for {len(self._markets) if self._markets else 'many'} trading pairs."
    )

    return DatafeedConfig(
        name=f"CCXT {self.exchange_id.title()}",
        description=summary,
        supported_resolutions=resolutions,
        supports_search=True,
        supports_time=True,
        # One adapter instance maps to exactly one exchange.
        exchanges=[self.exchange_id.upper()],
        symbols_types=["crypto", "spot", "futures", "swap"],
    )
|
|
|
|
async def search_symbols(
    self,
    query: str,
    type: Optional[str] = None,
    exchange: Optional[str] = None,
    limit: int = 30,
) -> List[SearchResult]:
    """
    Search the exchange's markets for symbols matching ``query``.

    A market matches when the query (compared case-insensitively) is a
    substring of its CCXT symbol, base currency, or quote currency.

    Args:
        query: Text to look for; an empty query matches every market.
        type: When given, only markets whose ``type`` equals this value are
            returned (markets without a type are treated as 'spot').
        exchange: Accepted for interface compatibility; not used for
            filtering here, since this adapter serves a single exchange.
        limit: Maximum number of results; a non-positive limit yields [].

    Returns:
        Up to ``limit`` SearchResult objects in market iteration order.
    """
    await self._ensure_markets_loaded()

    # Nothing to return for a non-positive limit or an empty market cache.
    if limit <= 0 or not self._markets:
        return []

    needle = (query or "").upper()
    exchange_label = self.exchange_id.upper()
    results: List[SearchResult] = []

    for symbol, market in self._markets.items():
        # `or` guards against keys that are present but hold None.
        base = market.get('base') or ''
        quote = market.get('quote') or ''
        market_type = market.get('type') or 'spot'

        if type and market_type != type:
            continue

        matches = (
            needle in symbol.upper()
            or needle in base.upper()
            or needle in quote.upper()
        )
        if not matches:
            continue

        results.append(
            SearchResult(
                symbol=f"{base}/{quote}",  # Clean user-facing format
                ticker=f"{exchange_label}:{symbol}",  # Exchange prefix for routing
                full_name=f"{base}/{quote} ({exchange_label})",
                description=f"{base}/{quote} {market_type} trading pair on {self.exchange_id}",
                exchange=exchange_label,
                type=market_type,
            )
        )

        if len(results) >= limit:
            break

    return results
|
|
|
|
@staticmethod
def _precision_to_pricescale(precision, tick_size_mode: bool = False) -> int:
    """
    Convert a CCXT price-precision value into a ``10 ** n`` price scale.

    Args:
        precision: Value of ``market['precision']['price']``; may be an int,
            float, Decimal, numeric string, or None.
        tick_size_mode: True when the exchange reports TICK_SIZE precision,
            i.e. ``precision`` is the tick itself (0.01, 1e-8, 5, ...).

    Returns:
        ``10 ** decimal_places``; 100 (two decimals) when the value is
        missing or unusable.
    """
    default_scale = 100
    if precision is None:
        return default_scale
    try:
        value = precision if isinstance(precision, Decimal) else Decimal(str(precision))
    except (ArithmeticError, TypeError, ValueError):
        return default_scale
    if not value.is_finite() or value < 0:
        return default_scale

    # Outside TICK_SIZE mode an integral value is a count of decimal places.
    if not tick_size_mode and value == value.to_integral_value():
        return 10 ** int(value)

    # Otherwise the value is a tick size. Read the decimal exponent rather
    # than parsing str(), which switches to scientific notation ('1E-8')
    # for small ticks.
    if value == 0:
        return default_scale
    exponent = value.normalize().as_tuple().exponent
    return 10 ** max(0, -exponent)

async def resolve_symbol(self, symbol: str) -> SymbolInfo:
    """
    Return complete metadata for a symbol.

    Args:
        symbol: CCXT market symbol, exactly as keyed in the loaded markets.

    Returns:
        SymbolInfo describing the pair, its OHLCV columns and price scale.

    Raises:
        ValueError: If the symbol is not among the exchange's markets.
    """
    # Local import: only the precision-mode constant is needed here.
    from ccxt.base.decimal_to_precision import TICK_SIZE

    await self._ensure_markets_loaded()

    if symbol not in self._markets:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    market = self._markets[symbol]
    # `or` guards against keys that are present but hold None.
    base = market.get('base') or ''
    quote = market.get('quote') or ''
    market_type = market.get('type') or 'spot'
    exchange_label = self.exchange_id.upper()

    # The exchange is configured with number=Decimal, so the precision may
    # arrive as Decimal as well as int/float; the helper accepts all of them.
    price_precision = (market.get('precision') or {}).get('price')
    tick_size_mode = getattr(self.exchange, 'precisionMode', None) == TICK_SIZE
    pricescale = self._precision_to_pricescale(price_precision, tick_size_mode)

    amount_limits = (market.get('limits') or {}).get('amount') or {}
    min_amount = amount_limits.get('min')
    if min_amount is None:
        min_amount = 'N/A'

    # Price columns are quoted in the quote currency, volume in the base.
    price_columns = (
        ("open", "Opening"),
        ("high", "Highest"),
        ("low", "Lowest"),
        ("close", "Closing"),
    )
    columns = [
        ColumnInfo(
            name=column_name,
            type="float",
            description=f"{label} price in {quote}",
            unit=quote,
        )
        for column_name, label in price_columns
    ]
    columns.append(
        ColumnInfo(
            name="volume",
            type="float",
            description=f"Trading volume in {base}",
            unit=base,
        )
    )

    return SymbolInfo(
        symbol=f"{base}/{quote}",  # Clean user-facing format
        ticker=f"{exchange_label}:{symbol}",  # Exchange prefix for routing
        name=f"{base}/{quote}",
        description=f"{base}/{quote} {market_type} pair on {self.exchange_id}. "
                    f"Minimum order: {min_amount} {base}",
        type=market_type,
        exchange=exchange_label,
        timezone="Etc/UTC",
        session="24x7",
        supported_resolutions=[
            Resolution.M1,
            Resolution.M5,
            Resolution.M15,
            Resolution.M30,
            Resolution.H1,
            Resolution.H4,
            Resolution.D1,
        ],
        has_intraday=True,
        has_daily=True,
        has_weekly_and_monthly=False,
        columns=columns,
        time_column="time",
        has_ohlcv=True,
        pricescale=pricescale,
        minmov=1,
        base_currency=base,
        quote_currency=quote,
    )
|
|
|
|
def _resolution_to_timeframe(self, resolution: str) -> str:
|
|
"""Convert our resolution format to CCXT timeframe format"""
|
|
# Map our resolutions to CCXT timeframes
|
|
mapping = {
|
|
"1": "1m",
|
|
"5": "5m",
|
|
"15": "15m",
|
|
"30": "30m",
|
|
"60": "1h",
|
|
"120": "2h",
|
|
"240": "4h",
|
|
"360": "6h",
|
|
"720": "12h",
|
|
"1D": "1d",
|
|
"1W": "1w",
|
|
"1M": "1M",
|
|
}
|
|
return mapping.get(resolution, "1m")
|
|
|
|
def _timeframe_to_milliseconds(self, timeframe: str) -> int:
|
|
"""Convert CCXT timeframe to milliseconds"""
|
|
unit = timeframe[-1]
|
|
amount = int(timeframe[:-1]) if len(timeframe) > 1 else 1
|
|
|
|
units = {
|
|
's': 1000,
|
|
'm': 60 * 1000,
|
|
'h': 60 * 60 * 1000,
|
|
'd': 24 * 60 * 60 * 1000,
|
|
'w': 7 * 24 * 60 * 60 * 1000,
|
|
'M': 30 * 24 * 60 * 60 * 1000, # Approximate
|
|
}
|
|
|
|
return amount * units.get(unit, 60000)
|
|
|
|
async def get_bars(
    self,
    symbol: str,
    resolution: str,
    from_time: int,
    to_time: int,
    countback: Optional[int] = None,
) -> HistoryResult:
    """
    Fetch historical OHLCV bars from the exchange.

    Candles are requested in batches starting at ``from_time`` until the
    range is covered, the exchange returns nothing more, or ``countback``
    candles have been collected. Bars outside ``[from_time, to_time)`` are
    dropped and OHLCV values are converted to float.

    Args:
        symbol: CCXT market symbol.
        resolution: Resolution string (see _resolution_to_timeframe).
        from_time: Range start, Unix seconds (inclusive).
        to_time: Range end, Unix seconds (exclusive).
        countback: Optional cap on the number of candles collected, counted
            forward from ``from_time``. Non-positive values are ignored.
            NOTE(review): charting clients often mean "the last N bars
            before to_time" by countback - confirm against callers.

    Returns:
        HistoryResult with the bars, column metadata and, when the countback
        cap was reached, a ``nextTime`` hint one bar after the last bar.

    Raises:
        ValueError: If the symbol is unknown or fetching/conversion fails
            (the original error is chained as ``__cause__``).
    """
    logger.info(
        "CCXTDataSource(%s).get_bars: symbol=%s, resolution=%s, "
        "from_time=%s, to_time=%s, countback=%s",
        self.exchange_id, symbol, resolution, from_time, to_time, countback,
    )

    await self._ensure_markets_loaded()

    if symbol not in self._markets:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    timeframe = self._resolution_to_timeframe(resolution)
    bar_seconds = max(1, self._timeframe_to_milliseconds(timeframe) // 1000)

    # CCXT uses milliseconds for timestamps
    since_ms = from_time * 1000
    until_ms = to_time * 1000

    # Cap each request; larger countbacks are served by the paging loop.
    max_batch = 1000
    wanted = countback if countback and countback > 0 else None
    batch_limit = min(wanted, max_batch) if wanted else max_batch

    try:
        candles = []
        cursor = since_ms

        while cursor < until_ms:
            batch = await self.exchange.fetch_ohlcv(
                symbol,
                timeframe=timeframe,
                since=cursor,
                limit=batch_limit,
            )
            if not batch:
                break

            candles.extend(batch)

            newest = batch[-1][0]
            if newest <= cursor:
                break  # No progress, avoid infinite loop
            cursor = newest + 1

            if wanted and len(candles) >= wanted:
                candles = candles[:wanted]
                break

        # Convert to our Bar format with float for OHLCV (used in DataFrames)
        bars = []
        for candle in candles:
            # Slice so extra trailing fields do not break unpacking.
            timestamp_ms, open_price, high, low, close, volume = candle[:6]
            timestamp = int(timestamp_ms) // 1000  # Convert to seconds

            # Only include bars within the requested range
            if timestamp < from_time or timestamp >= to_time:
                continue

            bars.append(
                Bar(
                    time=timestamp,
                    data={
                        "open": self._to_float(open_price),
                        "high": self._to_float(high),
                        "low": self._to_float(low),
                        "close": self._to_float(close),
                        "volume": self._to_float(volume),
                    },
                )
            )

        # Get symbol info for column metadata
        symbol_info = await self.resolve_symbol(symbol)

        logger.info(
            "CCXTDataSource(%s).get_bars: Returning %s bars. First: %s, Last: %s",
            self.exchange_id,
            len(bars),
            bars[0].time if bars else 'N/A',
            bars[-1].time if bars else 'N/A',
        )

        # More data may exist only when the countback cap stopped us.
        next_time = None
        if bars and wanted and len(bars) >= wanted:
            # With a single bar there is no spacing to measure, so use the
            # timeframe's own duration rather than assuming 60 seconds.
            step = bars[-1].time - bars[-2].time if len(bars) > 1 else bar_seconds
            next_time = bars[-1].time + step

        return HistoryResult(
            symbol=symbol,
            resolution=resolution,
            bars=bars,
            columns=symbol_info.columns,
            nextTime=next_time,
        )

    except Exception as e:
        # Callers expect ValueError; keep the original error as the cause.
        raise ValueError(f"Failed to fetch bars for {symbol}: {str(e)}") from e
|
|
|
|
async def subscribe_bars(
    self,
    symbol: str,
    resolution: str,
    on_tick: Callable[[dict], None],
) -> str:
    """
    Subscribe to bar updates via polling.

    Note: Uses polling instead of WebSocket since we're using free CCXT.
    A background task polls at the configured interval (default: 60 seconds)
    and invokes ``on_tick`` with a dict for each newly seen bar.

    Args:
        symbol: CCXT market symbol.
        resolution: Resolution string (see _resolution_to_timeframe).
        on_tick: Callback receiving the tick dict.

    Returns:
        The subscription id to pass to unsubscribe_bars().

    Raises:
        ValueError: If the symbol is not among the exchange's markets.
    """
    await self._ensure_markets_loaded()

    if symbol not in self._markets:
        raise ValueError(f"Symbol '{symbol}' not found on {self.exchange_id}")

    # Two subscriptions created within one clock tick would otherwise share
    # an id, overwriting the first callback and orphaning its task.
    base_id = f"{symbol}:{resolution}:{time.time()}"
    subscription_id = base_id
    attempt = 0
    while (
        subscription_id in self._subscriptions
        or subscription_id in self._subscription_callbacks
    ):
        attempt += 1
        subscription_id = f"{base_id}-{attempt}"

    # Register the callback before the task starts: the poll loop runs only
    # while the id is present in _subscription_callbacks.
    self._subscription_callbacks[subscription_id] = on_tick

    timeframe = self._resolution_to_timeframe(resolution)
    task = asyncio.create_task(
        self._poll_ohlcv(symbol, timeframe, subscription_id)
    )
    self._subscriptions[subscription_id] = task

    return subscription_id
|
|
|
|
async def _poll_ohlcv(self, symbol: str, timeframe: str, subscription_id: str):
    """
    Poll for OHLCV updates at regular intervals.

    Runs while ``subscription_id`` is registered in
    ``_subscription_callbacks``. Each cycle fetches the latest candles and,
    when the newest candle's timestamp is later than the last one reported
    for this subscription, passes a tick dict to the callback. Coroutine
    callbacks are awaited. Errors in a cycle are logged and the loop carries
    on after the usual sleep.

    NOTE(review): a bar is reported once, when first seen; later changes to
    the same (still forming) bar are not re-sent - confirm this matches what
    subscribers expect.
    """
    try:
        while subscription_id in self._subscription_callbacks:
            try:
                # Fetch the last 2 bars so a freshly opened bar is visible.
                ohlcv = await self.exchange.fetch_ohlcv(
                    symbol,
                    timeframe=timeframe,
                    limit=2,
                )

                if ohlcv:
                    # Slice so extra trailing fields do not break unpacking.
                    timestamp_ms, open_price, high, low, close, volume = ohlcv[-1][:6]
                    timestamp = int(timestamp_ms) // 1000

                    # Only send an update if this is a new bar
                    if timestamp > self._last_bars.get(subscription_id, 0):
                        self._last_bars[subscription_id] = timestamp

                        # Floats for OHLCV (used in DataFrames)
                        tick_data = {
                            "time": timestamp,
                            "open": self._to_float(open_price),
                            "high": self._to_float(high),
                            "low": self._to_float(low),
                            "close": self._to_float(close),
                            "volume": self._to_float(volume),
                        }

                        callback = self._subscription_callbacks.get(subscription_id)
                        if callback:
                            outcome = callback(tick_data)
                            # Support async callbacks instead of leaving
                            # their coroutine un-awaited.
                            if asyncio.iscoroutine(outcome) or asyncio.isfuture(outcome):
                                await outcome

            except asyncio.CancelledError:
                # Never treat cancellation as a polling error.
                raise
            except Exception as e:
                logger.exception("Error polling OHLCV for %s: %s", symbol, e)

            # Wait for next poll interval
            await asyncio.sleep(self._poll_interval)

    except asyncio.CancelledError:
        # Cancelled by unsubscribe_bars(); finish quietly.
        pass
|
|
|
|
async def unsubscribe_bars(self, subscription_id: str) -> None:
    """
    Stop a polling subscription and forget its state.

    Removes the callback and last-bar marker for ``subscription_id``, then
    cancels its polling task (if any) and waits for it to finish. Unknown
    ids are ignored.
    """
    # Drop the callback first: the poll loop stops once it is gone.
    for registry in (self._subscription_callbacks, self._last_bars):
        registry.pop(subscription_id, None)

    polling_task = self._subscriptions.pop(subscription_id, None)
    if not polling_task:
        return

    polling_task.cancel()
    try:
        await polling_task
    except asyncio.CancelledError:
        pass
|
|
|
|
async def close(self):
    """Cancel every active subscription, then close the exchange client."""
    # Snapshot the ids: unsubscribe_bars() mutates the dict while we iterate.
    for subscription_id in tuple(self._subscriptions):
        await self.unsubscribe_bars(subscription_id)

    if not hasattr(self.exchange, 'close'):
        return
    await self.exchange.close()
|