Symbol & data refactoring for Nautilus
This commit is contained in:
@@ -36,7 +36,7 @@ async def main():
|
||||
try:
|
||||
# Fetch OHLC data (automatically checks cache and requests missing data)
|
||||
df = await client.fetch_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600, # 1-hour candles
|
||||
start_time=1735689600000000000, # nanoseconds
|
||||
end_time=1736294399000000000
|
||||
@@ -112,7 +112,7 @@ Initialize the client with connection parameters.
|
||||
Fetch OHLC data with smart caching.
|
||||
|
||||
**Parameters:**
|
||||
- `ticker` (str): Market identifier (e.g., "BINANCE:BTC/USDT")
|
||||
- `ticker` (str): Market identifier (e.g., "BTC/USDT.BINANCE")
|
||||
- `period_seconds` (int): OHLC period in seconds (60, 300, 3600, 86400, etc.)
|
||||
- `start_time` (int): Start timestamp in nanoseconds
|
||||
- `end_time` (int): End timestamp in nanoseconds
|
||||
@@ -179,7 +179,7 @@ await client.connect()
|
||||
|
||||
# Now safe to make requests
|
||||
result = await client.request_historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time=1735689600000000000,
|
||||
end_time=1736294399000000000
|
||||
|
||||
@@ -29,7 +29,7 @@ api = get_api()
|
||||
|
||||
# Method 1: Using Unix timestamps (seconds)
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600, # 1 hour candles
|
||||
start_time=1640000000, # Unix timestamp in seconds
|
||||
end_time=1640086400,
|
||||
@@ -38,7 +38,7 @@ df = asyncio.run(api.data.historical_ohlc(
|
||||
|
||||
# Method 2: Using date strings
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20", # Simple date string
|
||||
end_time="2021-12-21",
|
||||
@@ -47,7 +47,7 @@ df = asyncio.run(api.data.historical_ohlc(
|
||||
|
||||
# Method 3: Using date strings with time
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20 00:00:00",
|
||||
end_time="2021-12-20 23:59:59",
|
||||
@@ -56,7 +56,7 @@ df = asyncio.run(api.data.historical_ohlc(
|
||||
|
||||
# Method 4: Using datetime objects
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time=datetime(2021, 12, 20),
|
||||
end_time=datetime(2021, 12, 21),
|
||||
@@ -92,7 +92,7 @@ api = get_api()
|
||||
|
||||
# Fetch data
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21",
|
||||
@@ -121,7 +121,7 @@ api = get_api()
|
||||
|
||||
# Fetch data
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21"
|
||||
@@ -161,7 +161,7 @@ api = get_api()
|
||||
|
||||
# Fetch historical data using date strings (easiest for research)
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600, # 1 hour
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21",
|
||||
|
||||
@@ -44,7 +44,7 @@ def get_api() -> API:
|
||||
|
||||
# Fetch data
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21"
|
||||
|
||||
@@ -29,7 +29,7 @@ class API:
|
||||
|
||||
# Fetch data
|
||||
df = asyncio.run(api.data.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21"
|
||||
|
||||
@@ -27,8 +27,8 @@ class DataAPI(ABC):
|
||||
Fetch historical OHLC candlestick data for a market.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier in format "EXCHANGE:SYMBOL"
|
||||
Examples: "BINANCE:BTC/USDT", "COINBASE:ETH/USD"
|
||||
ticker: Market identifier in format "MARKET.EXCHANGE"
|
||||
Examples: "BTC/USDT.BINANCE", "ETH/USD.COINBASE"
|
||||
period_seconds: Candle period in seconds
|
||||
Common values:
|
||||
- 60 (1 minute)
|
||||
@@ -55,7 +55,7 @@ class DataAPI(ABC):
|
||||
Returns:
|
||||
DataFrame with candlestick data sorted by timestamp (ascending).
|
||||
Standard columns (always included):
|
||||
- timestamp: Period start time in microseconds
|
||||
- timestamp: Period start time in nanoseconds
|
||||
- open: Opening price (decimal float)
|
||||
- high: Highest price (decimal float)
|
||||
- low: Lowest price (decimal float)
|
||||
@@ -71,7 +71,7 @@ class DataAPI(ABC):
|
||||
Examples:
|
||||
# Basic OHLC with Unix timestamp
|
||||
df = await api.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time=1640000000,
|
||||
end_time=1640086400
|
||||
@@ -79,7 +79,7 @@ class DataAPI(ABC):
|
||||
|
||||
# Using date strings with volume
|
||||
df = await api.historical_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600,
|
||||
start_time="2021-12-20",
|
||||
end_time="2021-12-21",
|
||||
@@ -89,7 +89,7 @@ class DataAPI(ABC):
|
||||
# Using datetime objects
|
||||
from datetime import datetime
|
||||
df = await api.historical_ohlc(
|
||||
ticker="COINBASE:ETH/USD",
|
||||
ticker="ETH/USD.COINBASE",
|
||||
period_seconds=300,
|
||||
start_time=datetime(2021, 12, 20, 9, 30),
|
||||
end_time=datetime(2021, 12, 20, 16, 30),
|
||||
@@ -113,8 +113,8 @@ class DataAPI(ABC):
|
||||
specify exact timestamps. Useful for real-time analysis and indicators.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier in format "EXCHANGE:SYMBOL"
|
||||
Examples: "BINANCE:BTC/USDT", "COINBASE:ETH/USD"
|
||||
ticker: Market identifier in format "MARKET.EXCHANGE"
|
||||
Examples: "BTC/USDT.BINANCE", "ETH/USD.COINBASE"
|
||||
period_seconds: OHLC candle period in seconds
|
||||
Common values: 60 (1m), 300 (5m), 900 (15m), 3600 (1h),
|
||||
86400 (1d), 604800 (1w)
|
||||
@@ -133,14 +133,14 @@ class DataAPI(ABC):
|
||||
Examples:
|
||||
# Get the last candle
|
||||
df = await api.latest_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=3600
|
||||
)
|
||||
# Returns: timestamp, open, high, low, close
|
||||
|
||||
# Get the last 50 5-minute candles with volume
|
||||
df = await api.latest_ohlc(
|
||||
ticker="COINBASE:ETH/USD",
|
||||
ticker="ETH/USD.COINBASE",
|
||||
period_seconds=300,
|
||||
length=50,
|
||||
extra_columns=["volume", "buy_vol", "sell_vol"]
|
||||
@@ -148,7 +148,7 @@ class DataAPI(ABC):
|
||||
|
||||
# Get recent candles with all timing data
|
||||
df = await api.latest_ohlc(
|
||||
ticker="BINANCE:BTC/USDT",
|
||||
ticker="BTC/USDT.BINANCE",
|
||||
period_seconds=60,
|
||||
length=100,
|
||||
extra_columns=["open_time", "high_time", "low_time", "close_time"]
|
||||
|
||||
@@ -110,10 +110,10 @@ class HistoryClient:
|
||||
IMPORTANT: Call connect() before using this method.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
|
||||
ticker: Market identifier (e.g., "BTC/USDT.BINANCE")
|
||||
period_seconds: OHLC period in seconds
|
||||
start_time: Start timestamp in microseconds
|
||||
end_time: End timestamp in microseconds
|
||||
start_time: Start timestamp in nanoseconds
|
||||
end_time: End timestamp in nanoseconds
|
||||
timeout: Request timeout in seconds (default: 30)
|
||||
limit: Optional limit on number of candles
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""
|
||||
IcebergClient - Query OHLC data from Iceberg warehouse (Iceberg 1.10.1)
|
||||
|
||||
Tickers use Nautilus format: "BTC/USDT.BINANCE"
|
||||
All timestamps are nanoseconds since epoch.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
@@ -39,7 +42,6 @@ class IcebergClient:
|
||||
s3_endpoint: Optional[str] = None,
|
||||
s3_access_key: Optional[str] = None,
|
||||
s3_secret_key: Optional[str] = None,
|
||||
metadata_client=None, # SymbolMetadataClient (avoid circular import)
|
||||
):
|
||||
"""
|
||||
Initialize Iceberg client.
|
||||
@@ -50,11 +52,9 @@ class IcebergClient:
|
||||
s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
|
||||
s3_access_key: S3/MinIO access key
|
||||
s3_secret_key: S3/MinIO secret key
|
||||
metadata_client: SymbolMetadataClient for price/volume conversion
|
||||
"""
|
||||
self.catalog_uri = catalog_uri
|
||||
self.namespace = namespace
|
||||
self.metadata_client = metadata_client
|
||||
|
||||
catalog_props = {"uri": catalog_uri}
|
||||
if s3_endpoint:
|
||||
@@ -80,15 +80,14 @@ class IcebergClient:
|
||||
Query OHLC data for a specific ticker, period, and time range.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
|
||||
ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
|
||||
period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
|
||||
start_time: Start timestamp in microseconds
|
||||
end_time: End timestamp in microseconds
|
||||
columns: Optional list of columns to select. If None, returns all columns.
|
||||
Example: ["timestamp", "open", "high", "low", "close", "volume"]
|
||||
start_time: Start timestamp in nanoseconds
|
||||
end_time: End timestamp in nanoseconds (exclusive)
|
||||
columns: Optional list of columns to select.
|
||||
|
||||
Returns:
|
||||
DataFrame with OHLC data sorted by timestamp
|
||||
DataFrame with OHLC data sorted by timestamp, with a DatetimeIndex (UTC).
|
||||
"""
|
||||
# Reload table metadata to pick up snapshots committed after this client was initialized
|
||||
self.table = self.catalog.load_table(f"{self.namespace}.ohlc")
|
||||
@@ -102,7 +101,6 @@ class IcebergClient:
|
||||
)
|
||||
)
|
||||
|
||||
# Select specific columns if requested
|
||||
if columns is not None:
|
||||
scan = scan.select(*columns)
|
||||
|
||||
@@ -110,52 +108,10 @@ class IcebergClient:
|
||||
|
||||
if not df.empty:
|
||||
df = df.sort_values("timestamp")
|
||||
# Convert integer microsecond timestamps to DatetimeIndex
|
||||
df.index = pd.to_datetime(df["timestamp"], unit="us", utc=True)
|
||||
# Convert integer nanosecond timestamps to DatetimeIndex
|
||||
df.index = pd.to_datetime(df["timestamp"], unit="ns", utc=True)
|
||||
df.index.name = "datetime"
|
||||
df = df.drop(columns=["timestamp"])
|
||||
# Apply price/volume conversion if metadata client available
|
||||
if self.metadata_client is not None:
|
||||
df = self._apply_denominators(df, ticker)
|
||||
|
||||
return df
|
||||
|
||||
def _apply_denominators(self, df: pd.DataFrame, ticker: str) -> pd.DataFrame:
|
||||
"""
|
||||
Convert integer prices and volumes to decimal floats using market metadata.
|
||||
|
||||
Args:
|
||||
df: DataFrame with integer OHLC data
|
||||
ticker: Market identifier for metadata lookup
|
||||
|
||||
Returns:
|
||||
DataFrame with decimal prices and volumes
|
||||
|
||||
Raises:
|
||||
ValueError: If metadata not found for ticker
|
||||
"""
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
# Get metadata for this ticker
|
||||
metadata = self.metadata_client.get_metadata(ticker)
|
||||
|
||||
# Convert price columns (divide by tick_denom)
|
||||
price_columns = ["open", "high", "low", "close"]
|
||||
for col in price_columns:
|
||||
if col in df.columns:
|
||||
df[col] = df[col].astype(float) / metadata.tick_denom
|
||||
|
||||
# Convert volume columns (divide by base_denom)
|
||||
volume_columns = ["volume", "buy_vol", "sell_vol"]
|
||||
for col in volume_columns:
|
||||
if col in df.columns and df[col].notna().any():
|
||||
df[col] = df[col].astype(float) / metadata.base_denom
|
||||
|
||||
log.debug(
|
||||
f"Applied denominators to {ticker}: tick_denom={metadata.tick_denom}, "
|
||||
f"base_denom={metadata.base_denom} ({len(df)} rows)"
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
@@ -169,32 +125,28 @@ class IcebergClient:
|
||||
"""
|
||||
Identify missing data ranges in the requested time period.
|
||||
|
||||
Returns list of (start, end) tuples for missing ranges.
|
||||
Expected candles are calculated based on period_seconds.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier
|
||||
ticker: Market identifier in Nautilus format
|
||||
period_seconds: OHLC period in seconds
|
||||
start_time: Start timestamp in microseconds
|
||||
end_time: End timestamp in microseconds
|
||||
start_time: Start timestamp in nanoseconds
|
||||
end_time: End timestamp in nanoseconds
|
||||
|
||||
Returns:
|
||||
List of (start_time, end_time) tuples for missing ranges
|
||||
List of (start_time, end_time) tuples for missing ranges (nanoseconds)
|
||||
"""
|
||||
df = self.query_ohlc(ticker, period_seconds, start_time, end_time)
|
||||
|
||||
if df.empty:
|
||||
# All data is missing
|
||||
return [(start_time, end_time)]
|
||||
|
||||
# Convert period to microseconds
|
||||
period_micros = period_seconds * 1_000_000
|
||||
# Convert period to nanoseconds
|
||||
period_nanos = period_seconds * 1_000_000_000
|
||||
|
||||
# Generate expected timestamps — end_time is exclusive
|
||||
expected_timestamps = list(range(start_time, end_time, period_micros))
|
||||
actual_timestamps = set(df.index.view('int64') // 1000)
|
||||
expected_timestamps = list(range(start_time, end_time, period_nanos))
|
||||
# DatetimeIndex backed by nanoseconds — view as int64 directly
|
||||
actual_timestamps = set(df.index.view('int64'))
|
||||
|
||||
# Find gaps
|
||||
missing = sorted(set(expected_timestamps) - actual_timestamps)
|
||||
|
||||
if not missing:
|
||||
@@ -206,15 +158,12 @@ class IcebergClient:
|
||||
prev_ts = missing[0]
|
||||
|
||||
for ts in missing[1:]:
|
||||
if ts > prev_ts + period_micros:
|
||||
# Gap in missing data - close previous range
|
||||
if ts > prev_ts + period_nanos:
|
||||
ranges.append((range_start, prev_ts))
|
||||
range_start = ts
|
||||
prev_ts = ts
|
||||
|
||||
# Close final range
|
||||
ranges.append((range_start, prev_ts))
|
||||
|
||||
return ranges
|
||||
|
||||
def has_data(
|
||||
@@ -228,10 +177,10 @@ class IcebergClient:
|
||||
Check if any data exists for the given parameters.
|
||||
|
||||
Args:
|
||||
ticker: Market identifier
|
||||
ticker: Market identifier in Nautilus format
|
||||
period_seconds: OHLC period in seconds
|
||||
start_time: Start timestamp in microseconds
|
||||
end_time: End timestamp in microseconds
|
||||
start_time: Start timestamp in nanoseconds
|
||||
end_time: End timestamp in nanoseconds
|
||||
|
||||
Returns:
|
||||
True if at least one candle exists, False otherwise
|
||||
|
||||
@@ -138,8 +138,8 @@ class ChartingAPIImpl(ChartingAPI):
|
||||
if col in df.columns:
|
||||
# Handle potential timestamp index (convert from nanoseconds)
|
||||
if df.index.name == 'timestamp' or 'timestamp' in str(df.index.dtype):
|
||||
# Assume microseconds, convert to datetime
|
||||
plot_index = pd.to_datetime(df.index, unit='us')
|
||||
# Assume nanoseconds, convert to datetime
|
||||
plot_index = pd.to_datetime(df.index, unit='ns')
|
||||
else:
|
||||
plot_index = df.index
|
||||
|
||||
@@ -206,18 +206,18 @@ class ChartingAPIImpl(ChartingAPI):
|
||||
"""
|
||||
df_copy = df.copy()
|
||||
|
||||
# Handle timestamp column (in microseconds) -> DatetimeIndex
|
||||
# Handle timestamp column (in nanoseconds) -> DatetimeIndex
|
||||
if 'timestamp' in df_copy.columns:
|
||||
df_copy.index = pd.to_datetime(df_copy['timestamp'], unit='us')
|
||||
df_copy.index = pd.to_datetime(df_copy['timestamp'], unit='ns')
|
||||
df_copy = df_copy.drop(columns=['timestamp'])
|
||||
elif df_copy.index.name == 'timestamp' or 'int' in str(df_copy.index.dtype):
|
||||
# Index is timestamp in microseconds
|
||||
df_copy.index = pd.to_datetime(df_copy.index, unit='us')
|
||||
# Index is timestamp in nanoseconds
|
||||
df_copy.index = pd.to_datetime(df_copy.index, unit='ns')
|
||||
|
||||
# Ensure index is DatetimeIndex
|
||||
if not isinstance(df_copy.index, pd.DatetimeIndex):
|
||||
raise ValueError(
|
||||
"DataFrame must have a DatetimeIndex or a 'timestamp' column in microseconds"
|
||||
"DataFrame must have a DatetimeIndex or a 'timestamp' column in nanoseconds"
|
||||
)
|
||||
|
||||
# Normalize column names to lowercase
|
||||
|
||||
@@ -8,7 +8,7 @@ import pandas as pd
|
||||
|
||||
from dexorder.api.data_api import DataAPI
|
||||
from dexorder.ohlc_client import OHLCClient
|
||||
from dexorder.utils import TimestampInput, to_microseconds
|
||||
from dexorder.utils import TimestampInput, to_nanoseconds
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -105,12 +105,12 @@ class DataAPIImpl(DataAPI):
|
||||
if not self._started:
|
||||
await self.start()
|
||||
|
||||
# Convert timestamps to microseconds
|
||||
start_micros = to_microseconds(start_time)
|
||||
end_micros = to_microseconds(end_time)
|
||||
# Convert timestamps to nanoseconds
|
||||
start_nanos = to_nanoseconds(start_time)
|
||||
end_nanos = to_nanoseconds(end_time)
|
||||
|
||||
log.debug(f"Fetching OHLC: {ticker}, period={period_seconds}s, "
|
||||
f"start={start_time} ({start_micros}), end={end_time} ({end_micros})")
|
||||
f"start={start_time} ({start_nanos}ns), end={end_time} ({end_nanos}ns)")
|
||||
|
||||
# Validate extra_columns
|
||||
if extra_columns:
|
||||
@@ -131,8 +131,8 @@ class DataAPIImpl(DataAPI):
|
||||
df = await self.ohlc_client.fetch_ohlc(
|
||||
ticker=ticker,
|
||||
period_seconds=period_seconds,
|
||||
start_time=start_micros,
|
||||
end_time=end_micros,
|
||||
start_time=start_nanos,
|
||||
end_time=end_nanos,
|
||||
request_timeout=self.request_timeout
|
||||
)
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import logging
|
||||
from typing import Optional
|
||||
from .iceberg_client import IcebergClient
|
||||
from .history_client import HistoryClient
|
||||
from .symbol_metadata_client import SymbolMetadataClient
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -53,26 +52,14 @@ class OHLCClient:
|
||||
s3_access_key: S3/MinIO access key
|
||||
s3_secret_key: S3/MinIO secret key
|
||||
"""
|
||||
# Initialize symbol metadata client for price/volume conversion
|
||||
self.metadata = SymbolMetadataClient(
|
||||
iceberg_catalog_uri,
|
||||
namespace=namespace,
|
||||
s3_endpoint=s3_endpoint,
|
||||
s3_access_key=s3_access_key,
|
||||
s3_secret_key=s3_secret_key,
|
||||
)
|
||||
|
||||
# Initialize Iceberg client with metadata client for automatic conversion
|
||||
self.iceberg = IcebergClient(
|
||||
iceberg_catalog_uri, namespace,
|
||||
s3_endpoint=s3_endpoint,
|
||||
s3_access_key=s3_access_key,
|
||||
s3_secret_key=s3_secret_key,
|
||||
metadata_client=self.metadata,
|
||||
)
|
||||
|
||||
self.history = HistoryClient(relay_endpoint, notification_endpoint)
|
||||
log.info("OHLCClient initialized with automatic price/volume conversion")
|
||||
log.info("OHLCClient initialized")
|
||||
|
||||
async def start(self):
|
||||
"""
|
||||
@@ -107,10 +94,10 @@ class OHLCClient:
|
||||
6. Return results
|
||||
|
||||
Args:
|
||||
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
|
||||
ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
|
||||
period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
|
||||
start_time: Start timestamp in microseconds
|
||||
end_time: End timestamp in microseconds
|
||||
start_time: Start timestamp in nanoseconds
|
||||
end_time: End timestamp in nanoseconds
|
||||
request_timeout: Timeout for historical data requests (default: 30s)
|
||||
|
||||
Returns:
|
||||
@@ -121,9 +108,9 @@ class OHLCClient:
|
||||
ValueError: If request fails
|
||||
"""
|
||||
# Align times to period boundaries: [ceil(start), ceil(end)) exclusive
|
||||
period_micros = period_seconds * 1_000_000
|
||||
start_time = ((start_time + period_micros - 1) // period_micros) * period_micros
|
||||
end_time = ((end_time + period_micros - 1) // period_micros) * period_micros # exclusive
|
||||
period_nanos = period_seconds * 1_000_000_000
|
||||
start_time = ((start_time + period_nanos - 1) // period_nanos) * period_nanos
|
||||
end_time = ((end_time + period_nanos - 1) // period_nanos) * period_nanos # exclusive
|
||||
|
||||
# Step 1: Check Iceberg for existing data
|
||||
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
"""
|
||||
SymbolMetadataClient - Query symbol metadata from Iceberg for price/volume conversion.
|
||||
SymbolMetadataClient - Query symbol metadata from Iceberg.
|
||||
|
||||
Provides lazy-loaded, cached access to symbol metadata including denominators
|
||||
used to convert integer OHLC data to decimal prices and volumes.
|
||||
Tickers use Nautilus format: "BTC/USDT.BINANCE" (market_id.exchange_id).
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -13,23 +12,67 @@ from pyiceberg.expressions import EqualTo, And
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def format_ticker(exchange_id: str, market_id: str) -> str:
    """
    Format a ticker in Nautilus convention: 'BTC/USDT.BINANCE'.

    Args:
        exchange_id: Exchange part of the ticker, e.g. "BINANCE"
        market_id: Market part of the ticker, e.g. "BTC/USDT"

    Returns:
        The two parts joined as "<market_id>.<exchange_id>"
    """
    return f"{market_id}.{exchange_id}"
|
||||
|
||||
|
||||
def parse_ticker(ticker: str) -> tuple[str, str]:
    """
    Parse a Nautilus-format ticker into (exchange_id, market_id).

    Args:
        ticker: e.g. "BTC/USDT.BINANCE"

    Returns:
        (exchange_id, market_id) e.g. ("BINANCE", "BTC/USDT")

    Raises:
        ValueError: if the ticker does not contain a dot separator, or if the
            market or exchange part around the last dot is empty
    """
    # Split on the LAST dot to handle market IDs that could theoretically contain dots.
    # rpartition yields an empty separator when no dot is present.
    market_id, separator, exchange_id = ticker.rpartition(".")

    # Reject a missing separator as well as "BTC/USDT." / ".BINANCE", which would
    # otherwise produce an empty exchange_id or market_id.
    if not (separator and market_id and exchange_id):
        raise ValueError(
            f"Invalid ticker format '{ticker}'. Expected Nautilus format: 'MARKET.EXCHANGE' "
            f"(e.g., 'BTC/USDT.BINANCE')"
        )

    return exchange_id, market_id
|
||||
|
||||
|
||||
class SymbolMetadata(NamedTuple):
|
||||
"""Symbol metadata containing denominators for price/volume conversion."""
|
||||
"""Symbol metadata for Nautilus Instrument construction and order validation."""
|
||||
exchange_id: str
|
||||
market_id: str
|
||||
tick_denom: int # Denominator for price fields (open, high, low, close)
|
||||
base_denom: int # Denominator for base asset (volume in base terms)
|
||||
quote_denom: int # Denominator for quote asset
|
||||
market_type: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
base_asset: Optional[str] = None
|
||||
quote_asset: Optional[str] = None
|
||||
# Nautilus Instrument fields
|
||||
price_precision: Optional[int] = None # decimal places for prices
|
||||
size_precision: Optional[int] = None # decimal places for quantities
|
||||
tick_size: Optional[float] = None # minimum price increment
|
||||
lot_size: Optional[float] = None # minimum order size
|
||||
min_notional: Optional[float] = None # minimum order value in quote currency
|
||||
margin_init: Optional[float] = None # initial margin (futures/perps only)
|
||||
margin_maint: Optional[float] = None # maintenance margin (futures/perps only)
|
||||
maker_fee: Optional[float] = None # maker fee rate (e.g., 0.001 = 0.1%)
|
||||
taker_fee: Optional[float] = None # taker fee rate
|
||||
contract_multiplier: Optional[float] = None # for derivatives (default 1.0)
|
||||
|
||||
@property
|
||||
def ticker(self) -> str:
|
||||
"""Nautilus-format ticker: 'BTC/USDT.BINANCE'."""
|
||||
return format_ticker(self.exchange_id, self.market_id)
|
||||
|
||||
|
||||
class SymbolMetadataClient:
|
||||
"""
|
||||
Client for querying symbol metadata from Iceberg.
|
||||
|
||||
Provides lazy-loaded, cached access to market metadata including
|
||||
denominators needed to convert integer OHLC prices/volumes to decimals.
|
||||
Tickers use Nautilus format: "BTC/USDT.BINANCE"
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -40,16 +83,6 @@ class SymbolMetadataClient:
|
||||
s3_access_key: Optional[str] = None,
|
||||
s3_secret_key: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Initialize symbol metadata client.
|
||||
|
||||
Args:
|
||||
catalog_uri: URI of the Iceberg catalog
|
||||
namespace: Iceberg namespace (default: "trading")
|
||||
s3_endpoint: S3/MinIO endpoint URL
|
||||
s3_access_key: S3/MinIO access key
|
||||
s3_secret_key: S3/MinIO secret key
|
||||
"""
|
||||
self.catalog_uri = catalog_uri
|
||||
self.namespace = namespace
|
||||
|
||||
@@ -63,55 +96,39 @@ class SymbolMetadataClient:
|
||||
catalog_props["s3.secret-access-key"] = s3_secret_key
|
||||
|
||||
self.catalog = load_catalog("trading", **catalog_props)
|
||||
|
||||
# Lazy load the table
|
||||
self._table = None
|
||||
|
||||
# Cache: ticker -> SymbolMetadata
|
||||
self._cache: Dict[str, SymbolMetadata] = {}
|
||||
|
||||
@property
|
||||
def table(self):
|
||||
"""Lazy load the symbol_metadata table."""
|
||||
if self._table is None:
|
||||
try:
|
||||
self._table = self.catalog.load_table(f"{self.namespace}.symbol_metadata")
|
||||
log.info(f"Loaded symbol_metadata table from {self.namespace}")
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Failed to load symbol_metadata table from {self.namespace}.symbol_metadata. "
|
||||
f"This table is required for price/volume conversion. Error: {e}"
|
||||
f"Failed to load symbol_metadata table from {self.namespace}.symbol_metadata: {e}"
|
||||
) from e
|
||||
return self._table
|
||||
|
||||
def get_metadata(self, ticker: str) -> SymbolMetadata:
|
||||
"""
|
||||
Get metadata for a ticker (e.g., "BINANCE:BTC/USDT").
|
||||
Get metadata for a ticker (e.g., "BTC/USDT.BINANCE").
|
||||
|
||||
Args:
|
||||
ticker: Market identifier in format "EXCHANGE:SYMBOL"
|
||||
ticker: Market identifier in Nautilus format "MARKET.EXCHANGE"
|
||||
|
||||
Returns:
|
||||
SymbolMetadata with denominators and market info
|
||||
SymbolMetadata with Nautilus instrument fields
|
||||
|
||||
Raises:
|
||||
ValueError: If ticker format is invalid or metadata not found
|
||||
RuntimeError: If symbol_metadata table cannot be loaded
|
||||
"""
|
||||
# Check cache first
|
||||
if ticker in self._cache:
|
||||
return self._cache[ticker]
|
||||
|
||||
# Parse ticker into exchange_id and market_id
|
||||
if ":" not in ticker:
|
||||
raise ValueError(
|
||||
f"Invalid ticker format '{ticker}'. Expected format: 'EXCHANGE:SYMBOL' "
|
||||
f"(e.g., 'BINANCE:BTC/USDT')"
|
||||
)
|
||||
exchange_id, market_id = parse_ticker(ticker)
|
||||
|
||||
exchange_id, market_id = ticker.split(":", 1)
|
||||
|
||||
# Query Iceberg for this symbol
|
||||
try:
|
||||
df = self.table.scan(
|
||||
row_filter=And(
|
||||
@@ -122,9 +139,9 @@ class SymbolMetadataClient:
|
||||
|
||||
if df.empty:
|
||||
raise ValueError(
|
||||
f"No metadata found for ticker '{ticker}' (exchange_id='{exchange_id}', "
|
||||
f"market_id='{market_id}'). The symbol may not be configured in the system. "
|
||||
f"Available tickers can be queried from the symbol_metadata table."
|
||||
f"No metadata found for ticker '{ticker}' "
|
||||
f"(exchange_id='{exchange_id}', market_id='{market_id}'). "
|
||||
f"The symbol may not be configured in the system."
|
||||
)
|
||||
|
||||
if len(df) > 1:
|
||||
@@ -132,55 +149,44 @@ class SymbolMetadataClient:
|
||||
|
||||
row = df.iloc[0]
|
||||
|
||||
# Extract denominators (required fields)
|
||||
tick_denom = row.get("tick_denom")
|
||||
base_denom = row.get("base_denom")
|
||||
quote_denom = row.get("quote_denom")
|
||||
def _opt_int(col):
|
||||
v = row.get(col)
|
||||
return int(v) if v is not None and not (isinstance(v, float) and v != v) else None
|
||||
|
||||
if tick_denom is None or tick_denom == 0:
|
||||
raise ValueError(
|
||||
f"Invalid tick_denom for {ticker}: {tick_denom}. "
|
||||
f"Denominator must be a positive integer."
|
||||
)
|
||||
|
||||
if base_denom is None or base_denom == 0:
|
||||
raise ValueError(
|
||||
f"Invalid base_denom for {ticker}: {base_denom}. "
|
||||
f"Denominator must be a positive integer."
|
||||
)
|
||||
|
||||
if quote_denom is None or quote_denom == 0:
|
||||
raise ValueError(
|
||||
f"Invalid quote_denom for {ticker}: {quote_denom}. "
|
||||
f"Denominator must be a positive integer."
|
||||
)
|
||||
def _opt_float(col):
|
||||
v = row.get(col)
|
||||
return float(v) if v is not None and not (isinstance(v, float) and v != v) else None
|
||||
|
||||
metadata = SymbolMetadata(
|
||||
exchange_id=exchange_id,
|
||||
market_id=market_id,
|
||||
tick_denom=int(tick_denom),
|
||||
base_denom=int(base_denom),
|
||||
quote_denom=int(quote_denom),
|
||||
market_type=row.get("market_type"),
|
||||
description=row.get("description"),
|
||||
base_asset=row.get("base_asset"),
|
||||
quote_asset=row.get("quote_asset"),
|
||||
price_precision=_opt_int("price_precision"),
|
||||
size_precision=_opt_int("size_precision"),
|
||||
tick_size=_opt_float("tick_size"),
|
||||
lot_size=_opt_float("lot_size"),
|
||||
min_notional=_opt_float("min_notional"),
|
||||
margin_init=_opt_float("margin_init"),
|
||||
margin_maint=_opt_float("margin_maint"),
|
||||
maker_fee=_opt_float("maker_fee"),
|
||||
taker_fee=_opt_float("taker_fee"),
|
||||
contract_multiplier=_opt_float("contract_multiplier"),
|
||||
)
|
||||
|
||||
# Cache the result
|
||||
self._cache[ticker] = metadata
|
||||
log.debug(
|
||||
f"Loaded metadata for {ticker}: tick_denom={metadata.tick_denom}, "
|
||||
f"base_denom={metadata.base_denom}, quote_denom={metadata.quote_denom}"
|
||||
f"Loaded metadata for {ticker}: price_precision={metadata.price_precision}, "
|
||||
f"tick_size={metadata.tick_size}, maker_fee={metadata.maker_fee}"
|
||||
)
|
||||
|
||||
return metadata
|
||||
|
||||
except ValueError:
|
||||
# Re-raise ValueError as-is (ticker not found, invalid format, etc.)
|
||||
raise
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Failed to query metadata for ticker '{ticker}': {e}"
|
||||
) from e
|
||||
raise RuntimeError(f"Failed to query metadata for ticker '{ticker}': {e}") from e
|
||||
|
||||
def clear_cache(self):
|
||||
"""Clear the metadata cache (useful for testing or forcing reloads)."""
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Utility functions for dexorder.
|
||||
|
||||
Includes timestamp conversions, date parsing, and other common utilities.
|
||||
All internal timestamps use nanoseconds since epoch (UTC).
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -15,13 +16,15 @@ log = logging.getLogger(__name__)
|
||||
# Type alias for flexible timestamp input
|
||||
TimestampInput = Union[int, float, str, datetime, pd.Timestamp]
|
||||
|
||||
NANOS_PER_SECOND = 1_000_000_000
|
||||
|
||||
def to_microseconds(timestamp: TimestampInput) -> int:
|
||||
|
||||
def to_nanoseconds(timestamp: TimestampInput) -> int:
|
||||
"""
|
||||
Convert various timestamp formats to microseconds since epoch.
|
||||
Convert various timestamp formats to nanoseconds since epoch.
|
||||
|
||||
This is the canonical way to convert user-friendly timestamps (unix seconds,
|
||||
date strings, datetime objects) into the internal microsecond format used
|
||||
date strings, datetime objects) into the internal nanosecond format used
|
||||
throughout the dexorder system.
|
||||
|
||||
Args:
|
||||
@@ -32,87 +35,69 @@ def to_microseconds(timestamp: TimestampInput) -> int:
|
||||
- pandas Timestamp
|
||||
|
||||
Returns:
|
||||
Microseconds since epoch as integer
|
||||
Nanoseconds since epoch as integer
|
||||
|
||||
Examples:
|
||||
>>> to_microseconds(1640000000) # Unix timestamp in seconds
|
||||
1640000000000000
|
||||
>>> to_microseconds(1640000000.5) # Unix timestamp with fractional seconds
|
||||
1640000000500000
|
||||
>>> to_microseconds("2021-12-20") # Date string
|
||||
1640000000000000
|
||||
>>> to_microseconds("2021-12-20 12:00:00") # Date string with time
|
||||
1640000000000000
|
||||
>>> to_microseconds(datetime(2021, 12, 20, 12, 0, 0)) # datetime object
|
||||
1640000000000000
|
||||
>>> to_microseconds(pd.Timestamp("2021-12-20 12:00:00")) # pandas Timestamp
|
||||
1640000000000000
|
||||
>>> to_nanoseconds(1640000000) # Unix timestamp in seconds
|
||||
1640000000000000000
|
||||
>>> to_nanoseconds(1640000000.5) # Unix timestamp with fractional seconds
|
||||
1640000000500000000
|
||||
>>> to_nanoseconds("2021-12-20")
|
||||
1639958400000000000
|
||||
"""
|
||||
if isinstance(timestamp, (int, float)):
|
||||
# Assume Unix timestamp in seconds
|
||||
return int(timestamp * 1_000_000)
|
||||
return int(timestamp * NANOS_PER_SECOND)
|
||||
elif isinstance(timestamp, str):
|
||||
# Parse date string
|
||||
dt = dateparser.parse(timestamp)
|
||||
if dt is None:
|
||||
raise ValueError(f"Could not parse date string: {timestamp}")
|
||||
return int(dt.timestamp() * 1_000_000)
|
||||
return int(dt.timestamp() * NANOS_PER_SECOND)
|
||||
elif isinstance(timestamp, datetime):
|
||||
return int(timestamp.timestamp() * 1_000_000)
|
||||
return int(timestamp.timestamp() * NANOS_PER_SECOND)
|
||||
elif isinstance(timestamp, pd.Timestamp):
|
||||
return int(timestamp.timestamp() * 1_000_000)
|
||||
return int(timestamp.timestamp() * NANOS_PER_SECOND)
|
||||
else:
|
||||
raise TypeError(f"Unsupported timestamp type: {type(timestamp)}")
|
||||
|
||||
|
||||
def to_seconds(timestamp_nanos: int) -> float:
    """
    Convert nanoseconds since epoch to Unix timestamp in seconds.

    Args:
        timestamp_nanos: Timestamp in nanoseconds since epoch

    Returns:
        Unix timestamp in seconds (float)

    Examples:
        >>> to_seconds(1640000000000000000)
        1640000000.0
        >>> to_seconds(1640000000500000000)
        1640000000.5
    """
    # True division: the result is a float even for whole-second inputs.
    return timestamp_nanos / NANOS_PER_SECOND
|
||||
|
||||
|
||||
def to_datetime(timestamp_nanos: int) -> datetime:
    """
    Convert nanoseconds since epoch to datetime object (UTC).

    The result is timezone-aware (tzinfo is UTC). Python datetimes have
    microsecond resolution, so the sub-microsecond part of the input is
    discarded (floored).

    Args:
        timestamp_nanos: Timestamp in nanoseconds since epoch

    Returns:
        datetime object in UTC

    Examples:
        >>> to_datetime(1640000000000000000)
        datetime.datetime(2021, 12, 20, 11, 33, 20, tzinfo=datetime.timezone.utc)
    """
    # Local import: only `datetime` itself is known to be imported at module level.
    from datetime import timedelta, timezone

    # Build from an aware UTC epoch with integer microseconds. A bare
    # datetime.fromtimestamp(x) would return a naive value in the machine's
    # local timezone, and going through a float loses precision.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    return epoch + timedelta(microseconds=timestamp_nanos // 1_000)
|
||||
|
||||
|
||||
def to_timestamp(timestamp_nanos: int) -> pd.Timestamp:
    """
    Convert nanoseconds since epoch to pandas Timestamp.

    Args:
        timestamp_nanos: Timestamp in nanoseconds since epoch

    Returns:
        pandas Timestamp

    Examples:
        >>> to_timestamp(1640000000000000000)
        Timestamp('2021-12-20 11:33:20')
    """
    # unit='ns' tells pandas the integer counts nanoseconds since the epoch.
    return pd.Timestamp(timestamp_nanos, unit='ns')
|
||||
|
||||
Reference in New Issue
Block a user