Symbol & data refactoring for Nautilus

2026-04-01 00:59:13 -04:00
parent cd28e18e52
commit 93bc8a3a4f
55 changed files with 537 additions and 600 deletions

View File

@@ -44,7 +44,7 @@ def get_api() -> API:
# Fetch data
df = asyncio.run(api.data.historical_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=3600,
start_time="2021-12-20",
end_time="2021-12-21"

View File

@@ -29,7 +29,7 @@ class API:
# Fetch data
df = asyncio.run(api.data.historical_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=3600,
start_time="2021-12-20",
end_time="2021-12-21"

View File

@@ -27,8 +27,8 @@ class DataAPI(ABC):
Fetch historical OHLC candlestick data for a market.
Args:
ticker: Market identifier in format "EXCHANGE:SYMBOL"
Examples: "BINANCE:BTC/USDT", "COINBASE:ETH/USD"
ticker: Market identifier in format "MARKET.EXCHANGE"
Examples: "BTC/USDT.BINANCE", "ETH/USD.COINBASE"
period_seconds: Candle period in seconds
Common values:
- 60 (1 minute)
@@ -55,7 +55,7 @@ class DataAPI(ABC):
Returns:
DataFrame with candlestick data sorted by timestamp (ascending).
Standard columns (always included):
- timestamp: Period start time in microseconds
- timestamp: Period start time in nanoseconds
- open: Opening price (decimal float)
- high: Highest price (decimal float)
- low: Lowest price (decimal float)
@@ -71,7 +71,7 @@ class DataAPI(ABC):
Examples:
# Basic OHLC with Unix timestamp
df = await api.historical_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=3600,
start_time=1640000000,
end_time=1640086400
@@ -79,7 +79,7 @@ class DataAPI(ABC):
# Using date strings with volume
df = await api.historical_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=3600,
start_time="2021-12-20",
end_time="2021-12-21",
@@ -89,7 +89,7 @@ class DataAPI(ABC):
# Using datetime objects
from datetime import datetime
df = await api.historical_ohlc(
ticker="COINBASE:ETH/USD",
ticker="ETH/USD.COINBASE",
period_seconds=300,
start_time=datetime(2021, 12, 20, 9, 30),
end_time=datetime(2021, 12, 20, 16, 30),
@@ -113,8 +113,8 @@ class DataAPI(ABC):
specify exact timestamps. Useful for real-time analysis and indicators.
Args:
ticker: Market identifier in format "EXCHANGE:SYMBOL"
Examples: "BINANCE:BTC/USDT", "COINBASE:ETH/USD"
ticker: Market identifier in format "MARKET.EXCHANGE"
Examples: "BTC/USDT.BINANCE", "ETH/USD.COINBASE"
period_seconds: OHLC candle period in seconds
Common values: 60 (1m), 300 (5m), 900 (15m), 3600 (1h),
86400 (1d), 604800 (1w)
@@ -133,14 +133,14 @@ class DataAPI(ABC):
Examples:
# Get the last candle
df = await api.latest_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=3600
)
# Returns: timestamp, open, high, low, close
# Get the last 50 5-minute candles with volume
df = await api.latest_ohlc(
ticker="COINBASE:ETH/USD",
ticker="ETH/USD.COINBASE",
period_seconds=300,
length=50,
extra_columns=["volume", "buy_vol", "sell_vol"]
@@ -148,7 +148,7 @@ class DataAPI(ABC):
# Get recent candles with all timing data
df = await api.latest_ohlc(
ticker="BINANCE:BTC/USDT",
ticker="BTC/USDT.BINANCE",
period_seconds=60,
length=100,
extra_columns=["open_time", "high_time", "low_time", "close_time"]

View File

@@ -110,10 +110,10 @@ class HistoryClient:
IMPORTANT: Call connect() before using this method.
Args:
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
ticker: Market identifier (e.g., "BTC/USDT.BINANCE")
period_seconds: OHLC period in seconds
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
start_time: Start timestamp in nanoseconds
end_time: End timestamp in nanoseconds
timeout: Request timeout in seconds (default: 30)
limit: Optional limit on number of candles

View File

@@ -1,5 +1,8 @@
"""
IcebergClient - Query OHLC data from Iceberg warehouse (Iceberg 1.10.1)
Tickers use Nautilus format: "BTC/USDT.BINANCE"
All timestamps are nanoseconds since epoch.
"""
from typing import Optional, List, Tuple
@@ -39,7 +42,6 @@ class IcebergClient:
s3_endpoint: Optional[str] = None,
s3_access_key: Optional[str] = None,
s3_secret_key: Optional[str] = None,
metadata_client=None, # SymbolMetadataClient (avoid circular import)
):
"""
Initialize Iceberg client.
@@ -50,11 +52,9 @@ class IcebergClient:
s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key
metadata_client: SymbolMetadataClient for price/volume conversion
"""
self.catalog_uri = catalog_uri
self.namespace = namespace
self.metadata_client = metadata_client
catalog_props = {"uri": catalog_uri}
if s3_endpoint:
@@ -80,15 +80,14 @@ class IcebergClient:
Query OHLC data for a specific ticker, period, and time range.
Args:
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
columns: Optional list of columns to select. If None, returns all columns.
Example: ["timestamp", "open", "high", "low", "close", "volume"]
start_time: Start timestamp in nanoseconds
end_time: End timestamp in nanoseconds (exclusive)
columns: Optional list of columns to select.
Returns:
DataFrame with OHLC data sorted by timestamp
DataFrame with OHLC data sorted by timestamp, with a DatetimeIndex (UTC).
"""
# Reload table metadata to pick up snapshots committed after this client was initialized
self.table = self.catalog.load_table(f"{self.namespace}.ohlc")
@@ -102,7 +101,6 @@ class IcebergClient:
)
)
# Select specific columns if requested
if columns is not None:
scan = scan.select(*columns)
@@ -110,52 +108,10 @@ class IcebergClient:
if not df.empty:
df = df.sort_values("timestamp")
# Convert integer microsecond timestamps to DatetimeIndex
df.index = pd.to_datetime(df["timestamp"], unit="us", utc=True)
# Convert integer nanosecond timestamps to DatetimeIndex
df.index = pd.to_datetime(df["timestamp"], unit="ns", utc=True)
df.index.name = "datetime"
df = df.drop(columns=["timestamp"])
# Apply price/volume conversion if metadata client available
if self.metadata_client is not None:
df = self._apply_denominators(df, ticker)
return df
def _apply_denominators(self, df: pd.DataFrame, ticker: str) -> pd.DataFrame:
"""
Convert integer prices and volumes to decimal floats using market metadata.
Args:
df: DataFrame with integer OHLC data
ticker: Market identifier for metadata lookup
Returns:
DataFrame with decimal prices and volumes
Raises:
ValueError: If metadata not found for ticker
"""
if df.empty:
return df
# Get metadata for this ticker
metadata = self.metadata_client.get_metadata(ticker)
# Convert price columns (divide by tick_denom)
price_columns = ["open", "high", "low", "close"]
for col in price_columns:
if col in df.columns:
df[col] = df[col].astype(float) / metadata.tick_denom
# Convert volume columns (divide by base_denom)
volume_columns = ["volume", "buy_vol", "sell_vol"]
for col in volume_columns:
if col in df.columns and df[col].notna().any():
df[col] = df[col].astype(float) / metadata.base_denom
log.debug(
f"Applied denominators to {ticker}: tick_denom={metadata.tick_denom}, "
f"base_denom={metadata.base_denom} ({len(df)} rows)"
)
return df
@@ -169,32 +125,28 @@ class IcebergClient:
"""
Identify missing data ranges in the requested time period.
Returns list of (start, end) tuples for missing ranges.
Expected candles are calculated based on period_seconds.
Args:
ticker: Market identifier
ticker: Market identifier in Nautilus format
period_seconds: OHLC period in seconds
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
start_time: Start timestamp in nanoseconds
end_time: End timestamp in nanoseconds
Returns:
List of (start_time, end_time) tuples for missing ranges
List of (start_time, end_time) tuples for missing ranges (nanoseconds)
"""
df = self.query_ohlc(ticker, period_seconds, start_time, end_time)
if df.empty:
# All data is missing
return [(start_time, end_time)]
# Convert period to microseconds
period_micros = period_seconds * 1_000_000
# Convert period to nanoseconds
period_nanos = period_seconds * 1_000_000_000
# Generate expected timestamps — end_time is exclusive
expected_timestamps = list(range(start_time, end_time, period_micros))
actual_timestamps = set(df.index.view('int64') // 1000)
expected_timestamps = list(range(start_time, end_time, period_nanos))
# DatetimeIndex backed by nanoseconds — view as int64 directly
actual_timestamps = set(df.index.view('int64'))
# Find gaps
missing = sorted(set(expected_timestamps) - actual_timestamps)
if not missing:
@@ -206,15 +158,12 @@ class IcebergClient:
prev_ts = missing[0]
for ts in missing[1:]:
if ts > prev_ts + period_micros:
# Gap in missing data - close previous range
if ts > prev_ts + period_nanos:
ranges.append((range_start, prev_ts))
range_start = ts
prev_ts = ts
# Close final range
ranges.append((range_start, prev_ts))
return ranges
def has_data(
@@ -228,10 +177,10 @@ class IcebergClient:
Check if any data exists for the given parameters.
Args:
ticker: Market identifier
ticker: Market identifier in Nautilus format
period_seconds: OHLC period in seconds
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
start_time: Start timestamp in nanoseconds
end_time: End timestamp in nanoseconds
Returns:
True if at least one candle exists, False otherwise
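The gap detection above enumerates expected candle-start timestamps (end exclusive) and groups consecutive missing ones into (start, end) ranges. A standalone sketch of that grouping with hypothetical inputs:

def group_missing(expected, actual, period_nanos):
    """Group expected timestamps absent from `actual` into contiguous (start, end) ranges."""
    missing = sorted(set(expected) - set(actual))
    if not missing:
        return []
    ranges = []
    range_start = prev = missing[0]
    for ts in missing[1:]:
        if ts > prev + period_nanos:          # break between two runs of missing candles
            ranges.append((range_start, prev))
            range_start = ts
        prev = ts
    ranges.append((range_start, prev))
    return ranges

period = 3_600 * 1_000_000_000                          # 1 hour in nanoseconds
expected = list(range(0, 10 * period, period))          # hours 0..9
actual = {0, period, 4 * period, 5 * period}            # candles already stored
assert group_missing(expected, actual, period) == [(2 * period, 3 * period),
                                                   (6 * period, 9 * period)]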

View File

@@ -138,8 +138,8 @@ class ChartingAPIImpl(ChartingAPI):
if col in df.columns:
# Handle potential timestamp index (convert from microseconds)
if df.index.name == 'timestamp' or 'timestamp' in str(df.index.dtype):
# Assume microseconds, convert to datetime
plot_index = pd.to_datetime(df.index, unit='us')
# Assume nanoseconds, convert to datetime
plot_index = pd.to_datetime(df.index, unit='ns')
else:
plot_index = df.index
@@ -206,18 +206,18 @@ class ChartingAPIImpl(ChartingAPI):
"""
df_copy = df.copy()
# Handle timestamp column (in microseconds) -> DatetimeIndex
# Handle timestamp column (in nanoseconds) -> DatetimeIndex
if 'timestamp' in df_copy.columns:
df_copy.index = pd.to_datetime(df_copy['timestamp'], unit='us')
df_copy.index = pd.to_datetime(df_copy['timestamp'], unit='ns')
df_copy = df_copy.drop(columns=['timestamp'])
elif df_copy.index.name == 'timestamp' or 'int' in str(df_copy.index.dtype):
# Index is timestamp in microseconds
df_copy.index = pd.to_datetime(df_copy.index, unit='us')
# Index is timestamp in nanoseconds
df_copy.index = pd.to_datetime(df_copy.index, unit='ns')
# Ensure index is DatetimeIndex
if not isinstance(df_copy.index, pd.DatetimeIndex):
raise ValueError(
"DataFrame must have a DatetimeIndex or a 'timestamp' column in microseconds"
"DataFrame must have a DatetimeIndex or a 'timestamp' column in nanoseconds"
)
# Normalize column names to lowercase

View File

@@ -8,7 +8,7 @@ import pandas as pd
from dexorder.api.data_api import DataAPI
from dexorder.ohlc_client import OHLCClient
from dexorder.utils import TimestampInput, to_microseconds
from dexorder.utils import TimestampInput, to_nanoseconds
log = logging.getLogger(__name__)
@@ -105,12 +105,12 @@ class DataAPIImpl(DataAPI):
if not self._started:
await self.start()
# Convert timestamps to microseconds
start_micros = to_microseconds(start_time)
end_micros = to_microseconds(end_time)
# Convert timestamps to nanoseconds
start_nanos = to_nanoseconds(start_time)
end_nanos = to_nanoseconds(end_time)
log.debug(f"Fetching OHLC: {ticker}, period={period_seconds}s, "
f"start={start_time} ({start_micros}), end={end_time} ({end_micros})")
f"start={start_time} ({start_nanos}ns), end={end_time} ({end_nanos}ns)")
# Validate extra_columns
if extra_columns:
@@ -131,8 +131,8 @@ class DataAPIImpl(DataAPI):
df = await self.ohlc_client.fetch_ohlc(
ticker=ticker,
period_seconds=period_seconds,
start_time=start_micros,
end_time=end_micros,
start_time=start_nanos,
end_time=end_nanos,
request_timeout=self.request_timeout
)

View File

@@ -8,7 +8,6 @@ import logging
from typing import Optional
from .iceberg_client import IcebergClient
from .history_client import HistoryClient
from .symbol_metadata_client import SymbolMetadataClient
log = logging.getLogger(__name__)
@@ -53,26 +52,14 @@ class OHLCClient:
s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key
"""
# Initialize symbol metadata client for price/volume conversion
self.metadata = SymbolMetadataClient(
iceberg_catalog_uri,
namespace=namespace,
s3_endpoint=s3_endpoint,
s3_access_key=s3_access_key,
s3_secret_key=s3_secret_key,
)
# Initialize Iceberg client with metadata client for automatic conversion
self.iceberg = IcebergClient(
iceberg_catalog_uri, namespace,
s3_endpoint=s3_endpoint,
s3_access_key=s3_access_key,
s3_secret_key=s3_secret_key,
metadata_client=self.metadata,
)
self.history = HistoryClient(relay_endpoint, notification_endpoint)
log.info("OHLCClient initialized with automatic price/volume conversion")
log.info("OHLCClient initialized")
async def start(self):
"""
@@ -107,10 +94,10 @@ class OHLCClient:
6. Return results
Args:
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
ticker: Market identifier in Nautilus format (e.g., "BTC/USDT.BINANCE")
period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
start_time: Start timestamp in nanoseconds
end_time: End timestamp in nanoseconds
request_timeout: Timeout for historical data requests (default: 30s)
Returns:
@@ -121,9 +108,9 @@ class OHLCClient:
ValueError: If request fails
"""
# Align times to period boundaries: [ceil(start), ceil(end)) exclusive
period_micros = period_seconds * 1_000_000
start_time = ((start_time + period_micros - 1) // period_micros) * period_micros
end_time = ((end_time + period_micros - 1) // period_micros) * period_micros # exclusive
period_nanos = period_seconds * 1_000_000_000
start_time = ((start_time + period_nanos - 1) // period_nanos) * period_nanos
end_time = ((end_time + period_nanos - 1) // period_nanos) * period_nanos # exclusive
# Step 1: Check Iceberg for existing data
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
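The alignment rounds both bounds up to the next period boundary, producing a half-open [start, end) request in nanoseconds. Worked numbers for the one-hour period used throughout:

period_nanos = 3_600 * 1_000_000_000

def ceil_to_period(ts: int) -> int:
    return ((ts + period_nanos - 1) // period_nanos) * period_nanos

aligned = 1_639_958_400_000_000_000                   # 2021-12-20 00:00:00 UTC, on the hour
assert ceil_to_period(aligned) == aligned             # already on a boundary, unchanged
assert ceil_to_period(aligned + 1) == aligned + period_nanos   # one ns past -> next hour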

View File

@@ -1,8 +1,7 @@
"""
SymbolMetadataClient - Query symbol metadata from Iceberg for price/volume conversion.
SymbolMetadataClient - Query symbol metadata from Iceberg.
Provides lazy-loaded, cached access to symbol metadata including denominators
used to convert integer OHLC data to decimal prices and volumes.
Tickers use Nautilus format: "BTC/USDT.BINANCE" (market_id.exchange_id).
"""
import logging
@@ -13,23 +12,67 @@ from pyiceberg.expressions import EqualTo, And
log = logging.getLogger(__name__)
def format_ticker(exchange_id: str, market_id: str) -> str:
"""Format a ticker in Nautilus convention: 'BTC/USDT.BINANCE'."""
return f"{market_id}.{exchange_id}"
def parse_ticker(ticker: str) -> tuple[str, str]:
"""
Parse a Nautilus-format ticker into (exchange_id, market_id).
Args:
ticker: e.g. "BTC/USDT.BINANCE"
Returns:
(exchange_id, market_id) e.g. ("BINANCE", "BTC/USDT")
Raises:
ValueError: if the ticker does not contain a dot separator
"""
if "." not in ticker:
raise ValueError(
f"Invalid ticker format '{ticker}'. Expected Nautilus format: 'MARKET.EXCHANGE' "
f"(e.g., 'BTC/USDT.BINANCE')"
)
# Split on the LAST dot to handle market IDs that could theoretically contain dots
dot_pos = ticker.rfind(".")
market_id = ticker[:dot_pos]
exchange_id = ticker[dot_pos + 1:]
return exchange_id, market_id
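A quick round-trip of the two helpers above:

exchange_id, market_id = parse_ticker("BTC/USDT.BINANCE")
assert (exchange_id, market_id) == ("BINANCE", "BTC/USDT")
assert format_ticker(exchange_id, market_id) == "BTC/USDT.BINANCE"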
class SymbolMetadata(NamedTuple):
"""Symbol metadata containing denominators for price/volume conversion."""
"""Symbol metadata for Nautilus Instrument construction and order validation."""
exchange_id: str
market_id: str
tick_denom: int # Denominator for price fields (open, high, low, close)
base_denom: int # Denominator for base asset (volume in base terms)
quote_denom: int # Denominator for quote asset
market_type: Optional[str] = None
description: Optional[str] = None
base_asset: Optional[str] = None
quote_asset: Optional[str] = None
# Nautilus Instrument fields
price_precision: Optional[int] = None # decimal places for prices
size_precision: Optional[int] = None # decimal places for quantities
tick_size: Optional[float] = None # minimum price increment
lot_size: Optional[float] = None # minimum order size
min_notional: Optional[float] = None # minimum order value in quote currency
margin_init: Optional[float] = None # initial margin (futures/perps only)
margin_maint: Optional[float] = None # maintenance margin (futures/perps only)
maker_fee: Optional[float] = None # maker fee rate (e.g., 0.001 = 0.1%)
taker_fee: Optional[float] = None # taker fee rate
contract_multiplier: Optional[float] = None # for derivatives (default 1.0)
@property
def ticker(self) -> str:
"""Nautilus-format ticker: 'BTC/USDT.BINANCE'."""
return format_ticker(self.exchange_id, self.market_id)
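A hedged sketch of using the new Instrument fields for order validation; the metadata values below are illustrative, not real exchange data:

meta = SymbolMetadata(
    exchange_id="BINANCE", market_id="BTC/USDT",
    tick_denom=100, base_denom=100_000_000, quote_denom=100,
    price_precision=2, size_precision=5,
    tick_size=0.01, lot_size=0.00001, min_notional=10.0,
)
assert meta.ticker == "BTC/USDT.BINANCE"

# Snap an order price onto the tick grid, then enforce the minimum notional value
price = round(round(46810.123 / meta.tick_size) * meta.tick_size, meta.price_precision)
qty = 0.001
if price * qty < meta.min_notional:
    raise ValueError("order below min_notional")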
class SymbolMetadataClient:
"""
Client for querying symbol metadata from Iceberg.
Provides lazy-loaded, cached access to market metadata including
denominators needed to convert integer OHLC prices/volumes to decimals.
Tickers use Nautilus format: "BTC/USDT.BINANCE"
"""
def __init__(
@@ -40,16 +83,6 @@ class SymbolMetadataClient:
s3_access_key: Optional[str] = None,
s3_secret_key: Optional[str] = None,
):
"""
Initialize symbol metadata client.
Args:
catalog_uri: URI of the Iceberg catalog
namespace: Iceberg namespace (default: "trading")
s3_endpoint: S3/MinIO endpoint URL
s3_access_key: S3/MinIO access key
s3_secret_key: S3/MinIO secret key
"""
self.catalog_uri = catalog_uri
self.namespace = namespace
@@ -63,55 +96,39 @@ class SymbolMetadataClient:
catalog_props["s3.secret-access-key"] = s3_secret_key
self.catalog = load_catalog("trading", **catalog_props)
# Lazy load the table
self._table = None
# Cache: ticker -> SymbolMetadata
self._cache: Dict[str, SymbolMetadata] = {}
@property
def table(self):
"""Lazy load the symbol_metadata table."""
if self._table is None:
try:
self._table = self.catalog.load_table(f"{self.namespace}.symbol_metadata")
log.info(f"Loaded symbol_metadata table from {self.namespace}")
except Exception as e:
raise RuntimeError(
f"Failed to load symbol_metadata table from {self.namespace}.symbol_metadata. "
f"This table is required for price/volume conversion. Error: {e}"
f"Failed to load symbol_metadata table from {self.namespace}.symbol_metadata: {e}"
) from e
return self._table
def get_metadata(self, ticker: str) -> SymbolMetadata:
"""
Get metadata for a ticker (e.g., "BINANCE:BTC/USDT").
Get metadata for a ticker (e.g., "BTC/USDT.BINANCE").
Args:
ticker: Market identifier in format "EXCHANGE:SYMBOL"
ticker: Market identifier in Nautilus format "MARKET.EXCHANGE"
Returns:
SymbolMetadata with denominators and market info
SymbolMetadata with Nautilus instrument fields
Raises:
ValueError: If ticker format is invalid or metadata not found
RuntimeError: If symbol_metadata table cannot be loaded
"""
# Check cache first
if ticker in self._cache:
return self._cache[ticker]
# Parse ticker into exchange_id and market_id
if ":" not in ticker:
raise ValueError(
f"Invalid ticker format '{ticker}'. Expected format: 'EXCHANGE:SYMBOL' "
f"(e.g., 'BINANCE:BTC/USDT')"
)
exchange_id, market_id = ticker.split(":", 1)
exchange_id, market_id = parse_ticker(ticker)
# Query Iceberg for this symbol
try:
df = self.table.scan(
row_filter=And(
@@ -122,9 +139,9 @@ class SymbolMetadataClient:
if df.empty:
raise ValueError(
f"No metadata found for ticker '{ticker}' (exchange_id='{exchange_id}', "
f"market_id='{market_id}'). The symbol may not be configured in the system. "
f"Available tickers can be queried from the symbol_metadata table."
f"No metadata found for ticker '{ticker}' "
f"(exchange_id='{exchange_id}', market_id='{market_id}'). "
f"The symbol may not be configured in the system."
)
if len(df) > 1:
@@ -132,55 +149,44 @@ class SymbolMetadataClient:
row = df.iloc[0]
# Extract denominators (required fields)
tick_denom = row.get("tick_denom")
base_denom = row.get("base_denom")
quote_denom = row.get("quote_denom")
def _opt_int(col):
v = row.get(col)
return int(v) if v is not None and not (isinstance(v, float) and v != v) else None
if tick_denom is None or tick_denom == 0:
raise ValueError(
f"Invalid tick_denom for {ticker}: {tick_denom}. "
f"Denominator must be a positive integer."
)
if base_denom is None or base_denom == 0:
raise ValueError(
f"Invalid base_denom for {ticker}: {base_denom}. "
f"Denominator must be a positive integer."
)
if quote_denom is None or quote_denom == 0:
raise ValueError(
f"Invalid quote_denom for {ticker}: {quote_denom}. "
f"Denominator must be a positive integer."
)
def _opt_float(col):
v = row.get(col)
return float(v) if v is not None and not (isinstance(v, float) and v != v) else None
metadata = SymbolMetadata(
exchange_id=exchange_id,
market_id=market_id,
tick_denom=int(tick_denom),
base_denom=int(base_denom),
quote_denom=int(quote_denom),
market_type=row.get("market_type"),
description=row.get("description"),
base_asset=row.get("base_asset"),
quote_asset=row.get("quote_asset"),
price_precision=_opt_int("price_precision"),
size_precision=_opt_int("size_precision"),
tick_size=_opt_float("tick_size"),
lot_size=_opt_float("lot_size"),
min_notional=_opt_float("min_notional"),
margin_init=_opt_float("margin_init"),
margin_maint=_opt_float("margin_maint"),
maker_fee=_opt_float("maker_fee"),
taker_fee=_opt_float("taker_fee"),
contract_multiplier=_opt_float("contract_multiplier"),
)
# Cache the result
self._cache[ticker] = metadata
log.debug(
f"Loaded metadata for {ticker}: tick_denom={metadata.tick_denom}, "
f"base_denom={metadata.base_denom}, quote_denom={metadata.quote_denom}"
f"Loaded metadata for {ticker}: price_precision={metadata.price_precision}, "
f"tick_size={metadata.tick_size}, maker_fee={metadata.maker_fee}"
)
return metadata
except ValueError:
# Re-raise ValueError as-is (ticker not found, invalid format, etc.)
raise
except Exception as e:
raise RuntimeError(
f"Failed to query metadata for ticker '{ticker}': {e}"
) from e
raise RuntimeError(f"Failed to query metadata for ticker '{ticker}': {e}") from e
def clear_cache(self):
"""Clear the metadata cache (useful for testing or forcing reloads)."""

View File

@@ -2,6 +2,7 @@
Utility functions for dexorder.
Includes timestamp conversions, date parsing, and other common utilities.
All internal timestamps use nanoseconds since epoch (UTC).
"""
import logging
@@ -15,13 +16,15 @@ log = logging.getLogger(__name__)
# Type alias for flexible timestamp input
TimestampInput = Union[int, float, str, datetime, pd.Timestamp]
NANOS_PER_SECOND = 1_000_000_000
def to_microseconds(timestamp: TimestampInput) -> int:
def to_nanoseconds(timestamp: TimestampInput) -> int:
"""
Convert various timestamp formats to microseconds since epoch.
Convert various timestamp formats to nanoseconds since epoch.
This is the canonical way to convert user-friendly timestamps (unix seconds,
date strings, datetime objects) into the internal microsecond format used
date strings, datetime objects) into the internal nanosecond format used
throughout the dexorder system.
Args:
@@ -32,87 +35,69 @@ def to_microseconds(timestamp: TimestampInput) -> int:
- pandas Timestamp
Returns:
Microseconds since epoch as integer
Nanoseconds since epoch as integer
Examples:
>>> to_microseconds(1640000000) # Unix timestamp in seconds
1640000000000000
>>> to_microseconds(1640000000.5) # Unix timestamp with fractional seconds
1640000000500000
>>> to_microseconds("2021-12-20") # Date string
1640000000000000
>>> to_microseconds("2021-12-20 12:00:00") # Date string with time
1640000000000000
>>> to_microseconds(datetime(2021, 12, 20, 12, 0, 0)) # datetime object
1640000000000000
>>> to_microseconds(pd.Timestamp("2021-12-20 12:00:00")) # pandas Timestamp
1640000000000000
>>> to_nanoseconds(1640000000) # Unix timestamp in seconds
1640000000000000000
>>> to_nanoseconds(1640000000.5) # Unix timestamp with fractional seconds
1640000000500000000
>>> to_nanoseconds("2021-12-20")
1639958400000000000
"""
if isinstance(timestamp, (int, float)):
# Assume Unix timestamp in seconds
return int(timestamp * 1_000_000)
return int(timestamp * NANOS_PER_SECOND)
elif isinstance(timestamp, str):
# Parse date string
dt = dateparser.parse(timestamp)
if dt is None:
raise ValueError(f"Could not parse date string: {timestamp}")
return int(dt.timestamp() * 1_000_000)
return int(dt.timestamp() * NANOS_PER_SECOND)
elif isinstance(timestamp, datetime):
return int(timestamp.timestamp() * 1_000_000)
return int(timestamp.timestamp() * NANOS_PER_SECOND)
elif isinstance(timestamp, pd.Timestamp):
return int(timestamp.timestamp() * 1_000_000)
return int(timestamp.timestamp() * NANOS_PER_SECOND)
else:
raise TypeError(f"Unsupported timestamp type: {type(timestamp)}")
def to_seconds(timestamp_micros: int) -> float:
def to_seconds(timestamp_nanos: int) -> float:
"""
Convert microseconds since epoch to Unix timestamp in seconds.
Convert nanoseconds since epoch to Unix timestamp in seconds.
Args:
timestamp_micros: Timestamp in microseconds since epoch
timestamp_nanos: Timestamp in nanoseconds since epoch
Returns:
Unix timestamp in seconds (float)
Examples:
>>> to_seconds(1640000000000000)
>>> to_seconds(1640000000000000000)
1640000000.0
>>> to_seconds(1640000000500000000)
1640000000.5
"""
return timestamp_micros / 1_000_000
return timestamp_nanos / NANOS_PER_SECOND
def to_datetime(timestamp_micros: int) -> datetime:
def to_datetime(timestamp_nanos: int) -> datetime:
"""
Convert microseconds since epoch to datetime object.
Convert nanoseconds since epoch to datetime object (UTC).
Args:
timestamp_micros: Timestamp in microseconds since epoch
timestamp_nanos: Timestamp in nanoseconds since epoch
Returns:
datetime object in UTC
Examples:
>>> to_datetime(1640000000000000000)
datetime.datetime(2021, 12, 20, 11, 33, 20, tzinfo=datetime.timezone.utc)
"""
return datetime.fromtimestamp(timestamp_micros / 1_000_000)
return datetime.fromtimestamp(timestamp_nanos / NANOS_PER_SECOND)
def to_timestamp(timestamp_micros: int) -> pd.Timestamp:
def to_timestamp(timestamp_nanos: int) -> pd.Timestamp:
"""
Convert microseconds since epoch to pandas Timestamp.
Convert nanoseconds since epoch to pandas Timestamp.
Args:
timestamp_micros: Timestamp in microseconds since epoch
timestamp_nanos: Timestamp in nanoseconds since epoch
Returns:
pandas Timestamp
Examples:
>>> to_timestamp(1640000000000000000)
Timestamp('2021-12-20 11:33:20')
"""
return pd.Timestamp(timestamp_micros, unit='us')
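A round trip through the converters above, starting from Unix seconds to avoid any timezone ambiguity in string parsing:

import pandas as pd
from dexorder.utils import to_nanoseconds, to_seconds, to_timestamp

ns = to_nanoseconds(1639958400)                  # Unix seconds -> nanoseconds
assert ns == 1_639_958_400_000_000_000
assert to_seconds(ns) == 1639958400.0
assert to_timestamp(ns) == pd.Timestamp("2021-12-20 00:00:00")
assert to_nanoseconds(to_timestamp(ns)) == ns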
return pd.Timestamp(timestamp_nanos, unit='ns')