Add Ticker24h support: hourly market snapshots with USD-normalized volume filtering

This commit is contained in:
2026-04-26 18:39:52 -04:00
parent 85fcbe1330
commit 0178b5d29d
45 changed files with 1995 additions and 170 deletions

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Optional, List
from typing import Optional, List, Any
import pandas as pd
@@ -160,3 +160,59 @@ class DataAPI(ABC):
"""
pass
@abstractmethod
async def get_ticker_24h(
    self,
    exchange: str,
    limit: Optional[int] = None,
    min_std_quote_volume: Optional[float] = None,
    market_type: Optional[str] = None,
    base_asset_contains: Optional[str] = None,
) -> pd.DataFrame:
    """
    Retrieve the 24h rolling market stats for all symbols on an exchange.

    Data is refreshed hourly by the ingestor pipeline. Returns all symbols
    sorted by std_quote_volume (USD-normalized volume) descending. Symbols
    with unknown quote currency conversion are listed last
    (std_quote_volume = NaN).

    Args:
        exchange: Exchange name (e.g., "BINANCE", "COINBASE", "KRAKEN")
        limit: If set, return only the top N symbols by volume. None = return all.
        min_std_quote_volume: Exclude symbols with USD volume below this threshold.
        market_type: Filter by market type: "spot" or "perp". None = return all.
        base_asset_contains: Filter to symbols whose base asset contains this
            string (case-insensitive). E.g., "BTC" matches "BTC/USDT".

    Returns:
        DataFrame sorted by std_quote_volume descending (NULLs last). Columns:
        - ticker: Full ticker (e.g., "BTC/USDT.BINANCE")
        - exchange_id: Exchange name
        - base_asset: Base currency (e.g., "BTC")
        - quote_asset: Quote currency (e.g., "USDT")
        - last_price: Last traded price in quote currency
        - price_change_pct: 24h price change as percentage
        - quote_volume_24h: Raw 24h volume in quote asset
        - std_quote_volume: quote_volume_24h converted to USD (NaN if conversion unknown)
        - bid_price, ask_price: Current best bid/ask (NaN if not provided)
        - open_24h, high_24h, low_24h: 24h OHLC prices (NaN if not provided)
        - volume_24h: Base-asset volume (NaN if not provided)
        - num_trades: 24h trade count (NaN if not provided)
        - timestamp_ms: Snapshot timestamp in milliseconds

        Returns empty DataFrame if no data is available (e.g., not yet fetched).

    Examples:
        # Top 50 most liquid Binance spot symbols
        df = await api.get_ticker_24h("BINANCE", limit=50, market_type="spot")

        # All BTC pairs with at least $10M daily volume
        df = await api.get_ticker_24h("BINANCE",
                                      base_asset_contains="BTC",
                                      min_std_quote_volume=10_000_000)

        # All Binance symbols (for building a scanner universe)
        df = await api.get_ticker_24h("BINANCE")
    """
    pass

View File

@@ -8,6 +8,7 @@ import pandas as pd
from dexorder.api.data_api import DataAPI
from dexorder.ohlc_client import OHLCClient
from dexorder.ticker24h_client import Ticker24hClient
from dexorder.utils import TimestampInput, to_nanoseconds
log = logging.getLogger(__name__)
@@ -33,6 +34,9 @@ VALID_EXTRA_COLUMNS = {
}
MAX_BARS_PER_SCRIPT = 2_000_000
class DataAPIImpl(DataAPI):
"""
Implementation of DataAPI using OHLCClient for querying OHLC data.
@@ -79,8 +83,10 @@ class DataAPIImpl(DataAPI):
s3_secret_key=s3_secret_key,
s3_region=s3_region,
)
self.ticker24h_client = Ticker24hClient(relay_endpoint, notification_endpoint)
self.request_timeout = request_timeout
self._started = False
self._bars_fetched: int = 0
async def start(self):
"""
@@ -91,6 +97,7 @@ class DataAPIImpl(DataAPI):
"""
if not self._started:
await self.ohlc_client.start()
await self.ticker24h_client.connect()
self._started = True
async def stop(self):
@@ -99,6 +106,7 @@ class DataAPIImpl(DataAPI):
"""
if self._started:
await self.ohlc_client.stop()
await self.ticker24h_client.stop()
self._started = False
async def historical_ohlc(
@@ -121,6 +129,17 @@ class DataAPIImpl(DataAPI):
start_nanos = to_nanoseconds(start_time)
end_nanos = to_nanoseconds(end_time)
estimated_bars = int((end_nanos - start_nanos) / (period_seconds * 1_000_000_000))
if self._bars_fetched + estimated_bars > MAX_BARS_PER_SCRIPT:
raise ValueError(
f"Script bar budget exceeded: {self._bars_fetched:,} bars already fetched, "
f"this request would add ~{estimated_bars:,} more "
f"(~{self._bars_fetched + estimated_bars:,} total). "
f"Limit is {MAX_BARS_PER_SCRIPT:,} bars per script. "
f"Reduce the date range, use a coarser period (e.g. 86400s instead of 3600s), "
f"or fetch fewer symbols."
)
log.debug(f"Fetching OHLC: {ticker}, period={period_seconds}s, "
f"start={start_time} ({start_nanos}ns), end={end_time} ({end_nanos}ns)")
@@ -154,6 +173,7 @@ class DataAPIImpl(DataAPI):
if not df.empty:
available_cols = [col for col in columns_to_fetch if col in df.columns]
df = df[available_cols]
self._bars_fetched += len(df)
return df
@@ -173,6 +193,30 @@ class DataAPIImpl(DataAPI):
"""
raise NotImplementedError("latest_ohlc will be implemented in the future")
async def get_ticker_24h(
    self,
    exchange: str,
    limit: Optional[int] = None,
    min_std_quote_volume: Optional[float] = None,
    market_type: Optional[str] = None,
    base_asset_contains: Optional[str] = None,
) -> pd.DataFrame:
    """
    Fetch the hourly-refreshed 24h market snapshot for *exchange*.

    Thin delegation to Ticker24hClient; see DataAPI.get_ticker_24h for the
    full contract and column documentation.
    """
    # Lazily bring the client stack up on first use.
    if not self._started:
        await self.start()
    client = self.ticker24h_client
    return await client.get_ticker_24h(
        exchange=exchange,
        limit=limit,
        min_std_quote_volume=min_std_quote_volume,
        market_type=market_type,
        base_asset_contains=base_asset_contains,
        request_timeout=self.request_timeout,
    )
async def __aenter__(self):
"""Support async context manager."""
await self.start()

View File

@@ -0,0 +1,242 @@
"""
ZMQ-based client for fetching Ticker24h snapshots via the relay backbone.
Sends a SubmitHistoricalRequest with ticker="@TICKER24H.{EXCHANGE}" to the relay.
Flink processes the TICKER_SNAPSHOT job and publishes the result on
"RESPONSE:{client_id}" — a topic only this client subscribes to, preventing
the DoS vector of research scripts triggering gateway-broadcast updates.
Results are cached in-process. Subsequent calls for the same exchange return
instantly from cache.
"""
import asyncio
import logging
import struct
import uuid
from typing import Optional, Dict
import pandas as pd
import zmq
import zmq.asyncio
try:
from dexorder.generated.ingestor_pb2 import SubmitHistoricalRequest, SubmitResponse
from dexorder.generated.ticker24h_pb2 import Ticker24h
except ImportError:
print("Warning: Protobuf files not found. Run protoc inside the sandbox container.")
raise
log = logging.getLogger(__name__)
# Wire-format message-type tags: the first byte of the message frame
# identifies the payload that follows it.
MSG_TYPE_SUBMIT = 0x10       # SubmitHistoricalRequest (outgoing)
MSG_TYPE_TICKER_24H = 0x0D   # Ticker24h snapshot (incoming broadcast)
# Protocol version byte, sent as its own ZMQ frame ahead of every request.
PROTOCOL_VERSION = 0x01

# Column order of the DataFrame returned by Ticker24hClient.get_ticker_24h.
TICKER24H_COLUMNS = [
    "ticker", "exchange_id", "base_asset", "quote_asset",
    "last_price", "price_change_pct", "quote_volume_24h", "std_quote_volume",
    "bid_price", "ask_price", "open_24h", "high_24h", "low_24h",
    "volume_24h", "num_trades", "timestamp_ms",
]
class Ticker24hClient:
"""
Client that fetches Ticker24h snapshots via the relay/Flink pipeline.
Call connect() before use. Each exchange result is cached; re-requests
are only issued when the cache is empty (first call per exchange per session).
The background listener continues running and will update the cache on any
future broadcast subscriptions if the subscription list is expanded.
"""
def __init__(self, relay_endpoint: str, notification_endpoint: str):
    """
    Store endpoints and initialize all connection state as "not started".

    Args:
        relay_endpoint: ZMQ endpoint used for REQ-style snapshot requests.
        notification_endpoint: ZMQ endpoint the SUB listener connects to.
    """
    self._relay_endpoint = relay_endpoint
    self._notification_endpoint = notification_endpoint
    # Short random suffix keeps the "RESPONSE:{client_id}" topic unique
    # per client instance, so only this client receives its responses.
    self._client_id = f"t24h-{uuid.uuid4().hex[:8]}"
    # exchange name -> last Ticker24h snapshot received for that exchange
    self._cache: Dict[str, Ticker24h] = {}
    # exchange name -> event set by the listener when its snapshot arrives
    self._events: Dict[str, asyncio.Event] = {}
    # ZMQ resources are created lazily in connect().
    self._context: Optional[zmq.asyncio.Context] = None
    self._sub: Optional[zmq.asyncio.Socket] = None
    self._listener_task: Optional[asyncio.Task] = None
    self._connected = False
async def connect(self):
    """
    Start the background listener. Safe to call multiple times.

    If a previous session left a listener task or sockets behind (e.g. the
    client was last used on a different event loop), they are torn down
    before the new context and subscription are created.
    """
    if self._connected:
        return
    # Clean up stale state from a previous event loop
    if self._listener_task is not None and not self._listener_task.done():
        self._listener_task.cancel()
        try:
            await self._listener_task
        except asyncio.CancelledError:
            pass
    # BUGFIX: close the stale SUB socket *before* terminating its context.
    # zmq Context.term() blocks until every socket created on the context
    # is closed, so terminating with the old SUB socket still open could
    # hang the event loop here. linger=0 discards any pending frames.
    if self._sub is not None:
        self._sub.close(linger=0)
        self._sub = None
    if self._context is not None:
        self._context.term()
    self._context = zmq.asyncio.Context()
    self._sub = self._context.socket(zmq.SUB)
    self._sub.connect(self._notification_endpoint)
    self._sub.subscribe(f"RESPONSE:{self._client_id}".encode())
    self._listener_task = asyncio.create_task(self._listen())
    # Let the listener establish its subscription before any request goes out
    await asyncio.sleep(0.1)
    self._connected = True
    log.debug("Ticker24hClient connected: client_id=%s", self._client_id)
async def stop(self):
"""Stop the background listener and close sockets."""
self._connected = False
if self._listener_task and not self._listener_task.done():
self._listener_task.cancel()
try:
await self._listener_task
except asyncio.CancelledError:
pass
if self._sub:
self._sub.close()
self._sub = None
if self._context:
self._context.term()
self._context = None
async def _listen(self):
    """
    Background task: receive Ticker24h broadcasts and update the cache.

    Runs until cancelled. Each received snapshot replaces the cached entry
    for its exchange and wakes any waiter registered in self._events.
    """
    while True:
        try:
            frames = await self._sub.recv_multipart()
            # Wire: [topic][0x01][0x0D + Ticker24h proto bytes]
            if len(frames) < 3:
                continue
            payload = frames[2]
            if not payload or payload[0] != MSG_TYPE_TICKER_24H:
                continue
            snapshot = Ticker24h()
            snapshot.ParseFromString(payload[1:])
            exchange = snapshot.exchange_id
            self._cache[exchange] = snapshot
            event = self._events.get(exchange)
            if event:
                event.set()
            log.debug("Ticker24h received: exchange=%s tickers=%d", exchange, len(snapshot.tickers))
        except asyncio.CancelledError:
            return
        except Exception as exc:
            log.warning("Ticker24hClient listener error: %s", exc)
            # BUGFIX: back off briefly before retrying. If the SUB socket is
            # closed or broken, recv_multipart() raises immediately and this
            # loop would otherwise spin hot, flooding the log and burning CPU.
            await asyncio.sleep(0.5)
async def _request(self, exchange: str, timeout: float):
    """
    Send a TICKER_SNAPSHOT request for *exchange* and wait for the snapshot.

    The snapshot itself arrives via the SUB listener (_listen), which sets
    the per-exchange event; this method only sends the request and waits.

    Raises:
        TimeoutError: if the snapshot notification does not arrive within
            *timeout* seconds.
    """
    event = self._events.setdefault(exchange, asyncio.Event())
    event.clear()
    request_id = str(uuid.uuid4())
    req = SubmitHistoricalRequest(
        request_id=request_id,
        ticker=f"@TICKER24H.{exchange}",
        client_id=self._client_id,
    )
    version_frame = struct.pack("B", PROTOCOL_VERSION)
    message_frame = struct.pack("B", MSG_TYPE_SUBMIT) + req.SerializeToString()
    # Use a fresh REQ socket per request (matches HistoryClient pattern)
    sock = self._context.socket(zmq.REQ)
    sock.connect(self._relay_endpoint)
    try:
        await sock.send(version_frame, zmq.SNDMORE)
        await sock.send(message_frame)
        # Drain the relay's immediate SubmitResponse ack
        while True:
            await asyncio.wait_for(sock.recv(), timeout=5.0)
            if not sock.get(zmq.RCVMORE):
                break
    except asyncio.TimeoutError:
        log.warning("No ack from relay for ticker24h request exchange=%s", exchange)
    finally:
        # BUGFIX: linger=0 so a timed-out REQ socket does not keep
        # undelivered frames alive — a lingering socket would block
        # Context.term() during shutdown.
        sock.close(linger=0)
    try:
        await asyncio.wait_for(event.wait(), timeout=timeout)
    except asyncio.TimeoutError:
        raise TimeoutError(
            f"Ticker24h request for {exchange} timed out after {timeout}s"
        )
async def get_ticker_24h(
    self,
    exchange: str,
    limit: Optional[int] = None,
    min_std_quote_volume: Optional[float] = None,
    market_type: Optional[str] = None,
    base_asset_contains: Optional[str] = None,
    request_timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Return a DataFrame of 24h stats for all symbols on *exchange*.

    The first call per exchange sends a relay request and blocks until the
    snapshot notification arrives (up to *request_timeout* seconds); later
    calls for the same exchange are served from the in-process cache.

    Args:
        exchange: Exchange name, e.g. "BINANCE"
        limit: Return only top N by std_quote_volume
        min_std_quote_volume: Exclude symbols below this USD volume threshold
        market_type: "spot" (no ':' in ticker) or "perp" (has ':')
        base_asset_contains: Case-insensitive substring filter on base asset
        request_timeout: Seconds to wait for first response (default 30)
    """
    # A finished listener task means the connection is dead — reconnect.
    if self._connected and self._listener_task is not None and self._listener_task.done():
        self._connected = False
    if not self._connected:
        await self.connect()

    # NOTE(review): cache/event keys use the upper-cased name, while _listen
    # keys by snapshot.exchange_id — assumes the server echoes upper case.
    exchange = exchange.upper()
    if exchange not in self._cache:
        await self._request(exchange, timeout=request_timeout)
    snapshot = self._cache[exchange]

    def _opt(entry, field):
        # Optional proto fields become None, which pandas renders as NaN.
        return getattr(entry, field) if entry.HasField(field) else None

    rows = [
        {
            "ticker": entry.ticker,
            "exchange_id": entry.exchange_id,
            "base_asset": entry.base_asset,
            "quote_asset": entry.quote_asset,
            "last_price": entry.last_price,
            "price_change_pct": entry.price_change_pct,
            "quote_volume_24h": entry.quote_volume_24h,
            "std_quote_volume": _opt(entry, "std_quote_volume"),
            "bid_price": _opt(entry, "bid_price"),
            "ask_price": _opt(entry, "ask_price"),
            "open_24h": _opt(entry, "open_24h"),
            "high_24h": _opt(entry, "high_24h"),
            "low_24h": _opt(entry, "low_24h"),
            "volume_24h": _opt(entry, "volume_24h"),
            "num_trades": _opt(entry, "num_trades"),
            # timestamp is divided by 1e6 to yield ms — presumably the proto
            # carries nanoseconds; confirm against the ingestor schema.
            "timestamp_ms": round(entry.timestamp / 1_000_000),
        }
        for entry in snapshot.tickers
    ]
    if rows:
        df = pd.DataFrame(rows, columns=TICKER24H_COLUMNS)
    else:
        df = pd.DataFrame(columns=TICKER24H_COLUMNS)

    if market_type:
        perp_mask = df["ticker"].str.contains(":", na=False)
        normalized = market_type.lower()
        if normalized == "spot":
            df = df[~perp_mask]
        elif normalized in ("perp", "perpetual"):
            df = df[perp_mask]
    if base_asset_contains:
        base_mask = df["base_asset"].str.contains(base_asset_contains, case=False, na=False)
        df = df[base_mask]
    if min_std_quote_volume is not None:
        known = df["std_quote_volume"].notna()
        df = df[known & (df["std_quote_volume"] >= min_std_quote_volume)]

    df = df.sort_values("std_quote_volume", ascending=False, na_position="last")
    if limit:
        df = df.head(limit)
    return df.reset_index(drop=True)

View File

@@ -8,6 +8,7 @@ Returns a JSON object with a `values` array of {timestamp, ...} records, where
timestamp is a Unix second integer and value fields hold floats (or null for NaN).
"""
import asyncio
import json
import logging
from pathlib import Path
@@ -212,9 +213,16 @@ async def evaluate_indicator(
}))]
args.append(df[col])
# Compute
# Compute — run in a thread so a slow indicator doesn't block the event loop
try:
result = fn(*args, **parameters)
result = await asyncio.wait_for(
asyncio.to_thread(fn, *args, **parameters),
timeout=25.0,
)
except asyncio.TimeoutError:
return [TextContent(type="text", text=json.dumps({
"error": f"Indicator computation timed out after 25 seconds: {pandas_ta_name}"
}))]
except Exception as exc:
log.exception("evaluate_indicator: computation failed")
return [TextContent(type="text", text=json.dumps({

View File

@@ -380,6 +380,12 @@ class GitManager:
# Custom Indicator Setup
# =============================================================================
# Maps ta_name → git commit hash of implementation.py at last registration.
# Used to detect when an indicator file has been updated so the binding can
# be refreshed without requiring a process restart.
_custom_indicator_revisions: dict[str, str] = {}
def setup_custom_indicators(data_dir: Path) -> None:
"""
Register user's custom indicators with pandas-ta.
@@ -388,7 +394,8 @@ def setup_custom_indicators(data_dir: Path) -> None:
the function as ``ta.custom_{sanitized_name}`` so that evaluate_indicator
can call it as ``getattr(ta, "custom_trendflex", None)``.
The binding is idempotent — indicators already registered are skipped.
Re-registers automatically when the implementation file's git revision
changes, so in-process edits are picked up without a restart.
Note: pandas-ta's ta.import_dir() requires a category-based directory
structure (e.g. tmpdir/momentum/trendflex.py) plus a companion
@@ -434,9 +441,29 @@ def setup_custom_indicators(data_dir: Path) -> None:
continue
seen.add(ta_name)
# Skip if already bound (e.g. called multiple times in a process)
# Get the current git revision of this implementation file so we can
# detect edits and re-register without a process restart.
git_rev = ""
try:
result = subprocess.run(
["git", "log", "-1", "--format=%H", "--", impl.name],
capture_output=True, text=True, cwd=impl.parent,
)
git_rev = result.stdout.strip()
except Exception:
pass
if getattr(ta, ta_name, None) is not None:
continue
if _custom_indicator_revisions.get(ta_name) == git_rev:
continue # Same revision — already up to date
# Revision changed — clear old binding so we re-register below
log.info("Re-registering updated custom indicator '%s' (rev %s)", ta_name, git_rev[:8])
try:
delattr(ta, ta_name)
except AttributeError:
pass
_custom_indicator_revisions[ta_name] = git_rev
try:
spec = importlib.util.spec_from_file_location(ta_name, impl)