chart data loading

This commit is contained in:
2026-03-24 21:37:49 -04:00
parent f6bd22a8ef
commit c76887ab92
65 changed files with 6350 additions and 713 deletions

View File

@@ -0,0 +1,546 @@
/**
* DuckDB Client for querying Apache Iceberg tables
*
* Uses DuckDB's native Iceberg and Parquet support to query data
* directly from S3/MinIO without needing catalog-only libraries.
*/
import duckdb from 'duckdb';
import type { FastifyBaseLogger } from 'fastify';
import { promisify } from 'util';
type Database = duckdb.Database;
type Connection = duckdb.Connection;
const { Database, Connection } = duckdb;
/**
 * Configuration for {@link DuckDBClient}.
 *
 * Supports two independent Iceberg catalogs: one for conversation/gateway
 * data and (optionally) a separate one for OHLC/trading data.
 */
export interface DuckDBConfig {
  /** Iceberg REST catalog URI for conversation data (e.g. http://catalog:8181). */
  catalogUri: string;
  /** Namespace holding conversation tables (typically 'gateway'). */
  namespace: string;
  /** Catalog URI for OHLC/trading data; falls back to catalogUri when omitted. */
  ohlcCatalogUri?: string;
  /** Namespace holding OHLC tables; defaults to 'trading' when omitted. */
  ohlcNamespace?: string;
  /** S3/MinIO endpoint URL; S3 settings are applied only if all three S3 fields are set. */
  s3Endpoint?: string;
  s3AccessKey?: string;
  s3SecretKey?: string;
}
/**
 * DuckDB Client with Iceberg support
 *
 * Provides SQL-based, read-only queries against Iceberg tables stored in
 * S3/MinIO. Table metadata locations are resolved through the Iceberg REST
 * catalog, then scanned with DuckDB's `iceberg_scan`. Writes are handled
 * elsewhere (Flink); this client never mutates data.
 */
export class DuckDBClient {
  // In-memory DuckDB handle and connection; null until initialize() runs,
  // and again after close().
  private db: Database | null = null;
  private conn: Connection | null = null;
  private namespace: string;
  private ohlcNamespace: string;
  private catalogUri: string;
  private ohlcCatalogUri: string;
  private s3Config: {
    endpoint?: string;
    accessKey?: string;
    secretKey?: string;
  };
  private logger: FastifyBaseLogger;
  private initialized = false;

  constructor(config: DuckDBConfig, logger: FastifyBaseLogger) {
    this.logger = logger;
    this.namespace = config.namespace;
    this.catalogUri = config.catalogUri;
    // OHLC data may live in its own catalog/namespace; default to the
    // conversation catalog and the conventional 'trading' namespace.
    this.ohlcCatalogUri = config.ohlcCatalogUri || config.catalogUri;
    this.ohlcNamespace = config.ohlcNamespace || 'trading';
    this.s3Config = {
      endpoint: config.s3Endpoint,
      accessKey: config.s3AccessKey,
      secretKey: config.s3SecretKey,
    };
  }

  /**
   * Initialize DuckDB connection and configure S3/Iceberg extensions.
   *
   * Idempotent: repeat calls return immediately once initialized.
   * @throws rethrows any DuckDB/extension error after logging it.
   */
  async initialize(): Promise<void> {
    if (this.initialized) {
      return;
    }
    try {
      this.db = new Database(':memory:');
      this.conn = this.db.connect();
      const all = promisify(this.conn.all.bind(this.conn));
      // Install and load required extensions
      await all('INSTALL httpfs;');
      await all('LOAD httpfs;');
      await all('INSTALL iceberg;');
      await all('LOAD iceberg;');
      // Configure S3 credentials only when the full triple is provided.
      // NOTE(review): values are interpolated into SET statements; they come
      // from trusted deployment config, not user input — confirm that remains
      // true if config sources ever change.
      if (this.s3Config.endpoint && this.s3Config.accessKey && this.s3Config.secretKey) {
        const s3Url = new URL(this.s3Config.endpoint);
        const useSSL = s3Url.protocol === 'https:';
        // URL.port is '' for default ports; fall back to 443 (https) or
        // 9000 (the MinIO convention for plain http).
        await all(`SET s3_endpoint='${s3Url.hostname}:${s3Url.port || (useSSL ? 443 : 9000)}';`);
        await all(`SET s3_access_key_id='${this.s3Config.accessKey}';`);
        await all(`SET s3_secret_access_key='${this.s3Config.secretKey}';`);
        await all(`SET s3_use_ssl=${useSSL};`);
        await all(`SET s3_url_style='path';`);
        await all(`SET s3_region='us-east-1';`);
        await all(`SET s3_url_compatibility_mode=true;`);
        this.logger.info({
          endpoint: this.s3Config.endpoint,
          useSSL,
        }, 'Configured DuckDB S3 settings');
      }
      this.initialized = true;
      this.logger.info({
        catalogUri: this.catalogUri,
        namespace: this.namespace,
        ohlcCatalogUri: this.ohlcCatalogUri,
        ohlcNamespace: this.ohlcNamespace,
      }, 'DuckDB client initialized');
    } catch (error) {
      this.logger.error({ error }, 'Failed to initialize DuckDB');
      throw error;
    }
  }

  /**
   * Execute a SQL query and return all rows.
   *
   * @param sql SQL text, optionally containing `?` placeholders
   * @param params positional parameters bound to the placeholders
   * @throws rethrows the DuckDB error after logging sql/params context
   */
  private async query<T = any>(sql: string, params?: any[]): Promise<T[]> {
    if (!this.conn) {
      throw new Error('DuckDB connection not initialized');
    }
    try {
      const all = promisify(this.conn.all.bind(this.conn)) as (sql: string, ...params: any[]) => Promise<any[]>;
      const rows = params && params.length > 0 ? await all(sql, ...params) : await all(sql);
      return rows as T[];
    } catch (error) {
      this.logger.error({ error, sql, params }, 'DuckDB query failed');
      throw error;
    }
  }

  /**
   * Resolve the Iceberg table's metadata.json location via the REST catalog.
   *
   * @returns the metadata location (S3 path), or null when the table does not
   *          exist or the lookup fails (errors are logged, not thrown).
   */
  private async getTablePath(namespace: string, tableName: string, catalogUri: string): Promise<string | null> {
    try {
      const tableUrl = `${catalogUri}/v1/namespaces/${namespace}/tables/${tableName}`;
      this.logger.debug({ tableUrl }, 'Fetching Iceberg table metadata');
      const response = await fetch(tableUrl, {
        method: 'GET',
        headers: {
          'Content-Type': 'application/json',
        },
      });
      if (!response.ok) {
        if (response.status === 404) {
          this.logger.debug({ namespace, tableName }, 'Table not found in catalog');
          return null;
        }
        throw new Error(`Failed to fetch table metadata: ${response.status} ${response.statusText}`);
      }
      const metadata = await response.json() as any;
      // Extract metadata location (S3 path to metadata.json)
      const metadataLocation = metadata['metadata-location'] || metadata.location;
      if (!metadataLocation) {
        this.logger.warn({ metadata }, 'No metadata-location found in table response');
        return null;
      }
      this.logger.debug({ metadataLocation }, 'Found Iceberg table location');
      return metadataLocation;
    } catch (error: any) {
      this.logger.error({ error: error.message, namespace, tableName }, 'Failed to get table path');
      return null;
    }
  }

  /**
   * Query messages from the gateway conversations table.
   *
   * @param options.startTime inclusive lower bound on timestamp (0 is valid)
   * @param options.endTime inclusive upper bound on timestamp (0 is valid)
   * @param options.limit maximum rows to return; 0/undefined means no limit
   * @returns matching rows with `timestamp` converted back to number;
   *          empty array on any failure (errors are logged, not thrown).
   */
  async queryMessages(
    userId: string,
    sessionId: string,
    options?: {
      startTime?: number;
      endTime?: number;
      limit?: number;
    }
  ): Promise<any[]> {
    await this.initialize();
    try {
      const tablePath = await this.getTablePath(
        this.namespace,
        'conversations',
        this.catalogUri
      );
      if (!tablePath) {
        this.logger.warn('Conversations table not found');
        return [];
      }
      // Build SQL query with optional filters
      let sql = `
        SELECT
          id,
          user_id,
          session_id,
          role,
          content,
          metadata,
          timestamp
        FROM iceberg_scan('${tablePath}')
        WHERE user_id = ?
          AND session_id = ?
      `;
      const params: any[] = [userId, sessionId];
      // BUG FIX: use explicit null checks so a bound of 0 (the epoch) is not
      // silently skipped by falsiness.
      if (options?.startTime != null) {
        sql += ' AND timestamp >= ?';
        params.push(options.startTime.toString());
      }
      if (options?.endTime != null) {
        sql += ' AND timestamp <= ?';
        params.push(options.endTime.toString());
      }
      sql += ' ORDER BY timestamp ASC';
      // A limit of 0/undefined intentionally means "no LIMIT clause".
      if (options?.limit) {
        sql += ' LIMIT ?';
        params.push(options.limit);
      }
      this.logger.debug({ userId, sessionId, options }, 'Querying conversation messages');
      const rows = await this.query(sql, params);
      this.logger.info({
        userId,
        sessionId,
        count: rows.length
      }, 'Loaded conversation messages from Iceberg');
      // Convert timestamp strings back to numbers
      return rows.map((row: any) => ({
        ...row,
        timestamp: Number(row.timestamp)
      }));
    } catch (error: any) {
      this.logger.error({
        error: error.message,
        userId,
        sessionId
      }, 'Failed to query conversation messages');
      return [];
    }
  }

  /**
   * Query the most recent checkpoint for a user/session from the
   * gateway checkpoints table, optionally filtered to one checkpoint_id.
   *
   * @returns the newest matching row (timestamp as number), or null when no
   *          row matches or the query fails (errors are logged, not thrown).
   */
  async queryCheckpoint(
    userId: string,
    sessionId: string,
    checkpointId?: string
  ): Promise<any | null> {
    await this.initialize();
    try {
      const tablePath = await this.getTablePath(
        this.namespace,
        'checkpoints',
        this.catalogUri
      );
      if (!tablePath) {
        this.logger.warn('Checkpoints table not found');
        return null;
      }
      let sql = `
        SELECT
          user_id,
          session_id,
          checkpoint_id,
          checkpoint_data,
          metadata,
          timestamp
        FROM iceberg_scan('${tablePath}')
        WHERE user_id = ?
          AND session_id = ?
      `;
      const params: any[] = [userId, sessionId];
      if (checkpointId) {
        sql += ' AND checkpoint_id = ?';
        params.push(checkpointId);
      }
      // Newest first — we only want the latest checkpoint.
      sql += ' ORDER BY timestamp DESC LIMIT 1';
      this.logger.debug({ userId, sessionId, checkpointId }, 'Querying checkpoint');
      const rows = await this.query(sql, params);
      if (rows.length === 0) {
        return null;
      }
      const row = rows[0];
      this.logger.info({
        userId,
        sessionId,
        checkpointId: row.checkpoint_id
      }, 'Loaded checkpoint from Iceberg');
      // Convert timestamp string back to number
      return {
        ...row,
        timestamp: Number(row.timestamp)
      };
    } catch (error: any) {
      this.logger.error({
        error: error.message,
        userId,
        sessionId,
        checkpointId
      }, 'Failed to query checkpoint');
      return null;
    }
  }

  /**
   * Query all rows from the trading symbol_metadata table.
   *
   * @returns all rows, or empty array on failure (errors are logged).
   */
  async queryAllSymbols(): Promise<any[]> {
    await this.initialize();
    try {
      const tablePath = await this.getTablePath(
        this.ohlcNamespace,
        'symbol_metadata',
        this.ohlcCatalogUri
      );
      if (!tablePath) {
        this.logger.warn('Symbol metadata table not found');
        return [];
      }
      // Query the Iceberg table using DuckDB
      const sql = `SELECT * FROM iceberg_scan('${tablePath}')`;
      this.logger.debug({ sql }, 'Querying symbol metadata');
      const rows = await this.query(sql);
      this.logger.info({ count: rows.length }, 'Loaded symbol metadata from Iceberg');
      return rows;
    } catch (error: any) {
      this.logger.error({ error: error.message }, 'Failed to query symbol metadata');
      return [];
    }
  }

  /**
   * Query OHLC candles from the trading ohlc table.
   *
   * @param ticker market identifier (e.g. "BINANCE:BTC/USDT")
   * @param period_seconds candle period in seconds
   * @param start_time inclusive lower bound, microseconds since epoch
   * @param end_time inclusive upper bound, microseconds since epoch
   * @returns rows ordered by timestamp ascending (timestamp as number);
   *          empty array on failure (errors are logged, not thrown).
   */
  async queryOHLC(
    ticker: string,
    period_seconds: number,
    start_time: bigint, // microseconds
    end_time: bigint // microseconds
  ): Promise<any[]> {
    await this.initialize();
    try {
      const tablePath = await this.getTablePath(
        this.ohlcNamespace,
        'ohlc',
        this.ohlcCatalogUri
      );
      if (!tablePath) {
        this.logger.warn('OHLC table not found');
        return [];
      }
      // Query the Iceberg table with filters
      const sql = `
        SELECT
          timestamp,
          ticker,
          period_seconds,
          open,
          high,
          low,
          close,
          volume
        FROM iceberg_scan('${tablePath}')
        WHERE ticker = ?
          AND period_seconds = ?
          AND timestamp >= ?
          AND timestamp <= ?
        ORDER BY timestamp ASC
      `;
      // bigint bounds are bound as strings to avoid Number precision loss.
      const params = [
        ticker,
        period_seconds,
        start_time.toString(),
        end_time.toString()
      ];
      this.logger.debug({ ticker, period_seconds, start_time, end_time }, 'Querying OHLC data');
      const rows = await this.query(sql, params);
      this.logger.info({
        ticker,
        period_seconds,
        count: rows.length
      }, 'Loaded OHLC data from Iceberg');
      // Convert timestamp strings to numbers (microseconds as Number is fine for display)
      return rows.map((row: any) => ({
        ...row,
        timestamp: Number(row.timestamp)
      }));
    } catch (error: any) {
      this.logger.error({
        error: error.message,
        ticker,
        period_seconds
      }, 'Failed to query OHLC data');
      return [];
    }
  }

  /**
   * Check whether any OHLC rows exist for the given ticker/period/range.
   *
   * @returns true if at least one row exists; false when the table is
   *          missing, no rows match, or the query fails (errors are logged).
   */
  async hasOHLCData(
    ticker: string,
    period_seconds: number,
    start_time: bigint,
    end_time: bigint
  ): Promise<boolean> {
    await this.initialize();
    try {
      const tablePath = await this.getTablePath(
        this.ohlcNamespace,
        'ohlc',
        this.ohlcCatalogUri
      );
      if (!tablePath) {
        return false;
      }
      const sql = `
        SELECT COUNT(*) as count
        FROM iceberg_scan('${tablePath}')
        WHERE ticker = ?
          AND period_seconds = ?
          AND timestamp >= ?
          AND timestamp <= ?
      `;
      const params = [
        ticker,
        period_seconds,
        start_time.toString(),
        end_time.toString()
      ];
      // NOTE(review): DuckDB may surface COUNT(*) as a BigInt; the > 0
      // comparison below works for both number and bigint.
      const rows = await this.query<{ count: number }>(sql, params);
      return rows.length > 0 && rows[0].count > 0;
    } catch (error: any) {
      this.logger.error({ error: error.message }, 'Failed to check OHLC data existence');
      return false;
    }
  }

  /**
   * Find missing OHLC data ranges in [start_time, end_time].
   *
   * Current heuristic: if fewer than 95% of the expected bars are present,
   * report the ENTIRE range as missing (no per-gap detection yet).
   *
   * @returns list of [start, end] ranges (microseconds) still needing data;
   *          the full range on error (safe default for re-fetching).
   */
  async findMissingOHLCRanges(
    ticker: string,
    period_seconds: number,
    start_time: bigint,
    end_time: bigint
  ): Promise<Array<[bigint, bigint]>> {
    await this.initialize();
    try {
      const data = await this.queryOHLC(ticker, period_seconds, start_time, end_time);
      if (data.length === 0) {
        // All data is missing
        return [[start_time, end_time]];
      }
      // Check if we have continuous data
      // For now, simple check: if we have any data, assume complete
      // TODO: Implement proper gap detection by checking for missing periods
      const periodMicros = BigInt(period_seconds) * 1000000n;
      const expectedBars = Number((end_time - start_time) / periodMicros);
      if (data.length < expectedBars * 0.95) { // Allow 5% tolerance
        this.logger.debug({
          ticker,
          expected: expectedBars,
          actual: data.length,
        }, 'Incomplete OHLC data detected');
        return [[start_time, end_time]]; // Request full range
      }
      // Data appears complete
      return [];
    } catch (error: any) {
      this.logger.error({ error: error.message }, 'Failed to find missing OHLC ranges');
      // Return full range on error (safe default)
      return [[start_time, end_time]];
    }
  }

  /**
   * Close the DuckDB connection and release the database handle.
   * After close(), initialize() may be called again to reopen.
   */
  async close(): Promise<void> {
    if (this.conn) {
      const close = promisify(this.conn.close.bind(this.conn));
      await close();
      this.conn = null;
    }
    if (this.db) {
      const close = promisify(this.db.close.bind(this.db));
      await close();
      this.db = null;
    }
    this.initialized = false;
    this.logger.info('DuckDB client closed');
  }
}

View File

@@ -1,15 +1,32 @@
import { IcebergRestCatalog } from 'iceberg-js';
import type { FastifyBaseLogger } from 'fastify';
import { DuckDBClient } from './duckdb-client.js';
/**
* Iceberg client configuration
*
* Supports separate catalog/warehouse configurations for:
* 1. Conversation data (catalogUri + namespace, typically 'gateway')
* 2. OHLC/Trading data (ohlcCatalogUri + ohlcNamespace, typically 'trading')
*
* This allows for:
* - Different S3 buckets/warehouses per data type
* - Different retention policies
* - Independent scaling and management
* - Cost optimization (e.g., cheaper storage class for old conversations)
*/
export interface IcebergConfig {
  /** Iceberg REST catalog URI for conversation/gateway data. */
  catalogUri: string;
  /** Namespace holding conversation tables (typically 'gateway'). */
  namespace: string;
  /** S3/MinIO connection settings for conversation data (all optional). */
  s3Endpoint?: string;
  s3AccessKey?: string;
  s3SecretKey?: string;
  /** OHLC/Trading data catalog — may equal the conversation catalog; falls back to catalogUri when omitted. */
  ohlcCatalogUri?: string;
  /** Namespace for OHLC tables; downstream default is 'trading'. */
  ohlcNamespace?: string;
}
/**
@@ -40,79 +57,73 @@ export interface IcebergCheckpoint {
/**
* Iceberg REST client wrapper for durable storage
*
* Uses Iceberg REST Catalog API to:
* - Query conversation history from gateway.conversations
* - Query checkpoints from gateway.checkpoints
* - Note: Writes are handled by Flink; this is read-only
* Architecture:
* - Uses DuckDB with Iceberg extension for querying Parquet data
* - Supports SEPARATE catalogs/warehouses for conversation vs OHLC data
* - Writes are handled by Flink via Kafka; this client is READ-ONLY
*
* For writes, we'll send to a Kafka topic that Flink consumes
* (or implement direct REST catalog write if needed)
* Data separation:
* 1. Conversation data: catalogUri + namespace (e.g., http://catalog:8181 + 'gateway')
* - Tables: conversations, checkpoints
* - Can use different warehouse/S3 bucket in the future
*
* 2. OHLC/Trading data: ohlcCatalogUri + ohlcNamespace (e.g., http://catalog:8181 + 'trading')
* - Tables: ohlc, symbol_metadata
* - Can use different warehouse/S3 bucket for cost optimization
*
* To use separate warehouses in production:
* 1. Deploy two Iceberg REST catalog instances (or configure multi-warehouse)
* 2. Point catalogUri to conversations warehouse
* 3. Point ohlcCatalogUri to trading warehouse
* 4. Update Flink configuration to write to the correct catalogs
*/
export class IcebergClient {
private namespace: string;
private duckdb: DuckDBClient;
private logger: FastifyBaseLogger;
private namespace: string;
private ohlcNamespace: string;
constructor(config: IcebergConfig, logger: FastifyBaseLogger) {
this.logger = logger;
this.namespace = config.namespace;
this.ohlcNamespace = config.ohlcNamespace || 'trading';
// Initialize Iceberg REST client
const clientConfig: any = {
uri: config.catalogUri,
};
if (config.s3Endpoint) {
clientConfig.s3 = {
endpoint: config.s3Endpoint,
'access-key-id': config.s3AccessKey,
'secret-access-key': config.s3SecretKey,
'path-style-access': 'true',
};
}
// TODO: Store client for queries when needed
new IcebergRestCatalog(clientConfig);
// Initialize DuckDB client for querying Iceberg tables
// DuckDB will query tables from the appropriate catalog based on the data type
this.duckdb = new DuckDBClient(
{
catalogUri: config.catalogUri,
namespace: config.namespace,
ohlcCatalogUri: config.ohlcCatalogUri,
ohlcNamespace: config.ohlcNamespace,
s3Endpoint: config.s3Endpoint,
s3AccessKey: config.s3AccessKey,
s3SecretKey: config.s3SecretKey,
},
logger
);
this.logger.info({
catalogUri: config.catalogUri,
namespace: this.namespace,
}, 'Iceberg client initialized');
ohlcCatalogUri: config.ohlcCatalogUri || config.catalogUri,
ohlcNamespace: this.ohlcNamespace,
}, 'Iceberg client initialized with separate conversation and OHLC catalogs');
}
/**
* Query messages from gateway.conversations table
*
* Note: This is a simplified interface. The actual Iceberg REST API
* returns table metadata, and you'd need to query the underlying
* Parquet files via S3 or use a query engine like DuckDB/Trino.
*
* For now, we'll document the expected schema and leave actual
* implementation as TODO since Flink handles writes.
*/
async queryMessages(
userId: string,
sessionId: string,
_options?: {
options?: {
startTime?: number;
endTime?: number;
limit?: number;
}
): Promise<IcebergMessage[]> {
this.logger.debug({
userId,
sessionId,
table: `${this.namespace}.conversations`,
}, 'Querying messages from Iceberg');
// TODO: Implement actual Iceberg query
// Options:
// 1. Use iceberg-js to get table metadata and Parquet file locations
// 2. Query Parquet files directly via S3 + parquet-wasm
// 3. Use external query engine (DuckDB, Trino, Presto)
// 4. Use Flink SQL REST endpoint for queries
this.logger.warn('Iceberg query not yet implemented - returning empty array');
return [];
return this.duckdb.queryMessages(userId, sessionId, options);
}
/**
@@ -123,16 +134,7 @@ export class IcebergClient {
sessionId: string,
checkpointId?: string
): Promise<IcebergCheckpoint | null> {
this.logger.debug({
userId,
sessionId,
checkpointId,
table: `${this.namespace}.checkpoints`,
}, 'Querying checkpoint from Iceberg');
// TODO: Implement actual Iceberg query
this.logger.warn('Iceberg query not yet implemented - returning null');
return null;
return this.duckdb.queryCheckpoint(userId, sessionId, checkpointId);
}
/**
@@ -188,6 +190,49 @@ export class IcebergClient {
return false;
}
}
/**
 * Fetch OHLC candles for a ticker/period over [start_time, end_time]
 * (timestamps in microseconds). Thin delegation to the DuckDB-backed reader.
 */
async queryOHLC(
  ticker: string,
  period_seconds: number,
  start_time: bigint, // microseconds
  end_time: bigint // microseconds
): Promise<any[]> {
  return await this.duckdb.queryOHLC(ticker, period_seconds, start_time, end_time);
}
/**
 * True when at least one OHLC row exists for the given ticker/period/range.
 * Thin delegation to the DuckDB-backed reader.
 */
async hasOHLCData(
  ticker: string,
  period_seconds: number,
  start_time: bigint,
  end_time: bigint
): Promise<boolean> {
  return await this.duckdb.hasOHLCData(ticker, period_seconds, start_time, end_time);
}
/**
 * Determine which [start, end] sub-ranges (microseconds) still lack OHLC
 * data. Thin delegation to the DuckDB-backed reader.
 */
async findMissingOHLCRanges(
  ticker: string,
  period_seconds: number,
  start_time: bigint,
  end_time: bigint
): Promise<Array<[bigint, bigint]>> {
  return await this.duckdb.findMissingOHLCRanges(ticker, period_seconds, start_time, end_time);
}
/**
 * Fetch every row of the symbol_metadata table.
 * Thin delegation to the DuckDB-backed reader.
 */
async queryAllSymbols(): Promise<any[]> {
  return await this.duckdb.queryAllSymbols();
}
}
/**

View File

@@ -0,0 +1,180 @@
/**
* ZMQ Protocol encoding/decoding using Protobuf
*
* Protocol format (as defined in protobuf/ingestor.proto):
* Frame 1: [1 byte: protocol version]
* Frame 2: [1 byte: message type ID][N bytes: protobuf message]
*
* For PUB/SUB: [topic frame][version frame][message frame]
*/
import protobuf from 'protobufjs';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import type {
SubmitHistoricalRequest,
SubmitResponse,
HistoryReadyNotification,
SubmitStatus,
NotificationStatus,
} from '../types/ohlc.js';
// Recreate CommonJS-style __filename/__dirname inside an ES module.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
 * Protocol constants
 */
// Single supported wire protocol version; carried as the first frame byte.
export const PROTOCOL_VERSION = 0x01;
// Message type IDs carried in the first byte of the message frame.
export enum MessageType {
  SUBMIT_HISTORICAL_REQUEST = 0x10,
  SUBMIT_RESPONSE = 0x11,
  HISTORY_READY_NOTIFICATION = 0x12,
}
// Load protobuf types at runtime (same pattern as ingestor)
// Proto files are copied to /app/protobuf/ in the Docker image
// NOTE: this runs at import time — a missing/unreadable proto file fails module load.
const protoDir = join(__dirname, '../..', 'protobuf');
const root = new protobuf.Root();
// Load proto file and parse it
const ingestorProto = readFileSync(join(protoDir, 'ingestor.proto'), 'utf8');
protobuf.parse(ingestorProto, root);
// Resolve the message types used by the encode/decode helpers below.
const SubmitHistoricalRequestType = root.lookupType('SubmitHistoricalRequest');
const SubmitResponseType = root.lookupType('SubmitResponse');
const HistoryReadyNotificationType = root.lookupType('HistoryReadyNotification');
/**
 * Encode a SubmitHistoricalRequest into ZMQ frames.
 *
 * @param req request with bigint start/end times in MICROSECONDS
 * @returns [version_frame, message_frame] where the message frame is
 *          [1 byte type ID][protobuf payload]
 */
export function encodeSubmitHistoricalRequest(req: SubmitHistoricalRequest): Buffer[] {
  const versionFrame = Buffer.from([PROTOCOL_VERSION]);
  // Convert to protobuf-compatible format (pbjs uses camelCase).
  // BUG FIX: pass the 64-bit timestamps as decimal strings instead of
  // Number(...) — protobufjs accepts strings for (u)int64 fields, and this
  // avoids silent precision loss for values above 2^53.
  const protoMessage = {
    requestId: req.request_id,
    ticker: req.ticker,
    startTime: req.start_time.toString(),
    endTime: req.end_time.toString(),
    periodSeconds: req.period_seconds,
    limit: req.limit,
    clientId: req.client_id,
  };
  // fromObject performs field-type conversion (e.g. string -> Long) before encoding.
  const message = SubmitHistoricalRequestType.fromObject(protoMessage);
  const payloadBuffer = SubmitHistoricalRequestType.encode(message).finish();
  const messageFrame = Buffer.concat([
    Buffer.from([MessageType.SUBMIT_HISTORICAL_REQUEST]),
    Buffer.from(payloadBuffer),
  ]);
  return [versionFrame, messageFrame];
}
/**
* Decode SubmitResponse from ZMQ frames
*
* Input: [version_frame, message_frame]
*/
export function decodeSubmitResponse(frames: Buffer[]): SubmitResponse {
try {
if (frames.length < 2) {
throw new Error(`Expected 2 frames, got ${frames.length}`);
}
const versionFrame = frames[0];
const messageFrame = frames[1];
// Validate version
if (versionFrame[0] !== PROTOCOL_VERSION) {
throw new Error(`Unsupported protocol version: ${versionFrame[0]}`);
}
// Validate message type
const messageType = messageFrame[0];
if (messageType !== MessageType.SUBMIT_RESPONSE) {
throw new Error(`Expected SUBMIT_RESPONSE (0x11), got 0x${messageType.toString(16)}`);
}
// Decode protobuf payload
const payloadBuffer = messageFrame.slice(1);
const decoded = SubmitResponseType.decode(payloadBuffer);
const payload = SubmitResponseType.toObject(decoded, {
longs: String,
enums: Number, // Keep enums as numbers for comparison
defaults: true,
});
return {
request_id: payload.requestId,
status: payload.status as SubmitStatus,
error_message: payload.errorMessage || undefined,
notification_topic: payload.notificationTopic,
};
} catch (error) {
console.error('Error decoding SubmitResponse:', error);
console.error('Frame count:', frames.length);
if (frames.length >= 2) {
console.error('Version frame:', frames[0].toString('hex'));
console.error('Message frame (first 100 bytes):', frames[1].slice(0, 100).toString('hex'));
}
throw error;
}
}
/**
* Decode HistoryReadyNotification from ZMQ frames
*
* Input: [topic_frame, version_frame, message_frame] (for SUB socket)
*/
export function decodeHistoryReadyNotification(frames: Buffer[]): HistoryReadyNotification {
if (frames.length < 3) {
throw new Error(`Expected 3 frames (topic, version, message), got ${frames.length}`);
}
const versionFrame = frames[1];
const messageFrame = frames[2];
// Validate version
if (versionFrame[0] !== PROTOCOL_VERSION) {
throw new Error(`Unsupported protocol version: ${versionFrame[0]}`);
}
// Validate message type
const messageType = messageFrame[0];
if (messageType !== MessageType.HISTORY_READY_NOTIFICATION) {
throw new Error(`Expected HISTORY_READY_NOTIFICATION (0x12), got 0x${messageType.toString(16)}`);
}
// Decode protobuf payload
const payloadBuffer = messageFrame.slice(1);
const decoded = HistoryReadyNotificationType.decode(payloadBuffer);
const payload = HistoryReadyNotificationType.toObject(decoded, {
longs: String,
enums: Number, // Keep enums as numbers for comparison
defaults: true,
});
return {
request_id: payload.requestId,
ticker: payload.ticker,
period_seconds: payload.periodSeconds,
start_time: BigInt(payload.startTime),
end_time: BigInt(payload.endTime),
status: payload.status as NotificationStatus,
error_message: payload.errorMessage || undefined,
iceberg_namespace: payload.icebergNamespace,
iceberg_table: payload.icebergTable,
row_count: payload.rowCount,
completed_at: BigInt(payload.completedAt),
};
}

View File

@@ -0,0 +1,356 @@
/**
* ZMQ Relay Client for historical data requests
*
* IMPORTANT: Implements race-condition-free notification subscription
* by subscribing to RESPONSE:{client_id} topic BEFORE sending requests.
*
* Architecture:
* - REQ socket to relay (port 5559) for SubmitHistoricalRequest
* - SUB socket to relay (port 5558) for HistoryReadyNotification
* - Notification topic: RESPONSE:{client_id} (deterministic, client-generated)
*/
import * as zmq from 'zeromq';
import type { FastifyBaseLogger } from 'fastify';
import { randomUUID } from 'crypto';
import {
encodeSubmitHistoricalRequest,
decodeSubmitResponse,
decodeHistoryReadyNotification,
} from './zmq-protocol.js';
import type {
SubmitHistoricalRequest,
HistoryReadyNotification,
} from '../types/ohlc.js';
import {
SubmitStatus,
NotificationStatus,
} from '../types/ohlc.js';
/** Connection settings for {@link ZMQRelayClient}. */
export interface ZMQRelayConfig {
  /** REQ endpoint for SubmitHistoricalRequest, e.g. "tcp://relay:5559". */
  relayRequestEndpoint: string;
  /** SUB endpoint for HistoryReadyNotification, e.g. "tcp://relay:5558". */
  relayNotificationEndpoint: string;
  /** Client ID used to build the RESPONSE:{client_id} topic; generated if omitted. */
  clientId?: string;
  /** Per-request timeout in ms (default: 30000). */
  requestTimeout?: number;
  /** Invoked whenever a METADATA_UPDATE notification arrives. */
  onMetadataUpdate?: () => Promise<void>;
}
/** Bookkeeping for one in-flight request awaiting its ready notification. */
interface PendingRequest {
  /** Settles the caller's promise with the completed notification. */
  resolve: (notification: HistoryReadyNotification) => void;
  /** Rejects the caller's promise (timeout, failure status, or client close). */
  reject: (error: Error) => void;
  /** Timer that fires the timeout rejection; cleared on settle. */
  timeoutHandle: NodeJS.Timeout;
}
/**
 * ZMQ Relay Client
 *
 * Provides an async API for submitting historical data requests and waiting
 * for completion notifications.
 *
 * Race-condition safety: connect() subscribes the SUB socket to
 * RESPONSE:{client_id} BEFORE any request is sent, so a notification can
 * never be published before we are listening for it.
 */
export class ZMQRelayClient {
  // onMetadataUpdate is genuinely optional, so it is excluded from Required
  // instead of being (incorrectly) claimed as always-present.
  private config: Required<Omit<ZMQRelayConfig, 'onMetadataUpdate'>> &
    Pick<ZMQRelayConfig, 'onMetadataUpdate'>;
  private logger: FastifyBaseLogger;
  private reqSocket?: zmq.Request;
  private subSocket?: zmq.Subscriber;
  private notificationTopic: string;
  // request_id -> pending promise bookkeeping; settled by the listener loop.
  private pendingRequests: Map<string, PendingRequest> = new Map();
  private connected = false;
  private notificationListenerRunning = false;

  constructor(config: ZMQRelayConfig, logger: FastifyBaseLogger) {
    this.config = {
      relayRequestEndpoint: config.relayRequestEndpoint,
      relayNotificationEndpoint: config.relayNotificationEndpoint,
      clientId: config.clientId || `gateway-${randomUUID().slice(0, 8)}`,
      requestTimeout: config.requestTimeout || 30000,
      // BUG FIX: this field was previously dropped when copying the config,
      // so the METADATA_UPDATE callback could never fire.
      onMetadataUpdate: config.onMetadataUpdate,
    };
    this.logger = logger;
    this.notificationTopic = `RESPONSE:${this.config.clientId}`;
  }

  /**
   * Connect to relay and start the notification listener.
   *
   * MUST be called before making any requests: the SUB socket subscribes to
   * RESPONSE:{client_id} here, before any request can be sent, which is what
   * prevents the subscribe-after-publish race.
   */
  async connect(): Promise<void> {
    if (this.connected) {
      return;
    }
    this.logger.info({
      requestEndpoint: this.config.relayRequestEndpoint,
      notificationEndpoint: this.config.relayNotificationEndpoint,
      clientId: this.config.clientId,
      notificationTopic: this.notificationTopic,
    }, 'Connecting to ZMQ relay');
    // Create REQ socket for requests
    this.reqSocket = new zmq.Request();
    this.reqSocket.connect(this.config.relayRequestEndpoint);
    // Create SUB socket for notifications
    this.subSocket = new zmq.Subscriber();
    this.subSocket.connect(this.config.relayNotificationEndpoint);
    // Subscribe to our notification topic BEFORE sending any requests
    this.subSocket.subscribe(this.notificationTopic);
    // Subscribe to system metadata update notifications
    this.subSocket.subscribe('METADATA_UPDATE');
    this.logger.info({
      topics: [this.notificationTopic, 'METADATA_UPDATE']
    }, 'Subscribed to notification topics');
    // Start notification listener
    this.startNotificationListener();
    // Give sockets a moment to connect
    await new Promise(resolve => setTimeout(resolve, 100));
    this.connected = true;
    this.logger.info('ZMQ relay client connected');
  }

  /**
   * Request historical OHLC data.
   *
   * IMPORTANT: Call connect() before using this method.
   *
   * @param ticker Market identifier (e.g., "BINANCE:BTC/USDT")
   * @param period_seconds OHLC period in seconds
   * @param start_time Start timestamp in MICROSECONDS
   * @param end_time End timestamp in MICROSECONDS
   * @param limit Optional limit on number of candles
   * @returns Promise that resolves when data is ready in Iceberg
   * @throws when not connected, the relay rejects the request, the request
   *         times out, or the relay reports a failure status
   */
  async requestHistoricalOHLC(
    ticker: string,
    period_seconds: number,
    start_time: bigint,
    end_time: bigint,
    limit?: number
  ): Promise<HistoryReadyNotification> {
    if (!this.connected || !this.reqSocket) {
      throw new Error('Client not connected. Call connect() first.');
    }
    const request_id = randomUUID();
    this.logger.debug({
      request_id,
      ticker,
      period_seconds,
      start_time: start_time.toString(),
      end_time: end_time.toString(),
    }, 'Submitting historical OHLC request');
    const request: SubmitHistoricalRequest = {
      request_id,
      ticker,
      start_time,
      end_time,
      period_seconds,
      limit,
      client_id: this.config.clientId,
    };
    // Register pending request BEFORE sending (notification listener is already running)
    const resultPromise = new Promise<HistoryReadyNotification>((resolve, reject) => {
      const timeoutHandle = setTimeout(() => {
        this.pendingRequests.delete(request_id);
        reject(new Error(`Request ${request_id} timed out after ${this.config.requestTimeout}ms`));
      }, this.config.requestTimeout);
      this.pendingRequests.set(request_id, { resolve, reject, timeoutHandle });
    });
    // Encode and send request
    const frames = encodeSubmitHistoricalRequest(request);
    try {
      // Send two frames: version, then message
      await this.reqSocket.send(frames);
      // Wait for immediate acknowledgment
      const responseFrames = await this.reqSocket.receive();
      this.logger.debug({
        frameCount: responseFrames.length,
        frameLengths: Array.from(responseFrames).map(f => f.length),
      }, 'Received response frames from relay');
      const response = decodeSubmitResponse(Array.from(responseFrames));
      this.logger.debug({
        request_id,
        response,
      }, 'Decoded SubmitResponse');
      if (response.status !== SubmitStatus.QUEUED) {
        // Request was rejected - clean up pending request
        this.settlePending(request_id);
        throw new Error(`Request rejected: ${response.error_message || 'Unknown error'}`);
      }
      this.logger.debug({ request_id }, 'Request queued, waiting for notification');
      // Wait for notification (already subscribed to topic)
      return await resultPromise;
    } catch (error) {
      // Clean up pending request on error (no-op if already settled)
      this.settlePending(request_id);
      this.logger.error({
        error,
        request_id,
        ticker,
        errorMessage: error instanceof Error ? error.message : String(error),
        errorStack: error instanceof Error ? error.stack : undefined,
      }, 'Failed to submit historical OHLC request');
      throw error;
    }
  }

  /**
   * Remove a pending request and clear its timeout, if it is still tracked.
   * Safe to call multiple times for the same id.
   */
  private settlePending(request_id: string): void {
    const pending = this.pendingRequests.get(request_id);
    if (pending) {
      clearTimeout(pending.timeoutHandle);
      this.pendingRequests.delete(request_id);
    }
  }

  /**
   * Start the notification listener loop.
   *
   * CRITICAL: This runs BEFORE any requests are submitted to prevent a race.
   * We are already subscribed to RESPONSE:{client_id} and METADATA_UPDATE,
   * so every relevant notification will be received.
   */
  private startNotificationListener(): void {
    if (this.notificationListenerRunning || !this.subSocket) {
      return;
    }
    this.notificationListenerRunning = true;
    // Listen for notifications asynchronously
    (async () => {
      try {
        for await (const frames of this.subSocket!) {
          try {
            // First frame is the topic
            const topic = frames[0].toString();
            // Handle metadata update notifications
            if (topic === 'METADATA_UPDATE') {
              this.logger.info('Received METADATA_UPDATE notification');
              // Call the onMetadataUpdate callback if configured
              if (this.config.onMetadataUpdate) {
                try {
                  await this.config.onMetadataUpdate();
                } catch (error) {
                  this.logger.error({ error }, 'Failed to handle metadata update');
                }
              }
              continue;
            }
            // Handle history ready notifications
            const notification = decodeHistoryReadyNotification(Array.from(frames));
            this.logger.debug({
              request_id: notification.request_id,
              status: NotificationStatus[notification.status],
              row_count: notification.row_count,
            }, 'Received history ready notification');
            // Check if we're waiting for this request
            const pending = this.pendingRequests.get(notification.request_id);
            if (pending) {
              clearTimeout(pending.timeoutHandle);
              this.pendingRequests.delete(notification.request_id);
              if (notification.status === NotificationStatus.OK) {
                pending.resolve(notification);
              } else {
                pending.reject(new Error(
                  `Historical data request failed: ${notification.error_message || NotificationStatus[notification.status]}`
                ));
              }
            } else {
              // Can happen after a timeout already removed the entry.
              this.logger.warn({
                request_id: notification.request_id,
              }, 'Received notification for unknown request');
            }
          } catch (error) {
            this.logger.error({ error }, 'Failed to process notification');
          }
        }
      } catch (error) {
        if (this.notificationListenerRunning) {
          this.logger.error({ error }, 'Notification listener error');
        }
      } finally {
        this.notificationListenerRunning = false;
      }
    })();
    this.logger.debug('Notification listener started');
  }

  /**
   * Close the client: reject all pending requests and close both sockets.
   * Idempotent once disconnected.
   */
  async close(): Promise<void> {
    if (!this.connected) {
      return;
    }
    this.logger.info('Closing ZMQ relay client');
    this.notificationListenerRunning = false;
    // Reject all pending requests
    for (const [, pending] of this.pendingRequests) {
      clearTimeout(pending.timeoutHandle);
      pending.reject(new Error('Client closed'));
    }
    this.pendingRequests.clear();
    // Close sockets
    if (this.subSocket) {
      this.subSocket.close();
      this.subSocket = undefined;
    }
    if (this.reqSocket) {
      this.reqSocket.close();
      this.reqSocket = undefined;
    }
    this.connected = false;
    this.logger.info('ZMQ relay client closed');
  }

  /** Whether connect() has completed and close() has not been called. */
  isConnected(): boolean {
    return this.connected;
  }

  /** The client ID used to build the notification topic. */
  getClientId(): string {
    return this.config.clientId;
  }
}