data pipeline refactor and fix

This commit is contained in:
2026-04-13 18:30:04 -04:00
parent 6418729b16
commit 326bf80846
96 changed files with 7107 additions and 1763 deletions

View File

@@ -47,24 +47,22 @@ function loadConfig() {
logger.warn({ error: error.message }, 'Could not load secrets');
}
// Merge config and secrets
return {
// Flink ZMQ endpoints
flink_hostname: config.flink_hostname || 'localhost',
ingestor_work_port: config.ingestor_work_port || 5555,
ingestor_control_port: config.ingestor_control_port || 5556,
ingestor_broker_port: config.ingestor_broker_port || 5567,
// Kafka configuration
kafka_brokers: config.kafka_brokers || ['localhost:9092'],
kafka_topic: 'market-ohlc',
kafka_ohlc_topic: config.kafka_ohlc_topic || 'market-ohlc',
kafka_tick_topic: config.kafka_tick_topic || 'market-tick',
// Worker configuration
max_concurrent: config.max_concurrent || 10,
poll_interval_ms: config.poll_interval_ms || 10000,
// Symbol metadata configuration
supported_exchanges: config.supported_exchanges || ['binance', 'coinbase', 'kraken'],
symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000, // 6 hours
symbol_metadata_interval_ms: config.symbol_metadata_interval_ms || 6 * 60 * 60 * 1000,
...secrets
};
@@ -76,11 +74,7 @@ class IngestorWorker {
this.logger = logger;
this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
this.kafkaProducer = new KafkaProducer(
config,
logger.child({ component: 'kafka' })
);
// Create metadata generator first so ccxtFetcher can use it
this.kafkaProducer = new KafkaProducer(config, logger.child({ component: 'kafka' }));
this.metadataGenerator = new SymbolMetadataGenerator(
config,
this.kafkaProducer,
@@ -94,33 +88,27 @@ class IngestorWorker {
this.realtimePoller = new RealtimePoller(
this.ccxtFetcher,
this.kafkaProducer,
this.zmqClient,
logger.child({ component: 'poller' })
);
// Track active requests
this.activeRequests = new Map();
this.isShutdown = false;
// jobId → active realtime subscription (for stop handling)
this.activeRealtime = new Set();
// Metadata generation interval
this.metadataIntervalMs = config.symbol_metadata_interval_ms;
this.isShutdown = false;
this.metadataInterval = null;
}
/**
* Start the ingestor worker
*/
async start() {
this.logger.info('Starting CCXT ingestor worker');
// Connect to services
await this.kafkaProducer.connect();
await this.zmqClient.connect();
// Start control message listener
this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));
// Wire event callbacks before connecting so we don't miss early messages
this.zmqClient.onWorkAssign = req => this.handleWorkAssign(req);
this.zmqClient.onWorkStop = jobId => this.handleWorkStop(jobId);
// Start work loop
this.workLoop();
await this.zmqClient.connect(); // also sends WorkerReady
// Generate symbol metadata on startup
this.logger.info('Generating initial symbol metadata');
@@ -140,281 +128,126 @@ class IngestorWorker {
} catch (error) {
this.logger.error({ error: error.message }, 'Failed to generate periodic symbol metadata');
}
}, this.metadataIntervalMs);
}, this.config.symbol_metadata_interval_ms);
this.logger.info('Ingestor worker started successfully');
}
/**
* Main work loop - pull and process data requests
* Handle a WorkAssign message dispatched by Flink IngestorBroker.
* Called from the ZmqClient receive loop — do not block.
*/
async workLoop() {
while (!this.isShutdown) {
try {
// Check if we can handle more requests
if (this.activeRequests.size >= this.config.max_concurrent) {
await new Promise(resolve => setTimeout(resolve, 1000));
continue;
}
handleWorkAssign(request) {
const { jobId, requestId, type, ticker } = request;
// Pull next data request
const request = await this.zmqClient.pullDataRequest();
if (!request) {
continue;
}
this.logger.info({ jobId, requestId, type, ticker }, 'Received WorkAssign');
// Handle request asynchronously
this.handleDataRequest(request).catch(error => {
this.logger.error(
{ error: error.message, requestId: request.requestId },
'Error handling data request'
);
});
} catch (error) {
if (!this.isShutdown) {
this.logger.error({ error: error.message }, 'Error in work loop');
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
// HISTORICAL_OHLC = 0 (proto3 default, may appear as undefined or 'HISTORICAL_OHLC')
const isHistorical = !type || type === 'HISTORICAL_OHLC' || type === 0;
const isRealtime = type === 'REALTIME_TICKS' || type === 1;
if (isHistorical) {
this.handleHistoricalRequest(request).catch(err => {
this.logger.error({ jobId, requestId, error: err.message }, 'Unexpected error in historical handler');
});
} else if (isRealtime) {
this.handleRealtimeRequest(request);
} else {
this.logger.warn({ jobId, type }, 'Unknown request type — rejecting');
this.zmqClient.sendReject(jobId, `Unknown request type: ${type}`).catch(() => {});
}
}
/**
* Handle a data request
* Handle WorkStop sent by Flink (e.g., all subscribers left).
*/
async handleDataRequest(request) {
const { requestId: request_id, type, ticker } = request;
this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');
this.activeRequests.set(request_id, request);
try {
// HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
// so protobufjs decodes it as undefined. Treat undefined as HISTORICAL_OHLC.
const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
const isRealtime = type === 'REALTIME_TICKS' || type === 1;
if (isHistorical) {
await this.handleHistoricalRequest(request);
} else if (isRealtime) {
await this.handleRealtimeRequest(request);
} else {
this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
}
} finally {
// For historical requests, remove from active requests when done
const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
if (isHistorical) {
this.activeRequests.delete(request_id);
}
}
handleWorkStop(jobId) {
this.logger.info({ jobId }, 'Received WorkStop — cancelling realtime subscription');
this.realtimePoller.cancelSubscription(jobId);
this.activeRealtime.delete(jobId);
// No WorkComplete needed — Flink sent the stop, it already knows
}
/**
* Handle historical OHLC request
* ASYNC ARCHITECTURE: No response sent back. Data written to Kafka only.
* Flink will process from Kafka, write to Iceberg, and publish notification.
* Fetch historical OHLC data and write to Kafka.
* Sends WorkComplete when done (success or error).
*/
async handleHistoricalRequest(request) {
const { requestId: request_id, ticker, historical, clientId: client_id } = request;
const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;
const { jobId, requestId, ticker, historical, clientId: client_id } = request;
const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical || {};
this.logger.info(
{ request_id, ticker, period_seconds, client_id },
'Processing historical OHLC request (async mode - write to Kafka only)'
);
this.logger.info({ jobId, requestId, ticker, period_seconds }, 'Processing historical OHLC request');
try {
// Fetch historical data from exchange
const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
ticker,
start_time,
end_time,
period_seconds,
limit
ticker, start_time, end_time, period_seconds, limit
);
this.logger.info(
{ request_id, ticker, count: candles.length },
'Fetched data from exchange'
);
this.logger.info({ jobId, requestId, ticker, count: candles.length }, 'Fetched from exchange');
// Write to Kafka - THIS IS THE ONLY OUTPUT
// Flink will:
// 1. Read from Kafka
// 2. Write to Iceberg
// 3. Publish HistoryReadyNotification
// 4. Client receives notification via relay pub/sub
if (candles.length > 0) {
// Add metadata to first candle for Flink tracking
const enrichedCandles = candles.map((candle, idx) => ({
...candle,
__metadata: idx === 0 ? {
request_id,
client_id,
ticker,
period_seconds,
start_time,
end_time
} : undefined
}));
await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
} else {
// Write a marker message even if no data found
// Flink will see this and publish a NOT_FOUND notification
await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
request_id,
client_id,
ticker,
period_seconds,
start_time,
end_time,
status: 'NOT_FOUND',
message: 'No data available for requested period'
});
}
this.logger.info(
{ request_id, ticker, count: candles.length },
'Completed historical OHLC request - data written to Kafka'
);
// NO RESPONSE SENT - Relay is stateless, client waits for pub/sub notification
} catch (error) {
this.logger.error(
{
errorType: error.constructor?.name,
error: error.message,
errorUrl: error.url,
request_id,
ticker,
stack: error.stack
},
'Failed to process historical request'
);
// Write error marker to Kafka so Flink can notify client
try {
await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
request_id,
client_id,
ticker,
period_seconds,
start_time,
end_time,
status: 'ERROR',
error_message: error.message
});
} catch (kafkaError) {
this.logger.error(
{ error: kafkaError.message, request_id },
'Failed to write error marker to Kafka'
);
}
// Do not throw - request is handled, Flink will notify client of error
}
}
/**
* Handle realtime tick subscription request
*/
async handleRealtimeRequest(request) {
const { requestId: request_id, ticker } = request;
this.logger.info(
{ request_id, ticker },
'Processing realtime subscription request'
);
try {
// Start realtime polling
this.realtimePoller.startSubscription(
request_id,
ticker,
this.config.kafka_topic
);
} catch (error) {
this.logger.error(
{ error: error.message, request_id, ticker },
'Failed to start realtime subscription'
);
this.activeRequests.delete(request_id);
throw error;
}
}
/**
* Handle control messages from Flink
*/
async handleControlMessage(message) {
const { action, requestId: request_id } = message;
this.logger.info({ action, request_id }, 'Received control message');
switch (action) {
case 'CANCEL':
if (request_id) {
// Cancel specific request
this.realtimePoller.cancelSubscription(request_id);
this.activeRequests.delete(request_id);
const metadata = { request_id: requestId, client_id, ticker, period_seconds, start_time, end_time };
const PAGE_SIZE = 1000;
for (let i = 0; i < candles.length; i += PAGE_SIZE) {
const page = candles.slice(i, i + PAGE_SIZE);
const isLastPage = (i + PAGE_SIZE) >= candles.length;
await this.kafkaProducer.writeOHLCs(this.config.kafka_ohlc_topic, page, metadata, isLastPage);
}
break;
this.logger.info({ jobId, requestId, ticker, count: candles.length, pages: Math.ceil(candles.length / PAGE_SIZE) }, 'Wrote all pages to Kafka');
} else {
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
status: 'NOT_FOUND', message: 'No data available for requested period'
});
}
case 'SHUTDOWN':
this.logger.info('Received shutdown signal');
await this.shutdown();
break;
this.logger.info({ jobId, requestId, ticker }, 'Historical request complete — sending WorkComplete');
await this.zmqClient.sendComplete(jobId, true);
case 'CONFIG_UPDATE':
// Handle config update if needed
this.logger.info('Received config update');
break;
} catch (error) {
this.logger.error({ jobId, requestId, ticker, error: error.message }, 'Historical request failed');
case 'HEARTBEAT':
// Just acknowledge heartbeat
break;
try {
await this.kafkaProducer.writeMarker(this.config.kafka_ohlc_topic, {
request_id: requestId, client_id, ticker, period_seconds, start_time, end_time,
status: 'ERROR', error_message: error.message
});
} catch (kafkaErr) {
this.logger.error({ jobId, error: kafkaErr.message }, 'Failed to write error marker to Kafka');
}
default:
this.logger.warn({ action }, 'Unknown control action');
await this.zmqClient.sendComplete(jobId, false, error.message);
}
}
/**
* Get worker status
* Start realtime tick polling for a job dispatched by Flink.
* Ticks flow: exchange → Kafka market-tick → Flink → OHLC bars → clients.
*/
handleRealtimeRequest(request) {
const { jobId, requestId, ticker } = request;
this.logger.info({ jobId, requestId, ticker }, 'Processing realtime subscription request');
this.activeRealtime.add(jobId);
this.realtimePoller.startSubscription(jobId, requestId, ticker, this.config.kafka_tick_topic);
}
getStatus() {
return {
activeRequests: this.activeRequests.size,
maxConcurrent: this.config.max_concurrent,
activeRealtime: this.activeRealtime.size,
pollerStats: this.realtimePoller.getStats(),
metadataStatus: this.metadataGenerator.getStatus()
};
}
/**
* Shutdown worker gracefully
*/
async shutdown() {
if (this.isShutdown) {
return;
}
if (this.isShutdown) return;
this.isShutdown = true;
this.logger.info('Shutting down ingestor worker');
// Stop metadata generation interval
if (this.metadataInterval) {
clearInterval(this.metadataInterval);
}
if (this.metadataInterval) clearInterval(this.metadataInterval);
// Stop polling
this.realtimePoller.shutdown();
// Close connections
await this.ccxtFetcher.close();
await this.metadataGenerator.close();
await this.kafkaProducer.disconnect();
@@ -430,31 +263,23 @@ async function main() {
const config = loadConfig();
const worker = new IngestorWorker(config, logger);
// Handle shutdown signals
process.on('SIGINT', () => worker.shutdown());
process.on('SIGTERM', () => worker.shutdown());
// Handle errors
process.on('uncaughtException', error => {
logger.error({ error }, 'Uncaught exception');
worker.shutdown();
});
process.on('unhandledRejection', (reason, promise) => {
process.on('unhandledRejection', reason => {
logger.error({ reason }, 'Unhandled rejection');
});
// Start worker
await worker.start();
// Log status periodically
setInterval(() => {
const status = worker.getStatus();
logger.info({ status }, 'Worker status');
logger.info({ status: worker.getStatus() }, 'Worker status');
}, 60000);
}
// Run
main().catch(error => {
logger.error({ error }, 'Fatal error');
process.exit(1);

View File

@@ -116,12 +116,17 @@ export class KafkaProducer {
}
/**
* Write multiple OHLC candles to Kafka as an OHLCBatch message
* Uses protobuf encoding with metadata in batch wrapper
* Write multiple OHLC candles to Kafka as an OHLCBatch message.
*
* Historical mode: pass explicit metadata and isLastPage flag.
* Realtime mode: omit metadata (null/undefined) — writes individual OHLC messages instead.
*
* @param {string} topic - Kafka topic name
* @param {Array<object>} ohlcData - Array of OHLC data objects (may include __metadata in first record)
* @param {Array<object>} ohlcData - Array of OHLC candle objects
* @param {object|null} metadata - Request metadata for historical batches; null for realtime
* @param {boolean} isLastPage - True if this is the final page of a historical query
*/
async writeOHLCs(topic, ohlcData) {
async writeOHLCs(topic, ohlcData, metadata = null, isLastPage = false) {
if (!this.isConnected) {
throw new Error('Kafka producer not connected');
}
@@ -130,12 +135,8 @@ export class KafkaProducer {
return;
}
// Extract metadata from first record if present
const firstCandle = ohlcData[0];
const metadata = firstCandle.__metadata;
if (!metadata) {
// No metadata - write individual OHLC messages (realtime mode)
// Realtime mode — write individual OHLC messages (no batch wrapper)
const messages = ohlcData.map(candle => {
const protoCandle = {
timestamp: candle.timestamp,
@@ -156,10 +157,7 @@ export class KafkaProducer {
};
});
await this.producer.send({
topic,
messages
});
await this.producer.send({ topic, messages });
this.logger.debug(
{ count: ohlcData.length, topic, type: 'individual' },
@@ -168,7 +166,7 @@ export class KafkaProducer {
return;
}
// Historical mode - write as OHLCBatch with metadata
// Historical mode — write as OHLCBatch with metadata
const batch = {
metadata: {
requestId: metadata.request_id,
@@ -178,7 +176,8 @@ export class KafkaProducer {
startTime: metadata.start_time,
endTime: metadata.end_time,
status: metadata.status || 'OK',
errorMessage: metadata.error_message
errorMessage: metadata.error_message,
isLastPage
},
rows: ohlcData.map(candle => {
const row = {
@@ -194,22 +193,16 @@ export class KafkaProducer {
})
};
// Encode as protobuf OHLCBatch with ZMQ envelope
const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
const value = Buffer.concat([frame1, frame2]);
await this.producer.send({
topic,
messages: [
{
key: metadata.ticker,
value
}
]
messages: [{ key: metadata.ticker, value }]
});
this.logger.debug(
{ request_id: metadata.request_id, count: ohlcData.length, topic, type: 'batch' },
{ request_id: metadata.request_id, count: ohlcData.length, isLastPage, topic },
'Wrote OHLCBatch to Kafka'
);
}
@@ -225,7 +218,8 @@ export class KafkaProducer {
throw new Error('Kafka producer not connected');
}
// Create an empty OHLCBatch with status in metadata
// Create an empty OHLCBatch with status in metadata.
// Markers are always the terminal message for a request (is_last_page = true).
const batch = {
metadata: {
requestId: marker.request_id,
@@ -235,7 +229,8 @@ export class KafkaProducer {
startTime: marker.start_time,
endTime: marker.end_time,
status: marker.status, // 'NOT_FOUND' or 'ERROR'
errorMessage: marker.error_message || marker.message
errorMessage: marker.error_message || marker.message,
isLastPage: true
},
rows: [] // Empty rows array indicates marker message
};

View File

@@ -1,33 +1,40 @@
// Realtime tick data poller using 10-second polling
// Realtime tick data poller — polls exchange every 10s, writes ticks to market-tick Kafka topic.
// Heartbeats every 5s so Flink IngestorBroker knows the job is alive.
export class RealtimePoller {
constructor(ccxtFetcher, kafkaProducer, logger) {
constructor(ccxtFetcher, kafkaProducer, zmqClient, logger) {
this.ccxtFetcher = ccxtFetcher;
this.kafkaProducer = kafkaProducer;
this.zmqClient = zmqClient;
this.logger = logger;
// Active subscriptions: requestId -> subscription info
// Active subscriptions: jobId -> subscription info
this.subscriptions = new Map();
// Poll interval in milliseconds (10 seconds)
this.pollInterval = 10000;
// Main polling loop
// Heartbeat interval (5 seconds)
this.heartbeatInterval = 5000;
this.pollingLoop = null;
this.heartbeatLoop = null;
}
/**
* Start a realtime subscription
* @param {string} requestId - Unique request ID
* @param {string} ticker - Ticker to subscribe to
* @param {string} kafkaTopic - Kafka topic to write to
* Start a realtime subscription for a job dispatched by IngestorBroker.
* @param {string} jobId - Broker-assigned job ID (for heartbeats and COMPLETE)
* @param {string} requestId - Original request ID (for metadata)
* @param {string} ticker - Ticker to subscribe to
* @param {string} kafkaTopic - Kafka topic to write ticks to (market-tick)
*/
startSubscription(requestId, ticker, kafkaTopic) {
if (this.subscriptions.has(requestId)) {
this.logger.warn({ requestId }, 'Subscription already exists');
startSubscription(jobId, requestId, ticker, kafkaTopic) {
if (this.subscriptions.has(jobId)) {
this.logger.warn({ jobId }, 'Subscription already exists');
return;
}
const subscription = {
jobId,
requestId,
ticker,
kafkaTopic,
@@ -36,93 +43,81 @@ export class RealtimePoller {
errorCount: 0
};
this.subscriptions.set(requestId, subscription);
this.subscriptions.set(jobId, subscription);
this.logger.info({ jobId, requestId, ticker, kafkaTopic }, 'Started realtime subscription');
this.logger.info(
{ requestId, ticker, kafkaTopic },
'Started realtime subscription'
);
// Start polling loop if not already running
if (!this.pollingLoop) {
this.startPollingLoop();
}
if (!this.heartbeatLoop) {
this.startHeartbeatLoop();
}
}
/**
* Cancel a realtime subscription
* @param {string} requestId - Request ID to cancel
* Stop a realtime subscription. Called when Flink sends WorkStop or on error.
* Does NOT send WorkComplete — caller is responsible for that.
*/
cancelSubscription(requestId) {
const subscription = this.subscriptions.get(requestId);
cancelSubscription(jobId) {
const subscription = this.subscriptions.get(jobId);
if (subscription) {
subscription.isActive = false;
this.subscriptions.delete(requestId);
this.logger.info(
{ requestId, ticker: subscription.ticker },
'Cancelled realtime subscription'
);
this.subscriptions.delete(jobId);
this.logger.info({ jobId, ticker: subscription.ticker }, 'Cancelled realtime subscription');
}
// Stop polling loop if no active subscriptions
if (this.subscriptions.size === 0 && this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
this.logger.info('Stopped polling loop - no active subscriptions');
if (this.subscriptions.size === 0) {
if (this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
}
if (this.heartbeatLoop) {
clearInterval(this.heartbeatLoop);
this.heartbeatLoop = null;
}
this.logger.info('Stopped polling/heartbeat loops — no active subscriptions');
}
}
/**
* Start the main polling loop
*/
startPollingLoop() {
this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');
this.pollingLoop = setInterval(async () => {
await this.pollAllSubscriptions();
}, this.pollInterval);
// Do an immediate poll
this.pollingLoop = setInterval(() => this.pollAllSubscriptions(), this.pollInterval);
// Immediate first poll
this.pollAllSubscriptions();
}
/**
* Poll all active subscriptions
*/
async pollAllSubscriptions() {
const subscriptions = Array.from(this.subscriptions.values());
// Poll subscriptions in parallel
await Promise.allSettled(
subscriptions.map(sub => this.pollSubscription(sub))
);
startHeartbeatLoop() {
this.logger.info({ interval: this.heartbeatInterval }, 'Starting heartbeat loop');
this.heartbeatLoop = setInterval(async () => {
for (const { jobId } of this.subscriptions.values()) {
try {
await this.zmqClient.sendHeartbeat(jobId);
} catch (err) {
this.logger.error({ jobId, error: err.message }, 'Failed to send heartbeat');
}
}
}, this.heartbeatInterval);
}
/**
* Poll a single subscription
* @param {object} subscription - Subscription object
*/
async pollSubscription(subscription) {
if (!subscription.isActive) {
return;
}
async pollAllSubscriptions() {
const subscriptions = Array.from(this.subscriptions.values());
await Promise.allSettled(subscriptions.map(sub => this.pollSubscription(sub)));
}
const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
async pollSubscription(subscription) {
if (!subscription.isActive) return;
const { jobId, requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
try {
// Fetch trades since last timestamp
const trades = await this.ccxtFetcher.fetchRecentTrades(
ticker,
lastTimestamp
);
const trades = await this.ccxtFetcher.fetchRecentTrades(ticker, lastTimestamp);
if (trades.length === 0) {
this.logger.debug({ requestId, ticker }, 'No new trades');
this.logger.debug({ jobId, ticker }, 'No new trades');
return;
}
// Filter out trades we've already seen
// Skip trades we've already seen (timestamp-based dedup)
let newTrades = trades;
if (lastTimestamp) {
const lastTs = BigInt(lastTimestamp);
@@ -130,88 +125,59 @@ export class RealtimePoller {
}
if (newTrades.length > 0) {
// Write trades to Kafka
await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);
// Update last timestamp
const latestTrade = newTrades[newTrades.length - 1];
subscription.lastTimestamp = latestTrade.timestamp;
this.logger.info(
{
requestId,
ticker,
count: newTrades.length,
kafkaTopic
},
'Wrote new trades to Kafka'
);
subscription.lastTimestamp = newTrades[newTrades.length - 1].timestamp;
this.logger.info({ jobId, ticker, count: newTrades.length, kafkaTopic }, 'Wrote ticks to Kafka');
}
// Reset error count on success
subscription.errorCount = 0;
} catch (error) {
subscription.errorCount++;
this.logger.error(
{
error: error.message,
requestId,
ticker,
errorCount: subscription.errorCount
},
{ error: error.message, jobId, ticker, errorCount: subscription.errorCount },
'Error polling subscription'
);
// Cancel subscription after too many errors
// After 5 consecutive errors, give up and notify Flink
if (subscription.errorCount >= 5) {
this.logger.error(
{ requestId, ticker },
'Cancelling subscription due to repeated errors'
);
this.cancelSubscription(requestId);
this.logger.error({ jobId, ticker }, 'Cancelling subscription due to repeated errors');
this.cancelSubscription(jobId);
try {
await this.zmqClient.sendComplete(jobId, false, `Polling failed after 5 errors: ${error.message}`);
} catch (zmqErr) {
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
}
}
}
}
/**
* Get subscription statistics
*/
getStats() {
const stats = {
return {
totalSubscriptions: this.subscriptions.size,
subscriptions: []
};
for (const [requestId, sub] of this.subscriptions) {
stats.subscriptions.push({
requestId,
subscriptions: Array.from(this.subscriptions.values()).map(sub => ({
jobId: sub.jobId,
requestId: sub.requestId,
ticker: sub.ticker,
isActive: sub.isActive,
errorCount: sub.errorCount,
lastTimestamp: sub.lastTimestamp
});
}
return stats;
}))
};
}
/**
* Shutdown poller and cancel all subscriptions
*/
shutdown() {
this.logger.info('Shutting down realtime poller');
if (this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
}
// Mark all subscriptions as inactive
if (this.heartbeatLoop) {
clearInterval(this.heartbeatLoop);
this.heartbeatLoop = null;
}
for (const subscription of this.subscriptions.values()) {
subscription.isActive = false;
}
this.subscriptions.clear();
}
}

View File

@@ -1,116 +1,204 @@
// ZeroMQ client for connecting to Flink control channels
// ZeroMQ DEALER client connecting to Flink IngestorBroker (ROUTER, port 5567)
import * as zmq from 'zeromq';
import { decodeMessage } from './proto/messages.js';
import {
DataRequest,
WorkerReady, WorkComplete, WorkHeartbeat, WorkReject, WorkStop,
MessageTypeId, PROTOCOL_VERSION
} from './proto/messages.js';
const PROTOCOL_VERSION_BUF = Buffer.from([PROTOCOL_VERSION]);
/**
 * Build the multipart ZMQ frames for one broker-protocol message.
 *
 * DEALER → ROUTER frame layout:
 *   Frame 0: empty delimiter (required for ROUTER peering)
 *   Frame 1: single protocol-version byte [0x01]
 *   Frame 2: one type-id byte followed by the protobuf payload
 *
 * @param {number} typeId - MessageTypeId byte identifying the payload type
 * @param {object} messageData - Plain object matching the protobuf schema
 * @param {object} MessageType - protobufjs message class used to encode
 * @returns {Buffer[]} Three frames ready to pass to dealerSocket.send()
 */
function encodeBrokerMessage(typeId, messageData, MessageType) {
  const payload = MessageType.encode(MessageType.create(messageData)).finish();
  const typedFrame = Buffer.concat([Buffer.from([typeId]), Buffer.from(payload)]);
  return [Buffer.alloc(0), PROTOCOL_VERSION_BUF, typedFrame];
}
export class ZmqClient {
constructor(config, logger) {
this.config = config;
this.logger = logger;
// Work queue - SUB socket to receive data requests with exchange prefix filtering
this.workSocket = null;
// NOTE: NO RESPONSE SOCKET - Async architecture via Kafka!
// Ingestors write data to Kafka only
// Flink processes and publishes notifications
this.dealerSocket = null;
this.isShutdown = false;
this.supportedExchanges = config.supported_exchanges || ['BINANCE', 'COINBASE'];
this.activeJobId = null;
this._idleHeartbeatInterval = null;
this.supportedExchanges = (config.supported_exchanges || ['BINANCE', 'COINBASE'])
.map(e => e.toUpperCase());
// Callbacks set by IngestorWorker
this.onWorkAssign = null; // (DataRequest) => void
this.onWorkStop = null; // (jobId) => void
}
/**
* Connect to Relay ZMQ endpoints
* Connect DEALER socket to Flink IngestorBroker (ROUTER).
* Sends WorkerReady immediately so Flink knows this worker is available.
*/
async connect() {
const { flink_hostname, ingestor_work_port } = this.config;
const { flink_hostname, ingestor_broker_port = 5567 } = this.config;
// Connect to work queue (SUB with exchange prefix filtering)
this.workSocket = new zmq.Subscriber();
const workEndpoint = `tcp://${flink_hostname}:${ingestor_work_port}`;
await this.workSocket.connect(workEndpoint);
this.dealerSocket = new zmq.Dealer();
const endpoint = `tcp://${flink_hostname}:${ingestor_broker_port}`;
await this.dealerSocket.connect(endpoint);
this.logger.info(`Connected DEALER to Flink IngestorBroker at ${endpoint}`);
// Subscribe to each supported exchange suffix (Nautilus format: "BTC/USDT.BINANCE")
for (const exchange of this.supportedExchanges) {
const prefix = `${exchange}.`;
this.workSocket.subscribe(prefix);
this.logger.info(`Subscribed to exchange prefix: ${prefix}`);
}
this.logger.info(`Connected to work queue at ${workEndpoint}`);
this.logger.info('ASYNC MODE: No response socket - data flows via Kafka → Flink → pub/sub notification');
// Register as available
await this.sendReady();
// Periodically re-send WorkerReady when idle, to recover from missed initial registration
this._idleHeartbeatInterval = setInterval(() => {
if (this.activeJobId === null && !this.isShutdown) {
this.sendReady().catch(err =>
this.logger.warn({ error: err.message }, 'Failed to re-send WorkerReady'));
}
}, 30_000);
// Start receiving work in background
this._receiveLoop();
}
/**
* Pull a data request from the work queue
* @returns {Promise<object>} Decoded DataRequest message
* Send WorkerReady — called on connect and after each COMPLETE.
*/
async pullDataRequest() {
if (this.isShutdown) {
return null;
}
async sendReady() {
const frames = encodeBrokerMessage(
MessageTypeId.WORKER_READY,
{ exchanges: this.supportedExchanges },
WorkerReady
);
await this.dealerSocket.send(frames);
this.logger.info({ exchanges: this.supportedExchanges }, 'Sent WorkerReady');
}
/**
* Send WorkComplete after a historical job finishes.
* Automatically sends WorkerReady so Flink returns us to the free pool.
*/
async sendComplete(jobId, success, errorMessage) {
this.activeJobId = null;
const frames = encodeBrokerMessage(
MessageTypeId.WORK_COMPLETE,
{
jobId,
success,
...(errorMessage ? { errorMessage } : {})
},
WorkComplete
);
await this.dealerSocket.send(frames);
this.logger.info({ jobId, success }, 'Sent WorkComplete');
// Return to free pool
await this.sendReady();
}
/**
* Send WorkHeartbeat for an active realtime job.
*/
async sendHeartbeat(jobId) {
const frames = encodeBrokerMessage(
MessageTypeId.WORK_HEARTBEAT,
{ jobId },
WorkHeartbeat
);
await this.dealerSocket.send(frames);
this.logger.debug({ jobId }, 'Sent WorkHeartbeat');
}
/**
* Send WorkReject if we cannot handle the dispatched job.
*/
async sendReject(jobId, reason) {
const frames = encodeBrokerMessage(
MessageTypeId.WORK_REJECT,
{ jobId, reason },
WorkReject
);
await this.dealerSocket.send(frames);
this.logger.warn({ jobId, reason }, 'Sent WorkReject');
}
/**
* Background loop: receive WorkAssign (DataRequest) or WorkStop from Flink.
* ROUTER→DEALER frame layout: [empty][version][typeId+payload]
*/
async _receiveLoop() {
try {
const frames = await this.workSocket.receive();
this.logger.info({
frameCount: frames.length,
frame0Len: frames[0]?.length,
frame1Len: frames[1]?.length,
frame2Len: frames[2]?.length,
frame0: frames[0]?.toString('utf8').substring(0, 50),
frame1Hex: frames[1]?.toString('hex').substring(0, 20),
frame2Hex: frames[2]?.toString('hex').substring(0, 20)
}, 'Received raw ZMQ frames');
for await (const frames of this.dealerSocket) {
if (this.isShutdown) break;
// First frame is the topic (exchange prefix), skip it
// Remaining frames are: [version_frame, message_frame]
if (frames.length < 3) {
this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count');
return null;
try {
// frames[0] = empty delimiter, frames[1] = version, frames[2] = type+payload
if (frames.length < 3) {
this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count from broker');
continue;
}
const versionByte = frames[1][0];
if (versionByte !== PROTOCOL_VERSION) {
this.logger.warn({ versionByte }, 'Unexpected protocol version from broker');
continue;
}
const typeId = frames[2][0];
const payload = frames[2].slice(1);
if (typeId === MessageTypeId.WORK_ASSIGN) {
// DataRequest protobuf
const request = DataRequest.decode(payload);
const req = DataRequest.toObject(request, {
longs: String, enums: String, bytes: Buffer
});
this.activeJobId = req.jobId;
this.logger.info(
{ jobId: req.jobId, requestId: req.requestId, type: req.type, ticker: req.ticker },
'Received WorkAssign from broker'
);
if (this.onWorkAssign) {
this.onWorkAssign(req);
}
} else if (typeId === MessageTypeId.WORK_STOP) {
const stop = WorkStop.decode(payload);
const { jobId } = WorkStop.toObject(stop);
this.logger.info({ jobId }, 'Received WorkStop from broker');
if (this.onWorkStop) {
this.onWorkStop(jobId);
}
} else {
this.logger.warn({ typeId: `0x${typeId.toString(16)}` }, 'Unknown message type from broker');
}
} catch (err) {
this.logger.error({ error: err.message }, 'Error processing broker message');
}
}
const messageFrames = frames.slice(1); // Skip topic, keep version + message
const { version, typeId, message } = decodeMessage(messageFrames);
this.logger.info({
version,
typeId: `0x${typeId.toString(16)}`,
requestId: message.requestId,
type: message.type,
typeOf: typeof message.type,
ticker: message.ticker
}, 'Decoded data request');
return message;
} catch (error) {
} catch (err) {
if (!this.isShutdown) {
this.logger.error({ error: error.message, stack: error.stack }, 'Error receiving data request');
this.logger.error({ error: err.message }, 'DEALER receive loop error');
}
return null;
}
}
/**
* Start listening for control messages in the background
* @param {Function} handler - Callback function to handle control messages
*
* NOTE: Control channel not implemented yet. This is a stub for future use.
* For now, just log and ignore.
*/
startControlListener(handler) {
this.logger.info('Control channel listener stub - not implemented yet');
// TODO: Implement control channel when needed
// Control messages would be used for:
// - Canceling realtime subscriptions
// - Graceful shutdown signals
// - Configuration updates
}
/**
* Shutdown and close connections
*/
async shutdown() {
this.isShutdown = true;
this.logger.info('Shutting down ZMQ connections');
if (this.workSocket) {
await this.workSocket.close();
if (this._idleHeartbeatInterval) {
clearInterval(this._idleHeartbeatInterval);
this._idleHeartbeatInterval = null;
}
this.logger.info('Shutting down ZMQ DEALER connection');
if (this.dealerSocket) {
this.dealerSocket.close();
}
}
}