data pipeline refactor and fix

This commit is contained in:
2026-04-13 18:30:04 -04:00
parent 6418729b16
commit 326bf80846
96 changed files with 7107 additions and 1763 deletions

View File

@@ -1,33 +1,40 @@
// Realtime tick data poller using 10-second polling
// Realtime tick data poller — polls exchange every 10s, writes ticks to market-tick Kafka topic.
// Heartbeats every 5s so Flink IngestorBroker knows the job is alive.
export class RealtimePoller {
constructor(ccxtFetcher, kafkaProducer, logger) {
constructor(ccxtFetcher, kafkaProducer, zmqClient, logger) {
this.ccxtFetcher = ccxtFetcher;
this.kafkaProducer = kafkaProducer;
this.zmqClient = zmqClient;
this.logger = logger;
// Active subscriptions: requestId -> subscription info
// Active subscriptions: jobId -> subscription info
this.subscriptions = new Map();
// Poll interval in milliseconds (10 seconds)
this.pollInterval = 10000;
// Main polling loop
// Heartbeat interval (5 seconds)
this.heartbeatInterval = 5000;
this.pollingLoop = null;
this.heartbeatLoop = null;
}
/**
* Start a realtime subscription
* @param {string} requestId - Unique request ID
* @param {string} ticker - Ticker to subscribe to
* @param {string} kafkaTopic - Kafka topic to write to
* Start a realtime subscription for a job dispatched by IngestorBroker.
* @param {string} jobId - Broker-assigned job ID (for heartbeats and COMPLETE)
* @param {string} requestId - Original request ID (for metadata)
* @param {string} ticker - Ticker to subscribe to
* @param {string} kafkaTopic - Kafka topic to write ticks to (market-tick)
*/
startSubscription(requestId, ticker, kafkaTopic) {
if (this.subscriptions.has(requestId)) {
this.logger.warn({ requestId }, 'Subscription already exists');
startSubscription(jobId, requestId, ticker, kafkaTopic) {
if (this.subscriptions.has(jobId)) {
this.logger.warn({ jobId }, 'Subscription already exists');
return;
}
const subscription = {
jobId,
requestId,
ticker,
kafkaTopic,
@@ -36,93 +43,81 @@ export class RealtimePoller {
errorCount: 0
};
this.subscriptions.set(requestId, subscription);
this.subscriptions.set(jobId, subscription);
this.logger.info({ jobId, requestId, ticker, kafkaTopic }, 'Started realtime subscription');
this.logger.info(
{ requestId, ticker, kafkaTopic },
'Started realtime subscription'
);
// Start polling loop if not already running
if (!this.pollingLoop) {
this.startPollingLoop();
}
if (!this.heartbeatLoop) {
this.startHeartbeatLoop();
}
}
/**
* Cancel a realtime subscription
* @param {string} requestId - Request ID to cancel
* Stop a realtime subscription. Called when Flink sends WorkStop or on error.
* Does NOT send WorkComplete — caller is responsible for that.
*/
cancelSubscription(requestId) {
const subscription = this.subscriptions.get(requestId);
cancelSubscription(jobId) {
const subscription = this.subscriptions.get(jobId);
if (subscription) {
subscription.isActive = false;
this.subscriptions.delete(requestId);
this.logger.info(
{ requestId, ticker: subscription.ticker },
'Cancelled realtime subscription'
);
this.subscriptions.delete(jobId);
this.logger.info({ jobId, ticker: subscription.ticker }, 'Cancelled realtime subscription');
}
// Stop polling loop if no active subscriptions
if (this.subscriptions.size === 0 && this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
this.logger.info('Stopped polling loop - no active subscriptions');
if (this.subscriptions.size === 0) {
if (this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
}
if (this.heartbeatLoop) {
clearInterval(this.heartbeatLoop);
this.heartbeatLoop = null;
}
this.logger.info('Stopped polling/heartbeat loops — no active subscriptions');
}
}
/**
* Start the main polling loop
*/
startPollingLoop() {
this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');
this.pollingLoop = setInterval(async () => {
await this.pollAllSubscriptions();
}, this.pollInterval);
// Do an immediate poll
this.pollingLoop = setInterval(() => this.pollAllSubscriptions(), this.pollInterval);
// Immediate first poll
this.pollAllSubscriptions();
}
/**
* Poll all active subscriptions
*/
async pollAllSubscriptions() {
const subscriptions = Array.from(this.subscriptions.values());
// Poll subscriptions in parallel
await Promise.allSettled(
subscriptions.map(sub => this.pollSubscription(sub))
);
startHeartbeatLoop() {
this.logger.info({ interval: this.heartbeatInterval }, 'Starting heartbeat loop');
this.heartbeatLoop = setInterval(async () => {
for (const { jobId } of this.subscriptions.values()) {
try {
await this.zmqClient.sendHeartbeat(jobId);
} catch (err) {
this.logger.error({ jobId, error: err.message }, 'Failed to send heartbeat');
}
}
}, this.heartbeatInterval);
}
/**
* Poll a single subscription
* @param {object} subscription - Subscription object
*/
async pollSubscription(subscription) {
if (!subscription.isActive) {
return;
}
async pollAllSubscriptions() {
const subscriptions = Array.from(this.subscriptions.values());
await Promise.allSettled(subscriptions.map(sub => this.pollSubscription(sub)));
}
const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
async pollSubscription(subscription) {
if (!subscription.isActive) return;
const { jobId, requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
try {
// Fetch trades since last timestamp
const trades = await this.ccxtFetcher.fetchRecentTrades(
ticker,
lastTimestamp
);
const trades = await this.ccxtFetcher.fetchRecentTrades(ticker, lastTimestamp);
if (trades.length === 0) {
this.logger.debug({ requestId, ticker }, 'No new trades');
this.logger.debug({ jobId, ticker }, 'No new trades');
return;
}
// Filter out trades we've already seen
// Skip trades we've already seen (timestamp-based dedup)
let newTrades = trades;
if (lastTimestamp) {
const lastTs = BigInt(lastTimestamp);
@@ -130,88 +125,59 @@ export class RealtimePoller {
}
if (newTrades.length > 0) {
// Write trades to Kafka
await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);
// Update last timestamp
const latestTrade = newTrades[newTrades.length - 1];
subscription.lastTimestamp = latestTrade.timestamp;
this.logger.info(
{
requestId,
ticker,
count: newTrades.length,
kafkaTopic
},
'Wrote new trades to Kafka'
);
subscription.lastTimestamp = newTrades[newTrades.length - 1].timestamp;
this.logger.info({ jobId, ticker, count: newTrades.length, kafkaTopic }, 'Wrote ticks to Kafka');
}
// Reset error count on success
subscription.errorCount = 0;
} catch (error) {
subscription.errorCount++;
this.logger.error(
{
error: error.message,
requestId,
ticker,
errorCount: subscription.errorCount
},
{ error: error.message, jobId, ticker, errorCount: subscription.errorCount },
'Error polling subscription'
);
// Cancel subscription after too many errors
// After 5 consecutive errors, give up and notify Flink
if (subscription.errorCount >= 5) {
this.logger.error(
{ requestId, ticker },
'Cancelling subscription due to repeated errors'
);
this.cancelSubscription(requestId);
this.logger.error({ jobId, ticker }, 'Cancelling subscription due to repeated errors');
this.cancelSubscription(jobId);
try {
await this.zmqClient.sendComplete(jobId, false, `Polling failed after 5 errors: ${error.message}`);
} catch (zmqErr) {
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
}
}
}
}
/**
* Get subscription statistics
*/
getStats() {
const stats = {
return {
totalSubscriptions: this.subscriptions.size,
subscriptions: []
};
for (const [requestId, sub] of this.subscriptions) {
stats.subscriptions.push({
requestId,
subscriptions: Array.from(this.subscriptions.values()).map(sub => ({
jobId: sub.jobId,
requestId: sub.requestId,
ticker: sub.ticker,
isActive: sub.isActive,
errorCount: sub.errorCount,
lastTimestamp: sub.lastTimestamp
});
}
return stats;
}))
};
}
/**
* Shutdown poller and cancel all subscriptions
*/
shutdown() {
this.logger.info('Shutting down realtime poller');
if (this.pollingLoop) {
clearInterval(this.pollingLoop);
this.pollingLoop = null;
}
// Mark all subscriptions as inactive
if (this.heartbeatLoop) {
clearInterval(this.heartbeatLoop);
this.heartbeatLoop = null;
}
for (const subscription of this.subscriptions.values()) {
subscription.isActive = false;
}
this.subscriptions.clear();
}
}