data pipeline refactor and fix
This commit is contained in:
@@ -1,33 +1,40 @@
|
||||
// Realtime tick data poller using 10-second polling
|
||||
// Realtime tick data poller — polls exchange every 10s, writes ticks to market-tick Kafka topic.
|
||||
// Heartbeats every 5s so Flink IngestorBroker knows the job is alive.
|
||||
export class RealtimePoller {
|
||||
constructor(ccxtFetcher, kafkaProducer, logger) {
|
||||
constructor(ccxtFetcher, kafkaProducer, zmqClient, logger) {
|
||||
this.ccxtFetcher = ccxtFetcher;
|
||||
this.kafkaProducer = kafkaProducer;
|
||||
this.zmqClient = zmqClient;
|
||||
this.logger = logger;
|
||||
|
||||
// Active subscriptions: requestId -> subscription info
|
||||
// Active subscriptions: jobId -> subscription info
|
||||
this.subscriptions = new Map();
|
||||
|
||||
// Poll interval in milliseconds (10 seconds)
|
||||
this.pollInterval = 10000;
|
||||
|
||||
// Main polling loop
|
||||
// Heartbeat interval (5 seconds)
|
||||
this.heartbeatInterval = 5000;
|
||||
|
||||
this.pollingLoop = null;
|
||||
this.heartbeatLoop = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a realtime subscription
|
||||
* @param {string} requestId - Unique request ID
|
||||
* @param {string} ticker - Ticker to subscribe to
|
||||
* @param {string} kafkaTopic - Kafka topic to write to
|
||||
* Start a realtime subscription for a job dispatched by IngestorBroker.
|
||||
* @param {string} jobId - Broker-assigned job ID (for heartbeats and COMPLETE)
|
||||
* @param {string} requestId - Original request ID (for metadata)
|
||||
* @param {string} ticker - Ticker to subscribe to
|
||||
* @param {string} kafkaTopic - Kafka topic to write ticks to (market-tick)
|
||||
*/
|
||||
startSubscription(requestId, ticker, kafkaTopic) {
|
||||
if (this.subscriptions.has(requestId)) {
|
||||
this.logger.warn({ requestId }, 'Subscription already exists');
|
||||
startSubscription(jobId, requestId, ticker, kafkaTopic) {
|
||||
if (this.subscriptions.has(jobId)) {
|
||||
this.logger.warn({ jobId }, 'Subscription already exists');
|
||||
return;
|
||||
}
|
||||
|
||||
const subscription = {
|
||||
jobId,
|
||||
requestId,
|
||||
ticker,
|
||||
kafkaTopic,
|
||||
@@ -36,93 +43,81 @@ export class RealtimePoller {
|
||||
errorCount: 0
|
||||
};
|
||||
|
||||
this.subscriptions.set(requestId, subscription);
|
||||
this.subscriptions.set(jobId, subscription);
|
||||
this.logger.info({ jobId, requestId, ticker, kafkaTopic }, 'Started realtime subscription');
|
||||
|
||||
this.logger.info(
|
||||
{ requestId, ticker, kafkaTopic },
|
||||
'Started realtime subscription'
|
||||
);
|
||||
|
||||
// Start polling loop if not already running
|
||||
if (!this.pollingLoop) {
|
||||
this.startPollingLoop();
|
||||
}
|
||||
if (!this.heartbeatLoop) {
|
||||
this.startHeartbeatLoop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel a realtime subscription
|
||||
* @param {string} requestId - Request ID to cancel
|
||||
* Stop a realtime subscription. Called when Flink sends WorkStop or on error.
|
||||
* Does NOT send WorkComplete — caller is responsible for that.
|
||||
*/
|
||||
cancelSubscription(requestId) {
|
||||
const subscription = this.subscriptions.get(requestId);
|
||||
cancelSubscription(jobId) {
|
||||
const subscription = this.subscriptions.get(jobId);
|
||||
if (subscription) {
|
||||
subscription.isActive = false;
|
||||
this.subscriptions.delete(requestId);
|
||||
|
||||
this.logger.info(
|
||||
{ requestId, ticker: subscription.ticker },
|
||||
'Cancelled realtime subscription'
|
||||
);
|
||||
this.subscriptions.delete(jobId);
|
||||
this.logger.info({ jobId, ticker: subscription.ticker }, 'Cancelled realtime subscription');
|
||||
}
|
||||
|
||||
// Stop polling loop if no active subscriptions
|
||||
if (this.subscriptions.size === 0 && this.pollingLoop) {
|
||||
clearInterval(this.pollingLoop);
|
||||
this.pollingLoop = null;
|
||||
this.logger.info('Stopped polling loop - no active subscriptions');
|
||||
if (this.subscriptions.size === 0) {
|
||||
if (this.pollingLoop) {
|
||||
clearInterval(this.pollingLoop);
|
||||
this.pollingLoop = null;
|
||||
}
|
||||
if (this.heartbeatLoop) {
|
||||
clearInterval(this.heartbeatLoop);
|
||||
this.heartbeatLoop = null;
|
||||
}
|
||||
this.logger.info('Stopped polling/heartbeat loops — no active subscriptions');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the main polling loop
|
||||
*/
|
||||
startPollingLoop() {
|
||||
this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');
|
||||
|
||||
this.pollingLoop = setInterval(async () => {
|
||||
await this.pollAllSubscriptions();
|
||||
}, this.pollInterval);
|
||||
|
||||
// Do an immediate poll
|
||||
this.pollingLoop = setInterval(() => this.pollAllSubscriptions(), this.pollInterval);
|
||||
// Immediate first poll
|
||||
this.pollAllSubscriptions();
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll all active subscriptions
|
||||
*/
|
||||
async pollAllSubscriptions() {
|
||||
const subscriptions = Array.from(this.subscriptions.values());
|
||||
|
||||
// Poll subscriptions in parallel
|
||||
await Promise.allSettled(
|
||||
subscriptions.map(sub => this.pollSubscription(sub))
|
||||
);
|
||||
startHeartbeatLoop() {
|
||||
this.logger.info({ interval: this.heartbeatInterval }, 'Starting heartbeat loop');
|
||||
this.heartbeatLoop = setInterval(async () => {
|
||||
for (const { jobId } of this.subscriptions.values()) {
|
||||
try {
|
||||
await this.zmqClient.sendHeartbeat(jobId);
|
||||
} catch (err) {
|
||||
this.logger.error({ jobId, error: err.message }, 'Failed to send heartbeat');
|
||||
}
|
||||
}
|
||||
}, this.heartbeatInterval);
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll a single subscription
|
||||
* @param {object} subscription - Subscription object
|
||||
*/
|
||||
async pollSubscription(subscription) {
|
||||
if (!subscription.isActive) {
|
||||
return;
|
||||
}
|
||||
async pollAllSubscriptions() {
|
||||
const subscriptions = Array.from(this.subscriptions.values());
|
||||
await Promise.allSettled(subscriptions.map(sub => this.pollSubscription(sub)));
|
||||
}
|
||||
|
||||
const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
|
||||
async pollSubscription(subscription) {
|
||||
if (!subscription.isActive) return;
|
||||
|
||||
const { jobId, requestId, ticker, kafkaTopic, lastTimestamp } = subscription;
|
||||
|
||||
try {
|
||||
// Fetch trades since last timestamp
|
||||
const trades = await this.ccxtFetcher.fetchRecentTrades(
|
||||
ticker,
|
||||
lastTimestamp
|
||||
);
|
||||
const trades = await this.ccxtFetcher.fetchRecentTrades(ticker, lastTimestamp);
|
||||
|
||||
if (trades.length === 0) {
|
||||
this.logger.debug({ requestId, ticker }, 'No new trades');
|
||||
this.logger.debug({ jobId, ticker }, 'No new trades');
|
||||
return;
|
||||
}
|
||||
|
||||
// Filter out trades we've already seen
|
||||
// Skip trades we've already seen (timestamp-based dedup)
|
||||
let newTrades = trades;
|
||||
if (lastTimestamp) {
|
||||
const lastTs = BigInt(lastTimestamp);
|
||||
@@ -130,88 +125,59 @@ export class RealtimePoller {
|
||||
}
|
||||
|
||||
if (newTrades.length > 0) {
|
||||
// Write trades to Kafka
|
||||
await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);
|
||||
|
||||
// Update last timestamp
|
||||
const latestTrade = newTrades[newTrades.length - 1];
|
||||
subscription.lastTimestamp = latestTrade.timestamp;
|
||||
|
||||
this.logger.info(
|
||||
{
|
||||
requestId,
|
||||
ticker,
|
||||
count: newTrades.length,
|
||||
kafkaTopic
|
||||
},
|
||||
'Wrote new trades to Kafka'
|
||||
);
|
||||
subscription.lastTimestamp = newTrades[newTrades.length - 1].timestamp;
|
||||
this.logger.info({ jobId, ticker, count: newTrades.length, kafkaTopic }, 'Wrote ticks to Kafka');
|
||||
}
|
||||
|
||||
// Reset error count on success
|
||||
subscription.errorCount = 0;
|
||||
} catch (error) {
|
||||
subscription.errorCount++;
|
||||
|
||||
this.logger.error(
|
||||
{
|
||||
error: error.message,
|
||||
requestId,
|
||||
ticker,
|
||||
errorCount: subscription.errorCount
|
||||
},
|
||||
{ error: error.message, jobId, ticker, errorCount: subscription.errorCount },
|
||||
'Error polling subscription'
|
||||
);
|
||||
|
||||
// Cancel subscription after too many errors
|
||||
// After 5 consecutive errors, give up and notify Flink
|
||||
if (subscription.errorCount >= 5) {
|
||||
this.logger.error(
|
||||
{ requestId, ticker },
|
||||
'Cancelling subscription due to repeated errors'
|
||||
);
|
||||
this.cancelSubscription(requestId);
|
||||
this.logger.error({ jobId, ticker }, 'Cancelling subscription due to repeated errors');
|
||||
this.cancelSubscription(jobId);
|
||||
try {
|
||||
await this.zmqClient.sendComplete(jobId, false, `Polling failed after 5 errors: ${error.message}`);
|
||||
} catch (zmqErr) {
|
||||
this.logger.error({ jobId, error: zmqErr.message }, 'Failed to send WorkComplete after error');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get subscription statistics
|
||||
*/
|
||||
getStats() {
|
||||
const stats = {
|
||||
return {
|
||||
totalSubscriptions: this.subscriptions.size,
|
||||
subscriptions: []
|
||||
};
|
||||
|
||||
for (const [requestId, sub] of this.subscriptions) {
|
||||
stats.subscriptions.push({
|
||||
requestId,
|
||||
subscriptions: Array.from(this.subscriptions.values()).map(sub => ({
|
||||
jobId: sub.jobId,
|
||||
requestId: sub.requestId,
|
||||
ticker: sub.ticker,
|
||||
isActive: sub.isActive,
|
||||
errorCount: sub.errorCount,
|
||||
lastTimestamp: sub.lastTimestamp
|
||||
});
|
||||
}
|
||||
|
||||
return stats;
|
||||
}))
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown poller and cancel all subscriptions
|
||||
*/
|
||||
shutdown() {
|
||||
this.logger.info('Shutting down realtime poller');
|
||||
|
||||
if (this.pollingLoop) {
|
||||
clearInterval(this.pollingLoop);
|
||||
this.pollingLoop = null;
|
||||
}
|
||||
|
||||
// Mark all subscriptions as inactive
|
||||
if (this.heartbeatLoop) {
|
||||
clearInterval(this.heartbeatLoop);
|
||||
this.heartbeatLoop = null;
|
||||
}
|
||||
for (const subscription of this.subscriptions.values()) {
|
||||
subscription.isActive = false;
|
||||
}
|
||||
|
||||
this.subscriptions.clear();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user