backend redesign
ingestor/src/ccxt-fetcher.js (new file, 248 lines)
@@ -0,0 +1,248 @@
// CCXT data fetcher for historical OHLC and realtime ticks
import ccxt from 'ccxt';

export class CCXTFetcher {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;
    this.exchanges = new Map();
  }

  /**
   * Parse ticker string to exchange and symbol
   * Expected format: "EXCHANGE:SYMBOL" (e.g., "BINANCE:BTC/USDT")
   */
  parseTicker(ticker) {
    const parts = ticker.split(':');
    if (parts.length !== 2) {
      throw new Error(`Invalid ticker format: ${ticker}. Expected "EXCHANGE:SYMBOL"`);
    }

    return {
      exchange: parts[0].toLowerCase(),
      symbol: parts[1]
    };
  }

  /**
   * Get or create CCXT exchange instance
   */
  getExchange(exchangeName) {
    if (this.exchanges.has(exchangeName)) {
      return this.exchanges.get(exchangeName);
    }

    // Create exchange instance
    const ExchangeClass = ccxt[exchangeName];
    if (!ExchangeClass) {
      throw new Error(`Unsupported exchange: ${exchangeName}`);
    }

    const exchange = new ExchangeClass({
      enableRateLimit: true,
      options: {
        defaultType: 'spot'
      }
    });

    this.exchanges.set(exchangeName, exchange);
    this.logger.info({ exchange: exchangeName }, 'Created CCXT exchange instance');

    return exchange;
  }

  /**
   * Fetch historical OHLC data
   * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL"
   * @param {string} startTime - Start time in microseconds
   * @param {string} endTime - End time in microseconds
   * @param {number} periodSeconds - OHLC period in seconds
   * @param {number} limit - Optional limit on number of candles
   * @returns {Promise<Array>} Array of OHLC candles
   */
  async fetchHistoricalOHLC(ticker, startTime, endTime, periodSeconds, limit) {
    const { exchange: exchangeName, symbol } = this.parseTicker(ticker);
    const exchange = this.getExchange(exchangeName);

    // Convert microseconds to milliseconds
    const startMs = Math.floor(parseInt(startTime) / 1000);
    const endMs = Math.floor(parseInt(endTime) / 1000);

    // Map period seconds to CCXT timeframe
    const timeframe = this.secondsToTimeframe(periodSeconds);

    this.logger.info(
      { ticker, timeframe, startMs, endMs, limit },
      'Fetching historical OHLC'
    );

    const allCandles = [];
    let since = startMs;

    // CCXT typically limits to 1000 candles per request
    const batchSize = limit || 1000;

    while (since < endMs) {
      try {
        const candles = await exchange.fetchOHLCV(
          symbol,
          timeframe,
          since,
          batchSize
        );

        if (candles.length === 0) {
          break;
        }

        // Filter candles within the time range
        const filteredCandles = candles.filter(c => {
          const timestamp = c[0];
          return timestamp >= startMs && timestamp <= endMs;
        });

        allCandles.push(...filteredCandles);

        // Move to next batch
        const lastTimestamp = candles[candles.length - 1][0];
        since = lastTimestamp + (periodSeconds * 1000);

        // Break if we've reached the end time or limit
        if (since >= endMs || (limit && allCandles.length >= limit)) {
          break;
        }

        // Apply rate limiting
        await exchange.sleep(exchange.rateLimit);
      } catch (error) {
        this.logger.error(
          { error: error.message, ticker, since },
          'Error fetching OHLC'
        );
        throw error;
      }
    }

    // Convert to our OHLC format
    return allCandles.map(candle => this.convertToOHLC(candle, ticker, periodSeconds));
  }

  /**
   * Fetch recent trades for realtime tick data
   * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL"
   * @param {string} since - Optional timestamp in microseconds to fetch from
   * @returns {Promise<Array>} Array of trade ticks
   */
  async fetchRecentTrades(ticker, since = null) {
    const { exchange: exchangeName, symbol } = this.parseTicker(ticker);
    const exchange = this.getExchange(exchangeName);

    try {
      // Convert microseconds to milliseconds if provided
      const sinceMs = since ? Math.floor(parseInt(since) / 1000) : undefined;

      const trades = await exchange.fetchTrades(symbol, sinceMs, 1000);

      this.logger.debug(
        { ticker, count: trades.length },
        'Fetched recent trades'
      );

      // Convert to our Tick format
      return trades.map(trade => this.convertToTick(trade, ticker));
    } catch (error) {
      this.logger.error(
        { error: error.message, ticker },
        'Error fetching trades'
      );
      throw error;
    }
  }

  /**
   * Convert CCXT OHLCV array to our OHLC format
   * CCXT format: [timestamp, open, high, low, close, volume]
   */
  convertToOHLC(candle, ticker, periodSeconds) {
    const [timestamp, open, high, low, close, volume] = candle;

    // Convert to fixed-point integers (using 8 decimal places = 10^8)
    const DENOM = 100000000;

    return {
      ticker,
      timestamp: (timestamp * 1000).toString(), // Convert ms to microseconds
      open: Math.round(open * DENOM).toString(),
      high: Math.round(high * DENOM).toString(),
      low: Math.round(low * DENOM).toString(),
      close: Math.round(close * DENOM).toString(),
      volume: Math.round(volume * DENOM).toString(),
      open_time: (timestamp * 1000).toString(),
      close_time: ((timestamp + periodSeconds * 1000) * 1000).toString()
    };
  }
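
  // Worked example of the fixed-point encoding above (illustrative value, not
  // from the commit): a close of 42123.456789 becomes
  // Math.round(42123.456789 * 1e8) = 4212345678900, serialized as "4212345678900".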

  /**
   * Convert CCXT trade to our Tick format
   */
  convertToTick(trade, ticker) {
    // Convert to fixed-point integers (using 8 decimal places = 10^8)
    const DENOM = 100000000;

    const price = Math.round(trade.price * DENOM);
    const amount = Math.round(trade.amount * DENOM);
    const quoteAmount = Math.round((trade.price * trade.amount) * DENOM);

    return {
      trade_id: trade.id || `${trade.timestamp}`,
      ticker,
      timestamp: (trade.timestamp * 1000).toString(), // Convert ms to microseconds
      price: price.toString(),
      amount: amount.toString(),
      quote_amount: quoteAmount.toString(),
      taker_buy: trade.side === 'buy',
      sequence: trade.order ? trade.order.toString() : undefined
    };
  }

  /**
   * Convert period seconds to CCXT timeframe string
   */
  secondsToTimeframe(seconds) {
    const timeframes = {
      60: '1m',
      300: '5m',
      900: '15m',
      1800: '30m',
      3600: '1h',
      7200: '2h',
      14400: '4h',
      21600: '6h',
      28800: '8h',
      43200: '12h',
      86400: '1d',
      259200: '3d',
      604800: '1w',
      2592000: '1M'
    };

    const timeframe = timeframes[seconds];
    if (!timeframe) {
      throw new Error(`Unsupported period: ${seconds} seconds`);
    }

    return timeframe;
  }

  /**
   * Close all exchange connections
   */
  async close() {
    for (const [name, exchange] of this.exchanges) {
      if (exchange.close) {
        await exchange.close();
      }
    }
    this.exchanges.clear();
  }
}
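
A minimal usage sketch for the fetcher above (editor's illustration, not part of this commit). It assumes an ESM context with top-level await, the pino logger used elsewhere in this commit, and the "BINANCE:BTC/USDT" ticker format from the JSDoc; timestamps are passed as microsecond strings:

import pino from 'pino';
import { CCXTFetcher } from './ccxt-fetcher.js';

const fetcher = new CCXTFetcher({}, pino());
const nowUs = BigInt(Date.now()) * 1000n; // current time in microseconds
const dayUs = 86400n * 1000000n;          // one day in microseconds
const candles = await fetcher.fetchHistoricalOHLC(
  'BINANCE:BTC/USDT',
  (nowUs - dayUs).toString(), // start time (microsecond string)
  nowUs.toString(),           // end time (microsecond string)
  3600                        // 1h candles
);
console.log(`fetched ${candles.length} candles`, candles[0]);
await fetcher.close();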

ingestor/src/index.js (new file, 411 lines)
@@ -0,0 +1,411 @@
#!/usr/bin/env node

// Main ingestor worker process
import { readFileSync } from 'fs';
import { parse as parseYaml } from 'yaml';
import pino from 'pino';
import { ZmqClient } from './zmq-client.js';
import { KafkaProducer } from './kafka-producer.js';
import { CCXTFetcher } from './ccxt-fetcher.js';
import { RealtimePoller } from './realtime-poller.js';

// Logger setup
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  transport: {
    target: 'pino-pretty',
    options: {
      colorize: true,
      translateTime: 'SYS:standard',
      ignore: 'pid,hostname'
    }
  }
});

// Load configuration
function loadConfig() {
  const configPath = process.env.CONFIG_PATH || '/config/config.yaml';
  const secretsPath = process.env.SECRETS_PATH || '/config/secrets.yaml';

  let config = {};
  let secrets = {};

  try {
    const configFile = readFileSync(configPath, 'utf8');
    config = parseYaml(configFile);
    logger.info({ path: configPath }, 'Loaded configuration');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load config, using defaults');
  }

  try {
    const secretsFile = readFileSync(secretsPath, 'utf8');
    secrets = parseYaml(secretsFile);
    logger.info({ path: secretsPath }, 'Loaded secrets');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load secrets');
  }

  // Merge config and secrets
  return {
    // Flink ZMQ endpoints
    flink_hostname: config.flink_hostname || 'localhost',
    ingestor_work_port: config.ingestor_work_port || 5555,
    ingestor_control_port: config.ingestor_control_port || 5556,

    // Kafka configuration
    kafka_brokers: config.kafka_brokers || ['localhost:9092'],
    kafka_topic: config.kafka_topic || 'market-0',

    // Worker configuration
    max_concurrent: config.max_concurrent || 10,
    poll_interval_ms: config.poll_interval_ms || 10000,

    ...secrets
  };
}
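
// Example config.yaml shape implied by the defaults above (editor's
// illustration; values are the built-in defaults, not part of this commit):
//
//   flink_hostname: localhost
//   ingestor_work_port: 5555
//   ingestor_control_port: 5556
//   kafka_brokers:
//     - localhost:9092
//   kafka_topic: market-0
//   max_concurrent: 10
//   poll_interval_ms: 10000
//
// Note: supported_exchanges (read by ZmqClient) is not forwarded by this merge,
// so it falls back to its default unless supplied via secrets.yaml.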

class IngestorWorker {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(
      config,
      logger.child({ component: 'kafka' })
    );
    this.ccxtFetcher = new CCXTFetcher(
      config,
      logger.child({ component: 'ccxt' })
    );
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      logger.child({ component: 'poller' })
    );

    // Track active requests
    this.activeRequests = new Map();
    this.isShutdown = false;
  }

  /**
   * Start the ingestor worker
   */
  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    // Connect to services
    await this.kafkaProducer.connect();
    await this.zmqClient.connect();

    // Start control message listener
    this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));

    // Start work loop
    this.workLoop();

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Main work loop - pull and process data requests
   */
  async workLoop() {
    while (!this.isShutdown) {
      try {
        // Check if we can handle more requests
        if (this.activeRequests.size >= this.config.max_concurrent) {
          await new Promise(resolve => setTimeout(resolve, 1000));
          continue;
        }

        // Pull next data request
        const request = await this.zmqClient.pullDataRequest();
        if (!request) {
          continue;
        }

        // Handle request asynchronously
        this.handleDataRequest(request).catch(error => {
          this.logger.error(
            { error: error.message, requestId: request.requestId },
            'Error handling data request'
          );
        });
      } catch (error) {
        if (!this.isShutdown) {
          this.logger.error({ error: error.message }, 'Error in work loop');
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    }
  }

  /**
   * Handle a data request
   */
  async handleDataRequest(request) {
    const { requestId: request_id, type, ticker } = request;

    this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');

    this.activeRequests.set(request_id, request);

    try {
      // HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
      // so protobufjs decodes it as undefined. Treat undefined as HISTORICAL_OHLC.
      const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
      const isRealtime = type === 'REALTIME_TICKS' || type === 1;

      if (isHistorical) {
        await this.handleHistoricalRequest(request);
      } else if (isRealtime) {
        await this.handleRealtimeRequest(request);
      } else {
        this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
      }
    } finally {
      // For historical requests, remove from active requests when done
      const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
      if (isHistorical) {
        this.activeRequests.delete(request_id);
      }
    }
  }

  /**
   * Handle historical OHLC request
   * ASYNC ARCHITECTURE: No response sent back. Data written to Kafka only.
   * Flink will process from Kafka, write to Iceberg, and publish notification.
   */
  async handleHistoricalRequest(request) {
    const { requestId: request_id, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;

    this.logger.info(
      { request_id, ticker, period_seconds, client_id },
      'Processing historical OHLC request (async mode - write to Kafka only)'
    );

    try {
      // Fetch historical data from exchange
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker,
        start_time,
        end_time,
        period_seconds,
        limit
      );

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Fetched data from exchange'
      );

      // Write to Kafka - THIS IS THE ONLY OUTPUT
      // Flink will:
      // 1. Read from Kafka
      // 2. Write to Iceberg
      // 3. Publish HistoryReadyNotification
      // 4. Client receives notification via relay pub/sub
      if (candles.length > 0) {
        // Add metadata to first candle for Flink tracking
        const enrichedCandles = candles.map((candle, idx) => ({
          ...candle,
          __metadata: idx === 0 ? {
            request_id,
            client_id,
            ticker,
            period_seconds,
            start_time,
            end_time
          } : undefined
        }));

        await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
      } else {
        // Write a marker message even if no data found
        // Flink will see this and publish a NOT_FOUND notification
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'NOT_FOUND',
          message: 'No data available for requested period'
        });
      }

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Completed historical OHLC request - data written to Kafka'
      );

      // NO RESPONSE SENT - Relay is stateless, client waits for pub/sub notification
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to process historical request'
      );

      // Write error marker to Kafka so Flink can notify client
      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'ERROR',
          error_message: error.message
        });
      } catch (kafkaError) {
        this.logger.error(
          { error: kafkaError.message, request_id },
          'Failed to write error marker to Kafka'
        );
      }

      // Do not throw - request is handled, Flink will notify client of error
    }
  }

  /**
   * Handle realtime tick subscription request
   */
  async handleRealtimeRequest(request) {
    const { requestId: request_id, ticker } = request;

    this.logger.info(
      { request_id, ticker },
      'Processing realtime subscription request'
    );

    try {
      // Start realtime polling
      this.realtimePoller.startSubscription(
        request_id,
        ticker,
        this.config.kafka_topic
      );
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to start realtime subscription'
      );
      this.activeRequests.delete(request_id);
      throw error;
    }
  }

  /**
   * Handle control messages from Flink
   */
  async handleControlMessage(message) {
    const { action, requestId: request_id } = message;

    this.logger.info({ action, request_id }, 'Received control message');

    switch (action) {
      case 'CANCEL':
        if (request_id) {
          // Cancel specific request
          this.realtimePoller.cancelSubscription(request_id);
          this.activeRequests.delete(request_id);
        }
        break;

      case 'SHUTDOWN':
        this.logger.info('Received shutdown signal');
        await this.shutdown();
        break;

      case 'CONFIG_UPDATE':
        // Handle config update if needed
        this.logger.info('Received config update');
        break;

      case 'HEARTBEAT':
        // Just acknowledge heartbeat
        break;

      default:
        this.logger.warn({ action }, 'Unknown control action');
    }
  }

  /**
   * Get worker status
   */
  getStatus() {
    return {
      activeRequests: this.activeRequests.size,
      maxConcurrent: this.config.max_concurrent,
      pollerStats: this.realtimePoller.getStats()
    };
  }

  /**
   * Shutdown worker gracefully
   */
  async shutdown() {
    if (this.isShutdown) {
      return;
    }

    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    // Stop polling
    this.realtimePoller.shutdown();

    // Close connections
    await this.ccxtFetcher.close();
    await this.kafkaProducer.disconnect();
    await this.zmqClient.shutdown();

    this.logger.info('Ingestor worker shutdown complete');
    process.exit(0);
  }
}

// Main entry point
async function main() {
  const config = loadConfig();
  const worker = new IngestorWorker(config, logger);

  // Handle shutdown signals
  process.on('SIGINT', () => worker.shutdown());
  process.on('SIGTERM', () => worker.shutdown());

  // Handle errors
  process.on('uncaughtException', error => {
    logger.error({ error }, 'Uncaught exception');
    worker.shutdown();
  });

  process.on('unhandledRejection', (reason, promise) => {
    logger.error({ reason }, 'Unhandled rejection');
  });

  // Start worker
  await worker.start();

  // Log status periodically
  setInterval(() => {
    const status = worker.getStatus();
    logger.info({ status }, 'Worker status');
  }, 60000);
}

// Run
main().catch(error => {
  logger.error({ error }, 'Fatal error');
  process.exit(1);
});
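
// How to run (editor's note, illustrative): the worker reads CONFIG_PATH
// (default /config/config.yaml), SECRETS_PATH (default /config/secrets.yaml)
// and LOG_LEVEL from the environment, e.g.
//   CONFIG_PATH=./config.yaml LOG_LEVEL=debug node src/index.js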

ingestor/src/kafka-producer.js (new file, 270 lines)
@@ -0,0 +1,270 @@
// Kafka producer for writing market data
import { Kafka } from 'kafkajs';
import { encodeMessage, MessageTypeId, Tick, OHLC, OHLCBatch } from './proto/messages.js';

export class KafkaProducer {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.kafka = new Kafka({
      clientId: 'ccxt-ingestor',
      brokers: config.kafka_brokers || ['localhost:9092'],
      logLevel: 0 // Error only
    });

    this.producer = this.kafka.producer();
    this.isConnected = false;
  }

  /**
   * Connect to Kafka
   */
  async connect() {
    await this.producer.connect();
    this.isConnected = true;
    this.logger.info('Connected to Kafka');
  }

  /**
   * Write a tick message to Kafka
   * @param {string} topic - Kafka topic name
   * @param {object} tickData - Tick data object
   */
  async writeTick(topic, tickData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick);
    const message = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [
        {
          key: tickData.ticker,
          value: message,
          timestamp: tickData.timestamp.toString()
        }
      ]
    });

    this.logger.debug({ ticker: tickData.ticker, topic }, 'Wrote tick to Kafka');
  }

  /**
   * Write multiple ticks to Kafka in batch
   * @param {string} topic - Kafka topic name
   * @param {Array<object>} ticksData - Array of tick data objects
   */
  async writeTicks(topic, ticksData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    if (ticksData.length === 0) {
      return;
    }

    const messages = ticksData.map(tickData => {
      const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick);
      const message = Buffer.concat([frame1, frame2]);

      return {
        key: tickData.ticker,
        value: message,
        timestamp: tickData.timestamp.toString()
      };
    });

    await this.producer.send({
      topic,
      messages
    });

    this.logger.debug(
      { count: ticksData.length, topic },
      'Wrote ticks batch to Kafka'
    );
  }

  /**
   * Write an OHLC message to Kafka
   * @param {string} topic - Kafka topic name
   * @param {object} ohlcData - OHLC data object
   */
  async writeOHLC(topic, ohlcData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, ohlcData, OHLC);
    const message = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [
        {
          key: ohlcData.ticker,
          value: message
        }
      ]
    });

    this.logger.debug({ ticker: ohlcData.ticker, topic }, 'Wrote OHLC to Kafka');
  }

  /**
   * Write multiple OHLC candles to Kafka as an OHLCBatch message
   * Uses protobuf encoding with metadata in batch wrapper
   * @param {string} topic - Kafka topic name
   * @param {Array<object>} ohlcData - Array of OHLC data objects (may include __metadata in first record)
   */
  async writeOHLCs(topic, ohlcData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    if (ohlcData.length === 0) {
      return;
    }

    // Extract metadata from first record if present
    const firstCandle = ohlcData[0];
    const metadata = firstCandle.__metadata;

    if (!metadata) {
      // No metadata - write individual OHLC messages (realtime mode)
      const messages = ohlcData.map(candle => {
        const protoCandle = {
          timestamp: candle.timestamp,
          ticker: candle.ticker,
          open: candle.open,
          high: candle.high,
          low: candle.low,
          close: candle.close,
          volume: candle.volume
        };

        const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, protoCandle, OHLC);
        const value = Buffer.concat([frame1, frame2]);

        return {
          key: candle.ticker,
          value
        };
      });

      await this.producer.send({
        topic,
        messages
      });

      this.logger.debug(
        { count: ohlcData.length, topic, type: 'individual' },
        'Wrote OHLC messages to Kafka'
      );
      return;
    }

    // Historical mode - write as OHLCBatch with metadata
    const batch = {
      metadata: {
        requestId: metadata.request_id,
        clientId: metadata.client_id,
        ticker: metadata.ticker,
        periodSeconds: metadata.period_seconds,
        startTime: metadata.start_time,
        endTime: metadata.end_time,
        status: metadata.status || 'OK',
        errorMessage: metadata.error_message
      },
      rows: ohlcData.map(candle => ({
        timestamp: candle.timestamp,
        ticker: candle.ticker,
        open: candle.open,
        high: candle.high,
        low: candle.low,
        close: candle.close,
        volume: candle.volume
      }))
    };

    // Encode as protobuf OHLCBatch with ZMQ envelope
    const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
    const value = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [
        {
          key: metadata.ticker,
          value
        }
      ]
    });

    this.logger.debug(
      { request_id: metadata.request_id, count: ohlcData.length, topic, type: 'batch' },
      'Wrote OHLCBatch to Kafka'
    );
  }

  /**
   * Write a marker message to Kafka for NOT_FOUND or ERROR cases
   * This allows Flink to publish notifications even when no data is available
   * @param {string} topic - Kafka topic name
   * @param {object} marker - Marker object with request metadata and status
   */
  async writeMarker(topic, marker) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    // Create an empty OHLCBatch with status in metadata
    const batch = {
      metadata: {
        requestId: marker.request_id,
        clientId: marker.client_id,
        ticker: marker.ticker,
        periodSeconds: marker.period_seconds,
        startTime: marker.start_time,
        endTime: marker.end_time,
        status: marker.status, // 'NOT_FOUND' or 'ERROR'
        errorMessage: marker.error_message || marker.message
      },
      rows: [] // Empty rows array indicates marker message
    };

    // Encode as protobuf OHLCBatch with ZMQ envelope
    const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
    const value = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [
        {
          key: marker.ticker,
          value
        }
      ]
    });

    this.logger.info(
      { request_id: marker.request_id, status: marker.status, topic },
      'Wrote marker to Kafka'
    );
  }

  /**
   * Disconnect from Kafka
   */
  async disconnect() {
    if (this.isConnected) {
      await this.producer.disconnect();
      this.isConnected = false;
      this.logger.info('Disconnected from Kafka');
    }
  }
}

ingestor/src/realtime-poller.js (new file, 217 lines)
@@ -0,0 +1,217 @@
// Realtime tick data poller using 10-second polling
export class RealtimePoller {
  constructor(ccxtFetcher, kafkaProducer, logger) {
    this.ccxtFetcher = ccxtFetcher;
    this.kafkaProducer = kafkaProducer;
    this.logger = logger;

    // Active subscriptions: requestId -> subscription info
    this.subscriptions = new Map();

    // Poll interval in milliseconds (10 seconds)
    this.pollInterval = 10000;

    // Main polling loop
    this.pollingLoop = null;
  }

  /**
   * Start a realtime subscription
   * @param {string} requestId - Unique request ID
   * @param {string} ticker - Ticker to subscribe to
   * @param {string} kafkaTopic - Kafka topic to write to
   */
  startSubscription(requestId, ticker, kafkaTopic) {
    if (this.subscriptions.has(requestId)) {
      this.logger.warn({ requestId }, 'Subscription already exists');
      return;
    }

    const subscription = {
      requestId,
      ticker,
      kafkaTopic,
      lastTimestamp: null,
      isActive: true,
      errorCount: 0
    };

    this.subscriptions.set(requestId, subscription);

    this.logger.info(
      { requestId, ticker, kafkaTopic },
      'Started realtime subscription'
    );

    // Start polling loop if not already running
    if (!this.pollingLoop) {
      this.startPollingLoop();
    }
  }

  /**
   * Cancel a realtime subscription
   * @param {string} requestId - Request ID to cancel
   */
  cancelSubscription(requestId) {
    const subscription = this.subscriptions.get(requestId);
    if (subscription) {
      subscription.isActive = false;
      this.subscriptions.delete(requestId);

      this.logger.info(
        { requestId, ticker: subscription.ticker },
        'Cancelled realtime subscription'
      );
    }

    // Stop polling loop if no active subscriptions
    if (this.subscriptions.size === 0 && this.pollingLoop) {
      clearInterval(this.pollingLoop);
      this.pollingLoop = null;
      this.logger.info('Stopped polling loop - no active subscriptions');
    }
  }

  /**
   * Start the main polling loop
   */
  startPollingLoop() {
    this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');

    this.pollingLoop = setInterval(async () => {
      await this.pollAllSubscriptions();
    }, this.pollInterval);

    // Do an immediate poll
    this.pollAllSubscriptions();
  }

  /**
   * Poll all active subscriptions
   */
  async pollAllSubscriptions() {
    const subscriptions = Array.from(this.subscriptions.values());

    // Poll subscriptions in parallel
    await Promise.allSettled(
      subscriptions.map(sub => this.pollSubscription(sub))
    );
  }

  /**
   * Poll a single subscription
   * @param {object} subscription - Subscription object
   */
  async pollSubscription(subscription) {
    if (!subscription.isActive) {
      return;
    }

    const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription;

    try {
      // Fetch trades since last timestamp
      const trades = await this.ccxtFetcher.fetchRecentTrades(
        ticker,
        lastTimestamp
      );

      if (trades.length === 0) {
        this.logger.debug({ requestId, ticker }, 'No new trades');
        return;
      }

      // Filter out trades we've already seen
      let newTrades = trades;
      if (lastTimestamp) {
        const lastTs = BigInt(lastTimestamp);
        newTrades = trades.filter(t => BigInt(t.timestamp) > lastTs);
      }

      if (newTrades.length > 0) {
        // Write trades to Kafka
        await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);

        // Update last timestamp
        const latestTrade = newTrades[newTrades.length - 1];
        subscription.lastTimestamp = latestTrade.timestamp;

        this.logger.info(
          {
            requestId,
            ticker,
            count: newTrades.length,
            kafkaTopic
          },
          'Wrote new trades to Kafka'
        );
      }

      // Reset error count on success
      subscription.errorCount = 0;
    } catch (error) {
      subscription.errorCount++;

      this.logger.error(
        {
          error: error.message,
          requestId,
          ticker,
          errorCount: subscription.errorCount
        },
        'Error polling subscription'
      );

      // Cancel subscription after too many errors
      if (subscription.errorCount >= 5) {
        this.logger.error(
          { requestId, ticker },
          'Cancelling subscription due to repeated errors'
        );
        this.cancelSubscription(requestId);
      }
    }
  }

  /**
   * Get subscription statistics
   */
  getStats() {
    const stats = {
      totalSubscriptions: this.subscriptions.size,
      subscriptions: []
    };

    for (const [requestId, sub] of this.subscriptions) {
      stats.subscriptions.push({
        requestId,
        ticker: sub.ticker,
        isActive: sub.isActive,
        errorCount: sub.errorCount,
        lastTimestamp: sub.lastTimestamp
      });
    }

    return stats;
  }

  /**
   * Shutdown poller and cancel all subscriptions
   */
  shutdown() {
    this.logger.info('Shutting down realtime poller');

    if (this.pollingLoop) {
      clearInterval(this.pollingLoop);
      this.pollingLoop = null;
    }

    // Mark all subscriptions as inactive
    for (const subscription of this.subscriptions.values()) {
      subscription.isActive = false;
    }

    this.subscriptions.clear();
  }
}

ingestor/src/zmq-client.js (new file, 116 lines)
@@ -0,0 +1,116 @@
// ZeroMQ client for connecting to Flink control channels
import * as zmq from 'zeromq';
import { decodeMessage } from './proto/messages.js';

export class ZmqClient {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    // Work queue - SUB socket to receive data requests with exchange prefix filtering
    this.workSocket = null;

    // NOTE: NO RESPONSE SOCKET - Async architecture via Kafka!
    // Ingestors write data to Kafka only
    // Flink processes and publishes notifications

    this.isShutdown = false;
    this.supportedExchanges = config.supported_exchanges || ['BINANCE', 'COINBASE'];
  }

  /**
   * Connect to Relay ZMQ endpoints
   */
  async connect() {
    const { flink_hostname, ingestor_work_port } = this.config;

    // Connect to work queue (SUB with exchange prefix filtering)
    this.workSocket = new zmq.Subscriber();
    const workEndpoint = `tcp://${flink_hostname}:${ingestor_work_port}`;
    await this.workSocket.connect(workEndpoint);

    // Subscribe to each supported exchange prefix
    for (const exchange of this.supportedExchanges) {
      const prefix = `${exchange}:`;
      this.workSocket.subscribe(prefix);
      this.logger.info(`Subscribed to exchange prefix: ${prefix}`);
    }
    this.logger.info(`Connected to work queue at ${workEndpoint}`);
    this.logger.info('ASYNC MODE: No response socket - data flows via Kafka → Flink → pub/sub notification');
  }

  /**
   * Pull a data request from the work queue
   * @returns {Promise<object>} Decoded DataRequest message
   */
  async pullDataRequest() {
    if (this.isShutdown) {
      return null;
    }

    try {
      const frames = await this.workSocket.receive();
      this.logger.info({
        frameCount: frames.length,
        frame0Len: frames[0]?.length,
        frame1Len: frames[1]?.length,
        frame2Len: frames[2]?.length,
        frame0: frames[0]?.toString('utf8').substring(0, 50),
        frame1Hex: frames[1]?.toString('hex').substring(0, 20),
        frame2Hex: frames[2]?.toString('hex').substring(0, 20)
      }, 'Received raw ZMQ frames');

      // First frame is the topic (exchange prefix), skip it
      // Remaining frames are: [version_frame, message_frame]
      if (frames.length < 3) {
        this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count');
        return null;
      }
      const messageFrames = frames.slice(1); // Skip topic, keep version + message
      const { version, typeId, message } = decodeMessage(messageFrames);
      this.logger.info({
        version,
        typeId: `0x${typeId.toString(16)}`,
        requestId: message.requestId,
        type: message.type,
        typeOf: typeof message.type,
        ticker: message.ticker
      }, 'Decoded data request');
      return message;
    } catch (error) {
      if (!this.isShutdown) {
        this.logger.error({ error: error.message, stack: error.stack }, 'Error receiving data request');
      }
      return null;
    }
  }

  /**
   * Start listening for control messages in the background
   * @param {Function} handler - Callback function to handle control messages
   *
   * NOTE: Control channel not implemented yet. This is a stub for future use.
   * For now, just log and ignore.
   */
  startControlListener(handler) {
    this.logger.info('Control channel listener stub - not implemented yet');
    // TODO: Implement control channel when needed
    // Control messages would be used for:
    // - Canceling realtime subscriptions
    // - Graceful shutdown signals
    // - Configuration updates
  }

  /**
   * Shutdown and close connections
   */
  async shutdown() {
    this.isShutdown = true;
    this.logger.info('Shutting down ZMQ connections');

    if (this.workSocket) {
      await this.workSocket.close();
    }
  }
}