backend redesign
ingestor/src/index.js (new file, 411 lines)
@@ -0,0 +1,411 @@
#!/usr/bin/env node

// Main ingestor worker process

import { readFileSync } from 'fs';
import { parse as parseYaml } from 'yaml';
import pino from 'pino';

import { ZmqClient } from './zmq-client.js';
import { KafkaProducer } from './kafka-producer.js';
import { CCXTFetcher } from './ccxt-fetcher.js';
import { RealtimePoller } from './realtime-poller.js';

// Logger setup
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  transport: {
    target: 'pino-pretty',
    options: {
      colorize: true,
      translateTime: 'SYS:standard',
      ignore: 'pid,hostname'
    }
  }
});
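
// Note: pino resolves the 'pino-pretty' transport target at runtime, so
// pino-pretty must be installed alongside pino for this logger to start.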

// Load configuration
function loadConfig() {
  const configPath = process.env.CONFIG_PATH || '/config/config.yaml';
  const secretsPath = process.env.SECRETS_PATH || '/config/secrets.yaml';

  let config = {};
  let secrets = {};

  try {
    const configFile = readFileSync(configPath, 'utf8');
    config = parseYaml(configFile);
    logger.info({ path: configPath }, 'Loaded configuration');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load config, using defaults');
  }

  try {
    const secretsFile = readFileSync(secretsPath, 'utf8');
    secrets = parseYaml(secretsFile);
    logger.info({ path: secretsPath }, 'Loaded secrets');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load secrets');
  }

  // Merge config and secrets (secrets take precedence on key collisions)
  return {
    // Flink ZMQ endpoints
    flink_hostname: config.flink_hostname || 'localhost',
    ingestor_work_port: config.ingestor_work_port || 5555,
    ingestor_control_port: config.ingestor_control_port || 5556,

    // Kafka configuration
    kafka_brokers: config.kafka_brokers || ['localhost:9092'],
    kafka_topic: config.kafka_topic || 'market-0',

    // Worker configuration
    max_concurrent: config.max_concurrent || 10,
    poll_interval_ms: config.poll_interval_ms || 10000,

    ...secrets
  };
}
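
// Illustrative config.yaml matching the defaults above. Key names come from
// the merge in loadConfig(); the values are placeholders, not a real
// deployment:
//
//   flink_hostname: localhost
//   ingestor_work_port: 5555
//   ingestor_control_port: 5556
//   kafka_brokers:
//     - localhost:9092
//   kafka_topic: market-0
//   max_concurrent: 10
//   poll_interval_ms: 10000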

class IngestorWorker {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(
      config,
      logger.child({ component: 'kafka' })
    );
    this.ccxtFetcher = new CCXTFetcher(
      config,
      logger.child({ component: 'ccxt' })
    );
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      logger.child({ component: 'poller' })
    );

    // Track active requests
    this.activeRequests = new Map();
    this.isShutdown = false;
  }

  /**
   * Start the ingestor worker
   */
  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    // Connect to services
    await this.kafkaProducer.connect();
    await this.zmqClient.connect();

    // Start control message listener
    this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));

    // Start work loop (intentionally not awaited: it runs for the lifetime
    // of the worker, and start() should return once startup is complete)
    this.workLoop();

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Main work loop - pull and process data requests
   */
  async workLoop() {
    while (!this.isShutdown) {
      try {
        // Check if we can handle more requests
        if (this.activeRequests.size >= this.config.max_concurrent) {
          await new Promise(resolve => setTimeout(resolve, 1000));
          continue;
        }

        // Pull next data request
        const request = await this.zmqClient.pullDataRequest();
        if (!request) {
          continue;
        }

        // Handle the request asynchronously so the loop keeps pulling
        this.handleDataRequest(request).catch(error => {
          this.logger.error(
            { error: error.message, requestId: request.requestId },
            'Error handling data request'
          );
        });
      } catch (error) {
        if (!this.isShutdown) {
          this.logger.error({ error: error.message }, 'Error in work loop');
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    }
  }

  /**
   * Handle a data request
   */
  async handleDataRequest(request) {
    const { requestId: request_id, type, ticker } = request;

    this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');

    this.activeRequests.set(request_id, request);

    // HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
    // so protobufjs decodes it as undefined. Treat undefined as HISTORICAL_OHLC.
    const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
    const isRealtime = type === 'REALTIME_TICKS' || type === 1;

    try {
      if (isHistorical) {
        await this.handleHistoricalRequest(request);
      } else if (isRealtime) {
        await this.handleRealtimeRequest(request);
      } else {
        this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
      }
    } finally {
      // Historical requests are one-shot: remove them once done.
      // Realtime subscriptions stay active until cancelled.
      if (isHistorical) {
        this.activeRequests.delete(request_id);
      }
    }
  }
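
  // Illustrative decoded request, with the shape inferred from the
  // destructuring in handleDataRequest/handleHistoricalRequest (values are
  // placeholders; the actual wire format is defined by the protobuf schema):
  //   {
  //     requestId: 'req-123',
  //     clientId: 'client-7',
  //     type: undefined,          // HISTORICAL_OHLC (0) is omitted on the wire
  //     ticker: 'BTC/USDT',
  //     historical: {
  //       startTime: 1700000000000,
  //       endTime: 1700086400000,
  //       periodSeconds: 60,
  //       limit: 1000
  //     }
  //   }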

  /**
   * Handle historical OHLC request.
   * ASYNC ARCHITECTURE: No response is sent back. Data is written to Kafka only.
   * Flink will process from Kafka, write to Iceberg, and publish a notification.
   */
  async handleHistoricalRequest(request) {
    const { requestId: request_id, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;

    this.logger.info(
      { request_id, ticker, period_seconds, client_id },
      'Processing historical OHLC request (async mode - write to Kafka only)'
    );

    try {
      // Fetch historical data from the exchange
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker,
        start_time,
        end_time,
        period_seconds,
        limit
      );

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Fetched data from exchange'
      );

      // Write to Kafka - THIS IS THE ONLY OUTPUT
      // Flink will:
      //   1. Read from Kafka
      //   2. Write to Iceberg
      //   3. Publish HistoryReadyNotification
      //   4. Client receives the notification via relay pub/sub
      if (candles.length > 0) {
        // Attach request metadata to the first candle so Flink can track the batch
        const enrichedCandles = candles.map((candle, idx) => ({
          ...candle,
          __metadata: idx === 0 ? {
            request_id,
            client_id,
            ticker,
            period_seconds,
            start_time,
            end_time
          } : undefined
        }));

        await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
      } else {
        // Write a marker message even if no data was found;
        // Flink will see this and publish a NOT_FOUND notification
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'NOT_FOUND',
          message: 'No data available for requested period'
        });
      }

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Completed historical OHLC request - data written to Kafka'
      );

      // NO RESPONSE SENT - the relay is stateless; the client waits for a pub/sub notification

    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to process historical request'
      );

      // Write an error marker to Kafka so Flink can notify the client
      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'ERROR',
          error_message: error.message
        });
      } catch (kafkaError) {
        this.logger.error(
          { error: kafkaError.message, request_id },
          'Failed to write error marker to Kafka'
        );
      }

      // Do not rethrow - the request is handled; Flink will notify the client of the error
    }
  }

  /**
   * Handle realtime tick subscription request
   */
  async handleRealtimeRequest(request) {
    const { requestId: request_id, ticker } = request;

    this.logger.info(
      { request_id, ticker },
      'Processing realtime subscription request'
    );

    try {
      // Start realtime polling
      this.realtimePoller.startSubscription(
        request_id,
        ticker,
        this.config.kafka_topic
      );
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to start realtime subscription'
      );
      this.activeRequests.delete(request_id);
      throw error;
    }
  }

  /**
   * Handle control messages from Flink
   */
  async handleControlMessage(message) {
    const { action, requestId: request_id } = message;

    this.logger.info({ action, request_id }, 'Received control message');

    switch (action) {
      case 'CANCEL':
        if (request_id) {
          // Cancel a specific request
          this.realtimePoller.cancelSubscription(request_id);
          this.activeRequests.delete(request_id);
        }
        break;

      case 'SHUTDOWN':
        this.logger.info('Received shutdown signal');
        await this.shutdown();
        break;

      case 'CONFIG_UPDATE':
        // Handle config update if needed
        this.logger.info('Received config update');
        break;

      case 'HEARTBEAT':
        // Just acknowledge heartbeat
        break;

      default:
        this.logger.warn({ action }, 'Unknown control action');
    }
  }
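
  // Illustrative control payloads for the switch above (shape inferred from
  // the destructuring; the actual encoding is handled by zmq-client.js):
  //   { action: 'CANCEL', requestId: 'req-123' }
  //   { action: 'SHUTDOWN' }
  //   { action: 'HEARTBEAT' }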

  /**
   * Get worker status
   */
  getStatus() {
    return {
      activeRequests: this.activeRequests.size,
      maxConcurrent: this.config.max_concurrent,
      pollerStats: this.realtimePoller.getStats()
    };
  }

  /**
   * Shutdown worker gracefully
   */
  async shutdown() {
    if (this.isShutdown) {
      return;
    }

    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    // Stop polling
    this.realtimePoller.shutdown();

    // Close connections
    await this.ccxtFetcher.close();
    await this.kafkaProducer.disconnect();
    await this.zmqClient.shutdown();

    this.logger.info('Ingestor worker shutdown complete');
    process.exit(0);
  }
}

// Main entry point
async function main() {
  const config = loadConfig();
  const worker = new IngestorWorker(config, logger);

  // Handle shutdown signals
  process.on('SIGINT', () => worker.shutdown());
  process.on('SIGTERM', () => worker.shutdown());

  // Handle errors (shutdown() exits the process once cleanup finishes)
  process.on('uncaughtException', error => {
    logger.error({ error }, 'Uncaught exception');
    worker.shutdown();
  });

  process.on('unhandledRejection', reason => {
    logger.error({ reason }, 'Unhandled rejection');
  });

  // Start worker
  await worker.start();

  // Log status periodically
  setInterval(() => {
    const status = worker.getStatus();
    logger.info({ status }, 'Worker status');
  }, 60000);
}

// Run
main().catch(error => {
  logger.error({ error }, 'Fatal error');
  process.exit(1);
});