backend redesign

2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

411 ingestor/src/index.js (new file)

@@ -0,0 +1,411 @@
#!/usr/bin/env node
// Main ingestor worker process
import { readFileSync } from 'fs';
import { parse as parseYaml } from 'yaml';
import pino from 'pino';
import { ZmqClient } from './zmq-client.js';
import { KafkaProducer } from './kafka-producer.js';
import { CCXTFetcher } from './ccxt-fetcher.js';
import { RealtimePoller } from './realtime-poller.js';

// Logger setup
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  transport: {
    target: 'pino-pretty',
    options: {
      colorize: true,
      translateTime: 'SYS:standard',
      ignore: 'pid,hostname'
    }
  }
});
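// With these options pino-pretty renders entries roughly like (illustrative
// output; the exact layout depends on the pino-pretty version):
//   [2026-03-11 18:47:11.123 -0400] INFO: Loaded configuration
//       path: "/config/config.yaml"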

// Load configuration
function loadConfig() {
  const configPath = process.env.CONFIG_PATH || '/config/config.yaml';
  const secretsPath = process.env.SECRETS_PATH || '/config/secrets.yaml';
  let config = {};
  let secrets = {};

  try {
    const configFile = readFileSync(configPath, 'utf8');
    config = parseYaml(configFile);
    logger.info({ path: configPath }, 'Loaded configuration');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load config, using defaults');
  }

  try {
    const secretsFile = readFileSync(secretsPath, 'utf8');
    secrets = parseYaml(secretsFile);
    logger.info({ path: secretsPath }, 'Loaded secrets');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load secrets');
  }

  // Merge config and secrets; secrets are spread last, so they win on key collisions
  return {
    // Flink ZMQ endpoints
    flink_hostname: config.flink_hostname || 'localhost',
    ingestor_work_port: config.ingestor_work_port || 5555,
    ingestor_control_port: config.ingestor_control_port || 5556,
    // Kafka configuration
    kafka_brokers: config.kafka_brokers || ['localhost:9092'],
    kafka_topic: config.kafka_topic || 'market-0',
    // Worker configuration
    max_concurrent: config.max_concurrent || 10,
    poll_interval_ms: config.poll_interval_ms || 10000,
    ...secrets
  };
}
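
// For reference, a config.yaml matching the keys read above might look like
// this (key names are the real ones from loadConfig; values are illustrative):
//
//   flink_hostname: flink.internal
//   ingestor_work_port: 5555
//   ingestor_control_port: 5556
//   kafka_brokers:
//     - kafka-1:9092
//     - kafka-2:9092
//   kafka_topic: market-0
//   max_concurrent: 10
//   poll_interval_ms: 10000
//
// secrets.yaml (e.g. exchange API credentials) is merged in on top.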

class IngestorWorker {
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;
    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(
      config,
      logger.child({ component: 'kafka' })
    );
    this.ccxtFetcher = new CCXTFetcher(
      config,
      logger.child({ component: 'ccxt' })
    );
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      logger.child({ component: 'poller' })
    );

    // Track active requests
    this.activeRequests = new Map();
    this.isShutdown = false;
  }

  /**
   * Start the ingestor worker
   */
  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    // Connect to services
    await this.kafkaProducer.connect();
    await this.zmqClient.connect();

    // Start control message listener
    this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));

    // Start the work loop (intentionally not awaited; it runs until shutdown)
    this.workLoop();

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Main work loop - pull and process data requests
   */
  async workLoop() {
    while (!this.isShutdown) {
      try {
        // Check if we can handle more requests
        if (this.activeRequests.size >= this.config.max_concurrent) {
          await new Promise(resolve => setTimeout(resolve, 1000));
          continue;
        }

        // Pull next data request
        const request = await this.zmqClient.pullDataRequest();
        if (!request) {
          continue;
        }

        // Handle request asynchronously
        this.handleDataRequest(request).catch(error => {
          this.logger.error(
            { error: error.message, requestId: request.requestId },
            'Error handling data request'
          );
        });
      } catch (error) {
        if (!this.isShutdown) {
          this.logger.error({ error: error.message }, 'Error in work loop');
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    }
  }

  /**
   * Handle a data request
   */
  async handleDataRequest(request) {
    const { requestId: request_id, type, ticker } = request;
    this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');
    this.activeRequests.set(request_id, request);

    // HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
    // so protobufjs decodes it as undefined. Treat undefined as HISTORICAL_OHLC.
    const isHistorical = type === undefined || type === 'HISTORICAL_OHLC' || type === 0;
    const isRealtime = type === 'REALTIME_TICKS' || type === 1;

    try {
      if (isHistorical) {
        await this.handleHistoricalRequest(request);
      } else if (isRealtime) {
        await this.handleRealtimeRequest(request);
      } else {
        this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
      }
    } finally {
      // Historical requests are one-shot, so drop them when done; realtime
      // subscriptions stay in activeRequests until they are cancelled
      if (isHistorical) {
        this.activeRequests.delete(request_id);
      }
    }
  }

  /**
   * Handle a historical OHLC request.
   * ASYNC ARCHITECTURE: No response is sent back; data is written to Kafka only.
   * Flink will process it from Kafka, write to Iceberg, and publish a notification.
   */
  async handleHistoricalRequest(request) {
    const { requestId: request_id, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;

    this.logger.info(
      { request_id, ticker, period_seconds, client_id },
      'Processing historical OHLC request (async mode - write to Kafka only)'
    );

    try {
      // Fetch historical data from exchange
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker,
        start_time,
        end_time,
        period_seconds,
        limit
      );

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Fetched data from exchange'
      );

      // Write to Kafka - THIS IS THE ONLY OUTPUT.
      // Flink will:
      //   1. Read from Kafka
      //   2. Write to Iceberg
      //   3. Publish HistoryReadyNotification
      //   4. Client receives the notification via relay pub/sub
      if (candles.length > 0) {
        // Attach metadata to the first candle so Flink can track the request
        const enrichedCandles = candles.map((candle, idx) => ({
          ...candle,
          __metadata: idx === 0 ? {
            request_id,
            client_id,
            ticker,
            period_seconds,
            start_time,
            end_time
          } : undefined
        }));
        await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
      } else {
        // Write a marker message even if no data was found;
        // Flink will see it and publish a NOT_FOUND notification
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'NOT_FOUND',
          message: 'No data available for requested period'
        });
      }

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Completed historical OHLC request - data written to Kafka'
      );
      // NO RESPONSE SENT - the relay is stateless; the client waits for a pub/sub notification
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to process historical request'
      );

      // Write an error marker to Kafka so Flink can notify the client
      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'ERROR',
          error_message: error.message
        });
      } catch (kafkaError) {
        this.logger.error(
          { error: kafkaError.message, request_id },
          'Failed to write error marker to Kafka'
        );
      }
      // Do not rethrow - the request is handled; Flink will notify the client of the error
    }
  }

  /**
   * Handle a realtime tick subscription request
   */
  async handleRealtimeRequest(request) {
    const { requestId: request_id, ticker } = request;
    this.logger.info(
      { request_id, ticker },
      'Processing realtime subscription request'
    );

    try {
      // Start realtime polling
      this.realtimePoller.startSubscription(
        request_id,
        ticker,
        this.config.kafka_topic
      );
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to start realtime subscription'
      );
      this.activeRequests.delete(request_id);
      throw error;
    }
  }

  /**
   * Handle control messages from Flink
   */
  async handleControlMessage(message) {
    const { action, requestId: request_id } = message;
    this.logger.info({ action, request_id }, 'Received control message');
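
    // Illustrative message shapes (field names match the destructuring above;
    // the id is hypothetical):
    //   { action: 'CANCEL', requestId: 'req-42' }
    //   { action: 'SHUTDOWN' }
    //   { action: 'HEARTBEAT' }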
    switch (action) {
      case 'CANCEL':
        if (request_id) {
          // Cancel a specific request
          this.realtimePoller.cancelSubscription(request_id);
          this.activeRequests.delete(request_id);
        }
        break;
      case 'SHUTDOWN':
        this.logger.info('Received shutdown signal');
        await this.shutdown();
        break;
      case 'CONFIG_UPDATE':
        // Handle config update if needed
        this.logger.info('Received config update');
        break;
      case 'HEARTBEAT':
        // Just acknowledge heartbeat
        break;
      default:
        this.logger.warn({ action }, 'Unknown control action');
    }
  }

  /**
   * Get worker status
   */
  getStatus() {
    return {
      activeRequests: this.activeRequests.size,
      maxConcurrent: this.config.max_concurrent,
      pollerStats: this.realtimePoller.getStats()
    };
  }

  /**
   * Shut down the worker gracefully
   */
  async shutdown() {
    if (this.isShutdown) {
      return;
    }
    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    // Stop polling
    this.realtimePoller.shutdown();

    // Close connections
    await this.ccxtFetcher.close();
    await this.kafkaProducer.disconnect();
    await this.zmqClient.shutdown();

    this.logger.info('Ingestor worker shutdown complete');
    process.exit(0);
  }
}

// Main entry point
async function main() {
  const config = loadConfig();
  const worker = new IngestorWorker(config, logger);

  // Handle shutdown signals
  process.on('SIGINT', () => worker.shutdown());
  process.on('SIGTERM', () => worker.shutdown());

  // Handle errors
  process.on('uncaughtException', error => {
    logger.error({ error }, 'Uncaught exception');
    worker.shutdown();
  });
  process.on('unhandledRejection', (reason, promise) => {
    logger.error({ reason }, 'Unhandled rejection');
  });

  // Start worker
  await worker.start();

  // Log status periodically
  setInterval(() => {
    const status = worker.getStatus();
    logger.info({ status }, 'Worker status');
  }, 60000);
}

// Run
main().catch(error => {
  logger.error({ error }, 'Fatal error');
  process.exit(1);
});
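
// Typical invocation (paths illustrative; CONFIG_PATH, SECRETS_PATH and LOG_LEVEL
// are the env vars read above):
//   LOG_LEVEL=debug CONFIG_PATH=./config.yaml SECRETS_PATH=./secrets.yaml node src/index.js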