backend redesign
This commit is contained in:
8
ingestor/.gitignore
vendored
Normal file
8
ingestor/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
node_modules/
|
||||
config.yaml
|
||||
secrets.yaml
|
||||
*.log
|
||||
.env
|
||||
.DS_Store
|
||||
src/proto/
|
||||
protobuf/
|
||||
30
ingestor/Dockerfile
Normal file
30
ingestor/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
# Runtime image for the CCXT market data ingestor (Node 20, Alpine base).
FROM node:20-alpine

# Install protobuf compiler and build toolchain (only needed if the optional
# proto:compile step below is enabled at build time)
RUN apk add --no-cache protobuf protobuf-dev build-base python3

WORKDIR /app

# Copy package files first so the dependency layer is cached independently
# of source-code changes
COPY package*.json ./

# Install dependencies
# NOTE(review): `npm ci --omit=dev` would give reproducible production
# installs from the lockfile — confirm a package-lock.json is committed.
RUN npm install

# Copy source code
COPY src ./src/
COPY protobuf ./protobuf/

# Compile protobufs (if using proto:compile script)
# RUN npm run proto:compile

# Create config directory; config.yaml / secrets.yaml are expected to be
# volume-mounted here at runtime (see CONFIG_PATH / SECRETS_PATH below)
RUN mkdir -p /config

# Set environment variables
ENV CONFIG_PATH=/config/config.yaml
ENV SECRETS_PATH=/config/secrets.yaml
ENV NODE_ENV=production

# Run the ingestor
CMD ["node", "src/index.js"]
|
||||
226
ingestor/README.md
Normal file
226
ingestor/README.md
Normal file
@@ -0,0 +1,226 @@
|
||||
# CCXT Market Data Ingestor
|
||||
|
||||
A NodeJS-based market data ingestor that uses CCXT to fetch historical OHLC data and realtime tick data from cryptocurrency exchanges. Integrates with Apache Flink via ZeroMQ for work distribution and writes data to Kafka.
|
||||
|
||||
## Architecture
|
||||
|
||||
The ingestor is a worker process that:
|
||||
1. Connects to Flink's ZMQ work queue (PULL socket) to receive data requests
|
||||
2. Connects to Flink's ZMQ control channel (SUB socket) to receive control messages
|
||||
3. Fetches market data from exchanges using CCXT
|
||||
4. Writes data to Kafka using the protobuf protocol
|
||||
|
||||
### Data Request Types
|
||||
|
||||
#### Historical OHLC
|
||||
- Fetches historical candlestick data for a specified time range
|
||||
- Uses CCXT's `fetchOHLCV` method
|
||||
- Writes OHLC messages to Kafka
|
||||
- Request is completed and removed from queue after processing
|
||||
|
||||
#### Realtime Ticks
|
||||
- Subscribes to realtime trade data
|
||||
- Uses 10-second polling to fetch recent trades via `fetchTrades`
|
||||
- Writes Tick messages to Kafka `market-0` topic
|
||||
- Subscription persists until cancelled by Flink control message
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Create `config.yaml` based on `config.example.yaml`:
|
||||
|
||||
```yaml
|
||||
# Flink ZMQ endpoints
|
||||
flink_hostname: localhost
|
||||
ingestor_work_port: 5555
|
||||
ingestor_control_port: 5556
|
||||
|
||||
# Kafka configuration
|
||||
kafka_brokers:
|
||||
- localhost:9092
|
||||
kafka_topic: market-0
|
||||
|
||||
# Worker configuration
|
||||
max_concurrent: 10
|
||||
poll_interval_ms: 10000
|
||||
```
|
||||
|
||||
Optional `secrets.yaml` for sensitive configuration.
|
||||
|
||||
## Usage
|
||||
|
||||
### Development
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
### Production
|
||||
```bash
|
||||
npm start
|
||||
```
|
||||
|
||||
### Docker
|
||||
```bash
|
||||
docker build -t ccxt-ingestor .
|
||||
docker run -v /path/to/config:/config ccxt-ingestor
|
||||
```
|
||||
|
||||
## Ticker Format
|
||||
|
||||
Tickers must be in the format: `EXCHANGE:SYMBOL`
|
||||
|
||||
Examples:
|
||||
- `BINANCE:BTC/USDT`
|
||||
- `COINBASE:ETH/USD`
|
||||
- `KRAKEN:XRP/EUR`
|
||||
|
||||
## Protocol
|
||||
|
||||
### ZeroMQ Message Format
|
||||
|
||||
All messages use a two-frame envelope:
|
||||
```
|
||||
Frame 1: [1 byte: protocol version = 0x01]
|
||||
Frame 2: [1 byte: message type ID][N bytes: protobuf message]
|
||||
```
|
||||
|
||||
### Message Type IDs
|
||||
- `0x01`: DataRequest
|
||||
- `0x02`: IngestorControl
|
||||
- `0x03`: Tick
|
||||
- `0x04`: OHLC
|
||||
|
||||
### DataRequest (from Flink)
|
||||
|
||||
```protobuf
|
||||
message DataRequest {
|
||||
string request_id = 1;
|
||||
RequestType type = 2; // HISTORICAL_OHLC or REALTIME_TICKS
|
||||
string ticker = 3;
|
||||
optional HistoricalParams historical = 4;
|
||||
optional RealtimeParams realtime = 5;
|
||||
}
|
||||
```
|
||||
|
||||
### IngestorControl (from Flink)
|
||||
|
||||
```protobuf
|
||||
message IngestorControl {
|
||||
ControlAction action = 1; // CANCEL, SHUTDOWN, CONFIG_UPDATE, HEARTBEAT
|
||||
optional string request_id = 2;
|
||||
optional IngestorConfig config = 3;
|
||||
}
|
||||
```
|
||||
|
||||
### Tick (to Kafka)
|
||||
|
||||
```protobuf
|
||||
message Tick {
|
||||
string trade_id = 1;
|
||||
string ticker = 2;
|
||||
uint64 timestamp = 3; // microseconds
|
||||
int64 price = 4; // fixed-point (10^8)
|
||||
int64 amount = 5; // fixed-point (10^8)
|
||||
int64 quote_amount = 6; // fixed-point (10^8)
|
||||
bool taker_buy = 7;
|
||||
}
|
||||
```
|
||||
|
||||
### OHLC (to Kafka)
|
||||
|
||||
```protobuf
|
||||
message OHLC {
|
||||
int64 open = 2; // fixed-point (10^8)
|
||||
int64 high = 3;
|
||||
int64 low = 4;
|
||||
int64 close = 5;
|
||||
optional int64 volume = 6;
|
||||
optional int64 open_time = 9; // microseconds
|
||||
optional int64 close_time = 12;
|
||||
string ticker = 14;
|
||||
}
|
||||
```
|
||||
|
||||
## Fixed-Point Encoding
|
||||
|
||||
All prices and amounts are encoded as fixed-point integers using 8 decimal places (denominator = 10^8):
|
||||
- Example: 123.45678901 → 12345678901
|
||||
- This provides precision while avoiding floating-point errors
|
||||
|
||||
## Components
|
||||
|
||||
### `src/index.js`
|
||||
Main worker process that coordinates all components and handles the work loop.
|
||||
|
||||
### `src/zmq-client.js`
|
||||
ZeroMQ client for connecting to Flink's work queue and control channel.
|
||||
|
||||
### `src/kafka-producer.js`
|
||||
Kafka producer for writing protobuf-encoded messages to Kafka topics.
|
||||
|
||||
### `src/ccxt-fetcher.js`
|
||||
CCXT wrapper for fetching historical OHLC and recent trades from exchanges.
|
||||
|
||||
### `src/realtime-poller.js`
|
||||
Manages realtime subscriptions with 10-second polling for trade updates.
|
||||
|
||||
### `src/proto/messages.js`
|
||||
Protobuf message definitions and encoding/decoding utilities.
|
||||
|
||||
## Error Handling
|
||||
|
||||
- Failed requests automatically return to the Flink work queue
|
||||
- Realtime subscriptions are cancelled after 5 consecutive errors
|
||||
- Worker logs all errors with context for debugging
|
||||
- Graceful shutdown on SIGINT/SIGTERM
|
||||
|
||||
## Monitoring
|
||||
|
||||
The worker logs status information every 60 seconds including:
|
||||
- Number of active requests
|
||||
- Realtime subscription statistics
|
||||
- Error counts
|
||||
|
||||
## Environment Variables
|
||||
|
||||
- `CONFIG_PATH`: Path to config.yaml (default: `/config/config.yaml`)
|
||||
- `SECRETS_PATH`: Path to secrets.yaml (default: `/config/secrets.yaml`)
|
||||
- `LOG_LEVEL`: Log level (default: `info`)
|
||||
|
||||
## Supported Exchanges
|
||||
|
||||
All exchanges supported by CCXT can be used. Popular exchanges include:
|
||||
- Binance
|
||||
- Coinbase
|
||||
- Kraken
|
||||
- Bitfinex
|
||||
- Huobi
|
||||
- And 100+ more
|
||||
|
||||
## Development
|
||||
|
||||
### Project Structure
|
||||
```
|
||||
redesign/ingestor/
|
||||
├── src/
|
||||
│ ├── index.js # Main worker
|
||||
│ ├── zmq-client.js # ZMQ client
|
||||
│ ├── kafka-producer.js # Kafka producer
|
||||
│ ├── ccxt-fetcher.js # CCXT wrapper
|
||||
│ ├── realtime-poller.js # Realtime poller
|
||||
│ └── proto/
|
||||
│ └── messages.js # Protobuf definitions
|
||||
├── config.example.yaml
|
||||
├── Dockerfile
|
||||
├── package.json
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
ISC
|
||||
24
ingestor/config.example.yaml
Normal file
24
ingestor/config.example.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# CCXT Ingestor Configuration
|
||||
|
||||
# Relay ZMQ endpoints (relay is the well-known gateway)
|
||||
flink_hostname: relay
|
||||
ingestor_work_port: 5555 # SUB - receives DataRequest with exchange prefix
|
||||
# Note: No response port needed - async architecture via Kafka!
|
||||
|
||||
# Supported exchanges (subscribe to these prefixes)
|
||||
supported_exchanges:
|
||||
- BINANCE
|
||||
- COINBASE
|
||||
- KRAKEN
|
||||
|
||||
# Kafka configuration
|
||||
kafka_brokers:
|
||||
- kafka:29092
|
||||
kafka_topic: market-0
|
||||
|
||||
# Worker configuration
|
||||
max_concurrent: 10
|
||||
poll_interval_ms: 10000
|
||||
|
||||
# Logging
|
||||
log_level: info
|
||||
33
ingestor/package.json
Normal file
33
ingestor/package.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"name": "@dexorder/ccxt-ingestor",
|
||||
"version": "1.0.0",
|
||||
"description": "CCXT-based market data ingestor for Flink processing pipeline",
|
||||
"main": "src/index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "node src/index.js",
|
||||
"dev": "node --watch src/index.js",
|
||||
"proto:compile": "mkdir -p src/proto && protoc --js_out=import_style=commonjs,binary:src/proto --proto_path=../protobuf ../protobuf/*.proto"
|
||||
},
|
||||
"keywords": [
|
||||
"ccxt",
|
||||
"kafka",
|
||||
"zeromq",
|
||||
"market-data",
|
||||
"ingestor"
|
||||
],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"ccxt": "^4.4.0",
|
||||
"google-protobuf": "^3.21.4",
|
||||
"kafkajs": "^2.2.4",
|
||||
"pino": "^9.0.0",
|
||||
"pino-pretty": "^13.0.0",
|
||||
"yaml": "^2.5.0",
|
||||
"zeromq": "^6.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"protobufjs": "^7.4.0"
|
||||
}
|
||||
}
|
||||
248
ingestor/src/ccxt-fetcher.js
Normal file
248
ingestor/src/ccxt-fetcher.js
Normal file
@@ -0,0 +1,248 @@
|
||||
// CCXT data fetcher for historical OHLC and realtime ticks
|
||||
import ccxt from 'ccxt';
|
||||
|
||||
export class CCXTFetcher {
  /**
   * Fetches historical OHLCV candles and recent trades via CCXT and converts
   * them into the ingestor's fixed-point (10^8), microsecond-timestamp format.
   *
   * @param {object} config - Ingestor configuration (kept for future use).
   * @param {object} logger - Pino-style logger (info/debug/error).
   */
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;
    // Cache of CCXT exchange instances, keyed by lowercase exchange name.
    this.exchanges = new Map();
  }

  /**
   * Parse ticker string to exchange and symbol.
   * Expected format: "EXCHANGE:SYMBOL" (e.g., "BINANCE:BTC/USDT")
   * @param {string} ticker
   * @returns {{exchange: string, symbol: string}} lowercase exchange id plus CCXT symbol
   * @throws {Error} if the ticker does not contain exactly one ':'
   */
  parseTicker(ticker) {
    const parts = ticker.split(':');
    if (parts.length !== 2) {
      throw new Error(`Invalid ticker format: ${ticker}. Expected "EXCHANGE:SYMBOL"`);
    }

    return {
      exchange: parts[0].toLowerCase(),
      symbol: parts[1]
    };
  }

  /**
   * Get or create a cached CCXT exchange instance.
   * @param {string} exchangeName - lowercase CCXT exchange id
   * @throws {Error} if CCXT has no class for the exchange
   */
  getExchange(exchangeName) {
    if (this.exchanges.has(exchangeName)) {
      return this.exchanges.get(exchangeName);
    }

    const ExchangeClass = ccxt[exchangeName];
    if (!ExchangeClass) {
      throw new Error(`Unsupported exchange: ${exchangeName}`);
    }

    const exchange = new ExchangeClass({
      enableRateLimit: true, // let CCXT throttle per-exchange request rates
      options: {
        defaultType: 'spot'
      }
    });

    this.exchanges.set(exchangeName, exchange);
    this.logger.info({ exchange: exchangeName }, 'Created CCXT exchange instance');

    return exchange;
  }

  /**
   * Fetch historical OHLC data, paging through the exchange's batch limit.
   * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL"
   * @param {string} startTime - Start time in microseconds
   * @param {string} endTime - End time in microseconds
   * @param {number} periodSeconds - OHLC period in seconds
   * @param {number} [limit] - Optional cap on number of candles returned
   * @returns {Promise<Array>} OHLC candles in fixed-point format
   * @throws propagates fetch errors after logging them
   */
  async fetchHistoricalOHLC(ticker, startTime, endTime, periodSeconds, limit) {
    const { exchange: exchangeName, symbol } = this.parseTicker(ticker);
    const exchange = this.getExchange(exchangeName);

    // Convert microseconds to milliseconds (CCXT works in ms).
    const startMs = Math.floor(parseInt(startTime, 10) / 1000);
    const endMs = Math.floor(parseInt(endTime, 10) / 1000);

    // Map period seconds to CCXT timeframe
    const timeframe = this.secondsToTimeframe(periodSeconds);

    this.logger.info(
      { ticker, timeframe, startMs, endMs, limit },
      'Fetching historical OHLC'
    );

    const allCandles = [];
    let since = startMs;

    // CCXT typically limits to 1000 candles per request
    const batchSize = limit || 1000;

    while (since < endMs) {
      try {
        const candles = await exchange.fetchOHLCV(
          symbol,
          timeframe,
          since,
          batchSize
        );

        if (candles.length === 0) {
          break;
        }

        // Keep only candles inside the requested window.
        const filteredCandles = candles.filter(c => {
          const timestamp = c[0];
          return timestamp >= startMs && timestamp <= endMs;
        });

        allCandles.push(...filteredCandles);

        // Advance past the last candle we received.
        const lastTimestamp = candles[candles.length - 1][0];
        since = lastTimestamp + (periodSeconds * 1000);

        // Break if we've reached the end time or the caller's limit.
        if (since >= endMs || (limit && allCandles.length >= limit)) {
          break;
        }

        // Respect the exchange's rate limit between pages.
        await exchange.sleep(exchange.rateLimit);
      } catch (error) {
        this.logger.error(
          { error: error.message, ticker, since },
          'Error fetching OHLC'
        );
        throw error;
      }
    }

    // Enforce the limit exactly: a page can overshoot it by up to batchSize-1.
    const bounded = limit ? allCandles.slice(0, limit) : allCandles;

    // Convert to our OHLC format
    return bounded.map(candle => this.convertToOHLC(candle, ticker, periodSeconds));
  }

  /**
   * Fetch recent trades for realtime tick data.
   * @param {string} ticker - Ticker in format "EXCHANGE:SYMBOL"
   * @param {string} [since] - Optional timestamp in microseconds to fetch from
   * @returns {Promise<Array>} trade ticks in fixed-point format
   * @throws propagates fetch errors after logging them
   */
  async fetchRecentTrades(ticker, since = null) {
    const { exchange: exchangeName, symbol } = this.parseTicker(ticker);
    const exchange = this.getExchange(exchangeName);

    try {
      // Convert microseconds to milliseconds if provided
      const sinceMs = since ? Math.floor(parseInt(since, 10) / 1000) : undefined;

      const trades = await exchange.fetchTrades(symbol, sinceMs, 1000);

      this.logger.debug(
        { ticker, count: trades.length },
        'Fetched recent trades'
      );

      // Convert to our Tick format
      return trades.map(trade => this.convertToTick(trade, ticker));
    } catch (error) {
      this.logger.error(
        { error: error.message, ticker },
        'Error fetching trades'
      );
      throw error;
    }
  }

  /**
   * Convert CCXT OHLCV array to our OHLC format.
   * CCXT format: [timestamp(ms), open, high, low, close, volume]
   * Prices become fixed-point strings (denominator 10^8); times become
   * microsecond strings.
   */
  convertToOHLC(candle, ticker, periodSeconds) {
    const [timestamp, open, high, low, close, volume] = candle;

    // Fixed-point: 8 decimal places = 10^8
    const DENOM = 100000000;

    return {
      ticker,
      timestamp: (timestamp * 1000).toString(), // Convert ms to microseconds
      open: Math.round(open * DENOM).toString(),
      high: Math.round(high * DENOM).toString(),
      low: Math.round(low * DENOM).toString(),
      close: Math.round(close * DENOM).toString(),
      // Some exchanges omit volume; leave the optional proto field unset
      // rather than emitting the string "NaN".
      volume: volume == null ? undefined : Math.round(volume * DENOM).toString(),
      open_time: (timestamp * 1000).toString(),
      close_time: ((timestamp + periodSeconds * 1000) * 1000).toString()
    };
  }

  /**
   * Convert a CCXT trade object to our Tick format (fixed-point 10^8 strings,
   * microsecond timestamp). Falls back to the trade timestamp as trade_id
   * when the exchange provides no id.
   */
  convertToTick(trade, ticker) {
    // Fixed-point: 8 decimal places = 10^8
    const DENOM = 100000000;

    const price = Math.round(trade.price * DENOM);
    const amount = Math.round(trade.amount * DENOM);
    const quoteAmount = Math.round((trade.price * trade.amount) * DENOM);

    return {
      trade_id: trade.id || `${trade.timestamp}`,
      ticker,
      timestamp: (trade.timestamp * 1000).toString(), // Convert ms to microseconds
      price: price.toString(),
      amount: amount.toString(),
      quote_amount: quoteAmount.toString(),
      taker_buy: trade.side === 'buy',
      sequence: trade.order ? trade.order.toString() : undefined
    };
  }

  /**
   * Convert period seconds to a CCXT timeframe string.
   * @throws {Error} for periods with no CCXT timeframe equivalent
   */
  secondsToTimeframe(seconds) {
    const timeframes = {
      60: '1m',
      300: '5m',
      900: '15m',
      1800: '30m',
      3600: '1h',
      7200: '2h',
      14400: '4h',
      21600: '6h',
      28800: '8h',
      43200: '12h',
      86400: '1d',
      259200: '3d',
      604800: '1w',
      2592000: '1M'
    };

    const timeframe = timeframes[seconds];
    if (!timeframe) {
      throw new Error(`Unsupported period: ${seconds} seconds`);
    }

    return timeframe;
  }

  /**
   * Close all cached exchange connections and clear the cache.
   */
  async close() {
    for (const exchange of this.exchanges.values()) {
      if (exchange.close) {
        await exchange.close();
      }
    }
    this.exchanges.clear();
  }
}
|
||||
411
ingestor/src/index.js
Normal file
411
ingestor/src/index.js
Normal file
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Main ingestor worker process
|
||||
import { readFileSync } from 'fs';
|
||||
import { parse as parseYaml } from 'yaml';
|
||||
import pino from 'pino';
|
||||
import { ZmqClient } from './zmq-client.js';
|
||||
import { KafkaProducer } from './kafka-producer.js';
|
||||
import { CCXTFetcher } from './ccxt-fetcher.js';
|
||||
import { RealtimePoller } from './realtime-poller.js';
|
||||
|
||||
// Logger setup
|
||||
const logger = pino({
|
||||
level: process.env.LOG_LEVEL || 'info',
|
||||
transport: {
|
||||
target: 'pino-pretty',
|
||||
options: {
|
||||
colorize: true,
|
||||
translateTime: 'SYS:standard',
|
||||
ignore: 'pid,hostname'
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Load configuration from CONFIG_PATH and SECRETS_PATH (YAML files), merging
 * secrets over config values and falling back to built-in defaults when a
 * file is missing or a key is absent.
 * @returns {object} merged configuration
 */
function loadConfig() {
  const configPath = process.env.CONFIG_PATH || '/config/config.yaml';
  const secretsPath = process.env.SECRETS_PATH || '/config/secrets.yaml';

  let config = {};
  let secrets = {};

  try {
    const configFile = readFileSync(configPath, 'utf8');
    // parseYaml returns null for an empty document; keep {} so the
    // defaulting below never dereferences null.
    config = parseYaml(configFile) ?? {};
    logger.info({ path: configPath }, 'Loaded configuration');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load config, using defaults');
  }

  try {
    const secretsFile = readFileSync(secretsPath, 'utf8');
    secrets = parseYaml(secretsFile) ?? {};
    logger.info({ path: secretsPath }, 'Loaded secrets');
  } catch (error) {
    logger.warn({ error: error.message }, 'Could not load secrets');
  }

  // Merge config and secrets (secrets win on key collisions).
  return {
    // Flink ZMQ endpoints
    flink_hostname: config.flink_hostname ?? 'localhost',
    ingestor_work_port: config.ingestor_work_port ?? 5555,
    ingestor_control_port: config.ingestor_control_port ?? 5556,

    // Kafka configuration
    kafka_brokers: config.kafka_brokers ?? ['localhost:9092'],
    kafka_topic: config.kafka_topic ?? 'market-0',

    // Worker configuration
    max_concurrent: config.max_concurrent ?? 10,
    poll_interval_ms: config.poll_interval_ms ?? 10000,

    ...secrets
  };
}
|
||||
|
||||
class IngestorWorker {
  /**
   * Coordinates the ZMQ work/control channels, CCXT fetching, realtime
   * polling, and Kafka output for a single ingestor process.
   * @param {object} config - merged configuration (see loadConfig)
   * @param {object} logger - Pino logger; children are created per component
   */
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.zmqClient = new ZmqClient(config, logger.child({ component: 'zmq' }));
    this.kafkaProducer = new KafkaProducer(
      config,
      logger.child({ component: 'kafka' })
    );
    this.ccxtFetcher = new CCXTFetcher(
      config,
      logger.child({ component: 'ccxt' })
    );
    this.realtimePoller = new RealtimePoller(
      this.ccxtFetcher,
      this.kafkaProducer,
      logger.child({ component: 'poller' })
    );

    // Active requests keyed by request_id; its size bounds concurrency in workLoop.
    this.activeRequests = new Map();
    this.isShutdown = false;
  }

  /**
   * Classify a DataRequest type value.
   * HISTORICAL_OHLC = 0 is the proto3 default and is omitted from the wire,
   * so protobufjs decodes it as undefined — treat undefined as HISTORICAL_OHLC.
   * @returns {'historical'|'realtime'|null} null for unrecognized types
   */
  #classifyType(type) {
    if (type === undefined || type === 'HISTORICAL_OHLC' || type === 0) {
      return 'historical';
    }
    if (type === 'REALTIME_TICKS' || type === 1) {
      return 'realtime';
    }
    return null;
  }

  /**
   * Start the ingestor worker: connect Kafka then ZMQ, begin listening for
   * control messages, and kick off the work loop.
   */
  async start() {
    this.logger.info('Starting CCXT ingestor worker');

    // Connect to services
    await this.kafkaProducer.connect();
    await this.zmqClient.connect();

    // Start control message listener
    this.zmqClient.startControlListener(msg => this.handleControlMessage(msg));

    // The work loop runs for the worker's lifetime and handles its own
    // errors, but guard against an unexpected rejection escaping silently.
    this.workLoop().catch(error => {
      this.logger.error({ error: error.message }, 'Work loop terminated unexpectedly');
    });

    this.logger.info('Ingestor worker started successfully');
  }

  /**
   * Main work loop — pull data requests and dispatch them while below the
   * max_concurrent cap. Runs until shutdown() flips isShutdown.
   */
  async workLoop() {
    while (!this.isShutdown) {
      try {
        // Back off while at the concurrency cap.
        if (this.activeRequests.size >= this.config.max_concurrent) {
          await new Promise(resolve => setTimeout(resolve, 1000));
          continue;
        }

        // Pull next data request
        const request = await this.zmqClient.pullDataRequest();
        if (!request) {
          continue;
        }

        // Handle asynchronously so the loop can keep pulling work.
        this.handleDataRequest(request).catch(error => {
          this.logger.error(
            { error: error.message, requestId: request.requestId },
            'Error handling data request'
          );
        });
      } catch (error) {
        if (!this.isShutdown) {
          this.logger.error({ error: error.message }, 'Error in work loop');
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    }
  }

  /**
   * Dispatch a data request to the historical or realtime handler.
   * Historical and unrecognized requests are released from activeRequests
   * when done; realtime subscriptions stay registered until cancelled via
   * a control message.
   */
  async handleDataRequest(request) {
    const { requestId: request_id, type, ticker } = request;

    this.logger.info({ request_id, type, ticker, fullRequest: request }, 'Handling data request');

    this.activeRequests.set(request_id, request);

    const kind = this.#classifyType(type);

    try {
      if (kind === 'historical') {
        await this.handleHistoricalRequest(request);
      } else if (kind === 'realtime') {
        await this.handleRealtimeRequest(request);
      } else {
        this.logger.warn({ request_id, type, typeOf: typeof type, fullRequest: request }, 'Unknown request type');
      }
    } finally {
      // Only realtime subscriptions outlive this call. Releasing everything
      // else here also fixes a leak where unknown-type requests stayed in
      // activeRequests forever and ate into the concurrency budget.
      if (kind !== 'realtime') {
        this.activeRequests.delete(request_id);
      }
    }
  }

  /**
   * Handle historical OHLC request.
   * ASYNC ARCHITECTURE: No response sent back. Data written to Kafka only.
   * Flink will process from Kafka, write to Iceberg, and publish notification.
   * Never rejects: fetch errors are reported to Flink via an ERROR marker.
   */
  async handleHistoricalRequest(request) {
    const { requestId: request_id, ticker, historical, clientId: client_id } = request;
    const { startTime: start_time, endTime: end_time, periodSeconds: period_seconds, limit } = historical;

    this.logger.info(
      { request_id, ticker, period_seconds, client_id },
      'Processing historical OHLC request (async mode - write to Kafka only)'
    );

    try {
      // Fetch historical data from exchange
      const candles = await this.ccxtFetcher.fetchHistoricalOHLC(
        ticker,
        start_time,
        end_time,
        period_seconds,
        limit
      );

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Fetched data from exchange'
      );

      // Write to Kafka - THIS IS THE ONLY OUTPUT
      // Flink will:
      // 1. Read from Kafka
      // 2. Write to Iceberg
      // 3. Publish HistoryReadyNotification
      // 4. Client receives notification via relay pub/sub
      if (candles.length > 0) {
        // Attach request metadata to the first candle so Flink can track the batch.
        const enrichedCandles = candles.map((candle, idx) => ({
          ...candle,
          __metadata: idx === 0 ? {
            request_id,
            client_id,
            ticker,
            period_seconds,
            start_time,
            end_time
          } : undefined
        }));

        await this.kafkaProducer.writeOHLCs(this.config.kafka_topic, enrichedCandles);
      } else {
        // Write a marker message even if no data found;
        // Flink will see this and publish a NOT_FOUND notification.
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'NOT_FOUND',
          message: 'No data available for requested period'
        });
      }

      this.logger.info(
        { request_id, ticker, count: candles.length },
        'Completed historical OHLC request - data written to Kafka'
      );

      // NO RESPONSE SENT - Relay is stateless, client waits for pub/sub notification

    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to process historical request'
      );

      // Write error marker to Kafka so Flink can notify client
      try {
        await this.kafkaProducer.writeMarker(this.config.kafka_topic, {
          request_id,
          client_id,
          ticker,
          period_seconds,
          start_time,
          end_time,
          status: 'ERROR',
          error_message: error.message
        });
      } catch (kafkaError) {
        this.logger.error(
          { error: kafkaError.message, request_id },
          'Failed to write error marker to Kafka'
        );
      }

      // Do not throw - request is handled, Flink will notify client of error
    }
  }

  /**
   * Handle realtime tick subscription request. The subscription (and its
   * activeRequests entry) persists until a CANCEL control message arrives.
   * @throws rethrows startSubscription failures after releasing the request
   */
  async handleRealtimeRequest(request) {
    const { requestId: request_id, ticker } = request;

    this.logger.info(
      { request_id, ticker },
      'Processing realtime subscription request'
    );

    try {
      // Start realtime polling
      this.realtimePoller.startSubscription(
        request_id,
        ticker,
        this.config.kafka_topic
      );
    } catch (error) {
      this.logger.error(
        { error: error.message, request_id, ticker },
        'Failed to start realtime subscription'
      );
      this.activeRequests.delete(request_id);
      throw error;
    }
  }

  /**
   * Handle control messages from Flink
   * (CANCEL / SHUTDOWN / CONFIG_UPDATE / HEARTBEAT).
   */
  async handleControlMessage(message) {
    const { action, requestId: request_id } = message;

    this.logger.info({ action, request_id }, 'Received control message');

    switch (action) {
      case 'CANCEL':
        if (request_id) {
          // Cancel a specific request / realtime subscription.
          this.realtimePoller.cancelSubscription(request_id);
          this.activeRequests.delete(request_id);
        }
        break;

      case 'SHUTDOWN':
        this.logger.info('Received shutdown signal');
        await this.shutdown();
        break;

      case 'CONFIG_UPDATE':
        // Config updates are acknowledged but not applied yet.
        this.logger.info('Received config update');
        break;

      case 'HEARTBEAT':
        // Nothing to do; heartbeats just confirm liveness.
        break;

      default:
        this.logger.warn({ action }, 'Unknown control action');
    }
  }

  /**
   * Snapshot of worker health for periodic status logging.
   */
  getStatus() {
    return {
      activeRequests: this.activeRequests.size,
      maxConcurrent: this.config.max_concurrent,
      pollerStats: this.realtimePoller.getStats()
    };
  }

  /**
   * Shut down gracefully: stop polling, close CCXT/Kafka/ZMQ connections,
   * then exit the process. Idempotent — subsequent calls are no-ops.
   * Note: this hard-exits via process.exit(0); callers never observe a resolve.
   */
  async shutdown() {
    if (this.isShutdown) {
      return;
    }

    this.isShutdown = true;
    this.logger.info('Shutting down ingestor worker');

    // Stop polling
    this.realtimePoller.shutdown();

    // Close connections
    await this.ccxtFetcher.close();
    await this.kafkaProducer.disconnect();
    await this.zmqClient.shutdown();

    this.logger.info('Ingestor worker shutdown complete');
    process.exit(0);
  }
}
|
||||
|
||||
// Main entry point: wire up config, worker, process-level handlers, and the
// periodic status log.
async function main() {
  const config = loadConfig();
  const worker = new IngestorWorker(config, logger);

  // shutdown() is async and exits the process itself; catch rejections so a
  // failed shutdown cannot surface as an unhandled promise rejection.
  const shutdownSafely = () => {
    worker.shutdown().catch(error => {
      logger.error({ error }, 'Error during shutdown');
      process.exit(1);
    });
  };

  // Handle shutdown signals
  process.on('SIGINT', shutdownSafely);
  process.on('SIGTERM', shutdownSafely);

  // Handle errors
  process.on('uncaughtException', error => {
    logger.error({ error }, 'Uncaught exception');
    shutdownSafely();
  });

  process.on('unhandledRejection', (reason, promise) => {
    logger.error({ reason }, 'Unhandled rejection');
  });

  // Start worker
  await worker.start();

  // Log status periodically
  setInterval(() => {
    const status = worker.getStatus();
    logger.info({ status }, 'Worker status');
  }, 60000);
}

// Run
main().catch(error => {
  logger.error({ error }, 'Fatal error');
  process.exit(1);
});
|
||||
270
ingestor/src/kafka-producer.js
Normal file
270
ingestor/src/kafka-producer.js
Normal file
@@ -0,0 +1,270 @@
|
||||
// Kafka producer for writing market data
|
||||
import { Kafka } from 'kafkajs';
|
||||
import { encodeMessage, MessageTypeId, Tick, OHLC, OHLCBatch } from './proto/messages.js';
|
||||
|
||||
export class KafkaProducer {
|
||||
  /**
   * @param {object} config - Ingestor configuration; kafka_brokers is read here.
   * @param {object} logger - Pino-style logger.
   */
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    this.kafka = new Kafka({
      clientId: 'ccxt-ingestor',
      brokers: config.kafka_brokers || ['localhost:9092'],
      logLevel: 0 // kafkajs logLevel 0 = NOTHING (silence client logs); NOTE(review): prior comment said "Error only", but ERROR is level 1
    });

    this.producer = this.kafka.producer();
    // Set true by connect(); write methods refuse to send until then.
    this.isConnected = false;
  }
|
||||
|
||||
  /**
   * Connect the underlying kafkajs producer to the brokers.
   * Must complete before any write* method is called.
   */
  async connect() {
    await this.producer.connect();
    this.isConnected = true;
    this.logger.info('Connected to Kafka');
  }
|
||||
|
||||
  /**
   * Write a single tick message to Kafka.
   * The record value is the two-frame protocol envelope (version frame +
   * type/payload frame) concatenated into one buffer, and the record is
   * keyed by ticker so all ticks for a symbol land in the same partition.
   * @param {string} topic - Kafka topic name
   * @param {object} tickData - Tick data object (ticker, timestamp, ...)
   * @throws {Error} if connect() has not been called
   */
  async writeTick(topic, tickData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick);
    const message = Buffer.concat([frame1, frame2]);

    await this.producer.send({
      topic,
      messages: [
        {
          key: tickData.ticker,
          value: message,
          // NOTE(review): tick timestamps are microseconds upstream, but Kafka
          // record timestamps are epoch milliseconds — confirm intended units.
          timestamp: tickData.timestamp.toString()
        }
      ]
    });

    this.logger.debug({ ticker: tickData.ticker, topic }, 'Wrote tick to Kafka');
  }
|
||||
|
||||
  /**
   * Write multiple ticks to Kafka in one producer.send batch.
   * Each record is individually envelope-encoded and keyed by its ticker.
   * An empty input array is a no-op.
   * @param {string} topic - Kafka topic name
   * @param {Array<object>} ticksData - Array of tick data objects
   * @throws {Error} if connect() has not been called
   */
  async writeTicks(topic, ticksData) {
    if (!this.isConnected) {
      throw new Error('Kafka producer not connected');
    }

    if (ticksData.length === 0) {
      return;
    }

    const messages = ticksData.map(tickData => {
      const [frame1, frame2] = encodeMessage(MessageTypeId.TICK, tickData, Tick);
      const message = Buffer.concat([frame1, frame2]);

      return {
        key: tickData.ticker,
        value: message,
        // NOTE(review): see writeTick — microsecond value passed where Kafka
        // expects milliseconds; confirm intended units.
        timestamp: tickData.timestamp.toString()
      };
    });

    await this.producer.send({
      topic,
      messages
    });

    this.logger.debug(
      { count: ticksData.length, topic },
      'Wrote ticks batch to Kafka'
    );
  }
|
||||
|
||||
/**
|
||||
* Write an OHLC message to Kafka
|
||||
* @param {string} topic - Kafka topic name
|
||||
* @param {object} ohlcData - OHLC data object
|
||||
*/
|
||||
async writeOHLC(topic, ohlcData) {
|
||||
if (!this.isConnected) {
|
||||
throw new Error('Kafka producer not connected');
|
||||
}
|
||||
|
||||
const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, ohlcData, OHLC);
|
||||
const message = Buffer.concat([frame1, frame2]);
|
||||
|
||||
await this.producer.send({
|
||||
topic,
|
||||
messages: [
|
||||
{
|
||||
key: ohlcData.ticker,
|
||||
value: message
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
this.logger.debug({ ticker: ohlcData.ticker, topic }, 'Wrote OHLC to Kafka');
|
||||
}
|
||||
|
||||
/**
|
||||
* Write multiple OHLC candles to Kafka as an OHLCBatch message
|
||||
* Uses protobuf encoding with metadata in batch wrapper
|
||||
* @param {string} topic - Kafka topic name
|
||||
* @param {Array<object>} ohlcData - Array of OHLC data objects (may include __metadata in first record)
|
||||
*/
|
||||
async writeOHLCs(topic, ohlcData) {
|
||||
if (!this.isConnected) {
|
||||
throw new Error('Kafka producer not connected');
|
||||
}
|
||||
|
||||
if (ohlcData.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract metadata from first record if present
|
||||
const firstCandle = ohlcData[0];
|
||||
const metadata = firstCandle.__metadata;
|
||||
|
||||
if (!metadata) {
|
||||
// No metadata - write individual OHLC messages (realtime mode)
|
||||
const messages = ohlcData.map(candle => {
|
||||
const protoCandle = {
|
||||
timestamp: candle.timestamp,
|
||||
ticker: candle.ticker,
|
||||
open: candle.open,
|
||||
high: candle.high,
|
||||
low: candle.low,
|
||||
close: candle.close,
|
||||
volume: candle.volume
|
||||
};
|
||||
|
||||
const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC, protoCandle, OHLC);
|
||||
const value = Buffer.concat([frame1, frame2]);
|
||||
|
||||
return {
|
||||
key: candle.ticker,
|
||||
value
|
||||
};
|
||||
});
|
||||
|
||||
await this.producer.send({
|
||||
topic,
|
||||
messages
|
||||
});
|
||||
|
||||
this.logger.debug(
|
||||
{ count: ohlcData.length, topic, type: 'individual' },
|
||||
'Wrote OHLC messages to Kafka'
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Historical mode - write as OHLCBatch with metadata
|
||||
const batch = {
|
||||
metadata: {
|
||||
requestId: metadata.request_id,
|
||||
clientId: metadata.client_id,
|
||||
ticker: metadata.ticker,
|
||||
periodSeconds: metadata.period_seconds,
|
||||
startTime: metadata.start_time,
|
||||
endTime: metadata.end_time,
|
||||
status: metadata.status || 'OK',
|
||||
errorMessage: metadata.error_message
|
||||
},
|
||||
rows: ohlcData.map(candle => ({
|
||||
timestamp: candle.timestamp,
|
||||
ticker: candle.ticker,
|
||||
open: candle.open,
|
||||
high: candle.high,
|
||||
low: candle.low,
|
||||
close: candle.close,
|
||||
volume: candle.volume
|
||||
}))
|
||||
};
|
||||
|
||||
// Encode as protobuf OHLCBatch with ZMQ envelope
|
||||
const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
|
||||
const value = Buffer.concat([frame1, frame2]);
|
||||
|
||||
await this.producer.send({
|
||||
topic,
|
||||
messages: [
|
||||
{
|
||||
key: metadata.ticker,
|
||||
value
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
this.logger.debug(
|
||||
{ request_id: metadata.request_id, count: ohlcData.length, topic, type: 'batch' },
|
||||
'Wrote OHLCBatch to Kafka'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a marker message to Kafka for NOT_FOUND or ERROR cases
|
||||
* This allows Flink to publish notifications even when no data is available
|
||||
* @param {string} topic - Kafka topic name
|
||||
* @param {object} marker - Marker object with request metadata and status
|
||||
*/
|
||||
async writeMarker(topic, marker) {
|
||||
if (!this.isConnected) {
|
||||
throw new Error('Kafka producer not connected');
|
||||
}
|
||||
|
||||
// Create an empty OHLCBatch with status in metadata
|
||||
const batch = {
|
||||
metadata: {
|
||||
requestId: marker.request_id,
|
||||
clientId: marker.client_id,
|
||||
ticker: marker.ticker,
|
||||
periodSeconds: marker.period_seconds,
|
||||
startTime: marker.start_time,
|
||||
endTime: marker.end_time,
|
||||
status: marker.status, // 'NOT_FOUND' or 'ERROR'
|
||||
errorMessage: marker.error_message || marker.message
|
||||
},
|
||||
rows: [] // Empty rows array indicates marker message
|
||||
};
|
||||
|
||||
// Encode as protobuf OHLCBatch with ZMQ envelope
|
||||
const [frame1, frame2] = encodeMessage(MessageTypeId.OHLC_BATCH, batch, OHLCBatch);
|
||||
const value = Buffer.concat([frame1, frame2]);
|
||||
|
||||
await this.producer.send({
|
||||
topic,
|
||||
messages: [
|
||||
{
|
||||
key: marker.ticker,
|
||||
value
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
this.logger.info(
|
||||
{ request_id: marker.request_id, status: marker.status, topic },
|
||||
'Wrote marker to Kafka'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Disconnect from Kafka
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.isConnected) {
|
||||
await this.producer.disconnect();
|
||||
this.isConnected = false;
|
||||
this.logger.info('Disconnected from Kafka');
|
||||
}
|
||||
}
|
||||
}
|
||||
217
ingestor/src/realtime-poller.js
Normal file
217
ingestor/src/realtime-poller.js
Normal file
@@ -0,0 +1,217 @@
|
||||
// Realtime tick data poller using 10-second polling
|
||||
export class RealtimePoller {
  // True while a poll cycle is running; prevents overlapping cycles when a
  // poll takes longer than the interval (slow exchange or Kafka write).
  #pollInFlight = false;

  /**
   * Realtime tick data poller using 10-second polling.
   *
   * @param {object} ccxtFetcher - Exchange fetcher exposing fetchRecentTrades(ticker, since).
   * @param {object} kafkaProducer - Producer exposing writeTicks(topic, ticks).
   * @param {object} logger - Pino-style structured logger.
   */
  constructor(ccxtFetcher, kafkaProducer, logger) {
    this.ccxtFetcher = ccxtFetcher;
    this.kafkaProducer = kafkaProducer;
    this.logger = logger;

    // Active subscriptions: requestId -> subscription info
    this.subscriptions = new Map();

    // Poll interval in milliseconds (10 seconds)
    this.pollInterval = 10000;

    // Main polling loop (setInterval handle, null when not running)
    this.pollingLoop = null;
  }

  /**
   * Start a realtime subscription
   * @param {string} requestId - Unique request ID
   * @param {string} ticker - Ticker to subscribe to
   * @param {string} kafkaTopic - Kafka topic to write to
   */
  startSubscription(requestId, ticker, kafkaTopic) {
    if (this.subscriptions.has(requestId)) {
      this.logger.warn({ requestId }, 'Subscription already exists');
      return;
    }

    const subscription = {
      requestId,
      ticker,
      kafkaTopic,
      lastTimestamp: null, // timestamp of the newest trade seen so far
      isActive: true,
      errorCount: 0 // consecutive failures; reset on success
    };

    this.subscriptions.set(requestId, subscription);

    this.logger.info(
      { requestId, ticker, kafkaTopic },
      'Started realtime subscription'
    );

    // Start polling loop if not already running
    if (!this.pollingLoop) {
      this.startPollingLoop();
    }
  }

  /**
   * Cancel a realtime subscription
   * @param {string} requestId - Request ID to cancel
   */
  cancelSubscription(requestId) {
    const subscription = this.subscriptions.get(requestId);
    if (subscription) {
      // Mark inactive so an in-flight poll for this subscription bails out.
      subscription.isActive = false;
      this.subscriptions.delete(requestId);

      this.logger.info(
        { requestId, ticker: subscription.ticker },
        'Cancelled realtime subscription'
      );
    }

    // Stop polling loop if no active subscriptions
    if (this.subscriptions.size === 0 && this.pollingLoop) {
      clearInterval(this.pollingLoop);
      this.pollingLoop = null;
      this.logger.info('Stopped polling loop - no active subscriptions');
    }
  }

  /**
   * Start the main polling loop and kick off an immediate first poll.
   */
  startPollingLoop() {
    this.logger.info({ interval: this.pollInterval }, 'Starting polling loop');

    this.pollingLoop = setInterval(() => {
      // Fire-and-forget: pollAllSubscriptions never rejects (it uses
      // Promise.allSettled) and skips itself if a cycle is still in flight.
      void this.pollAllSubscriptions();
    }, this.pollInterval);

    // Do an immediate poll rather than waiting a full interval.
    void this.pollAllSubscriptions();
  }

  /**
   * Poll all active subscriptions in parallel. If the previous cycle has
   * not finished yet, this cycle is skipped to avoid overlapping fetches
   * for the same subscriptions.
   */
  async pollAllSubscriptions() {
    if (this.#pollInFlight) {
      this.logger.debug('Previous poll cycle still in flight - skipping');
      return;
    }

    this.#pollInFlight = true;
    try {
      const subscriptions = Array.from(this.subscriptions.values());

      // Poll subscriptions in parallel; allSettled isolates failures.
      await Promise.allSettled(
        subscriptions.map(sub => this.pollSubscription(sub))
      );
    } finally {
      this.#pollInFlight = false;
    }
  }

  /**
   * Poll a single subscription: fetch trades since the last seen timestamp,
   * write any new ones to Kafka, and advance the watermark. Cancels the
   * subscription after 5 consecutive errors.
   * @param {object} subscription - Subscription object
   */
  async pollSubscription(subscription) {
    if (!subscription.isActive) {
      return;
    }

    const { requestId, ticker, kafkaTopic, lastTimestamp } = subscription;

    try {
      // Fetch trades since last timestamp
      const trades = await this.ccxtFetcher.fetchRecentTrades(
        ticker,
        lastTimestamp
      );

      if (trades.length === 0) {
        this.logger.debug({ requestId, ticker }, 'No new trades');
        return;
      }

      // Filter out trades we've already seen (fetchers may return
      // trades at or before the last watermark).
      let newTrades = trades;
      if (lastTimestamp) {
        const lastTs = BigInt(lastTimestamp);
        newTrades = trades.filter(t => BigInt(t.timestamp) > lastTs);
      }

      if (newTrades.length > 0) {
        // Write trades to Kafka
        await this.kafkaProducer.writeTicks(kafkaTopic, newTrades);

        // Update last timestamp (trades are assumed oldest-first;
        // presumably guaranteed by the fetcher - TODO confirm)
        const latestTrade = newTrades[newTrades.length - 1];
        subscription.lastTimestamp = latestTrade.timestamp;

        this.logger.info(
          {
            requestId,
            ticker,
            count: newTrades.length,
            kafkaTopic
          },
          'Wrote new trades to Kafka'
        );
      }

      // Reset error count on success
      subscription.errorCount = 0;
    } catch (error) {
      subscription.errorCount++;

      this.logger.error(
        {
          error: error.message,
          requestId,
          ticker,
          errorCount: subscription.errorCount
        },
        'Error polling subscription'
      );

      // Cancel subscription after too many consecutive errors
      if (subscription.errorCount >= 5) {
        this.logger.error(
          { requestId, ticker },
          'Cancelling subscription due to repeated errors'
        );
        this.cancelSubscription(requestId);
      }
    }
  }

  /**
   * Get subscription statistics
   * @returns {object} Totals plus a per-subscription summary.
   */
  getStats() {
    const stats = {
      totalSubscriptions: this.subscriptions.size,
      subscriptions: []
    };

    for (const [requestId, sub] of this.subscriptions) {
      stats.subscriptions.push({
        requestId,
        ticker: sub.ticker,
        isActive: sub.isActive,
        errorCount: sub.errorCount,
        lastTimestamp: sub.lastTimestamp
      });
    }

    return stats;
  }

  /**
   * Shutdown poller and cancel all subscriptions
   */
  shutdown() {
    this.logger.info('Shutting down realtime poller');

    if (this.pollingLoop) {
      clearInterval(this.pollingLoop);
      this.pollingLoop = null;
    }

    // Mark all subscriptions as inactive so in-flight polls bail out
    for (const subscription of this.subscriptions.values()) {
      subscription.isActive = false;
    }

    this.subscriptions.clear();
  }
}
|
||||
116
ingestor/src/zmq-client.js
Normal file
116
ingestor/src/zmq-client.js
Normal file
@@ -0,0 +1,116 @@
|
||||
// ZeroMQ client for connecting to Flink control channels
|
||||
import * as zmq from 'zeromq';
|
||||
import { decodeMessage } from './proto/messages.js';
|
||||
|
||||
export class ZmqClient {
  /**
   * ZeroMQ client for connecting to Flink control channels.
   *
   * @param {object} config - Ingestor configuration (hostnames, ports,
   *   optional `supported_exchanges` list).
   * @param {object} logger - Structured logger.
   */
  constructor(config, logger) {
    this.config = config;
    this.logger = logger;

    // Work queue - SUB socket to receive data requests with exchange prefix filtering
    this.workSocket = null;

    // NOTE: NO RESPONSE SOCKET - Async architecture via Kafka!
    // Ingestors write data to Kafka only;
    // Flink processes and publishes notifications.

    this.isShutdown = false;
    this.supportedExchanges = config.supported_exchanges || ['BINANCE', 'COINBASE'];
  }

  /**
   * Connect to Relay ZMQ endpoints and subscribe to every supported
   * exchange prefix on the work queue.
   */
  async connect() {
    const { flink_hostname, ingestor_work_port } = this.config;

    // Connect to work queue (SUB with exchange prefix filtering)
    const endpoint = `tcp://${flink_hostname}:${ingestor_work_port}`;
    this.workSocket = new zmq.Subscriber();
    await this.workSocket.connect(endpoint);

    // Subscribe to each supported exchange prefix
    this.supportedExchanges.forEach((exchange) => {
      const prefix = `${exchange}:`;
      this.workSocket.subscribe(prefix);
      this.logger.info(`Subscribed to exchange prefix: ${prefix}`);
    });

    this.logger.info(`Connected to work queue at ${endpoint}`);
    this.logger.info('ASYNC MODE: No response socket - data flows via Kafka → Flink → pub/sub notification');
  }

  /**
   * Pull a data request from the work queue.
   * @returns {Promise<object>} Decoded DataRequest message, or null when
   *   shut down, when the frame layout is unexpected, or on receive error.
   */
  async pullDataRequest() {
    if (this.isShutdown) {
      return null;
    }

    try {
      const frames = await this.workSocket.receive();
      const [topicFrame, versionFrame, messageFrame] = frames;

      this.logger.info({
        frameCount: frames.length,
        frame0Len: topicFrame?.length,
        frame1Len: versionFrame?.length,
        frame2Len: messageFrame?.length,
        frame0: topicFrame?.toString('utf8').substring(0, 50),
        frame1Hex: versionFrame?.toString('hex').substring(0, 20),
        frame2Hex: messageFrame?.toString('hex').substring(0, 20)
      }, 'Received raw ZMQ frames');

      // First frame is the topic (exchange prefix), skip it.
      // Remaining frames are: [version_frame, message_frame].
      if (frames.length < 3) {
        this.logger.warn({ frameCount: frames.length }, 'Unexpected frame count');
        return null;
      }

      const { version, typeId, message } = decodeMessage(frames.slice(1));

      this.logger.info({
        version,
        typeId: `0x${typeId.toString(16)}`,
        requestId: message.requestId,
        type: message.type,
        typeOf: typeof message.type,
        ticker: message.ticker
      }, 'Decoded data request');

      return message;
    } catch (error) {
      if (!this.isShutdown) {
        this.logger.error({ error: error.message, stack: error.stack }, 'Error receiving data request');
      }
      return null;
    }
  }

  /**
   * Start listening for control messages in the background.
   * @param {Function} handler - Callback function to handle control messages
   *
   * NOTE: Control channel not implemented yet. This is a stub for future use.
   * For now, just log and ignore.
   */
  startControlListener(handler) {
    this.logger.info('Control channel listener stub - not implemented yet');
    // TODO: Implement control channel when needed.
    // Control messages would be used for:
    // - Canceling realtime subscriptions
    // - Graceful shutdown signals
    // - Configuration updates
  }

  /**
   * Shutdown and close connections.
   */
  async shutdown() {
    this.isShutdown = true;
    this.logger.info('Shutting down ZMQ connections');

    if (this.workSocket !== null) {
      await this.workSocket.close();
    }
  }
}
|
||||
Reference in New Issue
Block a user