data timeout fixes; research agent improvements
This commit is contained in:
@@ -123,10 +123,10 @@ public class SchemaInitializer {
|
||||
/**
|
||||
* Initialize the OHLC table if it doesn't exist.
|
||||
*/
|
||||
// Bump this when the schema changes. Tables with a different (or missing) version
|
||||
// will be dropped and recreated. Increment by 1 for each incompatible change.
|
||||
// Bump this when the schema changes. Increment by 1 for each change.
|
||||
// v1: open/high/low/close required; ingestor forward-fills interior gaps with previous close
|
||||
private static final String OHLC_SCHEMA_VERSION = "1";
|
||||
// v2: added num_trades and quote_volume (appended; backward-compatible via Iceberg schema evolution)
|
||||
private static final String OHLC_SCHEMA_VERSION = "2";
|
||||
private static final String SCHEMA_VERSION_PROP = "app.schema.version";
|
||||
|
||||
private void initializeOhlcTable() {
|
||||
@@ -154,11 +154,13 @@ public class SchemaInitializer {
|
||||
if (tableExists) {
|
||||
Table existing = catalog.loadTable(tableId);
|
||||
String existingVersion = existing.properties().get(SCHEMA_VERSION_PROP);
|
||||
LOG.info("Table {} already exists at schema version {}", tableId, existingVersion);
|
||||
if (!OHLC_SCHEMA_VERSION.equals(existingVersion)) {
|
||||
LOG.warn("Table {} has schema version '{}', expected '{}' — skipping (manual migration required if needed)",
|
||||
tableId, existingVersion, OHLC_SCHEMA_VERSION);
|
||||
LOG.info("Evolving table {} from version '{}' to '{}'", tableId, existingVersion, OHLC_SCHEMA_VERSION);
|
||||
evolveOhlcSchema(existing);
|
||||
existing.updateProperties().set(SCHEMA_VERSION_PROP, OHLC_SCHEMA_VERSION).commit();
|
||||
LOG.info("Schema evolution complete for {}", tableId);
|
||||
}
|
||||
LOG.info("Table {} already exists at schema version {} — skipping creation", tableId, existingVersion);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -195,7 +197,11 @@ public class SchemaInitializer {
|
||||
|
||||
// Metadata fields
|
||||
optional(16, "request_id", Types.StringType.get(), "Request ID that generated this data"),
|
||||
required(17, "ingested_at", Types.LongType.get(), "Timestamp when data was ingested by Flink (nanoseconds since epoch)")
|
||||
required(17, "ingested_at", Types.LongType.get(), "Timestamp when data was ingested by Flink (nanoseconds since epoch)"),
|
||||
|
||||
// Extended exchange fields — appended for backward-compatible schema evolution (v2)
|
||||
optional(18, "num_trades", Types.LongType.get(), "Number of trades in the candle"),
|
||||
optional(19, "quote_volume", Types.LongType.get(), "Total quote asset volume (scaled by price precision)")
|
||||
);
|
||||
|
||||
// Create the table with partitioning and properties
|
||||
@@ -218,6 +224,30 @@ public class SchemaInitializer {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add any columns missing from a v1 OHLC table to bring it to v2.
* Only {@code num_trades} and {@code quote_volume} are considered; each is added as a long
* column when no existing column carries that name, so calling this on a table that already
* has both columns stages nothing and commits nothing.
|
||||
* Iceberg schema evolution is safe and non-destructive — existing rows get null for new columns.
* This method does not modify table properties; recording the new schema version is left
* to the caller.
*
* @param table the OHLC table whose schema is inspected and, if needed, updated
|
||||
*/
|
||||
private void evolveOhlcSchema(Table table) {
|
||||
// Stage every addition on a single pending update so they are committed together.
org.apache.iceberg.UpdateSchema update = table.updateSchema();
|
||||
// Tracks whether anything was staged, so that an empty update is never committed.
boolean changed = false;
|
||||
// Names of the columns currently present in the table schema.
java.util.Set<String> existing = new java.util.HashSet<>();
|
||||
for (org.apache.iceberg.types.Types.NestedField f : table.schema().columns()) {
|
||||
existing.add(f.name());
|
||||
}
|
||||
// v2 column: trade count per candle; added only when absent.
if (!existing.contains("num_trades")) {
|
||||
update.addColumn("num_trades", Types.LongType.get(), "Number of trades in the candle");
|
||||
changed = true;
|
||||
}
|
||||
// v2 column: quote asset volume; added only when absent.
if (!existing.contains("quote_volume")) {
|
||||
update.addColumn("quote_volume", Types.LongType.get(), "Total quote asset volume (scaled by price precision)");
|
||||
changed = true;
|
||||
}
|
||||
// Commit only when at least one column was staged above.
if (changed) {
|
||||
update.commit();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the symbol_metadata table if it doesn't exist.
|
||||
*/
|
||||
|
||||
@@ -69,7 +69,13 @@ public class OHLCBatchDeserializer implements DeserializationSchema<OHLCBatchWra
|
||||
row.getHigh(),
|
||||
row.getLow(),
|
||||
row.getClose(),
|
||||
row.hasVolume() ? row.getVolume() : null
|
||||
row.hasVolume() ? row.getVolume() : null,
|
||||
row.hasBuyVol() ? row.getBuyVol() : null,
|
||||
row.hasSellVol() ? row.getSellVol() : null,
|
||||
row.hasOpenTime() ? row.getOpenTime() : null,
|
||||
row.hasCloseTime() ? row.getCloseTime() : null,
|
||||
row.hasNumTrades() ? row.getNumTrades() : null,
|
||||
row.hasQuoteVolume() ? row.getQuoteVolume() : null
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
@@ -116,57 +116,58 @@ public class OHLCBatchWrapper implements Serializable {
|
||||
|
||||
/**
|
||||
* Single OHLC row. open/high/low/close/volume are nullable to support gap bars
|
||||
* (periods where no trades occurred).
|
||||
* (periods where no trades occurred). All extended fields are nullable and only
|
||||
* populated when the exchange provides them (e.g. Binance klines).
|
||||
*/
|
||||
public static class OHLCRow implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private final long timestamp;
|
||||
private final String ticker;
|
||||
private final Long open; // null for gap bars
|
||||
private final Long high; // null for gap bars
|
||||
private final Long low; // null for gap bars
|
||||
private final Long close; // null for gap bars
|
||||
private final Long volume; // null when no volume data
|
||||
private final Long open; // null for gap bars
|
||||
private final Long high; // null for gap bars
|
||||
private final Long low; // null for gap bars
|
||||
private final Long close; // null for gap bars
|
||||
private final Long volume;
|
||||
private final Long buyVol;
|
||||
private final Long sellVol;
|
||||
private final Long openTime;
|
||||
private final Long closeTime;
|
||||
private final Long numTrades;
|
||||
private final Long quoteVolume;
|
||||
|
||||
public OHLCRow(long timestamp, String ticker, Long open, Long high,
|
||||
Long low, Long close, Long volume) {
|
||||
this.timestamp = timestamp;
|
||||
this.ticker = ticker;
|
||||
this.open = open;
|
||||
this.high = high;
|
||||
this.low = low;
|
||||
this.close = close;
|
||||
this.volume = volume;
|
||||
Long low, Long close, Long volume,
|
||||
Long buyVol, Long sellVol, Long openTime, Long closeTime,
|
||||
Long numTrades, Long quoteVolume) {
|
||||
this.timestamp = timestamp;
|
||||
this.ticker = ticker;
|
||||
this.open = open;
|
||||
this.high = high;
|
||||
this.low = low;
|
||||
this.close = close;
|
||||
this.volume = volume;
|
||||
this.buyVol = buyVol;
|
||||
this.sellVol = sellVol;
|
||||
this.openTime = openTime;
|
||||
this.closeTime = closeTime;
|
||||
this.numTrades = numTrades;
|
||||
this.quoteVolume = quoteVolume;
|
||||
}
|
||||
|
||||
public long getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
public String getTicker() {
|
||||
return ticker;
|
||||
}
|
||||
|
||||
public Long getOpen() {
|
||||
return open;
|
||||
}
|
||||
|
||||
public Long getHigh() {
|
||||
return high;
|
||||
}
|
||||
|
||||
public Long getLow() {
|
||||
return low;
|
||||
}
|
||||
|
||||
public Long getClose() {
|
||||
return close;
|
||||
}
|
||||
|
||||
public Long getVolume() {
|
||||
return volume;
|
||||
}
|
||||
public long getTimestamp() { return timestamp; }
|
||||
public String getTicker() { return ticker; }
|
||||
public Long getOpen() { return open; }
|
||||
public Long getHigh() { return high; }
|
||||
public Long getLow() { return low; }
|
||||
public Long getClose() { return close; }
|
||||
public Long getVolume() { return volume; }
|
||||
public Long getBuyVol() { return buyVol; }
|
||||
public Long getSellVol() { return sellVol; }
|
||||
public Long getOpenTime() { return openTime; }
|
||||
public Long getCloseTime() { return closeTime; }
|
||||
public Long getNumTrades() { return numTrades; }
|
||||
public Long getQuoteVolume() { return quoteVolume; }
|
||||
|
||||
public boolean isGapBar() {
|
||||
return open == null && high == null && low == null && close == null;
|
||||
@@ -180,6 +181,9 @@ public class OHLCBatchWrapper implements Serializable {
|
||||
(isGapBar() ? ", gap=true" :
|
||||
", open=" + open + ", high=" + high + ", low=" + low + ", close=" + close) +
|
||||
", volume=" + volume +
|
||||
", buyVol=" + buyVol +
|
||||
", sellVol=" + sellVol +
|
||||
", numTrades=" + numTrades +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,10 +79,9 @@ public class IcebergOHLCSink {
|
||||
|
||||
// Emit one RowData for each OHLC row in the batch
|
||||
for (OHLCBatchWrapper.OHLCRow row : batch.getRows()) {
|
||||
GenericRowData rowData = new GenericRowData(RowKind.INSERT, 17);
|
||||
GenericRowData rowData = new GenericRowData(RowKind.INSERT, 19);
|
||||
|
||||
// Natural key fields (ticker, period_seconds, timestamp)
|
||||
// Used by equality delete files for deduplication
|
||||
rowData.setField(0, StringData.fromString(ticker));
|
||||
rowData.setField(1, periodSeconds);
|
||||
rowData.setField(2, row.getTimestamp());
|
||||
@@ -95,22 +94,26 @@ public class IcebergOHLCSink {
|
||||
|
||||
// Volume data
|
||||
rowData.setField(7, row.getVolume());
|
||||
rowData.setField(8, null); // buy_vol (TODO: extract from protobuf)
|
||||
rowData.setField(9, null); // sell_vol
|
||||
rowData.setField(8, row.getBuyVol());
|
||||
rowData.setField(9, row.getSellVol());
|
||||
|
||||
// Timing data
|
||||
rowData.setField(10, null); // open_time
|
||||
rowData.setField(11, null); // high_time
|
||||
rowData.setField(12, null); // low_time
|
||||
rowData.setField(13, null); // close_time
|
||||
rowData.setField(10, row.getOpenTime());
|
||||
rowData.setField(11, null); // high_time — not provided by exchanges
|
||||
rowData.setField(12, null); // low_time — not provided by exchanges
|
||||
rowData.setField(13, row.getCloseTime());
|
||||
|
||||
// Additional fields
|
||||
rowData.setField(14, null); // open_interest
|
||||
rowData.setField(14, null); // open_interest (futures only, not yet fetched)
|
||||
|
||||
// Metadata fields
|
||||
rowData.setField(15, StringData.fromString(requestId));
|
||||
rowData.setField(16, ingestedAt);
|
||||
|
||||
// Extended exchange fields (appended at end for backward-compatible schema evolution)
|
||||
rowData.setField(17, row.getNumTrades());
|
||||
rowData.setField(18, row.getQuoteVolume());
|
||||
|
||||
out.collect(rowData);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user