data timeout fixes; research agent improvements

This commit is contained in:
2026-04-24 20:43:42 -04:00
parent 1800363566
commit 319d81c41f
37 changed files with 672 additions and 280 deletions

View File

@@ -123,10 +123,10 @@ public class SchemaInitializer {
/**
* Initialize the OHLC table if it doesn't exist.
*/
// Bump this when the schema changes. Tables with a different (or missing) version
// will be dropped and recreated. Increment by 1 for each incompatible change.
// Bump this when the schema changes. Increment by 1 for each change.
// v1: open/high/low/close required; ingestor forward-fills interior gaps with previous close
private static final String OHLC_SCHEMA_VERSION = "1";
// v2: added num_trades and quote_volume (appended; backward-compatible via Iceberg schema evolution)
private static final String OHLC_SCHEMA_VERSION = "2";
private static final String SCHEMA_VERSION_PROP = "app.schema.version";
private void initializeOhlcTable() {
@@ -154,11 +154,13 @@ public class SchemaInitializer {
if (tableExists) {
Table existing = catalog.loadTable(tableId);
String existingVersion = existing.properties().get(SCHEMA_VERSION_PROP);
LOG.info("Table {} already exists at schema version {}", tableId, existingVersion);
if (!OHLC_SCHEMA_VERSION.equals(existingVersion)) {
LOG.warn("Table {} has schema version '{}', expected '{}' — skipping (manual migration required if needed)",
tableId, existingVersion, OHLC_SCHEMA_VERSION);
LOG.info("Evolving table {} from version '{}' to '{}'", tableId, existingVersion, OHLC_SCHEMA_VERSION);
evolveOhlcSchema(existing);
existing.updateProperties().set(SCHEMA_VERSION_PROP, OHLC_SCHEMA_VERSION).commit();
LOG.info("Schema evolution complete for {}", tableId);
}
LOG.info("Table {} already exists at schema version {} — skipping creation", tableId, existingVersion);
return;
}
@@ -195,7 +197,11 @@ public class SchemaInitializer {
// Metadata fields
optional(16, "request_id", Types.StringType.get(), "Request ID that generated this data"),
required(17, "ingested_at", Types.LongType.get(), "Timestamp when data was ingested by Flink (nanoseconds since epoch)")
required(17, "ingested_at", Types.LongType.get(), "Timestamp when data was ingested by Flink (nanoseconds since epoch)"),
// Extended exchange fields — appended for backward-compatible schema evolution (v2)
optional(18, "num_trades", Types.LongType.get(), "Number of trades in the candle"),
optional(19, "quote_volume", Types.LongType.get(), "Total quote asset volume (scaled by price precision)")
);
// Create the table with partitioning and properties
@@ -218,6 +224,30 @@ public class SchemaInitializer {
}
}
/**
 * Upgrade a v1 OHLC table to v2 by appending whichever of the v2 columns
 * ({@code num_trades}, {@code quote_volume}) the table does not already have.
 * Iceberg schema evolution is non-destructive — existing rows read as null for the new columns.
 * A schema commit is issued only when at least one column was actually added.
 *
 * @param table the already-loaded OHLC table to evolve
 */
private void evolveOhlcSchema(Table table) {
    org.apache.iceberg.UpdateSchema pending = table.updateSchema();

    // Snapshot the names of the columns the table currently has.
    java.util.Set<String> presentColumns = new java.util.HashSet<>();
    table.schema().columns().forEach(column -> presentColumns.add(column.name()));

    // {column name, column doc} for every column introduced in v2, in append order.
    final String[][] v2Columns = {
        {"num_trades", "Number of trades in the candle"},
        {"quote_volume", "Total quote asset volume (scaled by price precision)"},
    };

    int added = 0;
    for (String[] spec : v2Columns) {
        if (presentColumns.contains(spec[0])) {
            continue;
        }
        pending.addColumn(spec[0], Types.LongType.get(), spec[1]);
        added++;
    }

    // Skip the commit entirely when the table already has every v2 column.
    if (added > 0) {
        pending.commit();
    }
}
/**
* Initialize the symbol_metadata table if it doesn't exist.
*/

View File

@@ -69,7 +69,13 @@ public class OHLCBatchDeserializer implements DeserializationSchema<OHLCBatchWra
row.getHigh(),
row.getLow(),
row.getClose(),
row.hasVolume() ? row.getVolume() : null
row.hasVolume() ? row.getVolume() : null,
row.hasBuyVol() ? row.getBuyVol() : null,
row.hasSellVol() ? row.getSellVol() : null,
row.hasOpenTime() ? row.getOpenTime() : null,
row.hasCloseTime() ? row.getCloseTime() : null,
row.hasNumTrades() ? row.getNumTrades() : null,
row.hasQuoteVolume() ? row.getQuoteVolume() : null
));
}

View File

@@ -116,57 +116,58 @@ public class OHLCBatchWrapper implements Serializable {
/**
* Single OHLC row. open/high/low/close/volume are nullable to support gap bars
* (periods where no trades occurred).
* (periods where no trades occurred). All extended fields are nullable and only
* populated when the exchange provides them (e.g. Binance klines).
*/
public static class OHLCRow implements Serializable {
private static final long serialVersionUID = 1L;
private final long timestamp;
private final String ticker;
private final Long open; // null for gap bars
private final Long high; // null for gap bars
private final Long low; // null for gap bars
private final Long close; // null for gap bars
private final Long volume; // null when no volume data
private final Long open; // null for gap bars
private final Long high; // null for gap bars
private final Long low; // null for gap bars
private final Long close; // null for gap bars
private final Long volume;
private final Long buyVol;
private final Long sellVol;
private final Long openTime;
private final Long closeTime;
private final Long numTrades;
private final Long quoteVolume;
public OHLCRow(long timestamp, String ticker, Long open, Long high,
Long low, Long close, Long volume) {
this.timestamp = timestamp;
this.ticker = ticker;
this.open = open;
this.high = high;
this.low = low;
this.close = close;
this.volume = volume;
Long low, Long close, Long volume,
Long buyVol, Long sellVol, Long openTime, Long closeTime,
Long numTrades, Long quoteVolume) {
this.timestamp = timestamp;
this.ticker = ticker;
this.open = open;
this.high = high;
this.low = low;
this.close = close;
this.volume = volume;
this.buyVol = buyVol;
this.sellVol = sellVol;
this.openTime = openTime;
this.closeTime = closeTime;
this.numTrades = numTrades;
this.quoteVolume = quoteVolume;
}
public long getTimestamp() {
return timestamp;
}
public String getTicker() {
return ticker;
}
public Long getOpen() {
return open;
}
public Long getHigh() {
return high;
}
public Long getLow() {
return low;
}
public Long getClose() {
return close;
}
public Long getVolume() {
return volume;
}
public long getTimestamp() { return timestamp; }
public String getTicker() { return ticker; }
public Long getOpen() { return open; }
public Long getHigh() { return high; }
public Long getLow() { return low; }
public Long getClose() { return close; }
public Long getVolume() { return volume; }
public Long getBuyVol() { return buyVol; }
public Long getSellVol() { return sellVol; }
public Long getOpenTime() { return openTime; }
public Long getCloseTime() { return closeTime; }
public Long getNumTrades() { return numTrades; }
public Long getQuoteVolume() { return quoteVolume; }
public boolean isGapBar() {
return open == null && high == null && low == null && close == null;
@@ -180,6 +181,9 @@ public class OHLCBatchWrapper implements Serializable {
(isGapBar() ? ", gap=true" :
", open=" + open + ", high=" + high + ", low=" + low + ", close=" + close) +
", volume=" + volume +
", buyVol=" + buyVol +
", sellVol=" + sellVol +
", numTrades=" + numTrades +
'}';
}
}

View File

@@ -79,10 +79,9 @@ public class IcebergOHLCSink {
// Emit one RowData for each OHLC row in the batch
for (OHLCBatchWrapper.OHLCRow row : batch.getRows()) {
GenericRowData rowData = new GenericRowData(RowKind.INSERT, 17);
GenericRowData rowData = new GenericRowData(RowKind.INSERT, 19);
// Natural key fields (ticker, period_seconds, timestamp)
// Used by equality delete files for deduplication
rowData.setField(0, StringData.fromString(ticker));
rowData.setField(1, periodSeconds);
rowData.setField(2, row.getTimestamp());
@@ -95,22 +94,26 @@ public class IcebergOHLCSink {
// Volume data
rowData.setField(7, row.getVolume());
rowData.setField(8, null); // buy_vol (TODO: extract from protobuf)
rowData.setField(9, null); // sell_vol
rowData.setField(8, row.getBuyVol());
rowData.setField(9, row.getSellVol());
// Timing data
rowData.setField(10, null); // open_time
rowData.setField(11, null); // high_time
rowData.setField(12, null); // low_time
rowData.setField(13, null); // close_time
rowData.setField(10, row.getOpenTime());
rowData.setField(11, null); // high_time — not provided by exchanges
rowData.setField(12, null); // low_time — not provided by exchanges
rowData.setField(13, row.getCloseTime());
// Additional fields
rowData.setField(14, null); // open_interest
rowData.setField(14, null); // open_interest (futures only, not yet fetched)
// Metadata fields
rowData.setField(15, StringData.fromString(requestId));
rowData.setField(16, ingestedAt);
// Extended exchange fields (appended at end for backward-compatible schema evolution)
rowData.setField(17, row.getNumTrades());
rowData.setField(18, row.getQuoteVolume());
out.collect(rowData);
}