custom indicators fixed

This commit is contained in:
2026-04-09 17:00:43 -04:00
parent a70dcd954f
commit fd431516cc
17 changed files with 778 additions and 440 deletions

View File

@@ -92,6 +92,12 @@ class IcebergClient:
# Reload table metadata to pick up snapshots committed after this client was initialized
self.table = self.catalog.load_table(f"{self.namespace}.ohlc")
# Always fetch ingested_at alongside requested columns so we can deduplicate rows
# from duplicate parquet files left by repeated Flink job runs on the same key range.
fetch_columns = None
if columns is not None:
fetch_columns = list(columns) if "ingested_at" in columns else list(columns) + ["ingested_at"]
scan = self.table.scan(
row_filter=And(
EqualTo("ticker", ticker),
@@ -101,12 +107,22 @@ class IcebergClient:
)
)
if columns is not None:
scan = scan.select(*columns)
if fetch_columns is not None:
scan = scan.select(*fetch_columns)
df = scan.to_pandas()
if not df.empty:
# Deduplicate: keep the most-recently-ingested row per timestamp.
if "ingested_at" in df.columns:
df = (
df.sort_values("ingested_at", ascending=False)
.drop_duplicates(subset=["timestamp"])
)
# Drop ingested_at if the caller did not ask for it
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
df = df.drop(columns=["ingested_at"])
df = df.sort_values("timestamp")
# Convert integer nanosecond timestamps to DatetimeIndex
df.index = pd.to_datetime(df["timestamp"], unit="ns", utc=True)