custom indicators fixed
This commit is contained in:
@@ -92,6 +92,12 @@ class IcebergClient:
|
||||
# Reload table metadata to pick up snapshots committed after this client was initialized
|
||||
self.table = self.catalog.load_table(f"{self.namespace}.ohlc")
|
||||
|
||||
# Always fetch ingested_at alongside requested columns so we can deduplicate rows from
|
||||
# duplicate parquet files left by repeated Flink job runs on the same key range.
|
||||
fetch_columns = None
|
||||
if columns is not None:
|
||||
fetch_columns = list(columns) if "ingested_at" in columns else list(columns) + ["ingested_at"]
|
||||
|
||||
scan = self.table.scan(
|
||||
row_filter=And(
|
||||
EqualTo("ticker", ticker),
|
||||
@@ -101,12 +107,22 @@ class IcebergClient:
|
||||
)
|
||||
)
|
||||
|
||||
if columns is not None:
|
||||
scan = scan.select(*columns)
|
||||
if fetch_columns is not None:
|
||||
scan = scan.select(*fetch_columns)
|
||||
|
||||
df = scan.to_pandas()
|
||||
|
||||
if not df.empty:
|
||||
# Deduplicate: keep the most-recently-ingested row per timestamp.
|
||||
if "ingested_at" in df.columns:
|
||||
df = (
|
||||
df.sort_values("ingested_at", ascending=False)
|
||||
.drop_duplicates(subset=["timestamp"])
|
||||
)
|
||||
# Drop ingested_at if the caller did not ask for it
|
||||
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
|
||||
df = df.drop(columns=["ingested_at"])
|
||||
|
||||
df = df.sort_values("timestamp")
|
||||
# Convert integer nanosecond timestamps to DatetimeIndex
|
||||
df.index = pd.to_datetime(df["timestamp"], unit="ns", utc=True)
|
||||
|
||||
Reference in New Issue
Block a user