data fixes; indicator=>workspace sync
This commit is contained in:
@@ -10,7 +10,7 @@ from pyiceberg.expressions import (
|
||||
And,
|
||||
EqualTo,
|
||||
GreaterThanOrEqual,
|
||||
LessThanOrEqual
|
||||
LessThan,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -98,7 +98,7 @@ class IcebergClient:
|
||||
EqualTo("ticker", ticker),
|
||||
EqualTo("period_seconds", period_seconds),
|
||||
GreaterThanOrEqual("timestamp", start_time),
|
||||
LessThanOrEqual("timestamp", end_time)
|
||||
LessThan("timestamp", end_time) # end_time is exclusive
|
||||
)
|
||||
)
|
||||
|
||||
@@ -110,6 +110,10 @@ class IcebergClient:
|
||||
|
||||
if not df.empty:
|
||||
df = df.sort_values("timestamp")
|
||||
# Convert integer microsecond timestamps to DatetimeIndex
|
||||
df.index = pd.to_datetime(df["timestamp"], unit="us", utc=True)
|
||||
df.index.name = "datetime"
|
||||
df = df.drop(columns=["timestamp"])
|
||||
# Apply price/volume conversion if metadata client available
|
||||
if self.metadata_client is not None:
|
||||
df = self._apply_denominators(df, ticker)
|
||||
@@ -186,9 +190,9 @@ class IcebergClient:
|
||||
# Convert period to microseconds
|
||||
period_micros = period_seconds * 1_000_000
|
||||
|
||||
# Generate expected timestamps
|
||||
expected_timestamps = list(range(start_time, end_time + 1, period_micros))
|
||||
actual_timestamps = set(df['timestamp'].values)
|
||||
# Generate expected timestamps — end_time is exclusive
|
||||
expected_timestamps = list(range(start_time, end_time, period_micros))
|
||||
actual_timestamps = set(df.index.view('int64') // 1000)
|
||||
|
||||
# Find gaps
|
||||
missing = sorted(set(expected_timestamps) - actual_timestamps)
|
||||
|
||||
@@ -12,6 +12,8 @@ from .symbol_metadata_client import SymbolMetadataClient
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OHLCClient:
|
||||
"""
|
||||
@@ -118,6 +120,11 @@ class OHLCClient:
|
||||
TimeoutError: If historical data request times out
|
||||
ValueError: If request fails
|
||||
"""
|
||||
# Align times to period boundaries: [ceil(start), ceil(end)) exclusive
|
||||
period_micros = period_seconds * 1_000_000
|
||||
start_time = ((start_time + period_micros - 1) // period_micros) * period_micros
|
||||
end_time = ((end_time + period_micros - 1) // period_micros) * period_micros # exclusive
|
||||
|
||||
# Step 1: Check Iceberg for existing data
|
||||
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
|
||||
|
||||
@@ -128,7 +135,7 @@ class OHLCClient:
|
||||
|
||||
if not missing_ranges:
|
||||
# All data exists in Iceberg
|
||||
return df
|
||||
return self._forward_fill_gaps(df, period_seconds)
|
||||
|
||||
# Step 3: Request missing data for each range
|
||||
# For simplicity, request entire range (relay can merge adjacent requests)
|
||||
@@ -147,6 +154,39 @@ class OHLCClient:
|
||||
# Step 5: Query Iceberg again for complete dataset
|
||||
df = self.iceberg.query_ohlc(ticker, period_seconds, start_time, end_time)
|
||||
|
||||
return self._forward_fill_gaps(df, period_seconds)
|
||||
|
||||
def _forward_fill_gaps(self, df: pd.DataFrame, period_seconds: int) -> pd.DataFrame:
    """
    Forward-fill interior gap bars with the last known close.

    A gap bar is a row that is already present in ``df`` with a null
    ``close``. For every gap bar that has a real bar both before and after
    it, the previous real close is written into ``close`` and, when those
    columns exist, into ``open``, ``high`` and ``low``; ``volume`` (when
    present) is set to 0.0.

    Edge gaps (null-close rows before the first real bar or after the last
    real bar) are left untouched. Rows are never inserted: a timestamp slot
    that has no row in ``df`` stays absent.

    Args:
        df: OHLC frame containing at least a ``close`` column.
        period_seconds: Bar period in seconds. Not used by this method.

    Returns:
        ``df`` unchanged when it is empty; otherwise the frame sorted by
        index, with interior gap bars filled.
    """
    if df.empty:
        return df

    df = df.sort_index()

    close = df['close']
    missing = close.isna()
    if not missing.any():
        return df

    # A missing row is "interior" only when a real close exists on both
    # sides: ffill finds one before it, bfill finds one after it.
    carried_close = close.ffill()
    is_gap = missing & carried_close.notna() & close.bfill().notna()
    if not is_gap.any():
        return df

    # Carry the last real close into every price column of interior gap rows.
    df['close'] = close.where(~is_gap, carried_close)
    for col in ('open', 'high', 'low'):
        if col in df.columns:
            df[col] = df[col].where(~is_gap, carried_close)

    # A synthesized bar traded nothing.
    if 'volume' in df.columns:
        df['volume'] = df['volume'].where(~is_gap, 0.0)

    return df
|
||||
|
||||
async def __aenter__(self):
|
||||
|
||||
Reference in New Issue
Block a user