backend redesign

This commit is contained in:
2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

53
iceberg/ohlc_schema.sql Normal file
View File

@@ -0,0 +1,53 @@
-- Iceberg OHLC Table Schema (Iceberg 1.10.1)
-- Single table for all periods with hidden partitioning
-- Natural key: (ticker, period_seconds, timestamp) - enforced by application logic
-- Partition by: (ticker) - identity partition only. `timestamp` is stored as a
--   BIGINT (microseconds since epoch), and Iceberg's days() transform needs a
--   date/timestamp source column, so it cannot be applied to this column.
CREATE TABLE IF NOT EXISTS trading.ohlc (
    -- Natural key fields (uniqueness enforced by Flink upsert logic)
    ticker STRING NOT NULL COMMENT 'Market identifier (e.g., BINANCE:BTC/USDT)',
    period_seconds INT NOT NULL COMMENT 'OHLC period in seconds (60, 300, 900, 3600, 14400, 86400, 604800, etc.)',
    timestamp BIGINT NOT NULL COMMENT 'Candle timestamp in microseconds since epoch',

    -- OHLC price data (stored as integers, divide by rational denominator from market metadata)
    open BIGINT NOT NULL COMMENT 'Opening price',
    high BIGINT NOT NULL COMMENT 'Highest price',
    low BIGINT NOT NULL COMMENT 'Lowest price',
    close BIGINT NOT NULL COMMENT 'Closing price',

    -- Volume data (nullable)
    volume BIGINT COMMENT 'Total volume',
    buy_vol BIGINT COMMENT 'Buy volume',
    sell_vol BIGINT COMMENT 'Sell volume',

    -- Timing data (nullable)
    -- NOTE(review): presumably microseconds since epoch like `timestamp` - confirm with the producer.
    open_time BIGINT COMMENT 'Timestamp when open price occurred',
    high_time BIGINT COMMENT 'Timestamp when high price occurred',
    low_time BIGINT COMMENT 'Timestamp when low price occurred',
    close_time BIGINT COMMENT 'Timestamp when close price occurred',

    -- Additional fields
    open_interest BIGINT COMMENT 'Open interest for futures',

    -- Metadata fields for tracking
    request_id STRING COMMENT 'Request ID that generated this data (for historical requests)',
    ingested_at BIGINT NOT NULL COMMENT 'Timestamp when data was ingested by Flink'
)
USING iceberg
PARTITIONED BY (ticker)
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'snappy',
    'write.metadata.compression-codec' = 'gzip',
    -- Row-level (equality) delete files, which the Flink upsert sink writes,
    -- only exist in table format v2; a v1 table cannot hold them.
    'format-version' = '2',
    'write.merge.mode' = 'copy-on-write'
);
-- Note: Iceberg 1.x does not enforce primary key (uniqueness) constraints.
-- Uniqueness of (ticker, period_seconds, timestamp) is enforced at the application layer:
-- - The Flink Iceberg sink writes equality delete files to get upsert behavior
-- - Duplicate rows are resolved with last-write-wins semantics
-- - Copy-on-write mode ('write.merge.mode', used by MERGE commands) rewrites data files on updates
-- - Queries automatically filter out deleted rows
-- Attach a table-level description to trading.ohlc.
COMMENT ON TABLE trading.ohlc
    IS 'Historical OHLC candle data from exchanges. Single table for all periods. Uniqueness enforced by Flink upsert mode with equality deletes.';