54 lines
2.4 KiB
SQL
54 lines
2.4 KiB
SQL
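-- Iceberg OHLC Table Schema (Iceberg 1.10.1)
-- Single table for all periods with hidden partitioning
-- Natural key: (ticker, period_seconds, timestamp) - enforced by application logic, not by Iceberg
-- Partition by: (ticker) - identity partition, matching the PARTITIONED BY clause below.
--   A days(timestamp) transform is not used: timestamp is stored as BIGINT microseconds,
--   and Iceberg's days() transform only accepts date/timestamp columns.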
|
|
|
|
-- trading.ohlc: OHLC candles for every market and every period in one table.
-- One logical row per (ticker, period_seconds, timestamp). Iceberg does not
-- enforce that key; uniqueness is the responsibility of the writer.
CREATE TABLE IF NOT EXISTS trading.ohlc (
    -- Natural key fields (uniqueness enforced by Flink upsert logic)
    ticker          STRING NOT NULL COMMENT 'Market identifier (e.g., BINANCE:BTC/USDT)',
    period_seconds  INT    NOT NULL COMMENT 'OHLC period in seconds (60, 300, 900, 3600, 14400, 86400, 604800, etc.)',
    timestamp       BIGINT NOT NULL COMMENT 'Candle timestamp in microseconds since epoch',

    -- OHLC price data (stored as integers, divide by rational denominator from market metadata)
    open            BIGINT NOT NULL COMMENT 'Opening price',
    high            BIGINT NOT NULL COMMENT 'Highest price',
    low             BIGINT NOT NULL COMMENT 'Lowest price',
    close           BIGINT NOT NULL COMMENT 'Closing price',

    -- Volume data (nullable: not every source reports it)
    volume          BIGINT COMMENT 'Total volume',
    buy_vol         BIGINT COMMENT 'Buy volume',
    sell_vol        BIGINT COMMENT 'Sell volume',

    -- Timing data
    -- NOTE(review): presumably the same microseconds-since-epoch unit as
    -- `timestamp`; the unit is not stated here - confirm against the writer.
    open_time       BIGINT COMMENT 'Timestamp when open price occurred',
    high_time       BIGINT COMMENT 'Timestamp when high price occurred',
    low_time        BIGINT COMMENT 'Timestamp when low price occurred',
    close_time      BIGINT COMMENT 'Timestamp when close price occurred',

    -- Additional fields
    open_interest   BIGINT COMMENT 'Open interest for futures',

    -- Metadata fields for tracking
    request_id      STRING COMMENT 'Request ID that generated this data (for historical requests)',
    ingested_at     BIGINT NOT NULL COMMENT 'Timestamp when data was ingested by Flink'
)
USING iceberg
-- Identity partition on ticker only. `timestamp` is a BIGINT, so Iceberg's
-- date-based transforms (days/hours/...) cannot be applied to it.
PARTITIONED BY (ticker)
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'snappy',
    'write.metadata.compression-codec' = 'gzip',
    -- Format v2 is required: the upsert path relies on equality delete files,
    -- and row-level delete files do not exist in table format v1.
    -- CREATE TABLE IF NOT EXISTS leaves an already-created table untouched, so
    -- an existing v1 table must be upgraded separately, e.g.
    --   ALTER TABLE trading.ohlc SET TBLPROPERTIES ('format-version' = '2');
    'format-version' = '2',
    -- Applies to engine-side MERGE commands; copy-on-write rewrites the
    -- affected data files instead of writing delete files.
    'write.merge.mode' = 'copy-on-write'
);
|
|
|
|
-- Note: Iceberg 1.x does not enforce primary key constraints.
-- Uniqueness of (ticker, period_seconds, timestamp) is enforced at the application layer:
-- - The Flink Iceberg sink writes equality delete files for upsert behavior
--   (equality deletes require Iceberg table format v2; check the 'format-version' property)
-- - Last-write-wins semantics for duplicate rows
-- - 'write.merge.mode' = 'copy-on-write' applies to engine-side MERGE commands,
--   which rewrite data files on updates
-- - Queries automatically filter out rows removed by delete files
|
|
|
|
-- Attach a descriptive comment to the trading.ohlc table.
COMMENT ON TABLE trading.ohlc
    IS 'Historical OHLC candle data from exchanges. Single table for all periods. Uniqueness enforced by Flink upsert mode with equality deletes.';
|