OHLC chunk caching and broad NativeOHLC use

This commit is contained in:
Tim
2024-02-14 20:56:19 -04:00
parent a14a26d38d
commit 1f9b778ac0
3 changed files with 185 additions and 109 deletions

View File

@@ -198,7 +198,10 @@ class BlockState:
else:
updated_keys.add((d.series, d.key))
del self.diffs_by_hash[dead.hash]
del self.ancestors[dead.hash]
try:
del self.ancestors[dead.hash]
except KeyError:
pass # todo is this bad?
# prune diffs_by_series by removing old series diffs that have been superceded by new diffs
for s, k in updated_keys:

View File

@@ -1,7 +1,6 @@
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional, NamedTuple, Reversible, Union
@@ -35,27 +34,63 @@ def opt_dec(v):
def dt(v):
return v if isinstance(v, datetime) else from_timestamp(v)
@dataclass
class NativeOHLC:
@staticmethod
def from_ohlc(ohlc: OHLC) -> 'NativeOHLC':
return NativeOHLC(*[cast(value) for value, cast in zip(ohlc,(dt, opt_dec, opt_dec, opt_dec, dec))])
return NativeOHLC(*[cast(value) for value, cast in zip(ohlc,(dt, opt_dec, opt_dec, opt_dec, dec))], ohlc=ohlc)
# noinspection PyShadowingBuiltins
def __init__(self, start: datetime, open: Optional[dec], high: Optional[dec], low: Optional[dec], close: dec,
*, ohlc: list=None):
self.start = start
self.open = open
self.high = high
self.low = low
self.close = close
self._ohlc = ohlc # JSON-able representation
self._json: str = None if ohlc is None else json.dumps(ohlc)
def update(self, price):
pstr = str(price)
if self.open is None:
self.open = price
self.high = price
self.low = price
if self._ohlc:
self._ohlc[1] = pstr
self._ohlc[2] = pstr
self._ohlc[3] = pstr
else:
self.high = max(self.high, price)
self.low = min(self.low, price)
if self._ohlc:
self._ohlc[1] = str(self.high)
self._ohlc[2] = str(self.low)
self.close = price
if self._ohlc:
self._ohlc[3] = pstr
self._json = None
start: datetime
open: Optional[dec]
high: Optional[dec]
low: Optional[dec]
close: dec
@property
def ohlc(self) -> OHLC:
return [
timestamp(self.start),
None if self.open is None else str(self.open),
None if self.high is None else str(self.high),
None if self.low is None else str(self.low),
str(self.close)
]
if self._ohlc is None:
self._ohlc = [
timestamp(self.start),
None if self.open is None else str(self.open),
None if self.high is None else str(self.high),
None if self.low is None else str(self.low),
str(self.close)
]
return self._ohlc
@property
def json(self) -> str:
if self._json is None:
self._json = json.dumps(self.ohlc)
return self._json
def period_name(period: timedelta) -> str:
@@ -79,13 +114,13 @@ def ohlc_start_time(time, period: timedelta):
return OHLC_DATE_ROOT + timedelta(seconds=period_sec * period_count)
def update_ohlc(prev: OHLC, period: timedelta, time: datetime, price: Optional[dec]) -> list[OHLC]:
def update_ohlc(prev: NativeOHLC, period: timedelta, time: datetime, price: Optional[dec]) -> list[NativeOHLC]:
"""
returns an ordered list of OHLC's that have been created/modified by the new time/price
if price is None, then bars are advanced based on the time but no new price is added to the series.
"""
# log.debug(f'\tupdating {prev} with {minutely(time)} {price}')
cur = NativeOHLC.from_ohlc(prev)
cur = prev
assert time >= cur.start
result = []
# advance time and finalize any past OHLC's into the result array
@@ -93,20 +128,13 @@ def update_ohlc(prev: OHLC, period: timedelta, time: datetime, price: Optional[d
end = cur.start + period
if time < end:
break
result.append(cur.ohlc)
result.append(cur)
cur = NativeOHLC(end, None, None, None, cur.close)
# log.debug(f'\ttime advancements: {result}')
# if we are setting a price, update the current bar
if price is not None:
if cur.open is None:
cur.open = price
cur.high = price
cur.low = price
else:
cur.high = max(cur.high, price)
cur.low = min(cur.low, price)
cur.close = price
result.append(cur.ohlc)
cur.update(price)
result.append(cur)
# log.debug(f'\tappended current bar: {cur.ohlc}')
# log.debug(f'\tupdate result: {result}')
return result
@@ -116,13 +144,85 @@ class OHLCKey (NamedTuple):
period: timedelta
def chunk_path(symbol: str, period: timedelta, time: datetime, *, chain_id: int = None) -> str:
if chain_id is None:
chain_id = current_chain.get().chain_id
start = ohlc_start_time(time, period)
name = period_name(period)
return f'{chain_id}/{symbol}/{name}/' + (
f'{start.year}/{symbol}-{name}-{start:%Y%m%d}.json' if period < timedelta(hours=1) else # <1H data has a file per day
f'{start.year}/{symbol}-{name}-{start:%Y%m}.json' if period < timedelta(days=7) else # <1W data has a file per month
f'{symbol}-{name}.json' # long periods are a single file for all of history
)
class Chunk:
""" Chunks map to files of OHLC's on disk """
def __init__(self, repo_dir: str, symbol: str, period: timedelta, time: datetime,
*, bars: Optional[list[NativeOHLC]] = None, chain_id: int = None):
self.repo_dir = repo_dir
self.symbol = symbol
self.period = period
self.chain_id = chain_id if chain_id is not None else current_chain.get().chain_id
self.path = chunk_path(symbol, period, time, chain_id=chain_id)
self.fullpath = os.path.join(repo_dir, self.path)
if bars is not None:
self.bars = bars
else:
try:
with open(self.fullpath, 'r') as file:
bars = json.load(file)
self.bars = [NativeOHLC.from_ohlc(ohlc) for ohlc in bars]
except FileNotFoundError:
self.bars = []
def update(self, native: NativeOHLC):
if not self.bars:
self.bars = [native]
return
start = self.bars[0].start
index = (native.start - start) // self.period
# log.debug(f'save {symbol} {ohlc_name(period)} chunk {start} index {index} <= {len(chunk)}')
if index > len(self.bars):
log.error(f'chunk gap: {index} > {len(self.bars)} {self.symbol} {period_name(self.period)} {native}' + ''.join(
f'\n\t{c}' for c in self.bars))
exit(1)
if index == len(self.bars):
assert self.bars[-1].start + self.period == native.start
self.bars.append(native)
else:
assert self.bars[index].start == native.start
self.bars[index] = native
def save(self):
for _ in range(2):
try:
with open(self.fullpath, 'w') as file:
json.dump([n.ohlc for n in self.bars], file)
return
except FileNotFoundError:
os.makedirs(os.path.dirname(self.fullpath), exist_ok=True)
raise IOError(f'Could not write chunk {self.fullpath}')
def __eq__(self, other):
return self.path == other.path
def __hash__(self):
return hash(self.path)
class OHLCRepository:
def __init__(self, base_dir: str = None):
""" can't actually make more than one of these because there's a global recent_ohlcs BlockDict """
if base_dir is None:
base_dir = config.ohlc_dir
self.dir = base_dir
self.cache = LFUCache(len(OHLC_PERIODS) * 128) # enough for the top 128 pools
self.cache = LFUCache(len(OHLC_PERIODS) * 1024)
self.dirty_chunks = set()
@staticmethod
def add_symbol(symbol: str, period: timedelta = None):
@@ -139,28 +239,30 @@ class OHLCRepository:
self.update(symbol, period, time, price, create=create)
@staticmethod
def update(symbol: str, period: timedelta, time: datetime, price: Optional[dec] = None, *, create: bool = True) -> Optional[list[OHLC]]:
def update(symbol: str, period: timedelta, time: datetime, price: Optional[dec] = None, *, create: bool = True) \
-> Optional[list[NativeOHLC]]:
"""
if price is None, then bars are advanced based on the time but no new price is added to the series.
"""
logname = f'{symbol} {period_name(period)}'
# log.debug(f'Updating OHLC {logname} {minutely(time)} {price}')
key = (symbol, period)
# bars is a list of "recent" OHLC's stored as blockdata. we try to keep the recent array long enough to extend before the root block time
historical: Optional[list[OHLC]] = recent_ohlcs.get(key)
key = symbol, period
# recent_ohlcs holds a list of "recent" NativeOHLC's stored as blockdata. we try to keep the recent array long
# enough to extend prior the root block time
historical: Optional[list[NativeOHLC]] = recent_ohlcs.get(key)
# log.debug(f'got recent {historical}')
if not historical:
if create is False or price is None:
return # do not track symbols which have not been explicity set up
p = str(price)
historical = []
updated = [OHLC((timestamp(ohlc_start_time(time, period)), p, p, p, p))]
updated = [NativeOHLC(ohlc_start_time(time, period), price, price, price, price)]
# log.debug(f'\tcreated new bars {updated}')
else:
updated = update_ohlc(historical[-1], period, time, price)
# drop any historical bars that are older than we need
oldest_needed = from_timestamp(current_blockstate.get().root_block.timestamp) - period # cover the root block time plus one period prior
trim = (oldest_needed - from_timestamp(historical[0][0])) // period
# oldest_needed = cover the root block time plus one period prior
oldest_needed = from_timestamp(current_blockstate.get().root_block.timestamp) - period
trim = (oldest_needed - historical[0].start) // period
if trim > 0:
historical = historical[trim:]
@@ -168,118 +270,89 @@ class OHLCRepository:
if not historical or not updated:
updated = historical + updated
else:
last_bar = from_timestamp(historical[-1][0])
first_updated = from_timestamp(updated[0][0])
last_bar = historical[-1].start
first_updated = updated[0].start
overlap = (first_updated - last_bar) // period + 1
updated = historical[:-overlap] + updated if overlap > 0 else historical + updated
# log.debug(f'\tnew recents: {updated}')
recent_ohlcs.setitem(key, updated)
return updated
def save_all(self, symbol: str, period: timedelta, ohlc_list: list[OHLC]) -> None:
def save_all(self, symbol: str, period: timedelta, ohlc_list: list[NativeOHLC]) -> None:
""" saves all OHLC's in the list """
for ohlc in ohlc_list:
self.save(symbol, period, ohlc) # we need to act synchronously so we don't have conflicting access to chunks
self.save(symbol, period, ohlc)
def save(self, symbol: str, period: timedelta, ohlc: OHLC) -> None:
def save(self, symbol: str, period: timedelta, native: NativeOHLC) -> None:
"""
pushes the native OHLC to a Chunk but DOES NOT WRITE TO DISK YET. you must call flush() to write all
dirty chunks to disk after a batch of OHLC's has been saved.
"""
# log.debug(f'saving ohlc {symbol} {period_name(period)} {ohlc}')
time = dt(ohlc[0])
chunk = self.get_chunk(symbol, period, time)
chunk = self.get_chunk(symbol, period, native.start)
if not chunk:
chunk = [ohlc]
chunk = self.cache[chunk.path] = Chunk(self.dir, symbol, period, native.start, bars=[native])
else:
start = from_timestamp(chunk[0][0])
index = (time - start) // period
# log.debug(f'save {symbol} {ohlc_name(period)} chunk {start} index {index} <= {len(chunk)}')
if index > len(chunk):
log.error(f'chunk gap: {index} > {len(chunk)} {symbol} {period_name(period)} {ohlc}'+''.join(f'\n\t{c}' for c in chunk))
exit(1)
if index == len(chunk):
assert from_timestamp(chunk[-1][0]) + period == time
chunk.append(ohlc)
else:
assert from_timestamp(chunk[index][0]) == time
chunk[index] = ohlc
self.save_chunk(symbol, period, chunk)
chunk.update(native)
self.dirty_chunks.add(chunk)
def get_chunk(self, symbol: str, period: timedelta, start_time: datetime) -> list[OHLC]:
path = self.chunk_path(symbol, period, start_time)
found = self.cache.get(path)
def get_chunk(self, symbol: str, period: timedelta, time: datetime) -> Chunk:
start_time = ohlc_start_time(time, period)
chain_id = current_chain.get().chain_id
key = chunk_path(symbol, period, time, chain_id=chain_id)
found = self.cache.get(key)
if found is None:
found = self.load_chunk(path)
if found is None:
found = []
self.cache[path] = found
found = self.cache[key] = Chunk(self.dir, symbol, period, start_time, chain_id=chain_id)
return found
@staticmethod
def load_chunk(path: str) -> Optional[list[OHLC]]:
try:
with open(path, 'r') as file:
return json.load(file)
except FileNotFoundError:
return []
except:
log.error(f'exception loading chunk {path}')
raise
def save_chunk(self, symbol: str, period: timedelta, chunk: list[OHLC]):
if not chunk:
return
path = self.chunk_path(symbol, period, from_timestamp(chunk[0][0]))
for _ in range(2):
try:
with open(path, 'w') as file:
json.dump(chunk, file)
self.cache[path] = chunk
return
except FileNotFoundError:
os.makedirs(os.path.dirname(path), exist_ok=True)
raise IOError(f'Could not write chunk {path}')
def flush(self) -> None:
for chunk in self.dirty_chunks:
chunk.save()
self.dirty_chunks.clear()
def chunk_path(self, symbol: str, period: timedelta, time: datetime, *, chain_id: int = None) -> str:
if chain_id is None:
chain_id = current_chain.get().chain_id
start = ohlc_start_time(time, period)
name = period_name(period)
return f'{self.dir}/{chain_id}/{symbol}/{name}/' + (
f'{start.year}/{symbol}-{name}-{start:%Y%m%d}.json' if period < timedelta(hours=1) else # <1H data has a file per day
f'{start.year}/{symbol}-{name}-{start:%Y%m}.json' if period < timedelta(days=7) else # <1W data has a file per month
f'{symbol}-{name}.json' # long periods are a single file for all of history
)
def pub_ohlc(_series:str, key: OHLCKey, bars: list[OHLC]):
def pub_ohlc(_series:str, key: OHLCKey, bars: list[NativeOHLC]):
pool_addr, period = key
chain_id = current_chain.get().chain_id
return (
f'{chain_id}|{pool_addr}|{period_name(period)}', # channel name is like 0x...|1m
'ohlcs',
(chain_id, pool_addr, bars)
(chain_id, pool_addr, [b.ohlc for b in bars])
)
def ohlc_key_to_str(k):
return f'{k[0]}|{period_name(k[1])}'
def ohlc_str_to_key(s):
pool, period_name = s.split('|')
return pool, period_from_name(period_name)
pool, period = s.split('|')
return pool, period_from_name(period)
def ohlc_save(_block: Block, diffs: Reversible[Union[DiffItem, DiffEntryItem]]):
"""
used as a finalization callback from BlockState data.
"""
dirty = False
for diff in diffs:
if diff.series == 'ohlc':
symbol, period = diff.key
ohlcs.save_all(symbol, period, diff.value)
dirty = True
if dirty:
ohlcs.flush()
def ohlc_value_to_string(bars: list[NativeOHLC]):
return '['+','.join(b.json for b in bars)+']'
def ohlc_value_from_string(s: str):
return [NativeOHLC.from_ohlc(ohlc) for ohlc in json.loads(s)]
# The most recent OHLC's are stored as block data. We store a list of at least the two latest bars, which provides clients with
# the latest finalized bar as well as the current open bar.
recent_ohlcs: BlockDict[OHLCKey, list[OHLC]] = BlockDict('ohlc', db=True, redis=True, pub=pub_ohlc,
key2str=ohlc_key_to_str, str2key=ohlc_str_to_key,
series2key=lambda x:x, series2str=lambda x:x)
recent_ohlcs: BlockDict[OHLCKey, list[NativeOHLC]] = BlockDict(
'ohlc', db=True, redis=True, pub=pub_ohlc,
key2str=ohlc_key_to_str, str2key=ohlc_str_to_key, series2key=lambda x: x, series2str=lambda x: x,
value2str=ohlc_value_to_string, str2value=ohlc_value_from_string
)
ohlcs = OHLCRepository()

View File

@@ -73,7 +73,7 @@ async def uniswap_price(pool: PoolDict, sqrt_price=None) -> dec:
pool_dec = pool['decimals']
price = dec(sqrt_price*sqrt_price) / 2 ** (96 * 2)
result = price * dec(10) ** dec(pool_dec)
log.debug(f'pool sqrtX96 {sqrt_price} with {pool_dec} decimals = {result}')
# log.debug(f'pool sqrtX96 {sqrt_price} with {pool_dec} decimals = {result}')
return result