data pipeline refactor and fix

This commit is contained in:
2026-04-13 18:30:04 -04:00
parent 6418729b16
commit 326bf80846
96 changed files with 7107 additions and 1763 deletions

View File

@@ -11,6 +11,7 @@ Brings together:
import asyncio
import contextlib
import json
import logging
import os
import signal
@@ -32,7 +33,7 @@ from starlette.routing import Route, Mount
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
from dexorder.api import set_api, API
from dexorder.conda_manager import sync_packages, install_packages
from dexorder.conda_manager import sync_packages, install_packages, cleanup_extra_packages
from dexorder.events import EventType, UserEvent, DeliverySpec
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
@@ -41,6 +42,8 @@ from dexorder.tools.workspace_tools import get_workspace_store
from dexorder.tools.evaluate_indicator import evaluate_indicator
from dexorder.tools.backtest_strategy import backtest_strategy
from dexorder.tools.activate_strategy import activate_strategy, deactivate_strategy, list_active_strategies
from dexorder.strategy.event_bridge import StrategyEventBridge
from dexorder.strategy.lifecycle import get_strategy_lifecycle
# =============================================================================
# Global Data Directory
@@ -59,19 +62,34 @@ def get_data_dir() -> Path:
# =============================================================================
# Indicator Types Helpers
# Category Types Helpers
# =============================================================================
def _build_indicator_type_entry(meta: dict) -> dict:
"""Build an indicator_types workspace entry from indicator metadata dict."""
def _type_store_name(category: str) -> str:
return f"{category}_types"
def _type_store_key(category: str, name: str) -> str:
    """Build the store key for *name*: sanitized lowercase, prefixed with "custom_" for indicators."""
    base = sanitize_name(name).lower()
    if category == "indicator":
        return f"custom_{base}"
    return base
def _build_type_entry(category: str, meta: dict) -> dict:
"""Build a {category}_types workspace entry from a metadata dict."""
name = meta.get('name', '')
pandas_ta_name = f"custom_{sanitize_name(name).lower()}"
key = _type_store_key(category, name)
now = int(time.time())
return {
'pandas_ta_name': pandas_ta_name,
entry = {
'key': key,
'display_name': name,
'description': meta.get('description', ''),
'metadata': {
'metadata': {},
'created_at': now,
'modified_at': now,
}
if category == "indicator":
entry['pandas_ta_name'] = key
entry['metadata'] = {
'display_name': name,
'parameters': meta.get('parameters') or {},
'input_series': meta.get('input_series') or ['close'],
@@ -79,31 +97,89 @@ def _build_indicator_type_entry(meta: dict) -> dict:
'pane': meta.get('pane', 'separate'),
'filled_areas': meta.get('filled_areas') or [],
'bands': meta.get('bands') or [],
},
'created_at': now,
'modified_at': now,
}
}
elif category == "strategy":
entry['metadata'] = {
'data_feeds': meta.get('data_feeds') or [],
'parameters': meta.get('parameters') or {},
}
# research: metadata stays empty (no fields beyond base)
return entry
def _upsert_indicator_type(workspace_store, category_manager, name: str) -> None:
"""Read indicator metadata from disk and upsert into indicator_types workspace store."""
read_result = category_manager.read('indicator', name)
def _upsert_type(workspace_store, category_manager, category: str, name: str) -> None:
"""Read category metadata from disk and upsert into the {category}_types workspace store."""
read_result = category_manager.read(category, name)
if not read_result.get('exists') or not read_result.get('metadata'):
return
meta = read_result['metadata']
entry = _build_indicator_type_entry(meta)
pandas_ta_name = entry['pandas_ta_name']
entry = _build_type_entry(category, read_result['metadata'])
key = entry['key']
store = _type_store_name(category)
# Preserve original created_at if already present
existing = workspace_store.read('indicator_types')
existing = workspace_store.read(store)
existing_types = (existing.get('data') or {}).get('types') or {}
if pandas_ta_name in existing_types:
entry['created_at'] = existing_types[pandas_ta_name].get('created_at', entry['created_at'])
if key in existing_types:
entry['created_at'] = existing_types[key].get('created_at', entry['created_at'])
workspace_store.patch('indicator_types', [
{'op': 'add', 'path': f'/types/{pandas_ta_name}', 'value': entry}
])
logging.info(f"Upserted indicator_types/{pandas_ta_name} for '{name}'")
workspace_store.patch(store, [{'op': 'add', 'path': f'/types/{key}', 'value': entry}])
logging.info(f"Upserted {store}/{key} for '{name}'")
def _remove_type(workspace_store, category: str, name: str) -> None:
    """Remove a category item from the {category}_types workspace store.

    Best-effort: a missing entry is not an error, but the failure is now
    logged at debug level instead of being silently swallowed, so genuine
    store problems stay diagnosable.

    For indicators, any live chart instances referencing the removed type
    are also purged from the indicators store.
    """
    key = _type_store_key(category, name)
    store = _type_store_name(category)
    try:
        workspace_store.patch(store, [{'op': 'remove', 'path': f'/types/{key}'}])
        logging.info(f"Removed {store}/{key} for '{name}'")
    except Exception as e:
        # Entry may simply not exist; keep best-effort semantics but leave a trace.
        logging.debug("Could not remove %s/%s: %s", store, key, e)
    if category == "indicator":
        _remove_indicator_instances(workspace_store, key)
def _remove_indicator_instances(workspace_store, pandas_ta_name: str) -> None:
"""Remove all instances of a custom indicator from the indicators workspace store."""
existing = workspace_store.read('indicators')
instances = (existing.get('data') or {}).get('indicators') or {}
to_remove = [inst_id for inst_id, inst in instances.items()
if inst.get('pandas_ta_name') == pandas_ta_name]
if not to_remove:
return
patches = [{'op': 'remove', 'path': f'/indicators/{inst_id}'} for inst_id in to_remove]
try:
workspace_store.patch('indicators', patches)
logging.info(f"Removed {len(to_remove)} instance(s) of {pandas_ta_name} from indicators store")
except Exception:
logging.warning(f"Failed to remove indicator instances for {pandas_ta_name}", exc_info=True)
def _populate_types_from_disk(workspace_store, category_manager, category: str) -> None:
    """Scan existing category items and add any missing entries to the {category}_types store."""
    store = _type_store_name(category)
    snapshot = workspace_store.read(store)
    known = (snapshot.get('data') or {}).get('types') or {}
    count = 0
    for item in category_manager.list_items(category).get('items', []):
        display_name = item.get('name', '')
        if not display_name:
            continue
        if _type_store_key(category, display_name) in known:
            continue
        _upsert_type(workspace_store, category_manager, category, display_name)
        count += 1
    if count > 0:
        logging.info(f"Populated {count} {category} type(s) from disk into {store}")
def _get_env_yml() -> Optional[Path]:
"""Return the path to environment.yml if it exists alongside main.py."""
p = Path(__file__).parent / "environment.yml"
return p if p.exists() else None
def _populate_indicator_types_from_disk(workspace_store, category_manager) -> None:
@@ -226,8 +302,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
category_manager = get_category_manager(config.data_dir)
logging.info(f"Category manager initialized at {config.data_dir}")
# Populate indicator_types store from existing indicators on disk (migration/startup sync)
_populate_indicator_types_from_disk(workspace_store, category_manager)
# Populate {category}_types stores from existing items on disk (migration/startup sync)
for _cat in ("indicator", "strategy", "research"):
_populate_types_from_disk(workspace_store, category_manager, _cat)
@server.list_resources()
async def list_resources():
@@ -503,6 +580,25 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
"required": ["revision", "category", "name"]
}
),
Tool(
name="python_delete",
description="Delete a category script permanently. Commits removal to git history and removes any conda packages that are no longer needed.",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category of the script"
},
"name": {
"type": "string",
"description": "Display name of the item to delete"
}
},
"required": ["category", "name"]
}
),
Tool(
name="conda_sync",
description="Sync conda packages: scan all metadata, remove unused packages (excluding base environment)",
@@ -699,6 +795,77 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
"required": []
}
),
Tool(
name="get_backtest_results",
description=(
"Retrieve stored backtest results for a strategy. "
"Returns the most recent backtest runs with summary stats, "
"extended statistics, trade list, and equity curve."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"limit": {
"type": "integer",
"description": "Maximum number of backtest runs to return (default 5)",
"default": 5
}
},
"required": ["strategy_name"]
}
),
Tool(
name="get_strategy_trades",
description=(
"Retrieve the trade log for a strategy (live/paper or backtest). "
"Returns individual round-trip trades with entry/exit prices and PnL."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"limit": {
"type": "integer",
"description": "Maximum number of trades to return (default 100)",
"default": 100
}
},
"required": ["strategy_name"]
}
),
Tool(
name="get_strategy_events",
description=(
"Retrieve the event log for a strategy "
"(PnL updates, fills, errors, status changes)."
),
inputSchema={
"type": "object",
"properties": {
"strategy_name": {
"type": "string",
"description": "Display name of the strategy"
},
"event_type": {
"type": "string",
"description": "Filter by event type (optional): PNL_UPDATE, ORDER_FILLED, ERROR, etc."
},
"limit": {
"type": "integer",
"description": "Maximum number of events to return (default 50)",
"default": 50
}
},
"required": ["strategy_name"]
}
),
]
@@ -734,7 +901,11 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}", f"path: {result['path']}"]
meta_parts = [f"success: {result['success']}"]
if result.get('path'):
meta_parts.append(f"path: {result['path']}")
if result.get('error'):
meta_parts.append(f"error: {result['error']}")
if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"):
@@ -747,8 +918,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
logging.info(f"python_write '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else:
logging.info(f"python_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
if result.get("success") and arguments.get("category") == "indicator":
_upsert_indicator_type(workspace_store, category_manager, arguments.get("name", ""))
if result.get("success"):
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
cleanup_extra_packages(get_data_dir(), _get_env_yml())
return content
elif name == "python_edit":
result = category_manager.edit(
@@ -760,7 +932,11 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}", f"path: {result['path']}"]
meta_parts = [f"success: {result['success']}"]
if result.get('path'):
meta_parts.append(f"path: {result['path']}")
if result.get('error'):
meta_parts.append(f"error: {result['error']}")
if result.get("revision"):
meta_parts.append(f"revision: {result['revision']}")
if result.get("validation") and not result["validation"].get("success"):
@@ -773,8 +949,9 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
logging.info(f"python_edit '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else:
logging.info(f"python_edit '{arguments.get('name')}': no execution result")
if result.get("success") and arguments.get("category") == "indicator":
_upsert_indicator_type(workspace_store, category_manager, arguments.get("name", ""))
if result.get("success"):
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
cleanup_extra_packages(get_data_dir(), _get_env_yml())
return content
elif name == "python_read":
return category_manager.read(
@@ -808,13 +985,28 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
meta_parts.append(f"error: {result['error']}")
if result.get("validation") and not result["validation"].get("success"):
meta_parts.append(f"validation errors: {result['validation'].get('errors', [])}")
if result.get("success"):
_upsert_type(workspace_store, category_manager, arguments.get("category", ""), arguments.get("name", ""))
return [TextContent(type="text", text="\n".join(meta_parts))]
elif name == "python_delete":
result = category_manager.delete(
category=arguments.get("category", ""),
name=arguments.get("name", "")
)
if result.get("success"):
_remove_type(workspace_store, arguments.get("category", ""), arguments.get("name", ""))
cleanup_result = cleanup_extra_packages(get_data_dir(), _get_env_yml())
if cleanup_result.get("removed"):
result["packages_removed"] = cleanup_result["removed"]
parts = [f"success: {result['success']}"]
for k in ("category", "name", "revision", "packages_removed", "error"):
if result.get(k):
parts.append(f"{k}: {result[k]}")
return [TextContent(type="text", text="\n".join(parts))]
elif name == "conda_sync":
# Get environment.yml path relative to main.py
env_yml = Path(__file__).parent / "environment.yml"
return sync_packages(
data_dir=get_data_dir(),
environment_yml=env_yml if env_yml.exists() else None
environment_yml=_get_env_yml()
)
elif name == "conda_install":
return install_packages(arguments.get("packages", []))
@@ -837,7 +1029,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
parameters=arguments.get("parameters") or {},
)
elif name == "backtest_strategy":
return await backtest_strategy(
result = await backtest_strategy(
strategy_name=arguments.get("strategy_name", ""),
feeds=arguments.get("feeds", []),
from_time=arguments.get("from_time"),
@@ -845,6 +1037,26 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
initial_capital=float(arguments.get("initial_capital", 10_000.0)),
paper=bool(arguments.get("paper", True)),
)
# Persist backtest to DB (non-fatal)
try:
payload = json.loads(result[0].text) if result and isinstance(result[0], TextContent) else {}
if payload and "summary" in payload:
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
await db.insert_backtest(
strategy_name=arguments.get("strategy_name", ""),
from_time=arguments.get("from_time"),
to_time=arguments.get("to_time"),
initial_capital=float(arguments.get("initial_capital", 10_000.0)),
feeds=arguments.get("feeds", []),
summary=payload.get("summary", {}),
statistics=payload.get("statistics", {}),
trades=payload.get("trades", []),
equity_curve=payload.get("equity_curve", []),
)
except Exception as _e:
logging.debug("Failed to persist backtest results: %s", _e)
return result
elif name == "activate_strategy":
return await activate_strategy(
strategy_name=arguments.get("strategy_name", ""),
@@ -858,6 +1070,31 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server
)
elif name == "list_active_strategies":
return await list_active_strategies()
elif name == "get_backtest_results":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
results = await db.get_backtests(
strategy_name=arguments.get("strategy_name", ""),
limit=int(arguments.get("limit", 5)),
)
return [TextContent(type="text", text=json.dumps({"backtest_runs": results}))]
elif name == "get_strategy_trades":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
trades = await db.get_trades(
strategy_name=arguments.get("strategy_name", ""),
limit=int(arguments.get("limit", 100)),
)
return [TextContent(type="text", text=json.dumps({"trades": trades}))]
elif name == "get_strategy_events":
from dexorder.strategy.db import get_strategy_db
db = get_strategy_db(get_data_dir())
events = await db.get_events(
strategy_name=arguments.get("strategy_name", ""),
event_type=arguments.get("event_type"),
limit=int(arguments.get("limit", 50)),
)
return [TextContent(type="text", text=json.dumps({"events": events}))]
else:
raise ValueError(f"Unknown tool: {name}")
@@ -909,6 +1146,7 @@ class UserContainer:
self.event_publisher: Optional[EventPublisher] = None
self.mcp_server: Optional[Server] = None
self.data_api: Optional[DataAPIImpl] = None
self.event_bridge: Optional[StrategyEventBridge] = None
self.running = False
async def start(self) -> None:
@@ -933,6 +1171,7 @@ class UserContainer:
s3_endpoint=s3_cfg.get("s3_endpoint") or secrets.get("s3_endpoint"),
s3_access_key=s3_cfg.get("s3_access_key") or secrets.get("s3_access_key"),
s3_secret_key=s3_cfg.get("s3_secret_key") or secrets.get("s3_secret_key"),
s3_region=s3_cfg.get("s3_region") or secrets.get("s3_region"),
)
await self.data_api.start()
set_api(API(charting=ChartingAPIImpl(), data=self.data_api))
@@ -965,6 +1204,23 @@ class UserContainer:
delivery=DeliverySpec.active_or_telegram(),
))
# Initialize strategy lifecycle manager (sets up DB + worktrees dir)
strategy_lifecycle = get_strategy_lifecycle(self.config.data_dir)
await strategy_lifecycle.initialize()
# Start strategy event bridge (PULL socket for subprocess events)
self.event_bridge = StrategyEventBridge(
event_publisher=self.event_publisher,
strategy_lifecycle=strategy_lifecycle,
)
await self.event_bridge.start()
strategy_lifecycle._bridge = self.event_bridge
strategy_lifecycle._lifecycle = get_lifecycle_manager()
logging.info("Strategy event bridge started")
# Resume any strategies that were running before container restart
await strategy_lifecycle.resume_running()
# Create MCP server
self.mcp_server = create_mcp_server(self.config, self.event_publisher)
@@ -998,6 +1254,20 @@ class UserContainer:
delivery=DeliverySpec.active_or_telegram(),
))
# Stop running strategies gracefully
try:
from dexorder.strategy.lifecycle import get_strategy_lifecycle
strategy_lifecycle = get_strategy_lifecycle()
await strategy_lifecycle.shutdown()
logging.info("Strategy lifecycle manager stopped")
except Exception as e:
logging.warning("Error stopping strategy lifecycle: %s", e)
# Stop event bridge
if self.event_bridge:
await self.event_bridge.stop()
logging.info("Strategy event bridge stopped")
# Stop subsystems
if self.data_api:
await self.data_api.stop()