bugfixes; research subproc; higher sandbox limits

2026-04-16 18:11:26 -04:00
parent f80c943dc3
commit 3153e89d4f
54 changed files with 1947 additions and 498 deletions

View File

@@ -510,3 +510,44 @@ def sync_packages(data_dir: Path, environment_yml: Optional[Path] = None) -> dic
log.info(f"Conda package sync complete: {len(result['removed'])} packages removed")
return result
# =============================================================================
# Async wrappers — non-blocking equivalents for use from asyncio contexts
# =============================================================================
import asyncio as _asyncio
async def get_installed_packages_async() -> Set[str]:
"""Non-blocking wrapper around get_installed_packages()."""
return await _asyncio.to_thread(get_installed_packages)
async def install_packages_async(
packages: list[str],
data_dir: Optional[Path] = None,
) -> dict:
"""Non-blocking wrapper around install_packages()."""
return await _asyncio.to_thread(install_packages, packages, data_dir)
async def remove_packages_async(packages: list[str]) -> dict:
"""Non-blocking wrapper around remove_packages()."""
return await _asyncio.to_thread(remove_packages, packages)
async def cleanup_extra_packages_async(
data_dir: Path,
environment_yml: Optional[Path] = None,
) -> dict:
"""Non-blocking wrapper around cleanup_extra_packages()."""
return await _asyncio.to_thread(cleanup_extra_packages, data_dir, environment_yml)
async def sync_packages_async(
data_dir: Path,
environment_yml: Optional[Path] = None,
) -> dict:
"""Non-blocking wrapper around sync_packages()."""
return await _asyncio.to_thread(sync_packages, data_dir, environment_yml)
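
A usage sketch, assuming an async MCP tool handler (the handler name and data_dir value are illustrative, not part of this commit):

# Hypothetical caller of the wrappers above.
from pathlib import Path
from dexorder.conda_manager import install_packages_async, sync_packages_async

async def handle_install_tool(packages: list[str]) -> dict:
    data_dir = Path("/app/data")  # assumed sandbox data root
    # Runs conda in a worker thread; the event loop stays responsive.
    result = await install_packages_async(packages, data_dir)
    if result.get("success"):
        # Optional re-sync against environment.yml (assumption).
        await sync_packages_async(data_dir)
    return result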

View File

@@ -0,0 +1,54 @@
"""
Thread-safe asyncio.run() for the sandbox.
Installs a global replacement for asyncio.run() that, when called from a
non-async thread while uvicorn's event loop is running, dispatches the
coroutine to that loop via run_coroutine_threadsafe(). The calling thread
blocks on future.result() — releasing the GIL — so uvicorn's loop runs
freely (health checks, MCP requests, etc.).
Usage:
from dexorder.event_loop import install_thread_safe_asyncio_run
install_thread_safe_asyncio_run(asyncio.get_running_loop()) # call once at startup
"""
import asyncio
import logging
log = logging.getLogger(__name__)
_main_loop: asyncio.AbstractEventLoop | None = None
_original_asyncio_run = asyncio.run
def install_thread_safe_asyncio_run(loop: asyncio.AbstractEventLoop) -> None:
"""
Patch asyncio.run globally to cooperate with uvicorn's event loop.
Call once from the lifespan startup (main thread, loop already running).
"""
global _main_loop
_main_loop = loop
def _thread_safe_run(coro, *, debug=None):
# Detect if we're in a thread (no running loop in this thread)
try:
asyncio.get_running_loop()
# We're already inside an async context — asyncio.run() is not
# valid here regardless; let it raise the normal error.
raise RuntimeError(
"asyncio.run() cannot be called when another event loop is running "
"in the same thread."
)
except RuntimeError as exc:
if "cannot be called" in str(exc):
raise
# No running loop in this thread — safe to dispatch to main loop.
if _main_loop is not None and _main_loop.is_running():
log.debug("asyncio.run() from thread → run_coroutine_threadsafe")
return asyncio.run_coroutine_threadsafe(coro, _main_loop).result()
# Fallback: main loop not available (e.g., called before startup or in tests)
return _original_asyncio_run(coro, debug=debug)
asyncio.run = _thread_safe_run
log.info("Installed thread-safe asyncio.run()")

View File

@@ -5,6 +5,8 @@ Tickers use Nautilus format: "BTC/USDT.BINANCE"
All timestamps are nanoseconds since epoch.
"""
import tracemalloc
from pathlib import Path
from typing import Optional, List, Tuple
import pandas as pd
import logging
@@ -19,6 +21,19 @@ from pyiceberg.expressions import (
log = logging.getLogger(__name__)
def _rss_mb() -> str:
"""Return current VmRSS and VmPeak from /proc/self/status as a short string."""
try:
info = {}
for line in Path("/proc/self/status").read_text().splitlines():
for key in ("VmRSS", "VmPeak", "VmSize"):
if line.startswith(f"{key}:"):
info[key] = int(line.split()[1]) // 1024 # kB → MB
return f"RSS={info.get('VmRSS','?')}MB peak={info.get('VmPeak','?')}MB virt={info.get('VmSize','?')}MB"
except Exception:
return "?"
class IcebergClient:
"""
Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).
@@ -114,8 +129,21 @@ class IcebergClient:
if fetch_columns is not None:
scan = scan.select(*fetch_columns)
if not tracemalloc.is_tracing():
tracemalloc.start()
tm_before = tracemalloc.take_snapshot()
log.info("MEM before scan.to_pandas(): %s", _rss_mb())
df = scan.to_pandas()
log.info("MEM after scan.to_pandas(): %s | rows=%d cols=%s mem=%dMB",
_rss_mb(), len(df), list(df.columns),
df.memory_usage(deep=True).sum() // (1024 * 1024))
tm_after = tracemalloc.take_snapshot()
top = tm_after.compare_to(tm_before, "lineno")
for stat in top[:5]:
log.info("TRACEMALLOC: %s", stat)
if not df.empty:
# Deduplicate: keep the most-recently-ingested row per timestamp.
if "ingested_at" in df.columns:
@@ -123,6 +151,7 @@ class IcebergClient:
df.sort_values("ingested_at", ascending=False)
.drop_duplicates(subset=["timestamp"])
)
log.info("MEM after dedup: %s | rows=%d", _rss_mb(), len(df))
# Drop ingested_at if the caller did not ask for it
if columns is not None and "ingested_at" not in columns and "ingested_at" in df.columns:
df = df.drop(columns=["ingested_at"])

View File

@@ -0,0 +1,85 @@
"""
Memory guard for sandbox containers.
Sets a soft RLIMIT_AS limit derived from the cgroup memory limit at a
configurable fraction, so Python raises MemoryError before the kernel's
OOM killer fires. The MCP session survives; only the tool call fails.
"""
import gc
import logging
import resource
from pathlib import Path
log = logging.getLogger(__name__)
def _read_cgroup_limit_bytes() -> int | None:
"""Read container memory.max from cgroup v2. Returns bytes or None."""
try:
val = Path("/sys/fs/cgroup/memory.max").read_text().strip()
if val == "max":
return None
return int(val)
except Exception:
return None
def setup_memory_limit(fraction: float) -> None:
"""
Set RLIMIT_AS soft limit to baseline VmSize + allowed growth.
RLIMIT_AS caps total virtual address space, which includes shared libraries
and memory-mapped files that don't consume physical RAM. The baseline VmSize
at startup can be 3+ GB even when RSS is only ~200 MB. Setting the limit to
a flat fraction of the cgroup limit would therefore fail allocations immediately.
Instead: limit = current VmSize + (cgroup_limit * fraction)
This allows `fraction` worth of new allocations (numpy arrays, pandas
dataframes, etc.) above the startup baseline before raising MemoryError.
Args:
fraction: Proportion of cgroup memory.max to allow as new growth, e.g. 0.85.
"""
cgroup_bytes = _read_cgroup_limit_bytes()
# Read baseline VmSize (total virtual address space at startup)
vmsize_bytes: int | None = None
try:
for line in Path("/proc/self/status").read_text().splitlines():
if line.startswith("VmSize:"):
vmsize_bytes = int(line.split()[1]) * 1024 # kB → bytes
log.info("Memory baseline: %s", line.strip())
elif line.startswith("VmRSS:"):
log.info("Memory baseline: %s", line.strip())
except Exception:
pass
if cgroup_bytes is None:
log.warning("cgroup memory.max is unlimited; RLIMIT_AS not set")
return
allowed_growth_bytes = int(cgroup_bytes * fraction)
baseline = vmsize_bytes or 0
limit_bytes = baseline + allowed_growth_bytes
_, hard = resource.getrlimit(resource.RLIMIT_AS)
resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, hard))
log.info(
"RLIMIT_AS soft limit set to %d MB (baseline %d MB + allowed growth %d MB, %.0f%% of cgroup %d MB)",
limit_bytes // (1024 * 1024),
baseline // (1024 * 1024),
allowed_growth_bytes // (1024 * 1024),
fraction * 100,
cgroup_bytes // (1024 * 1024),
)
def cleanup_memory() -> None:
"""
Called after a MemoryError is caught in a tool execution thread.
Runs gc.collect() to free objects held by the failed script.
Hook here for future recovery strategies (cache eviction, etc.).
"""
log.warning("MemoryError in tool thread — running gc.collect()")
gc.collect()
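
Worked example of the limit arithmetic, with illustrative numbers: an 8192 MB cgroup limit at fraction 0.85 allows about 6963 MB of growth; with a 3072 MB VmSize baseline the soft limit lands at roughly 10035 MB of address space. A minimal sketch of the startup call and the failure path (the fraction and allocation size are assumptions):

from dexorder.memory_guard import setup_memory_limit, cleanup_memory

setup_memory_limit(0.85)  # assumed fraction; called once at sandbox startup

try:
    buf = bytearray(64 * 1024**3)  # deliberately oversized allocation
except MemoryError:
    cleanup_memory()  # gc.collect(); the process itself survives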

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""
backtest_harness — runs a strategy backtest as a subprocess.
Reads a JSON config from stdin:
{
"strategy_name": str,
"feeds": [{"symbol": str, "period_seconds": int}, ...],
"from_time": ...,
"to_time": ...,
"initial_capital": float,
"paper": bool
}
Outputs JSON to stdout on success:
{
"strategy_name": str,
"feeds": [...],
"initial_capital": float,
"paper": bool,
"total_candles": int,
... (metrics from run_backtest)
}
On error:
{"error": str}
"""
import asyncio
import json
import os
import sys
import traceback
from pathlib import Path
# Ensure dexorder package is importable when run as a subprocess
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
_OHLC_EXTRA_COLUMNS = [
"volume", "buy_vol", "sell_vol",
"open_time", "high_time", "low_time", "close_time",
"open_interest",
]
async def _run(cfg: dict) -> dict:
strategy_name = cfg["strategy_name"]
feeds = cfg["feeds"]
from_time = cfg.get("from_time")
to_time = cfg.get("to_time")
initial_capital = float(cfg.get("initial_capital", 10_000.0))
paper = bool(cfg.get("paper", True))
# -------------------------------------------------------------------------
# Initialize API
# -------------------------------------------------------------------------
try:
import yaml
config_path = os.environ.get("CONFIG_PATH", "/app/config/config.yaml")
secrets_path = os.environ.get("SECRETS_PATH", "/app/config/secrets.yaml")
config_data = {}
secrets_data = {}
if Path(config_path).exists():
with open(config_path) as f:
config_data = yaml.safe_load(f) or {}
if Path(secrets_path).exists():
with open(secrets_path) as f:
secrets_data = yaml.safe_load(f) or {}
data_cfg = config_data.get("data", {})
iceberg_cfg = data_cfg.get("iceberg", {})
relay_cfg = data_cfg.get("relay", {})
from dexorder.api import set_api, API
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
data_api = DataAPIImpl(
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
namespace=iceberg_cfg.get("namespace", "trading"),
s3_endpoint=iceberg_cfg.get("s3_endpoint") or secrets_data.get("s3_endpoint"),
s3_access_key=iceberg_cfg.get("s3_access_key") or secrets_data.get("s3_access_key"),
s3_secret_key=iceberg_cfg.get("s3_secret_key") or secrets_data.get("s3_secret_key"),
s3_region=iceberg_cfg.get("s3_region") or secrets_data.get("s3_region"),
request_timeout=240.0,
)
set_api(API(charting=ChartingAPIImpl(), data=data_api))
except Exception as e:
return {"error": f"API initialization failed: {e}"}
# -------------------------------------------------------------------------
# Locate strategy
# -------------------------------------------------------------------------
data_dir = Path(os.environ.get("DATA_DIR", "/app/data"))
try:
from dexorder.tools.python_tools import get_category_manager, sanitize_name
category_manager = get_category_manager(data_dir)
safe_name = sanitize_name(strategy_name)
impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
if not impl_path.exists():
return {"error": f"Strategy '{strategy_name}' not found (looked at {impl_path})"}
except Exception as exc:
return {"error": f"Failed to locate strategy: {exc}"}
# -------------------------------------------------------------------------
# Register custom indicators and load strategy class
# -------------------------------------------------------------------------
try:
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
_setup_custom_indicators(category_manager.src_dir)
except Exception as exc:
sys.stderr.write(f"WARNING: custom indicator setup failed: {exc}\n")
try:
from dexorder.nautilus.backtest_runner import _load_strategy_class
strategy_class = _load_strategy_class(impl_path)
except Exception:
return {"error": f"Strategy load failed:\n{traceback.format_exc()}"}
# -------------------------------------------------------------------------
# Fetch OHLC data
# -------------------------------------------------------------------------
from dexorder.api import get_api
from dexorder.nautilus.pandas_strategy import make_feed_key
api = get_api()
parsed_feeds = [(f["symbol"], int(f.get("period_seconds", 3600))) for f in feeds]  # 3600s default, matching the MCP side
ohlc_dfs = {}
total_candles = 0
for ticker, period_seconds in parsed_feeds:
feed_key = make_feed_key(ticker, period_seconds)
try:
df = await api.data.historical_ohlc(
ticker=ticker,
period_seconds=period_seconds,
start_time=from_time,
end_time=to_time,
extra_columns=_OHLC_EXTRA_COLUMNS,
)
except Exception as exc:
return {"error": f"OHLC fetch failed for {feed_key}: {exc}"}
if df.empty:
return {"error": f"No OHLC data for {feed_key} in the requested range"}
ohlc_dfs[feed_key] = df
total_candles += len(df)
# -------------------------------------------------------------------------
# Run backtest (synchronous)
# -------------------------------------------------------------------------
try:
from dexorder.nautilus.backtest_runner import run_backtest
metrics = run_backtest(
strategy_class=strategy_class,
feeds=parsed_feeds,
ohlc_dfs=ohlc_dfs,
initial_capital=initial_capital,
paper=paper,
)
except Exception:
return {"error": f"Backtest failed:\n{traceback.format_exc()}"}
return {
"strategy_name": strategy_name,
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
"initial_capital": initial_capital,
"paper": paper,
"total_candles": total_candles,
**metrics,
}
def main():
cfg = json.loads(sys.stdin.read())
result = asyncio.run(_run(cfg))
print(json.dumps(result))
if __name__ == "__main__":
main()
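
A sketch of driving the harness by hand (config values and the relative path are illustrative):

import json
import subprocess
import sys

cfg = {
    "strategy_name": "my_strategy",  # hypothetical strategy name
    "feeds": [{"symbol": "BTC/USDT.BINANCE", "period_seconds": 3600}],
    "from_time": None,   # passed through to historical_ohlc (assumed open-ended)
    "to_time": None,
    "initial_capital": 10_000.0,
    "paper": True,
}
proc = subprocess.run(
    [sys.executable, "backtest_harness.py"],
    input=json.dumps(cfg).encode(),
    capture_output=True,
    timeout=600,
)
print(json.loads(proc.stdout))  # metrics dict, or {"error": ...}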

View File

@@ -1,25 +1,21 @@
"""
backtest_strategy — run a PandasStrategy against historical OHLC data.
Called directly from the MCP server's async handle_tool_call.
Returns a JSON payload with backtest metrics and equity curve, following the
same pattern as evaluate_indicator.py.
Spawns backtest_harness.py as a subprocess so user strategy code is isolated
from the MCP server process. The harness handles API init, data fetch, and
the synchronous BacktestEngine internally.
"""
import asyncio
import json
import logging
import sys
from pathlib import Path
from typing import Any
log = logging.getLogger(__name__)
# All OHLC+ columns to request from the DataAPI
_OHLC_EXTRA_COLUMNS = [
"volume", "buy_vol", "sell_vol",
"open_time", "high_time", "low_time", "close_time",
"open_interest",
]
_BACKTEST_HARNESS = Path(__file__).parent / "backtest_harness.py"
async def backtest_strategy(
@@ -42,23 +38,8 @@ async def backtest_strategy(
paper: Always True for historical backtest (flag reserved for forward testing)
Returns:
list[TextContent] with JSON payload:
{
"strategy_name": str,
"feeds": [...],
"initial_capital": float,
"paper": bool,
"total_candles": int,
"total_return": float, # fractional (0.15 = +15%)
"sharpe_ratio": float,
"max_drawdown": float, # fractional (0.10 = 10% drawdown)
"win_rate": float,
"trade_count": int,
"equity_curve": [{"timestamp": int, "equity": float}, ...]
}
On error:
{"error": str}
list[TextContent] with JSON payload containing backtest metrics.
On error: [TextContent] with {"error": str}
"""
from mcp.types import TextContent
@@ -66,102 +47,52 @@ async def backtest_strategy(
log.error("backtest_strategy '%s': %s", strategy_name, msg)
return [TextContent(type="text", text=json.dumps({"error": msg}))]
# --- 1. Validate feeds input ---
if not feeds:
return _err("feeds list is empty — provide at least one {symbol, period_seconds} entry")
parsed_feeds: list[tuple[str, int]] = []
for f in feeds:
sym = f.get("symbol", "")
ps = f.get("period_seconds", 3600)
if not sym:
if not f.get("symbol"):
return _err(f"Feed entry missing 'symbol': {f}")
parsed_feeds.append((sym, int(ps)))
# --- 2. Resolve strategy implementation file ---
try:
from dexorder.tools.python_tools import get_category_manager, sanitize_name
category_manager = get_category_manager()
safe_name = sanitize_name(strategy_name)
impl_path = category_manager.src_dir / "strategy" / safe_name / "implementation.py"
if not impl_path.exists():
return _err(f"Strategy '{strategy_name}' not found (looked at {impl_path})")
except Exception as exc:
return _err(f"Failed to locate strategy: {exc}")
# --- 3. Register custom indicators with pandas-ta ---
try:
from dexorder.nautilus.backtest_runner import _setup_custom_indicators
_setup_custom_indicators(category_manager.src_dir)
except Exception as exc:
log.warning("backtest_strategy: custom indicator setup failed: %s", exc)
# --- 4. Load strategy class ---
try:
from dexorder.nautilus.backtest_runner import _load_strategy_class
strategy_class = _load_strategy_class(impl_path)
except Exception as exc:
log.exception("backtest_strategy: strategy load failed")
return _err(f"Strategy load failed: {exc}")
# --- 5. Fetch OHLC+ data for each feed ---
try:
from dexorder.api import get_api
api = get_api()
except Exception as exc:
return _err(f"API not available: {exc}")
ohlc_dfs: dict[str, Any] = {}
total_candles = 0
for ticker, period_seconds in parsed_feeds:
from dexorder.nautilus.pandas_strategy import make_feed_key
feed_key = make_feed_key(ticker, period_seconds)
try:
df = await api.data.historical_ohlc(
ticker=ticker,
period_seconds=period_seconds,
start_time=from_time,
end_time=to_time,
extra_columns=_OHLC_EXTRA_COLUMNS,
)
except Exception as exc:
log.exception("backtest_strategy: OHLC fetch failed for %s", feed_key)
return _err(f"OHLC fetch failed for {feed_key}: {exc}")
if df.empty:
return _err(f"No OHLC data for {feed_key} in the requested range")
ohlc_dfs[feed_key] = df
total_candles += len(df)
# --- 6. Run backtest in thread executor (BacktestEngine is synchronous) ---
try:
import asyncio
from dexorder.nautilus.backtest_runner import run_backtest
loop = asyncio.get_event_loop()
metrics = await loop.run_in_executor(
None,
lambda: run_backtest(
strategy_class=strategy_class,
feeds=parsed_feeds,
ohlc_dfs=ohlc_dfs,
initial_capital=initial_capital,
paper=paper,
),
)
except Exception as exc:
log.exception("backtest_strategy: backtest run failed")
return _err(f"Backtest failed: {exc}")
# --- 7. Return results ---
payload = {
"strategy_name": strategy_name,
"feeds": [{"symbol": t, "period_seconds": p} for t, p in parsed_feeds],
cfg = {
"strategy_name": strategy_name,
"feeds": feeds,
"from_time": from_time,
"to_time": to_time,
"initial_capital": initial_capital,
"paper": paper,
"total_candles": total_candles,
**metrics, # keys: summary, statistics, trades, equity_curve
"paper": paper,
}
try:
proc = await asyncio.create_subprocess_exec(
sys.executable, str(_BACKTEST_HARNESS),
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await asyncio.wait_for(
proc.communicate(json.dumps(cfg).encode()),
timeout=600,
)
except asyncio.TimeoutError:
return _err("Backtest timed out (10 minutes)")
except Exception as exc:
return _err(f"Failed to launch backtest harness: {exc}")
if proc.returncode != 0:
err_text = stderr.decode(errors="replace")
log.error("backtest_strategy '%s': harness exited %d: %s", strategy_name, proc.returncode, err_text[:500])
return _err(f"Backtest harness failed:\n{err_text}")
if stderr:
log.warning("backtest_strategy '%s' stderr: %s", strategy_name, stderr.decode(errors="replace")[:500])
try:
payload = json.loads(stdout.decode())
except json.JSONDecodeError:
return _err(f"Harness produced invalid JSON: {stdout.decode(errors='replace')[:200]}")
if "error" in payload:
return _err(payload["error"])
return [TextContent(type="text", text=json.dumps(payload))]

View File

@@ -18,51 +18,32 @@ After write/edit operations, a category-specific test harness runs to validate
the code and capture errors/output for agent feedback.
"""
import concurrent.futures
import json
import logging
import re
import subprocess
import sys
import traceback
from dataclasses import dataclass, asdict
from enum import Enum
from pathlib import Path
from typing import Any, Optional
from dexorder.tools.subprocess_runner import run_subprocess_argv, run_in_thread
log = logging.getLogger(__name__)
def _run_inprocess(fn, *args, timeout: int) -> dict:
"""
Run fn(*args) in a one-shot thread and return its result dict.
Uses a thread so the calling coroutine is not blocked and the calling
process does not fork a new Python interpreter. All already-loaded
libraries (numpy, pandas, matplotlib, etc.) are shared with the thread.
On timeout returns a dict with _timeout=True. On unexpected exception
returns a dict with error=True and the traceback in stderr.
"""
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(fn, *args)
try:
return future.result(timeout=timeout)
except concurrent.futures.TimeoutError:
return {"_timeout": True, "error": True,
"stdout": "", "stderr": "", "images": []}
except Exception:
return {"error": True, "stdout": "",
"stderr": traceback.format_exc(), "images": []}
# Paths to harness scripts run as subprocesses
_RESEARCH_HARNESS = Path(__file__).parent / "research_harness.py"
_STRATEGY_HARNESS = Path(__file__).parent / "strategy_harness.py"
# Import conda manager for package installation and tracking
try:
from dexorder.conda_manager import install_packages, cleanup_extra_packages
from dexorder.conda_manager import install_packages_async, cleanup_extra_packages_async
except ImportError:
log.warning("conda_manager not available - package installation disabled")
install_packages = None
cleanup_extra_packages = None
install_packages_async = None
cleanup_extra_packages_async = None
# =============================================================================
@@ -355,6 +336,39 @@ class GitManager:
except Exception:
pass
# ------------------------------------------------------------------
# Async variants — delegate to the sync methods via asyncio.to_thread
# so the event loop stays responsive during git operations.
# ------------------------------------------------------------------
async def commit_async(self, message: str) -> Optional[str]:
import asyncio
return await asyncio.to_thread(self.commit, message)
async def log_async(self, path: Optional[Path] = None, n: int = 20) -> list[dict]:
import asyncio
return await asyncio.to_thread(self.log, path, n)
async def restore_async(self, revision: str, path: Optional[Path] = None) -> Optional[str]:
import asyncio
return await asyncio.to_thread(self.restore, revision, path)
async def head_short_hash_async(self) -> str:
import asyncio
return await asyncio.to_thread(self.head_short_hash)
async def create_worktree_async(self, worktree_path: Path, revision: str = "HEAD") -> str:
import asyncio
return await asyncio.to_thread(self.create_worktree, worktree_path, revision)
async def remove_worktree_async(self, worktree_path: Path) -> None:
import asyncio
return await asyncio.to_thread(self.remove_worktree, worktree_path)
async def prune_worktrees_async(self) -> None:
import asyncio
return await asyncio.to_thread(self.prune_worktrees)
# =============================================================================
# Custom Indicator Setup
@@ -484,7 +498,7 @@ class CategoryFileManager:
"""Root of the versioned category code (git repo root)."""
return self.data_dir / "src"
def write(
async def write(
self,
category: str,
name: str,
@@ -547,7 +561,7 @@ class CategoryFileManager:
return {"success": False, "error": f"Failed to write metadata: {e}"}
# Run validation harness
validation = self._validate(cat, item_dir)
validation = await self._validate(cat, item_dir)
result = {
"success": validation["success"],
@@ -559,19 +573,19 @@ class CategoryFileManager:
if validation["success"]:
if cat == Category.RESEARCH:
log.info(f"Auto-executing research script: {name}")
result["execution"] = self.execute_research(name)
result["execution"] = await self.execute_research(name)
elif cat == Category.INDICATOR:
log.info(f"Auto-executing indicator test: {name}")
result["execution"] = self._execute_indicator(item_dir)
result["execution"] = await self._execute_indicator(item_dir)
# Commit to git
commit_hash = self.git.commit(f"create({category}): {name}")
commit_hash = await self.git.commit_async(f"create({category}): {name}")
if commit_hash:
result["revision"] = commit_hash
return result
def edit(
async def edit(
self,
category: str,
name: str,
@@ -671,7 +685,7 @@ class CategoryFileManager:
# Run validation harness if code was updated
validation = None
if code is not None:
validation = self._validate(cat, item_dir)
validation = await self._validate(cat, item_dir)
result = {
"success": True,
@@ -685,15 +699,15 @@ class CategoryFileManager:
if code is not None and result["success"]:
if cat == Category.RESEARCH:
log.info(f"Auto-executing research script after edit: {name}")
result["execution"] = self.execute_research(name)
result["execution"] = await self.execute_research(name)
elif cat == Category.INDICATOR:
log.info(f"Auto-executing indicator test after edit: {name}")
result["execution"] = self._execute_indicator(item_dir)
result["execution"] = await self._execute_indicator(item_dir)
# Commit to git if code changed
if code is not None and result["success"]:
action = "patch" if patches is not None else "edit"
commit_hash = self.git.commit(f"{action}({category}): {name}")
commit_hash = await self.git.commit_async(f"{action}({category}): {name}")
if commit_hash:
result["revision"] = commit_hash
@@ -776,7 +790,7 @@ class CategoryFileManager:
return {"items": items}
def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
async def _validate(self, category: Category, item_dir: Path) -> dict[str, Any]:
"""
Run category-specific validation harness.
@@ -793,13 +807,13 @@ class CategoryFileManager:
# Install required packages before validation
packages_installed = []
if install_packages and meta_path.exists():
if install_packages_async and meta_path.exists():
try:
metadata = json.loads(meta_path.read_text())
conda_packages = metadata.get("conda_packages", [])
if conda_packages:
log.info(f"Installing packages for validation: {conda_packages}")
install_result = install_packages(conda_packages, data_dir=self.data_dir)
install_result = await install_packages_async(conda_packages, data_dir=self.data_dir)
if install_result.get("success"):
packages_installed = install_result.get("installed", [])
if packages_installed:
@@ -811,11 +825,11 @@ class CategoryFileManager:
# Run validation
if category == Category.STRATEGY:
result = self._validate_strategy(impl_path)
result = await self._validate_strategy(impl_path)
elif category == Category.INDICATOR:
result = self._validate_indicator(impl_path)
result = await self._validate_indicator(impl_path)
elif category == Category.RESEARCH:
result = self._validate_research(impl_path, item_dir)
result = await self._validate_research(impl_path, item_dir)
else:
result = {"success": False, "error": f"No validator for category {category}"}
@@ -825,19 +839,18 @@ class CategoryFileManager:
return result
def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
async def _validate_strategy(self, impl_path: Path) -> dict[str, Any]:
"""
Validate a strategy by running it against synthetic OHLC data.
Runs strategy_harness.py in-process via a thread. Catches import errors,
Runs strategy_harness.py as a subprocess. Catches import errors,
runtime errors in evaluate(), and wrong class hierarchy — not just syntax.
"""
meta_path = impl_path.parent / "metadata.json"
return self._execute_strategy(impl_path.parent, timeout=45)
return await self._execute_strategy(impl_path.parent, timeout=45)
def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
async def _execute_strategy(self, item_dir: Path, timeout: int = 45) -> dict[str, Any]:
"""
Run a strategy against synthetic OHLC data in-process via a thread.
Run a strategy against synthetic OHLC data via strategy_harness.py subprocess.
Returns:
dict with success, output (human-readable summary), trade_count, error
@@ -850,24 +863,26 @@ class CategoryFileManager:
if not meta_path.exists():
return {"success": False, "error": "metadata.json not found"}
from dexorder.tools.strategy_harness import run as _strategy_run
result = _run_inprocess(_strategy_run, impl_path, meta_path, timeout=timeout)
if result.get("_timeout"):
data = await run_subprocess_argv(
sys.executable, str(_STRATEGY_HARNESS), str(impl_path), str(meta_path),
timeout=timeout,
)
if data.get("_timeout"):
return {"success": False, "error": f"Strategy test timed out after {timeout}s"}
return result
if data.get("error") and not data.get("success"):
return {"success": False, "error": data.get("stderr") or "Harness failed"}
return data
def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
async def _validate_indicator(self, impl_path: Path) -> dict[str, Any]:
"""
Validate an indicator by running it against synthetic OHLC data.
Runs indicator_harness.py in-process via a thread. Catches import errors,
runtime errors, and wrong return types — not just syntax.
Runs indicator_harness.py in-process via a thread in the main process. Catches
import errors, runtime errors, and wrong return types — not just syntax.
"""
meta_path = impl_path.parent / "metadata.json"
return self._execute_indicator(impl_path.parent, timeout=30)
return await self._execute_indicator(impl_path.parent, timeout=30)
def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
async def _execute_indicator(self, item_dir: Path, timeout: int = 30) -> dict[str, Any]:
"""
Run an indicator against synthetic OHLC data in-process via a thread.
@@ -883,29 +898,32 @@ class CategoryFileManager:
return {"success": False, "error": "metadata.json not found"}
from dexorder.tools.indicator_harness import run as _indicator_run
result = _run_inprocess(_indicator_run, impl_path, meta_path, timeout=timeout)
result = await run_in_thread(_indicator_run, impl_path, meta_path, timeout=timeout)
if result.get("_timeout"):
return {"success": False, "error": f"Indicator test timed out after {timeout}s"}
return result
def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
async def _run_research_harness(self, impl_path: Path, item_dir: Path, timeout: int = 300) -> dict[str, Any]:
"""
Run a research script in-process via a thread and return captured results.
Run a research script via research_harness.py subprocess and return captured results.
Returns:
dict with stdout, stderr, images, error fields — or an error dict.
dict with stdout, stderr, images, error fields.
"""
from dexorder.tools.research_harness import run as _research_run
return _run_inprocess(_research_run, impl_path, item_dir, timeout=timeout)
return await run_subprocess_argv(
sys.executable, str(_RESEARCH_HARNESS), str(impl_path),
timeout=timeout,
cwd=item_dir,
)
def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
async def _validate_research(self, impl_path: Path, item_dir: Path) -> dict[str, Any]:
"""
Validate a research script.
Runs the script via the harness and captures output + pyplot images.
"""
data = self._run_research_harness(impl_path, item_dir, timeout=300)
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
if data.get("_timeout"):
return {"success": False, "error": "Research script timeout"}
@@ -923,7 +941,7 @@ class CategoryFileManager:
"images": data["images"],
}
def execute_research(self, name: str) -> dict[str, Any]:
async def execute_research(self, name: str) -> dict[str, Any]:
"""
Execute a research script and return structured content with images.
@@ -944,7 +962,7 @@ class CategoryFileManager:
if not impl_path.exists():
return {"error": f"Implementation file not found for '{name}'"}
data = self._run_research_harness(impl_path, item_dir, timeout=300)
data = await self._run_research_harness(impl_path, item_dir, timeout=300)
if data.get("_timeout"):
log.error(f"execute_research '{name}': timeout")
@@ -995,7 +1013,7 @@ class CategoryFileManager:
return {"content": content}
def delete(self, category: str, name: str) -> dict[str, Any]:
async def delete(self, category: str, name: str) -> dict[str, Any]:
"""
Delete a category script directory and commit the removal to git.
@@ -1031,13 +1049,13 @@ class CategoryFileManager:
except Exception as e:
return {"success": False, "error": f"Failed to delete: {e}"}
commit_hash = self.git.commit(f"delete({category}): {name}")
commit_hash = await self.git.commit_async(f"delete({category}): {name}")
result: dict[str, Any] = {"success": True, "category": category, "name": name}
if commit_hash:
result["revision"] = commit_hash
return result
def git_log(
async def git_log(
self,
category: Optional[str] = None,
name: Optional[str] = None,
@@ -1061,10 +1079,10 @@ class CategoryFileManager:
path = get_category_path(self.src_dir, cat, name)
else:
path = self.src_dir / cat.value
entries = self.git.log(path=path, n=limit)
entries = await self.git.log_async(path=path, n=limit)
return {"success": True, "commits": entries}
def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
async def git_revert(self, revision: str, category: str, name: str) -> dict[str, Any]:
"""
Restore a category item to a previous git revision (creates a new commit).
@@ -1085,11 +1103,11 @@ class CategoryFileManager:
return {"success": False, "error": f"Item '{name}' not found in '{category}'"}
try:
commit_hash = self.git.restore(revision, path=item_dir)
commit_hash = await self.git.restore_async(revision, path=item_dir)
except RuntimeError as e:
return {"success": False, "error": str(e)}
validation = self._validate(cat, item_dir)
validation = await self._validate(cat, item_dir)
return {
"success": validation["success"],
"revision": commit_hash,

View File

@@ -119,11 +119,39 @@ def run(impl_path: Path, item_dir: Path) -> dict:
stdout_buf = io.StringIO()
stderr_buf = io.StringIO()
# Eagerly capture figures when user scripts call plt.close() so images are
# not lost when a script closes its figures before the end-of-run capture.
captured_images: list[dict] = []
def _capture_fig(fig) -> None:
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
buf.seek(0)
captured_images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
buf.close()
_orig_plt_close = plt.close
def _patched_close(fig=None):
if fig is None:
# plt.close() with no argument closes only the current figure, so
# snapshot just that one before it is destroyed.
if plt.get_fignums():
_capture_fig(plt.gcf())
elif fig == 'all':
for fn in plt.get_fignums():
_capture_fig(plt.figure(fn))
else:
try:
_capture_fig(fig if hasattr(fig, 'savefig') else plt.figure(fig))
except Exception:
pass
_orig_plt_close(fig)
error_occurred = False
old_stdout, old_stderr = sys.stdout, sys.stderr
old_cwd = os.getcwd()
sys.stdout = stdout_buf
sys.stderr = stderr_buf
plt.close = _patched_close
try:
os.chdir(impl_path.parent)
@@ -136,22 +164,26 @@ def run(impl_path: Path, item_dir: Path) -> dict:
sys.stdout = old_stdout
sys.stderr = old_stderr
os.chdir(old_cwd)
plt.close = _orig_plt_close
stdout_output = stdout_buf.getvalue()
stderr_output = stderr_buf.getvalue()
# ---------------------------------------------------------------------------
# Capture matplotlib figures
# Capture any figures still open after script completion
# ---------------------------------------------------------------------------
images = []
images = captured_images
if not error_occurred:
already_seen = {img["data"] for img in images}
for fig_num in plt.get_fignums():
fig = plt.figure(fig_num)
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
buf.seek(0)
images.append({"format": "png", "data": base64.b64encode(buf.read()).decode('utf-8')})
data = base64.b64encode(buf.read()).decode('utf-8')
buf.close()
if data not in already_seen:
images.append({"format": "png", "data": data})
plt.close('all')
return {
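
The effect on user scripts, sketched (the script body is illustrative): a figure closed mid-script is still returned as an image.

import matplotlib
matplotlib.use("Agg")  # headless backend, as assumed for the sandbox
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([1, 2, 3], [1, 4, 9])
fig.savefig("curve.png")
plt.close(fig)  # the patched close snapshots the figure before destroying it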

View File

@@ -0,0 +1,182 @@
"""
subprocess_runner — non-blocking subprocess primitives for the MCP sandbox.
All three entrypoints return the same dict shape as the legacy _run_inprocess():
{
"error": bool,
"stdout": str,
"stderr": str,
"images": list, # always [] for non-research invocations
"_timeout": bool # present and True only on timeout
}
Callers can therefore pattern-match on {"_timeout", "error", "stdout", "stderr"}
uniformly regardless of whether the work ran in a subprocess or a thread.
"""
import asyncio
import json
import traceback
from pathlib import Path
from typing import Any, Callable
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _normalise(data: dict, stderr_fallback: str = "") -> dict:
"""Ensure the standard shape keys are present in a harness result dict."""
data.setdefault("error", False)
data.setdefault("stdout", "")
data.setdefault("stderr", stderr_fallback)
data.setdefault("images", [])
return data
def _err_dict(stderr: str = "", stdout: str = "") -> dict:
return {"error": True, "stdout": stdout, "stderr": stderr, "images": []}
def _timeout_dict() -> dict:
return {"_timeout": True, "error": True, "stdout": "", "stderr": "", "images": []}
# ---------------------------------------------------------------------------
# Primitive 1: run_subprocess_argv
#
# Non-blocking equivalent of:
# subprocess.run([sys.executable, harness, arg1, arg2, ...],
# capture_output=True, text=True, timeout=N, cwd=cwd)
#
# Used by: _execute_strategy, _run_research_harness
# ---------------------------------------------------------------------------
async def run_subprocess_argv(
*cmd: str,
timeout: int,
cwd: Path | None = None,
) -> dict:
"""
Spawn cmd as a subprocess, await completion, and return a normalised result dict.
stdout is expected to contain a JSON object written by the harness. It is
decoded and normalised to the standard shape. On JSON decode failure the
raw stdout text is preserved in "stdout" and error is set to True.
"""
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(cwd) if cwd else None,
)
stdout_bytes, stderr_bytes = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
except asyncio.TimeoutError:
return _timeout_dict()
except Exception as exc:
return _err_dict(stderr=f"Harness launch failed: {exc}")
stdout_text = stdout_bytes.decode(errors="replace")
stderr_text = stderr_bytes.decode(errors="replace")
if proc.returncode != 0:
return _err_dict(
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
stdout=stdout_text,
)
try:
data = json.loads(stdout_text)
return _normalise(data, stderr_fallback=stderr_text)
except json.JSONDecodeError:
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
# ---------------------------------------------------------------------------
# Primitive 2: run_subprocess_stdin
#
# Non-blocking equivalent of the backtest pattern — JSON config fed via stdin.
# ---------------------------------------------------------------------------
async def run_subprocess_stdin(
*cmd: str,
stdin_data: bytes,
timeout: int,
) -> dict:
"""
Spawn cmd, write stdin_data to its stdin, await completion.
Returns the same normalised dict shape as run_subprocess_argv.
"""
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout_bytes, stderr_bytes = await asyncio.wait_for(
proc.communicate(stdin_data), timeout=timeout
)
except asyncio.TimeoutError:
return _timeout_dict()
except Exception as exc:
return _err_dict(stderr=f"Harness launch failed: {exc}")
stdout_text = stdout_bytes.decode(errors="replace")
stderr_text = stderr_bytes.decode(errors="replace")
if proc.returncode != 0:
return _err_dict(
stderr=f"Harness exited {proc.returncode}:\n{stderr_text}",
stdout=stdout_text,
)
try:
data = json.loads(stdout_text)
return _normalise(data, stderr_fallback=stderr_text)
except json.JSONDecodeError:
return {"error": True, "stdout": stdout_text, "stderr": stderr_text, "images": []}
# ---------------------------------------------------------------------------
# Primitive 3: run_in_thread
#
# Async wrapper around asyncio.to_thread so the event loop stays responsive
# while CPU-bound or blocking-IO callables run in a worker thread.
#
# Used by: _execute_indicator (in-process indicator harness)
# ---------------------------------------------------------------------------
async def run_in_thread(
fn: Callable,
*args: Any,
timeout: int,
) -> dict:
"""
Run fn(*args) in a thread pool worker and yield to the event loop while waiting.
On timeout the worker thread is abandoned (it keeps running in the background
until fn returns) and _timeout_dict() is returned.
On MemoryError or unexpected exception a standard error dict is returned.
The returned dict is normalised to the standard shape.
"""
from dexorder.memory_guard import cleanup_memory
try:
result = await asyncio.wait_for(
asyncio.to_thread(fn, *args),
timeout=timeout,
)
return _normalise(result)
except asyncio.TimeoutError:
return _timeout_dict()
except MemoryError:
cleanup_memory()
return _err_dict(
stderr="Script exceeded memory limit. Try reducing the data range or batch size."
)
except Exception:
return _err_dict(stderr=traceback.format_exc())
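
A sketch of the three primitives from an async caller (the harness path and callable are illustrative):

import asyncio
import sys
from dexorder.tools.subprocess_runner import (
    run_subprocess_argv, run_subprocess_stdin, run_in_thread,
)

def blocking_work(n: int) -> dict:
    return {"stdout": str(sum(range(n))), "error": False}

async def main() -> None:
    # argv-style harness (hypothetical script path)
    r1 = await run_subprocess_argv(sys.executable, "harness.py", timeout=30)
    # stdin-style harness fed a JSON config
    r2 = await run_subprocess_stdin(sys.executable, "harness.py",
                                    stdin_data=b"{}", timeout=30)
    # blocking callable in a worker thread, normalised to the same shape
    r3 = await run_in_thread(blocking_work, 1_000_000, timeout=10)
    for r in (r1, r2, r3):
        print(r.get("_timeout", False), r["error"], r["stdout"][:40])

asyncio.run(main())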