sandbox connected and streaming

This commit is contained in:
2026-03-30 23:29:03 -04:00
parent c3a8fae132
commit 998f69fa1a
130 changed files with 7416 additions and 2123 deletions

707
sandbox/main.py Normal file
View File

@@ -0,0 +1,707 @@
#!/usr/bin/env python3
"""
DexOrder User Container Main Entry Point
Brings together:
- Config and secrets loading from k8s mounted YAML files
- ZeroMQ event publisher for user events
- MCP server with minimal "hello world" resource
- Lifecycle management integration
"""
import asyncio
import logging
import os
import signal
import sys
from pathlib import Path
from typing import Optional
import uvicorn
import yaml
from mcp.server import Server
from mcp.server.sse import SseServerTransport
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent, ImageContent
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Route, Mount
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
from dexorder.api import set_api, API
from dexorder.conda_manager import sync_packages, install_packages
from dexorder.events import EventType, UserEvent, DeliverySpec
from dexorder.impl.charting_api_impl import ChartingAPIImpl
from dexorder.impl.data_api_impl import DataAPIImpl
from dexorder.tools.category_tools import get_category_manager
from dexorder.tools.workspace_tools import get_workspace_store
# =============================================================================
# Global Data Directory
# =============================================================================
# Default data directory (relative to working directory for local dev)
DEFAULT_DATA_DIR = Path("data")
# Global data directory - set after config is loaded
DATA_DIR: Path = DEFAULT_DATA_DIR
def get_data_dir() -> Path:
"""Get the global data directory."""
return DATA_DIR
# =============================================================================
# Configuration
# =============================================================================
class Config:
"""Application configuration loaded from config.yaml and secrets.yaml"""
def __init__(self):
# User ID (required)
self.user_id: str = os.getenv("USER_ID", "")
if not self.user_id:
raise ValueError("USER_ID environment variable required")
# Config and secrets paths (k8s mounted)
self.config_path = Path(os.getenv("CONFIG_PATH", "/app/config/config.yaml"))
self.secrets_path = Path(os.getenv("SECRETS_PATH", "/app/config/secrets.yaml"))
# ZMQ ports for event system
self.zmq_xpub_port: int = int(os.getenv("ZMQ_XPUB_PORT", "5570"))
self.zmq_gateway_endpoint: str = os.getenv(
"ZMQ_GATEWAY_ENDPOINT",
"tcp://gateway:5571"
)
# MCP server settings
self.mcp_server_name: str = os.getenv("MCP_SERVER_NAME", "dexorder-user")
self.mcp_transport: str = os.getenv("MCP_TRANSPORT", "sse") # "stdio" or "sse"
self.mcp_http_port: int = int(os.getenv("MCP_HTTP_PORT", "3000"))
self.mcp_http_host: str = os.getenv("MCP_HTTP_HOST", "0.0.0.0")
# Lifecycle settings
self.idle_timeout_minutes: int = int(os.getenv("IDLE_TIMEOUT_MINUTES", "15"))
self.enable_idle_shutdown: bool = os.getenv("ENABLE_IDLE_SHUTDOWN", "true").lower() == "true"
# Loaded from files
self.config_data: dict = {}
self.secrets_data: dict = {}
# Data directory (set after config load)
self.data_dir: Path = DEFAULT_DATA_DIR
def load(self) -> None:
"""Load configuration and secrets from YAML files"""
global DATA_DIR
# Load config.yaml if exists
if self.config_path.exists():
with open(self.config_path) as f:
self.config_data = yaml.safe_load(f) or {}
logging.info(f"Loaded config from {self.config_path}")
else:
logging.warning(f"Config file not found: {self.config_path}")
# Load secrets.yaml if exists
if self.secrets_path.exists():
with open(self.secrets_path) as f:
self.secrets_data = yaml.safe_load(f) or {}
logging.info(f"Loaded secrets from {self.secrets_path}")
else:
logging.warning(f"Secrets file not found: {self.secrets_path}")
# Set data directory from config or environment
# Priority: env var > config file > default
data_dir_str = os.getenv("DATA_DIR") or self.config_data.get("data_dir")
if data_dir_str:
self.data_dir = Path(data_dir_str)
else:
self.data_dir = DEFAULT_DATA_DIR
# Update global DATA_DIR
DATA_DIR = self.data_dir
# Ensure data directory exists
self.data_dir.mkdir(parents=True, exist_ok=True)
logging.info(f"Data directory: {self.data_dir}")
@property
def workspace_dir(self) -> Path:
"""Workspace directory under DATA_DIR."""
return self.data_dir / "workspace"
# =============================================================================
# MCP Server Setup
# =============================================================================
def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server:
"""Create MCP server with resources and workspace tools"""
server = Server(config.mcp_server_name)
# Initialize workspace store
workspace_store = get_workspace_store(config.workspace_dir)
logging.info(f"Workspace store initialized at {config.workspace_dir}")
# Initialize category file manager
category_manager = get_category_manager(config.data_dir)
logging.info(f"Category manager initialized at {config.data_dir}")
@server.list_resources()
async def list_resources():
"""List available resources"""
return [
{
"uri": f"dexorder://user/{config.user_id}/hello",
"name": "Hello World",
"description": "A simple hello world resource",
"mimeType": "text/plain",
}
]
@server.read_resource()
async def read_resource(uri: str):
"""Read a resource by URI"""
if uri == f"dexorder://user/{config.user_id}/hello":
# Publish an event when resource is accessed
await event_publisher.publish(UserEvent(
event_type=EventType.STRATEGY_LOG,
payload={
"message": "Hello world resource accessed",
"uri": uri,
},
delivery=DeliverySpec.informational(),
))
return {
"uri": uri,
"mimeType": "text/plain",
"text": f"Hello from DexOrder user container!\nUser ID: {config.user_id}\n",
}
else:
raise ValueError(f"Unknown resource: {uri}")
@server.list_tools()
async def list_tools():
"""List available tools including workspace and category tools"""
return [
Tool(
name="workspace_read",
description="Read a workspace store from persistent storage",
inputSchema={
"type": "object",
"properties": {
"store_name": {
"type": "string",
"description": "Name of the store (e.g., 'chartStore', 'userPreferences')"
}
},
"required": ["store_name"]
}
),
Tool(
name="workspace_write",
description="Write a workspace store to persistent storage",
inputSchema={
"type": "object",
"properties": {
"store_name": {
"type": "string",
"description": "Name of the store"
},
"data": {
"description": "Data to write"
}
},
"required": ["store_name", "data"]
}
),
Tool(
name="workspace_patch",
description="Apply JSON patch operations to a workspace store",
inputSchema={
"type": "object",
"properties": {
"store_name": {
"type": "string",
"description": "Name of the store"
},
"patch": {
"type": "array",
"description": "JSON Patch operations (RFC 6902)",
"items": {
"type": "object",
"properties": {
"op": {"type": "string", "enum": ["add", "remove", "replace", "move", "copy", "test"]},
"path": {"type": "string"},
"value": {}
},
"required": ["op", "path"]
}
}
},
"required": ["store_name", "patch"]
}
),
Tool(
name="category_write",
description="Write a new strategy, indicator, or research script with validation",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category of the script"
},
"name": {
"type": "string",
"description": "Display name (can contain special characters)"
},
"description": {
"type": "string",
"description": "LLM-generated description of what this does (required)"
},
"code": {
"type": "string",
"description": "Python implementation code"
},
"metadata": {
"type": "object",
"description": "Optional category-specific metadata (e.g., default_length for indicators, data_feeds for strategies)"
}
},
"required": ["category", "name", "description", "code"]
}
),
Tool(
name="category_edit",
description="Edit an existing category script (updates code, description, or metadata)",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category of the script"
},
"name": {
"type": "string",
"description": "Display name of the existing item"
},
"code": {
"type": "string",
"description": "Updated Python code (optional, omit to keep existing)"
},
"description": {
"type": "string",
"description": "Updated description (optional, omit to keep existing)"
},
"metadata": {
"type": "object",
"description": "Updated metadata fields (optional)"
}
},
"required": ["category", "name"]
}
),
Tool(
name="category_read",
description="Read a category script and its metadata",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category of the script"
},
"name": {
"type": "string",
"description": "Display name of the item"
}
},
"required": ["category", "name"]
}
),
Tool(
name="category_list",
description="List all items in a category with names and descriptions",
inputSchema={
"type": "object",
"properties": {
"category": {
"type": "string",
"enum": ["strategy", "indicator", "research"],
"description": "Category to list"
}
},
"required": ["category"]
}
),
Tool(
name="conda_sync",
description="Sync conda packages: scan all metadata, remove unused packages (excluding base environment)",
inputSchema={
"type": "object",
"properties": {},
"required": []
}
),
Tool(
name="conda_install",
description="Install conda packages on-demand",
inputSchema={
"type": "object",
"properties": {
"packages": {
"type": "array",
"items": {"type": "string"},
"description": "List of conda package names to install"
}
},
"required": ["packages"]
}
),
Tool(
name="execute_research",
description="Execute a research script and return results with matplotlib images",
inputSchema={
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Display name of the research script to execute"
}
},
"required": ["name"]
}
)
]
@server.call_tool()
async def handle_tool_call(name: str, arguments: dict):
"""Handle tool calls including workspace and category tools"""
if name == "workspace_read":
return workspace_store.read(arguments.get("store_name", ""))
elif name == "workspace_write":
return workspace_store.write(
arguments.get("store_name", ""),
arguments.get("data")
)
elif name == "workspace_patch":
return workspace_store.patch(
arguments.get("store_name", ""),
arguments.get("patch", [])
)
elif name == "category_write":
result = category_manager.write(
category=arguments.get("category", ""),
name=arguments.get("name", ""),
description=arguments.get("description", ""),
code=arguments.get("code", ""),
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}", f"path: {result['path']}"]
if result.get("validation") and not result["validation"].get("success"):
meta_parts.append(f"validation errors: {result['validation'].get('errors', [])}")
content.append(TextContent(type="text", text="\n".join(meta_parts)))
if result.get("execution"):
exec_content = result["execution"].get("content", [])
content.extend(exec_content)
image_count = sum(1 for item in exec_content if item.type == "image")
logging.info(f"category_write '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else:
logging.info(f"category_write '{arguments.get('name')}': no execution result (category={arguments.get('category')})")
return content
elif name == "category_edit":
result = category_manager.edit(
category=arguments.get("category", ""),
name=arguments.get("name", ""),
code=arguments.get("code"),
description=arguments.get("description"),
metadata=arguments.get("metadata")
)
content = []
meta_parts = [f"success: {result['success']}", f"path: {result['path']}"]
if result.get("validation") and not result["validation"].get("success"):
meta_parts.append(f"validation errors: {result['validation'].get('errors', [])}")
content.append(TextContent(type="text", text="\n".join(meta_parts)))
if result.get("execution"):
exec_content = result["execution"].get("content", [])
content.extend(exec_content)
image_count = sum(1 for item in exec_content if item.type == "image")
logging.info(f"category_edit '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
else:
logging.info(f"category_edit '{arguments.get('name')}': no execution result")
return content
elif name == "category_read":
return category_manager.read(
category=arguments.get("category", ""),
name=arguments.get("name", "")
)
elif name == "category_list":
return category_manager.list_items(
category=arguments.get("category", "")
)
elif name == "conda_sync":
# Get environment.yml path relative to main.py
env_yml = Path(__file__).parent / "environment.yml"
return sync_packages(
data_dir=get_data_dir(),
environment_yml=env_yml if env_yml.exists() else None
)
elif name == "conda_install":
return install_packages(arguments.get("packages", []))
elif name == "execute_research":
result = category_manager.execute_research(name=arguments.get("name", ""))
if "error" in result:
logging.error(f"execute_research '{arguments.get('name')}': {result['error']}")
return [TextContent(type="text", text=f"Error: {result['error']}")]
content = result.get("content", [TextContent(type="text", text="No output")])
image_count = sum(1 for item in content if item.type == "image")
logging.info(f"execute_research '{arguments.get('name')}': returning {len(content)} items, {image_count} images")
return content
else:
raise ValueError(f"Unknown tool: {name}")
logging.info(f"MCP server '{config.mcp_server_name}' created with workspace and category tools")
return server
# =============================================================================
# SSE Transport Setup
# =============================================================================
def create_sse_app(mcp_server: Server) -> Starlette:
"""Create Starlette app with SSE endpoint for MCP"""
# Create SSE transport instance
sse = SseServerTransport("/messages/")
async def handle_sse(request: Request) -> Response:
"""Handle SSE connections for MCP"""
async with sse.connect_sse(
request.scope, request.receive, request._send
) as streams:
await mcp_server.run(
streams[0],
streams[1],
mcp_server.create_initialization_options()
)
return Response()
async def handle_health(request: Request) -> Response:
"""Health check endpoint for k8s probes and gateway readiness checks"""
return Response(
content='{"status":"ok"}',
media_type="application/json"
)
app = Starlette(
routes=[
Route("/sse", handle_sse),
Mount("/messages/", app=sse.handle_post_message),
Route("/health", handle_health),
]
)
return app
# =============================================================================
# Main Application
# =============================================================================
class UserContainer:
"""Main user container application"""
def __init__(self):
self.config = Config()
self.event_publisher: Optional[EventPublisher] = None
self.mcp_server: Optional[Server] = None
self.data_api: Optional[DataAPIImpl] = None
self.running = False
async def start(self) -> None:
"""Start all subsystems"""
logging.info("Starting user container")
# Load configuration
self.config.load()
# Initialize data and charting API
data_cfg = self.config.config_data.get("data", {})
iceberg_cfg = data_cfg.get("iceberg", {})
relay_cfg = data_cfg.get("relay", {})
secrets = self.config.secrets_data
s3_cfg = iceberg_cfg # S3 settings co-located with iceberg config
self.data_api = DataAPIImpl(
iceberg_catalog_uri=iceberg_cfg.get("catalog_uri", "http://iceberg-catalog:8181"),
relay_endpoint=relay_cfg.get("endpoint", "tcp://relay:5559"),
notification_endpoint=relay_cfg.get("notification_endpoint", "tcp://relay:5558"),
namespace=iceberg_cfg.get("namespace", "trading"),
s3_endpoint=s3_cfg.get("s3_endpoint") or secrets.get("s3_endpoint"),
s3_access_key=s3_cfg.get("s3_access_key") or secrets.get("s3_access_key"),
s3_secret_key=s3_cfg.get("s3_secret_key") or secrets.get("s3_secret_key"),
)
await self.data_api.start()
set_api(API(charting=ChartingAPIImpl(), data=self.data_api))
logging.info("API initialized")
# Start lifecycle manager
await start_lifecycle_manager(
user_id=self.config.user_id,
idle_timeout_minutes=self.config.idle_timeout_minutes,
enable_idle_shutdown=self.config.enable_idle_shutdown,
)
logging.info("Lifecycle manager started")
# Start event publisher
self.event_publisher = EventPublisher(
user_id=self.config.user_id,
xpub_port=self.config.zmq_xpub_port,
gateway_router_endpoint=self.config.zmq_gateway_endpoint,
)
await self.event_publisher.start()
logging.info("Event publisher started")
# Publish CONTAINER_STARTING event
await self.event_publisher.publish(UserEvent(
event_type=EventType.CONTAINER_STARTING,
payload={
"user_id": self.config.user_id,
"timestamp": None, # Will be auto-filled
},
delivery=DeliverySpec.active_or_telegram(),
))
# Create MCP server
self.mcp_server = create_mcp_server(self.config, self.event_publisher)
# Publish CONTAINER_READY event
await self.event_publisher.publish(UserEvent(
event_type=EventType.CONTAINER_READY,
payload={
"user_id": self.config.user_id,
},
delivery=DeliverySpec.active_or_telegram(),
))
self.running = True
logging.info("User container ready")
async def stop(self) -> None:
"""Stop all subsystems"""
if not self.running:
return
logging.info("Stopping user container")
self.running = False
# Publish CONTAINER_SHUTTING_DOWN event
if self.event_publisher:
await self.event_publisher.publish(UserEvent(
event_type=EventType.CONTAINER_SHUTTING_DOWN,
payload={
"user_id": self.config.user_id,
},
delivery=DeliverySpec.active_or_telegram(),
))
# Stop subsystems
if self.data_api:
await self.data_api.stop()
logging.info("Data API stopped")
if self.event_publisher:
await self.event_publisher.stop()
logging.info("Event publisher stopped")
lifecycle = get_lifecycle_manager()
if lifecycle:
await lifecycle.stop()
logging.info("Lifecycle manager stopped")
logging.info("User container stopped")
async def run(self) -> None:
"""Run the MCP server with configured transport"""
await self.start()
try:
if self.config.mcp_transport == "stdio":
# Run MCP server on stdio (for dev/testing)
logging.info("Starting MCP server with stdio transport")
async with stdio_server() as (read_stream, write_stream):
await self.mcp_server.run(
read_stream,
write_stream,
self.mcp_server.create_initialization_options()
)
elif self.config.mcp_transport == "sse":
# Run MCP server via HTTP/SSE (for production)
logging.info(f"Starting MCP server with SSE transport on {self.config.mcp_http_host}:{self.config.mcp_http_port}")
app = create_sse_app(self.mcp_server)
config = uvicorn.Config(
app,
host=self.config.mcp_http_host,
port=self.config.mcp_http_port,
log_level=os.getenv("LOG_LEVEL", "info").lower(),
access_log=True,
)
server = uvicorn.Server(config)
await server.serve()
else:
raise ValueError(f"Unknown MCP transport: {self.config.mcp_transport}")
finally:
await self.stop()
# =============================================================================
# Entry Point
# =============================================================================
async def main():
"""Main entry point"""
# Setup logging
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=getattr(logging, log_level),
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
stream=sys.stderr, # MCP uses stdout for protocol
)
# Create and run container
container = UserContainer()
# Handle shutdown signals
loop = asyncio.get_event_loop()
def handle_signal(sig):
logging.info(f"Received signal {sig}, shutting down...")
asyncio.create_task(container.stop())
loop.stop()
for sig in (signal.SIGTERM, signal.SIGINT):
loop.add_signal_handler(sig, lambda s=sig: handle_signal(s))
try:
await container.run()
except KeyboardInterrupt:
logging.info("Keyboard interrupt received")
except Exception as e:
logging.error(f"Fatal error: {e}", exc_info=True)
sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())