redesign fully scaffolded and web login works

This commit is contained in:
2026-03-17 20:10:47 -04:00
parent b9cc397e05
commit f6bd22a8ef
143 changed files with 17317 additions and 693 deletions

57
client-py/.dockerignore Normal file
View File

@@ -0,0 +1,57 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
venv/
env/
ENV/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# Testing
.pytest_cache/
.coverage
htmlcov/
# Config and secrets (should come from k8s mounts)
config.yaml
secrets.yaml
*.local.yaml
# Data directories
data/
# Git
.git/
.gitignore
# Documentation
*.md
docs/
# Example files
*.example.*

67
client-py/Dockerfile Normal file
View File

@@ -0,0 +1,67 @@
# Multi-stage build for DexOrder user container
# Stage 1 compiles/installs Python deps; stage 2 is a slim runtime image.
FROM python:3.11-slim AS builder
WORKDIR /build
# Install build dependencies (compilers for any C-extension wheels)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*
# Copy dependency specifications
COPY setup.py .
COPY dexorder/ dexorder/
# Install dependencies to a target directory
RUN pip install --no-cache-dir --target=/build/deps .
# =============================================================================
# Runtime stage
# =============================================================================
FROM python:3.11-slim
WORKDIR /app
# Install runtime dependencies only (libzmq for pyzmq)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libzmq5 \
    && rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN groupadd -r dexorder && useradd -r -g dexorder -u 1000 dexorder
# Copy installed Python packages from builder
COPY --from=builder /build/deps /usr/local/lib/python3.11/site-packages/
# Copy application code
COPY dexorder/ /app/dexorder/
COPY main.py /app/
# Create directories for config, secrets, and data
# NOTE(review): SECRETS_PATH below defaults to /app/config/secrets.yaml, so the
# /app/secrets directory created here is unused unless k8s overrides the env —
# confirm which path the secrets volume actually mounts at.
RUN mkdir -p /app/config /app/secrets /app/data && \
    chown -R dexorder:dexorder /app
# Create writable tmp directory (read-only rootfs requirement)
# /tmp already exists in the base image; this is defensive for deployments
# that mount an emptyDir over it.
RUN mkdir -p /tmp && chmod 1777 /tmp
# Switch to non-root user
USER dexorder
# Environment variables (can be overridden in k8s)
ENV PYTHONUNBUFFERED=1 \
    LOG_LEVEL=INFO \
    CONFIG_PATH=/app/config/config.yaml \
    SECRETS_PATH=/app/config/secrets.yaml \
    ZMQ_XPUB_PORT=5570 \
    ZMQ_GATEWAY_ENDPOINT=tcp://gateway:5571 \
    MCP_SERVER_NAME=dexorder-user \
    IDLE_TIMEOUT_MINUTES=15 \
    ENABLE_IDLE_SHUTDOWN=true
# Health check endpoint (simple check if process is running)
# NOTE(review): this command always exits 0, so it only proves a Python
# interpreter can start — it does not verify the app; consider a real probe.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD python -c "import sys; sys.exit(0)"
# Run the main application
ENTRYPOINT ["python", "/app/main.py"]

View File

@@ -0,0 +1,30 @@
# Example configuration file for DexOrder user container
# Mount this at /app/config/config.yaml in k8s
# User-specific settings
user:
timezone: "UTC"
# Data sources
data:
iceberg:
catalog_name: "dexorder"
# Catalog properties loaded from secrets
relay:
endpoint: "tcp://relay:5560"
timeout_ms: 5000
# Strategy settings
strategies:
max_concurrent: 5
default_timeout_minutes: 60
# Alert settings
alerts:
max_active: 100
# Logging
logging:
level: "INFO"
include_timestamps: true

View File

@@ -5,6 +5,8 @@ Provides high-level APIs for:
- Historical OHLC data retrieval with smart caching
- Async request/response via relay
- Iceberg data warehouse queries
- User container event publishing
- Container lifecycle management
"""
__version__ = "0.1.0"
@@ -12,5 +14,36 @@ __version__ = "0.1.0"
from .ohlc_client import OHLCClient
from .iceberg_client import IcebergClient
from .history_client import HistoryClient
from .lifecycle_manager import (
    LifecycleManager,
    get_lifecycle_manager,
    start_lifecycle_manager,
)

# Event system
from .events import (
    EventPublisher,
    EventType,
    Priority,
    ChannelType,
    DeliverySpec,
    UserEvent,
)

# Public API. NOTE: __all__ was previously assigned twice (an older
# three-entry list immediately overwritten by this one); the dead first
# assignment has been removed.
__all__ = [
    # Data clients
    'OHLCClient',
    'IcebergClient',
    'HistoryClient',
    # Lifecycle management
    'LifecycleManager',
    'get_lifecycle_manager',
    'start_lifecycle_manager',
    # Event system
    'EventPublisher',
    'EventType',
    'Priority',
    'ChannelType',
    'DeliverySpec',
    'UserEvent',
]

View File

@@ -0,0 +1,57 @@
"""
User Container Event System
Publishes events to the gateway via dual ZMQ patterns:
- XPUB for informational events (fire-and-forget to active sessions)
- DEALER for critical events (guaranteed delivery with ack)
See doc/protocol.md and doc/user_container_events.md for details.
"""
from .types import (
# Enums
EventType,
Priority,
ChannelType,
AckStatus,
# Message types
ChannelPreference,
DeliverySpec,
UserEvent,
EventAck,
# Serialization
MSG_TYPE_USER_EVENT,
MSG_TYPE_EVENT_ACK,
serialize_user_event,
deserialize_user_event,
serialize_event_ack,
deserialize_event_ack,
)
from .publisher import EventPublisher
from .pending_store import PendingStore
# Public re-export surface of the events package; groups mirror the
# import blocks above so additions stay easy to audit.
__all__ = [
    # Enums
    "EventType",
    "Priority",
    "ChannelType",
    "AckStatus",
    # Message types
    "ChannelPreference",
    "DeliverySpec",
    "UserEvent",
    "EventAck",
    # Serialization
    "MSG_TYPE_USER_EVENT",
    "MSG_TYPE_EVENT_ACK",
    "serialize_user_event",
    "deserialize_user_event",
    "serialize_event_ack",
    "deserialize_event_ack",
    # Publisher
    "EventPublisher",
    # Storage
    "PendingStore",
]

View File

@@ -0,0 +1,120 @@
"""
Pending event store for crash recovery.
Persists critical events that haven't been acknowledged to disk,
so they can be replayed after a container restart.
"""
import json
import logging
from pathlib import Path
from typing import List, Optional
import asyncio
import aiofiles
from .types import UserEvent
logger = logging.getLogger(__name__)
class PendingStore:
    """
    Persists pending critical events to disk for crash recovery.

    Events are written to a JSON file when the publisher shuts down,
    and loaded back (then deleted) when it starts up. All file access
    is serialized through an asyncio lock.
    """

    def __init__(self, path: Optional[str] = None):
        """
        Initialize pending store.

        Args:
            path: Path to the pending events file.
                If None, persistence is disabled (all ops become no-ops).
        """
        self.path = Path(path) if path else None
        # Serializes concurrent save/load/remove against the same file.
        self._lock = asyncio.Lock()

    async def save_pending(self, events: List[UserEvent]) -> None:
        """
        Save pending events to disk.

        The write is atomic: data goes to a sibling temp file first, which
        then replaces the target in one filesystem operation, so a crash
        mid-write never leaves a truncated events file.

        Args:
            events: List of events to persist
        """
        if not self.path:
            return
        if not events:
            # No events to save, remove file if exists
            await self._remove_file()
            return
        async with self._lock:
            try:
                # Ensure parent directory exists
                self.path.parent.mkdir(parents=True, exist_ok=True)
                # Serialize events
                data = [event.to_dict() for event in events]
                json_str = json.dumps(data, indent=2)
                # Write atomically (write to temp, then swap into place)
                temp_path = self.path.with_suffix(".tmp")
                async with aiofiles.open(temp_path, "w") as f:
                    await f.write(json_str)
                # Atomic swap. Path.replace() (not rename()) is used because
                # rename() raises FileExistsError on Windows when the
                # destination already exists; replace() overwrites atomically
                # on all platforms.
                temp_path.replace(self.path)
                logger.info("Saved %d pending events to %s", len(events), self.path)
            except Exception as e:
                logger.error("Failed to save pending events: %s", e, exc_info=True)

    async def load_pending(self) -> List[UserEvent]:
        """
        Load pending events from disk.

        The file is deleted after a successful load (and after a parse
        failure, to avoid re-reading a corrupt file forever).

        Returns:
            List of pending events, or empty list if none/error
        """
        if not self.path or not self.path.exists():
            return []
        async with self._lock:
            try:
                async with aiofiles.open(self.path, "r") as f:
                    content = await f.read()
                data = json.loads(content)
                events = [UserEvent.from_dict(d) for d in data]
                # Remove file after successful load
                await self._remove_file()
                logger.info("Loaded %d pending events from %s", len(events), self.path)
                return events
            except json.JSONDecodeError as e:
                logger.error("Failed to parse pending events file: %s", e)
                await self._remove_file()
                return []
            except Exception as e:
                logger.error("Failed to load pending events: %s", e, exc_info=True)
                return []

    async def _remove_file(self) -> None:
        """Remove the pending events file (best effort; failures are logged)."""
        if self.path and self.path.exists():
            try:
                self.path.unlink()
                logger.debug("Removed pending events file: %s", self.path)
            except Exception as e:
                logger.warning("Failed to remove pending events file: %s", e)

    def has_pending(self) -> bool:
        """Check if there are pending events on disk."""
        return self.path is not None and self.path.exists()

View File

@@ -0,0 +1,441 @@
"""
Event publisher for user containers.
Publishes events via dual ZMQ patterns:
- XPUB for informational events (fire-and-forget)
- DEALER for critical events (guaranteed delivery with ack)
The publisher automatically routes events based on:
1. Event priority (INFORMATIONAL always uses XPUB if subscribed)
2. Whether an active gateway session is subscribed (tracked via XPUB)
"""
import asyncio
import logging
import time
import uuid
from dataclasses import dataclass, field
from typing import Dict, Optional, Set
import zmq
import zmq.asyncio
from .types import (
UserEvent,
EventAck,
Priority,
AckStatus,
serialize_user_event,
deserialize_event_ack,
get_event_type_name,
)
from .pending_store import PendingStore
logger = logging.getLogger(__name__)
@dataclass
class PendingEvent:
    """Tracks a pending critical event awaiting acknowledgment."""
    # The event that was sent via DEALER and has not yet been acked.
    event: UserEvent
    # time.time() of the most recent send attempt (reset on each retry).
    sent_at: float
    # Resend attempts made so far; 0 means only the initial send happened.
    retries: int = 0
class EventPublisher:
    """
    Publishes user events via dual ZMQ channels.

    - XPUB socket (bind): For informational events to active sessions
    - DEALER socket (connect): For critical events with guaranteed delivery

    The publisher tracks XPUB subscriptions to know when a gateway has an
    active session for this user. If subscribed, events go via XPUB (fast).
    If not subscribed and priority > INFORMATIONAL, events go via DEALER.

    Usage:
        publisher = EventPublisher(user_id="user-123")
        await publisher.start()

        # Informational event (dropped if no active session)
        await publisher.publish(UserEvent(
            event_type=EventType.INDICATOR_UPDATED,
            payload={"indicator": "RSI", "value": 65.5},
            delivery=DeliverySpec.informational(),
        ))

        # Critical event (guaranteed delivery)
        await publisher.publish(UserEvent(
            event_type=EventType.ORDER_FILLED,
            payload={"order_id": "123", "symbol": "BTC/USDT", ...},
            delivery=DeliverySpec.critical(),
        ))

        await publisher.stop()
    """

    def __init__(
        self,
        user_id: str,
        xpub_port: int = 5570,
        gateway_router_endpoint: str = "tcp://gateway:5571",
        ack_timeout: float = 30.0,
        max_retries: int = 3,
        pending_store_path: Optional[str] = None,
    ):
        """
        Initialize event publisher.

        Args:
            user_id: User ID for this container
            xpub_port: Port to bind XPUB socket on (gateway connects here)
            gateway_router_endpoint: Gateway ROUTER socket endpoint (we connect)
            ack_timeout: Seconds to wait for ack before retrying
            max_retries: Maximum retries for critical events
            pending_store_path: Path to persist pending events (for crash recovery)
        """
        self.user_id = user_id
        self.xpub_port = xpub_port
        self.gateway_router_endpoint = gateway_router_endpoint
        self.ack_timeout = ack_timeout
        self.max_retries = max_retries
        # ZMQ context and sockets (created in start(), None until then)
        self.ctx: Optional[zmq.asyncio.Context] = None
        self.xpub_socket: Optional[zmq.asyncio.Socket] = None
        self.dealer_socket: Optional[zmq.asyncio.Socket] = None
        # Track active subscriptions (set of topic strings, e.g. "USER:<id>")
        self.active_subscriptions: Set[str] = set()
        # Track pending critical events awaiting ack, keyed by event_id
        self.pending_events: Dict[str, PendingEvent] = {}
        # Persistent store for crash recovery (no-op if path is None)
        self.pending_store = PendingStore(pending_store_path)
        # Background tasks (subscription tracking, ack handling, retries)
        self._subscription_task: Optional[asyncio.Task] = None
        self._ack_task: Optional[asyncio.Task] = None
        self._retry_task: Optional[asyncio.Task] = None
        self._running = False
        # Statistics counters exposed via get_stats()
        self.stats = {
            "events_published": 0,
            "events_via_xpub": 0,
            "events_via_dealer": 0,
            "events_dropped": 0,
            "events_delivered": 0,
            "events_failed": 0,
            "retries": 0,
        }

    async def start(self) -> None:
        """Start the event publisher.

        Creates both sockets, reloads any persisted pending events, starts
        the three background loops, then re-sends the reloaded events via
        DEALER. Idempotent: a second call while running is a no-op.
        """
        if self._running:
            logger.warning("Event publisher already running")
            return
        logger.info(
            "Starting event publisher: user_id=%s, xpub_port=%d, gateway=%s",
            self.user_id,
            self.xpub_port,
            self.gateway_router_endpoint,
        )
        # Create ZMQ context
        self.ctx = zmq.asyncio.Context()
        # Create XPUB socket for informational events
        self.xpub_socket = self.ctx.socket(zmq.XPUB)
        self.xpub_socket.setsockopt(zmq.XPUB_VERBOSE, 1)  # Receive all sub/unsub
        self.xpub_socket.bind(f"tcp://*:{self.xpub_port}")
        logger.info("XPUB socket bound on port %d", self.xpub_port)
        # Create DEALER socket for critical events; identity lets the gateway
        # ROUTER address acks back to this specific container
        self.dealer_socket = self.ctx.socket(zmq.DEALER)
        self.dealer_socket.setsockopt_string(zmq.IDENTITY, f"container-{self.user_id}")
        self.dealer_socket.connect(self.gateway_router_endpoint)
        logger.info("DEALER socket connected to %s", self.gateway_router_endpoint)
        # Load any persisted pending events (sent_at reset to now so the
        # retry loop gives them a full ack_timeout window)
        persisted = await self.pending_store.load_pending()
        for event in persisted:
            self.pending_events[event.event_id] = PendingEvent(
                event=event,
                sent_at=time.time(),
                retries=0,
            )
        if persisted:
            logger.info("Loaded %d pending events from disk", len(persisted))
        # Start background tasks
        self._running = True
        self._subscription_task = asyncio.create_task(self._subscription_loop())
        self._ack_task = asyncio.create_task(self._ack_loop())
        self._retry_task = asyncio.create_task(self._retry_loop())
        # Resend any loaded pending events
        for pending in list(self.pending_events.values()):
            await self._send_via_dealer(pending.event)
        logger.info("Event publisher started")

    async def stop(self) -> None:
        """Stop the event publisher and persist pending events.

        Cancels background tasks first so nothing races the socket close,
        then writes still-unacked events to the pending store for replay
        on next start().
        """
        if not self._running:
            return
        logger.info("Stopping event publisher")
        self._running = False
        # Cancel background tasks
        for task in [self._subscription_task, self._ack_task, self._retry_task]:
            if task:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass
        # Persist pending critical events for crash recovery
        if self.pending_events:
            events = [pe.event for pe in self.pending_events.values()]
            await self.pending_store.save_pending(events)
            logger.info("Persisted %d pending events", len(events))
        # Close sockets
        # NOTE(review): sockets are closed with the default LINGER, so
        # ctx.term() may block if unsent messages remain on the DEALER —
        # consider close(linger=0) since pending events are persisted anyway.
        if self.xpub_socket:
            self.xpub_socket.close()
        if self.dealer_socket:
            self.dealer_socket.close()
        if self.ctx:
            self.ctx.term()
        logger.info(
            "Event publisher stopped. Stats: %s",
            self.stats,
        )

    def has_active_subscriber(self) -> bool:
        """Check if any gateway is subscribed to this user's events."""
        topic = f"USER:{self.user_id}"
        return topic in self.active_subscriptions

    async def publish(self, event: UserEvent) -> None:
        """
        Publish an event via the appropriate channel.

        Routing logic:
        - INFORMATIONAL: XPUB only if subscribed, else drop
        - NORMAL/CRITICAL + subscribed: XPUB (fast path)
        - NORMAL/CRITICAL + not subscribed: DEALER (guaranteed)

        Args:
            event: Event to publish
        """
        # Ensure event has required fields (publisher fills in defaults
        # for id, owner, and timestamp if the caller left them blank)
        if not event.event_id:
            event.event_id = str(uuid.uuid4())
        if not event.user_id:
            event.user_id = self.user_id
        if not event.timestamp:
            event.timestamp = int(time.time() * 1000)
        priority = event.delivery.priority
        has_subscriber = self.has_active_subscriber()
        logger.debug(
            "Publishing event: id=%s, type=%s, priority=%s, has_subscriber=%s",
            event.event_id,
            get_event_type_name(event.event_type),
            Priority(priority).name,
            has_subscriber,
        )
        self.stats["events_published"] += 1
        if priority == Priority.INFORMATIONAL:
            # Fire and forget - only send if someone's listening
            if has_subscriber:
                await self._send_via_xpub(event)
                self.stats["events_via_xpub"] += 1
            else:
                logger.debug(
                    "Dropping informational event (no subscriber): %s",
                    event.event_id,
                )
                self.stats["events_dropped"] += 1
        elif has_subscriber:
            # Active session exists - use fast path
            # NOTE(review): CRITICAL events also take this un-acked XPUB path
            # when a subscriber exists, but the Priority docstring in types.py
            # says CRITICAL "always uses DEALER with ack" — confirm which
            # contract is intended.
            await self._send_via_xpub(event)
            self.stats["events_via_xpub"] += 1
        else:
            # No active session - use guaranteed delivery
            await self._send_via_dealer(event)
            self.stats["events_via_dealer"] += 1
            # Track for ack (retry loop resends until acked or max_retries)
            self.pending_events[event.event_id] = PendingEvent(
                event=event,
                sent_at=time.time(),
                retries=0,
            )

    async def _send_via_xpub(self, event: UserEvent) -> None:
        """Send event via XPUB socket (fire-and-forget, no ack tracking)."""
        topic = f"USER:{self.user_id}"
        payload = serialize_user_event(event)
        await self.xpub_socket.send_multipart([topic.encode(), payload])
        logger.debug(
            "Sent event via XPUB: id=%s, type=%s",
            event.event_id,
            get_event_type_name(event.event_type),
        )

    async def _send_via_dealer(self, event: UserEvent) -> None:
        """Send event via DEALER socket (caller is responsible for ack tracking)."""
        payload = serialize_user_event(event)
        await self.dealer_socket.send(payload)
        logger.debug(
            "Sent event via DEALER: id=%s, type=%s",
            event.event_id,
            get_event_type_name(event.event_type),
        )

    async def _subscription_loop(self) -> None:
        """Process XPUB subscription/unsubscription messages.

        With XPUB_VERBOSE set, every (re)subscribe is delivered here, so
        active_subscriptions stays in sync with the gateway's interest.
        """
        logger.debug("Starting subscription loop")
        while self._running:
            try:
                # Poll with timeout to allow shutdown
                if await self.xpub_socket.poll(100):
                    msg = await self.xpub_socket.recv()
                    # First byte: 1 = subscribe, 0 = unsubscribe
                    # Remaining bytes: topic
                    is_subscribe = msg[0] == 1
                    topic = msg[1:].decode()
                    if is_subscribe:
                        self.active_subscriptions.add(topic)
                        logger.info("Gateway subscribed to topic: %s", topic)
                    else:
                        self.active_subscriptions.discard(topic)
                        logger.info("Gateway unsubscribed from topic: %s", topic)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error("Error in subscription loop: %s", e, exc_info=True)
        logger.debug("Subscription loop ended")

    async def _ack_loop(self) -> None:
        """Process EventAck messages from gateway.

        DELIVERED and QUEUED both count as success and clear the pending
        entry; ACK_ERROR is a permanent failure (no further retries).
        """
        logger.debug("Starting ack loop")
        while self._running:
            try:
                # Poll with timeout
                if await self.dealer_socket.poll(100):
                    payload = await self.dealer_socket.recv()
                    ack = deserialize_event_ack(payload)
                    logger.debug(
                        "Received ack: event_id=%s, status=%s",
                        ack.event_id,
                        AckStatus(ack.status).name,
                    )
                    if ack.event_id in self.pending_events:
                        pending = self.pending_events.pop(ack.event_id)
                        if ack.status == AckStatus.DELIVERED:
                            logger.info(
                                "Event delivered: id=%s, type=%s, via=%s",
                                ack.event_id,
                                get_event_type_name(pending.event.event_type),
                                ack.delivered_via.name if ack.delivered_via else "unknown",
                            )
                            self.stats["events_delivered"] += 1
                        elif ack.status == AckStatus.QUEUED:
                            logger.info(
                                "Event queued for delivery: id=%s",
                                ack.event_id,
                            )
                            self.stats["events_delivered"] += 1
                        elif ack.status == AckStatus.ACK_ERROR:
                            logger.error(
                                "Event delivery failed: id=%s, error=%s",
                                ack.event_id,
                                ack.error_message,
                            )
                            self.stats["events_failed"] += 1
                    else:
                        # Likely an ack for an event acked once already or
                        # from a previous process incarnation
                        logger.debug("Received ack for unknown event: %s", ack.event_id)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error("Error in ack loop: %s", e, exc_info=True)
        logger.debug("Ack loop ended")

    async def _retry_loop(self) -> None:
        """Retry pending events that haven't been acked.

        Every 5 seconds, resends any pending event older than ack_timeout;
        after max_retries resends the event is dropped and counted failed.
        """
        logger.debug("Starting retry loop")
        while self._running:
            try:
                await asyncio.sleep(5)  # Check every 5 seconds
                now = time.time()
                for event_id, pending in list(self.pending_events.items()):
                    if now - pending.sent_at > self.ack_timeout:
                        if pending.retries >= self.max_retries:
                            # Give up
                            logger.error(
                                "Event exceeded max retries, dropping: id=%s, type=%s",
                                event_id,
                                get_event_type_name(pending.event.event_type),
                            )
                            del self.pending_events[event_id]
                            self.stats["events_failed"] += 1
                        else:
                            # Retry
                            pending.retries += 1
                            pending.sent_at = now
                            await self._send_via_dealer(pending.event)
                            logger.info(
                                "Retrying event: id=%s, attempt=%d/%d",
                                event_id,
                                pending.retries,
                                self.max_retries,
                            )
                            self.stats["retries"] += 1
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error("Error in retry loop: %s", e, exc_info=True)
        logger.debug("Retry loop ended")

    def get_stats(self) -> Dict[str, int]:
        """Get publisher statistics (counters plus live gauge values)."""
        return {
            **self.stats,
            "pending_events": len(self.pending_events),
            "active_subscriptions": len(self.active_subscriptions),
        }

View File

@@ -0,0 +1,384 @@
"""
User Event Types for Container → Gateway communication.
These types mirror the protobuf definitions in protobuf/user_events.proto
and the TypeScript types in gateway/src/events/types.ts.
Message Type IDs (must match protocol.md):
- UserEvent: 0x20
- EventAck: 0x21
"""
import json
import uuid
import time
from dataclasses import dataclass, field
from enum import IntEnum
from typing import List, Optional, Any
# =============================================================================
# Message Type IDs
# =============================================================================
# Wire-format message-type tags (first byte of every serialized message).
# Must stay in sync with doc/protocol.md and the gateway's TypeScript side.
MSG_TYPE_USER_EVENT = 0x20
MSG_TYPE_EVENT_ACK = 0x21
# =============================================================================
# Enums
# =============================================================================
class EventType(IntEnum):
    """Types of events that containers can emit.

    Values are spaced in blocks of ten per category, presumably leaving
    room for future additions without renumbering (the integer value is
    what goes over the wire).
    """
    # Trading events
    ORDER_PLACED = 0
    ORDER_FILLED = 1
    ORDER_CANCELLED = 2
    ORDER_REJECTED = 3
    ORDER_EXPIRED = 4
    # Alert events
    ALERT_TRIGGERED = 10
    ALERT_CREATED = 11
    ALERT_DELETED = 12
    # Position events
    POSITION_OPENED = 20
    POSITION_CLOSED = 21
    POSITION_UPDATED = 22
    POSITION_LIQUIDATED = 23
    # Workspace/chart events
    WORKSPACE_CHANGED = 30
    CHART_ANNOTATION_ADDED = 31
    CHART_ANNOTATION_REMOVED = 32
    INDICATOR_UPDATED = 33
    # Strategy events
    STRATEGY_STARTED = 40
    STRATEGY_STOPPED = 41
    STRATEGY_LOG = 42
    STRATEGY_ERROR = 43
    BACKTEST_COMPLETED = 44
    # System events
    CONTAINER_STARTING = 50
    CONTAINER_READY = 51
    CONTAINER_SHUTTING_DOWN = 52
    EVENT_ERROR = 53
class Priority(IntEnum):
    """
    Event delivery priority.

    INFORMATIONAL: Drop if no active session (fire-and-forget via XPUB)
    NORMAL: Best effort - XPUB if subscribed, else DEALER
    CRITICAL: Must deliver - always uses DEALER with ack

    NOTE(review): EventPublisher.publish currently routes CRITICAL via
    XPUB (no ack) when a subscriber exists, which contradicts the
    CRITICAL description above — confirm which contract is intended.
    """
    INFORMATIONAL = 0
    NORMAL = 1
    CRITICAL = 2
class ChannelType(IntEnum):
    """Delivery channel types.

    Used inside ChannelPreference/DeliverySpec to express an ordered
    list of delivery targets for an event.
    """
    ACTIVE_SESSION = 0  # Whatever's currently connected
    WEB = 1             # WebSocket to web UI
    TELEGRAM = 2        # Telegram bot message
    EMAIL = 3           # Email notification
    PUSH = 4            # Mobile push notification
    DISCORD = 5         # Discord webhook
    SLACK = 6           # Slack webhook
class AckStatus(IntEnum):
    """Event acknowledgment status returned by the gateway in EventAck.

    DELIVERED and QUEUED are both treated as success by the publisher's
    ack loop; ACK_ERROR is permanent (no retry).
    """
    DELIVERED = 0  # Successfully delivered to at least one channel
    QUEUED = 1     # Accepted and queued for delivery
    ACK_ERROR = 2  # Permanent failure - all channels failed
# =============================================================================
# Message Types
# =============================================================================
@dataclass
class ChannelPreference:
    """
    A single delivery-channel choice within a DeliverySpec.

    Args:
        channel: Which channel to deliver to
        only_if_active: If True, skip this channel if user is not connected
    """
    channel: ChannelType
    only_if_active: bool = False

    def to_dict(self) -> dict:
        """Return a JSON-serializable representation of this preference."""
        return {"channel": int(self.channel), "only_if_active": self.only_if_active}

    @classmethod
    def from_dict(cls, data: dict) -> "ChannelPreference":
        """Rebuild a ChannelPreference from its dict form."""
        flag = data.get("only_if_active", False)
        return cls(channel=ChannelType(data["channel"]), only_if_active=flag)
@dataclass
class DeliverySpec:
    """
    Delivery specification for an event.

    Args:
        priority: Delivery priority (INFORMATIONAL, NORMAL, CRITICAL)
        channels: Ordered list of channel preferences to try
    """
    priority: Priority = Priority.NORMAL
    channels: List[ChannelPreference] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a JSON-serializable representation of this spec."""
        return {
            "priority": int(self.priority),
            "channels": [pref.to_dict() for pref in self.channels],
        }

    @classmethod
    def from_dict(cls, data: dict) -> "DeliverySpec":
        """Rebuild a DeliverySpec from its dict form."""
        prefs = [ChannelPreference.from_dict(c) for c in data.get("channels", [])]
        return cls(
            priority=Priority(data.get("priority", Priority.NORMAL)),
            channels=prefs,
        )

    # -------------------------------------------------------------------------
    # Convenience constructors
    # -------------------------------------------------------------------------

    @staticmethod
    def informational() -> "DeliverySpec":
        """
        Drop if no active session.

        Use for: indicator updates, chart syncs, strategy logs when watching.
        """
        session_only = ChannelPreference(ChannelType.ACTIVE_SESSION, only_if_active=True)
        return DeliverySpec(priority=Priority.INFORMATIONAL, channels=[session_only])

    @staticmethod
    def active_or_telegram() -> "DeliverySpec":
        """
        Active session preferred, fallback to Telegram.

        Use for: alerts, position updates.
        """
        fallback_chain = [
            ChannelPreference(ChannelType.ACTIVE_SESSION, only_if_active=True),
            ChannelPreference(ChannelType.TELEGRAM, only_if_active=False),
        ]
        return DeliverySpec(priority=Priority.NORMAL, channels=fallback_chain)

    @staticmethod
    def active_or_push() -> "DeliverySpec":
        """
        Active session preferred, fallback to push notification.

        Use for: alerts, position updates on mobile.
        """
        fallback_chain = [
            ChannelPreference(ChannelType.ACTIVE_SESSION, only_if_active=True),
            ChannelPreference(ChannelType.PUSH, only_if_active=False),
        ]
        return DeliverySpec(priority=Priority.NORMAL, channels=fallback_chain)

    @staticmethod
    def critical() -> "DeliverySpec":
        """
        Must deliver through any available channel.

        Use for: order fills, liquidations, critical errors.
        """
        every_channel = [
            ChannelPreference(ChannelType.ACTIVE_SESSION, only_if_active=True),
            ChannelPreference(ChannelType.TELEGRAM, only_if_active=False),
            ChannelPreference(ChannelType.PUSH, only_if_active=False),
            ChannelPreference(ChannelType.EMAIL, only_if_active=False),
        ]
        return DeliverySpec(priority=Priority.CRITICAL, channels=every_channel)

    @staticmethod
    def telegram_only() -> "DeliverySpec":
        """
        Deliver only to Telegram, regardless of active session.

        Use for: scheduled reports, digest notifications.
        """
        telegram = ChannelPreference(ChannelType.TELEGRAM, only_if_active=False)
        return DeliverySpec(priority=Priority.NORMAL, channels=[telegram])
@dataclass
class UserEvent:
    """
    Event emitted by user container to gateway.

    Args:
        user_id: User ID this event belongs to
        event_id: Unique event ID (UUID) for deduplication and ack
        timestamp: Unix milliseconds when event was generated
        event_type: Type of event
        payload: Event data (will be JSON encoded)
        delivery: Delivery specification
    """
    event_type: EventType
    payload: Any = None
    delivery: DeliverySpec = field(default_factory=DeliverySpec)
    user_id: str = ""
    event_id: str = ""
    timestamp: int = 0

    def __post_init__(self):
        # Auto-assign identity and generation time when left blank.
        self.event_id = self.event_id or str(uuid.uuid4())
        self.timestamp = self.timestamp or int(time.time() * 1000)

    def to_dict(self) -> dict:
        """Return a JSON-serializable representation of this event."""
        return dict(
            user_id=self.user_id,
            event_id=self.event_id,
            timestamp=self.timestamp,
            event_type=int(self.event_type),
            payload=self.payload,
            delivery=self.delivery.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: dict) -> "UserEvent":
        """Rebuild a UserEvent from its dict form."""
        return cls(
            event_type=EventType(data["event_type"]),
            payload=data.get("payload"),
            delivery=DeliverySpec.from_dict(data.get("delivery", {})),
            user_id=data.get("user_id", ""),
            event_id=data.get("event_id", ""),
            timestamp=data.get("timestamp", 0),
        )
@dataclass
class EventAck:
    """
    Acknowledgment from gateway for a critical event.

    Args:
        event_id: Event ID being acknowledged
        status: Delivery status
        error_message: Error message if status is ERROR
        delivered_via: Which channel successfully delivered (optional)
    """
    event_id: str
    status: AckStatus
    error_message: str = ""
    delivered_via: Optional[ChannelType] = None

    def to_dict(self) -> dict:
        """Return a JSON-serializable representation of this ack."""
        via = self.delivered_via
        return {
            "event_id": self.event_id,
            "status": int(self.status),
            "error_message": self.error_message,
            "delivered_via": None if via is None else int(via),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "EventAck":
        """Rebuild an EventAck from its dict form."""
        raw_via = data.get("delivered_via")
        via = None if raw_via is None else ChannelType(raw_via)
        return cls(
            event_id=data["event_id"],
            status=AckStatus(data["status"]),
            error_message=data.get("error_message", ""),
            delivered_via=via,
        )
# =============================================================================
# Serialization
# =============================================================================
def serialize_user_event(event: UserEvent) -> bytes:
    """
    Serialize UserEvent to wire format.

    Format: [1 byte msg type][JSON payload]

    Note: In production, replace with proper protobuf serialization.
    """
    body = json.dumps(event.to_dict()).encode("utf-8")
    return MSG_TYPE_USER_EVENT.to_bytes(1, "big") + body
def deserialize_user_event(data: bytes) -> UserEvent:
    """Deserialize UserEvent from wire format; raises ValueError on bad input."""
    if len(data) < 2:
        raise ValueError("Data too short")
    tag, body = data[0], data[1:]
    if tag != MSG_TYPE_USER_EVENT:
        raise ValueError(f"Invalid message type: expected {MSG_TYPE_USER_EVENT}, got {tag}")
    return UserEvent.from_dict(json.loads(body.decode("utf-8")))
def serialize_event_ack(ack: EventAck) -> bytes:
    """Serialize EventAck to wire format: [1 byte msg type][JSON payload]."""
    body = json.dumps(ack.to_dict()).encode("utf-8")
    return MSG_TYPE_EVENT_ACK.to_bytes(1, "big") + body
def deserialize_event_ack(data: bytes) -> EventAck:
    """Deserialize EventAck from wire format; raises ValueError on bad input."""
    if len(data) < 2:
        raise ValueError("Data too short")
    tag, body = data[0], data[1:]
    if tag != MSG_TYPE_EVENT_ACK:
        raise ValueError(f"Invalid message type: expected {MSG_TYPE_EVENT_ACK}, got {tag}")
    return EventAck.from_dict(json.loads(body.decode("utf-8")))
# =============================================================================
# Helpers
# =============================================================================
def get_event_type_name(event_type: EventType) -> str:
    """Get human-readable event type name (the enum member's symbolic name)."""
    return event_type.name
def get_channel_type_name(channel_type: ChannelType) -> str:
    """Get human-readable channel type name (the enum member's symbolic name)."""
    return channel_type.name

275
client-py/main.py Normal file
View File

@@ -0,0 +1,275 @@
#!/usr/bin/env python3
"""
DexOrder User Container Main Entry Point
Brings together:
- Config and secrets loading from k8s mounted YAML files
- ZeroMQ event publisher for user events
- MCP server with minimal "hello world" resource
- Lifecycle management integration
"""
import asyncio
import logging
import os
import signal
import sys
from pathlib import Path
from typing import Optional
import yaml
from mcp.server import Server
from mcp.server.stdio import stdio_server
from dexorder import EventPublisher, start_lifecycle_manager, get_lifecycle_manager
from dexorder.events import EventType, UserEvent, DeliverySpec
# =============================================================================
# Configuration
# =============================================================================
class Config:
    """Application configuration loaded from config.yaml and secrets.yaml"""

    def __init__(self):
        env = os.getenv
        # User ID (required)
        self.user_id: str = env("USER_ID", "")
        if not self.user_id:
            raise ValueError("USER_ID environment variable required")
        # Config and secrets paths (k8s mounted)
        self.config_path = Path(env("CONFIG_PATH", "/app/config/config.yaml"))
        self.secrets_path = Path(env("SECRETS_PATH", "/app/config/secrets.yaml"))
        # ZMQ ports for event system
        self.zmq_xpub_port: int = int(env("ZMQ_XPUB_PORT", "5570"))
        self.zmq_gateway_endpoint: str = env(
            "ZMQ_GATEWAY_ENDPOINT",
            "tcp://gateway:5571"
        )
        # MCP server settings
        self.mcp_server_name: str = env("MCP_SERVER_NAME", "dexorder-user")
        # Lifecycle settings
        self.idle_timeout_minutes: int = int(env("IDLE_TIMEOUT_MINUTES", "15"))
        self.enable_idle_shutdown: bool = env("ENABLE_IDLE_SHUTDOWN", "true").lower() == "true"
        # Parsed YAML contents, populated by load()
        self.config_data: dict = {}
        self.secrets_data: dict = {}

    def load(self) -> None:
        """Load configuration and secrets from YAML files"""
        self.config_data = self._read_yaml(self.config_path, "config")
        self.secrets_data = self._read_yaml(self.secrets_path, "secrets")

    def _read_yaml(self, path: Path, label: str) -> dict:
        """Read one YAML file, returning {} if it is missing or empty."""
        if not path.exists():
            logging.warning(f"{label.capitalize()} file not found: {path}")
            return {}
        with open(path) as f:
            data = yaml.safe_load(f) or {}
        logging.info(f"Loaded {label} from {path}")
        return data
# =============================================================================
# MCP Server Setup
# =============================================================================
def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server:
    """
    Create MCP server with minimal hello world resource.

    Registers one resource URI for this user; reading it publishes an
    informational STRATEGY_LOG event before returning the text body.
    """
    server = Server(config.mcp_server_name)
    hello_uri = f"dexorder://user/{config.user_id}/hello"

    @server.list_resources()
    async def list_resources():
        """Advertise the resources this container serves."""
        return [
            {
                "uri": hello_uri,
                "name": "Hello World",
                "description": "A simple hello world resource",
                "mimeType": "text/plain",
            }
        ]

    @server.read_resource()
    async def read_resource(uri: str):
        """Serve a resource by URI; unknown URIs raise ValueError."""
        if uri != hello_uri:
            raise ValueError(f"Unknown resource: {uri}")
        # Publish an event when resource is accessed
        await event_publisher.publish(UserEvent(
            event_type=EventType.STRATEGY_LOG,
            payload={
                "message": "Hello world resource accessed",
                "uri": uri,
            },
            delivery=DeliverySpec.informational(),
        ))
        return {
            "uri": uri,
            "mimeType": "text/plain",
            "text": f"Hello from DexOrder user container!\nUser ID: {config.user_id}\n",
        }

    logging.info(f"MCP server '{config.mcp_server_name}' created")
    return server
# =============================================================================
# Main Application
# =============================================================================
class UserContainer:
    """Main user container application.

    Owns the container lifecycle: configuration loading, the lifecycle
    manager, the ZMQ event publisher, and the MCP server run over stdio.
    """

    def __init__(self):
        self.config = Config()
        self.event_publisher: Optional[EventPublisher] = None
        self.mcp_server: Optional[Server] = None
        self.running = False
        # Ensures stop()'s cleanup runs at most once, even when shutdown
        # is triggered both by a signal handler and by run()'s finally.
        self._stopped = False

    async def start(self) -> None:
        """Start all subsystems.

        Order matters: config → lifecycle manager → event publisher →
        MCP server, with lifecycle events published along the way.
        """
        logging.info("Starting user container")
        # Load configuration
        self.config.load()
        # Start lifecycle manager
        await start_lifecycle_manager(
            user_id=self.config.user_id,
            idle_timeout_minutes=self.config.idle_timeout_minutes,
            enable_idle_shutdown=self.config.enable_idle_shutdown,
        )
        logging.info("Lifecycle manager started")
        # Start event publisher
        self.event_publisher = EventPublisher(
            user_id=self.config.user_id,
            xpub_port=self.config.zmq_xpub_port,
            gateway_router_endpoint=self.config.zmq_gateway_endpoint,
        )
        await self.event_publisher.start()
        logging.info("Event publisher started")
        # Publish CONTAINER_STARTING event
        await self.event_publisher.publish(UserEvent(
            event_type=EventType.CONTAINER_STARTING,
            payload={
                "user_id": self.config.user_id,
                "timestamp": None,  # Will be auto-filled
            },
            delivery=DeliverySpec.active_or_telegram(),
        ))
        # Create MCP server
        self.mcp_server = create_mcp_server(self.config, self.event_publisher)
        # Publish CONTAINER_READY event
        await self.event_publisher.publish(UserEvent(
            event_type=EventType.CONTAINER_READY,
            payload={
                "user_id": self.config.user_id,
            },
            delivery=DeliverySpec.active_or_telegram(),
        ))
        self.running = True
        logging.info("User container ready")

    async def stop(self) -> None:
        """Stop all subsystems. Idempotent.

        Unlike a simple `running` check, the `_stopped` flag lets this
        method also clean up subsystems that were only partially started
        (i.e. when start() raised midway), while still guaranteeing the
        teardown runs at most once.
        """
        if self._stopped:
            return
        self._stopped = True
        logging.info("Stopping user container")
        self.running = False
        # Publish CONTAINER_SHUTTING_DOWN event (best effort: the
        # publisher may be None if start() failed before creating it)
        if self.event_publisher:
            await self.event_publisher.publish(UserEvent(
                event_type=EventType.CONTAINER_SHUTTING_DOWN,
                payload={
                    "user_id": self.config.user_id,
                },
                delivery=DeliverySpec.active_or_telegram(),
            ))
        # Stop subsystems in reverse start order
        if self.event_publisher:
            await self.event_publisher.stop()
            logging.info("Event publisher stopped")
        lifecycle = get_lifecycle_manager()
        if lifecycle:
            await lifecycle.stop()
            logging.info("Lifecycle manager stopped")
        logging.info("User container stopped")

    async def run(self) -> None:
        """Start subsystems and serve MCP over stdio until shutdown.

        start() is inside the try block so that a failure partway
        through startup still reaches stop() and tears down whatever
        subsystems did come up (the original leaked them).
        """
        try:
            await self.start()
            # Run MCP server on stdio (stdout carries the protocol;
            # logging is configured to stderr in main()).
            async with stdio_server() as (read_stream, write_stream):
                await self.mcp_server.run(
                    read_stream,
                    write_stream,
                    self.mcp_server.create_initialization_options()
                )
        finally:
            await self.stop()
# =============================================================================
# Entry Point
# =============================================================================
async def main():
    """Main entry point.

    Configures logging, installs signal handlers for orderly shutdown,
    and runs the user container until it exits or is signalled.
    """
    # Setup logging. Fall back to INFO for an unrecognized LOG_LEVEL
    # instead of raising AttributeError at startup.
    log_level = os.getenv("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(
        level=getattr(logging, log_level, logging.INFO),
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        stream=sys.stderr,  # MCP uses stdout for protocol
    )
    # Create and run container
    container = UserContainer()
    # Handle shutdown signals. get_event_loop() is deprecated inside a
    # coroutine (Python 3.10+); use the running loop.
    loop = asyncio.get_running_loop()
    main_task = asyncio.current_task()

    def handle_signal(sig):
        # Cancel the main task rather than calling loop.stop():
        # stopping a loop driven by asyncio.run() aborts it before the
        # container.stop() cleanup can run and makes asyncio.run()
        # raise RuntimeError. Cancellation instead unwinds through
        # container.run()'s finally block for an orderly shutdown.
        logging.info(f"Received signal {sig}, shutting down...")
        main_task.cancel()

    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, handle_signal, sig)
    try:
        await container.run()
    except asyncio.CancelledError:
        # Reached after a SIGTERM/SIGINT cancellation; cleanup already
        # ran in container.run()'s finally block.
        logging.info("Shutdown complete")
    except KeyboardInterrupt:
        logging.info("Keyboard interrupt received")
    except Exception as e:
        logging.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)
# Script entry point: start the asyncio event loop and run the
# container's async main() until it completes.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,25 @@
# Example secrets file for DexOrder user container
# Mount this at /app/config/secrets.yaml in k8s
# This file should be created from k8s secrets
# Iceberg catalog credentials
iceberg:
s3:
endpoint: "http://minio:9000"
access_key_id: "minioadmin"
secret_access_key: "minioadmin"
region: "us-east-1"
catalog:
uri: "http://iceberg-rest:8181"
warehouse: "s3://warehouse/"
# API keys for external services (if needed)
api_keys:
telegram_bot_token: ""
# Internal auth (for mode A - platform harness)
auth:
mode: "mtls" # or "platform_token" or "api_key"
# API key hash if using api_key mode
api_key_hash: ""

View File

@@ -10,9 +10,11 @@ setup(
"pyiceberg>=0.6.0",
"pyarrow>=14.0.0",
"pandas>=2.0.0",
"zmq>=0.0.0",
"pyzmq>=25.0.0",
"protobuf>=4.25.0",
"pyyaml>=6.0",
"aiofiles>=23.0.0",
"mcp>=0.9.0",
],
extras_require={
"dev": [