sandbox connected and streaming

This commit is contained in:
2026-03-30 23:29:03 -04:00
parent c3a8fae132
commit 998f69fa1a
130 changed files with 7416 additions and 2123 deletions

View File

@@ -1,4 +1,4 @@
# ValidatingAdmissionPolicy to restrict images in dexorder-agents namespace
# ValidatingAdmissionPolicy to restrict images in dexorder-sandboxes namespace
# Requires Kubernetes 1.30+ for the admissionregistration.k8s.io/v1 API used here (1.28-1.29 only serve it as v1beta1, behind a feature gate)
# This is the critical security control that prevents arbitrary image execution
# even if the gateway is compromised.
@@ -6,25 +6,28 @@
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicy
metadata:
name: dexorder-agent-image-policy
name: dexorder-sandbox-image-policy
spec:
failurePolicy: Fail
matchConstraints:
namespaceSelector:
matchLabels:
dexorder.io/type: agents
dexorder.io/type: sandboxes
resourceRules:
- apiGroups: ["apps"]
apiVersions: ["v1"]
resources: ["deployments"]
operations: ["CREATE", "UPDATE"]
validations:
# Only allow images from our approved registry with agent prefix
# Only allow approved dexorder sandbox and lifecycle-sidecar images (allowed prefixes are listed in the expression below)
- expression: |
object.spec.template.spec.containers.all(c,
c.image.startsWith('ghcr.io/dexorder/agent:') ||
c.image.startsWith('ghcr.io/dexorder/agent-'))
message: "Only approved dexorder agent images are allowed in the agents namespace"
c.image.startsWith('ghcr.io/dexorder/sandbox:') ||
c.image.startsWith('ghcr.io/dexorder/sandbox-') ||
c.image.startsWith('ghcr.io/dexorder/lifecycle-sidecar:') ||
c.image.startsWith('dexorder/ai-sandbox:') ||
c.image.startsWith('dexorder/ai-lifecycle-sidecar:'))
message: "Only approved dexorder sandbox images are allowed in the sandboxes namespace"
reason: Forbidden
# No privileged containers
@@ -99,12 +102,12 @@ spec:
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicyBinding
metadata:
name: dexorder-agent-image-policy-binding
name: dexorder-sandbox-image-policy-binding
spec:
policyName: dexorder-agent-image-policy
policyName: dexorder-sandbox-image-policy
validationActions:
- Deny
matchResources:
namespaceSelector:
matchLabels:
dexorder.io/type: agents
dexorder.io/type: sandboxes

View File

@@ -1,6 +1,6 @@
# RBAC for gateway to CREATE agent deployments only
# RBAC for gateway to CREATE sandbox deployments only
# Principle of least privilege: gateway can ONLY create deployments/services/PVCs
# in the dexorder-agents namespace. Deletion is handled by the lifecycle sidecar.
# in the dexorder-sandboxes namespace. Deletion is handled by the lifecycle sidecar.
# No pods, secrets, exec, or cross-namespace access.
---
apiVersion: v1
@@ -8,12 +8,12 @@ kind: ServiceAccount
metadata:
name: gateway
---
# Role scoped to dexorder-agents namespace only
# Role scoped to dexorder-sandboxes namespace only
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: agent-creator
namespace: dexorder-agents
name: sandbox-creator
namespace: dexorder-sandboxes
rules:
# Deployments: create and read only (deletion handled by sidecar)
- apiGroups: ["apps"]
@@ -25,7 +25,7 @@ rules:
resources: ["persistentvolumeclaims"]
verbs: ["create", "get", "list", "watch"]
# Services: create and manage agent MCP endpoints
# Services: create and manage sandbox MCP endpoints
- apiGroups: [""]
resources: ["services"]
verbs: ["create", "get", "list", "watch", "patch", "update"]
@@ -52,13 +52,13 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: gateway-agent-creator
namespace: dexorder-agents
name: gateway-sandbox-creator
namespace: dexorder-sandboxes
subjects:
- kind: ServiceAccount
name: gateway
namespace: default
roleRef:
kind: Role
name: agent-creator
name: sandbox-creator
apiGroup: rbac.authorization.k8s.io

View File

@@ -43,6 +43,9 @@ spec:
- name: wait-for-qdrant
image: busybox:1.36
command: ['sh', '-c', 'until nc -z qdrant 6333; do echo waiting for qdrant; sleep 2; done;']
- name: wait-for-iceberg-catalog
image: busybox:1.36
command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;']
volumes:
- name: config

View File

@@ -6,21 +6,21 @@ resources:
- init.yaml
# Namespace definitions with PodSecurity labels
- namespaces.yaml
# RBAC for gateway to create agents (creation only)
# RBAC for gateway to create sandboxes (creation only)
- gateway-rbac.yaml
# RBAC for lifecycle sidecar (self-deletion)
- lifecycle-sidecar-rbac.yaml
# Admission policies (image restriction, security requirements)
- admission-policy.yaml
# Resource quotas and limits for agents namespace
- agent-quotas.yaml
# Resource quotas and limits for sandboxes namespace
- sandbox-quotas.yaml
# Network isolation policies
- network-policies.yaml
# Gateway service
- gateway.yaml
- gateway-ingress.yaml
# Example agent deployment (for reference, not applied by default)
# - agent-deployment-example.yaml
# Example sandbox deployment (for reference, not applied by default)
# - sandbox-deployment-example.yaml
# Services
- web.yaml
- ingress.yaml

View File

@@ -1,30 +1,30 @@
# RBAC for lifecycle sidecar - allows self-deletion only
# Each agent pod gets this ServiceAccount and can only delete its own deployment
# Each sandbox pod gets this ServiceAccount; its sidecar is meant to delete only its own deployment (the RBAC grant itself is namespace-wide)
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: agent-lifecycle
namespace: dexorder-agents
name: sandbox-lifecycle
namespace: dexorder-sandboxes
---
# Role allowing deletion of deployments and PVCs
# This is scoped to the dexorder-agents namespace
# This is scoped to the dexorder-sandboxes namespace
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: agent-self-delete
namespace: dexorder-agents
name: sandbox-self-delete
namespace: dexorder-sandboxes
rules:
# Allow getting and deleting deployments
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get", "delete"]
# Allow getting and deleting PVCs (for anonymous users)
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "delete"]
# Read-only access to pods (for status checking)
- apiGroups: [""]
resources: ["pods"]
@@ -33,15 +33,15 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: agent-self-delete
namespace: dexorder-agents
name: sandbox-self-delete
namespace: dexorder-sandboxes
subjects:
- kind: ServiceAccount
name: agent-lifecycle
namespace: dexorder-agents
name: sandbox-lifecycle
namespace: dexorder-sandboxes
roleRef:
kind: Role
name: agent-self-delete
name: sandbox-self-delete
apiGroup: rbac.authorization.k8s.io
---
# Additional security: ValidatingWebhookConfiguration to restrict deletion
@@ -49,5 +49,5 @@ roleRef:
# Requires a validating webhook server (can be added later)
# For now, we rely on:
# 1. Sidecar only knowing its own deployment name (from env)
# 2. RBAC limiting to dexorder-agents namespace
# 2. RBAC limiting to dexorder-sandboxes namespace
# 3. Admission policy restricting deployment creation (already defined)

View File

@@ -1,14 +1,14 @@
# Namespace definitions for dexorder AI platform
# - default: gateway, web, and infrastructure services
# - dexorder-agents: user agent containers (isolated, restricted)
# - dexorder-sandboxes: per-user sandbox containers (isolated, restricted)
---
apiVersion: v1
kind: Namespace
metadata:
name: dexorder-agents
name: dexorder-sandboxes
labels:
app.kubernetes.io/part-of: dexorder
dexorder.io/type: agents
dexorder.io/type: sandboxes
# Enforce restricted pod security standards
pod-security.kubernetes.io/enforce: restricted
pod-security.kubernetes.io/enforce-version: latest

View File

@@ -1,29 +1,29 @@
# Network policies for agent isolation
# Agents can only communicate with specific services, not with each other
# Network policies for sandbox isolation
# Sandboxes can only communicate with specific services, not with each other
# or with the Kubernetes API
---
# Default deny all ingress and egress in agents namespace
# Default deny all ingress and egress in sandboxes namespace
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: default-deny-all
namespace: dexorder-agents
namespace: dexorder-sandboxes
spec:
podSelector: {}
policyTypes:
- Ingress
- Egress
---
# Allow agents to receive connections from gateway (MCP)
# Allow sandboxes to receive connections from gateway (MCP)
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-gateway-ingress
namespace: dexorder-agents
namespace: dexorder-sandboxes
spec:
podSelector:
matchLabels:
dexorder.io/component: agent
dexorder.io/component: sandbox
policyTypes:
- Ingress
ingress:
@@ -37,16 +37,16 @@ spec:
- protocol: TCP
port: 5555 # ZeroMQ control channel
---
# Allow agents to connect to required services
# Allow sandboxes to connect to required services
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-agent-egress
namespace: dexorder-agents
name: allow-sandbox-egress
namespace: dexorder-sandboxes
spec:
podSelector:
matchLabels:
dexorder.io/component: agent
dexorder.io/component: sandbox
policyTypes:
- Egress
egress:
@@ -93,11 +93,11 @@ spec:
- protocol: TCP
port: 443
---
# Default namespace: allow ingress from agents to gateway
# Default namespace: allow ingress from sandboxes to gateway
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-agent-callbacks
name: allow-sandbox-callbacks
spec:
podSelector:
matchLabels:
@@ -108,7 +108,7 @@ spec:
- from:
- namespaceSelector:
matchLabels:
dexorder.io/type: agents
dexorder.io/type: sandboxes
ports:
- protocol: TCP
port: 3000

View File

@@ -1,17 +1,17 @@
# Example agent deployment with lifecycle sidecar
# Example sandbox deployment with lifecycle sidecar
# This would be created by the gateway for each user
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: agent-user-abc123
namespace: dexorder-agents
name: sandbox-user-abc123
namespace: dexorder-sandboxes
labels:
app.kubernetes.io/name: agent
app.kubernetes.io/component: user-agent
dexorder.io/component: agent
app.kubernetes.io/name: sandbox
app.kubernetes.io/component: user-sandbox
dexorder.io/component: sandbox
dexorder.io/user-id: user-abc123
dexorder.io/deployment: agent-user-abc123
dexorder.io/deployment: sandbox-user-abc123
spec:
replicas: 1
selector:
@@ -20,15 +20,15 @@ spec:
template:
metadata:
labels:
dexorder.io/component: agent
dexorder.io/component: sandbox
dexorder.io/user-id: user-abc123
dexorder.io/deployment: agent-user-abc123
dexorder.io/deployment: sandbox-user-abc123
spec:
serviceAccountName: agent-lifecycle
serviceAccountName: sandbox-lifecycle
# Share PID namespace so sidecar can monitor main container
shareProcessNamespace: true
# Security context
securityContext:
runAsNonRoot: true
@@ -36,13 +36,13 @@ spec:
fsGroup: 1000
seccompProfile:
type: RuntimeDefault
containers:
# Main agent container
- name: agent
image: ghcr.io/dexorder/agent:latest
# Main sandbox container
- name: sandbox
image: ghcr.io/dexorder/sandbox:latest
imagePullPolicy: Always
# Security context (required by admission policy)
securityContext:
allowPrivilegeEscalation: false
@@ -52,7 +52,7 @@ spec:
capabilities:
drop:
- ALL
# Resource limits (required by admission policy)
resources:
requests:
@@ -61,7 +61,7 @@ spec:
limits:
memory: "1Gi"
cpu: "1000m"
# Environment variables
env:
- name: USER_ID
@@ -76,7 +76,7 @@ spec:
value: "3000"
- name: ZMQ_CONTROL_PORT
value: "5555"
# Ports
ports:
- name: mcp
@@ -85,17 +85,17 @@ spec:
- name: zmq-control
containerPort: 5555
protocol: TCP
# Volume mounts
volumeMounts:
- name: agent-data
- name: sandbox-data
mountPath: /app/data
- name: tmp
mountPath: /tmp
- name: shared-run
mountPath: /var/run/agent
# Liveness probe (agent's MCP server)
mountPath: /var/run/sandbox
# Liveness probe (sandbox's MCP server)
livenessProbe:
httpGet:
path: /health
@@ -103,7 +103,7 @@ spec:
initialDelaySeconds: 10
periodSeconds: 30
timeoutSeconds: 5
# Readiness probe
readinessProbe:
httpGet:
@@ -111,12 +111,12 @@ spec:
port: mcp
initialDelaySeconds: 5
periodSeconds: 10
# Lifecycle sidecar
- name: lifecycle-sidecar
image: ghcr.io/dexorder/lifecycle-sidecar:latest
imagePullPolicy: Always
# Security context
securityContext:
allowPrivilegeEscalation: false
@@ -126,7 +126,7 @@ spec:
capabilities:
drop:
- ALL
# Resource limits
resources:
requests:
@@ -135,7 +135,7 @@ spec:
limits:
memory: "64Mi"
cpu: "50m"
# Environment variables (injected via downward API)
env:
- name: NAMESPACE
@@ -150,44 +150,44 @@ spec:
value: "free" # Gateway sets this based on license
- name: MAIN_CONTAINER_PID
value: "1" # NOTE(review): with shareProcessNamespace: true, PID 1 is normally the pod's pause process, not the main container - confirm what the sidecar should monitor
# Volume mounts
volumeMounts:
- name: shared-run
mountPath: /var/run/agent
mountPath: /var/run/sandbox
readOnly: true
# Volumes
volumes:
# Persistent data (user files, state)
- name: agent-data
- name: sandbox-data
persistentVolumeClaim:
claimName: agent-user-abc123-data
claimName: sandbox-user-abc123-data
# Temporary writable filesystem (read-only rootfs)
- name: tmp
emptyDir:
medium: Memory
sizeLimit: 128Mi
# Shared between main container and sidecar
- name: shared-run
emptyDir:
medium: Memory
sizeLimit: 1Mi
# Restart policy
restartPolicy: Always
# Termination grace period
terminationGracePeriodSeconds: 30
---
# PVC for agent persistent data
# PVC for sandbox persistent data
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: agent-user-abc123-data
namespace: dexorder-agents
name: sandbox-user-abc123-data
namespace: dexorder-sandboxes
labels:
dexorder.io/user-id: user-abc123
spec:
@@ -198,12 +198,12 @@ spec:
storage: 1Gi
storageClassName: standard # Or your preferred storage class
---
# Service to expose agent MCP endpoint
# Service to expose sandbox MCP endpoint
apiVersion: v1
kind: Service
metadata:
name: agent-user-abc123
namespace: dexorder-agents
name: sandbox-user-abc123
namespace: dexorder-sandboxes
labels:
dexorder.io/user-id: user-abc123
spec:

View File

@@ -1,12 +1,12 @@
# Resource constraints for the dexorder-agents namespace
# Resource constraints for the dexorder-sandboxes namespace
# These limits apply regardless of what the gateway requests
---
# LimitRange: per-container defaults and maximums
apiVersion: v1
kind: LimitRange
metadata:
name: agent-limits
namespace: dexorder-agents
name: sandbox-limits
namespace: dexorder-sandboxes
spec:
limits:
# Default limits applied if deployment doesn't specify
@@ -36,11 +36,11 @@ spec:
apiVersion: v1
kind: ResourceQuota
metadata:
name: agent-quota
namespace: dexorder-agents
name: sandbox-quota
namespace: dexorder-sandboxes
spec:
hard:
# Total compute limits for all agents combined
# Total compute limits for all sandboxes combined
requests.cpu: "20"
requests.memory: "40Gi"
limits.cpu: "40"

View File

@@ -4,13 +4,13 @@
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicy
metadata:
name: dexorder-agent-image-policy
name: dexorder-sandbox-image-policy
spec:
failurePolicy: Fail
matchConstraints:
namespaceSelector:
matchLabels:
dexorder.io/type: agents
dexorder.io/type: sandboxes
resourceRules:
- apiGroups: ["apps"]
apiVersions: ["v1"]
@@ -20,13 +20,13 @@ spec:
# Allow local dev images in addition to production registry
- expression: |
object.spec.template.spec.containers.all(c,
c.image.startsWith('ghcr.io/dexorder/agent:') ||
c.image.startsWith('ghcr.io/dexorder/agent-') ||
c.image.startsWith('localhost:5000/dexorder/agent') ||
c.image.startsWith('dexorder/agent') ||
c.image.startsWith('dexorder/ai-client-py') ||
c.image.startsWith('ghcr.io/dexorder/sandbox:') ||
c.image.startsWith('ghcr.io/dexorder/sandbox-') ||
c.image.startsWith('localhost:5000/dexorder/sandbox') ||
c.image.startsWith('dexorder/sandbox') ||
c.image.startsWith('dexorder/ai-sandbox') ||
c.image.startsWith('dexorder/ai-lifecycle-sidecar'))
message: "Only approved dexorder agent images are allowed"
message: "Only approved dexorder sandbox images are allowed"
reason: Forbidden
# No privileged containers

View File

@@ -53,13 +53,18 @@ data:
# Kubernetes configuration
kubernetes:
namespace: dexorder-agents
namespace: dexorder-sandboxes
in_cluster: true
agent_image: dexorder/ai-client-py:latest
sidecar_image: dexorder/ai-lifecycle-sidecar:latest
sandbox_image: dexorder/ai-sandbox:SANDBOX_TAG_PLACEHOLDER
sidecar_image: dexorder/ai-lifecycle-sidecar:SIDECAR_TAG_PLACEHOLDER
storage_class: standard
image_pull_policy: Never # For minikube dev - use local images
# Agent configuration
agent:
# Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end
conversation_history_limit: 20
# DragonflyDB (Redis-compatible, for hot storage and session management)
redis:
url: redis://dragonfly:6379
@@ -76,6 +81,7 @@ data:
ohlc_catalog_uri: http://iceberg-catalog:8181
ohlc_namespace: trading
s3_endpoint: http://minio:9000
conversations_bucket: warehouse # S3 bucket for conversation Parquet cold storage
# Event router (ZeroMQ)
events:

View File

@@ -8,7 +8,7 @@ spec:
spec:
containers:
- name: gateway
image: dexorder/ai-gateway:latest
image: dexorder/ai-gateway
imagePullPolicy: Never
env:
- name: NODE_OPTIONS

View File

@@ -480,7 +480,7 @@ spec:
command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;']
containers:
- name: flink-jobmanager
image: dexorder/flink:latest
image: dexorder/ai-flink
imagePullPolicy: Never
args: ["standalone-job", "--job-classname", "com.dexorder.flink.TradingFlinkApp"]
ports:
@@ -542,7 +542,7 @@ spec:
command: ['sh', '-c', 'until nc -z flink-jobmanager 6123; do echo waiting for jobmanager; sleep 2; done;']
containers:
- name: flink-taskmanager
image: dexorder/flink:latest
image: dexorder/ai-flink
imagePullPolicy: Never
args: ["taskmanager"]
env:
@@ -617,7 +617,7 @@ spec:
spec:
containers:
- name: relay
image: dexorder/relay:latest
image: dexorder/ai-relay
imagePullPolicy: Never
ports:
- containerPort: 5555
@@ -665,7 +665,7 @@ spec:
command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;']
containers:
- name: ingestor
image: dexorder/ingestor:latest
image: dexorder/ai-ingestor
imagePullPolicy: Never
env:
- name: LOG_LEVEL

View File

@@ -8,12 +8,12 @@ resources:
- storage-class.yaml
- configs/gateway-config.yaml
- gateway-health-ingress.yaml
- agent-config.yaml # ConfigMap for agent pods in dexorder-agents namespace
- sandbox-config.yaml # ConfigMap for sandbox pods in dexorder-sandboxes namespace
# Dev-specific patches
patches:
# Reduced resource quotas for minikube
- path: agent-quotas-patch.yaml
- path: sandbox-quotas-patch.yaml
# Allow local registry images
- path: admission-policy-patch.yaml
# Web environment variables for dev
@@ -155,6 +155,63 @@ generatorOptions:

View File

@@ -1,18 +1,18 @@
# Agent ConfigMap in dexorder-agents namespace
# This is mounted into dynamically created agent pods
# Sandbox ConfigMap in dexorder-sandboxes namespace
# This is mounted into dynamically created sandbox pods
---
apiVersion: v1
kind: ConfigMap
metadata:
name: agent-config
namespace: dexorder-agents
name: sandbox-config
namespace: dexorder-sandboxes
labels:
app.kubernetes.io/name: agent
app.kubernetes.io/name: sandbox
app.kubernetes.io/component: config
data:
config.yaml: |
# Default configuration for user agent containers
# This is mounted at /app/config/config.yaml in agent pods
# Default configuration for user sandbox containers
# This is mounted at /app/config/config.yaml in sandbox pods
# Data directory for persistent storage (workspace, strategies, etc.)
# This is mounted as a PVC at /app/data
@@ -26,10 +26,14 @@ data:
data:
iceberg:
catalog_name: "dexorder"
# Catalog properties loaded from secrets
catalog_uri: "http://iceberg-catalog.default.svc.cluster.local:8181"
namespace: "trading"
# S3 endpoint for MinIO in default namespace
s3_endpoint: "http://minio.default.svc.cluster.local:9000"
relay:
endpoint: "tcp://relay.dexorder.svc.cluster.local:5560"
endpoint: "tcp://relay.default.svc.cluster.local:5559"
notification_endpoint: "tcp://relay.default.svc.cluster.local:5558"
timeout_ms: 5000
# Strategy settings

View File

@@ -4,8 +4,8 @@
apiVersion: v1
kind: ResourceQuota
metadata:
name: agent-quota
namespace: dexorder-agents
name: sandbox-quota
namespace: dexorder-sandboxes
spec:
hard:
# Reduced for minikube

View File

@@ -8,7 +8,7 @@ spec:
spec:
containers:
- name: ai-web
image: dexorder/ai-web:latest
image: dexorder/ai-web
imagePullPolicy: Never
env:
- name: VITE_GATEWAY_URL

View File

@@ -28,9 +28,9 @@ data:
# Kubernetes configuration
kubernetes:
namespace: dexorder-agents
namespace: dexorder-sandboxes
in_cluster: true
agent_image: dexorder/ai-client-py:latest
sandbox_image: dexorder/ai-sandbox:latest
sidecar_image: dexorder/ai-lifecycle-sidecar:latest
storage_class: standard
image_pull_policy: Always # For production - always pull from registry