diff --git a/.gitignore b/.gitignore index 70e8f8e0..cd485607 100644 --- a/.gitignore +++ b/.gitignore @@ -108,6 +108,8 @@ deploy/k8s/dev/secrets/*.yaml deploy/k8s/prod/secrets/*.yaml !deploy/k8s/dev/secrets/*.yaml.example !deploy/k8s/prod/secrets/*.yaml.example +# 1Password secrets are safe to commit +!deploy/k8s/prod/secrets/*.tpl.yaml # Dev environment image tags .dev-image-tag diff --git a/bin/config-update b/bin/config-update index 120efbf9..7f5bdf42 100755 --- a/bin/config-update +++ b/bin/config-update @@ -23,6 +23,7 @@ usage() { echo " relay-config - ZMQ relay configuration" echo " ingestor-config - CCXT ingestor configuration" echo " flink-config - Flink job configuration" + echo " gateway-config - Gateway configuration (prod only; uses op inject)" echo "" echo "Examples:" echo " $0 # Update all dev configs" @@ -49,16 +50,18 @@ if [ ! -d "$CONFIG_DIR" ]; then exit 1 fi -# Get kubectl context +# Set kubectl command and warn for prod if [[ "$ENV" == "prod" ]]; then - CONTEXT=$(kubectl config current-context) + KUBECTL="kubectl --context=prod" echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION configs!${NC}" - echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}" + echo -e "${YELLOW}kubectl context: prod${NC}" read -p "Are you sure you want to continue? (yes/no): " confirm if [[ "$confirm" != "yes" ]]; then echo "Aborted." exit 0 fi +else + KUBECTL="kubectl" fi apply_config() { @@ -71,9 +74,15 @@ apply_config() { fi echo -e "${GREEN}→${NC} Creating/updating ConfigMap $config_name..." 
- kubectl create configmap "$config_name" \ - --from-file=config.yaml="$config_file" \ - --dry-run=client -o yaml | kubectl apply -f - + + if [[ "$ENV" == "prod" && "$config_name" == "gateway-config" ]]; then + # gateway-config contains op:// references — resolve via op inject + op inject -i "$config_file" | $KUBECTL apply -f - + else + $KUBECTL create configmap "$config_name" \ + --from-file=config.yaml="$config_file" \ + --dry-run=client -o yaml | $KUBECTL apply -f - + fi echo -e "${GREEN}✓${NC} $config_name updated" # Optionally restart pods that use this config @@ -88,11 +97,14 @@ apply_config() { flink-config) restart_pods="deployment/flink-jobmanager deployment/flink-taskmanager" ;; + gateway-config) + restart_pods="deployment/gateway" + ;; esac if [ -n "$restart_pods" ]; then echo -e "${YELLOW} Restarting pods...${NC}" - kubectl rollout restart $restart_pods 2>/dev/null || echo -e "${YELLOW} (No pods found to restart)${NC}" + $KUBECTL rollout restart $restart_pods 2>/dev/null || echo -e "${YELLOW} (No pods found to restart)${NC}" fi } @@ -105,11 +117,20 @@ else echo -e "${GREEN}Updating all $ENV configs...${NC}" echo "" - CONFIGS=( - "relay-config" - "ingestor-config" - "flink-config" - ) + if [[ "$ENV" == "prod" ]]; then + CONFIGS=( + "relay-config" + "ingestor-config" + "flink-config" + "gateway-config" + ) + else + CONFIGS=( + "relay-config" + "ingestor-config" + "flink-config" + ) + fi FAILED=0 for config in "${CONFIGS[@]}"; do diff --git a/bin/deploy b/bin/deploy index 998e8366..3d8d832e 100755 --- a/bin/deploy +++ b/bin/deploy @@ -28,6 +28,15 @@ if [ "$1" == "dev" ]; then DEV=1 fi +if [ "$1" == "prod" ]; then + shift + ENV=prod + KUBECTL="kubectl --context=prod" +else + ENV=dev + KUBECTL="kubectl" +fi + if [ "$PROJECT" == "dev" ]; then DEV=1 # NO_CACHE=--no-cache @@ -183,7 +192,28 @@ echo "$TAG" >&2 if [ "$DEPLOY" == "1" ]; then docker push $REMOTE/ai-$PROJECT:$TAG - YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" 
deploy/k8s/$KUBERNETES.yaml) - echo "$YAML" | kubectl apply -f - || echo "$YAML" "\nkubectl apply failed" && exit 1 - echo deployed $KUBERNETES.yaml $REMOTE/ai-$PROJECT:$TAG + docker push $REMOTE/ai-$PROJECT:latest + + if [ "$ENV" == "prod" ]; then + case "$PROJECT" in + flink) + $KUBECTL set image deployment/flink-jobmanager flink-jobmanager=$REMOTE/ai-flink:$TAG + $KUBECTL set image deployment/flink-taskmanager flink-taskmanager=$REMOTE/ai-flink:$TAG + ;; + web) + $KUBECTL set image deployment/ai-web ai-web=$REMOTE/ai-web:$TAG + ;; + sandbox|lifecycle-sidecar) + echo "Image pushed. New sandboxes will use $REMOTE/ai-$PROJECT:$TAG" + ;; + *) + $KUBECTL set image deployment/$PROJECT $PROJECT=$REMOTE/ai-$PROJECT:$TAG + ;; + esac + echo "deployed $PROJECT $REMOTE/ai-$PROJECT:$TAG" + else + YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" deploy/k8s/$KUBERNETES.yaml) + echo "$YAML" | kubectl apply -f - || { echo "$YAML"; echo "kubectl apply failed"; exit 1; } + echo deployed $KUBERNETES.yaml $REMOTE/ai-$PROJECT:$TAG + fi fi diff --git a/bin/dev b/bin/dev index 8953a873..6a44fa2e 100755 --- a/bin/dev +++ b/bin/dev @@ -249,7 +249,7 @@ EOF echo -e "${GREEN}→${NC} Applying Kubernetes manifests..." kubectl apply -k . - # Apply sandbox-namespace secrets (must be after kustomize creates the dexorder-sandboxes namespace) + # Apply sandbox-namespace secrets (must be after kustomize creates the sandbox namespace) echo -e "${GREEN}→${NC} Applying sandbox secrets..." if [ -f "$ROOT_DIR/deploy/k8s/dev/secrets/sandbox-secrets.yaml" ]; then kubectl apply -f "$ROOT_DIR/deploy/k8s/dev/secrets/sandbox-secrets.yaml" @@ -274,53 +274,8 @@ EOF deployment/flink-taskmanager \ 2>/dev/null || echo -e "${YELLOW}(Some deployments not ready yet)${NC}" - # Initialize gateway database schema - echo -e "${BLUE}Initializing gateway database schema...${NC}" - echo -e "${GREEN}→${NC} Waiting for postgres..." 
- kubectl wait --for=condition=ready --timeout=120s pod -l app=postgres 2>/dev/null || { - echo -e "${YELLOW}⚠️ Postgres not ready yet${NC}" - } - - pg_pod=$(kubectl get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) - if [ -n "$pg_pod" ]; then - # Wait for postgres to actually be ready to accept connections - echo -e "${GREEN}→${NC} Verifying postgres is ready to accept connections..." - for i in {1..30}; do - if kubectl exec "$pg_pod" -- psql -U postgres -d iceberg -c "SELECT 1;" > /dev/null 2>&1; then - echo -e "${GREEN}✓ Postgres ready${NC}" - break - fi - if [ $i -eq 30 ]; then - echo -e "${RED}✗ Postgres not ready after 30 seconds${NC}" - exit 1 - fi - sleep 1 - done - - table_count=$(kubectl exec "$pg_pod" -- psql -U postgres -d iceberg -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'user';" 2>/dev/null | tr -d ' ') - if [ "$table_count" = "1" ]; then - echo -e "${GREEN}✓ Gateway schema already exists${NC}" - else - echo -e "${GREEN}→${NC} Applying gateway schema..." 
- if kubectl exec -i "$pg_pod" -- psql -U postgres -d iceberg < "$ROOT_DIR/gateway/schema.sql" > /dev/null 2>&1; then - # Verify schema was actually created - sleep 1 - table_count=$(kubectl exec "$pg_pod" -- psql -U postgres -d iceberg -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'user';" 2>/dev/null | tr -d ' ') - if [ "$table_count" = "1" ]; then - echo -e "${GREEN}✓ Gateway schema initialized${NC}" - else - echo -e "${RED}✗ Failed to verify schema creation${NC}" - exit 1 - fi - else - echo -e "${RED}✗ Failed to initialize gateway schema${NC}" - exit 1 - fi - fi - - # Create dev user (refactored into reusable function) - create_dev_user - fi + # Initialize schema and dev user + "$SCRIPT_DIR/init" dev echo "" echo -e "${GREEN}✓ Dev environment ready!${NC}" @@ -555,7 +510,7 @@ deep_restart() { kubectl delete pod -l app=iceberg-catalog 2>/dev/null || true # Remove all sandbox deployments and services to free quota echo -e "${GREEN}→${NC} Removing all sandbox deployments and services..." - kubectl delete deployments,services --all -n dexorder-sandboxes 2>/dev/null || true + kubectl delete deployments,services --all -n sandbox 2>/dev/null || true ;; *) echo -e "${RED}Error: Unknown service '$service'${NC}" @@ -699,9 +654,9 @@ case "$COMMAND" in kubectl wait --for=delete pod -l app=qdrant --timeout=60s 2>/dev/null || true # Now delete PVCs delete_pvcs all - # Delete dexorder-sandboxes namespace - echo -e "${GREEN}→${NC} Deleting dexorder-sandboxes namespace..." - kubectl delete namespace dexorder-sandboxes 2>/dev/null || true + # Delete sandbox namespace + echo -e "${GREEN}→${NC} Deleting sandbox namespace..." 
+ kubectl delete namespace sandbox 2>/dev/null || true minikube stop echo -e "${GREEN}✓ Minikube stopped and PVCs deleted${NC}" echo -e "${YELLOW}Tip: Use 'bin/dev stop --keep-data' to preserve PVCs${NC}" @@ -779,8 +734,8 @@ case "$COMMAND" in # Handle sandbox separately if [ "$sandbox_requested" == "1" ]; then - echo -e "${GREEN}→${NC} Deleting user container deployments in dexorder-sandboxes namespace..." - kubectl delete deployments --all -n dexorder-sandboxes 2>/dev/null || true + echo -e "${GREEN}→${NC} Deleting user container deployments in sandbox namespace..." + kubectl delete deployments --all -n sandbox 2>/dev/null || true echo -e "${GREEN}✓ User containers will be recreated by gateway on next login${NC}" fi fi diff --git a/bin/init b/bin/init new file mode 100755 index 00000000..d2a460fb --- /dev/null +++ b/bin/init @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +usage() { + echo "Usage: $0 [dev|prod]" + echo "" + echo "Initialize database schema and create admin user." + echo "" + echo " dev - Initialize dev environment (minikube, default namespace)" + echo " prod - Initialize prod environment (requires confirmation)" + exit 1 +} + +ENV="${1:-dev}" + +if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then + echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}" + usage +fi + +if [[ "$ENV" == "prod" ]]; then + KUBECTL="kubectl --context=prod" + BASE_URL="https://dexorder.ai" + MCP_URL="https://dexorder.ai/mcp" + echo -e "${YELLOW}⚠️ WARNING: Initializing PRODUCTION environment!${NC}" + echo -e "${YELLOW}kubectl context: prod${NC}" + read -p "Are you sure you want to continue? (yes/no): " confirm + if [[ "$confirm" != "yes" ]]; then + echo "Aborted." 
+ exit 0 + fi +else + KUBECTL="kubectl" + BASE_URL="http://dexorder.local" + MCP_URL="http://localhost:8080/mcp" +fi + +# ---------- Schema Initialization ---------- + +echo "" +echo -e "${BLUE}=== Schema Initialization ===${NC}" +echo "" + +echo -e "${BLUE}Waiting for postgres pod...${NC}" +$KUBECTL wait --for=condition=ready --timeout=180s pod -l app=postgres 2>/dev/null || { + echo -e "${RED}Postgres not ready after 180s${NC}" + exit 1 +} + +PG_POD=$($KUBECTL get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) +if [ -z "$PG_POD" ]; then + echo -e "${RED}No postgres pod found${NC}" + exit 1 +fi +echo -e "${GREEN}Found postgres pod: $PG_POD${NC}" + +# Wait for postgres to accept connections +echo -e "${BLUE}Waiting for postgres to accept connections...${NC}" +for i in $(seq 1 30); do + if $KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -c "SELECT 1;" > /dev/null 2>&1; then + echo -e "${GREEN}Postgres ready${NC}" + break + fi + if [[ $i -eq 30 ]]; then + echo -e "${RED}Postgres not accepting connections after 60s${NC}" + exit 1 + fi + sleep 2 +done + +# Check if schema exists +TABLE_COUNT=$($KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -t \ + -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'user';" \ + 2>/dev/null | tr -d ' \n') + +if [[ "$TABLE_COUNT" == "1" ]]; then + echo -e "${GREEN}✓ Schema already initialized${NC}" +else + echo -e "${BLUE}Applying gateway schema...${NC}" + $KUBECTL exec -i "$PG_POD" -- psql -U postgres -d iceberg < "$ROOT_DIR/gateway/schema.sql" + echo -e "${GREEN}✓ Schema applied${NC}" +fi + +# ---------- Admin User Creation ---------- + +echo "" +echo -e "${BLUE}=== Admin User Setup ===${NC}" +echo "" + +if [[ "$ENV" == "dev" ]]; then + # Dev: use fixed credentials + USER_EMAIL="tim@dexorder.ai" + USER_PASSWORD="test1234" + USER_NAME="Tim" + LICENSE_TYPE="pro" + echo -e "${BLUE}Using dev defaults: $USER_EMAIL / $USER_PASSWORD ($LICENSE_TYPE)${NC}" 
+else + # Prod: prompt for credentials + read -p "Admin email: " USER_EMAIL + read -s -p "Admin password (min 8 chars): " USER_PASSWORD + echo "" + read -p "Admin display name: " USER_NAME + read -p "License type [free|pro|enterprise] (default: pro): " LICENSE_TYPE + LICENSE_TYPE="${LICENSE_TYPE:-pro}" +fi + +# Check if user already exists +EXISTING_ID=$($KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -t \ + -c "SELECT id FROM \"user\" WHERE email = '$USER_EMAIL';" \ + 2>/dev/null | tr -d ' \n') + +if [ -n "$EXISTING_ID" ]; then + echo -e "${GREEN}✓ User already exists in database ($USER_EMAIL)${NC}" + USER_ID="$EXISTING_ID" +else + # Register via API + echo -e "${BLUE}Waiting for gateway...${NC}" + $KUBECTL wait --for=condition=available --timeout=120s deployment/gateway 2>/dev/null || { + echo -e "${YELLOW}⚠️ Gateway not ready after 120s, trying anyway${NC}" + } + sleep 3 + + echo -e "${GREEN}→${NC} Registering user via API..." + HTTP_CODE=$(curl -s -o /tmp/dexorder-init-response.json -w "%{http_code}" \ + -X POST "$BASE_URL/api/auth/register" \ + -H "Content-Type: application/json" \ + -d "{\"email\":\"$USER_EMAIL\",\"password\":\"$USER_PASSWORD\",\"name\":\"$USER_NAME\"}") + + if [[ "$HTTP_CODE" == "200" || "$HTTP_CODE" == "201" ]]; then + echo -e "${GREEN}✓ User registered via API${NC}" + elif [[ "$HTTP_CODE" == "400" ]]; then + echo -e "${YELLOW}⚠️ API returned 400 (user may already exist), continuing...${NC}" + else + echo -e "${YELLOW}⚠️ API returned HTTP $HTTP_CODE${NC}" + cat /tmp/dexorder-init-response.json 2>/dev/null || true + fi + rm -f /tmp/dexorder-init-response.json + + sleep 2 + + USER_ID=$($KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -t \ + -c "SELECT id FROM \"user\" WHERE email = '$USER_EMAIL';" \ + 2>/dev/null | tr -d ' \n') +fi + +if [ -z "$USER_ID" ]; then + echo -e "${RED}User not found in database after registration. 
Is the gateway running?${NC}" + exit 1 +fi +echo -e "${GREEN}User ID: $USER_ID${NC}" + +# Build license JSON based on type +case "$LICENSE_TYPE" in + enterprise) + LICENSE_JSON='{"licenseType":"enterprise","features":{"maxIndicators":200,"maxStrategies":100,"maxBacktestDays":1825,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":20,"maxMessagesPerDay":10000,"maxTokensPerMessage":32768,"rateLimitPerMinute":300},"k8sResources":{"memoryRequest":"1Gi","memoryLimit":"4Gi","cpuRequest":"500m","cpuLimit":"4000m","storage":"50Gi","tmpSizeLimit":"1Gi","enableIdleShutdown":true,"idleTimeoutMinutes":120},"preferredModel":{"provider":"anthropic","model":"claude-opus-4-6","temperature":0.7}}' + ;; + free) + LICENSE_JSON='{"licenseType":"free","features":{"maxIndicators":10,"maxStrategies":3,"maxBacktestDays":30,"realtimeData":false,"customExecutors":false,"apiAccess":false},"resourceLimits":{"maxConcurrentSessions":1,"maxMessagesPerDay":100,"maxTokensPerMessage":4096,"rateLimitPerMinute":20},"k8sResources":{"memoryRequest":"256Mi","memoryLimit":"512Mi","cpuRequest":"100m","cpuLimit":"500m","storage":"2Gi","tmpSizeLimit":"128Mi","enableIdleShutdown":true,"idleTimeoutMinutes":30},"preferredModel":{"provider":"anthropic","model":"claude-haiku-4-5-20251001","temperature":0.7}}' + ;; + pro|*) + LICENSE_JSON='{"licenseType":"pro","features":{"maxIndicators":50,"maxStrategies":20,"maxBacktestDays":365,"realtimeData":true,"customExecutors":true,"apiAccess":true},"resourceLimits":{"maxConcurrentSessions":5,"maxMessagesPerDay":1000,"maxTokensPerMessage":8192,"rateLimitPerMinute":60},"k8sResources":{"memoryRequest":"512Mi","memoryLimit":"2Gi","cpuRequest":"250m","cpuLimit":"2000m","storage":"10Gi","tmpSizeLimit":"256Mi","enableIdleShutdown":true,"idleTimeoutMinutes":60},"preferredModel":{"provider":"anthropic","model":"claude-sonnet-4-6","temperature":0.7}}' + ;; +esac + +echo -e "${GREEN}→${NC} Creating $LICENSE_TYPE license..." 
+$KUBECTL exec "$PG_POD" -- psql -U postgres -d iceberg -c " + INSERT INTO user_licenses (user_id, email, license, mcp_server_url) + VALUES ( + '$USER_ID', + '$USER_EMAIL', + '$LICENSE_JSON', + '$MCP_URL' + ) + ON CONFLICT (user_id) DO UPDATE SET + license = EXCLUDED.license, + updated_at = NOW(); +" > /dev/null + +echo -e "${GREEN}✓ User ready: $USER_EMAIL ($LICENSE_TYPE)${NC}" +echo "" +echo -e "${BLUE}Initialization complete.${NC}" +if [[ "$ENV" == "dev" ]]; then + echo -e "${BLUE}Login at http://dexorder.local with $USER_EMAIL / $USER_PASSWORD${NC}" +fi diff --git a/bin/op-setup b/bin/op-setup new file mode 100755 index 00000000..146053e5 --- /dev/null +++ b/bin/op-setup @@ -0,0 +1,153 @@ +#!/usr/bin/env bash +# Create the "AI Prod" 1Password vault and all required items with placeholder values. +# Run this once on a fresh setup, then edit each item in 1Password with real values. +# +# Usage: +# bin/op-setup # Create vault and all items +# bin/op-setup --dry-run # Print what would be created without doing it + +set -e + +VAULT="AI Prod" +DRY_RUN=false + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +if [[ "${1:-}" == "--dry-run" ]]; then + DRY_RUN=true + echo -e "${YELLOW}Dry run mode — no changes will be made${NC}" +fi + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +run() { + if $DRY_RUN; then + echo -e " ${BLUE}[dry-run]${NC} $*" + else + "$@" + fi +} + +item_exists() { + local title="$1" + op item get "$title" --vault "$VAULT" &>/dev/null +} + +create_item() { + local title="$1" + shift + if item_exists "$title"; then + echo -e " ${YELLOW}↩${NC} $title — already exists, skipping" + else + echo -e " ${GREEN}+${NC} Creating: $title" + run op item create \ + --vault "$VAULT" \ + --category "Login" \ + --title "$title" \ + "$@" + fi +} + +# 
--------------------------------------------------------------------------- +# Step 1: Ensure vault exists +# --------------------------------------------------------------------------- + +echo "" +echo -e "${BLUE}=== 1Password Vault ===${NC}" +echo "" + +if op vault get "$VAULT" &>/dev/null; then + echo -e "${GREEN}✓${NC} Vault '$VAULT' already exists" +else + echo -e "${GREEN}+${NC} Creating vault: $VAULT" + run op vault create "$VAULT" +fi + +# --------------------------------------------------------------------------- +# Step 2: Create items +# --------------------------------------------------------------------------- + +echo "" +echo -e "${BLUE}=== Creating Items in '$VAULT' ===${NC}" +echo "" + +# --- PostgreSQL --- +# Used by: gateway (DB connection), minio-init job (postgres metadata) +create_item "PostgreSQL" \ + "password[password]=REPLACE_WITH_STRONG_PASSWORD" + +# --- MinIO --- +# Used by: minio StatefulSet, flink-secrets, gateway-secrets (iceberg S3), sandbox-secrets +# access_key = MinIO root user (equivalent to AWS_ACCESS_KEY_ID) +# secret_key = MinIO root password (equivalent to AWS_SECRET_ACCESS_KEY) +create_item "MinIO" \ + "access_key[text]=minio-admin" \ + "secret_key[password]=REPLACE_WITH_STRONG_SECRET_KEY" + +# --- Gateway --- +# Used by: ai-secrets (anthropic_api_key), gateway-secrets (all LLM keys + jwt_secret) +# jwt_secret: used to sign user sessions — generate with: openssl rand -base64 48 +# anthropic_api_key: Anthropic Console → API Keys (https://console.anthropic.com) +# openai_api_key: OpenAI Platform → API Keys (https://platform.openai.com) +# google_api_key: Google AI Studio (https://aistudio.google.com) +# openrouter_api_key: OpenRouter (https://openrouter.ai) +create_item "Gateway" \ + "anthropic_api_key[password]=sk-ant-REPLACE_ME" \ + "jwt_secret[password]=REPLACE_WITH_RANDOM_64_CHAR_SECRET" \ + "openai_api_key[password]=sk-REPLACE_ME" \ + "google_api_key[password]=REPLACE_ME" \ + 
"openrouter_api_key[password]=sk-or-REPLACE_ME" + +# --- Telegram --- +# Used by: gateway-secrets (optional Telegram bot integration) +# bot_token: BotFather → /newbot (https://t.me/BotFather) +# Leave as placeholder if Telegram integration is not needed. +create_item "Telegram" \ + "bot_token[password]=REPLACE_ME_OR_LEAVE_EMPTY" + +# --- Ingestor --- +# Used by: ingestor-secrets (exchange API keys for CCXT market data) +# Keys with empty/placeholder values will cause the ingestor to skip that exchange. +# Binance: https://www.binance.com/en/my/settings/api-management +# Coinbase: https://portal.cdp.coinbase.com/ +# Kraken: https://www.kraken.com/u/security/api +create_item "Ingestor" \ + "binance_api_key[text]=REPLACE_ME" \ + "binance_api_secret[password]=REPLACE_ME" \ + "coinbase_api_key[text]=REPLACE_ME" \ + "coinbase_api_secret[password]=REPLACE_ME" \ + "kraken_api_key[text]=REPLACE_ME" \ + "kraken_api_secret[password]=REPLACE_ME" + +# --------------------------------------------------------------------------- +# Done +# --------------------------------------------------------------------------- + +echo "" +if $DRY_RUN; then + echo -e "${YELLOW}Dry run complete — no items were created.${NC}" +else + echo -e "${GREEN}✓ Setup complete.${NC}" + echo "" + echo -e "Next steps:" + echo -e " 1. Open 1Password and update each item in the '${VAULT}' vault with real values:" + echo -e " • PostgreSQL → set a strong random password" + echo -e " • MinIO → set a strong secret_key (access_key can stay as-is)" + echo -e " • Gateway → add real API keys and a random jwt_secret" + echo -e " • Ingestor → add real exchange API keys" + echo -e " • Telegram → add bot token (or leave placeholder if unused)" + echo "" + echo -e " 2. Verify op:// references resolve correctly:" + echo -e " op inject -i deploy/k8s/prod/secrets/gateway-secrets.tpl.yaml | head -20" + echo "" + echo -e " 3. 
Continue with cluster setup:" + echo -e " bin/secret-update prod" +fi +echo "" diff --git a/bin/secret-update b/bin/secret-update index 0f30f4b7..1f3db2b7 100755 --- a/bin/secret-update +++ b/bin/secret-update @@ -50,19 +50,21 @@ if [ ! -d "$SECRETS_DIR" ]; then exit 1 fi -# Get kubectl context +# Set kubectl command and warn for prod if [[ "$ENV" == "prod" ]]; then - CONTEXT=$(kubectl config current-context) + KUBECTL="kubectl --context=prod" echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION secrets!${NC}" - echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}" + echo -e "${YELLOW}kubectl context: prod${NC}" read -p "Are you sure you want to continue? (yes/no): " confirm if [[ "$confirm" != "yes" ]]; then echo "Aborted." exit 0 fi +else + KUBECTL="kubectl" fi -apply_secret() { +apply_secret_dev() { local secret_file="$1" local secret_basename=$(basename "$secret_file" .yaml) @@ -73,45 +75,71 @@ apply_secret() { fi echo -e "${GREEN}→${NC} Applying $secret_basename..." - kubectl apply -f "$secret_file" + $KUBECTL apply -f "$secret_file" echo -e "${GREEN}✓${NC} $secret_basename updated" } +apply_secret_prod() { + local tpl_file="$1" + local secret_basename=$(basename "$tpl_file" .tpl.yaml) + + if [ ! -f "$tpl_file" ]; then + echo -e "${RED}✗ Template file not found: $tpl_file${NC}" + return 1 + fi + + echo -e "${GREEN}→${NC} Applying $secret_basename (via op inject)..." 
+ op inject -i "$tpl_file" | $KUBECTL apply -f - + echo -e "${GREEN}✓${NC} $secret_basename updated" +} + +SECRETS=( + "ai-secrets" + "postgres-secret" + "minio-secret" + "ingestor-secrets" + "flink-secrets" + "gateway-secrets" + "sandbox-secrets" +) + # Update specific secret or all secrets if [ -n "$SECRET_NAME" ]; then - # Update single secret - SECRET_FILE="$SECRETS_DIR/$SECRET_NAME.yaml" - apply_secret "$SECRET_FILE" + if [[ "$ENV" == "prod" ]]; then + apply_secret_prod "$SECRETS_DIR/$SECRET_NAME.tpl.yaml" + else + apply_secret_dev "$SECRETS_DIR/$SECRET_NAME.yaml" + fi else - # Update all secrets echo -e "${GREEN}Updating all $ENV secrets...${NC}" echo "" - SECRETS=( - "ai-secrets" - "postgres-secret" - "minio-secret" - "ingestor-secrets" - "flink-secrets" - "gateway-secrets" - "sandbox-secrets" - ) - FAILED=0 for secret in "${SECRETS[@]}"; do - SECRET_FILE="$SECRETS_DIR/$secret.yaml" - if ! apply_secret "$SECRET_FILE"; then - FAILED=$((FAILED + 1)) + if [[ "$ENV" == "prod" ]]; then + if ! apply_secret_prod "$SECRETS_DIR/$secret.tpl.yaml"; then + FAILED=$((FAILED + 1)) + fi + else + if ! 
apply_secret_dev "$SECRETS_DIR/$secret.yaml"; then + FAILED=$((FAILED + 1)) + fi fi done echo "" if [ $FAILED -gt 0 ]; then - echo -e "${YELLOW}⚠️ $FAILED secret(s) failed to apply${NC}" - echo -e "${YELLOW}Create missing secret files by copying from .example templates:${NC}" - echo -e "${YELLOW} cd $SECRETS_DIR${NC}" - echo -e "${YELLOW} cp SECRET_NAME.yaml.example SECRET_NAME.yaml${NC}" - echo -e "${YELLOW} # Edit SECRET_NAME.yaml with actual values${NC}" + if [[ "$ENV" == "prod" ]]; then + echo -e "${YELLOW}⚠️ $FAILED secret(s) failed to apply${NC}" + echo -e "${YELLOW}Ensure 1Password CLI is authenticated: op signin${NC}" + echo -e "${YELLOW}Ensure 'AI Prod' vault items exist (see deploy/k8s/prod/secrets/*.tpl.yaml)${NC}" + else + echo -e "${YELLOW}⚠️ $FAILED secret(s) failed to apply${NC}" + echo -e "${YELLOW}Create missing secret files by copying from .example templates:${NC}" + echo -e "${YELLOW} cd $SECRETS_DIR${NC}" + echo -e "${YELLOW} cp SECRET_NAME.yaml.example SECRET_NAME.yaml${NC}" + echo -e "${YELLOW} # Edit SECRET_NAME.yaml with actual values${NC}" + fi exit 1 else echo -e "${GREEN}✓ All secrets updated successfully${NC}" diff --git a/deploy/k8s/base/admission-policy.yaml b/deploy/k8s/base/admission-policy.yaml index 3133e346..292d8d95 100644 --- a/deploy/k8s/base/admission-policy.yaml +++ b/deploy/k8s/base/admission-policy.yaml @@ -1,4 +1,4 @@ -# ValidatingAdmissionPolicy to restrict images in dexorder-sandboxes namespace +# ValidatingAdmissionPolicy to restrict images in sandbox namespace # Requires Kubernetes 1.30+ (or 1.28+ with feature gate) # This is the critical security control that prevents arbitrary image execution # even if the gateway is compromised. 
@@ -26,7 +26,9 @@ spec: c.image.startsWith('ghcr.io/dexorder/sandbox-') || c.image.startsWith('ghcr.io/dexorder/lifecycle-sidecar:') || c.image.startsWith('dexorder/ai-sandbox:') || - c.image.startsWith('dexorder/ai-lifecycle-sidecar:')) + c.image.startsWith('dexorder/ai-lifecycle-sidecar:') || + c.image.startsWith('git.dxod.org/dexorder/dexorder/ai-sandbox:') || + c.image.startsWith('git.dxod.org/dexorder/dexorder/ai-lifecycle-sidecar:')) message: "Only approved dexorder sandbox images are allowed in the sandboxes namespace" reason: Forbidden diff --git a/deploy/k8s/base/gateway-rbac.yaml b/deploy/k8s/base/gateway-rbac.yaml index 08910d1e..f31b49f8 100644 --- a/deploy/k8s/base/gateway-rbac.yaml +++ b/deploy/k8s/base/gateway-rbac.yaml @@ -1,6 +1,6 @@ # RBAC for gateway to CREATE sandbox deployments only # Principle of least privilege: gateway can ONLY create deployments/services/PVCs -# in the dexorder-sandboxes namespace. Deletion is handled by the lifecycle sidecar. +# in the sandbox namespace. Deletion is handled by the lifecycle sidecar. # No pods, secrets, exec, or cross-namespace access. 
--- apiVersion: v1 @@ -8,12 +8,12 @@ kind: ServiceAccount metadata: name: gateway --- -# Role scoped to dexorder-sandboxes namespace only +# Role scoped to sandbox namespace only apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: sandbox-creator - namespace: dexorder-sandboxes + namespace: sandbox rules: # Deployments: create and read only (deletion handled by sidecar) - apiGroups: ["apps"] @@ -53,7 +53,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: gateway-sandbox-creator - namespace: dexorder-sandboxes + namespace: sandbox subjects: - kind: ServiceAccount name: gateway diff --git a/deploy/k8s/base/lifecycle-sidecar-rbac.yaml b/deploy/k8s/base/lifecycle-sidecar-rbac.yaml index 8478d9a7..b049be2f 100644 --- a/deploy/k8s/base/lifecycle-sidecar-rbac.yaml +++ b/deploy/k8s/base/lifecycle-sidecar-rbac.yaml @@ -5,15 +5,15 @@ apiVersion: v1 kind: ServiceAccount metadata: name: sandbox-lifecycle - namespace: dexorder-sandboxes + namespace: sandbox --- # Role allowing deletion of deployments and PVCs -# This is scoped to the dexorder-sandboxes namespace +# This is scoped to the sandbox namespace apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: sandbox-self-delete - namespace: dexorder-sandboxes + namespace: sandbox rules: # Allow getting and deleting deployments - apiGroups: ["apps"] @@ -34,11 +34,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: sandbox-self-delete - namespace: dexorder-sandboxes + namespace: sandbox subjects: - kind: ServiceAccount name: sandbox-lifecycle - namespace: dexorder-sandboxes + namespace: sandbox roleRef: kind: Role name: sandbox-self-delete @@ -49,5 +49,5 @@ roleRef: # Requires a validating webhook server (can be added later) # For now, we rely on: # 1. Sidecar only knowing its own deployment name (from env) -# 2. RBAC limiting to dexorder-sandboxes namespace +# 2. RBAC limiting to sandbox namespace # 3. 
Admission policy restricting deployment creation (already defined) diff --git a/deploy/k8s/base/namespaces.yaml b/deploy/k8s/base/namespaces.yaml index c5a8c553..c9e4f101 100644 --- a/deploy/k8s/base/namespaces.yaml +++ b/deploy/k8s/base/namespaces.yaml @@ -1,11 +1,11 @@ # Namespace definitions for dexorder AI platform # - default: gateway, web, and infrastructure services -# - dexorder-sandboxes: per-user sandbox containers (isolated, restricted) +# - sandbox: per-user sandbox containers (isolated, restricted) --- apiVersion: v1 kind: Namespace metadata: - name: dexorder-sandboxes + name: sandbox labels: app.kubernetes.io/part-of: dexorder dexorder.io/type: sandboxes diff --git a/deploy/k8s/base/network-policies.yaml b/deploy/k8s/base/network-policies.yaml index ef1d75a0..cd4b5609 100644 --- a/deploy/k8s/base/network-policies.yaml +++ b/deploy/k8s/base/network-policies.yaml @@ -2,12 +2,12 @@ # Sandboxes can only communicate with specific services, not with each other # or with the Kubernetes API --- -# Default deny all ingress and egress in sandboxes namespace +# Default deny all ingress and egress in sandbox namespace apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: default-deny-all - namespace: dexorder-sandboxes + namespace: sandbox spec: podSelector: {} policyTypes: @@ -19,7 +19,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: allow-gateway-ingress - namespace: dexorder-sandboxes + namespace: sandbox spec: podSelector: matchLabels: @@ -42,7 +42,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: allow-sandbox-egress - namespace: dexorder-sandboxes + namespace: sandbox spec: podSelector: matchLabels: @@ -69,17 +69,6 @@ spec: ports: - protocol: TCP port: 3000 - # Kafka/Redpanda for data subscriptions - - to: - - namespaceSelector: - matchLabels: - dexorder.io/type: system - podSelector: - matchLabels: - app: redpanda - ports: - - protocol: TCP - port: 9092 # External HTTPS (for exchange APIs, LLM 
APIs) - to: - ipBlock: @@ -93,7 +82,8 @@ spec: - protocol: TCP port: 443 --- -# Default namespace: allow ingress from sandboxes to gateway +# Allow ingress from sandboxes to gateway (no explicit namespace = context default) +# In dev: applies to 'default' namespace. In prod: applies to 'ai' namespace. apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: diff --git a/deploy/k8s/base/sandbox-deployment-example.yaml b/deploy/k8s/base/sandbox-deployment-example.yaml index 11f750c2..771b256a 100644 --- a/deploy/k8s/base/sandbox-deployment-example.yaml +++ b/deploy/k8s/base/sandbox-deployment-example.yaml @@ -5,7 +5,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: sandbox-user-abc123 - namespace: dexorder-sandboxes + namespace: sandbox labels: app.kubernetes.io/name: sandbox app.kubernetes.io/component: user-sandbox @@ -187,7 +187,7 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: name: sandbox-user-abc123-data - namespace: dexorder-sandboxes + namespace: sandbox labels: dexorder.io/user-id: user-abc123 spec: @@ -203,7 +203,7 @@ apiVersion: v1 kind: Service metadata: name: sandbox-user-abc123 - namespace: dexorder-sandboxes + namespace: sandbox labels: dexorder.io/user-id: user-abc123 spec: diff --git a/deploy/k8s/base/sandbox-quotas.yaml b/deploy/k8s/base/sandbox-quotas.yaml index b4a6d54d..d91c4267 100644 --- a/deploy/k8s/base/sandbox-quotas.yaml +++ b/deploy/k8s/base/sandbox-quotas.yaml @@ -1,4 +1,4 @@ -# Resource constraints for the dexorder-sandboxes namespace +# Resource constraints for the sandbox namespace # These limits apply regardless of what the gateway requests --- # LimitRange: per-container defaults and maximums @@ -6,7 +6,7 @@ apiVersion: v1 kind: LimitRange metadata: name: sandbox-limits - namespace: dexorder-sandboxes + namespace: sandbox spec: limits: # Default limits applied if deployment doesn't specify @@ -37,7 +37,7 @@ apiVersion: v1 kind: ResourceQuota metadata: name: sandbox-quota - namespace: dexorder-sandboxes + 
namespace: sandbox spec: hard: # Total compute limits for all sandboxes combined diff --git a/deploy/k8s/dev/configs/gateway-config.yaml b/deploy/k8s/dev/configs/gateway-config.yaml index 13767e97..b007e967 100644 --- a/deploy/k8s/dev/configs/gateway-config.yaml +++ b/deploy/k8s/dev/configs/gateway-config.yaml @@ -53,7 +53,8 @@ data: # Kubernetes configuration kubernetes: - namespace: dexorder-sandboxes + namespace: sandbox + service_namespace: default in_cluster: true sandbox_image: dexorder/ai-sandbox:SANDBOX_TAG_PLACEHOLDER sidecar_image: dexorder/ai-lifecycle-sidecar:SIDECAR_TAG_PLACEHOLDER diff --git a/deploy/k8s/dev/kustomization.yaml b/deploy/k8s/dev/kustomization.yaml index 1872ef47..fa7582d6 100644 --- a/deploy/k8s/dev/kustomization.yaml +++ b/deploy/k8s/dev/kustomization.yaml @@ -8,7 +8,7 @@ resources: - storage-class.yaml - configs/gateway-config.yaml - gateway-health-ingress.yaml - - sandbox-config.yaml # ConfigMap for sandbox pods in dexorder-sandboxes namespace + - sandbox-config.yaml # Dev-specific patches patches: @@ -275,6 +275,12 @@ generatorOptions: + + + + + + diff --git a/deploy/k8s/dev/sandbox-config.yaml b/deploy/k8s/dev/sandbox-config.yaml index 40cf2f11..b22be4f4 100644 --- a/deploy/k8s/dev/sandbox-config.yaml +++ b/deploy/k8s/dev/sandbox-config.yaml @@ -1,11 +1,11 @@ -# Sandbox ConfigMap in dexorder-sandboxes namespace +# Sandbox ConfigMap in sandbox namespace # This is mounted into dynamically created sandbox pods --- apiVersion: v1 kind: ConfigMap metadata: name: sandbox-config - namespace: dexorder-sandboxes + namespace: sandbox labels: app.kubernetes.io/name: sandbox app.kubernetes.io/component: config diff --git a/deploy/k8s/dev/sandbox-quotas-patch.yaml b/deploy/k8s/dev/sandbox-quotas-patch.yaml index 5d2f92a6..0e9afc4f 100644 --- a/deploy/k8s/dev/sandbox-quotas-patch.yaml +++ b/deploy/k8s/dev/sandbox-quotas-patch.yaml @@ -5,7 +5,7 @@ apiVersion: v1 kind: ResourceQuota metadata: name: sandbox-quota - namespace: dexorder-sandboxes + 
namespace: sandbox spec: hard: # Reduced for minikube diff --git a/deploy/k8s/prod/configs/gateway-config.yaml b/deploy/k8s/prod/configs/gateway-config.yaml index 80eb36df..5c7212f3 100644 --- a/deploy/k8s/prod/configs/gateway-config.yaml +++ b/deploy/k8s/prod/configs/gateway-config.yaml @@ -4,22 +4,21 @@ metadata: name: gateway-config data: config.yaml: | - # Gateway Configuration + # Gateway Configuration (production) # Server configuration server: port: 3000 host: 0.0.0.0 log_level: info - cors_origin: "https://app.dexorder.com" - base_url: https://api.dexorder.com + cors_origin: "https://dexorder.ai" + base_url: https://dexorder.ai trusted_origins: - - https://app.dexorder.com - - https://api.dexorder.com + - https://dexorder.ai # Database database: - url: postgresql://postgres:postgres@postgres:5432/iceberg + url: postgresql://postgres:{{ op://AI Prod/PostgreSQL/password }}@postgres:5432/iceberg # Default model (if user has no preference) defaults: @@ -28,12 +27,13 @@ data: # Kubernetes configuration kubernetes: - namespace: dexorder-sandboxes + namespace: sandbox + service_namespace: ai in_cluster: true - sandbox_image: dexorder/ai-sandbox:latest - sidecar_image: dexorder/ai-lifecycle-sidecar:latest + sandbox_image: git.dxod.org/dexorder/dexorder/ai-sandbox:latest + sidecar_image: git.dxod.org/dexorder/dexorder/ai-lifecycle-sidecar:latest storage_class: standard - image_pull_policy: Always # For production - always pull from registry + image_pull_policy: Always # DragonflyDB (Redis-compatible, for hot storage and session management) redis: @@ -62,4 +62,4 @@ data: # Email service configuration email: - from_address: noreply@dexorder.com + from_address: noreply@dexorder.ai diff --git a/deploy/k8s/prod/infrastructure.yaml b/deploy/k8s/prod/infrastructure.yaml new file mode 100644 index 00000000..7e30ebcd --- /dev/null +++ b/deploy/k8s/prod/infrastructure.yaml @@ -0,0 +1,678 @@ +--- +# DragonflyDB (Redis-compatible in-memory datastore) +apiVersion: v1 +kind: 
Service +metadata: + name: dragonfly +spec: + selector: + app: dragonfly + ports: + - protocol: TCP + port: 6379 + targetPort: 6379 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dragonfly +spec: + replicas: 1 + selector: + matchLabels: + app: dragonfly + template: + metadata: + labels: + app: dragonfly + spec: + containers: + - name: dragonfly + image: docker.dragonflydb.io/dragonflydb/dragonfly:latest + ports: + - containerPort: 6379 + name: dragonfly + args: + - --logtostderr + - --alsologtostderr=false + - --cache_mode=true + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" +--- +# Qdrant (Vector database for RAG) +apiVersion: v1 +kind: Service +metadata: + name: qdrant +spec: + selector: + app: qdrant + ports: + - name: http + protocol: TCP + port: 6333 + targetPort: 6333 + - name: grpc + protocol: TCP + port: 6334 + targetPort: 6334 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: qdrant +spec: + serviceName: qdrant + replicas: 1 + selector: + matchLabels: + app: qdrant + template: + metadata: + labels: + app: qdrant + spec: + containers: + - name: qdrant + image: qdrant/qdrant:latest + ports: + - containerPort: 6333 + name: http + - containerPort: 6334 + name: grpc + resources: + requests: + memory: "512Mi" + cpu: "200m" + limits: + memory: "1Gi" + cpu: "1000m" + volumeMounts: + - name: qdrant-data + mountPath: /qdrant/storage + volumeClaimTemplates: + - metadata: + name: qdrant-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +--- +# Kafka (KRaft mode - no Zookeeper needed) +apiVersion: v1 +kind: Service +metadata: + name: kafka +spec: + selector: + app: kafka + ports: + - name: broker + protocol: TCP + port: 9092 + targetPort: 9092 + - name: controller + protocol: TCP + port: 9093 + targetPort: 9093 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: kafka +spec: + 
serviceName: kafka + replicas: 1 + selector: + matchLabels: + app: kafka + template: + metadata: + labels: + app: kafka + spec: + containers: + - name: kafka + image: apache/kafka:3.9.0 + ports: + - containerPort: 9092 + name: broker + - containerPort: 9093 + name: controller + command: + - sh + - -c + - | + CLUSTER_ID="dexorder-prod-cluster" + LOG_DIR="/var/lib/kafka/data" + + # Ensure log directory exists + mkdir -p $LOG_DIR + + # Create temporary config with custom log.dirs for formatting + cp /opt/kafka/config/kraft/server.properties /tmp/server.properties + echo "log.dirs=$LOG_DIR" >> /tmp/server.properties + + # Format storage if not already formatted + if [ ! -f $LOG_DIR/meta.properties ]; then + echo "Formatting Kafka storage with cluster ID: $CLUSTER_ID" + /opt/kafka/bin/kafka-storage.sh format -t $CLUSTER_ID -c /tmp/server.properties + else + echo "Kafka storage already formatted, skipping format step" + fi + + # Start Kafka server + /opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/kraft/server.properties \ + --override node.id=1 \ + --override process.roles=broker,controller \ + --override listeners=PLAINTEXT://:9092,CONTROLLER://:9093 \ + --override advertised.listeners=PLAINTEXT://kafka:9092 \ + --override controller.quorum.voters=1@kafka:9093 \ + --override controller.listener.names=CONTROLLER \ + --override listener.security.protocol.map=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT \ + --override log.dirs=$LOG_DIR \ + --override offsets.topic.replication.factor=1 \ + --override transaction.state.log.replication.factor=1 \ + --override transaction.state.log.min.isr=1 + env: [] + volumeMounts: + - name: kafka-data + mountPath: /var/lib/kafka/data + volumeClaimTemplates: + - metadata: + name: kafka-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 20Gi +--- +# PostgreSQL (for Iceberg catalog metadata and gateway user data) +apiVersion: v1 +kind: Service +metadata: + name: postgres +spec: + selector: + app: postgres + 
ports: + - protocol: TCP + port: 5432 + targetPort: 5432 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres +spec: + serviceName: postgres + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:15 + ports: + - containerPort: 5432 + env: + - name: POSTGRES_USER + value: postgres + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret + key: password + - name: POSTGRES_DB + value: iceberg + volumeMounts: + - name: postgres-data + mountPath: /var/lib/postgresql/data + volumeClaimTemplates: + - metadata: + name: postgres-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +--- +# MinIO (S3-compatible object storage) +apiVersion: v1 +kind: Service +metadata: + name: minio +spec: + selector: + app: minio + ports: + - name: api + protocol: TCP + port: 9000 + targetPort: 9000 + - name: console + protocol: TCP + port: 9001 + targetPort: 9001 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: minio +spec: + serviceName: minio + replicas: 1 + selector: + matchLabels: + app: minio + template: + metadata: + labels: + app: minio + spec: + containers: + - name: minio + image: minio/minio:latest + args: + - server + - /data + - --console-address + - ":9001" + ports: + - containerPort: 9000 + name: api + - containerPort: 9001 + name: console + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: minio-secret + key: root-user + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: minio-secret + key: root-password + volumeMounts: + - name: minio-data + mountPath: /data + volumeClaimTemplates: + - metadata: + name: minio-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi +--- +# MinIO bucket initialization job +apiVersion: batch/v1 +kind: Job +metadata: + name: minio-init-buckets 
+spec: + ttlSecondsAfterFinished: 100 + template: + spec: + restartPolicy: OnFailure + containers: + - name: create-buckets + image: minio/mc:latest + command: + - sh + - -c + - | + echo "Waiting for MinIO to be ready..." + until mc alias set minio http://minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD; do + sleep 2 + done + + echo "Creating warehouse bucket..." + mc mb minio/warehouse --ignore-existing + + echo "Buckets initialized successfully" + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: minio-secret + key: root-user + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: minio-secret + key: root-password +--- +# Iceberg REST Catalog +apiVersion: v1 +kind: Service +metadata: + name: iceberg-catalog +spec: + selector: + app: iceberg-catalog + ports: + - protocol: TCP + port: 8181 + targetPort: 8181 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: iceberg-catalog +spec: + replicas: 1 + selector: + matchLabels: + app: iceberg-catalog + template: + metadata: + labels: + app: iceberg-catalog + spec: + initContainers: + - name: wait-for-postgres + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z postgres 5432; do echo waiting for postgres; sleep 2; done;'] + - name: wait-for-minio + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z minio 9000; do echo waiting for minio; sleep 2; done;'] + containers: + - name: iceberg-catalog + image: tabulario/iceberg-rest:latest + ports: + - containerPort: 8181 + env: + - name: CATALOG_WAREHOUSE + value: "s3://warehouse/" + - name: CATALOG_IO__IMPL + value: "org.apache.iceberg.aws.s3.S3FileIO" + - name: CATALOG_S3_ENDPOINT + value: "http://minio:9000" + - name: CATALOG_S3_ACCESS__KEY__ID + valueFrom: + secretKeyRef: + name: minio-secret + key: root-user + - name: CATALOG_S3_SECRET__ACCESS__KEY + valueFrom: + secretKeyRef: + name: minio-secret + key: root-password + - name: CATALOG_S3_PATH__STYLE__ACCESS + value: "true" + - name: AWS_REGION + value: 
"us-east-1" +--- +# Flink JobManager +apiVersion: v1 +kind: Service +metadata: + name: flink-jobmanager +spec: + selector: + app: flink-jobmanager + ports: + - name: rpc + protocol: TCP + port: 6123 + targetPort: 6123 + - name: ui + protocol: TCP + port: 8081 + targetPort: 8081 + - name: zmq-market-data + protocol: TCP + port: 5558 + targetPort: 5558 + - name: zmq-notif-pull + protocol: TCP + port: 5561 + targetPort: 5561 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flink-jobmanager +spec: + replicas: 1 + selector: + matchLabels: + app: flink-jobmanager + template: + metadata: + labels: + app: flink-jobmanager + spec: + initContainers: + - name: wait-for-kafka + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;'] + - name: wait-for-iceberg-catalog + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;'] + containers: + - name: flink-jobmanager + image: dexorder/ai-flink + imagePullPolicy: Always + args: ["standalone-job", "--job-classname", "com.dexorder.flink.TradingFlinkApp"] + ports: + - containerPort: 6123 + name: rpc + - containerPort: 8081 + name: ui + - containerPort: 5558 + name: zmq-market-data + - containerPort: 5561 + name: zmq-notif-pull + env: + - name: JOB_MANAGER_RPC_ADDRESS + value: flink-jobmanager + - name: AWS_REGION + value: us-east-1 + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-secret-key + volumeMounts: + - name: flink-config + mountPath: /etc/config/config.yaml + subPath: config.yaml + - name: flink-secrets + mountPath: /etc/secrets + volumes: + - name: flink-config + configMap: + name: flink-config + - name: flink-secrets + secret: + secretName: flink-secrets +--- +# Flink TaskManager +apiVersion: apps/v1 +kind: 
Deployment +metadata: + name: flink-taskmanager +spec: + replicas: 1 + selector: + matchLabels: + app: flink-taskmanager + template: + metadata: + labels: + app: flink-taskmanager + spec: + initContainers: + - name: wait-for-jobmanager + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z flink-jobmanager 6123; do echo waiting for jobmanager; sleep 2; done;'] + containers: + - name: flink-taskmanager + image: dexorder/ai-flink + imagePullPolicy: Always + args: ["taskmanager"] + env: + - name: JOB_MANAGER_RPC_ADDRESS + value: flink-jobmanager + - name: AWS_REGION + value: us-east-1 + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: flink-secrets + key: minio-secret-key + volumeMounts: + - name: flink-config + mountPath: /etc/config/config.yaml + subPath: config.yaml + - name: flink-secrets + mountPath: /etc/secrets + volumes: + - name: flink-config + configMap: + name: flink-config + - name: flink-secrets + secret: + secretName: flink-secrets +--- +# Relay (ZMQ router) +apiVersion: v1 +kind: Service +metadata: + name: relay +spec: + selector: + app: relay + ports: + - name: work-queue + protocol: TCP + port: 5555 + targetPort: 5555 + - name: responses + protocol: TCP + port: 5556 + targetPort: 5556 + - name: market-data + protocol: TCP + port: 5558 + targetPort: 5558 + - name: client-requests + protocol: TCP + port: 5559 + targetPort: 5559 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: relay +spec: + replicas: 1 + selector: + matchLabels: + app: relay + template: + metadata: + labels: + app: relay + spec: + containers: + - name: relay + image: dexorder/ai-relay + imagePullPolicy: Always + ports: + - containerPort: 5555 + name: work-queue + - containerPort: 5556 + name: responses + - containerPort: 5558 + name: market-data + - containerPort: 5559 + name: client-requests + env: + - name: RUST_LOG + value: 
relay=info + - name: CONFIG_PATH + value: /config/config.yaml + volumeMounts: + - name: relay-config + mountPath: /config + volumes: + - name: relay-config + configMap: + name: relay-config +--- +# Ingestor (CCXT data fetcher) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ingestor +spec: + replicas: 1 + selector: + matchLabels: + app: ingestor + template: + metadata: + labels: + app: ingestor + spec: + initContainers: + - name: wait-for-relay + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z relay 5555; do echo waiting for relay; sleep 2; done;'] + - name: wait-for-kafka + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z kafka 9092; do echo waiting for kafka; sleep 2; done;'] + containers: + - name: ingestor + image: dexorder/ai-ingestor + imagePullPolicy: Always + env: + - name: LOG_LEVEL + value: info + - name: CONFIG_PATH + value: /config/config.yaml + volumeMounts: + - name: ingestor-config + mountPath: /config + - name: ingestor-secrets + mountPath: /secrets + volumes: + - name: ingestor-config + configMap: + name: ingestor-config + - name: ingestor-secrets + secret: + secretName: ingestor-secrets diff --git a/deploy/k8s/prod/kustomization.yaml b/deploy/k8s/prod/kustomization.yaml index cebcfbc4..bf70e081 100644 --- a/deploy/k8s/prod/kustomization.yaml +++ b/deploy/k8s/prod/kustomization.yaml @@ -1,18 +1,27 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -# Base resources (includes all security policies) +# No namespace: transformer — kubectl --context=prod is configured with 'ai' +# as its default namespace, so all resources without an explicit namespace +# land in 'ai' automatically. 
+ resources: + # Base: init.yaml, sandbox namespace, RBAC, admission-policy, sandbox-quotas, + # network-policies, gateway.yaml, web.yaml, ingress.yaml, gateway-ingress.yaml - ../base - - configs/gateway-config.yaml + # Add the 'ai' namespace (base only creates 'sandbox') + - namespaces.yaml + # Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg) + - infrastructure.yaml + # gateway-config ConfigMap is intentionally excluded from kustomize. + # It contains an op:// reference for the DB password. Apply via: + # bin/config-update prod gateway-config -# Production patches patches: - - path: patches.yaml + - path: patch-gateway-rbac-subject.yaml + - path: patch-web.yaml + - path: patch-gateway-ingress.yaml -# ConfigMaps for service configs -# In production, these might come from external sources -# or be managed separately, but we'll include them here for consistency configMapGenerator: - name: relay-config files: @@ -24,23 +33,28 @@ configMapGenerator: files: - config.yaml=configs/flink-config.yaml -# Secrets (managed via kubectl, not committed) -# These are created by bin/secret-update prod +# Secrets managed via bin/secret-update prod (op inject | kubectl apply) secretGenerator: [] generatorOptions: disableNameSuffixHash: true -# Images images: - - name: dexorder/ai-backend - newTag: latest - - name: dexorder/ai-web + - name: dexorder/ai-gateway + newName: git.dxod.org/dexorder/dexorder/ai-gateway newTag: latest - name: ghcr.io/dexorder/gateway + newName: git.dxod.org/dexorder/dexorder/ai-gateway newTag: latest - - name: lifecycle-sidecar - newName: ghcr.io/dexorder/lifecycle-sidecar + - name: dexorder/ai-web + newName: git.dxod.org/dexorder/dexorder/ai-web newTag: latest - - name: ghcr.io/dexorder/agent + - name: dexorder/ai-flink + newName: git.dxod.org/dexorder/dexorder/ai-flink + newTag: latest + - name: dexorder/ai-relay + newName: git.dxod.org/dexorder/dexorder/ai-relay + newTag: latest + - name: dexorder/ai-ingestor 
+ newName: git.dxod.org/dexorder/dexorder/ai-ingestor newTag: latest diff --git a/deploy/k8s/prod/namespaces.yaml b/deploy/k8s/prod/namespaces.yaml new file mode 100644 index 00000000..e8b0ffc9 --- /dev/null +++ b/deploy/k8s/prod/namespaces.yaml @@ -0,0 +1,10 @@ +# Production namespace: ai (for all platform services) +# The 'sandbox' namespace is defined in base/namespaces.yaml +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ai + labels: + app.kubernetes.io/part-of: dexorder + dexorder.io/type: system diff --git a/deploy/k8s/prod/patch-gateway-ingress.yaml b/deploy/k8s/prod/patch-gateway-ingress.yaml new file mode 100644 index 00000000..5c971b27 --- /dev/null +++ b/deploy/k8s/prod/patch-gateway-ingress.yaml @@ -0,0 +1,10 @@ +# Production gateway ingress: WebSocket timeout annotations +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: gateway-ingress + annotations: + nginx.ingress.kubernetes.io/websocket-services: gateway + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-connect-timeout: "60" diff --git a/deploy/k8s/prod/patch-gateway-rbac-subject.yaml b/deploy/k8s/prod/patch-gateway-rbac-subject.yaml new file mode 100644 index 00000000..42b224c2 --- /dev/null +++ b/deploy/k8s/prod/patch-gateway-rbac-subject.yaml @@ -0,0 +1,10 @@ +# Fix gateway ServiceAccount namespace in RoleBinding subject (base uses 'default', prod uses 'ai') +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: gateway-sandbox-creator + namespace: sandbox +subjects: + - kind: ServiceAccount + name: gateway + namespace: ai diff --git a/deploy/k8s/prod/patch-web.yaml b/deploy/k8s/prod/patch-web.yaml new file mode 100644 index 00000000..51c70964 --- /dev/null +++ b/deploy/k8s/prod/patch-web.yaml @@ -0,0 +1,18 @@ +# Production web: imagePullPolicy and resources +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ai-web +spec: + template: + 
spec: + containers: + - name: ai-web + imagePullPolicy: Always + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" diff --git a/deploy/k8s/prod/patches.yaml b/deploy/k8s/prod/patches.yaml deleted file mode 100644 index c553ba01..00000000 --- a/deploy/k8s/prod/patches.yaml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# Production backend patches -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: ai-backend -spec: - replicas: 2 - template: - spec: - runtimeClassName: gvisor - containers: - - name: ai-backend - image: dexorder/ai-backend:latest - imagePullPolicy: Always - env: - - name: CONFIG - value: "prod" - resources: - requests: - memory: "2Gi" - cpu: "1000m" - limits: - memory: "4Gi" - cpu: "2000m" ---- -# Production web patches -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ai-web -spec: - replicas: 2 - template: - spec: - runtimeClassName: gvisor - containers: - - name: ai-web - image: dexorder/ai-web:latest - imagePullPolicy: Always - env: - - name: VITE_GATEWAY_URL - value: "https://dexorder.ai/api" - - name: VITE_WS_URL - value: "wss://dexorder.ai/ws" - resources: - requests: - memory: "512Mi" - cpu: "250m" - limits: - memory: "1Gi" - cpu: "500m" diff --git a/deploy/k8s/prod/secrets/ai-secrets.tpl.yaml b/deploy/k8s/prod/secrets/ai-secrets.tpl.yaml new file mode 100644 index 00000000..8cf13d9a --- /dev/null +++ b/deploy/k8s/prod/secrets/ai-secrets.tpl.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ai-secrets + namespace: ai +type: Opaque +stringData: + anthropic-api-key: "{{ op://AI Prod/Gateway/anthropic_api_key }}" diff --git a/deploy/k8s/prod/secrets/flink-secrets.tpl.yaml b/deploy/k8s/prod/secrets/flink-secrets.tpl.yaml new file mode 100644 index 00000000..6d18ecbe --- /dev/null +++ b/deploy/k8s/prod/secrets/flink-secrets.tpl.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: flink-secrets + namespace: ai +type: Opaque +stringData: + minio-access-key: "{{ 
op://AI Prod/MinIO/access_key }}" + minio-secret-key: "{{ op://AI Prod/MinIO/secret_key }}" diff --git a/deploy/k8s/prod/secrets/gateway-secrets.tpl.yaml b/deploy/k8s/prod/secrets/gateway-secrets.tpl.yaml new file mode 100644 index 00000000..5aa0d97a --- /dev/null +++ b/deploy/k8s/prod/secrets/gateway-secrets.tpl.yaml @@ -0,0 +1,45 @@ +apiVersion: v1 +kind: Secret +metadata: + name: gateway-secrets + namespace: ai +type: Opaque +stringData: + secrets.yaml: | + # Gateway Secrets (production) + + # Authentication secret for JWT signing + auth: + secret: "{{ op://AI Prod/Gateway/jwt_secret }}" + + # LLM Provider API Keys + llm_providers: + anthropic_api_key: "{{ op://AI Prod/Gateway/anthropic_api_key }}" + openai_api_key: "{{ op://AI Prod/Gateway/openai_api_key }}" + google_api_key: "{{ op://AI Prod/Gateway/google_api_key }}" + openrouter_api_key: "{{ op://AI Prod/Gateway/openrouter_api_key }}" + + # Telegram (optional) + telegram: + bot_token: "{{ op://AI Prod/Telegram/bot_token }}" + + # Email service (optional) + email: + service_key: "" + + # Push notification service (optional) + push: + service_key: "" + + # Qdrant API key (optional, for hosted Qdrant) + qdrant: + api_key: "" + + # Iceberg S3 credentials (must match minio-secret) + iceberg: + s3_access_key: "{{ op://AI Prod/MinIO/access_key }}" + s3_secret_key: "{{ op://AI Prod/MinIO/secret_key }}" + + # Embedding API key (if using external provider) + embedding: + api_key: "" diff --git a/deploy/k8s/prod/secrets/ingestor-secrets.tpl.yaml b/deploy/k8s/prod/secrets/ingestor-secrets.tpl.yaml new file mode 100644 index 00000000..7d9c5a68 --- /dev/null +++ b/deploy/k8s/prod/secrets/ingestor-secrets.tpl.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ingestor-secrets + namespace: ai +type: Opaque +stringData: + binance-api-key: "{{ op://AI Prod/Ingestor/binance_api_key }}" + binance-api-secret: "{{ op://AI Prod/Ingestor/binance_api_secret }}" + coinbase-api-key: "{{ op://AI 
Prod/Ingestor/coinbase_api_key }}" + coinbase-api-secret: "{{ op://AI Prod/Ingestor/coinbase_api_secret }}" + kraken-api-key: "{{ op://AI Prod/Ingestor/kraken_api_key }}" + kraken-api-secret: "{{ op://AI Prod/Ingestor/kraken_api_secret }}" diff --git a/deploy/k8s/prod/secrets/minio-secret.tpl.yaml b/deploy/k8s/prod/secrets/minio-secret.tpl.yaml new file mode 100644 index 00000000..8aa57456 --- /dev/null +++ b/deploy/k8s/prod/secrets/minio-secret.tpl.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret + namespace: ai +type: Opaque +stringData: + root-user: "{{ op://AI Prod/MinIO/access_key }}" + root-password: "{{ op://AI Prod/MinIO/secret_key }}" diff --git a/deploy/k8s/prod/secrets/postgres-secret.tpl.yaml b/deploy/k8s/prod/secrets/postgres-secret.tpl.yaml new file mode 100644 index 00000000..7649bb15 --- /dev/null +++ b/deploy/k8s/prod/secrets/postgres-secret.tpl.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: postgres-secret + namespace: ai +type: Opaque +stringData: + password: "{{ op://AI Prod/PostgreSQL/password }}" diff --git a/deploy/k8s/prod/secrets/sandbox-secrets.tpl.yaml b/deploy/k8s/prod/secrets/sandbox-secrets.tpl.yaml new file mode 100644 index 00000000..c6b04c29 --- /dev/null +++ b/deploy/k8s/prod/secrets/sandbox-secrets.tpl.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Secret +metadata: + name: sandbox-secrets + namespace: sandbox + labels: + app.kubernetes.io/name: sandbox + app.kubernetes.io/component: secrets +type: Opaque +stringData: + secrets.yaml: | + # S3/MinIO credentials for Iceberg data access (must match minio-secret) + s3_access_key: "{{ op://AI Prod/MinIO/access_key }}" + s3_secret_key: "{{ op://AI Prod/MinIO/secret_key }}" diff --git a/doc/CLUSTER_SETUP.md b/doc/CLUSTER_SETUP.md new file mode 100644 index 00000000..e096c831 --- /dev/null +++ b/doc/CLUSTER_SETUP.md @@ -0,0 +1,456 @@ +# Production Cluster Setup Guide + +This guide covers setting up the Dexorder AI platform from 
scratch on a fresh Kubernetes cluster. + +--- + +## Overview + +The platform runs across two namespaces: + +| Namespace | Contents | +|-----------|----------| +| `ai` | Gateway, web UI, all infrastructure services (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg-catalog) | +| `sandbox` | Per-user sandbox containers (created dynamically by the gateway) | + +Secrets are managed via 1Password CLI (`op inject`). All `.tpl.yaml` files in `deploy/k8s/prod/secrets/` contain `op://` references and are safe to commit; actual values are never stored in git. + +--- + +## Prerequisites + +### Tooling + +| Tool | Purpose | Min Version | +|------|---------|-------------| +| `kubectl` | Cluster management | 1.30+ | +| `kustomize` | Manifest rendering | 5.x | +| `op` | 1Password CLI | 2.x | +| `docker` | Image builds | - | + +### Cluster Requirements + +- **Kubernetes**: 1.30+ (required for `ValidatingAdmissionPolicy` GA) +- **nginx-ingress-controller**: For ingress routing and WebSocket support +- **cert-manager**: For TLS certificate provisioning (with `letsencrypt-prod` ClusterIssuer) +- **Persistent volume provisioner**: StorageClass `standard` must exist and be functional +- **DNS**: `dexorder.ai` resolves to the cluster's ingress IP/load balancer + +### Container Registry Access + +Images are hosted at `git.dxod.org/dexorder/dexorder/`. The cluster must be able to pull from this registry. If the registry requires authentication, create an image pull secret before deploying. 
+ +--- + +## Step 1 — Configure kubectl Context + +Create a dedicated context named `prod` that defaults to the `ai` namespace: + +```bash +# Add cluster credentials (replace with your actual kubeconfig details) +kubectl config set-cluster prod-cluster \ + --server=https://<API_SERVER_ENDPOINT> \ + --certificate-authority=/path/to/ca.crt + +kubectl config set-credentials prod-user \ + --client-certificate=/path/to/client.crt \ + --client-key=/path/to/client.key + +kubectl config set-context prod \ + --cluster=prod-cluster \ + --user=prod-user \ + --namespace=ai + +# Verify +kubectl --context=prod cluster-info +``` + +All `bin/` scripts use `kubectl --context=prod` for production operations. + +--- + +## Step 2 — Install Cluster Prerequisites + +### nginx-ingress-controller + +```bash +kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.10.0/deploy/static/provider/cloud/deploy.yaml +kubectl -n ingress-nginx wait --for=condition=ready pod -l app.kubernetes.io/component=controller --timeout=120s +``` + +### cert-manager + +```bash +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.0/cert-manager.yaml +kubectl -n cert-manager wait --for=condition=ready pod -l app=cert-manager --timeout=120s +``` + +Then create the `letsencrypt-prod` ClusterIssuer. Edit the email address: + +```yaml +# Save as /tmp/clusterissuer.yaml and apply +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-prod +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: your-email@dexorder.ai + privateKeySecretRef: + name: letsencrypt-prod-key + solvers: + - http01: + ingress: + class: nginx +``` + +```bash +kubectl apply -f /tmp/clusterissuer.yaml +``` + +--- + +## Step 3 — Set Up 1Password Vault + +All production secrets are stored under the **AI Prod** vault in 1Password. 
The `bin/op-setup` script creates the vault and all required items with placeholder values so you can fill them in before deploying. + +```bash +# Sign in to 1Password +op signin + +# Preview what will be created (no changes) +bin/op-setup --dry-run + +# Create the vault and all items +bin/op-setup +``` + +After running the script, open 1Password and update each item in the **AI Prod** vault with real values: + +| Item | Fields | Where to get the value | +|------|--------|------------------------| +| `PostgreSQL` | `password` | Generate: `openssl rand -base64 32` | +| `MinIO` | `access_key`, `secret_key` | `access_key` can stay `minio-admin`; generate a strong `secret_key` | +| `Gateway` | `anthropic_api_key` | [Anthropic Console](https://console.anthropic.com) → API Keys | +| `Gateway` | `jwt_secret` | Generate: `openssl rand -base64 48` | +| `Gateway` | `openai_api_key` | [OpenAI Platform](https://platform.openai.com) → API Keys (optional) | +| `Gateway` | `google_api_key` | Google AI Studio (optional) | +| `Gateway` | `openrouter_api_key` | [OpenRouter](https://openrouter.ai) (optional) | +| `Telegram` | `bot_token` | BotFather → `/newbot` (optional) | +| `Ingestor` | `binance_api_key/secret` | Binance API Management (optional) | +| `Ingestor` | `coinbase_api_key/secret` | Coinbase CDP Portal (optional) | +| `Ingestor` | `kraken_api_key/secret` | Kraken API Settings (optional) | + +Verify the references resolve before continuing: + +```bash +op inject -i deploy/k8s/prod/secrets/gateway-secrets.tpl.yaml | head -20 +``` + +--- + +## Step 4 — Apply Base Manifests + +This creates namespaces, RBAC, network policies, admission policies, and resource quotas. 
+ +```bash +kubectl --context=prod apply -k deploy/k8s/prod/ +``` + +Verify the namespaces and key resources are created: + +```bash +kubectl --context=prod get namespaces ai sandbox +kubectl --context=prod -n ai get serviceaccount gateway +kubectl --context=prod -n sandbox get serviceaccount sandbox-lifecycle +kubectl --context=prod get validatingadmissionpolicy dexorder-sandbox-image-policy +``` + +--- + +## Step 5 — Apply Secrets + +```bash +# Apply all secrets (uses op inject to resolve op:// references) +bin/secret-update prod +``` + +This will prompt for confirmation, then apply all 7 secrets: +- `ai-secrets` (Anthropic API key) +- `postgres-secret` (PostgreSQL password) +- `minio-secret` (MinIO credentials) +- `ingestor-secrets` (exchange API keys) +- `flink-secrets` (MinIO credentials for Flink) +- `gateway-secrets` (gateway application secrets) +- `sandbox-secrets` (secrets mounted in sandbox pods) + +Verify: + +```bash +kubectl --context=prod -n ai get secrets +kubectl --context=prod -n sandbox get secret sandbox-secrets +``` + +--- + +## Step 6 — Apply Configs + +```bash +# Apply all configs (gateway-config uses op inject; others are plain YAML) +bin/config-update prod +``` + +This applies: +- `relay-config` — ZMQ relay configuration +- `ingestor-config` — CCXT ingestor configuration +- `flink-config` — Flink job configuration +- `gateway-config` — Gateway config (DB credentials resolved via op inject) + +Verify: + +```bash +kubectl --context=prod -n ai get configmaps +``` + +--- + +## Step 7 — Deploy Infrastructure + +Infrastructure services (postgres, minio, kafka, iceberg-catalog, dragonfly, qdrant, relay, ingestor, flink) are defined in `deploy/k8s/prod/infrastructure.yaml` and were applied in Step 4. 
+ +Wait for the StatefulSets and Deployments to become ready: + +```bash +kubectl --context=prod -n ai rollout status statefulset/postgres +kubectl --context=prod -n ai rollout status statefulset/minio +kubectl --context=prod -n ai rollout status statefulset/kafka +kubectl --context=prod -n ai rollout status statefulset/qdrant +kubectl --context=prod -n ai rollout status deployment/dragonfly +kubectl --context=prod -n ai rollout status deployment/iceberg-catalog +kubectl --context=prod -n ai rollout status deployment/relay +kubectl --context=prod -n ai rollout status deployment/ingestor +kubectl --context=prod -n ai rollout status deployment/flink-jobmanager +kubectl --context=prod -n ai rollout status deployment/flink-taskmanager +``` + +MinIO will automatically run a Job to create the `warehouse` bucket on first start. Confirm it completes: + +```bash +kubectl --context=prod -n ai get jobs +kubectl --context=prod -n ai wait --for=condition=complete job/minio-init --timeout=120s +``` + +--- + +## Step 8 — Deploy Application Images + +Build and push the application images: + +```bash +# Build and push all services +bin/deploy gateway prod +bin/deploy web prod +bin/deploy sandbox prod +bin/deploy lifecycle-sidecar prod +bin/deploy flink prod +bin/deploy relay prod +bin/deploy ingestor prod +``` + +Each `bin/deploy` command builds the Docker image, tags it with the current git SHA, pushes to `git.dxod.org/dexorder/dexorder/`, and updates the live deployment via `kubectl set image`. + +Wait for the gateway and web to be ready: + +```bash +kubectl --context=prod -n ai rollout status deployment/gateway +kubectl --context=prod -n ai rollout status deployment/ai-web +``` + +--- + +## Step 9 — Initialize Schema and Admin User + +```bash +bin/init prod +``` + +This will: +1. Wait for postgres to be ready +2. Check if the schema exists; apply `gateway/schema.sql` if not +3. Prompt for admin user credentials (email, password, display name, license tier) +4. 
Register the user via the API +5. Insert the license record into the database + +--- + +## Step 10 — Verify TLS and Ingress + +cert-manager should automatically provision TLS certificates via Let's Encrypt once the ingress resources are applied and DNS is resolving correctly. + +```bash +# Check certificate status +kubectl --context=prod -n ai get certificates +kubectl --context=prod -n ai describe certificate dexorder-ai-tls + +# Certificates are ready when READY=True +# This can take 1-2 minutes for HTTP-01 challenge completion +``` + +Once ready, verify the application is accessible: + +```bash +curl -I https://dexorder.ai/api/health +# Expected: HTTP/2 200 +``` + +--- + +## Day-2 Operations + +### Update a Service After Code Changes + +```bash +# Rebuild and redeploy a single service +bin/deploy gateway prod +bin/deploy web prod +``` + +### Update Secrets + +```bash +# Update all secrets +bin/secret-update prod + +# Update a specific secret +bin/secret-update prod ai-secrets +``` + +### Update Config + +```bash +# Update all configs (triggers pod restarts) +bin/config-update prod + +# Update a specific config +bin/config-update prod gateway-config +``` + +### Add a New User + +```bash +# Re-run init to add another user +bin/init prod +``` + +Or insert directly via psql: + +```bash +PG_POD=$(kubectl --context=prod -n ai get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}') +kubectl --context=prod -n ai exec -it "$PG_POD" -- psql -U postgres -d iceberg +``` + +### View Logs + +```bash +kubectl --context=prod -n ai logs -f deployment/gateway +kubectl --context=prod -n ai logs -f deployment/ingestor +kubectl --context=prod -n ai logs -f deployment/flink-jobmanager +kubectl --context=prod -n sandbox logs -l dexorder.io/component=sandbox +``` + +### Check Sandbox Status + +```bash +# List all running sandboxes +kubectl --context=prod -n sandbox get deployments +kubectl --context=prod -n sandbox get pods + +# Check resource usage in sandbox namespace 
+kubectl --context=prod -n sandbox top pods +``` + +--- + +## Namespace & Security Architecture + +``` +Internet + │ + ▼ +nginx-ingress (dexorder.ai) + │ + ├──/──────────────────► ai-web:5173 (Vue.js UI) + │ + └──/api/───────────────► gateway:3000 (Node.js API) + │ + │ Creates/manages via k8s API + ▼ + sandbox namespace + ┌──────────────────────┐ + │ sandbox- │ + │ ├── sandbox │ + │ │ (MCP server) │ + │ └── lifecycle-sidecar│ + └──────────────────────┘ + │ + │ Egress: only ai namespace + │ services + external HTTPS + ▼ + ai namespace services: + gateway:5571 (ZMQ events) + iceberg-catalog:8181 + minio:9000 + relay:5559 +``` + +### Network Isolation + +- Sandbox pods have default-deny network policy +- Sandboxes can reach: gateway (ZMQ + callbacks), iceberg-catalog, minio, relay, external HTTPS (port 443) +- Sandboxes cannot reach: other sandbox pods, the Kubernetes API, private IP ranges +- The admission policy (`dexorder-sandbox-image-policy`) prevents non-approved images from running in the sandbox namespace + +--- + +## Troubleshooting + +### Pods stuck in `Pending` + +```bash +kubectl --context=prod -n ai describe pod +# Look for: resource quota exceeded, PVC not bound, image pull errors +``` + +### Certificate not issuing + +```bash +kubectl --context=prod -n ai describe certificaterequest +kubectl --context=prod -n cert-manager logs -l app=cert-manager +# Common cause: DNS not pointing to cluster ingress IP yet +``` + +### Gateway can't create sandboxes + +```bash +# Verify RBAC is correct +kubectl --context=prod auth can-i create deployments \ + --as=system:serviceaccount:ai:gateway -n sandbox + +# Should return: yes +``` + +### Sandbox pod fails to start with "configmap not found" + +This would indicate a leftover reference to `sandbox-config` (removed from the template). 
Check the sandbox deployment spec: + +```bash +kubectl --context=prod -n sandbox describe deployment sandbox- +``` + +### 1Password auth expired + +```bash +op signin +bin/secret-update prod +``` diff --git a/doc/architecture.md b/doc/architecture.md index e7af7f7a..56d33032 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -1,8 +1,8 @@ -# DexOrder AI Platform Architecture +# Dexorder AI Platform Architecture ## Overview -DexOrder is an AI-powered trading platform that combines real-time market data processing, user-specific AI agents, and a flexible data pipeline. The system is designed for scalability, isolation, and extensibility. +Dexorder is an AI-powered trading platform that combines real-time market data processing, user-specific AI agents, and a flexible data pipeline. The system is designed for scalability, isolation, and extensibility. ## High-Level Architecture @@ -415,12 +415,12 @@ User authenticates → Gateway checks if deployment exists ### RBAC **Gateway ServiceAccount:** -- Create deployments/services/PVCs in `dexorder-sandboxes` namespace +- Create deployments/services/PVCs in `sandbox` namespace - Read pod status and logs - Cannot delete, exec, or access secrets **Lifecycle Sidecar ServiceAccount:** -- Delete deployments in `dexorder-sandboxes` namespace +- Delete deployments in `sandbox` namespace - Delete PVCs (conditional on user type) - Cannot access other resources @@ -428,7 +428,7 @@ User authenticates → Gateway checks if deployment exists ### Admission Control -All pods in `dexorder-sandboxes` namespace must: +All pods in `sandbox` namespace must: - Use approved images only (allowlist) - Run as non-root - Drop all capabilities @@ -550,7 +550,7 @@ docker push ghcr.io/dexorder/lifecycle-sidecar:latest **Namespaces:** - `dexorder-system` - Platform services (gateway, infrastructure) -- `dexorder-sandboxes` - User containers (isolated) +- `sandbox` - User containers (isolated) --- diff --git a/doc/container_lifecycle_management.md 
b/doc/container_lifecycle_management.md index 10a93489..28bbca1f 100644 --- a/doc/container_lifecycle_management.md +++ b/doc/container_lifecycle_management.md @@ -85,7 +85,7 @@ Runs alongside the agent container with shared PID namespace. Monitors the main - `USER_TYPE`: License tier (`anonymous`, `free`, `paid`, `enterprise`) - `MAIN_CONTAINER_PID`: PID of main container (default: 1) -**RBAC**: Has permission to delete deployments and PVCs **only in dexorder-sandboxes namespace**. Cannot delete other deployments due to: +**RBAC**: Has permission to delete deployments and PVCs **only in sandbox namespace**. Cannot delete other deployments due to: 1. Only knows its own deployment name (from env) 2. RBAC scoped to namespace 3. No cross-pod communication @@ -164,12 +164,12 @@ Configured via `USER_TYPE` env var in deployment. **Lifecycle Sidecar**: - Can delete its own deployment only - Cannot delete other deployments -- Scoped to dexorder-sandboxes namespace +- Scoped to sandbox namespace - No exec, no secrets access ### Admission Control -All deployments in `dexorder-sandboxes` namespace are subject to: +All deployments in `sandbox` namespace are subject to: - Image allowlist (only approved images) - Security context enforcement (non-root, drop caps, read-only rootfs) - Resource limits required @@ -198,7 +198,7 @@ kubectl apply -k deploy/k8s/dev # or prod ``` This creates: -- Namespaces (`dexorder-system`, `dexorder-sandboxes`) +- Namespaces (`dexorder-system`, `sandbox`) - RBAC (gateway, lifecycle sidecar) - Admission policies - Network policies @@ -257,7 +257,7 @@ cd lifecycle-sidecar go build -o lifecycle-sidecar main.go # Run (requires k8s config) -export NAMESPACE=dexorder-sandboxes +export NAMESPACE=sandbox export DEPLOYMENT_NAME=agent-test export USER_TYPE=free ./lifecycle-sidecar @@ -277,7 +277,7 @@ export USER_TYPE=free Check logs: ```bash -kubectl logs -n dexorder-sandboxes sandbox-user-abc123 -c agent +kubectl logs -n sandbox sandbox-user-abc123 -c agent 
``` Verify: @@ -289,19 +289,19 @@ Verify: Check sidecar logs: ```bash -kubectl logs -n dexorder-sandboxes sandbox-user-abc123 -c lifecycle-sidecar +kubectl logs -n sandbox sandbox-user-abc123 -c lifecycle-sidecar ``` Verify: - Exit code file exists: `/var/run/agent/exit_code` contains `42` -- RBAC permissions: `kubectl auth can-i delete deployments --as=system:serviceaccount:dexorder-sandboxes:sandbox-lifecycle -n dexorder-sandboxes` +- RBAC permissions: `kubectl auth can-i delete deployments --as=system:serviceaccount:sandbox:sandbox-lifecycle -n sandbox` - Deployment name matches: Check `DEPLOYMENT_NAME` env var ### Gateway can't create deployments Check gateway logs and verify: - ServiceAccount exists: `kubectl get sa gateway -n dexorder-system` -- RoleBinding exists: `kubectl get rolebinding gateway-sandbox-creator -n dexorder-sandboxes` +- RoleBinding exists: `kubectl get rolebinding gateway-sandbox-creator -n sandbox` - Admission policy allows image: Check image name matches allowlist in `admission-policy.yaml` ## Future Enhancements diff --git a/doc/gateway_container_creation.md b/doc/gateway_container_creation.md index 1878a270..5c7c8ac8 100644 --- a/doc/gateway_container_creation.md +++ b/doc/gateway_container_creation.md @@ -63,7 +63,7 @@ userId: "user-abc123" deploymentName: "sandbox-user-abc123" serviceName: "sandbox-user-abc123" pvcName: "sandbox-user-abc123-data" -mcpEndpoint: "http://sandbox-user-abc123.dexorder-sandboxes.svc.cluster.local:3000" +mcpEndpoint: "http://sandbox-user-abc123.sandbox.svc.cluster.local:3000" ``` User IDs are sanitized to be Kubernetes-compliant (lowercase alphanumeric + hyphens). 
@@ -145,7 +145,7 @@ Environment variables: ```bash # Kubernetes -KUBERNETES_NAMESPACE=dexorder-sandboxes +KUBERNETES_NAMESPACE=sandbox KUBERNETES_IN_CLUSTER=true # false for local dev KUBERNETES_CONTEXT=minikube # for local dev only @@ -162,9 +162,9 @@ SANDBOX_STORAGE_CLASS=standard The gateway uses a restricted ServiceAccount with RBAC: **Can do:** -- ✅ Create deployments in `dexorder-sandboxes` namespace -- ✅ Create services in `dexorder-sandboxes` namespace -- ✅ Create PVCs in `dexorder-sandboxes` namespace +- ✅ Create deployments in `sandbox` namespace +- ✅ Create services in `sandbox` namespace +- ✅ Create PVCs in `sandbox` namespace - ✅ Read pod status and logs (debugging) - ✅ Update deployments (future: resource scaling) @@ -226,7 +226,7 @@ kubectl apply -k deploy/k8s/dev # .env KUBERNETES_IN_CLUSTER=false KUBERNETES_CONTEXT=minikube -KUBERNETES_NAMESPACE=dexorder-sandboxes +KUBERNETES_NAMESPACE=sandbox ``` 4. Run gateway: @@ -242,9 +242,9 @@ wscat -c "ws://localhost:3000/ws/chat" -H "Authorization: Bearer your-jwt" The gateway will create deployments in minikube. View with: ```bash -kubectl get deployments -n dexorder-sandboxes -kubectl get pods -n dexorder-sandboxes -kubectl logs -n dexorder-sandboxes sandbox-user-abc123 -c agent +kubectl get deployments -n sandbox +kubectl get pods -n sandbox +kubectl logs -n sandbox sandbox-user-abc123 -c agent ``` ## Production Deployment @@ -262,7 +262,7 @@ kubectl apply -k deploy/k8s/prod ``` 3. Gateway runs in `dexorder-system` namespace -4. Creates agent containers in `dexorder-sandboxes` namespace +4. Creates agent containers in `sandbox` namespace 5. 
Admission policies enforce image allowlist and security constraints ## Monitoring diff --git a/doc/user_container_events.md b/doc/user_container_events.md index 6ccc28fb..0f7c2041 100644 --- a/doc/user_container_events.md +++ b/doc/user_container_events.md @@ -1169,7 +1169,7 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: agent-to-gateway-events - namespace: dexorder-sandboxes + namespace: sandbox spec: podSelector: matchLabels: diff --git a/gateway/.env.example b/gateway/.env.example index f60a5c5c..3cc66dae 100644 --- a/gateway/.env.example +++ b/gateway/.env.example @@ -28,7 +28,7 @@ DEFAULT_MODEL=claude-sonnet-4-6 TELEGRAM_BOT_TOKEN= # Kubernetes configuration -KUBERNETES_NAMESPACE=dexorder-sandboxes +KUBERNETES_NAMESPACE=sandbox KUBERNETES_IN_CLUSTER=false KUBERNETES_CONTEXT=minikube SANDBOX_IMAGE=ghcr.io/dexorder/sandbox:latest diff --git a/gateway/config.example.yaml b/gateway/config.example.yaml index 6580055d..761e3cc5 100644 --- a/gateway/config.example.yaml +++ b/gateway/config.example.yaml @@ -47,7 +47,7 @@ license_models: # Kubernetes configuration kubernetes: - namespace: dexorder-sandboxes + namespace: sandbox in_cluster: false context: minikube sandbox_image: ghcr.io/dexorder/sandbox:latest diff --git a/gateway/src/channels/websocket-handler.ts b/gateway/src/channels/websocket-handler.ts index e0c7992d..33882981 100644 --- a/gateway/src/channels/websocket-handler.ts +++ b/gateway/src/channels/websocket-handler.ts @@ -348,8 +348,8 @@ export class WebSocketHandler { /** * Derive the container's XPUB event endpoint from the MCP server URL. 
* - * MCP URL format: http://sandbox-user-abc123.dexorder-sandboxes.svc.cluster.local:3000 - * Event endpoint: tcp://sandbox-user-abc123.dexorder-sandboxes.svc.cluster.local:5570 + * MCP URL format: http://sandbox-user-abc123.sandbox.svc.cluster.local:3000 + * Event endpoint: tcp://sandbox-user-abc123.sandbox.svc.cluster.local:5570 */ private getContainerEventEndpoint(mcpServerUrl: string): string { try { diff --git a/gateway/src/harness/subagents/research/memory/api-reference.md b/gateway/src/harness/subagents/research/memory/api-reference.md index 627ff1a4..9f57cb18 100644 --- a/gateway/src/harness/subagents/research/memory/api-reference.md +++ b/gateway/src/harness/subagents/research/memory/api-reference.md @@ -30,7 +30,7 @@ type hints, docstrings, and examples. ### api.py ```python """ -Main DexOrder API - provides access to market data and charting. +Main Dexorder API - provides access to market data and charting. """ import logging @@ -311,8 +311,10 @@ class ChartingAPI(ABC): ) # Overlay moving average + # NOTE: mplfinance uses integer x-positions (0..N-1) internally, + # so overlays must use range(len(df)), not df.index. fig, ax = api.plot_ohlc(df) - ax.plot(df.index, df['sma_20'], label="SMA 20", color="blue") + ax.plot(range(len(df)), df['sma_20'], label="SMA 20", color="blue") ax.legend() """ pass @@ -406,7 +408,7 @@ class ChartingAPI(ABC): ### __init__.py ```python """ -DexOrder API - market data and charting for research and trading. +Dexorder API - market data and charting for research and trading. 
For research scripts, import and use get_api() to access the API: diff --git a/gateway/src/harness/subagents/research/memory/pandas-ta-reference.md b/gateway/src/harness/subagents/research/memory/pandas-ta-reference.md index 94241b31..a00a0fd7 100644 --- a/gateway/src/harness/subagents/research/memory/pandas-ta-reference.md +++ b/gateway/src/harness/subagents/research/memory/pandas-ta-reference.md @@ -214,7 +214,7 @@ df['macd_signal'] = macd_df.iloc[:, 2] # Main price chart with EMA overlay fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT 1H", volume=True) -ax.plot(df.index, df['ema_20'], label="EMA 20", color="orange", linewidth=1.5) +ax.plot(range(len(df)), df['ema_20'], label="EMA 20", color="orange", linewidth=1.5) # range(len(df)), not df.index ax.legend() # RSI panel diff --git a/gateway/src/harness/subagents/research/memory/usage-examples.md b/gateway/src/harness/subagents/research/memory/usage-examples.md index 72f0e19d..839326d3 100644 --- a/gateway/src/harness/subagents/research/memory/usage-examples.md +++ b/gateway/src/harness/subagents/research/memory/usage-examples.md @@ -137,7 +137,8 @@ df['rsi'] = ta.rsi(df['close'], length=14) fig, ax = api.charting.plot_ohlc(df, title="BTC/USDT with SMA") # Overlay the SMA on the price chart -ax.plot(df.index, df['sma_20'], label="SMA 20", color="blue", linewidth=2) +# NOTE: mplfinance uses integer x-positions (0..N-1); use range(len(df)), not df.index. +ax.plot(range(len(df)), df['sma_20'], label="SMA 20", color="blue", linewidth=2) ax.legend() # Add RSI indicator panel below @@ -211,8 +212,9 @@ fig, ax = api.charting.plot_ohlc( ) # Overlay moving averages -ax.plot(df.index, df['sma_20'], label="SMA 20", color="blue", linewidth=1.5) -ax.plot(df.index, df['ema_50'], label="EMA 50", color="red", linewidth=1.5) +# NOTE: mplfinance uses integer x-positions (0..N-1); use range(len(df)), not df.index. 
+ax.plot(range(len(df)), df['sma_20'], label="SMA 20", color="blue", linewidth=1.5) +ax.plot(range(len(df)), df['ema_50'], label="EMA 50", color="red", linewidth=1.5) ax.legend() # Print summary statistics diff --git a/gateway/src/k8s/client.ts b/gateway/src/k8s/client.ts index af8900af..d1e7c7f8 100644 --- a/gateway/src/k8s/client.ts +++ b/gateway/src/k8s/client.ts @@ -24,6 +24,7 @@ export interface DeploymentSpec { sidecarImage: string; storageClass: string; imagePullPolicy?: string; + serviceNamespace: string; // namespace where relay/minio/iceberg/gateway services live } /** @@ -139,7 +140,8 @@ export class KubernetesClient { .replace(/\{\{storage\}\}/g, r.storage) .replace(/\{\{tmpSizeLimit\}\}/g, r.tmpSizeLimit) .replace(/\{\{enableIdleShutdown\}\}/g, String(r.enableIdleShutdown)) - .replace(/\{\{idleTimeoutMinutes\}\}/g, String(r.idleTimeoutMinutes)); + .replace(/\{\{idleTimeoutMinutes\}\}/g, String(r.idleTimeoutMinutes)) + .replace(/\{\{serviceNamespace\}\}/g, spec.serviceNamespace); // Parse YAML documents (deployment, pvc, service) const documents = yaml.loadAll(rendered) as any[]; diff --git a/gateway/src/k8s/container-manager.ts b/gateway/src/k8s/container-manager.ts index eaf72b38..7414a56c 100644 --- a/gateway/src/k8s/container-manager.ts +++ b/gateway/src/k8s/container-manager.ts @@ -9,6 +9,7 @@ export interface ContainerManagerConfig { storageClass: string; imagePullPolicy?: string; namespace: string; + serviceNamespace: string; logger: FastifyBaseLogger; } @@ -85,6 +86,7 @@ export class ContainerManager { sidecarImage: this.config.sidecarImage, storageClass: this.config.storageClass, imagePullPolicy: this.config.imagePullPolicy, + serviceNamespace: this.config.serviceNamespace, }; await this.config.k8sClient.createAgentDeployment(spec); diff --git a/gateway/src/k8s/templates/sandbox.yaml b/gateway/src/k8s/templates/sandbox.yaml index 71e5bae3..85589fd3 100644 --- a/gateway/src/k8s/templates/sandbox.yaml +++ b/gateway/src/k8s/templates/sandbox.yaml 
@@ -1,7 +1,7 @@ # Sandbox deployment template — variables are populated from the user's License k8sResources. # Variables: {{userId}}, {{deploymentName}}, {{pvcName}}, {{serviceName}}, # {{sandboxImage}}, {{sidecarImage}}, {{imagePullPolicy}}, {{storageClass}}, -# {{licenseType}}, +# {{serviceNamespace}}, {{licenseType}}, # {{memoryRequest}}, {{memoryLimit}}, {{cpuRequest}}, {{cpuLimit}}, # {{storage}}, {{tmpSizeLimit}}, # {{enableIdleShutdown}}, {{idleTimeoutMinutes}} @@ -10,7 +10,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: {{deploymentName}} - namespace: dexorder-sandboxes labels: app.kubernetes.io/name: sandbox app.kubernetes.io/component: user-sandbox @@ -77,15 +76,15 @@ spec: - name: ZMQ_CONTROL_PORT value: "5555" - name: ZMQ_GATEWAY_ENDPOINT - value: "tcp://gateway.default.svc.cluster.local:5571" + value: "tcp://gateway.{{serviceNamespace}}.svc.cluster.local:5571" - name: ICEBERG_CATALOG_URI - value: "http://iceberg-catalog.default.svc.cluster.local:8181" + value: "http://iceberg-catalog.{{serviceNamespace}}.svc.cluster.local:8181" - name: ICEBERG_NAMESPACE value: "trading" - name: S3_ENDPOINT - value: "http://minio.default.svc.cluster.local:9000" + value: "http://minio.{{serviceNamespace}}.svc.cluster.local:9000" - name: RELAY_ENDPOINT - value: "tcp://relay.default.svc.cluster.local:5559" + value: "tcp://relay.{{serviceNamespace}}.svc.cluster.local:5559" ports: - name: mcp @@ -188,7 +187,6 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: name: {{pvcName}} - namespace: dexorder-sandboxes labels: dexorder.io/user-id: {{userId}} dexorder.io/license-tier: {{licenseType}} @@ -204,7 +202,6 @@ apiVersion: v1 kind: Service metadata: name: {{serviceName}} - namespace: dexorder-sandboxes labels: dexorder.io/user-id: {{userId}} dexorder.io/license-tier: {{licenseType}} diff --git a/gateway/src/main.ts b/gateway/src/main.ts index fc744838..b0371731 100644 --- a/gateway/src/main.ts +++ b/gateway/src/main.ts @@ -172,7 +172,8 @@ function loadConfig() { 
// Kubernetes configuration kubernetes: { - namespace: configData.kubernetes?.namespace || process.env.KUBERNETES_NAMESPACE || 'dexorder-sandboxes', + namespace: configData.kubernetes?.namespace || process.env.KUBERNETES_NAMESPACE || 'sandbox', + serviceNamespace: configData.kubernetes?.service_namespace || process.env.KUBERNETES_SERVICE_NAMESPACE || 'default', inCluster: configData.kubernetes?.in_cluster ?? (process.env.KUBERNETES_IN_CLUSTER === 'true'), context: configData.kubernetes?.context || process.env.KUBERNETES_CONTEXT, sandboxImage: configData.kubernetes?.sandbox_image || process.env.SANDBOX_IMAGE || 'ghcr.io/dexorder/sandbox:latest', @@ -312,6 +313,7 @@ const containerManager = new ContainerManager({ storageClass: config.kubernetes.storageClass, imagePullPolicy: config.kubernetes.imagePullPolicy, namespace: config.kubernetes.namespace, + serviceNamespace: config.kubernetes.serviceNamespace, logger: app.log, }); app.log.debug('Container manager initialized'); diff --git a/lifecycle-sidecar/README.md b/lifecycle-sidecar/README.md index a8f62c5d..b7fe0345 100644 --- a/lifecycle-sidecar/README.md +++ b/lifecycle-sidecar/README.md @@ -90,5 +90,5 @@ See `deploy/k8s/base/agent-deployment-example.yaml` for a complete example of ho 1. **Self-delete only**: The sidecar can only delete the deployment it's part of (enforced by label matching in admission policy) 2. **Non-privileged**: Runs as non-root user (UID 1000) 3. **Minimal permissions**: Only has `get` and `delete` on deployments/PVCs in the sandboxes namespace -4. **No cross-namespace access**: Scoped to `dexorder-sandboxes` namespace only +4. **No cross-namespace access**: Scoped to `sandbox` namespace only 5. 
**Crash-safe**: Only triggers cleanup on exit code 42, never on crashes diff --git a/sandbox/Dockerfile b/sandbox/Dockerfile index 3f4f75ee..abca358b 100644 --- a/sandbox/Dockerfile +++ b/sandbox/Dockerfile @@ -1,4 +1,4 @@ -# Multi-stage build for DexOrder user container +# Multi-stage build for Dexorder user container FROM continuumio/miniconda3:latest AS builder WORKDIR /build diff --git a/sandbox/README.md b/sandbox/README.md index 78dfd336..cbc33b9a 100644 --- a/sandbox/README.md +++ b/sandbox/README.md @@ -1,6 +1,6 @@ -# DexOrder Python Client Library +# Dexorder Python Client Library -High-level Python API for accessing historical OHLC data from the DexOrder trading platform. +High-level Python API for accessing historical OHLC data from the Dexorder trading platform. ## Features diff --git a/sandbox/config.example.yaml b/sandbox/config.example.yaml index dcca2dc1..0040d68f 100644 --- a/sandbox/config.example.yaml +++ b/sandbox/config.example.yaml @@ -1,4 +1,4 @@ -# Example configuration file for DexOrder user container +# Example configuration file for Dexorder user container # Mount this at /app/config/config.yaml in k8s # Data directory for persistent storage (workspace, strategies, etc.) diff --git a/sandbox/dexorder/__init__.py b/sandbox/dexorder/__init__.py index 1a04c556..f5b156d1 100644 --- a/sandbox/dexorder/__init__.py +++ b/sandbox/dexorder/__init__.py @@ -1,5 +1,5 @@ """ -DexOrder Trading Platform Python Client +Dexorder Trading Platform Python Client Provides high-level APIs for: - Historical OHLC data retrieval with smart caching diff --git a/sandbox/dexorder/api/__init__.py b/sandbox/dexorder/api/__init__.py index 04a3954a..2f3ac60d 100644 --- a/sandbox/dexorder/api/__init__.py +++ b/sandbox/dexorder/api/__init__.py @@ -1,5 +1,5 @@ """ -DexOrder API - market data and charting for research and trading. +Dexorder API - market data and charting for research and trading. 
For research scripts, import and use get_api() to access the API: diff --git a/sandbox/dexorder/api/api.py b/sandbox/dexorder/api/api.py index 32b0b719..45493e48 100644 --- a/sandbox/dexorder/api/api.py +++ b/sandbox/dexorder/api/api.py @@ -1,5 +1,5 @@ """ -Main DexOrder API - provides access to market data and charting. +Main Dexorder API - provides access to market data and charting. """ import logging diff --git a/sandbox/dexorder/api/charting_api.py b/sandbox/dexorder/api/charting_api.py index 54434b2b..fe006287 100644 --- a/sandbox/dexorder/api/charting_api.py +++ b/sandbox/dexorder/api/charting_api.py @@ -64,8 +64,10 @@ class ChartingAPI(ABC): ) # Overlay moving average + # NOTE: mplfinance uses integer x-positions (0..N-1) internally, + # so overlays must use range(len(df)), not df.index. fig, ax = api.plot_ohlc(df) - ax.plot(df.index, df['sma_20'], label="SMA 20", color="blue") + ax.plot(range(len(df)), df['sma_20'], label="SMA 20", color="blue") ax.legend() """ pass diff --git a/sandbox/dexorder/impl/charting_api_impl.py b/sandbox/dexorder/impl/charting_api_impl.py index cd6952d0..23c3209e 100644 --- a/sandbox/dexorder/impl/charting_api_impl.py +++ b/sandbox/dexorder/impl/charting_api_impl.py @@ -134,16 +134,11 @@ class ChartingAPIImpl(ChartingAPI): new_ax.sharex(existing_axes[0]) # Plot the indicator data + # mplfinance uses integer x-positions (0..N-1) internally, so we must + # use range(len(df)) to align with the candlestick axes. 
for col in columns: if col in df.columns: - # Handle potential timestamp index (convert from microseconds) - if df.index.name == 'timestamp' or 'timestamp' in str(df.index.dtype): - # Assume nanoseconds, convert to datetime - plot_index = pd.to_datetime(df.index, unit='ns') - else: - plot_index = df.index - - new_ax.plot(plot_index, df[col], label=col, **kwargs) + new_ax.plot(range(len(df)), df[col], label=col, **kwargs) # Styling if ylabel: diff --git a/sandbox/main.py b/sandbox/main.py index 84e28f61..8d6c6d78 100644 --- a/sandbox/main.py +++ b/sandbox/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -DexOrder User Container Main Entry Point +Dexorder User Container Main Entry Point Brings together: - Config and secrets loading from k8s mounted YAML files @@ -181,7 +181,7 @@ def create_mcp_server(config: Config, event_publisher: EventPublisher) -> Server return { "uri": uri, "mimeType": "text/plain", - "text": f"Hello from DexOrder user container!\nUser ID: {config.user_id}\n", + "text": f"Hello from Dexorder user container!\nUser ID: {config.user_id}\n", } else: raise ValueError(f"Unknown resource: {uri}") diff --git a/sandbox/secrets.example.yaml b/sandbox/secrets.example.yaml index 2fd4a7fe..a3143173 100644 --- a/sandbox/secrets.example.yaml +++ b/sandbox/secrets.example.yaml @@ -1,4 +1,4 @@ -# Example secrets file for DexOrder user container +# Example secrets file for Dexorder user container # Mount this at /app/config/secrets.yaml in k8s # This file should be created from k8s secrets diff --git a/sandbox/setup.py b/sandbox/setup.py index e64959d6..86639461 100644 --- a/sandbox/setup.py +++ b/sandbox/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup( name="dexorder-sandbox", version="0.1.0", - description="DexOrder Trading Platform Sandbox", + description="Dexorder Trading Platform Sandbox", packages=find_packages(), python_requires=">=3.9", install_requires=[ diff --git a/test/README.md b/test/README.md index 806c95f9..10550b3a 
100644 --- a/test/README.md +++ b/test/README.md @@ -1,6 +1,6 @@ # Test Clients -Test clients for the DexOrder trading system. +Test clients for the Dexorder trading system. ## History Client diff --git a/test/history_client/client_ohlc_api.py b/test/history_client/client_ohlc_api.py index d930f9ed..7359e93b 100755 --- a/test/history_client/client_ohlc_api.py +++ b/test/history_client/client_ohlc_api.py @@ -20,7 +20,7 @@ async def main(): Test the high-level OHLC client API with smart caching. """ - print("=== DexOrder OHLC Client Test ===\n") + print("=== Dexorder OHLC Client Test ===\n") # Initialize client client = OHLCClient(