backend redesign

This commit is contained in:
2026-03-11 18:47:11 -04:00
parent 8ff277c8c6
commit e99ef5d2dd
210 changed files with 12147 additions and 155 deletions

13
.gitignore vendored
View File

@@ -1,5 +1,5 @@
/backend/data /backend.old/data
/backend/uploads/ /backend.old/uploads/
# Environment variables # Environment variables
.env .env
@@ -101,3 +101,12 @@ Thumbs.db
*.swp *.swp
*.swo *.swo
*.bak *.bak
# Kubernetes secrets (never commit actual secrets!)
deploy/k8s/dev/secrets/*.yaml
deploy/k8s/prod/secrets/*.yaml
!deploy/k8s/dev/secrets/*.yaml.example
!deploy/k8s/prod/secrets/*.yaml.example
# Dev environment image tags
.dev-image-tag

8
.idea/ai.iml generated
View File

@@ -2,10 +2,12 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/backend/src" isTestSource="false" /> <sourceFolder url="file://$MODULE_DIR$/backend.old/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/backend/tests" isTestSource="true" /> <sourceFolder url="file://$MODULE_DIR$/backend.old/tests" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/client-py" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/.venv" /> <excludeFolder url="file://$MODULE_DIR$/.venv" />
<excludeFolder url="file://$MODULE_DIR$/backend/data" /> <excludeFolder url="file://$MODULE_DIR$/backend.old/data" />
<excludeFolder url="file://$MODULE_DIR$/doc.old" />
</content> </content>
<orderEntry type="jdk" jdkName="Python 3.12 (ai)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.12 (ai)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />

17
bin/build-all Executable file
View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Build every container image by delegating to bin/build once per project.
# Extra arguments (e.g. "dev") are forwarded unchanged to each invocation.
# set -e aborts the whole run as soon as any single build fails.
set -e
DIR="$(cd "$(dirname "$0")" && pwd)"
echo "Building all container images..."
echo
for project in flink relay ingestor web; do
    "$DIR/build" "$project" "$@"
done
echo
echo "All images built successfully!"

128
bin/config-update Executable file
View File

@@ -0,0 +1,128 @@
#!/usr/bin/env bash
# bin/config-update - Create/update Kubernetes ConfigMaps from the YAML files
# under deploy/k8s/<env>/configs, then restart the deployments that consume
# each config so the new values take effect.
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print usage and exit non-zero (also used as the error path for bad args).
usage() {
    echo "Usage: $0 [ENVIRONMENT] [CONFIG_NAME]"
    echo ""
    echo "Update Kubernetes ConfigMaps from YAML files"
    echo ""
    echo "Arguments:"
    echo "  ENVIRONMENT    Target environment: dev or prod (default: dev)"
    echo "  CONFIG_NAME    Specific config to update (optional, updates all if not specified)"
    echo ""
    echo "Available configs:"
    echo "  relay-config     - ZMQ relay configuration"
    echo "  ingestor-config  - CCXT ingestor configuration"
    echo "  flink-config     - Flink job configuration"
    echo ""
    echo "Examples:"
    echo "  $0                      # Update all dev configs"
    echo "  $0 dev                  # Update all dev configs"
    echo "  $0 dev relay-config     # Update only relay-config in dev"
    echo "  $0 prod                 # Update all prod configs"
    echo "  $0 prod flink-config    # Update only flink-config in prod"
    exit 1
}

# Parse arguments: ENV defaults to "dev"; CONFIG_NAME empty means "all".
ENV="${1:-dev}"
CONFIG_NAME="${2:-}"
if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then
    echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}"
    usage
fi

CONFIG_DIR="$ROOT_DIR/deploy/k8s/$ENV/configs"
if [ ! -d "$CONFIG_DIR" ]; then
    echo -e "${RED}Error: Config directory not found: $CONFIG_DIR${NC}"
    exit 1
fi

# Get kubectl context — for prod, show it and require an explicit "yes"
# before touching anything (guards against applying to the wrong cluster).
if [[ "$ENV" == "prod" ]]; then
    CONTEXT=$(kubectl config current-context)
    echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION configs!${NC}"
    echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}"
    read -p "Are you sure you want to continue? (yes/no): " confirm
    if [[ "$confirm" != "yes" ]]; then
        echo "Aborted."
        exit 0
    fi
fi

# Apply one ConfigMap named $1 from $CONFIG_DIR/$1.yaml.
# Uses "create --dry-run | apply" so the ConfigMap is created if missing and
# updated in place if it already exists. Returns 1 if the file is absent.
apply_config() {
    local config_name="$1"
    local config_file="$CONFIG_DIR/$config_name.yaml"
    if [ ! -f "$config_file" ]; then
        echo -e "${RED}✗ Config file not found: $config_file${NC}"
        return 1
    fi
    echo -e "${GREEN}→${NC} Creating/updating ConfigMap $config_name..."
    kubectl create configmap "$config_name" \
        --from-file=config.yaml="$config_file" \
        --dry-run=client -o yaml | kubectl apply -f -
    echo -e "${GREEN}✓${NC} $config_name updated"
    # Optionally restart pods that use this config, so running workloads pick
    # up the new values (mounted ConfigMaps don't trigger a restart on change).
    local restart_pods=""
    case "$config_name" in
        relay-config)
            restart_pods="deployment/relay"
            ;;
        ingestor-config)
            restart_pods="deployment/ingestor"
            ;;
        flink-config)
            restart_pods="deployment/flink-jobmanager deployment/flink-taskmanager"
            ;;
    esac
    if [ -n "$restart_pods" ]; then
        echo -e "${YELLOW}  Restarting pods...${NC}"
        # Intentionally unquoted: $restart_pods may expand to several targets.
        kubectl rollout restart $restart_pods 2>/dev/null || echo -e "${YELLOW}  (No pods found to restart)${NC}"
    fi
}

# Update specific config or all configs. In the "all" case, failures are
# counted rather than aborting, so every present config still gets applied.
if [ -n "$CONFIG_NAME" ]; then
    # Update single config
    apply_config "$CONFIG_NAME"
else
    # Update all configs
    echo -e "${GREEN}Updating all $ENV configs...${NC}"
    echo ""
    CONFIGS=(
        "relay-config"
        "ingestor-config"
        "flink-config"
    )
    FAILED=0
    for config in "${CONFIGS[@]}"; do
        if ! apply_config "$config"; then
            FAILED=$((FAILED + 1))
        fi
    done
    echo ""
    if [ $FAILED -gt 0 ]; then
        echo -e "${YELLOW}⚠️ $FAILED config(s) failed to apply${NC}"
        exit 1
    else
        echo -e "${GREEN}✓ All configs updated successfully${NC}"
    fi
fi

View File

@@ -1,11 +1,11 @@
#!/bin/bash #!/bin/bash
#REMOTE=northamerica-northeast2-docker.pkg.dev/dexorder-430504/dexorder #REMOTE=northamerica-northeast2-docker.pkg.dev/dexorder-430504/dexorder
REMOTE=git.dxod.org/dexorder/dexorder REMOTE=${REMOTE:-git.dxod.org/dexorder/dexorder}
if [ "$1" != "backend" ] && [ "$1" != "web" ]; then if [ "$1" != "flink" ] && [ "$1" != "relay" ] && [ "$1" != "ingestor" ] && [ "$1" != "web" ]; then
echo echo
echo usage: "$0 "'{backend|web} [''dev''] [config] [deployment] [kubernetes] [image_tag]' echo usage: "$0 "'{flink|relay|ingestor|web} [''dev''] [config] [deployment] [kubernetes] [image_tag]'
echo echo
echo ' [''dev''] if the literal string ''dev'' is not the second argument, then the build refuses to run if source code is not checked in. Otherwise, the git revision numbers are used in the image tag.' echo ' [''dev''] if the literal string ''dev'' is not the second argument, then the build refuses to run if source code is not checked in. Otherwise, the git revision numbers are used in the image tag.'
echo echo
@@ -86,14 +86,21 @@ fi
if [ "$DEPLOY" == "0" ]; then if [ "$DEPLOY" == "0" ]; then
ACTION=Building ACTION=Building
NO_CACHE=--no-cache #NO_CACHE=--no-cache
else else
ACTION=Making ACTION=Making
fi fi
echo $ACTION $PROJECT config=$CONFIG deployment=$DEPLOYMENT '=>' $TAG echo $ACTION $PROJECT config=$CONFIG deployment=$DEPLOYMENT '=>' $TAG
docker build $NO_CACHE -f deploy/Dockerfile-$PROJECT --build-arg="CONFIG=$CONFIG" --build-arg="DEPLOYMENT=$DEPLOYMENT" -t dexorder/ai-$PROJECT:latest . || exit 1
# Copy protobuf definitions into project directory for Docker build
# Using rsync --checksum so unchanged files keep their timestamps (preserves docker layer cache)
rsync -a --checksum --delete protobuf/ $PROJECT/protobuf/
docker build $NO_CACHE -f $PROJECT/Dockerfile --build-arg="CONFIG=$CONFIG" --build-arg="DEPLOYMENT=$DEPLOYMENT" -t dexorder/ai-$PROJECT:latest $PROJECT || exit 1
# Cleanup is handled by trap
docker tag dexorder/ai-$PROJECT:latest dexorder/ai-$PROJECT:$TAG docker tag dexorder/ai-$PROJECT:latest dexorder/ai-$PROJECT:$TAG
docker tag dexorder/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:$TAG docker tag dexorder/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:$TAG
docker tag $REMOTE/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:latest docker tag $REMOTE/ai-$PROJECT:$TAG $REMOTE/ai-$PROJECT:latest
@@ -105,7 +112,7 @@ echo "$(date)" built $REMOTE/ai-$PROJECT:$TAG
if [ "$DEPLOY" == "1" ]; then if [ "$DEPLOY" == "1" ]; then
docker push $REMOTE/ai-$PROJECT:$TAG docker push $REMOTE/ai-$PROJECT:$TAG
YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" deploy/$KUBERNETES.yaml) YAML=$(sed "s#image: dexorder/ai-$PROJECT*#image: $REMOTE/ai-$PROJECT:$TAG#" deploy/k8s/$KUBERNETES.yaml)
echo "$YAML" | kubectl apply -f - || echo "$YAML" "\nkubectl apply failed" && exit 1 echo "$YAML" | kubectl apply -f - || echo "$YAML" "\nkubectl apply failed" && exit 1
echo deployed $KUBERNETES.yaml $REMOTE/ai-$PROJECT:$TAG echo deployed $KUBERNETES.yaml $REMOTE/ai-$PROJECT:$TAG
fi fi

364
bin/dev Executable file
View File

@@ -0,0 +1,364 @@
#!/usr/bin/env bash
# bin/dev - Manage the minikube development environment: build images inside
# minikube's docker daemon, deploy the k8s manifests under deploy/k8s/dev/,
# and provide day-to-day helpers (status, logs, shell, clean, tunnel).
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print usage and exit non-zero (also used for unknown commands).
usage() {
    echo "Usage: $0 [COMMAND]"
    echo ""
    echo "Manage the minikube development environment"
    echo ""
    echo "Commands:"
    echo "  start          Start minikube and deploy all services"
    echo "  stop           Stop minikube"
    echo "  restart [svc]  Rebuild and redeploy all services, or just one (relay|ingestor|flink)"
    echo "  rebuild [svc]  Rebuild all custom images, or just one"
    echo "  deploy [svc]   Deploy/update all services, or just one"
    echo "  status         Show status of all services"
    echo "  logs           Tail logs for a service"
    echo "  shell          Open a shell in a service pod"
    echo "  clean          Delete all resources and volumes"
    echo "  tunnel         Start minikube tunnel (for LoadBalancer access)"
    echo ""
    echo "Examples:"
    echo "  $0 start           # Start minikube and deploy everything"
    echo "  $0 rebuild         # Rebuild all custom images"
    echo "  $0 logs relay      # Tail logs for relay service"
    echo "  $0 shell ingestor  # Open shell in ingestor pod"
    exit 1
}

# Default command is "start" when invoked with no arguments.
COMMAND="${1:-start}"

# Fail fast with install pointers if required CLI tools are missing.
check_minikube() {
    if ! command -v minikube &> /dev/null; then
        echo -e "${RED}Error: minikube not found. Please install minikube first.${NC}"
        echo "https://minikube.sigs.k8s.io/docs/start/"
        exit 1
    fi
}

check_kubectl() {
    if ! command -v kubectl &> /dev/null; then
        echo -e "${RED}Error: kubectl not found. Please install kubectl first.${NC}"
        exit 1
    fi
}

# Start (or reuse) minikube, enable ingress, point docker at minikube's
# daemon, and make sure dexorder.local resolves to the minikube IP.
start_minikube() {
    echo -e "${BLUE}Starting minikube...${NC}"
    if minikube status &> /dev/null; then
        echo -e "${GREEN}✓ Minikube already running${NC}"
    else
        minikube start --cpus=6 --memory=12g --driver=docker
        echo -e "${GREEN}✓ Minikube started${NC}"
    fi
    # Enable ingress addon
    echo -e "${BLUE}Enabling ingress addon...${NC}"
    minikube addons enable ingress
    echo -e "${GREEN}✓ Ingress enabled${NC}"
    # Set docker environment. NOTE(review): this only affects this script's
    # process; builds here re-run docker-env themselves for the same reason.
    echo -e "${YELLOW}Setting docker environment to minikube...${NC}"
    eval $(minikube docker-env)
    echo -e "${GREEN}✓ Docker environment set${NC}"
    # Add /etc/hosts entry (requires sudo; skipped if already present)
    MINIKUBE_IP=$(minikube ip)
    if ! grep -q "dexorder.local" /etc/hosts; then
        echo -e "${YELLOW}Adding dexorder.local to /etc/hosts (requires sudo)...${NC}"
        echo "$MINIKUBE_IP dexorder.local" | sudo tee -a /etc/hosts
    else
        echo -e "${GREEN}✓ /etc/hosts entry exists${NC}"
    fi
}

# Build the custom images (all, or just $1) inside minikube's docker daemon,
# and record the resulting dev tags in .dev-image-tag for later deploys.
rebuild_images() {
    local service="${1:-all}"
    echo -e "${BLUE}Building custom images...${NC}"
    # Use minikube's docker daemon so the cluster can pull images locally
    eval $(minikube docker-env)
    # Build images using the standard bin/build script with dev flag
    cd "$ROOT_DIR"
    # Load existing tags so we preserve whichever services we're not rebuilding
    if [ -f "$ROOT_DIR/.dev-image-tag" ]; then
        source "$ROOT_DIR/.dev-image-tag"
    fi
    # Helper: run build, show output, and return just the dev tag via stdout
    # Build output goes to stderr so the caller can capture only the tag via $()
    build_and_get_tag() {
        local svc="$1"
        local output
        output=$("$SCRIPT_DIR/build" "$svc" dev 2>&1) || { echo "$output" >&2; return 1; }
        echo "$output" >&2
        # Extract tag from "built <remote>/ai-<svc>:<tag>" line
        echo "$output" | grep -oE "ai-${svc}:dev[0-9]+" | tail -1 | cut -d: -f2
    }
    if [ "$service" == "all" ] || [ "$service" == "relay" ]; then
        echo -e "${GREEN}→${NC} Building relay..."
        RELAY_TAG=$(build_and_get_tag relay) || exit 1
        # Re-tag from the bin/build naming (dexorder/ai-*) to the manifest
        # naming (dexorder/*) that the k8s deployments reference.
        docker tag "dexorder/ai-relay:$RELAY_TAG" "dexorder/relay:$RELAY_TAG"
    fi
    if [ "$service" == "all" ] || [ "$service" == "ingestor" ]; then
        echo -e "${GREEN}→${NC} Building ingestor..."
        INGEST_TAG=$(build_and_get_tag ingestor) || exit 1
        docker tag "dexorder/ai-ingestor:$INGEST_TAG" "dexorder/ingestor:$INGEST_TAG"
    fi
    if [ "$service" == "all" ] || [ "$service" == "flink" ]; then
        echo -e "${GREEN}→${NC} Building flink..."
        FLINK_TAG=$(build_and_get_tag flink) || exit 1
        docker tag "dexorder/ai-flink:$FLINK_TAG" "dexorder/flink:$FLINK_TAG"
    fi
    # Save the tags for deployment (all three, preserving any we didn't rebuild)
    echo "RELAY_TAG=$RELAY_TAG" > "$ROOT_DIR/.dev-image-tag"
    echo "INGEST_TAG=$INGEST_TAG" >> "$ROOT_DIR/.dev-image-tag"
    echo "FLINK_TAG=$FLINK_TAG" >> "$ROOT_DIR/.dev-image-tag"
    echo -e "${GREEN}✓ Images built: relay=$RELAY_TAG, ingestor=$INGEST_TAG, flink=$FLINK_TAG${NC}"
}

# Full deploy: secrets, configs, then kustomize manifests with the dev image
# tags substituted in place of ":latest". Waits for deployments to come up.
deploy_services() {
    echo -e "${BLUE}Deploying services to minikube...${NC}"
    cd "$ROOT_DIR"
    # Get the dev image tags; fall back to "latest" if nothing was built yet
    if [ -f "$ROOT_DIR/.dev-image-tag" ]; then
        source "$ROOT_DIR/.dev-image-tag"
        echo -e "${BLUE}Using image tags:${NC}"
        echo -e "  Relay:    $RELAY_TAG"
        echo -e "  Ingestor: $INGEST_TAG"
        echo -e "  Flink:    $FLINK_TAG"
    else
        echo -e "${YELLOW}⚠️ No dev tags found. Using 'latest'. Run rebuild first.${NC}"
        RELAY_TAG="latest"
        INGEST_TAG="latest"
        FLINK_TAG="latest"
    fi
    # Create secrets first (if they exist)
    echo -e "${GREEN}→${NC} Checking secrets..."
    if ls deploy/k8s/dev/secrets/*.yaml &> /dev/null; then
        "$SCRIPT_DIR/secret-update" dev || echo -e "${YELLOW}  (Some secrets missing - copy from .example files)${NC}"
    else
        echo -e "${YELLOW}⚠️ No secrets found. Copy from .example files:${NC}"
        echo -e "${YELLOW}   cd deploy/k8s/dev/secrets${NC}"
        echo -e "${YELLOW}   cp ai-secrets.yaml.example ai-secrets.yaml${NC}"
        echo -e "${YELLOW}   # Edit with actual values, then run: bin/secret-update dev${NC}"
    fi
    # Update configs
    echo -e "${GREEN}→${NC} Updating configs..."
    "$SCRIPT_DIR/config-update" dev
    # Apply kustomize with image tag substitution
    echo -e "${GREEN}→${NC} Applying Kubernetes manifests..."
    kubectl kustomize deploy/k8s/dev/ | \
        sed "s|image: dexorder/flink:latest|image: dexorder/flink:$FLINK_TAG|g" | \
        sed "s|image: dexorder/relay:latest|image: dexorder/relay:$RELAY_TAG|g" | \
        sed "s|image: dexorder/ingestor:latest|image: dexorder/ingestor:$INGEST_TAG|g" | \
        kubectl apply -f -
    echo -e "${GREEN}✓ Services deployed${NC}"
    echo ""
    echo -e "${BLUE}Waiting for deployments to be ready...${NC}"
    kubectl wait --for=condition=available --timeout=300s \
        deployment/relay \
        deployment/ingestor \
        deployment/iceberg-catalog \
        deployment/flink-jobmanager \
        deployment/flink-taskmanager \
        2>/dev/null || echo -e "${YELLOW}(Some deployments not ready yet)${NC}"
    echo ""
    echo -e "${GREEN}✓ Dev environment ready!${NC}"
    echo ""
    echo -e "${BLUE}Access the application:${NC}"
    echo -e "  Web UI:     http://dexorder.local/cryptochimp/"
    echo -e "  Backend WS: ws://dexorder.local/ws"
    echo ""
    echo -e "${BLUE}Admin UIs (use port-forward):${NC}"
    echo -e "  Flink UI:      kubectl port-forward svc/flink-jobmanager 8081:8081"
    echo -e "                 Then open http://localhost:8081"
    echo -e "  MinIO Console: kubectl port-forward svc/minio 9001:9001"
    echo -e "                 Then open http://localhost:9001"
    echo ""
    echo -e "${YELLOW}Note: Run 'minikube tunnel' in another terminal for dexorder.local ingress to work${NC}"
}

# Show pods/services/ingress in the current namespace.
show_status() {
    echo -e "${BLUE}Kubernetes Resources:${NC}"
    echo ""
    kubectl get pods,svc,ingress
}

# Tail logs for the pod backing $1. Tries the app=<name> label first, then
# falls back to a name substring match.
show_logs() {
    local service="$1"
    if [ -z "$service" ]; then
        echo -e "${RED}Error: Please specify a service name${NC}"
        echo "Available services: relay, ingestor, flink-jobmanager, flink-taskmanager, kafka, postgres, minio, iceberg-catalog"
        exit 1
    fi
    # Try to find pod by label or name
    local pod=$(kubectl get pods -l app="$service" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
    if [ -z "$pod" ]; then
        pod=$(kubectl get pods | grep "$service" | head -n1 | awk '{print $1}')
    fi
    if [ -z "$pod" ]; then
        echo -e "${RED}Error: No pod found for service '$service'${NC}"
        exit 1
    fi
    echo -e "${BLUE}Tailing logs for $pod...${NC}"
    kubectl logs -f "$pod"
}

# Open an interactive shell in the pod backing $1 (same lookup as show_logs);
# prefers /bin/sh, falls back to /bin/bash.
open_shell() {
    local service="$1"
    if [ -z "$service" ]; then
        echo -e "${RED}Error: Please specify a service name${NC}"
        exit 1
    fi
    local pod=$(kubectl get pods -l app="$service" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
    if [ -z "$pod" ]; then
        pod=$(kubectl get pods | grep "$service" | head -n1 | awk '{print $1}')
    fi
    if [ -z "$pod" ]; then
        echo -e "${RED}Error: No pod found for service '$service'${NC}"
        exit 1
    fi
    echo -e "${BLUE}Opening shell in $pod...${NC}"
    kubectl exec -it "$pod" -- /bin/sh || kubectl exec -it "$pod" -- /bin/bash
}

# Destructive teardown: delete the dev kustomization and ALL PVCs after an
# explicit "yes" confirmation.
clean_all() {
    echo -e "${RED}⚠️ WARNING: This will delete all resources and volumes!${NC}"
    read -p "Are you sure? (yes/no): " confirm
    if [[ "$confirm" != "yes" ]]; then
        echo "Aborted."
        exit 0
    fi
    echo -e "${BLUE}Deleting all resources...${NC}"
    kubectl delete -k deploy/k8s/dev/ || true
    kubectl delete pvc --all || true
    echo -e "${GREEN}✓ Resources deleted${NC}"
}

# Foreground wrapper around "minikube tunnel" (needed for LoadBalancer access).
start_tunnel() {
    echo -e "${BLUE}Starting minikube tunnel...${NC}"
    echo -e "${YELLOW}This requires sudo and will run in the foreground.${NC}"
    echo -e "${YELLOW}Press Ctrl+C to stop.${NC}"
    echo ""
    minikube tunnel
}

# Deploy a single service using kubectl set image with the dev tag (never uses 'latest')
deploy_service() {
    local service="$1"
    if [ -f "$ROOT_DIR/.dev-image-tag" ]; then
        source "$ROOT_DIR/.dev-image-tag"
    fi
    local image
    case "$service" in
        relay)    image="dexorder/relay:$RELAY_TAG" ;;
        ingestor) image="dexorder/ingestor:$INGEST_TAG" ;;
        flink)    image="dexorder/flink:$FLINK_TAG" ;;
        *)
            echo -e "${RED}Unknown service: $service. Use relay, ingestor, or flink.${NC}"
            exit 1
            ;;
    esac
    echo -e "${GREEN}→${NC} Deploying $service with image $image..."
    case "$service" in
        flink)
            # Flink ships one image for both jobmanager and taskmanager
            kubectl set image deployment/flink-jobmanager flink-jobmanager=$image
            kubectl set image deployment/flink-taskmanager flink-taskmanager=$image
            ;;
        *)
            kubectl set image deployment/$service $service=$image
            ;;
    esac
    echo -e "${GREEN}✓ $service updated to $image${NC}"
}

# Main command routing
check_minikube
check_kubectl
case "$COMMAND" in
    start)
        start_minikube
        rebuild_images
        deploy_services
        ;;
    stop)
        echo -e "${BLUE}Stopping minikube...${NC}"
        minikube stop
        echo -e "${GREEN}✓ Minikube stopped${NC}"
        ;;
    restart)
        # With an explicit service, rebuild+redeploy just that one;
        # otherwise rebuild and redeploy everything.
        if [ -n "$2" ]; then
            rebuild_images "$2"
            deploy_service "$2"
        else
            rebuild_images
            deploy_services
        fi
        ;;
    rebuild)
        rebuild_images "${2:-}"
        ;;
    deploy)
        if [ -n "$2" ]; then
            deploy_service "$2"
        else
            deploy_services
        fi
        ;;
    status)
        show_status
        ;;
    logs)
        show_logs "$2"
        ;;
    shell)
        open_shell "$2"
        ;;
    clean)
        clean_all
        ;;
    tunnel)
        start_tunnel
        ;;
    *)
        usage
        ;;
esac

117
bin/secret-update Executable file
View File

@@ -0,0 +1,117 @@
#!/usr/bin/env bash
# bin/secret-update - Apply Kubernetes Secret manifests from the YAML files
# under deploy/k8s/<env>/secrets. Secret files are git-ignored; operators
# create them by copying the committed *.yaml.example templates.
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print usage and exit non-zero (also used as the error path for bad args).
usage() {
    echo "Usage: $0 [ENVIRONMENT] [SECRET_NAME]"
    echo ""
    echo "Update Kubernetes secrets from YAML files"
    echo ""
    echo "Arguments:"
    echo "  ENVIRONMENT    Target environment: dev or prod (default: dev)"
    echo "  SECRET_NAME    Specific secret to update (optional, updates all if not specified)"
    echo ""
    echo "Available secrets:"
    echo "  ai-secrets        - AI backend API keys"
    echo "  postgres-secret   - PostgreSQL password"
    echo "  minio-secret      - MinIO credentials"
    echo "  ingestor-secrets  - Exchange API keys"
    echo ""
    echo "Examples:"
    echo "  $0                       # Update all dev secrets"
    echo "  $0 dev                   # Update all dev secrets"
    echo "  $0 dev ai-secrets        # Update only ai-secrets in dev"
    echo "  $0 prod                  # Update all prod secrets"
    echo "  $0 prod minio-secret     # Update only minio-secret in prod"
    exit 1
}

# Parse arguments: ENV defaults to "dev"; SECRET_NAME empty means "all".
ENV="${1:-dev}"
SECRET_NAME="${2:-}"
if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then
    echo -e "${RED}Error: Environment must be 'dev' or 'prod'${NC}"
    usage
fi

SECRETS_DIR="$ROOT_DIR/deploy/k8s/$ENV/secrets"
if [ ! -d "$SECRETS_DIR" ]; then
    echo -e "${RED}Error: Secrets directory not found: $SECRETS_DIR${NC}"
    exit 1
fi

# Get kubectl context — for prod, show it and require an explicit "yes"
# before touching anything (guards against applying to the wrong cluster).
if [[ "$ENV" == "prod" ]]; then
    CONTEXT=$(kubectl config current-context)
    echo -e "${YELLOW}⚠️ WARNING: Updating PRODUCTION secrets!${NC}"
    echo -e "${YELLOW}Current kubectl context: $CONTEXT${NC}"
    read -p "Are you sure you want to continue? (yes/no): " confirm
    if [[ "$confirm" != "yes" ]]; then
        echo "Aborted."
        exit 0
    fi
fi

# Apply one secret manifest file. Returns 1 (with a hint about the .example
# template) if the file is missing, so the caller can count failures.
apply_secret() {
    local secret_file="$1"
    local secret_basename=$(basename "$secret_file" .yaml)
    if [ ! -f "$secret_file" ]; then
        echo -e "${RED}✗ Secret file not found: $secret_file${NC}"
        echo -e "${YELLOW}  Copy from ${secret_basename}.yaml.example and fill in values${NC}"
        return 1
    fi
    echo -e "${GREEN}→${NC} Applying $secret_basename..."
    kubectl apply -f "$secret_file"
    echo -e "${GREEN}✓${NC} $secret_basename updated"
}

# Update specific secret or all secrets. In the "all" case, failures are
# counted rather than aborting, so every present secret still gets applied.
if [ -n "$SECRET_NAME" ]; then
    # Update single secret
    SECRET_FILE="$SECRETS_DIR/$SECRET_NAME.yaml"
    apply_secret "$SECRET_FILE"
else
    # Update all secrets
    echo -e "${GREEN}Updating all $ENV secrets...${NC}"
    echo ""
    SECRETS=(
        "ai-secrets"
        "postgres-secret"
        "minio-secret"
        "ingestor-secrets"
        "flink-secrets"
    )
    FAILED=0
    for secret in "${SECRETS[@]}"; do
        SECRET_FILE="$SECRETS_DIR/$secret.yaml"
        if ! apply_secret "$SECRET_FILE"; then
            FAILED=$((FAILED + 1))
        fi
    done
    echo ""
    if [ $FAILED -gt 0 ]; then
        echo -e "${YELLOW}⚠️ $FAILED secret(s) failed to apply${NC}"
        echo -e "${YELLOW}Create missing secret files by copying from .example templates:${NC}"
        echo -e "${YELLOW}  cd $SECRETS_DIR${NC}"
        echo -e "${YELLOW}  cp SECRET_NAME.yaml.example SECRET_NAME.yaml${NC}"
        echo -e "${YELLOW}  # Edit SECRET_NAME.yaml with actual values${NC}"
        exit 1
    else
        echo -e "${GREEN}✓ All secrets updated successfully${NC}"
    fi
fi

259
client-py/README.md Normal file
View File

@@ -0,0 +1,259 @@
# DexOrder Python Client Library
High-level Python API for accessing historical OHLC data from the DexOrder trading platform.
## Features
- **Smart Caching**: Automatically checks Iceberg warehouse before requesting new data
- **Async Request/Response**: Non-blocking historical data requests via relay
- **Gap Detection**: Identifies and requests only missing data ranges
- **Transparent Access**: Single API for both cached and on-demand data
## Installation
```bash
cd redesign/client-py
pip install -e .
```
## Quick Start
```python
import asyncio
from dexorder import OHLCClient
async def main():
# Initialize client
client = OHLCClient(
iceberg_catalog_uri="http://iceberg-catalog:8181",
relay_endpoint="tcp://relay:5555",
notification_endpoint="tcp://flink:5557"
)
# Start background notification listener
await client.start()
try:
# Fetch OHLC data (automatically checks cache and requests missing data)
df = await client.fetch_ohlc(
ticker="BINANCE:BTC/USDT",
period_seconds=3600, # 1-hour candles
start_time=1735689600000000, # microseconds
end_time=1736294399000000
)
print(f"Fetched {len(df)} candles")
print(df.head())
finally:
await client.stop()
# Run
asyncio.run(main())
```
## Using Context Manager
```python
async def main():
async with OHLCClient(...) as client:
df = await client.fetch_ohlc(...)
```
## Architecture
### Components
1. **OHLCClient**: High-level API with smart caching
2. **IcebergClient**: Direct queries to Iceberg warehouse
3. **HistoryClient**: Submit requests via relay and wait for notifications
### Data Flow
```
┌─────────┐
│ Client │
└────┬────┘
│ 1. fetch_ohlc()
┌─────────────────┐
│ OHLCClient │
└────┬────────────┘
│ 2. Check Iceberg
┌─────────────────┐ ┌──────────┐
│ IcebergClient │─────▶│ Iceberg │
└─────────────────┘ └──────────┘
│ 3. Missing data?
┌─────────────────┐ ┌──────────┐
│ HistoryClient │─────▶│ Relay │
└────┬────────────┘ └──────────┘
│ │
│ 4. Wait for notification │
│◀─────────────────────────┘
│ 5. Query Iceberg again
┌─────────────────┐
│ Return data │
└─────────────────┘
```
## API Reference
### OHLCClient
#### `__init__(iceberg_catalog_uri, relay_endpoint, notification_endpoint, namespace="trading")`
Initialize the client with connection parameters.
#### `async fetch_ohlc(ticker, period_seconds, start_time, end_time, request_timeout=30.0)`
Fetch OHLC data with smart caching.
**Parameters:**
- `ticker` (str): Market identifier (e.g., "BINANCE:BTC/USDT")
- `period_seconds` (int): OHLC period in seconds (60, 300, 3600, 86400, etc.)
- `start_time` (int): Start timestamp in microseconds
- `end_time` (int): End timestamp in microseconds
- `request_timeout` (float): Timeout for historical requests in seconds
**Returns:** `pd.DataFrame` with columns:
- `ticker`: Market identifier
- `period_seconds`: Period in seconds
- `timestamp`: Candle timestamp (microseconds)
- `open`, `high`, `low`, `close`: Prices (integer format)
- `volume`: Trading volume
- Additional fields: `buy_vol`, `sell_vol`, `open_interest`, etc.
### IcebergClient
Direct access to Iceberg warehouse.
#### `query_ohlc(ticker, period_seconds, start_time, end_time)`
Query OHLC data directly from Iceberg.
#### `find_missing_ranges(ticker, period_seconds, start_time, end_time)`
Identify missing data ranges. Returns list of `(start_time, end_time)` tuples.
#### `has_data(ticker, period_seconds, start_time, end_time)`
Check if any data exists for the given parameters.
### HistoryClient
Low-level client for submitting historical data requests.
**IMPORTANT**: Always call `connect()` before making requests to prevent a race condition.
#### `async connect()`
Connect to relay and start notification listener. **MUST be called before making any requests.**
This subscribes to the notification topic `RESPONSE:{client_id}` BEFORE any requests are sent,
preventing the race condition where notifications arrive before subscription.
#### `async request_historical_ohlc(ticker, period_seconds, start_time, end_time, timeout=30.0, limit=None)`
Submit historical data request and wait for completion notification.
**Returns:** dict with keys:
- `request_id`: The request ID
- `status`: 'OK', 'NOT_FOUND', or 'ERROR'
- `error_message`: Error message if status is 'ERROR'
- `iceberg_namespace`, `iceberg_table`, `row_count`: Available when status is 'OK'
**Example:**
```python
from dexorder import HistoryClient
client = HistoryClient(
relay_endpoint="tcp://relay:5559",
notification_endpoint="tcp://relay:5558"
)
# CRITICAL: Connect first to prevent race condition
await client.connect()
# Now safe to make requests
result = await client.request_historical_ohlc(
ticker="BINANCE:BTC/USDT",
period_seconds=3600,
start_time=1735689600000000,
end_time=1736294399000000
)
await client.close()
```
## Configuration
The client requires the following endpoints:
- **Iceberg Catalog URI**: REST API endpoint for Iceberg metadata (default: `http://iceberg-catalog:8181`)
- **Relay Endpoint**: ZMQ REQ/REP endpoint for submitting requests (default: `tcp://relay:5555`)
- **Notification Endpoint**: ZMQ PUB/SUB endpoint for receiving notifications (default: `tcp://flink:5557`)
## Development
### Generate Protobuf Files
```bash
cd redesign/protobuf
protoc -I . --python_out=../client-py/dexorder ingestor.proto ohlc.proto
```
### Run Tests
```bash
pytest tests/
```
## Examples
See `../relay/test/async_client.py` for a complete example.
## Timestamp Format
All timestamps are in **microseconds since epoch**:
```python
# Convert from datetime
from datetime import datetime, timezone
dt = datetime(2024, 1, 1, tzinfo=timezone.utc)
timestamp_micros = int(dt.timestamp() * 1_000_000)
# Convert to datetime
dt = datetime.fromtimestamp(timestamp_micros / 1_000_000, tz=timezone.utc)
```
## Period Seconds
Common period values:
- `60` - 1 minute
- `300` - 5 minutes
- `900` - 15 minutes
- `3600` - 1 hour
- `14400` - 4 hours
- `86400` - 1 day
- `604800` - 1 week
## Error Handling
```python
try:
df = await client.fetch_ohlc(...)
except TimeoutError:
print("Request timed out")
except ValueError as e:
print(f"Request failed: {e}")
except ConnectionError:
print("Unable to connect to relay")
```
## License
Internal use only.

3
client-py/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
# Package-level logger shared by the client-py package; submodules can log
# through this handle instead of creating their own.
import logging

log = logging.getLogger(__name__)

View File

@@ -0,0 +1,16 @@
"""
DexOrder Trading Platform Python Client
Provides high-level APIs for:
- Historical OHLC data retrieval with smart caching
- Async request/response via relay
- Iceberg data warehouse queries
"""
__version__ = "0.1.0"
from .ohlc_client import OHLCClient
from .iceberg_client import IcebergClient
from .history_client import HistoryClient
__all__ = ['OHLCClient', 'IcebergClient', 'HistoryClient']

View File

@@ -0,0 +1,296 @@
"""
HistoryClient - Submit historical data requests via relay and wait for notifications
RACE CONDITION PREVENTION:
The client must subscribe to notification topics BEFORE submitting requests.
Notification topics are deterministic: RESPONSE:{client_id} or HISTORY_READY:{request_id}
Since both are client-generated, we can subscribe before sending the request.
"""
import asyncio
import uuid
import zmq
import zmq.asyncio
from typing import Optional
import struct
import sys
import os
# Import protobuf messages (assuming they're generated in ../protobuf)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../protobuf'))
try:
from ingestor_pb2 import SubmitHistoricalRequest, SubmitResponse, HistoryReadyNotification
except ImportError:
print("Warning: Protobuf files not found. Run: protoc -I ../protobuf --python_out=../protobuf ../protobuf/*.proto")
raise
class HistoryClient:
"""
Client for submitting historical data requests via relay.
IMPORTANT: Call connect() before making any requests. This ensures the notification
listener is running and subscribed BEFORE any requests are submitted, preventing
the race condition where notifications arrive before subscription.
Provides:
- Submit historical OHLC data requests
- Wait for completion notifications
- Handle request timeouts and errors
"""
    def __init__(self, relay_endpoint: str, notification_endpoint: str, client_id: Optional[str] = None):
        """
        Initialize history client.

        Args:
            relay_endpoint: ZMQ endpoint for relay client requests (e.g., "tcp://relay:5559")
            notification_endpoint: ZMQ endpoint for notifications (e.g., "tcp://relay:5558")
            client_id: Optional client ID for notification routing. If not provided, generates one.
                All notifications for this client will be sent to topic RESPONSE:{client_id}
        """
        self.relay_endpoint = relay_endpoint
        self.notification_endpoint = notification_endpoint
        # Auto-generated IDs use an 8-hex-char uuid suffix, e.g. "client-1a2b3c4d".
        self.client_id = client_id or f"client-{uuid.uuid4().hex[:8]}"
        # One asyncio-aware ZMQ context shared by all sockets this client opens.
        self.context = zmq.asyncio.Context()
        # request_id -> {'event': asyncio.Event, 'result': dict | None};
        # entries are registered before each request is sent (see
        # request_historical_ohlc) and signalled by the notification listener.
        self.pending_requests = {}
        # Background task created by connect(); None until then.
        self.notification_task = None
        # Guard flag: requests are refused until connect() has run.
        self.connected = False
    async def connect(self):
        """
        Connect to relay and start notification listener.

        CRITICAL: This MUST be called before making any requests to prevent race condition.
        The notification listener subscribes to the deterministic topic RESPONSE:{client_id}
        BEFORE any requests are sent, ensuring we never miss notifications.

        Idempotent: calling connect() again after a successful connect is a no-op.
        """
        if self.connected:
            return
        # Start notification listener FIRST
        self.notification_task = asyncio.create_task(self._notification_listener())
        # Give the listener a moment to connect and subscribe.
        # NOTE(review): a fixed 0.1s sleep is best-effort, not a guaranteed
        # subscription barrier — on a slow broker the SUB handshake could still
        # be in flight; consider having the listener set an asyncio.Event once
        # subscribed and awaiting that here instead.
        await asyncio.sleep(0.1)
        self.connected = True
async def request_historical_ohlc(
self,
ticker: str,
period_seconds: int,
start_time: int,
end_time: int,
timeout: float = 30.0,
limit: Optional[int] = None
) -> dict:
"""
Request historical OHLC data and wait for completion notification.
IMPORTANT: Call connect() before using this method.
Args:
ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
period_seconds: OHLC period in seconds
start_time: Start timestamp in microseconds
end_time: End timestamp in microseconds
timeout: Request timeout in seconds (default: 30)
limit: Optional limit on number of candles
Returns:
dict with keys:
- request_id: The request ID
- status: 'OK', 'NOT_FOUND', or 'ERROR'
- error_message: Error message if status is 'ERROR'
- iceberg_namespace: Iceberg namespace (if status is 'OK')
- iceberg_table: Iceberg table name (if status is 'OK')
- row_count: Number of rows written (if status is 'OK')
Raises:
TimeoutError: If request times out
ConnectionError: If unable to connect to relay or not connected
"""
if not self.connected:
raise ConnectionError("Client not connected. Call connect() first to prevent race condition.")
request_id = str(uuid.uuid4())
# Register the pending request BEFORE sending to eliminate any race condition.
# The notification topic is deterministic (RESPONSE:{client_id}) and the listener
# is already subscribed, so we just need pending_requests populated before Flink
# could possibly publish the notification.
event = asyncio.Event()
self.pending_requests[request_id] = {
'event': event,
'result': None
}
try:
# Create protobuf request with client_id for notification routing
request = SubmitHistoricalRequest(
request_id=request_id,
ticker=ticker,
period_seconds=period_seconds,
start_time=start_time,
end_time=end_time,
client_id=self.client_id # CRITICAL: Enables deterministic notification topic
)
if limit is not None:
request.limit = limit
# Encode with ZMQ envelope: version (1 byte) + message type (1 byte) + protobuf payload
MESSAGE_TYPE_SUBMIT_HISTORICAL = 0x10
version_frame = struct.pack('B', 0x01)
message_frame = struct.pack('B', MESSAGE_TYPE_SUBMIT_HISTORICAL) + request.SerializeToString()
# Send request to relay
socket = self.context.socket(zmq.REQ)
socket.connect(self.relay_endpoint)
try:
# Send two frames: version, then message
await socket.send(version_frame, zmq.SNDMORE)
await socket.send(message_frame)
# Wait for immediate response
response_frames = []
while True:
frame = await asyncio.wait_for(socket.recv(), timeout=5.0)
response_frames.append(frame)
if not socket.get(zmq.RCVMORE):
break
# Parse response (expect 2 frames: version, message)
if len(response_frames) < 2:
raise ConnectionError(f"Expected 2 frames, got {len(response_frames)}")
msg_type = response_frames[1][0]
response_payload = response_frames[1][1:]
response = SubmitResponse()
response.ParseFromString(response_payload)
if response.status != 0:
raise ConnectionError(f"Request failed: {response.error_message}")
finally:
socket.close()
# Wait for Flink notification with timeout
try:
await asyncio.wait_for(event.wait(), timeout=timeout)
result = self.pending_requests[request_id]['result']
return result
except asyncio.TimeoutError:
raise TimeoutError(f"Request {request_id} timed out after {timeout}s")
finally:
self.pending_requests.pop(request_id, None)
async def _notification_listener(self):
"""
Internal notification listener that subscribes to RESPONSE:{client_id} topic.
CRITICAL: This runs BEFORE any requests are submitted to prevent race condition.
The notification topic is deterministic based on our client_id.
"""
socket = self.context.socket(zmq.SUB)
socket.connect(self.notification_endpoint)
# Subscribe to our client-specific topic
# CRITICAL: This topic is deterministic (RESPONSE:{client_id}) and we know it
# before sending any requests, so we can subscribe first to prevent race condition
notification_topic = f"RESPONSE:{self.client_id}"
socket.setsockopt_string(zmq.SUBSCRIBE, notification_topic)
try:
while True:
# Receive multi-frame message: [topic][version][message]
frames = []
while True:
frame = await socket.recv()
frames.append(frame)
if not socket.get(zmq.RCVMORE):
break
# Parse frames
if len(frames) < 3:
continue
topic_frame = frames[0]
version_frame = frames[1]
message_frame = frames[2]
# Validate version
if len(version_frame) != 1 or version_frame[0] != 0x01:
continue
# Validate message type
if len(message_frame) < 1:
continue
msg_type = message_frame[0]
payload = message_frame[1:]
MESSAGE_TYPE_HISTORY_READY = 0x12
if msg_type != MESSAGE_TYPE_HISTORY_READY:
continue
# Parse notification (protobuf)
try:
notification = HistoryReadyNotification()
notification.ParseFromString(payload)
except Exception as e:
print(f"Warning: failed to parse notification payload: {e}")
continue
request_id = notification.request_id
# Check if we're waiting for this request
if request_id in self.pending_requests:
# Map protobuf enum to string status
# NotificationStatus: OK=0, NOT_FOUND=1, ERROR=2, TIMEOUT=3
status_map = {0: 'OK', 1: 'NOT_FOUND', 2: 'ERROR', 3: 'TIMEOUT'}
status = status_map.get(notification.status, 'ERROR')
result = {
'request_id': request_id,
'status': status,
'error_message': notification.error_message if notification.error_message else None
}
# Add Iceberg details if available
if status == 'OK':
result.update({
'iceberg_namespace': notification.iceberg_namespace,
'iceberg_table': notification.iceberg_table,
'row_count': notification.row_count,
'ticker': notification.ticker,
'period_seconds': notification.period_seconds,
'start_time': notification.start_time,
'end_time': notification.end_time,
})
self.pending_requests[request_id]['result'] = result
self.pending_requests[request_id]['event'].set()
except asyncio.CancelledError:
pass
finally:
socket.close()
async def close(self):
"""
Close the client and cleanup resources.
"""
if self.notification_task:
self.notification_task.cancel()
try:
await self.notification_task
except asyncio.CancelledError:
pass
self.context.term()
self.connected = False

View File

@@ -0,0 +1,179 @@
"""
IcebergClient - Query OHLC data from Iceberg warehouse (Iceberg 1.10.1)
"""
from typing import Optional, List, Tuple
import pandas as pd
from pyiceberg.catalog import load_catalog
from pyiceberg.expressions import (
And,
EqualTo,
GreaterThanOrEqual,
LessThanOrEqual
)
class IcebergClient:
    """
    Client for querying OHLC data from Iceberg warehouse (Iceberg 1.10.1).

    Note: Iceberg 1.x does not enforce primary keys at the table level.
    Deduplication is handled by:
    - Flink upsert mode with equality delete files
    - PyIceberg automatically filters deleted rows during queries
    - Last-write-wins semantics for duplicates

    Provides:
    - Query OHLC data by ticker, period, and time range
    - Identify missing data gaps
    - Efficient partition pruning for large datasets
    """

    def __init__(
        self,
        catalog_uri: str,
        namespace: str = "trading",
        s3_endpoint: Optional[str] = None,
        s3_access_key: Optional[str] = None,
        s3_secret_key: Optional[str] = None,
    ):
        """
        Initialize Iceberg client.

        Args:
            catalog_uri: URI of the Iceberg catalog (e.g., "http://iceberg-catalog:8181")
            namespace: Iceberg namespace (default: "trading")
            s3_endpoint: S3/MinIO endpoint URL (e.g., "http://localhost:9000")
            s3_access_key: S3/MinIO access key
            s3_secret_key: S3/MinIO secret key
        """
        self.catalog_uri = catalog_uri
        self.namespace = namespace
        catalog_props = {"uri": catalog_uri}
        if s3_endpoint:
            catalog_props["s3.endpoint"] = s3_endpoint
            # MinIO requires path-style addressing (no virtual-host buckets)
            catalog_props["s3.path-style-access"] = "true"
        if s3_access_key:
            catalog_props["s3.access-key-id"] = s3_access_key
        if s3_secret_key:
            catalog_props["s3.secret-access-key"] = s3_secret_key
        self.catalog = load_catalog("trading", **catalog_props)
        self.table = self.catalog.load_table(f"{namespace}.ohlc")

    @staticmethod
    def _row_filter(ticker: str, period_seconds: int, start_time: int, end_time: int):
        """Build the combined scan predicate for one (ticker, period, time range) query."""
        return And(
            EqualTo("ticker", ticker),
            EqualTo("period_seconds", period_seconds),
            GreaterThanOrEqual("timestamp", start_time),
            LessThanOrEqual("timestamp", end_time),
        )

    def _refresh_table(self) -> None:
        """Reload table metadata to pick up snapshots committed after this client was initialized."""
        self.table = self.catalog.load_table(f"{self.namespace}.ohlc")

    def query_ohlc(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> pd.DataFrame:
        """
        Query OHLC data for a specific ticker, period, and time range.

        Args:
            ticker: Market identifier (e.g., "BINANCE:BTC/USDT")
            period_seconds: OHLC period in seconds (60, 300, 3600, etc.)
            start_time: Start timestamp in microseconds (inclusive)
            end_time: End timestamp in microseconds (inclusive)

        Returns:
            DataFrame with OHLC data sorted by timestamp (empty if no rows match)
        """
        # Refresh first: without it, rows committed by Flink after __init__
        # would be invisible to this scan.
        self._refresh_table()
        df = self.table.scan(
            row_filter=self._row_filter(ticker, period_seconds, start_time, end_time)
        ).to_pandas()
        if not df.empty:
            df = df.sort_values("timestamp")
        return df

    def find_missing_ranges(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> List[Tuple[int, int]]:
        """
        Identify missing data ranges in the requested time period.

        Expected candle timestamps are generated every period_seconds starting
        at start_time; consecutive missing timestamps are consolidated into
        (first_missing, last_missing) ranges.

        NOTE(review): assumes start_time is aligned to the candle grid — if
        stored candle timestamps are aligned to period boundaries rather than
        to start_time, an unaligned start_time would report everything as
        missing. TODO confirm with callers.

        Args:
            ticker: Market identifier
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds

        Returns:
            List of (start_time, end_time) tuples for missing ranges;
            empty list if no data is missing.
        """
        df = self.query_ohlc(ticker, period_seconds, start_time, end_time)
        if df.empty:
            # All data is missing
            return [(start_time, end_time)]
        # Convert period to microseconds (timestamps are stored in micros)
        period_micros = period_seconds * 1_000_000
        # Generate expected timestamps; end_time + 1 keeps an exactly-aligned
        # final candle inclusive.
        expected_timestamps = list(range(start_time, end_time + 1, period_micros))
        actual_timestamps = set(df['timestamp'].values)
        # Find gaps
        missing = sorted(set(expected_timestamps) - actual_timestamps)
        if not missing:
            return []
        # Consolidate consecutive missing timestamps into ranges
        ranges = []
        range_start = missing[0]
        prev_ts = missing[0]
        for ts in missing[1:]:
            if ts > prev_ts + period_micros:
                # Gap between missing timestamps - close previous range
                ranges.append((range_start, prev_ts))
                range_start = ts
            prev_ts = ts
        # Close final range
        ranges.append((range_start, prev_ts))
        return ranges

    def has_data(
        self,
        ticker: str,
        period_seconds: int,
        start_time: int,
        end_time: int
    ) -> bool:
        """
        Check if any data exists for the given parameters.

        Uses a limit-1 scan so existence checks don't materialize the entire
        matching dataset (unlike query_ohlc).

        Args:
            ticker: Market identifier
            period_seconds: OHLC period in seconds
            start_time: Start timestamp in microseconds
            end_time: End timestamp in microseconds

        Returns:
            True if at least one candle exists, False otherwise
        """
        self._refresh_table()
        df = self.table.scan(
            row_filter=self._row_filter(ticker, period_seconds, start_time, end_time),
            limit=1,
        ).to_pandas()
        return not df.empty

Some files were not shown because too many files have changed in this diff Show More