feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions

View File

@@ -44,9 +44,6 @@ spec:
- name: wait-for-dragonfly
image: busybox:1.36
command: ['sh', '-c', 'until nc -z dragonfly 6379; do echo waiting for dragonfly; sleep 2; done;']
- name: wait-for-qdrant
image: busybox:1.36
command: ['sh', '-c', 'until nc -z qdrant 6333; do echo waiting for qdrant; sleep 2; done;']
- name: wait-for-iceberg-catalog
image: busybox:1.36
command: ['sh', '-c', 'until nc -z iceberg-catalog 8181; do echo waiting for iceberg-catalog; sleep 2; done;']

View File

@@ -27,29 +27,22 @@ data:
model_provider: deepinfra
model: zai-org/GLM-5
# License tier model configuration
# License tier model configuration (null = fall back to defaults.model)
license_models:
# Free tier models
free:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
allowed_models:
- zai-org/GLM-5
default: ~
cost_optimized: ~
complex: ~
# Pro tier models
pro:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
blocked_models:
- Qwen/Qwen3-235B-A22B-Instruct-2507
default: ~
cost_optimized: ~
complex: ~
# Enterprise tier models
enterprise:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
default: ~
cost_optimized: ~
complex: ~
# Kubernetes configuration
kubernetes:
@@ -70,11 +63,6 @@ data:
redis:
url: redis://dragonfly:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://qdrant:6333
collection: gateway_memory
# Iceberg (for durable storage via REST catalog)
iceberg:
catalog_uri: http://iceberg-catalog:8181

View File

@@ -45,68 +45,6 @@ spec:
memory: "512Mi"
cpu: "500m"
---
# Qdrant (Vector database for RAG)
apiVersion: v1
kind: Service
metadata:
name: qdrant
spec:
selector:
app: qdrant
ports:
- name: http
protocol: TCP
port: 6333
targetPort: 6333
- name: grpc
protocol: TCP
port: 6334
targetPort: 6334
type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: qdrant
spec:
serviceName: qdrant
replicas: 1
selector:
matchLabels:
app: qdrant
template:
metadata:
labels:
app: qdrant
spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
ports:
- containerPort: 6333
name: http
- containerPort: 6334
name: grpc
resources:
requests:
memory: "512Mi"
cpu: "200m"
limits:
memory: "1Gi"
cpu: "1000m"
volumeMounts:
- name: qdrant-data
mountPath: /qdrant/storage
volumeClaimTemplates:
- metadata:
name: qdrant-data
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: dev-ephemeral
resources:
requests:
storage: 10Gi
---
# Kafka (KRaft mode - no Zookeeper needed)
# Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because:
# - cp-kafka's entrypoint script has issues with KRaft configuration

View File

@@ -21,30 +21,6 @@ data:
model_provider: deepinfra
model: zai-org/GLM-5
# License tier model configuration
license_models:
# Free tier models
free:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
allowed_models:
- zai-org/GLM-5
# Pro tier models
pro:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
blocked_models:
- Qwen/Qwen3-235B-A22B-Instruct-2507
# Enterprise tier models
enterprise:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
# Kubernetes configuration
kubernetes:
namespace: sandbox
@@ -59,11 +35,6 @@ data:
redis:
url: redis://dragonfly:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://qdrant:6333
collection: gateway_memory
# Agent configuration
agent:
# Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end

View File

@@ -45,67 +45,6 @@ spec:
memory: "512Mi"
cpu: "500m"
---
# Qdrant (Vector database for RAG)
apiVersion: v1
kind: Service
metadata:
name: qdrant
spec:
selector:
app: qdrant
ports:
- name: http
protocol: TCP
port: 6333
targetPort: 6333
- name: grpc
protocol: TCP
port: 6334
targetPort: 6334
type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: qdrant
spec:
serviceName: qdrant
replicas: 1
selector:
matchLabels:
app: qdrant
template:
metadata:
labels:
app: qdrant
spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
ports:
- containerPort: 6333
name: http
- containerPort: 6334
name: grpc
resources:
requests:
memory: "512Mi"
cpu: "200m"
limits:
memory: "1Gi"
cpu: "1000m"
volumeMounts:
- name: qdrant-data
mountPath: /qdrant/storage
volumeClaimTemplates:
- metadata:
name: qdrant-data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 10Gi
---
# Kafka (KRaft mode - no Zookeeper needed)
apiVersion: v1
kind: Service

View File

@@ -11,7 +11,7 @@ resources:
- ../base
# Add the 'ai' namespace (base only creates 'sandbox')
- namespaces.yaml
# Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg)
# Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, dragonfly, iceberg)
- infrastructure.yaml
# Sandbox namespace resources (go to sandbox namespace, not ai)
- sandbox-config.yaml

View File

@@ -19,6 +19,7 @@ stringData:
# LLM Provider API Keys
llm_providers:
deepinfra_api_key: "{{ op://AI Prod/Gateway/deepinfra_api_key }}"
anthropic_api_key: "{{ op://AI Prod/Gateway/anthropic_api_key }}"
# Search API Keys
search:
@@ -36,10 +37,6 @@ stringData:
push:
service_key: ""
# Qdrant API key (optional, for hosted Qdrant)
qdrant:
api_key: ""
# Iceberg S3 credentials (must match minio-secret)
iceberg:
s3_access_key: "{{ op://AI Prod/MinIO/access_key }}"