feat: add @tag model override support and remove Qdrant dependencies
- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
@@ -21,30 +21,6 @@ data:
|
||||
model_provider: deepinfra
|
||||
model: zai-org/GLM-5
|
||||
|
||||
# License tier model configuration
|
||||
license_models:
|
||||
# Free tier models
|
||||
free:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: zai-org/GLM-5
|
||||
allowed_models:
|
||||
- zai-org/GLM-5
|
||||
|
||||
# Pro tier models
|
||||
pro:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: zai-org/GLM-5
|
||||
blocked_models:
|
||||
- Qwen/Qwen3-235B-A22B-Instruct-2507
|
||||
|
||||
# Enterprise tier models
|
||||
enterprise:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
|
||||
|
||||
# Kubernetes configuration
|
||||
kubernetes:
|
||||
namespace: sandbox
|
||||
@@ -59,11 +35,6 @@ data:
|
||||
redis:
|
||||
url: redis://dragonfly:6379
|
||||
|
||||
# Qdrant (for RAG vector search)
|
||||
qdrant:
|
||||
url: http://qdrant:6333
|
||||
collection: gateway_memory
|
||||
|
||||
# Agent configuration
|
||||
agent:
|
||||
# Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end
|
||||
|
||||
@@ -45,67 +45,6 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
---
|
||||
# Qdrant (Vector database for RAG)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: qdrant
|
||||
spec:
|
||||
selector:
|
||||
app: qdrant
|
||||
ports:
|
||||
- name: http
|
||||
protocol: TCP
|
||||
port: 6333
|
||||
targetPort: 6333
|
||||
- name: grpc
|
||||
protocol: TCP
|
||||
port: 6334
|
||||
targetPort: 6334
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: qdrant
|
||||
spec:
|
||||
serviceName: qdrant
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: qdrant
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: qdrant
|
||||
spec:
|
||||
containers:
|
||||
- name: qdrant
|
||||
image: qdrant/qdrant:latest
|
||||
ports:
|
||||
- containerPort: 6333
|
||||
name: http
|
||||
- containerPort: 6334
|
||||
name: grpc
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "1000m"
|
||||
volumeMounts:
|
||||
- name: qdrant-data
|
||||
mountPath: /qdrant/storage
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: qdrant-data
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
---
|
||||
# Kafka (KRaft mode - no Zookeeper needed)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
@@ -11,7 +11,7 @@ resources:
|
||||
- ../base
|
||||
# Add the 'ai' namespace (base only creates 'sandbox')
|
||||
- namespaces.yaml
|
||||
# Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, qdrant, dragonfly, iceberg)
|
||||
# Prod infrastructure (postgres, minio, kafka, flink, relay, ingestor, dragonfly, iceberg)
|
||||
- infrastructure.yaml
|
||||
# Sandbox namespace resources (go to sandbox namespace, not ai)
|
||||
- sandbox-config.yaml
|
||||
|
||||
@@ -19,6 +19,7 @@ stringData:
|
||||
# LLM Provider API Keys
|
||||
llm_providers:
|
||||
deepinfra_api_key: "{{ op://AI Prod/Gateway/deepinfra_api_key }}"
|
||||
anthropic_api_key: "{{ op://AI Prod/Gateway/anthropic_api_key }}"
|
||||
|
||||
# Search API Keys
|
||||
search:
|
||||
@@ -36,10 +37,6 @@ stringData:
|
||||
push:
|
||||
service_key: ""
|
||||
|
||||
# Qdrant API key (optional, for hosted Qdrant)
|
||||
qdrant:
|
||||
api_key: ""
|
||||
|
||||
# Iceberg S3 credentials (must match minio-secret)
|
||||
iceberg:
|
||||
s3_access_key: "{{ op://AI Prod/MinIO/access_key }}"
|
||||
|
||||
Reference in New Issue
Block a user