feat: add @tag model override support and remove Qdrant dependencies
- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
@@ -27,29 +27,22 @@ data:
|
||||
model_provider: deepinfra
|
||||
model: zai-org/GLM-5
|
||||
|
||||
# License tier model configuration
|
||||
# License tier model configuration (null = fall back to defaults.model)
|
||||
license_models:
|
||||
# Free tier models
|
||||
free:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: zai-org/GLM-5
|
||||
allowed_models:
|
||||
- zai-org/GLM-5
|
||||
default: ~
|
||||
cost_optimized: ~
|
||||
complex: ~
|
||||
|
||||
# Pro tier models
|
||||
pro:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: zai-org/GLM-5
|
||||
blocked_models:
|
||||
- Qwen/Qwen3-235B-A22B-Instruct-2507
|
||||
default: ~
|
||||
cost_optimized: ~
|
||||
complex: ~
|
||||
|
||||
# Enterprise tier models
|
||||
enterprise:
|
||||
default: zai-org/GLM-5
|
||||
cost_optimized: zai-org/GLM-5
|
||||
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
|
||||
default: ~
|
||||
cost_optimized: ~
|
||||
complex: ~
|
||||
|
||||
# Kubernetes configuration
|
||||
kubernetes:
|
||||
@@ -70,11 +63,6 @@ data:
|
||||
redis:
|
||||
url: redis://dragonfly:6379
|
||||
|
||||
# Qdrant (for RAG vector search)
|
||||
qdrant:
|
||||
url: http://qdrant:6333
|
||||
collection: gateway_memory
|
||||
|
||||
# Iceberg (for durable storage via REST catalog)
|
||||
iceberg:
|
||||
catalog_uri: http://iceberg-catalog:8181
|
||||
|
||||
@@ -45,68 +45,6 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
---
|
||||
# Qdrant (Vector database for RAG)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: qdrant
|
||||
spec:
|
||||
selector:
|
||||
app: qdrant
|
||||
ports:
|
||||
- name: http
|
||||
protocol: TCP
|
||||
port: 6333
|
||||
targetPort: 6333
|
||||
- name: grpc
|
||||
protocol: TCP
|
||||
port: 6334
|
||||
targetPort: 6334
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: qdrant
|
||||
spec:
|
||||
serviceName: qdrant
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: qdrant
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: qdrant
|
||||
spec:
|
||||
containers:
|
||||
- name: qdrant
|
||||
image: qdrant/qdrant:latest
|
||||
ports:
|
||||
- containerPort: 6333
|
||||
name: http
|
||||
- containerPort: 6334
|
||||
name: grpc
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "1000m"
|
||||
volumeMounts:
|
||||
- name: qdrant-data
|
||||
mountPath: /qdrant/storage
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: qdrant-data
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
storageClassName: dev-ephemeral
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
---
|
||||
# Kafka (KRaft mode - no Zookeeper needed)
|
||||
# Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because:
|
||||
# - cp-kafka's entrypoint script has issues with KRaft configuration
|
||||
|
||||
Reference in New Issue
Block a user