feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions
--- a/deploy/k8s/dev/configs/gateway-config.yaml.tpl
+++ b/deploy/k8s/dev/configs/gateway-config.yaml.tpl
@@ -27,29 +27,22 @@ data:
      model_provider: deepinfra
      model: zai-org/GLM-5

-    # License tier model configuration
+    # License tier model configuration (null = fall back to defaults.model)
    license_models:
-      # Free tier models
      free:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: zai-org/GLM-5
-        allowed_models:
-          - zai-org/GLM-5
+        default: ~
+        cost_optimized: ~
+        complex: ~

-      # Pro tier models
      pro:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: zai-org/GLM-5
-        blocked_models:
-          - Qwen/Qwen3-235B-A22B-Instruct-2507
+        default: ~
+        cost_optimized: ~
+        complex: ~

-      # Enterprise tier models
      enterprise:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: Qwen/Qwen3-235B-A22B-Instruct-2507
+        default: ~
+        cost_optimized: ~
+        complex: ~

    # Kubernetes configuration
    kubernetes:
@@ -70,11 +63,6 @@ data:
    redis:
      url: redis://dragonfly:6379

-    # Qdrant (for RAG vector search)
-    qdrant:
-      url: http://qdrant:6333
-      collection: gateway_memory
-
    # Iceberg (for durable storage via REST catalog)
    iceberg:
      catalog_uri: http://iceberg-catalog:8181
--- a/deploy/k8s/dev/infrastructure.yaml
+++ b/deploy/k8s/dev/infrastructure.yaml
@@ -45,68 +45,6 @@ spec:
            memory: "512Mi"
            cpu: "500m"
 ---
-# Qdrant (Vector database for RAG)
-apiVersion: v1
-kind: Service
-metadata:
-  name: qdrant
-spec:
-  selector:
-    app: qdrant
-  ports:
-    - name: http
-      protocol: TCP
-      port: 6333
-      targetPort: 6333
-    - name: grpc
-      protocol: TCP
-      port: 6334
-      targetPort: 6334
-  type: ClusterIP
---
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: qdrant
-spec:
-  serviceName: qdrant
-  replicas: 1
-  selector:
-    matchLabels:
-      app: qdrant
-  template:
-    metadata:
-      labels:
-        app: qdrant
-    spec:
-      containers:
-      - name: qdrant
-        image: qdrant/qdrant:latest
-        ports:
-        - containerPort: 6333
-          name: http
-        - containerPort: 6334
-          name: grpc
-        resources:
-          requests:
-            memory: "512Mi"
-            cpu: "200m"
-          limits:
-            memory: "1Gi"
-            cpu: "1000m"
-        volumeMounts:
-        - name: qdrant-data
-          mountPath: /qdrant/storage
-  volumeClaimTemplates:
-  - metadata:
-      name: qdrant-data
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      storageClassName: dev-ephemeral
-      resources:
-        requests:
-          storage: 10Gi
---
 # Kafka (KRaft mode - no Zookeeper needed)
 # Using apache/kafka:3.9.0 instead of confluentinc/cp-kafka because:
 # - cp-kafka's entrypoint script has issues with KRaft configuration