feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @Tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config to use null fallbacks
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add 429 rate limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions
--- a/deploy/k8s/prod/configs/gateway-config.yaml
+++ b/deploy/k8s/prod/configs/gateway-config.yaml
@@ -21,30 +21,6 @@ data:
      model_provider: deepinfra
      model: zai-org/GLM-5

-    # License tier model configuration
-    license_models:
-      # Free tier models
-      free:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: zai-org/GLM-5
-        allowed_models:
-          - zai-org/GLM-5
-
-      # Pro tier models
-      pro:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: zai-org/GLM-5
-        blocked_models:
-          - Qwen/Qwen3-235B-A22B-Instruct-2507
-
-      # Enterprise tier models
-      enterprise:
-        default: zai-org/GLM-5
-        cost_optimized: zai-org/GLM-5
-        complex: Qwen/Qwen3-235B-A22B-Instruct-2507
-
    # Kubernetes configuration
    kubernetes:
      namespace: sandbox
@@ -59,11 +35,6 @@ data:
    redis:
      url: redis://dragonfly:6379

-    # Qdrant (for RAG vector search)
-    qdrant:
-      url: http://qdrant:6333
-      collection: gateway_memory
-
    # Agent configuration
    agent:
      # Number of prior conversation turns loaded as LLM context and flushed to Iceberg at session end