feat: add @tag model override support and remove Qdrant dependencies

- Add model-tags parser for @tag syntax in chat messages
- Support Anthropic models (Sonnet, Haiku, Opus) via @tag
- Remove Qdrant vector database from infrastructure and configs
- Simplify license model config: tier entries are now null and fall back to defaults.model
- Add greeting stream after model switch via @tag
- Fix protobuf field names to camelCase for v7 compatibility
- Add HTTP 429 rate-limit retry logic with exponential backoff
- Remove RAG references from agent harness documentation
This commit is contained in:
2026-04-27 20:55:18 -04:00
parent 6f937f9e5e
commit d41fcd0499
50 changed files with 956 additions and 798 deletions

View File

@@ -27,29 +27,22 @@ data:
model_provider: deepinfra
model: zai-org/GLM-5
# License tier model configuration
# License tier model configuration (null = fall back to defaults.model)
license_models:
# Free tier models
free:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
allowed_models:
- zai-org/GLM-5
default: ~
cost_optimized: ~
complex: ~
# Pro tier models
pro:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: zai-org/GLM-5
blocked_models:
- Qwen/Qwen3-235B-A22B-Instruct-2507
default: ~
cost_optimized: ~
complex: ~
# Enterprise tier models
enterprise:
default: zai-org/GLM-5
cost_optimized: zai-org/GLM-5
complex: Qwen/Qwen3-235B-A22B-Instruct-2507
default: ~
cost_optimized: ~
complex: ~
# Kubernetes configuration
kubernetes:
@@ -70,11 +63,6 @@ data:
redis:
url: redis://dragonfly:6379
# Qdrant (for RAG vector search)
qdrant:
url: http://qdrant:6333
collection: gateway_memory
# Iceberg (for durable storage via REST catalog)
iceberg:
catalog_uri: http://iceberg-catalog:8181