redesign fully scaffolded and web login works

This commit is contained in:
2026-03-17 20:10:47 -04:00
parent b9cc397e05
commit f6bd22a8ef
143 changed files with 17317 additions and 693 deletions

View File

@@ -0,0 +1,209 @@
import { IcebergRestCatalog } from 'iceberg-js';
import type { FastifyBaseLogger } from 'fastify';
/**
* Iceberg client configuration
*/
export interface IcebergConfig {
  /** Base URI of the Iceberg REST Catalog service. */
  catalogUri: string;
  /** Catalog namespace containing the gateway tables (e.g. "gateway"). */
  namespace: string;
  /** Optional S3-compatible endpoint for warehouse storage — presumably MinIO in dev; confirm against deployment config. */
  s3Endpoint?: string;
  /** Access key for the S3 endpoint; only used when s3Endpoint is set. */
  s3AccessKey?: string;
  /** Secret key for the S3 endpoint; only used when s3Endpoint is set. */
  s3SecretKey?: string;
}
/**
* Message record for Iceberg storage
*/
export interface IcebergMessage {
  /** Unique message identifier. */
  id: string;
  /** Owning user; partitioning/GDPR key in the Iceberg table. */
  user_id: string;
  /** Conversation session this message belongs to. */
  session_id: string;
  /** Chat role of the message author. */
  role: 'user' | 'assistant' | 'system';
  /** Message text content. */
  content: string;
  metadata: string; // JSON string
  timestamp: number; // microseconds
}
/**
* Checkpoint record for Iceberg storage
*/
export interface IcebergCheckpoint {
  /** Owning user; partitioning/GDPR key in the Iceberg table. */
  user_id: string;
  /** Conversation session the checkpoint belongs to. */
  session_id: string;
  /** Unique checkpoint identifier within the session. */
  checkpoint_id: string;
  checkpoint_data: string; // JSON string
  metadata: string; // JSON string
  timestamp: number; // microseconds
}
/**
* Iceberg REST client wrapper for durable storage
*
* Uses Iceberg REST Catalog API to:
* - Query conversation history from gateway.conversations
* - Query checkpoints from gateway.checkpoints
* - Note: Writes are handled by Flink; this is read-only
*
* For writes, we'll send to a Kafka topic that Flink consumes
* (or implement direct REST catalog write if needed)
*/
export class IcebergClient {
  /**
   * REST catalog handle. The original implementation constructed the
   * catalog and immediately discarded it, which made the TODO'd metadata
   * operations (listTables, getTableMetadata) impossible to implement;
   * it is now retained for those queries.
   */
  private readonly catalog: IcebergRestCatalog;
  private namespace: string;
  private logger: FastifyBaseLogger;

  /**
   * Create a read-only Iceberg REST catalog client.
   *
   * @param config - catalog URI, namespace, and optional S3 credentials
   * @param logger - Fastify-compatible structured logger
   */
  constructor(config: IcebergConfig, logger: FastifyBaseLogger) {
    this.logger = logger;
    this.namespace = config.namespace;

    // Build the REST catalog configuration. S3 settings are only attached
    // when an endpoint is provided (e.g. a local MinIO warehouse).
    const clientConfig: any = {
      uri: config.catalogUri,
    };
    if (config.s3Endpoint) {
      clientConfig.s3 = {
        endpoint: config.s3Endpoint,
        'access-key-id': config.s3AccessKey,
        'secret-access-key': config.s3SecretKey,
        // Non-AWS S3 endpoints (MinIO) generally require path-style URLs.
        'path-style-access': 'true',
      };
    }

    // Retain the catalog instance so metadata queries can use it.
    this.catalog = new IcebergRestCatalog(clientConfig);

    this.logger.info({
      catalogUri: config.catalogUri,
      namespace: this.namespace,
    }, 'Iceberg client initialized');
  }

  /**
   * Query messages from gateway.conversations table.
   *
   * Note: This is a simplified interface. The actual Iceberg REST API
   * returns table metadata, and you'd need to query the underlying
   * Parquet files via S3 or use a query engine like DuckDB/Trino.
   *
   * Currently a stub: logs the request and returns an empty array
   * (writes are handled by Flink; see module note below).
   *
   * @param userId - owner of the conversation
   * @param sessionId - conversation session to fetch
   * @param _options - reserved time-range/limit filters (unused until implemented)
   * @returns empty array until a query path is implemented
   */
  async queryMessages(
    userId: string,
    sessionId: string,
    _options?: {
      startTime?: number;
      endTime?: number;
      limit?: number;
    }
  ): Promise<IcebergMessage[]> {
    this.logger.debug({
      userId,
      sessionId,
      table: `${this.namespace}.conversations`,
    }, 'Querying messages from Iceberg');

    // TODO: Implement actual Iceberg query
    // Options:
    // 1. Use this.catalog to get table metadata and Parquet file locations
    // 2. Query Parquet files directly via S3 + parquet-wasm
    // 3. Use external query engine (DuckDB, Trino, Presto)
    // 4. Use Flink SQL REST endpoint for queries
    this.logger.warn('Iceberg query not yet implemented - returning empty array');
    return [];
  }

  /**
   * Query checkpoint from gateway.checkpoints table.
   *
   * Currently a stub: logs the request and returns null.
   *
   * @param userId - owner of the session
   * @param sessionId - session whose checkpoint is requested
   * @param checkpointId - specific checkpoint; presumably latest when omitted — TODO confirm once implemented
   * @returns null until a query path is implemented
   */
  async queryCheckpoint(
    userId: string,
    sessionId: string,
    checkpointId?: string
  ): Promise<IcebergCheckpoint | null> {
    this.logger.debug({
      userId,
      sessionId,
      checkpointId,
      table: `${this.namespace}.checkpoints`,
    }, 'Querying checkpoint from Iceberg');

    // TODO: Implement actual Iceberg query
    this.logger.warn('Iceberg query not yet implemented - returning null');
    return null;
  }

  /**
   * Get table metadata for a table in this client's namespace.
   *
   * Currently a stub: logs and returns null.
   *
   * @param tableName - unqualified table name (namespace is prepended)
   * @throws rethrows any error after logging it with context
   */
  async getTableMetadata(tableName: string): Promise<any> {
    try {
      const tableId = `${this.namespace}.${tableName}`;

      // Note: this.catalog provides catalog operations.
      // For actual data queries, you'd need to:
      // 1. Get table metadata
      // 2. Find data file locations
      // 3. Query Parquet files from S3
      this.logger.info({ table: tableId }, 'Getting table metadata');

      // TODO: Implement table metadata query via this.catalog
      return null;
    } catch (error) {
      this.logger.error({ error, tableName }, 'Failed to get table metadata');
      throw error;
    }
  }

  /**
   * List tables in this client's namespace.
   *
   * Currently a stub: logs and returns an empty array.
   *
   * @throws rethrows any error after logging it
   */
  async listTables(): Promise<string[]> {
    try {
      this.logger.info({ namespace: this.namespace }, 'Listing tables');

      // TODO: Use this.catalog to list tables, e.g.:
      // const tables = await this.catalog.listTables(this.namespace);
      // return tables.map(t => t.name);
      return [];
    } catch (error) {
      this.logger.error({ error }, 'Failed to list tables');
      throw error;
    }
  }

  /**
   * Check if a table exists in the namespace.
   *
   * Delegates to listTables(); until that is implemented this always
   * returns false. Errors are swallowed deliberately (best-effort probe)
   * and reported as "does not exist".
   *
   * @param tableName - unqualified table name
   */
  async tableExists(tableName: string): Promise<boolean> {
    try {
      const tables = await this.listTables();
      return tables.includes(tableName);
    } catch (error) {
      this.logger.error({ error, tableName }, 'Failed to check table existence');
      return false;
    }
  }
}
/**
* Note on Iceberg Writes:
*
* For appending messages and checkpoints to Iceberg, we have two options:
*
* 1. **Via Kafka + Flink** (Recommended):
* - Gateway writes to Kafka topics (gateway_messages, gateway_checkpoints)
* - Flink consumes and writes to Iceberg with proper partitioning
* - Benefits: Proven architecture, handles backpressure, deduplication
*
* 2. **Direct REST Catalog Write**:
* - Use Iceberg REST API to commit new data files
* - More complex: need to create Parquet files, upload to S3, commit transaction
* - Library like parquet-wasm could help
*
* For now, recommend Option 1 (Kafka + Flink) for consistency with existing architecture.
*/

View File

@@ -0,0 +1,319 @@
import { QdrantClient as QdrantRestClient } from '@qdrant/js-client-rest';
import type { FastifyBaseLogger } from 'fastify';
/**
* Qdrant client configuration
*/
export interface QdrantConfig {
  /** Base URL of the Qdrant server. */
  url: string;
  /** Optional API key for authenticated Qdrant deployments. */
  apiKey?: string;
  /** Collection to operate on; defaults to "gateway_memory" in the client. */
  collectionName?: string;
}
/**
* Qdrant client wrapper for RAG vector storage
*
* Features:
* - Global namespace (user_id = "0") for platform knowledge
* - User-specific namespaces for personal memories
* - Payload-indexed by user_id for GDPR compliance
* - Cosine similarity search
*/
export class QdrantClient {
  private client: QdrantRestClient;
  private collectionName: string;
  private vectorDimension: number;
  private logger: FastifyBaseLogger;

  /**
   * @param config - Qdrant connection settings (URL, optional API key, collection)
   * @param logger - Fastify-compatible structured logger
   * @param vectorDimension - embedding size; defaults to 1536
   */
  constructor(config: QdrantConfig, logger: FastifyBaseLogger, vectorDimension: number = 1536) {
    this.logger = logger;
    // `||` (not `??`) is deliberate: an empty-string collection name is invalid.
    this.collectionName = config.collectionName || 'gateway_memory';
    this.vectorDimension = vectorDimension;

    // Initialize Qdrant REST client
    this.client = new QdrantRestClient({
      url: config.url,
      apiKey: config.apiKey,
    });

    this.logger.info({
      url: config.url,
      collection: this.collectionName,
      vectorDimension,
    }, 'Qdrant client initialized');
  }

  /**
   * Initialize collection with proper schema and indexes.
   *
   * Idempotent: creates the collection (cosine distance) and the
   * user_id/session_id/timestamp payload indexes only when the collection
   * does not already exist.
   *
   * NOTE(review): check-then-create is racy if several instances start
   * concurrently — the loser will throw from createCollection; confirm
   * whether startup is single-instance.
   *
   * @throws rethrows any Qdrant error after logging it
   */
  async initialize(): Promise<void> {
    this.logger.info({ collection: this.collectionName }, 'Initializing Qdrant collection');

    try {
      // Check if collection exists
      const collections = await this.client.getCollections();
      const exists = collections.collections.some(c => c.name === this.collectionName);

      if (!exists) {
        this.logger.info({ collection: this.collectionName }, 'Creating new collection');

        // Create collection with vector configuration
        await this.client.createCollection(this.collectionName, {
          vectors: {
            size: this.vectorDimension,
            distance: 'Cosine',
          },
        });

        // Create payload indexes for efficient filtering
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'user_id',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'session_id',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'timestamp',
          field_schema: 'integer',
        });

        this.logger.info({ collection: this.collectionName }, 'Collection created successfully');
      } else {
        this.logger.info({ collection: this.collectionName }, 'Collection already exists');
      }
    } catch (error) {
      this.logger.error({ error, collection: this.collectionName }, 'Failed to initialize collection');
      throw error;
    }
  }

  /**
   * Store (insert or overwrite) a vector point with its payload.
   *
   * `wait: true` blocks until the write is durable in Qdrant.
   *
   * @param id - point id (string UUID expected by Qdrant)
   * @param vector - embedding of length vectorDimension
   * @param payload - arbitrary metadata; should include user_id for filtering
   * @throws rethrows any Qdrant error after logging it
   */
  async upsertPoint(
    id: string,
    vector: number[],
    payload: Record<string, any>
  ): Promise<void> {
    try {
      await this.client.upsert(this.collectionName, {
        wait: true,
        points: [{
          id,
          vector,
          payload,
        }],
      });
    } catch (error) {
      this.logger.error({ error, id }, 'Failed to upsert point');
      throw error;
    }
  }

  /**
   * Search for similar vectors.
   * Queries both global (user_id="0") and user-specific vectors.
   *
   * @param userId - caller's namespace; results also include the global "0" namespace
   * @param queryVector - embedding to search against
   * @param options.limit - max results (default 5)
   * @param options.scoreThreshold - minimum cosine score (default 0.7; an explicit 0 disables the cutoff)
   * @param options.sessionId - restrict to one session
   * @param options.timeRange - inclusive timestamp bounds
   * @returns matches sorted by score, with payloads
   * @throws rethrows any Qdrant error after logging it
   */
  async search(
    userId: string,
    queryVector: number[],
    options?: {
      limit?: number;
      scoreThreshold?: number;
      sessionId?: string;
      timeRange?: { start: number; end: number };
    }
  ): Promise<Array<{
    id: string;
    score: number;
    payload: Record<string, any>;
  }>> {
    // `??` (not `||`) so an explicit scoreThreshold of 0 is honored
    // instead of being silently replaced by the 0.7 default.
    const limit = options?.limit ?? 5;
    const scoreThreshold = options?.scoreThreshold ?? 0.7;

    try {
      // Build filter: (user_id = userId OR user_id = "0") AND other conditions
      const mustConditions: any[] = [];
      const shouldConditions: any[] = [
        { key: 'user_id', match: { value: userId } },
        { key: 'user_id', match: { value: '0' } }, // Global namespace
      ];

      // Add session filter if provided
      if (options?.sessionId) {
        mustConditions.push({
          key: 'session_id',
          match: { value: options.sessionId },
        });
      }

      // Add time range filter if provided
      if (options?.timeRange) {
        mustConditions.push({
          key: 'timestamp',
          range: {
            gte: options.timeRange.start,
            lte: options.timeRange.end,
          },
        });
      }

      // Perform search. Qdrant semantics: all `must` clauses AND at least
      // one `should` clause have to match.
      const results = await this.client.search(this.collectionName, {
        vector: queryVector,
        filter: {
          must: mustConditions.length > 0 ? mustConditions : undefined,
          should: shouldConditions,
        },
        limit,
        score_threshold: scoreThreshold,
        with_payload: true,
      });

      // NOTE(review): ids may be numeric in Qdrant; cast assumes this
      // deployment only writes string UUIDs — confirm against writers.
      return results.map(r => ({
        id: r.id as string,
        score: r.score,
        payload: r.payload || {},
      }));
    } catch (error) {
      this.logger.error({ error, userId }, 'Search failed');
      throw error;
    }
  }

  /**
   * Get points by filter (without vector search).
   *
   * Unlike search(), this only returns the given user's points — the
   * global "0" namespace is NOT included.
   *
   * @param userId - namespace to scroll
   * @param options.limit - page size (default 10)
   * @param options.sessionId - restrict to one session
   * @param options.offset - opaque cursor from a previous page
   * @returns one page of points plus the cursor for the next page, if any
   * @throws rethrows any Qdrant error after logging it
   */
  async scroll(
    userId: string,
    options?: {
      limit?: number;
      sessionId?: string;
      offset?: string;
    }
  ): Promise<{
    points: Array<{ id: string; payload: Record<string, any> }>;
    nextOffset?: string;
  }> {
    try {
      const filter: any = {
        must: [
          { key: 'user_id', match: { value: userId } },
        ],
      };

      if (options?.sessionId) {
        filter.must.push({
          key: 'session_id',
          match: { value: options.sessionId },
        });
      }

      const result = await this.client.scroll(this.collectionName, {
        filter,
        limit: options?.limit ?? 10, // `??` so an explicit 0 isn't clobbered
        offset: options?.offset,
        with_payload: true,
        with_vector: false, // payload-only scan; skip vector transfer
      });

      return {
        points: result.points.map(p => ({
          id: p.id as string,
          payload: p.payload || {},
        })),
        nextOffset: result.next_page_offset as string | undefined,
      };
    } catch (error) {
      this.logger.error({ error, userId }, 'Scroll failed');
      throw error;
    }
  }

  /**
   * Delete all points for a user (GDPR compliance).
   *
   * @param userId - namespace whose points are removed; `wait: true` blocks
   *   until the deletion is applied
   * @throws rethrows any Qdrant error after logging it
   */
  async deleteUserData(userId: string): Promise<void> {
    this.logger.info({ userId }, 'Deleting user vectors for GDPR compliance');

    try {
      await this.client.delete(this.collectionName, {
        wait: true,
        filter: {
          must: [
            { key: 'user_id', match: { value: userId } },
          ],
        },
      });

      this.logger.info({ userId }, 'User vectors deleted');
    } catch (error) {
      this.logger.error({ error, userId }, 'Failed to delete user data');
      throw error;
    }
  }

  /**
   * Delete points for a specific session.
   *
   * Both user_id and session_id must match, so one user's deletion can
   * never touch another user's identically-named session.
   *
   * @throws rethrows any Qdrant error after logging it
   */
  async deleteSession(userId: string, sessionId: string): Promise<void> {
    this.logger.info({ userId, sessionId }, 'Deleting session vectors');

    try {
      await this.client.delete(this.collectionName, {
        wait: true,
        filter: {
          must: [
            { key: 'user_id', match: { value: userId } },
            { key: 'session_id', match: { value: sessionId } },
          ],
        },
      });

      this.logger.info({ userId, sessionId }, 'Session vectors deleted');
    } catch (error) {
      this.logger.error({ error, userId, sessionId }, 'Failed to delete session');
      throw error;
    }
  }

  /**
   * Get collection info and statistics.
   *
   * @returns vector/point counts; missing or null counts map to 0
   * @throws rethrows any Qdrant error after logging it
   */
  async getCollectionInfo(): Promise<{
    vectorsCount: number;
    indexedVectorsCount: number;
    pointsCount: number;
  }> {
    try {
      const info = await this.client.getCollection(this.collectionName);

      // `??` preserves a legitimate 0 and only defaults null/undefined.
      // NOTE(review): vectors_count is absent from some client typings,
      // hence the cast — confirm against the installed client version.
      return {
        vectorsCount: (info as any).vectors_count ?? 0,
        indexedVectorsCount: info.indexed_vectors_count ?? 0,
        pointsCount: info.points_count ?? 0,
      };
    } catch (error) {
      this.logger.error({ error }, 'Failed to get collection info');
      throw error;
    }
  }

  /**
   * Store global platform knowledge (user_id = "0").
   *
   * Any user_id supplied in the payload is overwritten with "0".
   * (The previous signature used Omit<Record<string, any>, 'user_id'>,
   * which is a no-op on a string index signature and only suggested a
   * guarantee the type system never enforced.)
   */
  async storeGlobalKnowledge(
    id: string,
    vector: number[],
    payload: Record<string, any>
  ): Promise<void> {
    return this.upsertPoint(id, vector, {
      ...payload,
      user_id: '0', // Global namespace
    });
  }
}