redesign fully scaffolded and web login works
This commit is contained in:
209
gateway/src/clients/iceberg-client.ts
Normal file
209
gateway/src/clients/iceberg-client.ts
Normal file
@@ -0,0 +1,209 @@
|
||||
import { IcebergRestCatalog } from 'iceberg-js';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
/**
 * Iceberg client configuration.
 */
export interface IcebergConfig {
  /** URI of the Iceberg REST catalog; handed to the catalog client as `uri`. */
  catalogUri: string;
  /** Namespace used to qualify table names (e.g. `<namespace>.conversations`). */
  namespace: string;
  /** Optional S3 endpoint. S3 settings are only forwarded when this is set. */
  s3Endpoint?: string;
  /** S3 access key ID, forwarded as `access-key-id` alongside `s3Endpoint`. */
  s3AccessKey?: string;
  /** S3 secret access key, forwarded as `secret-access-key` alongside `s3Endpoint`. */
  s3SecretKey?: string;
}
|
||||
|
||||
/**
 * Message record for Iceberg storage.
 */
export interface IcebergMessage {
  /** Identifier of the message. */
  id: string;
  /** Identifier of the user the message belongs to. */
  user_id: string;
  /** Identifier of the session the message belongs to. */
  session_id: string;
  /** Author of the message. */
  role: 'user' | 'assistant' | 'system';
  /** Message text. */
  content: string;
  /** Additional metadata, encoded as a JSON string. */
  metadata: string;
  /** Message time, in microseconds. */
  timestamp: number;
}
|
||||
|
||||
/**
 * Checkpoint record for Iceberg storage.
 */
export interface IcebergCheckpoint {
  /** Identifier of the user the checkpoint belongs to. */
  user_id: string;
  /** Identifier of the session the checkpoint belongs to. */
  session_id: string;
  /** Identifier of the checkpoint itself. */
  checkpoint_id: string;
  /** Checkpoint contents, encoded as a JSON string. */
  checkpoint_data: string;
  /** Additional metadata, encoded as a JSON string. */
  metadata: string;
  /** Checkpoint time, in microseconds. */
  timestamp: number;
}
|
||||
|
||||
/**
 * Iceberg REST client wrapper for durable storage.
 *
 * Uses the Iceberg REST Catalog API to:
 * - Query conversation history from `<namespace>.conversations`
 * - Query checkpoints from `<namespace>.checkpoints`
 *
 * This wrapper is read-only; writes are handled by Flink. For writes, the
 * plan is to send to a Kafka topic that Flink consumes (or to implement a
 * direct REST catalog write if needed).
 *
 * NOTE: the query methods below are currently stubs. They log what they
 * would do and return an empty result; see the TODO in each method.
 */
export class IcebergClient {
  private readonly namespace: string;
  private readonly logger: FastifyBaseLogger;

  /**
   * @param config - Catalog URI, namespace and optional S3 settings.
   * @param logger - Logger that receives every diagnostic this client emits.
   */
  constructor(config: IcebergConfig, logger: FastifyBaseLogger) {
    this.logger = logger;
    this.namespace = config.namespace;

    // Initialize Iceberg REST client.
    // `any` is deliberate: the option shape belongs to iceberg-js and is not
    // re-declared in this file.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const clientConfig: any = {
      uri: config.catalogUri,
    };

    // S3 settings are attached only when an endpoint is configured.
    if (config.s3Endpoint) {
      clientConfig.s3 = {
        endpoint: config.s3Endpoint,
        'access-key-id': config.s3AccessKey,
        'secret-access-key': config.s3SecretKey,
        'path-style-access': 'true',
      };
    }

    // TODO: Store client for queries when needed.
    // The catalog instance is constructed but intentionally not retained yet.
    new IcebergRestCatalog(clientConfig);

    this.logger.info(
      {
        catalogUri: config.catalogUri,
        namespace: this.namespace,
      },
      'Iceberg client initialized',
    );
  }

  /**
   * Query messages from the `<namespace>.conversations` table.
   *
   * Note: This is a simplified interface. The actual Iceberg REST API
   * returns table metadata, and you'd need to query the underlying
   * Parquet files via S3 or use a query engine like DuckDB/Trino.
   *
   * Not implemented yet: logs the request and a warning, then resolves to an
   * empty array.
   *
   * @param userId - User whose messages are requested.
   * @param sessionId - Session whose messages are requested.
   * @param _options - Optional time window and limit; currently ignored.
   * @returns Always `[]` until the query is implemented.
   */
  async queryMessages(
    userId: string,
    sessionId: string,
    _options?: {
      startTime?: number;
      endTime?: number;
      limit?: number;
    },
  ): Promise<IcebergMessage[]> {
    this.logger.debug(
      {
        userId,
        sessionId,
        table: `${this.namespace}.conversations`,
      },
      'Querying messages from Iceberg',
    );

    // TODO: Implement actual Iceberg query
    // Options:
    // 1. Use iceberg-js to get table metadata and Parquet file locations
    // 2. Query Parquet files directly via S3 + parquet-wasm
    // 3. Use external query engine (DuckDB, Trino, Presto)
    // 4. Use Flink SQL REST endpoint for queries

    this.logger.warn('Iceberg query not yet implemented - returning empty array');
    return [];
  }

  /**
   * Query a checkpoint from the `<namespace>.checkpoints` table.
   *
   * Not implemented yet: logs the request and a warning, then resolves to
   * `null`.
   *
   * @param userId - User the checkpoint belongs to.
   * @param sessionId - Session the checkpoint belongs to.
   * @param checkpointId - Optional specific checkpoint to look up.
   * @returns Always `null` until the query is implemented.
   */
  async queryCheckpoint(
    userId: string,
    sessionId: string,
    checkpointId?: string,
  ): Promise<IcebergCheckpoint | null> {
    this.logger.debug(
      {
        userId,
        sessionId,
        checkpointId,
        table: `${this.namespace}.checkpoints`,
      },
      'Querying checkpoint from Iceberg',
    );

    // TODO: Implement actual Iceberg query
    this.logger.warn('Iceberg query not yet implemented - returning null');
    return null;
  }

  /**
   * Get table metadata.
   *
   * Not implemented yet: logs the fully qualified table name and resolves to
   * `null`.
   *
   * @param tableName - Table name without the namespace prefix.
   * @returns Always `null` until the metadata query is implemented.
   * @throws Re-throws anything raised inside the method after logging it.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async getTableMetadata(tableName: string): Promise<any> {
    try {
      const tableId = `${this.namespace}.${tableName}`;

      // Note: iceberg-js provides catalog operations
      // For actual data queries, you'd need to:
      // 1. Get table metadata
      // 2. Find data file locations
      // 3. Query Parquet files from S3

      this.logger.info({ table: tableId }, 'Getting table metadata');

      // TODO: Implement table metadata query via REST API
      return null;
    } catch (err) {
      // Logged under `err` so the logger's error serializer captures
      // message and stack (an Error under any other key serializes as `{}`).
      this.logger.error({ err, tableName }, 'Failed to get table metadata');
      throw err;
    }
  }

  /**
   * List tables in the configured namespace.
   *
   * Not implemented yet: logs the namespace and resolves to an empty array.
   *
   * @returns Always `[]` until the catalog call is implemented.
   * @throws Re-throws anything raised inside the method after logging it.
   */
  async listTables(): Promise<string[]> {
    try {
      this.logger.info({ namespace: this.namespace }, 'Listing tables');

      // TODO: Use iceberg-js to list tables
      // const tables = await this.client.listTables(this.namespace);
      // return tables.map(t => t.name);

      return [];
    } catch (err) {
      this.logger.error({ err }, 'Failed to list tables');
      throw err;
    }
  }

  /**
   * Check if a table exists in the configured namespace.
   *
   * Best-effort: a failure while listing tables is logged and reported as
   * "does not exist" instead of being propagated.
   *
   * @param tableName - Table name to look for in the `listTables()` result.
   * @returns `true` when `listTables()` contains `tableName`; `false`
   *   otherwise, including when listing fails.
   */
  async tableExists(tableName: string): Promise<boolean> {
    try {
      const tables = await this.listTables();
      return tables.includes(tableName);
    } catch (err) {
      this.logger.error({ err, tableName }, 'Failed to check table existence');
      return false;
    }
  }
}
|
||||
|
||||
/**
 * Note on Iceberg Writes:
 *
 * For appending messages and checkpoints to Iceberg, we have two options:
 *
 * 1. **Via Kafka + Flink** (Recommended):
 *    - Gateway writes to Kafka topics (gateway_messages, gateway_checkpoints)
 *    - Flink consumes and writes to Iceberg with proper partitioning
 *    - Benefits: Proven architecture, handles backpressure, deduplication
 *
 * 2. **Direct REST Catalog Write**:
 *    - Use Iceberg REST API to commit new data files
 *    - More complex: need to create Parquet files, upload to S3, commit transaction
 *    - A library like parquet-wasm could help
 *
 * For now, we recommend Option 1 (Kafka + Flink) for consistency with the existing architecture.
 */
|
||||
319
gateway/src/clients/qdrant-client.ts
Normal file
319
gateway/src/clients/qdrant-client.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
import { QdrantClient as QdrantRestClient } from '@qdrant/js-client-rest';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
/**
 * Qdrant client configuration.
 */
export interface QdrantConfig {
  /** Base URL of the Qdrant instance. */
  url: string;
  /** Optional API key passed through to the Qdrant REST client. */
  apiKey?: string;
  /** Collection to operate on; the client falls back to `gateway_memory` when omitted. */
  collectionName?: string;
}
|
||||
|
||||
/**
 * Qdrant client wrapper for RAG vector storage.
 *
 * Features:
 * - Global namespace (user_id = "0") for platform knowledge
 * - User-specific namespaces for personal memories
 * - Payload-indexed by user_id for GDPR compliance
 * - Cosine similarity search
 */
export class QdrantClient {
  private readonly client: QdrantRestClient;
  private readonly collectionName: string;
  private readonly vectorDimension: number;
  private readonly logger: FastifyBaseLogger;

  /**
   * @param config - Qdrant URL, optional API key and optional collection name.
   * @param logger - Logger that receives every diagnostic this client emits.
   * @param vectorDimension - Vector size used when the collection is created.
   */
  constructor(config: QdrantConfig, logger: FastifyBaseLogger, vectorDimension: number = 1536) {
    this.logger = logger;
    // `||` on purpose: an empty collection name also falls back to the default.
    this.collectionName = config.collectionName || 'gateway_memory';
    this.vectorDimension = vectorDimension;

    // Initialize Qdrant REST client
    this.client = new QdrantRestClient({
      url: config.url,
      apiKey: config.apiKey,
    });

    this.logger.info(
      {
        url: config.url,
        collection: this.collectionName,
        vectorDimension,
      },
      'Qdrant client initialized',
    );
  }

  /**
   * Initialize the collection with its vector configuration and payload
   * indexes.
   *
   * When the collection is missing it is created with cosine distance and
   * keyword/integer payload indexes on `user_id`, `session_id` and
   * `timestamp`. An existing collection is left untouched (its indexes are
   * not re-checked).
   *
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async initialize(): Promise<void> {
    this.logger.info({ collection: this.collectionName }, 'Initializing Qdrant collection');

    try {
      // Check if collection exists
      const collections = await this.client.getCollections();
      const exists = collections.collections.some((c) => c.name === this.collectionName);

      if (exists) {
        this.logger.info({ collection: this.collectionName }, 'Collection already exists');
        return;
      }

      this.logger.info({ collection: this.collectionName }, 'Creating new collection');

      // Create collection with vector configuration
      await this.client.createCollection(this.collectionName, {
        vectors: {
          size: this.vectorDimension,
          distance: 'Cosine',
        },
      });

      // Create payload indexes for efficient filtering
      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'user_id',
        field_schema: 'keyword',
      });

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'session_id',
        field_schema: 'keyword',
      });

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'timestamp',
        field_schema: 'integer',
      });

      this.logger.info({ collection: this.collectionName }, 'Collection created successfully');
    } catch (err) {
      // Logged under `err` so the logger's error serializer captures
      // message and stack (an Error under any other key serializes as `{}`).
      this.logger.error({ err, collection: this.collectionName }, 'Failed to initialize collection');
      throw err;
    }
  }

  /**
   * Store a vector point with payload, waiting for the write to be applied.
   *
   * @param id - Point identifier.
   * @param vector - Embedding to store.
   * @param payload - Payload stored alongside the vector.
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async upsertPoint(
    id: string,
    vector: number[],
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    payload: Record<string, any>,
  ): Promise<void> {
    try {
      await this.client.upsert(this.collectionName, {
        wait: true,
        points: [{ id, vector, payload }],
      });
    } catch (err) {
      this.logger.error({ err, id }, 'Failed to upsert point');
      throw err;
    }
  }

  /**
   * Search for similar vectors.
   * Queries both global (user_id="0") and user-specific vectors.
   *
   * @param userId - User whose vectors are searched alongside the global ones.
   * @param queryVector - Embedding to compare against.
   * @param options - Optional tuning:
   *   - `limit`: maximum hits (default 5; `0` also falls back to the default)
   *   - `scoreThreshold`: minimum score (default 0.7; an explicit `0` is honoured)
   *   - `sessionId`: restrict hits to one session
   *   - `timeRange`: inclusive `timestamp` bounds
   * @returns Hits with the point ID as a string, the score, and the payload
   *   (`{}` when the point has none).
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async search(
    userId: string,
    queryVector: number[],
    options?: {
      limit?: number;
      scoreThreshold?: number;
      sessionId?: string;
      timeRange?: { start: number; end: number };
    },
  ): Promise<
    Array<{
      id: string;
      score: number;
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      payload: Record<string, any>;
    }>
  > {
    const limit = options?.limit || 5;
    // `??` rather than `||`: a threshold of 0 is a legitimate request and
    // must not be silently replaced by the default.
    const scoreThreshold = options?.scoreThreshold ?? 0.7;

    try {
      // Build filter: (user_id = userId OR user_id = "0") AND other conditions.
      // Condition shapes are defined by the Qdrant client, hence `any`.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const mustConditions: any[] = [];
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const shouldConditions: any[] = [
        { key: 'user_id', match: { value: userId } },
        { key: 'user_id', match: { value: '0' } }, // Global namespace
      ];

      // Add session filter if provided
      if (options?.sessionId) {
        mustConditions.push({
          key: 'session_id',
          match: { value: options.sessionId },
        });
      }

      // Add time range filter if provided
      if (options?.timeRange) {
        mustConditions.push({
          key: 'timestamp',
          range: {
            gte: options.timeRange.start,
            lte: options.timeRange.end,
          },
        });
      }

      // Perform search
      const results = await this.client.search(this.collectionName, {
        vector: queryVector,
        filter: {
          must: mustConditions.length > 0 ? mustConditions : undefined,
          should: shouldConditions,
        },
        limit,
        score_threshold: scoreThreshold,
        with_payload: true,
      });

      return results.map((r) => ({
        // Convert rather than cast: a numeric point ID would otherwise be
        // returned as a number behind a `string` type.
        id: String(r.id),
        score: r.score,
        payload: r.payload || {},
      }));
    } catch (err) {
      this.logger.error({ err, userId }, 'Search failed');
      throw err;
    }
  }

  /**
   * Get points by filter (without vector search).
   *
   * Only the given user's points are returned; the global namespace is not
   * included here.
   *
   * @param userId - User whose points are listed.
   * @param options - Optional `limit` (default 10), `sessionId` filter and
   *   `offset` taken from a previous page's `nextOffset`.
   * @returns The page of points plus `nextOffset`, which is `undefined` when
   *   Qdrant reports no further page.
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async scroll(
    userId: string,
    options?: {
      limit?: number;
      sessionId?: string;
      offset?: string;
    },
  ): Promise<{
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    points: Array<{ id: string; payload: Record<string, any> }>;
    nextOffset?: string;
  }> {
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const filter: any = {
        must: [{ key: 'user_id', match: { value: userId } }],
      };

      if (options?.sessionId) {
        filter.must.push({
          key: 'session_id',
          match: { value: options.sessionId },
        });
      }

      const result = await this.client.scroll(this.collectionName, {
        filter,
        limit: options?.limit || 10,
        offset: options?.offset,
        with_payload: true,
        with_vector: false,
      });

      return {
        points: result.points.map((p) => ({
          id: String(p.id),
          payload: p.payload || {},
        })),
        // Normalize a null offset to undefined so the value matches the
        // declared type. The offset itself is passed through unconverted so
        // it can be fed back to `scroll` as-is.
        nextOffset: (result.next_page_offset ?? undefined) as string | undefined,
      };
    } catch (err) {
      this.logger.error({ err, userId }, 'Scroll failed');
      throw err;
    }
  }

  /**
   * Delete all points for a user (GDPR compliance).
   *
   * NOTE(review): passing "0" would delete the global namespace, since global
   * knowledge is stored under user_id "0" — confirm callers never do this.
   *
   * @param userId - User whose points are removed.
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async deleteUserData(userId: string): Promise<void> {
    this.logger.info({ userId }, 'Deleting user vectors for GDPR compliance');

    try {
      await this.client.delete(this.collectionName, {
        wait: true,
        filter: {
          must: [{ key: 'user_id', match: { value: userId } }],
        },
      });

      this.logger.info({ userId }, 'User vectors deleted');
    } catch (err) {
      this.logger.error({ err, userId }, 'Failed to delete user data');
      throw err;
    }
  }

  /**
   * Delete points for a specific session of a user.
   *
   * @param userId - Owner of the session.
   * @param sessionId - Session whose points are removed.
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async deleteSession(userId: string, sessionId: string): Promise<void> {
    this.logger.info({ userId, sessionId }, 'Deleting session vectors');

    try {
      await this.client.delete(this.collectionName, {
        wait: true,
        filter: {
          must: [
            { key: 'user_id', match: { value: userId } },
            { key: 'session_id', match: { value: sessionId } },
          ],
        },
      });

      this.logger.info({ userId, sessionId }, 'Session vectors deleted');
    } catch (err) {
      this.logger.error({ err, userId, sessionId }, 'Failed to delete session');
      throw err;
    }
  }

  /**
   * Get collection info and statistics.
   *
   * @returns Vector, indexed-vector and point counts; a missing count is
   *   reported as 0.
   * @throws Re-throws any Qdrant client error after logging it.
   */
  async getCollectionInfo(): Promise<{
    vectorsCount: number;
    indexedVectorsCount: number;
    pointsCount: number;
  }> {
    try {
      const info = await this.client.getCollection(this.collectionName);

      return {
        // `vectors_count` is read through `any` because it is not accessed
        // via the client's typed response here.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        vectorsCount: (info as any).vectors_count || 0,
        indexedVectorsCount: info.indexed_vectors_count || 0,
        pointsCount: info.points_count || 0,
      };
    } catch (err) {
      this.logger.error({ err }, 'Failed to get collection info');
      throw err;
    }
  }

  /**
   * Store global platform knowledge (user_id = "0").
   *
   * Any `user_id` present in `payload` is overwritten with "0".
   *
   * @param id - Point identifier.
   * @param vector - Embedding to store.
   * @param payload - Payload stored alongside the vector.
   * @throws Re-throws any Qdrant client error (logged by `upsertPoint`).
   */
  async storeGlobalKnowledge(
    id: string,
    vector: number[],
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    payload: Omit<Record<string, any>, 'user_id'>,
  ): Promise<void> {
    return this.upsertPoint(id, vector, {
      ...payload,
      user_id: '0', // Global namespace
    });
  }
}
|
||||
Reference in New Issue
Block a user