import type Redis from 'ioredis';
import type { FastifyBaseLogger } from 'fastify';
import type { BaseMessage } from '@langchain/core/messages';
import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';

/**
 * Message record for storage
 */
export interface StoredMessage {
  id: string;
  userId: string;
  sessionId: string;
  role: 'user' | 'assistant' | 'system';
  content: string;
  timestamp: number; // microseconds (Iceberg convention)
  // NOTE(review): the type arguments were missing in the reviewed source;
  // `Record<string, unknown>` is the safest reading - confirm against callers.
  metadata?: Record<string, unknown>;
}

/** Number of keys requested per SCAN round-trip when deleting a user's sessions. */
const SCAN_BATCH_SIZE = 100;

/**
 * Escape the characters that are special in Redis glob-style patterns
 * (`\`, `*`, `?`, `[`, `]`) so that a value is matched literally.
 */
function escapeRedisGlob(value: string): string {
  return value.replace(/[\\*?[\]]/g, '\\$&');
}

/**
 * Runtime check that a value parsed from Redis has the shape written by
 * `ConversationStore.saveMessage`.
 */
function isStoredMessage(value: unknown): value is StoredMessage {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.id === 'string' &&
    typeof candidate.userId === 'string' &&
    typeof candidate.sessionId === 'string' &&
    typeof candidate.content === 'string' &&
    typeof candidate.timestamp === 'number' &&
    (candidate.role === 'user' || candidate.role === 'assistant' || candidate.role === 'system')
  );
}

/**
 * Conversation store: Redis (hot) + Iceberg (cold)
 *
 * Hot path: Recent messages in Redis for fast access
 * Cold path: Full history in Iceberg for durability and analytics
 *
 * Architecture:
 * - Redis stores last N messages per session with TTL
 * - Iceberg stores all messages partitioned by user_id, session_id
 * - Supports time-travel queries for debugging and analysis
 */
export class ConversationStore {
  private readonly HOT_MESSAGE_LIMIT = 50; // Keep last 50 messages in Redis
  private readonly HOT_TTL_SECONDS = 3600; // 1 hour

  constructor(
    private readonly redis: Redis,
    private readonly logger: FastifyBaseLogger
    // TODO: Add Iceberg catalog
    // private iceberg: IcebergCatalog
  ) {}

  /**
   * Save a message to both Redis and Iceberg.
   *
   * The Redis write (push, trim, TTL refresh) is awaited; the Iceberg append
   * is fire-and-forget and its failures are only logged.
   *
   * @param userId - Owner of the conversation.
   * @param sessionId - Session the message belongs to.
   * @param role - Author role of the message.
   * @param content - Message text.
   * @param metadata - Optional free-form data stored alongside the message.
   * @throws If the Redis transaction is aborted or any of its commands fails.
   */
  async saveMessage(
    userId: string,
    sessionId: string,
    role: 'user' | 'assistant' | 'system',
    content: string,
    metadata?: Record<string, unknown>
  ): Promise<void> {
    // Read the clock once so the id and the timestamp always agree.
    const nowMs = Date.now();
    // Random suffix keeps ids unique when several messages are saved for the
    // same session within one millisecond.
    const uniqueSuffix = Math.random().toString(36).slice(2, 10).padEnd(8, '0');

    const message: StoredMessage = {
      id: `${userId}:${sessionId}:${nowMs}:${uniqueSuffix}`,
      userId,
      sessionId,
      role,
      content,
      timestamp: nowMs * 1000, // Convert to microseconds
      metadata,
    };

    this.logger.debug({ userId, sessionId, role }, 'Saving message');

    // Hot: push (newest first), trim to the hot window and refresh the TTL in
    // one MULTI/EXEC so a partial failure cannot leave a list without a TTL.
    const key = this.getRedisKey(userId, sessionId);
    const results = await this.redis
      .multi()
      .lpush(key, JSON.stringify(message))
      .ltrim(key, 0, this.HOT_MESSAGE_LIMIT - 1)
      .expire(key, this.HOT_TTL_SECONDS)
      .exec();

    if (results === null) {
      throw new Error('Redis transaction aborted while saving message');
    }
    // exec() reports per-command failures in its result tuples instead of rejecting.
    for (const [commandError] of results) {
      if (commandError) {
        throw commandError;
      }
    }

    // Cold: Async append to Iceberg (deliberately not awaited)
    void this.appendToIceberg(message).catch((error) => {
      // Logged under `err` so the logger's error serializer keeps message and stack.
      this.logger.error({ err: error, userId, sessionId }, 'Failed to append message to Iceberg');
    });
  }

  /**
   * Get recent messages from Redis (hot path).
   *
   * @param userId - Owner of the conversation.
   * @param sessionId - Session to read.
   * @param limit - Maximum number of most-recent messages to return (default 20).
   *   Values below 1 (or NaN) yield an empty array.
   * @returns Up to `limit` messages, oldest first. Entries that cannot be parsed
   *   or do not look like a stored message are logged and skipped.
   */
  async getRecentMessages(
    userId: string,
    sessionId: string,
    limit: number = 20
  ): Promise<StoredMessage[]> {
    // LRANGE treats a negative stop index as "from the end", so limit <= 0
    // would otherwise return (almost) the whole list instead of nothing.
    if (!(limit >= 1)) {
      return [];
    }

    const key = this.getRedisKey(userId, sessionId);
    // A stop index of -1 means "to the end of the list" (used for Infinity).
    const stop = Number.isFinite(limit) ? Math.floor(limit) - 1 : -1;
    const rawMessages = await this.redis.lrange(key, 0, stop);

    const parsed: StoredMessage[] = [];
    for (const raw of rawMessages) {
      let candidate: unknown;
      try {
        candidate = JSON.parse(raw);
      } catch (error) {
        this.logger.error({ err: error, message: raw }, 'Failed to parse message');
        continue;
      }

      if (isStoredMessage(candidate)) {
        parsed.push(candidate);
      } else {
        this.logger.error({ key }, 'Discarding stored message with unexpected shape');
      }
    }

    // The list is stored newest first; callers get it oldest first.
    return parsed.reverse();
  }

  /**
   * Get full conversation history from Iceberg (cold path).
   *
   * Iceberg is not wired up yet, so this currently falls back to the messages
   * held in Redis, filtered by `timeRange` when one is given.
   *
   * @param userId - Owner of the conversation.
   * @param sessionId - Session to read.
   * @param timeRange - Optional inclusive bounds, compared directly against
   *   `StoredMessage.timestamp` (i.e. in microseconds).
   * @returns Matching messages, oldest first.
   */
  async getFullHistory(
    userId: string,
    sessionId: string,
    timeRange?: { start: number; end: number }
  ): Promise<StoredMessage[]> {
    this.logger.debug({ userId, sessionId, timeRange }, 'Loading full history from Iceberg');

    // TODO: Implement Iceberg query
    // const table = this.iceberg.loadTable('gateway.conversations');
    // const filters = [
    //   EqualTo('user_id', userId),
    //   EqualTo('session_id', sessionId),
    // ];
    //
    // if (timeRange) {
    //   filters.push(GreaterThanOrEqual('timestamp', timeRange.start));
    //   filters.push(LessThanOrEqual('timestamp', timeRange.end));
    // }
    //
    // const df = await table.scan({
    //   row_filter: And(...filters)
    // }).to_pandas();
    //
    // if (!df.empty) {
    //   return df.sort_values('timestamp').to_dict('records');
    // }

    // Fallback to Redis if Iceberg not available. saveMessage trims each list
    // to HOT_MESSAGE_LIMIT entries, so that is everything Redis can hold.
    const hotMessages = await this.getRecentMessages(userId, sessionId, this.HOT_MESSAGE_LIMIT);
    if (!timeRange) {
      return hotMessages;
    }

    // Honour the requested window in the fallback too, mirroring the planned
    // Iceberg filter above.
    return hotMessages.filter(
      (msg) => msg.timestamp >= timeRange.start && msg.timestamp <= timeRange.end
    );
  }

  /**
   * Convert stored messages to LangChain message format.
   *
   * @param messages - Stored messages, in the order they should appear.
   * @returns One LangChain message per input, in the same order.
   * @throws If a message carries a role other than user, assistant or system.
   */
  toLangChainMessages(messages: StoredMessage[]): BaseMessage[] {
    return messages.map((msg) => {
      switch (msg.role) {
        case 'user':
          return new HumanMessage(msg.content);
        case 'assistant':
          return new AIMessage(msg.content);
        case 'system':
          return new SystemMessage(msg.content);
        default: {
          // Compile-time exhaustiveness check; still guards bad runtime data.
          const unknownRole: never = msg.role;
          throw new Error(`Unknown role: ${String(unknownRole)}`);
        }
      }
    });
  }

  /**
   * Delete all messages for a session (Redis only, Iceberg handled separately)
   */
  async deleteSession(userId: string, sessionId: string): Promise<void> {
    this.logger.info({ userId, sessionId }, 'Deleting session from Redis');
    const key = this.getRedisKey(userId, sessionId);
    await this.redis.del(key);
  }

  /**
   * Delete all messages for a user (GDPR compliance).
   *
   * Only the Redis copies are removed; the Iceberg delete is not implemented yet.
   */
  async deleteUserData(userId: string): Promise<void> {
    this.logger.info({ userId }, 'Deleting all user messages for GDPR compliance');

    // Delete from Redis.
    // The user id is escaped so glob characters in it cannot match other
    // users' keys.
    // NOTE(review): a user id containing ':' could still share a key prefix
    // with another user - confirm ids never contain colons.
    const pattern = `conv:${escapeRedisGlob(userId)}:*`;

    // Iterate with SCAN instead of KEYS so Redis is not blocked while the
    // whole keyspace is walked in a single command.
    let cursor = '0';
    do {
      const [nextCursor, keys] = await this.redis.scan(
        cursor,
        'MATCH',
        pattern,
        'COUNT',
        SCAN_BATCH_SIZE
      );
      if (keys.length > 0) {
        await this.redis.del(...keys);
      }
      cursor = nextCursor;
    } while (cursor !== '0');

    // Delete from Iceberg
    // Note: For GDPR compliance, need to:
    // 1. Send delete command via Kafka OR
    // 2. Use Iceberg REST API to delete rows (if supported) OR
    // 3. Coordinate with Flink job to handle deletes
    //
    // Iceberg delete flow:
    // - Mark rows for deletion (equality delete files)
    // - Run compaction to physically remove
    // - Expire old snapshots
    this.logger.info(
      { userId },
      'User messages deleted from Redis - Iceberg GDPR delete not yet implemented'
    );
  }

  /**
   * Get Redis key for conversation
   */
  private getRedisKey(userId: string, sessionId: string): string {
    return `conv:${userId}:${sessionId}`;
  }

  /**
   * Append message to Iceberg for durable storage
   *
   * Note: For production, send to Kafka topic that Flink consumes:
   * - Topic: gateway_conversations
   * - Flink job writes to gateway.conversations Iceberg table
   * - Ensures consistent write pattern with rest of system
   */
  private async appendToIceberg(message: StoredMessage): Promise<void> {
    // TODO: Send to Kafka topic for Flink processing
    // const kafkaMessage = {
    //   id: message.id,
    //   user_id: message.userId,
    //   session_id: message.sessionId,
    //   role: message.role,
    //   content: message.content,
    //   metadata: JSON.stringify(message.metadata || {}),
    //   timestamp: message.timestamp,
    // };
    // await this.kafkaProducer.send({
    //   topic: 'gateway_conversations',
    //   messages: [{ value: JSON.stringify(kafkaMessage) }]
    // });
    this.logger.debug(
      { messageId: message.id, userId: message.userId, sessionId: message.sessionId },
      'Message append to Iceberg (via Kafka) not yet implemented'
    );
  }

  /**
   * Get conversation statistics from the messages currently held in Redis.
   *
   * @returns `messageCount` is the length of the Redis list. `firstMessage` and
   *   `lastMessage` are the earliest and latest timestamps among the entries
   *   that could be parsed, and are omitted when there are none.
   */
  async getStats(userId: string, sessionId: string): Promise<{
    messageCount: number;
    firstMessage?: Date;
    lastMessage?: Date;
  }> {
    const key = this.getRedisKey(userId, sessionId);
    const count = await this.redis.llen(key);

    if (count === 0) {
      return { messageCount: 0 };
    }

    const messages = await this.getRecentMessages(userId, sessionId, count);
    if (messages.length === 0) {
      // Nothing parseable (or the list expired in between): report the count
      // without producing Invalid Date values.
      return { messageCount: count };
    }

    let earliestMs = Number.POSITIVE_INFINITY;
    let latestMs = Number.NEGATIVE_INFINITY;
    for (const msg of messages) {
      const timestampMs = msg.timestamp / 1000; // Convert to milliseconds
      earliestMs = Math.min(earliestMs, timestampMs);
      latestMs = Math.max(latestMs, timestampMs);
    }

    return {
      messageCount: count,
      firstMessage: new Date(earliestMs),
      lastMessage: new Date(latestMs),
    };
  }
}