// 253 lines, 7.5 KiB, TypeScript
import type Redis from 'ioredis';
|
|
import type { FastifyBaseLogger } from 'fastify';
|
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';
|
|
|
|
/**
|
|
* Message record for storage
|
|
*/
|
|
export interface StoredMessage {
  /** Unique identifier for this message. */
  id: string;
  /** Owner of the conversation. */
  userId: string;
  /** Conversation session this message belongs to. */
  sessionId: string;
  /** Author of the message. */
  role: 'user' | 'assistant' | 'system';
  /** Message text. */
  content: string;
  timestamp: number; // microseconds (Iceberg convention)
  /** Optional free-form metadata attached at save time. */
  metadata?: Record<string, unknown>;
}
|
|
|
|
/**
|
|
* Conversation store: Redis (hot) + Iceberg (cold)
|
|
*
|
|
* Hot path: Recent messages in Redis for fast access
|
|
* Cold path: Full history in Iceberg for durability and analytics
|
|
*
|
|
* Architecture:
|
|
* - Redis stores last N messages per session with TTL
|
|
* - Iceberg stores all messages partitioned by user_id, session_id
|
|
* - Supports time-travel queries for debugging and analysis
|
|
*/
|
|
export class ConversationStore {
|
|
private readonly HOT_MESSAGE_LIMIT = 50; // Keep last 50 messages in Redis
|
|
private readonly HOT_TTL_SECONDS = 3600; // 1 hour
|
|
|
|
constructor(
|
|
private redis: Redis,
|
|
private logger: FastifyBaseLogger
|
|
// TODO: Add Iceberg catalog
|
|
// private iceberg: IcebergCatalog
|
|
) {}
|
|
|
|
/**
|
|
* Save a message to both Redis and Iceberg
|
|
*/
|
|
async saveMessage(
|
|
userId: string,
|
|
sessionId: string,
|
|
role: 'user' | 'assistant' | 'system',
|
|
content: string,
|
|
metadata?: Record<string, unknown>
|
|
): Promise<void> {
|
|
const message: StoredMessage = {
|
|
id: `${userId}:${sessionId}:${Date.now()}`,
|
|
userId,
|
|
sessionId,
|
|
role,
|
|
content,
|
|
timestamp: Date.now() * 1000, // Convert to microseconds
|
|
metadata,
|
|
};
|
|
|
|
this.logger.debug({ userId, sessionId, role }, 'Saving message');
|
|
|
|
// Hot: Add to Redis list (LPUSH for newest first)
|
|
const key = this.getRedisKey(userId, sessionId);
|
|
await this.redis.lpush(key, JSON.stringify(message));
|
|
|
|
// Trim to keep only recent messages
|
|
await this.redis.ltrim(key, 0, this.HOT_MESSAGE_LIMIT - 1);
|
|
|
|
// Set TTL
|
|
await this.redis.expire(key, this.HOT_TTL_SECONDS);
|
|
|
|
// Cold: Async append to Iceberg
|
|
this.appendToIceberg(message).catch((error) => {
|
|
this.logger.error({ error, userId, sessionId }, 'Failed to append message to Iceberg');
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Get recent messages from Redis (hot path)
|
|
*/
|
|
async getRecentMessages(
|
|
userId: string,
|
|
sessionId: string,
|
|
limit: number = 20
|
|
): Promise<StoredMessage[]> {
|
|
const key = this.getRedisKey(userId, sessionId);
|
|
const messages = await this.redis.lrange(key, 0, limit - 1);
|
|
|
|
return messages
|
|
.map((msg) => {
|
|
try {
|
|
return JSON.parse(msg) as StoredMessage;
|
|
} catch (error) {
|
|
this.logger.error({ error, message: msg }, 'Failed to parse message');
|
|
return null;
|
|
}
|
|
})
|
|
.filter((msg): msg is StoredMessage => msg !== null)
|
|
.reverse(); // Oldest first
|
|
}
|
|
|
|
/**
|
|
* Get full conversation history from Iceberg (cold path)
|
|
*/
|
|
async getFullHistory(
|
|
userId: string,
|
|
sessionId: string,
|
|
timeRange?: { start: number; end: number }
|
|
): Promise<StoredMessage[]> {
|
|
this.logger.debug({ userId, sessionId, timeRange }, 'Loading full history from Iceberg');
|
|
|
|
// TODO: Implement Iceberg query
|
|
// const table = this.iceberg.loadTable('gateway.conversations');
|
|
// const filters = [
|
|
// EqualTo('user_id', userId),
|
|
// EqualTo('session_id', sessionId),
|
|
// ];
|
|
//
|
|
// if (timeRange) {
|
|
// filters.push(GreaterThanOrEqual('timestamp', timeRange.start));
|
|
// filters.push(LessThanOrEqual('timestamp', timeRange.end));
|
|
// }
|
|
//
|
|
// const df = await table.scan({
|
|
// row_filter: And(...filters)
|
|
// }).to_pandas();
|
|
//
|
|
// if (!df.empty) {
|
|
// return df.sort_values('timestamp').to_dict('records');
|
|
// }
|
|
|
|
// Fallback to Redis if Iceberg not available
|
|
return await this.getRecentMessages(userId, sessionId, 1000);
|
|
}
|
|
|
|
/**
|
|
* Convert stored messages to LangChain message format
|
|
*/
|
|
toLangChainMessages(messages: StoredMessage[]): BaseMessage[] {
|
|
return messages.map((msg) => {
|
|
switch (msg.role) {
|
|
case 'user':
|
|
return new HumanMessage(msg.content);
|
|
case 'assistant':
|
|
return new AIMessage(msg.content);
|
|
case 'system':
|
|
return new SystemMessage(msg.content);
|
|
default:
|
|
throw new Error(`Unknown role: ${msg.role}`);
|
|
}
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Delete all messages for a session (Redis only, Iceberg handled separately)
|
|
*/
|
|
async deleteSession(userId: string, sessionId: string): Promise<void> {
|
|
this.logger.info({ userId, sessionId }, 'Deleting session from Redis');
|
|
const key = this.getRedisKey(userId, sessionId);
|
|
await this.redis.del(key);
|
|
}
|
|
|
|
/**
|
|
* Delete all messages for a user (GDPR compliance)
|
|
*/
|
|
async deleteUserData(userId: string): Promise<void> {
|
|
this.logger.info({ userId }, 'Deleting all user messages for GDPR compliance');
|
|
|
|
// Delete from Redis
|
|
const pattern = `conv:${userId}:*`;
|
|
const keys = await this.redis.keys(pattern);
|
|
if (keys.length > 0) {
|
|
await this.redis.del(...keys);
|
|
}
|
|
|
|
// Delete from Iceberg
|
|
// Note: For GDPR compliance, need to:
|
|
// 1. Send delete command via Kafka OR
|
|
// 2. Use Iceberg REST API to delete rows (if supported) OR
|
|
// 3. Coordinate with Flink job to handle deletes
|
|
//
|
|
// Iceberg delete flow:
|
|
// - Mark rows for deletion (equality delete files)
|
|
// - Run compaction to physically remove
|
|
// - Expire old snapshots
|
|
|
|
this.logger.info({ userId }, 'User messages deleted from Redis - Iceberg GDPR delete not yet implemented');
|
|
}
|
|
|
|
/**
|
|
* Get Redis key for conversation
|
|
*/
|
|
private getRedisKey(userId: string, sessionId: string): string {
|
|
return `conv:${userId}:${sessionId}`;
|
|
}
|
|
|
|
/**
|
|
* Append message to Iceberg for durable storage
|
|
*
|
|
* Note: For production, send to Kafka topic that Flink consumes:
|
|
* - Topic: gateway_conversations
|
|
* - Flink job writes to gateway.conversations Iceberg table
|
|
* - Ensures consistent write pattern with rest of system
|
|
*/
|
|
private async appendToIceberg(message: StoredMessage): Promise<void> {
|
|
// TODO: Send to Kafka topic for Flink processing
|
|
// const kafkaMessage = {
|
|
// id: message.id,
|
|
// user_id: message.userId,
|
|
// session_id: message.sessionId,
|
|
// role: message.role,
|
|
// content: message.content,
|
|
// metadata: JSON.stringify(message.metadata || {}),
|
|
// timestamp: message.timestamp,
|
|
// };
|
|
// await this.kafkaProducer.send({
|
|
// topic: 'gateway_conversations',
|
|
// messages: [{ value: JSON.stringify(kafkaMessage) }]
|
|
// });
|
|
|
|
this.logger.debug(
|
|
{ messageId: message.id, userId: message.userId, sessionId: message.sessionId },
|
|
'Message append to Iceberg (via Kafka) not yet implemented'
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Get conversation statistics
|
|
*/
|
|
async getStats(userId: string, sessionId: string): Promise<{
|
|
messageCount: number;
|
|
firstMessage?: Date;
|
|
lastMessage?: Date;
|
|
}> {
|
|
const key = this.getRedisKey(userId, sessionId);
|
|
const count = await this.redis.llen(key);
|
|
|
|
if (count === 0) {
|
|
return { messageCount: 0 };
|
|
}
|
|
|
|
const messages = await this.getRecentMessages(userId, sessionId, count);
|
|
const timestamps = messages.map((m) => m.timestamp / 1000); // Convert to milliseconds
|
|
|
|
return {
|
|
messageCount: count,
|
|
firstMessage: new Date(Math.min(...timestamps)),
|
|
lastMessage: new Date(Math.max(...timestamps)),
|
|
};
|
|
}
|
|
}
|