Project: AI Agent Long-Term Chat Memory Store
Large language models (LLMs) have limited context windows. To give an agent persistent conversation recall, we can store historical dialogues in Pinecone, using one namespace per user to isolate chats and record metadata to keep each message's original text (metadata can also be used to filter results).
1. System Architecture
- User input: User sends a new message to the chat interface.
- Retrieve context: The system converts the new message into a vector and queries the user's private namespace in Pinecone for the most relevant historical conversation pairs (the top 2 in the implementation below).
- Assemble context: The system appends these historical matches to the LLM prompt.
- Save dialog: The system generates a vector embedding for the new question–answer pair and saves it back to Pinecone for future retrieval.
2. Implementing the Memory Engine
// src/services/agentMemory.ts
import { pc } from "../lib/pinecone";
import { OpenAI } from "openai";
// Shared OpenAI client; the API key is taken from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// Handle to the Pinecone index that the memory functions in this file read and write.
const memoryIndexName = "agent-memory";
const index = pc.index(memoryIndexName);
/** Who authored a chat message. */
type ChatRole = "user" | "assistant";

/**
 * One message in a conversation: its author and its text.
 *
 * NOTE(review): not referenced by the code visible in this section — confirm
 * whether another part of the file uses it.
 */
interface ChatMessage {
  /** Author of the message. */
  role: ChatRole;
  /** Text of the message. */
  content: string;
}
// A. Save a chat message pair into Pinecone memory
/**
 * Embeds one user/assistant exchange and upserts it into the user's Pinecone namespace.
 *
 * The embedded text is `User: <userText>\nAssistant: <assistantText>`. The two raw
 * texts and a millisecond timestamp are stored as record metadata so the exchange
 * can be replayed into a prompt later.
 *
 * @param userId - Used verbatim as the Pinecone namespace name; must not be blank.
 * @param userText - The user's message.
 * @param assistantText - The assistant's reply to that message.
 * @throws Error if `userId` is blank or the embeddings response contains no vector.
 *   Errors from the OpenAI and Pinecone clients propagate unchanged.
 */
export async function saveMessageToMemory(
  userId: string,
  userText: string,
  assistantText: string
): Promise<void> {
  // An empty namespace name selects Pinecone's shared default namespace, which
  // would defeat per-user isolation, so reject blank ids up front.
  if (userId.trim() === "") {
    throw new Error("saveMessageToMemory: userId must be a non-empty string");
  }

  const userNamespace = index.namespace(userId);
  const combinedText = `User: ${userText}\nAssistant: ${assistantText}`;

  // Read the clock once so the id and the stored timestamp always agree.
  const timestamp = Date.now();
  // The random suffix keeps two saves in the same millisecond from sharing an id;
  // an upsert with an existing id would overwrite the earlier record.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10).padEnd(8, "0");
  const messageId = `msg_${timestamp}_${uniqueSuffix}`;

  // Generate vector embedding
  const response = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: combinedText,
  });
  const vector = response.data[0]?.embedding;
  if (!vector) {
    throw new Error("saveMessageToMemory: embeddings response contained no vector");
  }

  await userNamespace.upsert([
    {
      id: messageId,
      values: vector,
      metadata: {
        user_input: userText,
        assistant_response: assistantText,
        timestamp,
      },
    },
  ]);
  console.log(`Saved dialogue match: ${messageId}`);
}
// B. Query the user namespace memory to retrieve context
/**
 * Finds the stored exchanges most similar to the current query and formats them
 * as a text block for injection into an LLM prompt.
 *
 * @param userId - Used verbatim as the Pinecone namespace name; must not be blank.
 * @param currentQueryText - Text to embed and search with.
 * @param topK - Maximum number of matches to request from Pinecone (default 2).
 * @returns The matches formatted as `[Past Dialogue]` sections separated by blank
 *   lines, or `""` when the query is blank or no usable match is found.
 * @throws Error if `userId` is blank or the embeddings response contains no vector.
 *   Errors from the OpenAI and Pinecone clients propagate unchanged.
 */
export async function retrieveRelevantContext(
  userId: string,
  currentQueryText: string,
  topK: number = 2
): Promise<string> {
  // An empty namespace name selects Pinecone's shared default namespace, which
  // would search outside this user's private memory, so reject blank ids.
  if (userId.trim() === "") {
    throw new Error("retrieveRelevantContext: userId must be a non-empty string");
  }
  // The embeddings endpoint rejects empty input; with nothing to search for,
  // report "no context" instead of failing the whole chat turn.
  if (currentQueryText.trim() === "") {
    return "";
  }

  const userNamespace = index.namespace(userId);

  // Generate vector embedding for the active search query
  const response = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: currentQueryText,
  });
  const queryVector = response.data[0]?.embedding;
  if (!queryVector) {
    throw new Error("retrieveRelevantContext: embeddings response contained no vector");
  }

  // Search Pinecone user namespace
  const searchResults = await userNamespace.query({
    vector: queryVector,
    topK,
    includeMetadata: true,
  });

  // Format historical memories for LLM prompt context injection.
  // Metadata is optional on a match, so skip any record that lacks the two text
  // fields rather than throwing or printing "undefined" into the prompt.
  const pastDialogues: string[] = [];
  for (const match of searchResults.matches ?? []) {
    const meta: Record<string, unknown> | undefined = match.metadata;
    if (!meta) continue;
    const userInput = meta.user_input;
    const assistantResponse = meta.assistant_response;
    if (typeof userInput !== "string" || typeof assistantResponse !== "string") {
      continue;
    }
    pastDialogues.push(
      `[Past Dialogue]\nUser: ${userInput}\nAssistant: ${assistantResponse}`
    );
  }
  return pastDialogues.join("\n\n");
}