Project: Local Document Vector Search
In this project, we will build a completely offline RAG (Retrieval-Augmented Generation) script. The application reads local knowledge documents, converts their text into vector embeddings using Ollama, runs a semantic search over those embeddings, and feeds the best-matching context into a local LLM.
1. RAG Core Pipeline
graph TD
A[Raw local document text] --> B[Generate Embeddings via Ollama API]
B --> C[Compute cosine similarity of query]
C --> D[Retrieve top matching text chunks]
D --> E[Pass retrieved context to Qwen model]
2. Implementing the RAG Engine
Write the helper to compute vectors and search document chunks locally:
// src/services/localRag.ts
import ollama from "@ollama/ollama";
/**
 * One indexed piece of a source document: the raw text together with the
 * embedding vector that was computed for it.
 */
interface DocumentChunk {
  /** The chunk's original text, later pasted into the prompt as context. */
  text: string;
  /** Embedding vector for `text`, compared against the query embedding. */
  embedding: Array<number>;
}
/**
 * Computes the cosine similarity between two equal-length numeric vectors.
 *
 * @param vecA - First vector.
 * @param vecB - Second vector; must have the same length as `vecA`.
 * @returns The cosine of the angle between the vectors (about -1 to 1), or `0`
 *   when either vector has zero magnitude (this includes two empty vectors),
 *   where the similarity is mathematically undefined.
 * @throws RangeError if the two vectors differ in length.
 */
function cosineSimilarity(vecA: number[], vecB: number[]): number {
  // Comparing vectors of different dimensionality is always a caller bug
  // (e.g. embeddings from two different models); fail loudly instead of
  // returning NaN or a silently truncated result.
  if (vecA.length !== vecB.length) {
    throw new RangeError(
      `cosineSimilarity: vector length mismatch (${vecA.length} vs ${vecB.length})`,
    );
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < vecA.length; i++) {
    const a = vecA[i];
    const b = vecB[i];
    dotProduct += a * b;
    normA += a * a;
    normB += b * b;
  }

  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero-magnitude vector would make this 0 / 0 = NaN, which breaks numeric
  // sort comparators downstream; report "no similarity" instead.
  if (magnitude === 0) {
    return 0;
  }
  return dotProduct / magnitude;
}
/** Optional tuning knobs for `buildLocalRagSystem`. */
export interface LocalRagOptions {
  /** Model that writes the final answer. Defaults to `"qwen2.5"`. */
  chatModel?: string;
  /**
   * Model used to embed both the chunks and the query. Defaults to `"qwen2.5"`.
   * NOTE(review): a dedicated embedding model usually retrieves better than a
   * chat model — confirm which models are installed locally before changing it.
   */
  embeddingModel?: string;
  /** How many of the best-scoring chunks are passed as context. Defaults to `2`. */
  topK?: number;
}

/**
 * Runs one retrieval-augmented query: embeds every chunk, embeds the query,
 * ranks the chunks by cosine similarity to the query, and asks the chat model
 * to answer using the top-ranked chunks as context.
 *
 * @param documentChunks - Text chunks to search. Blank chunks are ignored.
 * @param userQuery - The question to answer.
 * @param options - Optional model names and context size; see `LocalRagOptions`.
 * @returns The text content of the chat model's reply.
 */
export async function buildLocalRagSystem(
  documentChunks: string[],
  userQuery: string,
  options: LocalRagOptions = {},
): Promise<string> {
  const { chatModel = "qwen2.5", embeddingModel = "qwen2.5", topK = 2 } = options;

  const embed = async (text: string): Promise<number[]> => {
    const response = await ollama.embeddings({ model: embeddingModel, prompt: text });
    return response.embedding;
  };

  // 1. Convert all text chunks into embeddings.
  // Requests are awaited one at a time so the local server only ever handles
  // a single embedding request from this function at once.
  const database: DocumentChunk[] = [];
  for (const chunk of documentChunks) {
    if (chunk.trim() === "") {
      continue; // nothing meaningful to embed or to show as context
    }
    const embedding = await embed(chunk);
    if (embedding.length === 0) {
      continue; // an empty vector cannot be compared with the query
    }
    database.push({ text: chunk, embedding });
  }

  // 2. Generate embedding vector for the user query.
  // Skipped when nothing was indexed: there is nothing to rank against.
  const queryVector: number[] = database.length > 0 ? await embed(userQuery) : [];

  // 3. Compute similarities and sort chunks, best match first.
  // Only same-dimension vectors are scored, and non-finite scores are dropped,
  // because a NaN score makes the sort comparator inconsistent.
  const matches = database
    .filter((chunk) => chunk.embedding.length === queryVector.length)
    .map((chunk) => ({
      text: chunk.text,
      score: cosineSimilarity(queryVector, chunk.embedding),
    }))
    .filter((match) => Number.isFinite(match.score))
    .sort((a, b) => b.score - a.score);

  const limit = Math.max(0, Math.trunc(topK));
  const bestContextText = matches
    .slice(0, limit)
    .map((match) => match.text)
    .join("\n\n");

  // 4. Query local model passing retrieved context data.
  // Built by joining lines so the prompt text does not depend on how this
  // source file happens to be indented.
  const finalPrompt = [
    "",
    "Use the following verified context documents to answer the question:",
    "---",
    bestContextText,
    "---",
    `Question: ${userQuery}`,
    "",
  ].join("\n");

  const finalResponse = await ollama.chat({
    model: chatModel,
    messages: [{ role: "user", content: finalPrompt }],
  });
  return finalResponse.message.content;
}
3. Testing Local Document Queries
Call the function passing mock document chunks:
// Mock knowledge base: three unrelated facts, one of which answers the question.
const sampleChunks: string[] = [
  "Company policy: employees can work from home on Fridays.",
  "Office location: the main office is in Seattle.",
  "Financial calendar: the fiscal year ends on December 31.",
];
const question = "Can I work remotely on Friday?";

// Run the full pipeline (embed, rank, answer) and print the model's reply.
const ragAnswer = await buildLocalRagSystem(sampleChunks, question);
console.log("Local RAG Answer:", ragAnswer);
Published on Last updated: