Zero‑to‑Hero: Build a RAG App with Node.js + Express + Gemini + Postgres (pgvector)

What we’re building

A small REST API that can answer questions about your own local docs by using RAG (Retrieval‑Augmented Generation).

Flow:

/docs/*.md|.txt|.html  --(ingest)--> Postgres (text + embeddings)
                                     ^
                              Gemini embeddings (768‑d)

POST /query  -> embed query -> vector search (pgvector) -> top K chunks ->
                                                Gemini answers only from context

Why this is cool: you can get the model to answer questions about private facts it has never seen, because you feed it those facts at query time.
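In miniature, the query-time flow is just four steps. Here's a sketch using the helpers we'll write below (askGemini is a hypothetical stand-in for the Gemini call we build in src/server.js):

// RAG in four lines (sketch; embedQuery and searchByEmbedding come later)
const qVec = await embedQuery(question);                 // 1. embed the question
const chunks = await searchByEmbedding(qVec, 6);         // 2. vector-search for similar chunks
const context = chunks.map(c => c.content).join("\n\n"); // 3. assemble the context
const answer = await askGemini(context, question);       // 4. answer only from that context (hypothetical helper)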


Prerequisites

  • Node.js 18+

  • PostgreSQL locally (or remote) and the pgvector extension installed

  • A Gemini API key (from Google AI Studio)

New to Postgres? You can use pgAdmin or psql. Make sure you can connect and run SQL in the target database.
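For example, with psql (substitute your own connection string):

psql "postgres://USER:PASS@localhost:5432/ragdemo" -c "SELECT version();"

If that prints a PostgreSQL version string, you're ready.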


0) Make an empty project

mkdir rag-local-gemini && cd rag-local-gemini
npm init -y

Install dependencies:

npm i express @google/genai pg dotenv html-to-text glob
npm i -D nodemon

package.json (add scripts + use ESM):

{
  "name": "rag-local-gemini",
  "version": "1.0.0",
  "type": "module",
  "scripts": {
    "dev": "nodemon src/server.js",
    "ingest": "node src/ingest.js",
    "start": "node src/server.js"
  }
}

Create folders:

mkdir -p src docs

Create .env:

PORT=3000
DATABASE_URL=postgres://USER:PASS@localhost:5432/ragdemo
GEMINI_API_KEY=AIzaSy...your_key...
DOCS_GLOB=docs/**/*.{md,txt,html}
EMBED_DIM=768

Replace USER:PASS and database name with your actual values.
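If the ragdemo database doesn't exist yet, create it first, e.g. with the createdb tool that ships with Postgres (the name here matches the .env above; adjust to yours):

createdb ragdemo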


1) Prepare Postgres (tables + indexes)

Connect to the DB in your .env and run once:

CREATE EXTENSION IF NOT EXISTS vector;

CREATE TABLE IF NOT EXISTS local_chunks (
  id           BIGSERIAL PRIMARY KEY,
  doc_path     TEXT NOT NULL,
  title        TEXT,
  url          TEXT,
  chunk_index  INT  NOT NULL,
  content      TEXT NOT NULL,
  embedding    vector(768),
  updated_at   TIMESTAMPTZ DEFAULT now()
);

-- Prevent duplicate chunks for the same file
CREATE UNIQUE INDEX IF NOT EXISTS local_chunks_unique
  ON local_chunks (doc_path, chunk_index);

-- Fast approximate nearest‑neighbor search (cosine)
CREATE INDEX IF NOT EXISTS local_chunks_embed_hnsw
  ON local_chunks USING hnsw (embedding vector_cosine_ops);
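Note: HNSW indexes require pgvector 0.5.0 or newer. On an older pgvector, an IVFFlat index is a workable fallback (a sketch; lists = 100 is a starting point, not a tuned value, and IVFFlat gives its best recall when built after the table already has data):

-- Fallback for pgvector < 0.5.0 (build after ingesting for better recall)
CREATE INDEX IF NOT EXISTS local_chunks_embed_ivf
  ON local_chunks USING ivfflat (embedding vector_cosine_ops)
  WITH (lists = 100);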

2) Add example docs (private knowledge)

Create docs/intro.md:

# Profile: Muhammad Ilyas

This document captures key, internal details about **Muhammad Ilyas** to be used by our RAG demo.

Create docs/faq.md:

# Muhammad Ilyas – FAQ
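
Fill the FAQ with a few private facts the base model can't possibly know. The sample queries in step 4 assume facts about roles, leads, and mentors, so placeholder entries like these work (replace the answers with your own):

**Q: Who leads Muhammad Ilyas currently, and who mentors him?**
A: <your answer here>

**Q: What was his previous role, and who led that team?**
A: <your answer here>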

3) Code the app (copy these files into src/)

src/util.js

import { htmlToText } from "html-to-text";
import fs from "fs";

export function htmlToPlain(html) {
  return htmlToText(html, {
    wordwrap: false,
    selectors: [{ selector: "a", options: { ignoreHref: true } }],
  }).replace(/\n{3,}/g, "\n\n").trim();
}

export function readFileSmart(path) {
  const raw = fs.readFileSync(path, "utf8");
  if (path.endsWith(".html")) return htmlToPlain(raw);
  return raw; // md/txt are already plain
}

// Simple character-based chunking
export function chunk(text, maxChars = 1500, overlap = 150) {
  const chunks = [];
  let i = 0;
  while (i < text.length) {
    const end = Math.min(i + maxChars, text.length);
    chunks.push(text.slice(i, end));
    if (end === text.length) break;
    i = end - overlap;
  }
  return chunks;
}
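A quick sanity check of the overlap math (a scratch snippet, not part of the app): with the defaults, a 3,000-character string splits into chunks starting at 0, 1350, and 2700.

const pieces = chunk("x".repeat(3000)); // maxChars 1500, overlap 150
console.log(pieces.length);             // 3
console.log(pieces.map(p => p.length)); // [ 1500, 1500, 300 ]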

src/embed.js

import { GoogleGenAI } from "@google/genai";
import "dotenv/config";

const genai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const DIM = Number(process.env.EMBED_DIM || 768);

export async function embedTexts(texts) {
  const res = await genai.models.embedContent({
    model: "gemini-embedding-001",
    contents: texts,
    config: { outputDimensionality: DIM, taskType: "RETRIEVAL_DOCUMENT" }
  });
  return res.embeddings.map(e => e.values);
}

export async function embedQuery(text) {
  const res = await genai.models.embedContent({
    model: "gemini-embedding-001",
    contents: text,
    config: { outputDimensionality: DIM, taskType: "RETRIEVAL_QUERY" }
  });
  return res.embeddings[0].values;
}
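One caveat: a single embedContent call caps how many texts it will accept, so ingesting a very large file in one shot can fail. Here's a minimal batching wrapper you could add to src/embed.js (the batch size of 100 is an assumption; check the current API limits for your account):

// Hypothetical helper: embed in batches to stay under per-request limits
export async function embedTextsBatched(texts, batchSize = 100) {
  const all = [];
  for (let i = 0; i < texts.length; i += batchSize) {
    all.push(...(await embedTexts(texts.slice(i, i + batchSize))));
  }
  return all;
}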

src/db.js

import { Pool } from "pg";
import "dotenv/config";

export const pool = new Pool({ connectionString: process.env.DATABASE_URL });

export async function initDb() {
  await pool.query(`CREATE EXTENSION IF NOT EXISTS vector`);
}

export async function insertChunks(rows) {
  const client = await pool.connect();
  try {
    await client.query("BEGIN");
    for (const r of rows) {
      const embeddingLiteral = `[${r.embedding.join(",")}]`;
      await client.query(
        `
        INSERT INTO local_chunks
          (doc_path, title, url, chunk_index, content, embedding, updated_at)
        VALUES ($1, $2, $3, $4, $5, CAST($6 AS vector), now())
        ON CONFLICT (doc_path, chunk_index)
        DO UPDATE SET
          title = EXCLUDED.title,
          url = EXCLUDED.url,
          content = EXCLUDED.content,
          embedding = EXCLUDED.embedding,
          updated_at = now()
        `,
        [r.docPath, r.title || null, r.url || null, r.chunkIndex, r.content, embeddingLiteral]
      );
    }
    await client.query("COMMIT");
  } catch (e) {
    await client.query("ROLLBACK");
    throw e;
  } finally {
    client.release();
  }
}

export async function searchByEmbedding(embedding, k = 6) {
  const embeddingLiteral = `[${embedding.join(",")}]`;
  const { rows } = await pool.query(
    `
    SELECT id, doc_path, title, url, content, chunk_index
    FROM local_chunks
    -- <=> is cosine distance, matching the vector_cosine_ops HNSW index above
    ORDER BY embedding <=> CAST($1 AS vector)
    LIMIT $2
    `,
    [embeddingLiteral, k]
  );
  return rows;
}
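If you also want a relevance score per hit, select the distance too. In pgvector, <=> returns cosine distance, which is 1 minus cosine similarity, so a variant like this (a sketch, not used by the app below) adds a similarity column:

export async function searchWithScores(embedding, k = 6) {
  const embeddingLiteral = `[${embedding.join(",")}]`;
  const { rows } = await pool.query(
    `
    SELECT id, doc_path, title, url, content, chunk_index,
           1 - (embedding <=> CAST($1 AS vector)) AS similarity
    FROM local_chunks
    ORDER BY embedding <=> CAST($1 AS vector)
    LIMIT $2
    `,
    [embeddingLiteral, k]
  );
  return rows;
}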

src/ingest.js

import "dotenv/config";
import path from "path";
import { glob } from "glob";
import { readFileSmart, chunk } from "./util.js";
import { embedTexts } from "./embed.js";
import { initDb, insertChunks } from "./db.js";

async function main() {
  await initDb();

  const pattern = process.env.DOCS_GLOB || "docs/**/*.{md,txt,html}";
  const files = await glob(pattern, { nodir: true });
  console.log(`Found ${files.length} files`);

  for (const file of files) {
    const text = readFileSmart(file).trim();
    if (!text) continue;

    const title = path.basename(file);
    const pieces = chunk(text, 1500, 150);
    const embeddings = await embedTexts(pieces);

    const rows = pieces.map((content, i) => ({
      docPath: file,
      title,
      url: null,
      chunkIndex: i,
      content,
      embedding: embeddings[i],
    }));

    await insertChunks(rows);
    console.log(`Ingested ${title} (${pieces.length} chunks)`);
  }
}

main().catch(e => { console.error(e); process.exit(1); });
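One gap in this ingest script: chunks for files you've since deleted stay in the table forever. Here's a small prune step you could call at the end of main() (a sketch; it assumes doc_path values match exactly what the glob returned, as above):

import { pool } from "./db.js";

// Optional cleanup: drop chunks whose source file no longer exists
async function pruneDeleted(files) {
  if (files.length === 0) return; // never wipe the table on an empty glob
  const { rowCount } = await pool.query(
    `DELETE FROM local_chunks WHERE NOT (doc_path = ANY($1::text[]))`,
    [files]
  );
  if (rowCount) console.log(`Pruned ${rowCount} stale chunks`);
}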

src/server.js

import "dotenv/config";
import express from "express";
import { initDb, searchByEmbedding } from "./db.js";
import { embedQuery } from "./embed.js";
import { GoogleGenAI } from "@google/genai";

const app = express();
app.use(express.json());

const genai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const SYSTEM_PROMPT = `
You are a helpful assistant that must answer using ONLY the provided context.
- If info is missing, say: "I don't find this in the knowledge base."
- Cite sources at the end as [Title](Path).
- Keep answers concise and factual.
`;

app.post("/query", async (req, res) => {
  try {
    const q = req.body?.q?.toString?.().trim();
    if (!q) return res.status(400).json({ error: "Missing q" });

    const qEmbed = await embedQuery(q);
    const topK = await searchByEmbedding(qEmbed, 6);

    const context = topK.map((r, i) => `### Doc ${i+1}: ${r.title}\nPath: ${r.doc_path}\n---\n${r.content}`).join("\n\n");

    const prompt = `${SYSTEM_PROMPT}\n\n# Question\n${q}\n\n# Context\n${context}\n\nAnswer:`;

    const result = await genai.models.generateContent({
      model: "gemini-1.5-pro", // or "gemini-1.5-flash" for speed
      contents: prompt,
    });

    const answer = (result.text ?? "").trim();
    return res.json({
      answer,
      sources: topK.map(r => ({ title: r.title, path: r.doc_path }))
    });
  } catch (e) {
    console.error(e);
    res.status(500).json({ error: e.message || "Server error" });
  }
});

const port = Number(process.env.PORT || 3000);
await initDb(); // runs once at startup (top-level await works in ESM)
app.listen(port, () => console.log(`RAG server on http://localhost:${port}`));

4) Run it

  1. Ingest your docs (creates/updates chunks):
npm run ingest

Expected:

Found 2 files
Ingested intro.md (X chunks)
Ingested faq.md (Y chunks)

  2. Start the API:
npm run dev   # or npm start

Console:

RAG server on http://localhost:3000

  3. Ask questions:
curl -s http://localhost:3000/query -H "Content-Type: application/json" \
  -d '{"q":"Who leads Muhammad Ilyas currently and who mentors him?"}' | jq

curl -s http://localhost:3000/query -H "Content-Type: application/json" \
  -d '{"q":"What was his previous role and who led that team?"}' | jq

You should see an answer plus a sources array pointing at docs/intro.md or docs/faq.md.

Change a fact in the docs, re‑run npm run ingest, and ask again: the answer updates immediately. That's the freshness RAG gives you.


Zero‑to‑Hero checklist ✅

  • Node + deps installed

  • .env filled (DB + Gemini key)

  • Postgres table + pgvector index created

  • Docs added

  • npm run ingest OK

  • npm run dev OK

  • Queries return answers + sources

As a former 3D Animator with more than 12 years of experience, I have always been fascinated by the intersection of technology and creativity. That's why I recently shifted my career towards MERN stack development and software engineering, where I have been serving since 2021. With my background in 3D animation, I bring a unique perspective to software development, combining creativity and technical expertise to build innovative and visually engaging applications. I have a passion for learning and staying up-to-date with the latest technologies and best practices, and I enjoy collaborating with cross-functional teams to solve complex problems and create seamless user experiences. In my current role as a MERN stack developer, I have been responsible for developing and implementing web applications using MongoDB, Express, React, and Node.js. I have also gained experience in Agile development methodologies, version control with Git, and cloud-based deployment using platforms like Heroku and AWS. I am committed to delivering high-quality work that meets the needs of both clients and end-users, and I am always seeking new challenges and opportunities to grow both personally and professionally.