zoobzio January 20, 2025 Edit this page

Vector Search with PostgreSQL

PostgreSQL with the pgvector extension provides vector similarity search directly in your database. Rather than treating vectors as a separate storage concern, grub handles them as typed columns via Database[T].

Setup

Install the pgvector extension:

CREATE EXTENSION IF NOT EXISTS vector;

Create a table with a vector column:

CREATE TABLE documents (
    id BIGSERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    content TEXT NOT NULL,
    category TEXT NOT NULL,
    embedding vector(1536)
);

CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

Define Your Type

Map the table to a Go struct. The vector column uses a custom type with sql.Scanner and driver.Valuer:

type Document struct {
    ID        int64   `db:"id"`
    Title     string  `db:"title"`
    Content   string  `db:"content"`
    Category  string  `db:"category"`
    Embedding Vector  `db:"embedding"`
}

// Vector wraps []float32 for pgvector compatibility
type Vector []float32

func (v *Vector) Scan(src any) error {
    if src == nil {
        *v = nil
        return nil
    }
    s, ok := src.(string)
    if !ok {
        return fmt.Errorf("expected string, got %T", src)
    }
    // pgvector format: [1.0,2.0,3.0]
    var floats []float32
    if err := json.Unmarshal([]byte(s), &floats); err != nil {
        return err
    }
    *v = floats
    return nil
}

func (v Vector) Value() (driver.Value, error) {
    if v == nil {
        return nil, nil
    }
    // Convert to pgvector string format
    b, err := json.Marshal([]float32(v))
    if err != nil {
        return nil, err
    }
    return string(b), nil
}

Create the Database

import (
    "github.com/jmoiron/sqlx"
    "github.com/zoobz-io/astql/postgres"
    "github.com/zoobz-io/grub"
    _ "github.com/lib/pq"
)

db, _ := sqlx.Connect("postgres", "postgres://user:pass@localhost/mydb?sslmode=disable")
docs, _ := grub.NewDatabase[Document](db, "documents", postgres.New())

CRUD Operations

Standard operations work as expected:

// Insert
doc := &Document{
    Title:     "Introduction to Vectors",
    Content:   "Vectors represent points in high-dimensional space...",
    Category:  "tutorial",
    Embedding: embedding, // []float32 from your embedding model
}
docs.Set(ctx, "1", doc)

// Retrieve
doc, _ := docs.Get(ctx, "1")

// Delete
docs.Delete(ctx, "1")

Use the executor's soy interface for vector queries:

// Find 10 most similar documents
results, err := docs.Executor().Soy().Query().
    SelectExpr("embedding", "<=>", "query_vec", "distance").
    OrderByExpr("embedding", "<=>", "query_vec", "asc").
    Limit(10).
    Exec(ctx, map[string]any{
        "query_vec": queryEmbedding,
    })

Distance Operators

OperatorMetricUse Case
<->L2 (Euclidean)General similarity
<=>CosineNormalized embeddings
<#>Inner ProductWhen vectors are normalized
<+>L1 (Manhattan)Sparse vectors

Combine vector search with WHERE clauses:

results, err := docs.Executor().Soy().Query().
    SelectExpr("embedding", "<=>", "query_vec", "distance").
    Where("category", "=", "cat").
    OrderByExpr("embedding", "<=>", "query_vec", "asc").
    Limit(10).
    Exec(ctx, map[string]any{
        "query_vec": queryEmbedding,
        "cat":       "tutorial",
    })

Complete Example

package main

import (
    "context"
    "database/sql/driver"
    "encoding/json"
    "fmt"
    "log"

    "github.com/jmoiron/sqlx"
    "github.com/zoobz-io/astql/postgres"
    "github.com/zoobz-io/grub"
    _ "github.com/lib/pq"
)

type Vector []float32

func (v *Vector) Scan(src any) error {
    if src == nil {
        *v = nil
        return nil
    }
    s, ok := src.(string)
    if !ok {
        return fmt.Errorf("expected string, got %T", src)
    }
    var floats []float32
    if err := json.Unmarshal([]byte(s), &floats); err != nil {
        return err
    }
    *v = floats
    return nil
}

func (v Vector) Value() (driver.Value, error) {
    if v == nil {
        return nil, nil
    }
    b, _ := json.Marshal([]float32(v))
    return string(b), nil
}

type Document struct {
    ID        int64   `db:"id"`
    Title     string  `db:"title"`
    Category  string  `db:"category"`
    Embedding Vector  `db:"embedding"`
}

func main() {
    ctx := context.Background()

    conn, err := sqlx.Connect("postgres", "postgres://user:pass@localhost/mydb?sslmode=disable")
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    docs := grub.NewDatabase[Document](conn, "documents", postgres.New())

    // Insert a document with embedding
    doc := &Document{
        Title:     "Vector Search Guide",
        Category:  "tutorial",
        Embedding: Vector{0.1, 0.2, 0.3}, // Your actual embedding
    }
    if err := docs.Set(ctx, "1", doc); err != nil {
        log.Fatal(err)
    }

    // Search for similar documents
    queryVec := Vector{0.1, 0.2, 0.3}
    results, err := docs.Executor().Soy().Query().
        SelectExpr("embedding", "<=>", "query_vec", "distance").
        Where("category", "=", "cat").
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(5).
        Exec(ctx, map[string]any{
            "query_vec": queryVec,
            "cat":       "tutorial",
        })
    if err != nil {
        log.Fatal(err)
    }

    for _, r := range results {
        fmt.Printf("ID: %d, Title: %s\n", r.ID, r.Title)
    }
}

Why DatabaseT Instead of VectorProvider?

PostgreSQL with pgvector is fundamentally a relational database with vector support, not a dedicated vector store. Using Database[T]:

  • Type-safe columns — Vector is one column among many typed fields
  • Full SQL capabilities — Joins, CTEs, transactions, constraints
  • No serialization overhead — No marshal/unmarshal at provider boundary
  • Existing infrastructure — Use your PostgreSQL instance directly

For dedicated vector databases (Pinecone, Qdrant, Milvus, Weaviate), use Index[T] with their respective providers.