zoobzio January 20, 2026 Edit this page

pgvector

Recipe: Implement semantic search using PostgreSQL vector similarity.

Soy supports pgvector operators for embedding-based similarity search.

Setup

Install the pgvector extension:

CREATE EXTENSION IF NOT EXISTS vector;

Define a model with a vector column:

// Document models one row of the documents table. The db tags map fields to
// columns; type:"vector(1536)" declares a pgvector column sized for OpenAI
// text-embedding output (1536 dimensions).
type Document struct {
    ID        int       `db:"id" type:"serial" constraints:"primary key"`
    Title     string    `db:"title" type:"text"`
    Content   string    `db:"content" type:"text"`
    Embedding []float32 `db:"embedding" type:"vector(1536)"` // OpenAI dimensions
}

Create the soy instance:

documents, err := soy.New[Document](db, "documents")

Distance Operators

pgvector provides four distance measures:

| Operator | Name            | Use Case            |
|----------|-----------------|---------------------|
| `<->`    | L2 (Euclidean)  | General similarity  |
| `<#>`    | Inner product   | Normalized vectors  |
| `<=>`    | Cosine          | Text embeddings     |
| `<+>`    | L1 (Manhattan)  | Sparse vectors      |

Find documents similar to a query embedding:

// SearchDocuments returns up to limit documents ordered by cosine distance
// (<=>) between each row's embedding and queryEmbedding, nearest first.
func SearchDocuments(ctx context.Context, queryEmbedding []float32, limit int) ([]*Document, error) {
    params := map[string]any{"query_vec": queryEmbedding}
    q := documents.Query().
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(limit)
    return q.Exec(ctx, params)
}

The OrderByExpr method generates:

SELECT * FROM documents
ORDER BY embedding <=> :query_vec ASC
LIMIT :limit

Retrieving Distance Scores

Use SelectExpr to include computed distance values in results:

// SearchWithScores behaves like SearchDocuments but also projects the
// computed cosine distance into the result set as a "score" column.
func SearchWithScores(ctx context.Context, queryEmbedding []float32, limit int) ([]*Document, error) {
    params := map[string]any{"query_vec": queryEmbedding}
    q := documents.Query().
        SelectExpr("embedding", "<=>", "query_vec", "score").
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(limit)
    return q.Exec(ctx, params)
}

This generates:

SELECT *, "embedding" <=> :query_vec AS "score"
FROM documents
ORDER BY "embedding" <=> :query_vec ASC
LIMIT :limit

Note: The score column is added to the result set. To access it, you can either:

  • Add a Score field to your struct with the db:"score" tag
  • Use ExecAtom to get results as a map

Combine similarity with conditions:

// SearchByCategory restricts a nearest-first similarity search to published
// rows in the given category.
func SearchByCategory(ctx context.Context, embedding []float32, category string, limit int) ([]*Document, error) {
    params := map[string]any{
        "query_vec":    embedding,
        "category":     category,
        "is_published": true,
    }
    q := documents.Query().
        Where("category", "=", "category").
        Where("published", "=", "is_published").
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(limit)
    return q.Exec(ctx, params)
}

Distance Threshold

Filter by maximum distance:

// SearchWithinDistance is intended to keep only rows within maxDistance of
// the query vector.
//
// NOTE(review): the builder below never binds the "max_distance" parameter —
// Where("embedding", "<=>", "query_vec") appears to emit only the distance
// expression, not a boolean comparison against maxDistance, so the threshold
// is silently ignored. A raw WHERE clause or Go-side post-filtering is needed
// for a real threshold; confirm against soy's API before relying on this.
func SearchWithinDistance(ctx context.Context, embedding []float32, maxDistance float64) ([]*Document, error) {
    return documents.Query().
        Where("embedding", "<=>", "query_vec").  // This adds distance to WHERE
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(100).
        Exec(ctx, map[string]any{
            "query_vec":    embedding,
            "max_distance": maxDistance,
        })
}

Note: The example above never actually binds the max_distance parameter — the Where call shown only injects the distance expression. To enforce a real threshold (e.g. embedding <=> :query_vec < :max_distance), use a raw WHERE clause if your soy version supports one, or post-filter the results in Go.

With Pagination

Paginate similarity results:

// SearchResult is one page of similarity results plus a flag indicating
// whether a further page exists.
type SearchResult struct {
    Documents []*Document
    HasMore   bool
}

// SearchPaginated returns the requested page of nearest-neighbour results.
// It over-fetches by one row so HasMore can be derived without a COUNT query.
func SearchPaginated(ctx context.Context, embedding []float32, page, perPage int) (*SearchResult, error) {
    skip := (page - 1) * perPage
    fetch := perPage + 1 // one sentinel row to detect a following page

    rows, err := documents.Query().
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(fetch).
        Offset(skip).
        Exec(ctx, map[string]any{"query_vec": embedding})
    if err != nil {
        return nil, err
    }

    more := len(rows) > perPage
    if more {
        // Trim the sentinel; callers only ever see perPage results.
        rows = rows[:perPage]
    }

    return &SearchResult{Documents: rows, HasMore: more}, nil
}

Combine keyword and semantic search:

// HybridSearch runs a semantic (vector) query and a keyword (ILIKE) query,
// then merges the two candidate sets.
func HybridSearch(ctx context.Context, query string, embedding []float32) ([]*Document, error) {
    // Nearest 50 rows by cosine distance to the query embedding.
    bySimilarity, err := documents.Query().
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(50).
        Exec(ctx, map[string]any{"query_vec": embedding})
    if err != nil {
        return nil, err
    }

    // Up to 50 rows whose content contains the raw query text.
    byKeyword, err := documents.Query().
        Where("content", "ILIKE", "search_pattern").
        Limit(50).
        Exec(ctx, map[string]any{
            "search_pattern": "%" + query + "%",
        })
    if err != nil {
        return nil, err
    }

    // Merge and deduplicate (implement based on your needs)
    return mergeResults(bySimilarity, byKeyword), nil
}

From Spec

Build vector queries from specs:

// Describe the query declaratively; soy builds SQL from the spec.
spec := soy.QuerySpec{
    Fields: []string{"id", "title", "content"},
    Where: []soy.ConditionSpec{
        {Field: "category", Operator: "=", Param: "category"},
    },
    OrderBy: []soy.OrderBySpec{
        // Raw expression ordering: nearest rows by cosine distance first.
        {Expression: "embedding <=> :query_vec", Direction: "asc"},
    },
    Limit: intPtr(10),
}

// Named parameters referenced by the spec are bound at execution time.
results, err := documents.QueryFromSpec(spec).Exec(ctx, map[string]any{
    "category":  "technology",
    "query_vec": embedding,
})

Indexing

Create an index for efficient similarity search:

-- HNSW index (recommended for most cases)
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);

-- IVFFlat index (faster builds, slightly lower recall)
CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

Choose the index based on distance operator:

| Operator | Index Ops Class     |
|----------|---------------------|
| `<->`    | `vector_l2_ops`     |
| `<#>`    | `vector_ip_ops`     |
| `<=>`    | `vector_cosine_ops` |

Embedding Generation

Generate embeddings before insert:

// CreateDocument generates an embedding for content and inserts a new row
// carrying title, content, and that embedding.
func CreateDocument(ctx context.Context, title, content string) (*Document, error) {
    // The embedding comes from an external provider (example: OpenAI); it
    // must exist before the row is written.
    vec, err := generateEmbedding(content)
    if err != nil {
        return nil, err
    }

    return documents.Insert().Build().Exec(ctx, &Document{
        Title:     title,
        Content:   content,
        Embedding: vec,
    })
}

// generateEmbedding converts text into a fixed-dimension vector by calling an
// embedding API (OpenAI, Cohere, etc.). The returned slice length must match
// the column's declared dimensions, e.g. vector(1536).
//
// The original example had no return statement and did not compile; this stub
// panics loudly until a real provider is wired in.
func generateEmbedding(text string) ([]float32, error) {
    panic("generateEmbedding: not implemented - call your embedding provider here")
}

Complete Example

package search

import (
    "context"

    "github.com/jmoiron/sqlx"
    "github.com/zoobz-io/soy"
)

// Document is a searchable row: free-text fields plus a 1536-dimension
// pgvector embedding used for similarity ordering.
type Document struct {
    ID        int       `db:"id" type:"serial" constraints:"primary key"`
    Title     string    `db:"title" type:"text" constraints:"not null"`
    Content   string    `db:"content" type:"text"`
    Category  string    `db:"category" type:"text"`
    Embedding []float32 `db:"embedding" type:"vector(1536)"`
}

// SearchService couples the documents table handle with an embedding
// provider so callers can search by plain text.
type SearchService struct {
    documents *soy.Soy[Document]
    embedder  Embedder
}

// Embedder turns text into a vector; implementations typically wrap an
// external embedding API.
type Embedder interface {
    Embed(text string) ([]float32, error)
}

// NewSearchService binds the Document model to the "documents" table and
// returns a service that embeds queries with the given Embedder.
func NewSearchService(db *sqlx.DB, embedder Embedder) (*SearchService, error) {
    docs, err := soy.New[Document](db, "documents")
    if err != nil {
        return nil, err
    }
    svc := &SearchService{
        documents: docs,
        embedder:  embedder,
    }
    return svc, nil
}

// Search embeds the query text and returns the nearest documents by cosine
// distance, optionally filtered by category and optionally carrying the raw
// distance as a "score" column.
func (s *SearchService) Search(ctx context.Context, query string, opts SearchOptions) ([]*Document, error) {
    embedding, err := s.embedder.Embed(query)
    if err != nil {
        return nil, err
    }

    builder := s.documents.Query()

    if opts.Category != "" {
        builder = builder.Where("category", "=", "category")
    }

    if opts.IncludeScore {
        // Project the distance expression into the result set as "score".
        builder = builder.SelectExpr("embedding", "<=>", "query_vec", "score")
    }

    builder = builder.
        OrderByExpr("embedding", "<=>", "query_vec", "asc").
        Limit(opts.Limit)

    // "category" is always present in the parameter map (as in the original
    // example), even when no category filter was added to the query.
    return builder.Exec(ctx, map[string]any{
        "query_vec": embedding,
        "category":  opts.Category,
    })
}

// SearchOptions tunes a Search call.
type SearchOptions struct {
    Category     string // empty string disables the category filter
    Limit        int    // maximum number of rows returned
    IncludeScore bool   // when true, results include a computed "score" column
}