zoobzio January 6, 2025

Pagination

This guide covers patterns for listing and paginating data across different storage modes.

Store List Pagination

Basic Listing

// List first 100 keys with prefix
keys, err := store.List(ctx, "user:", 100)

Manual Pagination

Grub's List doesn't provide cursors. For pagination, track the last key:

func ListAll(ctx context.Context, store *grub.Store[User], prefix string) ([]string, error) {
    const pageSize = 100
    var allKeys []string

    for {
        keys, err := store.List(ctx, prefix, pageSize)
        if err != nil {
            return nil, err
        }

        allKeys = append(allKeys, keys...)

        if len(keys) < pageSize {
            break // No more results
        }

        // Use last key as next prefix (lexicographic ordering)
        prefix = keys[len(keys)-1] + "\x00"
    }

    return allKeys, nil
}

Note: This assumes lexicographic key ordering, which varies by provider.

Provider-Specific Behavior

Provider    List Implementation
Redis       SCAN (cursor-based, safe for production)
Badger      Iterator (snapshot isolation)
Bolt        Cursor (within transaction)

Redis SCAN: Grub uses SCAN internally, which is safe for large datasets. The limit parameter maps to SCAN's COUNT hint.
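
Note that SCAN only guarantees at-least-once delivery while the keyspace is changing, so a long pagination loop may occasionally see the same key twice; whether those duplicates surface through List is an assumption here. A defensive sketch that de-duplicates across pages:

seen := make(map[string]struct{})
prefix := "user:"

for {
    keys, err := store.List(ctx, prefix, 100)
    if err != nil {
        return err
    }

    for _, key := range keys {
        if _, dup := seen[key]; dup {
            continue // already handled in an earlier page
        }
        seen[key] = struct{}{}
        // process key...
    }

    if len(keys) < 100 {
        break
    }
    prefix = keys[len(keys)-1] + "\x00"
}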

Batch Loading

After listing keys, load values in batches:

keys, _ := store.List(ctx, "user:", 1000)

const batchSize = 100
for i := 0; i < len(keys); i += batchSize {
    end := min(i+batchSize, len(keys))
    batch := keys[i:end]

    users, err := store.GetBatch(ctx, batch)
    if err != nil {
        return err
    }

    for _, user := range users {
        process(user)
    }
}

Bucket List Pagination

Basic Listing

// List first 100 objects with prefix
infos, err := bucket.List(ctx, "docs/", 100)
if err != nil {
    return err
}

for _, info := range infos {
    fmt.Printf("%s (%d bytes)\n", info.Key, info.Size)
}

Continuation Pattern

Similar to stores, but using the last returned key as the marker for the next page:

func ListAllObjects(ctx context.Context, bucket *grub.Bucket[Doc], prefix string) ([]grub.ObjectInfo, error) {
    const pageSize = 100
    var allInfos []grub.ObjectInfo
    marker := prefix

    for {
        infos, err := bucket.List(ctx, marker, pageSize)
        if err != nil {
            return nil, err
        }

        allInfos = append(allInfos, infos...)

        if len(infos) < pageSize {
            break
        }

        // Use last key as marker for next page
        marker = infos[len(infos)-1].Key
    }

    return allInfos, nil
}
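
Illustrative usage (docBucket stands in for any *grub.Bucket[Doc]): total the size of every object under a prefix.

infos, err := ListAllObjects(ctx, docBucket, "docs/")
if err != nil {
    return err
}

var total int64
for _, info := range infos {
    total += int64(info.Size)
}
fmt.Printf("%d objects, %d bytes\n", len(infos), total)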

Provider-Specific Behavior

Provider    List Implementation
S3          ListObjectsV2 with continuation tokens
GCS         storage.Iterator
Azure       NewListBlobsFlatPager

Database Query Pagination

For SQL databases, use query builders or pre-defined statements.

Using Query Builder

// Offset-based pagination with builder
users, err := db.Query().
    OrderBy("created_at", "DESC").
    Limit(20).
    Offset(40). // Page 3
    Exec(ctx, nil)

Using Pre-defined Statements

// Define statement with LIMIT/OFFSET params
paginatedStmt := edamame.NewQueryStatement("paginated", "Paginated users", edamame.QuerySpec{
    OrderBy:     []edamame.OrderBySpec{{Field: "created_at", Direction: "desc"}},
    LimitParam:  "limit",
    OffsetParam: "offset",
})

users, err := db.ExecQuery(ctx, paginatedStmt, map[string]any{
    "limit":  20,
    "offset": 40, // Page 3
})
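
A small helper (hypothetical, not part of edamame) keeps the page arithmetic in one place:

// Convert a 1-based page number into the limit/offset parameters
// expected by the statement above.
func pageParams(page, pageSize int) map[string]any {
    return map[string]any{
        "limit":  pageSize,
        "offset": (page - 1) * pageSize,
    }
}

users, err := db.ExecQuery(ctx, paginatedStmt, pageParams(3, 20))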

Cursor-Based Pagination

Cursor-based pagination avoids the growing cost of large OFFSET scans, so it stays efficient on large datasets:

// Using builder
users, err := db.Query().
    Where("id", ">", "cursor").
    OrderBy("id", "ASC").
    Limit(20).
    Exec(ctx, map[string]any{"cursor": lastID})
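
To walk an entire table, repeat the query and advance the cursor to the last ID returned. A sketch, assuming User exposes an integer primary key as ID:

var lastID int64 // zero value starts before the first row

for {
    users, err := db.Query().
        Where("id", ">", "cursor").
        OrderBy("id", "ASC").
        Limit(20).
        Exec(ctx, map[string]any{"cursor": lastID})
    if err != nil {
        return err
    }

    if len(users) == 0 {
        break // no more rows
    }

    for _, u := range users {
        process(u)
    }
    lastID = users[len(users)-1].ID // next page starts after this row
}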

Performance Considerations

Key Design for Efficient Listing

Design keys to enable efficient prefix queries:

// Good: hierarchical, scannable
"tenant:acme:user:123"
"tenant:acme:user:456"
"tenant:beta:user:789"

// List all users for tenant
keys, _ := store.List(ctx, "tenant:acme:user:", 100)

// Bad: UUIDs as first segment
"a1b2c3d4:user:data"
"e5f6g7h8:user:data"

// Can't efficiently list by type
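
Centralizing key construction helps keep prefixes consistent; a sketch (userKey and userPrefix are illustrative helpers, not Grub APIs):

func userKey(tenant, userID string) string {
    return fmt.Sprintf("tenant:%s:user:%s", tenant, userID)
}

func userPrefix(tenant string) string {
    return fmt.Sprintf("tenant:%s:user:", tenant)
}

// List all users for a tenant without hand-building the prefix
keys, _ := store.List(ctx, userPrefix("acme"), 100)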

Limiting Result Sets

Always use reasonable limits:

// Good: bounded
keys, _ := store.List(ctx, prefix, 1000)

// Dangerous: unbounded on large datasets
keys, _ := store.List(ctx, prefix, 0)

Memory Considerations

For large datasets, stream results instead of accumulating them in memory:

const pageSize = 100
prefix := "user:"

for {
    keys, err := store.List(ctx, prefix, pageSize)
    if err != nil {
        return err
    }

    for _, key := range keys {
        user, err := store.Get(ctx, key)
        if err != nil {
            continue // skip keys that fail to load (e.g. deleted since listing)
        }
        // Process immediately, don't accumulate
        process(user)
    }

    if len(keys) < pageSize {
        break
    }
    prefix = keys[len(keys)-1] + "\x00"
}

Patterns by Use Case

Export All Data

func Export[T any](ctx context.Context, store *grub.Store[T], prefix string, emit func(*T)) error {
    const pageSize = 100
    marker := prefix

    for {
        keys, err := store.List(ctx, marker, pageSize)
        if err != nil {
            return err
        }

        if len(keys) == 0 {
            break
        }

        values, err := store.GetBatch(ctx, keys)
        if err != nil {
            return err
        }

        for _, v := range values {
            emit(v)
        }

        marker = keys[len(keys)-1] + "\x00"
    }

    return nil
}
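
Illustrative usage (userStore stands in for any *grub.Store[User]; uses encoding/json and os): stream every user to stdout as JSON lines.

err := Export(ctx, userStore, "user:", func(u *User) {
    _ = json.NewEncoder(os.Stdout).Encode(u) // one JSON object per line
})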

Count by Prefix

func Count[T any](ctx context.Context, store *grub.Store[T], prefix string) (int, error) {
    const pageSize = 1000
    count := 0
    marker := prefix

    for {
        keys, err := store.List(ctx, marker, pageSize)
        if err != nil {
            return 0, err
        }

        count += len(keys)

        if len(keys) < pageSize {
            break
        }

        marker = keys[len(keys)-1] + "\x00"
    }

    return count, nil
}

Delete by Prefix

func DeletePrefix[T any](ctx context.Context, store *grub.Store[T], prefix string) error {
    const pageSize = 100

    for {
        keys, err := store.List(ctx, prefix, pageSize)
        if err != nil {
            return err
        }

        if len(keys) == 0 {
            break
        }

        for _, key := range keys {
            if err := store.Delete(ctx, key); err != nil && !errors.Is(err, grub.ErrNotFound) {
                return err
            }
        }
    }

    return nil
}

Context Cancellation

List operations respect context cancellation in most providers:

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()

keys, err := store.List(ctx, prefix, 10000)
if errors.Is(err, context.DeadlineExceeded) {
    // Timed out during iteration
}

Provider    Cancellation Support
Redis       Per-operation
Badger      During iteration
Bolt        During iteration
S3          Per-operation
GCS         Per-operation
Azure       Per-operation
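
For providers that only honor cancellation per operation, checking the context between pages keeps long pagination loops responsive. A minimal sketch (prefix and pageSize as in the streaming example above):

for {
    if err := ctx.Err(); err != nil {
        return err // stop promptly between pages
    }

    keys, err := store.List(ctx, prefix, pageSize)
    if err != nil {
        return err
    }

    // process keys...

    if len(keys) < pageSize {
        break
    }
    prefix = keys[len(keys)-1] + "\x00"
}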