Skip to content

HeliosDB Document Store Guide

Version: 1.0 Last Updated: 2025-11-30


Quick Start

-- Create collection
CREATE COLLECTION documents;

-- Insert document
db.documents.insert_one({
  "_id": ObjectId(),
  "title": "My Document",
  "content": "Document content",
  "tags": ["tag1", "tag2"],
  "created_at": new Date()
});

-- Query documents
SELECT * FROM documents WHERE title LIKE '%Document%';

Collections

Creating Collections

-- Create with validation
CREATE COLLECTION articles WITH (
  validation: {
    bsonType: "object",
    required: ["title", "content"],
    properties: {
      title: { bsonType: "string" },
      content: { bsonType: "string" },
      status: { enum: ["draft", "published"] }
    }
  }
);

Document Operations

-- Insert
INSERT INTO articles (title, content, status)
VALUES ('Article', 'Content', 'published');

-- Update
UPDATE articles
SET content = 'Updated content'
WHERE title = 'Article';

-- Delete
DELETE FROM articles WHERE status = 'draft';

-- Find
SELECT * FROM articles WHERE status = 'published' ORDER BY created_at DESC;

Querying

Basic Queries

-- String matching
SELECT * FROM documents WHERE title ILIKE '%query%';

-- Array operations
SELECT * FROM documents WHERE tags @> ARRAY['important'];

-- Nested document access
SELECT * FROM documents WHERE data->>'author' = 'John';

Advanced Queries

-- Text search
SELECT * FROM documents
WHERE to_tsvector('english', content) @@ plainto_tsquery('english', 'database');

-- Geospatial
SELECT * FROM documents
WHERE ST_DWithin(location::GEOMETRY, 'POINT(0 0)'::GEOMETRY, 5000);

-- Array aggregation
SELECT
  author,
  COUNT(*) as count,
  array_agg(DISTINCT tags) as all_tags
FROM documents
GROUP BY author;

Indexing

Document Indexes

-- Index common fields
CREATE INDEX idx_doc_title ON documents(title);
CREATE INDEX idx_doc_status ON documents((data->>'status'));

-- Index arrays
CREATE INDEX idx_doc_tags ON documents USING GIN(tags);

-- Full-text index
CREATE INDEX idx_doc_search ON documents USING GIN(to_tsvector('english', content));

Aggregation

Document Aggregation Pipeline

-- Group and aggregate documents
SELECT
  data->>'category' as category,
  COUNT(*) as total,
  AVG((data->>'views')::INT) as avg_views,
  MAX((data->>'updated_at')::TIMESTAMP) as latest
FROM documents
WHERE (data->>'published')::BOOLEAN = true
GROUP BY category
ORDER BY total DESC;

Transactions

Multi-Document Transactions

BEGIN;
  INSERT INTO documents (title, content)
  VALUES ('Doc1', 'Content1');

  INSERT INTO documents (title, content)
  VALUES ('Doc2', 'Content2');

  UPDATE documents SET status = 'processed'
  WHERE title IN ('Doc1', 'Doc2');
COMMIT;

Backup & Restore

-- Export documents
SELECT * FROM documents WHERE created_at > NOW() - INTERVAL '30 days'
INTO OUTFILE '/backups/documents.jsonl';

-- Import documents
COPY documents FROM '/backups/documents.jsonl' WITH FORMAT JSON;

Best Practices

  1. Define document schema validation
  2. Index frequently queried fields
  3. Use proper data types (not all strings)
  4. Manage document size (keep under 16MB)
  5. Use transactions for multi-document changes
  6. Archive old documents

Related Documentation: - Multi-Model Data Guide - JSONB Operations Reference