HeliosDB Document Store Guide¶
Version: 1.0 Last Updated: 2025-11-30
Quick Start¶
-- Create collection
CREATE COLLECTION documents;
-- Insert document
db.documents.insert_one({
"_id": ObjectId(),
"title": "My Document",
"content": "Document content",
"tags": ["tag1", "tag2"],
"created_at": new Date()
});
-- Query documents
SELECT * FROM documents WHERE title LIKE '%Document%';
Collections¶
Creating Collections¶
-- Create with validation
CREATE COLLECTION articles WITH (
validation: {
bsonType: "object",
required: ["title", "content"],
properties: {
title: { bsonType: "string" },
content: { bsonType: "string" },
status: { enum: ["draft", "published"] }
}
}
);
Document Operations¶
-- Insert
INSERT INTO articles (title, content, status)
VALUES ('Article', 'Content', 'published');
-- Update
UPDATE articles
SET content = 'Updated content'
WHERE title = 'Article';
-- Delete
DELETE FROM articles WHERE status = 'draft';
-- Find
SELECT * FROM articles WHERE status = 'published' ORDER BY created_at DESC;
Querying¶
Basic Queries¶
-- String matching
SELECT * FROM documents WHERE title ILIKE '%query%';
-- Array operations
SELECT * FROM documents WHERE tags @> ARRAY['important'];
-- Nested document access
SELECT * FROM documents WHERE data->>'author' = 'John';
Advanced Queries¶
-- Text search
SELECT * FROM documents
WHERE to_tsvector('english', content) @@ plainto_tsquery('english', 'database');
-- Geospatial
SELECT * FROM documents
WHERE ST_DWithin(location::GEOMETRY, 'POINT(0 0)'::GEOMETRY, 5000);
-- Array aggregation
SELECT
author,
COUNT(*) as count,
array_agg(DISTINCT tags) as all_tags
FROM documents
GROUP BY author;
Indexing¶
Document Indexes¶
-- Index common fields
CREATE INDEX idx_doc_title ON documents(title);
CREATE INDEX idx_doc_status ON documents((data->>'status'));
-- Index arrays
CREATE INDEX idx_doc_tags ON documents USING GIN(tags);
-- Full-text index
CREATE INDEX idx_doc_search ON documents USING GIN(to_tsvector('english', content));
Aggregation¶
Document Aggregation Pipeline¶
-- Group and aggregate documents
SELECT
data->>'category' as category,
COUNT(*) as total,
AVG((data->>'views')::INT) as avg_views,
MAX((data->>'updated_at')::TIMESTAMP) as latest
FROM documents
WHERE (data->>'published')::BOOLEAN = true
GROUP BY category
ORDER BY total DESC;
Transactions¶
Multi-Document Transactions¶
BEGIN;
INSERT INTO documents (title, content)
VALUES ('Doc1', 'Content1');
INSERT INTO documents (title, content)
VALUES ('Doc2', 'Content2');
UPDATE documents SET status = 'processed'
WHERE title IN ('Doc1', 'Doc2');
COMMIT;
Backup & Restore¶
-- Export documents
SELECT * FROM documents WHERE created_at > NOW() - INTERVAL '30 days'
INTO OUTFILE '/backups/documents.jsonl';
-- Import documents
COPY documents FROM '/backups/documents.jsonl' WITH FORMAT JSON;
Best Practices¶
- Define document schema validation
- Index frequently queried fields
- Use proper data types (not all strings)
- Manage document size (keep under 16MB)
- Use transactions for multi-document changes
- Archive old documents
Related Documentation: - Multi-Model Data Guide - JSONB Operations Reference