HeliosDB SQL API Reference¶
Version: 6.0 (PostgreSQL 17 Compatible) Last Updated: November 2, 2025
Introduction¶
HeliosDB provides a PostgreSQL-compatible SQL interface with extensions for multi-model data operations. This reference documents all supported SQL statements, functions, operators, and multi-model extensions.
Connection¶
Connection String¶
Connection Parameters¶
host=localhost
port=5432
dbname=heliosdb
user=admin
password=secret
tenant=acme-corp # Multi-tenancy
application_name=myapp
Using pg Client Libraries¶
import psycopg2
conn = psycopg2.connect(
host="localhost",
port=5432,
dbname="heliosdb",
user="admin",
password="secret"
)
Data Definition Language (DDL)¶
CREATE TABLE¶
Basic Table:
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(255) NOT NULL UNIQUE,
email VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
With Constraints:
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL,
status VARCHAR(50) CHECK (status IN ('pending', 'completed', 'cancelled')),
total DECIMAL(10, 2) CHECK (total >= 0),
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT fk_user FOREIGN KEY (user_id)
REFERENCES users(id) ON DELETE CASCADE
);
Partitioned Table:
CREATE TABLE events (
id BIGSERIAL,
event_type VARCHAR(100),
payload JSONB,
created_at TIMESTAMP NOT NULL
) PARTITION BY RANGE (created_at);
CREATE TABLE events_2025_01 PARTITION OF events
FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
Vector Table:
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
embedding VECTOR(1536) -- Multi-model extension
);
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);
ALTER TABLE¶
Add Column:
Drop Column:
Modify Column:
ALTER TABLE users ALTER COLUMN email TYPE TEXT;
ALTER TABLE users ALTER COLUMN username SET NOT NULL;
ALTER TABLE users ALTER COLUMN age DROP DEFAULT;
Add Constraint:
ALTER TABLE users ADD CONSTRAINT unique_email UNIQUE (email);
ALTER TABLE orders ADD CONSTRAINT check_total CHECK (total >= 0);
Rename:
DROP TABLE¶
DROP TABLE users;
DROP TABLE IF EXISTS users;
DROP TABLE users CASCADE; -- Also drop dependent objects
CREATE INDEX¶
B-Tree Index (Default):
CREATE INDEX idx_users_email ON users(email);
CREATE UNIQUE INDEX idx_users_username ON users(username);
Partial Index:
Composite Index:
Expression Index:
Full-Text Search Index:
Vector Index (HNSW):
CREATE INDEX idx_documents_embedding ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Spatial Index:
CREATE VIEW¶
Simple View:
Materialized View:
CREATE MATERIALIZED VIEW user_stats AS
SELECT
DATE_TRUNC('day', created_at) as day,
COUNT(*) as user_count,
COUNT(DISTINCT email) as unique_emails
FROM users
GROUP BY DATE_TRUNC('day', created_at);
-- Refresh materialized view
REFRESH MATERIALIZED VIEW user_stats;
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
CREATE SCHEMA¶
CREATE SCHEMA analytics;
CREATE SCHEMA IF NOT EXISTS reporting;
-- Create table in schema
CREATE TABLE analytics.metrics (
id SERIAL PRIMARY KEY,
metric_name VARCHAR(255),
value DOUBLE PRECISION
);
CREATE TYPE¶
Enum Type:
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
CREATE TABLE users (
id SERIAL PRIMARY KEY,
role user_role DEFAULT 'user'
);
Composite Type:
CREATE TYPE address AS (
street VARCHAR(255),
city VARCHAR(100),
country VARCHAR(100),
postal_code VARCHAR(20)
);
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(255),
shipping_address address
);
Data Manipulation Language (DML)¶
SELECT¶
Basic Query:
SELECT * FROM users;
SELECT id, username, email FROM users;
SELECT username AS login, email FROM users;
WHERE Clause:
SELECT * FROM users WHERE age >= 18;
SELECT * FROM users WHERE status = 'active' AND age > 21;
SELECT * FROM users WHERE email LIKE '%@gmail.com';
SELECT * FROM users WHERE created_at BETWEEN '2025-01-01' AND '2025-12-31';
SELECT * FROM users WHERE id IN (1, 2, 3, 4, 5);
ORDER BY:
SELECT * FROM users ORDER BY created_at DESC;
SELECT * FROM users ORDER BY age ASC, username DESC;
SELECT * FROM users ORDER BY RANDOM() LIMIT 10; -- Random sample
LIMIT and OFFSET:
SELECT * FROM users LIMIT 10;
SELECT * FROM users LIMIT 10 OFFSET 20;
SELECT * FROM users ORDER BY id LIMIT 10; -- Top 10
GROUP BY and HAVING:
SELECT status, COUNT(*) as count
FROM users
GROUP BY status;
SELECT DATE_TRUNC('month', created_at) as month, COUNT(*) as signups
FROM users
GROUP BY DATE_TRUNC('month', created_at)
HAVING COUNT(*) > 100
ORDER BY month DESC;
DISTINCT:
SELECT DISTINCT status FROM users;
SELECT DISTINCT ON (email) * FROM users ORDER BY email, created_at DESC;
JOINS:
-- INNER JOIN
SELECT u.username, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- LEFT JOIN
SELECT u.username, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;
-- RIGHT JOIN
SELECT u.username, o.id as order_id
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id;
-- FULL OUTER JOIN
SELECT u.username, o.id
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;
-- CROSS JOIN
SELECT a.name, b.color
FROM products a
CROSS JOIN colors b;
Subqueries:
-- Scalar subquery
SELECT username,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) as order_count
FROM users;
-- IN subquery
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders WHERE total > 100);
-- EXISTS subquery
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
-- WITH (CTE)
WITH active_users AS (
SELECT * FROM users WHERE status = 'active'
),
user_orders AS (
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id
)
SELECT au.username, COALESCE(uo.order_count, 0) as orders
FROM active_users au
LEFT JOIN user_orders uo ON au.id = uo.user_id;
Window Functions:
-- ROW_NUMBER
SELECT username,
ROW_NUMBER() OVER (ORDER BY created_at) as row_num
FROM users;
-- RANK and DENSE_RANK
SELECT username, score,
RANK() OVER (ORDER BY score DESC) as rank,
DENSE_RANK() OVER (ORDER BY score DESC) as dense_rank
FROM leaderboard;
-- PARTITION BY
SELECT username, category, sales,
RANK() OVER (PARTITION BY category ORDER BY sales DESC) as rank_in_category
FROM sales_data;
-- LAG and LEAD
SELECT date, revenue,
LAG(revenue) OVER (ORDER BY date) as prev_revenue,
LEAD(revenue) OVER (ORDER BY date) as next_revenue
FROM daily_revenue;
-- Running totals
SELECT date, amount,
SUM(amount) OVER (ORDER BY date) as running_total
FROM transactions;
INSERT¶
Single Row:
Multiple Rows:
INSERT INTO users (username, email) VALUES
('alice', 'alice@example.com'),
('bob', 'bob@example.com'),
('charlie', 'charlie@example.com');
Returning:
From SELECT:
ON CONFLICT (Upsert):
INSERT INTO users (id, username, email)
VALUES (1, 'john', 'john@example.com')
ON CONFLICT (id) DO UPDATE SET
username = EXCLUDED.username,
email = EXCLUDED.email,
updated_at = NOW();
-- Do nothing on conflict
INSERT INTO users (username, email)
VALUES ('john', 'john@example.com')
ON CONFLICT (username) DO NOTHING;
UPDATE¶
Basic Update:
Multiple Columns:
From Another Table:
UPDATE orders o
SET status = 'completed'
FROM shipments s
WHERE o.id = s.order_id AND s.delivered_at IS NOT NULL;
Returning:
DELETE¶
Basic Delete:
Delete All:
DELETE FROM users; -- Deletes all rows (use with caution!)
TRUNCATE TABLE users; -- Faster for deleting all rows
Returning:
Using Another Table:
Transaction Control Language (TCL)¶
Transactions¶
Basic Transaction:
BEGIN;
INSERT INTO users (username, email) VALUES ('test', 'test@example.com');
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;
Rollback:
Savepoints:
BEGIN;
INSERT INTO users (username) VALUES ('user1');
SAVEPOINT sp1;
INSERT INTO users (username) VALUES ('user2');
SAVEPOINT sp2;
INSERT INTO users (username) VALUES ('user3');
-- Rollback to savepoint
ROLLBACK TO sp2;
COMMIT; -- Only user1 and user2 are inserted
Isolation Levels:
BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
Multi-Model Extensions¶
Vector Operations¶
Vector Similarity Search:
-- Cosine similarity
SELECT id, title,
embedding <=> '[0.1, 0.2, 0.3, ...]'::VECTOR as distance
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, 0.3, ...]'::VECTOR
LIMIT 10;
-- Euclidean distance (L2)
SELECT id, title,
embedding <-> '[0.1, 0.2, 0.3, ...]'::VECTOR as distance
FROM documents
ORDER BY embedding <-> '[0.1, 0.2, 0.3, ...]'::VECTOR
LIMIT 10;
-- Inner product
SELECT id, title,
embedding <#> '[0.1, 0.2, 0.3, ...]'::VECTOR as distance
FROM documents
ORDER BY embedding <#> '[0.1, 0.2, 0.3, ...]'::VECTOR DESC
LIMIT 10;
Vector Functions:
-- Vector dimensions
SELECT vector_dims(embedding) FROM documents LIMIT 1;
-- Vector norm
SELECT vector_norm(embedding) FROM documents;
-- Normalize vector
UPDATE documents SET embedding = vector_normalize(embedding);
-- Average vector
SELECT AVG(embedding) FROM documents;
Graph Operations¶
Recursive CTE for Graph Traversal:
WITH RECURSIVE connected AS (
-- Base case: start node
SELECT id, name, parent_id, 1 as depth
FROM categories
WHERE id = 1
UNION ALL
-- Recursive case: connected nodes
SELECT c.id, c.name, c.parent_id, co.depth + 1
FROM categories c
INNER JOIN connected co ON c.parent_id = co.id
WHERE co.depth < 10 -- Max depth
)
SELECT * FROM connected;
Cypher-Style Graph Query:
-- Find friends of friends
SELECT DISTINCT u3.username
FROM users u1
JOIN friendships f1 ON u1.id = f1.user_id
JOIN users u2 ON f1.friend_id = u2.id
JOIN friendships f2 ON u2.id = f2.user_id
JOIN users u3 ON f2.friend_id = u3.id
WHERE u1.username = 'alice' AND u3.id != u1.id;
Shortest Path:
WITH RECURSIVE paths AS (
SELECT
id as node,
ARRAY[id] as path,
0 as length
FROM nodes
WHERE id = 1 -- Start node
UNION ALL
SELECT
e.target_node,
p.path || e.target_node,
p.length + e.weight
FROM paths p
JOIN edges e ON p.node = e.source_node
WHERE NOT e.target_node = ANY(p.path) -- Avoid cycles
)
SELECT path, length
FROM paths
WHERE node = 10 -- End node
ORDER BY length
LIMIT 1;
Document Operations¶
JSON Queries:
-- Extract JSON field
SELECT id, metadata->>'name' as name
FROM documents
WHERE metadata->>'status' = 'active';
-- JSON array contains
SELECT * FROM documents
WHERE metadata @> '{"tags": ["important"]}';
-- JSON path query
SELECT id, jsonb_path_query(metadata, '$.user.email') as email
FROM documents;
-- Aggregate JSON
SELECT jsonb_agg(metadata) FROM documents;
MongoDB-Compatible Syntax:
-- Find documents
SELECT * FROM collection
WHERE doc @> '{"status": "active", "age": {"$gte": 18}}';
-- Update document
UPDATE collection
SET doc = jsonb_set(doc, '{status}', '"inactive"')
WHERE doc->>'_id' = '123';
Time-Series Operations¶
Time-Bucket Aggregation:
SELECT
time_bucket('1 hour', timestamp) as hour,
AVG(value) as avg_value,
MAX(value) as max_value,
MIN(value) as min_value
FROM metrics
WHERE timestamp > NOW() - INTERVAL '24 hours'
GROUP BY time_bucket('1 hour', timestamp)
ORDER BY hour;
Gap Filling:
SELECT
time_bucket_gapfill('5 minutes', timestamp) as time,
interpolate(AVG(temperature)) as temperature
FROM sensor_data
WHERE timestamp BETWEEN '2025-11-01' AND '2025-11-02'
GROUP BY time_bucket_gapfill('5 minutes', timestamp);
Continuous Aggregates:
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', timestamp) as hour,
sensor_id,
AVG(value) as avg_value
FROM metrics
GROUP BY hour, sensor_id;
Spatial Operations¶
Point in Polygon:
SELECT * FROM locations
WHERE ST_Contains(
ST_GeomFromText('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))'),
geom
);
Distance Query:
SELECT id, name,
ST_Distance(geom, ST_MakePoint(-122.4194, 37.7749)) as distance
FROM locations
WHERE ST_DWithin(geom, ST_MakePoint(-122.4194, 37.7749), 10000) -- 10km
ORDER BY distance
LIMIT 10;
Nearest Neighbor:
Built-in Functions¶
String Functions¶
SELECT CONCAT('Hello', ' ', 'World'); -- 'Hello World'
SELECT SUBSTRING('Hello World' FROM 1 FOR 5); -- 'Hello'
SELECT UPPER('hello'); -- 'HELLO'
SELECT LOWER('WORLD'); -- 'world'
SELECT LENGTH('Hello'); -- 5
SELECT TRIM(' hello '); -- 'hello'
SELECT REPLACE('Hello World', 'World', 'Universe'); -- 'Hello Universe'
SELECT SPLIT_PART('a,b,c', ',', 2); -- 'b'
SELECT REGEXP_REPLACE('Hello123', '[0-9]', '', 'g'); -- 'Hello'
Numeric Functions¶
SELECT ABS(-5); -- 5
SELECT CEIL(4.3); -- 5
SELECT FLOOR(4.8); -- 4
SELECT ROUND(4.567, 2); -- 4.57
SELECT POWER(2, 3); -- 8
SELECT SQRT(16); -- 4
SELECT MOD(10, 3); -- 1
SELECT RANDOM(); -- Random number between 0 and 1
Date/Time Functions¶
SELECT NOW(); -- Current timestamp
SELECT CURRENT_DATE; -- Current date
SELECT CURRENT_TIME; -- Current time
SELECT EXTRACT(YEAR FROM NOW()); -- 2025
SELECT DATE_TRUNC('hour', NOW()); -- Truncate to hour
SELECT AGE('2025-01-01', '2020-01-01'); -- Interval '5 years'
SELECT NOW() + INTERVAL '1 day'; -- Tomorrow
SELECT TO_CHAR(NOW(), 'YYYY-MM-DD HH24:MI:SS'); -- Format timestamp
Aggregate Functions¶
SELECT COUNT(*) FROM users;
SELECT COUNT(DISTINCT email) FROM users;
SELECT SUM(total) FROM orders;
SELECT AVG(age) FROM users;
SELECT MIN(created_at), MAX(created_at) FROM users;
SELECT STRING_AGG(username, ', ') FROM users; -- 'alice, bob, charlie'
SELECT ARRAY_AGG(id) FROM users; -- [1, 2, 3, ...]
SELECT JSON_AGG(username) FROM users; -- ["alice", "bob", ...]
JSON Functions¶
SELECT json_build_object('name', 'John', 'age', 30);
SELECT json_agg(username) FROM users;
SELECT jsonb_pretty('{"a":1}'::jsonb);
SELECT jsonb_set('{"a":1}'::jsonb, '{b}', '2'::jsonb);
SELECT jsonb_strip_nulls('{"a":1,"b":null}'::jsonb);
Array Functions¶
SELECT ARRAY[1, 2, 3, 4, 5];
SELECT ARRAY_LENGTH(ARRAY[1, 2, 3], 1); -- 3
SELECT 2 = ANY(ARRAY[1, 2, 3]); -- true
SELECT ARRAY_AGG(id) FROM users;
SELECT UNNEST(ARRAY[1, 2, 3]); -- Expands array to rows
Conditional Functions¶
SELECT COALESCE(NULL, NULL, 'default'); -- 'default'
SELECT NULLIF(value, 0); -- Returns NULL if value is 0
SELECT GREATEST(1, 5, 3, 9, 2); -- 9
SELECT LEAST(1, 5, 3, 9, 2); -- 1
SELECT CASE
WHEN age < 18 THEN 'minor'
WHEN age < 65 THEN 'adult'
ELSE 'senior'
END as category FROM users;
Full-Text Search¶
Basic Search¶
-- Create text search vector
SELECT to_tsvector('english', 'The quick brown fox jumps over the lazy dog');
-- Search
SELECT * FROM documents
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'fox & dog');
With Ranking¶
SELECT id, title,
ts_rank(to_tsvector('english', content), query) as rank
FROM documents, to_tsquery('english', 'database & search') query
WHERE to_tsvector('english', content) @@ query
ORDER BY rank DESC;
With Index¶
-- Add generated column
ALTER TABLE documents
ADD COLUMN content_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', content)) STORED;
-- Create GIN index
CREATE INDEX idx_documents_content_tsv ON documents USING GIN(content_tsv);
-- Query
SELECT * FROM documents
WHERE content_tsv @@ to_tsquery('english', 'database & search');
Examples¶
User Registration and Login¶
-- Register user
INSERT INTO users (username, email, password_hash)
VALUES ('john', 'john@example.com', crypt('password123', gen_salt('bf')))
RETURNING id;
-- Login verification
SELECT id, username FROM users
WHERE username = 'john'
AND password_hash = crypt('password123', password_hash);
E-Commerce Order Processing¶
BEGIN;
-- Create order
INSERT INTO orders (user_id, status, total)
VALUES (123, 'pending', 0)
RETURNING id INTO @order_id;
-- Add order items
INSERT INTO order_items (order_id, product_id, quantity, price)
SELECT @order_id, product_id, quantity, price
FROM cart_items
WHERE user_id = 123;
-- Update order total
UPDATE orders
SET total = (
SELECT SUM(quantity * price)
FROM order_items
WHERE order_id = @order_id
)
WHERE id = @order_id;
-- Clear cart
DELETE FROM cart_items WHERE user_id = 123;
COMMIT;
Analytics Query¶
WITH daily_metrics AS (
SELECT
DATE_TRUNC('day', created_at) as day,
COUNT(*) as signups,
COUNT(*) FILTER (WHERE referrer IS NOT NULL) as referred_signups
FROM users
WHERE created_at >= NOW() - INTERVAL '30 days'
GROUP BY DATE_TRUNC('day', created_at)
),
retention AS (
SELECT
DATE_TRUNC('day', u.created_at) as cohort_day,
COUNT(DISTINCT CASE WHEN a.created_at >= u.created_at + INTERVAL '7 days' THEN u.id END) as week_1_retained
FROM users u
LEFT JOIN activities a ON u.id = a.user_id
GROUP BY DATE_TRUNC('day', u.created_at)
)
SELECT
dm.day,
dm.signups,
dm.referred_signups,
ROUND(100.0 * dm.referred_signups / dm.signups, 2) as referral_rate,
COALESCE(r.week_1_retained, 0) as week_1_retained
FROM daily_metrics dm
LEFT JOIN retention r ON dm.day = r.cohort_day
ORDER BY dm.day DESC;
Hybrid Search (Vector + Full-Text)¶
WITH vector_results AS (
SELECT id, title,
embedding <=> $1::vector as vector_distance
FROM documents
ORDER BY embedding <=> $1::vector
LIMIT 100
),
text_results AS (
SELECT id, title,
ts_rank(to_tsvector('english', content), query) as text_rank
FROM documents, to_tsquery('english', $2) query
WHERE to_tsvector('english', content) @@ query
ORDER BY text_rank DESC
LIMIT 100
)
SELECT DISTINCT
COALESCE(vr.id, tr.id) as id,
COALESCE(vr.title, tr.title) as title,
(0.7 * (1 - COALESCE(vr.vector_distance, 1)) +
0.3 * COALESCE(tr.text_rank, 0)) as combined_score
FROM vector_results vr
FULL OUTER JOIN text_results tr ON vr.id = tr.id
ORDER BY combined_score DESC
LIMIT 20;
Performance Tips¶
- Use indexes for frequently queried columns
- EXPLAIN ANALYZE to understand query plans
- Batch inserts instead of many single inserts
- Use CTEs for complex queries
- Partition large tables by date or range
- Materialized views for expensive aggregations
- Connection pooling for better concurrency
- Prepared statements to avoid SQL injection and improve performance