Querying Data in HeliosDB¶
Last Updated: November 1, 2025 Version: v6.0 Phase 2 M1
Comprehensive guide to querying data in HeliosDB, from basic SELECT statements to advanced AI-powered queries.
Table of Contents¶
- Basic SELECT Queries
- Filtering with WHERE
- Joins
- Aggregations
- Window Functions
- Common Table Expressions (CTEs)
- JSON/JSONB Queries
- Array Queries
- Full-Text Search
- Vector Search
- Natural Language Queries (NL2SQL)
- Time-Series Queries
- Graph Queries
- Performance Optimization
Basic SELECT Queries¶
Select All Columns¶
-- Select everything
SELECT * FROM users;
-- Select specific columns
SELECT id, username, email FROM users;
-- Column aliases
SELECT
id AS user_id,
username AS name,
email AS contact_email
FROM users;
LIMIT and OFFSET (Pagination)¶
-- First 10 rows
SELECT * FROM users LIMIT 10;
-- Skip first 10, get next 10 (page 2)
SELECT * FROM users LIMIT 10 OFFSET 10;
-- Better pagination (using keyset)
SELECT * FROM users
WHERE id > 100
ORDER BY id
LIMIT 10;
ORDER BY (Sorting)¶
-- Ascending order (default)
SELECT * FROM users ORDER BY created_at ASC;
-- Descending order
SELECT * FROM users ORDER BY created_at DESC;
-- Multiple columns
SELECT * FROM users
ORDER BY created_at DESC, username ASC;
-- NULL handling
SELECT * FROM users
ORDER BY last_login NULLS LAST;
DISTINCT (Remove Duplicates)¶
-- Unique usernames
SELECT DISTINCT username FROM users;
-- Distinct combinations
SELECT DISTINCT country, city FROM users;
Filtering with WHERE¶
Comparison Operators¶
-- Equality
SELECT * FROM users WHERE id = 123;
-- Inequality
SELECT * FROM users WHERE status != 'banned';
-- Greater than / Less than
SELECT * FROM products WHERE price > 100;
SELECT * FROM products WHERE price <= 50;
-- BETWEEN (inclusive range)
SELECT * FROM orders
WHERE order_date BETWEEN '2025-01-01' AND '2025-12-31';
-- IN (multiple values)
SELECT * FROM users
WHERE status IN ('active', 'pending', 'verified');
-- NOT IN
SELECT * FROM users
WHERE status NOT IN ('banned', 'deleted');
Pattern Matching¶
-- LIKE (case-sensitive)
SELECT * FROM users
WHERE email LIKE '%@gmail.com';
-- ILIKE (case-insensitive)
SELECT * FROM users
WHERE username ILIKE 'admin%';
-- Wildcards:
-- % = any characters
-- _ = single character
-- Examples:
WHERE name LIKE 'A%' -- Starts with A
WHERE name LIKE '%son' -- Ends with son
WHERE name LIKE '%admin%' -- Contains admin
WHERE code LIKE 'US-__-2025' -- US-XX-2025
NULL Handling¶
-- Check for NULL
SELECT * FROM users WHERE last_login IS NULL;
-- Check for NOT NULL
SELECT * FROM users WHERE last_login IS NOT NULL;
-- COALESCE (return first non-NULL)
SELECT
username,
COALESCE(display_name, username) AS name
FROM users;
-- NULLIF (return NULL if equal)
SELECT NULLIF(status, 'unknown') FROM users;
Logical Operators¶
-- AND
SELECT * FROM users
WHERE status = 'active' AND created_at > '2025-01-01';
-- OR
SELECT * FROM users
WHERE status = 'admin' OR status = 'moderator';
-- NOT
SELECT * FROM users
WHERE NOT (status = 'banned' OR status = 'deleted');
-- Complex combinations
SELECT * FROM orders
WHERE (status = 'shipped' OR status = 'delivered')
AND total_amount > 100
AND created_at > NOW() - INTERVAL '30 days';
Joins¶
INNER JOIN¶
-- Only matching rows from both tables
SELECT
u.username,
p.title,
p.created_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id;
LEFT JOIN (LEFT OUTER JOIN)¶
-- All rows from left table, matching from right
SELECT
u.username,
COUNT(p.id) AS post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username;
RIGHT JOIN¶
-- All rows from right table, matching from left
SELECT
u.username,
p.title
FROM users u
RIGHT JOIN posts p ON u.id = p.user_id;
FULL OUTER JOIN¶
-- All rows from both tables
SELECT
u.username,
p.title
FROM users u
FULL OUTER JOIN posts p ON u.id = p.user_id;
CROSS JOIN (Cartesian Product)¶
Self Join¶
-- Join table to itself (e.g., employee-manager relationship)
SELECT
e.name AS employee,
m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
Multiple Joins¶
SELECT
u.username,
p.title,
c.content AS comment,
c.created_at AS comment_date
FROM users u
INNER JOIN posts p ON u.id = p.user_id
INNER JOIN comments c ON p.id = c.post_id
WHERE u.status = 'active'
ORDER BY c.created_at DESC;
Aggregations¶
Basic Aggregate Functions¶
-- COUNT
SELECT COUNT(*) FROM users;
SELECT COUNT(DISTINCT country) FROM users;
-- SUM
SELECT SUM(amount) FROM orders;
-- AVG
SELECT AVG(price) FROM products;
-- MIN / MAX
SELECT MIN(created_at), MAX(created_at) FROM users;
-- Multiple aggregates
SELECT
COUNT(*) AS total_orders,
SUM(amount) AS total_revenue,
AVG(amount) AS avg_order_value,
MIN(amount) AS min_order,
MAX(amount) AS max_order
FROM orders;
GROUP BY¶
-- Group by single column
SELECT
country,
COUNT(*) AS user_count
FROM users
GROUP BY country
ORDER BY user_count DESC;
-- Group by multiple columns
SELECT
country,
city,
COUNT(*) AS user_count
FROM users
GROUP BY country, city;
-- With aggregates
SELECT
user_id,
COUNT(*) AS order_count,
SUM(amount) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 1000
ORDER BY total_spent DESC;
HAVING (Filter After Grouping)¶
-- Find users with more than 10 posts
SELECT
user_id,
COUNT(*) AS post_count
FROM posts
GROUP BY user_id
HAVING COUNT(*) > 10;
-- Multiple conditions
SELECT
category,
AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50 AND COUNT(*) > 10;
Window Functions¶
ROW_NUMBER (Assign Row Numbers)¶
RANK and DENSE_RANK¶
-- RANK (gaps in ranking)
SELECT
username,
score,
RANK() OVER (ORDER BY score DESC) AS rank
FROM leaderboard;
-- DENSE_RANK (no gaps)
SELECT
username,
score,
DENSE_RANK() OVER (ORDER BY score DESC) AS dense_rank
FROM leaderboard;
PARTITION BY (Group Window Functions)¶
-- Rank users within each country
SELECT
username,
country,
score,
RANK() OVER (PARTITION BY country ORDER BY score DESC) AS country_rank
FROM users;
LAG and LEAD (Access Previous/Next Rows)¶
-- Compare with previous row
SELECT
order_date,
amount,
LAG(amount) OVER (ORDER BY order_date) AS prev_amount,
amount - LAG(amount) OVER (ORDER BY order_date) AS change
FROM orders;
-- Look ahead to next row
SELECT
order_date,
amount,
LEAD(amount) OVER (ORDER BY order_date) AS next_amount
FROM orders;
Running Totals¶
Moving Average¶
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7_days
FROM orders;
Common Table Expressions (CTEs)¶
Basic CTE¶
-- Define reusable query
WITH high_value_customers AS (
SELECT
user_id,
SUM(amount) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 1000
)
SELECT
u.username,
u.email,
hvc.total_spent
FROM high_value_customers hvc
JOIN users u ON hvc.user_id = u.id
ORDER BY hvc.total_spent DESC;
Multiple CTEs¶
WITH
active_users AS (
SELECT id, username FROM users WHERE status = 'active'
),
recent_orders AS (
SELECT user_id, SUM(amount) AS total
FROM orders
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY user_id
)
SELECT
au.username,
COALESCE(ro.total, 0) AS revenue_30d
FROM active_users au
LEFT JOIN recent_orders ro ON au.id = ro.user_id
ORDER BY revenue_30d DESC;
Recursive CTEs (Hierarchical Data)¶
-- Organizational hierarchy
WITH RECURSIVE org_tree AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 0 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees reporting to previous level
SELECT e.id, e.name, e.manager_id, ot.level + 1
FROM employees e
JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT * FROM org_tree ORDER BY level, name;
JSON/JSONB Queries¶
Extract JSON Fields¶
-- Extract as text
SELECT profile->>'name' AS name FROM users;
-- Extract as JSON
SELECT profile->'address' AS address FROM users;
-- Nested extraction
SELECT profile->'address'->>'city' AS city FROM users;
Filter on JSON Data¶
-- Exact match
SELECT * FROM users
WHERE profile->>'role' = 'admin';
-- JSON contains
SELECT * FROM users
WHERE profile @> '{"verified": true}';
-- JSON exists
SELECT * FROM users
WHERE profile ? 'premium_until';
JSON Array Operations¶
-- Array length
SELECT profile->'tags', jsonb_array_length(profile->'tags')
FROM users;
-- Expand JSON array to rows
SELECT username, jsonb_array_elements_text(profile->'tags') AS tag
FROM users;
Modify JSON¶
-- Add/update field
UPDATE users
SET profile = profile || '{"verified": true}'
WHERE id = 123;
-- Remove field
UPDATE users
SET profile = profile - 'temp_field'
WHERE id = 123;
Array Queries¶
Array Operations¶
-- Check if array contains value
SELECT * FROM posts
WHERE 'rust' = ANY(tags);
-- All values match
SELECT * FROM posts
WHERE 'published' = ALL(statuses);
-- Array overlap
SELECT * FROM posts
WHERE tags && ARRAY['rust', 'database'];
-- Array contains array
SELECT * FROM posts
WHERE tags @> ARRAY['rust', 'tutorial'];
Unnest Arrays¶
Full-Text Search¶
Basic Text Search¶
-- Create index
CREATE INDEX idx_posts_content_fts ON posts USING GIN(to_tsvector('english', content));
-- Search
SELECT title, content
FROM posts
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'database & performance');
Ranking Results¶
SELECT
title,
ts_rank(to_tsvector('english', content), query) AS rank
FROM posts, to_tsquery('english', 'database & performance') query
WHERE to_tsvector('english', content) @@ query
ORDER BY rank DESC;
Vector Search¶
Dense Vector Search (HNSW)¶
-- Find similar products
SELECT
name,
embedding <=> query_embedding AS distance
FROM products
ORDER BY distance
LIMIT 10;
-- Cosine similarity
SELECT
name,
1 - (embedding <=> query_embedding) AS similarity
FROM products
ORDER BY similarity DESC
LIMIT 10;
Hybrid Search (Dense + Sparse)¶
SELECT * FROM hybrid_search(
table_name := 'products',
query_text := 'wireless headphones',
query_embedding := get_embedding('wireless headphones'),
fusion_algorithm := 'rrf',
k := 60,
limit := 10
);
See Vector Search Guide for details.
Natural Language Queries (NL2SQL)¶
Basic NL2SQL¶
-- Ask in natural language
SELECT NL2SQL('Show me top 10 customers by revenue this month');
-- Automatically translates to:
-- SELECT customer_id, SUM(amount) as revenue
-- FROM orders
-- WHERE order_date >= DATE_TRUNC('month', NOW())
-- GROUP BY customer_id
-- ORDER BY revenue DESC
-- LIMIT 10
With Context¶
-- Provide schema context
SELECT NL2SQL(
'Find users who haven''t logged in for 30 days',
schema := 'public',
tables := ARRAY['users', 'login_history']
);
See NL2SQL Guide for details.
Time-Series Queries¶
Time Bucketing¶
-- Group by 1-hour intervals
SELECT
time_bucket('1 hour', created_at) AS hour,
COUNT(*) AS events_count
FROM events
WHERE created_at > NOW() - INTERVAL '24 hours'
GROUP BY hour
ORDER BY hour;
Continuous Aggregates¶
-- Create materialized view
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timeseries_aggregate = true) AS
SELECT
time_bucket('1 hour', timestamp) AS hour,
device_id,
AVG(temperature) AS avg_temp,
MAX(temperature) AS max_temp
FROM sensor_data
GROUP BY hour, device_id;
-- Query aggregate
SELECT * FROM hourly_metrics
WHERE hour > NOW() - INTERVAL '7 days';
See Time-Series Guide.
Graph Queries¶
Path Finding¶
-- Find friends of friends
SELECT name
FROM GRAPH_MATCH (
(a:users)-[:follows]->(b:users)-[:follows]->(c:users)
)
WHERE a.id = 123 AND c.id != 123
RETURN DISTINCT c.name;
Shortest Path¶
SELECT shortest_path(
start_node := 'user_123',
end_node := 'user_456',
max_depth := 6
) FROM social_graph;
See Graph Query Guide.
Performance Optimization¶
EXPLAIN ANALYZE¶
Create Indexes¶
-- B-tree index (default)
CREATE INDEX idx_users_email ON users(email);
-- Partial index
CREATE INDEX idx_active_users ON users(id) WHERE status = 'active';
-- Covering index
CREATE INDEX idx_users_email_username ON users(email, username);
-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
Query Hints¶
-- Force index usage
SELECT /*+ INDEX(users idx_users_email) */
* FROM users WHERE email = 'alice@example.com';
-- Disable sequential scan
SET enable_seqscan = OFF;
SELECT * FROM users WHERE email = 'alice@example.com';
SET enable_seqscan = ON;
Last Updated: November 1, 2025 Version: v6.0 Phase 2 M1 Maintained by: HeliosDB Documentation Team
Next: Data Types Reference →