Skip to content

Querying Data in HeliosDB

Last Updated: November 1, 2025 Version: v6.0 Phase 2 M1

Comprehensive guide to querying data in HeliosDB, from basic SELECT statements to advanced AI-powered queries.


Table of Contents

  1. Basic SELECT Queries
  2. Filtering with WHERE
  3. Joins
  4. Aggregations
  5. Window Functions
  6. Common Table Expressions (CTEs)
  7. JSON/JSONB Queries
  8. Array Queries
  9. Full-Text Search
  10. Vector Search
  11. Natural Language Queries (NL2SQL)
  12. Time-Series Queries
  13. Graph Queries
  14. Performance Optimization

Basic SELECT Queries

Select All Columns

-- Select everything
SELECT * FROM users;

-- Select specific columns
SELECT id, username, email FROM users;

-- Column aliases
SELECT
    id AS user_id,
    username AS name,
    email AS contact_email
FROM users;

LIMIT and OFFSET (Pagination)

-- First 10 rows
SELECT * FROM users LIMIT 10;

-- Skip first 10, get next 10 (page 2)
SELECT * FROM users LIMIT 10 OFFSET 10;

-- Better pagination (using keyset)
SELECT * FROM users
WHERE id > 100
ORDER BY id
LIMIT 10;

ORDER BY (Sorting)

-- Ascending order (default)
SELECT * FROM users ORDER BY created_at ASC;

-- Descending order
SELECT * FROM users ORDER BY created_at DESC;

-- Multiple columns
SELECT * FROM users
ORDER BY created_at DESC, username ASC;

-- NULL handling
SELECT * FROM users
ORDER BY last_login NULLS LAST;

DISTINCT (Remove Duplicates)

-- Unique usernames
SELECT DISTINCT username FROM users;

-- Distinct combinations
SELECT DISTINCT country, city FROM users;

Filtering with WHERE

Comparison Operators

-- Equality
SELECT * FROM users WHERE id = 123;

-- Inequality
SELECT * FROM users WHERE status != 'banned';

-- Greater than / Less than
SELECT * FROM products WHERE price > 100;
SELECT * FROM products WHERE price <= 50;

-- BETWEEN (inclusive range)
SELECT * FROM orders
WHERE order_date BETWEEN '2025-01-01' AND '2025-12-31';

-- IN (multiple values)
SELECT * FROM users
WHERE status IN ('active', 'pending', 'verified');

-- NOT IN
SELECT * FROM users
WHERE status NOT IN ('banned', 'deleted');

Pattern Matching

-- LIKE (case-sensitive)
SELECT * FROM users
WHERE email LIKE '%@gmail.com';

-- ILIKE (case-insensitive)
SELECT * FROM users
WHERE username ILIKE 'admin%';

-- Wildcards:
-- % = any characters
-- _ = single character

-- Examples:
WHERE name LIKE 'A%'       -- Starts with A
WHERE name LIKE '%son'     -- Ends with son
WHERE name LIKE '%admin%'  -- Contains admin
WHERE code LIKE 'US-__-2025' -- US-XX-2025

NULL Handling

-- Check for NULL
SELECT * FROM users WHERE last_login IS NULL;

-- Check for NOT NULL
SELECT * FROM users WHERE last_login IS NOT NULL;

-- COALESCE (return first non-NULL)
SELECT
    username,
    COALESCE(display_name, username) AS name
FROM users;

-- NULLIF (return NULL if equal)
SELECT NULLIF(status, 'unknown') FROM users;

Logical Operators

-- AND
SELECT * FROM users
WHERE status = 'active' AND created_at > '2025-01-01';

-- OR
SELECT * FROM users
WHERE status = 'admin' OR status = 'moderator';

-- NOT
SELECT * FROM users
WHERE NOT (status = 'banned' OR status = 'deleted');

-- Complex combinations
SELECT * FROM orders
WHERE (status = 'shipped' OR status = 'delivered')
  AND total_amount > 100
  AND created_at > NOW() - INTERVAL '30 days';

Joins

INNER JOIN

-- Only matching rows from both tables
SELECT
    u.username,
    p.title,
    p.created_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id;

LEFT JOIN (LEFT OUTER JOIN)

-- All rows from left table, matching from right
SELECT
    u.username,
    COUNT(p.id) AS post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username;

RIGHT JOIN

-- All rows from right table, matching from left
SELECT
    u.username,
    p.title
FROM users u
RIGHT JOIN posts p ON u.id = p.user_id;

FULL OUTER JOIN

-- All rows from both tables
SELECT
    u.username,
    p.title
FROM users u
FULL OUTER JOIN posts p ON u.id = p.user_id;

CROSS JOIN (Cartesian Product)

-- Every combination
SELECT
    u.username,
    r.role_name
FROM users u
CROSS JOIN roles r;

Self Join

-- Join table to itself (e.g., employee-manager relationship)
SELECT
    e.name AS employee,
    m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

Multiple Joins

SELECT
    u.username,
    p.title,
    c.content AS comment,
    c.created_at AS comment_date
FROM users u
INNER JOIN posts p ON u.id = p.user_id
INNER JOIN comments c ON p.id = c.post_id
WHERE u.status = 'active'
ORDER BY c.created_at DESC;

Aggregations

Basic Aggregate Functions

-- COUNT
SELECT COUNT(*) FROM users;
SELECT COUNT(DISTINCT country) FROM users;

-- SUM
SELECT SUM(amount) FROM orders;

-- AVG
SELECT AVG(price) FROM products;

-- MIN / MAX
SELECT MIN(created_at), MAX(created_at) FROM users;

-- Multiple aggregates
SELECT
    COUNT(*) AS total_orders,
    SUM(amount) AS total_revenue,
    AVG(amount) AS avg_order_value,
    MIN(amount) AS min_order,
    MAX(amount) AS max_order
FROM orders;

GROUP BY

-- Group by single column
SELECT
    country,
    COUNT(*) AS user_count
FROM users
GROUP BY country
ORDER BY user_count DESC;

-- Group by multiple columns
SELECT
    country,
    city,
    COUNT(*) AS user_count
FROM users
GROUP BY country, city;

-- With aggregates
SELECT
    user_id,
    COUNT(*) AS order_count,
    SUM(amount) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 1000
ORDER BY total_spent DESC;

HAVING (Filter After Grouping)

-- Find users with more than 10 posts
SELECT
    user_id,
    COUNT(*) AS post_count
FROM posts
GROUP BY user_id
HAVING COUNT(*) > 10;

-- Multiple conditions
SELECT
    category,
    AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50 AND COUNT(*) > 10;

Window Functions

ROW_NUMBER (Assign Row Numbers)

SELECT
    username,
    email,
    ROW_NUMBER() OVER (ORDER BY created_at) AS row_num
FROM users;

RANK and DENSE_RANK

-- RANK (gaps in ranking)
SELECT
    username,
    score,
    RANK() OVER (ORDER BY score DESC) AS rank
FROM leaderboard;

-- DENSE_RANK (no gaps)
SELECT
    username,
    score,
    DENSE_RANK() OVER (ORDER BY score DESC) AS dense_rank
FROM leaderboard;

PARTITION BY (Group Window Functions)

-- Rank users within each country
SELECT
    username,
    country,
    score,
    RANK() OVER (PARTITION BY country ORDER BY score DESC) AS country_rank
FROM users;

LAG and LEAD (Access Previous/Next Rows)

-- Compare with previous row
SELECT
    order_date,
    amount,
    LAG(amount) OVER (ORDER BY order_date) AS prev_amount,
    amount - LAG(amount) OVER (ORDER BY order_date) AS change
FROM orders;

-- Look ahead to next row
SELECT
    order_date,
    amount,
    LEAD(amount) OVER (ORDER BY order_date) AS next_amount
FROM orders;

Running Totals

SELECT
    order_date,
    amount,
    SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM orders;

Moving Average

SELECT
    order_date,
    amount,
    AVG(amount) OVER (
        ORDER BY order_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7_days
FROM orders;

Common Table Expressions (CTEs)

Basic CTE

-- Define reusable query
WITH high_value_customers AS (
    SELECT
        user_id,
        SUM(amount) AS total_spent
    FROM orders
    GROUP BY user_id
    HAVING SUM(amount) > 1000
)
SELECT
    u.username,
    u.email,
    hvc.total_spent
FROM high_value_customers hvc
JOIN users u ON hvc.user_id = u.id
ORDER BY hvc.total_spent DESC;

Multiple CTEs

WITH
active_users AS (
    SELECT id, username FROM users WHERE status = 'active'
),
recent_orders AS (
    SELECT user_id, SUM(amount) AS total
    FROM orders
    WHERE created_at > NOW() - INTERVAL '30 days'
    GROUP BY user_id
)
SELECT
    au.username,
    COALESCE(ro.total, 0) AS revenue_30d
FROM active_users au
LEFT JOIN recent_orders ro ON au.id = ro.user_id
ORDER BY revenue_30d DESC;

Recursive CTEs (Hierarchical Data)

-- Organizational hierarchy
WITH RECURSIVE org_tree AS (
    -- Base case: top-level managers
    SELECT id, name, manager_id, 0 AS level
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- Recursive case: employees reporting to previous level
    SELECT e.id, e.name, e.manager_id, ot.level + 1
    FROM employees e
    JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT * FROM org_tree ORDER BY level, name;

JSON/JSONB Queries

Extract JSON Fields

-- Extract as text
SELECT profile->>'name' AS name FROM users;

-- Extract as JSON
SELECT profile->'address' AS address FROM users;

-- Nested extraction
SELECT profile->'address'->>'city' AS city FROM users;

Filter on JSON Data

-- Exact match
SELECT * FROM users
WHERE profile->>'role' = 'admin';

-- JSON contains
SELECT * FROM users
WHERE profile @> '{"verified": true}';

-- JSON exists
SELECT * FROM users
WHERE profile ? 'premium_until';

JSON Array Operations

-- Array length
SELECT profile->'tags', jsonb_array_length(profile->'tags')
FROM users;

-- Expand JSON array to rows
SELECT username, jsonb_array_elements_text(profile->'tags') AS tag
FROM users;

Modify JSON

-- Add/update field
UPDATE users
SET profile = profile || '{"verified": true}'
WHERE id = 123;

-- Remove field
UPDATE users
SET profile = profile - 'temp_field'
WHERE id = 123;

Array Queries

Array Operations

-- Check if array contains value
SELECT * FROM posts
WHERE 'rust' = ANY(tags);

-- All values match
SELECT * FROM posts
WHERE 'published' = ALL(statuses);

-- Array overlap
SELECT * FROM posts
WHERE tags && ARRAY['rust', 'database'];

-- Array contains array
SELECT * FROM posts
WHERE tags @> ARRAY['rust', 'tutorial'];

Unnest Arrays

-- Expand array to rows
SELECT
    id,
    title,
    unnest(tags) AS tag
FROM posts;

-- Create index
CREATE INDEX idx_posts_content_fts ON posts USING GIN(to_tsvector('english', content));

-- Search
SELECT title, content
FROM posts
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'database & performance');

Ranking Results

SELECT
    title,
    ts_rank(to_tsvector('english', content), query) AS rank
FROM posts, to_tsquery('english', 'database & performance') query
WHERE to_tsvector('english', content) @@ query
ORDER BY rank DESC;

Dense Vector Search (HNSW)

-- Find similar products
SELECT
    name,
    embedding <=> query_embedding AS distance
FROM products
ORDER BY distance
LIMIT 10;

-- Cosine similarity
SELECT
    name,
    1 - (embedding <=> query_embedding) AS similarity
FROM products
ORDER BY similarity DESC
LIMIT 10;

Hybrid Search (Dense + Sparse)

SELECT * FROM hybrid_search(
    table_name := 'products',
    query_text := 'wireless headphones',
    query_embedding := get_embedding('wireless headphones'),
    fusion_algorithm := 'rrf',
    k := 60,
    limit := 10
);

See Vector Search Guide for details.


Natural Language Queries (NL2SQL)

Basic NL2SQL

-- Ask in natural language
SELECT NL2SQL('Show me top 10 customers by revenue this month');

-- Automatically translates to:
-- SELECT customer_id, SUM(amount) as revenue
-- FROM orders
-- WHERE order_date >= DATE_TRUNC('month', NOW())
-- GROUP BY customer_id
-- ORDER BY revenue DESC
-- LIMIT 10

With Context

-- Provide schema context
SELECT NL2SQL(
    'Find users who haven''t logged in for 30 days',
    schema := 'public',
    tables := ARRAY['users', 'login_history']
);

See NL2SQL Guide for details.


Time-Series Queries

Time Bucketing

-- Group by 1-hour intervals
SELECT
    time_bucket('1 hour', created_at) AS hour,
    COUNT(*) AS events_count
FROM events
WHERE created_at > NOW() - INTERVAL '24 hours'
GROUP BY hour
ORDER BY hour;

Continuous Aggregates

-- Create materialized view
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timeseries_aggregate = true) AS
SELECT
    time_bucket('1 hour', timestamp) AS hour,
    device_id,
    AVG(temperature) AS avg_temp,
    MAX(temperature) AS max_temp
FROM sensor_data
GROUP BY hour, device_id;

-- Query aggregate
SELECT * FROM hourly_metrics
WHERE hour > NOW() - INTERVAL '7 days';

See Time-Series Guide.


Graph Queries

Path Finding

-- Find friends of friends
SELECT name
FROM GRAPH_MATCH (
    (a:users)-[:follows]->(b:users)-[:follows]->(c:users)
)
WHERE a.id = 123 AND c.id != 123
RETURN DISTINCT c.name;

Shortest Path

SELECT shortest_path(
    start_node := 'user_123',
    end_node := 'user_456',
    max_depth := 6
) FROM social_graph;

See Graph Query Guide.


Performance Optimization

EXPLAIN ANALYZE

-- See query execution plan
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'alice@example.com';

Create Indexes

-- B-tree index (default)
CREATE INDEX idx_users_email ON users(email);

-- Partial index
CREATE INDEX idx_active_users ON users(id) WHERE status = 'active';

-- Covering index
CREATE INDEX idx_users_email_username ON users(email, username);

-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

Query Hints

-- Force index usage
SELECT /*+ INDEX(users idx_users_email) */
    * FROM users WHERE email = 'alice@example.com';

-- Disable sequential scan
SET enable_seqscan = OFF;
SELECT * FROM users WHERE email = 'alice@example.com';
SET enable_seqscan = ON;

Last Updated: November 1, 2025 Version: v6.0 Phase 2 M1 Maintained by: HeliosDB Documentation Team

Next: Data Types Reference