Skip to content

Production Deployment: Infrastructure Setup

Part of: Production Deployment Guide


2.1 Cloud Provider Setup

2.1.1 Amazon Web Services (AWS)

VPC Configuration:

# Create the VPC and capture its ID directly from the create call.
# Looking the VPC up by its Name tag afterwards is ambiguous when another VPC
# in the account already carries the same tag.
VPC_ID=$(aws ec2 create-vpc \
  --cidr-block 10.0.0.0/16 \
  --tag-specifications 'ResourceType=vpc,Tags=[{Key=Name,Value=heliosdb-vpc}]' \
  --query 'Vpc.VpcId' --output text)

# Enable DNS hostnames
aws ec2 modify-vpc-attribute \
  --vpc-id "$VPC_ID" \
  --enable-dns-hostnames

# Create Internet Gateway and keep its ID for the attach step
IGW_ID=$(aws ec2 create-internet-gateway \
  --query 'InternetGateway.InternetGatewayId' --output text)

# Attach the gateway to the VPC created above
aws ec2 attach-internet-gateway \
  --vpc-id "$VPC_ID" \
  --internet-gateway-id "$IGW_ID"

# Create subnets across 3 availability zones.
# ZONES holds the AZ letter suffixes; subnet i (1-based) goes to ZONES[i-1],
# giving us-east-1a, us-east-1b and us-east-1c.
ZONES=(a b c)
for i in 1 2 3; do
  aws ec2 create-subnet \
    --vpc-id "$VPC_ID" \
    --cidr-block "10.0.$i.0/24" \
    --availability-zone "us-east-1${ZONES[i-1]}" \
    --tag-specifications "ResourceType=subnet,Tags=[{Key=Name,Value=heliosdb-subnet-$i}]"
done

Security Groups:

# Create security group for HeliosDB cluster and keep its ID for the rules below
SG_ID=$(aws ec2 create-security-group \
  --group-name heliosdb-cluster-sg \
  --description "HeliosDB Cluster Security Group" \
  --vpc-id "$VPC_ID" \
  --query 'GroupId' --output text)

# Allow PostgreSQL protocol (5432) from anywhere inside the VPC CIDR
aws ec2 authorize-security-group-ingress \
  --group-id "$SG_ID" \
  --protocol tcp \
  --port 5432 \
  --cidr 10.0.0.0/16

# Allow internal cluster communication (7000-7010), only between members of
# this same security group
aws ec2 authorize-security-group-ingress \
  --group-id "$SG_ID" \
  --protocol tcp \
  --port 7000-7010 \
  --source-group "$SG_ID"

# Allow Raft consensus (8300), only between members of this security group
aws ec2 authorize-security-group-ingress \
  --group-id "$SG_ID" \
  --protocol tcp \
  --port 8300 \
  --source-group "$SG_ID"

# Allow metrics (9090-9100) from anywhere inside the VPC CIDR
aws ec2 authorize-security-group-ingress \
  --group-id "$SG_ID" \
  --protocol tcp \
  --port 9090-9100 \
  --cidr 10.0.0.0/16

# Allow SSH (restricted to bastion host).
# BASTION_SG_ID is not created anywhere in this guide: it must already hold the
# bastion host's security group ID. Fail with a clear message rather than
# sending an empty --source-group to the API.
: "${BASTION_SG_ID:?set BASTION_SG_ID to the bastion host security group ID}"
aws ec2 authorize-security-group-ingress \
  --group-id "$SG_ID" \
  --protocol tcp \
  --port 22 \
  --source-group "$BASTION_SG_ID"

EKS Cluster Creation:

# Create EKS cluster with an initial managed node group ("compute-nodes").
# Replace subnet-xxx,subnet-yyy,subnet-zzz with the IDs of the subnets
# created during VPC configuration.
eksctl create cluster \
  --name heliosdb-prod \
  --version 1.28 \
  --region us-east-1 \
  --vpc-public-subnets subnet-xxx,subnet-yyy,subnet-zzz \
  --nodegroup-name compute-nodes \
  --node-type m5.2xlarge \
  --nodes 3 \
  --nodes-min 3 \
  --nodes-max 10 \
  --managed \
  --asg-access \
  --full-ecr-access \
  --alb-ingress-access

# Add storage node group.
# --region is given explicitly so the command targets the same region as the
# cluster even when the caller's default AWS region differs.
# NOTE(review): confirm that the eksctl version in use accepts --node-taints
# on the command line; taints may have to be declared in a ClusterConfig file.
eksctl create nodegroup \
  --cluster heliosdb-prod \
  --region us-east-1 \
  --name storage-nodes \
  --node-type r5.4xlarge \
  --nodes 5 \
  --nodes-min 5 \
  --nodes-max 20 \
  --node-labels role=storage \
  --node-taints dedicated=storage:NoSchedule

# Add metadata node group (same region pinning and taint caveat as above)
eksctl create nodegroup \
  --cluster heliosdb-prod \
  --region us-east-1 \
  --name metadata-nodes \
  --node-type m5.xlarge \
  --nodes 3 \
  --nodes-min 3 \
  --nodes-max 5 \
  --node-labels role=metadata \
  --node-taints dedicated=metadata:NoSchedule

2.1.2 Google Cloud Platform (GCP)

VPC and Network Setup:

# Create the VPC in custom subnet mode with regional BGP routing
gcloud compute networks create heliosdb-vpc \
  --bgp-routing-mode=regional \
  --subnet-mode=custom

# Create one subnet per region; each entry is "<region>:<CIDR range>"
for subnet_spec in \
  us-east1:10.0.1.0/24 \
  us-west1:10.0.2.0/24 \
  us-central1:10.0.3.0/24; do
  subnet_region=${subnet_spec%%:*}
  subnet_range=${subnet_spec#*:}
  gcloud compute networks subnets create "heliosdb-subnet-${subnet_region}" \
    --network=heliosdb-vpc \
    --region="${subnet_region}" \
    --range="${subnet_range}"
done

# Firewall: cluster-internal ports, reachable from the 10.0.0.0/16 range only
gcloud compute firewall-rules create heliosdb-internal \
  --description="HeliosDB internal cluster communication" \
  --network=heliosdb-vpc \
  --source-ranges=10.0.0.0/16 \
  --allow=tcp:7000-7010,tcp:8300,tcp:9090-9100

# Firewall: PostgreSQL wire protocol, reachable from the 10.0.0.0/16 range only
gcloud compute firewall-rules create heliosdb-postgres \
  --description="PostgreSQL protocol access" \
  --network=heliosdb-vpc \
  --source-ranges=10.0.0.0/16 \
  --allow=tcp:5432

GKE Cluster Creation:

# Create GKE cluster.
# With --region the cluster is regional: --num-nodes, --min-nodes and
# --max-nodes are counted per zone, not for the cluster as a whole.
# A recurring maintenance window is defined by start, end and recurrence
# together; here it is a daily 4-hour window beginning at 00:00 UTC.
gcloud container clusters create heliosdb-prod \
  --region=us-east1 \
  --num-nodes=3 \
  --machine-type=n2-standard-8 \
  --disk-type=pd-ssd \
  --disk-size=100 \
  --network=heliosdb-vpc \
  --subnetwork=heliosdb-subnet-us-east1 \
  --enable-autoscaling \
  --min-nodes=3 \
  --max-nodes=10 \
  --enable-autorepair \
  --enable-autoupgrade \
  --maintenance-window-start="2025-01-01T00:00:00Z" \
  --maintenance-window-end="2025-01-01T04:00:00Z" \
  --maintenance-window-recurrence="FREQ=DAILY" \
  --addons=HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver

# Add storage node pool: tainted so only workloads tolerating
# dedicated=storage are scheduled on it, and labelled role=storage for
# node selection.
gcloud container node-pools create storage-nodes \
  --cluster=heliosdb-prod \
  --region=us-east1 \
  --machine-type=n2-highmem-16 \
  --num-nodes=5 \
  --disk-type=pd-ssd \
  --disk-size=500 \
  --enable-autoscaling \
  --min-nodes=5 \
  --max-nodes=20 \
  --node-taints=dedicated=storage:NoSchedule \
  --node-labels=role=storage

2.1.3 Microsoft Azure

Resource Group and VNet:

# Create resource group
az group create \
  --name heliosdb-prod-rg \
  --location eastus

# Create virtual network together with its first subnet
az network vnet create \
  --resource-group heliosdb-prod-rg \
  --name heliosdb-vnet \
  --address-prefix 10.0.0.0/16 \
  --subnet-name heliosdb-subnet-1 \
  --subnet-prefix 10.0.1.0/24

# Create additional subnets
az network vnet subnet create \
  --resource-group heliosdb-prod-rg \
  --vnet-name heliosdb-vnet \
  --name heliosdb-subnet-2 \
  --address-prefix 10.0.2.0/24

az network vnet subnet create \
  --resource-group heliosdb-prod-rg \
  --vnet-name heliosdb-vnet \
  --name heliosdb-subnet-3 \
  --address-prefix 10.0.3.0/24

# Create network security group
az network nsg create \
  --resource-group heliosdb-prod-rg \
  --name heliosdb-nsg

# Add security rules (inbound, from the VNet address space only)
az network nsg rule create \
  --resource-group heliosdb-prod-rg \
  --nsg-name heliosdb-nsg \
  --name AllowPostgreSQL \
  --priority 100 \
  --direction Inbound \
  --source-address-prefixes 10.0.0.0/16 \
  --destination-port-ranges 5432 \
  --access Allow \
  --protocol Tcp

az network nsg rule create \
  --resource-group heliosdb-prod-rg \
  --nsg-name heliosdb-nsg \
  --name AllowClusterInternal \
  --priority 110 \
  --direction Inbound \
  --source-address-prefixes 10.0.0.0/16 \
  --destination-port-ranges 7000-7010 8300 9090-9100 \
  --access Allow \
  --protocol Tcp

# Associate the NSG with every cluster subnet. An NSG that is not attached to
# a subnet or network interface does not filter any traffic.
for subnet_name in heliosdb-subnet-1 heliosdb-subnet-2 heliosdb-subnet-3; do
  az network vnet subnet update \
    --resource-group heliosdb-prod-rg \
    --vnet-name heliosdb-vnet \
    --name "$subnet_name" \
    --network-security-group heliosdb-nsg
done

AKS Cluster Creation:

# Resolve the resource ID of the subnet the cluster nodes will join.
# Looking it up avoids pasting a "<sub-id>" placeholder into the command line,
# where the shell would interpret "<" and ">" as redirections.
SUBNET_ID=$(az network vnet subnet show \
  --resource-group heliosdb-prod-rg \
  --vnet-name heliosdb-vnet \
  --name heliosdb-subnet-1 \
  --query id --output tsv)

# Create AKS cluster
# NOTE(review): with --network-plugin azure each pod takes an IP from the node
# subnet; confirm that the /24 subnet is large enough for the configured
# autoscaler maximums of this pool plus the storage pool.
az aks create \
  --resource-group heliosdb-prod-rg \
  --name heliosdb-prod \
  --location eastus \
  --network-plugin azure \
  --vnet-subnet-id "$SUBNET_ID" \
  --node-count 3 \
  --node-vm-size Standard_D8s_v3 \
  --enable-cluster-autoscaler \
  --min-count 3 \
  --max-count 10 \
  --enable-addons monitoring \
  --generate-ssh-keys

# Add storage node pool: tainted dedicated=storage:NoSchedule and labelled
# role=storage so only storage workloads land on it.
az aks nodepool add \
  --resource-group heliosdb-prod-rg \
  --cluster-name heliosdb-prod \
  --name storagenodes \
  --node-count 5 \
  --node-vm-size Standard_E16s_v3 \
  --enable-cluster-autoscaler \
  --min-count 5 \
  --max-count 20 \
  --node-taints dedicated=storage:NoSchedule \
  --labels role=storage

2.2 Network Configuration

2.2.1 DNS Setup

Internal DNS Records (using Route53 on AWS):

# Create a private hosted zone attached to the VPC.
# The caller reference must be unique per request; the current epoch second
# is used for that.
ZONE_ID=$(aws route53 create-hosted-zone \
  --name heliosdb.internal \
  --vpc "VPCRegion=us-east-1,VPCId=$VPC_ID" \
  --caller-reference "$(date +%s)" \
  --query 'HostedZone.Id' --output text)

# Create A records for service discovery.
# The heredoc delimiter is quoted so the JSON is written out literally.
# The IP addresses are examples; replace them with the real node addresses.
cat > dns-records.json <<'EOF'
{
  "Changes": [
    {
      "Action": "CREATE",
      "ResourceRecordSet": {
        "Name": "cluster.heliosdb.internal",
        "Type": "A",
        "TTL": 60,
        "ResourceRecords": [
          {"Value": "10.0.1.10"},
          {"Value": "10.0.1.11"},
          {"Value": "10.0.1.12"}
        ]
      }
    },
    {
      "Action": "CREATE",
      "ResourceRecordSet": {
        "Name": "metadata.heliosdb.internal",
        "Type": "A",
        "TTL": 60,
        "ResourceRecords": [
          {"Value": "10.0.2.10"},
          {"Value": "10.0.2.11"},
          {"Value": "10.0.2.12"}
        ]
      }
    }
  ]
}
EOF

# Submit both record sets as one change batch
aws route53 change-resource-record-sets \
  --hosted-zone-id "$ZONE_ID" \
  --change-batch file://dns-records.json

2.2.2 Load Balancer Configuration

Network Load Balancer (AWS):

# Create target group: TCP on 5432 with a TCP health check on the same port
TG_ARN=$(aws elbv2 create-target-group \
  --name heliosdb-compute-tg \
  --protocol TCP \
  --port 5432 \
  --vpc-id "$VPC_ID" \
  --health-check-protocol TCP \
  --health-check-port 5432 \
  --health-check-interval-seconds 30 \
  --health-check-timeout-seconds 10 \
  --healthy-threshold-count 3 \
  --unhealthy-threshold-count 3 \
  --query 'TargetGroups[0].TargetGroupArn' --output text)

# Create internal Network Load Balancer.
# Replace subnet-xxx subnet-yyy subnet-zzz with the real subnet IDs.
NLB_ARN=$(aws elbv2 create-load-balancer \
  --name heliosdb-nlb \
  --type network \
  --subnets subnet-xxx subnet-yyy subnet-zzz \
  --scheme internal \
  --query 'LoadBalancers[0].LoadBalancerArn' --output text)

# Create listener that forwards TCP 5432 to the target group
aws elbv2 create-listener \
  --load-balancer-arn "$NLB_ARN" \
  --protocol TCP \
  --port 5432 \
  --default-actions "Type=forward,TargetGroupArn=$TG_ARN"

2.3 Storage Provisioning

2.3.1 Block Storage (AWS EBS)

Storage Classes:

# storage-class-gp3.yaml
# StorageClass "heliosdb-gp3": encrypted, ext4-formatted gp3 volumes
# provisioned by ebs.csi.aws.com with 10000 IOPS and 500 throughput requested.
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: heliosdb-gp3
provisioner: ebs.csi.aws.com
# Binding waits for the first consuming pod; volumes may be expanded later.
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  type: gp3
  fsType: ext4
  encrypted: "true"
  iops: "10000"
  throughput: "500"
---
# storage-class-io2.yaml
# StorageClass "heliosdb-io2": encrypted, ext4-formatted io2 volumes
# provisioned by ebs.csi.aws.com with 64000 provisioned IOPS.
# No "throughput" parameter is set: the EBS CSI driver only applies it to gp3
# volumes, so it would have no effect on io2.
# NOTE(review): io2 caps IOPS relative to volume size; confirm that the PVC
# sizes used with this class are large enough for 64000 IOPS.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: heliosdb-io2
provisioner: ebs.csi.aws.com
parameters:
  type: io2
  iops: "64000"
  fsType: ext4
  encrypted: "true"
allowVolumeExpansion: true
volumeBindingMode: WaitForFirstConsumer

Apply storage classes:

# Apply each StorageClass manifest in turn (gp3 first, then io2)
for manifest in storage-class-gp3.yaml storage-class-io2.yaml; do
  kubectl apply -f "$manifest"
done

2.3.2 Object Storage (S3)

S3 Bucket for Backups:

# Create S3 bucket.
# us-east-1 is the default location and must NOT be passed as a
# LocationConstraint (the API rejects it). For any other region add:
#   --create-bucket-configuration LocationConstraint=<region>
aws s3api create-bucket \
  --bucket heliosdb-backups-prod \
  --region us-east-1

# Turn on object versioning for the backup bucket
aws s3api put-bucket-versioning \
  --versioning-configuration Status=Enabled \
  --bucket heliosdb-backups-prod

# Default server-side encryption rule: aws:kms with the given key ARN.
# The ARN below is a placeholder; substitute the real KMS key.
sse_config='{
    "Rules": [{
      "ApplyServerSideEncryptionByDefault": {
        "SSEAlgorithm": "aws:kms",
        "KMSMasterKeyID": "arn:aws:kms:us-east-1:xxx:key/xxx"
      }
    }]
  }'

aws s3api put-bucket-encryption \
  --server-side-encryption-configuration "$sse_config" \
  --bucket heliosdb-backups-prod

# Write the lifecycle policy for objects under the backups/ prefix:
# transition to STANDARD_IA after 30 days, to GLACIER after 90 days,
# and expire after 365 days.
cat <<'EOF' > lifecycle-policy.json
{
  "Rules": [
    {
      "Id": "DeleteOldBackups",
      "Status": "Enabled",
      "Filter": {"Prefix": "backups/"},
      "Transitions": [
        {
          "Days": 30,
          "StorageClass": "STANDARD_IA"
        },
        {
          "Days": 90,
          "StorageClass": "GLACIER"
        }
      ],
      "Expiration": {
        "Days": 365
      }
    }
  ]
}
EOF

# Attach the policy file to the backup bucket
aws s3api put-bucket-lifecycle-configuration \
  --lifecycle-configuration file://lifecycle-policy.json \
  --bucket heliosdb-backups-prod

2.4 Security Groups and Firewalls

2.4.1 Network Security Best Practices

Principle of Least Privilege:

- Only allow necessary ports
- Use security group references instead of CIDR blocks where possible
- Implement egress filtering
- Use separate security groups for different node types

Port Reference:

| Port  | Protocol | Purpose             | Access        |
|-------|----------|---------------------|---------------|
| 5432  | TCP      | PostgreSQL protocol | Client access |
| 7000  | TCP      | Cluster gossip      | Internal only |
| 7001  | TCP      | Metadata service    | Internal only |
| 7002  | TCP      | Storage service     | Internal only |
| 8300  | TCP      | Raft consensus      | Internal only |
| 9090  | TCP      | Prometheus metrics  | Monitoring    |
| 9100  | TCP      | Node exporter       | Monitoring    |
| 9256  | TCP      | WASM runtime        | Internal only |
| 10000 | TCP      | GraphQL endpoint    | Client access |