Production Deployment: Kubernetes Deployment
Part of: Production Deployment Guide
3.1 Cluster Creation
We covered cluster creation in Section 2.1. Now let's configure the cluster for HeliosDB.
Install Required Components:
# AWS EBS CSI driver: apply the "stable" kustomize overlay from the release-1.25 ref.
kubectl apply --kustomize "github.com/kubernetes-sigs/aws-ebs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-1.25"

# metrics-server: apply the components manifest published with the latest release.
kubectl apply --filename https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# Prometheus Operator: install the kube-prometheus-stack chart as release
# "prometheus" into the monitoring namespace (created if it does not exist).
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm install prometheus prometheus-community/kube-prometheus-stack \
  --create-namespace \
  --namespace monitoring \
  --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false
3.2 Namespace Setup
Create Namespaces:
# namespaces.yaml
# Declares the three namespaces used by this guide: heliosdb, heliosdb-system
# and monitoring. Each carries a "name" label equal to the namespace name.
---
apiVersion: v1
kind: Namespace
metadata:
  name: heliosdb
  labels:
    environment: production
    name: heliosdb
---
apiVersion: v1
kind: Namespace
metadata:
  name: heliosdb-system
  labels:
    environment: production
    name: heliosdb-system
---
# NOTE(review): the Helm install step earlier in this guide passes
# --create-namespace for "monitoring", so this namespace may already exist
# when the file is applied.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
  labels:
    name: monitoring
Apply namespaces:
kubectl apply -f namespaces.yaml
Resource Quotas:
# resource-quotas.yaml
# Caps aggregate compute and storage consumption in the heliosdb namespace.
#
# NOTE(review): with the workloads defined in this guide (3 metadata pods at
# 2/4 CPU, 5 storage pods at 4/8 CPU, compute pods at 4/8 CPU request/limit),
# the compute HPA maximum of 20 replicas would need 106 CPU of requests and
# 212 CPU of limits, which exceeds the 100/200 caps below. Confirm whether the
# quota or the HPA ceiling should change.
---
apiVersion: v1
kind: ResourceQuota
metadata:
  name: heliosdb-quota
  namespace: heliosdb
spec:
  hard:
    # CPU and memory, summed over all pods in the namespace.
    requests.cpu: '100'
    limits.cpu: '200'
    requests.memory: 500Gi
    limits.memory: 1000Gi
    # Persistent storage: number of claims and total requested capacity.
    persistentvolumeclaims: '50'
    requests.storage: 10Ti
Apply quotas:
kubectl apply -f resource-quotas.yaml
3.3 StatefulSet Deployment
3.3.1 Metadata Nodes (Raft Consensus)
metadata-statefulset.yaml:
# Headless Service (clusterIP: None) used as the StatefulSet's serviceName.
# The --cluster-peers argument below addresses each pod through it as
# <pod-name>.heliosdb-metadata.
---
apiVersion: v1
kind: Service
metadata:
  name: heliosdb-metadata
  namespace: heliosdb
  labels:
    app: heliosdb
    component: metadata
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: heliosdb
    component: metadata
  ports:
    - name: metadata
      port: 7001
    - name: raft
      port: 8300
    - name: metrics
      port: 9090
---
# Three-replica StatefulSet running the heliosdb-metadata binary, one 100Gi
# volume per pod.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: heliosdb-metadata
  namespace: heliosdb
spec:
  serviceName: heliosdb-metadata
  replicas: 3
  selector:
    matchLabels:
      app: heliosdb
      component: metadata
  template:
    metadata:
      labels:
        app: heliosdb
        component: metadata
      annotations:
        # NOTE(review): the monitoring stack installed in this guide is the
        # Prometheus Operator chart; confirm its scrape configuration honours
        # these annotations (otherwise a ServiceMonitor/PodMonitor is needed).
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9090'
    spec:
      # Hard rule: no two pods labelled component=metadata on the same node.
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - topologyKey: kubernetes.io/hostname
              labelSelector:
                matchLabels:
                  component: metadata
      # Schedule only onto nodes labelled role=metadata, tolerating their
      # dedicated=metadata:NoSchedule taint.
      nodeSelector:
        role: metadata
      tolerations:
        - key: dedicated
          operator: Equal
          value: metadata
          effect: NoSchedule
      containers:
        - name: metadata
          image: heliosdb/heliosdb:6.0.0
          imagePullPolicy: IfNotPresent
          command:
            - /usr/local/bin/heliosdb-metadata
          args:
            # $(POD_NAME) is substituted from the env entry defined below.
            - --node-id=$(POD_NAME)
            - --listen-addr=0.0.0.0:7001
            - --raft-addr=0.0.0.0:8300
            - --data-dir=/data/metadata
            - --cluster-peers=heliosdb-metadata-0.heliosdb-metadata:8300,heliosdb-metadata-1.heliosdb-metadata:8300,heliosdb-metadata-2.heliosdb-metadata:8300
          env:
            # Downward API: expose the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: RUST_LOG
              value: 'info,heliosdb=debug'
            - name: RUST_BACKTRACE
              value: '1'
          ports:
            - name: metadata
              containerPort: 7001
            - name: raft
              containerPort: 8300
            - name: metrics
              containerPort: 9090
          resources:
            requests:
              cpu: '2'
              memory: 4Gi
            limits:
              cpu: '4'
              memory: 8Gi
          volumeMounts:
            # "data" comes from the volumeClaimTemplate; "config" from the
            # heliosdb-config ConfigMap.
            - name: data
              mountPath: /data
            - name: config
              mountPath: /etc/heliosdb
          # Both probes use HTTP on the metrics port (9090).
          livenessProbe:
            httpGet:
              path: /health
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: 9090
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: config
          configMap:
            name: heliosdb-config
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        storageClassName: heliosdb-gp3
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 100Gi
3.3.2 Storage Nodes
storage-statefulset.yaml:
# Headless Service (clusterIP: None) used as the storage StatefulSet's
# serviceName.
---
apiVersion: v1
kind: Service
metadata:
  name: heliosdb-storage
  namespace: heliosdb
  labels:
    app: heliosdb
    component: storage
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: heliosdb
    component: storage
  ports:
    - name: storage
      port: 7002
    - name: metrics
      port: 9090
---
# Five-replica StatefulSet running the heliosdb-storage binary. Each pod gets
# two volumes: "data" (500Gi) and "wal" (100Gi).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: heliosdb-storage
  namespace: heliosdb
spec:
  serviceName: heliosdb-storage
  replicas: 5
  selector:
    matchLabels:
      app: heliosdb
      component: storage
  template:
    metadata:
      labels:
        app: heliosdb
        component: storage
      annotations:
        # NOTE(review): the monitoring stack installed in this guide is the
        # Prometheus Operator chart; confirm its scrape configuration honours
        # these annotations (otherwise a ServiceMonitor/PodMonitor is needed).
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9090'
    spec:
      # Soft rule: prefer not to co-locate pods labelled component=storage.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                topologyKey: kubernetes.io/hostname
                labelSelector:
                  matchLabels:
                    component: storage
      # Schedule only onto nodes labelled role=storage, tolerating their
      # dedicated=storage:NoSchedule taint.
      nodeSelector:
        role: storage
      tolerations:
        - key: dedicated
          operator: Equal
          value: storage
          effect: NoSchedule
      containers:
        - name: storage
          image: heliosdb/heliosdb:6.0.0
          imagePullPolicy: IfNotPresent
          command:
            - /usr/local/bin/heliosdb-storage
          args:
            # $(POD_NAME) is substituted from the env entry defined below.
            - --node-id=$(POD_NAME)
            - --listen-addr=0.0.0.0:7002
            - --data-dir=/data/storage
            - --wal-dir=/wal
            - --metadata-endpoints=heliosdb-metadata:7001
            - --replication-factor=3
            - --enable-compression=true
            - --compression-algorithm=zstd
          env:
            # Downward API: expose the pod's own name and IP.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: RUST_LOG
              value: 'info,heliosdb=debug'
          ports:
            - name: storage
              containerPort: 7002
            - name: metrics
              containerPort: 9090
          resources:
            requests:
              cpu: '4'
              memory: 16Gi
            limits:
              cpu: '8'
              memory: 32Gi
          volumeMounts:
            # "data" and "wal" come from the volumeClaimTemplates; "config"
            # from the heliosdb-config ConfigMap.
            - name: data
              mountPath: /data
            - name: wal
              mountPath: /wal
            - name: config
              mountPath: /etc/heliosdb
          # Both probes use HTTP on the metrics port (9090).
          livenessProbe:
            httpGet:
              path: /health
              port: 9090
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 5
          readinessProbe:
            httpGet:
              path: /ready
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: config
          configMap:
            name: heliosdb-config
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        storageClassName: heliosdb-io2
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 500Gi
    - metadata:
        name: wal
      spec:
        storageClassName: heliosdb-io2
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 100Gi
3.3.3 Compute Nodes
compute-deployment.yaml:
# LoadBalancer Service in front of the compute pods, exposing the postgres
# (5432), graphql (10000) and metrics (9090) ports.
# NOTE(review): the metrics port is published through the same external load
# balancer as the client ports — confirm that is intended.
---
apiVersion: v1
kind: Service
metadata:
  name: heliosdb-compute
  namespace: heliosdb
  labels:
    app: heliosdb
    component: compute
spec:
  type: LoadBalancer
  selector:
    app: heliosdb
    component: compute
  ports:
    - name: postgres
      port: 5432
      targetPort: 5432
    - name: graphql
      port: 10000
      targetPort: 10000
    - name: metrics
      port: 9090
      targetPort: 9090
---
# Deployment running the heliosdb-compute binary.
# NOTE(review): this guide also defines an HPA targeting this Deployment;
# re-applying a manifest with a fixed replica count resets whatever the HPA
# has scaled to — confirm whether "replicas" should stay here.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: heliosdb-compute
  namespace: heliosdb
spec:
  replicas: 3
  selector:
    matchLabels:
      app: heliosdb
      component: compute
  template:
    metadata:
      labels:
        app: heliosdb
        component: compute
      annotations:
        # NOTE(review): the monitoring stack installed in this guide is the
        # Prometheus Operator chart; confirm its scrape configuration honours
        # these annotations (otherwise a ServiceMonitor/PodMonitor is needed).
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9090'
    spec:
      # Soft rule: prefer not to co-locate pods labelled component=compute.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                topologyKey: kubernetes.io/hostname
                labelSelector:
                  matchLabels:
                    component: compute
      containers:
        - name: compute
          image: heliosdb/heliosdb:6.0.0
          imagePullPolicy: IfNotPresent
          command:
            - /usr/local/bin/heliosdb-compute
          args:
            - --listen-addr=0.0.0.0:5432
            - --graphql-addr=0.0.0.0:10000
            - --metadata-endpoints=heliosdb-metadata:7001
            - --storage-endpoints=heliosdb-storage:7002
            - --max-connections=1000
            - --enable-query-cache=true
            - --enable-ai-optimization=true
          env:
            - name: RUST_LOG
              value: 'info,heliosdb=debug'
            # Read from the "database-url" key of the heliosdb-secrets Secret.
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: heliosdb-secrets
                  key: database-url
          ports:
            - name: postgres
              containerPort: 5432
            - name: graphql
              containerPort: 10000
            - name: metrics
              containerPort: 9090
          resources:
            requests:
              cpu: '4'
              memory: 8Gi
            limits:
              cpu: '8'
              memory: 16Gi
          volumeMounts:
            - name: config
              mountPath: /etc/heliosdb
            - name: cache
              mountPath: /cache
          # Both probes only check that the 5432 listener accepts TCP
          # connections.
          livenessProbe:
            tcpSocket:
              port: 5432
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            tcpSocket:
              port: 5432
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: config
          configMap:
            name: heliosdb-config
        # Scratch space for /cache, capped at 10Gi.
        - name: cache
          emptyDir:
            sizeLimit: 10Gi
3.4 Service Configuration
Service Types:
- ClusterIP (default): Internal access only
- NodePort: External access via node ports (30000-32767)
- LoadBalancer: Cloud provider load balancer
- ExternalName: DNS CNAME alias
3.5 Ingress Setup
ingress.yaml:
# HTTPS Ingress for the HeliosDB GraphQL API, served by ingress-nginx with a
# certificate issued by the letsencrypt-prod cert-manager ClusterIssuer.
#
# Changes from the previous version of this manifest:
#   * The ingress class is selected with spec.ingressClassName instead of the
#     deprecated kubernetes.io/ingress.class annotation.
#   * The backend-protocol annotation was removed: "TCP" is not a value
#     ingress-nginx accepts, and the default (HTTP) is used for the backend.
#   * The rule that sent heliosdb.example.com to port 5432 was removed. An
#     Ingress carries HTTP(S) only, so it cannot proxy the PostgreSQL wire
#     protocol. PostgreSQL clients should connect through the
#     heliosdb-compute LoadBalancer Service (port 5432); alternatively expose
#     the port through ingress-nginx's tcp-services ConfigMap.
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: heliosdb-ingress
  namespace: heliosdb
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: 'true'
    nginx.ingress.kubernetes.io/proxy-body-size: 50m
    # Proxy timeouts, in seconds.
    nginx.ingress.kubernetes.io/proxy-connect-timeout: '600'
    nginx.ingress.kubernetes.io/proxy-send-timeout: '600'
    nginx.ingress.kubernetes.io/proxy-read-timeout: '600'
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - api.heliosdb.example.com
      secretName: heliosdb-tls
  rules:
    - host: api.heliosdb.example.com
      http:
        paths:
          - path: /graphql
            pathType: Prefix
            backend:
              service:
                name: heliosdb-compute
                port:
                  number: 10000
3.6 Auto-scaling Configuration
3.6.1 Horizontal Pod Autoscaler (HPA)
compute-hpa.yaml:
# Scales the heliosdb-compute Deployment between 3 and 20 replicas on CPU,
# memory and a per-pod query-latency metric.
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: heliosdb-compute-hpa
  namespace: heliosdb
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: heliosdb-compute
  minReplicas: 3
  maxReplicas: 20
  metrics:
    # Target 70% average CPU utilization.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Target 80% average memory utilization.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # NOTE(review): Pods metrics are read from the custom metrics API; no
    # adapter for it is installed in the steps shown in this guide section —
    # confirm one is deployed and exposes this metric.
    - type: Pods
      pods:
        metric:
          name: heliosdb_query_latency_seconds
        target:
          type: AverageValue
          averageValue: '500m'  # 500ms
  behavior:
    # Scale up after a 60s stabilization window, by whichever policy allows
    # the larger step: +50% or +2 pods per minute.
    scaleUp:
      stabilizationWindowSeconds: 60
      selectPolicy: Max
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
    # Scale down after a 300s stabilization window, by whichever policy allows
    # the smaller step: -10% or -1 pod per minute.
    scaleDown:
      stabilizationWindowSeconds: 300
      selectPolicy: Min
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
        - type: Pods
          value: 1
          periodSeconds: 60
3.6.2 Vertical Pod Autoscaler (VPA)
storage-vpa.yaml:
# Vertical Pod Autoscaler for the heliosdb-storage StatefulSet, in "Auto"
# update mode, bounded per container.
# NOTE(review): the VerticalPodAutoscaler kind is a custom resource; its
# installation is not among the components installed in this guide section —
# confirm the VPA components are present in the cluster.
---
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: heliosdb-storage-vpa
  namespace: heliosdb
spec:
  targetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: heliosdb-storage
  updatePolicy:
    updateMode: 'Auto'
  resourcePolicy:
    containerPolicies:
      # Applies to the "storage" container; CPU and memory are both managed
      # within the min/max bounds below.
      - containerName: storage
        controlledResources:
          - cpu
          - memory
        minAllowed:
          cpu: '2'
          memory: 8Gi
        maxAllowed:
          cpu: '16'
          memory: 64Gi
Navigation
- Previous: Infrastructure Setup
- Next: Docker Deployment
- Index: Production Deployment Guide