Skip to content

HCP Vault Docker Setup

Advantages:

  • Internal storage option
  • Leverage raft consensus protocol
  • All nodes have copy of data
  • Eliminates network hop
  • Supports HA
  • Only need to troubleshoot vault
  • Built-in snapshots
  • Officially supported
  • In the Raft-based setup, Vault nodes communicate with each other directly to form a quorum, elect a leader, and replicate data
  • retry_join: This parameter uses the leader_api_addr values in this block to find and join an existing cluster. It will attempt to connect to the other nodes listed until it successfully joins the cluster.
  • The leader_api_addr points to the api_addr of another node in the cluster
  • Each node advertises its own addresses, and the other nodes use that information to connect to it. The retry_join block is what bootstraps this process by giving a new node a starting point to find the rest of the cluster
  • When a Vault node starts up and attempts to join a cluster, it uses another node’s api_addr as the initial point of contact. During that initial communication, the nodes exchange information about themselves, including their respective cluster_addr
  • api_addr: This is the address that the Vault server advertises to other Vault nodes in the cluster for client-like requests (e.g., forwarding requests from a follower to the leader). It is the address that other members of the cluster should use to contact this node.
  • cluster_addr: This is the address used for server-to-server communication within the cluster, specifically for things like Raft consensus and request forwarding. This is the address that the node will advertise to other nodes for cluster-related traffic. It’s the β€œbackend” communication channel
architecture-beta
%% 3-node Vault cluster using integrated (Raft) storage: each node owns
%% its local Raft store; nginx load-balances client traffic; the client
%% container reaches the cluster only through nginx.
group vault_cluster[Vault Cluster]
  group vaultnodeA[Vault Node A] in vault_cluster
    service storageA(disk)[Raft Storage] in vaultnodeA
    service vaultA(logos:vault)[Vault A] in vaultnodeA
  group vaultnodeB[Vault Node B] in vault_cluster
    service storageB(disk)[Raft Storage] in vaultnodeB
    service vaultB(logos:vault)[Vault B] in vaultnodeB
  group vaultnodeC[Vault Node C] in vault_cluster
    service storageC(disk)[Raft Storage] in vaultnodeC
    service vaultC(logos:vault)[Vault C] in vaultnodeC

  service vaultClient(logos:vault)[Vault Client]
  service nginx(logos:nginx)[Nginx]

%% Each Vault node persists to its own local Raft storage.
vaultA:B -- T:storageA
vaultB:B -- T:storageB
vaultC:B -- T:storageC

%% nginx fronts all three Vault nodes.
nginx:L -- T:vaultA
nginx:B -- T:vaultB
nginx:R -- T:vaultC

%% Direct client-to-node edges kept for reference, disabled in favor of
%% routing through nginx.
%% vaultClient:L -- T:vaultA
%% vaultClient:B -- T:vaultB
%% vaultClient:R -- T:vaultC

vaultClient:B -- T:nginx

%% Node-to-node cluster links.
vaultA:R -- L:vaultB
vaultB:R -- L:vaultC

Project Structure

vault-cluster/
├── docker-compose.yml
├── nginx
│   └── conf.d
└── vault-config/
    ├── vault1/
    │   └── vault.hcl
    ├── vault2/
    │   └── vault.hcl
    └── vault3/
        └── vault.hcl

Docker Volume

docker-compose.yml
# docker-compose.yml — 3-node Vault (Raft) cluster behind an nginx LB,
# plus a throwaway client container for running vault CLI commands.
# NOTE(review): the original paste had lost all YAML indentation, which
# makes the file unparseable; structure restored with 2-space indents.
services:
  vault1:
    image: hashicorp/vault:latest
    container_name: vault1
    restart: unless-stopped
    # Host port publishing intentionally disabled; clients go through nginx.
    # ports:
    #   - "8200:8200"
    environment:
      - VAULT_ADDR=http://0.0.0.0:8200
    cap_add:
      # Lets Vault mlock memory; the vault.hcl files also set
      # disable_mlock = true, so this is belt-and-braces.
      - IPC_LOCK
    volumes:
      - vault1-data:/vault/file
      - ./vault-config/vault1:/vault/config
    command: vault server -config=/vault/config/vault.hcl
    networks:
      - vault-net

  vault2:
    image: hashicorp/vault:latest
    container_name: vault2
    restart: unless-stopped
    # ports:
    #   - "8201:8200"
    environment:
      - VAULT_ADDR=http://0.0.0.0:8200
    cap_add:
      - IPC_LOCK
    volumes:
      - vault2-data:/vault/file
      - ./vault-config/vault2:/vault/config
    command: vault server -config=/vault/config/vault.hcl
    networks:
      - vault-net

  vault3:
    image: hashicorp/vault:latest
    container_name: vault3
    restart: unless-stopped
    # ports:
    #   - "8202:8200"
    environment:
      - VAULT_ADDR=http://0.0.0.0:8200
    cap_add:
      - IPC_LOCK
    volumes:
      - vault3-data:/vault/file
      - ./vault-config/vault3:/vault/config
    command: vault server -config=/vault/config/vault.hcl
    networks:
      - vault-net

  # nginx load balancer. The service is deliberately named "vault" so
  # other containers on vault-net can reach the LB at http://vault:80.
  vault:
    image: nginx:stable-alpine
    container_name: vault
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      # NOTE(review): ./nginx/conf.d is mounted onto the FILE
      # /etc/nginx/nginx.conf — this only works if conf.d is itself a
      # file containing a full nginx.conf (as shown later in this doc);
      # if it is a directory, nginx will fail to start. Confirm on disk.
      - ./nginx/conf.d:/etc/nginx/nginx.conf:ro
      - ./nginx/log:/var/log/nginx
    depends_on:
      - vault1
      - vault2
      - vault3
    networks:
      - vault-net

  # Idle container with the vault CLI, kept alive for interactive use
  # (docker exec) against the cluster.
  vaultclient:
    image: hashicorp/vault:latest
    container_name: vaultclient
    restart: unless-stopped
    command: sleep 100000000
    networks:
      - vault-net

volumes:
  vault1-data:
  vault2-data:
  vault3-data:

networks:
  vault-net:

Vault Configuration File

vault-config/vault1/vault.hcl
# vault-config/vault1/vault.hcl — node 1 of the 3-node Raft cluster.
ui = true

# Address this node advertises to clients and peers for API traffic.
# NOTE(review): the original pointed api_addr at the nginx LB
# ("http://vault:80"); Vault's integrated-storage docs say each node
# should advertise its OWN reachable address, otherwise retry_join and
# standby redirects bounce through the load balancer.
api_addr     = "http://vault1:8200"

# Address advertised for server-to-server (Raft/forwarding) traffic.
cluster_addr = "http://vault1:8201"

# mlock is unavailable/unneeded inside these containers.
disable_mlock = true

storage "raft" {
  path    = "/vault/file"
  node_id = "vault1"

  # Bootstrap: try the other nodes' API addresses until one accepts the join.
  retry_join {
    leader_api_addr = "http://vault2:8200"
  }
  retry_join {
    leader_api_addr = "http://vault3:8200"
  }
}

listener "tcp" {
  address     = "0.0.0.0:8200"
  # Lab setup only — TLS must be enabled for production.
  tls_disable = 1
}
vault-config/vault2/vault.hcl
# vault-config/vault2/vault.hcl — node 2 of the 3-node Raft cluster.
ui = true

# Address this node advertises to clients and peers for API traffic.
# NOTE(review): originally "http://vault:80" (the nginx LB); each node
# should advertise its own reachable address per Vault's Raft docs.
api_addr     = "http://vault2:8200"

# Address advertised for server-to-server (Raft/forwarding) traffic.
cluster_addr = "http://vault2:8201"

# mlock is unavailable/unneeded inside these containers.
disable_mlock = true

storage "raft" {
  path    = "/vault/file"
  node_id = "vault2"

  # Bootstrap: try the other nodes' API addresses until one accepts the join.
  retry_join {
    leader_api_addr = "http://vault1:8200"
  }
  retry_join {
    leader_api_addr = "http://vault3:8200"
  }
}

listener "tcp" {
  address     = "0.0.0.0:8200"
  # Lab setup only — TLS must be enabled for production.
  tls_disable = 1
}
vault-config/vault3/vault.hcl
# vault-config/vault3/vault.hcl — node 3 of the 3-node Raft cluster.
ui = true

# Address this node advertises to clients and peers for API traffic.
# NOTE(review): originally "http://vault:80" (the nginx LB); each node
# should advertise its own reachable address per Vault's Raft docs.
api_addr     = "http://vault3:8200"

# Address advertised for server-to-server (Raft/forwarding) traffic.
cluster_addr = "http://vault3:8201"

# mlock is unavailable/unneeded inside these containers.
disable_mlock = true

storage "raft" {
  path    = "/vault/file"
  node_id = "vault3"

  # Bootstrap: try the other nodes' API addresses until one accepts the join.
  retry_join {
    leader_api_addr = "http://vault1:8200"
  }
  retry_join {
    leader_api_addr = "http://vault2:8200"
  }
}

listener "tcp" {
  address     = "0.0.0.0:8200"
  # Lab setup only — TLS must be enabled for production.
  tls_disable = 1
}
nginx/conf.d
# nginx.conf mounted into the LB container: load-balances HTTP traffic
# across the three Vault nodes on the compose network.
worker_processes 1;
events {
worker_connections 1024;
}
http {
# Round-robin pool; one failure marks a node down, retried after 1s.
upstream vault_backend {
server vault1:8200 max_fails=1 fail_timeout=1s;
server vault2:8200 max_fails=1 fail_timeout=1s;
server vault3:8200 max_fails=1 fail_timeout=1s;
}
server {
listen 80;
# Matches the compose service name so http://vault/ hits this server.
server_name vault;
location / {
proxy_pass http://vault_backend;
proxy_set_header Host vault;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Fail over to the next upstream on connect errors, timeouts, or 5xx.
# NOTE(review): nginx does NOT retry non-idempotent methods (POST/PUT
# — most Vault API calls) on http_502/503/504 once the request was
# sent, unless `non_idempotent` is added; confirm intended semantics.
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 3;
# Keep-alive-friendly proxying to the backends.
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_buffering off;
}
}
}

Advantages:

  • Provides durable storage
  • Can independently scale backend
  • Easy to automate
  • Built-in integration between Consul and Vault
  • Supports HA
  • Distributed system
  • Built-in snapshots for data retention
  • Officially supported

Requirements:

  • Consul deployed using multiple nodes
  • Clusters are deployed in odd numbers
  • All data is replicated among all nodes
  • Leader election promotes a single consul node as the leader
  • The leader accepts the new entries and requests and replicates among all nodes
  • A Consul cluster used as the Vault storage backend should not also be used for regular Consul functions in production (service mesh, network automation, service discovery, DNS, …)
architecture-beta
%% Multi-AZ deployment: one Vault node plus two Consul nodes per
%% availability zone (three redundancy zones in one VPC).
group region[Region]
  group vpc[VPC] in region
    group az1[Availability Zone 1] in vpc
      group subnet1(server)[Private Subnet] in az1
        service vault1(logos:vault)[Vault Node] in subnet1
        service consul11(logos:consul)[Consul Node 1] in subnet1
        service consul12(logos:consul)[Consul Node 2] in subnet1
    group az2[Availability Zone 2] in vpc
      group subnet2(server)[Private Subnet] in az2
        service vault2(logos:vault)[Vault Node] in subnet2
        service consul21(logos:consul)[Consul Node 1] in subnet2
        service consul22(logos:consul)[Consul Node 2] in subnet2
    group az3[Availability Zone 3] in vpc
      group subnet3(server)[Private Subnet] in az3
        service vault3(logos:vault)[Vault Node] in subnet3
        service consul31(logos:consul)[Consul Node 1] in subnet3
        service consul32(logos:consul)[Consul Node 2] in subnet3

%% Each Vault node is linked to the two Consul nodes in its own zone.
vault1:T -- B:consul11
vault1:L -- R:consul12

vault2:T -- B:consul21
vault2:R -- L:consul22

vault3:L -- R:consul31
vault3:B -- T:consul32

%% Vault nodes are linked to each other across zones.
vault1:R -- L:vault2
vault1:B -- T:vault3
vault2:B -- R:vault3

%% Extra cross-zone edges kept for reference, disabled.
%% vault3:T -- B:consul11
%% vault3:L -- R:consul12
  • Here there are 3 redundancy zones, which allows for a more reliable and resilient deployment.
    • If one availability zone goes down, the other 2 keep working
  • Redundancy at each zone also helps in reducing data loss issues.
architecture-beta
%% Each Vault node runs a local Consul agent (client mode); the agents
%% join a separate 5-node Consul server cluster and proxy all storage
%% requests to it on Vault's behalf.
group vault_box[Vault Nodes]
  group vault1[Vault Node A] in vault_box
    service consulA(logos:consul)[Agent A] in vault1
    service vaultA(logos:vault)[Vault A] in vault1
  group vault2[Vault Node B] in vault_box
    service consulB(logos:consul)[Agent B] in vault2
    service vaultB(logos:vault)[Vault B] in vault2
  group vault3[Vault Node C] in vault_box
    service consulC(logos:consul)[Agent C] in vault3
    service vaultC(logos:vault)[Vault C] in vault3

  %% Junctions only shape edge routing; they carry no meaning.
  junction vaultJunctionA in vault_box
  junction vaultJunctionB in vault_box
  junction vaultJunctionC in vault_box

group consul_cluster[Consul Cluster]

  service consul1(logos:consul)[Consul Node 1] in consul_cluster
  service consul2(logos:consul)[Consul Node 2] in consul_cluster
  service consul3(logos:consul)[Consul Node 3] in consul_cluster
  service consul4(logos:consul)[Consul Node 4] in consul_cluster
  service consul5(logos:consul)[Consul Node 5] in consul_cluster

  junction consulJunction1 in consul_cluster
  junction consulJunction2 in consul_cluster
  junction consulJunction3 in consul_cluster
  junction consulJunction4 in consul_cluster
  junction consulJunction5 in consul_cluster

%% vault1:L -- R:vault2


%% Each Vault server talks only to its co-located Consul agent.
vaultA:B --> T:consulA
vaultB:B --> T:consulB
vaultC:B --> T:consulC

vaultA:R -- L:vaultB
vaultC:L -- R:vaultB

%% Consul servers replicate among themselves.
consul1:R <--> L:consul2
consul2:R <--> L:consul3
consul3:R <--> L:consul4
consul4:R <--> L:consul5

consul1:T <-- B:consulJunction1
consul2:T <-- B:consulJunction2
consul3:T <-- B:consulJunction3
consul4:T <-- B:consulJunction4
consul5:T <-- B:consulJunction5
consulJunction1:R -- L:consulJunction2
consulJunction2:R -- L:consulJunction3
consulJunction3:R -- L:consulJunction4
consulJunction4:R -- L:consulJunction5

%% Agents forward storage traffic into the Consul cluster.
consulA:B --> T:vaultJunctionA
consulB:B --> T:vaultJunctionB
consulC:B --> T:vaultJunctionC

vaultJunctionA:R -- L:vaultJunctionB
vaultJunctionB:R -- L:vaultJunctionC

vaultJunctionB:B --> T:consulJunction3
  • Vault keeps running consul agents in client mode, which joins the consul cluster.
  • Vault does not even know about the Consul cluster. Vault just communicates with the local agent, and the agent resolves all requests against the cluster. This keeps connectivity to the cluster stable, because the Vault configuration does not need to change whenever a Consul node comes up or goes down
  • Consul clients are configured one time to handle all the connections to cluster