diff --git a/.github/workflows/build-ova.yml b/.github/workflows/build-ova.yml index 2a2cfdf..3ea8e43 100644 --- a/.github/workflows/build-ova.yml +++ b/.github/workflows/build-ova.yml @@ -53,7 +53,7 @@ jobs: - name: Setup Packer uses: hashicorp/setup-packer@v3 with: - version: "1.11.2" + version: "latest" - name: Download ISO run: | @@ -91,6 +91,30 @@ jobs: aws s3 cp output/defguard/defguard.ova "s3://defguard-downloads/ova/${FILENAME}" echo "Uploaded: s3://defguard-downloads/ova/${FILENAME}" + - name: Test deployment modes on Proxmox + if: ${{ github.event_name == 'workflow_dispatch' }} + env: + PROXMOX_HOST: ${{ secrets.PROXMOX_HOST }} + PROXMOX_SSH_KEY: ${{ secrets.PROXMOX_SSH_KEY }} + PVE_STORAGE: ${{ secrets.PVE_STORAGE }} + SNIPPET_STORAGE: ${{ secrets.SNIPPET_STORAGE }} + TEST_IP_PREFIX: ${{ secrets.TEST_IP_PREFIX }} + TEST_GW: ${{ secrets.TEST_GW }} + VMID_BASE: ${{ secrets.VMID_BASE }} + run: | + set -euo pipefail + mkdir -p ~/.ssh + echo "${PROXMOX_SSH_KEY}" > ~/.ssh/proxmox_ci + chmod 600 ~/.ssh/proxmox_ci + SSHOPT="-i $HOME/.ssh/proxmox_ci -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=$HOME/.ssh/known_hosts" + scp $SSHOPT output/defguard/defguard.ova test/test-deployment-modes.sh "ovaci@${PROXMOX_HOST}:/tmp/" + # Forward only the secrets that are set; unset ones fall back to the script's defaults. 
+ REMOTE_ENV="STACK_TIMEOUT=1200" + for v in PVE_STORAGE SNIPPET_STORAGE TEST_IP_PREFIX TEST_GW VMID_BASE; do + [ -n "${!v}" ] && REMOTE_ENV="$REMOTE_ENV $v='${!v}'" + done + ssh $SSHOPT "ovaci@${PROXMOX_HOST}" "$REMOTE_ENV bash /tmp/test-deployment-modes.sh /tmp/defguard.ova" + - name: Publish as latest if: ${{ inputs.publish_latest }} env: diff --git a/ova/defguard.pkr.hcl b/ova/defguard.pkr.hcl index c7f3eaa..47c014c 100644 --- a/ova/defguard.pkr.hcl +++ b/ova/defguard.pkr.hcl @@ -102,6 +102,16 @@ build { destination = "/tmp/defguard-firewall.service" } + provisioner "file" { + source = "files/99-wireguard-tuning.conf" + destination = "/tmp/99-wireguard-tuning.conf" + } + + provisioner "file" { + source = "files/defguard-modules.conf" + destination = "/tmp/defguard-modules.conf" + } + provisioner "shell" { inline = [ "sudo bash /tmp/docker-setup.sh", @@ -120,6 +130,10 @@ build { "sudo mv /tmp/defguard-firewall.sh /opt/stacks/defguard/defguard-firewall.sh", "sudo chmod +x /opt/stacks/defguard/defguard-firewall.sh", "sudo mv /tmp/defguard-firewall.service /etc/systemd/system/defguard-firewall.service", + "sudo mv /tmp/99-wireguard-tuning.conf /etc/sysctl.d/99-wireguard-tuning.conf", + "sudo chown root:root /etc/sysctl.d/99-wireguard-tuning.conf", + "sudo mv /tmp/defguard-modules.conf /etc/modules-load.d/defguard.conf", + "sudo chown root:root /etc/modules-load.d/defguard.conf", "sudo systemctl daemon-reload", "sudo systemctl enable docker.service", "sudo systemctl enable defguard-init.service", diff --git a/ova/files/99-wireguard-tuning.conf b/ova/files/99-wireguard-tuning.conf new file mode 100644 index 0000000..fb8368a --- /dev/null +++ b/ova/files/99-wireguard-tuning.conf @@ -0,0 +1,25 @@ +# Defguard WireGuard gateway kernel tuning. +# Based on https://docs.defguard.net/deployment-strategies/linux-kernel-wireguard-tuning +# Sized for the default 2 vCPU / 2 GB appliance (baseline, up to ~100 active devices). 
+# IP forwarding is set separately in 99-defguard-forward.conf (defguard-firewall.sh). + +# BBR reduces bufferbloat and is less sensitive to loss than the default CUBIC. +net.core.default_qdisc = fq +net.ipv4.tcp_congestion_control = bbr + +# WireGuard is UDP; the default socket buffers are too small for 1 Gbps+ and cause packet drops. +net.core.rmem_max = 16777216 +net.core.wmem_max = 16777216 +net.core.rmem_default = 262144 +net.core.wmem_default = 262144 + +# Absorb traffic bursts without dropping at the kernel input queue / accept queue. +net.core.netdev_max_backlog = 5000 +net.core.somaxconn = 8192 + +# NAPI polling budget for higher PPS (recommended for 50 users and above). +net.core.netdev_budget = 600 +net.core.netdev_budget_usecs = 4000 + +# Connection tracking for egress/masquerade through the VPN. +net.netfilter.nf_conntrack_max = 131072 diff --git a/ova/files/defguard-modules.conf b/ova/files/defguard-modules.conf new file mode 100644 index 0000000..b93734b --- /dev/null +++ b/ova/files/defguard-modules.conf @@ -0,0 +1,3 @@ +# Load nf_conntrack early so net.netfilter.nf_conntrack_max applies on first boot, +# before Docker would otherwise load it. See 99-wireguard-tuning.conf. +nf_conntrack diff --git a/ova/files/docker-setup.sh b/ova/files/docker-setup.sh index 631a555..ab56151 100644 --- a/ova/files/docker-setup.sh +++ b/ova/files/docker-setup.sh @@ -3,7 +3,8 @@ set -e apt-get update apt-get full-upgrade -y -apt-get install -y ca-certificates curl +# open-vm-tools: graceful shutdown, guest IP reporting, and time sync under VMware. 
+apt-get install -y ca-certificates curl open-vm-tools install -m 0755 -d /etc/apt/keyrings curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc chmod a+r /etc/apt/keyrings/docker.asc diff --git a/ova/files/ubuntu.vmx b/ova/files/ubuntu.vmx index ba815c2..2d1f6ef 100644 --- a/ova/files/ubuntu.vmx +++ b/ova/files/ubuntu.vmx @@ -18,3 +18,11 @@ ide1:0.present = "TRUE" ide1:0.deviceType = "cdrom-raw" ide1:0.autodetect = "TRUE" ide1:0.startConnected = "FALSE" + +ethernet0.present = "TRUE" +ethernet0.virtualDev = "vmxnet3" +ethernet0.connectionType = "nat" +ethernet0.startConnected = "TRUE" +ethernet0.addressType = "generated" +ethernet0.networkName = "VM Network" +ethernet0.wakeOnPcktRcv = "FALSE" diff --git a/ova/test/test-deployment-modes.sh b/ova/test/test-deployment-modes.sh new file mode 100755 index 0000000..0cac79e --- /dev/null +++ b/ova/test/test-deployment-modes.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Boot the defguard OVA on Proxmox once per deployment mode and assert the right +# components come up. Meant to run ON the Proxmox host (invoked over SSH by CI). +# Usage: test-deployment-modes.sh /path/to/defguard.ova +set -euo pipefail + +OVA="${1:?usage: $0 /path/to/defguard.ova}" + +PVE_STORAGE="${PVE_STORAGE:-local-zfs}" +PVE_BRIDGE="${PVE_BRIDGE:-vmbr0}" +SNIPPET_STORAGE="${SNIPPET_STORAGE:-local}" +SNIPPET_DIR="${SNIPPET_DIR:-/var/lib/vz/snippets}" + +# The template takes VMID_BASE; the four test VMs take the next four IDs. Shift the whole +# block with VMID_BASE if that range is not free on the node. +VMID_BASE="${VMID_BASE:-9000}" +TEMPLATE_VMID="$VMID_BASE" + +# Static IPs: the image has no QEMU guest agent (open-vm-tools, installed by docker-setup.sh, is for VMware), +# so Proxmox has no DHCP lease to query; a known IP per VM is how we reach it. +TEST_IP_PREFIX="${TEST_IP_PREFIX:-10.2.0}" +TEST_GW="${TEST_GW:-10.2.0.1}" +TEST_CIDR="${TEST_CIDR:-24}" + +BOOT_TIMEOUT="${BOOT_TIMEOUT:-300}" +# Generous: the stack pulls images from ghcr on first boot. 
+STACK_TIMEOUT="${STACK_TIMEOUT:-600}" + +MODES=(full core edge gateway) +declare -A VMID=( [full]=$((VMID_BASE+1)) [core]=$((VMID_BASE+2)) [edge]=$((VMID_BASE+3)) [gateway]=$((VMID_BASE+4)) ) +declare -A IP_LAST=( [full]=150 [core]=151 [edge]=152 [gateway]=153 ) +declare -A PROFILE=( [full]="" [core]=core [edge]=edge [gateway]=gateway ) +# Ground truth from ova/files/docker-compose.standalone.yaml (full uses the all-in-one). +declare -A EXPECT=( [full]="core db edge gateway" [core]="core db" [edge]="edge" [gateway]="gateway" ) +declare -A FORBID=( [full]="" [core]="edge gateway" [edge]="core db gateway" [gateway]="core db edge" ) + +declare -A RESULT + +WORKDIR="$(mktemp -d)" +KEY="$WORKDIR/id" +PUBKEY="$WORKDIR/id.pub" + +# VMs this script creates are named with this prefix; nothing else is ever touched. +VM_PREFIX="defguard-test" + +log() { echo "[test] $*"; } + +vm_exists() { sudo qm status "$1" &>/dev/null; } +vm_name() { sudo qm config "$1" 2>/dev/null | sed -n 's/^name: //p'; } +is_ours() { [[ "$(vm_name "$1")" == "$VM_PREFIX"* ]]; } + +destroy_vm() { + local v="$1" + vm_exists "$v" || return 0 + is_ours "$v" || { log "refusing to touch VMID $v ('$(vm_name "$v")'): not created by this script"; return 0; } + sudo qm stop "$v" --skiplock &>/dev/null || true + sudo qm destroy "$v" --purge &>/dev/null || true +} + +# Abort before doing anything if one of our IDs is already a foreign VM. +guard_vmids() { + local v + for v in "$TEMPLATE_VMID" "${VMID[@]}"; do + if vm_exists "$v" && ! is_ours "$v"; then + log "ERROR: VMID $v is in use by '$(vm_name "$v")'. Set VMID_BASE to a free range." 
+ exit 2 + fi + done +} + +cleanup() { + for m in "${MODES[@]}"; do destroy_vm "${VMID[$m]}"; done + destroy_vm "$TEMPLATE_VMID" + rm -rf "$WORKDIR" +} +trap cleanup EXIT + +vm_ssh() { + local ip="$1"; shift + ssh -i "$KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + -o ConnectTimeout=5 -o BatchMode=yes "cloudtest@$ip" "$@" +} + +# A running service shows up as a "-<service>-" token in compose container names ($1: names, $2: service). +has_service() { grep -q -- "-$2-" <<<"$1"; } + +wait_ssh() { + local ip="$1" deadline=$(( $(date +%s) + BOOT_TIMEOUT )) + until vm_ssh "$ip" true 2>/dev/null; do + [ "$(date +%s)" -ge "$deadline" ] && return 1 + sleep 10 + done +} + +wait_services() { + local ip="$1" expected="$2" deadline=$(( $(date +%s) + STACK_TIMEOUT )) names svc ok + while :; do + # sudo: the fresh cloudtest user is not in the docker group. + names="$(vm_ssh "$ip" "sudo docker ps --format '{{.Names}}'" 2>/dev/null || true)" + ok=1 + for svc in $expected; do has_service "$names" "$svc" || ok=0; done + [ "$ok" = 1 ] && { echo "$names"; return 0; } # print names on success too: verify_mode captures stdout for its FORBID check + [ "$(date +%s)" -ge "$deadline" ] && { echo "$names"; return 1; } + sleep 10 + done +} + +verify_mode() { + local mode="$1" ip="$2" profile="${PROFILE[$mode]}" names actual + + if [ -z "$profile" ]; then + vm_ssh "$ip" "test ! 
-e /opt/stacks/defguard/active-profiles" \ + || { log "$mode: active-profiles unexpectedly present"; return 1; } + else + actual="$(vm_ssh "$ip" "cat /opt/stacks/defguard/active-profiles 2>/dev/null" | tr -d '[:space:]' || true)" # || true: a missing file must reach the comparison below, not trip pipefail + [ "$actual" = "$profile" ] \ + || { log "$mode: active-profiles is '$actual', expected '$profile'"; return 1; } + fi + + names="$(wait_services "$ip" "${EXPECT[$mode]}")" \ + || { log "$mode: expected services did not all start; running: $(tr '\n' ' ' <<<"$names")"; return 1; } + + local svc + for svc in ${FORBID[$mode]}; do + has_service "$names" "$svc" \ + && { log "$mode: unexpected service '$svc' is running"; return 1; } + done + return 0 +} + +import_template() { + log "importing OVA as template $TEMPLATE_VMID" + destroy_vm "$TEMPLATE_VMID" + tar -xf "$OVA" -C "$WORKDIR" + local vmdk import_out volid + vmdk="$(find "$WORKDIR" -name '*.vmdk' | head -n1)" + [ -n "$vmdk" ] || { log "no .vmdk found inside OVA"; return 1; } + + sudo qm create "$TEMPLATE_VMID" --name defguard-test-tpl --memory 2048 --cores 2 \ + --net0 "virtio,bridge=$PVE_BRIDGE" --scsihw virtio-scsi-single --ostype l26 + import_out="$(sudo qm importdisk "$TEMPLATE_VMID" "$vmdk" "$PVE_STORAGE" 2>&1)" || { echo "$import_out"; log "qm importdisk failed"; return 1; } + echo "$import_out" + volid="$(grep -oE "$PVE_STORAGE:[^ '\"]+" <<<"$import_out" | tail -n1 || true)" # || true: let the check below report a missing volume id instead of pipefail aborting silently + [ -n "$volid" ] || { log "could not determine imported disk volume id"; return 1; } + + sudo qm set "$TEMPLATE_VMID" --scsi0 "$volid" + sudo qm set "$TEMPLATE_VMID" --boot order=scsi0 + sudo qm set "$TEMPLATE_VMID" --ide2 "$PVE_STORAGE:cloudinit" + sudo qm template "$TEMPLATE_VMID" +} + +write_snippets() { + local m + for m in core edge gateway; do + cat > "$SNIPPET_DIR/defguard-test-$m.yaml" <