summaryrefslogtreecommitdiffstats
path: root/lass/3modules/sync-containers3.nix
diff options
context:
space:
mode:
authortv <tv@krebsco.de>2022-11-22 19:38:36 +0100
committertv <tv@krebsco.de>2022-11-22 19:38:36 +0100
commit1c4e27473c46faf4b4a4c800cb29cdabf73f716f (patch)
treed9ec3e9b4a2e77f9e8baf2e8601a5cc9439131eb /lass/3modules/sync-containers3.nix
parent12ce60ff1435a71ee4cf0431223c129010e7df73 (diff)
parentfef385511d07c1ea5be1eae4fd8dd7eba563ab2c (diff)
Merge remote-tracking branch 'prism/master'
Diffstat (limited to 'lass/3modules/sync-containers3.nix')
-rw-r--r--lass/3modules/sync-containers3.nix313
1 files changed, 313 insertions, 0 deletions
diff --git a/lass/3modules/sync-containers3.nix b/lass/3modules/sync-containers3.nix
new file mode 100644
index 000000000..1371d5233
--- /dev/null
+++ b/lass/3modules/sync-containers3.nix
@@ -0,0 +1,313 @@
+{ config, lib, pkgs, ... }: let
+ cfg = config.lass.sync-containers3;
+ slib = pkgs.stockholm.lib;
+in {
+ options.lass.sync-containers3 = {
+ inContainer = {
+ enable = lib.mkEnableOption "container config for syncing";
+ pubkey = lib.mkOption {
+ type = lib.types.str; # TODO ssh key
+ };
+ };
+ containers = lib.mkOption {
+ default = {};
+ type = lib.types.attrsOf (lib.types.submodule ({ config, ... }: {
+ options = {
+ name = lib.mkOption {
+ type = lib.types.str;
+ default = config._module.args.name;
+ };
+ sshKey = lib.mkOption {
+ type = slib.types.absolute-pathname;
+ };
+ luksKey = lib.mkOption {
+ type = slib.types.absolute-pathname;
+ default = config.sshKey;
+ };
+ ephemeral = lib.mkOption {
+ type = lib.types.bool;
+ default = false;
+ };
+ };
+ }));
+ };
+ };
+ config = lib.mkMerge [
+ (lib.mkIf (cfg.containers != {}) {
+
+ containers = lib.mapAttrs' (n: ctr: lib.nameValuePair ctr.name {
+ config = {
+ environment.systemPackages = [
+ pkgs.dhcpcd
+ pkgs.git
+ pkgs.jq
+ ];
+ networking.useDHCP = lib.mkForce true;
+ systemd.services.autoswitch = {
+ environment = {
+ NIX_REMOTE = "daemon";
+ };
+ wantedBy = [ "multi-user.target" ];
+ serviceConfig.ExecStart = pkgs.writers.writeDash "autoswitch" ''
+ set -efu
+ ln -frs /var/state/var_src /var/src
+ if test -e /var/src/nixos-config; then
+ /run/current-system/sw/bin/nixos-rebuild -I /var/src switch || :
+ fi
+ '';
+ unitConfig.X-StopOnRemoval = false;
+ };
+ };
+ autoStart = false;
+ enableTun = true;
+ ephemeral = ctr.ephemeral;
+ privateNetwork = true;
+ hostBridge = "ctr0";
+ bindMounts = {
+ "/etc/resolv.conf".hostPath = "/etc/resolv.conf";
+ "/var/lib/self/disk" = {
+ hostPath = "/var/lib/sync-containers3/${ctr.name}/disk";
+ isReadOnly = false;
+ };
+ "/var/state" = {
+ hostPath = "/var/lib/sync-containers3/${ctr.name}/state";
+ isReadOnly = false;
+ };
+ };
+ }) cfg.containers;
+
+ systemd.services = lib.foldr lib.recursiveUpdate {} (lib.flatten (map (ctr: [
+ { "${ctr.name}_syncer" = {
+ path = with pkgs; [
+ coreutils
+ consul
+ rsync
+ openssh
+ systemd
+ ];
+ startAt = "*:0/1";
+ serviceConfig = {
+ User = "${ctr.name}_container";
+ LoadCredential = [
+ "ssh_key:${ctr.sshKey}"
+ ];
+ ExecCondition = pkgs.writers.writeDash "${ctr.name}_checker" ''
+ set -efu
+ ! systemctl is-active --quiet container@${ctr.name}.service
+ '';
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_syncer" ''
+ set -efux
+ consul lock sync_${ctr.name} ${pkgs.writers.writeDash "${ctr.name}-sync" ''
+ set -efux
+ if /run/wrappers/bin/ping -c 1 ${ctr.name}.r; then
+ touch "$HOME"/incomplete
+ rsync -a -e "ssh -i $CREDENTIALS_DIRECTORY/ssh_key" --inplace container_sync@${ctr.name}.r:disk "$HOME"/disk
+ rm "$HOME"/incomplete
+ fi
+ ''}
+ '';
+ };
+ }; }
+ { "${ctr.name}_watcher" = {
+ path = with pkgs; [
+ coreutils
+ consul
+ cryptsetup
+ curl
+ mount
+ util-linux
+ jq
+ retry
+ ];
+ serviceConfig = {
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_watcher" ''
+ set -efux
+ while sleep 5; do
+ # get the payload
+ # check if the host reacted recently
+ case $(curl -s -o /dev/null --retry 10 --retry-delay 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
+ 404)
+ echo 'got 404 from kv, should kill the container'
+ break
+ ;;
+ 500)
+ echo 'got 500 from kv, will kill container'
+ break
+ ;;
+ 200)
+ # echo 'got 200 from kv, will check payload'
+ export payload=$(consul kv get containers/${ctr.name})
+ if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
+ # echo 'we are the host, trying to reach container'
+ if $(retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null); then
+ # echo 'container is reachable, continueing'
+ continue
+ else
+ # echo 'container seems dead, killing'
+ break
+ fi
+ else
+ echo 'we are not host, killing container'
+ break
+ fi
+ ;;
+ *)
+ echo 'unknown state, continuing'
+ continue
+ ;;
+ esac
+ done
+ /run/current-system/sw/bin/nixos-container stop ${ctr.name} || :
+ umount /var/lib/sync-containers3/${ctr.name}/state || :
+ cryptsetup luksClose ${ctr.name} || :
+ '';
+ };
+ }; }
+ { "${ctr.name}_scheduler" = {
+ wantedBy = [ "multi-user.target" ];
+ path = with pkgs; [
+ coreutils
+ consul
+ cryptsetup
+ mount
+ util-linux
+ curl
+ systemd
+ jq
+ retry
+ bc
+ ];
+ serviceConfig = {
+ Restart = "always";
+ RestartSec = "30s";
+ ExecStart = pkgs.writers.writeDash "${ctr.name}_scheduler" ''
+ set -efux
+ # get the payload
+ # check if the host reacted recently
+ case $(curl -s -o /dev/null --retry 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
+ 404)
+ # echo 'got 404 from kv, will create container'
+ ;;
+ 500)
+ # echo 'got 500 from kv, retrying again'
+ exit 0
+ ;;
+ 200)
+ # echo 'got 200 from kv, will check payload'
+ export payload=$(consul kv get containers/${ctr.name})
+ if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
+ echo 'we are the host, starting container'
+ else
+ # echo 'we are not host, checking timestamp'
+ # if [ $(echo "$(date +%s) - $(jq -rn 'env.payload | fromjson.time') > 100" | bc) -eq 1 ]; then
+ if [ "$(jq -rn 'env.payload | fromjson.time | now - tonumber > 100')" = 'true' ]; then
+ echo 'last beacon is more than 100s ago, taking over'
+ else
+ # echo 'last beacon was recent. trying again'
+ exit 0
+ fi
+ fi
+ ;;
+ *)
+ echo 'unknown state, bailing out'
+ exit 0
+ ;;
+ esac
+ if test -e /var/lib/sync-containers3/${ctr.name}/incomplete; then
+ echo 'data is inconistent, start aborted'
+ exit 1
+ fi
+ consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
+ consul lock -verbose -monitor-retry=100 -timeout 30s -name container_${ctr.name} container_${ctr.name} ${pkgs.writers.writeBash "${ctr.name}-start" ''
+ set -efu
+ cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name} || :
+ mkdir -p /var/lib/sync-containers3/${ctr.name}/state
+ mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
+ /run/current-system/sw/bin/nixos-container start ${ctr.name}
+ # wait for system to become reachable for the first time
+ retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null
+ systemctl start ${ctr.name}_watcher.service
+ while systemctl is-active container@${ctr.name}.service >/devnull && /run/wrappers/bin/ping -q -c 3 ${ctr.name}.r >/dev/null; do
+ consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
+ sleep 10
+ done
+ ''}
+ '';
+ };
+ }; }
+ ]) (lib.attrValues cfg.containers)));
+
+ systemd.timers = lib.mapAttrs' (n: ctr: lib.nameValuePair "${ctr.name}_syncer" {
+ timerConfig = {
+ RandomizedDelaySec = 100;
+ };
+ }) cfg.containers;
+
+ users.groups = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" {
+ }) cfg.containers;
+ users.users = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" ({
+ group = "container_${ctr.name}";
+ isNormalUser = true;
+ uid = slib.genid_uint31 "container_${ctr.name}";
+ home = "/var/lib/sync-containers3/${ctr.name}";
+ createHome = true;
+ homeMode = "705";
+ })) cfg.containers;
+
+ })
+ (lib.mkIf (cfg.containers != {}) {
+ # networking
+ networking.networkmanager.unmanaged = [ "ctr0" ];
+ networking.interfaces.dummy0.virtual = true;
+ networking.bridges.ctr0.interfaces = [ "dummy0" ];
+ networking.interfaces.ctr0.ipv4.addresses = [{
+ address = "10.233.0.1";
+ prefixLength = 24;
+ }];
+ systemd.services."dhcpd-ctr0" = {
+ wantedBy = [ "multi-user.target" ];
+ after = [ "network.target" ];
+ serviceConfig = {
+ Type = "forking";
+ Restart = "always";
+ DynamicUser = true;
+ StateDirectory = "dhcpd-ctr0";
+ User = "dhcpd-ctr0";
+ Group = "dhcpd-ctr0";
+ AmbientCapabilities = [
+ "CAP_NET_RAW" # to send ICMP messages
+ "CAP_NET_BIND_SERVICE" # to bind on DHCP port (67)
+ ];
+ ExecStartPre = "${pkgs.coreutils}/bin/touch /var/lib/dhcpd-ctr0/dhcpd.leases";
+ ExecStart = "${pkgs.dhcp}/bin/dhcpd -4 -lf /var/lib/dhcpd-ctr0/dhcpd.leases -cf ${pkgs.writeText "dhpd.conf" ''
+ default-lease-time 600;
+ max-lease-time 7200;
+ authoritative;
+ ddns-update-style interim;
+ log-facility local1; # see dhcpd.nix
+
+ option subnet-mask 255.255.255.0;
+ option routers 10.233.0.1;
+ # option domain-name-servers 8.8.8.8; # TODO configure dns server
+ subnet 10.233.0.0 netmask 255.255.255.0 {
+ range 10.233.0.10 10.233.0.250;
+ }
+ ''} ctr0";
+ };
+ };
+ })
+ (lib.mkIf cfg.inContainer.enable {
+ users.groups.container_sync = {};
+ users.users.container_sync = {
+ group = "container_sync";
+ uid = slib.genid_uint31 "container_sync";
+ isNormalUser = true;
+ home = "/var/lib/self";
+ createHome = true;
+ openssh.authorizedKeys.keys = [
+ cfg.inContainer.pubkey
+ ];
+ };
+ })
+ ];
+}