nix-git/hosts/yodaHedgehog/host-specific.nix

154 lines
5.7 KiB
Nix
Raw Permalink Normal View History

2023-11-20 14:09:10 +01:00
# Suspend:
# sudo systemctl suspend
# Suspend for 60 seconds:
# sudo rtcwake -m mem -s 60
2023-11-20 19:12:54 +01:00
# View service log:
# journalctl -u daily-backup-and-suspend
# Print unit file:
# cat "$(systemctl show -P FragmentPath daily-backup-and-suspend.service)"
2023-11-20 14:09:10 +01:00
2023-11-05 17:21:29 +01:00
{ config, pkgs, ... }:

let
  # Name passed to `ssh` to reach the machine the backups are pulled from.
  backup-source = "rootNas";

  # Name of the "stay-awake" file, which is looked up at
  # `${backup-source}:${stay-awake-file}`. While it exists, the host does not suspend.
  # Example: ssh rootNas 'touch yodaHedgehog.stay-awake'
  stay-awake-file = config.networking.hostName + ".stay-awake";

  # Number of attempts to establish an SSH connection with ${backup-source}.
  # Kept as a string because it is interpolated into the shell script.
  retries = "10";

  # Seconds to wait between failed SSH connection attempts to ${backup-source}.
  # Kept as a string because it is interpolated into the shell script.
  wait-seconds = "15";
in
2023-11-05 17:21:29 +01:00
{
2023-11-15 13:54:37 +01:00
# Fail evaluation early if the options the daily-backup-and-suspend service
# relies on are not enabled.
# NOTE(review): `services.openssh.enable` is the option for the SSH daemon; the
# `ssh` client used by the script is put on the service's PATH separately.
# Confirm that requiring the daemon here is intended.
assertions = [
  {
    assertion = config.services.openssh.enable;
    message = "systemd service daily-backup-and-suspend requires SSH.";
  }
  {
    # The script starts journalwatch.service to mail the filtered journal.
    assertion = config.services.journalwatch.enable;
    message = "systemd service daily-backup-and-suspend requires journalwatch.";
  }
];
2023-11-05 17:21:29 +01:00
2023-11-15 13:54:37 +01:00
# Timer that triggers the service of the same name once per day.
systemd.timers."daily-backup-and-suspend" = {
  wantedBy = [ "multi-user.target" ];
  # Daily at 12:05.
  timerConfig.OnCalendar = [ "*-*-* 12:05:00" ];
  # Let the timer wake the system from suspend when it elapses.
  timerConfig.WakeSystem = true;
};
# Service behind the timer of the same name. The script
#   1. waits until ${backup-source} is reachable via SSH (giving up after ${retries} attempts),
#   2. pulls BTRFS snapshots with `btrbk`,
#   3. waits while the stay-awake file exists or a BTRFS scrub is running,
#   4. sends the filtered journal and a "finished" email,
#   5. syncs the disks and suspends the machine.
systemd.services."daily-backup-and-suspend" = {
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  # Packages required for this script.
  # For `ssh` and `journalwatch`, there are assertions above.
  # The order matters: `coreutils` must come before `busybox`, so that its
  # `readlink` (which supports `-e`) is found first.
  path = with pkgs; [
    # Provides `ssh`
    openssh
    # Provides `sync`, `readlink` (with support for parameter `-e`, required by `btrbk`)
    coreutils
    # Provides `awk`, `grep`, `sleep`, `printf`, `echo`, 'sendmail', `readlink` (without support for parameter `-e`)
    busybox
    # Provides `smtpctl`
    opensmtpd
    # Provides `btrbk`
    btrbk
    # Provides `sudo` required by `btrbk`.
    # Alternatively we could configure `btrbk` to use the "btrfs-progs" instead of the "btrfs-progs-sudo" backend. But the `btrbk` NixOS module has no option for this.
    sudo
  ];
  # Script to execute as main process.
  script = ''
    set -eu -o pipefail

    # Send a notification email.
    # Arguments: $1 - message body
    notify() {
      printf '%s\n\n%s' 'Subject: ${config.networking.hostName}' "$1" |
        sendmail -f langbein@mail.de daniel@systemli.org
    }

    for i in $(seq 1 ${retries}); do
      # Check if ${backup-source} is reachable via SSH.
      #
      # This check is useful if ${backup-source} is disconnected for a short period.
      # Additionally, this is necessary because of the following issue:
      # If the system resumes at 12:05, it is not directly connected to the Internet, even if "after" and "wants" are set to "network-online.target".
      # TODO: How can we fix this?
      # TODO: Once fixed, send notification already after first failed connection attempt (instead of fourth).
      #
      # Capture the exit status without tripping `set -e`.
      e=0
      result="$(ssh ${backup-source} 'echo ${backup-source}')" || e=$?
      if [ "$e" = 0 ] && [ "$result" = '${backup-source}' ]; then
        # Continue if successful.
        break
      fi
      # Otherwise do some error handling and try again.
      printf '%s\n' 'Delaying backup due to SSH connectivity problems.'
      # After the fourth failed connection attempt, send a notification by email.
      if [ "$i" = 4 ]; then
        notify 'Error connecting to ${backup-source}. Will retry in some seconds.'
      fi
      # After ${retries} failed connection attempts, send a second notification by email and give up.
      if [ "$i" = '${retries}' ]; then
        notify 'Error connecting to ${backup-source} for ${retries} times. Giving up!'
        exit 1
      fi
      # Wait some seconds before repeating.
      sleep "${wait-seconds}s"
    done

    # Pull BTRFS snapshots from ${backup-source}.
    btrbk -c /etc/btrbk/remote-backup-ssd.conf run
    btrbk -c /etc/btrbk/remote-backup-hdd.conf run

    # Don't suspend as long as `${backup-source}:${stay-awake-file}` exists.
    while :; do
      # The remote `ls` output (stdout and stderr) decides which case applies;
      # a failing `ssh` must not abort the script, hence `|| true`.
      result="$(ssh ${backup-source} 'ls ${stay-awake-file} 2>&1')" || true
      case "$result" in
        *'No such file or directory')
          break
          ;;
        '${stay-awake-file}')
          printf '%s\n' 'Delaying suspend due to ${stay-awake-file} file.'
          ;;
        *)
          printf '%s\n' 'Delaying suspend due to SSH connectivity problems.'
          ;;
      esac
      sleep 60s
    done

    # Wait until no BTRFS scrub service is running.
    while :; do
      # Only consider units that are actually (de)activating or active.
      # Without the state filter, a *failed* btrfs-scrub unit stays listed
      # and would block the suspend forever.
      running_services="$(systemctl list-units --type=service --state=active,activating,deactivating --plain --quiet | awk '{ print $1 }')"
      # No `grep -q` on purpose: the matching unit names end up in the log, and
      # grep reads all of its input, so `pipefail` cannot be hit by a broken pipe.
      if ! printf '%s' "$running_services" | grep '^btrfs-scrub'; then
        break
      fi
      printf '%s\n' 'Delaying suspend due to running BTRFS scrub service.'
      sleep 60s
    done

    # Send filtered journal by email (best effort).
    systemctl start journalwatch.service || true
    # Send notification by email.
    notify 'Finished backup.'
    # Let sendmail send emails.
    #while :; do
    #  # TODO: Plain usage of `smtpctl` gives the error:
    #  # smtpctl: this program must be setgid smtpq
    #  queue="$(smtpctl show queue)"
    #  if [ "''${queue}" = "" ]; then
    #    break
    #  fi
    #  printf '%s\n' 'Delaying suspend due to non-empty smtpd email queue.'
    #  sleep 1s
    #done
    sleep 15s
    #printf '%s\n' 'Finished backup script.'
    # Sync changed files to disk to reduce risk of file corruption in case of power loss.
    sync
    # Suspend to save power.
    systemctl suspend
  '';
};
}