borg-backup: implement saving a stream, use for database backups

Add facility to borg-backup role to run a command and save the output
of it to a separate archive file during the backup process.

This is mostly useful for database backups.  Compressed on-disk logs
are terrible for differential backups because revisions have
essentially no common data.  By saving the uncompressed stream
directly from mysqldump, we allow borg the chance to de-duplicate,
saving considerable space on the backup servers.

This is implemented for our ansible-managed servers currently doing
dumps.  We also add it to the testinfra.

This also separates the archive names for the filesystem and stream
backup with unique prefixes so they can be pruned separately.
Otherwise we end up keeping only one of the stream or filesystem
backups which isn't the intention.  However, due to issues with
--append-only mode we are not issuing prune commands at this time.

Note the dump commands are updated slightly, particularly with
"--skip-extended-insert" which was suggested by mordred and
significantly improves incremental diff-ability by being slightly more
verbose but keeping much more of the output stable across dumps.

Change-Id: I500062c1c52c74a567621df9aaa716de804ffae7
changes/38/771738/10
Ian Wienand 2 years ago
parent 30c05ebeb1
commit 51733e5623
  1. 2
      inventory/service/host_vars/etherpad01.opendev.org.yaml
  2. 2
      inventory/service/host_vars/gitea01.opendev.org.yaml
  3. 2
      inventory/service/host_vars/review01.openstack.org.yaml
  4. 6
      playbooks/roles/borg-backup/README.rst
  5. 37
      playbooks/roles/borg-backup/templates/borg-backup.j2
  6. 16
      playbooks/roles/etherpad/tasks/main.yaml
  7. 13
      playbooks/roles/gerrit/tasks/main.yaml
  8. 13
      playbooks/roles/gitea/tasks/main.yaml
  9. 14
      playbooks/test-borg-backup.yaml
  10. 3
      zuul.d/system-config-run.yaml

@ -8,3 +8,5 @@ etherpad_redirect_vhost: etherpad.openstack.org
borg_backup_excludes_extra:
# live db; we store daily dumps
- /var/etherpad/*
# local db backups, we store stream
- /var/backups/etherpad-mariadb

@ -7,3 +7,5 @@ borg_backup_excludes_extra:
- /var/gitea/data/
# db is backed up in dumps, don't capture live files
- /var/gitea/db
# backed up by streaming backup
- /var/backups/gitea-mariadb

@ -76,3 +76,5 @@ borg_backup_excludes_extra:
- /home/gerrit2/review_site/cache/*
- /home/gerrit2/review_site/tmp/*
- /home/gerrit2/review_site/index/*
# dump directly via stream
- /home/gerrit2/mysql_backups/*

@ -15,6 +15,12 @@ correctly on the backup server. This role sets a tuple ``borg_user``
with the username and public key; the ``borg-backup-server`` role uses this
variable for each host in the ``borg-backup`` group to initialise users.
Hosts can place files into ``/etc/borg-streams``; each should be a script
that writes to stdout the data to be fed into a backup archive on each
run. The output will be saved to an archive with the name of the file.
This is useful for raw database dumps which allow ``borg`` to
deduplicate as much as possible.
**Role Variables**
.. zuul:rolevar:: borg_username

@ -9,6 +9,7 @@ if [ -z "$1" ]; then
fi
BORG="/opt/borg/bin/borg"
BORG_CREATE="${BORG} create --verbose --filter AME --list --stats --show-rc --compression lz4 --exclude-caches "
# Setting this, so the repo does not need to be given on the commandline:
export BORG_REPO="ssh://{{ borg_username}}@${1}/opt/backups/{{ borg_username }}/backup"
@ -24,31 +25,35 @@ export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=1
# Backup the most important directories into an archive named after
# the machine this script is currently running on:
${BORG} create \
--verbose \
--filter AME \
--list \
--stats \
--show-rc \
--compression lz4 \
--exclude-caches \
${BORG_CREATE} \
{% for item in borg_backup_excludes + borg_backup_excludes_extra -%}
--exclude '{{ item }}' \
{% endfor -%}
\
::'{hostname}-{now}' \
::'{hostname}-filesystem-{now}' \
{% for item in borg_backup_dirs + borg_backup_dirs_extra -%}
{{ item }} {{ '\\' if not loop.last }}
{% endfor -%}
backup_exit=$?
if [ ${backup_exit} -eq 0 ]; then
info "Running prune"
${BORG} prune --verbose --list --prefix '{hostname}-' \
--show-rc --keep-daily 7 --keep-weekly 4 --keep-monthly 12
backup_exit=$?
fi
# Back up every stream script found in /etc/borg-streams.  Each script is
# run with bash and its stdout is piped into its own borg archive, named
# after the script file.  nullglob is set in the subshell so an empty or
# missing directory yields no iterations rather than a literal '*'.
for f in $(shopt -s nullglob; echo /etc/borg-streams/*)
do
    stream_name=$(basename "$f")
    info "Backing up stream archive $stream_name"
    bash "$f" | ${BORG_CREATE} --stdin-name ${stream_name} \
        ::"{hostname}-${stream_name}-{now}" -

    # PIPESTATUS is overwritten by every subsequent command, including the
    # [[ ]] tests below, so take a copy immediately after the pipeline.
    _status=( "${PIPESTATUS[@]}" )
    if [[ ${_status[0]} -ne 0 ]]; then
        # The dump script itself failed.
        info "Streaming script ${f} failed!"
        stream_exit=${_status[0]}
    elif [[ ${_status[1]} -ne 0 ]]; then
        # The script succeeded but borg returned non-zero.
        info "Borg failed!"
        stream_exit=${_status[1]}
    else
        stream_exit=0
    fi

    # Logical OR: backup_exit becomes 1 if this or any earlier step failed.
    (( backup_exit = backup_exit || stream_exit ))
done
if [ ${backup_exit} -eq 0 ]; then
info "Backup finished successfully"

@ -123,7 +123,7 @@
owner: root
group: root
- name: Set up cron job to backup the database
- name: Set up cron job for local database backup
cron:
name: etherpad-db-backup
state: present
@ -142,3 +142,17 @@
logrotate_rotate: 2
logrotate_file_name: /var/backups/etherpad-mariadb/etherpad-mariadb.sql.gz
logrotate_compress: false
# Register a stream script in /etc/borg-streams so the etherpad database
# dump is produced on stdout for the backup run instead of being read
# from a dump file on disk.
- name: Setup db backup streaming job
block:
# Make sure the directory holding the stream scripts exists.
- name: Create backup streaming config dir
file:
path: /etc/borg-streams
state: directory
# The file content is the command line itself: it runs mysqldump for the
# etherpad-lite database inside the mariadb docker-compose service.
- name: Create db streaming file
copy:
content: >-
/usr/local/bin/docker-compose -f /etc/etherpad-docker/docker-compose.yaml exec -T mariadb
bash -c '/usr/bin/mysqldump --skip-extended-insert --databases etherpad-lite --single-transaction -uroot -p"$MYSQL_ROOT_PASSWORD"'
dest: /etc/borg-streams/mysql

@ -338,3 +338,16 @@
job: 'find /home/gerrit2/review_site/logs/*.gz -mtime +30 -exec rm -f {} \;'
minute: 1
hour: 6
# Register a stream script in /etc/borg-streams so the gerrit database
# dump is produced on stdout for the backup run instead of being read
# from a dump file on disk.
- name: Setup db backup streaming job
block:
# Make sure the directory holding the stream scripts exists.
- name: Create backup streaming config dir
file:
path: /etc/borg-streams
state: directory
# The file content is the command line itself: mysqldump of all databases
# (skipping the mysql.event table), with options read from
# /root/.gerrit_db.cnf.
- name: Create db streaming file
copy:
content: >-
/usr/bin/mysqldump --defaults-file=/root/.gerrit_db.cnf --skip-extended-insert --ignore-table mysql.event --all-databases --single-transaction
dest: /etc/borg-streams/mysql

@ -186,3 +186,16 @@
vars:
logrotate_file_name: /var/backups/gitea-mariadb/gitea-mariadb.sql.gz
logrotate_compress: false
# Register a stream script in /etc/borg-streams so the gitea database
# dump is produced on stdout for the backup run instead of being read
# from a dump file on disk.
- name: Setup db backup streaming job
  block:
    # Make sure the directory holding the stream scripts exists.
    - name: Create backup streaming config dir
      file:
        path: /etc/borg-streams
        state: directory

    # The file content is the command line itself: it runs mysqldump for
    # the gitea database inside the mariadb docker-compose service.  The
    # folded scalar (>-) joins the two lines into one command with no
    # trailing newline.
    - name: Create db streaming file
      copy:
        content: >-
          /usr/local/bin/docker-compose -f /etc/gitea-docker/docker-compose.yaml exec -T mariadb
          bash -c '/usr/bin/mysqldump --skip-extended-insert --databases gitea --single-transaction -uroot -p"$MYSQL_ROOT_PASSWORD"'
        dest: /etc/borg-streams/mysql

@ -0,0 +1,14 @@
# Test play: installs a sample stream script on every host in the
# borg-backup group.
- hosts: "borg-backup"
tasks:
- name: Setup db backup streaming job
block:
# Make sure the directory holding the stream scripts exists.
- name: Create backup streaming config dir
file:
path: /etc/borg-streams
state: directory
# The sample script emits 5 blocks of 1M of random data on stdout.
- name: Create sample streaming file
copy:
content: >-
dd if=/dev/urandom bs=1M count=5
dest: /etc/borg-streams/random

@ -347,7 +347,10 @@
vars:
run_playbooks:
- playbooks/service-borg-backup.yaml
run_test_playbook: playbooks/test-borg-backup.yaml
files:
- playbooks/service-borg-backup.yaml
- playbooks/test-borg-backup.yaml
- playbooks/install-ansible.yaml
- playbooks/roles/install-borg
- playbooks/roles/borg-backup

Loading…
Cancel
Save