Scott M. Mcdermott

UNIX Systems & Network Administrator
available for contract or salaried positions

snapdb.sh

#!/bin/bash

source  /usr/local/lib/sh/include

include env
include args
include lvm
include lock
include rc

require bomb
require kill_pid_wait

# Declare the run-time options.  Each rcset line carries an option
# letter, an option name and a help string; the functions below read the
# resulting values as cf_<name> (cf_runas, cf_datalv, cf_dumpdb, ...).
# NOTE(review): rcset/rcinit are library helpers not visible in this
# file -- presumably rcinit fills the cf_* variables from $RCFILE and
# the command line; confirm against the library.
#
rcset   u runas         : "user to own the files of temporary dbms instance"
rcset   l datalv        : "source logical volume housing the PGDATA directory"
rcset   d dumpdb        : "postgresql cluster to dump, on said logical volume"
rcset   f dumpfile      : "name of dump file (renames atomically from tmpfile)"
rcset   p dumpdir       : "where to place dump (will use random tmp name)"
rcset   c commentprog   : "program to run, output inserted as comment in dump"
rcset   w pswait        : "wait this long for tmp postgres instance to come up"
rcset   b beginprog     : "program to modify SQL database on start"
rcset   e endprog       : "program to emit SQL to be appended to dumpfile"

# Defaults that the caller's environment may override.  NOW is sampled
# once here and reused by main to name the snapshot volume.
# NOTE(review): esetvar presumably exports in addition to assigning
# (TMPDIR is consumed by mktemp, PG* by child processes) -- confirm.
#
setvar  RCFILE          ${RCFILE:-"/etc/foo/snapdbrc"}
esetvar TMPDIR          ${TMPDIR:-"/var/tmp"} # avoid wrath of tmpwatch
esetvar NOW             `date +%Y%m%d%H%M%S`

# Register the program name, a one-line summary and the long description
# for the usage message (do_sanity_checks calls rcusage_exit when
# options are missing).  The text below is emitted at run time; treat it
# as program output, not as a comment.
#
rcsetusage \
        `get_invocation_name` \
        "SQL dump an LVM-backed PostgreSQL database to a bzip2" \
        "
        Makes a bzip2ed SQL dump of a PostgreSQL database, found in a
        standard PGDATA filesystem on the given LVM volume.  Writes the
        dumpfile atomically by way of a tmp file and rename(2) (NOTE:
        this clobbers existing files).  Embeds a comment in line1 of
        dump by running the given program and capturing its output.

        A temporary LVM snapshot (using 1GB free space) is taken of the
        PGDATA volume, mounted at a temporary location, and a minimal
        DBMS process started on it for just long enough to do a dump.
        The instance will be queriable by comment program prior to dump.

        After completion, all server processes, temporary files, mounts,
        and snapshot devices are removed.  They are guaranteed not to
        collide with or allow access from other PostgreSQL instances.
        "

##############################################################################

# Verify the preconditions for a run before anything is touched.
#
# Reads:   cf_runas, cf_commentprog
# Aborts via bomb when not running as root, when cf_runas is not a known
# account, or when cf_commentprog is not an executable file.  When
# rccheckall fails, warns and hands over to rcusage_exit.
#
do_sanity_checks ()
{
        (($(id -u) == 0))                       || bomb "must be root"

        # make sure every option has a value before inspecting any one
        # of them, so a missing option yields the usage text instead of
        # a complaint about one particular setting
        #
        rccheckall                              || { warn "not all options set";
                                                     rcusage_exit; }

        # quoted on purpose: an empty value has to fail these tests.
        # unquoted, "test -x" with no operand is true and "getent
        # passwd" with no key simply lists every account
        #
        getent passwd "$cf_runas" &>/dev/null   || bomb "bad db user"
        test -x "$cf_commentprog"               || bomb "noexec commentprog"
}

# Put the shell into a defensive state for the rest of the run: new
# files are created without any access for "other", expanding an unset
# variable becomes an error, and the working directory is the root so
# no mount point is held busy by us.
#
setup_safety ()
{
        umask 027
        set -o nounset
        cd /
}

# Print the name of the volume group that contains a logical volume.
#
# $1 - logical volume name, handed unchanged to lv_get_parent_vg
#
# Writes the volume group name to stdout without a trailing newline.
# Calls bomb when lv_get_parent_vg fails.
#
get_volume_group ()
{
        local lv=$1
        local vg

        vg=$(lv_get_parent_vg "$lv")            || bomb "bad source vg"

        # the name is data, never a printf format string
        #
        printf '%s' "$vg"
}

# Confirm that a snapshot can be taken.
#
# $1 - volume group that will hold the snapshot
# $2 - path of the device to be snapshotted
#
# Calls bomb unless vg_freespace_gb reports at least 1 for the volume
# group and the source path is a block device.
#
check_devices ()
{
        local vg=$1
        local src=$2

        (($(vg_freespace_gb "$vg") >= 1))       || bomb "need 1G free space"

        # quoted so that an empty path is rejected; "test -b" with no
        # operand at all evaluates to true
        #
        test -b "$src"                          || bomb "bad snap source dev"
}

# Write the configuration for the throw-away postgres instance and
# create the directory that will hold its unix socket.
#
# $1 - existing directory that receives postgresql.conf and pg_hba.conf
# $2 - socket directory to create; also written into postgresql.conf
#
# Calls bomb when either file or the directory cannot be created.
#
make_configs ()
{
        local confdir=$1
        local sockdir=$2

        # emitted with printf rather than a "<<-" here-document: that
        # form only strips leading TABs, so the file content and even
        # the terminator would depend on how this script is indented
        #
        make_pgserver_config ()
        {
                printf '%s\n' \
                "listen_addresses        ''              # only listen on unix" \
                "unix_socket_directory   '$sockdir'      # ephemeral socket dir" \
                "log_destination         syslog          # keep logs out of band" \
                "shared_buffers          16              # min shared resources" \
                "max_connections         1               # nobody else allowed" \
                "autovacuum              off             # lvremove clean enough" \
                "client_encoding         'UTF-8'         # is this necessary?" \
                > "$confdir/postgresql.conf" || return 1
        }

        # local (unix socket) connections only, no authentication
        #
        make_pgaccess_config ()
        {
                echo "local all all trust" > "$confdir/pg_hba.conf" ||
                        return 1
        }

        make_pgserver_config                    || bomb "conf create failed"
        make_pgaccess_config                    || bomb "access create failed"
        mkdir "$sockdir"                        || bomb "make sockdir failed"
}

# Take a 1G LVM snapshot of a volume and mount it.
#
# $1 - source device to snapshot
# $2 - device path the snapshot will have; its last path component is
#      used as the snapshot name
# $3 - mount point, created here (must not exist yet)
#
# Calls bomb when the mount point, the snapshot or the mount fails.
#
snap_and_mount ()
{
        local src=$1
        local dst=$2
        local mnt=$3
        local name=${dst##*/}

        mkdir "$mnt"                            || bomb "making snap mntpoint"

        # lvcreate output is discarded; only its exit status matters
        #
        lvcreate                                \
                --quiet                         \
                --snapshot                      \
                --size 1G                       \
                --name "$name"                  \
                "$src"                          \
        &>/dev/null                             || bomb "snap create failed"

        mount "$dst" "$mnt"                     || bomb "snap mount failed"
}

# Recursively hand ownership of the given paths to the dbms user.
#
# $@ - files or directories to chown to $cf_runas
#
# Calls bomb when chown fails.
#
setup_permissions ()
{
        chown -R "$cf_runas" "$@"               || bomb "changing ownerships"
}

# Remove the run-state files copied along with the snapshot.
#
# The postgres instance that was snapshotted may have been running at
# snap-time, so the snapshot can contain a stale postmaster.pid and
# postmaster.opts; they have to go before our own copy is started on it.
#
# $1 - data directory on the mounted snapshot
#
erase_runfiles ()
{
        local rundir=$1

        rm -f "$rundir/postmaster.pid" "$rundir/postmaster.opts"
}

# Start a postgres server as $cf_runas in the background, wait until it
# accepts connections, and print its process id on stdout (no trailing
# newline) so the caller can kill it later.
#
# Reads:   cf_runas, cf_pswait (number of one-second attempts per wait)
# Expects: PGDATA, PGHOST and PGDATABASE already set in the environment
#
# Calls bomb when no child of the su process shows up, or when psql
# cannot connect, within cf_pswait attempts.
#
spawn_restricted_pgsql ()
{
        local supid
        local pgpid
        local i         # loop counter, kept out of the caller's scope

        # this expects PGDATA set up correctly to refer to our
        # lv snapshot of the DRBD volume
        #
        (su -c postgres "$cf_runas" </dev/null &>/dev/null) & supid=$!

        # we might have returned from the fork() before 'su'
        # managed to start its child postgres instance, so wait
        # for it to appear (really this only takes a second, but
        # we reuse the constant value below)
        #
        for ((i = 0; i < cf_pswait; i++))
        do pgpid=$(pgrep -P "$supid") && break; sleep 1
        done

        # this will never really have to wait this long
        ((i == cf_pswait)) &&
                bomb "user simulation never completed"

        # give it time to replay logs if it needs to; remember,
        # our snapshot was taken from a database undergoing live
        # IO, and may need some recovery, as well as this system
        # being just plain busy
        #
        # expects PGHOST -----------> set to our temp sockdir
        # expects PGDATABASE -------> back up this database
        #
        for ((i = 0; i < cf_pswait; i++))
        do
                if (echo | su -c "psql -S" "$cf_runas" 2>/dev/null)
                then break
                else sleep 1
                fi
        done

        # no sense continuing if it never came up
        #
        ((i == cf_pswait)) &&
                bomb "postgres never came up"

        # allow caller to kill the postgres instance we started;
        # printed as data, not used as a format string
        #
        printf '%s' "$pgpid"
}

# Run the comment program as $cf_runas and print what it wrote, for
# embedding in the dump.
#
# Reads:   cf_commentprog, cf_runas
# Output:  the program's stdout, verbatim, without trailing newlines
#
# Calls bomb when the program (or su) exits non-zero.
#
get_embeddable_comment ()
{
        local var

        # the assignment is kept separate from the 'local' above:
        # 'local var=$(...)' would report the status of 'local'
        # and hide a failure of the command
        #
        if var="$(su -c "$cf_commentprog" "$cf_runas")"
        then printf '%s' "$var" # data, not a format: keep % and \ intact
        else bomb "cannot obtain embedded comment"
        fi
}

# Render a string as one single-quoted shell word on stdout, so that it
# can be spliced into a command line which another shell will parse.
# Embedded single quotes are rewritten as '\'' (close, escaped quote,
# reopen).
#
_snapdb_shell_quote ()
{
        printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# Make the dumpfile and move it into the desired filename atomically
# when done.
#
# $1 - comment text placed, behind a "-- " SQL comment leader, ahead of
#      the dump
#
# Reads:   dumppath (final file name), cf_dumpdb, cf_runas,
#          cf_beginprog, cf_endprog
#
# The dump is written as $cf_runas to a randomly named file in the
# directory of $dumppath and renamed over $dumppath on success.  Calls
# bomb on any failure; a partially written tmp file is removed first.
#
# NOTE(review): only the first line of a multi-line comment ends up
# behind the "-- " leader.
#
do_bzip2_sql_dump ()
{
        local comment="$1"
        local ddir=${dumppath%/*}
        local dbase
        local dpath

        # separate from 'local' so a mktemp failure is not masked
        #
        dbase=$(mktemp -u)                      || bomb "cannot make tmp name"
        dbase=${dbase##*/}
        dpath=$ddir/$dbase.$cf_dumpdb.sql.bz2

        # both values are re-parsed by the shell that su starts, so
        # they go in as quoted words: a quote character in the comment
        # or a blank in the path must not alter the command
        #
        local q_comment=$(_snapdb_shell_quote "-- $comment")
        local q_dpath=$(_snapdb_shell_quote "$dpath")

        # put the comment in the first line of the dumpfile
        # with a SQL comment leader so it won't affect the
        # restore, and make sure to pass out errors anywhere
        # in the pipeline so failures are not hidden
        #
        local dumpcmd="
                (set -o pipefail &>/dev/null;
                 (/bin/echo $q_comment && pg_dump && $cf_endprog) |
                 bzip2;
                ) >> $q_dpath;
        "

        test -d "$ddir"                         || bomb "dump dir dne"
        test -w "$ddir"                         || bomb "dump dir unwritable"

        # left unquoted as before, so a value that carries arguments
        # still splits into words
        #
        $cf_beginprog                           || bomb "start program failed"

        # do not leave a truncated dump lying around in the dump dir
        #
        su -c "$dumpcmd" "$cf_runas"            || { rm -f "$dpath";
                                                     bomb "dump create failed"; }
        mv -f "$dpath" "$dumppath"              || bomb "dumpfile move failed"
}

# Tear down everything that was set up for the dump.
#
# $1 - snapshot device: unmounted first, removed with lvremove last
# $2 - mount point of the snapshot, removed once empty
# $3 - socket directory, removed recursively
#
# Calls bomb at the first step that fails; later steps are then skipped.
#
do_cleanup ()
{
        local snapdev=$1
        local snapmnt=$2
        local sockdir=$3

        umount "$snapdev"                       || bomb "unmount snap failed"
        rmdir "$snapmnt"                        || bomb "cannot remove snapdir"
        rm -r "$sockdir"                        || bomb "cannot remove sockdir"
        lvremove -f "$snapdev" &>/dev/null      || bomb "snap removal failed"
}

# Stop the temporary postgres instance.
#
# $1 - process id of the instance
#
# Passes the pid and $cf_pswait to kill_pid_wait and calls bomb when
# that helper reports failure.
#
kill_pgsql ()
{
        local target=$1

        if ! kill_pid_wait $target $cf_pswait
        then bomb "postgres still lives"
        fi
}

##############################################################################

# Entry point: snapshot the data volume, run a private postgres on the
# snapshot, dump the database, then tear everything down again.
#
# $@ - command line arguments, passed through untouched to rcinit
#
# Every step aborts the run through bomb on failure.
# NOTE(review): nothing undoes the snapshot, mount or server when a
# later step bombs -- confirm whether bomb or the lock library cleans
# up, otherwise a failed run leaves them behind.
#
main ()
{
        local vg
        local pid

        rcinit "$@"
        serialize_exec_lock
        do_sanity_checks
        setup_safety

        # gather all the information we need to proceed.  values
        # produced by helpers that may bomb are captured first and
        # checked: bomb inside a command substitution only ends that
        # subshell, so without the check we would carry on with an
        # empty value
        #
        setvar dumppath         "$cf_dumpdir/$cf_dumpfile"
        setvar snapname         "$cf_datalv.$NOW"
        setvar snapmnt          "$(mktemp -u)"
        setvar sockdir          "$(mktemp -u)"
        vg=$(get_volume_group "$cf_datalv")     || bomb "no volume group found"
        setvar srcvg            "$vg"
        setvar srcdev           "/dev/$srcvg/$cf_datalv"
        setvar snapdev          "/dev/$srcvg/$snapname"

        # create LVM snapshot and ready it for access via DBMS
        #
        check_devices           "$srcvg" "$srcdev"
        snap_and_mount          "$srcdev" "$snapdev" "$snapmnt"
        make_configs            "$snapmnt" "$sockdir"
        setup_permissions       "$snapmnt" "$sockdir"
        erase_runfiles          "$snapmnt"

        # start a crippled DBMS instance on the snapshot
        #
        esetvar PGDATA          "$snapmnt"
        esetvar PGDATABASE      "$cf_dumpdb"
        esetvar PGHOST          "$sockdir"
        esetvar PGUSER          "$cf_runas"
        pid=$(spawn_restricted_pgsql)           || bomb "no dbms to dump from"
        setvar  pgpid           "$pid"

        # at this point we are certain a postgres instance
        # exists, we can connect to it, and we also have its PID
        # to enable us to kill later, so do the backup, clean up
        # after ourselves, and exit
        #
        # NOTE(review): a failure inside get_embeddable_comment is
        # still not propagated here; the dump then proceeds with an
        # empty comment.  aborting at this point would leave the
        # server running and the snapshot mounted, so it is left as is
        #
        do_bzip2_sql_dump       "$(get_embeddable_comment)" # needs PG*
        kill_pgsql              "$pgpid"
        do_cleanup              "$snapdev" "$snapmnt" "$sockdir"

        serialize_exec_unlock
}

main "$@"