
Re: [Merge] ~andersson123/autopkgtest-cloud:sqlite-db-backup into autopkgtest-cloud:master

 

Review: Needs Fixing

Added a bunch of notes but it's mostly trivial stuff (unnecessary globals). There are a few bits that warrant a bit of attention though (and/or an explanation as to why they're actually correct). Looking much better though, thanks!

Diff comments:

> diff --git a/charms/focal/autopkgtest-web/webcontrol/db-backup b/charms/focal/autopkgtest-web/webcontrol/db-backup
> new file mode 100755
> index 0000000..273e153
> --- /dev/null
> +++ b/charms/focal/autopkgtest-web/webcontrol/db-backup
> @@ -0,0 +1,201 @@
> +#!/usr/bin/python3
> +"""
> +This script periodically backs up the sqlite3 db to swift storage
> +and clears up old backups
> +"""
> +
> +import atexit
> +import configparser
> +import datetime
> +import gzip
> +import logging
> +import os
> +import sqlite3
> +import sys
> +from pathlib import Path
> +
> +import swiftclient
> +from helpers.utils import init_db
> +
> +DB_PATH = ""
> +DB_COPY_LOCATION = ""
> +DB_NAME = ""
> +CONTAINER_NAME = "db-backups"
> +MAX_DAYS = 7
> +SQLITE_DUMP_CMD = (
> +    'sqlite3 /home/ubuntu/autopkgtest.db ".backup autopkgtest.db.bak"'
> +)
> +DB_BACKUP_NAME = ""
> +DB_BACKUP_PATH = ""
> +
> +
> +def db_connect() -> sqlite3.Connection:
> +    """
> +    Establish connection to sqlite3 db
> +    """
> +    global DB_PATH
> +    global DB_NAME
> +    global DB_BACKUP_NAME
> +    global DB_BACKUP_PATH
> +    cp = configparser.ConfigParser()
> +    cp.read(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf"))

You may want to check the return of ConfigParser.read here. If it can't read the specified file, it won't complain (because that method's meant to be used with multiple files, some of which may not exist). If you *definitely* want to read that file you probably want ConfigParser.read_file instead.
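
For instance, something along these lines (sketch only; same path as in the diff):

    import configparser
    import os
    import sys

    cp = configparser.ConfigParser()
    try:
        # read_file() takes an already-open file, so a missing config
        # fails loudly here instead of being silently skipped
        with open(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf")) as f:
            cp.read_file(f)
    except FileNotFoundError:
        sys.exit("autopkgtest-cloud.conf is missing")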

> +    DB_PATH = Path(cp["web"]["database"])
> +    DB_NAME = DB_PATH.name
> +    DB_BACKUP_NAME = "%s.bak" % DB_NAME
> +    DB_BACKUP_PATH = Path("/tmp") / (DB_PATH.name + ".bak")
> +
> +    db_con = init_db(cp["web"]["database"])
> +
> +    return db_con
> +
> +
> +def backup_db(db_con: sqlite3.Connection):
> +    global DB_BACKUP_PATH

No need to declare this global; it's not being modified by the function
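
To illustrate (toy sketch): global is only needed when a function rebinds the module-level name; plain reads work without it.

    DB_BACKUP_PATH = "/tmp/autopkgtest.db.bak"  # module-level name

    def backup():
        # reading the name needs no global declaration
        print(DB_BACKUP_PATH)

    def retarget():
        global DB_BACKUP_PATH  # required here: this rebinds the name
        DB_BACKUP_PATH = "/tmp/other.bak"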

> +    db_backup_con = sqlite3.connect(DB_BACKUP_PATH)
> +    with db_backup_con:
> +        db_con.backup(db_backup_con, pages=1)
> +    db_backup_con.close()
> +
> +
> +def compress_db():
> +    """
> +    use gzip to compress database
> +    """
> +    global DB_BACKUP_PATH

Likewise, no need for global here

> +    with open(DB_BACKUP_PATH, "rb") as f_in, gzip.open(
> +        "%s.gz" % DB_BACKUP_PATH, "wb"
> +    ) as f_out:
> +        f_out.writelines(f_in)

I'm not sure writelines is the correct method here. That'll try to iterate through the "lines" of f_in, but f_in isn't a text file (it does work, but the "lines" aren't really lines, they're just blocks delimited by coincidental, essentially random \n's in the binary data).

I think what you probably want here is shutil.copyfileobj(f_in, f_out) which'll ensure a fixed-size (minimal) buffer is used for the copy.
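
i.e. something like (sketch, reusing DB_BACKUP_PATH from the diff):

    import gzip
    import shutil

    DB_BACKUP_PATH = "/tmp/autopkgtest.db.bak"  # placeholder for the real path

    with open(DB_BACKUP_PATH, "rb") as f_in, gzip.open(
        "%s.gz" % DB_BACKUP_PATH, "wb"
    ) as f_out:
        # copies in fixed-size chunks, ignoring any \n bytes in the data
        shutil.copyfileobj(f_in, f_out)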

> +
> +
> +def init_swift_con() -> swiftclient.Connection:
> +    """
> +    Establish connection to swift storage
> +    """
> +    swift_creds = {
> +        "authurl": os.environ["OS_AUTH_URL"],
> +        "user": os.environ["OS_USERNAME"],
> +        "key": os.environ["OS_PASSWORD"],
> +        "os_options": {
> +            "region_name": os.environ["OS_REGION_NAME"],
> +            "project_domain_name": os.environ["OS_PROJECT_DOMAIN_NAME"],
> +            "project_name": os.environ["OS_PROJECT_NAME"],
> +            "user_domain_name": os.environ["OS_USER_DOMAIN_NAME"],
> +        },
> +        "auth_version": 3,
> +    }
> +    swift_conn = swiftclient.Connection(**swift_creds)
> +    return swift_conn
> +
> +
> +def create_container_if_it_doesnt_exist(swift_conn: swiftclient.Connection):
> +    """
> +    create db-backups container if it doesn't already exist
> +    """
> +    global CONTAINER_NAME

Again, no need for global here

> +    try:
> +        swift_conn.get_container(CONTAINER_NAME)
> +    except swiftclient.exceptions.ClientException:
> +        swift_conn.put_container(
> +            CONTAINER_NAME,
> +        )
> +
> +
> +def upload_backup_to_db(
> +    swift_conn: swiftclient.Connection,
> +) -> swiftclient.Connection:
> +    """
> +    Upload compressed database to swift storage under container db-backups
> +    """
> +    global DB_PATH

Same for these three; they're all used read-only in this routine

> +    global DB_BACKUP_PATH
> +    global CONTAINER_NAME
> +    now = datetime.datetime.now().strftime("%Y/%m/%d/%H_%M_%S")

Do we really want now() here and not utcnow()? If the host's local timezone uses DST there's the potential for confusion in the ordering of backups (though it rather depends how often backups are taken -- if it's only daily then this is a nitpick)
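
i.e. (sketch; utcnow() gives a naive UTC timestamp, which sorts consistently year-round):

    import datetime

    # local time: can go "backwards" across a DST transition
    now_local = datetime.datetime.now().strftime("%Y/%m/%d/%H_%M_%S")
    # UTC: no DST, so backup object paths stay in chronological order
    now_utc = datetime.datetime.utcnow().strftime("%Y/%m/%d/%H_%M_%S")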

> +    object_path = "%s/%s" % (now, DB_PATH.name + ".gz")
> +    for _ in range(5):

Wow, that's ... disturbing. Swift is sufficiently unreliable that we just throw 5 retries with a re-auth on connection failure around every single swift transaction? Now I understand why the main bit down the bottom keeps re-assigning swift_conn.

Still, is this really necessary? I've no experience here, so maybe it is, but my gut instinct is "urgh!"
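
If the retries really are needed, I'd at least factor the loop out rather than repeating it around every call; a rough sketch (reusing init_swift_con from this diff):

    import logging

    import swiftclient

    def with_retries(swift_conn, operation, attempts=5):
        # run operation(conn); on ClientException, re-auth and retry
        for _ in range(attempts):
            try:
                operation(swift_conn)
                return swift_conn
            except swiftclient.exceptions.ClientException as e:
                logging.info("exception: %s", str(e))
                swift_conn = init_swift_con()
        raise RuntimeError("swift operation failed %d times" % attempts)

Then each call site becomes e.g. swift_conn = with_retries(swift_conn, lambda c: c.delete_object(CONTAINER_NAME, obj["name"])), and the "return the possibly re-created connection" dance lives in one place.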

> +        try:
> +            swift_conn.put_object(
> +                CONTAINER_NAME,
> +                object_path,
> +                "%s.gz" % DB_BACKUP_PATH,
> +                content_type="text/plain; charset=UTF-8",
> +                headers={"Content-Encoding": "gzip"},
> +            )
> +            break
> +        except swiftclient.exceptions.ClientException as e:
> +            logging.info("exception: %s" % str(e))
> +            swift_conn = init_swift_con()
> +    return swift_conn
> +
> +
> +def delete_old_backups(
> +    swift_conn: swiftclient.Connection,
> +) -> swiftclient.Connection:
> +    """
> +    Delete objects in db-backups container that are older than 7 days
> +    """
> +    global CONTAINER_NAME
> +    global MAX_DAYS

Same again, no need for global

> +    logging.info("Removing old db backups...")
> +    _, objects = swift_conn.get_container(CONTAINER_NAME)
> +    now = datetime.datetime.now()
> +
> +    for obj in objects:
> +        last_modified = obj["last_modified"].split(".")[0]

Is this just stripping off microseconds? You could just parse them into the timestamp below with .%f in the format string?
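
i.e. (sketch; assumes swift's usual ISO-8601 last_modified format):

    import datetime

    last_modified = "2024-02-05T12:34:56.789012"  # example value from swift
    timestamp = datetime.datetime.strptime(
        last_modified, "%Y-%m-%dT%H:%M:%S.%f"
    )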

> +        timestamp = datetime.datetime.strptime(
> +            last_modified, "%Y-%m-%dT%H:%M:%S"
> +        )
> +        diff = now - timestamp
> +        if diff > datetime.timedelta(days=MAX_DAYS):
> +            logging.info("Deleting %s" % obj["name"])
> +            for _ in range(5):

More swifty-retry shenanigans; again, if it's really necessary ... okay, but I'd rather not do this if it's not absolutely necessary

> +                try:
> +                    swift_conn.delete_object(CONTAINER_NAME, obj["name"])
> +                    break
> +                except swiftclient.exceptions.ClientException as _:
> +                    swift_conn = init_swift_con()
> +    return swift_conn
> +
> +
> +def cleanup():
> +    """
> +    Delete db and compressed db under /tmp
> +    """
> +    global DB_COPY_LOCATION

Another global you can scrub :)

> +    if os.path.isfile(DB_COPY_LOCATION):
> +        os.remove(DB_COPY_LOCATION)

TOCTOU; don't bother with isfile; just os.remove and catch OSError/FileNotFoundError (or use Path.unlink(missing_ok=True)).

> +    if os.path.isfile("%s.gz" % DB_COPY_LOCATION):
> +        os.remove("%s.gz" % DB_COPY_LOCATION)

Same; just remove and catch the error.
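
Something like this covers both (sketch; missing_ok needs Python 3.8+, which focal has):

    from pathlib import Path

    DB_COPY_LOCATION = "/tmp/autopkgtest.db"  # placeholder for the real path

    # no isfile() check needed: unlink simply ignores a missing file
    Path(DB_COPY_LOCATION).unlink(missing_ok=True)
    Path("%s.gz" % DB_COPY_LOCATION).unlink(missing_ok=True)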

> +
> +
> +if __name__ == "__main__":
> +    # Only bother running script if this unit is the leader.
> +    logging.basicConfig(level="INFO")
> +    if not os.path.isfile("/run/autopkgtest-web-is-leader"):
> +        logging.info("unit is not leader, exiting...")
> +        sys.exit(0)
> +    # connect to db
> +    logging.info("Connecting to db")
> +    db_con = db_connect()
> +    # backup the db
> +    logging.info("Creating a backup of the db...")
> +    backup_db(db_con)
> +    # compress it
> +    logging.info("Compressing db")
> +    compress_db()
> +    # register cleanup if anything fails
> +    logging.info("Registering cleanup function")
> +    atexit.register(cleanup)
> +    # initialise swift conn
> +    logging.info("Setting up swift connection")
> +    swift_conn = init_swift_con()
> +    # create container if it doesn't exist
> +    create_container_if_it_doesnt_exist(swift_conn)
> +    # upload to swift container
> +    logging.info("Uploading db to swift!")
> +    swift_conn = upload_backup_to_db(swift_conn)
> +    # Remove old results
> +    logging.info("Pruning old database backups")
> +    swift_conn = delete_old_backups(swift_conn)
> +    # run cleanup
> +    cleanup()


-- 
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/460043
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~andersson123/autopkgtest-cloud:sqlite-db-backup into autopkgtest-cloud:master.


