From 364d1f35e31dfc72828ac6c7dd2d95eac75d7f4e Mon Sep 17 00:00:00 2001 From: Stephen Margheim Date: Sun, 5 May 2024 13:03:03 +0200 Subject: [PATCH 1/3] Remove the existing verification command and task --- lib/litestream/commands.rb | 40 ------ lib/tasks/litestream_tasks.rake | 62 -------- test/litestream/test_commands.rb | 216 ---------------------------- test/tasks/test_litestream_tasks.rb | 66 --------- 4 files changed, 384 deletions(-) diff --git a/lib/litestream/commands.rb b/lib/litestream/commands.rb index 62a6911..340ae6e 100644 --- a/lib/litestream/commands.rb +++ b/lib/litestream/commands.rb @@ -88,46 +88,6 @@ def restore(database, async: false, **argv) execute("restore", argv, database, async: async, tabled_output: false) end - def verify(database, async: false, **argv) - raise DatabaseRequiredException, "database argument is required for verify command, e.g. litestream:verify -- --database=path/to/database.sqlite" if database.nil? || !File.exist?(database) - argv.stringify_keys! 
- - dir, file = File.split(database) - ext = File.extname(file) - base = File.basename(file, ext) - now = Time.now.utc.strftime("%Y%m%d%H%M%S") - backup = File.join(dir, "#{base}-#{now}#{ext}") - args = { - "-o" => backup - }.merge(argv) - restore(database, async: false, **args) - - restored_schema = `sqlite3 #{backup} "select name, type from sqlite_schema;"`.chomp.split("\n") - restored_data = restored_schema.map { _1.split("|") }.group_by(&:last) - restored_rows_count = restored_data["table"]&.sum { |tbl, _| `sqlite3 #{backup} "select count(*) from #{tbl};"`.chomp.to_i } - - original_schema = `sqlite3 #{database} "select name, type from sqlite_schema;"`.chomp.split("\n") - original_data = original_schema.map { _1.split("|") }.group_by(&:last) - original_rows_count = original_data["table"]&.sum { |tbl, _| `sqlite3 #{database} "select count(*) from #{tbl};"`.chomp.to_i } - - Dir.glob(backup + "*").each { |file| File.delete(file) } - - { - "original" => { - "path" => database, - "tables" => original_data["table"]&.size, - "indexes" => original_data["index"]&.size, - "rows" => original_rows_count - }, - "restored" => { - "path" => backup, - "tables" => restored_data["table"]&.size, - "indexes" => restored_data["index"]&.size, - "rows" => restored_rows_count - } - } - end - def databases(async: false, **argv) execute("databases", argv, async: async, tabled_output: true) end diff --git a/lib/tasks/litestream_tasks.rake b/lib/tasks/litestream_tasks.rake index edeffe1..5edd154 100644 --- a/lib/tasks/litestream_tasks.rake +++ b/lib/tasks/litestream_tasks.rake @@ -80,66 +80,4 @@ namespace :litestream do Litestream::Commands.snapshots(database, async: true, **options) end - - desc "verify backup of SQLite database from a Litestream replica, e.g. 
rake litestream:verify -- -database=storage/production.sqlite3" - task verify: :environment do - options = {} - if (separator_index = ARGV.index("--")) - ARGV.slice(separator_index + 1, ARGV.length) - .map { |pair| pair.split("=") } - .each { |opt| options[opt[0]] = opt[1] || nil } - end - database = options.delete("--database") || options.delete("-database") - options.symbolize_keys! - - result = Litestream::Commands.verify(database, async: true, **options) - original_tables = result["original"]["tables"] - restored_tables = result["restored"]["tables"] - original_indexes = result["original"]["indexes"] - restored_indexes = result["restored"]["indexes"] - original_rows = result["original"]["rows"] - restored_rows = result["restored"]["rows"] - - same_number_of_tables = original_tables == restored_tables - same_number_of_indexes = original_indexes == restored_indexes - same_number_of_rows = original_rows == restored_rows - - if same_number_of_tables && same_number_of_indexes && same_number_of_rows - puts "Backup for `#{database}` verified as consistent!\n" + [ - " tables #{original_tables}", - " indexes #{original_indexes}", - " rows #{original_rows}" - ].compact.join("\n") - else - abort "Verification failed for #{database}:\n" + [ - (unless same_number_of_tables - if original_tables > restored_tables - diff = original_tables - restored_tables - " Backup is missing #{diff} table#{"s" if diff > 1}" - else - diff = restored_tables - original_tables - " Backup has extra #{diff} table#{"s" if diff > 1}" - end - end), - (unless same_number_of_indexes - if original_indexes > restored_indexes - diff = original_indexes - restored_indexes - " Backup is missing #{diff} index#{"es" if diff > 1}" - else - diff = restored_indexes - original_indexes - " Backup has extra #{diff} index#{"es" if diff > 1}" - end - end), - (unless same_number_of_rows - if original_rows > restored_rows - diff = original_rows - restored_rows - " Backup is missing #{diff} row#{"s" if diff > 1}" - else 
- diff = restored_rows - original_rows - " Backup has extra #{diff} row#{"s" if diff > 1}" - end - end) - ].compact.join("\n") - end - end end diff --git a/test/litestream/test_commands.rb b/test/litestream/test_commands.rb index ceb0ff0..0bbeb2d 100644 --- a/test/litestream/test_commands.rb +++ b/test/litestream/test_commands.rb @@ -740,220 +740,4 @@ def test_snapshots_does_not_set_env_var_from_config_when_env_vars_already_set assert_equal "original_access", ENV["LITESTREAM_SECRET_ACCESS_KEY"] end end - - class TestverifyCommand < TestCommands - def test_verify_with_no_database - assert_raises ArgumentError do - Litestream::Commands.verify - end - end - - def test_verify_with_non_existent_database - assert_raises Litestream::Commands::DatabaseRequiredException do - Litestream::Commands.verify("db/non_existent.sqlite3") - end - end - - def test_verify_with_restore_not_succeeding - stub = proc do |cmd, async| - executable, command, *argv = cmd - assert_match Regexp.new("exe/test/litestream"), executable - assert_equal "restore", command - assert_equal 5, argv.size - assert_equal "--config", argv[0] - assert_match Regexp.new("dummy/config/litestream.yml"), argv[1] - assert_equal "-o", argv[2] - assert_match Regexp.new('db/test-\d{14}.sqlite3'), argv[3] - assert_equal "test/dummy/db/test.sqlite3", argv[4] - - [{"level" => "ERROR", "error" => "cannot restore"}] - end - Litestream::Commands.stub :run, stub do - assert_raises Litestream::Commands::CommandFailedException do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - end - end - - def test_verify_with_restore_succeeding - stub = proc do |cmd, async| - executable, command, *argv = cmd - assert_match Regexp.new("exe/test/litestream"), executable - assert_equal "restore", command - assert_equal 5, argv.size - assert_equal "--config", argv[0] - assert_match Regexp.new("dummy/config/litestream.yml"), argv[1] - assert_equal "-o", argv[2] - assert_match Regexp.new('db/test-\d{14}.sqlite3'), argv[3] - 
assert_equal "test/dummy/db/test.sqlite3", argv[4] - end - result = nil - Litestream::Commands.stub :run, stub do - result = Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_equal 2, result["original"]["tables"] - assert_nil result["restored"]["tables"] - assert_equal 0, result["original"]["rows"] - assert_nil result["restored"]["rows"] - assert_equal 2, result["original"]["indexes"] - assert_nil result["restored"]["indexes"] - end - - def test_verify_with_boolean_option - stub = proc do |cmd, async| - executable, command, *argv = cmd - assert_match Regexp.new("exe/test/litestream"), executable - assert_equal "restore", command - assert_equal 6, argv.size - assert_equal "--config", argv[0] - assert_match Regexp.new("dummy/config/litestream.yml"), argv[1] - assert_equal "-o", argv[2] - assert_match Regexp.new('db/test-\d{14}.sqlite3'), argv[3] - assert_equal "--if-db-not-exists", argv[4] - assert_equal "test/dummy/db/test.sqlite3", argv[5] - end - result = nil - Litestream::Commands.stub :run, stub do - result = Litestream::Commands.verify("test/dummy/db/test.sqlite3", "--if-db-not-exists" => nil) - end - - assert_equal 2, result["original"]["tables"] - assert_nil result["restored"]["tables"] - assert_equal 0, result["original"]["rows"] - assert_nil result["restored"]["rows"] - assert_equal 2, result["original"]["indexes"] - assert_nil result["restored"]["indexes"] - end - - def test_verify_with_string_option - stub = proc do |cmd, async| - executable, command, *argv = cmd - assert_match Regexp.new("exe/test/litestream"), executable - assert_equal "restore", command - assert_equal 7, argv.size - assert_equal "--config", argv[0] - assert_match Regexp.new("dummy/config/litestream.yml"), argv[1] - assert_equal "-o", argv[2] - assert_match Regexp.new('db/test-\d{14}.sqlite3'), argv[3] - assert_equal "--parallelism", argv[4] - assert_equal 10, argv[5] - assert_equal "test/dummy/db/test.sqlite3", argv[6] - end - result = nil - 
Litestream::Commands.stub :run, stub do - result = Litestream::Commands.verify("test/dummy/db/test.sqlite3", "--parallelism" => 10) - end - - assert_equal 2, result["original"]["tables"] - assert_nil result["restored"]["tables"] - assert_equal 0, result["original"]["rows"] - assert_nil result["restored"]["rows"] - assert_equal 2, result["original"]["indexes"] - assert_nil result["restored"]["indexes"] - end - - def test_verify_with_config_option - stub = proc do |cmd, async| - executable, command, *argv = cmd - assert_match Regexp.new("exe/test/litestream"), executable - assert_equal "restore", command - assert_equal 5, argv.size - assert_equal "--config", argv[0] - assert_equal "CONFIG", argv[1] - assert_equal "-o", argv[2] - assert_match Regexp.new('db/test-\d{14}.sqlite3'), argv[3] - assert_equal "test/dummy/db/test.sqlite3", argv[4] - end - result = nil - Litestream::Commands.stub :run, stub do - result = Litestream::Commands.verify("test/dummy/db/test.sqlite3", "--config" => "CONFIG") - end - - assert_equal 2, result["original"]["tables"] - assert_nil result["restored"]["tables"] - assert_equal 0, result["original"]["rows"] - assert_nil result["restored"]["rows"] - assert_equal 2, result["original"]["indexes"] - assert_nil result["restored"]["indexes"] - end - - def test_verify_sets_replica_bucket_env_var_from_config_when_env_var_not_set - Litestream.configure do |config| - config.replica_bucket = "mybkt" - end - - Litestream::Commands.stub :run, "" do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_equal "mybkt", ENV["LITESTREAM_REPLICA_BUCKET"] - assert_nil ENV["LITESTREAM_ACCESS_KEY_ID"] - assert_nil ENV["LITESTREAM_SECRET_ACCESS_KEY"] - end - - def test_verify_sets_replica_key_id_env_var_from_config_when_env_var_not_set - Litestream.configure do |config| - config.replica_key_id = "mykey" - end - - Litestream::Commands.stub :run, "" do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_nil 
ENV["LITESTREAM_REPLICA_BUCKET"] - assert_equal "mykey", ENV["LITESTREAM_ACCESS_KEY_ID"] - assert_nil ENV["LITESTREAM_SECRET_ACCESS_KEY"] - end - - def test_verify_sets_replica_access_key_env_var_from_config_when_env_var_not_set - Litestream.configure do |config| - config.replica_access_key = "access" - end - - Litestream::Commands.stub :run, "" do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_nil ENV["LITESTREAM_REPLICA_BUCKET"] - assert_nil ENV["LITESTREAM_ACCESS_KEY_ID"] - assert_equal "access", ENV["LITESTREAM_SECRET_ACCESS_KEY"] - end - - def test_verify_sets_all_env_vars_from_config_when_env_vars_not_set - Litestream.configure do |config| - config.replica_bucket = "mybkt" - config.replica_key_id = "mykey" - config.replica_access_key = "access" - end - - Litestream::Commands.stub :run, "" do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_equal "mybkt", ENV["LITESTREAM_REPLICA_BUCKET"] - assert_equal "mykey", ENV["LITESTREAM_ACCESS_KEY_ID"] - assert_equal "access", ENV["LITESTREAM_SECRET_ACCESS_KEY"] - end - - def test_verify_does_not_set_env_var_from_config_when_env_vars_already_set - ENV["LITESTREAM_REPLICA_BUCKET"] = "original_bkt" - ENV["LITESTREAM_ACCESS_KEY_ID"] = "original_key" - ENV["LITESTREAM_SECRET_ACCESS_KEY"] = "original_access" - - Litestream.configure do |config| - config.replica_bucket = "mybkt" - config.replica_key_id = "mykey" - config.replica_access_key = "access" - end - - Litestream::Commands.stub :run, "" do - Litestream::Commands.verify("test/dummy/db/test.sqlite3") - end - - assert_equal "original_bkt", ENV["LITESTREAM_REPLICA_BUCKET"] - assert_equal "original_key", ENV["LITESTREAM_ACCESS_KEY_ID"] - assert_equal "original_access", ENV["LITESTREAM_SECRET_ACCESS_KEY"] - end - end end diff --git a/test/tasks/test_litestream_tasks.rb b/test/tasks/test_litestream_tasks.rb index 02af4ee..574112b 100644 --- a/test/tasks/test_litestream_tasks.rb +++ 
b/test/tasks/test_litestream_tasks.rb @@ -11,7 +11,6 @@ def setup Rake::Task["litestream:databases"].reenable Rake::Task["litestream:generations"].reenable Rake::Task["litestream:snapshots"].reenable - Rake::Task["litestream:verify"].reenable end def teardown @@ -217,69 +216,4 @@ def test_snapshots_task_with_arguments_without_separator fake.verify end end - - class TestVerifyTask < TestLitestreamTasks - def test_verify_task_with_only_database_using_single_dash_failing - ARGV.replace ["--", "-database=db/test.sqlite3"] - fake = Minitest::Mock.new - fake.expect :call, - {"original" => {"tables" => 2, "indexes" => 4, "rows" => 6}, "restored" => {"tables" => 1, "indexes" => 2, "rows" => 3}}, - ["db/test.sqlite3"], - async: true - - Litestream::Commands.stub :verify, fake do - error = assert_raises SystemExit do - capture_io { Rake.application.invoke_task "litestream:verify" } - end - assert_match("Verification failed for db/test.sqlite3", error.message) - assert_match("Backup is missing 1 table", error.message) - assert_match("Backup is missing 2 indexes", error.message) - assert_match("Backup is missing 3 rows", error.message) - end - - fake.verify - end - - def test_verify_task_with_only_database_using_double_dash_failing - ARGV.replace ["--", "--database=db/test.sqlite3"] - fake = Minitest::Mock.new - fake.expect :call, - {"original" => {"tables" => 1, "indexes" => 2, "rows" => 3}, "restored" => {"tables" => 2, "indexes" => 4, "rows" => 6}}, - ["db/test.sqlite3"], - async: true - - Litestream::Commands.stub :verify, fake do - error = assert_raises SystemExit do - capture_io { Rake.application.invoke_task "litestream:verify" } - end - assert_match("Verification failed for db/test.sqlite3", error.message) - assert_match("Backup has extra 1 table", error.message) - assert_match("Backup has extra 2 indexes", error.message) - assert_match("Backup has extra 3 rows", error.message) - end - - fake.verify - end - - def test_verify_task_with_arguments_succeeding - 
ARGV.replace ["--", "-database=db/test.sqlite3", "--if-db-not-exists"] - fake = Minitest::Mock.new - out = nil - fake.expect :call, - {"original" => {"tables" => 2, "indexes" => 2, "rows" => 2}, "restored" => {"tables" => 2, "indexes" => 2, "rows" => 2}}, - ["db/test.sqlite3"], - async: true, - "--if-db-not-exists": nil - - Litestream::Commands.stub :verify, fake do - out, _err = capture_io { Rake.application.invoke_task "litestream:verify" } - assert_match("Backup for `db/test.sqlite3` verified as consistent!", out) - assert_match("tables 2", out) - assert_match("indexes 2", out) - assert_match("rows 2", out) - end - - fake.verify - end - end end From 8fb9663e941876d0807629ca7adf65cd2d43d430 Mon Sep 17 00:00:00 2001 From: Stephen Margheim Date: Sun, 5 May 2024 13:04:48 +0200 Subject: [PATCH 2/3] Add a new Litestream.verify! method that uses a sentinel record to ensure that backups are fresh --- Gemfile.lock | 1 + lib/litestream.rb | 25 +++++++++++++++++++++++++ litestream.gemspec | 1 + 3 files changed, 27 insertions(+) diff --git a/Gemfile.lock b/Gemfile.lock index 15d33f3..91f5d1a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -3,6 +3,7 @@ PATH specs: litestream (0.9.0) logfmt (>= 0.0.10) + sqlite3 GEM remote: https://rubygems.org/ diff --git a/lib/litestream.rb b/lib/litestream.rb index 681df1a..40759e2 100644 --- a/lib/litestream.rb +++ b/lib/litestream.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "sqlite3" + module Litestream class << self attr_accessor :configuration @@ -16,6 +18,29 @@ class Configuration def initialize end end + + VerificationFailure = Class.new(StandardError) + + def self.verify!(database_path) + database = SQLite3::Database.new(database_path) + database.execute("CREATE TABLE IF NOT EXISTS _litestream_verification (id INTEGER PRIMARY KEY, uuid BLOB)") + sentinel = SecureRandom.uuid + database.execute("INSERT INTO _litestream_verification (uuid) VALUES (?)", [sentinel]) + # give the Litestream replication process time to 
replicate the sentinel value + sleep 10 + + backup_path = "tmp/#{Time.now.utc.strftime("%Y%m%d%H%M%S")}_#{sentinel}.sqlite3" + Litestream::Commands.restore(database_path, **{"-o" => backup_path}) + + backup = SQLite3::Database.new(backup_path) + result = backup.execute("SELECT 1 FROM _litestream_verification WHERE uuid = ? LIMIT 1", sentinel) # => [[1]] || [] + + raise VerificationFailure, "Verification failed, sentinel not found" if result.empty? + ensure + database.execute("DELETE FROM _litestream_verification WHERE uuid = ?", sentinel) + database.close + Dir.glob(backup_path + "*").each { |file| File.delete(file) } + end end require_relative "litestream/version" diff --git a/litestream.gemspec b/litestream.gemspec index d85c3fa..32f24b2 100644 --- a/litestream.gemspec +++ b/litestream.gemspec @@ -26,6 +26,7 @@ Gem::Specification.new do |spec| # Uncomment to register a new dependency of your gem spec.add_dependency "logfmt", ">= 0.0.10" + spec.add_dependency "sqlite3" spec.add_development_dependency "rubyzip" spec.add_development_dependency "rails" spec.add_development_dependency "sqlite3" From 9bf1ad1ce9e7f32c6059ffe35a71a471edf5d74a Mon Sep 17 00:00:00 2001 From: Stephen Margheim Date: Sun, 5 May 2024 14:05:42 +0200 Subject: [PATCH 3/3] Update README with details of new verification strategy --- README.md | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 204e18a..323b8e3 100644 --- a/README.md +++ b/README.md @@ -168,37 +168,19 @@ You can forward arguments in whatever order you like, you simply need to ensure ### Verification -You can verify the integrity of your backed-up databases using the gem's provided `litestream:verify` rake task. This rake task requires that you specify which specific database you want to verify. As with the `litestream:restore` tasks, you pass arguments to the rake task via argument forwarding. 
For example, to verify the production database, you would run: +You can verify the integrity of your backed-up databases using the gem's provided `Litestream.verify!` method. The method takes the path to a database file that you have configured Litestream to back up; that is, it takes one of the `path` values under the `dbs` key in your `litestream.yml` configuration file. For example, to verify the production database, you would run: -```shell -bin/rails litestream:verify -- --database=storage/production.sqlite3 -# or -bundle exec rake litestream:verify -- --database=storage/production.sqlite3 +```ruby +Litestream.verify! "storage/production.sqlite3" ``` -The `litestream:verify` rake task takes the same options as the `litestream:restore` rake task. After restoring the backup, the rake task will verify the integrity of the restored database by ensuring that the restored database file +In order to verify that the backup for that database is both restorable and fresh, the method will add a new row to that database in the `_litestream_verification` table, which it will create if needed. It will then wait 10 seconds to give the Litestream utility time to replicate that change to whatever storage providers you have configured. After that, it will download the latest backup from that storage provider and ensure that this verification row is present in the backup. If the verification row is _not_ present, the method will raise a `Litestream::VerificationFailure` exception. This check ensures that the restored database file 1. exists, 2. can be opened by SQLite, and -3. sufficiently matches the original database file. - -Since point 3 is subjective, the rake task will output a message providing both the file size and number of tables of both the "original" and "restored" databases. You must manually verify that the restored database is within an acceptable range of the original database. 
- -The rake task will output a message similar to the following: - -``` -size - original 21688320 - restored 21688320 - delta 0 - -tables - original 9 - restored 9 - delta 0 -``` +3. has up-to-date data. -After restoring the backup, the `litestream:verify` rake task will delete the restored database file. If you need the restored database file, use the `litestream:restore` rake task instead. +After restoring the backup, the `Litestream.verify!` method will delete the restored database file. If you need the restored database file, use the `litestream:restore` rake task or `Litestream::Commands.restore` method instead. ### Introspection