Skip to content

Commit 5624528

Browse files
TP2000-1407 Sqlite dump failure fix (#1265)
* Support disabling sqlite dump process. * Allow sqlite dump schedule to be disabled via env var. * Tighten function naming. * Add new local fs storage. * Self-doc var names and comment. * Self-doc vars and doc strings. * Doc strings. * Add local fs dump support. * Rename storage class and refactor. The class name refactor provides a more specific description of its functionality and prepares for another, similar, but different storage class. The functionality refactor moves export creation behaviour into the storage class for better encapsulation. * Rename mixin. * Command docs and flag tweak. * Comment tweak. * Factor out base class and add new Storage. * Use new storage. * Add and use decorator to log function timings. * Log message tweak. * Relocate log timing decorator. * Method rename, add docstring and close connection. * Fix typo. * Variable renaming. * Improve docstring. * Comment correction. * Replace mocked storage with correct new Storage class. * Better docstrings. * Improve help text. * Function name tweak. * Remove temporary implementation. * Rationalise imports. * Corrected annotation. * Unit test test_local_export_task_does_not_replace. * Unit test test_local_export_task_saves. * Unit test test_local_export_task_ignores_unpublished_and_unapproved_transactions. * Unit test dump_sqlite command. * Break up line.
1 parent e64cd92 commit 5624528

File tree

15 files changed

+373
-111
lines changed

15 files changed

+373
-111
lines changed

common/inspect_tap_tasks.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ def clean_tasks(self, tasks, task_status="", task_name="") -> List[Dict]:
5959

6060
return tasks_cleaned
6161

62-
def current_rule_checks(self, task_name="") -> List[CeleryTask]:
62+
def current_tasks(self, task_name="") -> List[CeleryTask]:
6363
"""Return the list of tasks queued or started, ready to display in the
6464
view."""
65+
6566
inspect = app.control.inspect()
6667
if not inspect:
6768
return []

common/util.py

+46
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
from __future__ import annotations
44

5+
import os
56
import re
7+
import typing
68
from datetime import date
79
from datetime import datetime
810
from datetime import timedelta
@@ -43,6 +45,7 @@
4345
from django.db.models.functions.text import Upper
4446
from django.db.transaction import atomic
4547
from django.template import loader
48+
from django.utils import timezone
4649
from lxml import etree
4750
from psycopg.types.range import DateRange
4851
from psycopg.types.range import TimestampRange
@@ -611,3 +614,46 @@ def format_date_string(date_string: str, short_format=False) -> str:
611614
return date_parser.parse(date_string).strftime(settings.DATE_FORMAT)
612615
except:
613616
return ""
617+
618+
619+
def log_timing(logger_function: typing.Callable):
620+
"""
621+
Decorator function to log start and end times of a decorated function.
622+
623+
When decorating a function, `logger_function` must be passed in to the
624+
decorator to ensure the correct logger instance and function are applied.
625+
`logger_function` may be any one of the logging output functions, but is
626+
likely to be either `debug` or `info`.
627+
Example:
628+
```
629+
import logging
630+
631+
logger = logging.getLogger(__name__)
632+
633+
@log_timing(logger_function=logger.info)
634+
def my_function():
635+
...
636+
```
637+
"""
638+
639+
@wrapt.decorator
640+
def wrapper(wrapped, instance, args, kwargs):
641+
start_time = timezone.localtime()
642+
logger_function(
643+
f"Entering the function {wrapped.__name__}() on process "
644+
f"pid={os.getpid()} at {start_time.isoformat()}",
645+
)
646+
647+
result = wrapped(*args, **kwargs)
648+
649+
end_time = timezone.localtime()
650+
elapsed_time = end_time - start_time
651+
logger_function(
652+
f"Exited the function {wrapped.__name__}() on "
653+
f"process pid={os.getpid()} at {end_time.isoformat()} after "
654+
f"an elapsed time of {elapsed_time}.",
655+
)
656+
657+
return result
658+
659+
return wrapper

common/views.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from commodities.models import GoodsNomenclature
4848
from common.business_rules import BusinessRule
4949
from common.business_rules import BusinessRuleViolation
50-
from common.celery import app
50+
from common.celery import app as celery_app
5151
from common.forms import HomeSearchForm
5252
from common.models import TrackedModel
5353
from common.models import Transaction
@@ -65,8 +65,6 @@
6565
from workbaskets.models import WorkflowStatus
6666
from workbaskets.views.mixins import WithCurrentWorkBasket
6767

68-
from .celery import app as celery_app
69-
7068

7169
class HomeView(LoginRequiredMixin, FormView):
7270
template_name = "common/homepage.jinja"
@@ -350,7 +348,7 @@ class AppInfoView(
350348
DATETIME_FORMAT = "%d %b %Y, %H:%M"
351349

352350
def active_tasks(self) -> Dict:
353-
inspect = app.control.inspect()
351+
inspect = celery_app.control.inspect()
354352
if not inspect:
355353
return {}
356354

conftest.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1119,13 +1119,13 @@ def hmrc_storage(s3):
11191119

11201120
@pytest.fixture
11211121
def sqlite_storage(s3, s3_bucket_names):
1122-
"""Patch SQLiteStorage with moto so that nothing is really uploaded to
1122+
"""Patch SQLiteS3VFSStorage with moto so that nothing is really uploaded to
11231123
s3."""
1124-
from exporter.storages import SQLiteStorage
1124+
from exporter.storages import SQLiteS3VFSStorage
11251125

11261126
storage = make_storage_mock(
11271127
s3,
1128-
SQLiteStorage,
1128+
SQLiteS3VFSStorage,
11291129
bucket_name=settings.SQLITE_STORAGE_BUCKET_NAME,
11301130
)
11311131
assert storage.endpoint_url is settings.SQLITE_S3_ENDPOINT_URL

exporter/management/commands/dump_sqlite.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,39 @@
1111

1212

1313
class Command(BaseCommand):
14+
help = (
15+
"Create a snapshot of the application database to a file in SQLite "
16+
"format. Snapshot file names take the form <transaction-order>.db, "
17+
"where <transaction-order> is the value of the last published "
18+
"transaction's order attribute. Care should be taken to ensure that "
19+
"there is sufficient local file system storage to accommodate the "
20+
"SQLite file - if you choose to target remote S3 storage, then a "
21+
"temporary local copy of the file will be created and cleaned up."
22+
)
23+
1424
def add_arguments(self, parser: CommandParser) -> None:
1525
parser.add_argument(
16-
"--immediately",
26+
"--asynchronous",
1727
action="store_const",
18-
help="Run the task in this process now rather than queueing it up",
28+
help="Queue the snapshot task to run in an asynchronous process.",
1929
const=True,
2030
default=False,
2131
)
32+
parser.add_argument(
33+
"--save-local",
34+
help=(
35+
"Save the SQLite snapshot to the local file system under the "
36+
"(existing) directory given by DIRECTORY_PATH."
37+
),
38+
dest="DIRECTORY_PATH",
39+
)
2240
return super().add_arguments(parser)
2341

2442
def handle(self, *args: Any, **options: Any) -> Optional[str]:
2543
logger.info(f"Triggering tariff database export to SQLite")
2644

27-
if options["immediately"]:
28-
export_and_upload_sqlite()
45+
local_path = options["DIRECTORY_PATH"]
46+
if options["asynchronous"]:
47+
export_and_upload_sqlite.delay(local_path)
2948
else:
30-
export_and_upload_sqlite.delay()
49+
export_and_upload_sqlite(local_path)

exporter/sqlite/__init__.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,41 @@
4141
}
4242

4343

44-
def make_export_plan(sqlite: runner.Runner) -> plan.Plan:
45-
names = (
44+
def make_export_plan(sqlite_runner: runner.Runner) -> plan.Plan:
45+
app_names = (
4646
name.split(".")[0]
4747
for name in settings.DOMAIN_APPS
4848
if name not in settings.SQLITE_EXCLUDED_APPS
4949
)
50-
all_models = chain(*[apps.get_app_config(name).get_models() for name in names])
50+
all_models = chain(*[apps.get_app_config(name).get_models() for name in app_names])
5151
models_by_table = {model._meta.db_table: model for model in all_models}
5252

5353
import_script = plan.Plan()
54-
for table, sql in sqlite.tables:
54+
for table, create_table_statement in sqlite_runner.tables:
5555
model = models_by_table.get(table)
5656
if model is None or model.__name__ in SKIPPED_MODELS:
5757
continue
5858

59-
columns = list(sqlite.read_column_order(model._meta.db_table))
60-
import_script.add_schema(sql)
59+
columns = list(sqlite_runner.read_column_order(model._meta.db_table))
60+
import_script.add_schema(create_table_statement)
6161
import_script.add_data(model, columns)
6262

6363
return import_script
6464

6565

6666
def make_export(connection: apsw.Connection):
67-
with NamedTemporaryFile() as db_name:
68-
sqlite = runner.Runner.make_tamato_database(Path(db_name.name))
69-
plan = make_export_plan(sqlite)
70-
71-
export = runner.Runner(connection)
72-
export.run_operations(plan.operations)
67+
with NamedTemporaryFile() as temp_sqlite_db:
68+
# Create Runner instance with its SQLite file name pointing at a path on
69+
# the local file system. This is only required temporarily in order to
70+
# create an in-memory plan that can be run against a target database
71+
# object.
72+
plan_runner = runner.Runner.make_tamato_database(
73+
Path(temp_sqlite_db.name),
74+
)
75+
plan = make_export_plan(plan_runner)
76+
# make_tamato_database() creates a Connection instance that needs
77+
# closing once an in-memory plan has been created from it.
78+
plan_runner.database.close()
79+
80+
export_runner = runner.Runner(connection)
81+
export_runner.run_operations(plan.operations)

exporter/sqlite/plan.py

+2
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,11 @@ def operations(self) -> Iterable[Operation]:
100100
]
101101

102102
def add_schema(self, sql: str):
103+
"""Add sql schema (table) creation statements to this Plan instance."""
103104
self._operations.append((sql, [[]]))
104105

105106
def add_data(self, model: Type[Model], columns: Iterable[str]):
107+
"""Add data insert statements to this Plan instance."""
106108
queryset = model.objects
107109
output_columns = []
108110
for column in columns:

exporter/sqlite/runner.py

+33-20
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def normalise_loglevel(cls, loglevel):
4040
return loglevel
4141

4242
@classmethod
43-
def manage(cls, db: Path, *args: str):
43+
def manage(cls, sqlite_file: Path, *args: str):
4444
"""
4545
Runs a Django management command on the SQLite database.
4646
@@ -56,7 +56,7 @@ def manage(cls, db: Path, *args: str):
5656
sqlite_env["CELERY_LOG_LEVEL"],
5757
)
5858

59-
sqlite_env["DATABASE_URL"] = f"sqlite:///{str(db)}"
59+
sqlite_env["DATABASE_URL"] = f"sqlite:///{str(sqlite_file)}"
6060
# Required to make sure the postgres default isn't set as the DB_URL
6161
if sqlite_env.get("VCAP_SERVICES"):
6262
vcap_env = json.loads(sqlite_env["VCAP_SERVICES"])
@@ -71,28 +71,36 @@ def manage(cls, db: Path, *args: str):
7171
)
7272

7373
@classmethod
74-
def make_tamato_database(cls, db: Path) -> "Runner":
75-
"""
76-
Generate a new and empty SQLite database with the TaMaTo schema.
77-
78-
Because SQLite uses different fields to PostgreSQL, first missing
79-
migrations are generated to bring in the different style of validity
80-
fields. However, these should not generally stick around and be applied
81-
to Postgres so they are removed after being applied.
82-
"""
74+
def make_tamato_database(cls, sqlite_file: Path) -> "Runner":
75+
"""Generate a new and empty SQLite database with the TaMaTo schema
76+
derived from Tamato's models - by performing 'makemigrations' followed
77+
by 'migrate' on the Sqlite file located at `sqlite_file`."""
8378
try:
84-
cls.manage(db, "makemigrations", "--name", "sqlite_export")
85-
cls.manage(db, "migrate")
86-
assert db.exists()
87-
return cls(apsw.Connection(str(db)))
88-
79+
# Because SQLite uses different fields to PostgreSQL, missing
80+
# migrations are first generated to bring in the different style of
81+
# validity fields. However, these should not be applied to Postgres
82+
# and so should be removed (in the `finally` block) after they have
83+
# been applied (when running `migrate`).
84+
cls.manage(sqlite_file, "makemigrations", "--name", "sqlite_export")
85+
cls.manage(sqlite_file, "migrate")
86+
assert sqlite_file.exists()
87+
return cls(apsw.Connection(str(sqlite_file)))
8988
finally:
9089
for file in Path(settings.BASE_DIR).rglob(
9190
"**/migrations/*sqlite_export.py",
9291
):
9392
file.unlink()
9493

9594
def read_schema(self, type: str) -> Iterator[Tuple[str, str]]:
95+
"""
96+
Generator yielding a tuple of 'name' and 'sql' column values from
97+
Sqlite's "schema table", 'sqlite_schema'.
98+
99+
The `type` param filters rows that have a matching 'type' column value,
100+
which may be any one of: 'table', 'index', 'view', or 'trigger'.
101+
102+
See https://www.sqlite.org/schematab.html for further details.
103+
"""
96104
cursor = self.database.cursor()
97105
cursor.execute(
98106
f"""
@@ -110,16 +118,21 @@ def read_schema(self, type: str) -> Iterator[Tuple[str, str]]:
110118

111119
@property
112120
def tables(self) -> Iterator[Tuple[str, str]]:
121+
"""Generator yielding a tuple of each Sqlite table object's 'name' and
122+
the SQL `CREATE_TABLE` statement that can be used to create the
123+
table."""
113124
yield from self.read_schema("table")
114125

115126
@property
116127
def indexes(self) -> Iterator[Tuple[str, str]]:
128+
"""Generator yielding a tuple of each SQLite table index object name and
129+
the SQL `CREATE_INDEX` statement that can be used to create it."""
117130
yield from self.read_schema("index")
118131

119132
def read_column_order(self, table: str) -> Iterator[str]:
120133
"""
121-
Returns the name of the columns in the order they are defined in an
122-
SQLite database.
134+
Returns the name of `table`'s columns in the order they are defined in
135+
an SQLite database.
123136
124137
This is necessary because the Django migrations do not generate the
125138
columns in the order they are defined on the model, and there's no other
@@ -131,8 +144,8 @@ def read_column_order(self, table: str) -> Iterator[str]:
131144
yield column[1]
132145

133146
def run_operations(self, operations: Iterable[Operation]):
134-
"""Runs the supplied sequence of operations against the SQLite
135-
database."""
147+
"""Runs each operation in `operations` against `database` member
148+
attribute (a connection object to an SQLite database file)."""
136149
cursor = self.database.cursor()
137150
for operation in operations:
138151
logger.debug("%s: %s", self.database, operation[0])

0 commit comments

Comments
 (0)