Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open data #1411

Draft
wants to merge 17 commits into
base: master
Choose a base branch
from
Draft
7 changes: 7 additions & 0 deletions common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,3 +783,10 @@ def make_real_edit(
cls.objects.filter(pk=obj.pk).update(update_type=UpdateType.DELETE)

return None


def in_test() -> bool:
    """Report whether the process is running under the test settings.

    The check is based solely on the ``DJANGO_SETTINGS_MODULE`` environment
    variable: the result is True only when it is exactly ``"settings.test"``
    (for example when running via pytest or ``python manage.py test`` with
    those settings), and False when it is unset or has any other value.
    """
    settings_module = os.environ.get("DJANGO_SETTINGS_MODULE")
    return settings_module == "settings.test"
Empty file added open_data/__init__.py
Empty file.
1 change: 1 addition & 0 deletions open_data/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Register your models here.
7 changes: 7 additions & 0 deletions open_data/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from common.app_config import CommonConfig

APP_LABEL = "open_data"


# Application configuration for the open data app. It builds on the
# project's CommonConfig and takes its app name from the module-level
# APP_LABEL constant, so the label is defined in exactly one place.
class OpenDataConfig(CommonConfig):
    name = APP_LABEL
57 changes: 57 additions & 0 deletions open_data/commodities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import time
from datetime import date

from commodities.models.dc import CommodityCollectionLoader
from commodities.models.dc import CommodityTreeSnapshot
from commodities.models.dc import SnapshotMoment
from open_data.models import ReportGoodsNomenclature


def tree_edge_to_db(tree_edges):
    """Copy tree information from commodity edges onto the report table.

    For every commodity in ``tree_edges`` the matching
    ``ReportGoodsNomenclature`` row (looked up by the commodity's object pk)
    gets its ``indent``, ``parent_trackedmodel_ptr_id`` and ``description``
    updated and is saved.

    Args:
        tree_edges: Mapping of commodity -> parent commodity. A falsy parent
            means the commodity has no parent, and the parent pointer is
            stored as ``None``.
    """
    for commodity, parent in tree_edges.items():
        parent_obj_pk = parent.obj.pk if parent else None

        # Only the lookup can raise DoesNotExist, so keep the try minimal.
        try:
            report_commodity = ReportGoodsNomenclature.objects.get(
                trackedmodel_ptr=commodity.obj.pk,
            )
        except ReportGoodsNomenclature.DoesNotExist:
            # Best effort: commodities without a matching report row are
            # skipped rather than treated as an error.
            continue

        report_commodity.indent = commodity.indent
        report_commodity.parent_trackedmodel_ptr_id = parent_obj_pk
        report_commodity.description = commodity.description
        report_commodity.save()


def save_commodities_parent(verbose=False):
    """Store the parent of every commodity in ReportGoodsNomenclature.

    This is a brute-force approach: CommodityTreeSnapshot builds the list of
    commodities and their parents for a given two-digit prefix, so every
    prefix from "00" to "99" is tried to be sure all possible combinations
    are covered. Each resulting tree is handed to ``tree_edge_to_db``, which
    writes the parents to ReportGoodsNomenclature. Reusing the existing
    tree-building code means the correct information is found without having
    to replicate that logic in SQL.

    Args:
        verbose: When true, print each prefix as it starts and the total
            elapsed time at the end.
    """
    moment = SnapshotMoment(transaction=None, date=date.today())
    start = time.time()

    for prefix_number in range(100):
        prefix = f"{prefix_number:02d}"
        if verbose:
            print(f"Starting prefix {prefix}")

        loader = CommodityCollectionLoader(prefix=prefix)
        collection = loader.load(current_only=True, effective_only=True)
        tree = CommodityTreeSnapshot(
            commodities=collection.commodities,
            moment=moment,
        )
        tree_edge_to_db(tree.edges)

    if verbose:
        print(f"Elapsed time {time.time() - start}")
36 changes: 36 additions & 0 deletions open_data/direct_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from open_data.apps import APP_LABEL
from open_data.models.utils import get_lookup_name


def get_create_materialised_view_sql():
    """Return the SQL that creates the lookup materialised view and its indexes.

    The view maps the id of each tracked model row (``old_id``) to the
    ``current_version_id`` of its version group, leaving out rows whose group
    has no current version and rows with ``update_type = 2``
    (NOTE(review): presumably UpdateType.DELETE - confirm).

    Every statement uses ``IF NOT EXISTS`` so the script can be executed
    again after the view and its indexes have already been created; without
    it the index creation fails on a second run.

    Returns:
        A string holding the three SQL statements.
    """
    lookup_name = get_lookup_name()
    return f"""
    CREATE MATERIALIZED VIEW IF NOT EXISTS {lookup_name} AS
        SELECT common_trackedmodel.ID as old_id, current_version_id
        FROM public.common_trackedmodel
        INNER JOIN common_versiongroup
            ON (common_trackedmodel.version_group_id = common_versiongroup.id)
        WHERE (current_version_id IS NOT NULL AND NOT (common_trackedmodel.update_type = 2));

    CREATE UNIQUE INDEX IF NOT EXISTS old_id_idx ON {lookup_name} (old_id);
    CREATE INDEX IF NOT EXISTS current_version_id_idx ON {lookup_name} (current_version_id);
    """


def get_drop_fk_sql():
    # Returns a PL/pgSQL DO block that drops every FOREIGN KEY constraint on
    # tables whose name starts with APP_LABEL.
    #
    # The tables in the open data area cannot be updated while their foreign
    # key constraints are in place. It is still useful to declare the foreign
    # keys in the Django models, so that Django builds the correct querysets;
    # this query therefore drops the constraints in the database while they
    # remain in the model definitions.
    # The query itself was copied from a Stack Overflow answer.
    #
    # NOTE(review): in a SQL LIKE pattern "_" matches any single character,
    # so the underscore in APP_LABEL is a wildcard here, and the match is not
    # restricted to a particular schema - confirm this cannot touch tables
    # outside the open data app.

    return f"""
DO $$DECLARE r record;
BEGIN
FOR r IN SELECT table_schema, table_name, constraint_name
FROM information_schema.table_constraints AS tc
WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name like '{APP_LABEL}%'
LOOP
EXECUTE 'ALTER TABLE '|| quote_ident(r.table_schema) || '.' || quote_ident(r.table_name)|| ' DROP CONSTRAINT '|| quote_ident(r.constraint_name) || ';';
END LOOP;
END$$;
"""
29 changes: 29 additions & 0 deletions open_data/geo_areas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import time

from geo_areas.models import GeographicalArea
from open_data.models import ReportGeographicalArea


def save_geo_areas(verbose=False):
    """Fill in the derived fields of every ReportGeographicalArea row.

    For each report row the flags ``is_single_region_or_country``,
    ``is_all_countries`` and ``is_group`` are computed by the related
    geographical area (``trackedmodel_ptr``), the description is read via
    ``get_description()``, and the row is saved.

    Args:
        verbose: When true, print the total elapsed time at the end.
            Defaults to False, matching ``save_commodities_parent``.
    """
    report_geo_areas = ReportGeographicalArea.objects.select_related(
        "trackedmodel_ptr",
    )
    start = time.time()
    for report_geo_area in report_geo_areas:
        geo_area = report_geo_area.trackedmodel_ptr
        report_geo_area.is_single_region_or_country = (
            geo_area.is_single_region_or_country()
        )
        report_geo_area.is_all_countries = geo_area.is_all_countries()
        report_geo_area.is_group = geo_area.is_group()

        # NOTE(review): the area is fetched again through
        # GeographicalArea.objects even though ``geo_area`` is already
        # loaded; kept as-is in case the manager matters for
        # get_description() - confirm whether ``geo_area`` could be reused.
        report_geo_area.description = (
            GeographicalArea.objects.get(pk=report_geo_area.trackedmodel_ptr_id)
            .get_description()
            .description
        )
        report_geo_area.save()
    if verbose:
        print(f"Elapsed time {time.time() - start}")
Empty file.
21 changes: 21 additions & 0 deletions open_data/management/commands/refresh_open_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import logging

from django.core.management.base import BaseCommand

from open_data.tasks import populate_open_data

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that refreshes the open data reporting tables."""

    # The trailing space on the first fragment matters: adjacent string
    # literals are concatenated without any separator.
    help = (
        "It deletes all the data in the reporting tables, and copies a fresh set of data "
        "from the tracked tables in the database."
    )

    def handle(self, *args, **options):
        """Run the full refresh and report success on stdout."""
        logger.info("Starting the update of all the tables in the database")
        # NOTE(review): the positional True is presumably the verbose flag -
        # confirm against populate_open_data's signature.
        populate_open_data(True)
        self.stdout.write(
            self.style.SUCCESS("Successfully updated the reporting tables."),
        )
93 changes: 93 additions & 0 deletions open_data/measures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import time

from django.db import connection

from common.models.transactions import Transaction
from common.models.utils import override_current_transaction
from open_data.models import ReportMeasure
from open_data.models import ReportMeasureCondition


def update_measure_components(verbose):
    """Create the ReportMeasureCondition rows for the reported measures.

    For every ReportMeasure with ``sid >= 20000000`` the latest approved
    conditions of the underlying measure are read (with their reference price
    string) and turned into ReportMeasureCondition instances, which are then
    inserted with a single ``bulk_create``. Afterwards the queries returned
    by ``ReportMeasureCondition.update_fk_queries()`` are executed to patch
    foreign keys.

    Args:
        verbose: When true, print progress messages and the elapsed time.
            All progress output is governed by this flag.
    """
    # Unless there is a current transaction, reading the latest description
    # fails in a mysterious way. Because this is called from a command, no
    # transaction is set, so the last one is used explicitly.
    tx = Transaction.objects.last()
    start = time.time()
    if verbose:
        print("Updating measure components")
    with override_current_transaction(tx):
        measures_qs = (
            ReportMeasure.objects.filter(sid__gte=20000000)
            .only("trackedmodel_ptr")
            .select_related("trackedmodel_ptr")
        )
        component_list = []
        for counter, measure in enumerate(measures_qs, 1):
            if verbose and counter % 1000 == 0:
                print(f"Measure count {counter}")
            conditions = (
                measure.trackedmodel_ptr.conditions.latest_approved()
                .with_reference_price_string()
            )
            component_list.extend(
                ReportMeasureCondition(
                    trackedmodel_ptr_id=component.trackedmodel_ptr_id,
                    sid=component.sid,
                    component_sequence_number=component.component_sequence_number,
                    duty_amount=component.duty_amount,
                    action_id=component.action_id,
                    condition_code_id=component.condition_code_id,
                    condition_measurement_id=component.condition_measurement_id,
                    dependent_measure_id=measure.trackedmodel_ptr_id,
                    monetary_unit_id=component.monetary_unit_id,
                    required_certificate_id=component.required_certificate_id,
                    reference_price=component.reference_price_string,
                )
                for component in conditions
            )

        ReportMeasureCondition.objects.bulk_create(component_list)
        if verbose:
            print("Completed Measure condition creation")

        # The required_certificate_id is not updated when the certificate is
        # updated. In the UI this works because the certificate is selected
        # using the SID and "approved up to the last transaction". In data
        # workspace it works because updating a certificate only changes its
        # validity, so the SID is correct even if the data is not read from
        # the latest version. It is not clear what the best fix is; for now
        # the required_certificate_id is patched with the queries below.
        fk_query_list = ReportMeasureCondition.update_fk_queries()
        if fk_query_list:
            with connection.cursor() as cursor:
                for query in fk_query_list:
                    cursor.execute(query)

    if verbose:
        print(f"Elapsed time {time.time() - start}")


def update_measure(verbose):
    """Copy the duty sentence of each measure onto its ReportMeasure row.

    Only ReportMeasure rows with ``sid >= 20000000`` are considered, and a
    row is saved only when the underlying measure yields a non-empty duty
    sentence.

    Args:
        verbose: When true, print a start message and the elapsed time.
    """
    # Reading the latest description fails in a mysterious way unless a
    # current transaction is set; a command does not set one, so the last
    # transaction is used explicitly.
    latest_tx = Transaction.objects.last()
    start = time.time()
    if verbose:
        print("Updating measure")

    with override_current_transaction(latest_tx):
        report_measures = ReportMeasure.objects.filter(sid__gte=20000000)
        report_measures = report_measures.only("trackedmodel_ptr", "duty_sentence")
        report_measures = report_measures.select_related("trackedmodel_ptr")
        for report_measure in report_measures:
            sentence = report_measure.trackedmodel_ptr.duty_sentence
            if not sentence:
                continue
            report_measure.duty_sentence = sentence
            report_measure.save()

    if verbose:
        print(f"Elapsed time {time.time() - start}")
21 changes: 21 additions & 0 deletions open_data/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.2.15 on 2024-10-30 11:07

from django.db import migrations

from open_data.models.utils import schema_required


class Migration(migrations.Migration):
    """Initial migration: create the ``reporting`` schema when required.

    ``schema_required()`` is evaluated while the class body executes; when it
    returns a falsy value the migration has no operations.
    """

    initial = True

    dependencies = []

    operations = (
        [
            migrations.RunSQL(
                sql=["CREATE SCHEMA reporting;"],
                reverse_sql=["DROP SCHEMA reporting;"],
            ),
        ]
        if schema_required()
        else []
    )
Loading
Loading