Skip to content

Commit

Permalink
Add cross cycle data quality checks (#4695)
Browse files Browse the repository at this point in the history
* Initial cross-cycle data quality changes

* create default cross cycle rules

* update propertyview label name

* rule span

* merge develop

* add target to rule

* change cross-cycle to goal and add action

* order

* update goalnote passed_check based on rule

* update goalnote passed_check based on rule

* add and remove labels based on cross cycle data quality checks

* specify goal and baseline_view when applying cc labels

* add default labels, update rule model fields

* cross cycle rules functional

* labels limited to goal, or none goal

* update test

* checking default single cycle

* results for range and not null

* frontend results

* precommit

* precommit

* fix eui inverse

* add help text

* prevent new goal rules

* precommit

* lint

* test hard coded goal rules

* fix frontend test

* update migration to remove old constraint

* fix tests

* precommit

* remove consoles

* label width bugfix

* help text if not passing checks

* clarifying text

* default goal labels to show in list

* lint

* modify migration

* show all labels

* qaqc

* precommit

* lint

* temp disable wui rules

* lint

* remove WUI data type - introduced in different pr

* precommit

* update rule count

* migration order

* update migration

update migration

precommit

* move spinner utility earlier

---------

Co-authored-by: Alex Swindler <[email protected]>
Co-authored-by: Ross Perry <[email protected]>
Co-authored-by: Ross Perry <[email protected]>
Co-authored-by: Katherine Fleming <[email protected]>
  • Loading branch information
5 people authored Aug 9, 2024
1 parent d19f19e commit af1ac6a
Show file tree
Hide file tree
Showing 35 changed files with 1,315 additions and 143 deletions.
2 changes: 2 additions & 0 deletions seed/api/v3/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from seed.views.v3.properties import PropertyViewSet
from seed.views.v3.property_measures import PropertyMeasureViewSet
from seed.views.v3.property_scenarios import PropertyScenarioViewSet
from seed.views.v3.property_view_labels import PropertyViewLabelViewSet
from seed.views.v3.property_views import PropertyViewViewSet
from seed.views.v3.public import PublicCycleViewSet, PublicOrganizationViewSet
from seed.views.v3.salesforce_configs import SalesforceConfigViewSet
Expand Down Expand Up @@ -102,6 +103,7 @@
api_v3_router.register(r"postoffice_email", PostOfficeEmailViewSet, basename="postoffice_email")
api_v3_router.register(r"progress", ProgressViewSet, basename="progress")
api_v3_router.register(r"properties", PropertyViewSet, basename="properties")
api_v3_router.register(r"property_view_labels", PropertyViewLabelViewSet, basename="property_view_labels")
api_v3_router.register(r"property_views", PropertyViewViewSet, basename="property_views")
api_v3_router.register(r"salesforce_configs", SalesforceConfigViewSet, basename="salesforce_configs")
api_v3_router.register(r"salesforce_mappings", SalesforceMappingViewSet, basename="salesforce_mappings")
Expand Down
45 changes: 34 additions & 11 deletions seed/data_importer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
ColumnMapping,
Cycle,
DataLogger,
Goal,
Meter,
PropertyAuditLog,
PropertyState,
Expand All @@ -86,6 +87,7 @@
from seed.models.data_quality import DataQualityCheck, Rule
from seed.utils.buildings import get_source_type
from seed.utils.geocode import MapQuestAPIKeyError, create_geocoded_additional_columns, geocode_buildings
from seed.utils.goals import get_state_pairs
from seed.utils.match import update_sub_progress_total
from seed.utils.ubid import decode_unique_ids

Expand All @@ -95,17 +97,26 @@


@shared_task(ignore_result=True)
def check_data_chunk(org_id, model, ids, dq_id, goal_id=None):
    """
    Run the data quality checks for one chunk of records and cache the results.

    :param org_id: int, id of the organization the records belong to
    :param model: str, "PropertyState" or "TaxLotState" for single cycle checks, or
        "Property" (together with goal_id) for cross cycle goal checks
    :param ids: list, ids of the `model` records in this chunk
    :param dq_id: identifier the results are cached under
    :param goal_id: int, optional; when present the chunk is checked across cycles
    :raises ValueError: if `model` does not match the kind of check requested
    :return: None
    """
    try:
        organization = Organization.objects.get(id=org_id)
        super_organization = organization.get_parent()
    except Organization.DoesNotExist:
        # Nothing can be checked or cached without the organization.
        return

    # Resolve the input data first so an unsupported model/goal_id combination
    # fails with a clear message instead of an UnboundLocalError further down.
    if goal_id:
        if model != "Property":
            raise ValueError(f"Cross cycle data quality checks require model 'Property', got {model!r}")
        # a list of dicts with property, baseline_state, and current_state
        state_pairs = get_state_pairs(ids, goal_id)
    elif model == "PropertyState":
        qs = PropertyState.objects.filter(id__in=ids)
    elif model == "TaxLotState":
        qs = TaxLotState.objects.filter(id__in=ids)
    else:
        raise ValueError(f"Unsupported model for data quality checks: {model!r}")

    d = DataQualityCheck.retrieve(super_organization.id)
    if goal_id:
        d.check_data_cross_cycle(goal_id, state_pairs)
    else:
        d.check_data(model, qs.iterator())
    d.save_to_cache(dq_id, organization.id)


Expand All @@ -122,13 +133,14 @@ def finish_checking(progress_key):
return progress_data.result()


def do_checks(org_id, propertystate_ids, taxlotstate_ids, import_file_id=None):
def do_checks(org_id, propertystate_ids, taxlotstate_ids, goal_id, import_file_id=None):
"""
Run the dq checks on the data
:param org_id:
:param propertystate_ids:
:param taxlotstate_ids:
:param goal_id:
:param import_file_id: int, if present, find the data to check by the import file id
:return:
"""
Expand All @@ -151,7 +163,7 @@ def do_checks(org_id, propertystate_ids, taxlotstate_ids, import_file_id=None):
.values_list("id", flat=True)
)

tasks = _data_quality_check_create_tasks(org_id, propertystate_ids, taxlotstate_ids, dq_id)
tasks = _data_quality_check_create_tasks(org_id, propertystate_ids, taxlotstate_ids, goal_id, dq_id)
progress_data.total = len(tasks)
progress_data.save()
if tasks:
Expand Down Expand Up @@ -589,7 +601,7 @@ def _map_data_create_tasks(import_file_id, progress_key):
return tasks


def _data_quality_check_create_tasks(org_id, property_state_ids, taxlot_state_ids, dq_id):
def _data_quality_check_create_tasks(org_id, property_state_ids, taxlot_state_ids, goal_id, dq_id):
"""
Entry point into running data quality checks.
Expand All @@ -612,12 +624,23 @@ def _data_quality_check_create_tasks(org_id, property_state_ids, taxlot_state_id
if property_state_ids:
id_chunks = [list(chunk) for chunk in batch(property_state_ids, 100)]
for ids in id_chunks:
tasks.append(check_data_chunk.s("PropertyState", ids, dq_id))
tasks.append(check_data_chunk.s(org_id, "PropertyState", ids, dq_id))

if taxlot_state_ids:
id_chunks_tl = [list(chunk) for chunk in batch(taxlot_state_ids, 100)]
for ids in id_chunks_tl:
tasks.append(check_data_chunk.s("TaxLotState", ids, dq_id))
tasks.append(check_data_chunk.s(org_id, "TaxLotState", ids, dq_id))

if goal_id:
# If goal_id is passed, treat as a cross cycle data quality check.
try:
goal = Goal.objects.get(id=goal_id)
property_ids = goal.properties().values_list("id", flat=True)
id_chunks = [list(chunk) for chunk in batch(property_ids, 100)]
for ids in id_chunks:
tasks.append(check_data_chunk.s(org_id, "Property", ids, dq_id, goal.id))
except Goal.DoesNotExist:
pass

return tasks

Expand Down
214 changes: 214 additions & 0 deletions seed/migrations/0222_cross_cycle_data_quality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
# Generated by Django 3.2.20 on 2023-10-24 21:54

import django.db.models.deletion
from django.db import connection, migrations, models


def forwards(apps, schema_editor):
    """Seed the default goal labels and the default goal data quality rules.

    Each organization gets the six default goal labels and each existing
    DataQualityCheck gets the ten default rules on the "Goal" table. Every row
    goes through ``get_or_create``, so rows that already exist are left alone.
    """
    # Models are resolved through the migration's app registry.
    Organization = apps.get_model("orgs", "Organization")
    Rule = apps.get_model("seed", "Rule")
    DataQualityCheck = apps.get_model("seed", "DataQualityCheck")
    StatusLabel = apps.get_model("seed", "StatusLabel")

    # Names of the default labels for goal rules.
    goal_label_names = (
        "High EUI % Change",
        "Low EUI % Change",
        "High Area",
        "Low Area",
        "High Area % Change",
        "Low Area % Change",
    )

    # Literal values written to the Rule rows, named for readability.
    type_area = 4
    type_eui = 5
    rule_type_default = 0
    severity_error = 0
    rule_not_null = "not_null"
    rule_range = "range"

    def goal_rule(name, field, data_type, condition, **extra):
        """Return the field values of one default rule on the "Goal" table."""
        return {
            "table_name": "Goal",
            "name": name,
            "field": field,
            "data_type": data_type,
            "rule_type": rule_type_default,
            "severity": severity_error,
            "condition": condition,
            **extra,
        }

    goal_rules = [
        # Percent-change rules are the ones flagged cross_cycle.
        goal_rule("High EUI % Change", "eui", type_eui, rule_range, max=40, cross_cycle=True),
        goal_rule("Low EUI % Change", "eui", type_eui, rule_range, min=-40, cross_cycle=True),
        goal_rule("High Area % Change", "area", type_area, rule_range, max=5, cross_cycle=True),
        goal_rule("Low Area % Change", "area", type_area, rule_range, min=-5, cross_cycle=True),
        # Absolute range rules.
        goal_rule("High EUI", "eui", type_eui, rule_range, max=1000),
        goal_rule("Low EUI", "eui", type_eui, rule_range, min=40),
        goal_rule("High Area", "area", type_area, rule_range, max=1000000),
        goal_rule("Low Area", "area", type_area, rule_range, min=1000),
        # Presence rules.
        goal_rule("Missing EUI", "eui", type_eui, rule_not_null),
        goal_rule("Missing Area", "area", type_area, rule_not_null),
    ]

    # Populate the default labels for every organization.
    for organization in Organization.objects.all():
        for label_name in goal_label_names:
            StatusLabel.objects.get_or_create(
                name=label_name,
                super_organization=organization,
                defaults={"color": "blue"},
            )

    # Populate the default goal rules if they do not already exist.
    for data_quality_check in DataQualityCheck.objects.all():
        for rule_fields in goal_rules:
            Rule.objects.get_or_create(data_quality_check=data_quality_check, **rule_fields)


def remove_unique_constraint(apps, schema_editor):
    """Drop the auto-generated unique constraint on (propertyview_id, statuslabel_id).

    The auto-generated unique constraint for PropertyViewLabel covers
    propertyview_id and statuslabel_id only. It is removed here so that the
    following operation can create a new unique constraint that also includes
    the goal.
    """
    PropertyViewLabel = apps.get_model("seed", "PropertyViewLabel")
    table_name = PropertyViewLabel._meta.db_table
    original_columns = {"propertyview_id", "statuslabel_id"}
    quote_name = connection.ops.quote_name

    # A single managed cursor serves both the introspection and the DDL, so it
    # is closed even if one of the statements fails.
    with connection.cursor() as cursor:
        # Get the original unique constraint(s); the name is looked up rather
        # than hard coded.
        constraints = connection.introspection.get_constraints(cursor, table_name)
        constraint_names = [
            name
            for name, details in constraints.items()
            if details.get("unique") and set(details.get("columns") or ()) == original_columns
        ]
        # Remove every match: a leftover two-column constraint would still
        # reject the same label on the same view for different goals.
        for constraint_name in constraint_names:
            cursor.execute(f"ALTER TABLE {quote_name(table_name)} DROP CONSTRAINT {quote_name(constraint_name)};")
    # A new unique constraint will be created in the following operation


class Migration(migrations.Migration):
    # Turns the PropertyView <-> StatusLabel link table into the explicit
    # PropertyViewLabel model, adds a goal to it, extends Rule for cross cycle
    # checks, and seeds the default goal labels and rules.
    dependencies = [
        ("seed", "0221_audittemplateconfig"),
    ]

    # NOTE(review): neither RunPython operation below passes reverse_code, so
    # this migration cannot be unapplied even though the RunSQL step supplies
    # reverse_sql - confirm that is intended.
    operations = [
        # The database only renames the existing link table, while the
        # migration state records a new model plus a `through` on the M2M field.
        migrations.SeparateDatabaseAndState(
            database_operations=[
                migrations.RunSQL(
                    sql="ALTER TABLE seed_propertyview_labels RENAME TO seed_propertyviewlabel",
                    reverse_sql="ALTER TABLE seed_propertyviewlabel RENAME TO seed_propertyview_labels",
                ),
            ],
            state_operations=[
                migrations.CreateModel(
                    name="PropertyViewLabel",
                    fields=[
                        ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                        ("propertyview", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="seed.propertyview")),
                        ("statuslabel", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="seed.statuslabel")),
                    ],
                ),
                migrations.AlterField(
                    model_name="propertyview",
                    name="labels",
                    field=models.ManyToManyField(through="seed.PropertyViewLabel", to="seed.StatusLabel"),
                ),
            ],
        ),
        # Nullable goal on the link row.
        migrations.AddField(
            model_name="propertyviewlabel",
            name="goal",
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to="seed.goal"),
        ),
        migrations.AlterField(
            model_name="rule",
            name="field",
            field=models.CharField(blank=True, max_length=200, null=True),
        ),
        migrations.AddField(
            model_name="rule",
            name="cross_cycle",
            field=models.BooleanField(default=False),
        ),
        # Drop the old two-column unique constraint before adding the one that includes goal.
        migrations.RunPython(remove_unique_constraint),
        migrations.AddConstraint(
            model_name="propertyviewlabel",
            constraint=models.UniqueConstraint(fields=("propertyview", "statuslabel", "goal"), name="unique_propertyview_statuslabel_goal"),
        ),
        # Runs last: the seeded rules set cross_cycle, which is added above.
        migrations.RunPython(forwards),
    ]
Loading

0 comments on commit af1ac6a

Please sign in to comment.