diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 56fd0b4d..187e3e78 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,28 +1,20 @@ name: cml_runtimes - +permissions: + contents: read + pull-requests: read on: # Triggers the workflow on pull requests to main branch pull_request: branches: [ main ] jobs: - commit-hooks: - runs-on: ubuntu-20.04 + + pre-commit: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - - uses: actions/setup-python@v3 - with: - python-version: 3.10.13 - - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install .[dev] - - - name: Check commit hooks - run: | - pre-commit run --all-files + - uses: actions/setup-python@v4 + - uses: pre-commit/action@646c83fcd040023954eafda54b4db0192ce70507 # hash for v3.0.0 testing-cml: runs-on: ubuntu-latest diff --git a/mbs_results/config.json b/mbs_results/config.json index 79bf447d..029ed363 100755 --- a/mbs_results/config.json +++ b/mbs_results/config.json @@ -17,6 +17,10 @@ "sic_domain_mapping_path": "", "threshold_filepath":"", + "back_data_type":"response_type", + "imputation_marker_col":"imputation_marker", + + "period_selected": 202303, "current_period" : 202303, "previous_period" : 202302, @@ -27,7 +31,7 @@ "calibration_factor": "calibration_factor", "cell_number": "cell_no", "design_weight": "design_weight", - "errormarker": "statusencoded", + "status": "statusencoded", "form_id_idbr": "formtype", "group": "calibration_group", "calibration_group": "calibration_group", @@ -35,7 +39,7 @@ "question_no": "questioncode", "reference": "reference", "region": "region", - "sampled": "sampled", + "sampled": "is_sampled", "state": "frozen", "strata": "cell_no", "target": "adjustedresponse", @@ -154,6 +158,6 @@ "13":"fir" }, - "additional_outputs":["create_imputation_link_output"] + "additional_outputs":[] } diff --git a/mbs_results/estimation/apply_estimation.py b/mbs_results/estimation/apply_estimation.py index df02f7fb..12ced8f1 100644 --- a/mbs_results/estimation/apply_estimation.py +++ b/mbs_results/estimation/apply_estimation.py @@ -100,9 +100,7 @@ def apply_estimation( estimation_df = pd.concat(estimation_df_list, ignore_index=True) - create_population_count_output( - estimation_df, period, calibration_group, save_output=True, **config - ) + create_population_count_output(estimation_df, period, save_output=True, **config) # validate_estimation(estimation_df, **config) diff --git a/mbs_results/imputation/impute.py b/mbs_results/imputation/impute.py index ff61ce0b..a3f0f189 100644 --- a/mbs_results/imputation/impute.py +++ b/mbs_results/imputation/impute.py @@ -51,6 +51,9 @@ def impute(dataframe: pd.DataFrame, config: dict) -> pd.DataFrame: reference=config["reference"], target=config["target"], period=config["period"], + current_period=config["current_period"], + revision_period=config["revision_period"], + question_no=config["question_no"], strata="imputation_class", auxiliary=config["auxiliary"], ) @@ -66,12 +69,13 @@ def impute(dataframe: pd.DataFrame, config: dict) -> pd.DataFrame: question_no=config["question_no"], spp_form_id=config["form_id_spp"], ) + target = config["target"] post_constrain["imputed_and_derived_flag"] = post_constrain.apply( lambda row: ( "d" if "sum" in str(row["constrain_marker"]).lower() - else row["imputation_flags_adjusted_value"] + else row[f"imputation_flags_{target}"] ), axis=1, ) diff --git a/mbs_results/imputation/ratio_of_means.py b/mbs_results/imputation/ratio_of_means.py index 66c61f49..3d106cde 100644 --- a/mbs_results/imputation/ratio_of_means.py +++ b/mbs_results/imputation/ratio_of_means.py @@ -341,8 +341,9 @@ def ratio_of_means( reference: str, strata: str, auxiliary: str, - current_period: str, - revision_period: str, + current_period: int, + revision_period: int, + question_no: str, filters: pd.DataFrame = None, manual_constructions: pd.DataFrame = None, imputation_links: Dict[str, str] = {}, @@ -372,6 +373,12 @@ def ratio_of_means( Column name containing strata information (sic). auxiliary : str Column name containing auxiliary information (sic). + current_period: int + Value with current period to be imputed as int. + revision_period: int + Value containing the amount of periods for imputation. + question_no: str + Column name containing question_no filters : pd.DataFrame, optional Dataframe with values to exclude from imputation method. manual_constructions : pd.DataFrame, optional @@ -429,7 +436,9 @@ def ratio_of_means( if manual_constructions is not None: # Need to join mc dataframe to original df - df = join_manual_constructions(df, manual_constructions, reference, period) + df = join_manual_constructions( + df, manual_constructions, reference, period, question_no + ) if f"{target}_man" in df.columns: # Manual Construction @@ -510,14 +519,3 @@ def calculate_back_data_period(current_period, revision_period) -> str: (current_period - pd.DateOffset(months=revision_period)).date().strftime("%Y%m") ) return back_data_period - - -if __name__ == "__main__": - from mbs_results.utilities.inputs import load_config - - config = load_config() - bdp = calculate_back_data_period( - current_period=config["current_period"], - revision_period=config["revision_period"], - ) - print(config["current_period"], bdp) diff --git a/mbs_results/outputs/produce_additional_outputs.py b/mbs_results/outputs/produce_additional_outputs.py index 63a54249..6e9ce58f 100644 --- a/mbs_results/outputs/produce_additional_outputs.py +++ b/mbs_results/outputs/produce_additional_outputs.py @@ -52,7 +52,6 @@ def get_additional_outputs_df( "response", "froempment", "cell_no", - "referencename", "imputation_flags_adjustedresponse", "f_link_adjustedresponse", "b_link_adjustedresponse", diff --git a/mbs_results/outputs/weighted_adj_val_time_series.py b/mbs_results/outputs/weighted_adj_val_time_series.py index e7076f81..abb22893 100644 --- a/mbs_results/outputs/weighted_adj_val_time_series.py +++ b/mbs_results/outputs/weighted_adj_val_time_series.py @@ -1,7 +1,8 @@ import numpy as np import pandas as pd -from staging.merge_domain import merge_domain -from utilities.utils import convert_column_to_datetime + +from mbs_results.staging.merge_domain import merge_domain +from mbs_results.utilities.utils import convert_column_to_datetime def get_weighted_adj_val_time_series( diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py index b9ac7bf2..51e7f32a 100644 --- a/mbs_results/staging/data_cleaning.py +++ b/mbs_results/staging/data_cleaning.py @@ -171,7 +171,6 @@ def load_manual_constructions( manual_constructions[period] = convert_column_to_datetime( manual_constructions[period] ) - manual_constructions[reference] = manual_constructions[reference].astype("str") manual_constructions.set_index([reference, period], inplace=True) validate_manual_constructions(df, manual_constructions) @@ -186,7 +185,7 @@ def join_manual_constructions( manual_constructions: pd.DataFrame, reference: str, period: str, - question_no: str = "question_no", + question_no: str, **config, ): """ @@ -205,7 +204,7 @@ def join_manual_constructions( the name of the reference column period: str the name of the period column - period: str + question_no: str the name of the question number column **config: Dict main pipeline configuration. Can be used to input the entire config dictionary diff --git a/mbs_results/staging/stage_dataframe.py b/mbs_results/staging/stage_dataframe.py index 60beaa3e..cbd84b63 100644 --- a/mbs_results/staging/stage_dataframe.py +++ b/mbs_results/staging/stage_dataframe.py @@ -184,7 +184,7 @@ def stage_dataframe(config: dict) -> pd.DataFrame: df = run_live_or_frozen( df, config["target"], - error_marker=config["errormarker"], + status=config["status"], state=config["state"], error_values=[201], ) diff --git a/mbs_results/utilities/constrains.py b/mbs_results/utilities/constrains.py index 9c6e57de..33474b69 100644 --- a/mbs_results/utilities/constrains.py +++ b/mbs_results/utilities/constrains.py @@ -1,4 +1,5 @@ import operator +import warnings from typing import List import pandas as pd @@ -162,14 +163,21 @@ def constrain( ) pre_derive_df = pre_derive_df[[target]] - derived_values = pd.concat( - [ - sum_sub_df(pre_derive_df.loc[form_type], derives["from"]) - .assign(**{question_no: derives["derive"]}) - .assign(**{spp_form_id: form_type}) - for form_type, derives in derive_map.items() - ] - ) + derived_values_list = [ + sum_sub_df(pre_derive_df.loc[form_type], derives["from"]) + .assign(**{question_no: derives["derive"]}) + .assign(**{spp_form_id: form_type}) + for form_type, derives in derive_map.items() + ] + + if derived_values_list: + + derived_values = pd.concat(derived_values_list) + + else: + warnings.warn("No derived questions created") + derived_values = pd.DataFrame(columns=["constrain_marker"]) + unique_q_numbers = df[question_no].unique() df.set_index([question_no, period, reference], inplace=True) @@ -238,15 +246,20 @@ def derive_questions( # Assuming default value of o-weight is 1 pre_derive_df = pre_derive_df[[target]].fillna(value=0) - derived_values = pd.concat( - [ - sum_sub_df(pre_derive_df.loc[form_type], derives["from"]) - .assign(**{question_no: derives["derive"]}) - .assign(**{spp_form_id: form_type}) - # Create a task on Backlog to fix this. - for form_type, derives in derive_map.items() - ] - ) + derived_values_list = [ + sum_sub_df(pre_derive_df.loc[form_type], derives["from"]) + .assign(**{question_no: derives["derive"]}) + .assign(**{spp_form_id: form_type}) + # Create a task on Backlog to fix this. + for form_type, derives in derive_map.items() + ] + if derived_values_list: + derived_values = pd.concat(derived_values_list) + + else: + warnings.warn("No derived questions created") + derived_values = pd.DataFrame(columns=["constrain_marker"]) + unique_q_numbers = df[question_no].unique() df.set_index([question_no, period, reference], inplace=True) diff --git a/setup.cfg b/setup.cfg index b28becfc..c00b3be9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,9 +25,9 @@ install_requires = pyyaml pandas numpy - # rdsa-utils - # raz-client - # boto3 + rdsa-utils + raz-client + boto3 python_requires = >=3.6 zip_safe = no diff --git a/tests/data/test_main/input/test_cell_no_calibration_group_mapping.csv b/tests/data/test_main/input/test_cell_no_calibration_group_mapping.csv new file mode 100755 index 00000000..e5523398 --- /dev/null +++ b/tests/data/test_main/input/test_cell_no_calibration_group_mapping.csv @@ -0,0 +1,2 @@ +cell_no,calibration_group +999,9999 diff --git a/tests/data/test_main/input/test_classification_question_number_l_value_mapping.csv b/tests/data/test_main/input/test_classification_question_number_l_value_mapping.csv new file mode 100755 index 00000000..3a8ef197 --- /dev/null +++ b/tests/data/test_main/input/test_classification_question_number_l_value_mapping.csv @@ -0,0 +1,2 @@ +classification,question_no,l_value +99999,40,9999999 diff --git a/tests/data/test_main/input/test_classification_sic_mapping.csv b/tests/data/test_main/input/test_classification_sic_mapping.csv new file mode 100755 index 00000000..abd7e60c --- /dev/null +++ b/tests/data/test_main/input/test_classification_sic_mapping.csv @@ -0,0 +1,2 @@ +classification,sic_5_digit +99999,999 diff --git a/tests/data/test_main/input/test_cp_009_202112.csv b/tests/data/test_main/input/test_cp_009_202112.csv new file mode 100755 index 00000000..b5fa9248 --- /dev/null +++ b/tests/data/test_main/input/test_cp_009_202112.csv @@ -0,0 +1,2 @@ +period,reference,form_type,sic92,error_mkr,response_type +202112,1,ZZZ,45310,O,1 diff --git a/tests/data/test_main/input/test_finalsel009_202112 b/tests/data/test_main/input/test_finalsel009_202112 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202112 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202201 b/tests/data/test_main/input/test_finalsel009_202201 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202201 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202202 b/tests/data/test_main/input/test_finalsel009_202202 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202202 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202203 b/tests/data/test_main/input/test_finalsel009_202203 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202203 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202204 b/tests/data/test_main/input/test_finalsel009_202204 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202204 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202205 b/tests/data/test_main/input/test_finalsel009_202205 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202205 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_finalsel009_202206 b/tests/data/test_main/input/test_finalsel009_202206 new file mode 100755 index 00000000..c4576361 --- /dev/null +++ b/tests/data/test_main/input/test_finalsel009_202206 @@ -0,0 +1 @@ +1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z diff --git a/tests/data/test_main/input/test_form_domain_threshold_mapping.csv b/tests/data/test_main/input/test_form_domain_threshold_mapping.csv new file mode 100755 index 00000000..0152fcf9 --- /dev/null +++ b/tests/data/test_main/input/test_form_domain_threshold_mapping.csv @@ -0,0 +1 @@ +form,domain,threshold,IDBR_form diff --git a/tests/data/test_main/input/test_manual_constructions.csv b/tests/data/test_main/input/test_manual_constructions.csv new file mode 100755 index 00000000..eadd1cd1 --- /dev/null +++ b/tests/data/test_main/input/test_manual_constructions.csv @@ -0,0 +1,2 @@ +period,reference,questioncode,adjustedresponse +202204,1,40,888 diff --git a/tests/data/test_main/input/test_qv_009_202112.csv b/tests/data/test_main/input/test_qv_009_202112.csv new file mode 100755 index 00000000..33fb4743 --- /dev/null +++ b/tests/data/test_main/input/test_qv_009_202112.csv @@ -0,0 +1,2 @@ +period,reference,question_no,returned_value,adjusted_value,instance +202112,1,40,999,999,999 diff --git a/tests/data/test_main/input/test_sic_domain_mapping.csv b/tests/data/test_main/input/test_sic_domain_mapping.csv new file mode 100755 index 00000000..3ed9d0c1 --- /dev/null +++ b/tests/data/test_main/input/test_sic_domain_mapping.csv @@ -0,0 +1 @@ +sic_5_digit,domain diff --git a/tests/data/test_main/input/test_snaphot.json b/tests/data/test_main/input/test_snaphot.json new file mode 100644 index 00000000..e8346184 --- /dev/null +++ b/tests/data/test_main/input/test_snaphot.json @@ -0,0 +1,15 @@ +{"snapshot_id": "test_data", "contributors": [ +{"reference":1,"period":202201,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202202,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202203,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202204,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202205,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202206,"survey":9,"formid":1,"status":"Clear","statusencoded":999,"receiptdate":null,"lockedby":null,"lockeddate":null,"formtype":999,"checkletter":null,"frozensicoutdated":999,"rusicoutdated":999,"frozensic":999,"rusic":999,"frozenemployees":999,"employees":999,"frozenemployment":999,"employment":999,"frozenfteemployment":999,"fteemployment":999,"frozenturnover":999,"turnover":999,"enterprisereference":999,"wowenterprisereference":999,"cellnumber":999,"currency":null,"vatreference":null,"payereference":null,"companyregistrationnumber":null,"numberlivelocalunits":999,"numberlivevat":999,"numberlivepaye":999,"legalstatus":999,"reportingunitmarker":null,"region":null,"birthdate":null,"referencename":null,"referencepostcode":null,"tradingstyle":null,"selectiontype":null,"inclusionexclusion":" ","createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null} +], +"responses":[ +{"reference":1,"period":202201,"survey":9,"questioncode":40,"response":9999.0,"adjustedresponse":9999.0,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202202,"survey":9,"questioncode":40,"response":null,"adjustedresponse":9999.0,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202203,"survey":9,"questioncode":40,"response":9999.0,"adjustedresponse":null,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202204,"survey":9,"questioncode":40,"response":null,"adjustedresponse":null,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202205,"survey":9,"questioncode":40,"response":null,"adjustedresponse":null,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}, +{"reference":1,"period":202206,"survey":9,"questioncode":40,"response":9999.0,"adjustedresponse":9999.0,"instance":null,"createdby":null,"createddate":null,"lastupdatedby":null,"lastupdateddate":null}]} diff --git a/tests/data/test_main/input/test_universe009_202112 b/tests/data/test_main/input/test_universe009_202112 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202112 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202201 b/tests/data/test_main/input/test_universe009_202201 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202201 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202202 b/tests/data/test_main/input/test_universe009_202202 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202202 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202203 b/tests/data/test_main/input/test_universe009_202203 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202203 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202204 b/tests/data/test_main/input/test_universe009_202204 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202204 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202205 b/tests/data/test_main/input/test_universe009_202205 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202205 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/input/test_universe009_202206 b/tests/data/test_main/input/test_universe009_202206 new file mode 100755 index 00000000..cec2f21b --- /dev/null +++ b/tests/data/test_main/input/test_universe009_202206 @@ -0,0 +1 @@ +1:ZZZ:999:999:999:999:999:999:999: 999: 999: 999: 999: 999.99: 999.99: 999: 999:ZZ:999:999:ZZ:ZZ: 999: 999: 999: : : 999:Z: diff --git a/tests/data/test_main/output/.gitignore b/tests/data/test_main/output/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/tests/data/test_main/output/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/tests/helper_functions.py b/tests/helper_functions.py index aa7a41f7..e214ff5d 100644 --- a/tests/helper_functions.py +++ b/tests/helper_functions.py @@ -1,3 +1,4 @@ +import json from contextlib import contextmanager from pathlib import Path @@ -30,3 +31,15 @@ def load_filter(filter_path): @contextmanager def does_not_raise(): yield + + +def create_testing_config(file_paths): + """Copy config to testing directory and update file_paths""" + with open("mbs_results/config.json") as f: + config = json.load(f) + + config.update(file_paths) + + with open("config.json", "w") as f: + + json.dump(config, f) diff --git a/tests/imputation/test_ratio_of_means.py b/tests/imputation/test_ratio_of_means.py index 70d70cc3..4e3b767e 100644 --- a/tests/imputation/test_ratio_of_means.py +++ b/tests/imputation/test_ratio_of_means.py @@ -83,6 +83,7 @@ def test_ratio_of_means(self, base_file_name): reference="identifier", strata="group", auxiliary="other", + question_no="questioncode", filters=filter_df, imputation_links={ "forward": "f_link_question", @@ -100,6 +101,7 @@ def test_ratio_of_means(self, base_file_name): reference="identifier", strata="group", auxiliary="other", + question_no="questioncode", filters=filter_df, current_period=202001, revision_period=10, @@ -195,6 +197,7 @@ def test_manual_construction_input(self, mc_base_file_name): reference="identifier", strata="group", auxiliary="other", + question_no="question_no", manual_constructions=manual_constructions, current_period=202001, revision_period=10, diff --git a/tests/imputation/test_ratio_of_means_back_data.py b/tests/imputation/test_ratio_of_means_back_data.py index bcc011f9..c5288df8 100644 --- a/tests/imputation/test_ratio_of_means_back_data.py +++ b/tests/imputation/test_ratio_of_means_back_data.py @@ -39,6 +39,7 @@ def test_ratio_of_means_back_data(self, base_file_name): reference="identifier", strata="group", auxiliary="other", + question_no="questioncode", current_period=202003, revision_period=2, ) diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 00000000..ab02c382 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,36 @@ +from helper_functions import create_testing_config + +from mbs_results.main import run_mbs_main + +input_path = "tests/data/test_main/input/" + +test_config = { + "calibration_group_map_path": input_path + + "test_cell_no_calibration_group_mapping.csv", + "classification_values_path": input_path + "test_classification_sic_mapping.csv", + "folder_path": input_path, + "l_values_path": input_path + + "test_classification_question_number_l_value_mapping.csv", + "manual_constructions_path": input_path + "test_manual_constructions.csv", + "mbs_file_name": "test_snaphot.json", + "output_path": "tests/data/test_main/output/", + "population_path": input_path + "test_universe009_*", + "sample_path": input_path + "test_finalsel009_*", + "back_data_qv_path": input_path + "test_qv_009_202112.csv", + "back_data_cp_path": input_path + "test_cp_009_202112.csv", + "back_data_finalsel_path": input_path + "test_finalsel009_202112", + "period_selected": 202206, + "current_period": 202206, + "previous_period": 202205, + "revision_period": 6, +} + + +def test_main(): + """Testing if main works, this test aims to check if all methods are + integrated together. Updating config to match testing data, also saving + config in tests directory. + """ + create_testing_config(test_config) + + run_mbs_main()