ONSdigital · AntonZogk · Jan 27, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
@@ -1,28 +1,20 @@
 name: cml_runtimes
-
+permissions:
+  contents: read
+  pull-requests: read
 on:
   # Triggers the workflow on pull requests to main branch
   pull_request:
     branches: [ main ]
 
 jobs:
-  commit-hooks:
-    runs-on: ubuntu-20.04
+
+  pre-commit:
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v3
-        with:
-          python-version: 3.10.13
-
-      - name: Install Python dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[dev]
-
-      - name: Check commit hooks
-        run: |
-          pre-commit run --all-files
+      - uses: actions/setup-python@v4
+      - uses: pre-commit/action@646c83fcd040023954eafda54b4db0192ce70507 # hash for v3.0.0
 
   testing-cml:
     runs-on: ubuntu-latest

@@ -17,6 +17,10 @@
     "sic_domain_mapping_path": "",
     "threshold_filepath":"",
 
+    "back_data_type":"response_type",
+    "imputation_marker_col":"imputation_marker",
+
+
     "period_selected": 202303,
     "current_period" : 202303,
     "previous_period" : 202302,
@@ -27,15 +31,15 @@
     "calibration_factor": "calibration_factor",
     "cell_number": "cell_no",
     "design_weight": "design_weight",
-    "errormarker": "statusencoded",
+    "status": "statusencoded",
     "form_id_idbr": "formtype",
     "group": "calibration_group",
     "calibration_group": "calibration_group",
     "period": "period",
     "question_no": "questioncode",
     "reference": "reference",
     "region": "region",
-    "sampled": "sampled",
+    "sampled": "is_sampled",
     "state": "frozen",
     "strata": "cell_no",
     "target": "adjustedresponse",
@@ -154,6 +158,6 @@
         "13":"fir"
         },
 
-  "additional_outputs":["create_imputation_link_output"]
+  "additional_outputs":[]
 
 }
@@ -100,9 +100,7 @@ def apply_estimation(
 
     estimation_df = pd.concat(estimation_df_list, ignore_index=True)
 
-    create_population_count_output(
-        estimation_df, period, calibration_group, save_output=True, **config
-    )
+    create_population_count_output(estimation_df, period, save_output=True, **config)
 
     # validate_estimation(estimation_df, **config)
 

@@ -51,6 +51,9 @@ def impute(dataframe: pd.DataFrame, config: dict) -> pd.DataFrame:
             reference=config["reference"],
             target=config["target"],
             period=config["period"],
+            current_period=config["current_period"],
+            revision_period=config["revision_period"],
+            question_no=config["question_no"],
             strata="imputation_class",
             auxiliary=config["auxiliary"],
         )
@@ -66,12 +69,13 @@ def impute(dataframe: pd.DataFrame, config: dict) -> pd.DataFrame:
         question_no=config["question_no"],
         spp_form_id=config["form_id_spp"],
     )
+    target = config["target"]
 
     post_constrain["imputed_and_derived_flag"] = post_constrain.apply(
         lambda row: (
             "d"
             if "sum" in str(row["constrain_marker"]).lower()
-            else row["imputation_flags_adjusted_value"]
+            else row[f"imputation_flags_{target}"]
         ),
         axis=1,
     )

@@ -341,8 +341,9 @@ def ratio_of_means(
     reference: str,
     strata: str,
     auxiliary: str,
-    current_period: str,
-    revision_period: str,
+    current_period: int,
+    revision_period: int,
+    question_no: str,
     filters: pd.DataFrame = None,
     manual_constructions: pd.DataFrame = None,
     imputation_links: Dict[str, str] = {},
@@ -372,6 +373,12 @@ def ratio_of_means(
         Column name containing strata information (sic).
     auxiliary : str
         Column name containing auxiliary information (sic).
+    current_period : int
+        Current period to be imputed, given as an integer.
+    revision_period : int
+        Number of periods for imputation.
+    question_no : str
+        Column name containing question number information.
     filters : pd.DataFrame, optional
         Dataframe with values to exclude from imputation method.
     manual_constructions : pd.DataFrame, optional
@@ -429,7 +436,9 @@ def ratio_of_means(
 
     if manual_constructions is not None:
         # Need to join mc dataframe to original df
-        df = join_manual_constructions(df, manual_constructions, reference, period)
+        df = join_manual_constructions(
+            df, manual_constructions, reference, period, question_no
+        )
 
     if f"{target}_man" in df.columns:
         # Manual Construction
@@ -510,14 +519,3 @@ def calculate_back_data_period(current_period, revision_period) -> str:
         (current_period - pd.DateOffset(months=revision_period)).date().strftime("%Y%m")
     )
     return back_data_period
-
-
-if __name__ == "__main__":
-    from mbs_results.utilities.inputs import load_config
-
-    config = load_config()
-    bdp = calculate_back_data_period(
-        current_period=config["current_period"],
-        revision_period=config["revision_period"],
-    )
-    print(config["current_period"], bdp)
@@ -52,7 +52,6 @@ def get_additional_outputs_df(
             "response",
             "froempment",
             "cell_no",
-            "referencename",
             "imputation_flags_adjustedresponse",
             "f_link_adjustedresponse",
             "b_link_adjustedresponse",

@@ -1,7 +1,8 @@
 import numpy as np
 import pandas as pd
-from staging.merge_domain import merge_domain
-from utilities.utils import convert_column_to_datetime
+
+from mbs_results.staging.merge_domain import merge_domain
+from mbs_results.utilities.utils import convert_column_to_datetime
 
 
 def get_weighted_adj_val_time_series(

@@ -171,7 +171,6 @@ def load_manual_constructions(
     manual_constructions[period] = convert_column_to_datetime(
         manual_constructions[period]
     )
-    manual_constructions[reference] = manual_constructions[reference].astype("str")
     manual_constructions.set_index([reference, period], inplace=True)
 
     validate_manual_constructions(df, manual_constructions)
@@ -186,7 +185,7 @@ def join_manual_constructions(
     manual_constructions: pd.DataFrame,
     reference: str,
     period: str,
-    question_no: str = "question_no",
+    question_no: str,
     **config,
 ):
     """
@@ -205,7 +204,7 @@ def join_manual_constructions(
         the name of the reference column
     period: str
         the name of the period column
-    period: str
+    question_no: str
         the name of the question number column
     **config: Dict
         main pipeline configuration. Can be used to input the entire config dictionary

@@ -184,7 +184,7 @@ def stage_dataframe(config: dict) -> pd.DataFrame:
     df = run_live_or_frozen(
         df,
         config["target"],
-        error_marker=config["errormarker"],
+        status=config["status"],
         state=config["state"],
         error_values=[201],
     )

@@ -1,4 +1,5 @@
 import operator
+import warnings
 from typing import List
 
 import pandas as pd
@@ -162,14 +163,21 @@ def constrain(
     )
     pre_derive_df = pre_derive_df[[target]]
 
-    derived_values = pd.concat(
-        [
-            sum_sub_df(pre_derive_df.loc[form_type], derives["from"])
-            .assign(**{question_no: derives["derive"]})
-            .assign(**{spp_form_id: form_type})
-            for form_type, derives in derive_map.items()
-        ]
-    )
+    derived_values_list = [
+        sum_sub_df(pre_derive_df.loc[form_type], derives["from"])
+        .assign(**{question_no: derives["derive"]})
+        .assign(**{spp_form_id: form_type})
+        for form_type, derives in derive_map.items()
+    ]
+
+    if derived_values_list:
+
+        derived_values = pd.concat(derived_values_list)
+
+    else:
+        warnings.warn("No derived questions created")
+        derived_values = pd.DataFrame(columns=["constrain_marker"])
+
     unique_q_numbers = df[question_no].unique()
     df.set_index([question_no, period, reference], inplace=True)
 
@@ -238,15 +246,20 @@ def derive_questions(
     # Assuming default value of o-weight is 1
     pre_derive_df = pre_derive_df[[target]].fillna(value=0)
 
-    derived_values = pd.concat(
-        [
-            sum_sub_df(pre_derive_df.loc[form_type], derives["from"])
-            .assign(**{question_no: derives["derive"]})
-            .assign(**{spp_form_id: form_type})
-            # Create a task on Backlog to fix this.
-            for form_type, derives in derive_map.items()
-        ]
-    )
+    derived_values_list = [
+        sum_sub_df(pre_derive_df.loc[form_type], derives["from"])
+        .assign(**{question_no: derives["derive"]})
+        .assign(**{spp_form_id: form_type})
+        # Create a task on Backlog to fix this.
+        for form_type, derives in derive_map.items()
+    ]
+    if derived_values_list:
+        derived_values = pd.concat(derived_values_list)
+
+    else:
+        warnings.warn("No derived questions created")
+        derived_values = pd.DataFrame(columns=["constrain_marker"])
+
     unique_q_numbers = df[question_no].unique()
 
     df.set_index([question_no, period, reference], inplace=True)

@@ -25,9 +25,9 @@ install_requires =
     pyyaml
     pandas
     numpy
-    # rdsa-utils
-    # raz-client
-    # boto3
+    rdsa-utils
+    raz-client
+    boto3
 python_requires = >=3.6
 zip_safe = no
 

@@ -0,0 +1,2 @@
+cell_no,calibration_group
+999,9999
@@ -0,0 +1,2 @@
+classification,question_no,l_value
+99999,40,9999999
@@ -0,0 +1,2 @@
+classification,sic_5_digit
+99999,999
@@ -0,0 +1,2 @@
+period,reference,form_type,sic92,error_mkr,response_type
+202112,1,ZZZ,45310,O,1
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+1::999:999:999:999:          999:          999:          999:          999:    999.99:    999:       999:        999:999:999:ZZZ      :ZZZ      :ZZZ     :     999:     999:     999:999:ZZZ:ZZZ:01/01/1900               :ZZZ ZZ                           :                                   :                                   :ZZZ ZZ                           :                                   :                                   :99 ZZZ ZZ               :ZZZ                   :ZZZ ZZZ          :                              :                              :ZZ9 9ZZ :ZZ ZZZ ZZZ                      :                                   :                                   :ZZZ ZZZ               :9999       :9999       :Z: :  999:9999:*      :Z
@@ -0,0 +1 @@
+form,domain,threshold,IDBR_form
@@ -0,0 +1,2 @@
+period,reference,questioncode,adjustedresponse
+202204,1,40,888
@@ -0,0 +1,2 @@
+period,reference,question_no,returned_value,adjusted_value,instance
+202112,1,40,999,999,999
@@ -0,0 +1 @@
+sic_5_digit,domain
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		classification,question_no,l_value
		99999,40,9999999
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		period,reference,form_type,sic92,error_mkr,response_type
		202112,1,ZZZ,45310,O,1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		1::999:999:999:999: 999: 999: 999: 999: 999.99: 999: 999: 999:999:999:ZZZ :ZZZ :ZZZ : 999: 999: 999:999:ZZZ:ZZZ:01/01/1900 :ZZZ ZZ : : :ZZZ ZZ : : :99 ZZZ ZZ :ZZZ :ZZZ ZZZ : : :ZZ9 9ZZ :ZZ ZZZ ZZZ : : :ZZZ ZZZ :9999 :9999 :Z: : 999:9999:* :Z
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		period,reference,questioncode,adjustedresponse
		202204,1,40,888
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		period,reference,question_no,returned_value,adjusted_value,instance
		202112,1,40,999,999,999