ARM-DOE · zssherman · Jul 29, 2024 · Jul 16, 2024 · Jul 16, 2024 · Jul 16, 2024
@@ -248,7 +248,7 @@ def read_arm_netcdf(
             file_dates.append(pts[2])
             file_times.append(pts[3])
         else:
-            if ds['time'].size > 1:
+            if len(ds['time'].shape) > 0:
                 dummy = ds['time'].values[0]
             else:
                 dummy = ds['time'].values

@@ -15,6 +15,7 @@
         'clean',
         'comparison_tests',
         'qcfilter',
+        'qc_summary',
         'qctests',
         'radiometer_tests',
         'sp2',
@@ -27,6 +28,7 @@
             'set_bit',
             'unset_bit',
         ],
+        'qc_summary': ['QCSummary'],
         'qctests': [
             'QCTests',
         ],

@@ -185,6 +185,14 @@ def add_dqr_to_qc(
             if variable is not None and var_name not in variable:
                 continue
 
+            # Do not process quality control variables as this will create a new
+            # quality control variable for the quality control varible.
+            try:
+                if ds[var_name].attrs['standard_name'] == 'quality_flag':
+                    continue
+            except KeyError:
+                pass
+
             try:
                 ds.qcfilter.add_test(
                     var_name,

@@ -91,6 +91,7 @@ def cleanup(
         link_qc_variables=True,
         normalize_assessment=False,
         cleanup_cf_qc=True,
+        cleanup_incorrect_qc_attributes=True,
         **kwargs,
     ):
         """
@@ -118,6 +119,9 @@ def cleanup(
             Option to clean up assessments to use the same terminology. Set to
             False for default because should only be an issue after adding DQRs
             and the function to add DQRs calls this method.
+        cleanup_incorrect_qc_attributes : bool
+            Fix incorrectly named quality control variable attributes before
+            converting to standardized QC.
         **kwargs : keywords
             Keyword arguments passed through to clean.clean_arm_qc
             method.
@@ -131,6 +135,12 @@ def cleanup(
                 ds.clean.cleanup()
 
         """
+        # There are some QC variables with incorrect bit_#_description attribute names.
+        # This will check for the incorrect attribute names and correct to allow next
+        # process to work correctly
+        if cleanup_incorrect_qc_attributes:
+            self._ds.clean.fix_incorrect_variable_bit_description_attributes()
+
         # Convert ARM QC to be more like CF state fields
         if cleanup_arm_qc:
             self._ds.clean.clean_arm_qc(**kwargs)
@@ -579,10 +589,7 @@ def correct_valid_minmax(self, qc_variable):
 
     def link_variables(self):
         """
-        Add some attributes to link and explain data
-        to QC data relationship. Will use non-CF standard_name
-        of quality_flag. Hopefully this will be added to the
-        standard_name table in the future.
+        Add some attributes to link and explain data to QC data relationship.
         """
         for var in self._ds.data_vars:
             aa = re.match(r'^qc_(.+)', var)
@@ -591,9 +598,10 @@ def link_variables(self):
                 qc_variable = var
             except AttributeError:
                 continue
+
             # Skip data quality fields.
             try:
-                if "Quality check results on field:" not in self._ds[var].attrs["long_name"]:
+                if not self._ds[var].attrs["long_name"].startswith("Quality check results on"):
                     continue
             except KeyError:
                 pass
@@ -607,7 +615,11 @@ def link_variables(self):
             # If the QC variable is not in ancillary_variables add
             if qc_variable not in ancillary_variables:
                 ancillary_variables = qc_variable
-            self._ds[variable].attrs['ancillary_variables'] = copy.copy(ancillary_variables)
+
+            try:
+                self._ds[variable].attrs['ancillary_variables'] = copy.copy(ancillary_variables)
+            except KeyError:
+                pass
 
             # Check if QC variable has correct standard_name and iff not fix it.
             correct_standard_name = 'quality_flag'
@@ -648,6 +660,7 @@ def clean_arm_qc(
 
         """
         global_qc = self.get_attr_info()
+        qc_attributes = None
         for qc_var in self.matched_qc_variables:
             # Clean up units attribute from unitless to udunits '1'
             try:
@@ -749,6 +762,32 @@ def clean_arm_qc(
                             qc_var_name=qc_var_name, test_number=test_to_remove
                         )
 
+        # If the QC was not cleaned up because it is not correctly formatted with SERI QC
+        # call the SERI QC method.
+        if global_qc is None and qc_attributes is None:
+            try:
+                DQMS = self._ds.attrs['qc_method'] == 'DQMS'
+                self._ds.attrs['comment']
+            except KeyError:
+                try:
+                    DQMS = 'sirs_seriqc' in self._ds.attrs['Command_Line']
+                except KeyError:
+                    DQMS = False
+
+            if DQMS:
+                self._ds.clean.clean_seri_qc()
+
+        # If the QC was not cleaned up because it is not correctly formatted with
+        # SWATS global attributes call the SWATS QC method.
+        try:
+            text = 'SWATS QC checks (bit values)'
+            SWATS_QC = text in self._ds.attrs['Mentor_QC_Field_Information']
+        except KeyError:
+            SWATS_QC = False
+
+        if SWATS_QC and global_qc is None and qc_attributes is None:
+            self._ds.clean.clean_swats_qc()
+
     def normalize_assessment(
         self,
         variables=None,
@@ -896,3 +935,205 @@ def clean_cf_qc(self, variables=None, sep='__', **kwargs):
 
             except KeyError:
                 pass
+
+    def fix_incorrect_variable_bit_description_attributes(self):
+        """
+        Method to correct incorrectly defined quality control variable attributes.
+        There are some datastreams with the attribute names incorrectly having 'qc_'
+        prepended to the attribute name. This will fix those attributes so the cleanqc
+        method can correctly read the attributes.
+
+        If the variable long_name starts with the string "Quality check results on"
+        and a variable attribute follows the pattern qc_bit_#_description the 'qc_' part of
+        the variable attribute will be removed.
+
+        """
+
+        attr_description_pattern = r'^qc_bit_([0-9]+)_description$'
+        attr_assessment_pattern = r'^qc_bit_([0-9]+)_assessment$'
+
+        for var_name in self._ds.data_vars:
+            try:
+                if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"):
+                    continue
+            except KeyError:
+                continue
+
+            for attr, value in self._ds[var_name].attrs.copy().items():
+                for pattern in [attr_description_pattern, attr_assessment_pattern]:
+                    description = re.match(pattern, attr)
+                    if description is not None:
+                        new_attr = attr[3:]
+                        self._ds[var_name].attrs[new_attr] = self._ds[var_name].attrs.pop(attr)
+
+    def clean_seri_qc(self):
+        """
+        Method to apply SERI QC to the quality control variables. The definition of the QC
+        is listed in a single global attribute and not easily parsable. This method will update
+        the quality control variable to correctly set the test descriptions for each of the
+        SERI QC tests defined in the global attributes.
+
+        """
+        for var_name in self._ds.data_vars:
+            if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"):
+                continue
+
+            qc_var_name = var_name
+            var_name = var_name.replace('qc_', '')
+            qc_data = self._ds[qc_var_name].values.copy()
+            self._ds[qc_var_name] = xr.zeros_like(self._ds[qc_var_name], dtype=np.int32)
+
+            if qc_var_name in [
+                "qc_down_short_diffuse",
+                "qc_short_direct_normal",
+                "qc_down_short_hemisp",
+            ]:
+                value_number = [1, 2, 3, 6, 7, 8, 9, 94, 95, 96, 97]
+                test_number = list(range(2, len(value_number) + 2))
+                test_description = [
+                    'Passed 1-component test; data fall within max-min limits of Kt,Kn, or Kd',
+                    'Passed 2-component test; data fall within 0.03 of the Gompertz boundaries',
+                    'Passed 3-component test; data come within +/- 0.03 of satifying Kt=Kn+Kd',
+                    'Value estimated; passes all pertinent SERI QC tests',
+                    'Failed 1-component test; lower than allowed minimum',
+                    'Falied 1-component test; higher than allowed maximum',
+                    'Passed 3-component test but failed 2-component test by >0.05',
+                    'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.05 to 0.10.',
+                    'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.10 to 0.15.',
+                    'Data fall into a physically impossible region where Kn>Kt by K-space distances of 0.15 to 0.20.',
+                    'Data fall into a physically impossible region where Kn>Kt by K-space distances of >= 0.20.',
+                ]
+                test_assessment = [
+                    'Not failing',
+                    'Not failing',
+                    'Not failing',
+                    'Not failing',
+                    'Bad',
+                    'Bad',
+                    'Indeterminate',
+                    'Bad',
+                    'Bad',
+                    'Bad',
+                    'Bad',
+                ]
+            elif qc_var_name in ["qc_up_long_hemisp", "qc_down_long_hemisp_shaded"]:
+                value_number = [1, 2, 7, 8, 31]
+                test_number = list(range(2, len(value_number) + 2))
+                test_description = [
+                    'Passed 1-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, but short_direct_normal and down_short_hemisp or down_short_diffuse fail the SERI QC tests.',
+                    'Passed 2-component test; data fall within max-min limits of up_long_hemisp and down_long_hemisp_shaded, and short_direct_normal, or down_short_hemisp and down_short_diffuse pass the SERI QC tests while the difference between down_short_hemisp and down_short_diffuse is greater than 20 W/m2.',
+                    'Failed 1-component test; lower than allowed minimum',
+                    'Failed 1-component test; higher than allowed maximum',
+                    'Failed 2-component test',
+                ]
+                test_assessment = [
+                    'Not failing',
+                    'Not failing',
+                    'Bad',
+                    'Bad',
+                    'Bad',
+                ]
+            elif qc_var_name in ["qc_up_short_hemisp"]:
+                value_number = [1, 2, 7, 8, 31]
+                test_number = list(range(2, len(value_number) + 2))
+                test_description = [
+                    'Passed 1-component test',
+                    'Passed 2-component test',
+                    'Failed 1-component test; lower than allowed minimum',
+                    'Failed 1-component test; higher than allowed maximum',
+                    'Failed 2-component test; solar zenith angle is less than 80 degrees and down_short_hemisp is 0 or missing',
+                ]
+                test_assessment = [
+                    'Not failing',
+                    'Not failing',
+                    'Bad',
+                    'Bad',
+                    'Bad',
+                ]
+
+            self._ds[var_name].attrs['ancillary_variables'] = qc_var_name
+            self._ds[qc_var_name].attrs['standard_name'] = 'quality_flag'
+            self._ds[qc_var_name].attrs['flag_masks'] = []
+            self._ds[qc_var_name].attrs['flag_meanings'] = []
+            self._ds[qc_var_name].attrs['flag_assessments'] = []
+
+            self._ds.qcfilter.add_missing_value_test(var_name)
+
+            for ii, _ in enumerate(value_number):
+                index = qc_data == value_number[ii]
+                self._ds.qcfilter.add_test(
+                    var_name,
+                    index=index,
+                    test_number=test_number[ii],
+                    test_meaning=test_description[ii],
+                    test_assessment=test_assessment[ii],
+                )
+
+            if qc_var_name in [
+                "qc_down_short_diffuse",
+                "qc_short_direct_normal",
+                "qc_down_short_hemisp",
+            ]:
+                calculation = ((qc_data + 2) / 4.0) % 4
+                calculation = calculation.astype(np.int16)
+                value_number = [0, 1, 2, 3]
+                test_description = [
+                    'Parameter too low by 3-component test (Kt=Kn+Kd)',
+                    'Parameter too high by 3-component test (Kt=Kn+Kd)',
+                    'Parameter too low by 2-component test (Gompertz boundary)',
+                    'Parameter too high by 2-component test (Gompertz boundary)',
+                ]
+                test_assessment = ['Bad', 'Bad', 'Bad', 'Bad']
+                for ii, _ in enumerate(value_number):
+                    index = (qc_data >= 10) & (qc_data <= 93) & (calculation == value_number[ii])
+                    self._ds.qcfilter.add_test(
+                        var_name,
+                        index=index,
+                        test_meaning=test_description[ii],
+                        test_assessment=test_assessment[ii],
+                    )
+
+    def clean_swats_qc(self, fix_data_units=True):
+        """
+        Method to apply SWATS global attribute quality control definition to the
+        quality control variables.
+
+        Parameters
+        ----------
+        fix_data_units : bool
+            The units string for some data variables incorrectly defines degrees Celsius
+            as 'C' insted of the udunits 'degC'. When set to true those units strings
+            are updated.
+
+        """
+
+        for var_name in self._ds.data_vars:
+            if fix_data_units:
+                try:
+                    unit = self._ds[var_name].attrs['units']
+                    if unit == 'C':
+                        self._ds[var_name].attrs['units'] = 'degC'
+                except KeyError:
+                    pass
+
+            if not self._ds[var_name].attrs['long_name'].startswith("Quality check results on"):
+                continue
+
+            qc_var_name = var_name
+            self._ds[qc_var_name].attrs['flag_masks'] = [1, 2, 4, 8]
+            self._ds[qc_var_name].attrs['flag_meanings'] = [
+                'Value is set to missing_value.',
+                'Data value less than valid_min.',
+                'Data value greater than valid_max.',
+                'Difference between current and previous values exceeds valid_delta.',
+            ]
+            self._ds[qc_var_name].attrs['flag_assessments'] = [
+                'Bad',
+                'Bad',
+                'Bad',
+                'Indeterminate',
+            ]
+
+            self._ds.clean.correct_valid_minmax(qc_var_name)
+
+        del self._ds.attrs['Mentor_QC_Field_Information']