diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
index 9f6ac9dd21164..6b97d2eb456da 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -406,22 +406,52 @@ def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
     def _get_dataset_column_min(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_min_value:
+        if not self.config.include_field_min_value:
+            return
+        try:
             column_profile.min = str(self.dataset.get_column_min(column))
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get column min for column {column}. {e}"
+            )
+            self.report.report_warning(
+                "Profiling - Unable to get column min",
+                f"{self.dataset_name}.{column}",
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_max(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_max_value:
+        if not self.config.include_field_max_value:
+            return
+        try:
             column_profile.max = str(self.dataset.get_column_max(column))
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get column max for column {column}. {e}"
+            )
+            self.report.report_warning(
+                "Profiling - Unable to get column max",
+                f"{self.dataset_name}.{column}",
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_mean(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_mean_value:
+        if not self.config.include_field_mean_value:
+            return
+        try:
             column_profile.mean = str(self.dataset.get_column_mean(column))
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get column mean for column {column}. {e}"
+            )
+            self.report.report_warning(
+                "Profiling - Unable to get column mean",
+                f"{self.dataset_name}.{column}",
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_median(