diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py index eb431f31d..e4756f77b 100644 --- a/src/ydata_profiling/config.py +++ b/src/ydata_profiling/config.py @@ -191,7 +191,7 @@ def primary_color(self) -> str: return self.primary_colors[0] # Primary color used for comparisons (default: blue, red, green) - primary_colors: List[str] = ["#377eb8", "#e41a1c", "#4daf4a"] + primary_colors: List[str] = ["#0d6efd", "#dc3545", "#198754"] # Base64-encoded logo image logo: str = "" diff --git a/src/ydata_profiling/config_default.yaml b/src/ydata_profiling/config_default.yaml index 5d84d9663..a72b8bdc2 100644 --- a/src/ydata_profiling/config_default.yaml +++ b/src/ydata_profiling/config_default.yaml @@ -1,5 +1,5 @@ # Title of the document -title: "Pandas Profiling Report" +title: YData Profiling Report # Metadata dataset: @@ -27,93 +27,97 @@ progress_bar: true # Per variable type description settings vars: - num: - quantiles: - - 0.05 - - 0.25 - - 0.5 - - 0.75 - - 0.95 - skewness_threshold: 20 - low_categorical_threshold: 5 - # Set to zero to disable - chi_squared_threshold: 0.999 - cat: - length: true - characters: true - words: true - cardinality_threshold: 50 - n_obs: 5 - # Set to zero to disable - chi_squared_threshold: 0.999 - coerce_str_to_date: false - redact: false - histogram_largest: 50 - stop_words: [] - bool: - n_obs: 3 - # string to boolean mapping dict - mappings: - t: true - f: false - yes: true - no: false - y: true - n: false - true: true - false: false - file: - active: false - image: - active: false - exif: true - hash: true - path: - active: false - url: - active: false - timeseries: - active: false - autocorrelation: 0.7 - lags: [1, 7, 12, 24, 30] - significance: 0.05 - pacf_acf_lag: 100 + num: + quantiles: + - 0.05 + - 0.25 + - 0.5 + - 0.75 + - 0.95 + skewness_threshold: 20 + low_categorical_threshold: 5 + # Set to zero to disable + chi_squared_threshold: 0.999 + cat: + length: true + characters: true + words: true + cardinality_threshold: 50 + n_obs: 5 + # Set to zero to disable + chi_squared_threshold: 0.999 + coerce_str_to_date: false + redact: false + histogram_largest: 50 + stop_words: [] + bool: + n_obs: 3 + # string to boolean mapping dict + mappings: + t: true + f: false + yes: true + no: false + y: true + n: false + "true": true + "false": false + file: + active: false + image: + active: false + exif: true + hash: true + path: + active: false + url: + active: false + timeseries: + active: false + autocorrelation: 0.7 + lags: + - 1 + - 7 + - 12 + - 24 + - 30 + significance: 0.05 + pacf_acf_lag: 100 # Sort the variables. Possible values: "ascending", "descending" or null (leaves original sorting) sort: null # which diagrams to show missing_diagrams: - bar: true - matrix: true - heatmap: true + bar: true + matrix: true + heatmap: true correlations: - pearson: - calculate: false - warn_high_correlations: true - threshold: 0.9 - spearman: - calculate: false - warn_high_correlations: false - threshold: 0.9 - kendall: - calculate: false - warn_high_correlations: false - threshold: 0.9 - phi_k: - calculate: false - warn_high_correlations: false - threshold: 0.9 - cramers: - calculate: false - warn_high_correlations: true - threshold: 0.9 - auto: - calculate: true - warn_high_correlations: true - threshold: 0.9 - + pearson: + calculate: false + warn_high_correlations: true + threshold: 0.9 + spearman: + calculate: false + warn_high_correlations: false + threshold: 0.9 + kendall: + calculate: false + warn_high_correlations: false + threshold: 0.9 + phi_k: + calculate: false + warn_high_correlations: false + threshold: 0.9 + cramers: + calculate: false + warn_high_correlations: true + threshold: 0.9 + auto: + calculate: true + warn_high_correlations: true + threshold: 0.9 # Bivariate / Pairwise relations interactions: @@ -128,38 +132,38 @@ report: # Plot-specific settings plot: - # Image format (svg or png) - image_format: "svg" - dpi: 800 + # Image format (svg or png) + image_format: svg + dpi: 800 - scatter_threshold: 1000 + scatter_threshold: 1000 - correlation: - cmap: 'RdBu' - bad: '#000000' + correlation: + cmap: RdBu + bad: "#000000" - missing: - cmap: 'RdBu' - # Force labels when there are > 50 variables - # https://github.com/ResidentMario/missingno/issues/93#issuecomment-513322615 - force_labels: true + missing: + cmap: RdBu + # Force labels when there are > 50 variables + # https://github.com/ResidentMario/missingno/issues/93#issuecomment-513322615 + force_labels: true - cat_frequency: - show: true # if false, the category frequency plot is turned off - type: 'bar' # options: 'bar', 'pie' - max_unique: 10 - colors: null # use null for default or give a list of matplotlib recognised strings + cat_frequency: + show: true # if false, the category frequency plot is turned off + type: bar # options: 'bar', 'pie' + max_unique: 10 + colors: null # use null for default or give a list of matplotlib recognized strings - histogram: - x_axis_labels: true + histogram: + x_axis_labels: true - # Number of bins (set to 0 to automatically detect the bin size) - bins: 50 + # Number of bins (set to 0 to automatically detect the bin size) + bins: 50 - # Maximum number of bins (when bins=0) - max_bins: 250 + # Maximum number of bins (when bins=0) + max_bins: 250 - font_path: null + font_path: null # The number of observations to show n_obs_unique: 5 @@ -171,49 +175,49 @@ memory_deep: false # Configuration related to the duplicates duplicates: - head: 10 - key: "# duplicates" + head: 10 + key: "# duplicates" # Configuration related to the samples area samples: - head: 10 - tail: 10 - random: 0 + head: 10 + tail: 10 + random: 0 # Configuration related to the rejection of variables reject_variables: true # When in a Jupyter notebook notebook: - iframe: - height: '800px' - width: '100%' - # or 'src' - attribute: 'srcdoc' + iframe: + height: 800px + width: 100% + # or 'src' + attribute: srcdoc html: - # Minify the html - minify_html: true + # Minify the html + minify_html: true - # Offline support - use_local_assets: true + # Offline support + use_local_assets: true - # If true, single file, else directory with assets - inline: true + # If true, single file, else directory with assets + inline: true - # Show navbar - navbar_show: true + # Show navbar + navbar_show: true - # Assets prefix if inline = true - assets_prefix: null + # Assets prefix if inline = true + assets_prefix: null - # Styling options for the HTML report - style: - theme: null - logo: "" - primary_colors: - - "#377eb8" - - "#e41a1c" - - "#4daf4a" + # Styling options for the HTML report + style: + theme: null + logo: "" + primary_colors: + - "#0d6efd" + - "#dc3545" + - "#198754" - full_width: false + full_width: false diff --git a/src/ydata_profiling/config_minimal.yaml b/src/ydata_profiling/config_minimal.yaml index d588ce037..a3cb46211 100644 --- a/src/ydata_profiling/config_minimal.yaml +++ b/src/ydata_profiling/config_minimal.yaml @@ -1,5 +1,5 @@ # Title of the document -title: "Pandas Profiling Report" +title: YData Profiling Report # Metadata dataset: @@ -27,94 +27,97 @@ progress_bar: true # Per variable type description settings vars: - num: - quantiles: - - 0.05 - - 0.25 - - 0.5 - - 0.75 - - 0.95 - skewness_threshold: 20 - low_categorical_threshold: 5 - # Set to zero to disable - chi_squared_threshold: 0.0 - cat: - length: false - characters: false - words: false - cardinality_threshold: 50 - n_obs: 5 - # Set to zero to disable - chi_squared_threshold: 0.0 - coerce_str_to_date: false - redact: false - histogram_largest: 10 - stop_words: [] - - bool: - n_obs: 3 - # string to boolean mapping dict - mappings: - t: true - f: false - yes: true - no: false - y: true - n: false - true: true - false: false - path: - active: false - file: - active: false - image: - active: false - exif: false - hash: false - url: - active: false - timeseries: - active: false - autocorrelation: 0.7 - lags: [1, 7, 12, 24, 30] - significance: 0.05 - pacf_acf_lag: 100 + num: + quantiles: + - 0.05 + - 0.25 + - 0.5 + - 0.75 + - 0.95 + skewness_threshold: 20 + low_categorical_threshold: 5 + # Set to zero to disable + chi_squared_threshold: 0 + cat: + length: false + characters: false + words: false + cardinality_threshold: 50 + n_obs: 5 + # Set to zero to disable + chi_squared_threshold: 0 + coerce_str_to_date: false + redact: false + histogram_largest: 10 + stop_words: [] + bool: + n_obs: 3 + # string to boolean mapping dict + mappings: + t: true + f: false + yes: true + no: false + y: true + n: false + "true": true + "false": false + path: + active: false + file: + active: false + image: + active: false + exif: false + hash: false + url: + active: false + timeseries: + active: false + autocorrelation: 0.7 + lags: + - 1 + - 7 + - 12 + - 24 + - 30 + significance: 0.05 + pacf_acf_lag: 100 # Sort the variables. Possible values: "ascending", "descending" or null (leaves original sorting) sort: null # which diagrams to show missing_diagrams: - bar: false - matrix: false - heatmap: false + bar: false + matrix: false + heatmap: false correlations: - pearson: - calculate: false - warn_high_correlations: true - threshold: 0.9 - spearman: - calculate: false - warn_high_correlations: false - threshold: 0.9 - kendall: - calculate: false - warn_high_correlations: false - threshold: 0.9 - phi_k: - calculate: false - warn_high_correlations: false - threshold: 0.9 - cramers: - calculate: false - warn_high_correlations: true - threshold: 0.9 - auto: - calculate: false - warn_high_correlations: true - threshold: 0.9 - + pearson: + calculate: false + warn_high_correlations: true + threshold: 0.9 + spearman: + calculate: false + warn_high_correlations: false + threshold: 0.9 + kendall: + calculate: false + warn_high_correlations: false + threshold: 0.9 + phi_k: + calculate: false + warn_high_correlations: false + threshold: 0.9 + cramers: + calculate: false + warn_high_correlations: true + threshold: 0.9 + auto: + calculate: false + warn_high_correlations: true + threshold: 0.9 # Bivariate / Pairwise relations interactions: @@ -129,37 +132,37 @@ report: # Plot-specific settings plot: - # Image format (svg or png) - image_format: "svg" - dpi: 800 + # Image format (svg or png) + image_format: svg + dpi: 800 - scatter_threshold: 1000 + scatter_threshold: 1000 - correlation: - cmap: 'RdBu' - bad: '#000000' + correlation: + cmap: RdBu + bad: "#000000" - missing: - cmap: 'RdBu' - # Force labels when there are > 50 variables - force_labels: true + missing: + cmap: RdBu + # Force labels when there are > 50 variables + force_labels: true - cat_frequency: - show: true # if false, the category frequency plot is turned off - type: 'bar' # options: 'bar', 'pie' - max_unique: 0 - colors: null # use null for default or give a list of matplotlib recognised strings + cat_frequency: + show: true # if false, the category frequency plot is turned off + type: bar # options: 'bar', 'pie' + max_unique: 0 + colors: null # use null for default or give a list of matplotlib recognized strings - histogram: - x_axis_labels: true + histogram: + x_axis_labels: true - # Number of bins (set to 0 to automatically detect the bin size) - bins: 50 + # Number of bins (set to 0 to automatically detect the bin size) + bins: 50 - # Maximum number of bins (when bins=0) - max_bins: 250 + # Maximum number of bins (when bins=0) + max_bins: 250 - font_path: null + font_path: null # The number of observations to show n_obs_unique: 5 @@ -171,49 +174,49 @@ memory_deep: false # Configuration related to the duplicates duplicates: - head: 0 - key: "# duplicates" + head: 0 + key: "# duplicates" # Configuration related to the samples area samples: - head: 0 - tail: 0 - random: 0 + head: 0 + tail: 0 + random: 0 # Configuration related to the rejection of variables reject_variables: true # When in a Jupyter notebook notebook: - iframe: - height: '800px' - width: '100%' - # or 'src' - attribute: 'srcdoc' + iframe: + height: 800px + width: 100% + # or 'src' + attribute: srcdoc html: - # Minify the html - minify_html: true + # Minify the html + minify_html: true - # Offline support - use_local_assets: true + # Offline support + use_local_assets: true - # If true, single file, else directory with assets - inline: true + # If true, single file, else directory with assets + inline: true - # Show navbar - navbar_show: true + # Show navbar + navbar_show: true - # Assets prefix if inline = true - assets_prefix: null + # Assets prefix if inline = true + assets_prefix: null - # Styling options for the HTML report - style: - theme: null - logo: "" - primary_colors: - - "#377eb8" - - "#e41a1c" - - "#4daf4a" + # Styling options for the HTML report + style: + theme: null + logo: "" + primary_colors: + - "#0d6efd" + - "#dc3545" + - "#198754" - full_width: false + full_width: false diff --git a/src/ydata_profiling/model/alerts.py b/src/ydata_profiling/model/alerts.py index d3232ea9b..6f41894dc 100644 --- a/src/ydata_profiling/model/alerts.py +++ b/src/ydata_profiling/model/alerts.py @@ -8,6 +8,7 @@ from ydata_profiling.config import Settings from ydata_profiling.model.correlations import perform_check_correlation +from ydata_profiling.utils.styles import get_alert_styles def fmt_percent(value: float, edge_cases: bool = True) -> str: @@ -105,10 +106,11 @@ def __init__( self.values = values or {} self.column_name = column_name self._is_empty = is_empty + self._styles = get_alert_styles() @property def alert_type_name(self) -> str: - return self.alert_type.name.replace("_", " ").lower().title() + return self.alert_type.name.replace("_", " ").capitalize() @property def anchor_id(self) -> Optional[str]: @@ -118,13 +120,16 @@ def anchor_id(self) -> Optional[str]: def fmt(self) -> str: # TODO: render in template - name = self.alert_type.name.replace("_", " ") - if name == "HIGH CORRELATION" and self.values is not None: + style = self._styles.get(self.alert_type.name.lower(), 'secondary') + hint = '' + + if self.alert_type == AlertType.HIGH_CORRELATION and self.values is not None: num = len(self.values["fields"]) title = ", ".join(self.values["fields"]) corr = self.values["corr"] - name = f'HIGH CORRELATION' - return name + hint = f'data-bs-toggle="tooltip" data-bs-placement="right" data-bs-title="This variable has a high {corr} correlation with {num} fields: {title}"' + + return f'{self.alert_type_name}' def _get_description(self) -> str: """Return a human level description of the alert. diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py index ac8a73e2b..e3f4d1601 100644 --- a/src/ydata_profiling/profile_report.py +++ b/src/ydata_profiling/profile_report.py @@ -134,14 +134,14 @@ def __init__( for condition, key in groups: if condition: cfg = cfg.update(Config.get_arg_groups(key)) - report_config = cfg.update(report_config.dict(exclude_defaults=True)) + report_config = report_config.update(cfg.dict(exclude_defaults=True)) if len(kwargs) > 0: shorthands, kwargs = Config.shorthands(kwargs) - report_config = ( + report_config = report_config.update( Settings() .update(shorthands) - .update(report_config.dict(exclude_defaults=True)) + .dict(exclude_defaults=True) ) if kwargs: diff --git a/src/ydata_profiling/report/formatters.py b/src/ydata_profiling/report/formatters.py index 8d12bff2c..051299da7 100644 --- a/src/ydata_profiling/report/formatters.py +++ b/src/ydata_profiling/report/formatters.py @@ -329,11 +329,11 @@ def help(title: str, url: Optional[str] = None) -> str: HTML formatted help badge """ if url is not None: - return f'?' + return f'?' else: - return f'?' + return f'?' @list_args def fmt_badge(value: str) -> str: - return re.sub(r"\((\d+)\)", r'\1', value) + return re.sub(r"\((\d+)\)", r'\1', value) diff --git a/src/ydata_profiling/report/presentation/flavours/html/alerts.py b/src/ydata_profiling/report/presentation/flavours/html/alerts.py index 5b878e175..d07c1a9a4 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/alerts.py +++ b/src/ydata_profiling/report/presentation/flavours/html/alerts.py @@ -1,27 +1,10 @@ from ydata_profiling.report.presentation.core.alerts import Alerts from ydata_profiling.report.presentation.flavours.html import templates +from ydata_profiling.utils.styles import get_alert_styles class HTMLAlerts(Alerts): def render(self) -> str: - styles = { - "constant": "warning", - "unsupported": "warning", - "type_date": "warning", - "constant_length": "primary", - "high_cardinality": "primary", - "imbalance": "primary", - "unique": "primary", - "uniform": "primary", - "infinite": "info", - "zeros": "info", - "truncated": "info", - "missing": "info", - "skewed": "info", - "high_correlation": "default", - "duplicates": "default", - "non_stationary": "default", - "seasonal": "default", - } + styles = get_alert_styles() return templates.template("alerts.html").render(**self.content, styles=styles) diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates.py b/src/ydata_profiling/report/presentation/flavours/html/templates.py index a97d150cd..85e24a46a 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates.py +++ b/src/ydata_profiling/report/presentation/flavours/html/templates.py @@ -51,9 +51,8 @@ def create_html_assets(config: Settings, output_file: Path) -> None: css.append(f"wrapper/assets/{theme.value}.bootstrap.min.css") else: css.append("wrapper/assets/bootstrap.min.css") - css.append("wrapper/assets/bootstrap-theme.min.css") - js.append("wrapper/assets/jquery-3.7.1.min.js") - js.append("wrapper/assets/bootstrap.min.js") + + js.append("wrapper/assets/bootstrap.bundle.min.js") css.append("wrapper/assets/style.css") js.append("wrapper/assets/script.js") diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html index d46e578c8..3a53428b8 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html +++ b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html @@ -1,6 +1,6 @@