From 93c285144998cabf00bbc1f7fc927e5c25cd7511 Mon Sep 17 00:00:00 2001 From: David Beal Date: Sun, 3 Nov 2024 10:03:19 +0100 Subject: [PATCH] fixup --- polars_db_schema/models/db_config.py | 121 ++++++++++++++++++++------- polars_db_schema/models/db_table.py | 56 +++++++++++-- polars_db_schema/views/db_config.xml | 12 +-- polars_db_schema/views/db_table.xml | 25 ++++-- 4 files changed, 164 insertions(+), 50 deletions(-) diff --git a/polars_db_schema/models/db_config.py b/polars_db_schema/models/db_config.py index 3af33fecc..ad624adb3 100644 --- a/polars_db_schema/models/db_config.py +++ b/polars_db_schema/models/db_config.py @@ -2,7 +2,8 @@ import polars as pl -from odoo import fields, models +from odoo import exceptions, fields, models +from odoo.tools.safe_eval import safe_eval class DbConfig(models.Model): @@ -11,56 +12,116 @@ class DbConfig(models.Model): db_type_id = fields.Many2one(comodel_name="db.type") db_table_ids = fields.One2many(comodel_name="db.table", inverse_name="db_config_id") row_count_query = fields.Text(related="db_type_id.row_count_query") - matching_model = fields.Char(help="Odoo matching models") - - def _get_foreign_keys(self): - foreign = defaultdict(list) - df = self._read_sql(self.db_type_id.foreign_key_query) - for mdict in df.to_dicts(): - primary_table = mdict["primary_table"] - foreign[mdict["foreign_table"]].append( - f"{mdict['fk_column_name']} = {primary_table}.{mdict['fk_column_name']}" - ) + table_filter = fields.Char( + help="Remove tables with a name matching like sql expression" + ) + table_sort = fields.Selection( + selection=[("row_count", "Count"), ("alias", "Alias"), ("odoo", "Odoo")], + default="row_count", + ) + manually_entries = fields.Text( + readonly=True, + help="Odoo matching models, alias and display. " "Can be backup in your module", + ) def get_db_metadata(self): self.ensure_one() + foreign, entries = {}, {} if self.db_type_id.foreign_key_query: - foreign = self._get_foreign_keys() + foreign = self._get_foreign_keys(self._get_aliases()) if self.row_count_query: - self._read_sql("SELECT 1") - df = self._read_sql(self.row_count_query) - if self.db_type_id.code == "sqlite": - # https://docs.pola.rs/user-guide/expressions/user-defined-functions/#processing-individual-values-with-map_elements - df = ( - df.with_columns( - pl.col("stat").map_elements(sqlite, return_dtype=pl.Int32) - ) - # rename columns - .rename({"tbl": "name", "stat": "row_count"}) - # stat columns store extra info leading to duplicate lines, - # then make it unique - .unique(maintain_order=True) + sql = self.row_count_query + if self.table_filter: + sql = sql.replace( + "WHERE", f"WHERE name NOT like '{self.table_filter}' AND" ) + df = self._read_sql(sql) + if self.db_type_id.code == "sqlite": + # Sqlite has weird information schema structure + # we need a little hack + df = sqlite(df) df = df.filter(pl.col("row_count") > 0).with_columns( # add m2o foreign key db_config_id=pl.lit(self.id) ) df = self._filter_df(df) self.env["db.table"].search([("db_config_id", "=", self.id)]).unlink() + if self.manually_entries: + entries = safe_eval(self.manually_entries) vals_list = [] for row in df.to_dicts(): name = row.get("name") - if name and name in foreign: - row["foreign"] = "\n".join(foreign[name]) + if name in foreign: + row["foreign_keys"] = "\n".join(foreign[name]) + if entries: + if name in entries.get("odoo_model"): + row["odoo_model"] = entries["odoo_model"][name] + if name in entries.get("alias"): + row["alias"] = entries["alias"][name] vals_list.append(row) self.env["db.table"].create(vals_list) + def _get_foreign_keys(self, aliases): + foreign = defaultdict(list) + df = self._read_sql(self.db_type_id.foreign_key_query) + mdicts = df.to_dicts() + cols = ["primary_table", "foreign_table", "fk_column_name"] + if mdicts and any([x for x in cols if x not in mdicts[0].keys()]): + raise exceptions.ValidationError( + f"Missing one of these columns {cols} in the query" + ) + for mdict in mdicts: + primary_table = aliases.get(mdict["primary_table"]) + primary_table = ( + aliases.get(mdict["primary_table"]) or mdict["primary_table"] + ) + foreign[mdict["foreign_table"]].append( + f"{mdict['fk_column_name']} = {primary_table}.{mdict['fk_column_name']}" + ) + return foreign + + def _get_aliases(self, reverse=False): + self.ensure_one() + aliases = {x.name: x.alias for x in self.db_table_ids if x.alias} + if reverse: + return {value: key for key, value in aliases.items()} + return aliases + + def _save_manually_entered_data(self): + def get_dict_format(column): + res = ", ".join( + [f"'{x.name}': '{x[column]}'" for x in self.db_table_ids if x[column]] + ) + if res: + return safe_eval(f"{ {res} }".replace('"', "")) + return + + mdict = {} + for mvar in ("odoo_model", "alias", "display"): + sub_dict = get_dict_format(mvar) + if sub_dict: + mdict[mvar] = sub_dict + self.manually_entries = str(mdict).replace("}, '", "},\n'") + def _filter_df(self, df): "You may want ignore some tables: inherit me" return df -def sqlite(value): +def sqlite(df): "Extract row_count info from 'stat' column" - values = value.split(" ") - return values and int(values[0]) or int(value) + + def extract_first_part(value): + values = value.split(" ") + return values and int(values[0]) or int(value) + + return ( + df.with_columns( + pl.col("stat").map_elements(extract_first_part, return_dtype=pl.Int32) + ) + # rename columns + .rename({"tbl": "name", "stat": "row_count"}) + # stat columns store extra info leading to duplicate lines, + # then make it unique + .unique(maintain_order=True) + ) diff --git a/polars_db_schema/models/db_table.py b/polars_db_schema/models/db_table.py index 855a35b3e..2452133a1 100644 --- a/polars_db_schema/models/db_table.py +++ b/polars_db_schema/models/db_table.py @@ -19,10 +19,15 @@ class DbTable(models.Model): xlsx = fields.Binary(string="File", attachment=False, readonly=True) db_config_id = fields.Many2one(comodel_name="db.config", readonly=True) filename = fields.Char(readonly=True) - foreign = fields.Text(readonly=True, help="Foreign keys towards other tables") - matching_model = fields.Char(string="Matching", help="Odoo matching model") + foreign_keys = fields.Text(readonly=True, help="Foreign keys towards other tables") + alias = fields.Char(help="Used to make SQL query easier to read") + odoo_model = fields.Char(help="Odoo matching model") + display = fields.Char( + help="Fields to combinate (separated by comma) to give a " + "user friendly representation of the record" + ) sql = fields.Text( - string="Significant Columns", + string="Relevant Columns", readonly=True, help="Columns with variable data over rows", ) @@ -44,6 +49,8 @@ def get_metadata_info(self): cols = [x[0] for x in df.schema.items() if str(x[1]) not in excluded_types] relevant_cols = [] unique = {} + key_cols, relations = "", "" + # Search columns with non unique value in rows for col in cols: # TODO improve it # Some database have dirty column names: :-( @@ -53,19 +60,52 @@ def get_metadata_info(self): query = f"SELECT distinct {col} FROM self" res = df.sql(query) if len(res) > 1: - relevant_cols.append(col) + relevant_cols.append(f"{self.alias or self.name}.{col}") else: # column has the same value on any rows # we prefer ignore them unique[col] = res.to_series()[0] self.unique = f"{unique}" + if self.foreign_keys: + joint = [ + x.split(".")[0].split(" = ")[1] for x in self.foreign_keys.split("\n") + ] + count = {x: joint.count(x) for x in set(joint)} + print(joint, count) + # breakpoint() # import pdb; pdb.set_trace() + foreign_list = [ + # table, foreign=othertable.colname + # x[0], x[1][0] x[1][1] + (self.name, x.split(" = ")) + for x in self.foreign_keys.split("\n") + ] + key_cols = ", ".join([x[1][1] for x in foreign_list]) + "," + aliases = self.db_config_id._get_aliases() + aliases_rev = self.db_config_id._get_aliases(reverse=True) + relations = "\n\t".join( + [ + f"LEFT JOIN {aliases_rev.get(x[1][1].split('.')[0], x[0])} " + f"{x[1][1].split('.')[0]} ON {aliases.get(x[0], x[0])}" + f".{x[1][0]} = {x[1][1]}" + for x in foreign_list + ] + ) + print(relations) if relevant_cols: - self.sql = f"SELECT {', '. join(relevant_cols)}\nFROM {self.name};\n" + self.sql = f"""SELECT {key_cols} {', '. join(relevant_cols)} +FROM {self.name} {self.alias or ''}\n\t{relations};\n""" # WARNING Thread virtual real time limit (151/120s) reached. # Dumping stacktrace of limit exceeding threads before reloading + def write(self, vals): + res = super().write(vals) + if "odoo_model" in vals or "alias" in vals or "display" in vals: + for conf in self.mapped("db_config_id"): + conf._save_manually_entered_data() + return res + def get_spreadsheet(self): self.ensure_one() if not self.sql: @@ -73,7 +113,7 @@ def get_spreadsheet(self): if not self.sql: raise exceptions.ValidationError( _( - "There is no column with varaiable data in this table: " + "There is no column with variable data in this table: " "check Uniques Values column" ) ) @@ -82,13 +122,13 @@ def get_spreadsheet(self): ) excel_stream = io.BytesIO() vals = {"workbook": excel_stream} - vals.update(self.get_spreadsheet_settings()) + vals.update(self._get_spreadsheet_settings()) df.write_excel(**vals) excel_stream.seek(0) self.filename = f"{self.name}.xlsx" self.xlsx = base64.encodebytes(excel_stream.read()) - def get_spreadsheet_settings(self): + def _get_spreadsheet_settings(self): return { "position": "A1", "table_style": "Table Style Light 16", diff --git a/polars_db_schema/views/db_config.xml b/polars_db_schema/views/db_config.xml index 566e0196d..b67d3a12a 100644 --- a/polars_db_schema/views/db_config.xml +++ b/polars_db_schema/views/db_config.xml @@ -18,16 +18,18 @@ invisible="not db_table_ids" /> + + + - + + + + diff --git a/polars_db_schema/views/db_table.xml b/polars_db_schema/views/db_table.xml index 51dbdf245..e3a080222 100644 --- a/polars_db_schema/views/db_table.xml +++ b/polars_db_schema/views/db_table.xml @@ -15,11 +15,12 @@ filename="filename" options="{'accepted_file_extensions': '.xlsx'}" /> - - - - + + + + +