Skip to content

Commit

Permalink
Feat taxhub v2 : Change taxref migration procedure (without bib_noms)
Browse files Browse the repository at this point in the history
  • Loading branch information
amandine-sahl committed Aug 6, 2024
1 parent e5f2bf1 commit 43185e4
Show file tree
Hide file tree
Showing 11 changed files with 483 additions and 515 deletions.
18 changes: 18 additions & 0 deletions apptax/taxonomie/commands/migrate_taxref/commands_v16.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from apptax.taxonomie.commands.migrate_taxref.test_commands_v16 import (
populate_data,
clean_data,
test_import_taxref_v16,
)

from flask import Blueprint

import importlib
Expand Down Expand Up @@ -191,3 +197,15 @@ def import_and_format_dbc_status():
"""
truncate_bdc_statuts()
import_bdc_statuts_v16(logger)


@migrate_to_v16.command()
@with_appcontext
def test_taxref_v16_migration():
    """Run ``test_import_taxref_v16`` between ``populate_data`` and ``clean_data``.

    ``populate_data()`` is called first, then ``test_import_taxref_v16()``.
    ``clean_data()`` always runs afterwards, whether the test passes or
    raises; any exception (including ``AssertionError``) propagates to the
    caller unchanged.
    """
    populate_data()
    try:
        test_import_taxref_v16()
    finally:
        # No ``except`` clause needed: catching AssertionError only to
        # re-raise it added nothing; ``finally`` already guarantees cleanup.
        clean_data()

This file was deleted.

2 changes: 0 additions & 2 deletions apptax/taxonomie/commands/migrate_taxref/data/5_clean_db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,3 @@ DROP TABLE IF EXISTS taxonomie.dps_fk_cd_nom;
-- Remove working tables in the taxonomie schema.
-- IF EXISTS makes each statement a no-op when the table is already gone.
DROP TABLE IF EXISTS taxonomie.import_taxref_rangs;

DROP TABLE IF EXISTS taxonomie.cdnom_disparu;

-- NOTE(review): presumably only needed while the bib_noms-based procedure is in use - confirm.
DROP TABLE IF EXISTS taxonomie.tmp_bib_noms_copy;
Original file line number Diff line number Diff line change
@@ -1,51 +0,0 @@
-- Recreate the working table from scratch on every run.
-- It is filled from taxonomie.bib_noms by the INSERT that follows, and carries
-- extra bookkeeping columns (disappearance comment, replacement cd_nom,
-- deleted / tmp_import flags).
DROP TABLE IF EXISTS taxonomie.tmp_bib_noms_copy;

CREATE TABLE taxonomie.tmp_bib_noms_copy (
    id_nom                  serial PRIMARY KEY,
    cd_nom                  integer,
    cd_ref                  integer,
    nom_francais            varchar(1000),
    comments                varchar(1000),
    commentaire_disparition varchar(500),
    cd_nom_remplacement     integer,
    deleted                 boolean DEFAULT false,
    tmp_import              boolean
);

-- Copy every row of bib_noms into the working table, keeping the original id_nom.
INSERT INTO taxonomie.tmp_bib_noms_copy (id_nom, cd_nom, cd_ref, nom_francais, comments)
SELECT
    src.id_nom,
    src.cd_nom,
    src.cd_ref,
    src.nom_francais,
    src.comments
FROM taxonomie.bib_noms AS src;

-- The ids above were inserted explicitly, so the serial sequence was not advanced:
-- move it to the highest copied id_nom (is_called = true, so the next value is max + 1).
SELECT setval(
    'taxonomie.tmp_bib_noms_copy_id_nom_seq',
    max(c.id_nom),
    true
)
FROM taxonomie.tmp_bib_noms_copy AS c;

-- Added by Nicolas Imbert: index on cdnom_disparu.cd_nom, the column used by the joins below.
CREATE INDEX IF NOT EXISTS i_tmp_cdnom_disparu_cd_nom
    ON taxonomie.cdnom_disparu (cd_nom);


-- Case 1 - names listed in cdnom_disparu:
-- flag them as deleted, record the removal reason (followed by the replacement
-- cd_nom when one is given) and store that replacement cd_nom.
WITH disparus AS (
    SELECT
        d.cd_nom,
        d.cd_nom_remplacement,
        d.raison_suppression
    FROM taxonomie.bib_noms AS bn
    INNER JOIN taxonomie.cdnom_disparu AS d
        ON bn.cd_nom = d.cd_nom
)
UPDATE taxonomie.tmp_bib_noms_copy AS n
SET
    deleted = true,
    commentaire_disparition =
        dis.raison_suppression
        || COALESCE(' nouveau cd_nom :' || dis.cd_nom_remplacement, ''),
    cd_nom_remplacement = dis.cd_nom_remplacement
FROM disparus AS dis
WHERE n.cd_nom = dis.cd_nom;

------------- Names that have a replacement cd_nom
-- Add the replacement cd_nom to the working copy, inheriting cd_ref and
-- nom_francais from the name it replaces, and flag the row with tmp_import = true.
--
-- The row is inserted only when the replacement cd_nom is not already present:
-- the sole unique constraint on tmp_bib_noms_copy is the serial id_nom, so
-- ON CONFLICT DO NOTHING alone never prevents a duplicate cd_nom.
INSERT INTO taxonomie.tmp_bib_noms_copy (cd_nom, cd_ref, nom_francais, tmp_import)
SELECT
    d.cd_nom_remplacement,
    n.cd_ref,
    n.nom_francais,
    true
FROM taxonomie.tmp_bib_noms_copy AS n
INNER JOIN taxonomie.cdnom_disparu AS d
    ON n.cd_nom = d.cd_nom
WHERE n.cd_nom_remplacement IS NOT NULL
    AND NOT EXISTS (
        SELECT 1
        FROM taxonomie.tmp_bib_noms_copy AS existing
        WHERE existing.cd_nom = d.cd_nom_remplacement
    )
ON CONFLICT DO NOTHING;
154 changes: 116 additions & 38 deletions ...axonomie/commands/migrate_taxref/data/changes_detection/1.1_taxref_changes_detections.sql
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,59 +1,137 @@
/* ==================================================================================
 * Propagation of taxonomic changes
 * ================================================================================== */

-- Working schema holding the change-detection tables.
CREATE SCHEMA IF NOT EXISTS tmp_taxref_changes;

-- Start from a clean slate: comp_grap is rebuilt below.
DROP TABLE IF EXISTS tmp_taxref_changes.comp_grap;

-- Detect taxonomic changes: initial cd_ref vs final cd_ref,
-- for every cd_ref referenced in t_medias or cor_taxon_attribut.
-- The final SELECT looks each used cd_ref up in import_taxref (i.cd_ref = f.cd_nom)
-- and keeps the cd_ref found there as f_cd_ref, together with the attribute list /
-- count and the media count attached to the initial cd_ref.
-- NOTE(review): the CTE list below carries two variants side by side
-- (grappe_init / grappe_final reading tmp_bib_noms_copy, then used_cd_ref; the
-- attribs and media bodies each appear twice). This looks like removed and added
-- diff lines interleaved - confirm which version is current before running.
CREATE TABLE tmp_taxref_changes.comp_grap AS
WITH grappe_init AS (
SELECT distinct b.cd_ref , array_agg(cd_nom ORDER BY cd_nom) as array_agg, count(DISTINCT cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
WHERE NOT deleted = true and cd_nom is not null
GROUP BY cd_ref
),
grappe_final AS (
SELECT distinct t.cd_ref , array_agg(b.cd_nom ORDER BY b.cd_nom) as array_agg, count(DISTINCT b.cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
JOIN taxonomie.import_taxref t
ON b.cd_nom = t.cd_nom
WHERE NOT deleted = true and b.cd_nom is not null
GROUP BY t.cd_ref
WITH used_cd_ref AS (
SELECT cd_ref FROM taxonomie.t_medias
UNION
SELECT cd_ref FROM taxonomie.cor_taxon_attribut
),
attribs AS (
SELECT DISTINCT a.cd_ref, array_agg(id_attribut) as att_list, count(DISTINCT id_attribut) as att_nb
FROM taxonomie.cor_taxon_attribut a
WHERE NOT valeur_attribut ='{}' AND NOT valeur_attribut =''
GROUP BY a.cd_ref
SELECT DISTINCT a.cd_ref, array_agg(id_attribut) as att_list, count(DISTINCT id_attribut) as att_nb
FROM taxonomie.cor_taxon_attribut a
WHERE NOT valeur_attribut ='{}' AND NOT valeur_attribut =''
GROUP BY a.cd_ref
),
media AS (
SELECT DISTINCT cd_ref, count(id_media) as media_nb
FROM taxonomie.t_medias
GROUP BY cd_ref
SELECT DISTINCT cd_ref, count(id_media) as media_nb
FROM taxonomie.t_medias
GROUP BY cd_ref
)
SELECT i.cd_ref as i_cd_ref,
f.cd_ref as f_cd_ref,
att_list, att_nb, media_nb
FROM used_cd_ref i
LEFT OUTER JOIN taxonomie.import_taxref f ON i.cd_ref = f.cd_nom
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref;


-- Result columns filled by the classification statements that follow:
--   cas    : detected kind of change
--   action : what has to be done / reported for the row
ALTER TABLE tmp_taxref_changes.comp_grap
    ADD COLUMN cas varchar(50),
    ADD COLUMN action varchar(500);

-- Classify rows whose initial and final cd_ref are both known:
--   'no changes'    : initial cd_ref equals final cd_ref
--   'update cd_ref' : initial cd_ref differs from final cd_ref
-- Rows where either side is NULL are left untouched (cas stays NULL).
UPDATE tmp_taxref_changes.comp_grap
SET cas = CASE
        WHEN i_cd_ref = f_cd_ref THEN 'no changes'
        ELSE 'update cd_ref'
    END
WHERE i_cd_ref IS NOT NULL
    AND f_cd_ref IS NOT NULL;

-- 'merge' : several initial cd_ref end up on the same final cd_ref.
-- Runs last on purpose: it overrides the classification set above.
WITH merged AS (
    SELECT f_cd_ref
    FROM tmp_taxref_changes.comp_grap
    GROUP BY f_cd_ref
    HAVING count(*) > 1
)
UPDATE tmp_taxref_changes.comp_grap AS cg
SET cas = 'merge'
FROM merged AS m
WHERE cg.f_cd_ref = m.f_cd_ref;

-- Conflict detection
-- Applies to merged cd_ref that carry attributes: there is a conflict when the
-- merged cd_ref hold the same attribute with different values.
--
-- All conflicting attributes of a final cd_ref are folded into ONE row
-- (conflicts_by_ref) before the UPDATE. Joining the per-attribute rows directly
-- would give several candidate rows per target row, and PostgreSQL then applies
-- an arbitrary one, so only one conflict would be reported, unpredictably.
WITH merged_groups AS (
    -- final cd_ref receiving more than one attribute-bearing initial cd_ref
    SELECT
        f_cd_ref,
        array_agg(i_cd_ref) AS li_cd_ref
    FROM tmp_taxref_changes.comp_grap
    WHERE cas = 'merge'
        AND att_nb > 0
    GROUP BY f_cd_ref
    HAVING count(*) > 1
),
merged_atts AS (
    -- attribute values held by the initial cd_ref of each merged group
    SELECT DISTINCT
        g.f_cd_ref,
        cta.cd_ref,
        cta.id_attribut,
        cta.valeur_attribut
    FROM taxonomie.cor_taxon_attribut AS cta
    INNER JOIN merged_groups AS g
        ON cta.cd_ref = ANY(g.li_cd_ref)
),
conflict_atts AS (
    -- one row per (final cd_ref, attribute) having more than one distinct value
    SELECT
        ma.f_cd_ref,
        ma.id_attribut,
        string_agg(
            DISTINCT concat(ba.nom_attribut::varchar, ': ', ma.valeur_attribut),
            ', '
        ) AS atts
    FROM merged_atts AS ma
    INNER JOIN taxonomie.bib_attributs AS ba
        ON ba.id_attribut = ma.id_attribut
    GROUP BY ma.f_cd_ref, ma.id_attribut
    HAVING count(DISTINCT ma.valeur_attribut) > 1
),
conflicts_by_ref AS (
    -- exactly one row per final cd_ref, listing every conflicting attribute
    SELECT
        f_cd_ref,
        string_agg(atts, ', ' ORDER BY id_attribut) AS atts
    FROM conflict_atts
    GROUP BY f_cd_ref
)
UPDATE tmp_taxref_changes.comp_grap AS cg
-- action is declared varchar(500): cap the message rather than abort the script
SET action = left('Conflicts with attributes : ' || cr.atts, 500)
FROM conflicts_by_ref AS cr
WHERE cr.f_cd_ref = cg.f_cd_ref
    AND cg.cas = 'merge';

-- For the two non-merge cases the action is simply the case label itself.
UPDATE tmp_taxref_changes.comp_grap
SET action = cas
WHERE cas IN ('no changes', 'update cd_ref');


-- Split analysis
-- Builds split_analyze by comparing, per cd_nom, the ordered array of cd_nom
-- grouped under its cd_ref in taxonomie.taxref (i_*) with the array grouped under
-- its cd_ref in taxonomie.import_taxref (f_*).
-- NOTE(review): this statement carries two variants side by side (FROM clauses
-- reading tmp_bib_noms_copy next to ones reading cor_nom_liste, two final SELECT
-- lists, and a ';' before the trailing WHERE). This looks like removed and added
-- diff lines interleaved - confirm which version is current before running.
DROP TABLE IF EXISTS tmp_taxref_changes.split_analyze ;

CREATE TABLE tmp_taxref_changes.split_analyze AS
WITH
grappe_init AS (
SELECT b.cd_ref , array_agg(cnl.cd_nom ORDER BY cnl.cd_nom) as array_agg, count(DISTINCT cnl.cd_nom)
FROM taxonomie.taxref b
JOIN taxonomie.cor_nom_liste cnl
ON cnl.cd_nom = b.cd_nom
GROUP BY b.cd_ref
),
grappe_final AS (
SELECT new_ref.cd_ref , array_agg(cnl.cd_nom ORDER BY cnl.cd_nom) as array_agg, count(DISTINCT cnl.cd_nom)
FROM taxonomie.import_taxref new_ref
JOIN taxonomie.cor_nom_liste cnl
ON cnl.cd_nom = new_ref.cd_nom
GROUP BY new_ref.cd_ref
),
init_cdnom as (
select distinct t1.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_init t1, taxonomie.tmp_bib_noms_copy t2
where t1.cd_ref = t2.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
from taxonomie.cor_nom_liste t2
JOIN taxonomie.taxref t ON t.cd_nom = t2.cd_nom
JOIN grappe_init t1 ON t1.cd_ref = t.cd_ref
order by 1,2),
final_cdnom as (
select distinct t3.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_final t1, taxonomie.tmp_bib_noms_copy t2, taxonomie.import_taxref t3
where t1.cd_ref = t3.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
from grappe_final t1, taxonomie.cor_nom_liste t2, taxonomie.import_taxref t3
where t1.cd_ref = t3.cd_ref
and t2.cd_nom = t3.cd_nom
order by 1,2)
SELECT distinct i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count,
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count,
att_list, att_nb, media_nb
order by 1,2)
SELECT DISTINCT i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count,
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count
FROM init_cdnom i
LEFT OUTER JOIN final_cdnom f ON i.cd_nom = f.cd_nom
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref;
WHERE NOT i.array_agg = f.array_agg;



-- Column receiving the case label for each split_analyze row.
ALTER TABLE tmp_taxref_changes.split_analyze ADD cas varchar(50);

-- NOTE(review): the three comp_grap statements below add action and cas, which
-- are already added to comp_grap earlier in this file (action as varchar(500)).
-- They look like removed diff lines - confirm before running, a second ADD of an
-- existing column fails.
ALTER TABLE tmp_taxref_changes.comp_grap ADD grappe_change varchar(250);
ALTER TABLE tmp_taxref_changes.comp_grap ADD action varchar(250);
ALTER TABLE tmp_taxref_changes.comp_grap ADD cas varchar(50);
-- 'split' = the initial cd_nom array contains the final one (@>) without being equal to it.
UPDATE tmp_taxref_changes.split_analyze SET cas = 'split'
WHERE i_array_agg @> f_array_agg AND NOT i_array_agg = f_array_agg ;
Loading

0 comments on commit 43185e4

Please sign in to comment.