-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Change taxref migration procedure (without bib_noms)
Change test procedure now command
- Loading branch information
1 parent
b2dc6a1
commit 52bb0c5
Showing
11 changed files
with
483 additions
and
515 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 0 additions & 9 deletions
9
apptax/taxonomie/commands/migrate_taxref/data/0_keep_missing_cd_nom.sql
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 0 additions & 51 deletions
51
...xonomie/commands/migrate_taxref/data/changes_detection/0.1_generate_tmp_bib_noms_copy.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +0,0 @@ | ||
-- Recreate from scratch the working copy of taxonomie.bib_noms used by this script. | ||
DROP TABLE IF EXISTS taxonomie.tmp_bib_noms_copy; | ||
|
||
-- The first five columns receive the rows copied from taxonomie.bib_noms. | ||
-- commentaire_disparition, cd_nom_remplacement and deleted are filled by the | ||
-- UPDATE further down that matches names against taxonomie.cdnom_disparu. | ||
-- tmp_import is set to true on rows inserted for replacement cd_nom values. | ||
CREATE TABLE taxonomie.tmp_bib_noms_copy ( | ||
id_nom serial PRIMARY KEY, | ||
cd_nom integer, | ||
cd_ref integer, | ||
nom_francais character varying(1000), | ||
comments character varying(1000), | ||
commentaire_disparition Varchar(500), | ||
cd_nom_remplacement int, | ||
deleted boolean DEFAULT(FALSE), | ||
tmp_import boolean | ||
); | ||
|
||
-- Copy every row of taxonomie.bib_noms into the working table, keeping the | ||
-- original id_nom values instead of letting the serial column generate them. | ||
INSERT INTO taxonomie.tmp_bib_noms_copy ( | ||
id_nom, cd_nom, cd_ref, nom_francais, comments | ||
) | ||
SELECT id_nom, cd_nom, cd_ref, nom_francais, comments | ||
FROM taxonomie.bib_noms; | ||
|
||
|
||
-- Because id_nom was inserted explicitly, move the serial sequence to the | ||
-- current max(id_nom); the third argument (true) marks that value as already used. | ||
SELECT setval( | ||
'taxonomie.tmp_bib_noms_copy_id_nom_seq', | ||
(SELECT max(id_nom) FROM taxonomie.tmp_bib_noms_copy ), | ||
true | ||
); | ||
|
||
--- Added by Nicolas Imbert | ||
-- Index on taxonomie.cdnom_disparu(cd_nom), the column the statements below join on. | ||
create index IF NOT EXISTS i_tmp_cdnom_disparu_cd_nom on taxonomie.cdnom_disparu (cd_nom); | ||
|
||
|
||
--- CASE 1 - a replacement cd_nom is to be used. | ||
-- Flags as deleted every working-copy row whose cd_nom is listed in | ||
-- taxonomie.cdnom_disparu (restricted to cd_nom present in taxonomie.bib_noms), | ||
-- and stores the removal reason plus the replacement cd_nom when there is one. | ||
-- NOTE(review): if raison_suppression can be NULL, the || concatenation makes | ||
-- commentaire_disparition NULL as well - confirm against the cdnom_disparu schema. | ||
UPDATE taxonomie.tmp_bib_noms_copy n SET deleted = true , | ||
commentaire_disparition = raison_suppression || COALESCE(' nouveau cd_nom :' || a.cd_nom_remplacement, ''), | ||
cd_nom_remplacement = a.cd_nom_remplacement | ||
FROM ( | ||
SELECT d.* | ||
FROM taxonomie.bib_noms n | ||
JOIN taxonomie.cdnom_disparu d | ||
ON n.cd_nom = d.cd_nom | ||
) a | ||
WHERE n.cd_nom = a.cd_nom; | ||
|
||
------------- Case with a replacement cd_nom | ||
-- Add the replacement cd_nom when it is not already present in the working copy. | ||
-- The new row inherits cd_ref and nom_francais from the disappeared name and is | ||
-- tagged tmp_import = true. | ||
-- The only unique constraint on the table is the serial id_nom primary key, so | ||
-- ON CONFLICT DO NOTHING alone never prevents a duplicate cd_nom; the NOT EXISTS | ||
-- guard is what enforces "only when it did not exist". | ||
-- NOTE(review): two disappeared names sharing the same replacement still produce | ||
-- two rows, because the guard only sees rows present before this statement runs. | ||
INSERT INTO taxonomie.tmp_bib_noms_copy(cd_nom, cd_ref, nom_francais, tmp_import) | ||
SELECT d.cd_nom_remplacement, n.cd_ref, n.nom_francais, true | ||
FROM taxonomie.tmp_bib_noms_copy n | ||
JOIN taxonomie.cdnom_disparu d ON n.cd_nom = d.cd_nom | ||
WHERE NOT n.cd_nom_remplacement IS NULL | ||
AND NOT EXISTS ( | ||
SELECT 1 | ||
FROM taxonomie.tmp_bib_noms_copy existing_nom | ||
WHERE existing_nom.cd_nom = d.cd_nom_remplacement | ||
) | ||
ON CONFLICT DO NOTHING; | ||
154 changes: 116 additions & 38 deletions
154
...axonomie/commands/migrate_taxref/data/changes_detection/1.1_taxref_changes_detections.sql
100755 → 100644
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,137 @@ | ||
---- ################################################################################# | ||
---- ################################################################################# | ||
---- Répercussion des changements taxonomiques | ||
---- ################################################################################# | ||
---- ################################################################################# | ||
|
||
|
||
CREATE SCHEMA IF NOT EXISTS tmp_taxref_changes; | ||
|
||
DROP TABLE IF EXISTS tmp_taxref_changes.comp_grap ; | ||
|
||
-- Détection des changements taxonomiques cd_ref initial vs cd_ref final | ||
-- Pour tous les cd_ref référencés dans les tables t_medias ou cor_taxon_attribut | ||
CREATE TABLE tmp_taxref_changes.comp_grap AS | ||
WITH grappe_init AS ( | ||
SELECT distinct b.cd_ref , array_agg(cd_nom ORDER BY cd_nom) as array_agg, count(DISTINCT cd_nom) | ||
FROM taxonomie.tmp_bib_noms_copy b | ||
WHERE NOT deleted = true and cd_nom is not null | ||
GROUP BY cd_ref | ||
), | ||
grappe_final AS ( | ||
SELECT distinct t.cd_ref , array_agg(b.cd_nom ORDER BY b.cd_nom) as array_agg, count(DISTINCT b.cd_nom) | ||
FROM taxonomie.tmp_bib_noms_copy b | ||
JOIN taxonomie.import_taxref t | ||
ON b.cd_nom = t.cd_nom | ||
WHERE NOT deleted = true and b.cd_nom is not null | ||
GROUP BY t.cd_ref | ||
WITH used_cd_ref AS ( | ||
SELECT cd_ref FROM taxonomie.t_medias | ||
UNION | ||
SELECT cd_ref FROM taxonomie.cor_taxon_attribut | ||
), | ||
attribs AS ( | ||
SELECT DISTINCT a.cd_ref, array_agg(id_attribut) as att_list, count(DISTINCT id_attribut) as att_nb | ||
FROM taxonomie.cor_taxon_attribut a | ||
WHERE NOT valeur_attribut ='{}' AND NOT valeur_attribut ='' | ||
GROUP BY a.cd_ref | ||
SELECT DISTINCT a.cd_ref, array_agg(id_attribut) as att_list, count(DISTINCT id_attribut) as att_nb | ||
FROM taxonomie.cor_taxon_attribut a | ||
WHERE NOT valeur_attribut ='{}' AND NOT valeur_attribut ='' | ||
GROUP BY a.cd_ref | ||
), | ||
media AS ( | ||
SELECT DISTINCT cd_ref, count(id_media) as media_nb | ||
FROM taxonomie.t_medias | ||
GROUP BY cd_ref | ||
SELECT DISTINCT cd_ref, count(id_media) as media_nb | ||
FROM taxonomie.t_medias | ||
GROUP BY cd_ref | ||
) | ||
SELECT i.cd_ref as i_cd_ref, | ||
f.cd_ref as f_cd_ref, | ||
att_list, att_nb, media_nb | ||
FROM used_cd_ref i | ||
LEFT OUTER JOIN taxonomie.import_taxref f ON i.cd_ref = f.cd_nom | ||
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref | ||
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref; | ||
|
||
|
||
ALTER TABLE tmp_taxref_changes.comp_grap ADD cas varchar(50); | ||
ALTER TABLE tmp_taxref_changes.comp_grap ADD action varchar(500); | ||
|
||
-- 'no changes' = case where nothing changes: | ||
-- the initial cd_ref equals the final cd_ref. | ||
UPDATE tmp_taxref_changes.comp_grap SET cas = 'no changes' | ||
WHERE i_cd_ref = f_cd_ref; | ||
|
||
-- 'update cd_ref' = case where the cd_ref is modified: | ||
-- the initial cd_ref differs from the final cd_ref. | ||
-- NOTE(review): when f_cd_ref is NULL the comparison evaluates to NULL, so such | ||
-- rows match neither this statement nor the 'no changes' one and keep cas NULL - | ||
-- confirm that this is intended. | ||
UPDATE tmp_taxref_changes.comp_grap SET cas = 'update cd_ref' | ||
WHERE NOT i_cd_ref = f_cd_ref; | ||
|
||
-- 'merge' = case where cd_ref values are merged: | ||
-- at least two initial cd_ref share the same final cd_ref. | ||
-- This overwrites any cas value already set on the matching rows. | ||
UPDATE tmp_taxref_changes.comp_grap SET cas = 'merge' | ||
WHERE f_cd_ref IN ( | ||
SELECT f_cd_ref | ||
FROM tmp_taxref_changes.comp_grap | ||
GROUP BY f_cd_ref | ||
HAVING count(*)>1 | ||
); | ||
|
||
-- Conflict detection | ||
-- Applies to merged cd_ref that carry attributes. | ||
-- There is a conflict when 2 cd_ref have the same attribute with different values. | ||
-- c: final cd_ref with more than one merged initial cd_ref having attributes (att_nb > 0). | ||
-- atts: the cor_taxon_attribut rows of those initial cd_ref. | ||
-- conflict_atts: per final cd_ref and attribute, keeps only attributes with more | ||
-- than one distinct value and builds a "name: value" summary string. | ||
WITH c AS ( | ||
SELECT f_cd_ref , array_agg(i_cd_ref) AS li_cd_ref | ||
FROM tmp_taxref_changes.comp_grap | ||
WHERE cas = 'merge' AND att_nb > 0 | ||
GROUP BY f_cd_ref | ||
HAVING count(*) >1 | ||
), atts AS ( | ||
SELECT DISTINCT * | ||
FROM taxonomie.cor_taxon_attribut a | ||
JOIN c ON a.cd_ref = ANY(c.li_cd_ref) | ||
) , conflict_atts AS ( | ||
SELECT | ||
f_cd_ref, | ||
atts.id_attribut, | ||
count(DISTINCT valeur_attribut), | ||
string_agg(DISTINCT CONCAT(nom_attribut::varchar, ': ' , valeur_attribut), ', ') AS atts | ||
FROM atts | ||
JOIN taxonomie.bib_attributs a | ||
ON a.id_attribut = atts.id_attribut | ||
GROUP BY f_cd_ref, atts.id_attribut | ||
HAVING count(DISTINCT valeur_attribut) >1 | ||
) | ||
-- Write the conflict summary into action for the merged rows concerned. | ||
UPDATE tmp_taxref_changes.comp_grap c SET action = 'Conflicts with attributes : ' || atts | ||
FROM conflict_atts a | ||
WHERE a.f_cd_ref = c.f_cd_ref AND cas = 'merge'; | ||
|
||
-- For rows classified 'no changes', the action is the same label as the case. | ||
UPDATE tmp_taxref_changes.comp_grap SET action = 'no changes' | ||
WHERE cas = 'no changes'; | ||
|
||
|
||
-- For rows classified 'update cd_ref', the action is the same label as the case. | ||
UPDATE tmp_taxref_changes.comp_grap SET action = 'update cd_ref' | ||
WHERE cas = 'update cd_ref' ; | ||
|
||
|
||
-- Analyse des splits | ||
DROP TABLE IF EXISTS tmp_taxref_changes.split_analyze ; | ||
|
||
CREATE TABLE tmp_taxref_changes.split_analyze AS | ||
WITH | ||
grappe_init AS ( | ||
SELECT b.cd_ref , array_agg(cnl.cd_nom ORDER BY cnl.cd_nom) as array_agg, count(DISTINCT cnl.cd_nom) | ||
FROM taxonomie.taxref b | ||
JOIN taxonomie.cor_nom_liste cnl | ||
ON cnl.cd_nom = b.cd_nom | ||
GROUP BY b.cd_ref | ||
), | ||
grappe_final AS ( | ||
SELECT new_ref.cd_ref , array_agg(cnl.cd_nom ORDER BY cnl.cd_nom) as array_agg, count(DISTINCT cnl.cd_nom) | ||
FROM taxonomie.import_taxref new_ref | ||
JOIN taxonomie.cor_nom_liste cnl | ||
ON cnl.cd_nom = new_ref.cd_nom | ||
GROUP BY new_ref.cd_ref | ||
), | ||
init_cdnom as ( | ||
select distinct t1.cd_ref, t2.cd_nom, t1.array_agg, t1.count | ||
from grappe_init t1, taxonomie.tmp_bib_noms_copy t2 | ||
where t1.cd_ref = t2.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null | ||
from taxonomie.cor_nom_liste t2 | ||
JOIN taxonomie.taxref t ON t.cd_nom = t2.cd_nom | ||
JOIN grappe_init t1 ON t1.cd_ref = t.cd_ref | ||
order by 1,2), | ||
final_cdnom as ( | ||
select distinct t3.cd_ref, t2.cd_nom, t1.array_agg, t1.count | ||
from grappe_final t1, taxonomie.tmp_bib_noms_copy t2, taxonomie.import_taxref t3 | ||
where t1.cd_ref = t3.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null | ||
from grappe_final t1, taxonomie.cor_nom_liste t2, taxonomie.import_taxref t3 | ||
where t1.cd_ref = t3.cd_ref | ||
and t2.cd_nom = t3.cd_nom | ||
order by 1,2) | ||
SELECT distinct i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count, | ||
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count, | ||
att_list, att_nb, media_nb | ||
order by 1,2) | ||
SELECT DISTINCT i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count, | ||
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count | ||
FROM init_cdnom i | ||
LEFT OUTER JOIN final_cdnom f ON i.cd_nom = f.cd_nom | ||
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref | ||
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref; | ||
WHERE NOT i.array_agg = f.array_agg; | ||
|
||
|
||
|
||
ALTER TABLE tmp_taxref_changes.split_analyze ADD cas varchar(50); | ||
|
||
ALTER TABLE tmp_taxref_changes.comp_grap ADD grappe_change varchar(250); | ||
ALTER TABLE tmp_taxref_changes.comp_grap ADD action varchar(250); | ||
ALTER TABLE tmp_taxref_changes.comp_grap ADD cas varchar(50); | ||
UPDATE tmp_taxref_changes.split_analyze SET cas = 'split' | ||
WHERE i_array_agg @> f_array_agg AND NOT i_array_agg = f_array_agg ; |
Oops, something went wrong.