Skip to content

Commit

Permalink
refactor(migrate_taxref): refactor all SQL files
Browse files Browse the repository at this point in the history
  • Loading branch information
jpm-cbna committed Aug 7, 2024
1 parent 14614a7 commit 14532af
Show file tree
Hide file tree
Showing 7 changed files with 783 additions and 528 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
DROP TABLE IF EXISTS taxonomie.tmp_bib_noms_copy;
-- ----------------------------------------------------------------------
-- Create a copy of bib_noms table
DROP TABLE IF EXISTS taxonomie.tmp_bib_noms_copy ;

CREATE TABLE taxonomie.tmp_bib_noms_copy (
id_nom serial PRIMARY KEY,
Expand All @@ -12,40 +14,41 @@ CREATE TABLE taxonomie.tmp_bib_noms_copy (
tmp_import boolean
);

INSERT INTO taxonomie.tmp_bib_noms_copy (
id_nom, cd_nom, cd_ref, nom_francais, comments
)
SELECT id_nom, cd_nom, cd_ref, nom_francais, comments
FROM taxonomie.bib_noms;

INSERT INTO taxonomie.tmp_bib_noms_copy (id_nom, cd_nom, cd_ref, nom_francais, comments)
SELECT id_nom, cd_nom, cd_ref, nom_francais, comments
FROM taxonomie.bib_noms ;

SELECT setval(
'taxonomie.tmp_bib_noms_copy_id_nom_seq',
(SELECT max(id_nom) FROM taxonomie.tmp_bib_noms_copy ),
true
);

--- ajout Nicolas Imbert
create index IF NOT EXISTS i_tmp_cdnom_disparu_cd_nom on taxonomie.cdnom_disparu (cd_nom);

-- ----------------------------------------------------------------------
-- CASE 1 - Deleted cd_nom with replacement

--- CAS 1 - cd_nom de remplacement à utiliser.
UPDATE taxonomie.tmp_bib_noms_copy n SET deleted = true ,
commentaire_disparition = raison_suppression || COALESCE(' nouveau cd_nom :' || a.cd_nom_remplacement, ''),
cd_nom_remplacement = a.cd_nom_remplacement
-- Update tmp_bib_noms_copy fields about deleted cd_nom
UPDATE taxonomie.tmp_bib_noms_copy AS nc SET
deleted = true,
commentaire_disparition = (
a.raison_suppression || COALESCE(' nouveau cd_nom :' || a.cd_nom_remplacement, '')
),
cd_nom_remplacement = a.cd_nom_remplacement
FROM (
SELECT d.*
FROM taxonomie.bib_noms n
JOIN taxonomie.cdnom_disparu d
ON n.cd_nom = d.cd_nom
) a
WHERE n.cd_nom = a.cd_nom;

------------- Cas avec cd_nom de remplacement
-- Ajout du cd_nom de remplacement quand il n'existait pas dans bib_noms
INSERT INTO taxonomie.tmp_bib_noms_copy(cd_nom, cd_ref, nom_francais, tmp_import)
SELECT d.cd_nom_remplacement, n.cd_ref, n.nom_francais, true
FROM taxonomie.tmp_bib_noms_copy n
JOIN taxonomie.cdnom_disparu d ON n.cd_nom = d.cd_nom
WHERE NOT n.cd_nom_remplacement IS NULL
ON CONFLICT DO NOTHING;
SELECT d.*
FROM taxonomie.bib_noms AS n
JOIN taxonomie.cdnom_disparu AS d
ON n.cd_nom = d.cd_nom
) AS a
WHERE nc.cd_nom = a.cd_nom ;

-- Add replacement cd_nom when not already exists in tmp_bib_noms_copy
-- TODO: check if this query is mandatory and not a duplicate of previous queries !
INSERT INTO taxonomie.tmp_bib_noms_copy (cd_nom, cd_ref, nom_francais, tmp_import)
SELECT d.cd_nom_remplacement, nc.cd_ref, nc.nom_francais, true
FROM taxonomie.tmp_bib_noms_copy AS nc
JOIN taxonomie.cdnom_disparu AS d
ON nc.cd_nom = d.cd_nom
WHERE nc.cd_nom_remplacement IS NOT NULL
ON CONFLICT DO NOTHING ;
Original file line number Diff line number Diff line change
@@ -1,48 +1,55 @@
-- ----------------------------------------------------------------------
-- Create temporary taxref schema
CREATE SCHEMA IF NOT EXISTS tmp_taxref_changes;


-- ----------------------------------------------------------------------
-- Add the cd_nom dependency search function
DROP FUNCTION IF EXISTS public.deps_test_fk_dependencies_cd_nom();
CREATE OR REPLACE FUNCTION public.deps_test_fk_dependencies_cd_nom()
RETURNS void AS
$BODY$
declare
v_curr record;
begin
DROP TABLE IF EXISTS tmp_taxref_changes.dps_fk_cd_nom;
CREATE TABLE tmp_taxref_changes.dps_fk_cd_nom (
cd_nom int,
table_name varchar(250)
);
FOR v_curr IN (
SELECT
'SELECT DISTINCT d.' || kcu.column_name || ' as cd_nom, ''' || tc.table_schema || '.' || tc.table_name || ''' as table
FROM ' || tc.table_schema || '.' || tc.table_name || ' d
LEFT OUTER JOIN taxonomie.import_taxref it ON it.cd_nom = d.' || kcu.column_name || '
WHERE it.cd_nom IS NULL AND NOT d.' || kcu.column_name || ' IS NULL' as select,
tc.table_schema,
tc.table_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_catalog = kcu.constraint_catalog
AND tc.constraint_schema = kcu.constraint_schema
AND tc.constraint_name = kcu.constraint_name
JOIN information_schema.referential_constraints rc
ON tc.constraint_catalog = rc.constraint_catalog
AND tc.constraint_schema = rc.constraint_schema
AND tc.constraint_name = rc.constraint_name
JOIN information_schema.constraint_column_usage ccu
ON rc.unique_constraint_catalog = ccu.constraint_catalog
AND rc.unique_constraint_schema = ccu.constraint_schema
AND rc.unique_constraint_name = ccu.constraint_name
WHERE lower(tc.constraint_type) in ('foreign key')
AND ccu.column_name = 'cd_nom' OR ccu.column_name = 'cd_ref'
)
LOOP
EXECUTE 'INSERT INTO tmp_taxref_changes.dps_fk_cd_nom ' || v_curr.select;
END LOOP;
END
RETURNS void AS
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
DECLARE
v_curr record;
BEGIN
DROP TABLE IF EXISTS tmp_taxref_changes.dps_fk_cd_nom;

CREATE TABLE tmp_taxref_changes.dps_fk_cd_nom (
cd_nom int,
table_name varchar(250)
);

FOR v_curr IN (
SELECT
'SELECT DISTINCT
d.' || kcu.column_name || ' AS cd_nom, ''' ||
tc.table_schema || '.' || tc.table_name || ''' AS table
FROM ' || tc.table_schema || '.' || tc.table_name || ' AS d
LEFT JOIN taxonomie.import_taxref AS it
ON it.cd_nom = d.' || kcu.column_name || '
WHERE it.cd_nom IS NULL
AND d.' || kcu.column_name || ' IS NOT NULL ' AS SELECT,
tc.table_schema,
tc.table_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_catalog = kcu.constraint_catalog
AND tc.constraint_schema = kcu.constraint_schema
AND tc.constraint_name = kcu.constraint_name
JOIN information_schema.referential_constraints rc
ON tc.constraint_catalog = rc.constraint_catalog
AND tc.constraint_schema = rc.constraint_schema
AND tc.constraint_name = rc.constraint_name
JOIN information_schema.constraint_column_usage ccu
ON rc.unique_constraint_catalog = ccu.constraint_catalog
AND rc.unique_constraint_schema = ccu.constraint_schema
AND rc.unique_constraint_name = ccu.constraint_name
WHERE lower(tc.constraint_type) IN ('foreign key')
AND ccu.column_name = 'cd_nom' OR ccu.column_name = 'cd_ref'
)
LOOP
EXECUTE 'INSERT INTO tmp_taxref_changes.dps_fk_cd_nom ' || v_curr.select;
END LOOP;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100 ;
Original file line number Diff line number Diff line change
@@ -1,59 +1,98 @@
---- #################################################################################
---- #################################################################################
---- Répercussion des changements taxonomiques
---- #################################################################################
---- #################################################################################

-- ----------------------------------------------------------------------
-- Create temporary taxref schema
CREATE SCHEMA IF NOT EXISTS tmp_taxref_changes;


-- ----------------------------------------------------------------------
-- Intialize comp_grap table to store impacts of taxonomic changes

DROP TABLE IF EXISTS tmp_taxref_changes.comp_grap ;

-- TODO: why not used import_taxref for array_agg in "grappe_final" !?
CREATE TABLE tmp_taxref_changes.comp_grap AS
WITH grappe_init AS (
SELECT distinct b.cd_ref , array_agg(cd_nom ORDER BY cd_nom) as array_agg, count(DISTINCT cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
WHERE NOT deleted = true and cd_nom is not null
GROUP BY cd_ref
),
grappe_final AS (
SELECT distinct t.cd_ref , array_agg(b.cd_nom ORDER BY b.cd_nom) as array_agg, count(DISTINCT b.cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
JOIN taxonomie.import_taxref t
ON b.cd_nom = t.cd_nom
WHERE NOT deleted = true and b.cd_nom is not null
GROUP BY t.cd_ref
),
attribs AS (
SELECT DISTINCT a.cd_ref, array_agg(id_attribut) as att_list, count(DISTINCT id_attribut) as att_nb
FROM taxonomie.cor_taxon_attribut a
WHERE NOT valeur_attribut ='{}' AND NOT valeur_attribut =''
GROUP BY a.cd_ref
),
media AS (
SELECT DISTINCT cd_ref, count(id_media) as media_nb
FROM taxonomie.t_medias
GROUP BY cd_ref
),
init_cdnom as (
select distinct t1.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_init t1, taxonomie.tmp_bib_noms_copy t2
where t1.cd_ref = t2.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
order by 1,2),
final_cdnom as (
select distinct t3.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_final t1, taxonomie.tmp_bib_noms_copy t2, taxonomie.import_taxref t3
where t1.cd_ref = t3.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
and t2.cd_nom = t3.cd_nom
order by 1,2)
SELECT distinct i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count,
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count,
att_list, att_nb, media_nb
FROM init_cdnom i
LEFT OUTER JOIN final_cdnom f ON i.cd_nom = f.cd_nom
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref;
WITH grappe_init AS (
SELECT DISTINCT
cd_ref,
array_agg(cd_nom ORDER BY cd_nom) AS array_agg,
count(DISTINCT cd_nom)
FROM taxonomie.tmp_bib_noms_copy
WHERE deleted != true
AND cd_nom IS NOT NULL
GROUP BY cd_ref
),
grappe_final AS (
SELECT DISTINCT
it.cd_ref,
array_agg(n.cd_nom ORDER BY n.cd_nom) AS array_agg,
count(DISTINCT n.cd_nom)
FROM taxonomie.tmp_bib_noms_copy AS n
JOIN taxonomie.import_taxref AS it
ON n.cd_nom = it.cd_nom
WHERE n.deleted != true
AND n.cd_nom IS NOT NULL
GROUP BY it.cd_ref
),
attribs AS (
SELECT DISTINCT
cd_ref,
array_agg(id_attribut) AS att_list,
count(DISTINCT id_attribut) AS att_nb
FROM taxonomie.cor_taxon_attribut
WHERE valeur_attribut !='{}'
AND valeur_attribut != ''
GROUP BY cd_ref
),
media AS (
SELECT DISTINCT
cd_ref,
count(id_media) AS media_nb
FROM taxonomie.t_medias
GROUP BY cd_ref
),
init_cdnom AS (
SELECT DISTINCT
gi.cd_ref,
n.cd_nom,
gi.array_agg,
gi.count
FROM grappe_init AS gi, taxonomie.tmp_bib_noms_copy AS n
WHERE gi.cd_ref = n.cd_ref
AND n.deleted != true
AND n.cd_nom IS NOT NULL
ORDER BY 1, 2
),
final_cdnom AS (
SELECT DISTINCT
it.cd_ref,
n.cd_nom,
gf.array_agg,
gf.count
FROM grappe_final AS gf, taxonomie.tmp_bib_noms_copy AS n, taxonomie.import_taxref AS it
WHERE gf.cd_ref = it.cd_ref
AND n.deleted != true
AND n.cd_nom IS NOT NULL
AND n.cd_nom = it.cd_nom
ORDER BY 1, 2
)
SELECT DISTINCT
i.cd_ref AS i_cd_ref,
i.array_agg AS i_array_agg,
i.count AS i_count,
f.cd_ref AS f_cd_ref,
f.array_agg AS f_array_agg,
f.count AS f_count,
att_list,
att_nb,
media_nb
FROM init_cdnom AS i
LEFT JOIN final_cdnom AS f
ON i.cd_nom = f.cd_nom
LEFT JOIN attribs AS a
ON i.cd_ref = a.cd_ref
LEFT JOIN media AS m
ON i.cd_ref = m.cd_ref ;


ALTER TABLE tmp_taxref_changes.comp_grap ADD grappe_change varchar(250);
ALTER TABLE tmp_taxref_changes.comp_grap ADD action varchar(250);
ALTER TABLE tmp_taxref_changes.comp_grap ADD cas varchar(50);
ALTER TABLE tmp_taxref_changes.comp_grap ADD grappe_change varchar(250) ;
ALTER TABLE tmp_taxref_changes.comp_grap ADD "action" varchar(250) ;
ALTER TABLE tmp_taxref_changes.comp_grap ADD cas varchar(50) ;
Loading

0 comments on commit 14532af

Please sign in to comment.