From 77f8dd2352079a0e17248f7d25ddc8065f10ca12 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 7 May 2024 17:33:46 +0100 Subject: [PATCH 01/14] add the value to the struct - need a test next --- src/fixers/db_mismatch.rs | 7 +++++++ src/parsers/strategies.rs | 11 +++++++++++ src/parsers/strategy_structs.rs | 4 ++++ src/test_builders.rs | 1 + 4 files changed, 23 insertions(+) diff --git a/src/fixers/db_mismatch.rs b/src/fixers/db_mismatch.rs index 71f743f..0acd8a4 100644 --- a/src/fixers/db_mismatch.rs +++ b/src/fixers/db_mismatch.rs @@ -32,6 +32,7 @@ fn add_missing(current: Vec, missing: &[SimpleColumn]) -> Vec { let mut new_table = StrategyInFile { + truncate: false, table_name: table.clone(), description: "".to_string(), columns: vec![], @@ -95,6 +96,7 @@ mod tests { let current = vec![StrategyInFile { table_name: "public.person".to_string(), description: "".to_string(), + truncate: false, columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")], }]; @@ -119,6 +121,7 @@ mod tests { StrategyInFile { table_name: "public.person".to_string(), description: "".to_string(), + truncate: false, columns: vec![ ColumnInFile::new("id"), ColumnInFile::new("first_name"), @@ -128,6 +131,7 @@ mod tests { StrategyInFile { table_name: "public.location".to_string(), description: "".to_string(), + truncate: false, columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")], }, ]; @@ -141,11 +145,13 @@ mod tests { StrategyInFile { table_name: "public.location".to_string(), description: "".to_string(), + truncate: false, columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")], }, StrategyInFile { table_name: "public.person".to_string(), description: "".to_string(), + truncate: false, columns: vec![ ColumnInFile::new("id"), ColumnInFile::new("first_name"), @@ -174,6 +180,7 @@ mod tests { let expected = vec![StrategyInFile { table_name: "public.person".to_string(), description: "".to_string(), + truncate: false, columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")], }]; diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index 6e3cfbe..02f3044 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -280,6 +280,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![column_in_file( DataCategory::Pii, column_name, @@ -314,16 +315,19 @@ mod tests { StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![], }, StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![], }, StrategyInFile { table_name: table2_name.to_string(), description: "description".to_string(), + truncate: false, columns: vec![duplicated_column.clone(), duplicated_column], }, ]; @@ -343,6 +347,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: "public.person".to_string(), description: "description".to_string(), + truncate: false, columns: vec![column_in_file( DataCategory::Unknown, "first_name", @@ -364,6 +369,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: "public.person".to_string(), description: "description".to_string(), + truncate: false, columns: vec![column_in_file( DataCategory::General, "first_name", @@ -385,6 +391,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: "public.person".to_string(), description: "description".to_string(), + truncate: false, columns: vec![ column_in_file(DataCategory::Pii, "first_name", TransformerType::Identity), column_in_file( @@ -413,6 +420,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![ column_in_file( DataCategory::PotentialPii, @@ -455,6 +463,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![ column_in_file( DataCategory::PotentialPii, @@ -498,6 +507,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![column_in_file( DataCategory::General, SCRAMBLED_COLUMN_NAME, @@ -524,6 +534,7 @@ mod tests { let strategies = vec![StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), + truncate: false, columns: vec![ column_in_file( DataCategory::PotentialPii, diff --git a/src/parsers/strategy_structs.rs b/src/parsers/strategy_structs.rs index e94e198..76098cc 100644 --- a/src/parsers/strategy_structs.rs +++ b/src/parsers/strategy_structs.rs @@ -47,6 +47,10 @@ impl PartialEq for ColumnInFile { pub struct StrategyInFile { pub table_name: String, pub description: String, + + #[serde(default)] + pub truncate: bool, + pub columns: Vec, } diff --git a/src/test_builders.rs b/src/test_builders.rs index bb59aa2..d73dc33 100644 --- a/src/test_builders.rs +++ b/src/test_builders.rs @@ -81,6 +81,7 @@ pub mod builders { pub fn build(self) -> StrategyInFile { StrategyInFile { table_name: self.table_name, + truncate: false, description: self .description .unwrap_or_else(|| "Any description".to_string()), From 536722189223457bfaeb92dc808b4d54cb85319e Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 8 May 2024 09:01:44 +0100 Subject: [PATCH 02/14] wip --- src/anonymiser.rs | 36 ++++++++++++++++++++++++ src/parsers/copy_row.rs | 58 ++++++++++++++++++++------------------- src/parsers/strategies.rs | 46 ++++++++++++++++++++++++++----- test_files/dump_file.sql | 48 ++++++++++++++++++++++++++++++++ test_files/strategy.json | 6 ++++ 5 files changed, 159 insertions(+), 35 deletions(-) diff --git a/src/anonymiser.rs b/src/anonymiser.rs index 8726ac6..b7dd32b 100644 --- a/src/anonymiser.rs +++ b/src/anonymiser.rs @@ -103,4 +103,40 @@ mod tests { String::from_utf8(result.stderr).unwrap() ); } + + #[test] + fn successfully_truncates() { + assert!(anonymise( + "test_files/dump_file.sql".to_string(), + "test_files/results.sql".to_string(), + "test_files/strategy.json".to_string(), + None, + TransformerOverrides::none(), + ) + .is_ok()); + + let db_url = "postgresql://postgres:postgres@localhost"; + let postgres = format!("{}/postgres", db_url); + let mut conn = Client::connect(&postgres, NoTls).expect("expected connection to succeed"); + + conn.simple_query("drop database if exists anonymiser_test") + .unwrap(); + conn.simple_query("create database anonymiser_test") + .unwrap(); + + let result = Command::new("psql") + .arg(format!("{}/anonymiser_test", db_url)) + .arg("-f") + .arg("test_files/results.sql") + .arg("-v") + .arg("ON_ERROR_STOP=1") + .output() + .expect("failed!"); + + assert!( + result.status.success(), + "failed to restore backup:\n{:?}", + String::from_utf8(result.stderr).unwrap() + ); + } } diff --git a/src/parsers/copy_row.rs b/src/parsers/copy_row.rs index 2cd3cb7..e28a61b 100644 --- a/src/parsers/copy_row.rs +++ b/src/parsers/copy_row.rs @@ -1,5 +1,6 @@ use crate::parsers::sanitiser; use crate::parsers::strategies::Strategies; +use crate::parsers::strategies::TableStrategy; use crate::parsers::strategy_structs::ColumnInfo; use lazy_static::lazy_static; use regex::Regex; @@ -7,7 +8,7 @@ use regex::Regex; #[derive(Clone, Debug, PartialEq, Eq)] pub struct CurrentTableTransforms { pub table_name: String, - pub columns: Vec, + pub table_strategy: TableStrategy, // pub columns: Vec, } pub fn parse(copy_row: &str, strategies: &Strategies) -> CurrentTableTransforms { @@ -40,31 +41,38 @@ fn get_current_table_information( .split(", ") .map(sanitiser::dequote_column_or_table_name_data) .collect(); - let columns = columns_from_strategy(strategies, &table_name, &column_list); + let table_strategy = table_strategy(strategies, &table_name, &column_list); CurrentTableTransforms { table_name, - columns, + table_strategy, } } -fn columns_from_strategy( +fn table_strategy( strategies: &Strategies, table_name: &str, column_list: &[String], -) -> Vec { - match strategies.for_table(table_name) { - Some(columns) => column_list - .iter() - .map(|c| match columns.get(c) { - Some(column_info) => column_info.clone(), - None => panic!( - "No transform found for column: {:?} in table: {:?}", - c, table_name - ), - }) - .collect(), - _ => panic!("No transforms found for table: {:?}", table_name), +) -> TableStrategy { + let strategies_for_table = strategies.for_table(table_name); + + match strategies_for_table { + Some(columns_strategy @ TableStrategy::Columns(columns)) => { + for (i, c) in column_list.iter().enumerate() { + match columns.get(c) { + Some(column_info) => (), + None => panic!( + "No transform found for column: {:?} in table: {:?}", + c, table_name + ), + } + } + + return columns_strategy.clone(); + } + + Some(TableStrategy::Truncate) => TableStrategy::Truncate, + None => panic!("No transforms found for table: {:?}", table_name), } } @@ -105,24 +113,18 @@ mod tests { let expected = CurrentTableTransforms { table_name: "public.users".to_string(), - columns: vec![ - ColumnInfo::builder().build(), - ColumnInfo::builder() - .with_transformer(TransformerType::FakeFirstName, None) - .build(), - ColumnInfo::builder() - .with_transformer(TransformerType::FakeLastName, None) - .build(), - ], + table_strategy: TableStrategy::Columns(column_infos), }; assert_eq!(expected.table_name, parsed_copy_row.table_name); - assert_eq!(expected.columns, parsed_copy_row.columns); + assert_eq!(expected.table_strategy, parsed_copy_row.table_strategy); } #[test] fn removes_quotes_around_table_and_column_names() { let expected_column = ColumnInfo::builder().with_name("from").build(); + let expected_table_strategy = + TableStrategy::Columns(HashMap::from([("from".to_string(), expected_column)])); let strategies = Strategies::new_from( "public.references".to_string(), @@ -135,7 +137,7 @@ mod tests { ); assert_eq!("public.references", parsed_copy_row.table_name); - assert_eq!(vec![expected_column], parsed_copy_row.columns); + assert_eq!(expected_table_strategy, parsed_copy_row.table_strategy); } #[test] diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index 02f3044..a4879be 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -5,7 +5,23 @@ use std::collections::HashSet; #[derive(Debug, PartialEq, Eq)] pub struct Strategies { - tables: HashMap>, + tables: HashMap, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum TableStrategy { + Columns(HashMap), + Truncate, +} + +impl TableStrategy { + fn to_columns(self) -> HashMap { + if let TableStrategy::Columns(c) = self { + c + } else { + panic!("Not columns!") + } + } } impl Strategies { @@ -73,7 +89,7 @@ impl Strategies { } } - pub fn for_table(&self, table_name: &str) -> Option<&HashMap> { + pub fn for_table(&self, table_name: &str) -> Option<&TableStrategy> { self.tables.get(table_name) } @@ -81,19 +97,32 @@ impl Strategies { &mut self, table_name: String, columns: HashMap, - ) -> Option> { - self.tables.insert(table_name, columns) + ) -> Option { + self.tables + .insert(table_name, TableStrategy::Columns(columns)) } + // TODO here, we need to work out how to do validation for tuncation pub fn validate_against_db( &self, columns_from_db: HashSet, ) -> Result<(), DbErrors> { - let columns_from_strategy_file: HashSet = self + let (columns, truncate): ( + HashMap, + HashMap, + ) = self .tables + .into_iter() + .partition(|(table, table_strategy)| match table_strategy { + TableStrategy::Columns(columns) => true, + TableStrategy::Truncate => false, + }); + + let columns_from_strategy_file: HashSet = columns .iter() .flat_map(|(table, columns)| { return columns + .to_columns() .iter() .map(|(column, _)| create_simple_column(column, table)); }) @@ -129,14 +158,17 @@ impl Strategies { ) -> Option { self.tables .get(table_name) - .and_then(|table| table.get(column_name)) + .and_then(|table| match table { + TableStrategy::Columns(columns) => columns.get(column_name), + TableStrategy::Truncate => None, + }) .map(|column| column.transformer.clone()) } #[allow(dead_code)] //This is used in tests for convenience pub fn new_from(table_name: String, columns: HashMap) -> Strategies { Strategies { - tables: HashMap::from([(table_name, columns)]), + tables: HashMap::from([(table_name, TableStrategy::Columns(columns))]), } } } diff --git a/test_files/dump_file.sql b/test_files/dump_file.sql index 8c9ca9d..ffd9b7d 100644 --- a/test_files/dump_file.sql +++ b/test_files/dump_file.sql @@ -44,6 +44,29 @@ ALTER TABLE public.orders ALTER COLUMN id ADD GENERATED ALWAYS AS IDENTITY ( CACHE 1 ); +-- +-- Name: extra_data; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.extra_data ( + id bigint NOT NULL, + data character varying(255) NOT NULL, +); + +-- +-- Name: extra_data_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +ALTER TABLE public.extra_data ALTER COLUMN id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME public.extra_data_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + -- -- Name: products; Type: TABLE; Schema: public; Owner: - @@ -120,6 +143,18 @@ COPY public.orders (id, user_id, product_id) FROM stdin; 8 5 2 \. +-- +-- Data for Name: extra_data; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.extra_data (id, user_id, product_id) FROM stdin; +1 this is jank +2 more jank +3 another line of jank +4 yuk, not more jank! +5 you guess it. +\. + -- -- Data for Name: products; Type: TABLE DATA; Schema: public; Owner: - @@ -154,6 +189,12 @@ COPY public.users (id, email, password, last_login, inserted_at, updated_at, fir SELECT pg_catalog.setval('public.orders_id_seq', 8, true); +-- +-- Name: extra_data_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - +-- + +SELECT pg_catalog.setval('public.extra_data_id_seq', 5, true); + -- -- Name: products_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - @@ -176,6 +217,13 @@ SELECT pg_catalog.setval('public.users_id_seq', 7, true); ALTER TABLE ONLY public.orders ADD CONSTRAINT orders_pkey PRIMARY KEY (id); +-- +-- Name: extra_data extra_data_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.extra_data + ADD CONSTRAINT extra_data_pkey PRIMARY KEY (id); + -- -- Name: products products_pkey; Type: CONSTRAINT; Schema: public; Owner: - diff --git a/test_files/strategy.json b/test_files/strategy.json index a468001..3524a4f 100644 --- a/test_files/strategy.json +++ b/test_files/strategy.json @@ -1,4 +1,10 @@ [ + { + "table_name": "public.extra_data", + "description": "", + "truncate": true, + "columns": [] + }, { "table_name": "public.orders", "description": "", From 62f6804f309468a68980d86ea998738da88098e1 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 9 May 2024 09:12:47 +0100 Subject: [PATCH 03/14] wip --- src/parsers/copy_row.rs | 1 - src/parsers/row_parser.rs | 14 ++++++++------ src/parsers/state.rs | 3 ++- src/parsers/strategies.rs | 6 +++--- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/parsers/copy_row.rs b/src/parsers/copy_row.rs index e28a61b..3b4c902 100644 --- a/src/parsers/copy_row.rs +++ b/src/parsers/copy_row.rs @@ -1,7 +1,6 @@ use crate::parsers::sanitiser; use crate::parsers::strategies::Strategies; use crate::parsers::strategies::TableStrategy; -use crate::parsers::strategy_structs::ColumnInfo; use lazy_static::lazy_static; use regex::Regex; diff --git a/src/parsers/row_parser.rs b/src/parsers/row_parser.rs index d5a53af..ac34f7c 100644 --- a/src/parsers/row_parser.rs +++ b/src/parsers/row_parser.rs @@ -147,6 +147,7 @@ fn add_create_table_row_to_types(line: &str, mut current_types: Vec) -> mod tests { use super::*; use crate::parsers::rng; + use crate::parsers::strategies::TableStrategy; use crate::parsers::strategy_structs::{ColumnInfo, DataCategory, TransformerType}; use crate::parsers::types::{SubType, Type}; use std::collections::HashMap; @@ -309,8 +310,9 @@ mod tests { match state.position { Position::InCopy { current_table } => { - let expected_columns = vec![id_column, first_name_column, last_name_column]; - assert_eq!(expected_columns, current_table.columns) + let expected_columns = + TableStrategy::Columns(vec![id_column, first_name_column, last_name_column]); + assert_eq!(expected_columns, current_table.table_strategy) } _other => unreachable!("Position is not InCopy!"), }; @@ -375,11 +377,11 @@ mod tests { position: Position::InCopy { current_table: CurrentTableTransforms { table_name: "public.users".to_string(), - columns: vec![ + table_strategy: TableStrategy::Columns(vec![ ColumnInfo::builder().with_name("column_1").build(), ColumnInfo::builder().with_name("column_2").build(), ColumnInfo::builder().with_name("column_3").build(), - ], + ]), }, }, types: Types::builder() @@ -402,7 +404,7 @@ mod tests { position: Position::InCopy { current_table: CurrentTableTransforms { table_name: "public.users".to_string(), - columns: vec![ + table_strategy: TableStrategy::Columns(vec![ ColumnInfo::builder() .with_name("column_1") .with_transformer( @@ -424,7 +426,7 @@ mod tests { Some(HashMap::from([("value".to_string(), "third".to_string())])), ) .build(), - ], + ]), }, }, types: Types::builder() diff --git a/src/parsers/state.rs b/src/parsers/state.rs index 31afd43..d8a66c6 100644 --- a/src/parsers/state.rs +++ b/src/parsers/state.rs @@ -78,6 +78,7 @@ impl State { #[cfg(test)] mod tests { use super::*; + use crate::parsers::strategies::TableStrategy; use crate::parsers::types::Column; use std::collections::HashMap; @@ -94,7 +95,7 @@ mod tests { let new_position = Position::InCopy { current_table: CurrentTableTransforms { table_name: "table-mc-tableface".to_string(), - columns: Vec::new(), + table_strategy: TableStrategy::Columns(HashMap::from([])), }, }; diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index a4879be..943f608 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -10,12 +10,12 @@ pub struct Strategies { #[derive(Debug, Clone, Eq, PartialEq)] pub enum TableStrategy { - Columns(HashMap), + Columns(Vec), Truncate, } impl TableStrategy { - fn to_columns(self) -> HashMap { + fn to_columns(self) -> Vec { if let TableStrategy::Columns(c) = self { c } else { @@ -96,7 +96,7 @@ impl Strategies { pub fn insert( &mut self, table_name: String, - columns: HashMap, + columns: Vec, ) -> Option { self.tables .insert(table_name, TableStrategy::Columns(columns)) From 3a6acfdb3d25292249bf6bb9da252507f7d103bf Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 May 2024 09:07:12 +0100 Subject: [PATCH 04/14] passing tests --- src/anonymiser.rs | 25 +++++-- src/file_reader.rs | 6 ++ src/parsers/copy_row.rs | 94 ++++++++++++++---------- src/parsers/row_parser.rs | 54 +++++++++----- src/parsers/state.rs | 4 +- src/parsers/strategies.rs | 134 ++++++++++++++++------------------- src/parsers/strategy_file.rs | 1 + test_files/dump_file.sql | 5 +- 8 files changed, 187 insertions(+), 136 deletions(-) diff --git a/src/anonymiser.rs b/src/anonymiser.rs index b7dd32b..e27532d 100644 --- a/src/anonymiser.rs +++ b/src/anonymiser.rs @@ -83,13 +83,13 @@ mod tests { let postgres = format!("{}/postgres", db_url); let mut conn = Client::connect(&postgres, NoTls).expect("expected connection to succeed"); - conn.simple_query("drop database if exists anonymiser_test") + conn.simple_query("drop database if exists successfully_transforms_test_db") .unwrap(); - conn.simple_query("create database anonymiser_test") + conn.simple_query("create database successfully_transforms_test_db") .unwrap(); let result = Command::new("psql") - .arg(format!("{}/anonymiser_test", db_url)) + .arg(format!("{}/successfully_transforms_test_db", db_url)) .arg("-f") .arg("test_files/results.sql") .arg("-v") @@ -119,13 +119,15 @@ mod tests { let postgres = format!("{}/postgres", db_url); let mut conn = Client::connect(&postgres, NoTls).expect("expected connection to succeed"); - conn.simple_query("drop database if exists anonymiser_test") + conn.simple_query("drop database if exists successfully_truncates_db_name") .unwrap(); - conn.simple_query("create database anonymiser_test") + conn.simple_query("create database successfully_truncates_db_name") .unwrap(); + conn.close().expect("expected connection to close"); + let result = Command::new("psql") - .arg(format!("{}/anonymiser_test", db_url)) + .arg(format!("{}/successfully_truncates_db_name", db_url)) .arg("-f") .arg("test_files/results.sql") .arg("-v") @@ -138,5 +140,16 @@ mod tests { "failed to restore backup:\n{:?}", String::from_utf8(result.stderr).unwrap() ); + + let test_db = format!("{}/successfully_truncates_db_name", db_url); + let mut test_db_conn = + Client::connect(&test_db, NoTls).expect("expected connection to succeed"); + + let extra_data_row_count: i64 = test_db_conn + .query_one("select count(*) from extra_data", &[]) + .unwrap() + .get(0); + + assert_eq!(extra_data_row_count, 0); } } diff --git a/src/file_reader.rs b/src/file_reader.rs index 2a5282f..6709a13 100644 --- a/src/file_reader.rs +++ b/src/file_reader.rs @@ -98,6 +98,12 @@ mod tests { strategy_tuple("phone_number"), ]), ); + + strategies.insert( + "public.extra_data".to_string(), + HashMap::from([strategy_tuple("id"), strategy_tuple("data")]), + ); + strategies } diff --git a/src/parsers/copy_row.rs b/src/parsers/copy_row.rs index 3b4c902..8adf5a1 100644 --- a/src/parsers/copy_row.rs +++ b/src/parsers/copy_row.rs @@ -1,13 +1,20 @@ use crate::parsers::sanitiser; use crate::parsers::strategies::Strategies; use crate::parsers::strategies::TableStrategy; +use crate::parsers::strategy_structs::ColumnInfo; use lazy_static::lazy_static; use regex::Regex; #[derive(Clone, Debug, PartialEq, Eq)] pub struct CurrentTableTransforms { pub table_name: String, - pub table_strategy: TableStrategy, // pub columns: Vec, + pub table_transformers: TableTransformers, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TableTransformers { + ColumnTransformer(Vec), + Truncator, } pub fn parse(copy_row: &str, strategies: &Strategies) -> CurrentTableTransforms { @@ -36,41 +43,45 @@ fn get_current_table_information( strategies: &Strategies, ) -> CurrentTableTransforms { let table_name = sanitiser::dequote_column_or_table_name_data(table); - let column_list: Vec = unsplit_columns + let column_name_list: Vec = unsplit_columns .split(", ") .map(sanitiser::dequote_column_or_table_name_data) .collect(); - let table_strategy = table_strategy(strategies, &table_name, &column_list); + println!("column_name_list: {:?}", column_name_list); + println!("strategies: {:?}", strategies); + let table_transformers = table_strategy(strategies, &table_name, &column_name_list); CurrentTableTransforms { table_name, - table_strategy, + table_transformers, } } fn table_strategy( strategies: &Strategies, table_name: &str, - column_list: &[String], -) -> TableStrategy { + column_name_list: &[String], +) -> TableTransformers { let strategies_for_table = strategies.for_table(table_name); + println!("Strategies for table: {:?}", strategies_for_table); + println!("Column name list: {:?}", column_name_list); match strategies_for_table { - Some(columns_strategy @ TableStrategy::Columns(columns)) => { - for (i, c) in column_list.iter().enumerate() { - match columns.get(c) { - Some(column_info) => (), + Some(TableStrategy::Columns(columns_with_names)) => { + let column_infos = column_name_list + .iter() + .map(|column_name| match columns_with_names.get(column_name) { + Some(column_info) => column_info.clone(), None => panic!( "No transform found for column: {:?} in table: {:?}", - c, table_name + column_name, table_name ), - } - } - - return columns_strategy.clone(); + }) + .collect(); + TableTransformers::ColumnTransformer(column_infos) } - Some(TableStrategy::Truncate) => TableStrategy::Truncate, + Some(TableStrategy::Truncate) => TableTransformers::Truncator, None => panic!("No transforms found for table: {:?}", table_name), } } @@ -89,22 +100,23 @@ mod tests { #[test] fn returns_transforms_for_table() { - let column_infos = HashMap::from([ - ("id".to_string(), ColumnInfo::builder().build()), - ( - "first_name".to_string(), - ColumnInfo::builder() - .with_transformer(TransformerType::FakeFirstName, None) - .build(), - ), - ( - "last_name".to_string(), - ColumnInfo::builder() - .with_transformer(TransformerType::FakeLastName, None) - .build(), - ), - ]); - let strategies = Strategies::new_from("public.users".to_string(), column_infos); + let columns = vec![ + ColumnInfo::builder().with_name("id").build(), + ColumnInfo::builder() + .with_transformer(TransformerType::FakeFirstName, None) + .with_name("first_name") + .build(), + ColumnInfo::builder() + .with_transformer(TransformerType::FakeLastName, None) + .with_name("last_name") + .build(), + ]; + let column_infos_with_name: HashMap = columns + .iter() + .map(|column| (column.name.clone(), column.clone())) + .collect(); + let strategies = + Strategies::new_from("public.users".to_string(), column_infos_with_name.clone()); let parsed_copy_row = parse( "COPY public.users (id, first_name, last_name) FROM stdin;\n", &strategies, @@ -112,19 +124,19 @@ mod tests { let expected = CurrentTableTransforms { table_name: "public.users".to_string(), - table_strategy: TableStrategy::Columns(column_infos), + table_transformers: TableTransformers::ColumnTransformer(columns), }; assert_eq!(expected.table_name, parsed_copy_row.table_name); - assert_eq!(expected.table_strategy, parsed_copy_row.table_strategy); + assert_eq!( + expected.table_transformers, + parsed_copy_row.table_transformers + ); } #[test] fn removes_quotes_around_table_and_column_names() { let expected_column = ColumnInfo::builder().with_name("from").build(); - let expected_table_strategy = - TableStrategy::Columns(HashMap::from([("from".to_string(), expected_column)])); - let strategies = Strategies::new_from( "public.references".to_string(), HashMap::from([("from".to_string(), expected_column.clone())]), @@ -135,8 +147,14 @@ mod tests { &strategies, ); + let expected_table_transformers = + TableTransformers::ColumnTransformer(vec![expected_column]); + assert_eq!("public.references", parsed_copy_row.table_name); - assert_eq!(expected_table_strategy, parsed_copy_row.table_strategy); + assert_eq!( + expected_table_transformers, + parsed_copy_row.table_transformers + ); } #[test] diff --git a/src/parsers/row_parser.rs b/src/parsers/row_parser.rs index ac34f7c..921b721 100644 --- a/src/parsers/row_parser.rs +++ b/src/parsers/row_parser.rs @@ -1,8 +1,9 @@ -use crate::parsers::copy_row::CurrentTableTransforms; +use crate::parsers::copy_row::{CurrentTableTransforms, TableTransformers}; use crate::parsers::create_row; use crate::parsers::sanitiser; use crate::parsers::state::*; use crate::parsers::strategies::Strategies; +use crate::parsers::strategy_structs::ColumnInfo; use crate::parsers::transformer; use crate::parsers::types; use crate::parsers::types::Column; @@ -101,22 +102,38 @@ fn transform_row( line: &str, current_table: &CurrentTableTransforms, types: &Types, +) -> String { + match current_table.table_transformers { + TableTransformers::ColumnTransformer(ref columns) => { + transform_row_with_columns(rng, line, ¤t_table.table_name, columns, types) + } + + TableTransformers::Truncator => "".to_string(), + } +} + +fn transform_row_with_columns( + rng: &mut SmallRng, + line: &str, + table_name: &str, + columns: &[ColumnInfo], + types: &Types, ) -> String { let column_values = data_row::split(line); let mut transformed = column_values.enumerate().map(|(i, value)| { - let current_column = ¤t_table.columns[i]; + let current_column = &columns[i]; let column_type = types //TODO this lookup, we do a double hashmap lookup for every column... already know the //table, so we shouldnt need to do both... can we cache the current tables columns //hashmap? - .lookup(¤t_table.table_name, ¤t_column.name) + .lookup(table_name, ¤t_column.name) .unwrap_or_else(|| { panic!( "No type found for {}.{}\nI did find these for the table: {:?}", - current_table.table_name, + table_name, current_column.name, - types.for_table(¤t_table.table_name) + types.for_table(table_name) ) }); @@ -125,7 +142,7 @@ fn transform_row( value, column_type, ¤t_column.transformer, - ¤t_table.table_name, + table_name, ) }); @@ -146,8 +163,8 @@ fn add_create_table_row_to_types(line: &str, mut current_types: Vec) -> #[cfg(test)] mod tests { use super::*; + use crate::parsers::copy_row::TableTransformers; use crate::parsers::rng; - use crate::parsers::strategies::TableStrategy; use crate::parsers::strategy_structs::{ColumnInfo, DataCategory, TransformerType}; use crate::parsers::types::{SubType, Type}; use std::collections::HashMap; @@ -310,9 +327,12 @@ mod tests { match state.position { Position::InCopy { current_table } => { - let expected_columns = - TableStrategy::Columns(vec![id_column, first_name_column, last_name_column]); - assert_eq!(expected_columns, current_table.table_strategy) + let expected_columns = TableTransformers::ColumnTransformer(vec![ + id_column, + first_name_column, + last_name_column, + ]); + assert_eq!(expected_columns, current_table.table_transformers) } _other => unreachable!("Position is not InCopy!"), }; @@ -377,7 +397,7 @@ mod tests { position: Position::InCopy { current_table: CurrentTableTransforms { table_name: "public.users".to_string(), - table_strategy: TableStrategy::Columns(vec![ + table_transformers: TableTransformers::ColumnTransformer(vec![ ColumnInfo::builder().with_name("column_1").build(), ColumnInfo::builder().with_name("column_2").build(), ColumnInfo::builder().with_name("column_3").build(), @@ -404,7 +424,7 @@ mod tests { position: Position::InCopy { current_table: CurrentTableTransforms { table_name: "public.users".to_string(), - table_strategy: TableStrategy::Columns(vec![ + table_transformers: TableTransformers::ColumnTransformer(vec![ ColumnInfo::builder() .with_name("column_1") .with_transformer( @@ -449,10 +469,12 @@ mod tests { position: Position::InCopy { current_table: CurrentTableTransforms { table_name: "public.users".to_string(), - columns: vec![ColumnInfo::builder() - .with_name("column_1") - .with_transformer(TransformerType::Scramble, None) - .build()], + table_transformers: TableTransformers::ColumnTransformer(vec![ + ColumnInfo::builder() + .with_name("column_1") + .with_transformer(TransformerType::Scramble, None) + .build(), + ]), }, }, types: Types::builder() diff --git a/src/parsers/state.rs b/src/parsers/state.rs index d8a66c6..c92a67e 100644 --- a/src/parsers/state.rs +++ b/src/parsers/state.rs @@ -78,7 +78,7 @@ impl State { #[cfg(test)] mod tests { use super::*; - use crate::parsers::strategies::TableStrategy; + use crate::parsers::copy_row::TableTransformers; use crate::parsers::types::Column; use std::collections::HashMap; @@ -95,7 +95,7 @@ mod tests { let new_position = Position::InCopy { current_table: CurrentTableTransforms { table_name: "table-mc-tableface".to_string(), - table_strategy: TableStrategy::Columns(HashMap::from([])), + table_transformers: TableTransformers::ColumnTransformer(vec![]), }, }; diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index 943f608..0cd82e2 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -1,8 +1,11 @@ use crate::parsers::strategy_errors::{DbErrors, ValidationErrors}; use crate::parsers::strategy_structs::*; +use itertools::{Either, Itertools}; use std::collections::HashMap; use std::collections::HashSet; +type ColumnNamesToInfo = HashMap; + #[derive(Debug, PartialEq, Eq)] pub struct Strategies { tables: HashMap, @@ -10,20 +13,10 @@ pub struct Strategies { #[derive(Debug, Clone, Eq, PartialEq)] pub enum TableStrategy { - Columns(Vec), + Columns(ColumnNamesToInfo), Truncate, } -impl TableStrategy { - fn to_columns(self) -> Vec { - if let TableStrategy::Columns(c) = self { - c - } else { - panic!("Not columns!") - } - } -} - impl Strategies { pub fn new() -> Strategies { Strategies { @@ -39,45 +32,49 @@ impl Strategies { let mut errors = ValidationErrors::new(); for strategy in strategies_in_file { - let mut columns = HashMap::::new(); - for column in strategy.columns { - if (column.data_category == DataCategory::PotentialPii - || column.data_category == DataCategory::Pii) - && column.transformer.name == TransformerType::Identity - { - errors - .unanonymised_pii - .push(create_simple_column(&column.name, &strategy.table_name)); - } - if column.data_category == DataCategory::Unknown { - errors - .unknown_data_categories - .push(create_simple_column(&column.name, &strategy.table_name)); - } - if column.transformer.name == TransformerType::Error { - errors - .error_transformer_types - .push(create_simple_column(&column.name, &strategy.table_name)); - } - let result = columns.insert( - column.name.clone(), - ColumnInfo { - data_category: column.data_category.clone(), - name: column.name.clone(), - transformer: transformer(column, transformer_overrides), - }, - ); - if let Some(dupe) = result { - errors.duplicate_columns.push(create_simple_column( - &dupe.name, - &strategy.table_name.clone(), - )) + if strategy.truncate { + transformed_strategies.insert_truncate(strategy.table_name); + } else { + let mut columns = HashMap::::new(); + for column in strategy.columns { + if (column.data_category == DataCategory::PotentialPii + || column.data_category == DataCategory::Pii) + && column.transformer.name == TransformerType::Identity + { + errors + .unanonymised_pii + .push(create_simple_column(&strategy.table_name, &column.name)); + } + if column.data_category == DataCategory::Unknown { + errors + .unknown_data_categories + .push(create_simple_column(&strategy.table_name, &column.name)); + } + if column.transformer.name == TransformerType::Error { + errors + .error_transformer_types + .push(create_simple_column(&strategy.table_name, &column.name)); + } + let result = columns.insert( + column.name.clone(), + ColumnInfo { + data_category: column.data_category.clone(), + name: column.name.clone(), + transformer: transformer(column, transformer_overrides), + }, + ); + if let Some(dupe) = result { + errors.duplicate_columns.push(create_simple_column( + &strategy.table_name.clone(), + &dupe.name, + )) + } } - } - let result = transformed_strategies.insert(strategy.table_name.clone(), columns); - if result.is_some() { - errors.duplicate_tables.push(strategy.table_name); + let result = transformed_strategies.insert(strategy.table_name.clone(), columns); + if result.is_some() { + errors.duplicate_tables.push(strategy.table_name); + } } } @@ -96,35 +93,37 @@ impl Strategies { pub fn insert( &mut self, table_name: String, - columns: Vec, + columns: HashMap, ) -> Option { self.tables .insert(table_name, TableStrategy::Columns(columns)) } + pub fn insert_truncate(&mut self, table_name: String) -> Option { + self.tables.insert(table_name, TableStrategy::Truncate) + } // TODO here, we need to work out how to do validation for tuncation pub fn validate_against_db( &self, columns_from_db: HashSet, ) -> Result<(), DbErrors> { - let (columns, truncate): ( - HashMap, - HashMap, - ) = self + // from self, split into 2 groups, one for tables, one for truncate + + let (columns_by_table, _truncate): (Vec<(String, ColumnNamesToInfo)>, Vec<_>) = self .tables + .clone() .into_iter() - .partition(|(table, table_strategy)| match table_strategy { - TableStrategy::Columns(columns) => true, - TableStrategy::Truncate => false, + .partition_map(|(table, table_strategy)| match table_strategy { + TableStrategy::Columns(columns) => Either::Left((table, columns)), + TableStrategy::Truncate => Either::Right(table), }); - let columns_from_strategy_file: HashSet = columns + let columns_from_strategy_file: HashSet = columns_by_table .iter() - .flat_map(|(table, columns)| { - return columns - .to_columns() - .iter() - .map(|(column, _)| create_simple_column(column, table)); + .flat_map(|(table_name, columns)| { + columns.iter().map(|(column_name, _column_info)| { + create_simple_column(table_name, column_name) + }) }) .collect(); @@ -173,7 +172,7 @@ impl Strategies { } } -fn create_simple_column(column_name: &str, table_name: &str) -> SimpleColumn { +fn create_simple_column(table_name: &str, column_name: &str) -> SimpleColumn { SimpleColumn { table_name: table_name.to_string(), column_name: column_name.to_string(), @@ -631,7 +630,7 @@ mod tests { I: Iterator, { let mut strategies = Strategies::new(); - strategies.insert(table_name.to_string(), HashMap::from_iter(columns)); + add_table(&mut strategies, table_name, columns); strategies } @@ -664,11 +663,4 @@ mod tests { .build(), ) } - - fn create_simple_column(table_name: &str, column_name: &str) -> SimpleColumn { - SimpleColumn { - table_name: table_name.to_string(), - column_name: column_name.to_string(), - } - } } diff --git a/src/parsers/strategy_file.rs b/src/parsers/strategy_file.rs index 6432abe..8eb027f 100644 --- a/src/parsers/strategy_file.rs +++ b/src/parsers/strategy_file.rs @@ -15,6 +15,7 @@ pub fn read(file_name: &str) -> Result, std::io::Error> { }) }); + println!("{:?}", result); match result { Ok(_) => result, Err(ref err) => match err.kind() { diff --git a/test_files/dump_file.sql b/test_files/dump_file.sql index ffd9b7d..647655a 100644 --- a/test_files/dump_file.sql +++ b/test_files/dump_file.sql @@ -50,7 +50,7 @@ ALTER TABLE public.orders ALTER COLUMN id ADD GENERATED ALWAYS AS IDENTITY ( CREATE TABLE public.extra_data ( id bigint NOT NULL, - data character varying(255) NOT NULL, + data character varying(255) NOT NULL ); -- @@ -147,7 +147,7 @@ COPY public.orders (id, user_id, product_id) FROM stdin; -- Data for Name: extra_data; Type: TABLE DATA; Schema: public; Owner: - -- -COPY public.extra_data (id, user_id, product_id) FROM stdin; +COPY public.extra_data (id, data) FROM stdin; 1 this is jank 2 more jank 3 another line of jank @@ -155,7 +155,6 @@ COPY public.extra_data (id, user_id, product_id) FROM stdin; 5 you guess it. \. - -- -- Data for Name: products; Type: TABLE DATA; Schema: public; Owner: - -- From d223bf099942c045ed50462ccc1d033fae164151 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 May 2024 09:12:17 +0100 Subject: [PATCH 05/14] fix warning --- src/parsers/copy_row.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parsers/copy_row.rs b/src/parsers/copy_row.rs index 8adf5a1..0d3337d 100644 --- a/src/parsers/copy_row.rs +++ b/src/parsers/copy_row.rs @@ -115,8 +115,7 @@ mod tests { .iter() .map(|column| (column.name.clone(), column.clone())) .collect(); - let strategies = - Strategies::new_from("public.users".to_string(), column_infos_with_name.clone()); + let strategies = Strategies::new_from("public.users".to_string(), column_infos_with_name); let parsed_copy_row = parse( "COPY public.users (id, first_name, last_name) FROM stdin;\n", &strategies, From 216ed1757111f5d7c34d6ee884e1e43f248e2232 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 May 2024 09:14:20 +0100 Subject: [PATCH 06/14] removing printlns --- src/parsers/copy_row.rs | 4 ---- src/parsers/strategy_file.rs | 1 - src/parsers/transformer.rs | 1 - 3 files changed, 6 deletions(-) diff --git a/src/parsers/copy_row.rs b/src/parsers/copy_row.rs index 0d3337d..dcff72a 100644 --- a/src/parsers/copy_row.rs +++ b/src/parsers/copy_row.rs @@ -47,8 +47,6 @@ fn get_current_table_information( .split(", ") .map(sanitiser::dequote_column_or_table_name_data) .collect(); - println!("column_name_list: {:?}", column_name_list); - println!("strategies: {:?}", strategies); let table_transformers = table_strategy(strategies, &table_name, &column_name_list); CurrentTableTransforms { @@ -64,8 +62,6 @@ fn table_strategy( ) -> TableTransformers { let strategies_for_table = strategies.for_table(table_name); - println!("Strategies for table: {:?}", strategies_for_table); - println!("Column name list: {:?}", column_name_list); match strategies_for_table { Some(TableStrategy::Columns(columns_with_names)) => { let column_infos = column_name_list diff --git a/src/parsers/strategy_file.rs b/src/parsers/strategy_file.rs index 8eb027f..6432abe 100644 --- a/src/parsers/strategy_file.rs +++ b/src/parsers/strategy_file.rs @@ -15,7 +15,6 @@ pub fn read(file_name: &str) -> Result, std::io::Error> { }) }); - println!("{:?}", result); match result { Ok(_) => result, Err(ref err) => match err.kind() { diff --git a/src/parsers/transformer.rs b/src/parsers/transformer.rs index b5283b7..4016f94 100644 --- a/src/parsers/transformer.rs +++ b/src/parsers/transformer.rs @@ -1037,7 +1037,6 @@ mod tests { }, TABLE_NAME, ); - println!("{new_value}"); assert!(new_value != initial_value); assert!(!new_value.contains("Second line")); assert!(!new_value.contains("Third line")); From 9db637af5c8faf89b76feb6970b919dbe76c1b67 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 May 2024 09:15:10 +0100 Subject: [PATCH 07/14] typo --- test_files/dump_file.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_files/dump_file.sql b/test_files/dump_file.sql index 647655a..209d37d 100644 --- a/test_files/dump_file.sql +++ b/test_files/dump_file.sql @@ -152,7 +152,7 @@ COPY public.extra_data (id, data) FROM stdin; 2 more jank 3 another line of jank 4 yuk, not more jank! -5 you guess it. +5 you guessed it. \. -- From 03376511af0fdb610fdf257c3fb36019a6890023 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 16 May 2024 08:53:44 +0100 Subject: [PATCH 08/14] add retry commands to make things a bit easier --- Cargo.lock | 11 +++++++++++ Cargo.toml | 1 + src/main.rs | 49 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e943992..0401b03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,7 @@ dependencies = [ "base16", "base32", "chrono", + "colored", "fake", "flate2", "itertools", @@ -232,6 +233,16 @@ dependencies = [ "cc", ] +[[package]] +name = "colored" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +dependencies = [ + "lazy_static", + "windows-sys 0.48.0", +] + [[package]] name = "core-foundation" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index 26cbb61..d4e2f4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ uuid = { version = "1.8", features = [ "v4"] } mimalloc = "0.1.41" log = "0.4.21" zstd = "0.13.1" +colored = "2.1.0" [dev-dependencies] pretty_assertions = "1.4.0" diff --git a/src/main.rs b/src/main.rs index d1b051d..03e6c88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ use crate::opts::{Anonymiser, Opts}; use crate::parsers::strategies::Strategies; use crate::parsers::strategy_errors::StrategyFileError; use crate::parsers::strategy_structs::{StrategyInFile, TransformerOverrides}; +use colored::Colorize; use native_tls::TlsConnector; use postgres_native_tls::MakeTlsConnector; @@ -57,30 +58,35 @@ fn main() -> Result<(), std::io::Error> { Anonymiser::CheckStrategies { strategy_file, db_url, - } => { - let strategies = strategy_file::read(&strategy_file).unwrap_or_else(|_| Vec::new()); - - match strategy_differences(strategies, db_url) { + } => match read_strategy_file(&strategy_file, &db_url) { + Ok(strategies) => match strategy_differences(strategies, db_url.clone()) { Ok(()) => println!("All up to date"), Err(err) => { println!("{}", err); if fixer::can_fix(&err) { - println!("But the great news is we can fix at least some of your mess... try running with \"fix-strategies\""); + let retry_command = format!( + "anonymiser fix-strategies --db-url={} --strategy-file={}", + db_url, strategy_file + ) + .green(); + println!("But the great news is we can fix at least some of your mess... try running:\n{}", retry_command); } else { println!("Bad news... we currently cannot fix this for you, you'll have to sort it out yourself!"); } std::process::exit(1); } + }, + Err(err) => { + println!("{}", err); + std::process::exit(1); } - } + }, Anonymiser::FixStrategies { strategy_file, db_url, - } => { - let strategies = strategy_file::read(&strategy_file).unwrap_or_else(|_| Vec::new()); - - match strategy_differences(strategies, db_url) { + } => match read_strategy_file(&strategy_file, &db_url) { + Ok(strategies) => match strategy_differences(strategies, db_url) { Ok(()) => match fixer::just_sort(&strategy_file) { SortResult::Sorted => { println!("Ok, we've updated that for you, check your diff!") @@ -95,8 +101,12 @@ fn main() -> Result<(), std::io::Error> { fixer::fix(&strategy_file, err); println!("All done, you probably want to run \"check-strategies\" again to make sure"); } + }, + Err(err) => { + println!("{}", err); + std::process::exit(1); } - } + }, Anonymiser::GenerateStrategies { strategy_file, @@ -121,6 +131,23 @@ fn main() -> Result<(), std::io::Error> { Ok(()) } +fn read_strategy_file(strategy_file: &str, db_url: &str) -> Result, String> { + match strategy_file::read(strategy_file) { + Ok(strategies) => Ok(strategies), + Err(_) => { + let retry_command = format!( + "anonymiser generate-strategies --db-url={} --strategy-file={}", + db_url, strategy_file + ) + .green(); + Err(format!( + "Strategy file {} not found. You can use \n{}\nto create an initial file", + strategy_file, retry_command + )) + } + } +} + fn strategy_differences( strategies: Vec, db_url: String, From 65ea71c26fd0ef0c80582bd069b41f1069f97dcf Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Jun 2024 10:08:03 +0100 Subject: [PATCH 09/14] add truncate validation + fix flaky tests --- src/anonymiser.rs | 5 +++-- src/file_reader.rs | 5 +++-- src/parsers/strategies.rs | 22 +++++++++++++++++++--- src/parsers/strategy_errors.rs | 2 +- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/anonymiser.rs b/src/anonymiser.rs index e27532d..fa38807 100644 --- a/src/anonymiser.rs +++ b/src/anonymiser.rs @@ -70,9 +70,10 @@ mod tests { #[test] fn successfully_transforms() { + let result_file_name = "test_files/results_successfully_transforms.sql"; assert!(anonymise( "test_files/dump_file.sql".to_string(), - "test_files/results.sql".to_string(), + result_file_name.to_string(), "test_files/strategy.json".to_string(), None, TransformerOverrides::none(), @@ -91,7 +92,7 @@ mod tests { let result = Command::new("psql") .arg(format!("{}/successfully_transforms_test_db", db_url)) .arg("-f") - .arg("test_files/results.sql") + .arg(result_file_name) .arg("-v") .arg("ON_ERROR_STOP=1") .output() diff --git a/src/file_reader.rs b/src/file_reader.rs index 6709a13..3c44c91 100644 --- a/src/file_reader.rs +++ b/src/file_reader.rs @@ -129,7 +129,8 @@ mod tests { fn can_read_and_output_compressed_with_default() { let input_file = "test_files/dump_file.sql".to_string(); let compressed_file = "test_files/compressed_file_reader_test_results.sql".to_string(); - let uncompressed_file_name = "test_files/uncompressed_file_reader_test_results.sql"; + let uncompressed_file_name = + "test_files/uncompressed_file_reader_can_read_and_output_compressed_with_default.sql"; let _ = fs::remove_file(&compressed_file); let _ = fs::remove_file(uncompressed_file_name); @@ -162,7 +163,7 @@ mod tests { fn can_read_and_output_compressed_with_specific_compression_type() { let input_file = "test_files/dump_file.sql".to_string(); let compressed_file = "test_files/compressed_file_reader_test_results.sql".to_string(); - let uncompressed_file_name = "test_files/uncompressed_file_reader_test_results.sql"; + let uncompressed_file_name = "test_files/uncompressed_file_reader_can_read_and_output_compressed_with_sepcific_compression_type.sql"; let _ = fs::remove_file(&compressed_file); let _ = fs::remove_file(uncompressed_file_name); diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index 0cd82e2..f5b1d6c 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -109,7 +109,7 @@ impl Strategies { ) -> Result<(), DbErrors> { // from self, split into 2 groups, one for tables, one for truncate - let (columns_by_table, _truncate): (Vec<(String, ColumnNamesToInfo)>, Vec<_>) = self + let (columns_by_table, truncate): (Vec<(String, ColumnNamesToInfo)>, Vec<_>) = self .tables .clone() .into_iter() @@ -127,13 +127,19 @@ impl Strategies { }) .collect(); + let columns_from_db_without_truncate: HashSet = columns_from_db + .iter() + .filter(|column| !truncate.contains(&column.table_name)) + .cloned() + .collect(); + let mut errors = DbErrors { - missing_from_strategy_file: columns_from_db + missing_from_strategy_file: columns_from_db_without_truncate .difference(&columns_from_strategy_file) .cloned() .collect(), missing_from_db: columns_from_strategy_file - .difference(&columns_from_db) + .difference(&columns_from_db_without_truncate) .cloned() .collect(), }; @@ -298,6 +304,16 @@ mod tests { vec!(create_simple_column("public.person", "first_name")) ); } + #[test] + fn validates_truncate() { + let mut strategies = Strategies::new(); + strategies.insert_truncate("public.location".to_string()); + + let columns_from_db = HashSet::from([create_simple_column("public.location", "postcode")]); + let result = strategies.validate_against_db(columns_from_db); + + assert_eq!(Ok(()), result); + } const TABLE_NAME: &str = "gert_lush_table"; const PII_COLUMN_NAME: &str = "pii_column"; diff --git a/src/parsers/strategy_errors.rs b/src/parsers/strategy_errors.rs index d8e5941..ee6532f 100644 --- a/src/parsers/strategy_errors.rs +++ b/src/parsers/strategy_errors.rs @@ -29,7 +29,7 @@ impl From for StrategyFileError { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct DbErrors { pub missing_from_strategy_file: Vec, pub missing_from_db: Vec, From 5892843dd5b616248a79ac5bc8b8f455138b9015 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Jun 2024 10:20:11 +0100 Subject: [PATCH 10/14] adding test for whole missing table --- src/parsers/strategies.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index f5b1d6c..4174e7d 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -315,6 +315,19 @@ mod tests { assert_eq!(Ok(()), result); } + #[test] + fn validates_missing_entire_table() { + let strategies = Strategies::new(); + + let columns_from_db = HashSet::from([create_simple_column("public.location", "postcode")]); + let error = strategies.validate_against_db(columns_from_db).unwrap_err(); + + assert_eq!( + error.missing_from_strategy_file, + vec!(create_simple_column("public.location", "postcode")) + ); + } + const TABLE_NAME: &str = "gert_lush_table"; const PII_COLUMN_NAME: &str = "pii_column"; const COMMERCIALLY_SENSITIVE_COLUMN_NAME: &str = "commercially_sensitive_column"; From f92a8d35f74081c48baf08512dce88c206e9d65a Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Jun 2024 10:38:30 +0100 Subject: [PATCH 11/14] bump rust --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 082758f..8114837 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ parameters: jobs: test: docker: - - image: cimg/rust:1.67 + - image: cimg/rust:1.78 environment: CARGO_NET_GIT_FETCH_WITH_CLI: true - image: postgres:13.4 @@ -41,7 +41,7 @@ jobs: build_release_linux_universal: docker: - - image: clux/muslrust:1.67.1 + - image: clux/muslrust:1.78.0 environment: CARGO_NET_GIT_FETCH_WITH_CLI: true steps: @@ -134,7 +134,7 @@ jobs: - run: brew install cmake - run: TAG="${CIRCLE_TAG:-v0.0.0}"; ./update_version $TAG - run: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - - run: rustup install 1.67 + - run: rustup install 1.78 - run: rustup target add aarch64-apple-darwin - run: cargo build --release --target=x86_64-apple-darwin - run: cargo build --release --target=aarch64-apple-darwin From e906ed01f48fdb9b6a9ff8a8ea722d813eb51913 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Jun 2024 11:14:25 +0100 Subject: [PATCH 12/14] sorting cargo2junit --- build_and_test | 4 ++-- src/fixers/db_mismatch.rs | 26 +++++++++++++++----------- src/parsers/transformer.rs | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/build_and_test b/build_and_test index 679a55c..8bcde13 100755 --- a/build_and_test +++ b/build_and_test @@ -1,8 +1,8 @@ #!/bin/bash set -e -cargo install cargo2junit --version 0.1.12 +cargo install cargo2junit --version 0.1.13 cargo fmt -- --check cargo clippy --all-targets --all-features -- -D warnings -cargo test -- -Z unstable-options --format json --report-time | cargo2junit > results.xml +RUSTC_BOOTSTRAP=1 cargo test -- -Z unstable-options --format json --report-time | cargo2junit > results.xml exit ${PIPESTATUS[0]} diff --git a/src/fixers/db_mismatch.rs b/src/fixers/db_mismatch.rs index 0acd8a4..23d739f 100644 --- a/src/fixers/db_mismatch.rs +++ b/src/fixers/db_mismatch.rs @@ -13,12 +13,15 @@ pub fn fix( } fn add_missing(current: Vec, missing: &[SimpleColumn]) -> Vec { - let missing_columns_by_table = missing.iter().fold(HashMap::new(), |mut acc, column| { - acc.entry(column.table_name.clone()) - .or_insert_with(Vec::new) - .push(column.column_name.clone()); - acc - }); + let missing_columns_by_table = missing.iter().fold( + HashMap::new(), + |mut acc: HashMap>, column| { + acc.entry(column.table_name.clone()) + .or_default() + .push(column.column_name.clone()); + acc + }, + ); let mut new_strategies = current; @@ -52,14 +55,15 @@ fn remove_redundant( existing: Vec, redundant_columns_to_remove: &[SimpleColumn], ) -> Vec { - let table_names = redundant_columns_to_remove - .iter() - .fold(HashMap::new(), |mut acc, column| { + let table_names = redundant_columns_to_remove.iter().fold( + HashMap::new(), + |mut acc: HashMap>, column| { acc.entry(column.table_name.clone()) - .or_insert_with(Vec::new) + .or_default() .push(column.column_name.clone()); acc - }); + }, + ); existing .into_iter() diff --git a/src/parsers/transformer.rs b/src/parsers/transformer.rs index 4016f94..8203816 100644 --- a/src/parsers/transformer.rs +++ b/src/parsers/transformer.rs @@ -95,7 +95,7 @@ fn transform_array<'value>( transformer: &Transformer, table_name: &str, ) -> Cow<'value, str> { - let quoted_types = vec![SubType::Character, SubType::Json]; + let quoted_types = [SubType::Character, SubType::Json]; let requires_quotes = quoted_types.contains(underlying_type); let sub_type = SingleValue { From 743877f6f617a8e702f07ff68d5b88d2b3178bf7 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Jun 2024 11:21:51 +0100 Subject: [PATCH 13/14] use correct tag --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8114837..2247d00 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,7 +41,7 @@ jobs: build_release_linux_universal: docker: - - image: clux/muslrust:1.78.0 + - image: clux/muslrust:1.78.0-stable environment: CARGO_NET_GIT_FETCH_WITH_CLI: true steps: From 54c4c05a444e020a9180321421e5678549f57a7c Mon Sep 17 00:00:00 2001 From: Daniel Turner Date: Tue, 29 Oct 2024 15:52:28 +0000 Subject: [PATCH 14/14] Update config.yml --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index bef409c..3f54559 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -136,6 +136,7 @@ jobs: - run: TAG="${CIRCLE_TAG:-v0.0.0}"; ./update_version $TAG - run: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - run: rustup install 1.78 + - run: rustup target add x86_64-apple-darwin - run: rustup target add aarch64-apple-darwin - run: cargo build --release --target=x86_64-apple-darwin - run: cargo build --release --target=aarch64-apple-darwin