Skip to content

Commit

Permalink
[ELEBUILD-140] Anonymizer script moet niet afbreken wanneer deze een …
Browse files Browse the repository at this point in the history
…tabel of veld niet kan vinden

- try en catch concept vervangen met valideren of de tabellen en kolommen bestaan voordat data wordt geupdate
  • Loading branch information
Cindy Yap committed Jul 18, 2023
1 parent 02efc87 commit 3836fef
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 86 deletions.
118 changes: 56 additions & 62 deletions src/database/anonymizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,80 +156,74 @@ export const updateToFakerValue = async (table: string, rowConfig: RowConfig) =>
console.log(` [i] Seeding merge table`);
schema.unshift('id');

let columnId = '';
const columnId = await getPrimaryColumnForTable(table);

try {
columnId = await getPrimaryColumnForTable(table);
const {rows: countRows} = await client.execute(`SELECT COUNT(??) as count
FROM ??`, [columnId, table]);

const {rows: countRows} = await client.execute(`SELECT COUNT(??) as count
FROM ??`, [columnId, table]);

const count = countRows?.[0]?.count;
if (count === undefined) {
throw new Error('Expected value');
const count = countRows?.[0]?.count;
if (count === undefined) {
throw new Error('Expected value');
}
const progressBar = progress(10_000);

const queue = new Set<string[]>();
for (let j = 0; j < 10; j++) {
for (let i = 0; i < 1000; i++) {
progressBar.render((j * 1000) + i);
const data = [] as string[];

rowConfig.fakerValue.forEach(({value}) => {
data.push(getFakeData(value));
});
queue.add(data);
}
const progressBar = progress(10_000);

const queue = new Set<string[]>();
for (let j = 0; j < 10; j++) {
for (let i = 0; i < 1000; i++) {
progressBar.render((j * 1000) + i);
const data = [] as string[];

rowConfig.fakerValue.forEach(({value}) => {
data.push(getFakeData(value));
});
queue.add(data);
}

const schemaWithoutId = schema.slice(1);
const columnPlaceholders = Array(schemaWithoutId.length).fill('??').join(', ');
const valuePlaceholders = Array(schemaWithoutId.length).fill('?').join(', ');
const value = Array(queue.size).fill(`(${valuePlaceholders})`).join(', ');
await client.execute(`INSERT INTO ANONYMIZER_JOIN_TABLE (${columnPlaceholders})
VALUES ${value}`, [...schemaWithoutId, ...[...queue].flat(1)]);
queue.clear();
}
progressBar.render(10_000);
const schemaWithoutId = schema.slice(1);
const columnPlaceholders = Array(schemaWithoutId.length).fill('??').join(', ');
const valuePlaceholders = Array(schemaWithoutId.length).fill('?').join(', ');
const value = Array(queue.size).fill(`(${valuePlaceholders})`).join(', ');
await client.execute(`INSERT INTO ANONYMIZER_JOIN_TABLE (${columnPlaceholders})
VALUES ${value}`, [...schemaWithoutId, ...[...queue].flat(1)]);
queue.clear();
}
progressBar.render(10_000);

console.log(` [i] Replacing faker data`);
console.log(` [i] Replacing faker data`);

const updatePlaceholders = schema.slice(1).map(() => `?? = ??`).join(', ');
const updateData = schema.slice(1).flatMap(colName => [`target.${colName}`, `data.${colName}`]);
const updatePlaceholders = schema.slice(1).map(() => `?? = ??`).join(', ');
const updateData = schema.slice(1).flatMap(colName => [`target.${colName}`, `data.${colName}`]);

const updateQuery = `
UPDATE \`${table}\` as target
INNER JOIN \`ANONYMIZER_JOIN_TABLE\` as data
ON IF (target.${columnId} % 10000 = 0, 10000, target.${columnId} % 10000) = data.id
SET ${updatePlaceholders}
`;
const updateQuery = `
UPDATE \`${table}\` as target
INNER JOIN \`ANONYMIZER_JOIN_TABLE\` as data
ON IF (target.${columnId} % 10000 = 0, 10000, target.${columnId} % 10000) = data.id
SET ${updatePlaceholders}
`;

let updateProgress: Progress;
let interval: number;
let updateProgress: Progress;
let interval: number;

if (count > 10_000) {
console.log(` [i] Executing update for ${count} rows`);
let current = timeout;
if (count > 10_000) {
console.log(` [i] Executing update for ${count} rows`);
let current = timeout;

updateProgress = countdown(current);
interval = setInterval(() => {
current -= 1;
updateProgress.render(current);
}, 1000);
}
updateProgress = countdown(current);
interval = setInterval(() => {
current -= 1;
updateProgress.render(current);
}, 1000);
}

const result = await client.execute(updateQuery, updateData);
const result = await client.execute(updateQuery, updateData);

if (count > 10_000) {
clearInterval(interval!);
updateProgress!.end();
}
if (count > 10_000) {
clearInterval(interval!);
updateProgress!.end();
}

if (result.affectedRows != count) {
console.warn(' [!] Some rows were not updated!');
console.warn(` affectedRows: ${result.affectedRows}, rowCount: ${count}`);
}
} catch (e) {
throw new Error(`[!] Error with anonymizing for table ${table}: ${e.message}`)
if (result.affectedRows != count) {
console.warn(' [!] Some rows were not updated!');
console.warn(` affectedRows: ${result.affectedRows}, rowCount: ${count}`);
}
};
102 changes: 78 additions & 24 deletions src/database/transactions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import {Column, Table} from '../interfaces/anonymizer.rules.ts';
import {config} from './config.ts';
import {client} from './connection.ts';
import {parseRowConfig, truncate, updateToFakerValue, updateToStaticValue} from './anonymizer.ts';
import {parseRowConfig, RowConfig, truncate, updateToFakerValue, updateToStaticValue} from './anonymizer.ts';

/**
* Execute the custom_queries in the 'after' object
Expand Down Expand Up @@ -45,27 +45,24 @@ const executeCustomQuery = async (query: string): Promise<void> => {
});
};

async function updateTable(table: string, tables: Record<string, Table>, errors: Error[]) {
console.log(`>> Starting for table ${table}`);
console.time(` Table ${table} done in`);
const columns: Record<string, Column> = tables[table];
const columnNames: string[] = Object.keys(columns);
const rowConfig = parseRowConfig(columnNames, columns);
/**
 * Fetch the names of the tables reported by `SHOW TABLES`.
 *
 * Each result row is read through its `Tables_in_<db>` key, where `<db>` is
 * taken from `client.config.db`.
 *
 * @returns one entry per result row, in the order the rows were returned
 */
async function getDatabaseTables() {
    const {rows} = await client.execute('SHOW TABLES;');

    return rows.map((row) => row[`Tables_in_${client.config.db}`]);
}

async function updateTable(rowConfig: RowConfig, table: string) {
if (rowConfig.truncate) {
await truncate(table);
} else {
if (rowConfig.empty.length > 0 || rowConfig.staticValue.length > 0) {
await updateToStaticValue(table, rowConfig);
}
return;
}

if (rowConfig.fakerValue.length > 0) {
try {
await updateToFakerValue(table, rowConfig);
} catch (e) {
errors.push(e);
}
}
if (rowConfig.empty.length > 0 || rowConfig.staticValue.length > 0) {
await updateToStaticValue(table, rowConfig);
}

if (rowConfig.fakerValue.length > 0) {
await updateToFakerValue(table, rowConfig);
}

console.timeEnd(` Table ${table} done in`);
Expand All @@ -74,27 +71,84 @@ async function updateTable(table: string, tables: Record<string, Table>, errors:
console.log('');
}

/**
 * Query INFORMATION_SCHEMA.COLUMNS for the column names of one table.
 *
 * The schema is taken from `client.config.db`; both the schema and the table
 * name are passed as bound parameters.
 *
 * @param table name of the table whose columns are looked up
 * @returns the raw result of `client.execute` (callers read `rows[n]['COLUMN_NAME']`)
 */
async function getDatabaseTableColumns(table: string) {
    const query = `SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = ?
AND TABLE_NAME = ?;`;
    const parameters = [client.config.db, table];

    return await client.execute(query, parameters);
}

/**
 * Determine which columns of a table should be anonymized.
 *
 * A column qualifies only when it exists in the database table AND is listed
 * in the config file. The 'id' column is always skipped because it is the
 * primary key and therefore must not be anonymized.
 *
 * @param table name of the database table
 * @param configColumns columns configured for this table in the config file
 * @returns the qualifying column names, in the order the database returned them
 */
async function getColumnsToBeProcessed(table: string, configColumns: Record<string, Column>) {
    const {rows} = await getDatabaseTableColumns(table);
    const configuredNames = Object.keys(configColumns);

    return rows
        .map(row => row['COLUMN_NAME'])
        .filter(name => name !== 'id' && configuredNames.includes(name));
}

/**
 * Report every table from the config file that is missing in the database.
 *
 * Nothing is thrown: for each missing table an Error is appended to `errors`
 * so the caller can print a report afterwards.
 *
 * @param configTables tables as defined in the config file (i.e. the json file in the anonymizer folder)
 * @param databaseTables names of the tables found in the database
 * @param errors collector that receives one Error per missing table (mutated in place)
 */
function validateConfigTables(configTables: Record<string, Table>, databaseTables: string[], errors: Error[]) {
    for (const configuredTable of Object.keys(configTables)) {
        if (databaseTables.includes(configuredTable)) {
            continue;
        }

        errors.push(new Error(`Given table '${configuredTable}' does not exist in the database`));
    }
}

/**
 * Report every column from the config file that will not be processed for a table.
 *
 * Nothing is thrown: for each config column that is absent from
 * `toBeProcessedColumns` an Error is appended to `errors`.
 *
 * NOTE(review): the message states the column "does not exist"; when the
 * caller builds `toBeProcessedColumns` with 'id' filtered out, a configured
 * 'id' column would also be reported this way — confirm that is intended.
 *
 * @param configColumns the columns as defined in the config file (i.e. the json file in the anonymizer folder)
 * @param toBeProcessedColumns columns found in the database and specified in the config file
 * @param errors collector that receives one Error per reported column (mutated in place)
 * @param table the name of the table, used in the error message
 */
function validateConfigColumns(configColumns: Record<string, Column>, toBeProcessedColumns: string[], errors: Error[], table: string) {
    for (const configuredColumn of Object.keys(configColumns)) {
        if (toBeProcessedColumns.includes(configuredColumn)) {
            continue;
        }

        errors.push(new Error(`Given column '${configuredColumn}' does not exist in database table '${table}'`));
    }
}

/**
* Run the queries specified in the JSON config file.
*/
const runQueriesFromConfig = async () => {
const errors: Error[] = [];
console.time('Anonymizer done in: ');

const tables: Record<string, Table> = config.tables;
const tableNames: string[] = Object.keys(tables);
const errors: Error[] = [];
const configTables: Record<string, Table> = config.tables;
const databaseTables: string[] = await getDatabaseTables();
const toBeProcessedTables: string[] = Object.keys(configTables).filter(table => databaseTables.includes(table));

validateConfigTables(configTables, databaseTables, errors);

for (const table of toBeProcessedTables) {
console.log(`>> Starting for table ${table}`);
console.time(` Table ${table} done in`);

for (const table of tableNames) {
await updateTable(table, tables, errors);
const configColumns: Record<string, Column> = configTables[table];
const toBeProcessedColumns: string[] = await getColumnsToBeProcessed(table, configColumns);

validateConfigColumns(configColumns, toBeProcessedColumns, errors, table);

await updateTable(parseRowConfig(toBeProcessedColumns, configColumns), table);
}

await client.execute('DROP TABLE IF EXISTS `ANONYMIZER_JOIN_TABLE`;');
console.timeEnd('Anonymizer done in: ');

if (errors.length > 0) {
console.log('------------------------------------------Error Report------------------------------------------');

for (const error of errors) {
console.log(error.message);
console.log(' [error] ' + error.message);
}
}
};
Expand Down

0 comments on commit 3836fef

Please sign in to comment.