diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bb24300 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM python:3.9 + +ARG SOURCE +ARG COMMIT_HASH +ARG COMMIT_ID +ARG BUILD_TIME +LABEL source=${SOURCE} +LABEL commit_hash=${COMMIT_HASH} +LABEL commit_id=${COMMIT_ID} +LABEL build_time=${BUILD_TIME} + +ARG container_user=mosip +ARG container_user_group=mosip +ARG container_user_uid=1001 +ARG container_user_gid=1001 + +# Create user group +RUN groupadd -r ${container_user_group} + +# Create user with specific ID +RUN useradd -u ${container_user_uid} -r -g ${container_user_group} -s /bin/bash -m -d /home/${container_user} ${container_user} + +WORKDIR /home/${container_user} +USER ${container_user} + +ENV MYDIR=`pwd` +ENV DATE="$(date --utc +%FT%T.%3NZ)" +ENV ENABLE_INSECURE=false +ENV MODULE= + +ENV s3-host= +ENV s3-region= +ENV s3-user-key= +ENV s3-user-secret= +ENV s3-bucket-name= + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY mosipvaluefinder.py . +CMD ["python", "mosipvaluefinder.py"] \ No newline at end of file diff --git a/README.md b/README.md index 1f09f06..6de2e61 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# security-tools -Repository containing required security tool's code for MOSIP. +# DBvaluefinder (WIP) +## This is a script that looks for certain types of Data in the DB and keeps reporting in a simple text file diff --git a/mosipvaluefinder.py b/mosipvaluefinder.py new file mode 100644 index 0000000..2b3f88a --- /dev/null +++ b/mosipvaluefinder.py @@ -0,0 +1,115 @@ +import psycopg2 +from stdnum import verhoeff +from deduce import Deduce +import re + +def is_valid_verhoeff(number): + return verhoeff.is_valid(str(number)) + +def is_valid_email(email): + email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$') + match = email_pattern.match(str(email)) + return bool(match) + +def is_valid_mobile_number(phone_number): + pattern = re.compile(r'^[9]\d{9}$') + match = re.match(pattern, str(phone_number)) + return bool(match) + +def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables): + deduce_instance = Deduce() + + with connection.cursor() as cursor: + cursor.execute(f"SET search_path TO {schema_name}") + cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,)) + tables = [table[0] for table in cursor.fetchall()] + + with open(output_file, 'a') as deduced_file: + for table_name in tables: + + if ignore_tables and table_name in ignore_tables: + print(f"Ignoring Table: {table_name} in Database: {database_name}") + continue + + print(f"Currently checking Table: {table_name} in Database: {database_name}") + deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n") + + cursor.execute(f'SELECT * FROM {table_name}') + rows = cursor.fetchall() + + for row in rows: + for i, column_value in enumerate(row): + column_name = cursor.description[i][0] + + + if ignore_columns and column_name in ignore_columns: + continue + + deduced_result = deduce_instance.deidentify( + str(column_value), + disabled={'names', 'institutions', 'locations', 'dates', 'ages','urls'} + ) + + if deduced_result.annotations and is_valid_verhoeff(column_value): + deduced_file.write(f"Column: {column_name}, Data: {column_value}\n") + deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mobile_numbers.txt', 'a') as file: + + if deduced_result.annotations and is_valid_mobile_number(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mails.txt', 'a') as file: + + if deduced_result.annotations and is_valid_email(column_value): + file.write("Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + + +def deduce_sensitive_data_in_databases(): + databases = [ + {"name": "mosip_prereg", "schema": "prereg"}, + #{"name": "mosip_keymgr", "schema": "keymgr"}, + #{"name": "mosip_credential", "schema": "credential"}, + #{"name": "mosip_esignet", "schema": "esignet"}, + #{"name": "mosip_hotlist", "schema": "hotlist"}, + #{"name": "mosip_ida", "schema": "ida"}, + #{"name": "mosip_idmap", "schema": "idmap"}, + #{"name": "mosip_idrepo", "schema": "idrepo"}, + #{"name": "mosip_kernel", "schema": "kernel"}, + #{"name": "mosip_master", "schema": "master"}, + #{"name": "mosip_mockidentitysystem", "schema": "mockidentitysystem"}, + #{"name": "mosip_pms", "schema": "pms"}, + #{"name": "mosip_regprc", "schema": "regprc"}, + #{"name": "mosip_resident", "schema": "resident"}, + #{"name": "mosip_toolkit", "schema": "toolkit"} + + + ] + + connection = psycopg2.connect( + host='postgres.dev.mosip.net', + user='postgres', + password='mQi298ZW7p', + database=databases[0]['name'] + ) + + try: + output_file_path = 'id.txt' + ignore_columns = ['status', 'cr_by'] + ignore_tables = ['client_detail','reg_available_slot','batch_job_execution', + 'batch_job_execution_context','batch_job_execution_params','batch_job_instance', + 'batch_step_execution','batch_step_execution_context'] + + for db_info in databases: + print(f"\nAnalyzing data in Database: {db_info['name']}\n") + deduce_sensitive_data(connection, db_info['name'], db_info['schema'], output_file_path, ignore_columns, ignore_tables) + + print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") + + finally: + connection.close() + +deduce_sensitive_data_in_databases() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4e3891f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +psycopg2-binary==2.9.1 +python-stdnum==1.19 +deduce==2.4.4