From 18acffeb7f8e4a98319467790a8ad3062df52486 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 12:06:59 +0530 Subject: [PATCH 01/35] [MOSIP-29854] Creating dockerfile for dbvaluefinder script --- Dockerfile | 40 +++++++++++++++ README.md | 4 +- mosipvaluefinder.py | 115 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 ++ 4 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 Dockerfile create mode 100644 mosipvaluefinder.py create mode 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bb24300 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM python:3.9 + +ARG SOURCE +ARG COMMIT_HASH +ARG COMMIT_ID +ARG BUILD_TIME +LABEL source=${SOURCE} +LABEL commit_hash=${COMMIT_HASH} +LABEL commit_id=${COMMIT_ID} +LABEL build_time=${BUILD_TIME} + +ARG container_user=mosip +ARG container_user_group=mosip +ARG container_user_uid=1001 +ARG container_user_gid=1001 + +# Create user group +RUN groupadd -r ${container_user_group} + +# Create user with specific ID +RUN useradd -u ${container_user_uid} -r -g ${container_user_group} -s /bin/bash -m -d /home/${container_user} ${container_user} + +WORKDIR /home/${container_user} +USER ${container_user} + +ENV MYDIR=`pwd` +ENV DATE="$(date --utc +%FT%T.%3NZ)" +ENV ENABLE_INSECURE=false +ENV MODULE= + +ENV s3-host= +ENV s3-region= +ENV s3-user-key= +ENV s3-user-secret= +ENV s3-bucket-name= + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY mosipvaluefinder.py . +CMD ["python", "mosipvaluefinder.py"] \ No newline at end of file diff --git a/README.md b/README.md index 1f09f06..6de2e61 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# security-tools -Repository containing required security tool's code for MOSIP. +# DBvaluefinder (WIP) +## This is a script that looks for certain types of Data in the DB and keeps reporting in a simple text file diff --git a/mosipvaluefinder.py b/mosipvaluefinder.py new file mode 100644 index 0000000..2b3f88a --- /dev/null +++ b/mosipvaluefinder.py @@ -0,0 +1,115 @@ +import psycopg2 +from stdnum import verhoeff +from deduce import Deduce +import re + +def is_valid_verhoeff(number): + return verhoeff.is_valid(str(number)) + +def is_valid_email(email): + email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$') + match = email_pattern.match(str(email)) + return bool(match) + +def is_valid_mobile_number(phone_number): + pattern = re.compile(r'^[9]\d{9}$') + match = re.match(pattern, str(phone_number)) + return bool(match) + +def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables): + deduce_instance = Deduce() + + with connection.cursor() as cursor: + cursor.execute(f"SET search_path TO {schema_name}") + cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,)) + tables = [table[0] for table in cursor.fetchall()] + + with open(output_file, 'a') as deduced_file: + for table_name in tables: + + if ignore_tables and table_name in ignore_tables: + print(f"Ignoring Table: {table_name} in Database: {database_name}") + continue + + print(f"Currently checking Table: {table_name} in Database: {database_name}") + deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n") + + cursor.execute(f'SELECT * FROM {table_name}') + rows = cursor.fetchall() + + for row in rows: + for i, column_value in enumerate(row): + column_name = cursor.description[i][0] + + + if ignore_columns and column_name in ignore_columns: + continue + + deduced_result = deduce_instance.deidentify( + str(column_value), + disabled={'names', 'institutions', 'locations', 'dates', 'ages','urls'} + ) + + if deduced_result.annotations and is_valid_verhoeff(column_value): + deduced_file.write(f"Column: {column_name}, Data: {column_value}\n") + deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mobile_numbers.txt', 'a') as file: + + if deduced_result.annotations and is_valid_mobile_number(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mails.txt', 'a') as file: + + if deduced_result.annotations and is_valid_email(column_value): + file.write("Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + + +def deduce_sensitive_data_in_databases(): + databases = [ + {"name": "mosip_prereg", "schema": "prereg"}, + #{"name": "mosip_keymgr", "schema": "keymgr"}, + #{"name": "mosip_credential", "schema": "credential"}, + #{"name": "mosip_esignet", "schema": "esignet"}, + #{"name": "mosip_hotlist", "schema": "hotlist"}, + #{"name": "mosip_ida", "schema": "ida"}, + #{"name": "mosip_idmap", "schema": "idmap"}, + #{"name": "mosip_idrepo", "schema": "idrepo"}, + #{"name": "mosip_kernel", "schema": "kernel"}, + #{"name": "mosip_master", "schema": "master"}, + #{"name": "mosip_mockidentitysystem", "schema": "mockidentitysystem"}, + #{"name": "mosip_pms", "schema": "pms"}, + #{"name": "mosip_regprc", "schema": "regprc"}, + #{"name": "mosip_resident", "schema": "resident"}, + #{"name": "mosip_toolkit", "schema": "toolkit"} + + + ] + + connection = psycopg2.connect( + host='postgres.dev.mosip.net', + user='postgres', + password='mQi298ZW7p', + database=databases[0]['name'] + ) + + try: + output_file_path = 'id.txt' + ignore_columns = ['status', 'cr_by'] + ignore_tables = ['client_detail','reg_available_slot','batch_job_execution', + 'batch_job_execution_context','batch_job_execution_params','batch_job_instance', + 'batch_step_execution','batch_step_execution_context'] + + for db_info in databases: + print(f"\nAnalyzing data in Database: {db_info['name']}\n") + deduce_sensitive_data(connection, db_info['name'], db_info['schema'], output_file_path, ignore_columns, ignore_tables) + + print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") + + finally: + connection.close() + +deduce_sensitive_data_in_databases() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4e3891f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +psycopg2-binary==2.9.1 +python-stdnum==1.19 +deduce==2.4.4 From f232b409c3775b97a3fc0fa232602dab71621fd6 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 12:29:47 +0530 Subject: [PATCH 02/35] [MOSIP-29854] Updating directory --- Dockerfile => DataBreachDetector/Dockerfile | 0 README.md => DataBreachDetector/README.md | 0 mosipvaluefinder.py => DataBreachDetector/mosipvaluefinder.py | 0 requirements.txt => DataBreachDetector/requirements.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename Dockerfile => DataBreachDetector/Dockerfile (100%) rename README.md => DataBreachDetector/README.md (100%) rename mosipvaluefinder.py => DataBreachDetector/mosipvaluefinder.py (100%) rename requirements.txt => DataBreachDetector/requirements.txt (100%) diff --git a/Dockerfile b/DataBreachDetector/Dockerfile similarity index 100% rename from Dockerfile rename to DataBreachDetector/Dockerfile diff --git a/README.md b/DataBreachDetector/README.md similarity index 100% rename from README.md rename to DataBreachDetector/README.md diff --git a/mosipvaluefinder.py b/DataBreachDetector/mosipvaluefinder.py similarity index 100% rename from mosipvaluefinder.py rename to DataBreachDetector/mosipvaluefinder.py diff --git a/requirements.txt b/DataBreachDetector/requirements.txt similarity index 100% rename from requirements.txt rename to DataBreachDetector/requirements.txt From 8dabb96c883b0ef7fac5d23bc81e339207583427 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 12:33:29 +0530 Subject: [PATCH 03/35] [MOSIP-29854] Updating filenames --- .../{mosipvaluefinder.py => DataBreachDetector.py} | 0 DataBreachDetector/Dockerfile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename DataBreachDetector/{mosipvaluefinder.py => DataBreachDetector.py} (100%) diff --git a/DataBreachDetector/mosipvaluefinder.py b/DataBreachDetector/DataBreachDetector.py similarity index 100% rename from DataBreachDetector/mosipvaluefinder.py rename to DataBreachDetector/DataBreachDetector.py diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index bb24300..09f46ee 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -36,5 +36,5 @@ ENV s3-bucket-name= COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY mosipvaluefinder.py . +COPY DataBreachDetector.py . CMD ["python", "mosipvaluefinder.py"] \ No newline at end of file From 1aa1c78bf8595db4aea7dc2c03b5d54a248e8ed7 Mon Sep 17 00:00:00 2001 From: Mahesh-Binayak <76687012+Mahesh-Binayak@users.noreply.github.com> Date: Thu, 11 Jan 2024 12:51:32 +0530 Subject: [PATCH 04/35] [MOSIP-29854]Created push_trigger.yml Signed-off-by: Mahesh-Binayak <76687012+Mahesh-Binayak@users.noreply.github.com> --- .github/workflows/docker-image.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/docker-image.yml diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..eac633f --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) From 8130bd12adb2c57409b8252cc5f36e9755d49413 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 12:54:43 +0530 Subject: [PATCH 05/35] [MOSIP-29854] Updating filenames --- .github/workflows/{docker-image.yml => push_trigger.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{docker-image.yml => push_trigger.yml} (100%) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/push_trigger.yml similarity index 100% rename from .github/workflows/docker-image.yml rename to .github/workflows/push_trigger.yml From bcc87bb96b0ebad5db5cc704d3bcb8081344fa77 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:10:00 +0530 Subject: [PATCH 06/35] [MOSIP-29854] Updated push_trigger.yml --- .github/workflows/push_trigger.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index eac633f..a3fb3e4 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -2,9 +2,13 @@ name: Docker Image CI on: push: - branches: [ "master" ] + branches: + - master + - develop pull_request: - branches: [ "master" ] + branches: + - master + - develop jobs: @@ -15,4 +19,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) + run: docker build . --file Dockerfile --tag Databreachdetector:$(date +%s) From 2a4fd2beec5d0d9eb94993f8184e50ea560ce7a6 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:16:30 +0530 Subject: [PATCH 07/35] [MOSIP-29854] Updated push_trigger.yml --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index a3fb3e4..c46a3f1 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -19,4 +19,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag Databreachdetector:$(date +%s) + run: docker build . --file Dockerfile --tag databreachdetector:$(date +%s) From 288f32bf8fd4105ac5191004600934d963f84994 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:18:10 +0530 Subject: [PATCH 08/35] [MOSIP-29854] Updated push_trigger.yml --- .../.github}/workflows/push_trigger.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {.github => DataBreachDetector/.github}/workflows/push_trigger.yml (100%) diff --git a/.github/workflows/push_trigger.yml b/DataBreachDetector/.github/workflows/push_trigger.yml similarity index 100% rename from .github/workflows/push_trigger.yml rename to DataBreachDetector/.github/workflows/push_trigger.yml From 9c17fca9d42c30a8ec02c9811f9962a850df17a3 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:20:21 +0530 Subject: [PATCH 09/35] [MOSIP-29854] Updated push_trigger.yml --- DataBreachDetector/.github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataBreachDetector/.github/workflows/push_trigger.yml b/DataBreachDetector/.github/workflows/push_trigger.yml index c46a3f1..bd0cb68 100644 --- a/DataBreachDetector/.github/workflows/push_trigger.yml +++ b/DataBreachDetector/.github/workflows/push_trigger.yml @@ -1,4 +1,4 @@ -name: Docker Image CI +name: Build Docker Image on: push: From bab1c4a70b9fbecdca2732dfd44723de28e6143c Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:29:30 +0530 Subject: [PATCH 10/35] [MOSIP-29854] Updated push_trigger.yml --- .../.github => .github}/workflows/push_trigger.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {DataBreachDetector/.github => .github}/workflows/push_trigger.yml (100%) diff --git a/DataBreachDetector/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml similarity index 100% rename from DataBreachDetector/.github/workflows/push_trigger.yml rename to .github/workflows/push_trigger.yml From 717ddb09c9fc0c2e7a448c30ea8feed3b54c0ad7 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:31:00 +0530 Subject: [PATCH 11/35] [MOSIP-29854] Updated push_trigger.yml --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index bd0cb68..88222a3 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -19,4 +19,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag databreachdetector:$(date +%s) + run: docker build . --file DataBreachdetector/Dockerfile --tag databreachdetector:$(date +%s) From 27de7eb77c3b7a753032cb57793c04dc97dbf93c Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:31:53 +0530 Subject: [PATCH 12/35] [MOSIP-29854] Updated push_trigger.yml --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 88222a3..263ef48 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -19,4 +19,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file DataBreachdetector/Dockerfile --tag databreachdetector:$(date +%s) + run: docker build . --file DataBreachDetector/Dockerfile --tag databreachdetector:$(date +%s) From 006f175abe6d3d567741b613143f5980c92f0ce1 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 13:33:23 +0530 Subject: [PATCH 13/35] [MOSIP-29854] Updated push_trigger.yml --- DataBreachDetector/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index 09f46ee..1c8db9c 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -37,4 +37,4 @@ ENV s3-bucket-name= COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY DataBreachDetector.py . -CMD ["python", "mosipvaluefinder.py"] \ No newline at end of file +CMD ["python", "DataBreachDetector.py"] \ No newline at end of file From 6608b0c23a150b96bb310dfc2a1377199320f5e6 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 14:06:52 +0530 Subject: [PATCH 14/35] [MOSIP-29854] Updated push_trigger.yml --- DataBreachDetector/Dockerfile | 6 +++--- .../{DataBreachDetector.py => databreachdetector.py} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename DataBreachDetector/{DataBreachDetector.py => databreachdetector.py} (100%) diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index 1c8db9c..ff6059b 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -34,7 +34,7 @@ ENV s3-user-key= ENV s3-user-secret= ENV s3-bucket-name= -COPY requirements.txt . +COPY DataBreachDetector/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY DataBreachDetector.py . -CMD ["python", "DataBreachDetector.py"] \ No newline at end of file +COPY DataBreachDetector/databreachdetector.py . +CMD ["python", "databreachdetector.py"] \ No newline at end of file diff --git a/DataBreachDetector/DataBreachDetector.py b/DataBreachDetector/databreachdetector.py similarity index 100% rename from DataBreachDetector/DataBreachDetector.py rename to DataBreachDetector/databreachdetector.py From 3cbe423bb03e1830fbfc334b6257d5c5047498e8 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 14:20:47 +0530 Subject: [PATCH 15/35] [MOSIP-29854] Updated port number --- DataBreachDetector/databreachdetector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 2b3f88a..8d19369 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -91,6 +91,7 @@ def deduce_sensitive_data_in_databases(): connection = psycopg2.connect( host='postgres.dev.mosip.net', + port=5432, user='postgres', password='mQi298ZW7p', database=databases[0]['name'] From fac267339d806dc485eea4d3ecc6b148a1fdf296 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 16:39:39 +0530 Subject: [PATCH 16/35] [MOSIP-29854] created own image --- .github/workflows/push_trigger.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 263ef48..486997b 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -19,4 +19,8 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file DataBreachDetector/Dockerfile --tag databreachdetector:$(date +%s) + run: docker build . --file DataBreachDetector/Dockerfile --tag databreachdetector:latest + - name: Log in to Docker Hub + run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + - name: Push the Docker image + run: docker push databreachdetector:latest \ No newline at end of file From 5e82dfca3c4e2691080bdc8e406041641b62c8dd Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 16:46:03 +0530 Subject: [PATCH 17/35] [MOSIP-29854] created own image --- .github/workflows/push_trigger.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 486997b..2ae2415 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -15,7 +15,6 @@ jobs: build: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v3 - name: Build the Docker image From 09a30cd4cd645cf5ae4e975b9b43f5ea7352a675 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 16:49:32 +0530 Subject: [PATCH 18/35] [MOSIP-29854] created own image --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 2ae2415..9ee3726 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -22,4 +22,4 @@ jobs: - name: Log in to Docker Hub run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - name: Push the Docker image - run: docker push databreachdetector:latest \ No newline at end of file + run: docker push maheshbinayak1/databreachdetector:latest \ No newline at end of file From 220765cd54a58dd90bd80c33a49098b24e9c42ee Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 11 Jan 2024 16:52:59 +0530 Subject: [PATCH 19/35] [MOSIP-29854] created own image --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 9ee3726..dd54f55 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file DataBreachDetector/Dockerfile --tag databreachdetector:latest + run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:latest - name: Log in to Docker Hub run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - name: Push the Docker image From c135419f065971bc49d81a85f50b1248e93a8db9 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 12 Jan 2024 10:23:00 +0530 Subject: [PATCH 20/35] [MOSIP-29854] created own image and updated script --- DataBreachDetector/databreachdetector.py | 82 ++++++++++++++---------- DataBreachDetector/requirements.txt | 1 + 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 8d19369..97b0256 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -1,7 +1,10 @@ +from minio import Minio +from minio.error import ResponseError import psycopg2 from stdnum import verhoeff from deduce import Deduce import re +import os def is_valid_verhoeff(number): return verhoeff.is_valid(str(number)) @@ -26,7 +29,6 @@ def deduce_sensitive_data(connection, database_name, schema_name, output_file, i with open(output_file, 'a') as deduced_file: for table_name in tables: - if ignore_tables and table_name in ignore_tables: print(f"Ignoring Table: {table_name} in Database: {database_name}") continue @@ -39,54 +41,54 @@ def deduce_sensitive_data(connection, database_name, schema_name, output_file, i for row in rows: for i, column_value in enumerate(row): - column_name = cursor.description[i][0] + column_name = cursor.description[i][0] - if ignore_columns and column_name in ignore_columns: continue deduced_result = deduce_instance.deidentify( str(column_value), - disabled={'names', 'institutions', 'locations', 'dates', 'ages','urls'} + disabled={'names', 'institutions', 'locations', 'dates', 'ages', 'urls'} ) if deduced_result.annotations and is_valid_verhoeff(column_value): deduced_file.write(f"Column: {column_name}, Data: {column_value}\n") deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - - with open('mobile_numbers.txt', 'a') as file: - if deduced_result.annotations and is_valid_mobile_number(column_value): - file.write(f"Column: {column_name}, Data: {column_value}\n") - file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + with open('mobile_numbers.txt', 'a') as file: + if deduced_result.annotations and is_valid_mobile_number(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - with open('mails.txt', 'a') as file: + with open('mails.txt', 'a') as file: + if deduced_result.annotations and is_valid_email(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - if deduced_result.annotations and is_valid_email(column_value): - file.write("Column: {column_name}, Data: {column_value}\n") - file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") +def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name): + mc = Minio(s3_host, + access_key=s3_user_key, + secret_key=s3_user_secret, + region=s3_region, + secure=False) # Set secure=True if using HTTPS - + try: + if not mc.bucket_exists(s3_bucket_name): + mc.make_bucket(s3_bucket_name, location=s3_region) + + mc.fput_object(s3_bucket_name, 'reports/id.txt', 'id.txt') + mc.fput_object(s3_bucket_name, 'reports/mails.txt', 'mails.txt') + mc.fput_object(s3_bucket_name, 'reports/mobile_numbers.txt', 'mobile_numbers.txt') + + print("\nReports pushed to MinIO") + + except ResponseError as err: + print(f"MinIO Error: {err}") def deduce_sensitive_data_in_databases(): databases = [ {"name": "mosip_prereg", "schema": "prereg"}, - #{"name": "mosip_keymgr", "schema": "keymgr"}, - #{"name": "mosip_credential", "schema": "credential"}, - #{"name": "mosip_esignet", "schema": "esignet"}, - #{"name": "mosip_hotlist", "schema": "hotlist"}, - #{"name": "mosip_ida", "schema": "ida"}, - #{"name": "mosip_idmap", "schema": "idmap"}, - #{"name": "mosip_idrepo", "schema": "idrepo"}, - #{"name": "mosip_kernel", "schema": "kernel"}, - #{"name": "mosip_master", "schema": "master"}, - #{"name": "mosip_mockidentitysystem", "schema": "mockidentitysystem"}, - #{"name": "mosip_pms", "schema": "pms"}, - #{"name": "mosip_regprc", "schema": "regprc"}, - #{"name": "mosip_resident", "schema": "resident"}, - #{"name": "mosip_toolkit", "schema": "toolkit"} - - + # ... other databases ] connection = psycopg2.connect( @@ -99,17 +101,27 @@ def deduce_sensitive_data_in_databases(): try: output_file_path = 'id.txt' - ignore_columns = ['status', 'cr_by'] - ignore_tables = ['client_detail','reg_available_slot','batch_job_execution', - 'batch_job_execution_context','batch_job_execution_params','batch_job_instance', - 'batch_step_execution','batch_step_execution_context'] + ignore_columns = ['status', 'cr_by'] + ignore_tables = ['client_detail', 'reg_available_slot', 'batch_job_execution', + 'batch_job_execution_context', 'batch_job_execution_params', 'batch_job_instance', + 'batch_step_execution', 'batch_step_execution_context'] for db_info in databases: print(f"\nAnalyzing data in Database: {db_info['name']}\n") - deduce_sensitive_data(connection, db_info['name'], db_info['schema'], output_file_path, ignore_columns, ignore_tables) + deduce_sensitive_data(connection, db_info['name'], db_info['schema'], output_file_path, ignore_columns, + ignore_tables) print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") + # Add the following lines to push reports to MinIO + s3_host = "http://minio.minio:9000" # Update with your MinIO host + s3_region = "" # Update with your S3 region + s3_user_key = "admin" # Update with your S3 user key + s3_user_secret = "http://minio.minio:9000" # Update with your S3 user secret + s3_bucket_name = "security-testrig" # Update with your S3 bucket name + + push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name) + finally: connection.close() diff --git a/DataBreachDetector/requirements.txt b/DataBreachDetector/requirements.txt index 4e3891f..b8706d6 100644 --- a/DataBreachDetector/requirements.txt +++ b/DataBreachDetector/requirements.txt @@ -1,3 +1,4 @@ psycopg2-binary==2.9.1 python-stdnum==1.19 deduce==2.4.4 +minio==6.0.2 \ No newline at end of file From fae25377bbc00f5524f40a3c927e1a42523ea725 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 12 Jan 2024 10:59:45 +0530 Subject: [PATCH 21/35] [MOSIP-29854] created own image and updated script --- DataBreachDetector/databreachdetector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 97b0256..f532fe2 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -87,7 +87,7 @@ def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucke def deduce_sensitive_data_in_databases(): databases = [ - {"name": "mosip_prereg", "schema": "prereg"}, + {"name": "mosip_esignet", "schema": "esignet"}, # ... other databases ] @@ -114,7 +114,7 @@ def deduce_sensitive_data_in_databases(): print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") # Add the following lines to push reports to MinIO - s3_host = "http://minio.minio:9000" # Update with your MinIO host + s3_host = "10.3.148.78" # Update with your MinIO host s3_region = "" # Update with your S3 region s3_user_key = "admin" # Update with your S3 user key s3_user_secret = "http://minio.minio:9000" # Update with your S3 user secret From 4f5737d24dd40024073064626b6930bb848bbe24 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 12 Jan 2024 12:48:25 +0530 Subject: [PATCH 22/35] [MOSIP-29854] created own image and updated script --- DataBreachDetector/databreachdetector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index f532fe2..ea3fea2 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -114,10 +114,10 @@ def deduce_sensitive_data_in_databases(): print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") # Add the following lines to push reports to MinIO - s3_host = "10.3.148.78" # Update with your MinIO host + s3_host = "minio.minio:9000" # Update with your MinIO host s3_region = "" # Update with your S3 region s3_user_key = "admin" # Update with your S3 user key - s3_user_secret = "http://minio.minio:9000" # Update with your S3 user secret + s3_user_secret = "aYjhgRDXB4" # Update with your S3 user secret s3_bucket_name = "security-testrig" # Update with your S3 bucket name push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name) From 5e7a2933ba91ddbabed327f0ff3567f28f025ade Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 12 Jan 2024 12:49:05 +0530 Subject: [PATCH 23/35] [MOSIP-29854] created own image and updated script --- .github/workflows/push_trigger.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index dd54f55..cbbf01a 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -5,10 +5,12 @@ on: branches: - master - develop + - update pull_request: branches: - master - develop + - update jobs: From e3c6b5700adccc94c7dc25f32204d64eae9056a8 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 18 Jan 2024 15:04:59 +0530 Subject: [PATCH 24/35] [MOSIP-29854] created own image and updated script on update branch --- .github/workflows/push_trigger.yml | 2 +- DataBreachDetector/Dockerfile | 5 +++ DataBreachDetector/databreachdetector.py | 51 ++++++++++++++++++------ DataBreachDetector/db.properties | 14 +++++++ 4 files changed, 58 insertions(+), 14 deletions(-) create mode 100644 DataBreachDetector/db.properties diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index cbbf01a..6d29b2a 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -20,7 +20,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:latest + run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:update - name: Log in to Docker Hub run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - name: Push the Docker image diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index ff6059b..6c75b82 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -33,8 +33,13 @@ ENV s3-region= ENV s3-user-key= ENV s3-user-secret= ENV s3-bucket-name= +ENV pg-host= +ENV pg-port= +ENV pg-user= +ENV pg-password= COPY DataBreachDetector/requirements.txt . +COPY DataBreachDetector/db.properties . RUN pip install --no-cache-dir -r requirements.txt COPY DataBreachDetector/databreachdetector.py . CMD ["python", "databreachdetector.py"] \ No newline at end of file diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index ea3fea2..a0b3e16 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -1,3 +1,4 @@ +from configparser import ConfigParser from minio import Minio from minio.error import ResponseError import psycopg2 @@ -86,18 +87,22 @@ def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucke print(f"MinIO Error: {err}") def deduce_sensitive_data_in_databases(): + # Read connection details from db.properties file + db_host, db_port, db_user, db_password, \ + minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name = read_db_properties() + + # Define the databases list databases = [ - {"name": "mosip_esignet", "schema": "esignet"}, - # ... other databases + {"name": "mosip_esignet", "schema": "esignet"}, + # Add other databases as needed ] connection = psycopg2.connect( - host='postgres.dev.mosip.net', - port=5432, - user='postgres', - password='mQi298ZW7p', - database=databases[0]['name'] - ) + host=db_host, + port=db_port, + user=db_user, + password=db_password, + database="") # The database name is taken from the script's 'databases' list try: output_file_path = 'id.txt' @@ -114,15 +119,35 @@ def deduce_sensitive_data_in_databases(): print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt") # Add the following lines to push reports to MinIO - s3_host = "minio.minio:9000" # Update with your MinIO host - s3_region = "" # Update with your S3 region - s3_user_key = "admin" # Update with your S3 user key - s3_user_secret = "aYjhgRDXB4" # Update with your S3 user secret - s3_bucket_name = "security-testrig" # Update with your S3 bucket name + s3_host = minio_host + s3_region = minio_region + s3_user_key = minio_user_key + s3_user_secret = minio_user_secret + s3_bucket_name = minio_bucket_name push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name) finally: connection.close() +# Function to read properties from db.properties file +def read_db_properties(): + config = ConfigParser() + config.read('db.properties') + + db_host = config.get('PostgreSQL Connection', 'db_host') + db_port = config.getint('PostgreSQL Connection', 'db_port') + db_user = config.get('PostgreSQL Connection', 'db_user') + db_password = config.get('PostgreSQL Connection', 'db_password') + + minio_host = config.get('MinIO Connection', 'minio_host') + minio_region = config.get('MinIO Connection', 'minio_region') + minio_user_key = config.get('MinIO Connection', 'minio_user_key') + minio_user_secret = config.get('MinIO Connection', 'minio_user_secret') + minio_bucket_name = config.get('MinIO Connection', 'minio_bucket_name') + + return (db_host, db_port, db_user, db_password, + minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name) + +# Call the main function deduce_sensitive_data_in_databases() diff --git a/DataBreachDetector/db.properties b/DataBreachDetector/db.properties new file mode 100644 index 0000000..82f2027 --- /dev/null +++ b/DataBreachDetector/db.properties @@ -0,0 +1,14 @@ +# PostgreSQL Connection +[PostgreSQL Connection] +db_host=postgres.dev.mosip.net +db_port=5432 +db_user=postgres +db_password=mQi298ZW7p + +# MinIO Connection +[MinIO Connection] +minio_host=minio.minio:9000 +minio_region=your_s3_region +minio_user_key=admin +minio_user_secret=aYjhgRDXB4 +minio_bucket_name=security-testrig From 57cdf2274f3125d61461f1c967e33ca255fb634d Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 18 Jan 2024 15:06:30 +0530 Subject: [PATCH 25/35] [MOSIP-29854] created own image and updated script on update branch --- .github/workflows/push_trigger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index 6d29b2a..bf20b89 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -24,4 +24,4 @@ jobs: - name: Log in to Docker Hub run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - name: Push the Docker image - run: docker push maheshbinayak1/databreachdetector:latest \ No newline at end of file + run: docker push maheshbinayak1/databreachdetector:update \ No newline at end of file From de7f1ce51e9d4a84a2b52bc511903848dff98724 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 18 Jan 2024 15:17:30 +0530 Subject: [PATCH 26/35] [MOSIP-29854] created own image and updated script on update branch --- DataBreachDetector/databreachdetector.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index a0b3e16..e3b54ee 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -77,6 +77,10 @@ def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucke if not mc.bucket_exists(s3_bucket_name): mc.make_bucket(s3_bucket_name, location=s3_region) + # Ensure files exist before attempting to upload + for filename in ['id.txt', 'mails.txt', 'mobile_numbers.txt']: + open(filename, 'a').close() + mc.fput_object(s3_bucket_name, 'reports/id.txt', 'id.txt') mc.fput_object(s3_bucket_name, 'reports/mails.txt', 'mails.txt') mc.fput_object(s3_bucket_name, 'reports/mobile_numbers.txt', 'mobile_numbers.txt') From 5d92313202ab1ab94441f3025414abf61fb4c84a Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 18 Jan 2024 18:15:39 +0530 Subject: [PATCH 27/35] [MOSIP-29854] created own image and updated script on update branch --- .github/workflows/push_trigger.yml | 4 +- DataBreachDetector/databreachdetector.py | 49 +++++++++++++----------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index bf20b89..a9ba04c 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -20,8 +20,8 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:update + run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:final - name: Log in to Docker Hub run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - name: Push the Docker image - run: docker push maheshbinayak1/databreachdetector:update \ No newline at end of file + run: docker push maheshbinayak1/databreachdetector:final \ No newline at end of file diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index e3b54ee..8b26eab 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -91,9 +91,33 @@ def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucke print(f"MinIO Error: {err}") def deduce_sensitive_data_in_databases(): - # Read connection details from db.properties file - db_host, db_port, db_user, db_password, \ - minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name = read_db_properties() + # Read connection details from environment variables or db.properties file + db_host = os.environ.get('pg-host') + db_port = os.environ.get('pg-port') + db_user = os.environ.get('pg-user') + db_password = os.environ.get('pg-password') + + minio_host = os.environ.get('s3-host') + minio_region = os.environ.get('s3-region') + minio_user_key = os.environ.get('s3-user-key') + minio_user_secret = os.environ.get('s3-user-secret') + minio_bucket_name = os.environ.get('s3-bucket-name') + + # If environment variables are not set, read from db.properties file + if not all([db_host, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): + config = ConfigParser() + config.read('db.properties') + + db_host = config.get('PostgreSQL Connection', 'db_host') + db_port = config.get('PostgreSQL Connection', 'db_port') + db_user = config.get('PostgreSQL Connection', 'db_user') + db_password = config.get('PostgreSQL Connection', 'db_password') + + minio_host = config.get('MinIO Connection', 'minio_host') + minio_region = config.get('MinIO Connection', 'minio_region') + minio_user_key = config.get('MinIO Connection', 'minio_user_key') + minio_user_secret = config.get('MinIO Connection', 'minio_user_secret') + minio_bucket_name = config.get('MinIO Connection', 'minio_bucket_name') # Define the databases list databases = [ @@ -134,24 +158,5 @@ def deduce_sensitive_data_in_databases(): finally: connection.close() -# Function to read properties from db.properties file -def read_db_properties(): - config = ConfigParser() - config.read('db.properties') - - db_host = config.get('PostgreSQL Connection', 'db_host') - db_port = config.getint('PostgreSQL Connection', 'db_port') - db_user = config.get('PostgreSQL Connection', 'db_user') - db_password = config.get('PostgreSQL Connection', 'db_password') - - minio_host = config.get('MinIO Connection', 'minio_host') - minio_region = config.get('MinIO Connection', 'minio_region') - minio_user_key = config.get('MinIO Connection', 'minio_user_key') - minio_user_secret = config.get('MinIO Connection', 'minio_user_secret') - minio_bucket_name = config.get('MinIO Connection', 'minio_bucket_name') - - return (db_host, db_port, db_user, db_password, - minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name) - # Call the main function deduce_sensitive_data_in_databases() From 238b76e47e94ed7fcb3e68e424e0ff7ff5d76d47 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 18 Jan 2024 18:48:01 +0530 Subject: [PATCH 28/35] [MOSIP-29854] created own image and updated script on update branch --- DataBreachDetector/db.properties | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DataBreachDetector/db.properties b/DataBreachDetector/db.properties index 82f2027..ed7f744 100644 --- a/DataBreachDetector/db.properties +++ b/DataBreachDetector/db.properties @@ -1,14 +1,14 @@ # PostgreSQL Connection [PostgreSQL Connection] -db_host=postgres.dev.mosip.net +db_host=postgres.mosip.net db_port=5432 db_user=postgres -db_password=mQi298ZW7p +db_password= # MinIO Connection [MinIO Connection] minio_host=minio.minio:9000 minio_region=your_s3_region minio_user_key=admin -minio_user_secret=aYjhgRDXB4 +minio_user_secret= minio_bucket_name=security-testrig From d0f8a8fc5f18021245dbb18654dca85652168235 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 19 Jan 2024 15:40:19 +0530 Subject: [PATCH 29/35] [MOSIP-29854] updated script on update branch --- DataBreachDetector/Dockerfile | 8 +++---- DataBreachDetector/databreachdetector.py | 30 ++++++++++++------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index 6c75b82..5f7eff1 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -33,10 +33,10 @@ ENV s3-region= ENV s3-user-key= ENV s3-user-secret= ENV s3-bucket-name= -ENV pg-host= -ENV pg-port= -ENV pg-user= -ENV pg-password= +ENV db-server= +ENV db-port= +ENV db-su-user= +ENV postgres-password= COPY DataBreachDetector/requirements.txt . COPY DataBreachDetector/db.properties . diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 8b26eab..fb503e6 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -92,10 +92,10 @@ def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucke def deduce_sensitive_data_in_databases(): # Read connection details from environment variables or db.properties file - db_host = os.environ.get('pg-host') - db_port = os.environ.get('pg-port') - db_user = os.environ.get('pg-user') - db_password = os.environ.get('pg-password') + db_server = os.environ.get('db-server') + db_port = os.environ.get('db-port') + db_user = os.environ.get('db-su-user') + db_password = os.environ.get('postgres-password') minio_host = os.environ.get('s3-host') minio_region = os.environ.get('s3-region') @@ -104,20 +104,20 @@ def deduce_sensitive_data_in_databases(): minio_bucket_name = os.environ.get('s3-bucket-name') # If environment variables are not set, read from db.properties file - if not all([db_host, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): + if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): config = ConfigParser() config.read('db.properties') - db_host = config.get('PostgreSQL Connection', 'db_host') - db_port = config.get('PostgreSQL Connection', 'db_port') - db_user = config.get('PostgreSQL Connection', 'db_user') - db_password = config.get('PostgreSQL Connection', 'db_password') + db_server = config.get('PostgreSQL Connection', 'db-server') + db_port = config.get('PostgreSQL Connection', 'db-port') + db_user = config.get('PostgreSQL Connection', 'db-su-user') + db_password = config.get('PostgreSQL Connection', 'postgres-password') - minio_host = config.get('MinIO Connection', 'minio_host') - minio_region = config.get('MinIO Connection', 'minio_region') - minio_user_key = config.get('MinIO Connection', 'minio_user_key') - minio_user_secret = config.get('MinIO Connection', 'minio_user_secret') - minio_bucket_name = config.get('MinIO Connection', 'minio_bucket_name') + minio_host = config.get('MinIO Connection', 's3-host') + minio_region = config.get('MinIO Connection', 's3-region') + minio_user_key = config.get('MinIO Connection', 's3-user-key') + minio_user_secret = config.get('MinIO Connection', 's3-user-secret') + minio_bucket_name = config.get('MinIO Connection', 's3-bucket-name') # Define the databases list databases = [ @@ -126,7 +126,7 @@ def deduce_sensitive_data_in_databases(): ] connection = psycopg2.connect( - host=db_host, + host=db_server, port=db_port, user=db_user, password=db_password, From 9e269079ef669f9064840cdd1979d72021942170 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 19 Jan 2024 16:00:46 +0530 Subject: [PATCH 30/35] [MOSIP-29854] updated script on update branch --- DataBreachDetector/databreachdetector.py | 117 +++++++---------------- 1 file changed, 35 insertions(+), 82 deletions(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index fb503e6..81b8c9f 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -20,88 +20,41 @@ def is_valid_mobile_number(phone_number): match = re.match(pattern, str(phone_number)) return bool(match) -def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables): - deduce_instance = Deduce() - - with connection.cursor() as cursor: - cursor.execute(f"SET search_path TO {schema_name}") - cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,)) - tables = [table[0] for table in cursor.fetchall()] - - with open(output_file, 'a') as deduced_file: - for table_name in tables: - if ignore_tables and table_name in ignore_tables: - print(f"Ignoring Table: {table_name} in Database: {database_name}") - continue - - print(f"Currently checking Table: {table_name} in Database: {database_name}") - deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n") - - cursor.execute(f'SELECT * FROM {table_name}') - rows = cursor.fetchall() - - for row in rows: - for i, column_value in enumerate(row): - column_name = cursor.description[i][0] - - if ignore_columns and column_name in ignore_columns: - continue - - deduced_result = deduce_instance.deidentify( - str(column_value), - disabled={'names', 'institutions', 'locations', 'dates', 'ages', 'urls'} - ) - - if deduced_result.annotations and is_valid_verhoeff(column_value): - deduced_file.write(f"Column: {column_name}, Data: {column_value}\n") - deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - - with open('mobile_numbers.txt', 'a') as file: - if deduced_result.annotations and is_valid_mobile_number(column_value): - file.write(f"Column: {column_name}, Data: {column_value}\n") - file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - - with open('mails.txt', 'a') as file: - if deduced_result.annotations and is_valid_email(column_value): - file.write(f"Column: {column_name}, Data: {column_value}\n") - file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") - -def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name): - mc = Minio(s3_host, - access_key=s3_user_key, - secret_key=s3_user_secret, - region=s3_region, - secure=False) # Set secure=True if using HTTPS - - try: - if not mc.bucket_exists(s3_bucket_name): - mc.make_bucket(s3_bucket_name, location=s3_region) - - # Ensure files exist before attempting to upload - for filename in ['id.txt', 'mails.txt', 'mobile_numbers.txt']: - open(filename, 'a').close() - - mc.fput_object(s3_bucket_name, 'reports/id.txt', 'id.txt') - mc.fput_object(s3_bucket_name, 'reports/mails.txt', 'mails.txt') - mc.fput_object(s3_bucket_name, 'reports/mobile_numbers.txt', 'mobile_numbers.txt') - - print("\nReports pushed to MinIO") - - except ResponseError as err: - print(f"MinIO Error: {err}") - -def deduce_sensitive_data_in_databases(): - # Read connection details from environment variables or db.properties file - db_server = os.environ.get('db-server') - db_port = os.environ.get('db-port') - db_user = os.environ.get('db-su-user') - db_password = os.environ.get('postgres-password') - - minio_host = os.environ.get('s3-host') - minio_region = os.environ.get('s3-region') - minio_user_key = os.environ.get('s3-user-key') - minio_user_secret = os.environ.get('s3-user-secret') - minio_bucket_name = os.environ.get('s3-bucket-name') +# Print environment variable values +print("Environment Variables:") +for env_var in [ + 'db-server', 'db-port', 'db-su-user', 'postgres-password', + 's3-host', 's3-region', 's3-user-key', 's3-user-secret', 's3-bucket-name' +]: + print(f"{env_var}: {os.environ.get(env_var)}") + +# Read connection details from environment variables or db.properties file +db_server = os.environ.get('db-server') +db_port = os.environ.get('db-port') +db_user = os.environ.get('db-su-user') +db_password = os.environ.get('postgres-password') + +minio_host = os.environ.get('s3-host') +minio_region = os.environ.get('s3-region') +minio_user_key = os.environ.get('s3-user-key') +minio_user_secret = os.environ.get('s3-user-secret') +minio_bucket_name = os.environ.get('s3-bucket-name') + +# If environment variables are not set, read from db.properties file +if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): + config = ConfigParser() + config.read('db.properties') + + db_server = config.get('PostgreSQL Connection', 'db-host') + db_port = config.get('PostgreSQL Connection', 'db-port') + db_user = config.get('PostgreSQL Connection', 'db-su-user') + db_password = config.get('PostgreSQL Connection', 'postgres-password') + + minio_host = config.get('MinIO Connection', 's3-host') + minio_region = config.get('MinIO Connection', 's3-region') + minio_user_key = config.get('MinIO Connection', 's3-user-key') + minio_user_secret = config.get('MinIO Connection', 's3-user-secret') + minio_bucket_name = config.get('MinIO Connection', 's3-bucket-name') # If environment variables are not set, read from db.properties file if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): From d4a17d1f36c6eefc2684dfe3dcd0f4b79bad98d5 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 19 Jan 2024 16:14:21 +0530 Subject: [PATCH 31/35] [MOSIP-29854] updated script on update branch --- DataBreachDetector/databreachdetector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 81b8c9f..47562fe 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -41,7 +41,7 @@ def is_valid_mobile_number(phone_number): minio_bucket_name = os.environ.get('s3-bucket-name') # If environment variables are not set, read from db.properties file -if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): +if not all([db_server, db_port, db_user, db_password, minio_host, minio_user_key, minio_user_secret, minio_bucket_name]): config = ConfigParser() config.read('db.properties') From 46f9ff47e71f95871fb8fde5092aef6b849f2adc Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Fri, 19 Jan 2024 16:19:15 +0530 Subject: [PATCH 32/35] [MOSIP-29854] updated script on update branch --- DataBreachDetector/databreachdetector.py | 121 ++++++++++++++++------- 1 file changed, 84 insertions(+), 37 deletions(-) diff --git a/DataBreachDetector/databreachdetector.py b/DataBreachDetector/databreachdetector.py index 47562fe..b46f1c8 100644 --- a/DataBreachDetector/databreachdetector.py +++ b/DataBreachDetector/databreachdetector.py @@ -20,44 +20,91 @@ def is_valid_mobile_number(phone_number): match = re.match(pattern, str(phone_number)) return bool(match) -# Print environment variable values -print("Environment Variables:") -for env_var in [ - 'db-server', 'db-port', 'db-su-user', 'postgres-password', - 's3-host', 's3-region', 's3-user-key', 's3-user-secret', 's3-bucket-name' -]: - print(f"{env_var}: {os.environ.get(env_var)}") - -# Read connection details from environment variables or db.properties file -db_server = os.environ.get('db-server') -db_port = os.environ.get('db-port') -db_user = os.environ.get('db-su-user') -db_password = os.environ.get('postgres-password') - -minio_host = os.environ.get('s3-host') -minio_region = os.environ.get('s3-region') -minio_user_key = os.environ.get('s3-user-key') -minio_user_secret = os.environ.get('s3-user-secret') -minio_bucket_name = os.environ.get('s3-bucket-name') - -# If environment variables are not set, read from db.properties file -if not all([db_server, db_port, db_user, db_password, minio_host, minio_user_key, minio_user_secret, minio_bucket_name]): - config = ConfigParser() - config.read('db.properties') - - db_server = config.get('PostgreSQL Connection', 'db-host') - db_port = config.get('PostgreSQL Connection', 'db-port') - db_user = config.get('PostgreSQL Connection', 'db-su-user') - db_password = config.get('PostgreSQL Connection', 'postgres-password') - - minio_host = config.get('MinIO Connection', 's3-host') - minio_region = config.get('MinIO Connection', 's3-region') - minio_user_key = config.get('MinIO Connection', 's3-user-key') - minio_user_secret = config.get('MinIO Connection', 's3-user-secret') - minio_bucket_name = config.get('MinIO Connection', 's3-bucket-name') +def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables): + deduce_instance = Deduce() + + with connection.cursor() as cursor: + cursor.execute(f"SET search_path TO {schema_name}") + cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,)) + tables = [table[0] for table in cursor.fetchall()] + + with open(output_file, 'a') as deduced_file: + for table_name in tables: + if ignore_tables and table_name in ignore_tables: + print(f"Ignoring Table: {table_name} in Database: {database_name}") + continue + + print(f"Currently checking Table: {table_name} in Database: {database_name}") + deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n") + + cursor.execute(f'SELECT * FROM {table_name}') + rows = cursor.fetchall() + + for row in rows: + for i, column_value in enumerate(row): + column_name = cursor.description[i][0] + + if ignore_columns and column_name in ignore_columns: + continue + + deduced_result = deduce_instance.deidentify( + str(column_value), + disabled={'names', 'institutions', 'locations', 'dates', 'ages', 'urls'} + ) + + if deduced_result.annotations and is_valid_verhoeff(column_value): + deduced_file.write(f"Column: {column_name}, Data: {column_value}\n") + deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mobile_numbers.txt', 'a') as file: + if deduced_result.annotations and is_valid_mobile_number(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + + with open('mails.txt', 'a') as file: + if deduced_result.annotations and is_valid_email(column_value): + file.write(f"Column: {column_name}, Data: {column_value}\n") + file.write(f"Deduced Findings: {deduced_result.annotations}\n\n") + +def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name): + mc = Minio(s3_host, + access_key=s3_user_key, + secret_key=s3_user_secret, + region=s3_region, + secure=False) # Set secure=True if using HTTPS + + try: + if not mc.bucket_exists(s3_bucket_name): + mc.make_bucket(s3_bucket_name, location=s3_region) + + # Ensure files exist before attempting to upload + for filename in ['id.txt', 'mails.txt', 'mobile_numbers.txt']: + open(filename, 'a').close() + + mc.fput_object(s3_bucket_name, 'reports/id.txt', 'id.txt') + mc.fput_object(s3_bucket_name, 'reports/mails.txt', 'mails.txt') + mc.fput_object(s3_bucket_name, 'reports/mobile_numbers.txt', 'mobile_numbers.txt') + + print("\nReports pushed to MinIO") + + except ResponseError as err: + print(f"MinIO Error: {err}") + +def deduce_sensitive_data_in_databases(): + # Read connection details from environment variables or db.properties file + db_server = os.environ.get('db-server') + db_port = os.environ.get('db-port') + db_user = os.environ.get('db-su-user') + db_password = os.environ.get('postgres-password') + + minio_host = os.environ.get('s3-host') + minio_region = os.environ.get('s3-region') + minio_user_key = os.environ.get('s3-user-key') + minio_user_secret = os.environ.get('s3-user-secret') + minio_bucket_name = os.environ.get('s3-bucket-name') # If environment variables are not set, read from db.properties file - if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]): + if not all([db_server, db_port, db_user, db_password, minio_host, minio_user_key, minio_user_secret, minio_bucket_name]): config = ConfigParser() config.read('db.properties') @@ -112,4 +159,4 @@ def is_valid_mobile_number(phone_number): connection.close() # Call the main function -deduce_sensitive_data_in_databases() +deduce_sensitive_data_in_databases() \ No newline at end of file From 9bf0f9f34ce4b671845c8c34613c5eb354d02d87 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 25 Jan 2024 13:35:54 +0530 Subject: [PATCH 33/35] [MOSIP-29854] updatepush_trigger and Dockerfile on update branch --- .github/workflows/push_trigger.yml | 50 +++++++++++++++++++----------- DataBreachDetector/Dockerfile | 6 ++-- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index a9ba04c..d3bb394 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -1,27 +1,41 @@ -name: Build Docker Image +name: Build databreach-detector on: - push: - branches: - - master - - develop - - update + release: + types: [published] pull_request: + types: [opened, reopened, synchronize] + workflow_dispatch: + inputs: + message: + description: 'Message for manually triggering' + required: false + default: 'Triggered for Updates' + type: string + push: branches: - master + - 1.* - develop + - release* + - MOSIP* - update jobs: - - build: - - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Build the Docker image - run: docker build . --file DataBreachDetector/Dockerfile --tag maheshbinayak1/databreachdetector:final - - name: Log in to Docker Hub - run: docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - name: Push the Docker image - run: docker push maheshbinayak1/databreachdetector:final \ No newline at end of file + build-dockers: + strategy: + matrix: + include: + - SERVICE_LOCATION: 'DataBreachDetector' + SERVICE_NAME: 'DataBreachDetector' + fail-fast: false + name: ${{ matrix.SERVICE_NAME }} + uses: mosip/kattu/.github/workflows/docker-build.yml@master + with: + SERVICE_LOCATION: ${{ matrix.SERVICE_LOCATION }} + SERVICE_NAME: ${{ matrix.SERVICE_NAME }} + secrets: + DEV_NAMESPACE_DOCKER_HUB: ${{ secrets.DEV_NAMESPACE_DOCKER_HUB }} + ACTOR_DOCKER_HUB: ${{ secrets.ACTOR_DOCKER_HUB }} + RELEASE_DOCKER_HUB: ${{ secrets.RELEASE_DOCKER_HUB }} + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_DEVOPS }} \ No newline at end of file diff --git a/DataBreachDetector/Dockerfile b/DataBreachDetector/Dockerfile index 5f7eff1..6cdafe6 100644 --- a/DataBreachDetector/Dockerfile +++ b/DataBreachDetector/Dockerfile @@ -38,8 +38,8 @@ ENV db-port= ENV db-su-user= ENV postgres-password= -COPY DataBreachDetector/requirements.txt . -COPY DataBreachDetector/db.properties . +COPY requirements.txt . +COPY db.properties . RUN pip install --no-cache-dir -r requirements.txt -COPY DataBreachDetector/databreachdetector.py . +COPY databreachdetector.py . CMD ["python", "databreachdetector.py"] \ No newline at end of file From 4c8d92499b93b6123c96872d50ca57cf599dd0c6 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 25 Jan 2024 13:40:03 +0530 Subject: [PATCH 34/35] [MOSIP-29854] updatepush_trigger and Dockerfile on update branch --- .github/workflows/push_trigger.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/push_trigger.yml b/.github/workflows/push_trigger.yml index d3bb394..314b16e 100644 --- a/.github/workflows/push_trigger.yml +++ b/.github/workflows/push_trigger.yml @@ -26,8 +26,8 @@ jobs: strategy: matrix: include: - - SERVICE_LOCATION: 'DataBreachDetector' - SERVICE_NAME: 'DataBreachDetector' + - SERVICE_LOCATION: 'databreachdetector' + SERVICE_NAME: 'databreachdetector' fail-fast: false name: ${{ matrix.SERVICE_NAME }} uses: mosip/kattu/.github/workflows/docker-build.yml@master From 1673f21ed9185c6bcf38c33fa866ca4f79c46748 Mon Sep 17 00:00:00 2001 From: "Mahesh.Binayak" Date: Thu, 25 Jan 2024 13:45:03 +0530 Subject: [PATCH 35/35] [MOSIP-29854]Renamed to lowercase --- {DataBreachDetector => databreachdetector}/Dockerfile | 0 {DataBreachDetector => databreachdetector}/README.md | 0 {DataBreachDetector => databreachdetector}/databreachdetector.py | 0 {DataBreachDetector => databreachdetector}/db.properties | 0 {DataBreachDetector => databreachdetector}/requirements.txt | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename {DataBreachDetector => databreachdetector}/Dockerfile (100%) rename {DataBreachDetector => databreachdetector}/README.md (100%) rename {DataBreachDetector => databreachdetector}/databreachdetector.py (100%) rename {DataBreachDetector => databreachdetector}/db.properties (100%) rename {DataBreachDetector => databreachdetector}/requirements.txt (100%) diff --git a/DataBreachDetector/Dockerfile b/databreachdetector/Dockerfile similarity index 100% rename from DataBreachDetector/Dockerfile rename to databreachdetector/Dockerfile diff --git a/DataBreachDetector/README.md b/databreachdetector/README.md similarity index 100% rename from DataBreachDetector/README.md rename to databreachdetector/README.md diff --git a/DataBreachDetector/databreachdetector.py b/databreachdetector/databreachdetector.py similarity index 100% rename from DataBreachDetector/databreachdetector.py rename to databreachdetector/databreachdetector.py diff --git a/DataBreachDetector/db.properties b/databreachdetector/db.properties similarity index 100% rename from DataBreachDetector/db.properties rename to databreachdetector/db.properties diff --git a/DataBreachDetector/requirements.txt b/databreachdetector/requirements.txt similarity index 100% rename from DataBreachDetector/requirements.txt rename to databreachdetector/requirements.txt