Merge pull request #6 from Mahesh-Binayak/update
[MOSIP-29854] Merging changes to mosip:update
ckm007 authored Jan 25, 2024
2 parents 8e4bf26 + 1673f21 commit 9551bd5
Showing 7 changed files with 268 additions and 2 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/push_trigger.yml
@@ -0,0 +1,41 @@
name: Build databreach-detector

on:
  release:
    types: [published]
  pull_request:
    types: [opened, reopened, synchronize]
  workflow_dispatch:
    inputs:
      message:
        description: 'Message for manually triggering'
        required: false
        default: 'Triggered for Updates'
        type: string
  push:
    branches:
      - master
      - 1.*
      - develop
      - release*
      - MOSIP*
      - update

jobs:
  build-dockers:
    strategy:
      matrix:
        include:
          - SERVICE_LOCATION: 'databreachdetector'
            SERVICE_NAME: 'databreachdetector'
      fail-fast: false
    name: ${{ matrix.SERVICE_NAME }}
    uses: mosip/kattu/.github/workflows/docker-build.yml@master
    with:
      SERVICE_LOCATION: ${{ matrix.SERVICE_LOCATION }}
      SERVICE_NAME: ${{ matrix.SERVICE_NAME }}
    secrets:
      DEV_NAMESPACE_DOCKER_HUB: ${{ secrets.DEV_NAMESPACE_DOCKER_HUB }}
      ACTOR_DOCKER_HUB: ${{ secrets.ACTOR_DOCKER_HUB }}
      RELEASE_DOCKER_HUB: ${{ secrets.RELEASE_DOCKER_HUB }}
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_DEVOPS }}
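
The job delegates the actual image build to mosip/kattu's reusable docker-build.yml workflow. Since the workflow also declares a workflow_dispatch trigger, a run can be fired manually through the GitHub REST API; a minimal sketch, assuming OWNER/REPO and the token are placeholders (only the workflow file name and the 'message' input come from the YAML above):

# Hypothetical manual trigger for the workflow_dispatch event.
# OWNER/REPO and <personal-access-token> are placeholders, not values
# from this commit.
import requests

resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/push_trigger.yml/dispatches",
    headers={
        "Authorization": "Bearer <personal-access-token>",
        "Accept": "application/vnd.github+json",
    },
    json={"ref": "master", "inputs": {"message": "Triggered for Updates"}},
)
resp.raise_for_status()  # the API returns 204 No Content on success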
2 changes: 0 additions & 2 deletions README.md

This file was deleted.

45 changes: 45 additions & 0 deletions databreachdetector/Dockerfile
@@ -0,0 +1,45 @@
FROM python:3.9

ARG SOURCE
ARG COMMIT_HASH
ARG COMMIT_ID
ARG BUILD_TIME
LABEL source=${SOURCE}
LABEL commit_hash=${COMMIT_HASH}
LABEL commit_id=${COMMIT_ID}
LABEL build_time=${BUILD_TIME}

ARG container_user=mosip
ARG container_user_group=mosip
ARG container_user_uid=1001
ARG container_user_gid=1001

# Create user group
RUN groupadd -r ${container_user_group}

# Create user with specific ID
RUN useradd -u ${container_user_uid} -r -g ${container_user_group} -s /bin/bash -m -d /home/${container_user} ${container_user}

WORKDIR /home/${container_user}
USER ${container_user}

# Note: ENV values are not shell-evaluated at build time, so the two
# values below are stored as literal strings, not the command output.
ENV MYDIR=`pwd`
ENV DATE="$(date --utc +%FT%T.%3NZ)"
ENV ENABLE_INSECURE=false
ENV MODULE=

ENV s3-host=
ENV s3-region=
ENV s3-user-key=
ENV s3-user-secret=
ENV s3-bucket-name=
ENV db-server=
ENV db-port=
ENV db-su-user=
ENV postgres-password=

COPY requirements.txt .
COPY db.properties .
RUN pip install --no-cache-dir -r requirements.txt
COPY databreachdetector.py .
CMD ["python", "databreachdetector.py"]
2 changes: 2 additions & 0 deletions databreachdetector/README.md
@@ -0,0 +1,2 @@
# DBvaluefinder (WIP)
## A script that looks for certain types of data in the DB and reports findings to simple text files
162 changes: 162 additions & 0 deletions databreachdetector/databreachdetector.py
@@ -0,0 +1,162 @@
from configparser import ConfigParser
from minio import Minio
from minio.error import ResponseError
import psycopg2
from stdnum import verhoeff
from deduce import Deduce
import re
import os

def is_valid_verhoeff(number):
    return verhoeff.is_valid(str(number))

def is_valid_email(email):
    email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
    match = email_pattern.match(str(email))
    return bool(match)

def is_valid_mobile_number(phone_number):
    pattern = re.compile(r'^[9]\d{9}$')
    match = re.match(pattern, str(phone_number))
    return bool(match)

def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables):
    deduce_instance = Deduce()

    with connection.cursor() as cursor:
        cursor.execute(f"SET search_path TO {schema_name}")
        cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,))
        tables = [table[0] for table in cursor.fetchall()]

        with open(output_file, 'a') as deduced_file:
            for table_name in tables:
                if ignore_tables and table_name in ignore_tables:
                    print(f"Ignoring Table: {table_name} in Database: {database_name}")
                    continue

                print(f"Currently checking Table: {table_name} in Database: {database_name}")
                deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n")

                cursor.execute(f'SELECT * FROM {table_name}')
                rows = cursor.fetchall()

                for row in rows:
                    for i, column_value in enumerate(row):
                        column_name = cursor.description[i][0]

                        if ignore_columns and column_name in ignore_columns:
                            continue

                        deduced_result = deduce_instance.deidentify(
                            str(column_value),
                            disabled={'names', 'institutions', 'locations', 'dates', 'ages', 'urls'}
                        )

                        if deduced_result.annotations and is_valid_verhoeff(column_value):
                            deduced_file.write(f"Column: {column_name}, Data: {column_value}\n")
                            deduced_file.write(f"Deduced Findings: {deduced_result.annotations}\n\n")

                        with open('mobile_numbers.txt', 'a') as file:
                            if deduced_result.annotations and is_valid_mobile_number(column_value):
                                file.write(f"Column: {column_name}, Data: {column_value}\n")
                                file.write(f"Deduced Findings: {deduced_result.annotations}\n\n")

                        with open('mails.txt', 'a') as file:
                            if deduced_result.annotations and is_valid_email(column_value):
                                file.write(f"Column: {column_name}, Data: {column_value}\n")
                                file.write(f"Deduced Findings: {deduced_result.annotations}\n\n")

def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name):
    mc = Minio(s3_host,
               access_key=s3_user_key,
               secret_key=s3_user_secret,
               region=s3_region,
               secure=False)  # Set secure=True if using HTTPS

    try:
        if not mc.bucket_exists(s3_bucket_name):
            mc.make_bucket(s3_bucket_name, location=s3_region)

        # Ensure files exist before attempting to upload
        for filename in ['id.txt', 'mails.txt', 'mobile_numbers.txt']:
            open(filename, 'a').close()

        mc.fput_object(s3_bucket_name, 'reports/id.txt', 'id.txt')
        mc.fput_object(s3_bucket_name, 'reports/mails.txt', 'mails.txt')
        mc.fput_object(s3_bucket_name, 'reports/mobile_numbers.txt', 'mobile_numbers.txt')

        print("\nReports pushed to MinIO")

    except ResponseError as err:
        print(f"MinIO Error: {err}")

def deduce_sensitive_data_in_databases():
    # Read connection details from environment variables or db.properties file
    db_server = os.environ.get('db-server')
    db_port = os.environ.get('db-port')
    db_user = os.environ.get('db-su-user')
    db_password = os.environ.get('postgres-password')

    minio_host = os.environ.get('s3-host')
    minio_region = os.environ.get('s3-region')
    minio_user_key = os.environ.get('s3-user-key')
    minio_user_secret = os.environ.get('s3-user-secret')
    minio_bucket_name = os.environ.get('s3-bucket-name')

    # If environment variables are not set, read from db.properties file
    if not all([db_server, db_port, db_user, db_password, minio_host, minio_region,
                minio_user_key, minio_user_secret, minio_bucket_name]):
        config = ConfigParser()
        config.read('db.properties')

        db_server = config.get('PostgreSQL Connection', 'db-server')
        db_port = config.get('PostgreSQL Connection', 'db-port')
        db_user = config.get('PostgreSQL Connection', 'db-su-user')
        db_password = config.get('PostgreSQL Connection', 'postgres-password')

        minio_host = config.get('MinIO Connection', 's3-host')
        minio_region = config.get('MinIO Connection', 's3-region')
        minio_user_key = config.get('MinIO Connection', 's3-user-key')
        minio_user_secret = config.get('MinIO Connection', 's3-user-secret')
        minio_bucket_name = config.get('MinIO Connection', 's3-bucket-name')

    # Define the databases list
    databases = [
        {"name": "mosip_esignet", "schema": "esignet"},
        # Add other databases as needed
    ]

    output_file_path = 'id.txt'
    ignore_columns = ['status', 'cr_by']
    ignore_tables = ['client_detail', 'reg_available_slot', 'batch_job_execution',
                     'batch_job_execution_context', 'batch_job_execution_params', 'batch_job_instance',
                     'batch_step_execution', 'batch_step_execution_context']

    for db_info in databases:
        # psycopg2 cannot switch databases on an open connection, so open
        # a dedicated connection for each database in the list
        connection = psycopg2.connect(
            host=db_server,
            port=db_port,
            user=db_user,
            password=db_password,
            database=db_info["name"])

        try:
            print(f"\nAnalyzing data in Database: {db_info['name']}\n")
            deduce_sensitive_data(connection, db_info['name'], db_info['schema'], output_file_path,
                                  ignore_columns, ignore_tables)
        finally:
            connection.close()

    print(f"\nDeduced findings saved to {output_file_path}, mails.txt, mobile_numbers.txt")

    # Push the generated reports to MinIO
    push_reports_to_s3(minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name)

# Call the main function
deduce_sensitive_data_in_databases()
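
The script's three validators are easy to sanity-check in isolation; a small sketch with illustrative values (not data from any database):

import re

from stdnum import verhoeff

# Verhoeff: derive a valid check digit, then confirm is_valid() accepts it
base = '12345'
candidate = base + verhoeff.calc_check_digit(base)
assert verhoeff.is_valid(candidate)

# Email pattern from is_valid_email()
email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
assert email_pattern.match('user@example.com')
assert not email_pattern.match('not-an-email')

# Mobile pattern from is_valid_mobile_number(): exactly 10 digits, leading 9
mobile_pattern = re.compile(r'^[9]\d{9}$')
assert mobile_pattern.match('9876543210')
assert not mobile_pattern.match('8876543210')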
14 changes: 14 additions & 0 deletions databreachdetector/db.properties
@@ -0,0 +1,14 @@
# PostgreSQL Connection
[PostgreSQL Connection]
db-server=postgres.mosip.net
db-port=5432
db-su-user=postgres
postgres-password=

# MinIO Connection
[MinIO Connection]
s3-host=minio.minio:9000
s3-region=your_s3_region
s3-user-key=admin
s3-user-secret=
s3-bucket-name=security-testrig
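
The ConfigParser fallback in databreachdetector.py reads exactly these section headers and key names when the corresponding environment variables are absent; a minimal sketch of that lookup:

from configparser import ConfigParser

# Mirrors the fallback path in databreachdetector.py: the section names
# and keys must match db.properties exactly.
config = ConfigParser()
config.read('db.properties')

db_server = config.get('PostgreSQL Connection', 'db-server')
s3_host = config.get('MinIO Connection', 's3-host')
print(db_server, s3_host)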
4 changes: 4 additions & 0 deletions databreachdetector/requirements.txt
@@ -0,0 +1,4 @@
psycopg2-binary==2.9.1
python-stdnum==1.19
deduce==2.4.4
minio==6.0.2
