Skip to content

Commit

Permalink
[MOSIP-29854] updated script on update branch
Browse files: browse the repository at this point in the history
  • Loading branch information
Mahesh-Binayak committed Jan 19, 2024
1 parent d0f8a8f commit 9e26907
Showing 1 changed file with 35 additions and 82 deletions.
117 changes: 35 additions & 82 deletions DataBreachDetector/databreachdetector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,88 +20,41 @@ def is_valid_mobile_number(phone_number):
match = re.match(pattern, str(phone_number))
return bool(match)

def _quote_identifier(identifier):
    """Return *identifier* as a double-quoted SQL identifier.

    Embedded double quotes are doubled so that mixed-case names or names
    containing special characters are passed to the database verbatim
    instead of being case-folded or breaking the statement.
    """
    return '"' + str(identifier).replace('"', '""') + '"'


def deduce_sensitive_data(connection, database_name, schema_name, output_file, ignore_columns, ignore_tables):
    """Scan every table of a schema and append suspected sensitive values to report files.

    Each cell is stringified and run through ``Deduce.deidentify``. Cells that
    produce annotations are then classified with the validators defined in
    this module and appended to:

    * ``output_file``          -- values accepted by ``is_valid_verhoeff``
    * ``mobile_numbers.txt``   -- values accepted by ``is_valid_mobile_number``
    * ``mails.txt``            -- values accepted by ``is_valid_email``

    Args:
        connection: Open database connection whose ``cursor()`` can be used as
            a context manager.
        database_name: Name of the database; only used in progress messages.
        schema_name: Schema whose tables are scanned.
        output_file: Path of the report that also receives the per-table
            progress lines.
        ignore_columns: Collection of column names to skip (may be empty/None).
        ignore_tables: Collection of table names to skip (may be empty/None).
    """
    deduce_instance = Deduce()
    # Built once: the set of detectors to switch off is the same for every cell.
    disabled_detectors = {'names', 'institutions', 'locations', 'dates', 'ages', 'urls'}

    with connection.cursor() as cursor:
        # Identifiers cannot be bound as query parameters, so quote them explicitly.
        cursor.execute(f"SET search_path TO {_quote_identifier(schema_name)}")
        cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema=%s", (schema_name,))
        tables = [table[0] for table in cursor.fetchall()]

        # Open the three report files once per call instead of once per cell.
        with open(output_file, 'a', encoding='utf-8') as deduced_file, \
                open('mobile_numbers.txt', 'a', encoding='utf-8') as mobile_file, \
                open('mails.txt', 'a', encoding='utf-8') as mail_file:
            for table_name in tables:
                if ignore_tables and table_name in ignore_tables:
                    print(f"Ignoring Table: {table_name} in Database: {database_name}")
                    continue

                print(f"Currently checking Table: {table_name} in Database: {database_name}")
                deduced_file.write(f"Currently checking Table: {table_name} in Database: {database_name}\n")

                cursor.execute(f'SELECT * FROM {_quote_identifier(table_name)}')
                # Column names are fixed for the whole result set; resolve them once.
                column_names = [column[0] for column in cursor.description]

                for row in cursor.fetchall():
                    for column_name, column_value in zip(column_names, row):
                        if ignore_columns and column_name in ignore_columns:
                            continue

                        deduced_result = deduce_instance.deidentify(
                            str(column_value),
                            disabled=disabled_detectors
                        )
                        annotations = deduced_result.annotations
                        if not annotations:
                            # Nothing detected: none of the reports applies.
                            continue

                        finding = (
                            f"Column: {column_name}, Data: {column_value}\n"
                            f"Deduced Findings: {annotations}\n\n"
                        )
                        if is_valid_verhoeff(column_value):
                            deduced_file.write(finding)
                        if is_valid_mobile_number(column_value):
                            mobile_file.write(finding)
                        if is_valid_email(column_value):
                            mail_file.write(finding)

def push_reports_to_s3(s3_host, s3_region, s3_user_key, s3_user_secret, s3_bucket_name, secure=False):
    """Upload the three report files to a MinIO/S3 bucket under ``reports/``.

    The bucket is created when it does not exist yet, and any missing report
    file is created empty so that every upload has a source file.

    Args:
        s3_host: Endpoint passed to the ``Minio`` client.
        s3_region: Region used for the client and for bucket creation.
        s3_user_key: Access key.
        s3_user_secret: Secret key.
        s3_bucket_name: Target bucket.
        secure: Passed to the ``Minio`` client; set to True when the endpoint
            is served over HTTPS. Defaults to False, the previously
            hard-coded value.

    A ``ResponseError`` raised by the client is reported on stdout and not
    re-raised.
    """
    # Single source of truth for the reports, used for both creation and upload.
    report_files = ('id.txt', 'mails.txt', 'mobile_numbers.txt')

    mc = Minio(s3_host,
               access_key=s3_user_key,
               secret_key=s3_user_secret,
               region=s3_region,
               secure=secure)

    try:
        if not mc.bucket_exists(s3_bucket_name):
            mc.make_bucket(s3_bucket_name, location=s3_region)

        # Ensure files exist before attempting to upload
        for filename in report_files:
            with open(filename, 'a'):
                pass

        for filename in report_files:
            mc.fput_object(s3_bucket_name, f'reports/{filename}', filename)

        print("\nReports pushed to MinIO")

    except ResponseError as err:
        print(f"MinIO Error: {err}")

def deduce_sensitive_data_in_databases():
# Read connection details from environment variables or db.properties file
db_server = os.environ.get('db-server')
db_port = os.environ.get('db-port')
db_user = os.environ.get('db-su-user')
db_password = os.environ.get('postgres-password')

minio_host = os.environ.get('s3-host')
minio_region = os.environ.get('s3-region')
minio_user_key = os.environ.get('s3-user-key')
minio_user_secret = os.environ.get('s3-user-secret')
minio_bucket_name = os.environ.get('s3-bucket-name')
# Print environment variable values; credentials are masked so that secrets
# never end up in console output or collected logs.
print("Environment Variables:")
for env_var in [
    'db-server', 'db-port', 'db-su-user', 'postgres-password',
    's3-host', 's3-region', 's3-user-key', 's3-user-secret', 's3-bucket-name'
]:
    env_value = os.environ.get(env_var)
    # Keep "None" visible for unset variables, but never echo a real secret.
    if env_value is not None and env_var in ('postgres-password', 's3-user-secret'):
        env_value = '****'
    print(f"{env_var}: {env_value}")

# Read connection details from environment variables or db.properties file
db_server = os.environ.get('db-server')
db_port = os.environ.get('db-port')
db_user = os.environ.get('db-su-user')
db_password = os.environ.get('postgres-password')

minio_host = os.environ.get('s3-host')
minio_region = os.environ.get('s3-region')
minio_user_key = os.environ.get('s3-user-key')
minio_user_secret = os.environ.get('s3-user-secret')
minio_bucket_name = os.environ.get('s3-bucket-name')

# If environment variables are not set, read from db.properties file
if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]):
config = ConfigParser()
config.read('db.properties')

db_server = config.get('PostgreSQL Connection', 'db-host')
db_port = config.get('PostgreSQL Connection', 'db-port')
db_user = config.get('PostgreSQL Connection', 'db-su-user')
db_password = config.get('PostgreSQL Connection', 'postgres-password')

minio_host = config.get('MinIO Connection', 's3-host')
minio_region = config.get('MinIO Connection', 's3-region')
minio_user_key = config.get('MinIO Connection', 's3-user-key')
minio_user_secret = config.get('MinIO Connection', 's3-user-secret')
minio_bucket_name = config.get('MinIO Connection', 's3-bucket-name')

# If environment variables are not set, read from db.properties file
if not all([db_server, db_port, db_user, db_password, minio_host, minio_region, minio_user_key, minio_user_secret, minio_bucket_name]):
Expand Down

0 comments on commit 9e26907

Please sign in to comment.