Skip to content

Commit

Permalink
fix: some email validation and encoding by latin-1
Browse files Browse the repository at this point in the history
  • Loading branch information
iloveagent57 committed Mar 11, 2024
1 parent c35bcae commit 3978df4
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
31 changes: 27 additions & 4 deletions scripts/assignment_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
"""
import csv
from collections import defaultdict, Counter
from email.utils import parseaddr

import click

# Column names applied, in order, to each CSV row. They are passed to
# csv.DictReader as explicit ``fieldnames`` (the file's own header row is
# skipped), so this order must match the physical column order of the input.
INPUT_FIELDNAMES = ['university_name', 'email']


def _iterate_csv(input_file):
    """
    Yield each data row of the CSV file at ``input_file`` as a dict.

    Rows are keyed by ``INPUT_FIELDNAMES`` (applied positionally); the first
    line of the file is assumed to be a header and is skipped.
    """
    # Input CSVs can contain non-ASCII characters, so the file is decoded
    # explicitly as latin-1 instead of relying on the platform default.
    with open(input_file, 'r', encoding='latin-1') as f_in:
        reader = csv.DictReader(f_in, fieldnames=INPUT_FIELDNAMES, delimiter=',')
        # read and skip the header; the ``None`` default keeps an empty file
        # from raising StopIteration here.
        next(reader, None)
        yield from reader

Expand All @@ -42,7 +42,7 @@ def print_duplicates(input_file):

for email, uni_list in unis_by_email.items():
if len(uni_list) > 1:
print(email, uni_list)
print(email or 'THE EMPTY STRING', 'is contained in', len(uni_list), 'different rows')


@click.command()
Expand All @@ -59,13 +59,36 @@ def print_plan_counts(input_file):
print(plan, count)


def is_valid_email(email):
    """
    Return True if ``parseaddr`` can extract a non-empty address from ``email``.

    This is a lenient sanity check (it only requires that an address part
    can be parsed out), not full validation of the address.

    Non-string values are reported as invalid instead of being handed to
    ``parseaddr``: ``csv.DictReader`` fills missing columns of a short row
    with ``None``, which would otherwise raise on some Python versions.
    """
    if not isinstance(email, str):
        return False
    _, address = parseaddr(email)
    return bool(address)


@click.command()
@click.option(
    '--input-file',
    help='Path of local file containing email addresses to assign.',
)
def validate_emails(input_file):
    # Report how many rows of the input CSV carry an email that fails
    # ``is_valid_email``, followed by a per-value tally of those emails.
    # (Kept as a comment rather than a docstring so the command's --help
    # output is unchanged.)
    rejected = Counter(
        record['email']
        for record in _iterate_csv(input_file)
        if not is_valid_email(record['email'])
    )

    total_rejected = sum(rejected.values())
    print(f'There were {total_rejected} invalid emails')
    print(rejected)


@click.group()
def run():
    # Root command group for this script: it does no work itself and exists
    # only so the individual commands can be attached with run.add_command().
    pass


# Attach each command to the ``run`` group so it is reachable as a
# subcommand when the script is executed.
run.add_command(print_duplicates)
run.add_command(print_plan_counts)
run.add_command(validate_emails)


if __name__ == '__main__':
Expand Down
18 changes: 16 additions & 2 deletions scripts/local_assignment_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
import csv
import json
import os
import re
import time
from email.utils import parseaddr
from pprint import pprint

import click
Expand Down Expand Up @@ -146,6 +148,11 @@ def get_plan_uuids_by_name(plans_by_name_file):
return plans_by_name


def is_valid_email(email):
    """
    Return True if ``parseaddr`` can extract a non-empty address from ``email``.

    This is a lenient sanity check (it only requires that an address part
    can be parsed out), not full validation of the address.

    Non-string values are reported as invalid instead of being handed to
    ``parseaddr``: ``csv.DictReader`` fills missing columns of a short row
    with ``None``, which would otherwise raise on some Python versions.
    """
    if not isinstance(email, str):
        return False
    _, address = parseaddr(email)
    return bool(address)


def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SIZE):
"""
Yield chunks of (chunk_id, subscription_plan, email) from the given input file.
Expand All @@ -159,14 +166,21 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
current_chunk = []
chunk_id = 0
current_subscription_plan_uuid = None
with open(input_file_path, 'r') as f_in:
# CSVs can contain non-ASCII characters; latin-1 is the encoding
# that currently works with our production input.
# The encoding could eventually be parameterized as an input to this command.
with open(input_file_path, 'r', encoding='latin-1') as f_in:
reader = csv.DictReader(f_in, fieldnames=INPUT_FIELDNAMES, delimiter=',')

# read and skip the header
next(reader)

for row in reader:
email = row['email']
if not is_valid_email(email):
print("Invalid email:", email)
continue

university_name = row['university_name']
subscription_plan_uuid = plans_by_name[university_name]

Expand Down Expand Up @@ -198,7 +212,7 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI

def _post_assignments(subscription_plan_uuid, emails_for_chunk, environment='local', fetch_jwt=False):
"""
Maket the POST request to assign licenses.
Make the POST request to assign licenses.
"""
url_pattern = ENVIRONMENTS[environment]
url = url_pattern.format(subscription_plan_uuid=subscription_plan_uuid)
Expand Down

0 comments on commit 3978df4

Please sign in to comment.