From 717453bccc6823784886f3d716d804055015cb10 Mon Sep 17 00:00:00 2001
From: "Aapo (f021b) Rista"
Date: Thu, 2 Nov 2023 10:33:23 +0200
Subject: [PATCH] Add application importer management command

---
 .../commands/import_applications_2nd_round.py | 227 ++++++++++++++++++
 .../management/commands/import_users_xslx.py  | 122 ++++++++++
 .../migrations/0025_application_other_id.py   |  18 ++
 django_server/application_evaluator/models.py |   1 +
 4 files changed, 368 insertions(+)
 create mode 100644 django_server/application_evaluator/management/commands/import_applications_2nd_round.py
 create mode 100644 django_server/application_evaluator/management/commands/import_users_xslx.py
 create mode 100644 django_server/application_evaluator/migrations/0025_application_other_id.py

diff --git a/django_server/application_evaluator/management/commands/import_applications_2nd_round.py b/django_server/application_evaluator/management/commands/import_applications_2nd_round.py
new file mode 100644
index 0000000..e24955f
--- /dev/null
+++ b/django_server/application_evaluator/management/commands/import_applications_2nd_round.py
@@ -0,0 +1,227 @@
+# Create management command to import data from Podio excel file to Django
+import glob
+from pathlib import Path
+from typing import Tuple
+
+import openpyxl
+from django.core.files import File
+
+from django.core.management.base import BaseCommand, CommandError
+from application_evaluator.models import Application, ApplicationRound, ApplicationAttachment
+
+
+# fields contains list of fields to be imported from excel file to Django
+# first value is heading level (0 not shown in description), original title in excel, new title description field
+# 0==ignore/>=1==include (value means heading level), original title, new title
+fields = [
+    [4, "Title of the application", "Application Title"],
+    [4, "Publishable summary of the application. (max. 1000 characters with spaces)", "Summary"],
+    [0, "Select city", "City"],
+    [4, "Select the challenge", "Challenge"],
+    [0, "Organisation", "Organization"],
+    [0, "Business ID (if you are individual citizen, just mark XX)", "Business ID"],
+    [0, "First name", "First Name"],
+    [0, "Last name", "Last Name"],
+    [0, "E-mail address", "Email"],
+    [0, "Phone number", "Phone Number"],
+    [
+        0,
+        "List here organisations and contact persons of any Piloting Partners",
+        "Piloting Partners (Organizations and Contacts)",
+    ],
+    [
+        4,
+        "Short description of the application (incl. target groups, objectives, technologies and methods) "
+        "and expected impact. (max. 5 000 characters with spaces)",
+        "Application Description",
+    ],
+    [
+        4,
+        "Work plan including preparatory actions, piloting plan, scale-up and replicability activities as well as "
+        "sustainability of the application. (max. 5 000 characters with spaces)",
+        "Work Plan",
+    ],
+    [
+        4,
+        "Resource plan including personnel, budget and possible self-funding. (max. 3 000 characters with spaces)",
+        "Resource Plan (Personnel, Budget, Self-funding)",
+    ],
+    [4, "Description of the co-creation methods applied. (max. 3 000 characters with spaces)", "Co-creation Methods"],
+    [4, "Plan for involving target group members. (max. 3 000 characters with spaces)", "Involvement of Target Group"],
+    [6, "In addition to Helsinki, you can offer your solution to", "Offer Solution to Helsinki"],
+    [6, "In addition to Amsterdam, you can offer your solution to", "Offer Solution to Amsterdam"],
+    [6, "Choose your budget for the Amsterdam pilot", "Budget for Amsterdam Pilot"],
+    [6, "Piloting plan for the Amsterdam pilot. (max. 3 000 characters)", "Piloting Plan for Amsterdam"],
+    [6, "Choose your budget for the Porto pilot", "Budget for Porto Pilot"],
+    [6, "Piloting plan for the Porto pilot. (max. 3 000 characters)", "Piloting Plan for Porto"],
+    [6, "Choose your budget for the Tallinn pilot", "Budget for Tallinn Pilot"],
+    [6, "Piloting plan for the Tallinn pilot. (max. 3 000 characters)", "Piloting Plan for Tallinn"],
+    [6, "Choose your budget for the Prague pilot", "Budget for Prague Pilot"],
+    [6, "Piloting plan for the Prague pilot. (max. 3 000 characters)", "Piloting Plan for Prague"],
+    [0, "Data privacy", "Data Privacy"],
+    [0, "GDPR", "GDPR Compliance"],
+    [0, "Ethics", "Ethical Considerations"],
+    [0, "I accept storing of personal information", "Accept Storing Personal Information"],
+    [0, "Data security policy", "Data Security Policy"],
+    [0, "Application ID", "Application ID"],
+    [0, "Submit date and time", "Submission Date and Time"],
+]
+
+
+def get_application_round_from_challenge_name(challenge_name: str) -> ApplicationRound:
+    """
+    Return the single "CC-2" ApplicationRound matching a challenge name.
+
+    Challenge name is like "Helsinki: How to do foo bar?". The text after the first ":" is
+    the title. The round is looked up by requiring its name to contain each of the first
+    four title words that are at least 3 characters long.
+
+    :param challenge_name: challenge name as it is in the Excel export
+    :return: the one matching ApplicationRound
+    :raises CommandError: if the title has no usable words, or if the number of matching
+        ApplicationRounds is not exactly one
+    """
+    # Split on the first ':' only, so that a ':' inside the title does not break the unpacking
+    _city, _sep, title = str(challenge_name or "").partition(":")
+    # Pick the 4 first words of the title, ignoring words shorter than 3 characters
+    words = [w for w in title.split() if len(w) >= 3][:4]
+    if not words:
+        raise CommandError(f"Cannot parse challenge name: {challenge_name!r}")
+    # Narrow the queryset down to ApplicationRounds whose name contains all words
+    ars = ApplicationRound.objects.filter(name__startswith="CC-2")
+    for w in words:
+        ars = ars.filter(name__icontains=w)
+    # Two rows are enough to tell "exactly one" from "none" and "several"
+    matches = list(ars[:2])
+    if len(matches) != 1:
+        # Raise instead of calling exit(): exit() without arguments ends the process with status 0
+        raise CommandError(f"ApplicationRound not found for {challenge_name}\nWords: {words}\n{ars.query}")
+    return matches[0]
+
+
+def create_id_name_descriptions(app: dict) -> Tuple[str, str, str]:
+    """
+    Create id, name and description from application data.
+
+    The description has one section for every entry of fields whose heading level is
+    greater than 0: a heading line of that many '#' characters and the short title,
+    an empty line, and the value of that field.
+
+    :param app: application data keyed by the short titles (fields[][2])
+    :return: tuple of application id, application name and description
+    """
+    app_id = app["Application ID"]
+    name = app["Application Title"]
+    sections = [f"{'#' * level} {title}\n\n{app[title]}" for level, _long_title, title in fields if level > 0]
+    description = "\n\n".join(sections)
+    return app_id, name, description
+
+
+def read_excel_sheet(filename: str) -> list:
+    """
+    Read the first worksheet of an Excel file and return it as a list of dicts.
+
+    The first non-empty row is used as column names. Every other non-empty row becomes a
+    dict of column name -> cell value.
+
+    :param filename: path of the Excel file
+    :return: list of dicts, empty if the sheet has no non-empty rows
+    """
+    wb = openpyxl.load_workbook(filename)
+    sheet = wb.worksheets[0]
+    # Skip empty rows
+    rows = [row for row in sheet.iter_rows(min_row=1, values_only=True) if any(v is not None for v in row)]
+    if not rows:
+        # No header row to take the column names from
+        return []
+    column_names, *data_rows = rows
+    # NOTE: None values are kept, callers look up every column from every row
+    return [dict(zip(column_names, row)) for row in data_rows]
+
+
+def import_attachments(app: Application, filename: str) -> bool:
+    """
+    Attach one file to an Application, unless it is attached already.
+
+    The file is skipped if app already has an attachment with the same base file name.
+
+    :param app: Application to add the attachment to
+    :param filename: path of the file to attach
+    :return: True if a new ApplicationAttachment was created, False if the file was skipped
+    """
+    filepath = Path(filename)
+    for a in app.attachments.all():
+        if Path(a.attachment.name).name == filepath.name:
+            print(f"Attachment {filepath.name} already exists, skipping")
+            return False
+    with open(filename, "rb") as f:
+        # objects.create() saves the object, so no separate save() call is needed.
+        # File gets the bare file name: the stored name must not depend on the source directory
+        attachment = ApplicationAttachment.objects.create(
+            application=app,
+            attachment=File(f, name=filepath.name),
+            name=filepath.name,
+        )
+    print(attachment)
+    return True
+
+
+def create_application(application_round: ApplicationRound, app: dict) -> Tuple[Application, bool]:
+    """
+    Create the Application of an application round, or update it if it exists already.
+
+    Existing Applications are looked up by other_id (the application id) and application round.
+
+    :param application_round: ApplicationRound the application belongs to
+    :param app: application data keyed by the short titles (fields[][2])
+    :return: tuple of the Application and True if it was created, False if it existed
+    """
+    app_id, name, description = create_id_name_descriptions(app)
+    # application_round is a lookup argument, so it is set on both new and existing objects
+    application, created = Application.objects.get_or_create(other_id=app_id, application_round=application_round)
+    application.name = name
+    application.description = description
+    application.save()
+    return application, created
+
+
+class Command(BaseCommand):
+    help = "Import data from Podio excel file to Django"
+
+    def add_arguments(self, parser):
+        parser.add_argument("--filename", type=str, required=True, help="Excel file to import")
+        parser.add_argument("--attachments-dir", type=str, required=False, help="Directory containing attachments")
+
+    def handle(self, *args, **options):
+        """Import the applications of the Excel file, optionally with attachments, and print a summary."""
+        rows = read_excel_sheet(options["filename"])
+        # Every row has the keys of the header row, so checking the first row is enough
+        missing = [f[1] for f in fields if rows and f[1] not in rows[0]]
+        if missing:
+            raise CommandError(f"Columns missing from the Excel file: {missing}")
+        # Replace long column names with short ones. Long column name is fields[][1], short is fields[][2]
+        applications = [{f[2]: a[f[1]] for f in fields} for a in rows]
+        new_app_cnt = 0
+        existing_app_cnt = 0
+        attachment_cnt = 0
+        for a in applications:
+            ar = get_application_round_from_challenge_name(a["Challenge"])
+            app, created = create_application(ar, a)
+            if created:
+                new_app_cnt += 1
+                print(f"New application: {app}")
+            else:
+                existing_app_cnt += 1
+                print(f"Existing application: {app}")
+            # Check if there is a subdirectory with same name as app.other_id
+            # If there is, import all files from that directory as attachments
+            if options["attachments_dir"] and app.other_id:
+                dirname = options["attachments_dir"]
+                for path in sorted(glob.glob(f"{dirname}/{app.other_id}/*")):
+                    # Skip subdirectories and count only the attachments that were really created
+                    if Path(path).is_file() and import_attachments(app, path):
+                        attachment_cnt += 1
+        print("----")
+        print(f"New applications: {new_app_cnt}")
+        print(f"Existing applications: {existing_app_cnt}")
+        print(f"Attachments: {attachment_cnt}")
diff --git a/django_server/application_evaluator/management/commands/import_users_xslx.py b/django_server/application_evaluator/management/commands/import_users_xslx.py
new file mode 100644
index 0000000..80e6b57
--- /dev/null
+++ b/django_server/application_evaluator/management/commands/import_users_xslx.py
@@ -0,0 +1,122 @@
+# Create management command to import evaluator users from Podio excel file to Django
+import secrets
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+
+# Excel reading and ApplicationRound lookup are shared with the application importer command
+from application_evaluator.management.commands.import_applications_2nd_round import (
+    get_application_round_from_challenge_name,
+    read_excel_sheet,
+)
+
+
+def merge_duplicate_users(users: list) -> list:
+    """
+    Merge duplicate users from Podio export. Example user dict:
+
+    user = {
+        "Challenge": "Cityname: How to do foo bar?",
+        "Created by": "CommuniCity podio app",
+        "Created on": datetime.datetime(2023, 10, 30, 9, 48, 13),
+        "Data security policy": "I have read the data security policy...",
+        "E-mail": "user@example.com",
+        "Evaluation criteria": "Impact; Excellence; Implementation; Co-creation",
+        "First name": "User",
+        "Last name": "Name",
+        "Organization": None,
+        "Tags": None,
+    }
+
+    There can be the same user, but with different challenge name. In this case, we want to merge the user data
+    and add the challenge name to the list of challenges.
+
+    Users are identified by their e-mail address, ignoring case and surrounding whitespace.
+    Rows without an e-mail address are skipped. The dicts of the users list are not modified.
+
+    :param users: list of user dicts, one per row of the Podio export
+    :return: list of new user dicts, one per e-mail address. "E-mail" is in lower case, "Challenge" is
+        the list of the distinct challenge names of the user and the obsolete keys are left out
+    """
+    obsolete_keys = {"Created on", "Created by", "Organization", "Data security policy", "Tags"}
+    # Merged user dicts by e-mail address
+    user_dict = {}
+    for user in users:
+        email = str(user["E-mail"] or "").strip().lower()
+        if not email:
+            print(f"Skipping user without E-mail: {user}")
+            continue
+        if email not in user_dict:
+            # Copy the row instead of deleting keys from the caller's dict
+            merged = {k: v for k, v in user.items() if k not in obsolete_keys}
+            merged["E-mail"] = email
+            merged["Challenge"] = []
+            user_dict[email] = merged
+        if user["Challenge"] not in user_dict[email]["Challenge"]:
+            user_dict[email]["Challenge"].append(user["Challenge"])
+
+    return list(user_dict.values())
+
+
+# NOTE: x is not used by the code of this module
+x = [
+    "CC-201: How can technology ease the process of setting up a bank account for foreigners",
+    "CC-202: How to support parents in online (sex)education and safety",
+    "CC-203: How to ensure accessible acceptance of a local payment system among "
+    "residents and local entrepreneurs in Amsterdam Nieuw",
+    "CC-204: How to encourage girls in Nieuw-West to take part in sports and " "exercise",
+    "CC-205: How to involve residents in a community savings and credit "
+    "cooperative that supports social initiatives in the city",
+    "CC-206: How to include deaf and hearing impaired in broadcasting information on public transport",
+    "CC-207: How to adapt an existing technological solution for a specific group",
+    "CC-208: How to strengthen the broad, positive health of youth through attractive and playful technology",
+    "CC-209: How can resilient role models help families to develop healthy relationships",
+    "CC-210: Wildcard – Propose any technological solution which contributes to a "
+    "breakthrough in intergenerational problems\t0\t0\tFalse",
+    "CC-211: How to reliably measure the digital skills of long-term unemployed citizens",
+    "CC-212: How to support the recognition of competence with help of a digital tool",
+    "CC-213: How to prevent pressure ulcers of wheelchair patients",
+    "CC-214: How to enhance the quality of life and foster inclusion for citizens "
+    "with severe disabilities through digital innovation",
+    "CC-215: How to collect and generate accessible pedestrian route information "
+    "through participatory data collection methods",
+    "CC-216: How to utilise existing data and data sources for activating " "digitally hard-to-reach residents",
+    "CC-217: How to reduce school absenteeism through an innovative and inclusive " "educational solution",
+    "CC-218: How to improve the thermal comfort and overall health in residential "
+    "buildings, focusing on passive strategies",
+    "CC-219: How to engage the citizens from socially and economically "
+    "disconnected localities in participatory planning",
+    "CC-220: How to better inform the public with limited access to digital tools " "about urban data analysis",
+]
+
+
+class Command(BaseCommand):
+    help = "Import user data from Podio excel file to Django"
+
+    def add_arguments(self, parser):
+        parser.add_argument("--filename", type=str, required=True)
+
+    def handle(self, *args, **options):
+        """Create the users of the Excel file and add them to the evaluators of their ApplicationRounds."""
+        new_users = merge_duplicate_users(read_excel_sheet(options["filename"]))
+        # Find all ApplicationRounds first: an unknown challenge name raises CommandError
+        # before any user is created
+        for u in new_users:
+            u["ApplicationRounds"] = [get_application_round_from_challenge_name(c) for c in u["Challenge"]]
+            # Print only the domain of the e-mail address and the number of rounds
+            domain = u["E-mail"].rpartition("@")[2]
+            print(f"@{domain}", len(u["ApplicationRounds"]))
+        # Create users
+        for u in new_users:
+            user, created = User.objects.get_or_create(username=u["E-mail"])
+            if created:
+                # Set names and password only if user is created, existing users are not changed
+                user.first_name = u["First name"] or ""
+                user.last_name = u["Last name"] or ""
+                # Set random password. User.objects.make_random_password() is deprecated in Django 4.2
+                user.set_password(secrets.token_urlsafe(16))
+                user.save()
+            # Add user to the evaluators of the ApplicationRounds
+            for ar in u["ApplicationRounds"]:
+                ar.evaluators.add(user)
+            print(user, created)
diff --git a/django_server/application_evaluator/migrations/0025_application_other_id.py b/django_server/application_evaluator/migrations/0025_application_other_id.py
new file mode 100644
index 0000000..5c5a377
--- /dev/null
+++ b/django_server/application_evaluator/migrations/0025_application_other_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.4 on 2023-11-01 09:37
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('application_evaluator', '0024_applicationround_city_alter_application_name_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='application',
+            name='other_id',
+            field=models.CharField(blank=True, max_length=128),
+        ),
+    ]
diff --git a/django_server/application_evaluator/models.py b/django_server/application_evaluator/models.py
index bee0ae1..7735471 100644
--- a/django_server/application_evaluator/models.py
+++ b/django_server/application_evaluator/models.py
@@ -197,6 +197,7 @@ def organization(user):
 class Application(NamedModel):
     application_round = models.ForeignKey(ApplicationRound, related_name="applications", on_delete=models.CASCADE)
     application_id = models.CharField(max_length=64, blank=True)  # Application ID (18 char) from Salesforce CSV
+    other_id = models.CharField(max_length=128, blank=True)  # id generated by some other system
     evaluating_organizations = models.ManyToManyField(Organization, related_name="applications_to_evaluate", blank=True)
     description = description_field()
     approved_by = models.ForeignKey(