Add application importer management command

ForumViriumHelsinki · Nov 2, 2023 · 717453b · 717453b
1 parent 10c135e
commit 717453b
Show file tree

Hide file tree

Showing 4 changed files with 387 additions and 0 deletions.
diff --git a/django_server/application_evaluator/management/commands/import_applications_2nd_round.py b/django_server/application_evaluator/management/commands/import_applications_2nd_round.py
@@ -0,0 +1,195 @@
+# Create management command to import data from Podio excel file to Django
+import glob
+from pathlib import Path
+
+import openpyxl
+from django.core.files import File
+
+from django.core.management.base import BaseCommand
+from application_evaluator.models import Application, ApplicationRound, ApplicationAttachment
+
+
+# Column mapping for the Podio Excel export. Each entry is
+# [heading_level, original_title, new_title]:
+#   heading_level   0 keeps the field out of the generated description;
+#                   a value >= 1 includes it under a heading made of that many '#'
+#   original_title  column title as it appears in the Excel file
+#   new_title       short key the value is stored under after import; also used as the heading text
+fields = [
+    [4, "Title of the application", "Application Title"],
+    [4, "Publishable summary of the application. (max. 1000 characters with spaces)", "Summary"],
+    [0, "Select city", "City"],
+    [4, "Select the challenge", "Challenge"],
+    [0, "Organisation", "Organization"],
+    [0, "Business ID (if you are individual citizen, just mark XX)", "Business ID"],
+    [0, "First name", "First Name"],
+    [0, "Last name", "Last Name"],
+    [0, "E-mail address", "Email"],
+    [0, "Phone number", "Phone Number"],
+    [
+        0,
+        "List here organisations and contact persons of any Piloting Partners",
+        "Piloting Partners (Organizations and Contacts)",
+    ],
+    [
+        4,
+        "Short description of the application (incl. target groups, objectives, technologies and methods) "
+        "and expected impact. (max. 5 000 characters with spaces)",
+        "Application Description",
+    ],
+    [
+        4,
+        "Work plan including preparatory actions, piloting plan, scale-up and replicability activities as well as "
+        "sustainability of the application. (max. 5 000 characters with spaces)",
+        "Work Plan",
+    ],
+    [
+        4,
+        "Resource plan including personnel, budget and possible self-funding. (max. 3 000 characters with spaces)",
+        "Resource Plan (Personnel, Budget, Self-funding)",
+    ],
+    [4, "Description of the co-creation methods applied. (max. 3 000 characters with spaces)", "Co-creation Methods"],
+    [4, "Plan for involving target group members. (max. 3 000 characters with spaces)", "Involvement of Target Group"],
+    [6, "In addition to Helsinki, you can offer your solution to", "Offer Solution to Helsinki"],
+    [6, "In addition to Amsterdam, you can offer your solution to", "Offer Solution to Amsterdam"],
+    [6, "Choose your budget for the Amsterdam pilot", "Budget for Amsterdam Pilot"],
+    [6, "Piloting plan for the Amsterdam pilot. (max. 3 000 characters)", "Piloting Plan for Amsterdam"],
+    [6, "Choose your budget for the Porto pilot", "Budget for Porto Pilot"],
+    [6, "Piloting plan for the Porto pilot. (max. 3 000 characters)", "Piloting Plan for Porto"],
+    [6, "Choose your budget for the Tallinn pilot", "Budget for Tallinn Pilot"],
+    [6, "Piloting plan for the Tallinn pilot. (max. 3 000 characters)", "Piloting Plan for Tallinn"],
+    [6, "Choose your budget for the Prague pilot", "Budget for Prague Pilot"],
+    [6, "Piloting plan for the Prague pilot. (max. 3 000 characters)", "Piloting Plan for Prague"],
+    [0, "Data privacy", "Data Privacy"],
+    [0, "GDPR", "GDPR Compliance"],
+    [0, "Ethics", "Ethical Considerations"],
+    [0, "I accept storing of personal information", "Accept Storing Personal Information"],
+    [0, "Data security policy", "Data Security Policy"],
+    [0, "Application ID", "Application ID"],
+    [0, "Submit date and time", "Submission Date and Time"],
+]
+
+
+def get_application_round_from_challenge_name(challenge_name: str) -> ApplicationRound:
+    """
+    Find the single ApplicationRound that matches a Podio challenge name.
+
+    Challenge name is like "Helsinki: How to do foo bar?". The text after the first
+    ':' is reduced to its first four words of at least three characters, and the
+    round is searched among rounds whose name starts with "CC-2" and contains every
+    one of those words (case-insensitively).
+
+    Returns the only matching ApplicationRound.
+    Raises CommandError if the name has no ':' separator, or if the words match
+    zero or more than one round.
+    """
+    # Imported locally: the module-level import only brings in BaseCommand.
+    from django.core.management.base import CommandError
+
+    # Split on the first ':' only, so a title that itself contains ':' still parses.
+    _city, separator, title = challenge_name.partition(":")
+    if not separator:
+        raise CommandError(f"Challenge name is not in 'City: title' form: {challenge_name!r}")
+    # Words shorter than 3 characters are dropped; the first 4 remaining words are used.
+    words = [w for w in title.split() if len(w) >= 3][:4]
+    # Narrow the queryset once per word so that the name must contain all of them.
+    ars = ApplicationRound.objects.filter(name__startswith="CC-2")
+    for w in words:
+        ars = ars.filter(name__icontains=w)
+    matches = list(ars)
+    if len(matches) != 1:
+        # A bare exit() would end the command with a success status; CommandError
+        # makes Django report the problem and exit non-zero.
+        raise CommandError(f"ApplicationRound not found for {challenge_name}\nWords: {words}\n{ars.query}")
+    return matches[0]
+
+
+def create_id_name_descriptions(app: dict) -> "tuple[str, str, str]":
+    """
+    Create id, name and description from application data.
+
+    `app` is one application keyed by the new titles from `fields`. Every field
+    whose heading level is greater than 0 contributes a section to the description:
+    a line of that many '#' followed by the new title, a blank line, and the value.
+    Sections are separated by blank lines.
+
+    Returns a tuple (app_id, name, description).
+    """
+    app_id = app["Application ID"]
+    name = app["Application Title"]
+    sections = [
+        "#" * level + f" {title}\n\n{app[title]}"
+        for level, _original_title, title in fields
+        if level > 0  # level 0 means "do not show in description"
+    ]
+    description = "\n\n".join(sections)
+    return app_id, name, description
+
+
+def read_excel_sheet(filename: str) -> list:
+    """
+    Read the first worksheet of an Excel file and return it as a list of dicts.
+
+    Rows in which every cell is None are skipped. The first remaining row supplies
+    the column names; each following row becomes one dict keyed by those names.
+    None values are kept in the dicts: per the original note, filtering them out
+    does not work because some fields are empty in the Excel file.
+
+    Returns an empty list when the sheet has no non-empty rows.
+    """
+    wb = openpyxl.load_workbook(filename)
+    sheet = wb.worksheets[0]
+    rows = [
+        row
+        for row in sheet.iter_rows(min_row=1, values_only=True)
+        if any(value is not None for value in row)  # Skip empty rows
+    ]
+    if not rows:
+        # No header row to take column names from.
+        return []
+    column_names, *records = rows
+    return [dict(zip(column_names, record)) for record in records]
+
+
+def import_attachments(app: Application, filename: str):
+    """
+    Attach the file at `filename` to `app` unless it is already attached.
+
+    An existing attachment counts as the same file when either its `name` field or
+    the base name of its stored file equals the base name of `filename`. In that
+    case a message is printed and nothing is created.
+    """
+    filepath = Path(filename)
+    # The stored file name alone is not a reliable key: storage may rewrite it
+    # (e.g. replace spaces), so the `name` field set at creation is compared as well.
+    already_attached = any(
+        a.name == filepath.name or Path(a.attachment.name).name == filepath.name for a in app.attachments.all()
+    )
+    if already_attached:
+        print(f"Attachment {filepath.name} already exists, skipping")
+        return
+    with open(filename, "rb") as f:
+        # Give the File only the base name: otherwise the whole source path becomes
+        # the upload name, and FileField rejects absolute paths there.
+        # objects.create() already saves the row, so no extra save() is needed.
+        attachment = ApplicationAttachment.objects.create(
+            application=app,
+            attachment=File(f, name=filepath.name),
+            name=filepath.name,
+        )
+    print(attachment)
+
+
+def create_application(application_round: ApplicationRound, app: dict) -> "tuple[Application, bool]":
+    """
+    Create or update the Application for one imported row.
+
+    The application is looked up by `other_id` within `application_round` and
+    created if it does not exist; its name and description are then overwritten
+    with the imported values and saved.
+
+    Returns (application, created), where `created` is True for a new object.
+    """
+    app_id, name, description = create_id_name_descriptions(app)
+    # The lookup already fixes application_round, so only name/description are set below.
+    application, created = Application.objects.get_or_create(other_id=app_id, application_round=application_round)
+    application.name = name
+    application.description = description
+    application.save()
+    return application, created
+
+
+class Command(BaseCommand):
+    """Import applications, and optionally their attachments, from a Podio Excel export."""
+
+    help = "Import data from Podio excel file to Django"
+
+    def add_arguments(self, parser):
+        # --filename is mandatory: handle() cannot do anything without it.
+        parser.add_argument("--filename", type=str, required=True, help="Excel file to import")
+        parser.add_argument("--attachments-dir", type=str, required=False, help="Directory containing attachments")
+
+    def handle(self, *args, **options):
+        """
+        Read the Excel file, create or update one Application per row and, when
+        --attachments-dir is given, attach the files found in the subdirectory
+        named after the application's `other_id`.
+
+        Raises CommandError if the Excel file lacks a column listed in `fields`.
+        """
+        # Imported locally: the module-level import only brings in BaseCommand.
+        from django.core.management.base import CommandError
+
+        rows = read_excel_sheet(options["filename"])
+        if rows:
+            # Every row dict shares the header keys, so checking the first one is enough.
+            missing = [f[1] for f in fields if f[1] not in rows[0]]
+            if missing:
+                raise CommandError(f"Columns missing from {options['filename']}: {missing}")
+        # Replace long column names with short ones. Long column name is fields[][1], short is fields[][2]
+        applications = [{f[2]: a[f[1]] for f in fields} for a in rows]
+        new_app_cnt = 0
+        existing_app_cnt = 0
+        attachment_cnt = 0
+        for a in applications:
+            ar = get_application_round_from_challenge_name(a["Challenge"])
+            app, created = create_application(ar, a)
+            if created:
+                new_app_cnt += 1
+                self.stdout.write(f"New application: {app}")
+            else:
+                existing_app_cnt += 1
+                self.stdout.write(f"Existing application: {app}")
+            # If there is a subdirectory with the same name as app.other_id,
+            # import all files from that directory as attachments.
+            if options["attachments_dir"]:
+                dirname = options["attachments_dir"]
+                for path in sorted(glob.glob(f"{dirname}/{app.other_id}/*")):
+                    if not Path(path).is_file():
+                        # Nested directories cannot be opened as attachments.
+                        continue
+                    import_attachments(app, path)
+                    # Counts every file handed to import_attachments, including
+                    # ones it reports as already existing.
+                    attachment_cnt += 1
+            self.stdout.write("----")
+        self.stdout.write(f"New applications: {new_app_cnt}")
+        self.stdout.write(f"Existing applications: {existing_app_cnt}")
+        self.stdout.write(f"Attachments: {attachment_cnt}")
diff --git a/django_server/application_evaluator/management/commands/import_users_xslx.py b/django_server/application_evaluator/management/commands/import_users_xslx.py
@@ -0,0 +1,173 @@
+# Management command to import evaluator users from a Podio Excel export into Django
+import re
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+
+# import excel module
+import openpyxl
+
+
+from application_evaluator.models import ApplicationRound
+
+
+def read_excel_sheet(filename: str) -> list:
+    """
+    Read the first worksheet of an Excel file and return it as a list of dicts.
+
+    Rows in which every cell is None are skipped. The first remaining row supplies
+    the column names; each following row becomes one dict keyed by those names.
+    None values are kept in the dicts: per the original note, filtering them out
+    does not work because some fields are empty in the Excel file.
+
+    Returns an empty list when the sheet has no non-empty rows.
+    """
+    wb = openpyxl.load_workbook(filename)
+    sheet = wb.worksheets[0]
+    rows = [
+        row
+        for row in sheet.iter_rows(min_row=1, values_only=True)
+        if any(value is not None for value in row)  # Skip empty rows
+    ]
+    if not rows:
+        # No header row to take column names from.
+        return []
+    column_names, *records = rows
+    return [dict(zip(column_names, record)) for record in records]
+
+
+def merge_duplicate_users(users: list) -> list:
+    """
+    Merge duplicate users from Podio export. Example user dict:
+
+    user = {
+        "Challenge": "Cityname: How to do foo bar?",
+        "Created by": "CommuniCity podio app",
+        "Created on": datetime.datetime(2023, 10, 30, 9, 48, 13),
+        "Data security policy": "I have read the data security policy...",
+        "E-mail": "user@example.com",
+        "Evaluation criteria": "Impact; Excellence; Implementation; Co-creation",
+        "First name": "User",
+        "Last name": "Name",
+        "Organization": None,
+        "Tags": None,
+    }
+
+    The same user can appear several times with different challenge names. Such rows
+    are merged into one dict whose "Challenge" value is the list of distinct
+    challenge names in first-seen order; the other values come from the first row.
+
+    Rows are matched on the e-mail address, ignoring case and surrounding whitespace.
+    The input dicts are not modified. Returns the merged dicts in first-seen order.
+    """
+    # Keys that are of no use after import.
+    obsolete_keys = ("Created on", "Created by", "Organization", "Data security policy", "Tags")
+    merged = {}
+    for user in users:
+        email = user["E-mail"]
+        # The caller derives the username from the lowercased e-mail, so merge on the same form.
+        key = email.strip().lower() if isinstance(email, str) else email
+        entry = merged.get(key)
+        if entry is None:
+            # Copy instead of editing the caller's dict in place.
+            entry = {k: v for k, v in user.items() if k not in obsolete_keys}
+            entry["Challenge"] = [user["Challenge"]]
+            merged[key] = entry
+        elif user["Challenge"] not in entry["Challenge"]:
+            entry["Challenge"].append(user["Challenge"])
+    return list(merged.values())
+
+
+# List of challenge titles numbered CC-201 ... CC-220.
+# NOTE(review): nothing in the visible part of this module reads `x`; it looks like
+# pasted reference data — confirm before relying on it or removing it.
+# NOTE(review): the CC-210 entry ends with "\t0\t0\tFalse", which looks like residue
+# from a tab-separated dump rather than part of the title — confirm.
+x = [
+    "CC-201: How can technology ease the process of setting up a bank account for foreigners",
+    "CC-202: How to support parents in online (sex)education and safety",
+    "CC-203: How to ensure accessible acceptance of a local payment system among "
+    "residents and local entrepreneurs in Amsterdam Nieuw",
+    "CC-204: How to encourage girls in Nieuw-West to take part in sports and " "exercise",
+    "CC-205: How to involve residents in a community savings and credit "
+    "cooperative that supports social initiatives in the city",
+    "CC-206: How to include deaf and hearing impaired in broadcasting information on public transport",
+    "CC-207: How to adapt an existing technological solution for a specific group",
+    "CC-208: How to strengthen the broad, positive health of youth through attractive and playful technology",
+    "CC-209: How can resilient role models help families to develop healthy relationships",
+    "CC-210: Wildcard – Propose any technological solution which contributes to a "
+    "breakthrough in intergenerational problems\t0\t0\tFalse",
+    "CC-211: How to reliably measure the digital skills of long-term unemployed citizens",
+    "CC-212: How to support the recognition of competence with help of a digital tool",
+    "CC-213: How to prevent pressure ulcers of wheelchair patients",
+    "CC-214: How to enhance the quality of life and foster inclusion for citizens "
+    "with severe disabilities through digital innovation",
+    "CC-215: How to collect and generate accessible pedestrian route information "
+    "through participatory data collection methods",
+    "CC-216: How to utilise existing data and data sources for activating " "digitally hard-to-reach residents",
+    "CC-217: How to reduce school absenteeism through an innovative and inclusive " "educational solution",
+    "CC-218: How to improve the thermal comfort and overall health in residential "
+    "buildings, focusing on passive strategies",
+    "CC-219: How to engage the citizens from socially and economically "
+    "disconnected localities in participatory planning",
+    "CC-220: How to better inform the public with limited access to digital tools " "about urban data analysis",
+]
+
+
+class Command(BaseCommand):
+    """Create evaluator users from a Podio Excel export and add them to their ApplicationRounds."""
+
+    help = "Import user data from Podio excel file to Django"
+
+    def add_arguments(self, parser):
+        parser.add_argument("--filename", type=str, required=True)
+
+    @staticmethod
+    def _find_application_round(challenge_name):
+        """
+        Return the single ApplicationRound matching `challenge_name` ("City: title").
+
+        The round is searched among rounds whose name starts with "CC-2" and contains
+        each of the title's first four words of at least three characters.
+        Raises CommandError if the name has no ':' or the match is not unique.
+        """
+        # Imported locally: the module-level import only brings in BaseCommand.
+        from django.core.management.base import CommandError
+
+        # Split on the first ':' only, so a title that itself contains ':' still parses.
+        _city, separator, title = challenge_name.partition(":")
+        if not separator:
+            raise CommandError(f"Challenge name is not in 'City: title' form: {challenge_name!r}")
+        words = [w for w in title.split() if len(w) >= 3][:4]
+        ars = ApplicationRound.objects.filter(name__startswith="CC-2")
+        for w in words:
+            ars = ars.filter(name__icontains=w)
+        matches = list(ars)
+        if len(matches) != 1:
+            # CommandError makes Django report the failure and exit non-zero.
+            raise CommandError(f"ApplicationRound not found for {challenge_name}\nWords: {words}\n{ars.query}")
+        return matches[0]
+
+    def handle(self, *args, **options):
+        """
+        Read the Excel file, merge duplicate rows per user, resolve each user's
+        challenges to ApplicationRounds, then create missing users and add every
+        user to the evaluators of their rounds.
+        """
+        import secrets
+
+        new_users = merge_duplicate_users(read_excel_sheet(options["filename"]))
+        # Resolve all challenges before writing anything, so that an unknown
+        # challenge aborts the run before any user has been created.
+        for u in new_users:
+            u["ApplicationRounds"] = [self._find_application_round(c) for c in u["Challenge"]]
+            # Only the domain is shown in the progress output.
+            domain = u["E-mail"].lower().rpartition("@")[2]
+            self.stdout.write(f"@{domain} {len(u['ApplicationRounds'])}")
+        # Create users
+        for u in new_users:
+            user, created = User.objects.get_or_create(username=u["E-mail"].lower())
+            if created:  # Set first_name and last_name only if user is created
+                # Empty Excel cells arrive as None; store them as empty strings.
+                user.first_name = u["First name"] or ""
+                user.last_name = u["Last name"] or ""
+                # Random password; make_random_password() is deprecated in Django 4.2,
+                # so the token comes from the standard library instead.
+                user.set_password(secrets.token_urlsafe(32))
+                user.save()
+            # Add ApplicationRounds to user
+            for ar in u["ApplicationRounds"]:
+                ar.evaluators.add(user)
+            self.stdout.write(f"{user} {created}")
diff --git a/django_server/application_evaluator/migrations/0025_application_other_id.py b/django_server/application_evaluator/migrations/0025_application_other_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.4 on 2023-11-01 09:37
+
+from django.db import migrations, models
+
+
+# Adds the `other_id` CharField (blank allowed, max_length 128) to the Application model.
+class Migration(migrations.Migration):
+
+    # Must be applied after migration 0024 of the same app.
+    dependencies = [
+        ('application_evaluator', '0024_applicationround_city_alter_application_name_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='application',
+            name='other_id',
+            field=models.CharField(blank=True, max_length=128),
+        ),
+    ]
diff --git a/django_server/application_evaluator/models.py b/django_server/application_evaluator/models.py
@@ -197,6 +197,7 @@ def organization(user):
 class Application(NamedModel):
     application_round = models.ForeignKey(ApplicationRound, related_name="applications", on_delete=models.CASCADE)
     application_id = models.CharField(max_length=64, blank=True)  # Application ID (18 char) from Salesforce CSV
+    other_id = models.CharField(max_length=128, blank=True)  # id generated by some other system
     evaluating_organizations = models.ManyToManyField(Organization, related_name="applications_to_evaluate", blank=True)
     description = description_field()
     approved_by = models.ForeignKey(