Skip to content

Commit

Permalink
Add application importer management command
Browse files Browse the repository at this point in the history
  • Loading branch information
aapris committed Nov 2, 2023
1 parent 10c135e commit 717453b
Show file tree
Hide file tree
Showing 4 changed files with 387 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Create management command to import data from Podio excel file to Django
import glob
from pathlib import Path

import openpyxl
from django.core.files import File

from django.core.management.base import BaseCommand
from application_evaluator.models import Application, ApplicationRound, ApplicationAttachment


# Column mapping for the Podio Excel export.
# Each entry is [heading_level, original Excel column title, short title]:
#   - heading_level 0 means the column is left out of the generated description;
#     heading_level >= 1 means the column is included under a Markdown heading
#     made of that many "#" characters.
#   - The original title is the key used to read the value from an Excel row.
#   - The short title is the key used after renaming and the heading text shown
#     in the description.
fields = [
    [4, "Title of the application", "Application Title"],
    [4, "Publishable summary of the application. (max. 1000 characters with spaces)", "Summary"],
    [0, "Select city", "City"],
    [4, "Select the challenge", "Challenge"],
    [0, "Organisation", "Organization"],
    [0, "Business ID (if you are individual citizen, just mark XX)", "Business ID"],
    [0, "First name", "First Name"],
    [0, "Last name", "Last Name"],
    [0, "E-mail address", "Email"],
    [0, "Phone number", "Phone Number"],
    [
        0,
        "List here organisations and contact persons of any Piloting Partners",
        "Piloting Partners (Organizations and Contacts)",
    ],
    [
        4,
        "Short description of the application (incl. target groups, objectives, technologies and methods) "
        "and expected impact. (max. 5 000 characters with spaces)",
        "Application Description",
    ],
    [
        4,
        "Work plan including preparatory actions, piloting plan, scale-up and replicability activities as well as "
        "sustainability of the application. (max. 5 000 characters with spaces)",
        "Work Plan",
    ],
    [
        4,
        "Resource plan including personnel, budget and possible self-funding. (max. 3 000 characters with spaces)",
        "Resource Plan (Personnel, Budget, Self-funding)",
    ],
    [4, "Description of the co-creation methods applied. (max. 3 000 characters with spaces)", "Co-creation Methods"],
    [4, "Plan for involving target group members. (max. 3 000 characters with spaces)", "Involvement of Target Group"],
    # City-specific questions are rendered with the smallest heading (level 6).
    [6, "In addition to Helsinki, you can offer your solution to", "Offer Solution to Helsinki"],
    [6, "In addition to Amsterdam, you can offer your solution to", "Offer Solution to Amsterdam"],
    [6, "Choose your budget for the Amsterdam pilot", "Budget for Amsterdam Pilot"],
    [6, "Piloting plan for the Amsterdam pilot. (max. 3 000 characters)", "Piloting Plan for Amsterdam"],
    [6, "Choose your budget for the Porto pilot", "Budget for Porto Pilot"],
    [6, "Piloting plan for the Porto pilot. (max. 3 000 characters)", "Piloting Plan for Porto"],
    [6, "Choose your budget for the Tallinn pilot", "Budget for Tallinn Pilot"],
    [6, "Piloting plan for the Tallinn pilot. (max. 3 000 characters)", "Piloting Plan for Tallinn"],
    [6, "Choose your budget for the Prague pilot", "Budget for Prague Pilot"],
    [6, "Piloting plan for the Prague pilot. (max. 3 000 characters)", "Piloting Plan for Prague"],
    [0, "Data privacy", "Data Privacy"],
    [0, "GDPR", "GDPR Compliance"],
    [0, "Ethics", "Ethical Considerations"],
    [0, "I accept storing of personal information", "Accept Storing Personal Information"],
    [0, "Data security policy", "Data Security Policy"],
    [0, "Application ID", "Application ID"],
    [0, "Submit date and time", "Submission Date and Time"],
]


def get_application_round_from_challenge_name(challenge_name: str) -> ApplicationRound:
    """
    Return the single ApplicationRound that matches a Podio challenge name.

    A challenge name looks like "Helsinki: How to do foo bar?". The part before
    the first ":" (the city) is ignored. From the remaining title, the first
    four words that are at least three characters long are used as keywords.
    Candidates are the ApplicationRounds whose name starts with "CC-2" and
    contains every keyword (case-insensitively).

    Raises:
        SystemExit: with a diagnostic message (and therefore a non-zero exit
            status) unless exactly one ApplicationRound matches.
    """
    # Split on the first ":" only, so a colon inside the title cannot break the parsing.
    _city, separator, title = challenge_name.partition(":")
    if not separator:
        # No city prefix at all: treat the whole string as the title.
        title = challenge_name
    keywords = [word for word in title.strip().split(" ") if len(word) >= 3][:4]

    candidates = ApplicationRound.objects.filter(name__startswith="CC-2")
    for keyword in keywords:
        candidates = candidates.filter(name__icontains=keyword)

    # Two rows are enough to tell "exactly one" from "none" or "several",
    # and the single query result is reused for the return value.
    matches = list(candidates[:2])
    if len(matches) != 1:
        found = "no" if not matches else "more than one"
        raise SystemExit(
            f"ApplicationRound not found for {challenge_name} ({found} match)\n"
            f"Words: {keywords}\n"
            f"{candidates.query}"
        )
    return matches[0]


def create_id_name_descriptions(app: dict) -> tuple:
    """
    Build the id, name and Markdown description of one application.

    Args:
        app: application data keyed by the short titles from ``fields``.

    Returns:
        A ``(app_id, name, description)`` tuple of three values. The description
        has one section per entry of ``fields`` whose heading level is greater
        than zero: a Markdown heading of that level with the short title,
        a blank line, then the value. Sections are separated by blank lines.

    Raises:
        KeyError: if ``app`` lacks "Application ID", "Application Title" or
            any included field.
    """
    app_id = app["Application ID"]
    name = app["Application Title"]
    # NOTE(review): a value of None (empty Excel cell) is rendered as the text
    # "None" in the description - confirm whether that is intended.
    sections = [
        "#" * level + f" {title}\n\n{app[title]}"
        for level, _original_title, title in fields
        if level > 0
    ]
    return app_id, name, "\n\n".join(sections)


def read_excel_sheet(filename: str) -> list:
    """
    Read the first worksheet of an Excel file and return its rows as dicts.

    The first non-empty row supplies the column names; every later non-empty
    row becomes one dict keyed by those names. Rows in which every cell is
    None are skipped. Cell values are kept unchanged, including None for
    empty cells (an earlier attempt to drop them was abandoned because some
    empty fields come through as empty strings).

    Returns:
        A list of dicts, or an empty list when the sheet has no non-empty rows.
    """
    workbook = openpyxl.load_workbook(filename)
    sheet = workbook.worksheets[0]
    rows = (
        row
        for row in sheet.iter_rows(min_row=1, values_only=True)
        if any(cell is not None for cell in row)  # Skip empty rows
    )
    # An empty sheet has no header row; return early instead of failing.
    column_names = next(rows, None)
    if column_names is None:
        return []
    return [dict(zip(column_names, row)) for row in rows]


def import_attachments(app: Application, filename: str) -> bool:
    """
    Attach the file at ``filename`` to ``app`` unless it is already attached.

    A file counts as already attached when one of the application's existing
    attachments has the same base file name.

    Args:
        app: the Application that receives the attachment.
        filename: path of the file to attach.

    Returns:
        True when a new ApplicationAttachment was created, False when the file
        was skipped because it already exists.
    """
    filepath = Path(filename)
    already_attached = any(
        Path(existing.attachment.name).name == filepath.name for existing in app.attachments.all()
    )
    if already_attached:
        print(f"Attachment {filepath.name} already exists, skipping")
        return False
    with open(filename, "rb") as f:
        # objects.create() already saves the new row, so no separate save() call is needed.
        attachment = ApplicationAttachment.objects.create(
            application=app,
            attachment=File(f),
            name=filepath.name,
        )
    print(attachment)
    return True


def create_application(application_round: ApplicationRound, app: dict) -> tuple:
    """
    Create or update the Application described by ``app``.

    The application is looked up by its external id (``other_id``) within
    ``application_round``. Its name and description are always overwritten
    with the values derived from ``app``.

    Returns:
        An ``(application, created)`` tuple; ``created`` is True when a new
        Application row was inserted.
    """
    app_id, name, description = create_id_name_descriptions(app)
    # update_or_create performs the lookup and the write as one operation and
    # inserts new rows with their final name and description.
    return Application.objects.update_or_create(
        other_id=app_id,
        application_round=application_round,
        defaults={"name": name, "description": description},
    )


class Command(BaseCommand):
    """Import applications, and optionally their attachments, from a Podio Excel export."""

    help = "Import data from Podio excel file to Django"

    def add_arguments(self, parser):
        """Register the command-line options."""
        # handle() reads --filename unconditionally, so let argparse reject a missing value.
        parser.add_argument("--filename", type=str, required=True, help="Excel file to import")
        parser.add_argument("--attachments-dir", type=str, required=False, help="Directory containing attachments")

    def handle(self, *args, **options):
        """
        Create or update one Application per Excel row and print a summary.

        When --attachments-dir is given, every regular file found in the
        sub-directory named after the application's ``other_id`` is passed to
        ``import_attachments``.
        """
        rows = read_excel_sheet(options["filename"])
        # Replace long column names with short ones. Long column name is fields[][1], short is fields[][2]
        applications = [{f[2]: row[f[1]] for f in fields} for row in rows]
        attachments_dir = options["attachments_dir"]
        new_app_cnt = 0
        existing_app_cnt = 0
        attachment_cnt = 0
        for a in applications:
            ar = get_application_round_from_challenge_name(a["Challenge"])
            app, created = create_application(ar, a)
            if created:
                new_app_cnt += 1
                print(f"New application: {app}")
            else:
                existing_app_cnt += 1
                print(f"Existing application: {app}")
            # If there is a sub-directory named like app.other_id, import its files as attachments.
            if attachments_dir:
                for path in sorted(glob.glob(f"{attachments_dir}/{app.other_id}/*")):
                    # Only regular files can be opened and attached; skip nested directories.
                    if not Path(path).is_file():
                        continue
                    import_attachments(app, path)
                    attachment_cnt += 1
            print("----")
        print(f"New applications: {new_app_cnt}")
        print(f"Existing applications: {existing_app_cnt}")
        print(f"Attachments: {attachment_cnt}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Create management command to import challenge texts gathered from a web page to Django
import re

from django.contrib.auth.models import User
from django.core.management.base import BaseCommand

# import excel module
import openpyxl


from application_evaluator.models import ApplicationRound


def read_excel_sheet(filename: str) -> list:
    """
    Read the first worksheet of an Excel file and return its rows as dicts.

    The first non-empty row supplies the column names; every later non-empty
    row becomes one dict keyed by those names. Rows in which every cell is
    None are skipped. Cell values are kept unchanged, including None for
    empty cells (an earlier attempt to drop them was abandoned because some
    empty fields come through as empty strings).

    Returns:
        A list of dicts, or an empty list when the sheet has no non-empty rows.
    """
    workbook = openpyxl.load_workbook(filename)
    sheet = workbook.worksheets[0]
    rows = (
        row
        for row in sheet.iter_rows(min_row=1, values_only=True)
        if any(cell is not None for cell in row)  # Skip empty rows
    )
    # An empty sheet has no header row; return early instead of failing.
    column_names = next(rows, None)
    if column_names is None:
        return []
    return [dict(zip(column_names, row)) for row in rows]


def merge_duplicate_users(users: list) -> list:
    """
    Merge duplicate users from a Podio export. Example user dict:
        user = {
            "Challenge": "Cityname: How to do foo bar?",
            "Created by": "CommuniCity podio app",
            "Created on": datetime.datetime(2023, 10, 30, 9, 48, 13),
            "Data security policy": "I have read the data security policy...",
            "E-mail": "user@example.com",
            "Evaluation criteria": "Impact; Excellence; Implementation; Co-creation",
            "First name": "User",
            "Last name": "Name",
            "Organization": None,
            "Tags": None,
        }
    The same user (same "E-mail") can appear several times with different
    challenges. Such rows are merged into one dict whose "Challenge" value is
    the list of distinct challenge names in first-seen order; all other values
    come from the first row seen for that e-mail.

    The keys "Created on", "Created by", "Organization",
    "Data security policy" and "Tags" are left out of the result.
    The input dicts are not modified.

    Returns:
        One new dict per distinct e-mail, in first-seen order.
    """
    obsolete_keys = ("Created on", "Created by", "Organization", "Data security policy", "Tags")
    merged = {}
    for user in users:
        email = user["E-mail"]
        challenge = user["Challenge"]
        entry = merged.get(email)
        if entry is None:
            # Build a copy without the obsolete keys so the caller's dict stays untouched.
            entry = {key: value for key, value in user.items() if key not in obsolete_keys}
            entry["Challenge"] = [challenge]
            merged[email] = entry
        elif challenge not in entry["Challenge"]:
            entry["Challenge"].append(challenge)
    return list(merged.values())


# List of challenge names with their "CC-2xx" prefixes.
# NOTE(review): this list is not referenced anywhere in the visible code;
# presumably a scratch copy of the ApplicationRound names - confirm whether it
# is still needed.
# NOTE(review): the CC-210 entry ends with "\t0\t0\tFalse", which looks like a
# leftover from a tab-separated paste - confirm.
x = [
    "CC-201: How can technology ease the process of setting up a bank account for foreigners",
    "CC-202: How to support parents in online (sex)education and safety",
    "CC-203: How to ensure accessible acceptance of a local payment system among "
    "residents and local entrepreneurs in Amsterdam Nieuw",
    "CC-204: How to encourage girls in Nieuw-West to take part in sports and " "exercise",
    "CC-205: How to involve residents in a community savings and credit "
    "cooperative that supports social initiatives in the city",
    "CC-206: How to include deaf and hearing impaired in broadcasting information on public transport",
    "CC-207: How to adapt an existing technological solution for a specific group",
    "CC-208: How to strengthen the broad, positive health of youth through attractive and playful technology",
    "CC-209: How can resilient role models help families to develop healthy relationships",
    "CC-210: Wildcard – Propose any technological solution which contributes to a "
    "breakthrough in intergenerational problems\t0\t0\tFalse",
    "CC-211: How to reliably measure the digital skills of long-term unemployed citizens",
    "CC-212: How to support the recognition of competence with help of a digital tool",
    "CC-213: How to prevent pressure ulcers of wheelchair patients",
    "CC-214: How to enhance the quality of life and foster inclusion for citizens "
    "with severe disabilities through digital innovation",
    "CC-215: How to collect and generate accessible pedestrian route information "
    "through participatory data collection methods",
    "CC-216: How to utilise existing data and data sources for activating " "digitally hard-to-reach residents",
    "CC-217: How to reduce school absenteeism through an innovative and inclusive " "educational solution",
    "CC-218: How to improve the thermal comfort and overall health in residential "
    "buildings, focusing on passive strategies",
    "CC-219: How to engage the citizens from socially and economically "
    "disconnected localities in participatory planning",
    "CC-220: How to better inform the public with limited access to digital tools " "about urban data analysis",
]


class Command(BaseCommand):
    """Import evaluator users from a Podio Excel export and link them to their ApplicationRounds."""

    help = "Import user data from Podio excel file to Django"

    def add_arguments(self, parser):
        """Register the command-line options."""
        parser.add_argument("--filename", type=str, required=True)

    def _find_application_round(self, challenge):
        """
        Return the single ApplicationRound matching a challenge name.

        A challenge name looks like "Cityname: How to do foo bar?". The part
        before the first ":" is ignored; the first four words of the title
        that are at least three characters long must all occur
        (case-insensitively) in the name of a round starting with "CC-2".

        Raises SystemExit with a diagnostic message (non-zero exit status)
        unless exactly one round matches.
        """
        # Split on the first ":" only, so a colon inside the title cannot break the parsing.
        _city, separator, title = challenge.partition(":")
        if not separator:
            title = challenge
        words = [w for w in title.strip().split(" ") if len(w) >= 3][:4]

        candidates = ApplicationRound.objects.filter(name__startswith="CC-2")
        for word in words:
            candidates = candidates.filter(name__icontains=word)

        # Two rows are enough to tell "exactly one" from "none" or "several".
        matches = list(candidates[:2])
        if len(matches) != 1:
            found = "no" if not matches else "more than one"
            raise SystemExit(
                f"ApplicationRound not found for {challenge} ({found} match)\n"
                f"Words: {words}\n"
                f"{candidates.query}"
            )
        return matches[0]

    def handle(self, *args, **options):
        """
        Read the Excel file, merge duplicate users and create or update them.

        All challenges are resolved to ApplicationRounds first, so an unknown
        challenge aborts the command before any user is created. Then each
        user is fetched or created by lower-cased e-mail and added to the
        evaluators of each of their rounds.
        """
        import secrets

        users = merge_duplicate_users(read_excel_sheet(options["filename"]))

        for u in users:
            u["ApplicationRounds"] = [self._find_application_round(c) for c in u["Challenge"]]
            # Print only the e-mail domain, not the full address.
            domain = u["E-mail"].lower().rpartition("@")[2]
            print(f"@{domain}", len(u["ApplicationRounds"]))

        # Create users
        for u in users:
            user, created = User.objects.get_or_create(username=u["E-mail"].lower())
            if created:  # Set first_name and last_name only if user is created
                # Empty Excel cells arrive as None; store an empty string instead.
                user.first_name = u["First name"] or ""
                user.last_name = u["Last name"] or ""
                # Set a random password. make_random_password() is deprecated
                # since Django 4.2, so the token comes from the secrets module.
                user.set_password(secrets.token_urlsafe(32))
                user.save()
            # Add ApplicationRounds to user
            for ar in u["ApplicationRounds"]:
                ar.evaluators.add(user)
                print(ar.evaluators.all())
            print(user, created)
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.4 on 2023-11-01 09:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``other_id`` field to the ``application`` model."""

    # This migration builds on migration 0024 of the application_evaluator app.
    dependencies = [
        ('application_evaluator', '0024_applicationround_city_alter_application_name_and_more'),
    ]

    operations = [
        # New character field of at most 128 characters that may be left blank.
        migrations.AddField(
            model_name='application',
            name='other_id',
            field=models.CharField(blank=True, max_length=128),
        ),
    ]
1 change: 1 addition & 0 deletions django_server/application_evaluator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def organization(user):
class Application(NamedModel):
application_round = models.ForeignKey(ApplicationRound, related_name="applications", on_delete=models.CASCADE)
application_id = models.CharField(max_length=64, blank=True) # Application ID (18 char) from Salesforce CSV
other_id = models.CharField(max_length=128, blank=True) # id generated by some other system
evaluating_organizations = models.ManyToManyField(Organization, related_name="applications_to_evaluate", blank=True)
description = description_field()
approved_by = models.ForeignKey(
Expand Down

0 comments on commit 717453b

Please sign in to comment.