Skip to content

Commit

Permalink
Allow a PI's institution name to be searched with subdomains
Browse files Browse the repository at this point in the history
Now, for PIs whose email domains contain multiple subdomains (e.g. `a@b.c.edu`),
each candidate domain will be matched against institute_map.json,
from the most qualified to the least qualified subdomain.

In the example of `a@b.c.edu`, this means `b.c.edu` would be matched first,
then `c.edu`, then `edu`.
  • Loading branch information
QuanMPhm committed Jun 11, 2024
1 parent 0400557 commit 5e86c9e
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 34 deletions.
9 changes: 0 additions & 9 deletions process_report/institute_map.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@
"dfci.harvard.edu" : "Dana-Farber Cancer Institute",
"bwh.harvard.edu" : "Brigham and Women's Hospital",
"bidmc.harvard.edu" : "Beth Israel Deaconess Medical Center",
"fas.harvard.edu" : "Harvard University",
"cga.harvard.edu" : "Harvard University",
"iq.harvard.edu" : "Harvard University",
"hks.harvard.edu" : "Harvard University",
"hsph.harvard.edu" : "Harvard University",
"seas.harvard.edu" : "Harvard University",
"gse.harvard.edu" : "Harvard University",
"gov.harvard.edu" : "Harvard University",
"oeb.harvard.edu" : "Harvard University",
"harvard.edu" : "Harvard University",
"wpi.edu" : "Worcester Polytechnic Institute",
"mit.edu" : "Massachusetts Institute of Technology",
Expand Down
7 changes: 5 additions & 2 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@


def get_institution_from_pi(institute_map, pi_uname):
institution_key = pi_uname.split("@")[-1]
institution_name = institute_map.get(institution_key, "")
institution_domain = pi_uname.split("@")[-1]
for i in range(institution_domain.count(".") + 1):
if institution_name := institute_map.get(institution_domain, ""):
break
institution_domain = institution_domain[institution_domain.find(".") + 1 :]

if institution_name == "":
print(f"Warning: PI name {pi_uname} does not match any institution!")
Expand Down
36 changes: 13 additions & 23 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,29 +234,19 @@ def test_get_pi_institution(self):
"northeastern.edu": "Northeastern University",
}

self.assertEqual(
process_report.get_institution_from_pi(institute_map, "[email protected]"),
"Boston University",
)
self.assertEqual(
process_report.get_institution_from_pi(
institute_map, "[email protected]"
),
"McLean Hospital",
)
self.assertEqual(
process_report.get_institution_from_pi(institute_map, "[email protected]"),
"Harvard University",
)
self.assertEqual(
process_report.get_institution_from_pi(institute_map, "fake"), ""
)
self.assertEqual(
process_report.get_institution_from_pi(
institute_map, "[email protected]"
),
"Northeastern University",
)
answers = {
"[email protected]": "Boston University",
"[email protected]": "McLean Hospital",
"[email protected]": "Harvard University",
"e@edu": "",
"[email protected]": "Northeastern University",
"[email protected]": "Harvard University",
}

for pi_email, answer in answers.items():
self.assertEqual(
process_report.get_institution_from_pi(institute_map, pi_email), answer
)


class TestAlias(TestCase):
Expand Down

0 comments on commit 5e86c9e

Please sign in to comment.