# Patch summary (commentary preamble placed before the first "diff --git" header).
#
# cleanco.py
#   * cleanco.__init__: self.business_name is now the input with whitespace
#     runs collapsed via ' '.join(business_name.split()); the untouched input
#     is kept in self._original.
#   * cleanco.clean_name: gains keyword parameters suffix=True, prefix=False,
#     middle=False, multi=False. For each item in self.suffix_sort it can
#     remove the item when it appears as a trailing term (" " + item), a
#     leading term (item + " ") or an inner term (" " + item + " "). Unless
#     multi is true, the loop stops after the first removal. The result is
#     passed through self.string_stripper before being returned.
#
# tests/test_cleanname.py
#   * basic_cleanup_tests: entries renamed and new inputs added that carry
#     leading/trailing whitespace.
#   * multi_cleanup_tests / test_multi_type_cleanups: new cases that call
#     clean_name(prefix=True, suffix=True, middle=True, multi=True) and expect
#     "Hello World".
#
# NOTE(review): clean_name walks self.suffix_sort once. With multi=True, whether
#   two stacked terms (e.g. "Hello World Ab Oy") are both removed appears to
#   depend on the order of items in self.suffix_sort - confirm, or repeat the
#   pass until nothing changes.
# NOTE(review): in the suffix branch, `start` is assigned but not read again in
#   the code shown; `multi==False` could be written `not multi`.
# NOTE(review): the docstring says "cleared"; presumably "cleaned" is meant.
# NOTE(review): as stored here, each hunk body sits on a single physical line
#   (line breaks and indentation are missing), so the patch presumably has to
#   be regenerated before it can be applied - TODO confirm against the
#   original commit.
diff --git a/cleanco.py b/cleanco.py index d191e86..9574d45 100644 --- a/cleanco.py +++ b/cleanco.py @@ -10,7 +10,9 @@ class cleanco(): def __init__(self, business_name): - self.business_name = business_name + # always do non-visible cleanup, but store the original just in case + self.business_name = ' '.join(business_name.split()) + self._original = business_name # Sorted business types / abbreviation by length of business type sorted_types = [] @@ -66,21 +68,39 @@ def end_strip(self, a_set): else: return None - # A clean version of the business name - def clean_name(self): - business_name = self.business_name + def clean_name(self, suffix=True, prefix=False, middle=False, multi=False): + "return cleared version of the business name" + + name = self.business_name + + # return name without suffixed/prefixed/middle type term(s) - # Get rid of country items once for item in self.suffix_sort: - if ((business_name.lower()).endswith(" " + item)): - start = (business_name.lower()).find(item) - end = len(item) - business_name = business_name[0:-end] - business_name = self.string_stripper(business_name) - break - - return self.string_stripper(business_name) + if suffix: + if name.lower().endswith(" " + item): + start = name.lower().find(item) + end = len(item) + name = name[0:-end-1] + name = self.string_stripper(name) + if multi==False: + break + if prefix: + if name.lower().startswith(item+' '): + name = name[len(item)+1:] + if multi==False: + break + if middle: + term = ' ' + item + ' ' + if term in name.lower(): + start = name.lower().find(term) + end = start + len(term) + name = name[:start] + " " + name[end:] + if multi==False: + break + + return self.string_stripper(name) + def type(self): self.type = self.end_strip(self.sorted_types) diff --git a/tests/test_cleanname.py b/tests/test_cleanname.py index e115b1b..0e71bad 100644 --- a/tests/test_cleanname.py +++ b/tests/test_cleanname.py @@ -4,9 +4,12 @@ # Tests that demonstrate stuff is stripped away 
basic_cleanup_tests = { - "name with suffix": "Hello World Oy", - "name w/ ', ltd.'": "Hello World, ltd.", - "name with extra ws": "Hello World ltd", + "name w/ suffix": "Hello World Oy", + "name w/ ', ltd.'": "Hello World, ltd.", + "name w/ ws suffix ws": "Hello World ltd", + "name w/ suffix ws": "Hello World ltd ", + "name w/ suffix dot ws": "Hello World ltd. ", + "name w/ ws suffix dot ws": " Hello World ltd. ", } def test_basic_cleanups(): @@ -15,6 +18,22 @@ def test_basic_cleanups(): for testname, variation in basic_cleanup_tests.items(): assert cleanco(variation).clean_name() == expected, errmsg % testname +multi_cleanup_tests = { + "name + suffix": "Hello World Oy", + "name + two suffix": "Hello World Ab Oy", + "prefix + name": "Oy Hello World", + "prefix + name + suffix": "Oy Hello World Ab", + "name w/ term in middle": "Hello Oy World", + "name w/ mid + suffix": "Hello Oy World Ab" +} + +def test_multi_type_cleanups(): + expected = "Hello World" + errmsg = "cleanup of %s failed" + for testname, variation in multi_cleanup_tests.items(): + result = cleanco(variation).clean_name(prefix=True, suffix=True, middle=True, multi=True) + assert result == expected, errmsg % testname + # Tests that demonstrate organization name is kept intact