diff --git a/Dockerfile b/Dockerfile index 4c464fdf..d250560a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ RUN curl -sL https://deb.nodesource.com/setup_12.x | bash - # # Read more on Dockerfile best practices at the source: # https://docs.docker.com/develop/develop-images/dockerfile_best-practices -RUN apt-get update && apt-get install -y --no-install-recommends postgresql-client nodejs +RUN apt-get update && apt-get install -y --no-install-recommends postgresql-client nodejs npm # Inside the container, create an app directory and switch into it RUN mkdir /app diff --git a/manage.py b/manage.py index a54bfb13..a36cb464 100755 --- a/manage.py +++ b/manage.py @@ -5,7 +5,7 @@ def main(): - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'parserator_web.settings') + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "parserator_web.settings") try: from django.core.management import execute_from_command_line except ImportError as exc: @@ -17,5 +17,5 @@ def main(): execute_from_command_line(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/parserator_web/apps.py b/parserator_web/apps.py index 02d93a45..bfdebb6e 100644 --- a/parserator_web/apps.py +++ b/parserator_web/apps.py @@ -2,4 +2,4 @@ class DjangoAppConfig(AppConfig): - name = 'parserator_web' + name = "parserator_web" diff --git a/parserator_web/asgi.py b/parserator_web/asgi.py index 099ca854..967bd250 100644 --- a/parserator_web/asgi.py +++ b/parserator_web/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'parserator_web.settings') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "parserator_web.settings") application = get_asgi_application() diff --git a/parserator_web/management/commands/clear_cache.py b/parserator_web/management/commands/clear_cache.py index 94e6c815..86fa6f8b 100644 --- a/parserator_web/management/commands/clear_cache.py +++ b/parserator_web/management/commands/clear_cache.py @@ -6,15 +6,14 @@ class Command(BaseCommand): """A simple management command which clears the site-wide cache.""" - help = 'Fully clear your site-wide cache.' + + help = "Fully clear your site-wide cache." def handle(self, *args, **kwargs): try: assert settings.CACHES except AttributeError: - raise CommandError( - 'No cache configured. Check CACHES in settings.py.' - ) + raise CommandError("No cache configured. Check CACHES in settings.py.") cache.clear() - self.stdout.write('Successfully cleared the cache.') + self.stdout.write("Successfully cleared the cache.") diff --git a/parserator_web/settings.py b/parserator_web/settings.py index e4614696..5e473a1c 100644 --- a/parserator_web/settings.py +++ b/parserator_web/settings.py @@ -9,6 +9,7 @@ For the full list of settings and their values, see https://docs.djangoproject.com/en/3.0/ref/settings/ """ + import os import dj_database_url @@ -17,78 +18,78 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Retrieve the secret key from the DJANGO_SECRET_KEY environment variable -SECRET_KEY = os.environ['DJANGO_SECRET_KEY'] +SECRET_KEY = os.environ["DJANGO_SECRET_KEY"] # Set the DJANGO_DEBUG environment variable to False to disable debug mode -DEBUG = False if os.getenv('DJANGO_DEBUG', True) == 'False' else True +DEBUG = False if os.getenv("DJANGO_DEBUG", True) == "False" else True # Define DJANGO_ALLOWED_HOSTS as a comma-separated list of valid hosts, # e.g. localhost,127.0.0.1,.herokuapp.com -allowed_hosts = os.getenv('DJANGO_ALLOWED_HOSTS', []) -ALLOWED_HOSTS = allowed_hosts.split(',') if allowed_hosts else [] +allowed_hosts = os.getenv("DJANGO_ALLOWED_HOSTS", []) +ALLOWED_HOSTS = allowed_hosts.split(",") if allowed_hosts else [] # Application definition INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'parserator_web' + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "parserator_web", ] MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'whitenoise.middleware.WhiteNoiseMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', + "django.middleware.security.SecurityMiddleware", + "whitenoise.middleware.WhiteNoiseMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", ] -ROOT_URLCONF = 'parserator_web.urls' +ROOT_URLCONF = "parserator_web.urls" TEMPLATES = [ { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': ['templates/'], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": ["templates/"], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", ], }, }, ] -WSGI_APPLICATION = 'parserator_web.wsgi.application' +WSGI_APPLICATION = "parserator_web.wsgi.application" # Database # https://docs.djangoproject.com/en/3.0/ref/settings/#databases DATABASES = {} -DATABASES['default'] = dj_database_url.parse( - os.getenv('DATABASE_URL', 'postgres://postgres:postgres@postgres:5432/parserator'), +DATABASES["default"] = dj_database_url.parse( + os.getenv("DATABASE_URL", "postgres://postgres:postgres@postgres:5432/parserator"), conn_max_age=600, - ssl_require=True if os.getenv('POSTGRES_REQUIRE_SSL') else False + ssl_require=True if os.getenv("POSTGRES_REQUIRE_SSL") else False, ) # Caching # https://docs.djangoproject.com/en/3.0/topics/cache/ -cache_backend = 'dummy.DummyCache' if DEBUG is True else 'db.DatabaseCache' +cache_backend = "dummy.DummyCache" if DEBUG is True else "db.DatabaseCache" CACHES = { - 'default': { - 'BACKEND': f'django.core.cache.backends.{cache_backend}', - 'LOCATION': 'site_cache', + "default": { + "BACKEND": f"django.core.cache.backends.{cache_backend}", + "LOCATION": "site_cache", } } @@ -97,25 +98,25 @@ AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] # Internationalization # https://docs.djangoproject.com/en/3.0/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = "en-us" -TIME_ZONE = 'UTC' +TIME_ZONE = "UTC" USE_I18N = True @@ -126,18 +127,18 @@ # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.2/howto/static-files/ -STATIC_URL = '/static/' -STATIC_ROOT = '/static' +STATIC_URL = "/static/" +STATIC_ROOT = "/static" STATICFILES_STORAGE = os.getenv( - 'DJANGO_STATICFILES_STORAGE', - 'whitenoise.storage.CompressedManifestStaticFilesStorage' + "DJANGO_STATICFILES_STORAGE", + "whitenoise.storage.CompressedManifestStaticFilesStorage", ) STATICFILES_FINDERS = ( - 'django.contrib.staticfiles.finders.FileSystemFinder', - 'django.contrib.staticfiles.finders.AppDirectoriesFinder', + "django.contrib.staticfiles.finders.FileSystemFinder", + "django.contrib.staticfiles.finders.AppDirectoriesFinder", ) # Enforce SSL in production if DEBUG is False: - SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') + SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") SECURE_SSL_REDIRECT = True diff --git a/parserator_web/templates/parserator_web/index.html b/parserator_web/templates/parserator_web/index.html index a72d9c80..56156e42 100644 --- a/parserator_web/templates/parserator_web/index.html +++ b/parserator_web/templates/parserator_web/index.html @@ -11,7 +11,7 @@

U.S. addres

Dealing with some messy or unstructured addresses? We can parse them for you.

Try it out! Parse an address in the United States into fields like AddressNumber, StreetName and ZipCode.

-
+ {% csrf_token %} @@ -21,7 +21,7 @@

U.S. addres +

diff --git a/parserator_web/urls.py b/parserator_web/urls.py index ce5f2b97..39a189e3 100644 --- a/parserator_web/urls.py +++ b/parserator_web/urls.py @@ -3,6 +3,6 @@ from parserator_web import views urlpatterns = [ - path('', views.Home.as_view(), name='home'), - path('api/parse/', views.AddressParse.as_view(), name='address-parse') + path("", views.Home.as_view(), name="home"), + path("api/parse/", views.AddressParse.as_view(), name="address-parse"), ] diff --git a/parserator_web/views.py b/parserator_web/views.py index 0be3f4a9..0fb54194 100644 --- a/parserator_web/views.py +++ b/parserator_web/views.py @@ -1,24 +1,74 @@ import usaddress +from typing import Tuple, Dict from django.views.generic import TemplateView from rest_framework.views import APIView from rest_framework.response import Response from rest_framework.renderers import JSONRenderer from rest_framework.exceptions import ParseError +import logging +# Logging configuration +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) class Home(TemplateView): - template_name = 'parserator_web/index.html' + template_name = "parserator_web/index.html" class AddressParse(APIView): renderer_classes = [JSONRenderer] - def get(self, request): - # TODO: Flesh out this method to parse an address string using the - # parse() method and return the parsed components to the frontend. - return Response({}) + def get(self, request) -> Response: - def parse(self, address): - # TODO: Implement this method to return the parsed components of a - # given address using usaddress: https://github.com/datamade/usaddress + + # Obtain request from search + address = request.query_params.get("address", "") + if not address: + logging.info("Address not received") + return Response({"error": "No address provided"}, status=400) + + # Parse input address + try: + address_components, address_type = self.parse(address) + except usaddress.RepeatedLabelError as e: + logging.info(f"Parsing address {address} not succesful: {e}") + return Response( + {"error": f"Not possible to parse address: {str(e)}"}, + status=400, + ) + except ParseError as e: + logging.info(f"Parsing address {address} not succesful: {e}") + return Response( + {"error": f"Not possible to parse address: {str(e)}"}, + status=400, + ) + except Exception as e: + return Response( + {"error": f"Not possible to parse address: {str(e)}"}, + status=400, + ) + + # Compile results + data = { + "input_string": address, + "address_components": address_components, + "address_type": address_type, + } + + return Response(data) + + def parse(self, address: str) -> Tuple[Dict, str]: + """ + From a valid string, it returns the parsed address components and address + type using the usaaddress library + Input: + - address (str): Address to be parsed + Return: + - address_componenets (dict): Dictionary with tag as the key and its + corresponding value as the value + - address_type (str): Type of address + """ + + address_components, address_type = usaddress.tag(address) return address_components, address_type diff --git a/parserator_web/wsgi.py b/parserator_web/wsgi.py index 91725eb8..103bb1af 100644 --- a/parserator_web/wsgi.py +++ b/parserator_web/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'parserator_web.settings') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "parserator_web.settings") application = get_wsgi_application() diff --git a/tests/test_views.py b/tests/test_views.py index bfd5d0b7..b2df68b8 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -1,15 +1,77 @@ import pytest +from http import HTTPStatus +API_ENDPOINT = "/api/parse/" -def test_api_parse_succeeds(client): - # TODO: Finish this test. Send a request to the API and confirm that the - # data comes back in the appropriate format. - address_string = '123 main st chicago il' - pytest.fail() +# Although test should have same behavior since it heavily depends on +# the usaaddress library, added tests for different types of address +# to test correct functionality of the API +@pytest.mark.parametrize( + "address,parsed_components,address_type", + [ + ( + "123 main st chicago il", + { + "AddressNumber": "123", + "StreetName": "main", + "StreetNamePostType": "st", + "PlaceName": "chicago", + "StateName": "il", + }, + "Street Address", + ), # Example case + ( + "123 main ave chicago il", + { + "AddressNumber": "123", + "StreetName": "main", + "StreetNamePostType": "ave", + "PlaceName": "chicago", + "StateName": "il", + }, + "Street Address", + ), # Ave instead of St + ( + "123 main st chicago il 60615", + { + "AddressNumber": "123", + "StreetName": "main", + "StreetNamePostType": "st", + "PlaceName": "chicago", + "StateName": "il", + "ZipCode": "60615", + }, + "Street Address", + ), # Example case + Zip Code + # TODO: Add tests for intersection or other address types + ], +) +def test_api_parse_succeeds(client, address, parsed_components, address_type): -def test_api_parse_raises_error(client): - # TODO: Finish this test. The address_string below will raise a - # RepeatedLabelError, so ParseAddress.parse() will not be able to parse it. - address_string = '123 main st chicago il 123 main st' - pytest.fail() + api_input = {"address": address} + + response = client.get(API_ENDPOINT, api_input) + address_response_type = response.json()["address_type"] + address_response_components = response.json()["address_components"] + + assert response.status_code == HTTPStatus.OK + assert address_response_type == address_type + assert address_response_components == parsed_components + + +@pytest.mark.parametrize( + "address", + [ + ("123 main st chicago il 123 main st"), # Repeated street + ("123 main st chicago il chicago il"), # Repeated city+state + ], +) +def test_api_parse_raises_error(client, address): + + api_input = {"address": address} + response = client.get(API_ENDPOINT, api_input) + error = response.json()["error"] + + assert error is not None + assert response.status_code == 400