implemented pydocstyle linter, updated docstrings, setup pydocstyle github lint stage
wayfair-incubator · Feb 18, 2021 · 5108c98 · 5108c98
1 parent c604642
commit 5108c98
Show file tree

Hide file tree

Showing 17 changed files with 141 additions and 103 deletions.
diff --git a/.flake8 b/.flake8
@@ -1,6 +1,6 @@
 [flake8]
 max-complexity = 8
-ignore = E203,W503,E501
+ignore = E203,W503,E501,W293
 builtins = unicode
 tee = True
 exclude = venv,env
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -89,6 +89,22 @@ jobs:
 
         - name: Run mypy
           run: mypy extra_model tests
+
+  pydocstyle:
+    runs-on: ubuntu-latest
+    steps:
+        - name: Check out code
+          uses: actions/checkout@v2
+        - uses: actions/setup-python@v2
+          with:
+            python-version: ${{ env.PYTHON_VERSION }}
+        - name: Install dependencies
+          uses: ./.github/actions/install-dependencies
+          with:
+            test-requirements: "true"
+
+        - name: Run pydocstyle
+          run: pydocstyle extra_model
 
   test:
     runs-on: ubuntu-latest

diff --git a/docker/run_tests.sh b/docker/run_tests.sh
@@ -51,3 +51,6 @@ flake8 extra_model tests
 
 echo "Running bandit..."
 bandit --ini .bandit --quiet -r extra_model
+
+echo "Running pydocstyle..."
+pydocstyle extra_model
diff --git a/extra_model/__init__.py b/extra_model/__init__.py
@@ -1,5 +1,5 @@
-__version__ = "0.0.1"
+"""Extra model public objects."""
 
+from extra_model._models import ExtraModel, extra_factory  # noqa
 
-def example():
-    return "hello, world"
+__version__ = "0.1.0"
diff --git a/extra_model/_adjectives.py b/extra_model/_adjectives.py
@@ -1,4 +1,5 @@
-"""cluster adjectives and extract sentiment"""
+"""Cluster adjectives and extract sentiment."""
+
 from collections import Counter
 
 import numpy as np
@@ -8,8 +9,10 @@
 
 
 def cluster_adjectives(adjective_counts, vectorizer):  # noqa: C901
-    """cluster adjectives based on a constant radius clustering algorithm
-    technical implementation uses a scikitlearn BallTree
+    """Cluster adjectives based on a constant radius clustering algorithm.
+    
+    Technical implementation uses a scikitlearn BallTree.
+    
     :param adjective_counts: dictionary with adjectives and their counts
     :type adjective_counts: [(str,int)]
     :param vectorizer:  provide embeddings to evaluate adjective similarity
@@ -114,9 +117,11 @@ def cluster_adjectives(adjective_counts, vectorizer):  # noqa: C901
 
 
 def fill_sentiment_dict(adjective_counts):
-    """given a dictionary with adjectives and their counts, will compute
-    the sentiment of each of the adjectives using the VADER sentiment analysis package
+    """Compute the sentiment of each adjective in a dictionary of adjective counts.
+    
+    Uses the VADER sentiment analysis package to score each of the adjectives
     and return a dictionary of the adjectives and their sentiments.
+    
     :param adjective_counts: dictionary with adjectives and their counts
     :type adjective_counts: dict
     :return: dictionary, where the keys are the adjectives and the values are tuples of the
@@ -137,7 +142,8 @@ def fill_sentiment_dict(adjective_counts):
 
 
 def sentiments_from_adjectives(adjective_counts, sentiment_dict):
-    """build the weighted average sentiment score from a list of adjetives and their counts
+    """Build the weighted average sentiment score from a list of adjectives and their counts.
+    
     :param adjective_counts: list of tuples with adjectives and their counts
     :type adjective_counts: [(str,int)]
     :param sentiment_dict: dictionary with adjectives and their sentiment, as tuple of compound and binary sentiment
@@ -163,9 +169,12 @@ def sentiments_from_adjectives(adjective_counts, sentiment_dict):
 
 
 def adjective_info(dataframe_topics, dataframe_aspects, vectorizer):
-    """Add adjective related information to the dataframes, this has two facets:
+    """Add adjective related information to the dataframes.
+    
+    This has two facets:
     -> for each topic cluster similar adjectives, to get a more abstract/readable list
     -> for each topic, use the adjectives to come up with a sentiment classification
+    
     :param dataframe_topics: the dataframe with the topics we want to enrich, needs to have a collum `rawterms`
     :type dataframe_topics: :class:`pandas.DataFrame`
     :param dataframe_aspects: the dataframe with the aspect instances and related adjectives with columsn `aspect` and `descriptor`

diff --git a/extra_model/_aspects.py b/extra_model/_aspects.py
@@ -14,8 +14,8 @@
 
 
 def compound_noun_list(token):
-    """
-    Find compound nouns
+    """Find compound nouns.
+    
     :param token: token for which to generate potential compound nouns
     :type token: :class:`spacy.token`
     :return: list of potential compounds
@@ -32,8 +32,8 @@ def compound_noun_list(token):
 
 
 def acomp_list(tokens):
-    """
-    Find descriptions for a given token
+    """Find descriptions for a given token.
+    
     :param tokens: list of tokens that are children of the head of the nount for which descriptions are searched.
     :type tokens: [:class:`spacy.token`]
     :return: list of adjectives
@@ -54,8 +54,8 @@ def acomp_list(tokens):
 
 
 def adjective_list(tokens):
-    """
-    Find adjectives modifying a given noun
+    """Find adjectives modifying a given noun.
+    
     :param tokens: tokens of potential adjectice candidates (children of the noun and children of the head for compounds)
     :type tokens: [:class:`spacy.token`]
     :return: list of adjectives
@@ -76,8 +76,8 @@ def adjective_list(tokens):
 
 
 def adjective_negations(token):
-    """
-    Function to find all negated adjectives in a sentence.
+    """Find all negated adjectives in a sentence.
+    
     :param token: negation token to handle
     :type token: :class:`spacy.token`
     :return: list of negated adjectives
@@ -107,9 +107,10 @@ def adjective_negations(token):
 
 
 def parse(dataframe_texts):  # noqa: C901
-    """
-    Parse the comments and extract a list of potential aspects based on grammatical relations
+    """Parse the comments and extract a list of potential aspects based on grammatical relations.
+    
     (e.g. modified by adjective)
+    
     :param dataframe_texts: a dataframe with the raw texts. The collumn wit the texts needs to be called 'Comments'
     :type dataframe_texts: :class:`pandas.DataFrame`
     :return: a dataframe with the aspect candidates
@@ -169,8 +170,8 @@ def parse(dataframe_texts):  # noqa: C901
 
 
 def generate_aspects(dataframe_texts):
-    """
-    Generates the aspects that will be merged into topics from the raw texts:
+    """Generate the aspects that will be merged into topics from the raw texts.
+    
     :param dataframe_texts: a dataframe with the raw texts in the column 'Comments'
     :type dataframe_texts: :class:`pandas.DataFrame`
     :return: a dataframe with the aspect candidates, their associated description, index of original text in the

diff --git a/extra_model/_cli.py b/extra_model/_cli.py
@@ -15,16 +15,13 @@
 @click.argument("output_path", type=Path, default="/app/output")
 @click.option("--debug", is_flag=True)
 def entrypoint(input_path: Path, output_path: Path, debug: bool = False) -> None:
-
-    """
-    Parse and handle CLI arguments.
+    """Parse and handle CLI arguments.
 
     :param input_path: Path to the file that should be used for running extra_model on.
     :param output_path: Path to the file that output of extra_model is going to be saved.
     :param debug: If set to True, sets log level for the application to DEBUG, else WARNING.
     :return: Dictionary with input_path and output_path set to specified values
     """
-
     logging.getLogger("extra_model").setLevel("DEBUG" if debug else "INFO")
 
     try:

diff --git a/extra_model/_disambiguate.py b/extra_model/_disambiguate.py
@@ -1,4 +1,4 @@
-"""functions to do word-sense disambiguation using artifical contexts"""
+"""Functions to do word-sense disambiguation using artificial contexts."""
 import logging
 import math
 
@@ -13,8 +13,8 @@
 
 
 def vectorize_aspects(aspect_counts, vectorizer):
-    """
-    Turn the aspect map into a a vector of nouns and their vector representations, which also filters aspects without embedding
+    """Turn the aspect map into a vector of nouns and their vector representations, which also filters aspects without embedding.
+    
     :param aspect_counts: (dict): the dictionary with aspect counts
     :param vectorizer: (Vectorizer): the provider of word-embeddings
     :return vectors with representable aspects and their vector embeddings
@@ -30,8 +30,8 @@ def vectorize_aspects(aspect_counts, vectorizer):
 
 
 def best_cluster(aspect_vectors):
-    """
-    Find the optimal cluster size using silhouette scores
+    """Find the optimal cluster size using silhouette scores.
+    
     :param aspect_vectors: ([embeddings]): list of embeddings vectors to be clustered
     :return int the optimal number of clusters
     """
@@ -69,10 +69,11 @@ def best_cluster(aspect_vectors):
 
 
 def cluster(aspects, aspect_vectors, vectorizer):
-    """
-    cluster aspects based on the distance of their vector representations
-        once clusters are found, use the other aspects in a given cluster to generate the context for a specific aspect
-        noun
+    """Cluster aspects based on the distance of their vector representations.
+    
+    Once clusters are found, use the other aspects in a given cluster to generate the
+    context for a specific aspect noun.
+    
     :param aspects: ([string]): list of words for which clusters are generated
     :param aspect_vectors: ([embedding]): list of embeddings corresponding to the the aspects
     :param vectorizer: (Vectorizer):  the provider of word-embeddings for context generation
@@ -106,8 +107,8 @@ def cluster(aspects, aspect_vectors, vectorizer):
 
 
 def match(aspect_counts, vectorizer):
-    """
-    Match a word to a specific wordnet entry, using the vector similarity of the aspects context and the synonym gloss.
+    """Match a word to a specific wordnet entry, using the vector similarity of the aspects context and the synonym gloss.
+    
     :param aspect_counts: (dict): dictionary of aspect->number of occurrence
     :param vectorizer: (Vectorizer):  the provider of word-embeddings for context generation
     :return [string]: list of aspects that have an embedding
@@ -194,6 +195,7 @@ def match(aspect_counts, vectorizer):
 
 
 def match_from_single(aspect, fulltext, vectorizer):
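+    """Match a single aspect against its WordNet noun synsets (TODO: confirm and document parameters and return value)."""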
     # produce the synsets and their embedding
     synset = wn.synsets(aspect.lower(), pos=wn.NOUN)
     if len(synset) == 0:

diff --git a/extra_model/_errors.py b/extra_model/_errors.py
@@ -1,2 +1,2 @@
 class ExtraModelError(Exception):
-    """ Generic Error """
+    """Generic error."""
diff --git a/extra_model/_filter.py b/extra_model/_filter.py
@@ -1,7 +1,8 @@
-"""do some filtering on the text input:
- -comments need to be not empty
- -a few letters long
- -in egnlish Langage
+"""Do some filtering on the text input.
+
+- comments must not be empty
+- comments must be at least a few letters long
+- comments must be in the English language
 """
 import logging
 
@@ -12,8 +13,8 @@
 
 
 def filter(dataframe):
-    """
-    Filter a dataframe for language and text length, also remove unprintable unicode characters
+    """Filter a dataframe for language and text length, also remove unprintable unicode characters.
+    
     :param dataframe: (pandas.dataframe): dataframe to be filtered
     :return the filtered dataframe
     """