Skip to content

Commit

Permalink
implemented pydocstyle linter, updated docstrings, setup pydocstyle g…
Browse files Browse the repository at this point in the history
…ithub lint stage
  • Loading branch information
Chris Antonellis committed Feb 18, 2021
1 parent c604642 commit 5108c98
Show file tree
Hide file tree
Showing 17 changed files with 141 additions and 103 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
max-complexity = 8
ignore = E203,W503,E501
ignore = E203,W503,E501,W293
builtins = unicode
tee = True
exclude = venv,env
16 changes: 16 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ jobs:

- name: Run mypy
run: mypy extra_model tests

pydocstyle:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
uses: ./.github/actions/install-dependencies
with:
test-requirements: "true"

- name: Run pydocstyle
run: pydocstyle extra_model

test:
runs-on: ubuntu-latest
Expand Down
3 changes: 3 additions & 0 deletions docker/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,6 @@ flake8 extra_model tests

echo "Running bandit..."
bandit --ini .bandit --quiet -r extra_model

echo "Running pydocstyle..."
pydocstyle extra_model
6 changes: 3 additions & 3 deletions extra_model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__version__ = "0.0.1"
"""Extra model public objects."""

from extra_model._models import ExtraModel, extra_factory # noqa

def example():
return "hello, world"
__version__ = "0.1.0"
23 changes: 16 additions & 7 deletions extra_model/_adjectives.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""cluster adjectives and extract sentiment"""
"""Cluster adjectives and extract sentiment."""

from collections import Counter

import numpy as np
Expand All @@ -8,8 +9,10 @@


def cluster_adjectives(adjective_counts, vectorizer): # noqa: C901
"""cluster adjectives based on a constant radius clustering algorithm
technical implementation uses a scikitlearn BallTree
"""Cluster adjectives based on a constant radius clustering algorithm.
Technical implementation uses a scikitlearn BallTree.
:param adjective_counts: dictionary with adjectives and their counts
:type adjective_counts: [(str,int)]
:param vectorizer: provide embeddings to evaluate adjective similarity
Expand Down Expand Up @@ -114,9 +117,11 @@ def cluster_adjectives(adjective_counts, vectorizer): # noqa: C901


def fill_sentiment_dict(adjective_counts):
"""given a dictionary with adjectives and their counts, will compute
the sentiment of each of the adjectives using the VADER sentiment analysis package
"""Compute the sentiment of each adjective in a dictionary of adjectives and their counts.
The sentiment of each adjective is computed using the VADER sentiment analysis package,
and a dictionary of the adjectives and their sentiments is returned.
:param adjective_counts: dictionary with adjectives and their counts
:type adjective_counts: dict
:return: dictionary, where the keys are the adjectives and the values are tuples of the
Expand All @@ -137,7 +142,8 @@ def fill_sentiment_dict(adjective_counts):


def sentiments_from_adjectives(adjective_counts, sentiment_dict):
"""build the weighted average sentiment score from a list of adjetives and their counts
"""Build the weighted average sentiment score from a list of adjectives and their counts.
:param adjective_counts: list of tuples with adjectives and their counts
:type adjective_counts: [(str,int)]
:param sentiment_dict: dictionary with adjectives and their sentiment, as tuple of compound and binary sentiment
Expand All @@ -163,9 +169,12 @@ def sentiments_from_adjectives(adjective_counts, sentiment_dict):


def adjective_info(dataframe_topics, dataframe_aspects, vectorizer):
"""Add adjective related information to the dataframes, this has two facets:
"""Add adjective related information to the dataframes.
This has two facets:
-> for each topic cluster similar adjectives, to get a more abstract/readable list
-> for each topic, use the adjectives to come up with a sentiment classification
:param dataframe_topics: the dataframe with the topics we want to enrich, needs to have a column `rawterms`
:type dataframe_topics: :class:`pandas.DataFrame`
:param dataframe_aspects: the dataframe with the aspect instances and related adjectives with columns `aspect` and `descriptor`
Expand Down
25 changes: 13 additions & 12 deletions extra_model/_aspects.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@


def compound_noun_list(token):
"""
Find compound nouns
"""Find compound nouns.
:param token: token for which to generate potential compound nouns
:type token: :class:`spacy.token`
:return: list of potential compounds
Expand All @@ -32,8 +32,8 @@ def compound_noun_list(token):


def acomp_list(tokens):
"""
Find descriptions for a given token
"""Find descriptions for a given token.
:param tokens: list of tokens that are children of the head of the noun for which descriptions are searched.
:type tokens: [:class:`spacy.token`]
:return: list of adjectives
Expand All @@ -54,8 +54,8 @@ def acomp_list(tokens):


def adjective_list(tokens):
"""
Find adjectives modifying a given noun
"""Find adjectives modifying a given noun.
:param tokens: tokens of potential adjective candidates (children of the noun and children of the head for compounds)
:type tokens: [:class:`spacy.token`]
:return: list of adjectives
Expand All @@ -76,8 +76,8 @@ def adjective_list(tokens):


def adjective_negations(token):
"""
Function to find all negated adjectives in a sentence.
"""Find all negated adjectives in a sentence.
:param token: negation token to handle
:type token: :class:`spacy.token`
:return: list of negated adjectives
Expand Down Expand Up @@ -107,9 +107,10 @@ def adjective_negations(token):


def parse(dataframe_texts): # noqa: C901
"""
Parse the comments and extract a list of potential aspects based on grammatical relations
"""Parse the comments and extract a list of potential aspects based on grammatical relations.
(e.g. modified by adjective)
:param dataframe_texts: a dataframe with the raw texts. The column with the texts needs to be called 'Comments'
:type dataframe_texts: :class:`pandas.DataFrame`
:return: a dataframe with the aspect candidates
Expand Down Expand Up @@ -169,8 +170,8 @@ def parse(dataframe_texts): # noqa: C901


def generate_aspects(dataframe_texts):
"""
Generates the aspects that will be merged into topics from the raw texts:
"""Generate the aspects that will be merged into topics from the raw texts.
:param dataframe_texts: a dataframe with the raw texts in the column 'Comments'
:type dataframe_texts: :class:`pandas.DataFrame`
:return: a dataframe with the aspect candidates, their associated description, index of original text in the
Expand Down
5 changes: 1 addition & 4 deletions extra_model/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,13 @@
@click.argument("output_path", type=Path, default="/app/output")
@click.option("--debug", is_flag=True)
def entrypoint(input_path: Path, output_path: Path, debug: bool = False) -> None:

"""
Parse and handle CLI arguments.
"""Parse and handle CLI arguments.
:param input_path: Path to the file that should be used for running extra_model on.
:param output_path: Path to the file that output of extra_model is going to be saved.
:param debug: If set to True, sets log level for the application to DEBUG, else WARNING.
:return: Dictionary with input_path and output_path set to specified values
"""

logging.getLogger("extra_model").setLevel("DEBUG" if debug else "INFO")

try:
Expand Down
24 changes: 13 additions & 11 deletions extra_model/_disambiguate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""functions to do word-sense disambiguation using artifical contexts"""
"""Functions to do word-sense disambiguation using artificial contexts."""
import logging
import math

Expand All @@ -13,8 +13,8 @@


def vectorize_aspects(aspect_counts, vectorizer):
"""
Turn the aspect map into a a vector of nouns and their vector representations, which also filters aspects without embedding
"""Turn the aspect map into a vector of nouns and their vector representations, which also filters aspects without embedding.
:param aspect_counts: (dict): the dictionary with aspect counts
:param vectorizer: (Vectorizer): the provider of word-embeddings
:return vectors with representable aspects and their vector embeddings
Expand All @@ -30,8 +30,8 @@ def vectorize_aspects(aspect_counts, vectorizer):


def best_cluster(aspect_vectors):
"""
Find the optimal cluster size using silhouette scores
"""Find the optimal cluster size using silhouette scores.
:param aspect_vectors: ([embeddings]): list of embeddings vectors to be clustered
:return int the optimal number of clusters
"""
Expand Down Expand Up @@ -69,10 +69,11 @@ def best_cluster(aspect_vectors):


def cluster(aspects, aspect_vectors, vectorizer):
"""
cluster aspects based on the distance of their vector representations
once clusters are found, use the other aspects in a given cluster to generate the context for a specific aspect
noun
"""Cluster aspects based on the distance of their vector representations.
Once clusters are found, use the other aspects in a given cluster to generate the
context for a specific aspect noun.
:param aspects: ([string]): list of words for which clusters are generated
:param aspect_vectors: ([embedding]): list of embeddings corresponding to the aspects
:param vectorizer: (Vectorizer): the provider of word-embeddings for context generation
Expand Down Expand Up @@ -106,8 +107,8 @@ def cluster(aspects, aspect_vectors, vectorizer):


def match(aspect_counts, vectorizer):
"""
Match a word to a specific wordnet entry, using the vector similarity of the aspects context and the synonym gloss.
"""Match a word to a specific wordnet entry, using the vector similarity of the aspects context and the synonym gloss.
:param aspect_counts: (dict): dictionary of aspect->number of occurrence
:param vectorizer: (Vectorizer): the provider of word-embeddings for context generation
:return [string]: list of aspects that have an embedding
Expand Down Expand Up @@ -194,6 +195,7 @@ def match(aspect_counts, vectorizer):


def match_from_single(aspect, fulltext, vectorizer):
"""docstring."""
# produce the synsets and their embedding
synset = wn.synsets(aspect.lower(), pos=wn.NOUN)
if len(synset) == 0:
Expand Down
2 changes: 1 addition & 1 deletion extra_model/_errors.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
class ExtraModelError(Exception):
""" Generic Error """
"""Generic error."""
13 changes: 7 additions & 6 deletions extra_model/_filter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""do some filtering on the text input:
-comments need to be not empty
-a few letters long
-in egnlish Langage
"""Do some filtering on the text input.
- comments need to be not empty
- a few letters long
- in the English language
"""
import logging

Expand All @@ -12,8 +13,8 @@


def filter(dataframe):
"""
Filter a dataframe for language and text length, also remove unprintable unicode characters
"""Filter a dataframe for language and text length, also remove unprintable unicode characters.
:param dataframe: (pandas.dataframe): dataframe to be filtered
:return the filtered dataframe
"""
Expand Down
Loading

0 comments on commit 5108c98

Please sign in to comment.