Skip to content

Commit

Permalink
Continue to work on docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jceresearch committed May 29, 2022
1 parent 60073c7 commit 1b8e680
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 52 deletions.
34 changes: 0 additions & 34 deletions docs/source/_autosummary/pydit.rst

This file was deleted.

7 changes: 4 additions & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"sphinx.ext.doctest",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"myst_parser",
]

autosummary_generate = True
Expand All @@ -62,10 +63,10 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'classic'
html_theme = "classic"

#'sphinx_rtd_theme' (you need to
#"alabaster"
#'sphinx_rtd_theme' (you need to
# "alabaster"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
Expand Down
12 changes: 11 additions & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@
Welcome to pydit's documentation!
=================================

.. include:: readme_link.rst

Detailed documentation
======================

.. autosummary::
:toctree: _autosummary
:recursive:

pydit
pydit.functions
pydit.logger
pydit.filemanager
pydit.utils



Indices and tables
==================
Expand Down
2 changes: 2 additions & 0 deletions docs/source/readme_link.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.. include:: ../../README.md
:parser: myst_parser.sphinx_
10 changes: 6 additions & 4 deletions pydit/functions/add_counts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Add a count column that will bring the count of that key in the referenced table"""
""" Adds the count of that key from another dataframe, similar to COUNTIF() in Excel"""

import logging
import pandas as pd
Expand All @@ -9,13 +9,15 @@
def add_counts_between_related_df(
df1, df2, left_on="", right_on="", on="", inplace=False
):
"""Add a count column to bring the count of that key in those tables
"""Add a count column to bring the count of that key in another dataframe
This works similar to adding countif() in Excel to sense check if an
identifier in one sheet is fully in another (presumably master), or
if there are duplicated keys or orphans/gaps.
This routine does both ways so you can quickly check whether you have
one to one, many to many etc, and where there may be anomalies.
This routine does both ways to quickly check whether the relationship is
one to one, many to many etc.
There is another function that checks referential integrity and does this
in a more conceptual way, but often you just need to add some counting
numbers and filter for >1 or zeroes.
Expand Down
12 changes: 7 additions & 5 deletions pydit/functions/add_percentile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@


def add_percentile(df, col, col_group=None, inplace=False):
"""Adds columns for percentile for a chosen column and also
within a category group, if provided
from https://stackoverflow.com/questions/50804120/how-do-i-get-the-percentile-for-a-row-in-a-pandas-dataframe
Using the percentile with linear interpolation method, but kept
various ranks calculations for reference
"""Adds columns for percentile for a chosen column in a DataFrame
It can also provide it within a category group (col_group)
Reference: https://stackoverflow.com/questions/50804120/how-do-i-get-the-percentile-for-a-row-in-a-pandas-dataframe
Using the percentile with linear interpolation method, but kept various
ranks calculations for reference
Args:
df (DataFrame): Pandas DataFrame
Expand Down
21 changes: 16 additions & 5 deletions pydit/functions/cleanup_column_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,21 @@ def _get_random_string(length):


def cleanup_column_names(df, max_field_name_len=40, inplace=False):
""" Cleanup the column names of a Pandas dataframe
e.g. removes non alphanumeric chars, _ instead of space, perc instead
of %, strips trailing spaces, converts to lowercase
"""
"""
Cleanup the column names of a Pandas dataframe.
e.g. removes non alphanumeric chars, _ instead of space, perc instead
of %, strips trailing spaces, converts to lowercase.
Args:
df (DataFrame): Pandas DataFrame
max_field_name_len (int, optional): Maximum length of field name. Defaults to 40.
inplace (bool, optional): If True, the dataframe is modified in place. Defaults to False.
Returns:
Pandas DataFrame with cleaned column names
"""
prev_cols = list(df.columns)
new_cols = []
for e in prev_cols:
Expand All @@ -97,7 +108,7 @@ def cleanup_column_names(df, max_field_name_len=40, inplace=False):
# names, just in case keeping this quite low, feel free to increase or remove
new_cols = _deduplicate_list(new_cols)
if not inplace:
df=df.copy()
df = df.copy()
df.columns = new_cols
logger.debug("Previous column names:%s", prev_cols)
logger.info("New columns names:%s", list(df.columns))
Expand Down

0 comments on commit 1b8e680

Please sign in to comment.