Skip to content

Commit

Permalink
Added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
dmccrevan committed Nov 20, 2018
1 parent b1ae19d commit 2f47121
Showing 1 changed file with 22 additions and 3 deletions.
25 changes: 22 additions & 3 deletions src/modules/generationtools/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ def identify(col):
return not float_found # True if all ints, False if at least one float found

def rank_categories(col):
"""Ranks the column values in most frequent descending order
Arguments:
col {Dataframe Column} -- The column to sort
Returns:
[List] -- List of tuples of the value and percentage it occurs
"""

categories = {}
# Count the occurrences
for item in col:
Expand All @@ -42,6 +51,15 @@ def rank_categories(col):
return categories

def categorical_convert(col):
"""Encodes categorical data into ML-usable data
Arguments:
col {Dataframe column} -- The column to encode
Returns:
Tuple -- The encoded column (each value replaced by a sample drawn from its category's distribution) and the limits dictionary
"""

categories = rank_categories(col)
distributions = {}
limits = {}
Expand All @@ -58,11 +76,12 @@ def categorical_convert(col):
# sample from the distributions and return that value
return col.apply(lambda x: distributions[x].rvs()), limits


"""Example usage
def main():
data = pd.read_csv('test.csv')
for col in data:
if identify(data[col]):
print(categorical_convert(data[col]))

if __name__ == "__main__":
main()
"""

0 comments on commit 2f47121

Please sign in to comment.