From 2f47121028b7b9c9d1c1108330e3834c38e3f63d Mon Sep 17 00:00:00 2001
From: Daniel McCrevan <danielmccrevan@gmail.com>
Date: Tue, 20 Nov 2018 13:57:32 -0500
Subject: [PATCH] Added comments

---
 src/modules/generationtools/categorical.py | 25 +++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/modules/generationtools/categorical.py b/src/modules/generationtools/categorical.py
index 1551622..0bbb40c 100644
--- a/src/modules/generationtools/categorical.py
+++ b/src/modules/generationtools/categorical.py
@@ -28,6 +28,15 @@ def identify(col):
     return not float_found # True if all ints, false if atleast one float found
 
 def rank_categories(col):
+    """Ranks the column values by frequency, in descending order
+    
+    Arguments:
+        col {Dataframe Column} -- The column to sort
+    
+    Returns:
+        [List] -- List of (value, percentage of occurrences) tuples
+    """
+
     categories = {}
     # Count the occurances
     for item in col:
@@ -42,6 +51,15 @@ def rank_categories(col):
     return categories
 
 def categorical_convert(col):
+    """Encodes categorical data into ML-usable data
+    
+    Arguments:
+        col {Dataframe column} -- The column to encode
+    
+    Returns:
+        Tuple -- The encoded column and the `limits` dict built while encoding
+    """
+
     categories = rank_categories(col)
     distributions = {}
     limits = {}
@@ -58,11 +76,12 @@ def categorical_convert(col):
     # sample from the distributions and return that value
     return col.apply(lambda x: distributions[x].rvs()), limits
 
+
+"""Example usage
+
 def main():
     data = pd.read_csv('test.csv')
     for col in data:
         if identify(data[col]):
             print(categorical_convert(data[col]))
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+"""
\ No newline at end of file