Added comments

CarmeLabs · Nov 20, 2018 · 2f47121 · 2f47121
1 parent b1ae19d
commit 2f47121
Showing 1 changed file with 22 additions and 3 deletions.
diff --git a/src/modules/generationtools/categorical.py b/src/modules/generationtools/categorical.py
@@ -28,6 +28,15 @@ def identify(col):
     return not float_found # True if all ints, false if atleast one float found
 
 def rank_categories(col):
+    """Ranks the column values by frequency, in descending order
+    
+    Arguments:
+        col {Dataframe Column} -- The column to sort
+    
+    Returns:
+        [List] -- List of tuples of each value and the percentage of times it occurs
+    """
+
     categories = {}
     # Count the occurances
     for item in col:
@@ -42,6 +51,15 @@ def rank_categories(col):
     return categories
 
 def categorical_convert(col):
+    """Encodes categorical data into ML-usable data
+    
+    Arguments:
+        col {Dataframe column} -- The column to encode
+    
+    Returns:
+        Dataframe column -- An encoded column
+    """
+
     categories = rank_categories(col)
     distributions = {}
     limits = {}
@@ -58,11 +76,12 @@ def categorical_convert(col):
     # sample from the distributions and return that value
     return col.apply(lambda x: distributions[x].rvs()), limits
 
+
+"""Example usage
+
 def main():
     data = pd.read_csv('test.csv')
     for col in data:
         if identify(data[col]):
             print(categorical_convert(data[col]))
-
-if __name__ == "__main__":
-    main()
+"""