diff --git a/libra/preprocessing/data_preprocesser.py b/libra/preprocessing/data_preprocesser.py index c9347c72..b64a6e78 100644 --- a/libra/preprocessing/data_preprocesser.py +++ b/libra/preprocessing/data_preprocesser.py @@ -133,7 +133,7 @@ def structured_preprocesser(data, ca_threshold, text): accept_sparse=True)), ('embedder', FunctionTransformer( - text_embedder, + textembedder, accept_sparse=True))]), text[x])) @@ -146,14 +146,14 @@ def structured_preprocesser(data, ca_threshold, text): if too_many_values(combined[categorical_columns], ca_threshold): cat_pipeline = Pipeline([ ('imputer', SimpleImputer(strategy="constant", fill_value="")), - ('one_hot_encoder', OneHotEncoder(handle_unknown='ignore')), + ('one_hotencoder', OneHotEncoder(handle_unknown='ignore')), ('transformer', FunctionTransformer(lambda x: x.toarray(), accept_sparse=True)), ('ca', CA(n_components=-1)) ]) else: cat_pipeline = Pipeline([ ('imputer', SimpleImputer(strategy="constant", fill_value="")), - ('one_hot_encoder', OneHotEncoder(handle_unknown='ignore')) + ('one_hotencoder', OneHotEncoder(handle_unknown='ignore')) ]) full_pipeline.transformers.append( @@ -230,7 +230,7 @@ def text_preprocessing(data, text_cols): data['test'] = combined.iloc[len(data['train']):] -def text_embedder(text): +def textembedder(text): total = list() for i in text: @@ -253,7 +253,7 @@ def generate_column_labels(full_pipeline, numeric_cols, text_cols): else: try: encoded_cols = full_pipeline.named_transformers_[ - 'cat']['one_hot_encoder'].get_feature_names() + 'cat']['one_hotencoder'].get_feature_names() cols = [*list(numeric_cols), *encoded_cols, *text_cols] except Exception as error: @@ -281,7 +281,7 @@ def clustering_preprocessor(data): # pipeline for categorical columns cat_pipeline = Pipeline([ ('imputer', SimpleImputer(strategy="constant", fill_value="")), - ('one_hot_encoder', OneHotEncoder()), + ('one_hotencoder', OneHotEncoder()), ]) # combine the two pipelines diff --git a/libra/queries.py b/libra/queries.py index 89d814a5..ab88e062 100644 --- a/libra/queries.py +++ b/libra/queries.py @@ -758,7 +758,9 @@ def convolutional_query(self, epochs=10, height=None, width=None, - show_feature_map=False): + show_feature_map=False, + save_as_tfjs=None, + save_as_tflite=None): ''' Calls the body of the convolutional neural network query which is located in the feedforward.py file :param instruction: The objective that you want to model (str). @@ -794,7 +796,9 @@ def convolutional_query(self, pretrained=pretrained, epochs=epochs, height=height, - width=width) + width=width, + save_as_tfjs=save_as_tfjs, + save_as_tflite=save_as_tflite) if show_feature_map: model = self.models["convolutional_NN"]["model"] diff --git a/libra/query/feedforward_nn.py b/libra/query/feedforward_nn.py index cdc51579..fd093bea 100644 --- a/libra/query/feedforward_nn.py +++ b/libra/query/feedforward_nn.py @@ -1,6 +1,8 @@ from colorama import Fore, Style from tensorflow.keras.callbacks import EarlyStopping import os +import tensorflow as tf +import tensorflowjs as tfjs from libra.preprocessing.image_preprocesser import (setwise_preprocessing, csv_preprocessing, classwise_preprocessing, @@ -325,14 +327,14 @@ def classification_ann(instruction, if num_classes >= 2: # ANN needs target one hot encoded for classification - one_hot_encoder = OneHotEncoder() + one_hotencoder = OneHotEncoder() y = pd.DataFrame( - one_hot_encoder.fit_transform( + one_hotencoder.fit_transform( np.reshape( y.values, (-1, 1))).toarray(), - columns=one_hot_encoder.get_feature_names()) + columns=one_hotencoder.get_feature_names()) y_train = y.iloc[:len(X_train)] y_test = y.iloc[len(X_train):] @@ -475,7 +477,7 @@ def classification_ann(instruction, "plots": plots, "target": remove, "preprocesser": full_pipeline, - "interpreter": one_hot_encoder, + "interpreter": one_hotencoder, 'test_data': {'X': X_test, 'y': y_test}, 'losses': { 'training_loss': final_hist.history['loss'], @@ -504,7 +506,9 @@ def convolutional(instruction=None, pretrained=None, epochs=10, height=None, - width=None): + width=None, + save_as_tfjs=None, + save_as_tflite=None): ''' Body of the convolutional function used that is called in the neural network query if the data is presented in images. @@ -790,6 +794,17 @@ def convolutional(instruction=None, # storing values the model dictionary logger("Stored model under 'convolutional_NN' key") + + if save_as_tfjs: + tfjs.converters.save_keras_model(model, "tfjsmodel") + logger("Saved tfjs model under 'tfjsmodel' directory") + + if save_as_tflite: + converter = tf.lite.TFLiteConverter.from_keras_model(model) + tflite_model = converter.convert() + open ("model.tflite" , "wb") .write(tflite_model) + logger("Saved tflite model as 'model.tflite' ") + clearLog() K.clear_session() diff --git a/libra/query/unused_functions.py b/libra/query/unused_functions.py index c0dcf843..840bf77c 100644 --- a/libra/query/unused_functions.py +++ b/libra/query/unused_functions.py @@ -357,7 +357,7 @@ def get_snet_layer(num_out=2): # # best_hps : best Hyperprameters obtained after tuning, stored as map # # history : history of the data executed from the given model # """ -# return tuner,best_model.best_estimator_.get_params()['C'], history +# return tuner,best_model.bestestimator_.get_params()['C'], history # # def tuneClass( @@ -431,7 +431,7 @@ def get_snet_layer(num_out=2): # # # best_hps : best Hyperprameters obtained after tuning, stored as array # # # history : history of the data executed from the given model # # """ -# # return tuner,best_model.best_estimator_.get_params()['C'], history +# # return tuner,best_model.bestestimator_.get_params()['C'], history # # #return models[0], hyp, history # # # # """""" @@ -767,7 +767,7 @@ def insert_layer(model,layer_name,layer_index): # learning_rate=learning_rate, # restore_from='fresh', # run_name='run1', -# print_every=10, +# printevery=10, # sample_every=200, # save_every=save_every, # checkpoint_dir=save_path diff --git a/requirements.txt b/requirements.txt index 1ca6f65f..60451625 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ colorama transformers==3.0.2 -tensorflow==2.2.0 +tensorflow keras==2.4.3 numpy pandas @@ -10,6 +10,7 @@ tabulate textblob seaborn keras-tuner +tensorflowjs jellyfish spacy autocorrect diff --git a/tools/data/nlp_data/sentimentAnalysisData.csv b/tools/data/nlp_data/sentimentAnalysisData.csv index 14ce6b0b..6766f932 100644 --- a/tools/data/nlp_data/sentimentAnalysisData.csv +++ b/tools/data/nlp_data/sentimentAnalysisData.csv @@ -888,7 +888,7 @@ Label, Num, Date, Query, Username, Tweet 0,"1468032862","Mon Apr 06 23:21:35 PDT 2009","NO_QUERY","lamarmcarter","happy for Coach Stringer (HOF c/o 2009!)...now if I can only finish my term paper on her " 0,"1468032869","Mon Apr 06 23:21:35 PDT 2009","NO_QUERY","urbanrocker","I feel like shit. This is NOT the way I want to spend my birthday's eve " 0,"1468033025","Mon Apr 06 23:21:39 PDT 2009","NO_QUERY","strifethe3rd","so far i have 311 on all my sites put together, most of them were me checking out the updates i made......" -0,"1468033122","Mon Apr 06 23:21:41 PDT 2009","NO_QUERY","Ember_X","has a lot on her mind. I needs to make some moneys. And ideas. Nowhere is hiring! " +0,"1468033122","Mon Apr 06 23:21:41 PDT 2009","NO_QUERY","Emberx","has a lot on her mind. I needs to make some moneys. And ideas. Nowhere is hiring! " 0,"1468033152","Mon Apr 06 23:21:41 PDT 2009","NO_QUERY","xGabii","@emiliexclarkex miss you " 0,"1468033194","Mon Apr 06 23:21:44 PDT 2009","NO_QUERY","tygerbaby","@stephenkruiser I'm so sorry to hear about your dog " 0,"1468033322","Mon Apr 06 23:21:45 PDT 2009","NO_QUERY","nubblecakes","Too much internet. How it plagues me " @@ -7853,7 +7853,7 @@ Label, Num, Date, Query, Username, Tweet 1,"1573858603","Tue Apr 21 03:11:26 PDT 2009","NO_QUERY","FredByrne","@PhilByrne65 Oh my god I had the most drunken weekend thanks to the footy! Wasn't gonna go out sunday but after the penos got hammered " 1,"1573858615","Tue Apr 21 03:11:26 PDT 2009","NO_QUERY","bobbigmac","@robertkneschke I think you just claimed the first ever use of the #picNiche hashtag Hooray " 1,"1573858656","Tue Apr 21 03:11:27 PDT 2009","NO_QUERY","Simo_C","@cabert17 cheer up! tomorrow you'll be at home " -1,"1573858700","Tue Apr 21 03:11:27 PDT 2009","NO_QUERY","timetraveller_x","@lowleeta Hello to you too " +1,"1573858700","Tue Apr 21 03:11:27 PDT 2009","NO_QUERY","timetravellerx","@lowleeta Hello to you too " 1,"1573858710","Tue Apr 21 03:11:28 PDT 2009","NO_QUERY","freetz","still playing with my pet " 1,"1573858774","Tue Apr 21 03:11:28 PDT 2009","NO_QUERY","bakees","blah. getting ready. school all day. lets hope it gets better out before last block. whoot gym " 1,"1573858788","Tue Apr 21 03:11:28 PDT 2009","NO_QUERY","frgt10kyur","I'm off to work and then the computer lab loving life!" @@ -8472,7 +8472,7 @@ Label, Num, Date, Query, Username, Tweet 1,"1573984889","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","jonmack","just had a nice brunch now it's time for work " 1,"1573984899","Tue Apr 21 03:46:09 PDT 2009","NO_QUERY","snaglepus","@ADDN2X I have been using more Skype - except for now when I should " 1,"1573984901","Tue Apr 21 03:46:09 PDT 2009","NO_QUERY","AndrewChapman","Back onto surf4wine later to write blog post and upload new wines/photos. Bit behind with online wine life " -1,"1573984911","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","geoff_garcia","I'm a twitter virgin " +1,"1573984911","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","geoffgarcia","I'm a twitter virgin " 1,"1573984923","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","DasKreative","Learning how to communicate DKS thro twitter! feel free to ad us onto ur list! lotsa action comin up cheers,raoul/eddie" 1,"1573984925","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","mizdi","@dennis_siuu3p u cannot make me dizzy again.. never ( mahal pa rin kita kapatid, don't worry add kita sa facebook bukas)" 1,"1573984968","Tue Apr 21 03:46:10 PDT 2009","NO_QUERY","sheckie7","Let's start this day!! Love to wake up to the rain! " diff --git a/tools/materials/Reference Manual.md b/tools/materials/Reference Manual.md index f23aa4f8..1ba17cd6 100644 --- a/tools/materials/Reference Manual.md +++ b/tools/materials/Reference Manual.md @@ -1285,10 +1285,10 @@ None Executes code to tokenize & normalize text, autocorrects spelling, and lemmatizes all columns comprised of textual data -### text_embedder ### +### textembedder ### ``` python -libra.text_embedder(text) +libra.textembedder(text) ``` Embeds data in text column by summing all of the numerical vector representations of textual contents in column into suitable scalar format