Skip to content

Commit

Permalink
Merge pull request #243 from Palashio/ramya-branch
Browse files Browse the repository at this point in the history
Replace dicts in sklearn models with LabelEncoders
  • Loading branch information
Palashio authored Jul 18, 2020
2 parents ea118ed + d0455ce commit 86bf214
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 48 deletions.
36 changes: 17 additions & 19 deletions libra/plotting/generate_plots.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import seaborn as sns
import warnings
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import LabelBinarizer
from numpy import interp
import pandas as pd
import sklearn
Expand Down Expand Up @@ -261,9 +261,11 @@ def plot_mc_roc(y_test, y_score, interpreter=None):
lw = 2
n_classes = len(np.unique(y_test))
classes = pd.unique(y_test)
label_binarizer = LabelBinarizer()
label_binarizer.fit(np.concatenate((y_test, y_score)))
if n_classes != 2:
y_test = label_binarize(y_test, classes=classes)
y_score = label_binarize(y_score, classes=classes)
y_test = label_binarizer.transform(y_test)
y_score = label_binarizer.transform(y_score)
else:
n_classes = 1
y_test = y_test.reshape(-1, 1)
Expand Down Expand Up @@ -308,21 +310,15 @@ def plot_mc_roc(y_test, y_score, interpreter=None):
color='navy', linestyle=':', linewidth=4)

for i in range(n_classes):
if isinstance(interpreter, dict):
inverted_interpreter = dict(map(reversed, interpreter.items()))
plt.plot(fpr[i], tpr[i], lw=lw,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(inverted_interpreter[i], roc_auc[i]))
else:
plt.plot(
fpr[i],
tpr[i],
lw=lw,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(
interpreter.inverse_transform(
[[i]])[0],
roc_auc[i]))
plt.plot(
fpr[i],
tpr[i],
lw=lw,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(
interpreter.inverse_transform(
[[label_binarizer.classes_[i]]])[0],
roc_auc[i]))

plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
Expand Down Expand Up @@ -401,7 +397,9 @@ def analyze(client, model=None, save=True, save_model=False):
if model in ['svm', 'nearest_neighbor',
'decision_tree', 'text_classification']:
label_source = modeldict['interpreter']
labels = list(modeldict['interpreter'].keys())
labels = []
for num in np.unique(np.concatenate((real, preds))):
labels.append(label_source.inverse_transform([[num]])[0])
else:
label_source = enc
labels = enc.classes_
Expand Down
12 changes: 2 additions & 10 deletions libra/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,8 @@ def interpret(self, model, predictions):
'''
modeldict = self.models[model]
if modeldict.get('interpreter'):
if isinstance(modeldict['interpreter'], dict):
inverted_interpreter = dict(
map(reversed, modeldict['interpreter'].items()))
toRet = []
for each in predictions:
toRet.append(inverted_interpreter[each])
predictions = toRet
else:
predictions = modeldict['interpreter'].inverse_transform(
predictions)
predictions = modeldict['interpreter'].inverse_transform(
predictions)
clearLog()
return predictions

Expand Down
31 changes: 14 additions & 17 deletions libra/query/classification_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from libra.plotting.generate_plots import (generate_clustering_plots)
from colorama import Fore, Style
import warnings
import sklearn

warnings.filterwarnings("ignore", category=UserWarning)

Expand Down Expand Up @@ -245,13 +246,11 @@ def train_svm(instruction,
# Needed to make a custom label encoder due to train test split changes
# Can still be inverse transformed, just a bit of extra work
y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
label_mappings = {}
logger("Labels being mapped to appropriate classes")
for i in range(len(y_vals)):
label_mappings[y_vals[i]] = i
label_mappings = sklearn.preprocessing.LabelEncoder()
label_mappings.fit(y_vals)

y_train = y_train.apply(lambda x: label_mappings[x]).values
y_test = y_test.apply(lambda x: label_mappings[x]).values
y_train = label_mappings.transform(y_train)
y_test = label_mappings.transform(y_test)

# Fitting to SVM and storing in the model dictionary
logger("Fitting Support Vector Machine")
Expand Down Expand Up @@ -328,11 +327,11 @@ def nearest_neighbors(instruction=None,
num_classes = len(np.unique(y))
# encodes the labels as integer class indices (0 to n_classes - 1)
y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
label_mappings = {}
for i in range(len(y_vals)):
label_mappings[y_vals[i]] = i
y_train = y_train.apply(lambda x: label_mappings[x]).values
y_test = y_test.apply(lambda x: label_mappings[x]).values
label_mappings = sklearn.preprocessing.LabelEncoder()
label_mappings.fit(y_vals)

y_train = label_mappings.transform(y_train)
y_test = label_mappings.transform(y_test)
logger("Labels being mapped to appropriate classes")
models = []
scores = []
Expand Down Expand Up @@ -418,13 +417,11 @@ def decision_tree(instruction,
# Needed to make a custom label encoder due to train test split changes
# Can still be inverse transformed, just a bit of extra work
y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
label_mappings = {}
for i in range(len(y_vals)):
label_mappings[y_vals[i]] = i
label_mappings = sklearn.preprocessing.LabelEncoder()
label_mappings.fit(y_vals)

# Custom label encoder due to train test split
y_train = y_train.apply(lambda x: label_mappings[x]).values
y_test = y_test.apply(lambda x: label_mappings[x]).values
y_train = label_mappings.transform(y_train)
y_test = label_mappings.transform(y_test)

logger("Labels being mapped to appropriate classes")
num_classes = len(np.unique(y))
Expand Down
6 changes: 4 additions & 2 deletions libra/query/feedforward_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ def classification_ann(instruction,
# print((" " * 2 * counter)+ tabulate(datax, headers=col_name, tablefmt='orgtbl'))
losses.append(history.history[maximizer]
[len(history.history[maximizer]) - 1])
accuracies.append(history.history['val_accuracy']
[len(history.history['val_accuracy']) - 1])
# keeps running model and fit functions until the validation loss stops
# decreasing

Expand Down Expand Up @@ -386,9 +388,9 @@ def classification_ann(instruction,
datax = []
values.append(str(len(model.layers)))
values.append(
"| " + str(history.history['accuracy'][len(history.history['val_accuracy']) - 1]))
"| " + str(history.history['accuracy'][len(history.history['accuracy']) - 1]))
values.append(
"| " + str(history.history['accuracy'][len(history.history['val_accuracy']) - 1]))
"| " + str(history.history['val_accuracy'][len(history.history['val_accuracy']) - 1]))
datax.append(values)
for row in datax:
print((" " * 2 * counter) + "| " +
Expand Down

0 comments on commit 86bf214

Please sign in to comment.