-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5e6a97a
commit a9271d5
Showing
32 changed files
with
515 additions
and
184 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(88%)
backend/app/controllers/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(88%)
backend/app/infrastructure/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(98%)
backend/app/infrastructure/__pycache__/db_connection_manager.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(93%)
backend/app/repositories/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(100%)
backend/app/repositories/__pycache__/csv_file_repo.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(100%)
backend/app/repositories/__pycache__/sqlite_db_repo.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file modified
BIN
+65 Bytes
(100%)
backend/app/use_cases/__pycache__/generate.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(99%)
backend/app/use_cases/__pycache__/get_headers.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(99%)
backend/app/use_cases/__pycache__/get_last_login_data.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(99%)
backend/app/use_cases/__pycache__/get_values_under_header.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(98%)
backend/app/use_cases/__pycache__/upload_data.cpython-312.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file modified
BIN
-25 Bytes
(99%)
backend/ml_model/entities/__pycache__/datapoint_entity.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(99%)
backend/ml_model/repository/__pycache__/data_preprocessing.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
-25 Bytes
(99%)
backend/ml_model/repository/__pycache__/file_reader.cpython-312.pyc
Binary file not shown.
Binary file modified
BIN
+2.32 KB
(300%)
backend/ml_model/repository/__pycache__/model_saver.cpython-312.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,72 @@ | ||
import os | ||
import pickle | ||
|
||
import pandas as pd | ||
from sklearn.model_selection import GridSearchCV | ||
|
||
|
||
class ModelSaver:
    """
    Persist a trained model together with its evaluation score.

    The model is scored on the supplied test split and both the model and
    the score are written to a single pickle (.pkl) file.

    Parameters:
    -----------
    best_clf : GridSearchCV
        The trained model object. Only its ``score(x_test, y_test)`` method
        is used here; the object itself is pickled as-is.
    x_test : pd.DataFrame
        The test dataset features used for evaluating the model.
    y_test : pd.Series
        The actual labels corresponding to the test dataset.
    """

    # File name used when no explicit path is given to ``save_model``.
    DEFAULT_FILENAME = "model_with_score.pkl"

    def __init__(self, best_clf: "GridSearchCV", x_test: pd.DataFrame, y_test: pd.Series):
        """
        Store the model and the test split used to score it.

        Parameters:
        -----------
        best_clf : GridSearchCV
            The trained model object.
        x_test : pd.DataFrame
            The test dataset features used for evaluating the model.
        y_test : pd.Series
            The actual labels corresponding to the test dataset.
        """
        self.best_clf = best_clf
        self.x_test = x_test
        self.y_test = y_test

    def save_model(self, model_path: "str | None" = None) -> None:
        """
        Score the model on the test split and pickle both to disk.

        Parameters:
        -----------
        model_path : str, optional
            Destination file. When omitted, the file is written as
            `model_with_score.pkl` in the parent directory of the directory
            containing this module.

        Returns:
        --------
        None

        Notes:
        ------
        - The score is whatever ``best_clf.score(x_test, y_test)`` returns.
        - The resulting pickle file contains a dictionary with two keys:
            - "model": the `best_clf` object.
            - "score": the evaluation score of the model on the test data.
        """
        # Overall model score
        score = self.best_clf.score(self.x_test, self.y_test)

        if model_path is None:
            # Resolve against the absolute module location so the target does
            # not depend on the process's current working directory.
            module_dir = os.path.dirname(os.path.abspath(__file__))
            model_path = os.path.normpath(
                os.path.join(module_dir, os.pardir, self.DEFAULT_FILENAME)
            )

        # Save the model and its score
        with open(model_path, "wb") as f:
            pickle.dump({"model": self.best_clf, "score": score}, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from sklearn.model_selection import GridSearchCV | ||
from sklearn.tree import DecisionTreeClassifier | ||
import pandas as pd | ||
|
||
|
||
class SafeGridSearch:
    """
    A utility class for safely performing grid search for hyperparameter tuning.

    Grid search is run with cross-validation; when the training data is too
    small to be split into the requested number of folds, the search returns
    ``None`` instead of raising.
    """

    # Substrings of the ValueError messages that indicate "too little data to
    # split". The first is the one this class has always matched.
    # NOTE(review): the second is believed to be the message scikit-learn's
    # stratified splitter raises when every class has fewer members than
    # n_splits - confirm against the installed scikit-learn version.
    _INSUFFICIENT_SAMPLES_MARKERS = (
        "Cannot have number of splits n_splits",
        "cannot be greater than the number of members in each class",
    )

    def __init__(self, classifier=None, param_grid=None, *, cv=5, scoring="accuracy"):
        """
        Initializes the SafeGridSearch with a classifier and a parameter grid.

        Parameters:
        -----------
        classifier : estimator object, optional
            The base classifier to use for grid search. When None, a new
            DecisionTreeClassifier() is created for this instance (a fresh
            object per instance rather than one shared default).
        param_grid : dict, optional
            The parameter grid to use for tuning hyperparameters. If None
            (or empty), a default parameter grid for DecisionTreeClassifier
            is used.
        cv : optional (default=5)
            Forwarded to GridSearchCV as ``cv``.
        scoring : optional (default="accuracy")
            Forwarded to GridSearchCV as ``scoring``.
        """
        if classifier is None:
            classifier = DecisionTreeClassifier()
        self.classifier = classifier
        self.param_grid = param_grid or {
            "criterion": ["gini", "entropy"],
            "max_depth": [None] + list(range(1, 11)),
            "min_samples_split": [2, 5, 10],
        }
        self.cv = cv
        self.scoring = scoring

    def perform_search(self, x_train: pd.DataFrame, y_train: pd.Series):
        """
        Performs a safe grid search for hyperparameter tuning.

        Parameters:
        -----------
        x_train : pd.DataFrame
            The training feature set.
        y_train : pd.Series
            The training labels.

        Returns:
        --------
        estimator or None
            Returns the best estimator if grid search is successful.
            Returns None if there are insufficient samples for cross-validation.

        Raises:
        -------
        ValueError
            Any ValueError from the search that is not an insufficient-samples
            error is propagated instead of being silently turned into None.
        """
        grid_search = GridSearchCV(
            self.classifier, self.param_grid, cv=self.cv, scoring=self.scoring
        )
        try:
            grid_search.fit(x_train, y_train)
        except ValueError as e:
            message = str(e)
            if any(marker in message for marker in self._INSUFFICIENT_SAMPLES_MARKERS):
                print("Not enough samples for cross-validation. Returning None.")
                return None
            # Unrelated failures (bad parameter grid, malformed data, ...) must
            # stay visible to the caller.
            raise
        return grid_search.best_estimator_
Oops, something went wrong.