Skip to content
This repository has been archived by the owner on May 7, 2023. It is now read-only.

Commit

Permalink
Merge branch 'andrey/gsk96-refactoring'
Browse files Browse the repository at this point in the history
  • Loading branch information
andreybavt committed Jun 9, 2022
2 parents 42fd7ba + 7e5af08 commit 54b504b
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 63 deletions.
27 changes: 20 additions & 7 deletions giskard/giskard_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""API Client to interact with the Giskard app"""
import warnings
from typing import List
from urllib.parse import urljoin

Expand All @@ -9,7 +10,11 @@


class GiskardError(Exception):
    """Error raised when the Giskard backend answers with an error payload.

    Args:
        message: Human-readable description, forwarded to ``Exception``.
        status: Value of the ``status`` field of the error payload, or
            ``None`` when the payload does not carry one.
        code: Value of the ``message`` field of the error payload, used by
            callers as a machine-readable code (e.g. ``'error.http.409'``),
            or ``None`` when absent.
    """

    def __init__(self, message: str, status: int = None, code: str = None) -> None:
        # Both extra fields are optional: the payload is read with
        # ``dict.get`` and may omit them, and code that raises
        # ``GiskardError(message)`` with a single argument keeps working.
        super().__init__(message)
        self.status = status
        self.code = code


class ErrorHandlingAdapter(HTTPAdapter):
Expand All @@ -21,7 +26,9 @@ def build_response(self, req, resp):
try:
err_resp = response.json()
giskard_error = GiskardError(
f"{err_resp.get('title', 'Unknown error')}: {err_resp.get('detail', 'no details')}")
status=err_resp.get('status'),
code=err_resp.get('message'),
message=f"{err_resp.get('title', 'Unknown error')}: {err_resp.get('detail', 'no details')}")
except: # NOSONAR
response.raise_for_status()
raise giskard_error
Expand All @@ -48,11 +55,17 @@ def get_project(self, project_key: str):
return GiskardProject(self._session, response['key'])

def create_project(self, project_key: str, name: str, description: str = None):
response = self._session.post('project', json={
"description": description,
"key": project_key,
"name": name
}).json()
try:
response = self._session.post('project', json={
"description": description,
"key": project_key,
"name": name
}).json()
except GiskardError as e:
if e.code == 'error.http.409':
warnings.warn("This project key already exists. "
"If you want to reuse existing project use get_project(“project_key”) instead")
raise e
actual_project_key = response.get('key')
if actual_project_key != project_key:
print(f"Project created with a key : {actual_project_key}")
Expand Down
170 changes: 121 additions & 49 deletions giskard/project.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
import logging
import warnings
from typing import Callable, Dict, Iterable, List, Optional, Union

import numpy as np
Expand All @@ -23,7 +23,6 @@ def _serialize(prediction_function: Callable[
Iterable[Union[str, float, int]],
]) -> bytes:
compressed_pickle: bytes = compress(pickle_dumps(prediction_function))
print(f'Compressed model size: {len(compressed_pickle)} bytes')
return compressed_pickle

def upload_model(
Expand All @@ -32,19 +31,24 @@ def upload_model(
model_type: str,
feature_names: List[str],
name: str = None,
classification_threshold: Optional[float] = 0.5,
validate_df: pd.DataFrame = None,
target: Optional[List[str]] = None,
classification_threshold: Optional[float] = None,
classification_labels: Optional[List[str]] = None,
validate_df: pd.DataFrame = None
):
print(f"Uploading model '{name}' to project '{self.project_key}'...")

self._validate_classification_threshold(classification_threshold)
self._validate_model_type(model_type)
self._validate_features(feature_names=feature_names, validate_df=validate_df)
self._validate_prediction_function(prediction_function)
classification_labels = self._validate_classification_labels(classification_labels, model_type)

if model_type == SupportedModelTypes.CLASSIFICATION.value:
self._validate_classification_threshold_label(classification_labels, classification_threshold)

if validate_df is not None:
self._validate_model_execution(prediction_function, validate_df)
self._validate_model_execution(prediction_function, validate_df, model_type, classification_labels)
if target is not None and model_type == SupportedModelTypes.CLASSIFICATION.value:
target_values = validate_df[target].unique()
self._validate_label_with_target(classification_labels, target_values)

model = self._serialize(prediction_function)
requirements = get_python_requirements()
Expand All @@ -65,23 +69,26 @@ def upload_model(
('requirementsFile', requirements)
]
self.session.post('project/models/upload', data={}, files=files)
print(f"Uploading model '{name}' to project '{self.project_key}': Done!")
print(f"Successfully uploaded model to project '{self.project_key}'")

def upload_df(
self,
df: pd.DataFrame,
feature_types: Dict[str, str],
column_types: Dict[str, str],
target: str = None,
name: str = None,
) -> requests.Response:
logging.info(f"Uploading dataset '{name}' to project '{self.project_key}'...")
self.validate_df(df, feature_types)
self._validate_input_types(feature_types)
self._validate_features(column_types=column_types)
if target is not None:
self._validate_target(target, df.keys())
self.validate_df(df, column_types)
self._validate_input_types(column_types)

data = compress(save_df(df))
params = {
"projectKey": self.project_key,
"name": name,
"featureTypes": feature_types,
"featureTypes": column_types,
"target": target
}

Expand All @@ -90,49 +97,50 @@ def upload_df(
('file', data)
]

logging.info(f"Uploading dataset '{name}' to project '{self.project_key}': Done!")
print(f"Successfully uploaded dataset to project '{self.project_key}'")
return self.session.post("project/data/upload", data={}, files=files)

def upload_model_and_df(
        self,
        prediction_function: Callable[[pd.DataFrame], Iterable[Union[str, float, int]]],
        model_type: str,
        df: pd.DataFrame,
        column_types: Dict[str, str],
        feature_names: List[str] = None,
        target: str = None,
        model_name: str = None,
        dataset_name: str = None,
        classification_threshold: Optional[float] = None,
        classification_labels: Optional[List[str]] = None,
) -> None:
    """Upload a model and then the dataset it is validated against.

    The model is uploaded first, with ``df`` passed as its validation
    dataframe; the dataset upload follows.

    Args:
        prediction_function: Callable taking a dataframe and returning the
            model predictions.
        model_type: Model type, forwarded to ``upload_model``.
        df: Dataframe used both for model validation and as the dataset.
        column_types: Mapping from column name to column type.
        feature_names: Columns consumed by the model. When empty or
            ``None``, every key of ``column_types`` except ``target`` is used.
        target: Name of the target column, if any.
        model_name: Name given to the uploaded model.
        dataset_name: Name given to the uploaded dataset.
        classification_threshold: Forwarded to ``upload_model``.
        classification_labels: Forwarded to ``upload_model``.
    """
    if not feature_names:
        # The target column is what the model predicts, so it must not be
        # listed among the model inputs when the list is derived.
        feature_names = [column for column in column_types if column != target]

    self.upload_model(prediction_function=prediction_function,
                      model_type=model_type,
                      feature_names=feature_names,
                      name=model_name,
                      classification_threshold=classification_threshold,
                      classification_labels=classification_labels,
                      validate_df=df,
                      target=target)
    self.upload_df(
        df=df,
        name=dataset_name,
        column_types=column_types,
        target=target)

@staticmethod
def _validate_model_type(model_type):
    """Ensure ``model_type`` is one of the ``SupportedModelTypes`` values.

    Raises:
        ValueError: If ``model_type`` is not a supported value; the message
            lists the accepted ones.
    """
    supported_values = [task.value for task in SupportedModelTypes]
    if model_type in set(supported_values):
        return
    raise ValueError(
        f"Invalid model_type parameter: {model_type}. "
        f"Please choose one of {supported_values}."
    )

@staticmethod
def _validate_input_types(input_types):
if input_types and type(input_types) is dict:
if set(input_types.values()) > {column_type.value for column_type in SupportedColumnType}:
if not set(input_types.values()).issubset(set(column_type.value for column_type in SupportedColumnType)):
raise ValueError(
f"Invalid input_types parameter: {input_types}. "
f"Invalid input_types parameter: "
+ f"Please choose types among {[column_type.value for column_type in SupportedColumnType]}."
)
else:
Expand All @@ -148,16 +156,62 @@ def _validate_prediction_function(prediction_function):
)

@staticmethod
def _validate_target(target, dataframe_keys):
    """Check that ``target`` names one of the dataset columns.

    Args:
        target: Name of the target column, or ``None`` when there is none.
        dataframe_keys: Column names of the dataset.

    Raises:
        ValueError: If ``target`` is given but is not among
            ``dataframe_keys``.
    """
    if target is None or target in dataframe_keys:
        return
    # Name the rejected value and show the columns as a plain list so the
    # message is actionable.
    raise ValueError(
        f"Invalid target parameter: {target}. "
        f"Select the target from the column names of the dataset: {list(dataframe_keys)}"
    )

@staticmethod
def _validate_features(feature_names=None, column_types=None, validate_df=None):
    """Validate the containers describing model features and dataset columns.

    Args:
        feature_names: Feature names consumed by the model; must be a list
            when given.
        column_types: Mapping of column name to column type; must be a dict
            when given.
        validate_df: Optional dataframe; when given together with
            ``feature_names``, every feature must be one of its columns.

    Raises:
        ValueError: If ``feature_names`` is not a list, if it names columns
            missing from ``validate_df``, or if ``column_types`` is not a
            dict.
    """
    if feature_names is not None:
        if not isinstance(feature_names, list):
            raise ValueError(
                "Invalid feature_names parameter. Please provide the feature names as a list."
            )
        if validate_df is not None:
            missing_columns = set(feature_names) - set(validate_df.columns)
            if missing_columns:
                raise ValueError(
                    f"Value mentioned in feature_names is not available in validate_df: {missing_columns} ")

    if column_types is not None and not isinstance(column_types, dict):
        # The message now names the parameter that is actually wrong.
        raise ValueError(
            "Invalid column_types parameter. Please provide the column types as a dictionary."
        )

@staticmethod
def _validate_classification_threshold_label(classification_labels, classification_threshold=None):
    """Validate the threshold of a classification model against its labels.

    Args:
        classification_labels: Labels of the classification model; required.
        classification_threshold: Optional numeric threshold. A value other
            than ``0.5`` is only accepted when there are exactly two labels.

    Raises:
        ValueError: If the labels are missing, if the threshold is not a
            number, or if a threshold other than ``0.5`` is combined with a
            number of labels different from two.
    """
    if classification_labels is None:
        raise ValueError(
            "Missing classification_labels parameter for classification model."
        )
    if classification_threshold is None:
        return

    # bool is a subclass of int, so it has to be rejected explicitly;
    # otherwise True/False would be accepted as a threshold.
    is_number = (
        isinstance(classification_threshold, (int, float))
        and not isinstance(classification_threshold, bool)
    )
    if not is_number:
        raise ValueError(
            f"Invalid classification_threshold parameter: {classification_threshold}. Please specify valid number."
        )

    if classification_threshold != 0.5 and len(classification_labels) != 2:
        raise ValueError(
            f"Invalid classification_threshold parameter: {classification_threshold} value is applicable "
            f"only for binary classification. "
        )

@staticmethod
def _validate_label_with_target(classification_labels, target_values=None):
    """Check that the classification labels match the target column values.

    Both sides are compared as strings: the labels handed to this check have
    already been converted with ``str`` during label validation, whereas the
    target values come straight from the dataframe and may be ints or other
    scalars.

    Args:
        classification_labels: Labels declared for the model.
        target_values: Distinct values of the target column, or ``None`` to
            skip the check.

    Raises:
        ValueError: If the two sets of values differ.
    """
    if target_values is None:
        return
    declared = {str(label) for label in classification_labels}
    observed = {str(value) for value in target_values}
    if observed != declared:
        raise ValueError(
            f"Invalid classification_labels parameter: {classification_labels} do not match with"
            f" target column values {sorted(observed)}."
        )

@staticmethod
def _validate_classification_labels(classification_labels, prediction_task):
def _validate_classification_labels(classification_labels, model_type):
res = None
if prediction_task == SupportedModelTypes.CLASSIFICATION.value:
if model_type == SupportedModelTypes.CLASSIFICATION.value:
if (
classification_labels is not None
and hasattr(classification_labels, "__iter__")
Expand All @@ -167,30 +221,48 @@ def _validate_classification_labels(classification_labels, prediction_task):
res: Optional[List[str]] = [str(label) for label in classification_labels]
else:
raise ValueError(
f"Invalid classification_labels parameter: {classification_labels}. Please specify more than 1 label."
f"Invalid classification_labels parameter: {classification_labels}. "
f"Please specify more than 1 label."
)
else:
raise ValueError(
f"Invalid classification_labels parameter: {classification_labels}. Please specify valid list of strings."
f"Invalid classification_labels parameter: {classification_labels}. "
f"Please specify valid list of strings."
)
if model_type == SupportedModelTypes.REGRESSION.value and classification_labels is not None:
warnings.warn("'classification_labels' parameter is ignored for regression model")
res = None
return res

@staticmethod
def _validate_model_execution(prediction_function, df: pd.DataFrame, model_type, classification_labels) -> None:
    """Run the model on ``df`` and sanity-check what it returns.

    Args:
        prediction_function: Callable applied to ``df``.
        df: Dataframe the model is executed on.
        model_type: One of the ``SupportedModelTypes`` values.
        classification_labels: Labels of a classification model; forwarded
            to the classification-specific checks.

    Raises:
        ValueError: If the prediction is neither a numpy array nor a list,
            does not hold float values, does not have the dimensionality
            expected for ``model_type``, or fails the classification checks.
    """
    prediction = prediction_function(df)
    if not isinstance(prediction, (np.ndarray, list)):
        raise ValueError("Model should return numpy array or a list")

    # Work on an ndarray from here on: lists have no ``.shape`` and numpy
    # scalars such as float32 are not instances of the builtin ``float``,
    # so the check is done on the dtype instead of element by element.
    values = np.asarray(prediction)
    holds_floats = values.size > 0 and np.issubdtype(values.dtype, np.floating)

    if model_type == SupportedModelTypes.CLASSIFICATION.value:
        # One row per sample, one column per label.
        if values.ndim != 2:
            raise ValueError("Classification model prediction should be 2-dimensional")
        if not holds_floats:
            raise ValueError("Model prediction should return float values ")
    if model_type == SupportedModelTypes.REGRESSION.value:
        # One float per sample.
        if values.ndim != 1:
            raise ValueError("Regression model prediction should be 1-dimensional")
        if not holds_floats:
            raise ValueError("Model prediction should return float values ")

    GiskardProject._validate_classification_prediction(classification_labels, model_type, values)

@staticmethod
def _validate_classification_prediction(classification_labels, model_type, prediction):
    """Check the probability matrix returned by a classification model.

    Nothing is checked for other model types.

    Args:
        classification_labels: Labels of the model; their count must equal
            the number of columns of ``prediction``.
        model_type: One of the ``SupportedModelTypes`` values.
        prediction: Per-sample probabilities, one column per label.

    Raises:
        ValueError: If any row does not sum to 1 (rounded to two decimals)
            or if the number of columns differs from the number of labels.
    """
    if model_type != SupportedModelTypes.CLASSIFICATION.value:
        return

    # Accept lists as well as arrays; ``.shape`` is needed below.
    probabilities = np.asarray(prediction)
    row_sums = np.round(np.sum(probabilities, axis=1), 2)
    # A single row that does not sum to 1 makes the prediction invalid, so
    # the test is ``any``: ``all`` would only fire when every row is wrong.
    if np.any(row_sums != 1):
        raise ValueError("Invalid Classification Model prediction. Sum of all probabilities should be 1 ")
    if probabilities.shape[1] != len(classification_labels):
        raise ValueError("Prediction output label shape and classification_labels shape do not match")

@staticmethod
def validate_df(df: pd.DataFrame, input_types) -> pd.DataFrame:
if set(input_types.values()) < set(df.columns):
missing_columns = set(df.columns) - set(input_types.values())
raise ValueError(f"Missing input_types for columns: {missing_columns}")
elif set(input_types.values()) > set(df.columns):
missing_columns = set(input_types.values()) - set(df.columns)
raise ValueError(
f"Missing columns in dataframe according to input_types: {missing_columns}"
)
if not set(input_types.keys()).issubset(set(df.columns)):
missing_columns = set(input_types.keys()) - set(df.columns)
raise ValueError(f"Value mentioned in column_types is not available in dataframe: {missing_columns} ")

else:
pandas_inferred_input_types = df.dtypes.to_dict()
for column, dtype in pandas_inferred_input_types.items():
Expand Down
37 changes: 30 additions & 7 deletions tests/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
from io import BytesIO

import pytest
import httpretty
import numpy as np
import pandas as pd
Expand All @@ -18,11 +18,26 @@ def test_upload_df(diabetes_dataset):
httpretty.POST,
"http://giskard-host:12345/api/v2/project/data/upload"
)

df, input_types, target = diabetes_dataset
dataset_name = "diabetes dataset"
client = GiskardClient("http://giskard-host:12345", "SECRET_TOKEN")
project = GiskardProject(client.session, "test-project")
project.upload_df(df, input_types, target, name="diabetes dataset")

with pytest.raises(Exception): # Error Scenario
project.upload_df(
df=df,
column_types=input_types,
target=target,
name=dataset_name)
with pytest.raises(Exception): # Error Scenario
project.upload_df(df=df,
column_types={"test":"test"},
name=dataset_name)

project.upload_df(df=df,
column_types=input_types,
name=dataset_name)

req = httpretty.last_request()
assert req.headers.get('Authorization') == 'Bearer SECRET_TOKEN'
assert int(req.headers.get('Content-Length')) > 0
Expand All @@ -47,12 +62,20 @@ def test_upload_model(linear_regression_diabetes: GiskardModel, diabetes_dataset
client = GiskardClient("http://giskard-host:12345", "SECRET_TOKEN")
project = GiskardProject(client.session, "test-project")
project.upload_model(
model.prediction_function,
model.model_type,
model.feature_names,
"uploaded model",
prediction_function=model.prediction_function,
model_type=model.model_type,
feature_names=model.feature_names,
name="uploaded model",
validate_df=df
)
with pytest.raises(Exception):
project.upload_model(
prediction_function=model.prediction_function,
model_type=model.model_type,
feature_names=input_types,
name="uploaded model",
validate_df=df
)
req = httpretty.last_request()
assert req.headers.get('Authorization') == 'Bearer SECRET_TOKEN'
assert int(req.headers.get('Content-Length')) > 0
Expand Down

0 comments on commit 54b504b

Please sign in to comment.