-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
giannibalistreri
committed
Dec 30, 2023
1 parent
91fbbda
commit b7a9c4f
Showing
16 changed files
with
735 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
## Setup Terraform on AWS | ||
|
||
1. Create IAM Role: | ||
|
||
Terraform needs full access to all required AWS services. Therefore, create an IAM role that can be assumed by Terraform: | ||
- TerraformServiceRole: | ||
- Permission Policies: AdministratorAccess | ||
- Trust Relationship: see json file "terraform_iam_role_trust_relationship.json" | ||
|
||
2. Create User: | ||
|
||
The created IAM role needs to be attached to a user who is a member of a user group. | ||
- User Group: CICD | ||
- User: Gitlab | ||
- Permission Policies: gitlab_permissions (see json file "gitlab_permissions.json") | ||
|
||
3. Create TF-State S3 Bucket: | ||
|
||
In order to make the Terraform deployment available to an organisation, the Terraform state file must be persisted in an accessible S3 bucket. | ||
- S3 Bucket: xxx-ml-ops-tfstate-production | ||
|
||
|
||
## Prerequisites of Kubeflow Deployment | ||
|
||
1. Provision Domain: | ||
|
||
In order to make Kubeflow available via the internet, a public domain is needed. AWS offers a service called Route53 which can be used to provision a domain. | ||
- Route53 > Hosted zones > Create hosted zone | ||
|
||
2. Setup Gitlab-CI: | ||
|
||
The following configurations must be made so that Terraform can be executed via CI/CD: | ||
- Create Gitlab Variables: Settings > CI/CD > Variables | ||
- AWS_ACCESS_KEY_ID (mask variable, expand variable reference) | ||
- AWS_ACCOUNT_ID (mask variable, expand variable reference) | ||
- AWS_SECRET_ACCESS_KEY (mask variable, expand variable reference) | ||
- AWS_REGION (mask variable, expand variable reference) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
""" | ||
Customized model inference predictor: PyTorch | ||
""" | ||
|
||
import argparse | ||
import numpy as np | ||
import torch | ||
|
||
from kserve import Model, ModelServer | ||
from torchvision import models | ||
from typing import Dict | ||
|
||
|
||
class AlexNetModel(Model):
    """
    KServe model wrapping a pretrained torchvision AlexNet image classifier
    """
    def __init__(self, name: str):
        """
        :param name: str
            Name under which the model is registered at the model server
        """
        super().__init__(name)
        self.name = name
        self.model = None
        self.load()
        self.ready = True

    def load(self):
        """
        Instantiate the pretrained AlexNet network and switch it to evaluation mode
        """
        # NOTE(review): newer torchvision releases prefer the "weights=" argument over "pretrained=" - confirm against the pinned version
        self.model = models.alexnet(pretrained=True)
        self.model.eval()

    def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
        """
        Classify the first image of the request

        :param payload: Dict
            Request body; the base64 encoded image is read from payload["instances"][0]["image"]["b64"]
            (only the first instance is evaluated)
        :param headers: Dict[str, str]
            Request headers (not used)
        :return: Dict
            {"predictions": [...]} containing the five highest softmax probabilities in descending order
        """
        # Imported locally because the module header does not provide these names
        import base64
        import io

        from PIL import Image
        from torchvision import transforms

        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        # Normalize() below expects exactly three channels, so grayscale / RGBA uploads are converted first
        input_image = Image.open(io.BytesIO(raw_img_data)).convert("RGB")
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        # Inference only: no gradient bookkeeping needed
        with torch.no_grad():
            output = self.model(input_tensor)
        # softmax() returns a new tensor; previously its result was discarded and raw logits were returned
        probabilities = torch.nn.functional.softmax(output, dim=1)
        values, _ = torch.topk(probabilities, 5)
        return {"predictions": values.flatten().tolist()}
|
||
|
||
if __name__ == "__main__":
    # Serve one AlexNet instance registered under the name "custom-model"
    ModelServer().start([AlexNetModel("custom-model")])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import requests | ||
import os | ||
import json | ||
|
||
from e2e.utils.utils import load_json_file | ||
|
||
|
||
def run_inference_sample():
    """
    Send the iris sample payload to a KServe prediction endpoint and print the answer.

    Endpoint, model name and credentials are read from environment variables (sample defaults
    are used when they are unset). Unless AUTH_PROVIDER equals "cognito", the request is
    authenticated with an "authservice_session" cookie obtained by logging in via the dashboard
    URL; for "cognito" a custom HTTP header (HTTP_HEADER_NAME / HTTP_HEADER_VALUE) is sent instead.

    :raises Exception: if the endpoint does not answer with HTTP status 200
    """
    # common vars
    kubeflow_domain = os.environ.get("KUBEFLOW_DOMAIN", "kubeflow.example.com")
    profile_namespace = os.environ.get("PROFILE_NAMESPACE", "kubeflow-user-example-com")
    model_name = os.environ.get("MODEL_NAME", "sklearn-iris")
    auth_provider = os.environ.get("AUTH_PROVIDER", "dex")

    model_host = f"{model_name}.{profile_namespace}.{kubeflow_domain}"
    url = f"https://{model_host}/v1/models/{model_name}:predict"
    headers = {"Host": model_host}
    dashboard_url = f"https://kubeflow.{kubeflow_domain}"
    data = load_json_file("./utils/kserve/iris-input.json")

    if auth_provider != "cognito":
        # NOTE(review): sample credentials only - presumably the stock Kubeflow example user; override via environment
        profile_username = os.environ.get("PROFILE_USERNAME", "user@example.com")
        password = os.environ.get("PASSWORD", "12341234")

        def session_cookie(host, login, password):
            """Log in through the form reached from *host* and return the "authservice_session" cookie value."""
            session = requests.Session()
            login_page = session.get(host)
            form_headers = {
                "Content-Type": "application/x-www-form-urlencoded",
            }
            credentials = {"login": login, "password": password}
            # Post the credentials to the URL the initial GET ended up on
            session.post(login_page.url, headers=form_headers, data=credentials)
            return session.cookies.get_dict()["authservice_session"]

        cookie = {
            "authservice_session": session_cookie(
                dashboard_url, profile_username, password
            )
        }
        response = requests.post(url, headers=headers, json=data, cookies=cookie)
    else:
        http_header_name = os.environ.get("HTTP_HEADER_NAME", "x-api-key")
        http_header_value = os.environ.get("HTTP_HEADER_VALUE", "token1")
        headers[http_header_name] = http_header_value

        response = requests.post(url, headers=headers, json=data)

    status_code = response.status_code
    print("Status Code", status_code)
    if status_code == 200:
        print("JSON Response ", json.dumps(response.json(), indent=2))
    else:
        # The message previously ended after "status code = " without the actual value
        raise Exception(f"prediction failed, status code = {status_code}")


if __name__ == "__main__":
    run_inference_sample()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{ | ||
"instances": [ | ||
[ | ||
6.8, | ||
2.8, | ||
4.8, | ||
1.4 | ||
], | ||
[ | ||
6.0, | ||
3.4, | ||
4.5, | ||
1.6 | ||
] | ||
] | ||
} |
17 changes: 17 additions & 0 deletions
17
kubeflow_templates/kfserve/predictor/custom_predictor_sklearn/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Container image of the custom KServe predictor (entry point: task.py).
# The COPY sources are relative to the docker build context.
# NOTE(review): this file is committed under kubeflow_templates/kfserve/..., while the COPY sources start with
# kserve/... - confirm the directory the image is meant to be built from.

# Debian based image instead of Alpine: several declared dependencies (e.g. catboost, xgboost) are, as far as
# published wheels go, distributed as manylinux wheels only, which pip cannot install on musl based Alpine
FROM python:3.9-slim

# COPY creates /src, no separate mkdir required
COPY kserve/predictor/custom_predictor_sklearn/src /src
COPY kserve/predictor/custom_predictor_sklearn/pyproject.toml /src

WORKDIR /src

RUN python -m pip install --no-cache-dir --upgrade pip setuptools poetry

# Install into the system interpreter (no virtualenv inside the container).
# "--only main" replaces the deprecated "--no-dev"; "--no-root" because /src contains a script, not an installable package
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-interaction

ENTRYPOINT ["python", "task.py"]
9 changes: 9 additions & 0 deletions
9
kubeflow_templates/kfserve/predictor/custom_predictor_sklearn/docker_build.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/usr/bin/env bash
# Build the predictor image, tag it for the account's ECR registry and push it.
# Replace the "xxx" placeholders before running.

# Stop at the first failing command or unset variable instead of pushing a stale / wrong image
set -eu

export IMAGE_NAME=xxx
export IMAGE_TAG=xxx
export AWS_ACCOUNT_ID=xxx
export AWS_ACCOUNT_REGION=xxx

ECR_REGISTRY="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_ACCOUNT_REGION.amazonaws.com"

aws ecr get-login-password --region "$AWS_ACCOUNT_REGION" | sudo docker login --username AWS --password-stdin "$ECR_REGISTRY"
# Build with the explicit tag: building as "$IMAGE_NAME" only creates ":latest", so the
# following "docker tag $IMAGE_NAME:$IMAGE_TAG" could not find its source image
sudo docker build -t "$IMAGE_NAME:$IMAGE_TAG" .
sudo docker tag "$IMAGE_NAME:$IMAGE_TAG" "$ECR_REGISTRY/$IMAGE_NAME:$IMAGE_TAG"
sudo docker push "$ECR_REGISTRY/$IMAGE_NAME:$IMAGE_TAG"
20 changes: 20 additions & 0 deletions
20
kubeflow_templates/kfserve/predictor/custom_predictor_sklearn/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Poetry project definition of the custom predictor; the Dockerfile copies this file next to the sources and runs "poetry install"
[tool.poetry]
name = "predictor_sklearn"
version = "0.1.0"
description = "Predictor of endpoint inference pipeline"
# NOTE(review): the author e-mail looks redacted ("[email protected]") - restore a valid address
authors = ["gianni <[email protected]>"]

# Runtime dependencies
# NOTE(review): src/task.py imports kserve, torch, torchvision and numpy, none of which is declared here - confirm
[tool.poetry.dependencies]
python = "^3.9"
catboost = "^1.2.2"
pygam = "^0.9.0"
s3fs = "^2023.9.2"
scikit-learn = "^1.3.2"
xgboost = "^2.0.2"

# Development-only dependencies (legacy section name)
[tool.poetry.dev-dependencies]
pytest = "^5.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
9 changes: 9 additions & 0 deletions
9
...plates/kfserve/predictor/custom_predictor_sklearn/rest_custom_serving_runtime_kserve.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# KServe InferenceService that runs the custom predictor image as predictor container
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: custom-model
spec:
  predictor:
    containers:
      - name: kserve-container
        # ${DOCKER_USER} is a placeholder that has to be substituted before the manifest is applied
        # NOTE(review): docker_build.sh pushes the image to an ECR registry - confirm this image reference
        image: ${DOCKER_USER}/custom-model:v1
58 changes: 58 additions & 0 deletions
58
kubeflow_templates/kfserve/predictor/custom_predictor_sklearn/src/task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
""" | ||
Customized model inference predictor: Sklearn API (Non-Neural Networks) | ||
""" | ||
|
||
import argparse | ||
import numpy as np | ||
import torch | ||
|
||
from kserve import Model, ModelServer | ||
from torchvision import models | ||
from typing import Dict | ||
|
||
|
||
class SupervisedMLPredictor(Model):
    """
    Class for generating predictions used in inference endpoints of KServe

    NOTE(review): the module is titled "Sklearn API (Non-Neural Networks)" but the implementation still
    serves the pretrained torchvision AlexNet image classifier - confirm the intended model
    """
    def __init__(self, name: str):
        """
        :param name: str
            Name under which the model is registered at the model server
        """
        super().__init__(name)
        self.name: str = name
        self.model = None
        self.load()
        # Flag the model as loaded, in line with the sibling AlexNet predictor template
        self.ready = True

    def load(self):
        """
        Instantiate the pretrained AlexNet network and switch it to evaluation mode
        """
        # NOTE(review): newer torchvision releases prefer the "weights=" argument over "pretrained=" - confirm against the pinned version
        self.model = models.alexnet(pretrained=True)
        self.model.eval()

    def preprocess(self, inputs: Dict, headers: Dict[str, str] = None) -> Dict:
        """
        Hand the request through unchanged

        :param inputs: Dict
            Request body
        :param headers: Dict[str, str]
            Request headers (not used)
        :return: Dict
            The unmodified request body
        """
        # The former implementation called image_transform(), which is defined nowhere in this module, and
        # would have replaced the raw instances that predict() decodes itself
        return inputs

    def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
        """
        Classify the first image of the request

        :param payload: Dict
            Request body; the base64 encoded image is read from payload["instances"][0]["image"]["b64"]
            (only the first instance is evaluated)
        :param headers: Dict[str, str]
            Request headers (not used)
        :return: Dict
            {"predictions": [...]} containing the five highest softmax probabilities in descending order
        """
        # Imported locally because the module header does not provide these names
        import base64
        import io

        from PIL import Image
        from torchvision import transforms

        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        # Normalize() below expects exactly three channels, so grayscale / RGBA uploads are converted first
        input_image = Image.open(io.BytesIO(raw_img_data)).convert("RGB")
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        # Inference only: no gradient bookkeeping needed
        with torch.no_grad():
            output = self.model(input_tensor)
        # softmax() returns a new tensor; previously its result was discarded and raw logits were returned
        probabilities = torch.nn.functional.softmax(output, dim=1)
        values, _ = torch.topk(probabilities, 5)
        return {"predictions": values.flatten().tolist()}

    def postprocess(self, inputs: Dict, headers: Dict[str, str] = None) -> Dict:
        """
        Hand the prediction result through unchanged

        :param inputs: Dict
            Output of predict()
        :param headers: Dict[str, str]
            Request headers (not used)
        :return: Dict
            The unmodified prediction result
        """
        return inputs
|
||
|
||
if __name__ == "__main__":
    # "Predictor" is not defined in this module; the predictor class declared here is SupervisedMLPredictor
    model = SupervisedMLPredictor("custom-model")
    ModelServer().start([model])
104 changes: 104 additions & 0 deletions
104
kubeflow_templates/kubeflow_management/katib/kubeflow_katib_management.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
""" | ||
Management of katib setup | ||
""" | ||
|
||
import argparse | ||
import subprocess | ||
|
||
from typing import List | ||
|
||
|
||
# Command line interface: only the AWS account id is mandatory, all other options carry defaults
PARSER = argparse.ArgumentParser(
    description="manage kubeflow hyperparameter tuning service called katib"
)
PARSER.add_argument(
    '-aws_account_id',
    type=str,
    required=True,
    default=None,
    help='AWS account id'
)
PARSER.add_argument(
    '-service_account_name',
    type=str,
    required=False,
    default='default-editor',
    help='name of the service account'
)
PARSER.add_argument(
    '-aws_region',
    type=str,
    required=False,
    default='eu-central-1',
    help='AWS region code'
)
PARSER.add_argument(
    '-cluster_name',
    type=str,
    required=False,
    default='kubeflow',
    help='name of the EKS cluster'
)
# Evaluated at import time, i.e. importing this module already consumes sys.argv
ARGS = PARSER.parse_args()
|
||
|
||
class KubeflowKatibManagementException(Exception):
    """
    Exception type belonging to class KubeflowKatibManagement
    """
|
||
|
||
class KubeflowKatibManagement: | ||
""" | ||
Class for handling Kubeflow katib management | ||
""" | ||
def __init__(self, | ||
aws_account_id: str, | ||
service_account_name: str, | ||
aws_region: str = 'eu-central-1', | ||
cluster_name: str = 'kubeflow' | ||
): | ||
""" | ||
:param aws_account_id: str | ||
AWS Account ID | ||
:param service_account_name: str | ||
Name of the service account name | ||
:param aws_region: str | ||
AWS region | ||
:param cluster_name: str | ||
Name of the EKS cluster | ||
""" | ||
self.service_account_name: str = service_account_name | ||
self.cluster_name: str = cluster_name | ||
self.aws_region: str = aws_region | ||
self.aws_account_id: str = aws_account_id | ||
self._login_to_eks_cluster() | ||
|
||
def _adjust_katib_config(self) -> str:
"""
Export the katib-config ConfigMap via kubectl and build an adjusted copy of it

The ConfigMap is written to "katib_config.yaml" in the working directory and read back from there.
The result starts with the lines "apiVersion: v1" and "data:"; while scanning the exported file, the
area between a line containing "suggestion: |-" and a line containing "kind: ConfigMap" is tracked and
a "serviceAccountName" entry (self.service_account_name) is added after lines containing '"image":'.

:return: str
Adjusted katib config yaml file
"""
# NOTE(review): the block indentation of this method is missing in this copy; restore the nesting of the
# statements inside the for-loop from the authoritative file before relying on any of the notes here
# NOTE(review): the cluster name is passed as kubectl namespace ("-n") - confirm that both always coincide
_cmd: str = f"kubectl get configMap katib-config -n {self.cluster_name} -o yaml > katib_config.yaml"
# NOTE(review): the exit status of kubectl is not evaluated, so a failed export goes unnoticed here
subprocess.run(_cmd, shell=True, capture_output=False, text=True)
with open('katib_config.yaml', 'r') as file:
_katib_config_yaml = file.read()
# Header lines of the new ConfigMap document
_config_yaml: List[str] = ['apiVersion: v1', 'data:']
# Tracks whether the current line lies inside the "suggestion" area
_found_area: bool = False
for line in _katib_config_yaml.split('\n'):
if line.find('suggestion: |-') >= 0:
_found_area = True
if line.find('kind: ConfigMap') >= 0:
_found_area = False
_config_yaml.append(line)
if _found_area:
if line.find('"image":') >= 0:
_config_yaml.append(f' "serviceAccountName": "{self.service_account_name}"')
return "\n".join(_config_yaml)
|
||
def _login_to_eks_cluster(self) -> None:
    """
    Login to running EKS cluster by updating the local kubeconfig via the AWS CLI

    :raises KubeflowKatibManagementException:
        If the AWS CLI cannot be started or exits with a non-zero status
    """
    # Argument list instead of a shell string: region and cluster name are caller supplied and must not
    # be interpreted by a shell
    _cmd: List[str] = ['aws', 'eks',
                       '--region', self.aws_region,
                       'update-kubeconfig',
                       '--name', self.cluster_name
                       ]
    try:
        # check=True: a failed login must not go unnoticed, otherwise later kubectl calls would run
        # against whatever context is currently configured
        subprocess.run(_cmd, check=True, text=True)
    except (OSError, subprocess.CalledProcessError) as e:
        raise KubeflowKatibManagementException(
            f'Login to EKS cluster ({self.cluster_name}) in region ({self.aws_region}) failed: {e}'
        ) from e
|
||
def enable_katib(self) -> None:
    """
    Write the adjusted katib config to "new_katib_config.yaml" and apply it to the cluster via kubectl

    :raises KubeflowKatibManagementException:
        If kubectl cannot be started or exits with a non-zero status
    """
    _katib_config_yaml: str = self._adjust_katib_config()
    with open('new_katib_config.yaml', 'w') as file:
        file.write(_katib_config_yaml)
    try:
        # No shell needed for a fixed command; check=True surfaces a rejected manifest instead of ignoring it
        subprocess.run(['kubectl', 'apply', '-f', 'new_katib_config.yaml'], check=True, text=True)
    except (OSError, subprocess.CalledProcessError) as e:
        raise KubeflowKatibManagementException(f'Applying adjusted katib config failed: {e}') from e
|
||
|
||
if __name__ == '__main__':
    # Build the manager from the parsed command line arguments and apply the adjusted katib config
    KubeflowKatibManagement(
        aws_account_id=ARGS.aws_account_id,
        service_account_name=ARGS.service_account_name,
        aws_region=ARGS.aws_region,
        cluster_name=ARGS.cluster_name
    ).enable_katib()
Oops, something went wrong.