This repository has been archived by the owner on Jun 25, 2023. It is now read-only.

solution #5

Open
wants to merge 3 commits into main
12 changes: 12 additions & 0 deletions autotests/app/Dockerfile
@@ -0,0 +1,12 @@
# Build a custom k6 binary with the faker and Prometheus remote-write extensions
FROM grafana/xk6 as xk6_builder
RUN xk6 build --output k6 --with github.com/szkiba/xk6-faker@latest \
    --with github.com/grafana/xk6-output-prometheus-remote@latest


FROM grafana/k6

WORKDIR /app/
COPY src/main.js /app/
COPY --from=xk6_builder /xk6/k6 /usr/bin/k6

# Stream test metrics to Prometheus via the remote-write output built in above
ENTRYPOINT k6 run -o xk6-prometheus-rw main.js
2 changes: 1 addition & 1 deletion autotests/app/src/main.js
@@ -107,4 +107,4 @@ export function post_radom() {
         "model 5 score valid": res => res.json().jy46604790.score > 0,
         "model 5 label valid": res => res.json().jy46604790.label === "LABEL_0" || res.json().jy46604790.label === "LABEL_1",
     });
-}
+}
4 changes: 2 additions & 2 deletions autotests/helm/values.yaml
@@ -25,8 +25,8 @@ global:
   activeDeadlineSeconds: 3600 # 1h

   env:
-    PARTICIPANT_NAME: <REPLACE_WITH_USERNAME>
-    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/<REPLACE_WITH_ENDPOINT>
+    PARTICIPANT_NAME: Khush Patel
+    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process

     # K6, do not edit!
     K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
23 changes: 23 additions & 0 deletions solution/.dockerignore
@@ -0,0 +1,23 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
pip-log.txt
pip-delete-this-directory.txt
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.gitignore
.mypy_cache
.pytest_cache
.hypothesis
.idea
11 changes: 11 additions & 0 deletions solution/Dockerfile
@@ -0,0 +1,11 @@
FROM python:3.10.11-slim-bullseye

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

COPY ./app.py /code/app.py

RUN pip install -r requirements.txt

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
181 changes: 181 additions & 0 deletions solution/app.py
@@ -0,0 +1,181 @@
# Import required libraries
from fastapi import FastAPI, Request
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
from scipy.special import softmax

# Initialize a FastAPI app instance
app = FastAPI()

# Define a default route
@app.get('/')
async def home():
    return 'MLOps Challenge'


# Load the trained models once at startup so requests don't pay the load cost
print("Model Loading Started...")

# CardiffNLP sentiment model
cardiffnlp_MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
cardiffnlp_tokenizer = AutoTokenizer.from_pretrained(cardiffnlp_MODEL)
cardiffnlp_model = AutoModelForSequenceClassification.from_pretrained(cardiffnlp_MODEL)
print("cardiffnlp Model Loaded!")

# ivanlau language-detection model
ivanlau_MODEL = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
ivanlau_tokenizer = AutoTokenizer.from_pretrained(ivanlau_MODEL)
ivanlau_config = AutoConfig.from_pretrained(ivanlau_MODEL)  # provides the id2label mapping
ivanlau_model = AutoModelForSequenceClassification.from_pretrained(ivanlau_MODEL)
print("ivanlau Model Loaded!")

# svalabs crypto-spam model
svalabs_MODEL = "svalabs/twitter-xlm-roberta-crypto-spam"
svalabs_tokenizer = AutoTokenizer.from_pretrained(svalabs_MODEL)
svalabs_model = AutoModelForSequenceClassification.from_pretrained(svalabs_MODEL)
print("svalabs Model Loaded!")

# EIStakovskii toxicity model
EIStakovskii_MODEL = "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
EIStakovskii_tokenizer = AutoTokenizer.from_pretrained(EIStakovskii_MODEL)
EIStakovskii_model = AutoModelForSequenceClassification.from_pretrained(EIStakovskii_MODEL)
print("EIStakovskii Model Loaded!")

# jy46604790 fake-news model
jy46604790_MODEL = "jy46604790/Fake-News-Bert-Detect"
jy46604790_tokenizer = AutoTokenizer.from_pretrained(jy46604790_MODEL)
jy46604790_model = AutoModelForSequenceClassification.from_pretrained(jy46604790_MODEL)
print("jy46604790 Model Loaded!")


# Mask user mentions and URLs, following the CardiffNLP model card's preprocessing
def preprocess(text: str) -> str:
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)


# Define a function for each of the trained models: tokenize the input, run the
# model, softmax the logits, and return the top label with its score.
def cardiffnlp_processing(text: str) -> dict:
    encoded_input = cardiffnlp_tokenizer(text, return_tensors='pt')
    outputs = cardiffnlp_model(**encoded_input)
    logits = outputs.logits.detach().numpy()

    scores = softmax(logits)[0]
    max_index = scores.argmax()

    id2label = ["NEGATIVE", "NEUTRAL", "POSITIVE"]
    return {
        "score": float(scores[max_index]),
        "label": id2label[max_index],
    }


def ivanlau_processing(text: str) -> dict:
    encoded_input = ivanlau_tokenizer(text, return_tensors='pt')
    outputs = ivanlau_model(**encoded_input)
    logits = outputs.logits.detach().numpy()

    scores = softmax(logits)[0]
    max_index = scores.argmax()

    # Label names come from the model's own config
    return {
        "score": float(scores[max_index]),
        "label": ivanlau_config.id2label[max_index],
    }


def svalabs_processing(text: str) -> dict:
    encoded_input = svalabs_tokenizer(text, return_tensors='pt')
    outputs = svalabs_model(**encoded_input)
    logits = outputs.logits.detach().numpy()

    scores = softmax(logits)[0]
    max_index = scores.argmax()

    id2label = ["HAM", "SPAM"]
    return {
        "score": float(scores[max_index]),
        "label": id2label[max_index],
    }


def EIStakovskii_processing(text: str) -> dict:
    encoded_input = EIStakovskii_tokenizer(text, return_tensors='pt')
    outputs = EIStakovskii_model(**encoded_input)
    logits = outputs.logits.detach().numpy()

    scores = softmax(logits)[0]
    max_index = scores.argmax()

    id2label = ["LABEL_0", "LABEL_1"]
    return {
        "score": float(scores[max_index]),
        "label": id2label[max_index],
    }


def jy46604790_processing(text: str) -> dict:
    encoded_input = jy46604790_tokenizer(text, return_tensors='pt')
    outputs = jy46604790_model(**encoded_input)
    logits = outputs.logits.detach().numpy()

    scores = softmax(logits)[0]
    max_index = scores.argmax()

    # Note the reversed order: index 0 maps to LABEL_1, index 1 to LABEL_0
    id2label = ["LABEL_1", "LABEL_0"]
    return {
        "score": float(scores[max_index]),
        "label": id2label[max_index],
    }

# Define a process route: the raw request body is the text to classify
@app.post("/process")
async def predict(request: Request):
    text = preprocess((await request.body()).decode())
    return {
        "cardiffnlp": cardiffnlp_processing(text),
        "ivanlau": ivanlau_processing(text),
        "svalabs": svalabs_processing(text),
        "EIStakovskii": EIStakovskii_processing(text),
        "jy46604790": jy46604790_processing(text),
    }
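For reference, a minimal client sketch for the /process route above: it POSTs raw text as the request body and prints the per-model JSON. The localhost URL and the `requests` dependency are assumptions for local testing, not part of this PR.

import requests  # assumed installed for this ad-hoc test; not in the service's requirements

resp = requests.post("http://localhost:8000/process", data="This is a test tweet")
print(resp.json())  # {"cardiffnlp": {"score": ..., "label": ...}, "ivanlau": {...}, ...}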
8 changes: 8 additions & 0 deletions solution/requirements.txt
@@ -0,0 +1,8 @@
sentencepiece
transformers
torch
scipy
fastapi
uvicorn[standard]
pydantic
protobuf==3.13.0