diff --git a/autotests/helm/values.yaml b/autotests/helm/values.yaml
index cda6a5e..06aa820 100644
--- a/autotests/helm/values.yaml
+++ b/autotests/helm/values.yaml
@@ -25,8 +25,8 @@ global:
   activeDeadlineSeconds: 3600 # 1h
   env:
-    PARTICIPANT_NAME:
-    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/
+    PARTICIPANT_NAME: ddnovikov
+    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process
 
     # K6, do not edit!
     K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
diff --git a/solution/Dockerfile b/solution/Dockerfile
new file mode 100644
index 0000000..06c552b
--- /dev/null
+++ b/solution/Dockerfile
@@ -0,0 +1,22 @@
+# syntax = docker/dockerfile:1.4
+
+FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
+
+RUN apt-get update --yes && \
+    apt-get install --yes --no-install-recommends \
+        git \
+        git-lfs \
+        jq && \
+    git lfs install && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install -r /code/requirements.txt
+COPY app.py /code
+COPY cmd.sh .
+
+ENV TRANSFORMERS_CACHE=/models \
+    TOKENIZERS_PARALLELISM=false
+
+CMD ["./cmd.sh"]
\ No newline at end of file
diff --git a/solution/app.py b/solution/app.py
new file mode 100644
index 0000000..d4f4466
--- /dev/null
+++ b/solution/app.py
@@ -0,0 +1,93 @@
+import asyncio
+
+from fastapi import FastAPI
+from transformers import AutoTokenizer
+from starlette.requests import Request
+from optimum.onnxruntime import ORTModelForSequenceClassification
+
+app = FastAPI()
+
+MODELS = {
+    "cardiffnlp": "/models/twitter-xlm-roberta-base-sentiment/",
+    "ivanlau": "/models/language-detection-fine-tuned-on-xlm-roberta-base/",
+    "svalabs": "/models/twitter-xlm-roberta-crypto-spam/",
+    "EIStakovskii": "/models/xlm_roberta_base_multilingual_toxicity_classifier_plus/",
+    "jy46604790": "/models/Fake-News-Bert-Detect/"
+}
+
+
+async def model_inference_task(model_name: str, q: asyncio.Queue):
+    # One worker per model: load the ONNX model once, then serve batches forever.
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = ORTModelForSequenceClassification.from_pretrained(model_name, provider="CUDAExecutionProvider")
+    model.to(device="cuda:0")
+
+    while True:
+        strings, queues = [], []
+        # Collect a batch: wait up to 25 ms for each next request, cap the batch at 8.
+        while True:
+            try:
+                (string, rq) = await asyncio.wait_for(q.get(), timeout=0.025)
+            except asyncio.exceptions.TimeoutError:
+                break
+            strings.append(string)
+            queues.append(rq)
+            if len(strings) == 8:
+                break
+        if not strings:
+            continue
+
+        encoded_input = tokenizer(
+            strings,
+            padding='longest',
+            truncation=True,
+            return_token_type_ids=True,
+            return_tensors='pt'
+        ).to(device="cuda:0")
+        logits = model(**encoded_input).logits
+
+        id2label = model.config.id2label
+        label_ids = logits.argmax(dim=1)
+        scores = logits.softmax(dim=-1)
+        outs = [
+            {
+                "label": id2label[label_id.item()],
+                "score": score[label_id.item()].item()
+            }
+            for label_id, score in zip(label_ids, scores)
+        ]
+
+        # Answer each request through its own response queue.
+        for rq, out in zip(queues, outs):
+            await rq.put(out)
+
+
+@app.on_event("startup")
+async def startup_event():
+    app.model_queues = {}
+    for model_key, model_name in MODELS.items():
+        q = asyncio.Queue()
+        app.model_queues[model_key] = q
+        asyncio.create_task(model_inference_task(model_name, q))
+
+
+@app.post("/process")
+async def process(request: Request):
+    text = (await request.body()).decode()
+    if not text:
+        return {}
+
+    # Fan the text out to every model queue first, then collect the answers,
+    # so all five models score the request concurrently instead of one by one.
+    response_queues = {}
+    for model_key, model_q in request.app.model_queues.items():
+        response_queues[model_key] = asyncio.Queue()
+        await model_q.put((text, response_queues[model_key]))
+
+    result = {}
+    for model_key, response_q in response_queues.items():
+        result[model_key] = await response_q.get()
+        if model_key == "cardiffnlp":
+            result[model_key]["label"] = result[model_key]["label"].upper()
+
+    return result
diff --git a/solution/cmd.sh b/solution/cmd.sh
new file mode 100755
index 0000000..cc887c4
--- /dev/null
+++ b/solution/cmd.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+set -e
+MODELS_DIR='/models'
+
+if [ -d "$MODELS_DIR" ]
+then
+    echo 'cloning repos...'
+    git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment /models/twitter-xlm-roberta-base-sentiment
+    git clone https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base /models/language-detection-fine-tuned-on-xlm-roberta-base
+    # this config stores label2id values as strings; convert them to numbers
+    cd /models/language-detection-fine-tuned-on-xlm-roberta-base
+    tmp=$(mktemp) && jq '.label2id |= with_entries(.value |= tonumber)' config.json > "$tmp" && mv "$tmp" config.json
+    git clone https://huggingface.co/svalabs/twitter-xlm-roberta-crypto-spam /models/twitter-xlm-roberta-crypto-spam
+    git clone https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus /models/xlm_roberta_base_multilingual_toxicity_classifier_plus
+    # add an id2label mapping to the two configs that ship without one
+    cd /models/xlm_roberta_base_multilingual_toxicity_classifier_plus
+    tmp=$(mktemp) && jq '. += { id2label : { "0": "LABEL_0", "1": "LABEL_1" } }' config.json > "$tmp" && mv "$tmp" config.json
+    git clone https://huggingface.co/jy46604790/Fake-News-Bert-Detect /models/Fake-News-Bert-Detect
+    cd /models/Fake-News-Bert-Detect
+    tmp=$(mktemp) && jq '. += { id2label : { "0": "LABEL_0", "1": "LABEL_1" } }' config.json > "$tmp" && mv "$tmp" config.json
+    cd
+    echo 'done'
+
+    model_files=(
+        "/models/twitter-xlm-roberta-base-sentiment/"
+        "/models/language-detection-fine-tuned-on-xlm-roberta-base/"
+        "/models/twitter-xlm-roberta-crypto-spam/"
+        "/models/xlm_roberta_base_multilingual_toxicity_classifier_plus/"
+        "/models/Fake-News-Bert-Detect/"
+    )
+
+    for file_path in "${model_files[@]}"
+    do
+        rm -f "$file_path/tf_model.h5"
+        optimum-cli export onnx -m "$file_path" --device cuda --framework pt --optimize O4 --task text-classification "$file_path"
+    done
+else
+    echo "directory $MODELS_DIR is not mounted, exiting"
+    exit 1
+fi
+
+echo 'all done, starting the server'
+
+cd /code
+uvicorn app:app --host 0.0.0.0 --port 8080
\ No newline at end of file
diff --git a/solution/docker-compose.yml b/solution/docker-compose.yml
new file mode 100644
index 0000000..195b5a5
--- /dev/null
+++ b/solution/docker-compose.yml
@@ -0,0 +1,27 @@
+version: "3.9"
+
+services:
+  web:
+    build: .
+    image: infra_challenge
+    volumes:
+      - models:/models
+    ports:
+      - "8080:8080"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
+
+networks:
+  default:
+    driver: bridge
+    ipam:
+      config:
+        - subnet: 172.16.57.0/24
+
+volumes:
+  models:
\ No newline at end of file
diff --git a/solution/helm/envs/ddnovikov.yaml b/solution/helm/envs/ddnovikov.yaml
new file mode 100644
index 0000000..6751204
--- /dev/null
+++ b/solution/helm/envs/ddnovikov.yaml
@@ -0,0 +1,11 @@
+global:
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  pod:
+    ports:
+      - name: http
+        containerPort: 8080
+        protocol: TCP
+  service:
+    targetPort: 8080
diff --git a/solution/helm/envs/example.yaml b/solution/helm/envs/example.yaml
deleted file mode 100644
index 4ef5b0c..0000000
--- a/solution/helm/envs/example.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-global:
-  # add any variables you need in format `key: value`
-  # variables will be available in the container as environment variables
-  env:
-    EXAMPLE: "example"
-
-  # change 8000 to your application target port
-  pod:
-    ports:
-      - name: http
-        containerPort: 8000
-        protocol: TCP
-  service:
-    targetPort: 8000
-
-  # add any configmap data you need
-  # configmaps will be mounted to /workspace/
-  config:
-    mount:
-      path: /workspace
-    # Map of configmap entries. Entries might be of types: string, map
-    data:
-      conf1.yaml:
-        key1:
-          key11: value11
-          key12: value12
-        key2: value2
-      conf2.yaml:
-        key1: value1
-        key2: value2
diff --git a/solution/requirements.txt b/solution/requirements.txt
new file mode 100644
index 0000000..af36117
--- /dev/null
+++ b/solution/requirements.txt
@@ -0,0 +1,9 @@
+fastapi==0.89.1
+protobuf==3.20.1
+pydantic==1.10.7
+sentencepiece==0.1.99
+tokenizers==0.13.3
+transformers==4.29.2
+uvicorn==0.20.0
+optimum[onnxruntime-gpu]==1.8.6
+onnxruntime-gpu==1.15.0
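
Local smoke test (illustrative, not part of the diff): assuming Docker can see an NVIDIA GPU and the `models` volume starts out empty, the stack can be exercised roughly like this. The sample input and the response shape are hypothetical, not captured from a real run.

    docker compose up --build -d    # first start clones and ONNX-exports all five models, so expect a long warm-up
    curl -s -X POST -d 'I love this product!' http://localhost:8080/process
    # expected shape: {"cardiffnlp": {"label": "POSITIVE", "score": ...}, "ivanlau": {...}, ...}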