From 0ab2b51138c3954e368d5db66406ec522c23e4ca Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Tue, 9 May 2023 18:31:51 +0700 Subject: [PATCH 01/13] WIP: first CPU-only working solution. --- solution/Dockerfile | 37 +++++++++++++++++++++++++++++ solution/app.py | 34 +++++++++++++++++++++++++++ solution/docker-compose.yml | 15 ++++++++++++ solution/requirements.txt | 46 +++++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+) create mode 100644 solution/Dockerfile create mode 100644 solution/app.py create mode 100644 solution/docker-compose.yml create mode 100644 solution/requirements.txt diff --git a/solution/Dockerfile b/solution/Dockerfile new file mode 100644 index 0000000..510ee60 --- /dev/null +++ b/solution/Dockerfile @@ -0,0 +1,37 @@ +# syntax = docker/dockerfile:1.4 + +# todo: add checking out to specific revision to avoid unexpected changes to model behaviour? +# todo: update requirements + +FROM python:3.10.11-slim-bullseye as models_storage_stage + +RUN --mount=type=cache,id=infra-challenge-apt,target=/var/cache/apt \ + apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + git \ + git-lfs && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /models + +RUN git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment && \ + git clone https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base && \ + git clone https://huggingface.co/svalabs/twitter-xlm-roberta-crypto-spam && \ + git clone https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus && \ + git clone https://huggingface.co/jy46604790/Fake-News-Bert-Detect + +FROM python:3.10.11-slim-bullseye as app + +WORKDIR /code +RUN mkdir -p /code/models +COPY --from=models_storage_stage /models /code/models + +COPY ./requirements.txt /code/requirements.txt + +RUN --mount=type=cache,id=infra-challenge-pip,target=/root/.cache \ + pip install -r /code/requirements.txt + +COPY app.py /code + +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file diff --git a/solution/app.py b/solution/app.py new file mode 100644 index 0000000..6cc5dc2 --- /dev/null +++ b/solution/app.py @@ -0,0 +1,34 @@ +import time + +from fastapi import FastAPI +from transformers import pipeline +from starlette.requests import Request + +app = FastAPI() + +models = { + "cardiffnlp": {"path": "models/twitter-xlm-roberta-crypto-spam"}, + "ivanlau": {"path": "models/language-detection-fine-tuned-on-xlm-roberta-base"}, + "svalabs": {"path": "models/twitter-xlm-roberta-crypto-spam"}, + "EIStakovskii": {"path": "models/xlm_roberta_base_multilingual_toxicity_classifier_plus"}, + "jy46604790": {"path": "models/Fake-News-Bert-Detect"} +} + +for model in models.values(): + model["pipeline"] = pipeline('text-classification', model=model["path"]) + + +@app.post("/process") +async def process(request: Request): + text = str(await request.body()) + result = {k: None for k in models.keys()} + start_overall = time.time() + for model_key, model_value in models.items(): + print(f"Inference using model: {model}") + start = time.time() + result[model_key] = model_value["pipeline"](text)[0] + elapsed = time.time() - start + print(f"The result of the inference using {model_key} is {result}. 
It took {elapsed} seconds to compute.\n") + elapsed_overall = time.time() - start_overall + print(f"ELAPSED OVERALL: {elapsed_overall}") + return result \ No newline at end of file diff --git a/solution/docker-compose.yml b/solution/docker-compose.yml new file mode 100644 index 0000000..76211d5 --- /dev/null +++ b/solution/docker-compose.yml @@ -0,0 +1,15 @@ +version: "3.9" + +services: + web: + build: . + image: app + ports: + - "8080:8080" + +networks: + default: + driver: bridge + ipam: + config: + - subnet: 172.16.57.0/24 \ No newline at end of file diff --git a/solution/requirements.txt b/solution/requirements.txt new file mode 100644 index 0000000..67465ea --- /dev/null +++ b/solution/requirements.txt @@ -0,0 +1,46 @@ +anyio==3.6.2 +certifi==2023.5.7 +charset-normalizer==3.1.0 +click==8.1.3 +cmake==3.26.3 +fastapi==0.89.1 +filelock==3.12.0 +fsspec==2023.5.0 +h11==0.14.0 +huggingface-hub==0.14.1 +idna==3.4 +Jinja2==3.1.2 +lit==16.0.3 +MarkupSafe==2.1.2 +mpmath==1.3.0 +networkx==3.1 +numpy==1.24.3 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cufft-cu11==10.9.0.58 +nvidia-curand-cu11==10.2.10.91 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-nccl-cu11==2.14.3 +nvidia-nvtx-cu11==11.7.91 +packaging==23.1 +protobuf==3.18.0 +pydantic==1.10.7 +PyYAML==6.0 +regex==2023.5.5 +requests==2.30.0 +sentencepiece==0.1.99 +sniffio==1.3.0 +starlette==0.22.0 +sympy==1.11.1 +tokenizers==0.13.3 +torch==2.0.0 +tqdm==4.65.0 +transformers==4.28.1 +triton==2.0.0 +typing_extensions==4.5.0 +urllib3==2.0.2 +uvicorn==0.20.0 \ No newline at end of file From 3e415cc80779eb04807e3d62ad5466536e456767 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Tue, 9 May 2023 21:32:20 +0700 Subject: [PATCH 02/13] WIP: first working GPU solution + bugfix with first model. Somehow works under k6's load but obviously optimizations are required. --- autotests/app/k6.Dockerfile | 5 ++--- solution/Dockerfile | 8 ++------ solution/app.py | 32 +++++++++++++++++++++++++------- solution/docker-compose.yml | 7 +++++++ 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/autotests/app/k6.Dockerfile b/autotests/app/k6.Dockerfile index b57391c..07cfb39 100644 --- a/autotests/app/k6.Dockerfile +++ b/autotests/app/k6.Dockerfile @@ -2,11 +2,10 @@ FROM grafana/xk6 as xk6_builder RUN xk6 build --output k6 --with github.com/szkiba/xk6-faker@latest \ --with github.com/grafana/xk6-output-prometheus-remote@latest - FROM grafana/k6 WORKDIR /app/ COPY src/main.js /app/ -COPY --from=xk6_builder /xk6/k6 /usr/bin/k6 +COPY --from=xk6_builder /xk6/k6 /usr/bin/k6 -ENTRYPOINT k6 run -o xk6-prometheus-rw main.js \ No newline at end of file +ENTRYPOINT k6 run -e PARTICIPANT_NAME=ddnovikov -e api_host=http://66.42.45.249:8080/process main.js \ No newline at end of file diff --git a/solution/Dockerfile b/solution/Dockerfile index 510ee60..f50501f 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,17 +1,14 @@ # syntax = docker/dockerfile:1.4 # todo: add checking out to specific revision to avoid unexpected changes to model behaviour? 
-# todo: update requirements FROM python:3.10.11-slim-bullseye as models_storage_stage -RUN --mount=type=cache,id=infra-challenge-apt,target=/var/cache/apt \ +RUN --mount=type=cache,id=storage-apt,target=/var/cache/apt \ apt-get update --yes && \ apt-get install --yes --no-install-recommends \ git \ - git-lfs && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + git-lfs WORKDIR /models @@ -28,7 +25,6 @@ RUN mkdir -p /code/models COPY --from=models_storage_stage /models /code/models COPY ./requirements.txt /code/requirements.txt - RUN --mount=type=cache,id=infra-challenge-pip,target=/root/.cache \ pip install -r /code/requirements.txt diff --git a/solution/app.py b/solution/app.py index 6cc5dc2..a8be460 100644 --- a/solution/app.py +++ b/solution/app.py @@ -7,7 +7,7 @@ app = FastAPI() models = { - "cardiffnlp": {"path": "models/twitter-xlm-roberta-crypto-spam"}, + "cardiffnlp": {"path": "models/twitter-xlm-roberta-base-sentiment"}, "ivanlau": {"path": "models/language-detection-fine-tuned-on-xlm-roberta-base"}, "svalabs": {"path": "models/twitter-xlm-roberta-crypto-spam"}, "EIStakovskii": {"path": "models/xlm_roberta_base_multilingual_toxicity_classifier_plus"}, @@ -15,20 +15,38 @@ } for model in models.values(): - model["pipeline"] = pipeline('text-classification', model=model["path"]) + model["pipeline"] = pipeline('text-classification', model=model["path"], device=0) +# todo: put each model into separate process and execute inference asynchronously @app.post("/process") async def process(request: Request): - text = str(await request.body()) + text = (await request.body()).decode() + if not text: + return {} result = {k: None for k in models.keys()} - start_overall = time.time() + # start_overall = time.time() for model_key, model_value in models.items(): print(f"Inference using model: {model}") start = time.time() result[model_key] = model_value["pipeline"](text)[0] elapsed = time.time() - start print(f"The result of the inference using {model_key} is {result}. It took {elapsed} seconds to compute.\n") - elapsed_overall = time.time() - start_overall - print(f"ELAPSED OVERALL: {elapsed_overall}") - return result \ No newline at end of file + # elapsed_overall = time.time() - start_overall + # print(f"ELAPSED OVERALL: {elapsed_overall}") + return result + + +# async def server_loop(q): +# pipe = pipeline(model="") +# while True: +# (string, response_q) = await q.get() +# out = pipe(string) +# await response_q.put(out) + + +# @app.on_event("startup") +# async def startup_event(): +# q = asyncio.Queue() +# app.model_queue = q +# asyncio.create_task(server_loop(q)) \ No newline at end of file diff --git a/solution/docker-compose.yml b/solution/docker-compose.yml index 76211d5..9749e85 100644 --- a/solution/docker-compose.yml +++ b/solution/docker-compose.yml @@ -6,6 +6,13 @@ services: image: app ports: - "8080:8080" + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] networks: default: From 07207ea8e22a2ec72bd8a1ed239d8b50527dba80 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 02:33:29 +0700 Subject: [PATCH 03/13] WIP: attempt at async computations, doesn't seem very successful. 
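Likely culprit for the lack of improvement: the transformers pipeline call inside model_inference_task is synchronous, so each inference blocks the event loop and the five per-model tasks end up running one after another instead of overlapping. A minimal sketch of one possible direction (not implemented in this patch), assuming the same text_classification_pipeline object, would be to push the blocking call onto a worker thread:

    # hypothetical alternative, not part of this patch: run the blocking
    # pipeline call in a thread pool so the event loop can keep draining
    # the other model queues while this model is busy
    loop = asyncio.get_running_loop()
    inference_result = (await loop.run_in_executor(
        None, text_classification_pipeline, text))[0]
    await response_q.put(inference_result)
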
--- solution/app.py | 73 +++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/solution/app.py b/solution/app.py index a8be460..9f4cefe 100644 --- a/solution/app.py +++ b/solution/app.py @@ -1,52 +1,53 @@ -import time +import asyncio + +from concurrent.futures import ProcessPoolExecutor from fastapi import FastAPI from transformers import pipeline from starlette.requests import Request -app = FastAPI() -models = { - "cardiffnlp": {"path": "models/twitter-xlm-roberta-base-sentiment"}, - "ivanlau": {"path": "models/language-detection-fine-tuned-on-xlm-roberta-base"}, - "svalabs": {"path": "models/twitter-xlm-roberta-crypto-spam"}, - "EIStakovskii": {"path": "models/xlm_roberta_base_multilingual_toxicity_classifier_plus"}, - "jy46604790": {"path": "models/Fake-News-Bert-Detect"} +app = FastAPI() +SHARED_PROCESS_POOL = ProcessPoolExecutor(max_workers=5) + +MODELS = { + "cardiffnlp": "models/twitter-xlm-roberta-base-sentiment", + "ivanlau": "models/language-detection-fine-tuned-on-xlm-roberta-base", + "svalabs": "models/twitter-xlm-roberta-crypto-spam", + "EIStakovskii": "models/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "jy46604790": "models/Fake-News-Bert-Detect" } -for model in models.values(): - model["pipeline"] = pipeline('text-classification', model=model["path"], device=0) + +async def model_inference_task(model_path, q): + print(f"loading {model_path}...") + text_classification_pipeline = pipeline('text-classification', model=model_path, device=0) + print(f"done loading {model_path}, waiting on the channel") + while True: + (text, response_q) = await q.get() + inference_result = text_classification_pipeline(text)[0] + await response_q.put(inference_result) + + +@app.on_event("startup") +async def startup_event(): + app.model_queues = {} + for model_key, model_path in MODELS.items(): + q = asyncio.Queue() + app.model_queues[model_key] = q + asyncio.create_task(model_inference_task(model_path, q)) -# todo: put each model into separate process and execute inference asynchronously @app.post("/process") async def process(request: Request): text = (await request.body()).decode() if not text: return {} - result = {k: None for k in models.keys()} - # start_overall = time.time() - for model_key, model_value in models.items(): - print(f"Inference using model: {model}") - start = time.time() - result[model_key] = model_value["pipeline"](text)[0] - elapsed = time.time() - start - print(f"The result of the inference using {model_key} is {result}. 
It took {elapsed} seconds to compute.\n") - # elapsed_overall = time.time() - start_overall - # print(f"ELAPSED OVERALL: {elapsed_overall}") + result = {k: None for k in MODELS.keys()} + for model_key, model_q in request.app.model_queues.items(): + response_q = asyncio.Queue() + await model_q.put((text, response_q)) + result[model_key] = await response_q.get() + if model_key == "cardiffnlp": + result[model_key]["label"] = result[model_key]["label"].upper() return result - - -# async def server_loop(q): -# pipe = pipeline(model="") -# while True: -# (string, response_q) = await q.get() -# out = pipe(string) -# await response_q.put(out) - - -# @app.on_event("startup") -# async def startup_event(): -# q = asyncio.Queue() -# app.model_queue = q -# asyncio.create_task(server_loop(q)) \ No newline at end of file From 7c86621fdb1746d70e72095f4b1cfa20bbd50e43 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 03:32:15 +0700 Subject: [PATCH 04/13] WIP: attempt at async computations with batching. --- solution/app.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/solution/app.py b/solution/app.py index 9f4cefe..43e9af9 100644 --- a/solution/app.py +++ b/solution/app.py @@ -20,13 +20,23 @@ async def model_inference_task(model_path, q): - print(f"loading {model_path}...") text_classification_pipeline = pipeline('text-classification', model=model_path, device=0) - print(f"done loading {model_path}, waiting on the channel") while True: - (text, response_q) = await q.get() - inference_result = text_classification_pipeline(text)[0] - await response_q.put(inference_result) + strings, queues = [], [] + while True: + try: + (string, rq) = await asyncio.wait_for(q.get(), timeout=0.03) + except asyncio.exceptions.TimeoutError: + break + strings.append(string) + queues.append(rq) + if len(strings) == 3: + break + if not strings: + continue + outs = text_classification_pipeline(strings, batch_size=len(strings)) + for rq, out in zip(queues, outs): + await rq.put(out) @app.on_event("startup") From 7d67166345d5a5ec8e3776a03bc36481f939df35 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 15:05:57 +0700 Subject: [PATCH 05/13] WIP: final solution, works ok, improvements from this point are going to cost lots of time but are going to be marginal. Timeout/batch_size settings may be tied to the HW i used for the tests. --- autotests/helm/values.yaml | 4 ++-- solution/app.py | 2 +- solution/helm/envs/ddnovikov.yaml | 11 +++++++++++ solution/helm/envs/example.yaml | 30 ------------------------------ 4 files changed, 14 insertions(+), 33 deletions(-) create mode 100644 solution/helm/envs/ddnovikov.yaml delete mode 100644 solution/helm/envs/example.yaml diff --git a/autotests/helm/values.yaml b/autotests/helm/values.yaml index cda6a5e..06aa820 100644 --- a/autotests/helm/values.yaml +++ b/autotests/helm/values.yaml @@ -25,8 +25,8 @@ global: activeDeadlineSeconds: 3600 # 1h env: - PARTICIPANT_NAME: - api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/ + PARTICIPANT_NAME: ddnovikov + api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process # K6, do not edit! 
K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write diff --git a/solution/app.py b/solution/app.py index 43e9af9..9ec7ff2 100644 --- a/solution/app.py +++ b/solution/app.py @@ -25,7 +25,7 @@ async def model_inference_task(model_path, q): strings, queues = [], [] while True: try: - (string, rq) = await asyncio.wait_for(q.get(), timeout=0.03) + (string, rq) = await asyncio.wait_for(q.get(), timeout=0.015) except asyncio.exceptions.TimeoutError: break strings.append(string) diff --git a/solution/helm/envs/ddnovikov.yaml b/solution/helm/envs/ddnovikov.yaml new file mode 100644 index 0000000..6751204 --- /dev/null +++ b/solution/helm/envs/ddnovikov.yaml @@ -0,0 +1,11 @@ +global: + resources: + limits: + nvidia.com/gpu: 1 + pod: + ports: + - name: http + containerPort: 8080 + protocol: TCP + service: + targetPort: 8080 diff --git a/solution/helm/envs/example.yaml b/solution/helm/envs/example.yaml deleted file mode 100644 index 4ef5b0c..0000000 --- a/solution/helm/envs/example.yaml +++ /dev/null @@ -1,30 +0,0 @@ -global: - # add any variables you need in format `key: value` - # variables will be available in the container as environment variables - env: - EXAMPLE: "example" - - # change 8000 to your application target port - pod: - ports: - - name: http - containerPort: 8000 - protocol: TCP - service: - targetPort: 8000 - - # add any configmap data you need - # configmaps will be mounted to /workspace/ - config: - mount: - path: /workspace - # Map of configmap entries. Entries might be of types: string, map - data: - conf1.yaml: - key1: - key11: value11 - key12: value12 - key2: value2 - conf2.yaml: - key1: value1 - key2: value2 From c692fa863113f8165457475d4a5f1a8a133d485e Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 15:26:55 +0700 Subject: [PATCH 06/13] Minor fixes. --- autotests/app/k6.Dockerfile | 5 +++-- solution/Dockerfile | 8 ++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/autotests/app/k6.Dockerfile b/autotests/app/k6.Dockerfile index 07cfb39..b57391c 100644 --- a/autotests/app/k6.Dockerfile +++ b/autotests/app/k6.Dockerfile @@ -2,10 +2,11 @@ FROM grafana/xk6 as xk6_builder RUN xk6 build --output k6 --with github.com/szkiba/xk6-faker@latest \ --with github.com/grafana/xk6-output-prometheus-remote@latest + FROM grafana/k6 WORKDIR /app/ COPY src/main.js /app/ -COPY --from=xk6_builder /xk6/k6 /usr/bin/k6 +COPY --from=xk6_builder /xk6/k6 /usr/bin/k6 -ENTRYPOINT k6 run -e PARTICIPANT_NAME=ddnovikov -e api_host=http://66.42.45.249:8080/process main.js \ No newline at end of file +ENTRYPOINT k6 run -o xk6-prometheus-rw main.js \ No newline at end of file diff --git a/solution/Dockerfile b/solution/Dockerfile index f50501f..c5ff786 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,11 +1,8 @@ # syntax = docker/dockerfile:1.4 -# todo: add checking out to specific revision to avoid unexpected changes to model behaviour? 
- FROM python:3.10.11-slim-bullseye as models_storage_stage -RUN --mount=type=cache,id=storage-apt,target=/var/cache/apt \ - apt-get update --yes && \ +RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ git \ git-lfs @@ -25,8 +22,7 @@ RUN mkdir -p /code/models COPY --from=models_storage_stage /models /code/models COPY ./requirements.txt /code/requirements.txt -RUN --mount=type=cache,id=infra-challenge-pip,target=/root/.cache \ - pip install -r /code/requirements.txt +RUN pip install -r /code/requirements.txt COPY app.py /code From d53beec340bfc6f9a65680b3f7a819ac58d700c8 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 15:28:47 +0700 Subject: [PATCH 07/13] Minor fixes. --- solution/app.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/solution/app.py b/solution/app.py index 9ec7ff2..8a8d231 100644 --- a/solution/app.py +++ b/solution/app.py @@ -1,14 +1,11 @@ import asyncio -from concurrent.futures import ProcessPoolExecutor - from fastapi import FastAPI from transformers import pipeline from starlette.requests import Request app = FastAPI() -SHARED_PROCESS_POOL = ProcessPoolExecutor(max_workers=5) MODELS = { "cardiffnlp": "models/twitter-xlm-roberta-base-sentiment", From feafcb45569133deba5f4520da0c44a90510eb6f Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Wed, 10 May 2023 15:51:32 +0700 Subject: [PATCH 08/13] Moving to one stage build from two-stage build to try saving some disk space for submissino. --- solution/Dockerfile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index c5ff786..26484ef 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,13 +1,14 @@ # syntax = docker/dockerfile:1.4 -FROM python:3.10.11-slim-bullseye as models_storage_stage +FROM python:3.10.11-slim-bullseye RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ git \ git-lfs -WORKDIR /models +RUN mkdir -p /code/models +WORKDIR /code/models RUN git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment && \ git clone https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base && \ @@ -15,11 +16,7 @@ RUN git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentime git clone https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus && \ git clone https://huggingface.co/jy46604790/Fake-News-Bert-Detect -FROM python:3.10.11-slim-bullseye as app - WORKDIR /code -RUN mkdir -p /code/models -COPY --from=models_storage_stage /models /code/models COPY ./requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt From e11c7af3ec8b740f51bedb241e81eed12d4a28fd Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Mon, 15 May 2023 01:42:11 +0700 Subject: [PATCH 09/13] Minor fixes to finally run the submission. 
--- solution/Dockerfile | 15 +++----------- solution/app.py | 27 +++++++++++++------------ solution/docker-compose.yml | 9 +++++++-- solution/requirements.txt | 39 ------------------------------------- 4 files changed, 25 insertions(+), 65 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index 26484ef..a24f972 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,26 +1,17 @@ # syntax = docker/dockerfile:1.4 -FROM python:3.10.11-slim-bullseye +FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ git \ git-lfs -RUN mkdir -p /code/models -WORKDIR /code/models - -RUN git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment && \ - git clone https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base && \ - git clone https://huggingface.co/svalabs/twitter-xlm-roberta-crypto-spam && \ - git clone https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus && \ - git clone https://huggingface.co/jy46604790/Fake-News-Bert-Detect - WORKDIR /code - COPY ./requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt - COPY app.py /code +ENV TRANSFORMERS_CACHE=/models + CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file diff --git a/solution/app.py b/solution/app.py index 8a8d231..1834eda 100644 --- a/solution/app.py +++ b/solution/app.py @@ -7,17 +7,20 @@ app = FastAPI() -MODELS = { - "cardiffnlp": "models/twitter-xlm-roberta-base-sentiment", - "ivanlau": "models/language-detection-fine-tuned-on-xlm-roberta-base", - "svalabs": "models/twitter-xlm-roberta-crypto-spam", - "EIStakovskii": "models/xlm_roberta_base_multilingual_toxicity_classifier_plus", - "jy46604790": "models/Fake-News-Bert-Detect" +models = { + "cardiffnlp": {"name": "cardiffnlp/twitter-xlm-roberta-base-sentiment"}, + "ivanlau": {"name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"}, + "svalabs": {"name": "svalabs/twitter-xlm-roberta-crypto-spam"}, + "EIStakovskii": {"name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"}, + "jy46604790": {"name": "jy46604790/Fake-News-Bert-Detect"} } +for model_key, model_dict in models.items(): + text_classification_pipeline = pipeline('text-classification', model=model_dict["name"], device=0) + model_dict["pipeline"] = text_classification_pipeline -async def model_inference_task(model_path, q): - text_classification_pipeline = pipeline('text-classification', model=model_path, device=0) + +async def model_inference_task(model_name, q): while True: strings, queues = [], [] while True: @@ -31,7 +34,7 @@ async def model_inference_task(model_path, q): break if not strings: continue - outs = text_classification_pipeline(strings, batch_size=len(strings)) + outs = models[model_name]["pipeline"](strings, batch_size=len(strings)) for rq, out in zip(queues, outs): await rq.put(out) @@ -39,10 +42,10 @@ async def model_inference_task(model_path, q): @app.on_event("startup") async def startup_event(): app.model_queues = {} - for model_key, model_path in MODELS.items(): + for model_key in models.keys(): q = asyncio.Queue() app.model_queues[model_key] = q - asyncio.create_task(model_inference_task(model_path, q)) + asyncio.create_task(model_inference_task(model_key, q)) @app.post("/process") @@ -50,7 +53,7 @@ async def process(request: Request): text = (await request.body()).decode() if not text: return {} - result = {k: None for k in 
MODELS.keys()} + result = {k: None for k in models.keys()} for model_key, model_q in request.app.model_queues.items(): response_q = asyncio.Queue() await model_q.put((text, response_q)) diff --git a/solution/docker-compose.yml b/solution/docker-compose.yml index 9749e85..195b5a5 100644 --- a/solution/docker-compose.yml +++ b/solution/docker-compose.yml @@ -3,7 +3,9 @@ version: "3.9" services: web: build: . - image: app + image: infra_challenge + volumes: + - models:/models ports: - "8080:8080" deploy: @@ -19,4 +21,7 @@ networks: driver: bridge ipam: config: - - subnet: 172.16.57.0/24 \ No newline at end of file + - subnet: 172.16.57.0/24 + +volumes: + models: \ No newline at end of file diff --git a/solution/requirements.txt b/solution/requirements.txt index 67465ea..e19963f 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -1,46 +1,7 @@ -anyio==3.6.2 -certifi==2023.5.7 -charset-normalizer==3.1.0 -click==8.1.3 -cmake==3.26.3 fastapi==0.89.1 -filelock==3.12.0 -fsspec==2023.5.0 -h11==0.14.0 -huggingface-hub==0.14.1 -idna==3.4 -Jinja2==3.1.2 -lit==16.0.3 -MarkupSafe==2.1.2 -mpmath==1.3.0 -networkx==3.1 -numpy==1.24.3 -nvidia-cublas-cu11==11.10.3.66 -nvidia-cuda-cupti-cu11==11.7.101 -nvidia-cuda-nvrtc-cu11==11.7.99 -nvidia-cuda-runtime-cu11==11.7.99 -nvidia-cudnn-cu11==8.5.0.96 -nvidia-cufft-cu11==10.9.0.58 -nvidia-curand-cu11==10.2.10.91 -nvidia-cusolver-cu11==11.4.0.1 -nvidia-cusparse-cu11==11.7.4.91 -nvidia-nccl-cu11==2.14.3 -nvidia-nvtx-cu11==11.7.91 -packaging==23.1 protobuf==3.18.0 pydantic==1.10.7 -PyYAML==6.0 -regex==2023.5.5 -requests==2.30.0 sentencepiece==0.1.99 -sniffio==1.3.0 -starlette==0.22.0 -sympy==1.11.1 tokenizers==0.13.3 -torch==2.0.0 -tqdm==4.65.0 transformers==4.28.1 -triton==2.0.0 -typing_extensions==4.5.0 -urllib3==2.0.2 uvicorn==0.20.0 \ No newline at end of file From 2da75e276febdf79d0be4f285e12dd2d71e98339 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Mon, 15 May 2023 03:20:38 +0700 Subject: [PATCH 10/13] More fixes. --- solution/Dockerfile | 5 ----- solution/app.py | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index a24f972..6960d2c 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -2,11 +2,6 @@ FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime -RUN apt-get update --yes && \ - apt-get install --yes --no-install-recommends \ - git \ - git-lfs - WORKDIR /code COPY ./requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt diff --git a/solution/app.py b/solution/app.py index 1834eda..b0caaf6 100644 --- a/solution/app.py +++ b/solution/app.py @@ -4,7 +4,6 @@ from transformers import pipeline from starlette.requests import Request - app = FastAPI() models = { @@ -16,7 +15,7 @@ } for model_key, model_dict in models.items(): - text_classification_pipeline = pipeline('text-classification', model=model_dict["name"], device=0) + text_classification_pipeline = pipeline('text-classification', model=model_dict["name"], device="cuda:0") model_dict["pipeline"] = text_classification_pipeline From caefd79c3de1695a6039ca1ec42602dc528d2ceb Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Tue, 30 May 2023 04:16:05 +0700 Subject: [PATCH 11/13] More improvements. 
--- solution/Dockerfile | 5 +++-- solution/app.py | 46 +++++++++++++++++++++++++++++---------- solution/requirements.txt | 2 +- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index 6960d2c..e9f0991 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,12 +1,13 @@ # syntax = docker/dockerfile:1.4 -FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime +FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime WORKDIR /code COPY ./requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt COPY app.py /code -ENV TRANSFORMERS_CACHE=/models +ENV TRANSFORMERS_CACHE=/models \ + TOKENIZERS_PARALLELISM=false CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file diff --git a/solution/app.py b/solution/app.py index b0caaf6..e2816ae 100644 --- a/solution/app.py +++ b/solution/app.py @@ -1,12 +1,15 @@ import asyncio +import torch + from fastapi import FastAPI -from transformers import pipeline +from transformers import AutoTokenizer, AutoModelForSequenceClassification from starlette.requests import Request +torch.set_float32_matmul_precision('medium') app = FastAPI() -models = { +MODELS = { "cardiffnlp": {"name": "cardiffnlp/twitter-xlm-roberta-base-sentiment"}, "ivanlau": {"name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"}, "svalabs": {"name": "svalabs/twitter-xlm-roberta-crypto-spam"}, @@ -14,12 +17,11 @@ "jy46604790": {"name": "jy46604790/Fake-News-Bert-Detect"} } -for model_key, model_dict in models.items(): - text_classification_pipeline = pipeline('text-classification', model=model_dict["name"], device="cuda:0") - model_dict["pipeline"] = text_classification_pipeline +async def model_inference_task(model_name: str, q: asyncio.Queue): + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device="cuda:0") -async def model_inference_task(model_name, q): while True: strings, queues = [], [] while True: @@ -29,11 +31,31 @@ async def model_inference_task(model_name, q): break strings.append(string) queues.append(rq) - if len(strings) == 3: + if len(strings) == 5: break if not strings: continue - outs = models[model_name]["pipeline"](strings, batch_size=len(strings)) + + encoded_input = tokenizer( + strings, + padding='longest', + truncation=True, + return_token_type_ids=True, + return_tensors='pt' + ).to(device="cuda:0") + logits = model(**encoded_input).logits + + id2label = model.config.id2label + label_ids = logits.argmax(dim=1) + scores = logits.softmax(dim=-1) + outs = [ + { + "label": id2label[label_id.item()], + "score": score[label_id.item()].item() + } + for label_id, score in zip(label_ids, scores) + ] + for rq, out in zip(queues, outs): await rq.put(out) @@ -41,10 +63,10 @@ async def model_inference_task(model_name, q): @app.on_event("startup") async def startup_event(): app.model_queues = {} - for model_key in models.keys(): + for model_key, model_value in MODELS.items(): q = asyncio.Queue() app.model_queues[model_key] = q - asyncio.create_task(model_inference_task(model_key, q)) + asyncio.create_task(model_inference_task(model_value["name"], q)) @app.post("/process") @@ -52,11 +74,13 @@ async def process(request: Request): text = (await request.body()).decode() if not text: return {} - result = {k: None for k in models.keys()} + + result = {k: None for k in MODELS.keys()} for model_key, model_q in request.app.model_queues.items(): response_q = asyncio.Queue() await 
model_q.put((text, response_q)) result[model_key] = await response_q.get() if model_key == "cardiffnlp": result[model_key]["label"] = result[model_key]["label"].upper() + return result diff --git a/solution/requirements.txt b/solution/requirements.txt index e19963f..dde15ee 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -3,5 +3,5 @@ protobuf==3.18.0 pydantic==1.10.7 sentencepiece==0.1.99 tokenizers==0.13.3 -transformers==4.28.1 +transformers==4.29.2 uvicorn==0.20.0 \ No newline at end of file From 295fc045158ba37ab3ac1ddb9056fdb4e9c9a6d1 Mon Sep 17 00:00:00 2001 From: Daniil Novikov Date: Tue, 30 May 2023 16:59:43 +0700 Subject: [PATCH 12/13] Moar improvements. --- solution/app.py | 24 +++++++++++++----------- solution/requirements.txt | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/solution/app.py b/solution/app.py index e2816ae..25da537 100644 --- a/solution/app.py +++ b/solution/app.py @@ -5,33 +5,35 @@ from fastapi import FastAPI from transformers import AutoTokenizer, AutoModelForSequenceClassification from starlette.requests import Request +from optimum.bettertransformer import BetterTransformer -torch.set_float32_matmul_precision('medium') +torch.set_float32_matmul_precision('high') app = FastAPI() MODELS = { - "cardiffnlp": {"name": "cardiffnlp/twitter-xlm-roberta-base-sentiment"}, - "ivanlau": {"name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"}, - "svalabs": {"name": "svalabs/twitter-xlm-roberta-crypto-spam"}, - "EIStakovskii": {"name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"}, - "jy46604790": {"name": "jy46604790/Fake-News-Bert-Detect"} + "cardiffnlp": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "ivanlau": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", + "svalabs": "svalabs/twitter-xlm-roberta-crypto-spam", + "EIStakovskii": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "jy46604790": "jy46604790/Fake-News-Bert-Detect" } async def model_inference_task(model_name: str, q: asyncio.Queue): tokenizer = AutoTokenizer.from_pretrained(model_name) - model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device="cuda:0") + model = AutoModelForSequenceClassification.from_pretrained(model_name) + model = BetterTransformer.transform(model).to(device="cuda:0") while True: strings, queues = [], [] while True: try: - (string, rq) = await asyncio.wait_for(q.get(), timeout=0.015) + (string, rq) = await asyncio.wait_for(q.get(), timeout=0.025) except asyncio.exceptions.TimeoutError: break strings.append(string) queues.append(rq) - if len(strings) == 5: + if len(strings) == 8: break if not strings: continue @@ -63,10 +65,10 @@ async def model_inference_task(model_name: str, q: asyncio.Queue): @app.on_event("startup") async def startup_event(): app.model_queues = {} - for model_key, model_value in MODELS.items(): + for model_key, model_name in MODELS.items(): q = asyncio.Queue() app.model_queues[model_key] = q - asyncio.create_task(model_inference_task(model_value["name"], q)) + asyncio.create_task(model_inference_task(model_name, q)) @app.post("/process") diff --git a/solution/requirements.txt b/solution/requirements.txt index dde15ee..fcbcd17 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -4,4 +4,5 @@ pydantic==1.10.7 sentencepiece==0.1.99 tokenizers==0.13.3 transformers==4.29.2 -uvicorn==0.20.0 \ No newline at end of file +uvicorn==0.20.0 +optimum==1.8.6 From b904d256d6fca4049d9dd245f942ec55db172f2c Mon Sep 
17 00:00:00 2001 From: Daniil Novikov Date: Sat, 3 Jun 2023 01:08:35 +0700 Subject: [PATCH 13/13] Attempt to run through onnx. --- solution/Dockerfile | 11 +++++++++- solution/app.py | 17 +++++++-------- solution/cmd.sh | 44 +++++++++++++++++++++++++++++++++++++++ solution/requirements.txt | 5 +++-- 4 files changed, 65 insertions(+), 12 deletions(-) create mode 100755 solution/cmd.sh diff --git a/solution/Dockerfile b/solution/Dockerfile index e9f0991..06c552b 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -2,12 +2,21 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + git \ + git-lfs \ + jq && \ + git lfs install && \ + rm -rf /var/lib/apt/lists/* + WORKDIR /code COPY ./requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt COPY app.py /code +COPY cmd.sh . ENV TRANSFORMERS_CACHE=/models \ TOKENIZERS_PARALLELISM=false -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file +CMD ["./cmd.sh"] \ No newline at end of file diff --git a/solution/app.py b/solution/app.py index 25da537..d4f4466 100644 --- a/solution/app.py +++ b/solution/app.py @@ -5,24 +5,23 @@ from fastapi import FastAPI from transformers import AutoTokenizer, AutoModelForSequenceClassification from starlette.requests import Request -from optimum.bettertransformer import BetterTransformer +from optimum.onnxruntime import ORTModelForSequenceClassification -torch.set_float32_matmul_precision('high') app = FastAPI() MODELS = { - "cardiffnlp": "cardiffnlp/twitter-xlm-roberta-base-sentiment", - "ivanlau": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", - "svalabs": "svalabs/twitter-xlm-roberta-crypto-spam", - "EIStakovskii": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", - "jy46604790": "jy46604790/Fake-News-Bert-Detect" + "cardiffnlp": "/models/twitter-xlm-roberta-base-sentiment/", + "ivanlau": "/models/language-detection-fine-tuned-on-xlm-roberta-base/", + "svalabs": "/models/twitter-xlm-roberta-crypto-spam/", + "EIStakovskii": "/models/xlm_roberta_base_multilingual_toxicity_classifier_plus/", + "jy46604790": "/models/Fake-News-Bert-Detect/" } async def model_inference_task(model_name: str, q: asyncio.Queue): tokenizer = AutoTokenizer.from_pretrained(model_name) - model = AutoModelForSequenceClassification.from_pretrained(model_name) - model = BetterTransformer.transform(model).to(device="cuda:0") + model = ORTModelForSequenceClassification.from_pretrained(model_name, provider="CUDAExecutionProvider") + model.to(device="cuda:0") while True: strings, queues = [], [] diff --git a/solution/cmd.sh b/solution/cmd.sh new file mode 100755 index 0000000..cc887c4 --- /dev/null +++ b/solution/cmd.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -e +MODELS_DIR='/models' + +if [ -d "$MODELS_DIR" ] +then + echo 'cloning repos...' 
+ git clone https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment /models/twitter-xlm-roberta-base-sentiment + git clone https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base /models/language-detection-fine-tuned-on-xlm-roberta-base + cd /models/language-detection-fine-tuned-on-xlm-roberta-base + tmp=$(mktemp) && jq --arg skipLibCheck true ' .label2id |= with_entries(.value |= tonumber) ' config.json > "$tmp" && mv "$tmp" config.json + git clone https://huggingface.co/svalabs/twitter-xlm-roberta-crypto-spam /models/twitter-xlm-roberta-crypto-spam + git clone https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus /models/xlm_roberta_base_multilingual_toxicity_classifier_plus + cd /models/xlm_roberta_base_multilingual_toxicity_classifier_plus + tmp=$(mktemp) && jq --arg skipLibCheck true '. += { id2label : { "0": "LABEL_0", "1": "LABEL_1" } }' config.json > "$tmp" && mv "$tmp" config.json + git clone https://huggingface.co/jy46604790/Fake-News-Bert-Detect /models/Fake-News-Bert-Detect + cd /models/Fake-News-Bert-Detect + tmp=$(mktemp) && jq --arg skipLibCheck true '. += { id2label : { "0": "LABEL_0", "1": "LABEL_1" } }' config.json > "$tmp" && mv "$tmp" config.json + cd + echo 'done' + + model_files=( + "/models/twitter-xlm-roberta-base-sentiment/" + "/models/language-detection-fine-tuned-on-xlm-roberta-base/" + "/models/twitter-xlm-roberta-crypto-spam/" + "/models/xlm_roberta_base_multilingual_toxicity_classifier_plus/" + "/models/Fake-News-Bert-Detect/" + ) + + for file_path in "${model_files[@]}" + do + rm -f "$file_path/tf_model.h5" + optimum-cli export onnx -m "$file_path" --device cuda --framework pt --optimize O4 --task text-classification "$file_path" + done +else + echo "directory $MODELS_DIR is not mounted, exiting" + exit +fi + +echo 'all done, starting the server' + +cd /code +uvicorn app:app --host 0.0.0.0 --port 8080 \ No newline at end of file diff --git a/solution/requirements.txt b/solution/requirements.txt index fcbcd17..af36117 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -1,8 +1,9 @@ fastapi==0.89.1 -protobuf==3.18.0 +protobuf==3.20.1 pydantic==1.10.7 sentencepiece==0.1.99 tokenizers==0.13.3 transformers==4.29.2 uvicorn==0.20.0 -optimum==1.8.6 +optimum[onnxruntime-gpu]==1.8.6 +onnxruntime-gpu==1.15.0
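
Quick smoke test for the final solution (a sketch, assuming the compose service is up and port 8080 is published as in solution/docker-compose.yml): POST the raw text to /process and the service answers with one {"label": ..., "score": ...} object per model key (cardiffnlp, ivanlau, svalabs, EIStakovskii, jy46604790); an empty body returns {}. Using only the standard library:

    import json
    import urllib.request

    # send a plain-text body to the /process endpoint and pretty-print the reply
    req = urllib.request.Request(
        "http://localhost:8080/process",
        data="The weather in Lisbon is lovely today.".encode(),
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        print(json.dumps(json.load(resp), indent=2))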