This repository has been archived by the owner on Jun 25, 2023. It is now read-only.

Initial solution commit #33

Open · wants to merge 3 commits into base: main

Changes from 2 commits
Empty file added solution/.gitignore
Empty file.
24 changes: 24 additions & 0 deletions solution/Dockerfile
@@ -0,0 +1,24 @@
# Local testing prerequisite: sudo apt install docker-compose

FROM pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime

COPY requirements.txt .

RUN apt-get update \
    && apt-get install -y curl tzdata wget unzip sudo software-properties-common
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
RUN apt-get install -y git-all git-lfs \
    && git lfs install
RUN pip install --upgrade pip \
    && pip uninstall -y fastapi dataclasses aiofiles pydantic \
    && pip install --no-cache-dir -U -r requirements.txt

WORKDIR /code/
ENV PYTHONPATH=/code/
COPY ./src /code/src
COPY ./config /code/config
COPY ./helm /code/helm

# RUN python src/utils.py

EXPOSE 8000
Collaborator

Hi @kseniiaarshinina, thank you for the interest in this challenge and your wonderful solution. We couldn't run our tests -- the Dockerfile is missing an entrypoint, so the pod was restarting over and over again (we don't use docker-compose for deployment, only helm). Could you please adjust the Dockerfile and re-request the review? Thank you in advance!

Author

Hi @rsolovev! Thank you for your acknowledgement of my solution. Sure thing, will fix it right away.
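
For reference, a minimal fix could be a default command that mirrors the uvicorn invocation in docker-compose.yml below; the module path and port here are assumptions taken from this PR's compose file, not from the challenge's helm chart:

# Hypothetical default entrypoint, mirroring the docker-compose.yml command;
# adjust if the helm chart expects a different invocation
CMD ["uvicorn", "src.server:app", "--host", "0.0.0.0", "--port", "8000"]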

10 changes: 10 additions & 0 deletions solution/config/config.py
@@ -0,0 +1,10 @@

from dynaconf import Dynaconf

settings = Dynaconf(
    envvar_prefix="DYNACONF",
    settings_files=['settings.yaml', 'models.yaml'],
)

# `envvar_prefix` = export envvars with `export DYNACONF_FOO=bar`.
# `settings_files` = load these files, in this order.
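
As a quick illustration, this is how the settings object can be consumed elsewhere in the solution (a sketch; the iteration pattern mirrors src/server.py below):

# Sketch: reading the model list that Dynaconf loads from models.yaml
from config.config import settings

for model in settings["models"]:
    # each entry is a one-key mapping, e.g. {"cardiffnlp": {...}}
    short_name = list(model.keys())[0]
    print(short_name, list(model.values())[0]["model_name"])
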
17 changes: 17 additions & 0 deletions solution/config/models.yaml
@@ -0,0 +1,17 @@
models:
  - cardiffnlp:
      model_name: cardiffnlp/twitter-xlm-roberta-base-sentiment
      label_mapping: {0: NEGATIVE, 1: NEUTRAL, 2: POSITIVE}
  - ivanlau:
      model_name: ivanlau/language-detection-fine-tuned-on-xlm-roberta-base
      label_mapping: {0: Arabic, 1: Basque, 2: Breton, 3: Catalan, 4: Chinese_China, 5: Chinese_Hongkong, 6: Chinese_Taiwan, 7: Chuvash, 8: Czech, 9: Dhivehi, 10: Dutch, 11: English, 12: Esperanto, 13: Estonian, 14: French, 15: Frisian, 16: Georgian, 17: German, 18: Greek, 19: Hakha_Chin, 20: Indonesian, 21: Interlingua, 22: Italian, 23: Japanese, 24: Kabyle, 25: Kinyarwanda, 26: Kyrgyz, 27: Latvian, 28: Maltese, 29: Mongolian, 30: Persian, 31: Polish, 32: Portuguese, 33: Romanian, 34: Romansh_Sursilvan, 35: Russian, 36: Sakha, 37: Slovenian, 38: Spanish, 39: Swedish, 40: Tamil, 41: Tatar, 42: Turkish, 43: Ukranian, 44: Welsh}
  - svalabs:
      model_name: svalabs/twitter-xlm-roberta-crypto-spam
      label_mapping: {0: HAM, 1: SPAM}
  - EIStakovskii:
      model_name: EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus
      label_mapping: {0: LABEL_0, 1: LABEL_1}
  - jy46604790:
      model_name: jy46604790/Fake-News-Bert-Detect
      label_mapping: {0: LABEL_0, 1: LABEL_1}
1 change: 1 addition & 0 deletions solution/config/settings.yaml
@@ -0,0 +1 @@
--- {}
16 changes: 16 additions & 0 deletions solution/docker-compose.yml
@@ -0,0 +1,16 @@
version: '3'

networks:
  solution_network:
    driver: bridge
services:
  solution:
    build:
      context: .
    networks:
      - solution_network
    # environment:
    #   ?
    command: "uvicorn src.server:app --host 0.0.0.0 --port 8000"
    ports:
      - 8000:8000
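
Assuming the image builds cleanly, a local smoke test of this compose setup could look like the following (the payload is an arbitrary example; src/server.py reads the raw request body):

docker-compose up --build -d
curl -X POST http://localhost:8000/process -d "This is a test sentence"
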
10 changes: 10 additions & 0 deletions solution/requirements.txt
@@ -0,0 +1,10 @@
dynaconf
fastapi
GitPython
msgpack >= 1.0.0
protobuf==3.20.*
sentencepiece
ThreadPoolExecutorPlus >= 0.2.2
transformers
uvicorn
47 changes: 47 additions & 0 deletions solution/src/model.py
@@ -0,0 +1,47 @@
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import logging
import sys
import torch


log = logging.getLogger(__name__)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(20)
formatter = logging.Formatter("%(asctime)s %(levelname)s - %(message)s")
ch.setFormatter(formatter)
log.addHandler(ch)
log.setLevel(20)


class Model:
    def __init__(self, model_settings):
        # model_settings is a one-key mapping from models.yaml, e.g. {"cardiffnlp": {...}}
        model_config = list(model_settings.values())[0].to_dict()
        self.model_name = model_config["model_name"]
        self.model_name_short = list(model_settings.keys())[0]
        self.label_mapping = model_config["label_mapping"]
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        log.info(f"Model name: {self.model_name}")
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model.to(self.device)

    def predict(self, text):
        # Preprocess text
        inputs = self.tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(self.device)

        # Run inference without tracking gradients
        with torch.no_grad():
            output = self.model(**inputs)
        logits = output.logits
        probabilities = logits.softmax(dim=1)
        predicted_class = probabilities.argmax(dim=1).item()
        predicted_label = self.label_mapping[predicted_class]
        confidence = probabilities[0, predicted_class].item()

        return {
            "score": confidence,
            "label": predicted_label,
        }
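
For illustration, a minimal standalone use of this class, feeding it the first entry from models.yaml via the Dynaconf settings (a hypothetical driver script, not part of the service):

# Sketch: instantiating Model from the first configured entry (cardiffnlp)
from config.config import settings
from src.model import Model

sentiment_model = Model(settings["models"][0])
print(sentiment_model.predict("I love this!"))  # e.g. {"score": 0.98, "label": "POSITIVE"}
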
82 changes: 82 additions & 0 deletions solution/src/server.py
@@ -0,0 +1,82 @@
from fastapi import FastAPI, Request
import asyncio
import logging
import sys
from config.config import settings
from src.model import Model


log = logging.getLogger(__name__)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(20)
formatter = logging.Formatter("%(asctime)s %(levelname)s - %(message)s")
ch.setFormatter(formatter)
log.addHandler(ch)
log.setLevel(20)


app = FastAPI()
app.request_queue = asyncio.Queue()
app.models = {}


# Instantiate HF model objects from the configured model list
def init_models(models_dict, models_settings):
    if not models_dict:
        for model in models_settings["models"]:
            log.debug(f"Model in progress: {model}")
            model_instance = Model(model)

            # Store model
            models_dict[model_instance.model_name] = model_instance
    log.debug(f"Models dict: {models_dict}")
    return models_dict


# Take one queued request and run its text through every model
async def get_inference_results():
    inference_results = {}
    job = await app.request_queue.get()
    log.info(f"Got a job (size of remaining queue: {app.request_queue.qsize()})")

    try:
        # Process the received text
        data = await job.body()
        log.info(f"Text: {data.decode()}")

        for _, model in app.models.items():
            model_output = model.predict(data.decode())
            log.debug(f"After the model's work: {model_output}")

            # Store inference result under the model's short name
            inference_results[model.model_name_short] = model_output

        log.info(f"Inference results: {inference_results}")
        return inference_results

    except Exception as e:
        log.info(f"Exception occurred: {str(e)}")

    finally:
        app.request_queue.task_done()


# Load the models once at application startup; each request drains its own
# queue entry, so no competing background consumer task is started here
@app.on_event("startup")
async def startup():
    app.models = init_models(app.models, settings)


# API endpoint for inference
@app.post("/process")
async def process_request(request: Request):
    await app.request_queue.put(request)
    log.info("Request received and put into the queue")
    inference_results = await get_inference_results()
    return inference_results
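
A hedged sketch of a client call, given that the handler above returns one entry per model keyed by its short name (response values here are illustrative):

# Sketch: posting raw text to the /process endpoint
import requests  # assumes the requests package is available client-side

resp = requests.post("http://localhost:8000/process", data="Bitcoin to the moon!")
print(resp.json())
# e.g. {"cardiffnlp": {"score": 0.87, "label": "POSITIVE"},
#       "svalabs": {"score": 0.95, "label": "SPAM"}, ...}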

28 changes: 28 additions & 0 deletions solution/src/utils.py
@@ -0,0 +1,28 @@
import git
import logging
import sys

from config.config import settings


log = logging.getLogger(__name__)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(10)
formatter = logging.Formatter("%(asctime)s %(levelname)s - %(message)s")
ch.setFormatter(formatter)
log.addHandler(ch)
log.setLevel(10)

# This function can be used if there is enough space to keep the models locally
def download_HF_models(models_settings):
    # Clone each model's repository from the Hugging Face Hub
    for model in models_settings["models"]:
        model_name = list(model.values())[0].to_dict()["model_name"]
        repo_url = f"https://huggingface.co/{model_name}"
        local_path = f"/models/{model_name}"
        git.Repo.clone_from(repo_url, local_path)
        log.debug(f"Repository cloned at location: {local_path}")


if __name__ == "__main__":
    download_HF_models(settings)
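
If the repositories have been cloned as above (git-lfs, installed in the Dockerfile, is needed to fetch the weight files), loading can then point at the local clone instead of the Hub. A sketch using the local_path pattern from download_HF_models:

# Sketch: loading a locally cloned model by its download_HF_models path
from transformers import AutoModelForSequenceClassification, AutoTokenizer

local_path = "/models/cardiffnlp/twitter-xlm-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(local_path)
tokenizer = AutoTokenizer.from_pretrained(local_path)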