Skip to content

Commit

Permalink
Added fine tuned models
Browse files Browse the repository at this point in the history
Signed-off-by: Navneet Verma <[email protected]>
  • Loading branch information
navneet1v committed Jan 12, 2024
1 parent 63fe67f commit c239ed1
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 0 deletions.
2 changes: 2 additions & 0 deletions fine-tuned-models/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.zip filter=lfs diff=lfs merge=lfs -text
*.tar.gz filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions fine-tuned-models/amazon_esci.zip
Git LFS file not shown
16 changes: 16 additions & 0 deletions fine-tuned-models/create_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import hashlib

# Example: model_file_path = "/home/ec2-user/dev/norm_comb_tuned/test/trace_models/dbpedia_tuned.zip"
# File that is hashed when no path is passed on the command line.
model_file_path = "<FULL_PATH_OF_THE_TRACED_MODEL>"

# Read the file in 64 KiB chunks so large model archives are never held in memory at once.
BUF_SIZE = 65536


def compute_sha256(file_path, buf_size=BUF_SIZE):
    """Return the hex-encoded SHA-256 digest of the file at *file_path*.

    Args:
        file_path: Path of the file to hash.
        buf_size: Number of bytes read per chunk; must be positive.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *buf_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately (yielding the digest of empty input) and
    # a negative size reads the whole file at once, so reject both up front.
    if buf_size <= 0:
        raise ValueError("buf_size must be a positive integer")

    sha256 = hashlib.sha256()
    with open(file_path, "rb") as file:
        # iter() with a sentinel stops at the first empty read, i.e. end of file.
        for chunk in iter(lambda: file.read(buf_size), b""):
            sha256.update(chunk)
    return sha256.hexdigest()


def main(argv=None):
    """Print the SHA-256 digest of the chosen file.

    The first command-line argument is used as the path when present;
    otherwise the module-level ``model_file_path`` is hashed.
    """
    import sys

    args = sys.argv[1:] if argv is None else list(argv)
    target_path = args[0] if args else model_file_path
    print(compute_sha256(target_path))


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions fine-tuned-models/dbpedia.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions fine-tuned-models/fiqa.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions fine-tuned-models/nfcorpus.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions fine-tuned-models/quora.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions fine-tuned-models/scidocs.zip
Git LFS file not shown
60 changes: 60 additions & 0 deletions fine-tuned-models/trace_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
from zipfile import ZipFile

import torch
from sentence_transformers import SentenceTransformer

# Fine-tuned SentenceTransformer checkpoint to export. Point this at another
# checkpoint directory (for example "models/dbpedia_custom_small") to trace a
# different model.
model = SentenceTransformer("covid_tasb_9")

# Output locations: the saved model files, the traced .pt file and the final
# zip archive all live in the same folder.
folder_path = "traced_model"
model_name = "trec_covid.pt"
zip_file_name = "trec_covid_tuned.zip"
save_json_folder_path = model_output_path = folder_path

model_path = os.path.join(folder_path, model_name)
zip_file_path = os.path.join(model_output_path, zip_file_name)

print("model_path:", model_path)

# Save the model so that tokenizer.json is available in save_json_folder_path;
# it is added to the zip archive at the end of the script.
model.save(save_json_folder_path)

# The conversion to .pt is done on the CPU: move the model there and keep the
# example input_ids / attention_mask on the same device.
device = torch.device("cpu")
cpu_model = model.to(device)

# Example sentences used to drive the trace.
sentences = [
    "This is the first example we want to explore",
    "I'm using these sentences as example but please try to provide longer example which will be helpful for models",
]

tokenized = cpu_model.tokenizer(
    sentences,
    return_tensors="pt",
    padding=True,
    truncation=True,
)
features = tokenized.to(device)

# Only these two tensors are handed to the tracer as the example input.
example_inputs = {
    "input_ids": features["input_ids"],
    "attention_mask": features["attention_mask"],
}
compiled_model = torch.jit.trace(cpu_model, example_inputs, strict=False)
torch.jit.save(compiled_model, model_path)
print("model file is saved to ", model_path)

# Bundle the traced model together with tokenizer.json as the final output.
with ZipFile(zip_file_path, "w") as archive:
    archive.write(model_path, arcname=model_name)
    archive.write(
        os.path.join(save_json_folder_path, "tokenizer.json"),
        arcname="tokenizer.json",
    )
print("zip file is saved to ", zip_file_path, "\n")
3 changes: 3 additions & 0 deletions fine-tuned-models/trec_covid.zip
Git LFS file not shown

0 comments on commit c239ed1

Please sign in to comment.