Skip to content

Commit

Permalink
turbine tank
Browse files Browse the repository at this point in the history
  • Loading branch information
saienduri committed Feb 14, 2024
1 parent 26d6428 commit 94c09e4
Show file tree
Hide file tree
Showing 8 changed files with 681 additions and 6 deletions.
2 changes: 2 additions & 0 deletions models/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ transformers
accelerate
diffusers==0.24.0
brevitas @ git+https://github.com/Xilinx/brevitas.git@6695e8df7f6a2c7715b9ed69c4b78157376bb60b
# turbine tank downloading/uploading
azure-storage-blob
32 changes: 32 additions & 0 deletions models/turbine_models/custom_models/sd_inference/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import torch
import torch._dynamo as dynamo
from transformers import CLIPTextModel, CLIPTokenizer
from turbine_models.turbine_tank import turbine_tank

import argparse

Expand Down Expand Up @@ -46,6 +47,18 @@
help="Specify vulkan target triple or rocm/cuda target device.",
)
parser.add_argument("--vulkan_max_allocation", type=str, default="4294967296")
# Turbine-tank CLI switches. BooleanOptionalAction also registers the negated
# forms (--no-download_ir / --no-upload_ir).
# NOTE: --download_ir is on by default, and the __main__ block of this script
# raises ValueError when both flags are true, so --upload_ir has to be paired
# with --no-download_ir. export_clip_model's own keyword default for
# download_ir is False, unlike this CLI default.
parser.add_argument(
"--download_ir",
action=argparse.BooleanOptionalAction,
default=True,
help="download IR from turbine tank",
)
# Off by default; when set, the export function writes the generated MLIR to
# "<safe_name>.mlir" and passes that file to turbine_tank.uploadToBlobStorage.
parser.add_argument(
"--upload_ir",
action=argparse.BooleanOptionalAction,
default=False,
help="upload IR to turbine tank",
)


def export_clip_model(
Expand All @@ -57,13 +70,19 @@ def export_clip_model(
device=None,
target_triple=None,
max_alloc=None,
download_ir=False,
upload_ir=False,
):
# Load the tokenizer and text encoder to tokenize and encode the text.
tokenizer = CLIPTokenizer.from_pretrained(
hf_model_name,
subfolder="tokenizer",
token=hf_auth_token,
)

if download_ir:
return turbine_tank.downloadModelArtifacts(hf_model_name + "-clip"), tokenizer

text_encoder_model = CLIPTextModel.from_pretrained(
hf_model_name,
subfolder="text_encoder",
Expand Down Expand Up @@ -94,6 +113,15 @@ def main(self, inp=AbstractTensor(1, 77, dtype=torch.int64)):

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-clip")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload += "-clip"
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str, tokenizer
else:
Expand All @@ -102,6 +130,8 @@ def main(self, inp=AbstractTensor(1, 77, dtype=torch.int64)):

if __name__ == "__main__":
args = parser.parse_args()
if args.upload_ir and args.download_ir:
raise ValueError("upload_ir and download_ir can't both be true")
mod_str, _ = export_clip_model(
args.hf_model_name,
args.hf_auth_token,
Expand All @@ -111,6 +141,8 @@ def main(self, inp=AbstractTensor(1, 77, dtype=torch.int64)):
args.device,
args.iree_target_triple,
args.vulkan_max_allocation,
args.download_ir,
args.upload_ir,
)
safe_name = args.hf_model_name.split("/")[-1].strip()
safe_name = re.sub("-", "_", safe_name)
Expand Down
31 changes: 31 additions & 0 deletions models/turbine_models/custom_models/sd_inference/unet.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import safetensors
import argparse
from turbine_models.turbine_tank import turbine_tank

parser = argparse.ArgumentParser()
parser.add_argument(
Expand Down Expand Up @@ -53,6 +54,18 @@
help="Specify vulkan target triple or rocm/cuda target device.",
)
parser.add_argument("--vulkan_max_allocation", type=str, default="4294967296")
# Turbine-tank CLI switches. BooleanOptionalAction also registers the negated
# forms (--no-download_ir / --no-upload_ir).
# NOTE: --download_ir is on by default, and the __main__ block of this script
# raises ValueError when both flags are true, so --upload_ir has to be paired
# with --no-download_ir. export_unet_model's own keyword default for
# download_ir is False, unlike this CLI default.
parser.add_argument(
"--download_ir",
action=argparse.BooleanOptionalAction,
default=True,
help="download IR from turbine tank",
)
# Off by default; when set, the export function writes the generated MLIR to
# "<safe_name>.mlir" and passes that file to turbine_tank.uploadToBlobStorage.
parser.add_argument(
"--upload_ir",
action=argparse.BooleanOptionalAction,
default=False,
help="upload IR to turbine tank",
)


class UnetModel(torch.nn.Module):
Expand Down Expand Up @@ -90,7 +103,12 @@ def export_unet_model(
device=None,
target_triple=None,
max_alloc=None,
download_ir=False,
upload_ir=False,
):
if download_ir:
return turbine_tank.downloadModelArtifacts(hf_model_name + "-unet")

mapper = {}
utils.save_external_weights(
mapper, unet_model, external_weights, external_weight_path
Expand Down Expand Up @@ -125,6 +143,15 @@ def main(

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-unet")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload += "-unet"
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str
else:
Expand All @@ -133,6 +160,8 @@ def main(

if __name__ == "__main__":
args = parser.parse_args()
if args.upload_ir and args.download_ir:
raise ValueError("upload_ir and download_ir can't both be true")
unet_model = UnetModel(
args.hf_model_name,
args.hf_auth_token,
Expand All @@ -150,6 +179,8 @@ def main(
args.device,
args.iree_target_triple,
args.vulkan_max_allocation,
args.download_ir,
args.upload_ir,
)
safe_name = utils.create_safe_name(args.hf_model_name, "-unet")
with open(f"{safe_name}.mlir", "w+") as f:
Expand Down
2 changes: 1 addition & 1 deletion models/turbine_models/custom_models/sd_inference/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def compile_to_vmfb(module_str, device, target_triple, max_alloc, safe_name):
with open(f"{safe_name}.vmfb", "wb+") as f:
f.write(flatbuffer_blob)
print("Saved to", safe_name + ".vmfb")
exit()
return


def create_safe_name(hf_model_name, model_name_str):
Expand Down
31 changes: 31 additions & 0 deletions models/turbine_models/custom_models/sd_inference/vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import safetensors
import argparse
from turbine_models.turbine_tank import turbine_tank

parser = argparse.ArgumentParser()
parser.add_argument(
Expand Down Expand Up @@ -54,6 +55,18 @@
)
parser.add_argument("--vulkan_max_allocation", type=str, default="4294967296")
parser.add_argument("--variant", type=str, default="decode")
# Turbine-tank CLI switches. BooleanOptionalAction also registers the negated
# forms (--no-download_ir / --no-upload_ir).
# NOTE: --download_ir is on by default, and the __main__ block of this script
# raises ValueError when both flags are true, so --upload_ir has to be paired
# with --no-download_ir. export_vae_model's own keyword default for
# download_ir is False, unlike this CLI default.
parser.add_argument(
"--download_ir",
action=argparse.BooleanOptionalAction,
default=True,
help="download IR from turbine tank",
)
# Off by default; when set, the export function writes the generated MLIR to
# "<safe_name>.mlir" and passes that file to turbine_tank.uploadToBlobStorage
# under a blob name built from the model name and --variant.
parser.add_argument(
"--upload_ir",
action=argparse.BooleanOptionalAction,
default=False,
help="upload IR to turbine tank",
)


class VaeModel(torch.nn.Module):
Expand Down Expand Up @@ -89,7 +102,12 @@ def export_vae_model(
target_triple=None,
max_alloc=None,
variant="decode",
download_ir=False,
upload_ir=False,
):
if download_ir:
return turbine_tank.downloadModelArtifacts(hf_model_name + "-" + variant)

mapper = {}
utils.save_external_weights(
mapper, vae_model, external_weights, external_weight_path
Expand All @@ -113,6 +131,15 @@ def main(self, inp=AbstractTensor(*sample, dtype=torch.float32)):

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-vae")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload = model_name_upload + "-" + variant
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str
else:
Expand All @@ -121,6 +148,8 @@ def main(self, inp=AbstractTensor(*sample, dtype=torch.float32)):

if __name__ == "__main__":
args = parser.parse_args()
if args.upload_ir and args.download_ir:
raise ValueError("upload_ir and download_ir can't both be true")
vae_model = VaeModel(
args.hf_model_name,
args.hf_auth_token,
Expand All @@ -139,6 +168,8 @@ def main(self, inp=AbstractTensor(*sample, dtype=torch.float32)):
args.iree_target_triple,
args.vulkan_max_allocation,
args.variant,
args.download_ir,
args.upload_ir,
)
safe_name = utils.create_safe_name(args.hf_model_name, "-vae")
with open(f"{safe_name}.mlir", "w+") as f:
Expand Down
42 changes: 37 additions & 5 deletions models/turbine_models/custom_models/stateless_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import re
import json
from turbine_models.turbine_tank import turbine_tank

os.environ["TORCH_LOGS"] = "dynamic"
from transformers import AutoTokenizer, AutoModelForCausalLM
Expand Down Expand Up @@ -61,6 +62,18 @@
action="store_true",
help="Compile LLM with StreamingLLM optimizations",
)
# Turbine-tank CLI switches. BooleanOptionalAction also registers the negated
# forms (--no-download_ir / --no-upload_ir).
# NOTE: --download_ir is on by default, and the __main__ block of this script
# raises ValueError when both flags are true, so --upload_ir has to be paired
# with --no-download_ir. export_transformer_model's own keyword default for
# download_ir is False, unlike this CLI default.
parser.add_argument(
"--download_ir",
action=argparse.BooleanOptionalAction,
default=True,
help="download IR from turbine tank",
)
# Off by default; when set, the export function writes the generated MLIR to
# "<safe_name>.mlir" and passes that file to turbine_tank.uploadToBlobStorage.
parser.add_argument(
"--upload_ir",
action=argparse.BooleanOptionalAction,
default=False,
help="upload IR to turbine tank",
)


def generate_schema(num_layers):
Expand Down Expand Up @@ -107,7 +120,18 @@ def export_transformer_model(
vulkan_max_allocation=None,
streaming_llm=False,
vmfb_path=None,
download_ir=False,
upload_ir=False,
):
tokenizer = AutoTokenizer.from_pretrained(
hf_model_name,
use_fast=False,
token=hf_auth_token,
)

if download_ir:
return turbine_tank.downloadModelArtifacts(hf_model_name), tokenizer

mod = AutoModelForCausalLM.from_pretrained(
hf_model_name,
torch_dtype=torch.float,
Expand All @@ -121,11 +145,7 @@ def export_transformer_model(
if precision == "f16":
mod = mod.half()
dtype = torch.float16
tokenizer = AutoTokenizer.from_pretrained(
hf_model_name,
use_fast=False,
token=hf_auth_token,
)

# TODO: generate these values instead of magic numbers
NUM_LAYERS = mod.config.num_hidden_layers
HEADS = getattr(mod.config, "num_key_value_heads", None)
Expand Down Expand Up @@ -319,6 +339,14 @@ def evict_kvcache_space(self):
module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = hf_model_name.split("/")[-1].strip()
safe_name = re.sub("-", "_", safe_name)
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str, tokenizer
else:
Expand Down Expand Up @@ -382,6 +410,8 @@ def evict_kvcache_space(self):

if __name__ == "__main__":
args = parser.parse_args()
if args.upload_ir and args.download_ir:
raise ValueError("upload_ir and download_ir can't both be true")
mod_str, _ = export_transformer_model(
args.hf_model_name,
args.hf_auth_token,
Expand All @@ -395,6 +425,8 @@ def evict_kvcache_space(self):
args.vulkan_max_allocation,
args.streaming_llm,
args.vmfb_path,
args.download_ir,
args.upload_ir,
)
safe_name = args.hf_model_name.split("/")[-1].strip()
safe_name = re.sub("-", "_", safe_name)
Expand Down
Loading

0 comments on commit 94c09e4

Please sign in to comment.