diff --git a/models/turbine_models/custom_models/pipeline_base.py b/models/turbine_models/custom_models/pipeline_base.py
new file mode 100644
index 000000000..990165bad
--- /dev/null
+++ b/models/turbine_models/custom_models/pipeline_base.py
@@ -0,0 +1,500 @@
+# Copyright 2024 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import logging
+import torch
+
+import iree.runtime as ireert
+from turbine_models.custom_models.sd_inference import utils, schedulers
+from turbine_models.custom_models.sdxl_inference.pipeline_ir import (
+    get_pipeline_ir,
+)
+from turbine_models.utils.sdxl_benchmark import run_benchmark
+from turbine_models.model_runner import vmfbRunner
+
+from PIL import Image
+import gc
+import os
+import numpy as np
+import time
+import copy
+from datetime import datetime as dt
+
+
+def merge_arg_into_map(model_map, arg, arg_name):
+    if isinstance(arg, dict):
+        for key in arg.keys():
+            model_map[key][arg_name] = arg[key]
+    else:
+        for key in model_map.keys():
+            model_map[key][arg_name] = arg
+    return model_map
+
+
+class PipelineComponent:
+    """
+    Wraps a VMFB runner, with attributes for embedded metadata, device info,
+    and utilities, and methods for handling I/O or otherwise assisting in
+    interfacing with its pipeline and that pipeline's other components.
+    This aims to make new pipelines and execution modes easier to write,
+    manage, and debug.
+    """
+
+    def __init__(self, dest_type=ireert.DeviceArray, dest_dtype="float16"):
+        self.runner = None
+        self.module_name = None
+        self.device = None
+        self.metadata = None
+        self.benchmark = False
+        self.output_type = dest_type
+        self.output_dtype = dest_dtype
+
+    def load(
+        self,
+        rt_device: str,
+        vmfb_path: str,
+        module_name: str,
+        external_weight_path: str = None,
+        extra_plugin=None,
+    ):
+        self.module_name = module_name
+        self.runner = vmfbRunner(
+            rt_device, vmfb_path, external_weight_path, extra_plugin
+        )
+        self.device = self.runner.config.device
+        self.module = getattr(self.runner.ctx.modules, module_name)
+        if "get_metadata" in self.module.keys():
+            self.metadata = self.module["get_metadata"]()
+
+    def unload(self):
+        self.device = None
+        self.runner = None
+        gc.collect()
+
+    def _run(self, function_name, inputs: list):
+        return self.module[function_name](*inputs)
+
+    def _run_and_benchmark(self, function_name, inputs: list):
+        start_time = time.time()
+        output = self._run(function_name, inputs)
+        latency = time.time() - start_time
+        print(f"Latency for {self.module_name}['{function_name}']: {latency} sec")
+        return output
+
+    def __call__(self, function_name, inputs: list):
+        if not isinstance(inputs, list):
+            inputs = [inputs]
+        if self.benchmark:
+            output = self._run_and_benchmark(function_name, inputs)
+        else:
+            output = self._run(function_name, inputs)
+        if output.dtype() != self.output_dtype:
+            output = output.astype(self.output_dtype)
+
+        match self.output_type:
+            case ireert.DeviceArray:
+                return output
+            case torch.Tensor:
+                return torch.tensor(output.to_host())
+            case np.ndarray:
+                return output.to_host()
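+
+# A minimal usage sketch for PipelineComponent (illustrative only; the vmfb,
+# module, function, and weight file names below are hypothetical):
+#
+#   clip = PipelineComponent(dest_type=torch.Tensor, dest_dtype="float16")
+#   clip.load("rocm://0", "clip.vmfb", "compiled_clip", "clip_weights.safetensors")
+#   embeds = clip("encode_prompts", [input_ids])  # returns a torch.Tensor
+#   clip.unload()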
+
+
+class TurbinePipelineBase:
+    """
+    This class is a lightweight base for Stable Diffusion
+    inference API classes. It should provide methods for:
+
+    - Exporting and compiling a set (model map) of torch IR modules
+    - Preparing weights for an inference job
+    - Loading weights for an inference job
+    - Utilities, e.g. filenames and downloads
+
+    The general flow of an arbitrary child of this pipeline base is as follows:
+    1. Initialize a model map and class attributes.
+    2. Preparation: Check if all necessary files are present, and generate them if not. (prepare_all() / prepare_submodel())
+        - This is done per submodel, so that users can generate a new submodel with the same pipeline.
+        - If a vmfb is not found, first check turbine tank for a matching .vmfb file.
+        - If the vmfb is not downloadable, try downloading the .mlir.
+        - If neither is on Azure, run the export function in the model map to export to torch IR and compile with IREE.
+        - If weights are not found, run the export function in the model map with weights_only=True.
+        - Apps should have populated the weights with custom weights by this point so they can be managed and converted if needed here.
+    3. Load the pipeline: Load the prepared files onto devices as vmfbRunners. (load_pipeline() / load_submodel() / reload_submodel())
+    4. Run inference.
+
+    Arguments:
+    model_map: dict
+        A dictionary mapping submodel names to their export functions and hf model ids. This is used throughout the pipeline.
+        It also should provide I/O information for the submodels.
+    height: int
+        The height of the image to be generated.
+    width: int
+        The width of the image to be generated.
+    precision: str
+        The precision of the image latents. This usually decides the precision of all models in the pipeline.
+    max_length: int
+        The maximum sequence length for text encoders and diffusion models.
+    batch_size: int
+        The number of images to generate from each inference batch. This changes the shapes in all submodels.
+    device: str | dict[str]
+        Either a string, i.e. "rocm://0", or a dictionary of such with keys matching the submodels of a given pipeline.
+        If a string, a dictionary will be created based on the pipeline's model map and the same device will be used for all submodels.
+    iree_target_triple: str | dict[str]
+        Either a string, i.e. "gfx1100", or a dictionary with keys matching the submodels of a given pipeline.
+    ireec_flags: str | dict[str]
+        A comma-separated string of flags to pass to the IREE compiler, or a dict of them with keys matching submodels of a given pipeline.
+    """
+
+    def __init__(
+        self,
+        model_map: dict,
+        batch_size: int,
+        device: str | dict[str],
+        iree_target_triple: str | dict[str],
+        ireec_flags: str | dict[str] = None,
+        precision: str | dict[str] = "fp16",
+        td_spec: str | dict[str] = None,
+        decomp_attn: bool | dict[bool] = False,
+        external_weights: str | dict[str] = "safetensors",
+        pipeline_dir: str = "./shark_vmfbs",
+        external_weights_dir: str = "./shark_weights",
+    ):
+        self.map = model_map
+        self.batch_size = batch_size
+        if isinstance(device, dict):
+            assert isinstance(
+                iree_target_triple, dict
+            ), "Device and target triple must be both dicts or both strings."
+            for submodel in self.map.keys():
+                assert submodel in device.keys(), f"Device for {submodel} not found."
+                assert (
+                    submodel in iree_target_triple.keys()
+                ), f"Target arch for {submodel} not found."
+                self.map[submodel]["device"] = device[submodel]
+                self.map[submodel]["driver"] = utils.iree_device_map(device[submodel])
+                self.map[submodel]["target"] = iree_target_triple[submodel]
+        else:
+            assert isinstance(
+                iree_target_triple, str
+            ), "Device and target triple must be both dicts or both strings."
+            for submodel in self.map.keys():
+                self.map[submodel]["device"] = device
+                self.map[submodel]["driver"] = utils.iree_device_map(device)
+                self.map[submodel]["target"] = iree_target_triple
+        map_arguments = {
+            "ireec_flags": ireec_flags,
+            "precision": precision,
+            "td_spec": td_spec,
+            "decomp_attn": decomp_attn,
+            "external_weights": external_weights,
+        }
+        for arg in map_arguments.keys():
+            self.map = merge_arg_into_map(self.map, map_arguments[arg], arg)
+        np_dtypes = {
+            "fp16": np.float16,
+            "fp32": np.float32,
+        }
+        torch_dtypes = {
+            "fp16": torch.float16,
+            "fp32": torch.float32,
+        }
+        for submodel in self.map.keys():
+            self.map[submodel]["np_dtype"] = np_dtypes[self.map[submodel]["precision"]]
+            self.map[submodel]["torch_dtype"] = torch_dtypes[
+                self.map[submodel]["precision"]
+            ]
+        print(self.map)
+
+        self.pipeline_dir = pipeline_dir
+        self.external_weights_dir = external_weights_dir
+
+        # Disabled for now -- enable through an option when turbine tank is ready.
+        self.download = False
+
+        # These arguments are set at run or load time.
+        self.compiled_pipeline = False
+        self.split_scheduler = False
+        self.cpu_scheduling = False
+
+        # TODO: set this based on user-provided guidance scale and negative prompt.
+        self.do_classifier_free_guidance = True  # False if any(x in hf_model_name for x in ["turbo", "lightning"]) else True
+        self._interrupt = False
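+
+    # Construction sketch (hypothetical model map and device names): with dicts,
+    # the keys must match the submodels in model_map, e.g.
+    #
+    #   pipe = MyPipeline(            # a subclass of TurbinePipelineBase
+    #       model_map={"unet": {...}, "vae": {...}},
+    #       batch_size=1,
+    #       device={"unet": "rocm://0", "vae": "rocm://1"},
+    #       iree_target_triple={"unet": "gfx1100", "vae": "gfx1100"},
+    #   )
+    #
+    # A single string (device="rocm://0", iree_target_triple="gfx1100") applies
+    # the same device and target to every submodel.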
+
+    # FILE MANAGEMENT AND PIPELINE SETUP
+
+    def prepare_all(
+        self,
+        mlirs: dict,
+        vmfbs: dict,
+        weights: dict,
+        interactive: bool = True,
+    ):
+        ready = self.is_prepared(vmfbs, weights)
+        match ready:
+            case True:
+                print("All necessary files found.")
+                return
+            case False:
+                if interactive:
+                    do_continue = input(
+                        "\nIt seems you are missing some necessary files. Would you like to generate them now? (y/n)"
+                    )
+                    if do_continue.lower() != "y":
+                        exit()
+                for submodel in self.map.keys():
+                    if not self.map[submodel].get("vmfb"):
+                        print("Fetching: ", submodel)
+                        if mlirs.get(submodel):
+                            self.map[submodel]["mlir"] = mlirs[submodel]
+                        self.export_submodel(submodel)
+                    if not self.map[submodel]["external_weights"]:
+                        assert not self.map[submodel].get(
+                            "weights"
+                        ), "External weights should not be used for a model with inlined params."
+                    elif not self.map[submodel].get("weights"):
+                        self.export_submodel(submodel, weights_only=True)
+                return self.prepare_all(mlirs, vmfbs, weights, interactive)
+
+    def is_prepared(self, vmfbs, weights):
+        missing = {}
+        pipeline_dir = self.pipeline_dir
+        for key in self.map:
+            missing[key] = []
+            # vmfb is already present in model map
+            if self.map[key].get("vmfb"):
+                continue
+            # vmfb is passed in to this function
+            elif vmfbs.get(key):
+                self.map[key]["vmfb"] = vmfbs[key]
+                continue
+            # search self.pipeline_dir for a key-specific vmfb
+            keywords = copy.copy(self.map[key].get("keywords", []))
+            keywords.extend(
+                [
+                    self.map[key]["safe_name"],
+                    "vmfb",
+                    "bs" + str(self.batch_size),
+                    self.map[key]["target"],
+                    self.map[key]["precision"],
+                ]
+            )
+            avail_files = (
+                os.listdir(pipeline_dir) if os.path.exists(pipeline_dir) else []
+            )
+            candidates = []
+            for filename in avail_files:
+                if all(str(x) in filename for x in keywords):
+                    candidates.append(os.path.join(pipeline_dir, filename))
+            if len(candidates) == 1:
+                self.map[key]["vmfb"] = candidates[0]
+            elif len(candidates) > 1:
+                print(f"Multiple files found for {key}: {candidates}")
+                print(f"Choosing {candidates[0]} for {key}.")
+                self.map[key]["vmfb"] = candidates[0]
+            else:
+                # vmfb not found in pipeline_dir. Add to the list of files to generate.
+                missing[key].append("vmfb")
+
+            # Make sure the vmfb actually needs external weights, as they may be inlined.
+            if self.map[key].get("external_weights"):
+                if self.map[key].get("weights"):
+                    # weights already found in model map
+                    continue
+                elif weights.get(key):
+                    # weights passed in to this function
+                    self.map[key]["weights"] = weights[key]
+                    continue
+                # search self.external_weights_dir for key-specific weights
+                w_keywords = [
+                    self.map[key]["safe_name"],
+                    self.map[key]["precision"],
+                    self.map[key]["external_weights"],
+                ]
+                avail_files = (
+                    os.listdir(self.external_weights_dir)
+                    if os.path.exists(self.external_weights_dir)
+                    else []
+                )
+                candidates = []
+                for filename in avail_files:
+                    if all(str(x) in filename for x in w_keywords):
+                        candidates.append(
+                            os.path.join(self.external_weights_dir, filename)
+                        )
+                if len(candidates) == 1:
+                    self.map[key]["weights"] = candidates[0]
+                elif len(candidates) > 1:
+                    print(f"Multiple weight files found for {key}: {candidates}")
+                    print(f"Choosing {candidates[0]} for {key}.")
+                    self.map[key]["weights"] = candidates[0]
+                else:
+                    # weights not found in external_weights_dir. Add to the list of files to generate.
+                    missing[key].append("weights")
+        ready = True
+        for key in missing.keys():
+            if len(missing[key]) > 0:
+                print(f"Missing files for {key}: ", missing[key])
+                ready = False
+        return ready
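+
+    # Filename matching sketch (hypothetical file): an entry with safe_name
+    # "sdxl_unet", batch size 2, target "gfx1100", and precision "fp16" is
+    # matched in pipeline_dir by a file such as
+    #   sdxl_unet_bs2_fp16_gfx1100.vmfb
+    # because every keyword above appears as a substring of the filename.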
+
+    def get_mlir_from_turbine_tank(self, submodel, container_name):
+        from turbine_models.turbine_tank import downloadModelArtifacts
+
+        safe_name = utils.create_safe_name(
+            self.hf_model_name,
+            f"_{self.max_length}_{self.height}x{self.width}_{self.precision}_{submodel}.mlir",
+        )
+        mlir_path = downloadModelArtifacts(
+            safe_name,
+            container_name,
+        )
+        return mlir_path
+
+    # IMPORT / COMPILE PHASE
+
+    def export_submodel(
+        self,
+        submodel: str,
+        weights_only: bool = False,
+    ):
+        if not os.path.exists(self.pipeline_dir):
+            os.makedirs(self.pipeline_dir)
+
+        if self.map[submodel]["external_weights"] and self.external_weights_dir:
+            if not os.path.exists(self.external_weights_dir):
+                os.makedirs(self.external_weights_dir, exist_ok=True)
+            self.map[submodel]["weights"] = os.path.join(
+                self.external_weights_dir,
+                f"{submodel}_{self.map[submodel]['precision']}."
+                + self.map[submodel]["external_weights"],
+            )
+        elif not self.map[submodel]["external_weights"]:
+            print(
+                "No external weights type specified using --external_weights; weights for imported .mlir files will not be externalized."
+            )
+            self.map[submodel]["weights"] = None
+
+        if weights_only:
+            input_mlir = None
+        elif "mlir" in self.map[submodel].keys():
+            input_mlir = self.map[submodel]["mlir"]
+        elif self.download:
+            try:
+                input_mlir = self.get_mlir_from_turbine_tank(
+                    submodel, self.tank_container
+                )
+            except Exception:
+                input_mlir = None
+        else:
+            input_mlir = None
+        self.map[submodel]["mlir"] = input_mlir
+
+        match submodel:
+            case "unetloop":  # SDXL only, for now.
+                pipeline_file = get_pipeline_ir(
+                    self.width,
+                    self.height,
+                    self.precision,
+                    self.batch_size,
+                    self.max_length,
+                    "unet_loop",
+                )
+                pipeline_keys = [
+                    utils.create_safe_name(self.hf_model_name.split("/")[-1], ""),
+                    "bs" + str(self.batch_size),
+                    f"{str(self.width)}x{str(self.height)}",
+                    self.precision,
+                    str(self.max_length),
+                    "unetloop",
+                ]
+                vmfb_path = utils.compile_to_vmfb(
+                    pipeline_file,
+                    self.map["unet"]["device"],
+                    self.map["unet"]["target"],
+                    self.map[submodel]["ireec_flags"],
+                    os.path.join(self.pipeline_dir, "_".join(pipeline_keys)),
+                    return_path=True,
+                    mlir_source="str",
+                )
+                self.map[submodel]["vmfb"] = vmfb_path
+                self.map[submodel]["weights"] = None
+            case "fullpipeline":  # SDXL only, for now.
+                pipeline_file = get_pipeline_ir(
+                    self.width,
+                    self.height,
+                    self.precision,
+                    self.batch_size,
+                    self.max_length,
+                    "tokens_to_image",
+                )
+                pipeline_keys = [
+                    utils.create_safe_name(self.hf_model_name.split("/")[-1], ""),
+                    "bs" + str(self.batch_size),
+                    f"{str(self.width)}x{str(self.height)}",
+                    self.precision,
+                    str(self.max_length),
+                    "fullpipeline",
+                ]
+                vmfb_path = utils.compile_to_vmfb(
+                    pipeline_file,
+                    self.map["unet"]["device"],
+                    self.map["unet"]["target"],
+                    self.map[submodel]["ireec_flags"],
+                    os.path.join(self.pipeline_dir, "_".join(pipeline_keys)),
+                    return_path=True,
+                    mlir_source="str",
+                )
+                self.map[submodel]["vmfb"] = vmfb_path
+                self.map[submodel]["weights"] = None
+            case _:
+                export_args = dict(**self.map[submodel]["export_args"])
+                export_args["input_mlir"] = self.map[submodel].get("mlir")
+                vmfb_path = self.map[submodel]["export"](**export_args)
+                self.map[submodel]["vmfb"] = vmfb_path
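+
+    # A model map entry consumed by export_submodel might look like this
+    # (hypothetical keys and names; each pipeline defines its own map):
+    #
+    #   "unet": {
+    #       "export": export_unet_model,     # export/compile callable
+    #       "module_name": "compiled_unet",
+    #       "safe_name": "sdxl_unet",
+    #       "keywords": ["unet"],
+    #       "export_args": {"batch_size": 1, "precision": "fp16"},
+    #   }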
+
+    # LOAD
+
+    def load_map(self):
+        for submodel in self.map.keys():
+            self.load_submodel(submodel)
+
+    def load_submodel(self, submodel):
+        if not self.map[submodel].get("vmfb"):
+            raise ValueError(f"VMFB not found for {submodel}.")
+        if not self.map[submodel].get("weights") and self.map[submodel].get(
+            "external_weights"
+        ):
+            raise ValueError(f"Weights not found for {submodel}.")
+        self.map[submodel]["runner"] = PipelineComponent()
+        self.map[submodel]["runner"].load(
+            self.map[submodel]["driver"],
+            self.map[submodel]["vmfb"],
+            self.map[submodel]["module_name"],
+            self.map[submodel].get("weights"),
+            self.map[submodel].get("extra_plugin"),
+        )
+        setattr(self, submodel, self.map[submodel]["runner"])
+
+    def unload_submodel(self, submodel):
+        self.map[submodel]["runner"].unload()
+        setattr(self, submodel, None)
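+
+# Typical load/run/unload flow for a subclass instance (sketch; names like
+# "unet" and "run_forward" are hypothetical and depend on the model map):
+#
+#   pipe.load_map()                        # one PipelineComponent per submodel
+#   latents = pipe.unet("run_forward", [sample, timestep, prompt_embeds])
+#   pipe.unload_submodel("unet")           # release the runner and free memory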
+
+
+def numpy_to_pil_image(images):
+    """
+    Convert a numpy image or a batch of images to a PIL image.
+    """
+    if images.ndim == 3:
+        images = images[None, ...]
+    images = (images * 255).round().astype("uint8")
+    if images.shape[-1] == 1:
+        # special case for grayscale (single channel) images
+        pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
+    else:
+        pil_images = [Image.fromarray(image) for image in images]
+    return pil_images
diff --git a/models/turbine_models/custom_models/sd3_inference/sd3_pipeline.py b/models/turbine_models/custom_models/sd3_inference/sd3_pipeline.py
index 256e4d21b..1068d6b6c 100644
--- a/models/turbine_models/custom_models/sd3_inference/sd3_pipeline.py
+++ b/models/turbine_models/custom_models/sd3_inference/sd3_pipeline.py
@@ -123,7 +123,7 @@ def __init__(
         self.external_weights_dir = external_weights_dir
         self.external_weights = external_weights
         self.vae_decomp_attn = vae_decomp_attn
-        self.custom_vae = custom_vae
+        self.custom_vae = None
         self.cpu_scheduling = cpu_scheduling
         self.torch_dtype = torch.float32 if self.precision == "fp32" else torch.float16
         self.vae_precision = vae_precision if vae_precision else self.precision