From b2799e8da36c157a3c5448ce882c57c3b89a7f44 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 6 Jun 2024 13:20:46 -0400 Subject: [PATCH] Remove redundant files --- .github/workflows/torchao.yml | 1 + userbenchmark/dynamo/huggingface.py | 682 ---------------------------- userbenchmark/dynamo/timm_models.py | 384 ---------------- 3 files changed, 1 insertion(+), 1066 deletions(-) delete mode 100755 userbenchmark/dynamo/huggingface.py delete mode 100755 userbenchmark/dynamo/timm_models.py diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index da65221cd4..02575810b3 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -19,6 +19,7 @@ jobs: BUILD_ENVIRONMENT: benchmark-nightly if: ${{ github.repository_owner == 'pytorch' }} runs-on: [a100-runner] + timeout-minutes: 1440 # 24 hours steps: - name: Checkout TorchBench uses: actions/checkout@v3 diff --git a/userbenchmark/dynamo/huggingface.py b/userbenchmark/dynamo/huggingface.py deleted file mode 100755 index 5e139783c1..0000000000 --- a/userbenchmark/dynamo/huggingface.py +++ /dev/null @@ -1,682 +0,0 @@ -#!/usr/bin/env python3 -import importlib -import logging -import os -import re -import subprocess -import sys -import warnings - -from common import BenchmarkRunner, download_retry_decorator, main, reset_rng_state - -import torch - -from torch._dynamo.testing import collect_results -from torch._dynamo.utils import clone_inputs - -log = logging.getLogger(__name__) - -# Enable FX graph caching -if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ: - torch._inductor.config.fx_graph_cache = True - - -def pip_install(package): - subprocess.check_call([sys.executable, "-m", "pip", "install", package]) - - -# Disable the flake warnings for the imports. Flake8 does not provide a way to -# disable just warning for the entire file. Disabling flake8 entirely. -# flake8: noqa -imports = [ - "AlbertForPreTraining", - "AutoConfig", - "AutoModelForCausalLM", - "AutoModelForMaskedLM", - "AutoModelForSeq2SeqLM", - "BigBirdConfig", - "BlenderbotForConditionalGeneration", - "BlenderbotModel", - "BlenderbotSmallForConditionalGeneration", - "BlenderbotSmallModel", - "CLIPModel", - "CLIPVisionModel", - "ElectraForPreTraining", - "GPT2ForSequenceClassification", - "GPTJForSequenceClassification", - "GPTNeoForSequenceClassification", - "HubertForSequenceClassification", - "LxmertForPreTraining", - "LxmertForQuestionAnswering", - "MarianForCausalLM", - "MarianModel", - "MarianMTModel", - "PegasusForConditionalGeneration", - "PegasusModel", - "ReformerConfig", - "ViTForImageClassification", - "ViTForMaskedImageModeling", - "ViTModel", -] - - -def process_hf_reformer_output(out): - assert isinstance(out, list) - # second output is unstable - return [elem for i, elem in enumerate(out) if i != 1] - - -try: - mod = importlib.import_module("transformers") - for cls in imports: - if not hasattr(mod, cls): - raise ModuleNotFoundError -except ModuleNotFoundError: - print("Installing HuggingFace Transformers...") - pip_install("git+https://github.com/huggingface/transformers.git#egg=transformers") -finally: - for cls in imports: - exec(f"from transformers import {cls}") - - -# These models contain the models present in huggingface_models_list. It is a -# combination of models supported by HF Fx parser and some manually supplied -# models. For these models, we already know the largest batch size that can fit -# on A100 GPUs - 40 GB. 
-BATCH_SIZE_KNOWN_MODELS = dict() - - -# Get the list of models and their batch sizes -MODELS_FILENAME = os.path.join(os.path.dirname(__file__), "huggingface_models_list.txt") -assert os.path.exists(MODELS_FILENAME) -with open(MODELS_FILENAME, "r") as fh: - lines = fh.readlines() - lines = [line.rstrip() for line in lines] - for line in lines: - model_name, batch_size = line.split(",") - batch_size = int(batch_size) - BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size -assert len(BATCH_SIZE_KNOWN_MODELS) - - -SKIP = { - # Difficult to setup accuracy test because .eval() not supported - "Reformer", - # Fails deepcopy - "BlenderbotForConditionalGeneration", - "GPTNeoForCausalLM", - "GPTNeoForSequenceClassification", - # Fails with even batch size = 1 - "GPTJForCausalLM", - "GPTJForQuestionAnswering", -} - -# TODO - Fails even after fake tensors -BATCH_SIZE_DIVISORS = { - "AlbertForMaskedLM": 2, - "AlbertForQuestionAnswering": 2, - "AllenaiLongformerBase": 2, - "BartForCausalLM": 2, - "BartForConditionalGeneration": 2, - "BertForMaskedLM": 2, - "BertForQuestionAnswering": 2, - "BlenderbotForCausalLM": 8, - # "BlenderbotForConditionalGeneration" : 16, - "BlenderbotSmallForCausalLM": 4, - "BlenderbotSmallForConditionalGeneration": 2, - "CamemBert": 2, - "DebertaForMaskedLM": 4, - "DebertaForQuestionAnswering": 2, - "DebertaV2ForMaskedLM": 4, - "DebertaV2ForQuestionAnswering": 8, - "DistilBertForMaskedLM": 2, - "DistilBertForQuestionAnswering": 2, - "DistillGPT2": 2, - "ElectraForCausalLM": 2, - "ElectraForQuestionAnswering": 2, - "GPT2ForSequenceClassification": 2, - # "GPTJForCausalLM" : 2, - # "GPTJForQuestionAnswering" : 2, - # "GPTNeoForCausalLM" : 32, - # "GPTNeoForSequenceClassification" : 2, - "GoogleFnet": 2, - "LayoutLMForMaskedLM": 2, - "LayoutLMForSequenceClassification": 2, - "M2M100ForConditionalGeneration": 4, - "MBartForCausalLM": 2, - "MBartForConditionalGeneration": 2, - "MT5ForConditionalGeneration": 2, - "MegatronBertForCausalLM": 4, - "MegatronBertForQuestionAnswering": 2, - "MobileBertForMaskedLM": 2, - "MobileBertForQuestionAnswering": 2, - "OPTForCausalLM": 2, - "PLBartForCausalLM": 2, - "PLBartForConditionalGeneration": 2, - "PegasusForCausalLM": 4, - "PegasusForConditionalGeneration": 2, - "RobertaForCausalLM": 2, - "RobertaForQuestionAnswering": 2, - "Speech2Text2ForCausalLM": 4, - "T5ForConditionalGeneration": 2, - "T5Small": 2, - "TrOCRForCausalLM": 2, - "XGLMForCausalLM": 4, - "XLNetLMHeadModel": 2, - "YituTechConvBert": 2, -} - -SKIP_ACCURACY_CHECK_MODELS = { - # Models too large to have eager, dynamo and fp64_numbers simultaneosuly - # even for 40 GB machine. - "DebertaV2ForMaskedLM", - "BlenderbotForCausalLM", -} - -SKIP_DUE_TO_CONTROL_FLOW = {"AllenaiLongformerBase"} - - -REQUIRE_HIGHER_TOLERANCE_TRAINING = { - "MT5ForConditionalGeneration", - # AlbertForQuestionAnswering fails in CI GCP A100 but error does not seem - # harmful. 
- "AlbertForQuestionAnswering", -} -REQUIRE_HIGHER_TOLERANCE_INFERENCE = { - "GPT2ForSequenceClassification", - "RobertaForQuestionAnswering", -} - - -SKIP_FOR_CPU = { - "OPTForCausalLM", # OOMs -} - -ONLY_EVAL_MODE = { - "M2M100ForConditionalGeneration", # Fails with dynamo for train mode -} - -FP32_ONLY_MODELS = { - "GoogleFnet", -} - - -def get_module_cls_by_model_name(model_cls_name): - _module_by_model_name = { - "Speech2Text2Decoder": "transformers.models.speech_to_text_2.modeling_speech_to_text_2", - "TrOCRDecoder": "transformers.models.trocr.modeling_trocr", - } - module_name = _module_by_model_name.get(model_cls_name, "transformers") - module = importlib.import_module(module_name) - return getattr(module, model_cls_name) - - -def get_sequence_length(model_cls, model_name): - if model_name.startswith(("Blenderbot",)): - seq_length = 128 - elif model_name.startswith(("GPT2", "Bart", "T5", "PLBart", "MBart")): - seq_length = 1024 - elif model_name in ("AllenaiLongformerBase", "BigBird"): - seq_length = 1024 - elif model_name.startswith("OPT"): - seq_length = 2048 - elif "Reformer" in model_name: - seq_length = 4096 - elif model_name.startswith( - ( - "Albert", - "Deberta", - "Layout", - "Electra", - "XLNet", - "MegatronBert", - "Bert", - "Roberta", - ) - ) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert", "CamemBert"): - seq_length = 512 - elif model_name in ("TrOCRForCausalLM"): - seq_length = 256 - elif model_name.startswith("MobileBert"): - seq_length = 128 - elif model_name.startswith("Wav2Vec2"): - # If too short, will fail with something like - # ValueError: `mask_length` has to be smaller than `sequence_length`, - # but got `mask_length`: 10 and `sequence_length`: 9` - seq_length = 10000 # NB: a more realistic size is 155136 - else: - log.info( - f"Sequence Length not defined for {model_name}. 
Choosing 128 arbitrarily" - ) - seq_length = 128 - return seq_length - - -def generate_inputs_for_model( - model_cls, model, model_name, bs, device, include_loss_args=False -): - # TODO - Check if following values are representative - num_choices = 3 - num_visual_features = 42 - seq_length = get_sequence_length(model_cls, model_name) - vocab_size = model.config.vocab_size - - if model_name.startswith("Wav2Vec2"): - # TODO: If we add more input_values style models, try to work this - # into the overall control flow - target_length = 100 - return { - "input_values": torch.randn((bs, seq_length), device=device), - # Added because that's what the example training script has - "attention_mask": rand_int_tensor(device, 0, 2, (bs, seq_length)), - "labels": rand_int_tensor(device, 0, vocab_size, (bs, target_length)), - } - - if model_name.endswith("MultipleChoice"): - input = rand_int_tensor(device, 0, vocab_size, (bs, num_choices, seq_length)) - elif model_name.startswith("Roberta"): - input = rand_int_tensor(device, 0, 1, (bs, seq_length)) - else: - input = rand_int_tensor(device, 0, vocab_size, (bs, seq_length)) - - if "Bart" in model_name: - input[:, -1] = model.config.eos_token_id - - input_dict = {"input_ids": input} - - if ( - model_name.startswith("T5") - or model_name.startswith("M2M100") - or model_name.startswith("MT5") - or model_cls - in [ - BlenderbotModel, - BlenderbotSmallModel, - BlenderbotForConditionalGeneration, - BlenderbotSmallForConditionalGeneration, - PegasusModel, - PegasusForConditionalGeneration, - MarianModel, - MarianMTModel, - ] - ): - input_dict["decoder_input_ids"] = input - - if model_name.startswith("Lxmert"): - visual_feat_dim, visual_pos_dim = ( - model.config.visual_feat_dim, - model.config.visual_pos_dim, - ) - input_dict["visual_feats"] = torch.randn( - bs, num_visual_features, visual_feat_dim - ) - input_dict["visual_pos"] = torch.randn(bs, num_visual_features, visual_pos_dim) - - if include_loss_args: - if model_name.endswith("PreTraining"): - if model_cls in [ElectraForPreTraining, LxmertForPreTraining]: - input_dict["labels"] = rand_int_tensor(device, 0, 1, (bs, seq_length)) - else: - label_name = ( - "sentence_order_label" - if model_cls in [AlbertForPreTraining] - else "next_sentence_label" - ) - input_dict["labels"] = ( - rand_int_tensor(device, 0, vocab_size, (bs, seq_length)), - ) - input_dict[label_name] = rand_int_tensor(device, 0, 1, (bs,)) - elif model_name.endswith("QuestionAnswering"): - input_dict["start_positions"] = rand_int_tensor( - device, 0, seq_length, (bs,) - ) - input_dict["end_positions"] = rand_int_tensor(device, 0, seq_length, (bs,)) - elif ( - model_name.endswith("MaskedLM") - or model_name.endswith("HeadModel") - or model_name.endswith("CausalLM") - or model_name.endswith("DoubleHeadsModel") - ): - input_dict["labels"] = rand_int_tensor( - device, 0, vocab_size, (bs, seq_length) - ) - elif model_name.endswith("TokenClassification"): - input_dict["labels"] = rand_int_tensor( - device, 0, model.config.num_labels - 1, (bs, seq_length) - ) - elif model_name.endswith("MultipleChoice"): - input_dict["labels"] = rand_int_tensor(device, 0, num_choices, (bs,)) - elif model_name.endswith("SequenceClassification"): - input_dict["labels"] = rand_int_tensor( - device, 0, model.config.num_labels - 1, (bs,) - ) - elif model_name.endswith("NextSentencePrediction"): - input_dict["labels"] = rand_int_tensor(device, 0, 1, (bs,)) - elif model_name.endswith("ForConditionalGeneration"): - input_dict["labels"] = rand_int_tensor( - device, 0, 
vocab_size - 1, (bs, seq_length) - ) - elif model_name in EXTRA_MODELS: - input_dict["labels"] = rand_int_tensor( - device, 0, vocab_size, (bs, seq_length) - ) - else: - raise NotImplementedError( - f"Class {model_name} unsupported for training test " - ) - - return input_dict - - -def rand_int_tensor(device, low, high, shape): - return torch.randint( - low, - high, - shape, - device=device, - dtype=torch.int64, - requires_grad=False, - ) - - -EXTRA_MODELS = { - "AllenaiLongformerBase": ( - AutoConfig.from_pretrained("allenai/longformer-base-4096"), - AutoModelForMaskedLM, - ), - "Reformer": ( - ReformerConfig(), - AutoModelForMaskedLM, - ), - "T5Small": ( - AutoConfig.from_pretrained("t5-small"), - AutoModelForSeq2SeqLM, - ), - # "BigBird": ( - # BigBirdConfig(attention_type="block_sparse"), - # AutoModelForMaskedLM, - # ), - "DistillGPT2": ( - AutoConfig.from_pretrained("distilgpt2"), - AutoModelForCausalLM, - ), - "GoogleFnet": ( - AutoConfig.from_pretrained("google/fnet-base"), - AutoModelForMaskedLM, - ), - "YituTechConvBert": ( - AutoConfig.from_pretrained("YituTech/conv-bert-base"), - AutoModelForMaskedLM, - ), - "CamemBert": ( - AutoConfig.from_pretrained("camembert-base"), - AutoModelForMaskedLM, - ), -} - - -class HuggingfaceRunner(BenchmarkRunner): - def __init__(self): - super().__init__() - self.suite_name = "huggingface" - - @property - def skip_models_for_cpu(self): - return SKIP_FOR_CPU - - @property - def fp32_only_models(self): - return FP32_ONLY_MODELS - - @property - def skip_models_due_to_control_flow(self): - return SKIP_DUE_TO_CONTROL_FLOW - - def _get_model_cls_and_config(self, model_name): - if model_name not in EXTRA_MODELS: - model_cls = get_module_cls_by_model_name(model_name) - config_cls = model_cls.config_class - config = config_cls() - - # NB: some models need a pad token defined to handle BS > 1 - if ( - model_cls - in [ - GPT2ForSequenceClassification, - GPTNeoForSequenceClassification, - GPTJForSequenceClassification, - ] - or model_cls.__name__.startswith("Roberta") - or model_cls.__name__.startswith("Marian") - ): - config.pad_token_id = 0 - - else: - config, model_cls = EXTRA_MODELS[model_name] - - return model_cls, config - - @download_retry_decorator - def _download_model(self, model_name): - model_cls, config = self._get_model_cls_and_config(model_name) - if "auto" in model_cls.__module__: - # Handle auto classes - model = model_cls.from_config(config) - else: - model = model_cls(config) - return model - - def load_model( - self, - device, - model_name, - batch_size=None, - extra_args=None, - ): - is_training = self.args.training - use_eval_mode = self.args.use_eval_mode - dtype = torch.float32 - reset_rng_state() - model_cls, config = self._get_model_cls_and_config(model_name) - model = self._download_model(model_name) - model = model.to(device, dtype=dtype) - if self.args.enable_activation_checkpointing: - model.gradient_checkpointing_enable() - if model_name in BATCH_SIZE_KNOWN_MODELS: - batch_size_default = BATCH_SIZE_KNOWN_MODELS[model_name] - elif batch_size is None: - batch_size_default = 16 - log.info( - f"Batch size not specified for {model_name}. 
Setting batch_size=16" - ) - - if batch_size is None: - batch_size = batch_size_default - if model_name in BATCH_SIZE_DIVISORS: - batch_size = max(int(batch_size / BATCH_SIZE_DIVISORS[model_name]), 1) - log.info( - f"Running smaller batch size={batch_size} for {model_name}, orig batch_size={batch_size_default}" - ) - - example_inputs = generate_inputs_for_model( - model_cls, model, model_name, batch_size, device, include_loss_args=True - ) - - # So we can check for correct gradients without eliminating the dropout computation - for attr in dir(config): - if "drop" in attr and isinstance(getattr(config, attr), float): - setattr(config, attr, 1e-30) - - if ( - is_training - and not use_eval_mode - and not (self.args.accuracy and model_name in ONLY_EVAL_MODE) - ): - model.train() - else: - model.eval() - - self.validate_model(model, example_inputs) - return device, model_name, model, example_inputs, batch_size - - def iter_model_names(self, args): - model_names = list(BATCH_SIZE_KNOWN_MODELS.keys()) + list(EXTRA_MODELS.keys()) - model_names = set(model_names) - model_names = sorted(model_names) - - start, end = self.get_benchmark_indices(len(model_names)) - for index, model_name in enumerate(model_names): - if index < start or index >= end: - continue - if ( - not re.search("|".join(args.filter), model_name, re.I) - or re.search("|".join(args.exclude), model_name, re.I) - or model_name in args.exclude_exact - or model_name in SKIP - ): - continue - yield model_name - - @property - def skip_accuracy_checks_large_models_dashboard(self): - if self.args.dashboard or self.args.accuracy: - return SKIP_ACCURACY_CHECK_MODELS - return set() - - @property - def get_output_amp_train_process_func(self): - return {} - - def pick_grad(self, name, is_training): - if is_training: - return torch.enable_grad() - else: - return torch.no_grad() - - def get_tolerance_and_cosine_flag(self, is_training, current_device, name): - cosine = self.args.cosine - if is_training: - if name in REQUIRE_HIGHER_TOLERANCE_TRAINING: - return 2e-2, cosine - else: - return 1e-2, cosine - else: - if name in REQUIRE_HIGHER_TOLERANCE_INFERENCE: - return 4e-3, cosine - return 1e-3, cosine - - def compute_loss(self, pred): - return pred[0] - - def forward_pass(self, mod, inputs, collect_outputs=True): - with self.autocast(**self.autocast_arg): - return mod(**inputs) - - def forward_and_backward_pass(self, mod, inputs, collect_outputs=True): - cloned_inputs = clone_inputs(inputs) - self.optimizer_zero_grad(mod) - with self.autocast(**self.autocast_arg): - pred = mod(**cloned_inputs) - loss = self.compute_loss(pred) - self.grad_scaler.scale(loss).backward() - self.optimizer_step() - if collect_outputs: - return collect_results(mod, pred, loss, cloned_inputs) - return None - - -def refresh_model_names_and_batch_sizes(): - """ - This function reads the HF Fx tracer supported models and finds the largest - batch size that could fit on the GPU with PyTorch eager. - - The resulting data is written in huggingface_models_list.txt. - - Note - We only need to run this function if we believe that HF Fx tracer now - supports more models. 
- """ - import transformers.utils.fx as hf_fx - - family = dict() - lm_seen = set() - family_seen = set() - for cls_name in hf_fx._SUPPORTED_MODELS: - if "For" not in cls_name: - continue - - model_cls = get_module_cls_by_model_name(cls_name) - - # TODO: AttributeError: '*Config' object has no attribute 'vocab_size' - if model_cls in [ - CLIPModel, - CLIPVisionModel, - # SwinForImageClassification, - # SwinForImageClassification, - # SwinForMaskedImageModeling, - # SwinModel, - ViTForImageClassification, - ViTForMaskedImageModeling, - ViTModel, - ]: - continue - - # TODO: AssertionError: Padding_idx must be within num_embeddings - if model_cls in [MarianForCausalLM, MarianMTModel, MarianModel]: - continue - - # TODO: "model is not supported yet" from HFTracer - if model_cls in [HubertForSequenceClassification]: - continue - - # TODO: shape mismatch in loss calculation - if model_cls in [LxmertForQuestionAnswering]: - continue - - family_name = cls_name.split("For")[0] - if family_name not in family: - family[family_name] = [] - if cls_name.endswith(("MaskedLM", "CausalLM")) and family_name not in lm_seen: - family[family_name].append(cls_name) - lm_seen.add(family_name) - elif ( - cls_name.endswith( - ("SequenceClassification", "ConditionalGeneration", "QuestionAnswering") - ) - and family_name not in family_seen - ): - family[family_name].append(cls_name) - family_seen.add(family_name) - elif cls_name.endswith("ImageClassification"): - family[family_name].append(cls_name) - - chosen_models = set() - for members in family.values(): - chosen_models.update(set(members)) - - # Add the EXTRA_MODELS - chosen_models.update(set(EXTRA_MODELS.keys())) - - for model_name in sorted(chosen_models): - try: - subprocess.check_call( - [sys.executable] - + sys.argv - + ["--find-batch-sizes"] - + [f"--only={model_name}"] - + [f"--output={MODELS_FILENAME}"] - ) - except subprocess.SubprocessError: - log.warning(f"Failed to find suitable batch size for {model_name}") - - -def huggingface_main(): - # Code to refresh model names and batch sizes - # if "--find-batch-sizes" not in sys.argv: - # refresh_model_names_and_batch_sizes() - logging.basicConfig(level=logging.WARNING) - warnings.filterwarnings("ignore") - main(HuggingfaceRunner()) - - -if __name__ == "__main__": - huggingface_main() diff --git a/userbenchmark/dynamo/timm_models.py b/userbenchmark/dynamo/timm_models.py deleted file mode 100755 index db29a9bf36..0000000000 --- a/userbenchmark/dynamo/timm_models.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python3 -import importlib -import logging -import os -import re -import subprocess -import sys -import warnings - -from common import BenchmarkRunner, download_retry_decorator, main - -import torch - -from torch._dynamo.testing import collect_results, reduce_to_scalar_loss -from torch._dynamo.utils import clone_inputs - -# Enable FX graph caching -if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ: - torch._inductor.config.fx_graph_cache = True - - -def pip_install(package): - subprocess.check_call([sys.executable, "-m", "pip", "install", package]) - - -try: - importlib.import_module("timm") -except ModuleNotFoundError: - print("Installing PyTorch Image Models...") - pip_install("git+https://github.com/rwightman/pytorch-image-models") -finally: - from timm import __version__ as timmversion - from timm.data import resolve_data_config - from timm.models import create_model - -TIMM_MODELS = dict() -filename = os.path.join(os.path.dirname(__file__), "timm_models_list.txt") - -with open(filename) as fh: 
- lines = fh.readlines() - lines = [line.rstrip() for line in lines] - for line in lines: - model_name, batch_size = line.split(" ") - TIMM_MODELS[model_name] = int(batch_size) - - -# TODO - Figure out the reason of cold start memory spike - -BATCH_SIZE_DIVISORS = { - "beit_base_patch16_224": 2, - "convit_base": 2, - "convmixer_768_32": 2, - "convnext_base": 2, - "cspdarknet53": 2, - "deit_base_distilled_patch16_224": 2, - "gluon_xception65": 2, - "mobilevit_s": 2, - "pnasnet5large": 2, - "poolformer_m36": 2, - "resnest101e": 2, - "swin_base_patch4_window7_224": 2, - "swsl_resnext101_32x16d": 2, - "vit_base_patch16_224": 2, - "volo_d1_224": 2, - "jx_nest_base": 4, -} - -REQUIRE_HIGHER_TOLERANCE = { - "fbnetv3_b", - "gmixer_24_224", - "hrnet_w18", - "inception_v3", - "mixer_b16_224", - "sebotnet33ts_256", - "selecsls42b", -} - -REQUIRE_HIGHER_TOLERANCE_FOR_FREEZING = { - "adv_inception_v3", - "botnet26t_256", - "gluon_inception_v3", - "selecsls42b", - "swsl_resnext101_32x16d", -} - -SCALED_COMPUTE_LOSS = { - "ese_vovnet19b_dw", - "fbnetc_100", - "mnasnet_100", - "mobilevit_s", - "sebotnet33ts_256", -} - -FORCE_AMP_FOR_FP16_BF16_MODELS = { - "convit_base", - "xcit_large_24_p8_224", -} - -SKIP_ACCURACY_CHECK_AS_EAGER_NON_DETERMINISTIC_MODELS = { - "xcit_large_24_p8_224", -} - - -def refresh_model_names(): - import glob - - from timm.models import list_models - - def read_models_from_docs(): - models = set() - # TODO - set the path to pytorch-image-models repo - for fn in glob.glob("../pytorch-image-models/docs/models/*.md"): - with open(fn) as f: - while True: - line = f.readline() - if not line: - break - if not line.startswith("model = timm.create_model("): - continue - - model = line.split("'")[1] - # print(model) - models.add(model) - return models - - def get_family_name(name): - known_families = [ - "darknet", - "densenet", - "dla", - "dpn", - "ecaresnet", - "halo", - "regnet", - "efficientnet", - "deit", - "mobilevit", - "mnasnet", - "convnext", - "resnet", - "resnest", - "resnext", - "selecsls", - "vgg", - "xception", - ] - - for known_family in known_families: - if known_family in name: - return known_family - - if name.startswith("gluon_"): - return "gluon_" + name.split("_")[1] - return name.split("_")[0] - - def populate_family(models): - family = dict() - for model_name in models: - family_name = get_family_name(model_name) - if family_name not in family: - family[family_name] = [] - family[family_name].append(model_name) - return family - - docs_models = read_models_from_docs() - all_models = list_models(pretrained=True, exclude_filters=["*in21k"]) - - all_models_family = populate_family(all_models) - docs_models_family = populate_family(docs_models) - - for key in docs_models_family: - del all_models_family[key] - - chosen_models = set() - chosen_models.update(value[0] for value in docs_models_family.values()) - - chosen_models.update(value[0] for key, value in all_models_family.items()) - - filename = "timm_models_list.txt" - if os.path.exists("benchmarks"): - filename = "benchmarks/" + filename - with open(filename, "w") as fw: - for model_name in sorted(chosen_models): - fw.write(model_name + "\n") - - -class TimmRunner(BenchmarkRunner): - def __init__(self): - super().__init__() - self.suite_name = "timm_models" - - @property - def force_amp_for_fp16_bf16_models(self): - return FORCE_AMP_FOR_FP16_BF16_MODELS - - @property - def force_fp16_for_bf16_models(self): - return set() - - @property - def get_output_amp_train_process_func(self): - return {} - - @property - def 
skip_accuracy_check_as_eager_non_deterministic(self): - if self.args.accuracy and self.args.training: - return SKIP_ACCURACY_CHECK_AS_EAGER_NON_DETERMINISTIC_MODELS - return set() - - @property - def guard_on_nn_module_models(self): - return { - "convit_base", - } - - @download_retry_decorator - def _download_model(self, model_name): - model = create_model( - model_name, - in_chans=3, - scriptable=False, - num_classes=None, - drop_rate=0.0, - drop_path_rate=None, - drop_block_rate=None, - pretrained=True, - ) - return model - - def load_model( - self, - device, - model_name, - batch_size=None, - extra_args=None, - ): - if self.args.enable_activation_checkpointing: - raise NotImplementedError( - "Activation checkpointing not implemented for Timm models" - ) - - is_training = self.args.training - use_eval_mode = self.args.use_eval_mode - - channels_last = self._args.channels_last - model = self._download_model(model_name) - - if model is None: - raise RuntimeError(f"Failed to load model '{model_name}'") - model.to( - device=device, - memory_format=torch.channels_last if channels_last else None, - ) - - self.num_classes = model.num_classes - - data_config = resolve_data_config( - vars(self._args) if timmversion >= "0.8.0" else self._args, - model=model, - use_test_size=not is_training, - ) - input_size = data_config["input_size"] - recorded_batch_size = TIMM_MODELS[model_name] - - if model_name in BATCH_SIZE_DIVISORS: - recorded_batch_size = max( - int(recorded_batch_size / BATCH_SIZE_DIVISORS[model_name]), 1 - ) - batch_size = batch_size or recorded_batch_size - - torch.manual_seed(1337) - input_tensor = torch.randint( - 256, size=(batch_size,) + input_size, device=device - ).to(dtype=torch.float32) - mean = torch.mean(input_tensor) - std_dev = torch.std(input_tensor) - example_inputs = (input_tensor - mean) / std_dev - - if channels_last: - example_inputs = example_inputs.contiguous( - memory_format=torch.channels_last - ) - example_inputs = [ - example_inputs, - ] - self.target = self._gen_target(batch_size, device) - - self.loss = torch.nn.CrossEntropyLoss().to(device) - - if model_name in SCALED_COMPUTE_LOSS: - self.compute_loss = self.scaled_compute_loss - - if is_training and not use_eval_mode: - model.train() - else: - model.eval() - - self.validate_model(model, example_inputs) - - return device, model_name, model, example_inputs, batch_size - - def iter_model_names(self, args): - # for model_name in list_models(pretrained=True, exclude_filters=["*in21k"]): - model_names = sorted(TIMM_MODELS.keys()) - start, end = self.get_benchmark_indices(len(model_names)) - for index, model_name in enumerate(model_names): - if index < start or index >= end: - continue - if ( - not re.search("|".join(args.filter), model_name, re.I) - or re.search("|".join(args.exclude), model_name, re.I) - or model_name in args.exclude_exact - or model_name in self.skip_models - ): - continue - - yield model_name - - def pick_grad(self, name, is_training): - if is_training: - return torch.enable_grad() - else: - return torch.no_grad() - - def get_tolerance_and_cosine_flag(self, is_training, current_device, name): - cosine = self.args.cosine - tolerance = 1e-3 - - if self.args.freezing and name in REQUIRE_HIGHER_TOLERANCE_FOR_FREEZING: - # the conv-batchnorm fusion used under freezing may cause relatively - # large numerical difference. We need are larger tolerance. 
-            # Check https://github.com/pytorch/pytorch/issues/120545 for context
-            tolerance = 8 * 1e-2
-
-        if is_training:
-            if name in ["levit_128"]:
-                tolerance = 8 * 1e-2
-            elif name in REQUIRE_HIGHER_TOLERANCE:
-                tolerance = 4 * 1e-2
-            else:
-                tolerance = 1e-2
-        return tolerance, cosine
-
-    def _gen_target(self, batch_size, device):
-        return torch.empty((batch_size,) + (), device=device, dtype=torch.long).random_(
-            self.num_classes
-        )
-
-    def compute_loss(self, pred):
-        # High loss values make gradient checking harder, as small changes in
-        # accumulation order upset accuracy checks.
-        return reduce_to_scalar_loss(pred)
-
-    def scaled_compute_loss(self, pred):
-        # Loss values need to be scaled down further.
-        return reduce_to_scalar_loss(pred) / 1000.0
-
-    def forward_pass(self, mod, inputs, collect_outputs=True):
-        with self.autocast(**self.autocast_arg):
-            return mod(*inputs)
-
-    def forward_and_backward_pass(self, mod, inputs, collect_outputs=True):
-        cloned_inputs = clone_inputs(inputs)
-        self.optimizer_zero_grad(mod)
-        with self.autocast(**self.autocast_arg):
-            pred = mod(*cloned_inputs)
-            if isinstance(pred, tuple):
-                pred = pred[0]
-            loss = self.compute_loss(pred)
-        self.grad_scaler.scale(loss).backward()
-        self.optimizer_step()
-        if collect_outputs:
-            return collect_results(mod, pred, loss, cloned_inputs)
-        return None
-
-
-def timm_main():
-    logging.basicConfig(level=logging.WARNING)
-    warnings.filterwarnings("ignore")
-    main(TimmRunner())
-
-
-if __name__ == "__main__":
-    timm_main()