From b2799e8da36c157a3c5448ce882c57c3b89a7f44 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 6 Jun 2024 13:20:46 -0400 Subject: [PATCH] Remove redundant files --- .github/workflows/torchao.yml | 1 + userbenchmark/dynamo/huggingface.py | 682 ---------------------------- userbenchmark/dynamo/timm_models.py | 384 ---------------- 3 files changed, 1 insertion(+), 1066 deletions(-) delete mode 100755 userbenchmark/dynamo/huggingface.py delete mode 100755 userbenchmark/dynamo/timm_models.py diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index da65221cd4..02575810b3 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -19,6 +19,7 @@ jobs: BUILD_ENVIRONMENT: benchmark-nightly if: ${{ github.repository_owner == 'pytorch' }} runs-on: [a100-runner] + timeout-minutes: 1440 # 24 hours steps: - name: Checkout TorchBench uses: actions/checkout@v3 diff --git a/userbenchmark/dynamo/huggingface.py b/userbenchmark/dynamo/huggingface.py deleted file mode 100755 index 5e139783c1..0000000000 --- a/userbenchmark/dynamo/huggingface.py +++ /dev/null @@ -1,682 +0,0 @@ -#!/usr/bin/env python3 -import importlib -import logging -import os -import re -import subprocess -import sys -import warnings - -from common import BenchmarkRunner, download_retry_decorator, main, reset_rng_state - -import torch - -from torch._dynamo.testing import collect_results -from torch._dynamo.utils import clone_inputs - -log = logging.getLogger(__name__) - -# Enable FX graph caching -if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ: - torch._inductor.config.fx_graph_cache = True - - -def pip_install(package): - subprocess.check_call([sys.executable, "-m", "pip", "install", package]) - - -# Disable the flake warnings for the imports. Flake8 does not provide a way to -# disable just warning for the entire file. Disabling flake8 entirely. -# flake8: noqa -imports = [ - "AlbertForPreTraining", - "AutoConfig", - "AutoModelForCausalLM", - "AutoModelForMaskedLM", - "AutoModelForSeq2SeqLM", - "BigBirdConfig", - "BlenderbotForConditionalGeneration", - "BlenderbotModel", - "BlenderbotSmallForConditionalGeneration", - "BlenderbotSmallModel", - "CLIPModel", - "CLIPVisionModel", - "ElectraForPreTraining", - "GPT2ForSequenceClassification", - "GPTJForSequenceClassification", - "GPTNeoForSequenceClassification", - "HubertForSequenceClassification", - "LxmertForPreTraining", - "LxmertForQuestionAnswering", - "MarianForCausalLM", - "MarianModel", - "MarianMTModel", - "PegasusForConditionalGeneration", - "PegasusModel", - "ReformerConfig", - "ViTForImageClassification", - "ViTForMaskedImageModeling", - "ViTModel", -] - - -def process_hf_reformer_output(out): - assert isinstance(out, list) - # second output is unstable - return [elem for i, elem in enumerate(out) if i != 1] - - -try: - mod = importlib.import_module("transformers") - for cls in imports: - if not hasattr(mod, cls): - raise ModuleNotFoundError -except ModuleNotFoundError: - print("Installing HuggingFace Transformers...") - pip_install("git+https://github.com/huggingface/transformers.git#egg=transformers") -finally: - for cls in imports: - exec(f"from transformers import {cls}") - - -# These models contain the models present in huggingface_models_list. It is a -# combination of models supported by HF Fx parser and some manually supplied -# models. For these models, we already know the largest batch size that can fit -# on A100 GPUs - 40 GB. 
-BATCH_SIZE_KNOWN_MODELS = dict() - - -# Get the list of models and their batch sizes -MODELS_FILENAME = os.path.join(os.path.dirname(__file__), "huggingface_models_list.txt") -assert os.path.exists(MODELS_FILENAME) -with open(MODELS_FILENAME, "r") as fh: - lines = fh.readlines() - lines = [line.rstrip() for line in lines] - for line in lines: - model_name, batch_size = line.split(",") - batch_size = int(batch_size) - BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size -assert len(BATCH_SIZE_KNOWN_MODELS) - - -SKIP = { - # Difficult to setup accuracy test because .eval() not supported - "Reformer", - # Fails deepcopy - "BlenderbotForConditionalGeneration", - "GPTNeoForCausalLM", - "GPTNeoForSequenceClassification", - # Fails with even batch size = 1 - "GPTJForCausalLM", - "GPTJForQuestionAnswering", -} - -# TODO - Fails even after fake tensors -BATCH_SIZE_DIVISORS = { - "AlbertForMaskedLM": 2, - "AlbertForQuestionAnswering": 2, - "AllenaiLongformerBase": 2, - "BartForCausalLM": 2, - "BartForConditionalGeneration": 2, - "BertForMaskedLM": 2, - "BertForQuestionAnswering": 2, - "BlenderbotForCausalLM": 8, - # "BlenderbotForConditionalGeneration" : 16, - "BlenderbotSmallForCausalLM": 4, - "BlenderbotSmallForConditionalGeneration": 2, - "CamemBert": 2, - "DebertaForMaskedLM": 4, - "DebertaForQuestionAnswering": 2, - "DebertaV2ForMaskedLM": 4, - "DebertaV2ForQuestionAnswering": 8, - "DistilBertForMaskedLM": 2, - "DistilBertForQuestionAnswering": 2, - "DistillGPT2": 2, - "ElectraForCausalLM": 2, - "ElectraForQuestionAnswering": 2, - "GPT2ForSequenceClassification": 2, - # "GPTJForCausalLM" : 2, - # "GPTJForQuestionAnswering" : 2, - # "GPTNeoForCausalLM" : 32, - # "GPTNeoForSequenceClassification" : 2, - "GoogleFnet": 2, - "LayoutLMForMaskedLM": 2, - "LayoutLMForSequenceClassification": 2, - "M2M100ForConditionalGeneration": 4, - "MBartForCausalLM": 2, - "MBartForConditionalGeneration": 2, - "MT5ForConditionalGeneration": 2, - "MegatronBertForCausalLM": 4, - "MegatronBertForQuestionAnswering": 2, - "MobileBertForMaskedLM": 2, - "MobileBertForQuestionAnswering": 2, - "OPTForCausalLM": 2, - "PLBartForCausalLM": 2, - "PLBartForConditionalGeneration": 2, - "PegasusForCausalLM": 4, - "PegasusForConditionalGeneration": 2, - "RobertaForCausalLM": 2, - "RobertaForQuestionAnswering": 2, - "Speech2Text2ForCausalLM": 4, - "T5ForConditionalGeneration": 2, - "T5Small": 2, - "TrOCRForCausalLM": 2, - "XGLMForCausalLM": 4, - "XLNetLMHeadModel": 2, - "YituTechConvBert": 2, -} - -SKIP_ACCURACY_CHECK_MODELS = { - # Models too large to have eager, dynamo and fp64_numbers simultaneosuly - # even for 40 GB machine. - "DebertaV2ForMaskedLM", - "BlenderbotForCausalLM", -} - -SKIP_DUE_TO_CONTROL_FLOW = {"AllenaiLongformerBase"} - - -REQUIRE_HIGHER_TOLERANCE_TRAINING = { - "MT5ForConditionalGeneration", - # AlbertForQuestionAnswering fails in CI GCP A100 but error does not seem - # harmful. 
- "AlbertForQuestionAnswering", -} -REQUIRE_HIGHER_TOLERANCE_INFERENCE = { - "GPT2ForSequenceClassification", - "RobertaForQuestionAnswering", -} - - -SKIP_FOR_CPU = { - "OPTForCausalLM", # OOMs -} - -ONLY_EVAL_MODE = { - "M2M100ForConditionalGeneration", # Fails with dynamo for train mode -} - -FP32_ONLY_MODELS = { - "GoogleFnet", -} - - -def get_module_cls_by_model_name(model_cls_name): - _module_by_model_name = { - "Speech2Text2Decoder": "transformers.models.speech_to_text_2.modeling_speech_to_text_2", - "TrOCRDecoder": "transformers.models.trocr.modeling_trocr", - } - module_name = _module_by_model_name.get(model_cls_name, "transformers") - module = importlib.import_module(module_name) - return getattr(module, model_cls_name) - - -def get_sequence_length(model_cls, model_name): - if model_name.startswith(("Blenderbot",)): - seq_length = 128 - elif model_name.startswith(("GPT2", "Bart", "T5", "PLBart", "MBart")): - seq_length = 1024 - elif model_name in ("AllenaiLongformerBase", "BigBird"): - seq_length = 1024 - elif model_name.startswith("OPT"): - seq_length = 2048 - elif "Reformer" in model_name: - seq_length = 4096 - elif model_name.startswith( - ( - "Albert", - "Deberta", - "Layout", - "Electra", - "XLNet", - "MegatronBert", - "Bert", - "Roberta", - ) - ) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert", "CamemBert"): - seq_length = 512 - elif model_name in ("TrOCRForCausalLM"): - seq_length = 256 - elif model_name.startswith("MobileBert"): - seq_length = 128 - elif model_name.startswith("Wav2Vec2"): - # If too short, will fail with something like - # ValueError: `mask_length` has to be smaller than `sequence_length`, - # but got `mask_length`: 10 and `sequence_length`: 9` - seq_length = 10000 # NB: a more realistic size is 155136 - else: - log.info( - f"Sequence Length not defined for {model_name}. 
Choosing 128 arbitrarily" - ) - seq_length = 128 - return seq_length - - -def generate_inputs_for_model( - model_cls, model, model_name, bs, device, include_loss_args=False -): - # TODO - Check if following values are representative - num_choices = 3 - num_visual_features = 42 - seq_length = get_sequence_length(model_cls, model_name) - vocab_size = model.config.vocab_size - - if model_name.startswith("Wav2Vec2"): - # TODO: If we add more input_values style models, try to work this - # into the overall control flow - target_length = 100 - return { - "input_values": torch.randn((bs, seq_length), device=device), - # Added because that's what the example training script has - "attention_mask": rand_int_tensor(device, 0, 2, (bs, seq_length)), - "labels": rand_int_tensor(device, 0, vocab_size, (bs, target_length)), - } - - if model_name.endswith("MultipleChoice"): - input = rand_int_tensor(device, 0, vocab_size, (bs, num_choices, seq_length)) - elif model_name.startswith("Roberta"): - input = rand_int_tensor(device, 0, 1, (bs, seq_length)) - else: - input = rand_int_tensor(device, 0, vocab_size, (bs, seq_length)) - - if "Bart" in model_name: - input[:, -1] = model.config.eos_token_id - - input_dict = {"input_ids": input} - - if ( - model_name.startswith("T5") - or model_name.startswith("M2M100") - or model_name.startswith("MT5") - or model_cls - in [ - BlenderbotModel, - BlenderbotSmallModel, - BlenderbotForConditionalGeneration, - BlenderbotSmallForConditionalGeneration, - PegasusModel, - PegasusForConditionalGeneration, - MarianModel, - MarianMTModel, - ] - ): - input_dict["decoder_input_ids"] = input - - if model_name.startswith("Lxmert"): - visual_feat_dim, visual_pos_dim = ( - model.config.visual_feat_dim, - model.config.visual_pos_dim, - ) - input_dict["visual_feats"] = torch.randn( - bs, num_visual_features, visual_feat_dim - ) - input_dict["visual_pos"] = torch.randn(bs, num_visual_features, visual_pos_dim) - - if include_loss_args: - if model_name.endswith("PreTraining"): - if model_cls in [ElectraForPreTraining, LxmertForPreTraining]: - input_dict["labels"] = rand_int_tensor(device, 0, 1, (bs, seq_length)) - else: - label_name = ( - "sentence_order_label" - if model_cls in [AlbertForPreTraining] - else "next_sentence_label" - ) - input_dict["labels"] = ( - rand_int_tensor(device, 0, vocab_size, (bs, seq_length)), - ) - input_dict[label_name] = rand_int_tensor(device, 0, 1, (bs,)) - elif model_name.endswith("QuestionAnswering"): - input_dict["start_positions"] = rand_int_tensor( - device, 0, seq_length, (bs,) - ) - input_dict["end_positions"] = rand_int_tensor(device, 0, seq_length, (bs,)) - elif ( - model_name.endswith("MaskedLM") - or model_name.endswith("HeadModel") - or model_name.endswith("CausalLM") - or model_name.endswith("DoubleHeadsModel") - ): - input_dict["labels"] = rand_int_tensor( - device, 0, vocab_size, (bs, seq_length) - ) - elif model_name.endswith("TokenClassification"): - input_dict["labels"] = rand_int_tensor( - device, 0, model.config.num_labels - 1, (bs, seq_length) - ) - elif model_name.endswith("MultipleChoice"): - input_dict["labels"] = rand_int_tensor(device, 0, num_choices, (bs,)) - elif model_name.endswith("SequenceClassification"): - input_dict["labels"] = rand_int_tensor( - device, 0, model.config.num_labels - 1, (bs,) - ) - elif model_name.endswith("NextSentencePrediction"): - input_dict["labels"] = rand_int_tensor(device, 0, 1, (bs,)) - elif model_name.endswith("ForConditionalGeneration"): - input_dict["labels"] = rand_int_tensor( - device, 0, 
vocab_size - 1, (bs, seq_length) - ) - elif model_name in EXTRA_MODELS: - input_dict["labels"] = rand_int_tensor( - device, 0, vocab_size, (bs, seq_length) - ) - else: - raise NotImplementedError( - f"Class {model_name} unsupported for training test " - ) - - return input_dict - - -def rand_int_tensor(device, low, high, shape): - return torch.randint( - low, - high, - shape, - device=device, - dtype=torch.int64, - requires_grad=False, - ) - - -EXTRA_MODELS = { - "AllenaiLongformerBase": ( - AutoConfig.from_pretrained("allenai/longformer-base-4096"), - AutoModelForMaskedLM, - ), - "Reformer": ( - ReformerConfig(), - AutoModelForMaskedLM, - ), - "T5Small": ( - AutoConfig.from_pretrained("t5-small"), - AutoModelForSeq2SeqLM, - ), - # "BigBird": ( - # BigBirdConfig(attention_type="block_sparse"), - # AutoModelForMaskedLM, - # ), - "DistillGPT2": ( - AutoConfig.from_pretrained("distilgpt2"), - AutoModelForCausalLM, - ), - "GoogleFnet": ( - AutoConfig.from_pretrained("google/fnet-base"), - AutoModelForMaskedLM, - ), - "YituTechConvBert": ( - AutoConfig.from_pretrained("YituTech/conv-bert-base"), - AutoModelForMaskedLM, - ), - "CamemBert": ( - AutoConfig.from_pretrained("camembert-base"), - AutoModelForMaskedLM, - ), -} - - -class HuggingfaceRunner(BenchmarkRunner): - def __init__(self): - super().__init__() - self.suite_name = "huggingface" - - @property - def skip_models_for_cpu(self): - return SKIP_FOR_CPU - - @property - def fp32_only_models(self): - return FP32_ONLY_MODELS - - @property - def skip_models_due_to_control_flow(self): - return SKIP_DUE_TO_CONTROL_FLOW - - def _get_model_cls_and_config(self, model_name): - if model_name not in EXTRA_MODELS: - model_cls = get_module_cls_by_model_name(model_name) - config_cls = model_cls.config_class - config = config_cls() - - # NB: some models need a pad token defined to handle BS > 1 - if ( - model_cls - in [ - GPT2ForSequenceClassification, - GPTNeoForSequenceClassification, - GPTJForSequenceClassification, - ] - or model_cls.__name__.startswith("Roberta") - or model_cls.__name__.startswith("Marian") - ): - config.pad_token_id = 0 - - else: - config, model_cls = EXTRA_MODELS[model_name] - - return model_cls, config - - @download_retry_decorator - def _download_model(self, model_name): - model_cls, config = self._get_model_cls_and_config(model_name) - if "auto" in model_cls.__module__: - # Handle auto classes - model = model_cls.from_config(config) - else: - model = model_cls(config) - return model - - def load_model( - self, - device, - model_name, - batch_size=None, - extra_args=None, - ): - is_training = self.args.training - use_eval_mode = self.args.use_eval_mode - dtype = torch.float32 - reset_rng_state() - model_cls, config = self._get_model_cls_and_config(model_name) - model = self._download_model(model_name) - model = model.to(device, dtype=dtype) - if self.args.enable_activation_checkpointing: - model.gradient_checkpointing_enable() - if model_name in BATCH_SIZE_KNOWN_MODELS: - batch_size_default = BATCH_SIZE_KNOWN_MODELS[model_name] - elif batch_size is None: - batch_size_default = 16 - log.info( - f"Batch size not specified for {model_name}. 
Setting batch_size=16" - ) - - if batch_size is None: - batch_size = batch_size_default - if model_name in BATCH_SIZE_DIVISORS: - batch_size = max(int(batch_size / BATCH_SIZE_DIVISORS[model_name]), 1) - log.info( - f"Running smaller batch size={batch_size} for {model_name}, orig batch_size={batch_size_default}" - ) - - example_inputs = generate_inputs_for_model( - model_cls, model, model_name, batch_size, device, include_loss_args=True - ) - - # So we can check for correct gradients without eliminating the dropout computation - for attr in dir(config): - if "drop" in attr and isinstance(getattr(config, attr), float): - setattr(config, attr, 1e-30) - - if ( - is_training - and not use_eval_mode - and not (self.args.accuracy and model_name in ONLY_EVAL_MODE) - ): - model.train() - else: - model.eval() - - self.validate_model(model, example_inputs) - return device, model_name, model, example_inputs, batch_size - - def iter_model_names(self, args): - model_names = list(BATCH_SIZE_KNOWN_MODELS.keys()) + list(EXTRA_MODELS.keys()) - model_names = set(model_names) - model_names = sorted(model_names) - - start, end = self.get_benchmark_indices(len(model_names)) - for index, model_name in enumerate(model_names): - if index < start or index >= end: - continue - if ( - not re.search("|".join(args.filter), model_name, re.I) - or re.search("|".join(args.exclude), model_name, re.I) - or model_name in args.exclude_exact - or model_name in SKIP - ): - continue - yield model_name - - @property - def skip_accuracy_checks_large_models_dashboard(self): - if self.args.dashboard or self.args.accuracy: - return SKIP_ACCURACY_CHECK_MODELS - return set() - - @property - def get_output_amp_train_process_func(self): - return {} - - def pick_grad(self, name, is_training): - if is_training: - return torch.enable_grad() - else: - return torch.no_grad() - - def get_tolerance_and_cosine_flag(self, is_training, current_device, name): - cosine = self.args.cosine - if is_training: - if name in REQUIRE_HIGHER_TOLERANCE_TRAINING: - return 2e-2, cosine - else: - return 1e-2, cosine - else: - if name in REQUIRE_HIGHER_TOLERANCE_INFERENCE: - return 4e-3, cosine - return 1e-3, cosine - - def compute_loss(self, pred): - return pred[0] - - def forward_pass(self, mod, inputs, collect_outputs=True): - with self.autocast(**self.autocast_arg): - return mod(**inputs) - - def forward_and_backward_pass(self, mod, inputs, collect_outputs=True): - cloned_inputs = clone_inputs(inputs) - self.optimizer_zero_grad(mod) - with self.autocast(**self.autocast_arg): - pred = mod(**cloned_inputs) - loss = self.compute_loss(pred) - self.grad_scaler.scale(loss).backward() - self.optimizer_step() - if collect_outputs: - return collect_results(mod, pred, loss, cloned_inputs) - return None - - -def refresh_model_names_and_batch_sizes(): - """ - This function reads the HF Fx tracer supported models and finds the largest - batch size that could fit on the GPU with PyTorch eager. - - The resulting data is written in huggingface_models_list.txt. - - Note - We only need to run this function if we believe that HF Fx tracer now - supports more models. 
- """ - import transformers.utils.fx as hf_fx - - family = dict() - lm_seen = set() - family_seen = set() - for cls_name in hf_fx._SUPPORTED_MODELS: - if "For" not in cls_name: - continue - - model_cls = get_module_cls_by_model_name(cls_name) - - # TODO: AttributeError: '*Config' object has no attribute 'vocab_size' - if model_cls in [ - CLIPModel, - CLIPVisionModel, - # SwinForImageClassification, - # SwinForImageClassification, - # SwinForMaskedImageModeling, - # SwinModel, - ViTForImageClassification, - ViTForMaskedImageModeling, - ViTModel, - ]: - continue - - # TODO: AssertionError: Padding_idx must be within num_embeddings - if model_cls in [MarianForCausalLM, MarianMTModel, MarianModel]: - continue - - # TODO: "model is not supported yet" from HFTracer - if model_cls in [HubertForSequenceClassification]: - continue - - # TODO: shape mismatch in loss calculation - if model_cls in [LxmertForQuestionAnswering]: - continue - - family_name = cls_name.split("For")[0] - if family_name not in family: - family[family_name] = [] - if cls_name.endswith(("MaskedLM", "CausalLM")) and family_name not in lm_seen: - family[family_name].append(cls_name) - lm_seen.add(family_name) - elif ( - cls_name.endswith( - ("SequenceClassification", "ConditionalGeneration", "QuestionAnswering") - ) - and family_name not in family_seen - ): - family[family_name].append(cls_name) - family_seen.add(family_name) - elif cls_name.endswith("ImageClassification"): - family[family_name].append(cls_name) - - chosen_models = set() - for members in family.values(): - chosen_models.update(set(members)) - - # Add the EXTRA_MODELS - chosen_models.update(set(EXTRA_MODELS.keys())) - - for model_name in sorted(chosen_models): - try: - subprocess.check_call( - [sys.executable] - + sys.argv - + ["--find-batch-sizes"] - + [f"--only={model_name}"] - + [f"--output={MODELS_FILENAME}"] - ) - except subprocess.SubprocessError: - log.warning(f"Failed to find suitable batch size for {model_name}") - - -def huggingface_main(): - # Code to refresh model names and batch sizes - # if "--find-batch-sizes" not in sys.argv: - # refresh_model_names_and_batch_sizes() - logging.basicConfig(level=logging.WARNING) - warnings.filterwarnings("ignore") - main(HuggingfaceRunner()) - - -if __name__ == "__main__": - huggingface_main() diff --git a/userbenchmark/dynamo/timm_models.py b/userbenchmark/dynamo/timm_models.py deleted file mode 100755 index db29a9bf36..0000000000 --- a/userbenchmark/dynamo/timm_models.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python3 -import importlib -import logging -import os -import re -import subprocess -import sys -import warnings - -from common import BenchmarkRunner, download_retry_decorator, main - -import torch - -from torch._dynamo.testing import collect_results, reduce_to_scalar_loss -from torch._dynamo.utils import clone_inputs - -# Enable FX graph caching -if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ: - torch._inductor.config.fx_graph_cache = True - - -def pip_install(package): - subprocess.check_call([sys.executable, "-m", "pip", "install", package]) - - -try: - importlib.import_module("timm") -except ModuleNotFoundError: - print("Installing PyTorch Image Models...") - pip_install("git+https://github.com/rwightman/pytorch-image-models") -finally: - from timm import __version__ as timmversion - from timm.data import resolve_data_config - from timm.models import create_model - -TIMM_MODELS = dict() -filename = os.path.join(os.path.dirname(__file__), "timm_models_list.txt") - -with open(filename) as fh: 
- lines = fh.readlines() - lines = [line.rstrip() for line in lines] - for line in lines: - model_name, batch_size = line.split(" ") - TIMM_MODELS[model_name] = int(batch_size) - - -# TODO - Figure out the reason of cold start memory spike - -BATCH_SIZE_DIVISORS = { - "beit_base_patch16_224": 2, - "convit_base": 2, - "convmixer_768_32": 2, - "convnext_base": 2, - "cspdarknet53": 2, - "deit_base_distilled_patch16_224": 2, - "gluon_xception65": 2, - "mobilevit_s": 2, - "pnasnet5large": 2, - "poolformer_m36": 2, - "resnest101e": 2, - "swin_base_patch4_window7_224": 2, - "swsl_resnext101_32x16d": 2, - "vit_base_patch16_224": 2, - "volo_d1_224": 2, - "jx_nest_base": 4, -} - -REQUIRE_HIGHER_TOLERANCE = { - "fbnetv3_b", - "gmixer_24_224", - "hrnet_w18", - "inception_v3", - "mixer_b16_224", - "sebotnet33ts_256", - "selecsls42b", -} - -REQUIRE_HIGHER_TOLERANCE_FOR_FREEZING = { - "adv_inception_v3", - "botnet26t_256", - "gluon_inception_v3", - "selecsls42b", - "swsl_resnext101_32x16d", -} - -SCALED_COMPUTE_LOSS = { - "ese_vovnet19b_dw", - "fbnetc_100", - "mnasnet_100", - "mobilevit_s", - "sebotnet33ts_256", -} - -FORCE_AMP_FOR_FP16_BF16_MODELS = { - "convit_base", - "xcit_large_24_p8_224", -} - -SKIP_ACCURACY_CHECK_AS_EAGER_NON_DETERMINISTIC_MODELS = { - "xcit_large_24_p8_224", -} - - -def refresh_model_names(): - import glob - - from timm.models import list_models - - def read_models_from_docs(): - models = set() - # TODO - set the path to pytorch-image-models repo - for fn in glob.glob("../pytorch-image-models/docs/models/*.md"): - with open(fn) as f: - while True: - line = f.readline() - if not line: - break - if not line.startswith("model = timm.create_model("): - continue - - model = line.split("'")[1] - # print(model) - models.add(model) - return models - - def get_family_name(name): - known_families = [ - "darknet", - "densenet", - "dla", - "dpn", - "ecaresnet", - "halo", - "regnet", - "efficientnet", - "deit", - "mobilevit", - "mnasnet", - "convnext", - "resnet", - "resnest", - "resnext", - "selecsls", - "vgg", - "xception", - ] - - for known_family in known_families: - if known_family in name: - return known_family - - if name.startswith("gluon_"): - return "gluon_" + name.split("_")[1] - return name.split("_")[0] - - def populate_family(models): - family = dict() - for model_name in models: - family_name = get_family_name(model_name) - if family_name not in family: - family[family_name] = [] - family[family_name].append(model_name) - return family - - docs_models = read_models_from_docs() - all_models = list_models(pretrained=True, exclude_filters=["*in21k"]) - - all_models_family = populate_family(all_models) - docs_models_family = populate_family(docs_models) - - for key in docs_models_family: - del all_models_family[key] - - chosen_models = set() - chosen_models.update(value[0] for value in docs_models_family.values()) - - chosen_models.update(value[0] for key, value in all_models_family.items()) - - filename = "timm_models_list.txt" - if os.path.exists("benchmarks"): - filename = "benchmarks/" + filename - with open(filename, "w") as fw: - for model_name in sorted(chosen_models): - fw.write(model_name + "\n") - - -class TimmRunner(BenchmarkRunner): - def __init__(self): - super().__init__() - self.suite_name = "timm_models" - - @property - def force_amp_for_fp16_bf16_models(self): - return FORCE_AMP_FOR_FP16_BF16_MODELS - - @property - def force_fp16_for_bf16_models(self): - return set() - - @property - def get_output_amp_train_process_func(self): - return {} - - @property - def 
skip_accuracy_check_as_eager_non_deterministic(self): - if self.args.accuracy and self.args.training: - return SKIP_ACCURACY_CHECK_AS_EAGER_NON_DETERMINISTIC_MODELS - return set() - - @property - def guard_on_nn_module_models(self): - return { - "convit_base", - } - - @download_retry_decorator - def _download_model(self, model_name): - model = create_model( - model_name, - in_chans=3, - scriptable=False, - num_classes=None, - drop_rate=0.0, - drop_path_rate=None, - drop_block_rate=None, - pretrained=True, - ) - return model - - def load_model( - self, - device, - model_name, - batch_size=None, - extra_args=None, - ): - if self.args.enable_activation_checkpointing: - raise NotImplementedError( - "Activation checkpointing not implemented for Timm models" - ) - - is_training = self.args.training - use_eval_mode = self.args.use_eval_mode - - channels_last = self._args.channels_last - model = self._download_model(model_name) - - if model is None: - raise RuntimeError(f"Failed to load model '{model_name}'") - model.to( - device=device, - memory_format=torch.channels_last if channels_last else None, - ) - - self.num_classes = model.num_classes - - data_config = resolve_data_config( - vars(self._args) if timmversion >= "0.8.0" else self._args, - model=model, - use_test_size=not is_training, - ) - input_size = data_config["input_size"] - recorded_batch_size = TIMM_MODELS[model_name] - - if model_name in BATCH_SIZE_DIVISORS: - recorded_batch_size = max( - int(recorded_batch_size / BATCH_SIZE_DIVISORS[model_name]), 1 - ) - batch_size = batch_size or recorded_batch_size - - torch.manual_seed(1337) - input_tensor = torch.randint( - 256, size=(batch_size,) + input_size, device=device - ).to(dtype=torch.float32) - mean = torch.mean(input_tensor) - std_dev = torch.std(input_tensor) - example_inputs = (input_tensor - mean) / std_dev - - if channels_last: - example_inputs = example_inputs.contiguous( - memory_format=torch.channels_last - ) - example_inputs = [ - example_inputs, - ] - self.target = self._gen_target(batch_size, device) - - self.loss = torch.nn.CrossEntropyLoss().to(device) - - if model_name in SCALED_COMPUTE_LOSS: - self.compute_loss = self.scaled_compute_loss - - if is_training and not use_eval_mode: - model.train() - else: - model.eval() - - self.validate_model(model, example_inputs) - - return device, model_name, model, example_inputs, batch_size - - def iter_model_names(self, args): - # for model_name in list_models(pretrained=True, exclude_filters=["*in21k"]): - model_names = sorted(TIMM_MODELS.keys()) - start, end = self.get_benchmark_indices(len(model_names)) - for index, model_name in enumerate(model_names): - if index < start or index >= end: - continue - if ( - not re.search("|".join(args.filter), model_name, re.I) - or re.search("|".join(args.exclude), model_name, re.I) - or model_name in args.exclude_exact - or model_name in self.skip_models - ): - continue - - yield model_name - - def pick_grad(self, name, is_training): - if is_training: - return torch.enable_grad() - else: - return torch.no_grad() - - def get_tolerance_and_cosine_flag(self, is_training, current_device, name): - cosine = self.args.cosine - tolerance = 1e-3 - - if self.args.freezing and name in REQUIRE_HIGHER_TOLERANCE_FOR_FREEZING: - # the conv-batchnorm fusion used under freezing may cause relatively - # large numerical difference. We need are larger tolerance. 
-            # Check https://github.com/pytorch/pytorch/issues/120545 for context
-            tolerance = 8 * 1e-2
-
-        if is_training:
-            if name in ["levit_128"]:
-                tolerance = 8 * 1e-2
-            elif name in REQUIRE_HIGHER_TOLERANCE:
-                tolerance = 4 * 1e-2
-            else:
-                tolerance = 1e-2
-        return tolerance, cosine
-
-    def _gen_target(self, batch_size, device):
-        return torch.empty((batch_size,) + (), device=device, dtype=torch.long).random_(
-            self.num_classes
-        )
-
-    def compute_loss(self, pred):
-        # High loss values make gradient checking harder, as small changes in
-        # accumulation order upset accuracy checks.
-        return reduce_to_scalar_loss(pred)
-
-    def scaled_compute_loss(self, pred):
-        # Loss values need to be scaled down further.
-        return reduce_to_scalar_loss(pred) / 1000.0
-
-    def forward_pass(self, mod, inputs, collect_outputs=True):
-        with self.autocast(**self.autocast_arg):
-            return mod(*inputs)
-
-    def forward_and_backward_pass(self, mod, inputs, collect_outputs=True):
-        cloned_inputs = clone_inputs(inputs)
-        self.optimizer_zero_grad(mod)
-        with self.autocast(**self.autocast_arg):
-            pred = mod(*cloned_inputs)
-            if isinstance(pred, tuple):
-                pred = pred[0]
-            loss = self.compute_loss(pred)
-        self.grad_scaler.scale(loss).backward()
-        self.optimizer_step()
-        if collect_outputs:
-            return collect_results(mod, pred, loss, cloned_inputs)
-        return None
-
-
-def timm_main():
-    logging.basicConfig(level=logging.WARNING)
-    warnings.filterwarnings("ignore")
-    main(TimmRunner())
-
-
-if __name__ == "__main__":
-    timm_main()