diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 9d42f73384..b163f74fcb 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -131,8 +131,6 @@ jobs:
         # Install the built wheel
         pip install ./wheelhouse/nodai*
         # Validate the Models
-        pip uninstall torch torchvision
-        pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu117
         /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
         pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" -k "not metal" | tail -n 1 |
diff --git a/build_tools/populate_sharktank_ci.sh b/build_tools/populate_sharktank_ci.sh
index 15af2f0bde..a433bf1f9c 100644
--- a/build_tools/populate_sharktank_ci.sh
+++ b/build_tools/populate_sharktank_ci.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-IMPORTER=1 ./setup_venv.sh
+IMPORTER=1 BENCHMARK=1 ./setup_venv.sh
 source $GITHUB_WORKSPACE/shark.venv/bin/activate
 python generate_sharktank.py --upload=False --ci_tank_dir=True
diff --git a/setup_venv.sh b/setup_venv.sh
index d63bb8d8fb..473f6b4db4 100755
--- a/setup_venv.sh
+++ b/setup_venv.sh
@@ -123,8 +123,12 @@ fi
 $PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f https://download.pytorch.org/whl/nightly/torch/
 
 if [[ $(uname -s) = 'Linux' && ! -z "${BENCHMARK}" ]]; then
+  T_VER=$($PYTHON -m pip show torch | grep Version)
+  TORCH_VERSION=${T_VER:9:17}
+  TV_VER=$($PYTHON -m pip show torchvision | grep Version)
+  TV_VERSION=${TV_VER:9:18}
   $PYTHON -m pip uninstall -y torch torchvision
-  $PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu117
+  $PYTHON -m pip install --no-deps https://download.pytorch.org/whl/nightly/cu117/torch-${TORCH_VERSION}%2Bcu117-cp310-cp310-linux_x86_64.whl https://download.pytorch.org/whl/nightly/cu117/torchvision-${TV_VERSION}%2Bcu117-cp310-cp310-linux_x86_64.whl
   if [ $? -eq 0 ];then
     echo "Successfully Installed torch + cu117."
   else
diff --git a/shark/shark_benchmark_runner.py b/shark/shark_benchmark_runner.py
index 5651d2bfeb..4bf5896806 100644
--- a/shark/shark_benchmark_runner.py
+++ b/shark/shark_benchmark_runner.py
@@ -333,7 +333,10 @@ def benchmark_all_csv(
         else:
             bench_result["shape_type"] = "static"
         bench_result["device"] = device_str
-        bench_result["data_type"] = inputs[0].dtype
+        if "fp16" in modelname:
+            bench_result["data_type"] = "float16"
+        else:
+            bench_result["data_type"] = inputs[0].dtype
         for e in engines:
             (
                 bench_result["param_count"],
diff --git a/tank/all_models.csv b/tank/all_models.csv
index 69c9379575..a4b38b153f 100644
--- a/tank/all_models.csv
+++ b/tank/all_models.csv
@@ -17,7 +17,7 @@ albert-base-v2,linalg,torch,1e-2,1e-3,default,None,True,True,True,"issue with at
 alexnet,linalg,torch,1e-2,1e-3,default,None,False,False,True,"Assertion Error: Zeros Output"
 bert-base-cased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
 bert-base-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
-bert-base-uncased_fp16,linalg,torch,1e-2,1e-2,default,None,True,True,True,""
+bert-base-uncased_fp16,linalg,torch,1e-1,1e-1,default,None,True,False,True,""
 facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"Fails during iree-compile."
 google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/311"
 microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/390"
diff --git a/tank/model_utils.py b/tank/model_utils.py
index 3114d9f28d..29a58eba11 100644
--- a/tank/model_utils.py
+++ b/tank/model_utils.py
@@ -156,13 +156,14 @@ def get_vision_model(torch_model):
         "mnasnet1_0": models.mnasnet1_0(weights="DEFAULT"),
     }
     if isinstance(torch_model, str):
+        fp16_model = None
         if "fp16" in torch_model:
             fp16_model = True
         torch_model = vision_models_dict[torch_model]
     model = VisionModule(torch_model)
     test_input = torch.randn(1, 3, 224, 224)
     actual_out = model(test_input)
-    if fp16_model == True:
+    if fp16_model is not None:
         test_input_fp16 = test_input.to(
             device=torch.device("cuda"), dtype=torch.half
         )
@@ -187,17 +188,15 @@ class BertHalfPrecisionModel(torch.nn.Module):
     def __init__(self, hf_model_name):
         super().__init__()
         from transformers import AutoModelForMaskedLM
-        import transformers as trf
 
-        transformers_path = trf.__path__[0]
-        hf_model_path = f"{transformers_path}/models/{hf_model_name}"
         self.model = AutoModelForMaskedLM.from_pretrained(
             hf_model_name,  # The pretrained model.
             num_labels=2,  # The number of output labels--2 for binary classification.
             output_attentions=False,  # Whether the model returns attentions weights.
             output_hidden_states=False,  # Whether the model returns all hidden-states.
             torchscript=True,
-        )
+            torch_dtype=torch.float16,
+        ).to("cuda")
 
     def forward(self, tokens):
         return self.model.forward(tokens)[0]
@@ -210,22 +209,21 @@ def get_fp16_model(torch_model):
     model = BertHalfPrecisionModel(modelname)
     tokenizer = AutoTokenizer.from_pretrained(modelname)
     text = "Replace me by any text you like."
-    encoded_input = tokenizer(
+    test_input_fp16 = tokenizer(
         text,
         truncation=True,
         max_length=128,
         return_tensors="pt",
-    )
-    for key in encoded_input:
-        encoded_input[key] = (
-            encoded_input[key].detach().numpy().astype(np.half)
-        )
-
+    ).input_ids.to("cuda")
+    # test_input = torch.randint(2, (1, 128))
+    # test_input_fp16 = test_input.to(
+    #     device=torch.device("cuda")
+    # )
     model_fp16 = model.half()
     model_fp16.eval()
-    model_fp16.to("cuda")
-    actual_out_fp16 = model_fp16(encoded_input)
-    return model_fp16, encoded_input, actual_out_fp16
+    with torch.no_grad():
+        actual_out_fp16 = model_fp16(test_input_fp16)
+    return model_fp16, test_input_fp16, actual_out_fp16
 
 
 # Utility function for comparing two tensors (torch).
diff --git a/tank/torch_model_list.csv b/tank/torch_model_list.csv
index 10cd5924b9..f52518d895 100644
--- a/tank/torch_model_list.csv
+++ b/tank/torch_model_list.csv
@@ -17,3 +17,4 @@ microsoft/beit-base-patch16-224-pt22k-ft22k,True,hf_img_cls,False,86M,"image-cla
 nvidia/mit-b0,True,hf_img_cls,False,3.7M,"image-classification,transformer-encoder",SegFormer
 mnasnet1_0,False,vision,True,-,"cnn, torchvision, mobile, architecture-search","Outperforms other mobile CNNs on Accuracy vs. Latency"
 resnet50_fp16,False,vision,True,23M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
+bert-base-uncased_fp16,True,fp16,False,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
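
Note on the setup_venv.sh hunk: `${T_VER:9:17}` and `${TV_VER:9:18}` are fixed-width bash slices. They drop the 9-character "Version: " prefix from `pip show` output and keep a 17-character torch version (e.g. "2.0.0.dev20230104") or an 18-character torchvision version (e.g. "0.15.0.dev20230104"), which is then spliced into a pinned cu117 wheel URL so the CUDA reinstall matches the exact nightly that was already resolved. A minimal Python sketch of the same slicing, with a hardcoded example string standing in for real `pip show` output (the string and the `build_wheel_url` helper are illustrative assumptions, not part of the patch):

# Sketch only: mirror the fixed-width slice ${T_VER:9:17} from setup_venv.sh.
t_ver = "Version: 2.0.0.dev20230104"  # hypothetical `pip show torch` Version line
torch_version = t_ver[9:9 + 17]       # skip the 9-char "Version: " prefix, keep 17 chars
assert torch_version == "2.0.0.dev20230104"

def build_wheel_url(version: str) -> str:
    # Assemble the pinned cu117 wheel URL the way the patched install line does
    # ("%2B" is the URL-encoded "+" in the local version tag).
    return (
        "https://download.pytorch.org/whl/nightly/cu117/"
        f"torch-{version}%2Bcu117-cp310-cp310-linux_x86_64.whl"
    )

print(build_wheel_url(torch_version))

The fixed widths silently break if the version string changes length (e.g. a two-digit minor version), so a width-independent split such as `t_ver.split()[1]` would be a more robust variant of the same idea.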
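
The model_utils.py rewrite moves the fp16 BERT path fully onto the GPU: weights are loaded in float16 via `torch_dtype`, the tokenizer's `input_ids` (which remain int64) are moved to CUDA, and the forward pass runs under `torch.no_grad()`. A self-contained sketch of that flow, assuming a CUDA device and the Hugging Face `transformers` package; "bert-base-uncased" stands in for the tank model name:

# Sketch of the patched fp16 flow, assuming CUDA and transformers are available.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(
    model_name,
    output_attentions=False,
    output_hidden_states=False,
    torchscript=True,              # return plain tuples instead of output objects
    torch_dtype=torch.float16,     # load weights directly in half precision
).to("cuda")

# Only the token-id tensor is used as input; ids stay int64, weights are fp16.
input_ids = tokenizer(
    "Replace me by any text you like.",
    truncation=True,
    max_length=128,
    return_tensors="pt",
).input_ids.to("cuda")

model = model.half()
model.eval()
with torch.no_grad():
    logits = model(input_ids)[0]   # with torchscript=True, outputs are a tuple
print(logits.dtype)                # torch.float16

This also matches the benchmark-runner change: since the inputs are integer token ids, `inputs[0].dtype` would misreport the precision, hence the explicit "float16" label for models whose names contain "fp16".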