Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resnet on qnn #1601

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
|Vision|stable diffusion <br> stable diffusion XL|[Link](https://github.com/microsoft/Olive/tree/main/examples/stable_diffusion)|`GPU`: with ONNX Runtime optimization for DirectML EP<br>`GPU`: with ONNX Runtime optimization for CUDA EP<br>`Intel CPU`: with OpenVINO toolkit
||squeezenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/directml/squeezenet)|`GPU`: with ONNX Runtime optimizations with DirectML EP
||mobilenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/mobilenet)|`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br>`AMD DPU`: with AMD Vitis-AI Quantization<br>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs
||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br>`AMD DPU`: with AMD Vitis-AI Quantization<br>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs<br>`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
||VGG|[Link](https://github.com/microsoft/Olive/tree/main/examples/vgg)|`Qualcomm NPU`: with SNPE toolkit
||inception|[Link](https://github.com/microsoft/Olive/tree/main/examples/inception)|`Qualcomm NPU`: with SNPE toolkit
||super resolution|[Link](https://github.com/microsoft/Olive/tree/main/examples/super_resolution)|`CPU`: with ONNX Runtime pre/post processing integration for a single ONNX model
8 changes: 7 additions & 1 deletion examples/resnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This folder contains examples of ResNet optimization using different workflows.
- CPU: [with PyTorch QAT PyTorch Lightning Module and ORT optimizations](#resnet-optimization-with-qat-pytorch-lightning-module-on-cpu)
- AMD DPU: [with AMD Vitis-AI Quantization](#resnet-optimization-with-vitis-ai-ptq-on-amd-dpu)
- Intel GPU: [with OpenVINO and DirectML execution providers in ONNX Runtime](#resnet-optimization-with-openvino-and-dml-execution-providers)

- Qualcomm NPU: [with QNN execution provider in ONNX Runtime](#resnet-optimization-with-qnn-execution-providers)

Go to [How to run](#how-to-run)

## Optimization Workflows
Expand Down Expand Up @@ -45,6 +45,12 @@ This example performs ResNet optimization with OpenVINO and DML execution provid
- *ONNX Model -> ONNX Runtime performance tuning on multiple ep*

Config file: [resnet_multiple_ep.json](resnet_multiple_ep.json)

### ResNet optimization with QNN execution providers
This example performs ResNet optimization with the QNN execution provider in one workflow. It performs the optimization pipeline:
- *PyTorch Model -> ONNX Model -> QNN Quantized ONNX Model*

Config file: [resnet_ptq_qnn.json](resnet_ptq_qnn.json)

## How to run
### Pip requirements
Install the necessary python packages:
Expand Down
74 changes: 74 additions & 0 deletions examples/resnet/imagenet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# -------------------------------------------------------------------------
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from logging import getLogger
Fixed Show fixed Hide fixed
jambayk marked this conversation as resolved.
Show resolved Hide resolved
Fixed Show fixed Hide fixed
from pathlib import Path
Fixed Show fixed Hide fixed

import numpy as np
import torchvision.transforms as transforms
from torch import from_numpy
from torch.utils.data import Dataset

from olive.data.registry import Registry

logger = getLogger(__name__)


class ImagenetDataset(Dataset):
    """Torch dataset over preprocessed ImageNet samples.

    ``data`` is a mapping with "images" and "labels" numpy arrays; both are
    converted to torch tensors once at construction time.
    """

    def __init__(self, data):
        self.images = from_numpy(data["images"])
        self.labels = from_numpy(data["labels"])

    def __len__(self):
        # Use the shorter of the two tensors so a mismatch never causes
        # an out-of-range index in __getitem__.
        return min(self.images.shape[0], self.labels.shape[0])

    def __getitem__(self, idx):
        # Returns (input_dict, label); the "input" key matches the model's
        # declared input name in the workflow config.
        model_inputs = {"input": self.images[idx]}
        return model_inputs, self.labels[idx]


@Registry.register_post_process()
def imagenet_post_fun(output):
    """Reduce per-class scores to predicted class ids (argmax over the class axis)."""
    predictions = output.argmax(axis=1)
    return predictions


# Standard ImageNet evaluation transform: resize the shorter side to 256,
# center-crop to 224x224, convert to a [0, 1] float tensor, then normalize
# with the conventional ImageNet channel means/stds.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


@Registry.register_pre_process()
def dataset_pre_process(output_data, **kwargs):
    """Preprocess streamed ImageNet samples into an ImagenetDataset, with on-disk caching.

    Args:
        output_data: iterable of samples, each a mapping with "image" (PIL image)
            and "label" keys.
        **kwargs:
            cache_key: if given, preprocessed arrays are cached at
                ./cache/data/{cache_key}.npz and reloaded on subsequent runs.
            size: maximum number of samples to take from the stream (default 256).

    Returns:
        ImagenetDataset over the preprocessed images and labels.
    """
    cache_key = kwargs.get("cache_key")
    cache_file = None
    if cache_key:
        cache_file = Path(f"./cache/data/{cache_key}.npz")
        if cache_file.exists():
            # Reuse previously preprocessed data instead of re-reading the stream.
            with np.load(cache_file) as data:
                return ImagenetDataset(data)

    size = kwargs.get("size", 256)
    labels = []
    images = []
    for i, sample in enumerate(output_data):
        if i >= size:
            break
        # Force 3-channel RGB before the torchvision transforms; some ImageNet
        # samples are grayscale or have an alpha channel.
        image = sample["image"].convert("RGB")
        images.append(preprocess(image))
        labels.append(sample["label"])

    # Convert once and reuse for both the dataset and the cache file
    # (the original converted each list to a numpy array twice).
    images_arr = np.array(images)
    labels_arr = np.array(labels)

    if cache_file:
        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
        np.savez(cache_file, images=images_arr, labels=labels_arr)

    return ImagenetDataset({"images": images_arr, "labels": labels_arr})
126 changes: 126 additions & 0 deletions examples/resnet/resnet_ptq_qnn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"input_model": {
"type": "HfModel",
"model_path": "microsoft/resnet-50",
"task": "image-classification",
"io_config": {
"input_names": [ "input" ],
"input_shapes": [ [ 1, 3, 224, 224 ] ],
"output_names": [ "output" ]
}
},
"systems": {
"qnn_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "npu", "execution_providers": [ "QNNExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "quantize_data_config",
"type": "HuggingfaceContainer",
"user_script": "imagenet.py",
"load_dataset_config": {
"data_name": "imagenet-1k",
"split": "validation",
"streaming": true,
"trust_remote_code": true
},
"pre_process_data_config": { "type": "dataset_pre_process", "size": 256, "cache_key": "imagenet256" },
"post_process_data_config": { "type": "imagenet_post_fun" }
}
],
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "accuracy",
"type": "accuracy",
"data_config": "quantize_data_config",
"sub_types": [
{
"name": "accuracy_score",
"priority": 1,
"metric_config": { "task": "multiclass", "num_classes": 1001 }
}
],
"user_config": {
"inference_settings": {
"onnx": {
"session_options": {
"extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
},
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
},
{
"name": "latency_qnn",
"type": "latency",
"data_config": "quantize_data_config",
"sub_types": [
{ "name": "avg", "priority": 2 },
{ "name": "p75" },
{ "name": "p90" },
{ "name": "p99" }
],
"user_config": {
"inference_settings": {
"onnx": {
"session_options": {
"extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
},
"execution_provider": "QNNExecutionProvider",
"provider_options": [ { "backend_path": "QnnHtp.dll" } ]
}
}
}
},
{
"name": "latency_cpu",
"type": "latency",
"data_config": "quantize_data_config",
"sub_types": [
{ "name": "avg", "priority": 3 },
{ "name": "p75" },
{ "name": "p90" },
{ "name": "p99" }
],
"user_config": {
"inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
}
}
]
}
},
"passes": {
"conversion": {
"device": "cpu",
"type": "OnnxConversion",
"target_opset": 17,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"dynamic": false,
"use_dynamo_exporter": false
},
"QNNPreprocess": { "type": "QNNPreprocess", "fuse_layernorm": true },
"OnnxQuantization": {
"type": "OnnxQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"host": "qnn_system",
"target": "qnn_system",
"evaluator": "common_evaluator",
"cache_dir": "cache",
"clean_cache": true,
"output_dir": "models/resnet_ptq_qnn",
"evaluate_input_model": false
}