From a33557e65f8c8d043036f194c3e33a29be7ab51f Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Wed, 26 Feb 2025 16:32:49 +0800 Subject: [PATCH 01/12] enable pt2e test --- .../actions/inductor-xpu-e2e-test/action.yml | 3 + .github/actions/pt2e/action.yml | 147 ++++++++++++++++++ .github/workflows/nightly_ondemand.yml | 34 +++- .../workflows/nightly_ondemand_rolling.yml | 37 ++++- .github/workflows/nightly_ondemand_whl.yml | 35 ++++- 5 files changed, 250 insertions(+), 6 deletions(-) create mode 100644 .github/actions/pt2e/action.yml diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 52ec6c3b0..88551837e 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -111,6 +111,9 @@ runs: set -xe for suite in $(echo ${{ inputs.suite }} |sed 's/,/ /g') do + if [ "${suite}" == "pt2e" ];then + continue + fi contains "huggingface,timm_models,torchbench" $suite $contains_status for dt in $(echo ${{ inputs.dt }} |sed 's/,/ /g') diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml new file mode 100644 index 000000000..a01c7bc91 --- /dev/null +++ b/.github/actions/pt2e/action.yml @@ -0,0 +1,147 @@ +name: inductor-xpu-pt2e-test + +inputs: + suite: + required: true + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma + env_prepare: + required: false + description: If set to any value, will prepare suite test env + dt: + required: true + type: string + default: 'float32' + description: Data precision of the test.float32,int8. Delimiter is comma + mode: + required: true + type: string + default: 'inference' + description: inference. Delimiter is comma + scenario: + required: true + type: string + default: 'accuracy' + description: accuracy,performance. Delimiter is comma + cards: + required: false + type: string + default: 'all' + description: which cards can be used in the test + hf_token: + required: false + description: HUGGING_FACE_HUB_TOKEN for torchbench test + pytorch: + required: false + type: string + default: 'main' + description: Pytorch branch/commit + driver: + required: false + type: string + default: 'lts' + description: Driver lts/rolling + +runs: + using: composite + steps: + - name: Prepare ENV + if: ${{ inputs.env_prepare }} + shell: bash + run: | + source activate e2e_ci + source .github/scripts/env.sh ${{ inputs.pytorch }} + # accuracy code + if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then + rm -rf pt2e-accuracy + git clone -b yifeng/accuracy https://github.com/chuanqi129/inductor-tools pt2e-accuracy + fi + # performance code + if [[ "${{ inputs.scenario }}" == *"performance"* ]];then + rm -rf pt2e-performance + git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance + fi + # deps + if [[ ${{ inputs.scenario }} == *"performance"* ]]; then + pip install pyyaml botocore + if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then + rm -rf pt2e-audio + git clone --single-branch -b main https://github.com/pytorch/audio pt2e-audio + cd pt2e-audio && git checkout $TORCHAUDIO_COMMIT_ID + python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install --no-deps dist/*.whl + cd ../ + rm -rf pt2e-vision + git clone --single-branch -b main https://github.com/pytorch/vision pt2e-vision + cd pt2e-vision && git checkout $TORCHVISION_COMMIT_ID + python setup.py bdist_wheel && pip uninstall torchvision -y && pip install --no-deps dist/*.whl + cd ../ + fi + # torchbench + python -c "import torch, torchvision, torchaudio" + rm -rf pt2e-benchmark + git clone https://github.com/pytorch/benchmark pt2e-benchmark + cd pt2e-benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt + pip install -U transformers tokenizers safetensors + python install.py --continue_on_fail + cd ../ + # deps for torchrec_dlrm + pip install pyre_extensions + pip install fbgemm-gpu + pip install --no-deps torchmetrics==1.0.3 torchrec + # transformers + pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION} + # timm + pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID + pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch) + fi + pip install numpy==1.26.4 + # dataset + if [ ! -d ${HOME}/datasets/imagenet ];then + rm -rf ${HOME}/datasets/imagenet + mkdir -p ${HOME}/datasets/imagenet + cd ${HOME}/datasets/imagenet + wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar + tar -xf ILSVRC2012_img_val.tar + wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh + bash valprep.sh + fi + - name: PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + env: + HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} + NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} + DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} + shell: bash + run: | + source activate e2e_ci + source .github/scripts/env.sh ${{ inputs.pytorch }} + pt2e_logs_dir="${{ github.workspace }}/../pytorch/inductor_log" + rm -rf "${pt2e_logs_dir}" && mkdir -p "${pt2e_logs_dir}" + if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then + if [[ "${{ inputs.dt }}" == *"float32"* ]];then + python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --is_fp32 --dataset_dir ${HOME}/datasets/imagenet |\ + tee "${pt2e_logs_dir}/accuracy-fp32.log" + fi + if [[ "${{ inputs.dt }}" == *"int8"* ]];then + python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${HOME}/datasets/imagenet |\ + tee "${pt2e_logs_dir}/accuracy-int8.log" + fi + fi + if [[ "${{ inputs.scenario }}" == *"performance"* ]];then + models="alexnet,demucs,dlrm,hf_Albert,hf_Bert,hf_Bert_large,hf_DistilBert,hf_Roberta_base,mnasnet1_0,mobilenet_v2," + models+="mobilenet_v3_large,nvidia_deeprecommender,pytorch_CycleGAN_and_pix2pix,resnet152,resnet18,resnet50,resnext50_32x4d," + models+="shufflenet_v2_x1_0,squeezenet1_1,Super_SloMo,timm_efficientnet,timm_nfnet,timm_regnet,timm_resnest," + models+="timm_vision_transformer,timm_vision_transformer_large,timm_vovnet,vgg16" + if [[ "${{ inputs.dt }}" == *"float32"* ]];then + rm -rf pt2e-performance/.userbenchmark + python pt2e-performance/run_benchmark.py xpu --test eval --channels-last --metrics throughputs --torchdynamo inductor -m $models 2>&1 |\ + tee "${pt2e_logs_dir}/performance-fp32.log" + mv pt2e-performance/.userbenchmark ${pt2e_logs_dir}/performance-fp32 + fi + if [[ "${{ inputs.dt }}" == *"float32"* ]];then + rm -rf pt2e-performance/.userbenchmark + python pt2e-performance/run_benchmark.py xpu --test eval --channels-last --metrics throughputs --torchdynamo inductor --quantization pt2e -m $models 2>&1 |\ + tee "${pt2e_logs_dir}/performance-int8.log" + mv pt2e-performance/.userbenchmark ${pt2e_logs_dir}/performance-int8 + fi + fi diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 26da75ff3..ed1e20618 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -32,7 +32,7 @@ on: required: true type: string default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: required: true type: string @@ -233,6 +233,16 @@ jobs: scenario: accuracy env_prepare: true hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Nightly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4' + uses: ./.github/actions/pt2e + with: + suite: pt2e + dt: float32,int8 + scenario: accuracy,performance + env_prepare: true + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # Weekly launch - name: Weekly Huggingface Full Test if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5' @@ -264,9 +274,19 @@ jobs: mode: inference,training scenario: accuracy,performance hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Weekly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5' + uses: ./.github/actions/pt2e + with: + suite: pt2e + env_prepare: true + dt: float32,int8 + scenario: accuracy,performance + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: github.event_name != 'schedule' + if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} uses: ./.github/actions/inductor-xpu-e2e-test with: suite: ${{ inputs.suite }} @@ -275,6 +295,16 @@ jobs: mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} + uses: ./.github/actions/pt2e + with: + suite: ${{ inputs.suite }} + env_prepare: true + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - name: Summarize archieve files id: summary diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 5cf66b3c8..22cf0cbee 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -32,7 +32,7 @@ on: required: true type: string default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: required: true type: string @@ -241,6 +241,17 @@ jobs: env_prepare: true hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} driver: rolling + - name: Nightly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '30 13 * * 0-4' + uses: ./.github/actions/pt2e + with: + suite: pt2e + dt: float32,int8 + scenario: accuracy,performance + env_prepare: true + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + driver: rolling + # Weekly launch - name: Weekly Huggingface Full Test if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5' @@ -275,9 +286,20 @@ jobs: scenario: accuracy,performance hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} driver: rolling + - name: Weekly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5' + uses: ./.github/actions/pt2e + with: + suite: pt2e + env_prepare: true + dt: float32,int8 + scenario: accuracy,performance + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + driver: rolling + # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: github.event_name != 'schedule' + if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} uses: ./.github/actions/inductor-xpu-e2e-test with: suite: ${{ inputs.suite }} @@ -287,6 +309,17 @@ jobs: scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} driver: rolling + - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} + uses: ./.github/actions/pt2e + with: + suite: ${{ inputs.suite }} + env_prepare: true + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + driver: rolling - name: Summarize archieve files id: summary diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index 86cc3e764..583d36040 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -22,7 +22,7 @@ on: required: true type: string default: 'huggingface' - description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma dt: required: true type: string @@ -187,6 +187,16 @@ jobs: pytorch: nightly_wheel env_prepare: true hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Nightly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4' + uses: ./.github/actions/pt2e + with: + suite: pt2e + dt: float32,int8 + scenario: accuracy,performance + pytorch: nightly_wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # Weekly launch - name: Weekly Huggingface Full Test if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' @@ -221,9 +231,19 @@ jobs: scenario: accuracy,performance pytorch: nightly_wheel hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Weekly PT2E Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' + uses: ./.github/actions/pt2e + with: + suite: pt2e + dt: float32,int8 + scenario: accuracy,performance + pytorch: nightly_wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # On-demand launch - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) - if: github.event_name != 'schedule' + if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }} uses: ./.github/actions/inductor-xpu-e2e-test with: suite: ${{ inputs.suite }} @@ -233,6 +253,17 @@ jobs: scenario: ${{ inputs.scenario }} pytorch: nightly_wheel hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} + uses: ./.github/actions/pt2e + with: + suite: ${{ inputs.suite }} + env_prepare: true + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + pytorch: nightly_wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - name: Summarize archieve files id: summary From c54c0aa104723cd4ae0c86046b03201474bf9c31 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 11:46:06 +0800 Subject: [PATCH 02/12] remove useless params --- .github/actions/inductor-xpu-e2e-test/action.yml | 5 ----- .github/actions/pt2e/action.yml | 12 +----------- .github/workflows/nightly_ondemand.yml | 10 +--------- .github/workflows/nightly_ondemand_rolling.yml | 11 +---------- .github/workflows/nightly_ondemand_whl.yml | 11 +---------- 5 files changed, 4 insertions(+), 45 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 88551837e..9b38218ea 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -24,11 +24,6 @@ inputs: type: string default: 'accuracy' description: accuracy,performance. Delimiter is comma - cards: - required: false - type: string - default: 'all' - description: which cards can be used in the test hf_token: required: false description: HUGGING_FACE_HUB_TOKEN for torchbench test diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index a01c7bc91..ca72c4d58 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -1,11 +1,6 @@ name: inductor-xpu-pt2e-test inputs: - suite: - required: true - type: string - default: 'huggingface' - description: Dynamo benchmarks test suite. huggingface,timm_models,torchbench. Delimiter is comma env_prepare: required: false description: If set to any value, will prepare suite test env @@ -24,11 +19,6 @@ inputs: type: string default: 'accuracy' description: accuracy,performance. Delimiter is comma - cards: - required: false - type: string - default: 'all' - description: which cards can be used in the test hf_token: required: false description: HUGGING_FACE_HUB_TOKEN for torchbench test @@ -106,7 +96,7 @@ runs: wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh bash valprep.sh fi - - name: PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + - name: PT2E Test (${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) env: HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index ed1e20618..452a36475 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -48,11 +48,6 @@ on: type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set python: required: false type: string @@ -62,7 +57,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -428,9 +423,6 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION | $KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 22cf0cbee..1cba2c6b8 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -48,11 +48,6 @@ on: type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set python: required: false type: string @@ -62,7 +57,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -172,7 +167,6 @@ jobs: echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -445,9 +439,6 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | rolling-$DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index 583d36040..1b63727ec 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -38,11 +38,6 @@ on: type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set python: required: false type: string @@ -52,7 +47,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -138,7 +133,6 @@ jobs: echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(pip list |grep cmplr |head -n 1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -360,9 +354,6 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.model }}" != "" ];then - test_scope+="; model=${{ inputs.model }}" - fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt From 42c773a2487c9385250697b2313eca454fdb8866 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 12:04:43 +0800 Subject: [PATCH 03/12] use repo directly --- .github/actions/pt2e/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index ca72c4d58..2fecbee05 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -58,12 +58,12 @@ runs: if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then rm -rf pt2e-audio git clone --single-branch -b main https://github.com/pytorch/audio pt2e-audio - cd pt2e-audio && git checkout $TORCHAUDIO_COMMIT_ID + cd pt2e-audio python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install --no-deps dist/*.whl cd ../ rm -rf pt2e-vision git clone --single-branch -b main https://github.com/pytorch/vision pt2e-vision - cd pt2e-vision && git checkout $TORCHVISION_COMMIT_ID + cd pt2e-vision python setup.py bdist_wheel && pip uninstall torchvision -y && pip install --no-deps dist/*.whl cd ../ fi From 6f2348bc04966c2fe4bf94089d211f7cc4810029 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 12:13:34 +0800 Subject: [PATCH 04/12] use repo directly --- .github/actions/pt2e/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 2fecbee05..04dafdcc4 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -71,7 +71,7 @@ runs: python -c "import torch, torchvision, torchaudio" rm -rf pt2e-benchmark git clone https://github.com/pytorch/benchmark pt2e-benchmark - cd pt2e-benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt + cd pt2e-benchmark && pip install --no-deps -r requirements.txt pip install -U transformers tokenizers safetensors python install.py --continue_on_fail cd ../ From 54b62557601735ddb3ab2aedad7c2eee131d27de Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 12:34:06 +0800 Subject: [PATCH 05/12] remove useless params --- .github/actions/pt2e/action.yml | 7 +------ .github/workflows/nightly_ondemand.yml | 10 +++++++++- .github/workflows/nightly_ondemand_rolling.yml | 11 ++++++++++- .github/workflows/nightly_ondemand_whl.yml | 11 ++++++++++- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index 04dafdcc4..dd54141cb 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -9,11 +9,6 @@ inputs: type: string default: 'float32' description: Data precision of the test.float32,int8. Delimiter is comma - mode: - required: true - type: string - default: 'inference' - description: inference. Delimiter is comma scenario: required: true type: string @@ -96,7 +91,7 @@ runs: wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh bash valprep.sh fi - - name: PT2E Test (${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + - name: PT2E Test (${{ inputs.dt }} ${{ inputs.scenario }}) env: HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }} NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 452a36475..5bcdd521a 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -43,6 +43,11 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set scenario: required: true type: string @@ -57,7 +62,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -423,6 +428,9 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION | $KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" + if [ "${{ inputs.model }}" != "" ];then + test_scope+="; model=${{ inputs.model }}" + fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 1cba2c6b8..76c4c6977 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -43,6 +43,11 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set scenario: required: true type: string @@ -57,7 +62,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -167,6 +172,7 @@ jobs: echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -439,6 +445,9 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | rolling-$DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" + if [ "${{ inputs.model }}" != "" ];then + test_scope+="; model=${{ inputs.model }}" + fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index 1b63727ec..ffddb765c 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -33,6 +33,11 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set scenario: required: true type: string @@ -47,7 +52,7 @@ on: permissions: read-all concurrency: - group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.python }} + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} cancel-in-progress: ${{ github.event_name != 'schedule' }} jobs: @@ -133,6 +138,7 @@ jobs: echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(pip list |grep cmplr |head -n 1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -354,6 +360,9 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION |$KERNEL_VERSION | $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" + if [ "${{ inputs.model }}" != "" ];then + test_scope+="; model=${{ inputs.model }}" + fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt From 7896a71acd00c22cc7fed8db743835044d61ec70 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 13:44:05 +0800 Subject: [PATCH 06/12] update --- .github/actions/pt2e/action.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index dd54141cb..b4f80795c 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -40,7 +40,7 @@ runs: # accuracy code if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then rm -rf pt2e-accuracy - git clone -b yifeng/accuracy https://github.com/chuanqi129/inductor-tools pt2e-accuracy + git clone -b main https://github.com/chuanqi129/inductor-tools pt2e-accuracy fi # performance code if [[ "${{ inputs.scenario }}" == *"performance"* ]];then @@ -53,20 +53,18 @@ runs: if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then rm -rf pt2e-audio git clone --single-branch -b main https://github.com/pytorch/audio pt2e-audio - cd pt2e-audio + cd pt2e-audio && git checkout $TORCHAUDIO_COMMIT_ID python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install --no-deps dist/*.whl cd ../ rm -rf pt2e-vision git clone --single-branch -b main https://github.com/pytorch/vision pt2e-vision - cd pt2e-vision + cd pt2e-vision && git checkout $TORCHVISION_COMMIT_ID python setup.py bdist_wheel && pip uninstall torchvision -y && pip install --no-deps dist/*.whl cd ../ fi # torchbench python -c "import torch, torchvision, torchaudio" - rm -rf pt2e-benchmark - git clone https://github.com/pytorch/benchmark pt2e-benchmark - cd pt2e-benchmark && pip install --no-deps -r requirements.txt + cd pt2e-performance && pip install --no-deps -r requirements.txt pip install -U transformers tokenizers safetensors python install.py --continue_on_fail cd ../ From a1dd015e5a057d9f48eddfef55b41ad64bd56a38 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Thu, 27 Feb 2025 15:55:21 +0800 Subject: [PATCH 07/12] fix int8 issue --- .github/actions/pt2e/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml index b4f80795c..b5a15f6c4 100644 --- a/.github/actions/pt2e/action.yml +++ b/.github/actions/pt2e/action.yml @@ -121,7 +121,7 @@ runs: tee "${pt2e_logs_dir}/performance-fp32.log" mv pt2e-performance/.userbenchmark ${pt2e_logs_dir}/performance-fp32 fi - if [[ "${{ inputs.dt }}" == *"float32"* ]];then + if [[ "${{ inputs.dt }}" == *"int8"* ]];then rm -rf pt2e-performance/.userbenchmark python pt2e-performance/run_benchmark.py xpu --test eval --channels-last --metrics throughputs --torchdynamo inductor --quantization pt2e -m $models 2>&1 |\ tee "${pt2e_logs_dir}/performance-int8.log" From e19b855523cb495fcef87fd10f1d2eab63b5e51b Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Fri, 28 Feb 2025 15:25:37 +0800 Subject: [PATCH 08/12] remove usless params --- .../actions/inductor-xpu-e2e-test/action.yml | 5 +++++ .github/workflows/nightly_ondemand.yml | 18 +++++++----------- .github/workflows/nightly_ondemand_rolling.yml | 16 ++++++---------- .github/workflows/nightly_ondemand_whl.yml | 15 ++++++--------- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 9b38218ea..88551837e 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -24,6 +24,11 @@ inputs: type: string default: 'accuracy' description: accuracy,performance. Delimiter is comma + cards: + required: false + type: string + default: 'all' + description: which cards can be used in the test hf_token: required: false description: HUGGING_FACE_HUB_TOKEN for torchbench test diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 5bcdd521a..2c2e35698 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -43,16 +43,16 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set scenario: required: true type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set python: required: false type: string @@ -233,11 +233,10 @@ jobs: scenario: accuracy env_prepare: true hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Nightly PT2E Accuracy Test + - name: Nightly PT2E Full Test if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4' uses: ./.github/actions/pt2e with: - suite: pt2e dt: float32,int8 scenario: accuracy,performance env_prepare: true @@ -274,11 +273,10 @@ jobs: mode: inference,training scenario: accuracy,performance hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - name: Weekly PT2E Accuracy Test + - name: Weekly PT2E Full Test if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5' uses: ./.github/actions/pt2e with: - suite: pt2e env_prepare: true dt: float32,int8 scenario: accuracy,performance @@ -299,10 +297,8 @@ jobs: if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: - suite: ${{ inputs.suite }} env_prepare: true dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 76c4c6977..5354fc6ed 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -43,16 +43,16 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set scenario: required: true type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set python: required: false type: string @@ -241,11 +241,10 @@ jobs: env_prepare: true hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} driver: rolling - - name: Nightly PT2E Accuracy Test + - name: Nightly PT2E Full Test if: github.event_name == 'schedule' && github.event.schedule == '30 13 * * 0-4' uses: ./.github/actions/pt2e with: - suite: pt2e dt: float32,int8 scenario: accuracy,performance env_prepare: true @@ -290,7 +289,6 @@ jobs: if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5' uses: ./.github/actions/pt2e with: - suite: pt2e env_prepare: true dt: float32,int8 scenario: accuracy,performance @@ -313,10 +311,8 @@ jobs: if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: - suite: ${{ inputs.suite }} env_prepare: true dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} driver: rolling diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index ffddb765c..d8fe754d8 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -33,16 +33,16 @@ on: type: string default: 'inference' description: Test mode. `inference,training`. Delimiter is comma - model: - required: false - type: string - default: '' - description: Model. Will only run this one mode if set scenario: required: true type: string default: 'accuracy' description: Test scenario. `accuracy,performance`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. Will only run this one mode if set python: required: false type: string @@ -191,7 +191,6 @@ jobs: if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4' uses: ./.github/actions/pt2e with: - suite: pt2e dt: float32,int8 scenario: accuracy,performance pytorch: nightly_wheel @@ -235,7 +234,7 @@ jobs: if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' uses: ./.github/actions/pt2e with: - suite: pt2e + env_prepare: true dt: float32,int8 scenario: accuracy,performance pytorch: nightly_wheel @@ -257,10 +256,8 @@ jobs: if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }} uses: ./.github/actions/pt2e with: - suite: ${{ inputs.suite }} env_prepare: true dt: ${{ inputs.dt }} - mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} pytorch: nightly_wheel hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} From 5112ba4871ad56034b7c2166b3499c6767b62123 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Tue, 4 Mar 2025 15:09:42 +0800 Subject: [PATCH 09/12] update pt2e summary --- .github/scripts/summary_pt2e.py | 67 +++++++++++++++++++ .github/workflows/nightly_ondemand.yml | 24 ++++--- .../workflows/nightly_ondemand_rolling.yml | 24 ++++--- .github/workflows/nightly_ondemand_whl.yml | 24 ++++--- 4 files changed, 115 insertions(+), 24 deletions(-) create mode 100644 .github/scripts/summary_pt2e.py diff --git a/.github/scripts/summary_pt2e.py b/.github/scripts/summary_pt2e.py new file mode 100644 index 000000000..e342e908a --- /dev/null +++ b/.github/scripts/summary_pt2e.py @@ -0,0 +1,67 @@ +import sys +import os +import json +import csv +import pandas as pd + + +work_dir = sys.argv[1] +# scan files endwith .log and accuracy in folder +for root, dirs, files in os.walk(work_dir): + for file in files: + if file.endswith('.log') and 'accuracy' in file: + log_file_path = os.path.join(root, file) + # generate related csv file + csv_file_name = os.path.splitext(file)[0] + '.csv' + csv_file_path = os.path.join(root, csv_file_name) + # Data + csvData = [] + # read log + with open(log_file_path, 'r', encoding='utf-8') as log_file: + for line in log_file: + if "Acc" in line: + parts = line.strip().split() + model = parts[0].rstrip(':') + dt = parts[1].rstrip(':') + acc1 = parts[4] + acc5 = parts[6] + csvData.append([model,acc5]) + # write csv + with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file: + writer = csv.writer(csv_file) + writer.writerow(['Model',dt]) + writer.writerows(csvData) + +# scan .json file +for item in os.listdir(work_dir): + item_path = os.path.join(work_dir, item) + if os.path.isdir(item_path): + # generate csv + csv_file_name = item + '.csv' + csv_file_path = os.path.join(work_dir, csv_file_name) + + # data + csvData = [] + # scan json + for root, dirs, files in os.walk(item_path): + for file in files: + if file.endswith('.json'): + json_file_path = os.path.join(root, file) + with open(json_file_path, 'r', encoding='utf-8') as json_file: + data = json.load(json_file) + metrics = data.get('metrics',{}) + try: + for key, value in metrics.items(): + parts = key.rsplit('-eval_throughput',1) + if len(parts) == 2: + model = parts[0] + throughput = value + csvData.append([model,throughput]) + except json.JSONDecodeError: + print(f"Error decoding JSON file: {json_file_path}") + + with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file: + writer = csv.writer(csv_file) + writer.writerow(['Model','Throughput']) + writer.writerows(csvData) + diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 2c2e35698..412ad14d3 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -312,14 +312,22 @@ jobs: find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs # Print summary - rm -rf /tmp/tmp-*.txt - source activate e2e_ci - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} + if [ "${{ inputs.suite }}" != 'pt2e' ];then + rm -rf /tmp/tmp-*.txt + source activate e2e_ci + bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} + exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) + if [ ${exit_label} -ne 0 ];then + grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 + echo "There are ${exit_label} cases that need look into!!! Please check them" + exit ${exit_label} + fi + else + source activate e2e_ci + cp -r ${{ github.workspace }}/.github/scripts/summary_pt2e.py ${{ github.workspace }}/upload_files + cd ${{ github.workspace }}/upload_files + python summary_pt2e.py ${{ github.workspace }}/upload_files + rm -rf summary_pt2e.py fi - name: Upload Inductor XPU E2E Data if: ${{ ! cancelled() }} diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 5354fc6ed..5ddde3239 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -327,14 +327,22 @@ jobs: find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs # Print summary - rm -rf /tmp/tmp-*.txt - source activate e2e_ci - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} + if [ "${{ inputs.suite }}" != 'pt2e' ];then + rm -rf /tmp/tmp-*.txt + source activate e2e_ci + bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} + exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) + if [ ${exit_label} -ne 0 ];then + grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 + echo "There are ${exit_label} cases that need look into!!! Please check them" + exit ${exit_label} + fi + else + source activate e2e_ci + cp -r ${{ github.workspace }}/.github/scripts/summary_pt2e.py ${{ github.workspace }}/upload_files + cd ${{ github.workspace }}/upload_files + python summary_pt2e.py ${{ github.workspace }}/upload_files + rm -rf summary_pt2e.py fi - name: Upload Inductor XPU E2E Data if: ${{ ! cancelled() }} diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index d8fe754d8..604c2626a 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -272,14 +272,22 @@ jobs: find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs # Print summary - rm -rf /tmp/tmp-*.txt - source activate e2e_ci - bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} - exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) - if [ ${exit_label} -ne 0 ];then - grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 - echo "There are ${exit_label} cases that need look into!!! Please check them" - exit ${exit_label} + if [ "${{ inputs.suite }}" != 'pt2e' ];then + rm -rf /tmp/tmp-*.txt + source activate e2e_ci + bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY} + exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt) + if [ ${exit_label} -ne 0 ];then + grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1 + echo "There are ${exit_label} cases that need look into!!! Please check them" + exit ${exit_label} + fi + else + source activate e2e_ci + cp -r ${{ github.workspace }}/.github/scripts/summary_pt2e.py ${{ github.workspace }}/upload_files + cd ${{ github.workspace }}/upload_files + python summary_pt2e.py ${{ github.workspace }}/upload_files + rm -rf summary_pt2e.py fi - name: Upload Inductor XPU E2E Data if: ${{ ! cancelled() }} From 4c1adac52d016c0072e9f83ed9b96fa727ee65dd Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Fri, 7 Mar 2025 13:58:01 +0800 Subject: [PATCH 10/12] fix lint error and add calculation for accuracy/performance --- .github/scripts/summary_pt2e.py | 79 +++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/.github/scripts/summary_pt2e.py b/.github/scripts/summary_pt2e.py index e342e908a..0e6daebbb 100644 --- a/.github/scripts/summary_pt2e.py +++ b/.github/scripts/summary_pt2e.py @@ -4,20 +4,19 @@ import csv import pandas as pd - work_dir = sys.argv[1] -# scan files endwith .log and accuracy in folder +# scan files endwith .log and accuracy in file for root, dirs, files in os.walk(work_dir): for file in files: if file.endswith('.log') and 'accuracy' in file: log_file_path = os.path.join(root, file) - # generate related csv file + # generate related csv file csv_file_name = os.path.splitext(file)[0] + '.csv' csv_file_path = os.path.join(root, csv_file_name) # Data csvData = [] - # read log - with open(log_file_path, 'r', encoding='utf-8') as log_file: + # read log + with open(log_file_path, encoding='utf-8') as log_file: for line in log_file: if "Acc" in line: parts = line.strip().split() @@ -26,7 +25,7 @@ acc1 = parts[4] acc5 = parts[6] csvData.append([model,acc5]) - # write csv + # write csv with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file) writer.writerow(['Model',dt]) @@ -39,15 +38,14 @@ # generate csv csv_file_name = item + '.csv' csv_file_path = os.path.join(work_dir, csv_file_name) - - # data + # data csvData = [] # scan json for root, dirs, files in os.walk(item_path): for file in files: if file.endswith('.json'): json_file_path = os.path.join(root, file) - with open(json_file_path, 'r', encoding='utf-8') as json_file: + with open(json_file_path, encoding='utf-8') as json_file: data = json.load(json_file) metrics = data.get('metrics',{}) try: @@ -65,3 +63,66 @@ writer.writerow(['Model','Throughput']) writer.writerows(csvData) +# accuracy ratio +for filename in os.listdir(work_dir): + if filename.endswith('.csv') and 'accuracy' in filename and 'fp32' in filename: + file_path = os.path.join(work_dir, filename) + df_fp32 = pd.read_csv(file_path) + + if filename.endswith('.csv') and 'accuracy' in filename and 'int8' in filename: + file_path = os.path.join(work_dir, filename) + df_int8 = pd.read_csv(file_path) + +df_fp32_selected = df_fp32[['Model','fp32']] +df_int8_selected = df_int8[['Model','int8']] +acc_df = pd.merge(df_fp32_selected, df_int8_selected, on='Model') # merge csv files +acc_df['(fp32-int8)/fp32'] = (acc_df['fp32'] - acc_df['int8']) / acc_df['fp32'] # calculation +acc_df['int8/fp32'] = acc_df['int8'] / acc_df['fp32'] + +acc_df['(fp32-int8)/fp32'] = acc_df['(fp32-int8)/fp32'].apply(lambda x: f"{x:.2%}") # results percentages + +acc_df.to_csv('summary_acc.csv', index=False) # write to summary_acc.csv + +# perf ratio +for filename_perf in os.listdir(work_dir): + if filename_perf.endswith('.csv') and 'performance' in filename_perf and 'fp32' in filename_perf: + file_path = os.path.join(work_dir, filename_perf) + perf_fp32 = pd.read_csv(file_path) + + if filename_perf.endswith('.csv') and 'performance' in filename_perf and 'int8' in filename_perf: + file_path = os.path.join(work_dir, filename_perf) + perf_int8 = pd.read_csv(file_path) + +# Create Model Data +Model = { + 'Model': ['alexnet','demucs','dlrm','hf_Albert','hf_Bert','hf_Bert_large','hf_DistilBert','hf_Roberta_base','mnasnet1_0', + 'mobilenet_v2','mobilenet_v3_large','nvidia_deeprecommender','pytorch_CycleGAN_and_pix2pix', + 'resnet152','resnet18','resnet50','resnext50_32x4d','shufflenet_v2_x1_0','squeezenet1_1','Super_SloMo', + 'timm_efficientnet','timm_nfnet,timm_regnet','timm_resnest','timm_vision_transformer','timm_vision_transformer_large','timm_vovnet','vgg16'] + } + +perf_df = pd.DataFrame(Model) + +fp32_merged = pd.merge(perf_df, perf_fp32[['Model', 'Throughput']], on='Model', how='left').rename(columns={'Throughput': 'fp32'}) +int8_merged = pd.merge(perf_df, perf_int8[['Model', 'Throughput']], on='Model', how='left').rename(columns={'Throughput': 'int8'}) + +perf_df = pd.concat([fp32_merged, int8_merged], axis=1) +perf_df = perf_df.loc[:, ~perf_df.columns.duplicated()] #remove extra Model + +perf_df['int8/fp32'] = perf_df['int8']/perf_df['fp32'] + +# write to new csv file +perf_df.to_csv('summary_perf.csv', index=False) + + + + + + + + + + + + + From 08a6c15b332a7b21e0d604314d819517b3a2bc34 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Fri, 7 Mar 2025 14:30:19 +0800 Subject: [PATCH 11/12] fix lint error --- .github/scripts/summary_pt2e.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/.github/scripts/summary_pt2e.py b/.github/scripts/summary_pt2e.py index 0e6daebbb..5330ede72 100644 --- a/.github/scripts/summary_pt2e.py +++ b/.github/scripts/summary_pt2e.py @@ -62,17 +62,15 @@ writer = csv.writer(csv_file) writer.writerow(['Model','Throughput']) writer.writerows(csvData) - # accuracy ratio for filename in os.listdir(work_dir): if filename.endswith('.csv') and 'accuracy' in filename and 'fp32' in filename: file_path = os.path.join(work_dir, filename) df_fp32 = pd.read_csv(file_path) - if filename.endswith('.csv') and 'accuracy' in filename and 'int8' in filename: file_path = os.path.join(work_dir, filename) df_int8 = pd.read_csv(file_path) - + df_fp32_selected = df_fp32[['Model','fp32']] df_int8_selected = df_int8[['Model','int8']] acc_df = pd.merge(df_fp32_selected, df_int8_selected, on='Model') # merge csv files @@ -88,11 +86,9 @@ if filename_perf.endswith('.csv') and 'performance' in filename_perf and 'fp32' in filename_perf: file_path = os.path.join(work_dir, filename_perf) perf_fp32 = pd.read_csv(file_path) - if filename_perf.endswith('.csv') and 'performance' in filename_perf and 'int8' in filename_perf: file_path = os.path.join(work_dir, filename_perf) perf_int8 = pd.read_csv(file_path) - # Create Model Data Model = { 'Model': ['alexnet','demucs','dlrm','hf_Albert','hf_Bert','hf_Bert_large','hf_DistilBert','hf_Roberta_base','mnasnet1_0', @@ -100,7 +96,6 @@ 'resnet152','resnet18','resnet50','resnext50_32x4d','shufflenet_v2_x1_0','squeezenet1_1','Super_SloMo', 'timm_efficientnet','timm_nfnet,timm_regnet','timm_resnest','timm_vision_transformer','timm_vision_transformer_large','timm_vovnet','vgg16'] } - perf_df = pd.DataFrame(Model) fp32_merged = pd.merge(perf_df, perf_fp32[['Model', 'Throughput']], on='Model', how='left').rename(columns={'Throughput': 'fp32'}) @@ -114,15 +109,3 @@ # write to new csv file perf_df.to_csv('summary_perf.csv', index=False) - - - - - - - - - - - - From 745f9c63ba0151d3af9fe833f7fc38809919094a Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Mon, 10 Mar 2025 09:24:21 +0800 Subject: [PATCH 12/12] fix lint issue --- .github/scripts/summary_pt2e.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/summary_pt2e.py b/.github/scripts/summary_pt2e.py index 5330ede72..b849e756d 100644 --- a/.github/scripts/summary_pt2e.py +++ b/.github/scripts/summary_pt2e.py @@ -70,7 +70,7 @@ if filename.endswith('.csv') and 'accuracy' in filename and 'int8' in filename: file_path = os.path.join(work_dir, filename) df_int8 = pd.read_csv(file_path) - + df_fp32_selected = df_fp32[['Model','fp32']] df_int8_selected = df_int8[['Model','int8']] acc_df = pd.merge(df_fp32_selected, df_int8_selected, on='Model') # merge csv files