Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable pt2e test #1412

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ runs:
set -xe
for suite in $(echo ${{ inputs.suite }} |sed 's/,/ /g')
do
if [ "${suite}" == "pt2e" ];then
continue
fi
contains "huggingface,timm_models,torchbench" $suite
$contains_status
for dt in $(echo ${{ inputs.dt }} |sed 's/,/ /g')
Expand Down
130 changes: 130 additions & 0 deletions .github/actions/pt2e/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
name: inductor-xpu-pt2e-test

inputs:
  env_prepare:
    required: false
    description: If set to any value, will prepare suite test env
  dt:
    required: true
    type: string
    default: 'float32'
    description: Data precision of the test. `float32,int8`. Delimiter is comma
  scenario:
    required: true
    type: string
    default: 'accuracy'
    description: accuracy,performance. Delimiter is comma
  hf_token:
    required: false
    description: HUGGING_FACE_HUB_TOKEN for torchbench test
  pytorch:
    required: false
    type: string
    default: 'main'
    description: Pytorch branch/commit
  driver:
    required: false
    type: string
    default: 'lts'
    description: Driver lts/rolling

runs:
  using: composite
  steps:
    - name: Prepare ENV
      if: ${{ inputs.env_prepare }}
      shell: bash
      run: |
        source activate e2e_ci
        source .github/scripts/env.sh ${{ inputs.pytorch }}
        # accuracy code
        if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then
          rm -rf pt2e-accuracy
          git clone -b main https://github.com/chuanqi129/inductor-tools pt2e-accuracy
        fi
        # performance code
        if [[ "${{ inputs.scenario }}" == *"performance"* ]];then
          rm -rf pt2e-performance
          git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance
        fi
        # deps (quoted interpolation: unquoted form breaks [[ ]] on multi-value input)
        if [[ "${{ inputs.scenario }}" == *"performance"* ]]; then
          pip install pyyaml botocore
          # rebuild audio/vision from pinned commits unless using nightly wheels
          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
            rm -rf pt2e-audio
            git clone --single-branch -b main https://github.com/pytorch/audio pt2e-audio
            cd pt2e-audio && git checkout $TORCHAUDIO_COMMIT_ID
            python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install --no-deps dist/*.whl
            cd ../
            rm -rf pt2e-vision
            git clone --single-branch -b main https://github.com/pytorch/vision pt2e-vision
            cd pt2e-vision && git checkout $TORCHVISION_COMMIT_ID
            python setup.py bdist_wheel && pip uninstall torchvision -y && pip install --no-deps dist/*.whl
            cd ../
          fi
          # torchbench
          python -c "import torch, torchvision, torchaudio"
          cd pt2e-performance && pip install --no-deps -r requirements.txt
          pip install -U transformers tokenizers safetensors
          python install.py --continue_on_fail
          cd ../
          # deps for torchrec_dlrm
          pip install pyre_extensions
          pip install fbgemm-gpu
          pip install --no-deps torchmetrics==1.0.3 torchrec
          # transformers
          pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION}
          # timm
          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID
          pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch)
        fi
        pip install numpy==1.26.4
        # dataset: download/prepare the imagenet validation set once per runner
        if [ ! -d ${HOME}/datasets/imagenet ];then
          rm -rf ${HOME}/datasets/imagenet
          mkdir -p ${HOME}/datasets/imagenet
          cd ${HOME}/datasets/imagenet
          wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
          tar -xf ILSVRC2012_img_val.tar
          wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
          bash valprep.sh
        fi
    - name: PT2E Test (${{ inputs.dt }} ${{ inputs.scenario }})
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
        NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
        DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
      shell: bash
      run: |
        source activate e2e_ci
        source .github/scripts/env.sh ${{ inputs.pytorch }}
        pt2e_logs_dir="${{ github.workspace }}/../pytorch/inductor_log"
        rm -rf "${pt2e_logs_dir}" && mkdir -p "${pt2e_logs_dir}"
        if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then
          if [[ "${{ inputs.dt }}" == *"float32"* ]];then
            python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --is_fp32 --dataset_dir ${HOME}/datasets/imagenet |\
              tee "${pt2e_logs_dir}/accuracy-fp32.log"
          fi
          if [[ "${{ inputs.dt }}" == *"int8"* ]];then
            python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${HOME}/datasets/imagenet |\
              tee "${pt2e_logs_dir}/accuracy-int8.log"
          fi
        fi
        if [[ "${{ inputs.scenario }}" == *"performance"* ]];then
          # keep this list in sync with PERF_MODELS in .github/scripts/summary_pt2e.py
          models="alexnet,demucs,dlrm,hf_Albert,hf_Bert,hf_Bert_large,hf_DistilBert,hf_Roberta_base,mnasnet1_0,mobilenet_v2,"
          models+="mobilenet_v3_large,nvidia_deeprecommender,pytorch_CycleGAN_and_pix2pix,resnet152,resnet18,resnet50,resnext50_32x4d,"
          models+="shufflenet_v2_x1_0,squeezenet1_1,Super_SloMo,timm_efficientnet,timm_nfnet,timm_regnet,timm_resnest,"
          models+="timm_vision_transformer,timm_vision_transformer_large,timm_vovnet,vgg16"
          if [[ "${{ inputs.dt }}" == *"float32"* ]];then
            rm -rf pt2e-performance/.userbenchmark
            python pt2e-performance/run_benchmark.py xpu --test eval --channels-last --metrics throughputs --torchdynamo inductor -m $models 2>&1 |\
              tee "${pt2e_logs_dir}/performance-fp32.log"
            mv pt2e-performance/.userbenchmark ${pt2e_logs_dir}/performance-fp32
          fi
          if [[ "${{ inputs.dt }}" == *"int8"* ]];then
            rm -rf pt2e-performance/.userbenchmark
            python pt2e-performance/run_benchmark.py xpu --test eval --channels-last --metrics throughputs --torchdynamo inductor --quantization pt2e -m $models 2>&1 |\
              tee "${pt2e_logs_dir}/performance-int8.log"
            mv pt2e-performance/.userbenchmark ${pt2e_logs_dir}/performance-int8
          fi
        fi
111 changes: 111 additions & 0 deletions .github/scripts/summary_pt2e.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import sys

Check failure on line 1 in .github/scripts/summary_pt2e.py

View workflow job for this annotation

GitHub Actions / preci-lint-check

NEWLINE Trailing newline

Trailing newline found. Run `lintrunner --take NEWLINE -a` to apply changes.
import os
import json
import csv
import pandas as pd

# Benchmark model list for the performance summary.  Must stay in sync with
# the torchbench model list driven by .github/actions/pt2e/action.yml.
# NOTE: the original fused 'timm_nfnet,timm_regnet' into a single string, so
# neither model ever matched a Throughput row in the merge below.
PERF_MODELS = [
    'alexnet', 'demucs', 'dlrm', 'hf_Albert', 'hf_Bert', 'hf_Bert_large',
    'hf_DistilBert', 'hf_Roberta_base', 'mnasnet1_0', 'mobilenet_v2',
    'mobilenet_v3_large', 'nvidia_deeprecommender',
    'pytorch_CycleGAN_and_pix2pix', 'resnet152', 'resnet18', 'resnet50',
    'resnext50_32x4d', 'shufflenet_v2_x1_0', 'squeezenet1_1', 'Super_SloMo',
    'timm_efficientnet', 'timm_nfnet', 'timm_regnet', 'timm_resnest',
    'timm_vision_transformer', 'timm_vision_transformer_large',
    'timm_vovnet', 'vgg16',
]


def summarize_accuracy_logs(work_dir):
    """Convert every accuracy ``*.log`` under ``work_dir`` into a sibling CSV.

    Each log line containing "Acc" is expected to look like
    ``<model>: <dtype>: ... <acc@1> ... <acc@5>`` (whitespace separated;
    fields 4 and 6 hold the two accuracies — TODO confirm against the
    inductor_quant_acc.py output format).  Only Acc@5 is written out, with a
    header of ``Model,<dtype>``.
    """
    for root, _dirs, files in os.walk(work_dir):
        for file in files:
            if not (file.endswith('.log') and 'accuracy' in file):
                continue
            log_file_path = os.path.join(root, file)
            csv_file_path = os.path.join(root, os.path.splitext(file)[0] + '.csv')
            # Fallback column label derived from the file name: the original
            # raised NameError on `dt` when a log had no "Acc" lines at all.
            dt = 'fp32' if 'fp32' in file else 'int8' if 'int8' in file else 'dt'
            csv_data = []
            with open(log_file_path, encoding='utf-8') as log_file:
                for line in log_file:
                    if 'Acc' not in line:
                        continue
                    parts = line.strip().split()
                    model = parts[0].rstrip(':')
                    dt = parts[1].rstrip(':')
                    # parts[4] is Acc@1; only Acc@5 (parts[6]) is summarized.
                    acc5 = parts[6]
                    csv_data.append([model, acc5])
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Model', dt])
                writer.writerows(csv_data)


def summarize_performance_jsons(work_dir):
    """For each sub-directory of ``work_dir``, collect torchbench userbenchmark
    JSON metrics into ``<subdir>.csv`` with columns ``Model,Throughput``.

    Metric keys of the form ``<model>-eval_throughput`` are extracted; other
    keys are ignored.
    """
    for item in os.listdir(work_dir):
        item_path = os.path.join(work_dir, item)
        if not os.path.isdir(item_path):
            continue
        csv_file_path = os.path.join(work_dir, item + '.csv')
        csv_data = []
        for root, _dirs, files in os.walk(item_path):
            for file in files:
                if not file.endswith('.json'):
                    continue
                json_file_path = os.path.join(root, file)
                # The original wrapped only the metrics loop in try/except, so
                # the JSONDecodeError from json.load() was never caught.
                try:
                    with open(json_file_path, encoding='utf-8') as json_file:
                        data = json.load(json_file)
                except json.JSONDecodeError:
                    print(f"Error decoding JSON file: {json_file_path}")
                    continue
                metrics = data.get('metrics', {})
                for key, value in metrics.items():
                    parts = key.rsplit('-eval_throughput', 1)
                    if len(parts) == 2:
                        csv_data.append([parts[0], value])
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(['Model', 'Throughput'])
            writer.writerows(csv_data)


def summarize_accuracy_ratio(work_dir):
    """Merge the fp32 and int8 accuracy CSVs and write ``summary_acc.csv``
    with absolute and relative int8-vs-fp32 accuracy ratios.

    Skips (with a message) when either dtype's CSV is absent — the original
    raised NameError in that case.
    """
    df_fp32 = df_int8 = None
    for filename in os.listdir(work_dir):
        if not (filename.endswith('.csv') and 'accuracy' in filename):
            continue
        file_path = os.path.join(work_dir, filename)
        if 'fp32' in filename:
            df_fp32 = pd.read_csv(file_path)
        elif 'int8' in filename:
            df_int8 = pd.read_csv(file_path)
    if df_fp32 is None or df_int8 is None:
        print('Skipping accuracy summary: need both fp32 and int8 accuracy CSVs')
        return
    # merge csv files on the common Model column
    acc_df = pd.merge(df_fp32[['Model', 'fp32']], df_int8[['Model', 'int8']], on='Model')
    acc_df['(fp32-int8)/fp32'] = (acc_df['fp32'] - acc_df['int8']) / acc_df['fp32']
    acc_df['int8/fp32'] = acc_df['int8'] / acc_df['fp32']
    # render the drop as a percentage string
    acc_df['(fp32-int8)/fp32'] = acc_df['(fp32-int8)/fp32'].apply(lambda x: f"{x:.2%}")
    acc_df.to_csv('summary_acc.csv', index=False)


def summarize_performance_ratio(work_dir):
    """Merge fp32 and int8 throughput CSVs over PERF_MODELS and write
    ``summary_perf.csv`` with an ``int8/fp32`` speedup column.

    Models missing from either run get NaN (left join).  Skips when either
    dtype's CSV is absent — the original raised NameError in that case.
    """
    perf_fp32 = perf_int8 = None
    for filename in os.listdir(work_dir):
        if not (filename.endswith('.csv') and 'performance' in filename):
            continue
        file_path = os.path.join(work_dir, filename)
        if 'fp32' in filename:
            perf_fp32 = pd.read_csv(file_path)
        elif 'int8' in filename:
            perf_int8 = pd.read_csv(file_path)
    if perf_fp32 is None or perf_int8 is None:
        print('Skipping performance summary: need both fp32 and int8 performance CSVs')
        return
    perf_df = pd.DataFrame({'Model': PERF_MODELS})
    fp32_merged = pd.merge(perf_df, perf_fp32[['Model', 'Throughput']],
                           on='Model', how='left').rename(columns={'Throughput': 'fp32'})
    int8_merged = pd.merge(perf_df, perf_int8[['Model', 'Throughput']],
                           on='Model', how='left').rename(columns={'Throughput': 'int8'})
    perf_df = pd.concat([fp32_merged, int8_merged], axis=1)
    perf_df = perf_df.loc[:, ~perf_df.columns.duplicated()]  # remove extra Model
    perf_df['int8/fp32'] = perf_df['int8'] / perf_df['fp32']
    # write to new csv file
    perf_df.to_csv('summary_perf.csv', index=False)


if __name__ == '__main__':
    work_dir = sys.argv[1]
    # scan files ending with .log and 'accuracy' in the name
    summarize_accuracy_logs(work_dir)
    # scan .json files under per-scenario sub-directories
    summarize_performance_jsons(work_dir)
    # accuracy ratio
    summarize_accuracy_ratio(work_dir)
    # perf ratio
    summarize_performance_ratio(work_dir)

54 changes: 44 additions & 10 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ on:
required: true
type: string
default: 'huggingface'
description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma
description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma
dt:
required: true
type: string
Expand Down Expand Up @@ -233,6 +233,15 @@ jobs:
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Nightly PT2E Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/pt2e
with:
dt: float32,int8
scenario: accuracy,performance
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

# Weekly launch
- name: Weekly Huggingface Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
Expand Down Expand Up @@ -264,9 +273,18 @@ jobs:
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Weekly PT2E Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/pt2e
with:
env_prepare: true
dt: float32,int8
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

# On-demand launch
- name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
if: github.event_name != 'schedule'
if: ${{ github.event_name != 'schedule' && inputs.suite != 'pt2e' }}
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: ${{ inputs.suite }}
Expand All @@ -275,6 +293,14 @@ jobs:
mode: ${{ inputs.mode }}
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: OnDemand PT2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
if: ${{ github.event_name != 'schedule' && contains(inputs.suite, 'pt2e') }}
uses: ./.github/actions/pt2e
with:
env_prepare: true
dt: ${{ inputs.dt }}
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

- name: Summarize archieve files
id: summary
Expand All @@ -286,14 +312,22 @@ jobs:
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
# Print summary
rm -rf /tmp/tmp-*.txt
source activate e2e_ci
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
if [ "${{ inputs.suite }}" != 'pt2e' ];then
rm -rf /tmp/tmp-*.txt
source activate e2e_ci
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
else
source activate e2e_ci
cp -r ${{ github.workspace }}/.github/scripts/summary_pt2e.py ${{ github.workspace }}/upload_files
cd ${{ github.workspace }}/upload_files
python summary_pt2e.py ${{ github.workspace }}/upload_files
rm -rf summary_pt2e.py
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
Expand Down
Loading
Loading