# inductor-A100-perf-nightly #633
# Viewer notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# Display name of the workflow.
name: inductor-A100-perf-nightly
# Triggers: two cron schedules plus a manual dispatch with per-config toggles.
on:
  schedule:
    # Monday through Saturday at 07:00. The test jobs compare
    # github.event.schedule against these exact strings, so keep them in sync.
    - cron: '0 7 * * 1-6'
    # Sunday at 07:00.
    - cron: '0 7 * * 0'
  # NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
  # out, let's try to run torchao cudagraphs_low_precision as part of cudagraphs
  workflow_dispatch:
    inputs:
      training:
        description: Run training (on by default)?
        required: false
        type: boolean
        default: true
      inference:
        description: Run inference (on by default)?
        required: false
        type: boolean
        default: true
      default:
        description: Run inductor_default?
        required: false
        type: boolean
        default: false
      dynamic:
        description: Run inductor_dynamic_shapes?
        required: false
        type: boolean
        default: false
      cppwrapper:
        description: Run inductor_cpp_wrapper?
        required: false
        type: boolean
        default: false
      cudagraphs:
        description: Run inductor_cudagraphs?
        required: false
        type: boolean
        default: true
      freezing_cudagraphs:
        description: Run inductor_cudagraphs with freezing for inference?
        required: false
        type: boolean
        default: false
      aotinductor:
        description: Run aot_inductor for inference?
        required: false
        type: boolean
        default: false
      maxautotune:
        description: Run inductor_max_autotune?
        required: false
        type: boolean
        default: false
      # Comma-separated config names; passed to the build job as
      # selected-test-configs.
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string
        default: inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf
# Runs that resolve to the same group string cancel any run already in
# progress. The group is built from the workflow name, the PR number or ref
# name, the commit SHA (branch refs only), and whether the trigger was a manual
# dispatch or a schedule.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
# Grant the workflow token read-only access to all scopes.
permissions: read-all
# Job graph: get-label-type -> build -> one of three test jobs, selected by
# which trigger (nightly cron, weekly cron, or manual dispatch) started the run.
jobs:
  # Calls the runner-determinator reusable workflow; its label-type output is
  # used as the runner prefix for the build job.
  get-label-type:
    name: get-label-type
    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
    # Scheduled runs are limited to pytorch/pytorch; other triggers only need
    # the repository owner to be pytorch.
    if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
      curr_branch: ${{ github.head_ref || github.ref_name }}
      curr_ref_type: ${{ github.ref_type }}

  # NB: Keep this in sync with trunk.yml
  linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
    name: cuda12.4-py3.10-gcc9-sm80
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
      cuda-arch-list: '8.0'
      # Passed as a literal string (block scalar), not as a YAML mapping.
      # Shards per config: huggingface 3, timm 5, torchbench 4.
      test-matrix: |
        { include: [
          { config: "inductor_huggingface_perf", shard: 1, num_shards: 3, runner: "linux.aws.a100" },
          { config: "inductor_huggingface_perf", shard: 2, num_shards: 3, runner: "linux.aws.a100" },
          { config: "inductor_huggingface_perf", shard: 3, num_shards: 3, runner: "linux.aws.a100" },
          { config: "inductor_timm_perf", shard: 1, num_shards: 5, runner: "linux.aws.a100" },
          { config: "inductor_timm_perf", shard: 2, num_shards: 5, runner: "linux.aws.a100" },
          { config: "inductor_timm_perf", shard: 3, num_shards: 5, runner: "linux.aws.a100" },
          { config: "inductor_timm_perf", shard: 4, num_shards: 5, runner: "linux.aws.a100" },
          { config: "inductor_timm_perf", shard: 5, num_shards: 5, runner: "linux.aws.a100" },
          { config: "inductor_torchbench_perf", shard: 1, num_shards: 4, runner: "linux.aws.a100" },
          { config: "inductor_torchbench_perf", shard: 2, num_shards: 4, runner: "linux.aws.a100" },
          { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.aws.a100" },
          { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.aws.a100" },
        ]}
      selected-test-configs: ${{ inputs.benchmark_configs }}
    secrets: inherit

  # Nightly run: only when triggered by the Monday-Saturday cron entry.
  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-nightly:
    name: cuda12.4-py3.10-gcc9-sm80
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
    # Must match the cron string under on.schedule exactly.
    if: github.event.schedule == '0 7 * * 1-6'
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
      dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
      timeout-minutes: 720
      # disable monitor in perf tests for more investigation
      disable-monitor: true
    secrets: inherit

  # Weekly run: only when triggered by the Sunday cron entry. Its dashboard tag
  # adds the maxautotune and freeze_autotune_cudagraphs settings, and the
  # timeout is doubled relative to the nightly job.
  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-weekly:
    name: cuda12.4-py3.10-gcc9-sm80
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
    # Must match the cron string under on.schedule exactly.
    if: github.event.schedule == '0 7 * * 0'
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
      dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
      timeout-minutes: 1440
      # disable monitor in perf tests for more investigation
      disable-monitor: true
    secrets: inherit

  # Manual run: the dashboard tag is assembled from the workflow_dispatch
  # inputs. cudagraphs_low_precision reuses the cudagraphs input.
  linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
    name: cuda12.4-py3.10-gcc9-sm80
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
    if: github.event_name == 'workflow_dispatch'
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
      dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
      timeout-minutes: 720
      # disable monitor in perf tests for more investigation
      disable-monitor: true
    secrets: inherit