From ec51c4fe64e3305687fe7168fd2489f3c8171d5f Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Wed, 10 Jul 2024 16:52:53 -0700 Subject: [PATCH] adapt CI tests to use compiled_rmsnorm [ghstack-poisoned] --- test_runner.py | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/test_runner.py b/test_runner.py index 82492bbd..ec8d4363 100755 --- a/test_runner.py +++ b/test_runner.py @@ -54,7 +54,7 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--experimental.pipeline_parallel_schedule 1f1b", "--training.data_parallel_degree 1", - "--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP + "--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP ], ], "PP 1D test 1f1b", @@ -70,7 +70,7 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--experimental.pipeline_parallel_schedule gpipe", "--training.data_parallel_degree 1", - "--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP + "--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP ], ], "PP 1D test gpipe", @@ -86,7 +86,7 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--experimental.pipeline_parallel_schedule 1f1b", "--training.data_parallel_degree 2", - "--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP + "--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP ], ], "PP+DP 1f1b 2D test", @@ -101,7 +101,7 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--experimental.pipeline_parallel_schedule gpipe", "--training.data_parallel_degree 2", - "--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP + "--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP ], ], "PP+DP gpipe 2D test", @@ -115,7 +115,6 @@ def build_test_list(): "--experimental.pipeline_parallel_degree 2", "--experimental.pipeline_parallel_split_points layers.4", "--training.tensor_parallel_degree 2", - "--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP ], ], "PP+TP 2D test", @@ -129,7 +128,6 @@ def build_test_list(): "--experimental.pipeline_parallel_degree 2", "--experimental.pipeline_parallel_split_points layers.4", "--experimental.pipeline_parallel_split_mode tracer", - "--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with tracer ], ], "PP tracer frontend test", @@ -147,7 +145,16 @@ def build_test_list(): OverrideDefinitions( [ [ - "--training.compile --model.norm_type=rmsnorm", + "--training.tensor_parallel_degree 2", + ], + ], + "2D eager", + "2d_eager", + ), + OverrideDefinitions( + [ + [ + "--training.compile", ], ], "1D compile", @@ -167,29 +174,20 @@ def build_test_list(): OverrideDefinitions( [ [ - "--training.compile --training.tensor_parallel_degree 2 --model.norm_type=rmsnorm", + "--training.compile --training.tensor_parallel_degree 2", ], ], "2D compile", "2d_compile", ), - OverrideDefinitions( - [ - [ - "--training.tensor_parallel_degree 2 --model.norm_type=rmsnorm", - ], - ], - "Eager mode 2DParallel with rmsnorm", - "eager_2d_rmsnorm", - ), OverrideDefinitions( [ [ "--training.tensor_parallel_degree 2 --model.norm_type=fused_rmsnorm", ], ], - "Eager mode 2DParallel with fused_rmsnorm", - "eager_2d_fused_rmsnorm", + "2D eager with fused_rmsnorm", + "2d_eager_fused_rmsnorm", ), OverrideDefinitions( [ @@ -233,7 +231,6 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--training.data_parallel_degree 2", "--training.tensor_parallel_degree 2", - "--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP ], [ "--training.steps 20", @@ -242,7 +239,6 @@ def build_test_list(): "--experimental.pipeline_parallel_split_points layers.4", "--training.data_parallel_degree 2", "--training.tensor_parallel_degree 2", - "--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP ], ], "PP+DP+TP 3D test with save/load resume ckpt", @@ -257,7 +253,7 @@ def build_test_list(): "--experimental.pipeline_parallel_degree 4", "--experimental.pipeline_parallel_split_points layers.1,layers.2,layers.3,layers.4,layers.5,layers.6,layers.7", "--experimental.pipeline_parallel_schedule interleaved_1f1b", - "--model.norm_type rmsnorm", # fused_rmsnorm throws cuda context error with pp + "--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm throws cuda context error with pp ], ], "PP looped 1f1b test", @@ -277,7 +273,8 @@ def build_test_list(): OverrideDefinitions( [ [ - "--memory_estimation.enabled --model.norm_type rmsnorm", + "--memory_estimation.enabled", + "--model.norm_type rmsnorm", # estimation mode does not support compiled_rmsnorm yet ] ], "FSDP2 Memory Tracking and Estimation",