From cba7991d686b17568dc934276792b2a23b231c2d Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:33:35 +0530 Subject: [PATCH 01/28] remove validation args from textual inversion tests --- examples/textual_inversion/test_textual_inversion.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index a5d7bcb65dd3..5d645d06f867 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -102,8 +102,6 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl --learnable_property object --placeholder_token --initializer_token a - --validation_prompt - --validation_steps 1 --save_steps 1 --num_vectors 2 --resolution 64 From ede49cf3b554d8c6aa1a6d2f778f844f861b96ed Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:37:20 +0530 Subject: [PATCH 02/28] reduce number of train steps in textual inversion tests --- examples/textual_inversion/test_textual_inversion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index 5d645d06f867..f8309d4d1bc8 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -107,7 +107,7 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 3 + --max_train_steps 2 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -121,7 +121,7 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-1", "checkpoint-2", "checkpoint-3"}, + {"checkpoint-1", 
"checkpoint-2"}, ) resume_run_args = f""" @@ -138,14 +138,14 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 4 + --max_train_steps 2 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} --checkpointing_steps=1 - --resume_from_checkpoint=checkpoint-3 + --resume_from_checkpoint=checkpoint-2 --checkpoints_total_limit=2 """.split() From 85f3160f56ddffd248e635099b883cce2f969008 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:38:45 +0530 Subject: [PATCH 03/28] fix: directories. --- examples/textual_inversion/test_textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index f8309d4d1bc8..f3271e45214d 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -154,5 +154,5 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-3", "checkpoint-4"}, + {"checkpoint-4"}, ) From b7dfcb96ba4b48e00e84dbc3502aa96bb9a29707 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:40:11 +0530 Subject: [PATCH 04/28] debig --- examples/textual_inversion/test_textual_inversion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index f3271e45214d..b927ddec53e0 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -152,6 +152,7 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl run_command(self._launch_args + 
resume_run_args) # check checkpoint directories exist + print(f"Directories: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4"}, From 29f6ece2ae60952af99c9f51bdd485714de5f331 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:41:32 +0530 Subject: [PATCH 05/28] fix: directories. --- examples/textual_inversion/test_textual_inversion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index b927ddec53e0..7e963da9173a 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -152,8 +152,7 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl run_command(self._launch_args + resume_run_args) # check checkpoint directories exist - print(f"Directories: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4"}, + {"checkpoint-2", "checkpoint-3"}, ) From ec522821cf005b74e728a4e480aabd587d28028e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:44:17 +0530 Subject: [PATCH 06/28] remove validation tests from textual inversion --- examples/textual_inversion/test_textual_inversion.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py index 7e963da9173a..ba9cabd9aafe 100644 --- a/examples/textual_inversion/test_textual_inversion.py +++ b/examples/textual_inversion/test_textual_inversion.py @@ -40,8 +40,6 @@ def test_textual_inversion(self): --learnable_property object --placeholder_token --initializer_token a - --validation_prompt - --validation_steps 1 --save_steps 1 --num_vectors 2 --resolution 64 @@ -68,8 +66,6 @@ def test_textual_inversion_checkpointing(self): --learnable_property object 
--placeholder_token --initializer_token a - --validation_prompt - --validation_steps 1 --save_steps 1 --num_vectors 2 --resolution 64 @@ -131,8 +127,6 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl --learnable_property object --placeholder_token --initializer_token a - --validation_prompt - --validation_steps 1 --save_steps 1 --num_vectors 2 --resolution 64 From ff7e4ae680ae7e858586f969cbaf51fe1e1ef460 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:49:46 +0530 Subject: [PATCH 07/28] try reducing the time of test_text_to_image_checkpointing_use_ema --- examples/text_to_image/test_text_to_image.py | 24 +++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 308a038b5533..ac0070170ee3 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -149,7 +149,7 @@ def test_text_to_image_checkpointing_use_ema(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 5, checkpointing_steps == 2 + # max_train_steps == 4, checkpointing_steps == 2 # Should create checkpoints at steps 2, 4 initial_run_args = f""" @@ -161,7 +161,7 @@ def test_text_to_image_checkpointing_use_ema(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 5 + --max_train_steps 4 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -186,12 +186,12 @@ def test_text_to_image_checkpointing_use_ema(self): # check can run an intermediate checkpoint unet = UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet") pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # Remove checkpoint 2 so that we can check only later checkpoints 
exist after resuming shutil.rmtree(os.path.join(tmpdir, "checkpoint-2")) - # Run training script for 7 total steps resuming from checkpoint 4 + # Run training script for 2 total steps resuming from checkpoint 4 resume_run_args = f""" examples/text_to_image/train_text_to_image.py @@ -202,13 +202,13 @@ def test_text_to_image_checkpointing_use_ema(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 2 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} - --checkpointing_steps=2 + --checkpointing_steps=1 --resume_from_checkpoint=checkpoint-4 --use_ema --seed=0 @@ -218,16 +218,14 @@ def test_text_to_image_checkpointing_use_ema(self): # check can run new fully trained pipeline pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) + # no checkpoint-2 -> check old checkpoints do not exist + # check new checkpoints exist + print(f"Directory: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - { - # no checkpoint-2 -> check old checkpoints do not exist - # check new checkpoints exist - "checkpoint-4", - "checkpoint-6", - }, + {"checkpoint-4", "checkpoint-6"}, ) def test_text_to_image_checkpointing_checkpoints_total_limit(self): From 1e63a6e2a1927a1caa4ad2c9defe8a74e11fc81b Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:52:13 +0530 Subject: [PATCH 08/28] fix: directories --- examples/text_to_image/test_text_to_image.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index ac0070170ee3..5cd4caa9150a 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -222,10 +222,9 @@ def test_text_to_image_checkpointing_use_ema(self): # no checkpoint-2 -> 
check old checkpoints do not exist # check new checkpoints exist - print(f"Directory: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"}, + {"checkpoint-4", "checkpoint-5"}, ) def test_text_to_image_checkpointing_checkpoints_total_limit(self): From e07f9c969b1938ba40628d476b5e386cb24f120c Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:55:13 +0530 Subject: [PATCH 09/28] speed up test_text_to_image_checkpointing --- examples/text_to_image/test_text_to_image.py | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 5cd4caa9150a..599233862986 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -64,7 +64,7 @@ def test_text_to_image_checkpointing(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 5, checkpointing_steps == 2 + # max_train_steps == 4, checkpointing_steps == 2 # Should create checkpoints at steps 2, 4 initial_run_args = f""" @@ -76,7 +76,7 @@ def test_text_to_image_checkpointing(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 5 + --max_train_steps 4 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -89,7 +89,7 @@ def test_text_to_image_checkpointing(self): run_command(self._launch_args + initial_run_args) pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist self.assertEqual( @@ -100,12 +100,12 @@ def test_text_to_image_checkpointing(self): # check can run an intermediate checkpoint unet = UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet") pipe = 
DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # Remove checkpoint 2 so that we can check only later checkpoints exist after resuming shutil.rmtree(os.path.join(tmpdir, "checkpoint-2")) - # Run training script for 7 total steps resuming from checkpoint 4 + # Run training script for 2 total steps resuming from checkpoint 4 resume_run_args = f""" examples/text_to_image/train_text_to_image.py @@ -116,7 +116,7 @@ def test_text_to_image_checkpointing(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 2 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -131,16 +131,13 @@ def test_text_to_image_checkpointing(self): # check can run new fully trained pipeline pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) + # no checkpoint-2 -> check old checkpoints do not exist + # check new checkpoints exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - { - # no checkpoint-2 -> check old checkpoints do not exist - # check new checkpoints exist - "checkpoint-4", - "checkpoint-6", - }, + {"checkpoint-4", "checkpoint-5"}, ) def test_text_to_image_checkpointing_use_ema(self): From bff0d05b66b8a3f1d07af81c5513920bc2d00332 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 12:58:51 +0530 Subject: [PATCH 10/28] speed up test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- examples/text_to_image/test_text_to_image.py | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 599233862986..93df0a7449ef 100644 --- a/examples/text_to_image/test_text_to_image.py +++ 
b/examples/text_to_image/test_text_to_image.py @@ -122,7 +122,7 @@ def test_text_to_image_checkpointing(self): --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} - --checkpointing_steps=2 + --checkpointing_steps=1 --resume_from_checkpoint=checkpoint-4 --seed=0 """.split() @@ -272,8 +272,8 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 9, checkpointing_steps == 2 - # Should create checkpoints at steps 2, 4, 6, 8 + # max_train_steps == 4, checkpointing_steps == 2 + # Should create checkpoints at steps 2, 4, 6 initial_run_args = f""" examples/text_to_image/train_text_to_image.py @@ -284,7 +284,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 9 + --max_train_steps 4 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -297,7 +297,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch run_command(self._launch_args + initial_run_args) pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist self.assertEqual( @@ -305,7 +305,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, ) - # resume and we should try to checkpoint at 10, where we'll have to remove + # resume and we should try to checkpoint at 6, where we'll have to remove # checkpoint-2 and checkpoint-4 instead of just a single previous checkpoint resume_run_args = f""" @@ -317,27 +317,27 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 11 + 
--max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-4 + --checkpoints_total_limit=2 --seed=0 """.split() run_command(self._launch_args + resume_run_args) pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, + {"checkpoint-6", "checkpoint-8"}, ) From b8032c7a91d8b76304891bb090936fb73b770bc5 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:02:11 +0530 Subject: [PATCH 11/28] fix --- examples/text_to_image/test_text_to_image.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 93df0a7449ef..6a9c8c94f5bb 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -273,7 +273,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing # max_train_steps == 4, checkpointing_steps == 2 - # Should create checkpoints at steps 2, 4, 6 + # Should create checkpoints at steps 2, 4 initial_run_args = f""" examples/text_to_image/train_text_to_image.py @@ -302,7 +302,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"}, ) # resume and we should try to checkpoint at 6, where we'll have to remove @@ 
-317,7 +317,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 6 + --max_train_steps 8 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant From e84c10aba2f1074ca9e9c953f2f5490b56722745 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:03:52 +0530 Subject: [PATCH 12/28] speed up test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- examples/instruct_pix2pix/test_instruct_pix2pix.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/instruct_pix2pix/test_instruct_pix2pix.py b/examples/instruct_pix2pix/test_instruct_pix2pix.py index c4d7500723fa..3ab1fa52eaa0 100644 --- a/examples/instruct_pix2pix/test_instruct_pix2pix.py +++ b/examples/instruct_pix2pix/test_instruct_pix2pix.py @@ -63,7 +63,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple --resolution=64 --random_flip --train_batch_size=1 - --max_train_steps=9 + --max_train_steps=4 --checkpointing_steps=2 --output_dir {tmpdir} --seed=0 @@ -74,7 +74,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"}, ) resume_run_args = f""" @@ -84,11 +84,11 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple --resolution=64 --random_flip --train_batch_size=1 - --max_train_steps=11 + --max_train_steps=8 --checkpointing_steps=2 --output_dir {tmpdir} --seed=0 - --resume_from_checkpoint=checkpoint-8 + --resume_from_checkpoint=checkpoint-4 --checkpoints_total_limit=3 """.split() @@ -97,5 +97,5 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple # check checkpoint directories exist 
self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, + {"checkpoint-6", "checkpoint-8"}, ) From 443bcd5f813d7c86043be298157b7a8b80fdd115 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:05:26 +0530 Subject: [PATCH 13/28] set checkpoints_total_limit to 2. --- examples/instruct_pix2pix/test_instruct_pix2pix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/instruct_pix2pix/test_instruct_pix2pix.py b/examples/instruct_pix2pix/test_instruct_pix2pix.py index 3ab1fa52eaa0..078d2cafd759 100644 --- a/examples/instruct_pix2pix/test_instruct_pix2pix.py +++ b/examples/instruct_pix2pix/test_instruct_pix2pix.py @@ -89,7 +89,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple --output_dir {tmpdir} --seed=0 --resume_from_checkpoint=checkpoint-4 - --checkpoints_total_limit=3 + --checkpoints_total_limit=2 """.split() run_command(self._launch_args + resume_run_args) From 9d3ea3139a956c2fd6210997c6a7dcb5bd9c4f94 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:08:53 +0530 Subject: [PATCH 14/28] test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints speed up --- .../text_to_image/test_text_to_image_lora.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py index 83cbb78b2dc6..f2453a5e1dcc 100644 --- a/examples/text_to_image/test_text_to_image_lora.py +++ b/examples/text_to_image/test_text_to_image_lora.py @@ -127,8 +127,8 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 9, checkpointing_steps == 2 - # Should create checkpoints at steps 2, 4, 6, 8 + # max_train_steps == 4, checkpointing_steps == 2 + # 
Should create checkpoints at steps 2, 4 initial_run_args = f""" examples/text_to_image/train_text_to_image_lora.py @@ -139,7 +139,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 9 + --max_train_steps 4 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -156,15 +156,15 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None ) pipe.load_lora_weights(tmpdir) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"}, ) - # resume and we should try to checkpoint at 10, where we'll have to remove + # resume and we should try to checkpoint at 6, where we'll have to remove # checkpoint-2 and checkpoint-4 instead of just a single previous checkpoint resume_run_args = f""" @@ -176,15 +176,15 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 11 + --max_train_steps 8 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-4 + --checkpoints_total_limit=2 --seed=0 --num_validation_images=0 """.split() @@ -195,12 +195,12 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None ) pipe.load_lora_weights(tmpdir) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist self.assertEqual( 
{x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, + {"checkpoint-6", "checkpoint-8"}, ) From 4296ce11dde073620ec867045480b5b44d8d3af9 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:13:19 +0530 Subject: [PATCH 15/28] speed up test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- .../test_unconditional.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/unconditional_image_generation/test_unconditional.py b/examples/unconditional_image_generation/test_unconditional.py index b7e19abe9f6e..261a8d725afd 100644 --- a/examples/unconditional_image_generation/test_unconditional.py +++ b/examples/unconditional_image_generation/test_unconditional.py @@ -90,10 +90,10 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch --train_batch_size 1 --num_epochs 1 --gradient_accumulation_steps 1 - --ddpm_num_inference_steps 2 + --ddpm_num_inference_steps 1 --learning_rate 1e-3 --lr_warmup_steps 5 - --checkpointing_steps=1 + --checkpointing_steps=2 """.split() run_command(self._launch_args + initial_run_args) @@ -101,7 +101,7 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-1", "checkpoint-2", "checkpoint-3", "checkpoint-4", "checkpoint-5", "checkpoint-6"}, + {"checkpoint-2", "checkpoint-4", "checkpoint-6"}, ) resume_run_args = f""" @@ -113,12 +113,12 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch --train_batch_size 1 --num_epochs 2 --gradient_accumulation_steps 1 - --ddpm_num_inference_steps 2 + --ddpm_num_inference_steps 1 --learning_rate 1e-3 --lr_warmup_steps 5 --resume_from_checkpoint=checkpoint-6 --checkpointing_steps=2 - --checkpoints_total_limit=3 + --checkpoints_total_limit=2 """.split() 
run_command(self._launch_args + resume_run_args) @@ -126,5 +126,5 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch # check checkpoint directories exist self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-8", "checkpoint-10", "checkpoint-12"}, + {"checkpoint-8", "checkpoint-10"}, ) From 2c7e97983ad00f6cfd61098788340b69752adae4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:14:43 +0530 Subject: [PATCH 16/28] debug --- examples/unconditional_image_generation/test_unconditional.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/unconditional_image_generation/test_unconditional.py b/examples/unconditional_image_generation/test_unconditional.py index 261a8d725afd..36dcb7466d01 100644 --- a/examples/unconditional_image_generation/test_unconditional.py +++ b/examples/unconditional_image_generation/test_unconditional.py @@ -124,6 +124,7 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch run_command(self._launch_args + resume_run_args) # check checkpoint directories exist + print(f"Directory: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-8", "checkpoint-10"}, From 6ec0d847914143a5e824d21ec4f4645e0c493fed Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:15:59 +0530 Subject: [PATCH 17/28] fix: directories. 
--- examples/unconditional_image_generation/test_unconditional.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/unconditional_image_generation/test_unconditional.py b/examples/unconditional_image_generation/test_unconditional.py index 36dcb7466d01..49e11f33d4e1 100644 --- a/examples/unconditional_image_generation/test_unconditional.py +++ b/examples/unconditional_image_generation/test_unconditional.py @@ -124,8 +124,7 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch run_command(self._launch_args + resume_run_args) # check checkpoint directories exist - print(f"Directory: {os.listdir(tmpdir)}") self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-8", "checkpoint-10"}, + {"checkpoint-10", "checkpoint-12"}, ) From fca5113344daca8ec0582b1a88f767faa99a2f9d Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:18:54 +0530 Subject: [PATCH 18/28] speed up test_instruct_pix2pix_checkpointing_checkpoints_total_limit --- examples/instruct_pix2pix/test_instruct_pix2pix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/instruct_pix2pix/test_instruct_pix2pix.py b/examples/instruct_pix2pix/test_instruct_pix2pix.py index 078d2cafd759..b30baf8b1b02 100644 --- a/examples/instruct_pix2pix/test_instruct_pix2pix.py +++ b/examples/instruct_pix2pix/test_instruct_pix2pix.py @@ -40,7 +40,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit(self): --resolution=64 --random_flip --train_batch_size=1 - --max_train_steps=7 + --max_train_steps=6 --checkpointing_steps=2 --checkpoints_total_limit=2 --output_dir {tmpdir} From d71605b868d0805def1985acd00fcea5821fdfe1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:20:59 +0530 Subject: [PATCH 19/28] speed up: test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- examples/controlnet/test_controlnet.py | 15 ++++++--------- 1 file changed, 6 
insertions(+), 9 deletions(-) diff --git a/examples/controlnet/test_controlnet.py b/examples/controlnet/test_controlnet.py index e62d095adaa2..5d40b717f2af 100644 --- a/examples/controlnet/test_controlnet.py +++ b/examples/controlnet/test_controlnet.py @@ -65,7 +65,7 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check --train_batch_size=1 --gradient_accumulation_steps=1 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet - --max_train_steps=9 + --max_train_steps=6 --checkpointing_steps=2 """.split() @@ -73,7 +73,7 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4", "checkpoint-6"}, ) resume_run_args = f""" @@ -85,18 +85,15 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check --train_batch_size=1 --gradient_accumulation_steps=1 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet - --max_train_steps=11 + --max_train_steps=8 --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-6 + --checkpoints_total_limit=2 """.split() run_command(self._launch_args + resume_run_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-8", "checkpoint-10", "checkpoint-12"}, - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"}) class ControlNetSDXL(ExamplesTestsAccelerate): From e137f8a1f305daf76f79bd5f0f01c568a2a04a21 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:23:02 +0530 Subject: [PATCH 20/28] speed up test_controlnet_sdxl --- examples/controlnet/test_controlnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/controlnet/test_controlnet.py 
b/examples/controlnet/test_controlnet.py index 5d40b717f2af..e1adafe6be6f 100644 --- a/examples/controlnet/test_controlnet.py +++ b/examples/controlnet/test_controlnet.py @@ -108,7 +108,7 @@ def test_controlnet_sdxl(self): --train_batch_size=1 --gradient_accumulation_steps=1 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet-sdxl - --max_train_steps=9 + --max_train_steps=4 --checkpointing_steps=2 """.split() From 7afc28389a1ad4c20abb9f76716cd56ae80c0519 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:28:13 +0530 Subject: [PATCH 21/28] speed up dreambooth tests --- examples/dreambooth/test_dreambooth.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/dreambooth/test_dreambooth.py b/examples/dreambooth/test_dreambooth.py index 0c6c2a062325..38b17e784d8b 100644 --- a/examples/dreambooth/test_dreambooth.py +++ b/examples/dreambooth/test_dreambooth.py @@ -89,7 +89,7 @@ def test_dreambooth_checkpointing(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 5, checkpointing_steps == 2 + # max_train_steps == 4, checkpointing_steps == 2 # Should create checkpoints at steps 2, 4 initial_run_args = f""" @@ -100,7 +100,7 @@ def test_dreambooth_checkpointing(self): --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 5 + --max_train_steps 4 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -114,7 +114,7 @@ def test_dreambooth_checkpointing(self): # check can run the original fully trained output pipeline pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(instance_prompt, num_inference_steps=2) + pipe(instance_prompt, num_inference_steps=1) # check checkpoint directories exist self.assertTrue(os.path.isdir(os.path.join(tmpdir, "checkpoint-2"))) @@ -123,7 +123,7 @@ def test_dreambooth_checkpointing(self): # check can run an intermediate checkpoint unet = 
UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet") pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None) - pipe(instance_prompt, num_inference_steps=2) + pipe(instance_prompt, num_inference_steps=1) # Remove checkpoint 2 so that we can check only later checkpoints exist after resuming shutil.rmtree(os.path.join(tmpdir, "checkpoint-2")) @@ -138,7 +138,7 @@ def test_dreambooth_checkpointing(self): --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -153,7 +153,7 @@ def test_dreambooth_checkpointing(self): # check can run new fully trained pipeline pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(instance_prompt, num_inference_steps=2) + pipe(instance_prompt, num_inference_steps=1) # check old checkpoints do not exist self.assertFalse(os.path.isdir(os.path.join(tmpdir, "checkpoint-2"))) @@ -196,7 +196,7 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check --resolution=64 --train_batch_size=1 --gradient_accumulation_steps=1 - --max_train_steps=9 + --max_train_steps=4 --checkpointing_steps=2 """.split() @@ -204,7 +204,7 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"}, ) resume_run_args = f""" @@ -216,15 +216,15 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check --resolution=64 --train_batch_size=1 --gradient_accumulation_steps=1 - --max_train_steps=11 + --max_train_steps=8 --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-4 + --checkpoints_total_limit=2 """.split() run_command(self._launch_args + 
resume_run_args) self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, + {"checkpoint-6", "checkpoint-8"} ) From 4f2fcde2df7acc4e7c3bb3d574577ef71a2b6c3e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:32:38 +0530 Subject: [PATCH 22/28] speed up test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- examples/dreambooth/test_dreambooth_lora.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora.py b/examples/dreambooth/test_dreambooth_lora.py index fc43269f732e..70aee14ffb36 100644 --- a/examples/dreambooth/test_dreambooth_lora.py +++ b/examples/dreambooth/test_dreambooth_lora.py @@ -135,7 +135,7 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_ --resolution=64 --train_batch_size=1 --gradient_accumulation_steps=1 - --max_train_steps=9 + --max_train_steps=4 --checkpointing_steps=2 """.split() @@ -143,7 +143,7 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_ self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"} ) resume_run_args = f""" @@ -155,17 +155,17 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_ --resolution=64 --train_batch_size=1 --gradient_accumulation_steps=1 - --max_train_steps=11 + --max_train_steps=8 --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-4 + --checkpoints_total_limit=2 """.split() run_command(self._launch_args + resume_run_args) self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, + {"checkpoint-6", "checkpoint-8"} ) def test_dreambooth_lora_if_model(self): From 
651cfce37d53d7b0496cffaced81eabafefe81e1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:35:34 +0530 Subject: [PATCH 23/28] speed up test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints --- .../custom_diffusion/test_custom_diffusion.py | 15 ++++++--------- examples/dreambooth/test_dreambooth.py | 5 +---- examples/dreambooth/test_dreambooth_lora.py | 10 ++-------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/examples/custom_diffusion/test_custom_diffusion.py b/examples/custom_diffusion/test_custom_diffusion.py index 78f24c5172d6..1266a7c3aaab 100644 --- a/examples/custom_diffusion/test_custom_diffusion.py +++ b/examples/custom_diffusion/test_custom_diffusion.py @@ -93,7 +93,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple --train_batch_size=1 --modifier_token= --dataloader_num_workers=0 - --max_train_steps=9 + --max_train_steps=4 --checkpointing_steps=2 --no_safe_serialization """.split() @@ -102,7 +102,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"}, + {"checkpoint-2", "checkpoint-4"}, ) resume_run_args = f""" @@ -115,16 +115,13 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple --train_batch_size=1 --modifier_token= --dataloader_num_workers=0 - --max_train_steps=11 + --max_train_steps=8 --checkpointing_steps=2 - --resume_from_checkpoint=checkpoint-8 - --checkpoints_total_limit=3 + --resume_from_checkpoint=checkpoint-4 + --checkpoints_total_limit=2 --no_safe_serialization """.split() run_command(self._launch_args + resume_run_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8", "checkpoint-10"}, - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", 
"checkpoint-8"}) diff --git a/examples/dreambooth/test_dreambooth.py b/examples/dreambooth/test_dreambooth.py index 38b17e784d8b..ce2f3215bc71 100644 --- a/examples/dreambooth/test_dreambooth.py +++ b/examples/dreambooth/test_dreambooth.py @@ -224,7 +224,4 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check run_command(self._launch_args + resume_run_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"}) diff --git a/examples/dreambooth/test_dreambooth_lora.py b/examples/dreambooth/test_dreambooth_lora.py index 70aee14ffb36..fa1064123620 100644 --- a/examples/dreambooth/test_dreambooth_lora.py +++ b/examples/dreambooth/test_dreambooth_lora.py @@ -141,10 +141,7 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_ run_command(self._launch_args + test_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-2", "checkpoint-4"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-2", "checkpoint-4"}) resume_run_args = f""" examples/dreambooth/train_dreambooth_lora.py @@ -163,10 +160,7 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_ run_command(self._launch_args + resume_run_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-6", "checkpoint-8"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"}) def test_dreambooth_lora_if_model(self): with tempfile.TemporaryDirectory() as tmpdir: From 9fa43da22aeeabf9a071253117ad8cf3901c5ae8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:37:57 +0530 Subject: [PATCH 24/28] speed up test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_limit --- 
examples/text_to_image/test_text_to_image_lora.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py index f2453a5e1dcc..297caaf1edde 100644 --- a/examples/text_to_image/test_text_to_image_lora.py +++ b/examples/text_to_image/test_text_to_image_lora.py @@ -272,7 +272,7 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2 + # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2 # Should create checkpoints at steps 2, 4, 6 # with checkpoint at step 2 deleted @@ -283,7 +283,7 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -298,11 +298,8 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li pipe = DiffusionPipeline.from_pretrained(pipeline_path) pipe.load_lora_weights(tmpdir) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - # checkpoint-2 should have been deleted - {"checkpoint-4", "checkpoint-6"}, - ) + # checkpoint-2 should have been deleted + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) From c1dcdc20cc2330e6e6dd8c15109e747672161cd1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:47:53 +0530 Subject: [PATCH 25/28] speed up test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit --- examples/text_to_image/test_text_to_image_lora.py | 13 +++++-------- 1 file changed, 5 insertions(+), 
8 deletions(-) diff --git a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py index 297caaf1edde..3a99cc1c558a 100644 --- a/examples/text_to_image/test_text_to_image_lora.py +++ b/examples/text_to_image/test_text_to_image_lora.py @@ -41,7 +41,7 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2 + # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2 # Should create checkpoints at steps 2, 4, 6 # with checkpoint at step 2 deleted @@ -52,7 +52,7 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self): --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -66,14 +66,11 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self): pipe = DiffusionPipeline.from_pretrained(pipeline_path) pipe.load_lora_weights(tmpdir) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - # checkpoint-2 should have been deleted - {"checkpoint-4", "checkpoint-6"}, - ) + # checkpoint-2 should have been deleted + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self): pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe" From e999fafaef1872d5b4bded4946fe26cf6abf03d3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:50:04 +0530 Subject: [PATCH 26/28] speed up 
examples/text_to_image/test_text_to_image.py::TextToImage::test_text_to_image_checkpointing_checkpoints_total_limit --- examples/text_to_image/test_text_to_image.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 6a9c8c94f5bb..3aa557b5349e 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -230,7 +230,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2 + # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2 # Should create checkpoints at steps 2, 4, 6 # with checkpoint at step 2 deleted @@ -243,7 +243,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -257,13 +257,13 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self): run_command(self._launch_args + initial_run_args) pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist + # checkpoint-2 should have been deleted self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - # checkpoint-2 should have been deleted - {"checkpoint-4", "checkpoint-6"}, + {"checkpoint-4", "checkpoint-6"} ) def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): From 3d66d1ecf95c73a4ae880774e311fe4164c32a81 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 13:52:45 +0530 Subject: [PATCH 27/28] additional speed ups --- 
examples/custom_diffusion/test_custom_diffusion.py | 2 +- examples/dreambooth/test_dreambooth_lora.py | 8 ++++---- examples/text_to_image/test_text_to_image_lora.py | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/custom_diffusion/test_custom_diffusion.py b/examples/custom_diffusion/test_custom_diffusion.py index 1266a7c3aaab..4ae310b5888d 100644 --- a/examples/custom_diffusion/test_custom_diffusion.py +++ b/examples/custom_diffusion/test_custom_diffusion.py @@ -78,7 +78,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit(self): self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"}, + {"checkpoint-4", "checkpoint-6"} ) def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): diff --git a/examples/dreambooth/test_dreambooth_lora.py b/examples/dreambooth/test_dreambooth_lora.py index fa1064123620..8c9ac6c1df55 100644 --- a/examples/dreambooth/test_dreambooth_lora.py +++ b/examples/dreambooth/test_dreambooth_lora.py @@ -322,7 +322,7 @@ def test_dreambooth_lora_sdxl_checkpointing_checkpoints_total_limit(self): --resolution 64 --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --checkpointing_steps=2 --checkpoints_total_limit=2 --learning_rate 5.0e-04 @@ -336,13 +336,13 @@ def test_dreambooth_lora_sdxl_checkpointing_checkpoints_total_limit(self): pipe = DiffusionPipeline.from_pretrained(pipeline_path) pipe.load_lora_weights(tmpdir) - pipe("a prompt", num_inference_steps=2) + pipe("a prompt", num_inference_steps=1) # check checkpoint directories exist + # checkpoint-2 should have been deleted self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - # checkpoint-2 should have been deleted - {"checkpoint-4", "checkpoint-6"}, + {"checkpoint-4", "checkpoint-6"} ) def test_dreambooth_lora_sdxl_text_encoder_checkpointing_checkpoints_total_limit(self): diff --git 
a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py index 3a99cc1c558a..c9dcdc2cd64e 100644 --- a/examples/text_to_image/test_text_to_image_lora.py +++ b/examples/text_to_image/test_text_to_image_lora.py @@ -78,7 +78,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: # Run training script with checkpointing - # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2 + # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2 # Should create checkpoints at steps 2, 4, 6 # with checkpoint at step 2 deleted @@ -91,7 +91,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self): --random_flip --train_batch_size 1 --gradient_accumulation_steps 1 - --max_train_steps 7 + --max_train_steps 6 --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -109,13 +109,13 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self): "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None ) pipe.load_lora_weights(tmpdir) - pipe(prompt, num_inference_steps=2) + pipe(prompt, num_inference_steps=1) # check checkpoint directories exist + # checkpoint-2 should have been deleted self.assertEqual( {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - # checkpoint-2 should have been deleted - {"checkpoint-4", "checkpoint-6"}, + {"checkpoint-4", "checkpoint-6"} ) def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): From d5dd94226c467f4f3392fcfbb153689aa6af40c0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Dec 2023 14:06:52 +0530 Subject: [PATCH 28/28] style --- examples/custom_diffusion/test_custom_diffusion.py | 5 +---- examples/dreambooth/test_dreambooth_lora.py | 5 +---- examples/text_to_image/test_text_to_image.py | 5 +---- examples/text_to_image/test_text_to_image_lora.py | 5 +---- 4 files changed, 4 
insertions(+), 16 deletions(-) diff --git a/examples/custom_diffusion/test_custom_diffusion.py b/examples/custom_diffusion/test_custom_diffusion.py index 4ae310b5888d..da4355d5ac25 100644 --- a/examples/custom_diffusion/test_custom_diffusion.py +++ b/examples/custom_diffusion/test_custom_diffusion.py @@ -76,10 +76,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit(self): run_command(self._launch_args + test_args) - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/examples/dreambooth/test_dreambooth_lora.py b/examples/dreambooth/test_dreambooth_lora.py index 8c9ac6c1df55..496ce22f814e 100644 --- a/examples/dreambooth/test_dreambooth_lora.py +++ b/examples/dreambooth/test_dreambooth_lora.py @@ -340,10 +340,7 @@ def test_dreambooth_lora_sdxl_checkpointing_checkpoints_total_limit(self): # check checkpoint directories exist # checkpoint-2 should have been deleted - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) def test_dreambooth_lora_sdxl_text_encoder_checkpointing_checkpoints_total_limit(self): pipeline_path = "hf-internal-testing/tiny-stable-diffusion-xl-pipe" diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py index 3aa557b5349e..814c13cf486e 100644 --- a/examples/text_to_image/test_text_to_image.py +++ b/examples/text_to_image/test_text_to_image.py @@ -261,10 +261,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self): # check checkpoint directories exist # checkpoint-2 should have 
been deleted - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe" diff --git a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py index c9dcdc2cd64e..4daee834d0e6 100644 --- a/examples/text_to_image/test_text_to_image_lora.py +++ b/examples/text_to_image/test_text_to_image_lora.py @@ -113,10 +113,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self): # check checkpoint directories exist # checkpoint-2 should have been deleted - self.assertEqual( - {x for x in os.listdir(tmpdir) if "checkpoint" in x}, - {"checkpoint-4", "checkpoint-6"} - ) + self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"}) def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe"