diff --git a/examples/controlnet/test_controlnet.py b/examples/controlnet/test_controlnet.py
index e62d095adaa2..e1adafe6be6f 100644
--- a/examples/controlnet/test_controlnet.py
+++ b/examples/controlnet/test_controlnet.py
@@ -65,7 +65,7 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
                 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet
-                --max_train_steps=9
+                --max_train_steps=6
                 --checkpointing_steps=2
                 """.split()
@@ -73,7 +73,7 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4", "checkpoint-6"},
            )

            resume_run_args = f"""
@@ -85,18 +85,15 @@ def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_check
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
                 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet
-                --max_train_steps=11
+                --max_train_steps=8
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-6
+                --checkpoints_total_limit=2
                 """.split()

            run_command(self._launch_args + resume_run_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-8", "checkpoint-10", "checkpoint-12"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"})


 class ControlNetSDXL(ExamplesTestsAccelerate):
@@ -111,7 +108,7 @@ def test_controlnet_sdxl(self):
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
                 --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet-sdxl
-                --max_train_steps=9
+                --max_train_steps=4
                 --checkpointing_steps=2
                 """.split()
diff --git a/examples/custom_diffusion/test_custom_diffusion.py b/examples/custom_diffusion/test_custom_diffusion.py
index 78f24c5172d6..da4355d5ac25 100644
--- a/examples/custom_diffusion/test_custom_diffusion.py
+++ b/examples/custom_diffusion/test_custom_diffusion.py
@@ -76,10 +76,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit(self):
            run_command(self._launch_args + test_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})

    def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
        with tempfile.TemporaryDirectory() as tmpdir:
@@ -93,7 +90,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple
                 --train_batch_size=1
                 --modifier_token=<new1>
                 --dataloader_num_workers=0
-                --max_train_steps=9
+                --max_train_steps=4
                 --checkpointing_steps=2
                 --no_safe_serialization
                 """.split()
@@ -102,7 +99,7 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4"},
            )

            resume_run_args = f"""
@@ -115,16 +112,13 @@ def test_custom_diffusion_checkpointing_checkpoints_total_limit_removes_multiple
                 --train_batch_size=1
                 --modifier_token=<new1>
                 --dataloader_num_workers=0
-                --max_train_steps=11
+                --max_train_steps=8
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 --no_safe_serialization
                 """.split()

            run_command(self._launch_args + resume_run_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"})
diff --git a/examples/dreambooth/test_dreambooth.py b/examples/dreambooth/test_dreambooth.py
index 0c6c2a062325..ce2f3215bc71 100644
--- a/examples/dreambooth/test_dreambooth.py
+++ b/examples/dreambooth/test_dreambooth.py
@@ -89,7 +89,7 @@ def test_dreambooth_checkpointing(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 5, checkpointing_steps == 2
+           # max_train_steps == 4, checkpointing_steps == 2
            # Should create checkpoints at steps 2, 4

            initial_run_args = f"""
@@ -100,7 +100,7 @@ def test_dreambooth_checkpointing(self):
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 5
+                --max_train_steps 4
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -114,7 +114,7 @@ def test_dreambooth_checkpointing(self):
            # check can run the original fully trained output pipeline
            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(instance_prompt, num_inference_steps=2)
+           pipe(instance_prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertTrue(os.path.isdir(os.path.join(tmpdir, "checkpoint-2")))
@@ -123,7 +123,7 @@ def test_dreambooth_checkpointing(self):
            # check can run an intermediate checkpoint
            unet = UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet")
            pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None)
-           pipe(instance_prompt, num_inference_steps=2)
+           pipe(instance_prompt, num_inference_steps=1)

            # Remove checkpoint 2 so that we can check only later checkpoints exist after resuming
            shutil.rmtree(os.path.join(tmpdir, "checkpoint-2"))
@@ -138,7 +138,7 @@ def test_dreambooth_checkpointing(self):
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -153,7 +153,7 @@ def test_dreambooth_checkpointing(self):
            # check can run new fully trained pipeline
            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(instance_prompt, num_inference_steps=2)
+           pipe(instance_prompt, num_inference_steps=1)

            # check old checkpoints do not exist
            self.assertFalse(os.path.isdir(os.path.join(tmpdir, "checkpoint-2")))
@@ -196,7 +196,7 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check
                 --resolution=64
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
-                --max_train_steps=9
+                --max_train_steps=4
                 --checkpointing_steps=2
                 """.split()
@@ -204,7 +204,7 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4"},
            )

            resume_run_args = f"""
@@ -216,15 +216,12 @@ def test_dreambooth_checkpointing_checkpoints_total_limit_removes_multiple_check
                 --resolution=64
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
-                --max_train_steps=11
+                --max_train_steps=8
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 """.split()

            run_command(self._launch_args + resume_run_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"})
diff --git a/examples/dreambooth/test_dreambooth_lora.py b/examples/dreambooth/test_dreambooth_lora.py
index fc43269f732e..496ce22f814e 100644
--- a/examples/dreambooth/test_dreambooth_lora.py
+++ b/examples/dreambooth/test_dreambooth_lora.py
@@ -135,16 +135,13 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_
                 --resolution=64
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
-                --max_train_steps=9
+                --max_train_steps=4
                 --checkpointing_steps=2
                 """.split()

            run_command(self._launch_args + test_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-2", "checkpoint-4"})

            resume_run_args = f"""
                examples/dreambooth/train_dreambooth_lora.py
@@ -155,18 +152,15 @@ def test_dreambooth_lora_checkpointing_checkpoints_total_limit_removes_multiple_
                 --resolution=64
                 --train_batch_size=1
                 --gradient_accumulation_steps=1
-                --max_train_steps=11
+                --max_train_steps=8
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 """.split()

            run_command(self._launch_args + resume_run_args)

-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
-           )
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"})

    def test_dreambooth_lora_if_model(self):
        with tempfile.TemporaryDirectory() as tmpdir:
@@ -328,7 +322,7 @@ def test_dreambooth_lora_sdxl_checkpointing_checkpoints_total_limit(self):
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --checkpointing_steps=2
                 --checkpoints_total_limit=2
                 --learning_rate 5.0e-04
@@ -342,14 +336,11 @@ def test_dreambooth_lora_sdxl_checkpointing_checkpoints_total_limit(self):
            pipe = DiffusionPipeline.from_pretrained(pipeline_path)
            pipe.load_lora_weights(tmpdir)
-           pipe("a prompt", num_inference_steps=2)
+           pipe("a prompt", num_inference_steps=1)

            # check checkpoint directories exist
-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               # checkpoint-2 should have been deleted
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           # checkpoint-2 should have been deleted
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})

    def test_dreambooth_lora_sdxl_text_encoder_checkpointing_checkpoints_total_limit(self):
        pipeline_path = "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
diff --git a/examples/instruct_pix2pix/test_instruct_pix2pix.py b/examples/instruct_pix2pix/test_instruct_pix2pix.py
index c4d7500723fa..b30baf8b1b02 100644
--- a/examples/instruct_pix2pix/test_instruct_pix2pix.py
+++ b/examples/instruct_pix2pix/test_instruct_pix2pix.py
@@ -40,7 +40,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit(self):
                 --resolution=64
                 --random_flip
                 --train_batch_size=1
-                --max_train_steps=7
+                --max_train_steps=6
                 --checkpointing_steps=2
                 --checkpoints_total_limit=2
                 --output_dir {tmpdir}
@@ -63,7 +63,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple
                 --resolution=64
                 --random_flip
                 --train_batch_size=1
-                --max_train_steps=9
+                --max_train_steps=4
                 --checkpointing_steps=2
                 --output_dir {tmpdir}
                 --seed=0
@@ -74,7 +74,7 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4"},
            )

            resume_run_args = f"""
@@ -84,12 +84,12 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple
                 --resolution=64
                 --random_flip
                 --train_batch_size=1
-                --max_train_steps=11
+                --max_train_steps=8
                 --checkpointing_steps=2
                 --output_dir {tmpdir}
                 --seed=0
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 """.split()

            run_command(self._launch_args + resume_run_args)
@@ -97,5 +97,5 @@ def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
+               {"checkpoint-6", "checkpoint-8"},
            )
diff --git a/examples/text_to_image/test_text_to_image.py b/examples/text_to_image/test_text_to_image.py
index 308a038b5533..814c13cf486e 100644
--- a/examples/text_to_image/test_text_to_image.py
+++ b/examples/text_to_image/test_text_to_image.py
@@ -64,7 +64,7 @@ def test_text_to_image_checkpointing(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 5, checkpointing_steps == 2
+           # max_train_steps == 4, checkpointing_steps == 2
            # Should create checkpoints at steps 2, 4

            initial_run_args = f"""
@@ -76,7 +76,7 @@ def test_text_to_image_checkpointing(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 5
+                --max_train_steps 4
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -89,7 +89,7 @@ def test_text_to_image_checkpointing(self):
            run_command(self._launch_args + initial_run_args)

            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertEqual(
@@ -100,12 +100,12 @@ def test_text_to_image_checkpointing(self):
            # check can run an intermediate checkpoint
            unet = UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet")
            pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # Remove checkpoint 2 so that we can check only later checkpoints exist after resuming
            shutil.rmtree(os.path.join(tmpdir, "checkpoint-2"))

-           # Run training script for 7 total steps resuming from checkpoint 4
+           # Run training script for 2 total steps resuming from checkpoint 4

            resume_run_args = f"""
                examples/text_to_image/train_text_to_image.py
@@ -116,13 +116,13 @@ def test_text_to_image_checkpointing(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 2
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
-                --checkpointing_steps=2
+                --checkpointing_steps=1
                 --resume_from_checkpoint=checkpoint-4
                 --seed=0
                 """.split()
@@ -131,16 +131,13 @@ def test_text_to_image_checkpointing(self):
            # check can run new fully trained pipeline
            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

+           # no checkpoint-2 -> check old checkpoints do not exist
+           # check new checkpoints exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {
-                   # no checkpoint-2 -> check old checkpoints do not exist
-                   # check new checkpoints exist
-                   "checkpoint-4",
-                   "checkpoint-6",
-               },
+               {"checkpoint-4", "checkpoint-5"},
            )

    def test_text_to_image_checkpointing_use_ema(self):
@@ -149,7 +146,7 @@ def test_text_to_image_checkpointing_use_ema(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 5, checkpointing_steps == 2
+           # max_train_steps == 4, checkpointing_steps == 2
            # Should create checkpoints at steps 2, 4

            initial_run_args = f"""
@@ -161,7 +158,7 @@ def test_text_to_image_checkpointing_use_ema(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 5
+                --max_train_steps 4
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -186,12 +183,12 @@ def test_text_to_image_checkpointing_use_ema(self):
            # check can run an intermediate checkpoint
            unet = UNet2DConditionModel.from_pretrained(tmpdir, subfolder="checkpoint-2/unet")
            pipe = DiffusionPipeline.from_pretrained(pretrained_model_name_or_path, unet=unet, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # Remove checkpoint 2 so that we can check only later checkpoints exist after resuming
            shutil.rmtree(os.path.join(tmpdir, "checkpoint-2"))

-           # Run training script for 7 total steps resuming from checkpoint 4
+           # Run training script for 2 total steps resuming from checkpoint 4

            resume_run_args = f"""
                examples/text_to_image/train_text_to_image.py
@@ -202,13 +199,13 @@ def test_text_to_image_checkpointing_use_ema(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 2
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
-                --checkpointing_steps=2
+                --checkpointing_steps=1
                 --resume_from_checkpoint=checkpoint-4
                 --use_ema
                 --seed=0
@@ -218,16 +215,13 @@ def test_text_to_image_checkpointing_use_ema(self):
            # check can run new fully trained pipeline
            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

+           # no checkpoint-2 -> check old checkpoints do not exist
+           # check new checkpoints exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {
-                   # no checkpoint-2 -> check old checkpoints do not exist
-                   # check new checkpoints exist
-                   "checkpoint-4",
-                   "checkpoint-6",
-               },
+               {"checkpoint-4", "checkpoint-5"},
            )

    def test_text_to_image_checkpointing_checkpoints_total_limit(self):
@@ -236,7 +230,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2
+           # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2
            # Should create checkpoints at steps 2, 4, 6
            # with checkpoint at step 2 deleted
@@ -249,7 +243,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -263,14 +257,11 @@ def test_text_to_image_checkpointing_checkpoints_total_limit(self):
            run_command(self._launch_args + initial_run_args)

            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               # checkpoint-2 should have been deleted
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           # checkpoint-2 should have been deleted
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})

    def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
        pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe"
@@ -278,8 +269,8 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 9, checkpointing_steps == 2
-           # Should create checkpoints at steps 2, 4, 6, 8
+           # max_train_steps == 4, checkpointing_steps == 2
+           # Should create checkpoints at steps 2, 4

            initial_run_args = f"""
                examples/text_to_image/train_text_to_image.py
@@ -290,7 +281,7 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 9
+                --max_train_steps 4
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -303,15 +294,15 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch
            run_command(self._launch_args + initial_run_args)

            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4"},
            )

-           # resume and we should try to checkpoint at 10, where we'll have to remove
+           # resume and we should try to checkpoint at 6, where we'll have to remove
            # checkpoint-2 and checkpoint-4 instead of just a single previous checkpoint

            resume_run_args = f"""
@@ -323,27 +314,27 @@ def test_text_to_image_checkpointing_checkpoints_total_limit_removes_multiple_ch
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 11
+                --max_train_steps 8
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 --seed=0
                 """.split()

            run_command(self._launch_args + resume_run_args)

            pipe = DiffusionPipeline.from_pretrained(tmpdir, safety_checker=None)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
+               {"checkpoint-6", "checkpoint-8"},
            )
diff --git a/examples/text_to_image/test_text_to_image_lora.py b/examples/text_to_image/test_text_to_image_lora.py
index 83cbb78b2dc6..4daee834d0e6 100644
--- a/examples/text_to_image/test_text_to_image_lora.py
+++ b/examples/text_to_image/test_text_to_image_lora.py
@@ -41,7 +41,7 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2
+           # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2
            # Should create checkpoints at steps 2, 4, 6
            # with checkpoint at step 2 deleted
@@ -52,7 +52,7 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self):
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -66,14 +66,11 @@ def test_text_to_image_lora_sdxl_checkpointing_checkpoints_total_limit(self):
            pipe = DiffusionPipeline.from_pretrained(pipeline_path)
            pipe.load_lora_weights(tmpdir)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               # checkpoint-2 should have been deleted
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           # checkpoint-2 should have been deleted
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})

    def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self):
        pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe"
@@ -81,7 +78,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2
+           # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2
            # Should create checkpoints at steps 2, 4, 6
            # with checkpoint at step 2 deleted
@@ -94,7 +91,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self):
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -112,14 +109,11 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit(self):
                "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None
            )
            pipe.load_lora_weights(tmpdir)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               # checkpoint-2 should have been deleted
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           # checkpoint-2 should have been deleted
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})

    def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
        pretrained_model_name_or_path = "hf-internal-testing/tiny-stable-diffusion-pipe"
@@ -127,8 +121,8 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 9, checkpointing_steps == 2
-           # Should create checkpoints at steps 2, 4, 6, 8
+           # max_train_steps == 4, checkpointing_steps == 2
+           # Should create checkpoints at steps 2, 4

            initial_run_args = f"""
                examples/text_to_image/train_text_to_image_lora.py
@@ -139,7 +133,7 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 9
+                --max_train_steps 4
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -156,15 +150,15 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip
                "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None
            )
            pipe.load_lora_weights(tmpdir)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-2", "checkpoint-4", "checkpoint-6", "checkpoint-8"},
+               {"checkpoint-2", "checkpoint-4"},
            )

-           # resume and we should try to checkpoint at 10, where we'll have to remove
+           # resume and we should try to checkpoint at 6, where we'll have to remove
            # checkpoint-2 and checkpoint-4 instead of just a single previous checkpoint

            resume_run_args = f"""
@@ -176,15 +170,15 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip
                 --random_flip
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 11
+                --max_train_steps 8
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
                 --checkpointing_steps=2
-                --resume_from_checkpoint=checkpoint-8
-                --checkpoints_total_limit=3
+                --resume_from_checkpoint=checkpoint-4
+                --checkpoints_total_limit=2
                 --seed=0
                 --num_validation_images=0
                 """.split()
@@ -195,12 +189,12 @@ def test_text_to_image_lora_checkpointing_checkpoints_total_limit_removes_multip
                "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None
            )
            pipe.load_lora_weights(tmpdir)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-6", "checkpoint-8", "checkpoint-10"},
+               {"checkpoint-6", "checkpoint-8"},
            )
@@ -272,7 +266,7 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li
        with tempfile.TemporaryDirectory() as tmpdir:
            # Run training script with checkpointing
-           # max_train_steps == 7, checkpointing_steps == 2, checkpoints_total_limit == 2
+           # max_train_steps == 6, checkpointing_steps == 2, checkpoints_total_limit == 2
            # Should create checkpoints at steps 2, 4, 6
            # with checkpoint at step 2 deleted
@@ -283,7 +277,7 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 7
+                --max_train_steps 6
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -298,11 +292,8 @@ def test_text_to_image_lora_sdxl_text_encoder_checkpointing_checkpoints_total_li
            pipe = DiffusionPipeline.from_pretrained(pipeline_path)
            pipe.load_lora_weights(tmpdir)
-           pipe(prompt, num_inference_steps=2)
+           pipe(prompt, num_inference_steps=1)

            # check checkpoint directories exist
-           self.assertEqual(
-               {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               # checkpoint-2 should have been deleted
-               {"checkpoint-4", "checkpoint-6"},
-           )
+           # checkpoint-2 should have been deleted
+           self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-4", "checkpoint-6"})
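Review note: every checkpoints_total_limit expectation in this patch follows one shared rotation rule. Before checkpoint-{step} is written, the trainers delete the oldest checkpoint directories until at most limit - 1 remain, so the new save brings the count back up to the limit. A minimal sketch of that pruning pass, assuming only the checkpoint-<step> directory naming these tests rely on (not a verbatim copy of any one training script):

import os
import shutil


def prune_checkpoints(output_dir: str, total_limit: int) -> None:
    # Sort existing checkpoint directories by their step number, oldest first.
    checkpoints = [d for d in os.listdir(output_dir) if d.startswith("checkpoint")]
    checkpoints = sorted(checkpoints, key=lambda name: int(name.split("-")[1]))
    # Keep only the newest ``total_limit - 1`` so the directory holds at most
    # ``total_limit`` checkpoints once the new one is saved.
    if len(checkpoints) >= total_limit:
        num_to_remove = len(checkpoints) - total_limit + 1
        for stale in checkpoints[:num_to_remove]:
            shutil.rmtree(os.path.join(output_dir, stale))

Under this rule, the runs that resume from checkpoint-4 with a limit of 2 prune checkpoint-2 at step 6 and checkpoint-4 at step 8, while the controlnet test, which resumes from checkpoint-6 with three checkpoints on disk, deletes checkpoint-2 and checkpoint-4 in a single pass; both end at {"checkpoint-6", "checkpoint-8"}.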
diff --git a/examples/textual_inversion/test_textual_inversion.py b/examples/textual_inversion/test_textual_inversion.py
index a5d7bcb65dd3..ba9cabd9aafe 100644
--- a/examples/textual_inversion/test_textual_inversion.py
+++ b/examples/textual_inversion/test_textual_inversion.py
@@ -40,8 +40,6 @@ def test_textual_inversion(self):
                 --learnable_property object
                 --placeholder_token <cat-toy>
                 --initializer_token a
-                --validation_prompt <cat-toy>
-                --validation_steps 1
                 --save_steps 1
                 --num_vectors 2
                 --resolution 64
@@ -68,8 +66,6 @@ def test_textual_inversion_checkpointing(self):
                 --learnable_property object
                 --placeholder_token <cat-toy>
                 --initializer_token a
-                --validation_prompt <cat-toy>
-                --validation_steps 1
                 --save_steps 1
                 --num_vectors 2
                 --resolution 64
@@ -102,14 +98,12 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl
                 --learnable_property object
                 --placeholder_token <cat-toy>
                 --initializer_token a
-                --validation_prompt <cat-toy>
-                --validation_steps 1
                 --save_steps 1
                 --num_vectors 2
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 3
+                --max_train_steps 2
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
@@ -123,7 +117,7 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-1", "checkpoint-2", "checkpoint-3"},
+               {"checkpoint-1", "checkpoint-2"},
            )

            resume_run_args = f"""
@@ -133,21 +127,19 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl
                 --learnable_property object
                 --placeholder_token <cat-toy>
                 --initializer_token a
-                --validation_prompt <cat-toy>
-                --validation_steps 1
                 --save_steps 1
                 --num_vectors 2
                 --resolution 64
                 --train_batch_size 1
                 --gradient_accumulation_steps 1
-                --max_train_steps 4
+                --max_train_steps 2
                 --learning_rate 5.0e-04
                 --scale_lr
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
                 --checkpointing_steps=1
-                --resume_from_checkpoint=checkpoint-3
+                --resume_from_checkpoint=checkpoint-2
                 --checkpoints_total_limit=2
                 """.split()
@@ -156,5 +148,5 @@ def test_textual_inversion_checkpointing_checkpoints_total_limit_removes_multipl
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-3", "checkpoint-4"},
+               {"checkpoint-2", "checkpoint-3"},
            )
diff --git a/examples/unconditional_image_generation/test_unconditional.py b/examples/unconditional_image_generation/test_unconditional.py
index b7e19abe9f6e..49e11f33d4e1 100644
--- a/examples/unconditional_image_generation/test_unconditional.py
+++ b/examples/unconditional_image_generation/test_unconditional.py
@@ -90,10 +90,10 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch
                 --train_batch_size 1
                 --num_epochs 1
                 --gradient_accumulation_steps 1
-                --ddpm_num_inference_steps 2
+                --ddpm_num_inference_steps 1
                 --learning_rate 1e-3
                 --lr_warmup_steps 5
-                --checkpointing_steps=1
+                --checkpointing_steps=2
                 """.split()

            run_command(self._launch_args + initial_run_args)
@@ -101,7 +101,7 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-1", "checkpoint-2", "checkpoint-3", "checkpoint-4", "checkpoint-5", "checkpoint-6"},
+               {"checkpoint-2", "checkpoint-4", "checkpoint-6"},
            )

            resume_run_args = f"""
@@ -113,12 +113,12 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch
                 --train_batch_size 1
                 --num_epochs 2
                 --gradient_accumulation_steps 1
-                --ddpm_num_inference_steps 2
+                --ddpm_num_inference_steps 1
                 --learning_rate 1e-3
                 --lr_warmup_steps 5
                 --resume_from_checkpoint=checkpoint-6
                 --checkpointing_steps=2
-                --checkpoints_total_limit=3
+                --checkpoints_total_limit=2
                 """.split()

            run_command(self._launch_args + resume_run_args)
@@ -126,5 +126,5 @@ def test_unconditional_checkpointing_checkpoints_total_limit_removes_multiple_ch
            # check checkpoint directories exist
            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
-               {"checkpoint-8", "checkpoint-10", "checkpoint-12"},
+               {"checkpoint-10", "checkpoint-12"},
            )
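The revised expected sets can be re-derived from the flags alone. The checker below is a hypothetical helper written for this review (surviving_checkpoints and its signature are not part of diffusers); it simply replays the save-and-prune schedule sketched earlier:

def surviving_checkpoints(max_train_steps, checkpointing_steps, total_limit=None, start_step=0, existing=()):
    # Replay a (possibly resumed) run: a checkpoint is saved at every
    # multiple of checkpointing_steps, pruning the oldest directories first.
    kept = sorted(existing, key=lambda name: int(name.split("-")[1]))
    for step in range(start_step + 1, max_train_steps + 1):
        if step % checkpointing_steps == 0:
            if total_limit is not None and len(kept) >= total_limit:
                del kept[: len(kept) - total_limit + 1]
            kept.append(f"checkpoint-{step}")
    return set(kept)


# First dreambooth run: 4 steps, saving every 2 steps, no limit.
first = surviving_checkpoints(max_train_steps=4, checkpointing_steps=2)
assert first == {"checkpoint-2", "checkpoint-4"}

# Resumed dreambooth run: 4 more steps with checkpoints_total_limit=2.
second = surviving_checkpoints(
    max_train_steps=8, checkpointing_steps=2, total_limit=2, start_step=4, existing=first
)
assert second == {"checkpoint-6", "checkpoint-8"}

The same replay reproduces the unconditional expectations: the first run (one 6-step epoch, checkpointing_steps 2, no limit) yields {"checkpoint-2", "checkpoint-4", "checkpoint-6"}, and resuming from checkpoint-6 for a second epoch with a limit of 2 leaves {"checkpoint-10", "checkpoint-12"}.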