From eb23a2f27c588f71dd380b8cb3a7600a702b3fa0 Mon Sep 17 00:00:00 2001
From: coryMosaicML <83666378+coryMosaicML@users.noreply.github.com>
Date: Wed, 9 Oct 2024 09:57:20 -0700
Subject: [PATCH] Update dependencies for use with torch 2.4.1 and composer
 0.25.0 (#176)

---
 .github/workflows/code-quality.yaml           |  2 +-
 .github/workflows/docker.yaml                 |  6 +++++
 .github/workflows/pr-cpu.yaml                 |  8 +++---
 diffusion/datasets/image_caption_latents.py   |  1 +
 diffusion/models/models.py                    |  2 +-
 .../precomputed_text_latent_diffusion.py      |  2 +-
 setup.py                                      | 26 +++++++++++++++----
 7 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
index 261edd5e..16e2a125 100644
--- a/.github/workflows/code-quality.yaml
+++ b/.github/workflows/code-quality.yaml
@@ -24,8 +24,8 @@ jobs:
     strategy:
       matrix:
         python_version:
-          - "3.9"
           - "3.10"
+          - "3.11"
         pip_deps:
           - "[dev]"
     steps:
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 164b5ccb..20dd7c6e 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -29,6 +29,12 @@ jobs:
         - name: "2.4.0_cu124_aws"
           base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
           dep_groups: "[all]"
+        - name: "2.4.1_cu124"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
+          dep_groups: "[all]"
+        - name: "2.4.1_cu124_aws"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws
+          dep_groups: "[all]"
     steps:
 
     - name: Checkout
diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
index b43ce0ae..b5f8cc2e 100644
--- a/.github/workflows/pr-cpu.yaml
+++ b/.github/workflows/pr-cpu.yaml
@@ -19,12 +19,12 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: 'cpu-3.9-1.12'
-            container: mosaicml/pytorch:1.12.1_cpu-python3.9-ubuntu20.04
+          - name: 'cpu-3.10-2.1'
+            container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
-          - name: 'cpu-3.10-1.13'
-            container: mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
+          - name: 'cpu-3.11-2.4'
+            container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
     name: ${{ matrix.name }}
diff --git a/diffusion/datasets/image_caption_latents.py b/diffusion/datasets/image_caption_latents.py
index 01ae1e8d..8ca46c14 100644
--- a/diffusion/datasets/image_caption_latents.py
+++ b/diffusion/datasets/image_caption_latents.py
@@ -273,6 +273,7 @@ def build_streaming_image_caption_latents_dataloader(
         text_latent_shapes=text_latent_shapes,
         attention_mask_keys=attention_mask_keys,
         latent_dtype=dtype,
+        batch_size=batch_size,
         **streaming_kwargs,
     )
 
diff --git a/diffusion/models/models.py b/diffusion/models/models.py
index 33b02f8d..82c1480f 100644
--- a/diffusion/models/models.py
+++ b/diffusion/models/models.py
@@ -125,7 +125,7 @@ def stable_diffusion_2(
     precision = torch.float16 if encode_latents_in_fp16 else None
     # Make the text encoder
     text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder='text_encoder', torch_dtype=precision)
-    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer')
+    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer', clean_up_tokenization_spaces=True)
 
     # Make the autoencoder
     if autoencoder_path is None:
diff --git a/diffusion/models/precomputed_text_latent_diffusion.py b/diffusion/models/precomputed_text_latent_diffusion.py
index d1ee9136..31acdb2f 100644
--- a/diffusion/models/precomputed_text_latent_diffusion.py
+++ b/diffusion/models/precomputed_text_latent_diffusion.py
@@ -189,7 +189,7 @@ def set_rng_generator(self, rng_generator: torch.Generator):
         self.rng_generator = rng_generator
 
     def encode_images(self, inputs, dtype=torch.bfloat16):
-        with torch.amp.autocast('cuda', enabled=False):
+        with torch.autocast(device_type='cuda', enabled=False):
             latents = self.vae.encode(inputs.to(dtype))['latent_dist'].sample().data
         latents = (latents - self.latent_mean) / self.latent_std  # scale latents
         return latents
diff --git a/setup.py b/setup.py
index f909592a..50feb4de 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,27 @@
 from setuptools import find_packages, setup
 
 install_requires = [
-    'mosaicml==0.20.1', 'mosaicml-streaming==0.7.4', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
-    'diffusers[torch]==0.26.3', 'transformers[torch]==4.38.2', 'huggingface_hub==0.21.2', 'wandb==0.16.3',
-    'xformers==0.0.23.post1', 'triton==2.1.0', 'torchmetrics[image]==1.3.1', 'lpips==0.1.4', 'clean-fid==0.1.35',
-    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.19.2',
-    'datasets==2.19.2', 'peft==0.12.0'
+    'mosaicml==0.25.0',
+    'mosaicml-streaming==0.9.0',
+    'hydra-core>=1.2',
+    'hydra-colorlog>=1.1.0',
+    'diffusers[torch]==0.30.3',
+    'transformers[torch]==4.44.2',
+    'huggingface-hub[hf_transfer]>=0.23.2',
+    'wandb>=0.18.1',
+    'xformers==0.0.28.post1',
+    'triton>=2.1.0',
+    'torchmetrics[image]>=1.4.0.post0',
+    'lpips==0.1.4',
+    'clean-fid==0.1.35',
+    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33',
+    'gradio==4.44.0',
+    'datasets==2.19.2',
+    'peft==0.12.0',
+    'numpy<2.0.0',
+    'sentencepiece',
+    'mlflow',
+    'pynvml',
 ]
 
 extras_require = {}