Skip to content

Commit

Permalink
adopt dual patchnorm design
Browse the repository at this point in the history
  • Loading branch information
lucidrains committed Feb 3, 2023
1 parent 94fb2db commit 2c680bb
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,15 @@ trainer.train()
year = {2021}
}
```

```bibtex
@misc{https://doi.org/10.48550/arxiv.2302.01327,
doi = {10.48550/ARXIV.2302.01327},
url = {https://arxiv.org/abs/2302.01327},
author = {Kumar, Manoj and Dehghani, Mostafa and Houlsby, Neil},
title = {Dual PatchNorm},
publisher = {arXiv},
year = {2023},
copyright = {Creative Commons Attribution 4.0 International}
}
```
8 changes: 6 additions & 2 deletions phenaki_pytorch/cvivit.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,12 +269,16 @@ def __init__(

         self.to_patch_emb_first_frame = nn.Sequential(
             Rearrange('b c 1 (h p1) (w p2) -> b 1 h w (c p1 p2)', p1 = patch_height, p2 = patch_width),
-            nn.Linear(channels * patch_width * patch_height, dim)
+            nn.LayerNorm(channels * patch_width * patch_height),
+            nn.Linear(channels * patch_width * patch_height, dim),
+            nn.LayerNorm(dim)
         )

         self.to_patch_emb = nn.Sequential(
             Rearrange('b c (t pt) (h p1) (w p2) -> b t h w (c pt p1 p2)', p1 = patch_height, p2 = patch_width, pt = temporal_patch_size),
-            nn.Linear(channels * patch_width * patch_height * temporal_patch_size, dim)
+            nn.LayerNorm(channels * patch_width * patch_height * temporal_patch_size),
+            nn.Linear(channels * patch_width * patch_height * temporal_patch_size, dim),
+            nn.LayerNorm(dim)
         )

transformer_kwargs = dict(
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'phenaki-pytorch',
packages = find_packages(exclude=[]),
-  version = '0.1.1',
+  version = '0.2.0',
license='MIT',
description = 'Phenaki - Pytorch',
author = 'Phil Wang',
Expand Down Expand Up @@ -31,7 +31,7 @@
'torchvision',
'transformers>=4.20.1',
'tqdm',
-    'vector-quantize-pytorch>=0.10.14'
+    'vector-quantize-pytorch>=0.10.15'
],
classifiers=[
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit 2c680bb

Please sign in to comment.