diff --git a/README.md b/README.md
index 71ebefc..1e46c5f 100644
--- a/README.md
+++ b/README.md
@@ -450,3 +450,15 @@ trainer.train()
 year = {2021}
 }
 ```
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2302.01327,
+  doi = {10.48550/ARXIV.2302.01327},
+  url = {https://arxiv.org/abs/2302.01327},
+  author = {Kumar, Manoj and Dehghani, Mostafa and Houlsby, Neil},
+  title = {Dual PatchNorm},
+  publisher = {arXiv},
+  year = {2023},
+  copyright = {Creative Commons Attribution 4.0 International}
+}
+```
diff --git a/phenaki_pytorch/cvivit.py b/phenaki_pytorch/cvivit.py
index 0ecf70d..f45ca07 100644
--- a/phenaki_pytorch/cvivit.py
+++ b/phenaki_pytorch/cvivit.py
@@ -269,12 +269,16 @@ def __init__(
         self.to_patch_emb_first_frame = nn.Sequential(
             Rearrange('b c 1 (h p1) (w p2) -> b 1 h w (c p1 p2)', p1 = patch_height, p2 = patch_width),
-            nn.Linear(channels * patch_width * patch_height, dim)
+            nn.LayerNorm(channels * patch_width * patch_height),
+            nn.Linear(channels * patch_width * patch_height, dim),
+            nn.LayerNorm(dim)
         )
 
         self.to_patch_emb = nn.Sequential(
             Rearrange('b c (t pt) (h p1) (w p2) -> b t h w (c pt p1 p2)', p1 = patch_height, p2 = patch_width, pt = temporal_patch_size),
-            nn.Linear(channels * patch_width * patch_height * temporal_patch_size, dim)
+            nn.LayerNorm(channels * patch_width * patch_height * temporal_patch_size),
+            nn.Linear(channels * patch_width * patch_height * temporal_patch_size, dim),
+            nn.LayerNorm(dim)
         )
 
         transformer_kwargs = dict(
diff --git a/setup.py b/setup.py
index 2fd4491..04c4024 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'phenaki-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.1.1',
+  version = '0.2.0',
   license='MIT',
   description = 'Phenaki - Pytorch',
   author = 'Phil Wang',
@@ -31,7 +31,7 @@
     'torchvision',
     'transformers>=4.20.1',
     'tqdm',
-    'vector-quantize-pytorch>=0.10.14'
+    'vector-quantize-pytorch>=0.10.15'
   ],
   classifiers=[
     'Development Status :: 4 - Beta',