fix bug with non-cuda linear causal attn
lucidrains committed Dec 7, 2020
1 parent 35a54b6 commit 98532fc
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion performer_pytorch/performer_pytorch.py
@@ -128,7 +128,7 @@ def causal_linear_attention(q, k, v):
 # inefficient causal linear attention, without cuda code, for reader's reference
 # not being used
 def causal_linear_attention_noncuda(q, k, v):
-    D_inv = torch.einsum('...nd,...nd->...n', q, k.cumsum(dim=-2))
+    D_inv = 1. / torch.einsum('...nd,...nd->...n', q, k.cumsum(dim=-2))
     context = torch.einsum('...nd,...ne->...nde', k, v)
     context = context.cumsum(dim=-3)
     out = torch.einsum('...nde,...nd,...n->...ne', context, q, D_inv)
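For reference, the corrected non-CUDA path reads as below. This is a self-contained sketch of the function as it stands after the fix; the tensor shapes in the usage lines are illustrative assumptions, and q and k are assumed to have already been passed through the non-negative random-feature map, so the normalizer cannot be zero.

    import torch

    # inefficient causal linear attention, without cuda code, for reader's reference
    def causal_linear_attention_noncuda(q, k, v):
        # reciprocal of the running normalizer q_i . (sum_{j <= i} k_j); the fix takes 1. / (...)
        D_inv = 1. / torch.einsum('...nd,...nd->...n', q, k.cumsum(dim=-2))
        # prefix sums of the outer products k_j (x) v_j up to each position
        context = torch.einsum('...nd,...ne->...nde', k, v)
        context = context.cumsum(dim=-3)
        # contract each prefix sum with q_i, then normalize
        out = torch.einsum('...nde,...nd,...n->...ne', context, q, D_inv)
        return out

    # illustrative usage (shapes assumed): batch 1, 8 heads, 16 positions, head dim 64
    q = torch.rand(1, 8, 16, 64)    # non-negative, standing in for feature-mapped queries
    k = torch.rand(1, 8, 16, 64)    # non-negative, standing in for feature-mapped keys
    v = torch.randn(1, 8, 16, 64)
    out = causal_linear_attention_noncuda(q, k, v)    # -> (1, 8, 16, 64)

Without the reciprocal, the old line multiplied the output by the normalizer instead of dividing by it, so the attention weights at each position no longer summed to one.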
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'performer-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.11.1',
+  version = '0.11.2',
   license='MIT',
   description = 'Performer - Pytorch',
   author = 'Phil Wang',
