fix renormalizing attention for the causal case
lucidrains committed Nov 7, 2020
1 parent 9130848 commit 9fbfe59
Showing 2 changed files with 3 additions and 4 deletions.
performer_pytorch/performer_pytorch.py (5 changes: 2 additions & 3 deletions)

@@ -128,11 +128,10 @@ def causal_linear_attention(q, k, v):
 # inefficient causal linear attention, without cuda code, for reader's reference
 # not being used
 def causal_linear_attention_noncuda(q, k, v):
-    k_cumsum = k.cumsum(dim=-2)
+    D_inv = torch.einsum('...nd,...nd->...n', q, k.cumsum(dim=-2))
     context = torch.einsum('...nd,...ne->...nde', k, v)
     context = context.cumsum(dim=-3)
-    context /= k_cumsum.unsqueeze(dim=-1)
-    out = torch.einsum('...nde,...nd->...ne', context, q)
+    out = torch.einsum('...nde,...nd,...n->...ne', context, q, D_inv)
     return out
 
 class FastAttention(nn.Module):
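To make the change easier to follow, here is a standalone sketch of the renormalized non-CUDA path. Causal linear attention at position i is normalized by the scalar D_i = <q_i, sum of k_j for j <= i>; the commit computes that dot product with a single einsum and folds the per-position factor into the final contraction, instead of dividing the accumulated context tensor feature-wise by k's cumulative sum. The explicit 1. / reciprocal below (implied by the name D_inv), the _sketch function name, and the example shapes are assumptions added for illustration, not part of the commit itself.

import torch

# Illustrative sketch only, not the repository's code verbatim: assumes D_inv
# holds the reciprocal of the per-position normalizer D_i = <q_i, cumsum(k)_i>.
def causal_linear_attention_noncuda_sketch(q, k, v):
    # cumulative sum of the (feature-mapped) keys along the sequence axis: (..., n, d)
    k_cumsum = k.cumsum(dim = -2)
    # per-position normalizer D_i = <q_i, k_cumsum_i>, inverted so it can be
    # applied as a multiplicative factor inside the final einsum
    D_inv = 1. / torch.einsum('...nd,...nd->...n', q, k_cumsum)
    # causally accumulated key-value outer products: (..., n, d, e)
    context = torch.einsum('...nd,...ne->...nde', k, v)
    context = context.cumsum(dim = -3)
    # contract with the queries and renormalize every position by D_inv
    return torch.einsum('...nde,...nd,...n->...ne', context, q, D_inv)

# usage, with positive random features standing in for the kernel feature map
q = torch.rand(1, 2, 16, 8)   # (batch, heads, seq_len, dim)
k = torch.rand(1, 2, 16, 8)
v = torch.rand(1, 2, 16, 8)
out = causal_linear_attention_noncuda_sketch(q, k, v)   # shape (1, 2, 16, 8)

As the comment in the diff notes, this is the inefficient reference path kept for readers; the CUDA-backed causal_linear_attention remains the one actually used.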
setup.py (2 changes: 1 addition & 1 deletion)

@@ -3,7 +3,7 @@
 setup(
   name = 'performer-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.7.4',
+  version = '0.7.5',
   license='MIT',
   description = 'Performer - Pytorch',
   author = 'Phil Wang',
