update to use new attention_type interface
Signed-off-by: NickLucche <[email protected]>
NickLucche committed Jan 9, 2025
1 parent 3eae4f6 commit 455d0cb
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions vllm/model_executor/models/t5.py
@@ -202,7 +202,8 @@ def __init__(self,
      1.0,
      cache_config=cache_config,
      quant_config=quant_config,
-     prefix=f"{prefix}.attn")
+     prefix=f"{prefix}.attn",
+     attn_type=self.attn_type)

# Only the first SelfAttention block in encoder decoder has this
# embedding layer, the others reuse its output.
@@ -418,12 +419,7 @@ def forward(
  # Encoder/Decoder Self-Attention Layer, attn bias already cached.
  assert attn_bias is not None

- attn_output = self.attn(q,
-                         k,
-                         v,
-                         kv_cache,
-                         attn_metadata,
-                         attn_type=self.attn_type)
+ attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
output, _ = self.out_proj(attn_output)
return output

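For context, a minimal sketch of the interface change this commit adopts. The class name, constructor parameters beyond those visible in the diff, and the import path are assumptions modeled on vLLM's attention layer (Attention / AttentionType); the point the diff confirms is that attn_type is now fixed once when the Attention layer is constructed instead of being passed on every forward call.

# Minimal sketch (simplified, assumed signatures) of the attention_type change.
from vllm.attention import Attention, AttentionType

class T5AttentionSketch:  # hypothetical stand-in for the T5 attention module
    def __init__(self, num_heads, head_size, cache_config=None,
                 quant_config=None, prefix=""):
        self.attn_type = AttentionType.ENCODER  # or DECODER / ENCODER_DECODER
        # New interface: the attention type is declared once at construction.
        self.attn = Attention(num_heads,
                              head_size,
                              1.0,
                              cache_config=cache_config,
                              quant_config=quant_config,
                              prefix=f"{prefix}.attn",
                              attn_type=self.attn_type)

    def forward(self, q, k, v, kv_cache, attn_metadata):
        # Old interface (removed by this commit): attn_type was repeated per call:
        #     self.attn(q, k, v, kv_cache, attn_metadata, attn_type=self.attn_type)
        # New interface: the call site no longer carries it.
        return self.attn(q, k, v, kv_cache, attn_metadata)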
