From 455d0cb7288e50fd1037b7393b2d85f4053d2da4 Mon Sep 17 00:00:00 2001
From: NickLucche
Date: Thu, 9 Jan 2025 15:26:44 +0000
Subject: [PATCH] update to use new attention_type interface

Signed-off-by: NickLucche
---
 vllm/model_executor/models/t5.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/vllm/model_executor/models/t5.py b/vllm/model_executor/models/t5.py
index a8327a6e8128a..50c7c26659507 100644
--- a/vllm/model_executor/models/t5.py
+++ b/vllm/model_executor/models/t5.py
@@ -202,7 +202,8 @@ def __init__(self,
                               1.0,
                               cache_config=cache_config,
                               quant_config=quant_config,
-                              prefix=f"{prefix}.attn")
+                              prefix=f"{prefix}.attn",
+                              attn_type=self.attn_type)
 
         # Only the first SelfAttention block in encoder decoder has this
         # embedding layer, the others reuse its output.
@@ -418,12 +419,7 @@ def forward(
 
         # Encoder/Decoder Self-Attention Layer, attn bias already cached.
         assert attn_bias is not None
-        attn_output = self.attn(q,
-                                k,
-                                v,
-                                kv_cache,
-                                attn_metadata,
-                                attn_type=self.attn_type)
+        attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
        output, _ = self.out_proj(attn_output)
        return output
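
For context, below is a minimal sketch of the interface change this patch applies: the attention type is now fixed once when the Attention layer is constructed, instead of being passed as a keyword argument on every forward call. The class and field names other than Attention, AttentionType, and attn_type are illustrative placeholders, not the actual T5 module code from vllm/model_executor/models/t5.py.

# Illustrative sketch only; assumes a vLLM install where vllm.attention
# exports Attention and AttentionType. Names like num_heads, head_size,
# kv_cache, and attn_metadata are placeholders for the real model wiring.
from vllm.attention import Attention, AttentionType


class SelfAttentionSketch:

    def __init__(self, num_heads, head_size, cache_config=None,
                 quant_config=None, prefix=""):
        # ENCODER here is just an example; the real layer picks its role
        # (encoder, decoder, or cross-attention) when it is built.
        self.attn_type = AttentionType.ENCODER
        # New interface: attn_type is passed once at construction time.
        self.attn = Attention(num_heads,
                              head_size,
                              1.0,
                              cache_config=cache_config,
                              quant_config=quant_config,
                              prefix=f"{prefix}.attn",
                              attn_type=self.attn_type)

    def forward(self, q, k, v, kv_cache, attn_metadata):
        # ...so the per-call attn_type kwarg used previously is dropped here.
        return self.attn(q, k, v, kv_cache, attn_metadata)

This keeps the hot forward path free of per-call configuration and matches the two hunks above: the constructor gains attn_type, and the forward call loses it.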