diff --git a/examples/dailymail/README.md b/examples/dailymail/README.md
index 378d6df..934465a 100644
--- a/examples/dailymail/README.md
+++ b/examples/dailymail/README.md
@@ -1,6 +1,6 @@
 # DailyMail & T5 Example
 
-This directory contains scripts for fine-tuning RoBERTa computing influence scores on the SWAG dataset. The pipeline is motivated from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice) demonstrates how to define `post_process_per_sample_gradient`.
+This directory contains scripts for fine-tuning T5 and computing influence scores on the DailyMail dataset. The pipeline is adapted from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/summarization).
 
 To begin, install the necessary packages:
 ```bash
diff --git a/examples/dailymail/pipeline.py b/examples/dailymail/pipeline.py
index 3276370..f221cdb 100644
--- a/examples/dailymail/pipeline.py
+++ b/examples/dailymail/pipeline.py
@@ -16,6 +16,7 @@
     "xglue": ("news_body", "news_title"),
     "xsum": ("document", "summary"),
     "wiki_summary": ("article", "highlights"),
+    "multi_news": ("document", "summary"),
 }
 
 
diff --git a/examples/dailymail/requirements.txt b/examples/dailymail/requirements.txt
index 748a191..7ae6637 100644
--- a/examples/dailymail/requirements.txt
+++ b/examples/dailymail/requirements.txt
@@ -1,3 +1,7 @@
+sentencepiece!=0.1.92
 nltk
 py7zr
 rouge-score
+transformers
+evaluate
+datasets
diff --git a/examples/dailymail/train.py b/examples/dailymail/train.py
index 3c02bd9..4efde89 100644
--- a/examples/dailymail/train.py
+++ b/examples/dailymail/train.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
-from accelerate.utils import set_seed
+from accelerate.utils import set_seed, send_to_device
 from filelock import FileLock
 from torch import nn
 from torch.nn import CrossEntropyLoss
@@ -30,7 +30,7 @@
 
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-
+print(DEVICE)
 
 def parse_args():
     parser = argparse.ArgumentParser(description="Train seq2seq models on DailyMail dataset.")
@@ -115,18 +115,12 @@ def train(
     model.train()
     for epoch in range(num_train_epochs):
         total_loss = 0.0
+        print("epoch start")
         for batch in train_dataloader:
+            print("done")
             optimizer.zero_grad(set_to_none=True)
-            logits = model(
-                input_ids=batch["input_ids"].to(device=DEVICE),
-                attention_mask=batch["attention_mask"].to(device=DEVICE),
-                decoder_input_ids=batch["decoder_input_ids"].to(device=DEVICE),
-            ).logits
-            loss = F.cross_entropy(
-                logits.view(-1, logits.size(-1)),
-                batch["labels"].view(-1).to(device=DEVICE),
-                ignore_index=-100,
-            )
+            batch = send_to_device(batch, device=DEVICE)
+            loss = model(**batch).loss
             loss.backward()
             optimizer.step()
             total_loss += loss.detach().float()
diff --git a/examples/swag/README.md b/examples/swag/README.md
index 5a0b919..6dd4d7f 100644
--- a/examples/swag/README.md
+++ b/examples/swag/README.md
@@ -1,6 +1,6 @@
 # SWAG & RoBERTa Example
 
-This directory contains scripts for fine-tuning RoBERTa computing influence scores on the SWAG dataset. The pipeline is motivated from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice) demonstrates how to define `post_process_per_sample_gradient`.
+This directory contains scripts for fine-tuning RoBERTa and computing influence scores on the SWAG dataset. The pipeline is adapted from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice), and demonstrates how to define `post_process_per_sample_gradient`.
 
 To begin, install the necessary packages:
 ```bash
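The training-loop hunk in train.py above swaps the hand-rolled cross-entropy over decoder logits for the loss that Hugging Face seq2seq models compute internally when `labels` are supplied. Below is a minimal sketch of that step, assuming the dataloader yields dicts with `input_ids`, `attention_mask`, and `labels` (padded label positions set to -100, as `DataCollatorForSeq2Seq` produces); `model`, `optimizer`, and `train_step` are placeholder names for illustration, not objects defined in this repository.

```python
import torch
from accelerate.utils import send_to_device

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train_step(model, optimizer, batch):
    # `batch` is assumed to be a dict of tensors (input_ids, attention_mask,
    # labels) from DataCollatorForSeq2Seq, with padded label positions = -100.
    optimizer.zero_grad(set_to_none=True)
    batch = send_to_device(batch, device=DEVICE)
    # With `labels` present, the seq2seq model shifts them into decoder inputs
    # and returns the token-level cross-entropy (ignore_index=-100) as `.loss`,
    # so no explicit F.cross_entropy over the logits is needed.
    loss = model(**batch).loss
    loss.backward()
    optimizer.step()
    return loss.detach().float()
```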