diff --git a/examples/dailymail/README.md b/examples/dailymail/README.md
index 378d6df..934465a 100644
--- a/examples/dailymail/README.md
+++ b/examples/dailymail/README.md
@@ -1,6 +1,6 @@
 # DailyMail & T5 Example
 
-This directory contains scripts for fine-tuning RoBERTa computing influence scores on the SWAG dataset. The pipeline is motivated from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice) demonstrates how to define `post_process_per_sample_gradient`.
+This directory contains scripts for fine-tuning T5 and computing influence scores on the DailyMail dataset. The pipeline is adapted from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/summarization).
 
 To begin, install the necessary packages:
 ```bash
diff --git a/examples/dailymail/pipeline.py b/examples/dailymail/pipeline.py
index 3276370..f221cdb 100644
--- a/examples/dailymail/pipeline.py
+++ b/examples/dailymail/pipeline.py
@@ -16,6 +16,7 @@
     "xglue": ("news_body", "news_title"),
     "xsum": ("document", "summary"),
     "wiki_summary": ("article", "highlights"),
+    "multi_news": ("document", "summary"),
 }
 
 
diff --git a/examples/dailymail/requirements.txt b/examples/dailymail/requirements.txt
index 748a191..7ae6637 100644
--- a/examples/dailymail/requirements.txt
+++ b/examples/dailymail/requirements.txt
@@ -1,3 +1,7 @@
+sentencepiece!=0.1.92
 nltk
 py7zr
 rouge-score
+transformers
+evaluate
+datasets
diff --git a/examples/dailymail/train.py b/examples/dailymail/train.py
index 3c02bd9..4efde89 100644
--- a/examples/dailymail/train.py
+++ b/examples/dailymail/train.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
-from accelerate.utils import set_seed
+from accelerate.utils import set_seed, send_to_device
 from filelock import FileLock
 from torch import nn
 from torch.nn import CrossEntropyLoss
@@ -30,7 +30,7 @@
 
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-
+print(DEVICE)
 
 def parse_args():
     parser = argparse.ArgumentParser(description="Train seq2seq models on DailyMail dataset.")
@@ -115,18 +115,12 @@ def train(
     model.train()
     for epoch in range(num_train_epochs):
         total_loss = 0.0
+        print("epoch start")
         for batch in train_dataloader:
+            print("done")
             optimizer.zero_grad(set_to_none=True)
-            logits = model(
-                input_ids=batch["input_ids"].to(device=DEVICE),
-                attention_mask=batch["attention_mask"].to(device=DEVICE),
-                decoder_input_ids=batch["decoder_input_ids"].to(device=DEVICE),
-            ).logits
-            loss = F.cross_entropy(
-                logits.view(-1, logits.size(-1)),
-                batch["labels"].view(-1).to(device=DEVICE),
-                ignore_index=-100,
-            )
+            batch = send_to_device(batch, device=DEVICE)
+            loss = model(**batch).loss
             loss.backward()
             optimizer.step()
             total_loss += loss.detach().float()
diff --git a/examples/swag/README.md b/examples/swag/README.md
index 5a0b919..6dd4d7f 100644
--- a/examples/swag/README.md
+++ b/examples/swag/README.md
@@ -1,6 +1,6 @@
 # SWAG & RoBERTa Example
 
-This directory contains scripts for fine-tuning RoBERTa computing influence scores on the SWAG dataset. The pipeline is motivated from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice) demonstrates how to define `post_process_per_sample_gradient`.
+This directory contains scripts for fine-tuning RoBERTa and computing influence scores on the SWAG dataset. The pipeline is adapted from [this HuggingFace Example](https://github.com/huggingface/transformers/tree/main/examples/pytorch/multiple-choice), and demonstrates how to define `post_process_per_sample_gradient`.
 
 To begin, install the necessary packages:
 ```bash
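The training-loop hunk in train.py above swaps the hand-rolled cross-entropy over decoder logits for the loss that Hugging Face seq2seq models compute internally when `labels` are supplied. Below is a minimal sketch of that step, assuming the dataloader yields dicts with `input_ids`, `attention_mask`, and `labels` (padded label positions set to -100, as `DataCollatorForSeq2Seq` produces); `model`, `optimizer`, and `train_step` are placeholder names for illustration, not objects defined in this repository.

```python
import torch
from accelerate.utils import send_to_device

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train_step(model, optimizer, batch):
    # `batch` is assumed to be a dict of tensors (input_ids, attention_mask,
    # labels) from DataCollatorForSeq2Seq, with padded label positions = -100.
    optimizer.zero_grad(set_to_none=True)
    batch = send_to_device(batch, device=DEVICE)
    # With `labels` present, the seq2seq model shifts them into decoder inputs
    # and returns the token-level cross-entropy (ignore_index=-100) as `.loss`,
    # so no explicit F.cross_entropy over the logits is needed.
    loss = model(**batch).loss
    loss.backward()
    optimizer.step()
    return loss.detach().float()
```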