Skip to content

Commit

Permalink
set seed in sft/dpo/reward_modeling to make results reproducible (#1357)
Browse files Browse the repository at this point in the history
Signed-off-by: Wang, Yi A <[email protected]>
  • Loading branch information
sywangyi authored Feb 23, 2024
1 parent ca90cba commit 2a2676e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Trainer,
TrainerCallback,
TrainingArguments,
set_seed,
)
from transformers.utils import PaddingStrategy

Expand Down Expand Up @@ -89,11 +90,14 @@ class ScriptArguments:
default=False,
metadata={"help": "Whether to run eval after the first step"},
)
seed: Optional[int] = field(
default=0, metadata={"help": "Random seed that will be set at the beginning of training."}
)


parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]

set_seed(script_args.seed)
# Load the human stack-exchange-paired dataset for tuning the reward model.
train_dataset = load_dataset("lvwerra/stack-exchange-paired", data_dir="data/reward", split="train")
if script_args.train_subset > 0:
Expand Down Expand Up @@ -129,7 +133,10 @@ class ScriptArguments:
logging_steps=10,
optim=script_args.optim,
lr_scheduler_type=script_args.lr_scheduler_type,
seed=script_args.seed,
)


# Load the value-head model and tokenizer.
tokenizer_name = script_args.tokenizer_name if script_args.tokenizer_name is not None else script_args.model_name
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from accelerate import Accelerator
from datasets import Dataset, load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments, set_seed

from trl import DPOTrainer

Expand Down Expand Up @@ -78,6 +78,9 @@ class ScriptArguments:
"https://github.com/huggingface/transformers/issues/22482#issuecomment-1595790992"
},
)
seed: Optional[int] = field(
default=0, metadata={"help": "Random seed that will be set at the beginning of training."}
)


def get_stack_exchange_paired(
Expand Down Expand Up @@ -128,6 +131,8 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]

set_seed(script_args.seed)

# 1. load a pretrained model
model = AutoModelForCausalLM.from_pretrained(
script_args.model_name_or_path,
Expand Down Expand Up @@ -182,6 +187,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
remove_unused_columns=False,
run_name="dpo_llama2",
gradient_checkpointing_kwargs=dict(use_reentrant=script_args.gradient_checkpointing_use_reentrant),
seed=script_args.seed,
)

peft_config = LoraConfig(
Expand Down
19 changes: 14 additions & 5 deletions examples/research_projects/stack_llama_2/scripts/sft_llama2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@
from datasets import load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
set_seed,
)

from trl import SFTTrainer
from trl.import_utils import is_npu_available, is_xpu_available
Expand Down Expand Up @@ -53,6 +60,8 @@ class ScriptArguments:
if training_args.gradient_checkpointing:
raise ValueError("gradient_checkpointing not supported")

set_seed(training_args.seed)


def chars_token_ratio(dataset, tokenizer, nb_examples=400):
"""
Expand Down Expand Up @@ -91,7 +100,7 @@ def prepare_sample_text(example):
return text


def create_datasets(tokenizer, args):
def create_datasets(tokenizer, args, seed=None):
dataset = load_dataset(
args.dataset_name,
data_dir=args.subset,
Expand All @@ -104,9 +113,9 @@ def create_datasets(tokenizer, args):
print("Loading the dataset in streaming mode")
valid_data = dataset.take(args.size_valid_set)
train_data = dataset.skip(args.size_valid_set)
train_data = train_data.shuffle(buffer_size=args.shuffle_buffer, seed=None)
train_data = train_data.shuffle(buffer_size=args.shuffle_buffer, seed=seed)
else:
dataset = dataset.train_test_split(test_size=0.005, seed=None)
dataset = dataset.train_test_split(test_size=0.005, seed=seed)
train_data = dataset["train"]
valid_data = dataset["test"]
print(f"Size of the train set: {len(train_data)}. Size of the validation set: {len(valid_data)}")
Expand Down Expand Up @@ -153,7 +162,7 @@ def create_datasets(tokenizer, args):
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

train_dataset, eval_dataset = create_datasets(tokenizer, script_args)
train_dataset, eval_dataset = create_datasets(tokenizer, script_args, seed=training_args.seed)

trainer = SFTTrainer(
model=base_model,
Expand Down

0 comments on commit 2a2676e

Please sign in to comment.