Expected tensors and new_tensors to have the same type but found <class 'tuple'> and <class 'torch.Tensor'> #35640

Bruce-Azar-Wayne opened this issue Jan 12, 2025 · 0 comments

System Info

I added code in compute_metrics to read the first item of the tuple (see the Reproduction section below), but it did not help, and the issue existed even before that change. A similar issue has been reported by others: https://discuss.huggingface.co/t/add-metrics-to-object-detection-example/31213/12

This is being run in a Kaggle environment.

Who can help?

@muellerzr @SunMarc

Information

  • The official example scripts
  • My own modified scripts

Tasks

  • An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
  • My own task or dataset (give details below)

Reproduction

```python
from nltk.translate.bleu_score import corpus_bleu
from transformers import EvalPrediction

def compute_metrics(eval_pred: EvalPrediction):
    predictions, label_ids = eval_pred.predictions, eval_pred.label_ids

    # Ensure predictions are a tensor (handle tuple case)
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Ensure label_ids are a tensor (handle tuple case)
    if isinstance(label_ids, tuple):
        label_ids = label_ids[0]

    # Decode predictions and labels
    decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions]
    decoded_labels = [[tokenizer.decode(label, skip_special_tokens=True)] for label in label_ids]

    # Calculate BLEU score
    bleu_score = corpus_bleu(decoded_labels, decoded_preds)
    return {"bleu": bleu_score}

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 1,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
        evaluation_strategy = "epoch", # Enable evaluation during training
    ),
    compute_metrics = compute_metrics,
)
```
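
As far as I can tell from the traceback under "Expected behavior" below, compute_metrics only runs after the Trainer has finished accumulating the per-step logits with nested_concat, so unwrapping the tuple inside compute_metrics can never prevent this assertion. A possible workaround (untested here, and assuming this SFTTrainer version forwards `preprocess_logits_for_metrics` to `transformers.Trainer`, which the base class accepts) would be to reduce the logits before they are accumulated; `keep_only_logits` and `training_args` below are just illustrative names:

```python
def keep_only_logits(logits, labels):
    # If an evaluation step yields a tuple (e.g. logits plus extra outputs),
    # keep only the first element so the Trainer always accumulates a plain tensor.
    if isinstance(logits, tuple):
        logits = logits[0]
    # Reduce to token ids so the tokenizer.decode() calls in compute_metrics
    # above receive ids rather than raw logits (and far less memory is kept).
    return logits.argmax(dim=-1)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    args = training_args,  # the same TrainingArguments as in the snippet above
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = keep_only_logits,
)
```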

Expected behavior

Evaluation during training should complete and report the BLEU metric from compute_metrics. Instead, it fails with the error below.

Full error:

```
AssertionError                            Traceback (most recent call last)
in <cell line: 1>()
----> 1 trainer_stats = trainer.train()

/usr/local/lib/python3.10/dist-packages/unsloth/tokenizer_utils.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)

/usr/local/lib/python3.10/dist-packages/unsloth/models/llama.py in _fast_inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval, start_time)
   3047     metrics = None
   3048     if self.control.should_evaluate:
---> 3049         metrics = self._evaluate(trial, ignore_keys_for_eval)
   3050         is_new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial)
   3051

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _evaluate(self, trial, ignore_keys_for_eval, skip_scheduler)
   3001
   3002 def _evaluate(self, trial, ignore_keys_for_eval, skip_scheduler=False):
---> 3003     metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
   3004     self._report_to_hp_search(trial, self.state.global_step, metrics)
   3005

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
   4048
   4049 eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
---> 4050 output = eval_loop(
   4051     eval_dataloader,
   4052     description="Evaluation",

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
   4269 logits = self.gather_function((logits))
   4270 if not self.args.batch_eval_metrics or description == "Prediction":
---> 4271     all_preds.add(logits)
   4272 if labels is not None:
   4273     labels = self.gather_function((labels))

/usr/local/lib/python3.10/dist-packages/transformers/trainer_pt_utils.py in add(self, tensors)
    320     self.tensors = tensors if self.do_nested_concat else [tensors]
    321 elif self.do_nested_concat:
---> 322     self.tensors = nested_concat(self.tensors, tensors, padding_index=self.padding_index)
    323 else:
    324     self.tensors.append(tensors)

/usr/local/lib/python3.10/dist-packages/transformers/trainer_pt_utils.py in nested_concat(tensors, new_tensors, padding_index)
    129 if not (isinstance(tensors, torch.Tensor) and isinstance(new_tensors, torch.Tensor)):
    130     assert (
---> 131         type(tensors) is type(new_tensors)
    132     ), f"Expected tensors and new_tensors to have the same type but found {type(tensors)} and {type(new_tensors)}."
    133 if isinstance(tensors, (list, tuple)):

AssertionError: Expected tensors and new_tensors to have the same type but found <class 'tuple'> and <class 'torch.Tensor'>.
```
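
For what it is worth, the assertion itself seems easy to reproduce in isolation, which suggests the accumulator ends up holding a tuple from one evaluation step and then receives a bare tensor from another. This is only a sketch with made-up shapes; only the types matter:

```python
import torch
from transformers.trainer_pt_utils import nested_concat

stored = (torch.zeros(2, 4),)  # accumulator state if one step produced a tuple of logits
incoming = torch.zeros(2, 4)   # a later step producing a bare tensor

# Raises: AssertionError: Expected tensors and new_tensors to have the same type
# but found <class 'tuple'> and <class 'torch.Tensor'>.
nested_concat(stored, incoming, padding_index=-100)
```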
