diff --git a/ludwig/api.py b/ludwig/api.py
index d1c5e2a3fe1..3302f51d4d8 100644
--- a/ludwig/api.py
+++ b/ludwig/api.py
@@ -831,6 +831,7 @@ def predict(
data_format: str = None,
split: str = FULL,
batch_size: int = 128,
+ generation_config: Optional[Dict] = None,
skip_save_unprocessed_output: bool = True,
skip_save_predictions: bool = True,
output_directory: str = "results",
@@ -840,43 +841,34 @@ def predict(
) -> Tuple[Union[dict, pd.DataFrame], str]:
"""Using a trained model, make predictions from the provided dataset.
- # Inputs
- :param dataset: (Union[str, dict, pandas.DataFrame]) source containing
- the entire dataset to be evaluated.
- :param data_format: (str, default: `None`) format to interpret data
- sources. Will be inferred automatically if not specified. Valid
- formats are `'auto'`, `'csv'`, `'df'`, `'dict'`, `'excel'`, `'feather'`,
- `'fwf'`, `'hdf5'` (cache file produced during previous training),
- `'html'` (file containing a single HTML `
`), `'json'`, `'jsonl'`,
- `'parquet'`, `'pickle'` (pickled Pandas DataFrame), `'sas'`, `'spss'`,
- `'stata'`, `'tsv'`.
- :param: split: (str, default= `'full'`): if the input dataset contains
- a split column, this parameter indicates which split of the data
- to use. Possible values are `'full'`, `'training'`, `'validation'`, `'test'`.
- :param batch_size: (int, default: 128) size of batch to use when making
- predictions.
- :param skip_save_unprocessed_output: (bool, default: `True`) if this
- parameter is `False`, predictions and their probabilities are saved
- in both raw unprocessed numpy files containing tensors and as
- postprocessed CSV files (one for each output feature).
- If this parameter is `True`, only the CSV ones are saved and the
- numpy ones are skipped.
- :param skip_save_predictions: (bool, default: `True`) skips saving
- test predictions CSV files.
- :param output_directory: (str, default: `'results'`) the directory that
- will contain the training statistics, TensorBoard logs, the saved
- model and the training progress files.
- :param return_type: (Union[str, dict, pandas.DataFrame], default: pd.DataFrame)
- indicates the format of the returned predictions.
- :param callbacks: (Optional[List[Callback]], default: None)
- optional list of callbacks to use during this predict operation. Any callbacks
- already registered to the model will be preserved.
-
- # Return
+ Args:
+ dataset: (Union[str, dict, pandas.DataFrame]): source containing the entire dataset to be evaluated.
+ data_format: (str, default: `None`) format to interpret data sources. Will be inferred automatically if not
+ specified. Valid formats are `'auto'`, `'csv'`, `'df'`, `'dict'`, `'excel'`, `'feather'`, `'fwf'`,
+ `'hdf5'` (cache file produced during previous training), `'html'` (file containing a single HTML
+ ``), `'json'`, `'jsonl'`, `'parquet'`, `'pickle'` (pickled Pandas DataFrame), `'sas'`, `'spss'`,
+ `'stata'`, `'tsv'`.
+ split: (str, default= `'full'`): if the input dataset contains a split column, this parameter indicates
+ which split of the data to use. Possible values are `'full'`, `'training'`, `'validation'`, `'test'`.
+ batch_size: (int, default: 128) size of batch to use when making predictions.
+            generation_config: (Dict, default: `None`) config for the generation of the predictions. If `None`, the
+ config that was used during model training is used.
+ skip_save_unprocessed_output: (bool, default: `True`) if this parameter is `False`, predictions and their
+ probabilities are saved in both raw unprocessed numpy files containing tensors and as postprocessed CSV
+ files (one for each output feature). If this parameter is `True`, only the CSV ones are saved and the
+ numpy ones are skipped.
+ skip_save_predictions: (bool, default: `True`) skips saving test predictions CSV files.
+ output_directory: (str, default: `'results'`) the directory that will contain the training statistics,
+ TensorBoard logs, the saved model and the training progress files.
+ return_type: (Union[str, dict, pandas.DataFrame], default: pd.DataFrame) indicates the format of the
+ returned predictions.
+ callbacks: (Optional[List[Callback]], default: None) optional list of callbacks to use during this predict
+ operation. Any callbacks already registered to the model will be preserved.
- :return: (Tuple[Union[dict, pd.DataFrame], str]) `(predictions, output_directory)`
- `predictions` predictions from the provided dataset,
- `output_directory` filepath string to where data was stored.
+ Returns:
+ `(predictions, output_directory)`: (Tuple[Union[dict, pd.DataFrame], str])
+ `predictions` predictions from the provided dataset,
+ `output_directory` filepath string to where data was stored.
"""
self._check_initialization()
@@ -893,12 +885,21 @@ def predict(
callbacks=self.callbacks + (callbacks or []),
)
+ # Set the generation config if it exists.
+ # model.reset_generation_config() is called after batch prediction.
+ if generation_config is not None:
+ self.model.set_generation_config(generation_config)
+
logger.debug("Predicting")
with self.backend.create_predictor(self.model, batch_size=batch_size) as predictor:
predictions = predictor.batch_predict(
dataset,
)
+ # If there was a generation config set prior to batch prediction, reset it.
+ if generation_config is not None:
+ self.model.reset_generation_config()
+
if self.backend.is_coordinator():
# if we are skipping all saving,
# there is no need to create a directory that will remain empty
diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py
index 0ef2c9e3200..f3263564755 100644
--- a/ludwig/models/llm.py
+++ b/ludwig/models/llm.py
@@ -1,4 +1,5 @@
import contextlib
+import copy
import logging
import os
import tempfile
@@ -160,6 +161,10 @@ def __init__(
self.generation = GenerationConfig(**self.config_obj.generation.to_dict())
+        # Save the original generation config so that we can reset it if/when self.generation is dynamically
+        # mutated during one-off predict calls after fine-tuning.
+ self.original_generation_config = copy.deepcopy(self.generation)
+
# ================ Inputs ================
try:
self.input_features.update(self.build_inputs(input_feature_configs=self.config_obj.input_features))
@@ -195,6 +200,14 @@ def __init__(
def create_feature_dict(self) -> LudwigFeatureDict:
return DictWrapper(LudwigFeatureDict())
+ def set_generation_config(self, generation_config_dict):
+ """Sets the generation config for the model."""
+ self.generation = GenerationConfig(**generation_config_dict)
+
+ def reset_generation_config(self):
+        """Resets the generation config to the original config used when the model was initialized."""
+ self.generation = self.original_generation_config
+
@property
def output_feature_decoder(self) -> OutputFeature:
return self._output_feature_decoder.module
@@ -375,7 +388,7 @@ def generate(
mask=None,
) -> Dict[str, torch.Tensor]:
"""Generates tokens using the model."""
-
+ logger.info(f"For generating text, using: {self.generation}")
input_ids, _ = self._unpack_inputs(inputs)
with torch.no_grad():
@@ -383,6 +396,10 @@ def generate(
sequences_list = []
for input_ids_sample in input_ids:
input_ids_sample_no_padding = remove_left_padding(input_ids_sample, self.tokenizer)
+ logger.info(
+ "Decoded text inputs for the first example in batch: "
+ f"{self.tokenizer.decode(input_ids_sample_no_padding[0])}"
+ )
if input_ids_sample_no_padding.shape[1] > self.max_input_length:
logger.warning(