diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py index 828a7d469fd..51887a7d6bf 100644 --- a/ludwig/data/preprocessing.py +++ b/ludwig/data/preprocessing.py @@ -1224,7 +1224,7 @@ def build_dataset( else: logger.warning( f"Specified split column {global_preprocessing_parameters['split']['column']} for fixed " - f"split strategy was not found in dataset." + f"split strategy was not found in dataset." # noqa: E713 ) # update input features with prompt configs during preprocessing (as opposed to during the model forward pass) @@ -1457,7 +1457,7 @@ def cast_columns(dataset_cols, features, backend) -> None: ) except KeyError as e: raise KeyError( - f"Feature name {e} specified in the config was not found in dataset with columns: " + f"Feature name {e} specified in the config was not found in dataset with columns: " # noqa: E713 + f"{list(dataset_cols.keys())}" ) diff --git a/ludwig/encoders/image/base.py b/ludwig/encoders/image/base.py index 7b7688631b1..a69649cdcaa 100644 --- a/ludwig/encoders/image/base.py +++ b/ludwig/encoders/image/base.py @@ -402,6 +402,12 @@ def __init__( ) transformer = ViTModel(config) + if output_attentions: + config_dict: dict = transformer.config.to_dict() + updated_config: ViTConfig = ViTConfig(**config_dict) + updated_config._attn_implementation = "eager" + transformer = ViTModel(updated_config) + self.transformer = FreezeModule(transformer, frozen=not trainable) self._output_shape = (transformer.config.hidden_size,) diff --git a/ludwig/models/base.py b/ludwig/models/base.py index e54f4bc1f8c..c173af824b6 100644 --- a/ludwig/models/base.py +++ b/ludwig/models/base.py @@ -316,7 +316,7 @@ def collect_weights(self, tensor_names=None, **kwargs): weight_names = {name for name, _ in self.named_parameters()} for name in tensor_names: if name not in weight_names: - raise ValueError(f'Requested tensor name filter "{name}" not present in the model graph') + raise ValueError(f'Requested tensor name filter "{name}" not present in the model graph') # noqa: E713 # Apply filter. tensor_set = set(tensor_names) diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py index 1a1abc99a41..862ee55ff4a 100644 --- a/ludwig/models/llm.py +++ b/ludwig/models/llm.py @@ -563,6 +563,8 @@ def save(self, save_path): # avoid this hack if self.config_obj.trainer.type != "none": weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME) + # We initialize the model's generation configuration; otherwise, we get a validation error. + self.model.generation_config = self.generation self.model.save_pretrained(weights_save_path) else: logger.info("Skipped saving LLM without weight adjustments.") diff --git a/ludwig/utils/automl/utils.py b/ludwig/utils/automl/utils.py index 24d4bac3b8a..30c57511538 100644 --- a/ludwig/utils/automl/utils.py +++ b/ludwig/utils/automl/utils.py @@ -31,7 +31,7 @@ def avg_num_tokens_decoder(x): if x is None: return None - if type(x) == bytes: + if type(x) is bytes: return x.decode("utf-8") return str(x) diff --git a/ludwig/utils/image_utils.py b/ludwig/utils/image_utils.py index 3470a9a9a6c..6395a2cbe17 100644 --- a/ludwig/utils/image_utils.py +++ b/ludwig/utils/image_utils.py @@ -446,11 +446,11 @@ def to_np_tuple(prop: Union[int, Iterable]) -> np.ndarray: height_stride = 2 and width_stride = 3. stride=2 gets converted into np.array([2, 2]). 
""" - if type(prop) == int: + if type(prop) is int: return np.ones(2).astype(int) * prop elif isinstance(prop, Iterable) and len(prop) == 2: return np.array(list(prop)).astype(int) - elif type(prop) == np.ndarray and prop.size == 2: + elif type(prop) is np.ndarray and prop.size == 2: return prop.astype(int) else: raise TypeError(f"prop must be int or iterable of length 2, but is {prop}.") diff --git a/ludwig/utils/server_utils.py b/ludwig/utils/server_utils.py index b1db792bbcb..36991dc0488 100644 --- a/ludwig/utils/server_utils.py +++ b/ludwig/utils/server_utils.py @@ -134,7 +134,7 @@ def deserialize_request(form) -> tuple: files = [] file_index = {} for k, v in form.multi_items(): - if type(v) == UploadFile: + if type(v) is UploadFile: file_index[v.filename] = _write_file(v, files) # reconstruct the dataframe diff --git a/ludwig/utils/visualization_utils.py b/ludwig/utils/visualization_utils.py index a00a04e9f04..54e37ef6da3 100644 --- a/ludwig/utils/visualization_utils.py +++ b/ludwig/utils/visualization_utils.py @@ -1432,7 +1432,7 @@ def hyperopt_report(hyperparameters, hyperopt_results_df, metric, filename_templ else: # TODO: more research needed on how to handle RayTune "sample_from" search space raise ValueError( - f"{hp_params[SPACE]} search space not supported in Ludwig. " + f"{hp_params[SPACE]} search space not supported in Ludwig. " # noqa: E713 f"Supported values are {RAY_TUNE_FLOAT_SPACES | RAY_TUNE_INT_SPACES | RAY_TUNE_CATEGORY_SPACES}." 
) diff --git a/requirements_viz.txt b/requirements_viz.txt index 0a819a1c5e4..a33a1d546f3 100644 --- a/requirements_viz.txt +++ b/requirements_viz.txt @@ -1,4 +1,4 @@ -matplotlib>=3.4; python_version > '3.6' +matplotlib>3.4,<3.9.0; python_version > '3.6' matplotlib>=3.0,<3.4; python_version <= '3.6' seaborn>=0.7,<0.12 hiplot diff --git a/setup.cfg b/setup.cfg index 421f3a791ee..c9095299645 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,3 +19,19 @@ ignore = W503 # Ignore "whitespace before ':'" E203 + # Ignore "missing whitespace after ':'" + E231 + # Ignore "multiple spaces after ':'" + E241 + # Ignore "multiple spaces before operator" + E221 + # Ignore "missing whitespace around operator" + E225 + # Ignore "missing whitespace around arithmetic operator" + E226 + # Ignore "multiple spaces after ':'" + E241 + # Ignore "multiple spaces after keyword" + E271 + # Ignore "missing whitespace after keyword" + E275