From a46a4e15b4646053a8bea40c25362683cd1dec5d Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Tue, 12 Mar 2024 16:31:12 -0400 Subject: [PATCH 01/12] Fixed error message --- kronfluence/computer/covariance_computer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kronfluence/computer/covariance_computer.py b/kronfluence/computer/covariance_computer.py index dd3dacc..b508f97 100644 --- a/kronfluence/computer/covariance_computer.py +++ b/kronfluence/computer/covariance_computer.py @@ -170,7 +170,7 @@ def fit_covariance_matrices( if no_partition: if total_data_examples < self.state.num_processes: - error_msg = "There are more data examples than the number of processes." + error_msg = "The number of processes are more than the data examples." self.logger.error(error_msg) raise ValueError(error_msg) if per_device_batch_size is None: From 48d75590ba72aacf6e852598d40e41a5607a966c Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Wed, 13 Mar 2024 23:41:37 -0400 Subject: [PATCH 02/12] Fixed error message --- kronfluence/computer/covariance_computer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kronfluence/computer/covariance_computer.py b/kronfluence/computer/covariance_computer.py index b508f97..bf0199b 100644 --- a/kronfluence/computer/covariance_computer.py +++ b/kronfluence/computer/covariance_computer.py @@ -237,7 +237,7 @@ def fit_covariance_matrices( max_total_examples = total_data_examples // factor_args.covariance_data_partition_size if max_total_examples < self.state.num_processes: - error_msg = "There are more data examples than the number of processes." + error_msg = "The number of processes are more than the data examples." self.logger.error(error_msg) raise ValueError(error_msg) if per_device_batch_size is None: From 65a3b802fc57068850a2173e4beb1d5b2b02d4bb Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Thu, 14 Mar 2024 13:03:26 -0400 Subject: [PATCH 03/12] Fixed error message --- kronfluence/computer/eigen_computer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kronfluence/computer/eigen_computer.py b/kronfluence/computer/eigen_computer.py index a9b7baf..840cdaf 100644 --- a/kronfluence/computer/eigen_computer.py +++ b/kronfluence/computer/eigen_computer.py @@ -288,7 +288,7 @@ def fit_lambda_matrices( if no_partition: if total_data_examples < self.state.num_processes: - error_msg = "There are more data examples than the number of processes." + error_msg = "The number of processes are more than the data examples." self.logger.error(error_msg) raise ValueError(error_msg) if per_device_batch_size is None: @@ -354,7 +354,7 @@ def fit_lambda_matrices( max_total_examples = total_data_examples // factor_args.lambda_data_partition_size if max_total_examples < self.state.num_processes: - error_msg = "There are more data examples than the number of processes." + error_msg = "The number of processes are more than the data examples." self.logger.error(error_msg) raise ValueError(error_msg) if per_device_batch_size is None: From 37cb00699c4313e9046360261445ac5cbb3d5de9 Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Thu, 14 Mar 2024 16:59:15 -0400 Subject: [PATCH 04/12] minor change in var name --- kronfluence/factor/eigen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kronfluence/factor/eigen.py b/kronfluence/factor/eigen.py index e9684a5..ab13b07 100644 --- a/kronfluence/factor/eigen.py +++ b/kronfluence/factor/eigen.py @@ -293,7 +293,7 @@ def fit_lambda_matrices_with_loader( with torch.no_grad(): saved_factors: FACTOR_TYPE = {} - for covariance_factor_name in LAMBDA_FACTOR_NAMES: - saved_factors[covariance_factor_name] = load_factors(model=model, factor_name=covariance_factor_name) + for lambda_factor_name in LAMBDA_FACTOR_NAMES: + saved_factors[lambda_factor_name] = load_factors(model=model, factor_name=lambda_factor_name) set_mode(model=model, mode=ModuleMode.DEFAULT, keep_factors=False) return num_data_processed, saved_factors From ee951ac6cad5262f109fc9e84f7bd7781f23b4a8 Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Fri, 15 Mar 2024 11:51:26 -0400 Subject: [PATCH 05/12] Removed assertion and made module_type mandatory --- kronfluence/module/tracked_module.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kronfluence/module/tracked_module.py b/kronfluence/module/tracked_module.py index 31f1680..e8f68cb 100644 --- a/kronfluence/module/tracked_module.py +++ b/kronfluence/module/tracked_module.py @@ -56,10 +56,9 @@ class TrackedModule(nn.Module): SUPPORTED_MODULES: Dict[Type[nn.Module], Any] = {} - def __init_subclass__(cls, module_type: Optional[Type[nn.Module]] = None, **kwargs) -> None: + def __init_subclass__(cls, module_type: Type[nn.Module] = None, **kwargs) -> None: """Automatically registers subclasses as supported modules.""" super().__init_subclass__(**kwargs) - assert module_type is not None if module_type is not None: cls.SUPPORTED_MODULES[module_type] = cls From 189dcef51b4d59ed72a13eb77a2dc050ac19661c Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Fri, 15 Mar 2024 11:55:10 -0400 Subject: [PATCH 06/12] Added consistency in registering hooks --- kronfluence/module/tracked_module.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kronfluence/module/tracked_module.py b/kronfluence/module/tracked_module.py index e8f68cb..607484a 100644 --- a/kronfluence/module/tracked_module.py +++ b/kronfluence/module/tracked_module.py @@ -316,7 +316,7 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) def _release_covariance_matrices(self) -> None: """Clears the stored activation and pseudo-gradient covariance matrices from memory.""" @@ -491,7 +491,7 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) def _release_lambda_matrix(self) -> None: """Clears the stored Lambda matrix from memory.""" @@ -607,7 +607,7 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) def _release_preconditioned_gradient(self) -> None: """Clears the preconditioned per-sample-gradient from memory.""" @@ -727,7 +727,7 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) def _register_self_score_hooks(self) -> None: """Installs forward and backward hooks for computation of self-influence scores.""" @@ -785,7 +785,7 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) def release_scores(self) -> None: """Clears the influence scores from memory.""" From 5c91e46cfe6d7635f01fed0f22b9e88ec8f85779 Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Fri, 15 Mar 2024 11:58:03 -0400 Subject: [PATCH 07/12] Fixed docstring --- kronfluence/module/linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kronfluence/module/linear.py b/kronfluence/module/linear.py index 6158e36..285c899 100644 --- a/kronfluence/module/linear.py +++ b/kronfluence/module/linear.py @@ -71,7 +71,7 @@ def _compute_per_sample_gradient( Returns: torch.Tensor: The per-sample-gradient tensor. The per-sample-gradient is a 3-dimensional matrix - with dimension `batch_size x input_dim x gradient_dim`. An additional dimension is added + with dimension `batch_size x gradient_dim x input_dim`. An additional dimension is added when the bias term is used. """ if self.original_module.bias is not None: From 78c2803fcbee19d7b22404ad5f4a274257adc546 Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Fri, 15 Mar 2024 12:08:17 -0400 Subject: [PATCH 08/12] Wrapped with try-except --- kronfluence/computer/computer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kronfluence/computer/computer.py b/kronfluence/computer/computer.py index a1723b5..e405d78 100644 --- a/kronfluence/computer/computer.py +++ b/kronfluence/computer/computer.py @@ -39,7 +39,7 @@ find_executable_batch_size, make_indices_partition, ) -from kronfluence.utils.exceptions import FactorsNotFoundError, UnsupportableModuleError +from kronfluence.utils.exceptions import FactorsNotFoundError, UnsupportableModuleError, TrackedModuleNotFoundError from kronfluence.utils.logger import PassThroughProfiler, Profiler, get_logger, get_time from kronfluence.utils.save import ( FACTOR_ARGUMENTS_NAME, @@ -80,8 +80,9 @@ def __init__( self.model.eval() self.task = task - tracked_module_names = get_tracked_module_names(self.model) - if len(tracked_module_names) == 0: + try: + tracked_module_names = get_tracked_module_names(self.model) + except TrackedModuleNotFoundError: error_msg = ( f"No tracked modules found in the provided model: {self.model}. " f"Please make sure to run `prepare_model` before passing it in to the " From 3b997fb25eda24d54243b7a598ea83beeb1addbc Mon Sep 17 00:00:00 2001 From: Omkar Dige Date: Fri, 15 Mar 2024 12:14:42 -0400 Subject: [PATCH 09/12] Minor fix --- kronfluence/computer/covariance_computer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kronfluence/computer/covariance_computer.py b/kronfluence/computer/covariance_computer.py index bf0199b..8dfaa1d 100644 --- a/kronfluence/computer/covariance_computer.py +++ b/kronfluence/computer/covariance_computer.py @@ -298,10 +298,10 @@ def aggregate_covariance_matrices( data_partition_size = factor_args.covariance_data_partition_size module_partition_size = factor_args.covariance_module_partition_size all_required_partitions = [(i, j) for i in range(data_partition_size) for j in range(module_partition_size)] - all_partition_exists = [ + all_partition_exists = all([ covariance_matrices_exist(output_dir=factors_output_dir, partition=partition) for partition in all_required_partitions - ] + ]) if not all_partition_exists: self.logger.info( "Covariance matrices are not aggregated as covariance matrices for some partitions " From 70b70a661d1aa5ae317a9430154454cc67276c26 Mon Sep 17 00:00:00 2001 From: xeon27 Date: Fri, 15 Mar 2024 13:47:22 -0400 Subject: [PATCH 10/12] Minor fix --- kronfluence/computer/computer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kronfluence/computer/computer.py b/kronfluence/computer/computer.py index e405d78..d1e8b20 100644 --- a/kronfluence/computer/computer.py +++ b/kronfluence/computer/computer.py @@ -82,14 +82,14 @@ def __init__( try: tracked_module_names = get_tracked_module_names(self.model) - except TrackedModuleNotFoundError: + except TrackedModuleNotFoundError as e: error_msg = ( f"No tracked modules found in the provided model: {self.model}. " f"Please make sure to run `prepare_model` before passing it in to the " f"Analyzer." ) self.logger.error(error_msg) - raise UnsupportableModuleError(error_msg) + raise UnsupportableModuleError(error_msg) from e self.logger.info(f"Tracking modules with names: {tracked_module_names}.") if self.state.use_distributed and not isinstance(model, (DDP, FSDP)): From ab153ad580e5e409c956b5a1ad9ba5b98f1f49a7 Mon Sep 17 00:00:00 2001 From: xeon27 Date: Sat, 16 Mar 2024 00:06:16 -0400 Subject: [PATCH 11/12] Minor fix (pylint) --- kronfluence/computer/covariance_computer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kronfluence/computer/covariance_computer.py b/kronfluence/computer/covariance_computer.py index 8dfaa1d..8eeda6b 100644 --- a/kronfluence/computer/covariance_computer.py +++ b/kronfluence/computer/covariance_computer.py @@ -298,10 +298,10 @@ def aggregate_covariance_matrices( data_partition_size = factor_args.covariance_data_partition_size module_partition_size = factor_args.covariance_module_partition_size all_required_partitions = [(i, j) for i in range(data_partition_size) for j in range(module_partition_size)] - all_partition_exists = all([ + all_partition_exists = all( covariance_matrices_exist(output_dir=factors_output_dir, partition=partition) for partition in all_required_partitions - ]) + ) if not all_partition_exists: self.logger.info( "Covariance matrices are not aggregated as covariance matrices for some partitions " From 37373188c308af11e940517163724c97b637d8e5 Mon Sep 17 00:00:00 2001 From: xeon27 Date: Sat, 16 Mar 2024 00:12:37 -0400 Subject: [PATCH 12/12] Minor fix (pylint) --- kronfluence/module/tracked_module.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/kronfluence/module/tracked_module.py b/kronfluence/module/tracked_module.py index 607484a..30523ef 100644 --- a/kronfluence/module/tracked_module.py +++ b/kronfluence/module/tracked_module.py @@ -316,7 +316,9 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append( + self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook) + ) def _release_covariance_matrices(self) -> None: """Clears the stored activation and pseudo-gradient covariance matrices from memory.""" @@ -491,7 +493,9 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append( + self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook) + ) def _release_lambda_matrix(self) -> None: """Clears the stored Lambda matrix from memory.""" @@ -607,7 +611,9 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append( + self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook) + ) def _release_preconditioned_gradient(self) -> None: """Clears the preconditioned per-sample-gradient from memory.""" @@ -727,7 +733,9 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append( + self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook) + ) def _register_self_score_hooks(self) -> None: """Installs forward and backward hooks for computation of self-influence scores.""" @@ -785,7 +793,9 @@ def backward_hook(output_gradient: torch.Tensor) -> None: self._registered_hooks.append(self.original_module.register_forward_hook(forward_hook)) if self.factor_args.immediate_gradient_removal: - self._registered_hooks.append(self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook)) + self._registered_hooks.append( + self.original_module.register_full_backward_hook(full_backward_gradient_removal_hook) + ) def release_scores(self) -> None: """Clears the influence scores from memory."""