From 6b9b7366a4cf824423322baefa0dfcf481bba777 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 05:36:49 -0400 Subject: [PATCH 1/8] Make batch size more difficult --- tests/gpu_tests/cpu_test.py | 20 +++++++++----------- tests/gpu_tests/prepare_tests.py | 15 +++++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tests/gpu_tests/cpu_test.py b/tests/gpu_tests/cpu_test.py index 7cdaf74..faa77d9 100644 --- a/tests/gpu_tests/cpu_test.py +++ b/tests/gpu_tests/cpu_test.py @@ -16,6 +16,7 @@ construct_mnist_mlp, get_mnist_dataset, ) +from tests.gpu_tests.prepare_tests import TRAIN_INDICES, QUERY_INDICES from tests.utils import check_tensor_dict_equivalence logging.basicConfig(level=logging.DEBUG) @@ -33,9 +34,7 @@ def setUpClass(cls) -> None: cls.model = cls.model.double() cls.train_dataset = get_mnist_dataset(split="train", data_path="data") - cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(200))) cls.eval_dataset = get_mnist_dataset(split="valid", data_path="data") - cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(100))) cls.task = ClassificationTask() cls.model = prepare_model(cls.model, cls.task) @@ -54,7 +53,7 @@ def test_covariance_matrices(self) -> None: factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, ) new_covariance_factors = self.analyzer.load_covariance_matrices(factors_name=NEW_FACTOR_NAME) @@ -83,7 +82,7 @@ def test_lambda_matrices(self): factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, load_from_factors_name=OLD_FACTOR_NAME, ) @@ -114,10 +113,10 @@ def test_pairwise_scores(self) -> None: factors_name=OLD_FACTOR_NAME, query_dataset=self.eval_dataset, train_dataset=self.train_dataset, - train_indices=list(range(42)), - query_indices=list(range(23)), - per_device_query_batch_size=2, - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + query_indices=list(range(QUERY_INDICES)), + per_device_query_batch_size=12, + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) @@ -145,15 +144,14 @@ def test_self_scores(self) -> None: scores_name=NEW_SCORE_NAME, factors_name=OLD_FACTOR_NAME, train_dataset=self.train_dataset, - train_indices=list(range(42)), - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) new_self_scores = self.analyzer.load_self_scores(scores_name=NEW_SCORE_NAME) self_scores = self.analyzer.load_self_scores(scores_name=OLD_SCORE_NAME) - torch.set_printoptions(threshold=30_000) print(f"Previous score: {self_scores[ALL_MODULE_NAME]}") print(f"Previous shape: {self_scores[ALL_MODULE_NAME].shape}") print(f"New score: {new_self_scores[ALL_MODULE_NAME]}") diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index be52ff8..01fc75b 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -12,6 +12,11 @@ ) +# Pick difficult cases where the dataset is not perfectly divisible by batch size. +TRAIN_INDICES = 59_999 +QUERY_INDICES = 50 + + def train() -> None: assert torch.cuda.is_available() device = torch.device("cuda") @@ -76,9 +81,7 @@ def run_analysis() -> None: model.load_state_dict(torch.load("model.pth")) train_dataset = get_mnist_dataset(split="train", data_path="data") - train_dataset = Subset(train_dataset, indices=list(range(200))) eval_dataset = get_mnist_dataset(split="valid", data_path="data") - eval_dataset = Subset(eval_dataset, indices=list(range(100))) task = ClassificationTask() model = model.double() @@ -100,7 +103,7 @@ def run_analysis() -> None: factors_name="single_gpu", dataset=train_dataset, factor_args=factor_args, - per_device_batch_size=32, + per_device_batch_size=512, overwrite_output_dir=True, ) @@ -114,8 +117,8 @@ def run_analysis() -> None: factors_name="single_gpu", query_dataset=eval_dataset, train_dataset=train_dataset, - train_indices=list(range(59_999)), - query_indices=list(range(50)), + train_indices=list(range(TRAIN_INDICES)), + query_indices=list(range(QUERY_INDICES)), per_device_query_batch_size=12, per_device_train_batch_size=512, score_args=score_args, @@ -125,7 +128,7 @@ def run_analysis() -> None: scores_name="single_gpu", factors_name="single_gpu", train_dataset=train_dataset, - train_indices=list(range(59_999)), + train_indices=list(range(TRAIN_INDICES)), per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, From add12fe0e5b87aa870ad7fe0bf332884795d1415 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 05:41:03 -0400 Subject: [PATCH 2/8] Try iterative lambda aggregate --- tests/gpu_tests/prepare_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index 01fc75b..d13753f 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -98,6 +98,7 @@ def run_analysis() -> None: activation_covariance_dtype=torch.float64, gradient_covariance_dtype=torch.float64, lambda_dtype=torch.float64, + lambda_iterative_aggregate=True, ) analyzer.fit_all_factors( factors_name="single_gpu", From ef1c2f735031c5ab239ce978bf5752356ae0d132 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 05:43:00 -0400 Subject: [PATCH 3/8] Add disable model save --- tests/gpu_tests/prepare_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index d13753f..81b12e7 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -91,6 +91,7 @@ def run_analysis() -> None: analysis_name="gpu_test", model=model, task=task, + disable_model_save=True, ) factor_args = FactorArguments( From 3655ea70cee10df8cf7f13810b5a0c52e2d1bf6a Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 05:46:42 -0400 Subject: [PATCH 4/8] Reduce BS lambda for test --- examples/uci/train.py | 35 +++++++++++++++++++++++++++++--- tests/gpu_tests/prepare_tests.py | 4 ++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/examples/uci/train.py b/examples/uci/train.py index 301226b..4d372bb 100644 --- a/examples/uci/train.py +++ b/examples/uci/train.py @@ -1,11 +1,11 @@ import argparse import logging import os - +from torch.utils import data import torch import torch.nn.functional as F +from torch import nn from accelerate.utils import set_seed -from torch.utils.data import DataLoader from tqdm import tqdm from examples.uci.pipeline import construct_regression_mlp, get_regression_dataset @@ -82,6 +82,35 @@ def parse_args(): return args +def train(dataset: data.Dataset, batch_size: int, num_train_epochs: int, learning_rate: float, weight_decay: float) -> nn.Module: + train_dataloader = data.DataLoader( + dataset=dataset, + batch_size=batch_size, + shuffle=True, + drop_last=True, + ) + + model = construct_regression_mlp() + optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay) + + model.train() + for epoch in range(num_train_epochs): + total_loss = 0 + with tqdm(train_dataloader, unit="batch") as tepoch: + for batch in tepoch: + tepoch.set_description(f"Epoch {epoch}") + inputs, targets = batch + outputs = model(inputs) + loss = F.mse_loss(outputs, targets) + total_loss += loss.detach().float() + loss.backward() + optimizer.step() + optimizer.zero_grad() + tepoch.set_postfix(loss=total_loss.item() / len(train_dataloader)) + return model + + + def main(): args = parse_args() @@ -92,7 +121,7 @@ def main(): set_seed(args.seed) train_dataset = get_regression_dataset(data_name=args.dataset_name, split="train", data_path=args.dataset_dir) - train_dataloader = DataLoader( + train_dataloader = data.DataLoader( dataset=train_dataset, batch_size=args.train_batch_size, shuffle=True, diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index 81b12e7..c6687c2 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -91,7 +91,6 @@ def run_analysis() -> None: analysis_name="gpu_test", model=model, task=task, - disable_model_save=True, ) factor_args = FactorArguments( @@ -99,7 +98,8 @@ def run_analysis() -> None: activation_covariance_dtype=torch.float64, gradient_covariance_dtype=torch.float64, lambda_dtype=torch.float64, - lambda_iterative_aggregate=True, + lambda_iterative_aggregate=False, + lambda_max_examples=1_000 ) analyzer.fit_all_factors( factors_name="single_gpu", From 7c59820349169c24a192e01935b38c5a01256cce Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 06:02:43 -0400 Subject: [PATCH 5/8] Reduce query indices --- examples/uci/README.md | 3 +- examples/uci/analyze.py | 87 +++----------------------- examples/uci/pipeline.py | 4 +- examples/uci/train.py | 101 +++++++++++++------------------ kronfluence/analyzer.py | 13 +++- tests/gpu_tests/cpu_test.py | 2 +- tests/gpu_tests/prepare_tests.py | 11 ++-- 7 files changed, 71 insertions(+), 150 deletions(-) diff --git a/examples/uci/README.md b/examples/uci/README.md index d6fb6b3..8dfaaae 100644 --- a/examples/uci/README.md +++ b/examples/uci/README.md @@ -4,7 +4,7 @@ This directory contains scripts designed for training a regression model and con ## Training -To initiate the training of a regression model using the Concrete dataset, execute the following command: +To train a regression model on the Concrete dataset, run the following command: ```bash python train.py --dataset_name concrete \ --dataset_dir ./data \ @@ -16,7 +16,6 @@ python train.py --dataset_name concrete \ --num_train_epochs 20 \ --seed 1004 ``` -Alternatively, you can download the model checkpoint. # Influence Analysis diff --git a/examples/uci/analyze.py b/examples/uci/analyze.py index 130c921..13082ea 100644 --- a/examples/uci/analyze.py +++ b/examples/uci/analyze.py @@ -2,13 +2,12 @@ import logging import math import os -from typing import Dict, Tuple +from typing import Tuple import torch import torch.nn.functional as F from analyzer import Analyzer, prepare_model from arguments import FactorArguments, ScoreArguments -from module.utils import wrap_tracked_modules from task import Task from torch import nn from torch.profiler import ProfilerActivity, profile, record_function @@ -96,14 +95,12 @@ def compute_measurement( def main(): args = parse_args() - logging.basicConfig(level=logging.INFO) - train_dataset = get_regression_dataset(data_name=args.dataset_name, split="train", data_path=args.dataset_dir) - eval_dataset = get_regression_dataset(data_name=args.dataset_name, split="valid", data_path=args.dataset_dir) + train_dataset = get_regression_dataset(data_name=args.dataset_name, split="train", dataset_dir=args.dataset_dir) + eval_dataset = get_regression_dataset(data_name=args.dataset_name, split="valid", dataset_dir=args.dataset_dir) model = construct_regression_mlp() - checkpoint_path = os.path.join(args.checkpoint_dir, "model.pth") if not os.path.isfile(checkpoint_path): raise ValueError(f"No checkpoint found at {checkpoint_path}.") @@ -120,91 +117,25 @@ def main(): ) factor_args = FactorArguments( strategy=args.factor_strategy, - covariance_data_partition_size=5, - covariance_module_partition_size=4, ) - # with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - # with record_function("covariance"): - # analyzer.fit_covariance_matrices( - # factors_name=args.factor_strategy, - # dataset=train_dataset, - # factor_args=factor_args, - # per_device_batch_size=args.batch_size, - # overwrite_output_dir=True, - # ) - # - # print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10)) - # cov_factors = analyzer.fit_covariance_matrices( - # factors_name=args.factor_strategy, - # dataset=train_dataset, - # factor_args=factor_args, - # per_device_batch_size=args.batch_size, - # overwrite_output_dir=True, - # ) - # print(cov_factors) - - with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - with record_function("eigen"): - res = analyzer.perform_eigendecomposition( - factors_name=args.factor_strategy, - factor_args=factor_args, - overwrite_output_dir=True, - ) - # print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10)) - # print(res) - res = analyzer.fit_lambda_matrices( + analyzer.fit_all_factors( factors_name=args.factor_strategy, dataset=train_dataset, - # factor_args=factor_args, per_device_batch_size=None, + factor_args=factor_args, overwrite_output_dir=True, ) - # print(res) - # - score_args = ScoreArguments(data_partition_size=2, module_partition_size=2) - analyzer.compute_pairwise_scores( - scores_name="hello", + + scores = analyzer.compute_pairwise_scores( + scores_name="pairwise", factors_name=args.factor_strategy, query_dataset=eval_dataset, train_dataset=train_dataset, per_device_query_batch_size=16, per_device_train_batch_size=8, - score_args=score_args, overwrite_output_dir=True, ) - # scores = analyzer.load_pairwise_scores(scores_name="hello") - # print(scores) - # - # analyzer.compute_self_scores( - # scores_name="hello", - # factors_name=args.factor_strategy, - # # query_dataset=eval_dataset, - # train_dataset=train_dataset, - # # per_device_query_batch_size=16, - # per_device_train_batch_size=8, - # overwrite_output_dir=True, - # ) - # # scores = analyzer.load_self_scores(scores_name="hello") - # # print(scores) - - # analyzer.fit_all_factors( - # factor_name=args.factor_strategy, - # dataset=train_dataset, - # factor_args=factor_args, - # per_device_batch_size=None, - # overwrite_output_dir=True, - # ) - # - # score_name = "full_pairwise" - # analyzer.compute_pairwise_scores( - # score_name=score_name, - # query_dataset=eval_dataset, - # per_device_query_batch_size=len(eval_dataset), - # train_dataset=train_dataset, - # per_device_train_batch_size=len(train_dataset), - # ) - # scores = analyzer.load_pairwise_scores(score_name=score_name) - # print(scores.shape) + logging.info(f"Scores: {scores}") if __name__ == "__main__": diff --git a/examples/uci/pipeline.py b/examples/uci/pipeline.py index a4b0b3e..bbb3eaf 100644 --- a/examples/uci/pipeline.py +++ b/examples/uci/pipeline.py @@ -38,12 +38,12 @@ def get_regression_dataset( data_name: str, split: str, indices: List[int] = None, - data_path: str = "data/", + dataset_dir: str = "data/", ) -> Dataset: assert split in ["train", "eval_train", "valid"] # Load the dataset from the `.data` file. - data = np.loadtxt(os.path.join(data_path, data_name + ".data"), delimiter=None) + data = np.loadtxt(os.path.join(dataset_dir, data_name + ".data"), delimiter=None) data = data.astype(np.float32) # Shuffle the dataset. diff --git a/examples/uci/train.py b/examples/uci/train.py index 4d372bb..2ae0fdb 100644 --- a/examples/uci/train.py +++ b/examples/uci/train.py @@ -1,11 +1,12 @@ import argparse import logging import os -from torch.utils import data + import torch import torch.nn.functional as F -from torch import nn from accelerate.utils import set_seed +from torch import nn +from torch.utils import data from tqdm import tqdm from examples.uci.pipeline import construct_regression_mlp, get_regression_dataset @@ -82,7 +83,13 @@ def parse_args(): return args -def train(dataset: data.Dataset, batch_size: int, num_train_epochs: int, learning_rate: float, weight_decay: float) -> nn.Module: +def train( + dataset: data.Dataset, + batch_size: int, + num_train_epochs: int, + learning_rate: float, + weight_decay: float, +) -> nn.Module: train_dataloader = data.DataLoader( dataset=dataset, batch_size=batch_size, @@ -110,6 +117,25 @@ def train(dataset: data.Dataset, batch_size: int, num_train_epochs: int, learnin return model +def evaluate(model: nn.Module, dataset: data.Dataset, batch_size: int) -> float: + dataloader = data.DataLoader( + dataset=dataset, + batch_size=batch_size, + shuffle=False, + drop_last=False, + ) + + model.eval() + total_loss = 0 + for batch in dataloader: + with torch.no_grad(): + inputs, targets = batch + outputs = model(inputs) + loss = F.mse_loss(outputs, targets, reduction="sum") + total_loss += loss.detach().float() + + return total_loss.item() / len(dataloader.dataset) + def main(): args = parse_args() @@ -120,68 +146,25 @@ def main(): if args.seed is not None: set_seed(args.seed) - train_dataset = get_regression_dataset(data_name=args.dataset_name, split="train", data_path=args.dataset_dir) - train_dataloader = data.DataLoader( + train_dataset = get_regression_dataset(data_name=args.dataset_name, split="train", dataset_dir=args.dataset_dir) + + model = train( dataset=train_dataset, batch_size=args.train_batch_size, - shuffle=True, - drop_last=True, + num_train_epochs=args.num_train_epochs, + learning_rate=args.learning_rate, + weight_decay=args.weight_decay, ) - model = construct_regression_mlp() - optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) - logger.info("Start training the model.") - model.train() - for epoch in range(args.num_train_epochs): - total_loss = 0 - with tqdm(train_dataloader, unit="batch") as tepoch: - for batch in tepoch: - tepoch.set_description(f"Epoch {epoch}") - inputs, targets = batch - outputs = model(inputs) - loss = F.mse_loss(outputs, targets) - total_loss += loss.detach().float() - loss.backward() - optimizer.step() - optimizer.zero_grad() - tepoch.set_postfix(loss=total_loss.item() / len(train_dataloader)) - - logger.info("Start evaluating the model.") - model.eval() - train_eval_dataset = get_regression_dataset( - data_name=args.dataset_name, split="eval_train", data_path=args.dataset_dir + eval_train_dataset = get_regression_dataset( + data_name=args.dataset_name, split="eval_train", dataset_dir=args.dataset_dir ) - train_eval_dataloader = DataLoader( - dataset=train_eval_dataset, - batch_size=args.eval_batch_size, - shuffle=False, - drop_last=False, - ) - eval_dataset = get_regression_dataset(data_name=args.dataset_name, split="valid", data_path=args.dataset_dir) - eval_dataloader = DataLoader( - dataset=eval_dataset, - batch_size=args.eval_batch_size, - shuffle=False, - drop_last=False, - ) - - total_loss = 0 - for batch in train_eval_dataloader: - with torch.no_grad(): - inputs, targets = batch - outputs = model(inputs) - loss = F.mse_loss(outputs, targets, reduction="sum") - total_loss += loss.detach().float() - logger.info(f"Train loss {total_loss.item() / len(train_eval_dataloader.dataset)}") + train_loss = evaluate(model=model, dataset=eval_train_dataset, batch_size=args.eval_batch_size) + logger.info(f"Train loss: {train_loss}") - total_loss = 0 - for batch in eval_dataloader: - with torch.no_grad(): - inputs, targets = batch - outputs = model(inputs) - loss = F.mse_loss(outputs, targets, reduction="sum") - total_loss += loss.detach().float() - logger.info(f"Evaluation loss {total_loss.item() / len(eval_dataloader.dataset)}") + eval_dataset = get_regression_dataset(data_name=args.dataset_name, split="valid", dataset_dir=args.dataset_dir) + eval_loss = evaluate(model=model, dataset=eval_dataset, batch_size=args.eval_batch_size) + logger.info(f"Evaluation loss: {eval_loss}") if args.checkpoint_dir is not None: torch.save(model.state_dict(), os.path.join(args.checkpoint_dir, "model.pth")) diff --git a/kronfluence/analyzer.py b/kronfluence/analyzer.py index 0dc2c0a..61faeca 100644 --- a/kronfluence/analyzer.py +++ b/kronfluence/analyzer.py @@ -1,6 +1,9 @@ from typing import Optional from accelerate.utils import extract_model_from_parallel +from kronfluence.module.constants import FACTOR_TYPE + +from factor.config import FactorConfig from safetensors.torch import save_file from torch import nn from torch.utils import data @@ -119,7 +122,7 @@ def fit_all_factors( dataloader_kwargs: Optional[DataLoaderKwargs] = None, factor_args: Optional[FactorArguments] = None, overwrite_output_dir: bool = False, - ) -> None: + ) -> Optional[FACTOR_TYPE]: """Computes all necessary factors for the given factor strategy. As an example, EK-FAC requires (1) computing covariance matrices, (2) performing Eigendecomposition, and (3) computing Lambda (corrected-eigenvalues) matrices. @@ -161,3 +164,11 @@ def fit_all_factors( factor_args=factor_args, overwrite_output_dir=overwrite_output_dir, ) + + if factor_args is None: + factor_args = FactorArguments() + strategy = factor_args.strategy + factor_config = FactorConfig.CONFIGS[strategy] + return self._load_all_required_factors( + factors_name=factors_name, strategy=strategy, factor_config=factor_config + ) diff --git a/tests/gpu_tests/cpu_test.py b/tests/gpu_tests/cpu_test.py index faa77d9..639a99f 100644 --- a/tests/gpu_tests/cpu_test.py +++ b/tests/gpu_tests/cpu_test.py @@ -16,7 +16,7 @@ construct_mnist_mlp, get_mnist_dataset, ) -from tests.gpu_tests.prepare_tests import TRAIN_INDICES, QUERY_INDICES +from tests.gpu_tests.prepare_tests import QUERY_INDICES, TRAIN_INDICES from tests.utils import check_tensor_dict_equivalence logging.basicConfig(level=logging.DEBUG) diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index c6687c2..d46c1f4 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -11,10 +11,9 @@ get_mnist_dataset, ) - # Pick difficult cases where the dataset is not perfectly divisible by batch size. -TRAIN_INDICES = 59_999 -QUERY_INDICES = 50 +TRAIN_INDICES = 5_003 +QUERY_INDICES = 51 def train() -> None: @@ -82,6 +81,8 @@ def run_analysis() -> None: train_dataset = get_mnist_dataset(split="train", data_path="data") eval_dataset = get_mnist_dataset(split="valid", data_path="data") + train_dataset = Subset(train_dataset, indices=list(range(TRAIN_INDICES))) + eval_dataset = Subset(eval_dataset, indices=list(range(QUERY_INDICES))) task = ClassificationTask() model = model.double() @@ -99,7 +100,6 @@ def run_analysis() -> None: gradient_covariance_dtype=torch.float64, lambda_dtype=torch.float64, lambda_iterative_aggregate=False, - lambda_max_examples=1_000 ) analyzer.fit_all_factors( factors_name="single_gpu", @@ -119,8 +119,6 @@ def run_analysis() -> None: factors_name="single_gpu", query_dataset=eval_dataset, train_dataset=train_dataset, - train_indices=list(range(TRAIN_INDICES)), - query_indices=list(range(QUERY_INDICES)), per_device_query_batch_size=12, per_device_train_batch_size=512, score_args=score_args, @@ -130,7 +128,6 @@ def run_analysis() -> None: scores_name="single_gpu", factors_name="single_gpu", train_dataset=train_dataset, - train_indices=list(range(TRAIN_INDICES)), per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, From 34fef576432945c55e52a8ceb4396e307f455af8 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 06:13:51 -0400 Subject: [PATCH 6/8] Synchronize batch size for cpu test --- examples/uci/tutorial.ipynb | 37 ++++++++++++++++++++++++++++++++ tests/gpu_tests/cpu_test.py | 3 ++- tests/gpu_tests/prepare_tests.py | 1 - 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 examples/uci/tutorial.ipynb diff --git a/examples/uci/tutorial.ipynb b/examples/uci/tutorial.ipynb new file mode 100644 index 0000000..54f657b --- /dev/null +++ b/examples/uci/tutorial.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/gpu_tests/cpu_test.py b/tests/gpu_tests/cpu_test.py index 639a99f..2cb4952 100644 --- a/tests/gpu_tests/cpu_test.py +++ b/tests/gpu_tests/cpu_test.py @@ -34,7 +34,9 @@ def setUpClass(cls) -> None: cls.model = cls.model.double() cls.train_dataset = get_mnist_dataset(split="train", data_path="data") + cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(TRAIN_INDICES))) cls.eval_dataset = get_mnist_dataset(split="valid", data_path="data") + cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(QUERY_INDICES))) cls.task = ClassificationTask() cls.model = prepare_model(cls.model, cls.task) @@ -122,7 +124,6 @@ def test_pairwise_scores(self) -> None: ) new_pairwise_scores = self.analyzer.load_pairwise_scores(scores_name=NEW_SCORE_NAME) - torch.set_printoptions(threshold=30_000) print(f"Previous score: {pairwise_scores[ALL_MODULE_NAME][10]}") print(f"Previous shape: {pairwise_scores[ALL_MODULE_NAME].shape}") print(f"New score: {new_pairwise_scores[ALL_MODULE_NAME][10]}") diff --git a/tests/gpu_tests/prepare_tests.py b/tests/gpu_tests/prepare_tests.py index d46c1f4..f55c211 100644 --- a/tests/gpu_tests/prepare_tests.py +++ b/tests/gpu_tests/prepare_tests.py @@ -99,7 +99,6 @@ def run_analysis() -> None: activation_covariance_dtype=torch.float64, gradient_covariance_dtype=torch.float64, lambda_dtype=torch.float64, - lambda_iterative_aggregate=False, ) analyzer.fit_all_factors( factors_name="single_gpu", From f7bd0a76a140f60aad405bfab6a9afb916fadad2 Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 13:43:58 -0400 Subject: [PATCH 7/8] Modify DDP GPU test setting --- examples/_test_requirements.txt | 3 +- examples/uci/analyze.py | 37 ++++--------- examples/uci/tutorial.ipynb | 95 +++++++++++++++++++++++++++++++-- kronfluence/analyzer.py | 3 +- kronfluence/score/pairwise.py | 2 +- kronfluence/score/self.py | 2 +- tests/gpu_tests/compile_test.py | 27 +++++----- tests/gpu_tests/cpu_test.py | 10 ++-- tests/gpu_tests/ddp_test.py | 33 ++++++------ 9 files changed, 144 insertions(+), 68 deletions(-) diff --git a/examples/_test_requirements.txt b/examples/_test_requirements.txt index ff88936..4a67121 100644 --- a/examples/_test_requirements.txt +++ b/examples/_test_requirements.txt @@ -1 +1,2 @@ -scikit-learn \ No newline at end of file +scikit-learn +jupyter \ No newline at end of file diff --git a/examples/uci/analyze.py b/examples/uci/analyze.py index 13082ea..5fb548b 100644 --- a/examples/uci/analyze.py +++ b/examples/uci/analyze.py @@ -6,13 +6,12 @@ import torch import torch.nn.functional as F -from analyzer import Analyzer, prepare_model -from arguments import FactorArguments, ScoreArguments -from task import Task from torch import nn -from torch.profiler import ProfilerActivity, profile, record_function from examples.uci.pipeline import construct_regression_mlp, get_regression_dataset +from kronfluence.analyzer import Analyzer, prepare_model +from kronfluence.arguments import FactorArguments +from kronfluence.task import Task BATCH_DTYPE = Tuple[torch.Tensor, torch.Tensor] @@ -32,31 +31,18 @@ def parse_args(): default="./data", help="A folder containing the UCI regression dataset.", ) - - parser.add_argument( - "--factor_strategy", - type=str, - default="ekfac", - help="Strategy to compute preconditioning factors.", - ) - parser.add_argument( - "--batch_size", - type=int, - default=256, - help="Batch size for compute factors and scores.", - ) parser.add_argument( - "--analysis_name", + "--checkpoint_dir", type=str, - default="uci", - help="Name of the influence analysis.", + default="./checkpoints", + help="A path to store the final checkpoint.", ) parser.add_argument( - "--checkpoint_dir", + "--factor_strategy", type=str, - default="./checkpoints", - help="A path to store the final checkpoint.", + default="ekfac", + help="Strategy to compute preconditioning factors.", ) args = parser.parse_args() @@ -110,7 +96,7 @@ def main(): model = prepare_model(model, task) analyzer = Analyzer( - analysis_name=args.analysis_name, + analysis_name=args.dataset_name, model=model, task=task, cpu=True, @@ -131,8 +117,7 @@ def main(): factors_name=args.factor_strategy, query_dataset=eval_dataset, train_dataset=train_dataset, - per_device_query_batch_size=16, - per_device_train_batch_size=8, + per_device_query_batch_size=len(eval_dataset), overwrite_output_dir=True, ) logging.info(f"Scores: {scores}") diff --git a/examples/uci/tutorial.ipynb b/examples/uci/tutorial.ipynb index 54f657b..9c4f049 100644 --- a/examples/uci/tutorial.ipynb +++ b/examples/uci/tutorial.ipynb @@ -2,15 +2,104 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "initial_id", "metadata": { - "collapsed": true + "collapsed": true, + "ExecuteTime": { + "end_time": "2024-03-12T10:46:20.005159Z", + "start_time": "2024-03-12T10:46:19.995640Z" + } }, "outputs": [], "source": [ - "" + "import kronfluence" ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "from examples.uci.train import train, evaluate" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-12T10:47:03.741170Z", + "start_time": "2024-03-12T10:47:02.235222Z" + } + }, + "id": "4e56f0f1d6e34e62" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "from kronfluence.analyzer import Analyzer, prepare_model\n", + "from kronfluence.arguments import FactorArguments\n", + "from kronfluence.task import Task" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-12T10:55:57.054379Z", + "start_time": "2024-03-12T10:55:57.034172Z" + } + }, + "id": "6dc3ab20b6cb4050" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "from examples.uci.pipeline import construct_regression_mlp, get_regression_dataset" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-12T11:40:46.724609Z", + "start_time": "2024-03-12T11:40:46.722860Z" + } + }, + "id": "f3ed29a0d098c6dd" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "dataset_name = \"concrete\"\n", + "dataset_dir = \"./data\"\n", + "train_batch_size = 32\n", + "num_train_epochs = 40\n" + ], + "metadata": { + "collapsed": false + }, + "id": "cd2af4deeea3afd7" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "train_dataset = get_regression_dataset(data_name=dataset_name, split=\"train\", dataset_dir=dataset_dir)\n", + "\n", + "model = train(\n", + " dataset=train_dataset,\n", + " batch_size=args.train_batch_size,\n", + " num_train_epochs=args.num_train_epochs,\n", + " learning_rate=args.learning_rate,\n", + " weight_decay=args.weight_decay,\n", + ")" + ], + "metadata": { + "collapsed": false + }, + "id": "c75658f17d06a7ab" } ], "metadata": { diff --git a/kronfluence/analyzer.py b/kronfluence/analyzer.py index 61faeca..8235713 100644 --- a/kronfluence/analyzer.py +++ b/kronfluence/analyzer.py @@ -1,8 +1,6 @@ from typing import Optional from accelerate.utils import extract_model_from_parallel -from kronfluence.module.constants import FACTOR_TYPE - from factor.config import FactorConfig from safetensors.torch import save_file from torch import nn @@ -13,6 +11,7 @@ from kronfluence.computer.eigen_computer import EigenComputer from kronfluence.computer.pairwise_score_computer import PairwiseScoreComputer from kronfluence.computer.self_score_computer import SelfScoreComputer +from kronfluence.module.constants import FACTOR_TYPE from kronfluence.module.utils import wrap_tracked_modules from kronfluence.task import Task from kronfluence.utils.dataset import DataLoaderKwargs diff --git a/kronfluence/score/pairwise.py b/kronfluence/score/pairwise.py index e1abd55..5b30e47 100644 --- a/kronfluence/score/pairwise.py +++ b/kronfluence/score/pairwise.py @@ -206,7 +206,7 @@ def compute_pairwise_scores_with_loaders( """Computes pairwise influence scores for a given model and task. Args: - loaded_factors (FACTOR_TYPE, optional): + loaded_factors (FACTOR_TYPE): The factor results to load from, before computing the pairwise scores. model (nn.Module): The model that pairwise influence scores will be computed. diff --git a/kronfluence/score/self.py b/kronfluence/score/self.py index 062069b..1c19741 100644 --- a/kronfluence/score/self.py +++ b/kronfluence/score/self.py @@ -93,7 +93,7 @@ def compute_self_scores_with_loaders( """Computes self-influence scores for a given model and task. Args: - loaded_factors (FACTOR_TYPE, optional): + loaded_factors (FACTOR_TYPE): The factor results to load from, before computing the self-influence scores. model (nn.Module): The model that self-influence scores will be computed. diff --git a/tests/gpu_tests/compile_test.py b/tests/gpu_tests/compile_test.py index 32cf1df..a361dd6 100644 --- a/tests/gpu_tests/compile_test.py +++ b/tests/gpu_tests/compile_test.py @@ -2,20 +2,21 @@ import unittest import torch -from analyzer import Analyzer, prepare_model -from arguments import FactorArguments, ScoreArguments -from module.constants import ( +from torch.utils import data + +from kronfluence.analyzer import Analyzer, prepare_model +from kronfluence.arguments import FactorArguments, ScoreArguments +from kronfluence.module.constants import ( ALL_MODULE_NAME, COVARIANCE_FACTOR_NAMES, LAMBDA_FACTOR_NAMES, ) -from torch.utils import data - from tests.gpu_tests.pipeline import ( ClassificationTask, construct_mnist_mlp, get_mnist_dataset, ) +from tests.gpu_tests.prepare_tests import QUERY_INDICES, TRAIN_INDICES from tests.utils import check_tensor_dict_equivalence logging.basicConfig(level=logging.DEBUG) @@ -33,9 +34,9 @@ def setUpClass(cls) -> None: cls.model = cls.model.double() cls.train_dataset = get_mnist_dataset(split="train", data_path="data") - cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(200))) + cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(TRAIN_INDICES))) cls.eval_dataset = get_mnist_dataset(split="valid", data_path="data") - cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(100))) + cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(QUERY_INDICES))) cls.task = ClassificationTask() cls.model = prepare_model(cls.model, cls.task) @@ -60,7 +61,7 @@ def test_covariance_matrices(self) -> None: factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, ) new_covariance_factors = self.analyzer.load_covariance_matrices(factors_name=NEW_FACTOR_NAME) @@ -83,7 +84,7 @@ def test_lambda_matrices(self): factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, load_from_factors_name=OLD_FACTOR_NAME, ) @@ -114,10 +115,10 @@ def test_pairwise_scores(self) -> None: factors_name=OLD_FACTOR_NAME, query_dataset=self.eval_dataset, train_dataset=self.train_dataset, - train_indices=list(range(42)), - query_indices=list(range(23)), - per_device_query_batch_size=2, - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + query_indices=list(range(QUERY_INDICES)), + per_device_query_batch_size=12, + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) diff --git a/tests/gpu_tests/cpu_test.py b/tests/gpu_tests/cpu_test.py index 2cb4952..85de834 100644 --- a/tests/gpu_tests/cpu_test.py +++ b/tests/gpu_tests/cpu_test.py @@ -2,15 +2,15 @@ import unittest import torch -from analyzer import Analyzer, prepare_model -from arguments import FactorArguments, ScoreArguments -from module.constants import ( +from torch.utils import data + +from kronfluence.analyzer import Analyzer, prepare_model +from kronfluence.arguments import FactorArguments, ScoreArguments +from kronfluence.module.constants import ( ALL_MODULE_NAME, COVARIANCE_FACTOR_NAMES, LAMBDA_FACTOR_NAMES, ) -from torch.utils import data - from tests.gpu_tests.pipeline import ( ClassificationTask, construct_mnist_mlp, diff --git a/tests/gpu_tests/ddp_test.py b/tests/gpu_tests/ddp_test.py index 2e50cec..b500c67 100644 --- a/tests/gpu_tests/ddp_test.py +++ b/tests/gpu_tests/ddp_test.py @@ -4,21 +4,22 @@ import torch import torch.distributed as dist -from analyzer import Analyzer, prepare_model -from arguments import FactorArguments, ScoreArguments -from module.constants import ( +from torch.nn.parallel import DistributedDataParallel +from torch.utils import data + +from kronfluence.analyzer import Analyzer, prepare_model +from kronfluence.arguments import FactorArguments, ScoreArguments +from kronfluence.module.constants import ( ALL_MODULE_NAME, COVARIANCE_FACTOR_NAMES, LAMBDA_FACTOR_NAMES, ) -from torch.nn.parallel import DistributedDataParallel -from torch.utils import data - from tests.gpu_tests.pipeline import ( ClassificationTask, construct_mnist_mlp, get_mnist_dataset, ) +from tests.gpu_tests.prepare_tests import QUERY_INDICES, TRAIN_INDICES from tests.utils import check_tensor_dict_equivalence LOCAL_RANK = int(os.environ["LOCAL_RANK"]) @@ -39,9 +40,9 @@ def setUpClass(cls) -> None: cls.model = cls.model.double() cls.train_dataset = get_mnist_dataset(split="train", data_path="data") - cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(200))) + cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(TRAIN_INDICES))) cls.eval_dataset = get_mnist_dataset(split="valid", data_path="data") - cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(100))) + cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(QUERY_INDICES))) cls.task = ClassificationTask() cls.model = prepare_model(cls.model, cls.task) @@ -70,7 +71,7 @@ def test_covariance_matrices(self) -> None: factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, ) new_covariance_factors = self.analyzer.load_covariance_matrices(factors_name=NEW_FACTOR_NAME) @@ -101,7 +102,7 @@ def test_lambda_matrices(self): factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, load_from_factors_name=OLD_FACTOR_NAME, ) @@ -134,10 +135,10 @@ def test_pairwise_scores(self) -> None: factors_name=OLD_FACTOR_NAME, query_dataset=self.eval_dataset, train_dataset=self.train_dataset, - train_indices=list(range(42)), - query_indices=list(range(23)), - per_device_query_batch_size=2, - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + query_indices=list(range(QUERY_INDICES)), + per_device_query_batch_size=12, + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) @@ -167,8 +168,8 @@ def test_self_scores(self) -> None: scores_name=NEW_SCORE_NAME, factors_name=OLD_FACTOR_NAME, train_dataset=self.train_dataset, - train_indices=list(range(42)), - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) From 9b09521f704ec04617d9ea7a29250e35ab1c0d0c Mon Sep 17 00:00:00 2001 From: Juhan Bae Date: Tue, 12 Mar 2024 13:49:09 -0400 Subject: [PATCH 8/8] Add torch.compile tests --- tests/gpu_tests/compile_test.py | 4 ++-- tests/gpu_tests/fsdp_test.py | 37 +++++++++++++++++---------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/gpu_tests/compile_test.py b/tests/gpu_tests/compile_test.py index a361dd6..21007f3 100644 --- a/tests/gpu_tests/compile_test.py +++ b/tests/gpu_tests/compile_test.py @@ -146,8 +146,8 @@ def test_self_scores(self) -> None: scores_name=NEW_SCORE_NAME, factors_name=OLD_FACTOR_NAME, train_dataset=self.train_dataset, - train_indices=list(range(42)), - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) diff --git a/tests/gpu_tests/fsdp_test.py b/tests/gpu_tests/fsdp_test.py index 831d3cf..24966bc 100644 --- a/tests/gpu_tests/fsdp_test.py +++ b/tests/gpu_tests/fsdp_test.py @@ -5,23 +5,24 @@ import torch import torch.distributed as dist -from analyzer import Analyzer, prepare_model -from arguments import FactorArguments, ScoreArguments -from module.constants import ( - ALL_MODULE_NAME, - COVARIANCE_FACTOR_NAMES, - LAMBDA_FACTOR_NAMES, -) from torch.distributed.fsdp import FullyShardedDataParallel as FSDP -from torch.distributed.fsdp.wrap import enable_wrap, size_based_auto_wrap_policy, wrap +from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy from torch.nn.parallel import DistributedDataParallel from torch.utils import data +from kronfluence.analyzer import Analyzer, prepare_model +from kronfluence.arguments import FactorArguments, ScoreArguments +from kronfluence.module.constants import ( + ALL_MODULE_NAME, + COVARIANCE_FACTOR_NAMES, + LAMBDA_FACTOR_NAMES, +) from tests.gpu_tests.pipeline import ( ClassificationTask, construct_mnist_mlp, get_mnist_dataset, ) +from tests.gpu_tests.prepare_tests import QUERY_INDICES, TRAIN_INDICES from tests.utils import check_tensor_dict_equivalence LOCAL_RANK = int(os.environ["LOCAL_RANK"]) @@ -42,9 +43,9 @@ def setUpClass(cls) -> None: cls.model = cls.model.double() cls.train_dataset = get_mnist_dataset(split="train", data_path="data") - cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(200))) + cls.train_dataset = data.Subset(cls.train_dataset, indices=list(range(TRAIN_INDICES))) cls.eval_dataset = get_mnist_dataset(split="valid", data_path="data") - cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(100))) + cls.eval_dataset = data.Subset(cls.eval_dataset, indices=list(range(QUERY_INDICES))) cls.task = ClassificationTask() cls.model = prepare_model(cls.model, cls.task) @@ -76,7 +77,7 @@ def test_covariance_matrices(self) -> None: factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, ) new_covariance_factors = self.analyzer.load_covariance_matrices(factors_name=NEW_FACTOR_NAME) @@ -107,7 +108,7 @@ def test_lambda_matrices(self): factors_name=NEW_FACTOR_NAME, dataset=self.train_dataset, factor_args=factor_args, - per_device_batch_size=16, + per_device_batch_size=512, overwrite_output_dir=True, load_from_factors_name=OLD_FACTOR_NAME, ) @@ -140,10 +141,10 @@ def test_pairwise_scores(self) -> None: factors_name=OLD_FACTOR_NAME, query_dataset=self.eval_dataset, train_dataset=self.train_dataset, - train_indices=list(range(42)), - query_indices=list(range(23)), - per_device_query_batch_size=2, - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + query_indices=list(range(QUERY_INDICES)), + per_device_query_batch_size=12, + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, ) @@ -173,8 +174,8 @@ def test_self_scores(self) -> None: scores_name=NEW_SCORE_NAME, factors_name=OLD_FACTOR_NAME, train_dataset=self.train_dataset, - train_indices=list(range(42)), - per_device_train_batch_size=4, + train_indices=list(range(TRAIN_INDICES)), + per_device_train_batch_size=512, score_args=score_args, overwrite_output_dir=True, )