From 846cc691c6e115ebfba005de164797a4fdb56499 Mon Sep 17 00:00:00 2001
From: Clayton Thorrez
Date: Sun, 22 Sep 2024 20:29:17 -0700
Subject: [PATCH] add tqdm and num-cpu

---
 fastchat/serve/monitor/elo_analysis.py   | 11 +++++++++--
 fastchat/serve/monitor/rating_systems.py | 21 +++++++++++++++------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fastchat/serve/monitor/elo_analysis.py b/fastchat/serve/monitor/elo_analysis.py
index aeb4ec372e..6a16c92184 100644
--- a/fastchat/serve/monitor/elo_analysis.py
+++ b/fastchat/serve/monitor/elo_analysis.py
@@ -332,6 +332,7 @@ def report_elo_analysis_results(
     scale=1,
     filter_func=lambda x: True,
     style_control=False,
+    num_cpu=None,
 ):
     battles = pd.DataFrame(battles_json)
 
@@ -378,10 +379,14 @@
             )
             elo_rating_final, coef_final = compute_style_control(battles)
         else:
-            bootstrap_df = compute_bootstrap_bt(battles, num_round=num_bootstrap)
+            bootstrap_df = compute_bootstrap_bt(
+                battles, num_round=num_bootstrap, num_cpu=num_cpu
+            )
             elo_rating_final = compute_bt(battles)
     elif rating_system == "elo":
-        bootstrap_df = compute_bootstrap_elo(battles, num_round=num_bootstrap)
+        bootstrap_df = compute_bootstrap_elo(
+            battles, num_round=num_bootstrap, num_cpu=num_cpu
+        )
         elo_rating_median = get_median_elo_from_bootstrap(bootstrap_df)
         elo_rating_final = elo_rating_median
 
@@ -485,6 +490,7 @@ def pretty_print_elo_rating(rating):
     parser.add_argument("--category", nargs="+", default=["full"])
     parser.add_argument("--scale", type=float, default=1)
     parser.add_argument("--style-control", action="store_true")
+    parser.add_argument("--num-cpu", type=int, default=12)
     args = parser.parse_args()
 
     np.random.seed(42)
@@ -523,6 +529,7 @@ def pretty_print_elo_rating(rating):
         scale=args.scale,
         filter_func=filter_func,
         style_control=args.style_control,
+        num_cpu=args.num_cpu,
     )
 
     for cat in args.category:
diff --git a/fastchat/serve/monitor/rating_systems.py b/fastchat/serve/monitor/rating_systems.py
index 8711fba2ea..6dda5b5e62 100644
--- a/fastchat/serve/monitor/rating_systems.py
+++ b/fastchat/serve/monitor/rating_systems.py
@@ -6,6 +6,7 @@
 from scipy.special import expit
 from scipy.optimize import minimize
 import pandas as pd
+from tqdm import tqdm
 
 
 STYLE_CONTROL_ELEMENTS_V1 = [
@@ -217,7 +218,13 @@ def compute_bt(df, base=10.0, scale=400.0, init_rating=1000, tol=1e-6):
 
 
 def compute_bootstrap_bt(
-    battles, num_round, base=10.0, scale=400.0, init_rating=1000.0, tol=1e-6
+    battles,
+    num_round,
+    base=10.0,
+    scale=400.0,
+    init_rating=1000.0,
+    tol=1e-6,
+    num_cpu=None,
 ):
     matchups, outcomes, models, weights = preprocess_for_bt(battles)
     # bootstrap sample the unique outcomes and their counts directly using the multinomial distribution
@@ -232,8 +239,8 @@ def compute_bootstrap_bt(
     bt_fn = partial(
         fit_bt, matchups, outcomes, n_models=len(models), alpha=np.log(base), tol=tol
     )
-    with mp.Pool(os.cpu_count()) as pool:
-        results = pool.map(bt_fn, boot_weights)
+    with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
+        results = list(tqdm(pool.imap_unordered(bt_fn, boot_weights), total=num_round))
 
     ratings = np.array(results)
     scaled_ratings = scale_and_offset(ratings, models, scale, init_rating)
@@ -346,6 +353,7 @@ def compute_bootstrap_style_control(
     init_rating=1000.0,
     scale=400.0,
     tol=1e-6,
+    num_cpu=None,
 ):
     matchups, features, outcomes, models = preprocess_for_style(df)
 
@@ -364,9 +372,10 @@
         low=0, high=matchups.shape[0], size=(num_round, matchups.shape[0])
     )
 
-    # this one is still memory and cpu intensive so don't make too many processes
-    with mp.Pool(4) as pool:
-        results = pool.map(contextual_bt_fn, boot_idxs)
+    with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
+        results = list(
+            tqdm(pool.imap_unordered(contextual_bt_fn, boot_idxs), total=num_round)
+        )
 
     ratings_params = np.array(results)
     ratings = ratings_params[:, : len(models)]
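
Review notes:

As a quick illustration for reviewers, here is a minimal, self-contained
sketch of the num_cpu/tqdm pattern this patch threads through both bootstrap
functions. The toy_fit_fn workload and the argument values are hypothetical
stand-ins for fit_bt / fit_contextual_bt and the real bootstrap samples; only
the pool sizing and progress wiring mirror the patched code.

    import os
    import multiprocessing as mp

    import numpy as np
    from tqdm import tqdm


    def toy_fit_fn(weights):
        # Hypothetical stand-in for fit_bt: any picklable one-argument function.
        return float(np.sum(weights))


    def run_bootstrap(boot_weights, num_round, num_cpu=None):
        # Fall back to all available cores when num_cpu is None, matching the
        # "num_cpu if num_cpu else os.cpu_count()" idiom used in the patch.
        with mp.Pool(num_cpu if num_cpu else os.cpu_count()) as pool:
            # imap_unordered yields results as workers finish, which lets tqdm
            # report live progress; ordering is irrelevant here because the
            # downstream bootstrap statistics (medians/quantiles per model)
            # do not depend on which round produced which ratings vector.
            results = list(
                tqdm(pool.imap_unordered(toy_fit_fn, boot_weights), total=num_round)
            )
        return np.array(results)


    if __name__ == "__main__":
        num_round = 100
        boot_weights = [np.random.rand(16) for _ in range(num_round)]
        print(run_bootstrap(boot_weights, num_round, num_cpu=4).shape)

Two things worth double-checking before merge:

1. The switch from pool.map to pool.imap_unordered drops the round-to-result
   ordering. That is harmless for the aggregate statistics computed downstream,
   but would matter if a caller ever needed to pair each ratings vector with
   its bootstrap sample.
2. elo_analysis.py now passes num_cpu=num_cpu to compute_bootstrap_elo, but
   this diff does not touch that function's signature in rating_systems.py.
   If compute_bootstrap_elo does not already accept a num_cpu keyword, the
   "elo" branch will raise a TypeError.

The new knob is exposed on the CLI as --num-cpu (default 12), e.g.
python3 fastchat/serve/monitor/elo_analysis.py --num-cpu 8, together with
whatever input flags the script already requires.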