diff --git a/abacusnbody/hod/prepare_sim.py b/abacusnbody/hod/prepare_sim.py index 3ae7fe02..51171338 100644 --- a/abacusnbody/hod/prepare_sim.py +++ b/abacusnbody/hod/prepare_sim.py @@ -1094,9 +1094,18 @@ def main( else: shearmark = None # N_dim = config['HOD_params']['Ndim'] - nthread = int( - np.floor(multiprocessing.cpu_count() / config['prepare_sim']['Nparallel_load']) - ) + nthread = config['prepare_sim'].get('Nthread_per_load', 'auto') + if nthread == 'auto': + # os.sched_getaffinity only exists on some Unix platforms; use the total CPU count elsewhere + try: + ncpu = len(os.sched_getaffinity(0)) + except AttributeError: + ncpu = multiprocessing.cpu_count() + # floor division gives 0 when Nparallel_load exceeds the CPU count; keep at least one thread + nthread = max(1, ncpu // config['prepare_sim']['Nparallel_load']) + print(f'prepare_sim inferred Nthread_per_load = {nthread}') + else: + nthread = int(nthread) p = multiprocessing.Pool(config['prepare_sim']['Nparallel_load']) p.starmap( diff --git a/scripts/hod/config/abacus_hod.yaml b/scripts/hod/config/abacus_hod.yaml index 68eb9442..d94f1027 100644 --- a/scripts/hod/config/abacus_hod.yaml +++ b/scripts/hod/config/abacus_hod.yaml @@ -13,7 +13,8 @@ sim_params: cleaned_halos: True # load cleaned halos? prepare_sim: - Nparallel_load: 5 # number of thread for organizing simulation outputs (prepare_sim) + Nparallel_load: 5 # number of processes; peak memory usage will increase by this factor + Nthread_per_load: 'auto' # number of threads per process ('auto' splits the available CPUs evenly across the processes) # HOD parameters HOD_params: diff --git a/scripts/hod/config/lc_hod.yaml b/scripts/hod/config/lc_hod.yaml index d20487ba..7c88dc49 100644 --- a/scripts/hod/config/lc_hod.yaml +++ b/scripts/hod/config/lc_hod.yaml @@ -14,6 +14,7 @@ sim_params: prepare_sim: Nparallel_load: 1 # not sure if this makes a difference since we have a single slab + Nthread_per_load: 'auto' # HOD parameters HOD_params: