From 4345c1908208ead6ab9bc4ba9e2b5ec3bde3105c Mon Sep 17 00:00:00 2001 From: Braxton Cuneo Date: Thu, 7 Sep 2023 13:13:59 -0700 Subject: [PATCH] Added --target option and logic for more complex scheduling --- mcdc/adapt.py | 16 ++- mcdc/loop.py | 304 +++++++++++++++++++++++++++++++------------------- mcdc/main.py | 5 +- 3 files changed, 205 insertions(+), 120 deletions(-) diff --git a/mcdc/adapt.py b/mcdc/adapt.py index 4a034a2a..7a2a2a77 100644 --- a/mcdc/adapt.py +++ b/mcdc/adapt.py @@ -208,6 +208,8 @@ def universal_arrays_inner (func): # ============================================================================= +SIMPLE_ASYNC = True + none_type = None mcdc_type = None state_spec = None @@ -216,7 +218,8 @@ def universal_arrays_inner (func): thread_gpu = None particle_gpu = None prep_gpu = None -step_gpu = None +step_async = None +find_cell_async = None @@ -225,7 +228,7 @@ def gpu_forward_declare(): global none_type, mcdc_type, state_spec global device_gpu, group_gpu, thread_gpu global particle_gpu, particle_record_gpu - global step_gpu + global step_async, find_cell_async none_type = numba.from_dtype(np.dtype([ ])) mcdc_type = numba.from_dtype(type_.global_) @@ -236,8 +239,10 @@ def gpu_forward_declare(): def step(prog: numba.uintp, P: particle_gpu): pass + def find_cell(prog: numba.uintp, P: particle_gpu): + pass - step_gpu, = adapt.harm.RuntimeSpec.async_dispatch(step) + step_async, find_cell_async = adapt.harm.RuntimeSpec.async_dispatch(step,find_cell) # ============================================================================= @@ -282,7 +287,10 @@ def add_active(particle,prog): @for_gpu() def add_active(particle,prog): P = kernel.recordlike_to_particle(particle) - step_gpu(prog,P) + if SIMPLE_ASYNC: + step_async(prog,P) + else: + find_cell_async(prog,P) @for_cpu() diff --git a/mcdc/loop.py b/mcdc/loop.py index b55fa1d5..aa06339a 100644 --- a/mcdc/loop.py +++ b/mcdc/loop.py @@ -99,6 +99,53 @@ def loop_main(mcdc): # Particle loop # ========================================================================= + + +@njit +def find_particle_cell(P,prog): + mcdc = adapt.device(prog) + + trans_struct = adapt.local_translate() + trans = trans_struct["values"] + P["cell_ID"] = kernel.get_particle_cell(P, 0, trans, mcdc) + + +@njit +def evaluate_collision(P,prog): + mcdc = adapt.device(prog) + + # Generate IC? + if mcdc["technique"]["IC_generator"] and mcdc["cycle_active"]: + kernel.bank_IC(P, prog) + + # Branchless collision? + if mcdc["technique"]["branchless_collision"]: + kernel.branchless_collision(P, mcdc) + + # Analog collision + else: + # Get collision type + kernel.collision(P, mcdc) + event = P["event"] + # Perform collision + if event & EVENT_CAPTURE: + kernel.capture(P, mcdc) + elif event == EVENT_SCATTERING: + kernel.scattering(P, prog) + elif event == EVENT_FISSION: + kernel.fission(P, prog) + + # Sensitivity quantification for nuclide? + material = mcdc["materials"][P["material_ID"]] + if material["sensitivity"] and ( + P["sensitivity_ID"] == 0 + or mcdc["technique"]["dsm_order"] == 2 + and P["sensitivity_ID"] <= mcdc["setting"]["N_sensitivity"] + ): + kernel.sensitivity_material(P, prog) + + + @njit def step_particle(P, prog): @@ -106,9 +153,8 @@ def step_particle(P, prog): # Find cell from root universe if unknown if P["cell_ID"] == -1: - trans_struct = adapt.local_translate() - trans = trans_struct["values"] - P["cell_ID"] = kernel.get_particle_cell(P, 0, trans, mcdc) + find_particle_cell(P,prog) + # Determine and move to event kernel.move_to_event(P, mcdc) @@ -120,40 +166,15 @@ def step_particle(P, prog): # Collision if event & EVENT_COLLISION: - # Generate IC? - if mcdc["technique"]["IC_generator"] and mcdc["cycle_active"]: - kernel.bank_IC(P, prog) - - # Branchless collision? - if mcdc["technique"]["branchless_collision"]: - kernel.branchless_collision(P, mcdc) + evaluate_collision(P,prog) - # Analog collision - else: - # Get collision type - kernel.collision(P, mcdc) - event = P["event"] - # Perform collision - if event & EVENT_CAPTURE: - kernel.capture(P, mcdc) - elif event == EVENT_SCATTERING: - kernel.scattering(P, prog) - elif event == EVENT_FISSION: - kernel.fission(P, prog) - - # Sensitivity quantification for nuclide? - material = mcdc["materials"][P["material_ID"]] - if material["sensitivity"] and ( - P["sensitivity_ID"] == 0 - or mcdc["technique"]["dsm_order"] == 2 - and P["sensitivity_ID"] <= mcdc["setting"]["N_sensitivity"] - ): - kernel.sensitivity_material(P, prog) # Mesh tally if event & EVENT_MESH: kernel.mesh_crossing(P, mcdc) + + # Different Methods for shifting the particle # Surface crossing if event & EVENT_SURFACE: @@ -174,6 +195,9 @@ def step_particle(P, prog): elif event & EVENT_LATTICE + EVENT_MESH: kernel.shift_particle(P, SHIFT) + + + # Time boundary if event & EVENT_TIME_BOUNDARY: kernel.time_boundary(P, mcdc) @@ -182,6 +206,7 @@ def step_particle(P, prog): elif mcdc["technique"]["weight_window"]: kernel.weight_window(P, prog) + # Apply weight roulette if mcdc["technique"]["weight_roulette"]: # check if weight has fallen below threshold @@ -881,13 +906,13 @@ def loop_source_precursor(seed, mcdc): def run_source_gpu_rt(mcdc): source_gpu_rt.store_state(mcdc) source_gpu_rt.init(4096) - source_gpu_rt.exec(2000000,288) + source_gpu_rt.exec(20000000,288) return source_gpu_rt.load_state() def run_precursor_gpu_rt(mcdc): precursor_gpu_rt.store_state(mcdc) precursor_gpu_rt.init(4096) - precursor_gpu_rt.exec(2000000,288) + precursor_gpu_rt.exec(20000000,288) return precursor_gpu_rt.load_state(mcdc) @@ -941,34 +966,155 @@ def initialize(prog: nb.uintp): pass def finalize(prog: nb.uintp): - pass + pass - def step(prog: nb.uintp, P: adapt.particle_gpu): + @njit + def hit_surface(P): + result = (P["event"] & EVENT_SURFACE) + result = result or (P["event"] & EVENT_SURFACE_MOVE) + result = result or (P["event"] & EVENT_CENSUS) + result = result or (P["event"] & EVENT_LATTICE + EVENT_MESH) + return result + + def find_cell(prog: nb.uintp, P: adapt.particle_gpu): mcdc = adapt.device(prog) if P["fresh"]: prep_particle(P,prog) - P["fresh"] = False - step_particle(P,prog) + P["fresh"] = False + # Find cell from root universe if unknown + #if P["cell_ID"] == -1: + find_particle_cell(P,prog) + + adapt.step_async(prog,P) + + def step(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + # Determine and move to event + kernel.move_to_event(P, mcdc) + + if P["event"] & EVENT_COLLISION: + collision_async(prog,P) + elif P["event"] & EVENT_MESH: + mesh_async(prog,P) + elif hit_surface(P): + surface_async(prog,P) + else: + wrapup_async(prog,P) + + def collision(prog: nb.uintp, P: adapt.particle_gpu): + # Collision + evaluate_collision(P,prog) + if P["event"] & EVENT_MESH: + mesh_async(prog,P) + elif hit_surface(P): + surface_async(prog,P) + else: + wrapup_async(prog,P) + + def mesh(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + + kernel.mesh_crossing(P, mcdc) + + if hit_surface(P): + surface_async(prog,P) + else: + wrapup_async(prog,P) + + def surface(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + + # Different Methods for shifting the particle + # Surface crossing + if P["event"] & EVENT_SURFACE: + kernel.surface_crossing(P, prog) + + # Surface move + elif P["event"] & EVENT_SURFACE_MOVE: + P["t"] += SHIFT + P["cell_ID"] = -1 + + # Time census + elif P["event"] & EVENT_CENSUS: + P["t"] += SHIFT + adapt.add_census(kernel.copy_record(P), prog) + P["alive"] = False + + # Shift particle + elif P["event"] & EVENT_LATTICE + EVENT_MESH: + kernel.shift_particle(P, SHIFT) + + wrapup_async(prog,P) + + + def wrapup(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + + # Time boundary + if P["event"] & EVENT_TIME_BOUNDARY: + kernel.time_boundary(P, mcdc) + + # Apply weight window + elif mcdc["technique"]["weight_window"]: + kernel.weight_window(P, prog) + + + # Apply weight roulette + if mcdc["technique"]["weight_roulette"]: + # check if weight has fallen below threshold + if abs(P["w"]) <= mcdc["technique"]["wr_threshold"]: + kernel.weight_roulette(P, mcdc) + + # Particle tracker + if mcdc["setting"]["track_particle"] and not P["alive"]: + kernel.track_particle(P, mcdc) + if P["alive"]: - adapt.step_gpu(prog,P) + if P["cell_ID"] == -1: + find_cell_async(prog,P) + else: + adapt.step_async(prog,P) - async_fns = [step] - base_fns = (initialize,finalize, work_maker) spec_name = "mcdc" if precursor: spec_name += "_precursor" else: spec_name += "_source" - loop_spec = adapt.harm.RuntimeSpec(spec_name,adapt.state_spec,base_fns,async_fns) + + spec = None + + if adapt.SIMPLE_ASYNC: + base_fns = (initialize,finalize, work_maker) + + def step(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + if P["fresh"]: + prep_particle(P,prog) + P["fresh"] = False + step_particle(P,prog) + if P["alive"]: + adapt.step_async(prog,P) + + async_fns = [step] + spec = adapt.harm.RuntimeSpec(spec_name,adapt.state_spec,base_fns,async_fns) + + else: + base_fns = (initialize,finalize, work_maker) + + async_fns = [find_cell,step,collision,mesh,surface,wrapup] + find_cell_async, collision_async = adapt.harm.RuntimeSpec.async_dispatch(find_cell,collision) + mesh_async, surface_async, wrapup_async = adapt.harm.RuntimeSpec.async_dispatch(mesh,surface,wrapup) + spec = adapt.harm.RuntimeSpec(spec_name,adapt.state_spec,base_fns,async_fns) + global source_gpu_rt, precursor_gpu_rt if precursor: - precursor_gpu_rt = loop_spec.harmonize_instance() + precursor_gpu_rt = spec.harmonize_instance() else: - source_gpu_rt = loop_spec.harmonize_instance() + source_gpu_rt = spec.harmonize_instance() @@ -981,13 +1127,6 @@ def process_source_precursors(seed,mcdc): with objmode(mcdc_new = adapt.mcdc_type): mcdc_new = run_precursor_gpu_rt(mcdc) - #mcdc["nuclides"] = mcdc_new["nuclides"] - #mcdc["materials"] = mcdc_new["materials"] - #mcdc["surfaces"] = mcdc_new["surfaces"] - #mcdc["cells"] = mcdc_new["cells"] - #mcdc["universes"] = mcdc_new["universes"] - #mcdc["lattices"] = mcdc_new["lattices"] - #mcdc["sources"] = mcdc_new["sources"] mcdc["tally"] = mcdc_new["tally"] mcdc["setting"] = mcdc_new["setting"] mcdc["technique"] = mcdc_new["technique"] @@ -995,38 +1134,11 @@ def process_source_precursors(seed,mcdc): mcdc["bank_census"] = mcdc_new["bank_census"] mcdc["bank_source"] = mcdc_new["bank_source"] mcdc["bank_precursor"] = mcdc_new["bank_precursor"] - #mcdc["rng_seed_base"] = mcdc_new["rng_seed_base"] - #mcdc["rng_seed"] = mcdc_new["rng_seed"] - #mcdc["source_seed"] = mcdc_new["source_seed"] - #mcdc["source_precursor_seed"] = mcdc_new["source_precursor_seed"] - #mcdc["rng_stride"] = mcdc_new["rng_stride"] - #mcdc["k_eff"] = mcdc_new["k_eff"] - #mcdc["k_cycle"] = mcdc_new["k_cycle"] - #mcdc["k_avg"] = mcdc_new["k_avg"] - #mcdc["k_sdv"] = mcdc_new["k_sdv"] - #mcdc["n_avg"] = mcdc_new["n_avg"] - #mcdc["n_sdv"] = mcdc_new["n_sdv"] - #mcdc["n_max"] = mcdc_new["n_max"] - #mcdc["C_avg"] = mcdc_new["C_avg"] - #mcdc["C_sdv"] = mcdc_new["C_sdv"] - #mcdc["C_max"] = mcdc_new["C_max"] - #mcdc["k_avg_running"] = mcdc_new["k_avg_running"] - #mcdc["k_sdv_running"] = mcdc_new["k_sdv_running"] - #mcdc["gyration_radius"] = mcdc_new["gyration_radius"] mcdc["i_cycle"] = mcdc_new["i_cycle"] mcdc["cycle_active"] = mcdc_new["cycle_active"] mcdc["eigenvalue_tally_nuSigmaF"] = mcdc_new["eigenvalue_tally_nuSigmaF"] mcdc["eigenvalue_tally_n"] = mcdc_new["eigenvalue_tally_n"] mcdc["eigenvalue_tally_C"] = mcdc_new["eigenvalue_tally_C"] - #mcdc["mpi_size"] = mcdc_new["mpi_size"] - #mcdc["mpi_rank"] = mcdc_new["mpi_rank"] - #mcdc["mpi_master"] = mcdc_new["mpi_master"] - #mcdc["mpi_work_start"] = mcdc_new["mpi_work_start"] - #mcdc["mpi_work_size"] = mcdc_new["mpi_work_size"] - #mcdc["mpi_work_size_total"] = mcdc_new["mpi_work_size_total"] - #mcdc["mpi_work_start_precursor"] = mcdc_new["mpi_work_start_precursor"] - #mcdc["mpi_work_size_precursor"] = mcdc_new["mpi_work_size_precursor"] - #mcdc["mpi_work_size_total_precursor"] = mcdc_new["mpi_work_size_total_precursor"] mcdc["runtime_total"] = mcdc_new["runtime_total"] mcdc["runtime_preparation"] = mcdc_new["runtime_preparation"] mcdc["runtime_simulation"] = mcdc_new["runtime_simulation"] @@ -1036,8 +1148,6 @@ def process_source_precursors(seed,mcdc): mcdc["particle_track_N"] = mcdc_new["particle_track_N"] mcdc["particle_track_history_ID"] = mcdc_new["particle_track_history_ID"] mcdc["particle_track_particle_ID"] = mcdc_new["particle_track_particle_ID"] - #mcdc["precursor_strength"] = mcdc_new["precursor_strength"] - #mcdc["mpi_work_iter"] = mcdc_new["mpi_work_iter"] kernel.set_bank_size(mcdc["bank_active"],0) return process_source_precursors @@ -1050,13 +1160,6 @@ def process_sources(seed,mcdc): with objmode(mcdc_new = adapt.mcdc_type): mcdc_new = run_source_gpu_rt(mcdc) - #mcdc["nuclides"] = mcdc_new["nuclides"] - #mcdc["materials"] = mcdc_new["materials"] - #mcdc["surfaces"] = mcdc_new["surfaces"] - #mcdc["cells"] = mcdc_new["cells"] - #mcdc["universes"] = mcdc_new["universes"] - #mcdc["lattices"] = mcdc_new["lattices"] - #mcdc["sources"] = mcdc_new["sources"] mcdc["tally"] = mcdc_new["tally"] mcdc["setting"] = mcdc_new["setting"] mcdc["technique"] = mcdc_new["technique"] @@ -1064,38 +1167,11 @@ def process_sources(seed,mcdc): mcdc["bank_census"] = mcdc_new["bank_census"] mcdc["bank_source"] = mcdc_new["bank_source"] mcdc["bank_precursor"] = mcdc_new["bank_precursor"] - #mcdc["rng_seed_base"] = mcdc_new["rng_seed_base"] - #mcdc["rng_seed"] = mcdc_new["rng_seed"] - #mcdc["source_seed"] = mcdc_new["source_seed"] - #mcdc["source_precursor_seed"] = mcdc_new["source_precursor_seed"] - #mcdc["rng_stride"] = mcdc_new["rng_stride"] - #mcdc["k_eff"] = mcdc_new["k_eff"] - #mcdc["k_cycle"] = mcdc_new["k_cycle"] - #mcdc["k_avg"] = mcdc_new["k_avg"] - #mcdc["k_sdv"] = mcdc_new["k_sdv"] - #mcdc["n_avg"] = mcdc_new["n_avg"] - #mcdc["n_sdv"] = mcdc_new["n_sdv"] - #mcdc["n_max"] = mcdc_new["n_max"] - #mcdc["C_avg"] = mcdc_new["C_avg"] - #mcdc["C_sdv"] = mcdc_new["C_sdv"] - #mcdc["C_max"] = mcdc_new["C_max"] - #mcdc["k_avg_running"] = mcdc_new["k_avg_running"] - #mcdc["k_sdv_running"] = mcdc_new["k_sdv_running"] - #mcdc["gyration_radius"] = mcdc_new["gyration_radius"] mcdc["i_cycle"] = mcdc_new["i_cycle"] mcdc["cycle_active"] = mcdc_new["cycle_active"] mcdc["eigenvalue_tally_nuSigmaF"] = mcdc_new["eigenvalue_tally_nuSigmaF"] mcdc["eigenvalue_tally_n"] = mcdc_new["eigenvalue_tally_n"] mcdc["eigenvalue_tally_C"] = mcdc_new["eigenvalue_tally_C"] - #mcdc["mpi_size"] = mcdc_new["mpi_size"] - #mcdc["mpi_rank"] = mcdc_new["mpi_rank"] - #mcdc["mpi_master"] = mcdc_new["mpi_master"] - #mcdc["mpi_work_start"] = mcdc_new["mpi_work_start"] - #mcdc["mpi_work_size"] = mcdc_new["mpi_work_size"] - #mcdc["mpi_work_size_total"] = mcdc_new["mpi_work_size_total"] - #mcdc["mpi_work_start_precursor"] = mcdc_new["mpi_work_start_precursor"] - #mcdc["mpi_work_size_precursor"] = mcdc_new["mpi_work_size_precursor"] - #mcdc["mpi_work_size_total_precursor"] = mcdc_new["mpi_work_size_total_precursor"] mcdc["runtime_total"] = mcdc_new["runtime_total"] mcdc["runtime_preparation"] = mcdc_new["runtime_preparation"] mcdc["runtime_simulation"] = mcdc_new["runtime_simulation"] @@ -1105,8 +1181,6 @@ def process_sources(seed,mcdc): mcdc["particle_track_N"] = mcdc_new["particle_track_N"] mcdc["particle_track_history_ID"] = mcdc_new["particle_track_history_ID"] mcdc["particle_track_particle_ID"] = mcdc_new["particle_track_particle_ID"] - #mcdc["precursor_strength"] = mcdc_new["precursor_strength"] - #mcdc["mpi_work_iter"] = mcdc_new["mpi_work_iter"] kernel.set_bank_size(mcdc["bank_active"],0) return process_sources diff --git a/mcdc/main.py b/mcdc/main.py index 31623fb4..f68725cc 100644 --- a/mcdc/main.py +++ b/mcdc/main.py @@ -7,6 +7,9 @@ parser.add_argument( "--mode", type=str, help="Run mode", choices=["python", "numba"], default="python" ) +parser.add_argument( + "--target", type=str, help="Target", choices=["cpu", "gpu"], default="cpu" +) parser.add_argument("--N_particle", type=int, help="Number of particles") parser.add_argument("--output", type=str, help="Output file name") args, unargs = parser.parse_known_args() @@ -14,6 +17,7 @@ # Set mode # TODO: Will be inside run() once Python/Numba adapter is integrated mode = args.mode +target = args.target if mode == "python": nb.config.DISABLE_JIT = True elif mode == "numba": @@ -177,7 +181,6 @@ def prepare(): kernel.adapt_rng(nb.config.DISABLE_JIT) - target = "gpu" if target == "gpu": adapt.gpu_forward_declare()