From 3984a6218c4e00c79a11a18fe16514134ce22730 Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Thu, 15 Aug 2024 20:40:28 +0700
Subject: [PATCH 1/6] working Python mode

---
 mcdc/adapt.py            | 63 ----------------------------------------
 mcdc/code_factory.py     | 58 ++++++++++++++++++++++++++++++++++++
 mcdc/global_.py          |  2 ++
 mcdc/iqmc/iqmc_kernel.py | 25 ++++++++--------
 mcdc/iqmc/iqmc_loop.py   | 22 +++++++-------
 mcdc/kernel.py           | 17 ++++++-----
 mcdc/local.py            | 12 ++++++++
 mcdc/loop.py             | 20 ++++++-------
 mcdc/main.py             |  6 ++--
 mcdc/type_.py            | 42 +++++----------------------
 10 files changed, 125 insertions(+), 142 deletions(-)
 create mode 100644 mcdc/code_factory.py
 create mode 100644 mcdc/local.py

diff --git a/mcdc/adapt.py b/mcdc/adapt.py
index d80e6def..b66c3466 100644
--- a/mcdc/adapt.py
+++ b/mcdc/adapt.py
@@ -383,69 +383,6 @@ def add_IC(particle, prog):
     kernel.add_particle(particle, mcdc["technique"]["IC_bank_neutron_local"])
 
 
-@for_cpu()
-def local_translate():
-    return np.zeros(1, dtype=type_.translate)[0]
-
-
-@for_gpu()
-def local_translate():
-    trans = cuda.local.array(1, type_.translate)[0]
-    for i in range(3):
-        trans["values"][i] = 0
-    return trans
-
-
-@for_cpu()
-def local_group_array():
-    return np.zeros(1, dtype=type_.group_array)[0]
-
-
-@for_gpu()
-def local_group_array():
-    return cuda.local.array(1, type_.group_array)[0]
-
-
-@for_cpu()
-def local_j_array():
-    return np.zeros(1, dtype=type_.j_array)[0]
-
-
-@for_gpu()
-def local_j_array():
-    return cuda.local.array(1, type_.j_array)[0]
-
-
-@for_cpu()
-def local_RPN_array():
-    return np.zeros(1, dtype=type_.RPN_array)[0]
-
-
-@for_gpu()
-def local_RPN_array():
-    return cuda.local.array(1, type_.RPN_array)[0]
-
-
-@for_cpu()
-def local_particle():
-    return np.zeros(1, dtype=type_.particle)[0]
-
-
-@for_gpu()
-def local_particle():
-    return cuda.local.array(1, dtype=type_.particle)[0]
-
-
-@for_cpu()
-def local_particle_record():
-    return np.zeros(1, dtype=type_.particle_record)[0]
-
-
-@for_gpu()
-def local_particle_record():
-    return cuda.local.array(1, dtype=type_.particle_record)[0]
-
-
 @for_cpu()
 def global_add(ary, idx, val):
     result = ary[idx]
diff --git a/mcdc/code_factory.py b/mcdc/code_factory.py
new file mode 100644
index 00000000..4aa98c37
--- /dev/null
+++ b/mcdc/code_factory.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+from numba import njit
+
+import mcdc.local as local
+import mcdc.type_ as type_
+
+
+# ==============================================================================
+# Local array and object (see local.py)
+# ==============================================================================
+
+
+def make_locals(input_deck):
+    # Hardware target
+    target = input_deck.setting["target"]
+
+    # Problem-dependent sizes
+    G = input_deck.materials[0].G
+    J = input_deck.materials[0].J
+    N_RPN = max(
+        [np.sum(np.array(x._region_RPN) >= 0.0) for x in input_deck.cells]
+    )
+
+    # Make the locals
+    local.translation = local_array(type_.float64, 3, target)
+    local.energy_group_array = local_array(type_.float64, G, target)
+    local.precursor_group_array = local_array(type_.float64, J, target)
+    local.RPN_array = local_array(type_.bool_, N_RPN, target)
+    local.particle = local_object(type_.particle, target)
+    local.particle_record = local_object(type_.particle_record, target)
+
+
+def local_array(dtype, size, target):
+    struct = type_.into_dtype([("values", dtype, (size,))])
+
+    @njit
+    def cpu():
+        return np.zeros(1, dtype=struct)[0]
+
+    @njit
+    def gpu():
+        return cuda.local.array(1, dtype=struct)[0]
+
+    return cpu if target == "cpu" else gpu
+
+
+def local_object(dtype, target):
+
+    @njit
+    def cpu():
+        return np.zeros(1, dtype=dtype)[0]
+
+    @njit
+    def gpu():
+        return cuda.local.array(1, dtype=dtype)[0]
+
+    return cpu if target == "cpu" else gpu
diff --git a/mcdc/global_.py b/mcdc/global_.py
index 8df838f7..b20a793f 100644
--- a/mcdc/global_.py
+++ b/mcdc/global_.py
@@ -78,6 +78,8 @@ def reset(self):
             # Below are parameters not copied to mcdc.setting
             "bank_active_buff": 100,
             "bank_census_buff": 1.0,
+            # Portability
+            "target": 'cpu'
         }
 
         self.technique = {
diff --git a/mcdc/iqmc/iqmc_kernel.py b/mcdc/iqmc/iqmc_kernel.py
index 9b0bf296..709b74ca 100644
--- a/mcdc/iqmc/iqmc_kernel.py
+++ b/mcdc/iqmc/iqmc_kernel.py
@@ -1,23 +1,24 @@
-from mpi4py import MPI
-
-import mcdc.adapt as adapt
 import numpy as np
 
+from mpi4py import MPI
 from numpy import ascontiguousarray as cga
 from numba import njit, objmode, literal_unroll
-from mcdc.loop import caching
-from mcdc.type_ import iqmc_score_list
-from mcdc.kernel import distance_to_boundary, distance_to_mesh
 
-# from mcdc.iqmc.iqmc_loop import iqmc_loop_source
+import mcdc.adapt as adapt
+import mcdc.local as local
+
 from mcdc.adapt import toggle, for_cpu, for_gpu
 from mcdc.constant import *
+from mcdc.loop import caching
+from mcdc.type_ import iqmc_score_list
 from mcdc.kernel import (
-    move_particle,
-    set_bank_size,
+    distance_to_boundary,
+    distance_to_mesh,
     get_particle_cell,
     get_particle_material,
     mesh_get_index,
+    move_particle,
+    set_bank_size,
 )
 
 # =========================================================================
@@ -285,7 +286,7 @@ def iqmc_prepare_particles(mcdc):
 
     for n in range(N_work):
         # Create new particle
-        P_new = adapt.local_particle_record()
+        P_new = local.particle_record()
         # assign initial group, time, and rng_seed (not used)
         P_new["g"] = 0
         P_new["t"] = 0
@@ -456,10 +457,10 @@ def iqmc_generate_material_idx(mcdc):
     Nz = len(mesh["z"]) - 1
     dx = dy = dz = 1
     # variables for cell finding functions
-    trans_struct = adapt.local_translate()
+    trans_struct = local.translation()
     trans = trans_struct["values"]
     # create particle to utilize cell finding functions
-    P_temp = adapt.local_particle()
+    P_temp = local.particle()
     # set default attributes
     P_temp["alive"] = True
     P_temp["material_ID"] = -1
diff --git a/mcdc/iqmc/iqmc_loop.py b/mcdc/iqmc/iqmc_loop.py
index c2f5eed0..9242a6a6 100644
--- a/mcdc/iqmc/iqmc_loop.py
+++ b/mcdc/iqmc/iqmc_loop.py
@@ -1,19 +1,21 @@
-import mcdc.kernel as kernel
-import mcdc.iqmc.iqmc_kernel as iqmc_kernel
-import mcdc.adapt as adapt
 import numpy as np
 
 from numpy import ascontiguousarray as cga
 from numba import njit, objmode
-from mcdc.loop import caching
-from mcdc.constant import *
 
+import mcdc.adapt as adapt
+import mcdc.iqmc.iqmc_kernel as iqmc_kernel
+import mcdc.kernel as kernel
+import mcdc.local as local
+
+from mcdc.constant import *
+from mcdc.loop import caching
 from mcdc.print_ import (
-    print_progress,
-    print_progress_iqmc,
-    print_iqmc_eigenvalue_progress,
     print_iqmc_eigenvalue_exit_code,
+    print_iqmc_eigenvalue_progress,
     print_msg,
+    print_progress,
+    print_progress_iqmc,
 )
 
 
@@ -48,7 +50,7 @@ def iqmc_step_particle(P, prog):
 
     # Find cell from root universe if unknown
     if P["cell_ID"] == -1:
-        trans_struct = adapt.local_translate()
+        trans_struct = local.translation()
         trans = trans_struct["values"]
         P["cell_ID"] = kernel.get_particle_cell(P, 0, trans, mcdc)
 
@@ -100,7 +102,7 @@ def iqmc_loop_source(mcdc):
 
         # Loop until active bank is exhausted
         while mcdc["bank_active"]["size"] > 0:
-            P = adapt.local_particle()
+            P = local.particle()
             # Get particle from active bank
             kernel.get_particle(P, mcdc["bank_active"], mcdc)
             # Particle loop
diff --git a/mcdc/kernel.py b/mcdc/kernel.py
index 6569756e..6ad15e37 100644
--- a/mcdc/kernel.py
+++ b/mcdc/kernel.py
@@ -4,6 +4,7 @@
 from numba import njit, objmode, literal_unroll
 import numba
 
+import mcdc.local as local
 import mcdc.type_ as type_
 
 from mcdc.constant import *
@@ -852,7 +853,7 @@ def rng_array(seed, shape, size):
 
 @njit
 def source_particle(seed, mcdc):
-    P: type_.particle_record = adapt.local_particle_record()
+    P = local.particle_record()
     P["rng_seed"] = seed
 
     # Sample source
@@ -1581,7 +1582,7 @@ def get_particle_cell(P, universe_ID, trans, mcdc):
 @njit
 def get_particle_material(P, mcdc):
     # Translation accumulator
-    trans_struct = adapt.local_translate()
+    trans_struct = local.translation()
     trans = trans_struct["values"]
 
     # Top level cell
@@ -1645,7 +1646,7 @@ def copy_recordlike(P_new, P):
 
 @njit
 def copy_record(P):
-    P_new = adapt.local_particle_record()
+    P_new = local.particle_record()
     copy_recordlike(P_new, P)
     return P_new
 
@@ -1705,7 +1706,7 @@ def cell_check(P, cell, trans, mcdc):
     N_token = mcdc["cell_region_data"][idx]
 
     # Create local value array
-    value_struct = adapt.local_RPN_array()
+    value_struct = local.RPN_array()
     value = value_struct["values"]
     N_value = 0
 
@@ -2645,7 +2646,7 @@ def distance_to_boundary(P, mcdc):
     event = 0
 
     # Translation accumulator
-    trans_struct = adapt.local_translate()
+    trans_struct = local.translation()
     trans = trans_struct["values"]
 
     # Top level cell
@@ -2779,7 +2780,7 @@ def surface_crossing(P, prog):
 
     mcdc = adapt.device(prog)
 
-    trans_struct = adapt.local_translate()
+    trans_struct = local.translation()
     trans = trans_struct["values"]
     trans = P["translation"]
 
@@ -2801,7 +2802,7 @@ def surface_crossing(P, prog):
     if P["alive"] and not surface["BC"] == BC_REFLECTIVE:
         cell = mcdc["cells"][P["cell_ID"]]
         if not cell_check(P, cell, trans, mcdc):
-            trans_struct = adapt.local_translate()
+            trans_struct = local.translation()
             trans = trans_struct["values"]
             P["cell_ID"] = get_particle_cell(P, UNIVERSE_ROOT, trans, mcdc)
 
@@ -3324,7 +3325,7 @@ def fission_CE(P, nuclide, P_new):
     J = 6
     nu = get_nu(NU_FISSION, nuclide, E)
     nu_p = get_nu(NU_FISSION_PROMPT, nuclide, E)
-    nu_d_struct = adapt.local_j_array()
+    nu_d_struct = local.precursor_group_array()
     nu_d = nu_d_struct["values"]
     for j in range(J):
         nu_d[j] = get_nu_group(NU_FISSION_DELAYED, nuclide, E, j)
diff --git a/mcdc/local.py b/mcdc/local.py
new file mode 100644
index 00000000..bf1a230e
--- /dev/null
+++ b/mcdc/local.py
@@ -0,0 +1,12 @@
+"""
+Collection of functions that create local arrays and objects
+
+All of the functions are target-dependent (CPU or GPU mode)
+and defined in code_factory.py
+"""
+translation = None
+energy_group_array = None
+precursor_group_array = None
+RPN_array = None
+particle = None
+particle_record = None
diff --git a/mcdc/loop.py b/mcdc/loop.py
index a2bce7da..5e7d406f 100644
--- a/mcdc/loop.py
+++ b/mcdc/loop.py
@@ -1,25 +1,25 @@
+import pathlib
+
 import numpy as np
-from numpy import ascontiguousarray as cga
-from numba import njit, objmode, jit
 
 from mpi4py import MPI
+from numba import njit, objmode, jit
+from numpy import ascontiguousarray as cga
 
 import mcdc.adapt as adapt
 import mcdc.kernel as kernel
+import mcdc.local as local
 import mcdc.type_ as type_
-import pathlib
-
-import mcdc.print_ as print_module
 
 from mcdc.constant import *
 from mcdc.print_ import (
     print_header_batch,
-    print_progress,
-    print_progress_eigenvalue,
-    print_progress_iqmc,
     print_iqmc_eigenvalue_progress,
     print_iqmc_eigenvalue_exit_code,
     print_msg,
+    print_progress,
+    print_progress_eigenvalue,
+    print_progress_iqmc,
 )
 
 caching = True
@@ -228,7 +228,7 @@ def prep_particle(P, prog):
 @njit(cache=caching)
 def exhaust_active_bank(prog):
     mcdc = adapt.device(prog)
-    P = adapt.local_particle()
+    P = local.particle()
     # Loop until active bank is exhausted
     while kernel.get_bank_size(mcdc["bank_active"]) > 0:
         # Get particle from active bank
@@ -445,7 +445,7 @@ def step_particle(P, prog):
 
     # Find cell from root universe if unknown
     if P["cell_ID"] == -1:
-        trans_struct = adapt.local_translate()
+        trans_struct = local.translation()
         trans = trans_struct["values"]
         P["cell_ID"] = kernel.get_particle_cell(P, UNIVERSE_ROOT, trans, mcdc)
 
diff --git a/mcdc/main.py b/mcdc/main.py
index 402989ef..ed130ebd 100644
--- a/mcdc/main.py
+++ b/mcdc/main.py
@@ -85,6 +85,7 @@
 
 import mcdc.kernel as kernel
 import mcdc.type_ as type_
+import mcdc.code_factory as code_factory
 
 import mcdc.adapt as adapt
 from mcdc.constant import *
@@ -383,10 +384,7 @@ def prepare():
     type_.make_type_global(input_deck)
     kernel.adapt_rng(nb.config.DISABLE_JIT)
 
-    type_.make_type_translate(input_deck)
-    type_.make_type_group_array(input_deck)
-    type_.make_type_j_array(input_deck)
-    type_.make_type_RPN_array(input_deck)
+    code_factory.make_locals(input_deck)
 
     # =========================================================================
     # Create the global variable container
diff --git a/mcdc/type_.py b/mcdc/type_.py
index 62a097b5..37d05bdc 100644
--- a/mcdc/type_.py
+++ b/mcdc/type_.py
@@ -3,6 +3,7 @@
 import numpy as np
 import os
 
+from numba import njit
 from mpi4py import MPI
 from mpi4py.util.dtlib import from_numpy_dtype
 
@@ -26,27 +27,24 @@
 # ==============================================================================
 # MC/DC types
 # ==============================================================================
-# Currently defined based on input deck
-# TODO: This causes JIT recompilation in certain cases
+"""
+Some types are problem-dependent and defined in code_factory.py
+"""
 
 particle = None
 particle_record = None
+
 nuclide = None
 material = None
+
 surface = None
 universe = None
 lattice = None
+
 source = None
 setting = None
 tally = None
 technique = None
-
-# GPU mode related
-translate = None
-group_array = None
-j_array = None
-RPN_array = None
-
 global_ = None
 
 
@@ -1311,32 +1309,6 @@ def make_type_global(input_deck):
 # ==============================================================================
 
 
-def make_type_translate(input_deck):
-    global translate
-    translate = into_dtype([("values", float64, (3,))])
-
-
-def make_type_group_array(input_deck):
-    global group_array
-    G = input_deck.materials[0].G
-    group_array = into_dtype([("values", float64, (G,))])
-
-
-def make_type_j_array(input_deck):
-    global j_array
-    J = input_deck.materials[0].J
-    j_array = into_dtype([("values", float64, (J,))])
-
-
-def make_type_RPN_array(input_deck):
-    global RPN_array
-    N_max = 0
-    for cell_ in input_deck.cells:
-        N = np.sum(np.array(cell_._region_RPN) >= 0.0)
-        N_max = max(N_max, N)
-    RPN_array = into_dtype([("values", bool_, (N_max,))])
-
-
 def make_type_mesh(card):
     Nx = len(card["x"]) - 1
     Ny = len(card["y"]) - 1

From e2a373d7ed8c907a45256d3e36e145cb28fde691 Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Thu, 15 Aug 2024 20:41:08 +0700
Subject: [PATCH 2/6] back in black

---
 mcdc/code_factory.py | 4 +---
 mcdc/local.py        | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mcdc/code_factory.py b/mcdc/code_factory.py
index 4aa98c37..4d987bc4 100644
--- a/mcdc/code_factory.py
+++ b/mcdc/code_factory.py
@@ -18,9 +18,7 @@ def make_locals(input_deck):
     # Problem-dependent sizes
     G = input_deck.materials[0].G
     J = input_deck.materials[0].J
-    N_RPN = max(
-        [np.sum(np.array(x._region_RPN) >= 0.0) for x in input_deck.cells]
-    )
+    N_RPN = max([0] + [np.sum(np.array(x._region_RPN) >= 0) for x in input_deck.cells])
 
     # Make the locals
     local.translation = local_array(type_.float64, 3, target)
diff --git a/mcdc/local.py b/mcdc/local.py
index bf1a230e..1b2a4bef 100644
--- a/mcdc/local.py
+++ b/mcdc/local.py
@@ -4,6 +4,7 @@
 All of the functions are target-dependent (CPU or GPU mode)
 and defined in code_factory.py
 """
+
 translation = None
 energy_group_array = None
 precursor_group_array = None

From bef9a018a6a40b39a706c2d1ae80cfa42431717f Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Thu, 15 Aug 2024 20:55:19 +0700
Subject: [PATCH 3/6] working Numba mode

---
 mcdc/kernel.py | 2 +-
 mcdc/loop.py   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mcdc/kernel.py b/mcdc/kernel.py
index 6ad15e37..5792d65c 100644
--- a/mcdc/kernel.py
+++ b/mcdc/kernel.py
@@ -1653,7 +1653,7 @@ def copy_record(P):
 
 @njit
 def recordlike_to_particle(P_rec):
-    P_new = adapt.local_particle()
+    P_new = local.particle()
     copy_recordlike(P_new, P_rec)
     P_new["fresh"] = True
     P_new["alive"] = True
diff --git a/mcdc/loop.py b/mcdc/loop.py
index 5e7d406f..44799337 100644
--- a/mcdc/loop.py
+++ b/mcdc/loop.py
@@ -273,7 +273,7 @@ def source_dd_resolution(prog):
 
     while not terminated:
         if kernel.get_bank_size(mcdc["bank_active"]) > 0:
-            P = adapt.local_particle()
+            P = local.particle()
             # Loop until active bank is exhausted
             while kernel.get_bank_size(mcdc["bank_active"]) > 0:
 
@@ -531,7 +531,7 @@ def generate_precursor_particle(DNP, particle_idx, seed_work, prog):
     g = DNP["n_g"]
 
     # Create new particle
-    P_new = adapt.local_particle()
+    P_new = local.particle()
     part_seed = kernel.split_seed(particle_idx, seed_work)
     P_new["rng_seed"] = part_seed
     P_new["alive"] = True
@@ -543,7 +543,7 @@ def generate_precursor_particle(DNP, particle_idx, seed_work, prog):
     P_new["z"] = DNP["z"]
 
     # Get material
-    trans_struct = adapt.local_translate()
+    trans_struct = local.translation()
     trans = trans_struct["values"]
     P_new["cell_ID"] = kernel.get_particle_cell(P_new, UNIVERSE_ROOT, trans, mcdc)
     material_ID = kernel.get_particle_material(P_new, mcdc)

From 224e6e843481b23a2b2ce931fd10472afe8d194c Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Thu, 15 Aug 2024 21:07:25 +0700
Subject: [PATCH 4/6] assign target to input_deck

---
 mcdc/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mcdc/main.py b/mcdc/main.py
index ed130ebd..69b78b41 100644
--- a/mcdc/main.py
+++ b/mcdc/main.py
@@ -384,6 +384,7 @@ def prepare():
     type_.make_type_global(input_deck)
     kernel.adapt_rng(nb.config.DISABLE_JIT)
 
+    input_deck.setting['target'] = target
     code_factory.make_locals(input_deck)
 
     # =========================================================================

From bfc67c8f23ccfa37eb171ed33115edb6e6dd8bd9 Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Thu, 15 Aug 2024 21:56:09 +0700
Subject: [PATCH 5/6] replace njit with cuda.jit

---
 mcdc/code_factory.py | 6 +++---
 mcdc/main.py         | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mcdc/code_factory.py b/mcdc/code_factory.py
index 4d987bc4..1d5d5911 100644
--- a/mcdc/code_factory.py
+++ b/mcdc/code_factory.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from numba import njit
+from numba import cuda, njit
 
 import mcdc.local as local
 import mcdc.type_ as type_
@@ -36,7 +36,7 @@ def local_array(dtype, size, target):
     def cpu():
         return np.zeros(1, dtype=struct)[0]
 
-    @njit
+    @cuda.jit(device=True)
     def gpu():
         return cuda.local.array(1, dtype=struct)[0]
 
@@ -49,7 +49,7 @@ def local_object(dtype, target):
     def cpu():
         return np.zeros(1, dtype=dtype)[0]
 
-    @njit
+    @cuda.jit(device=True)
     def gpu():
         return cuda.local.array(1, dtype=dtype)[0]
 
diff --git a/mcdc/main.py b/mcdc/main.py
index 69b78b41..e1f8fcdf 100644
--- a/mcdc/main.py
+++ b/mcdc/main.py
@@ -384,7 +384,7 @@ def prepare():
     type_.make_type_global(input_deck)
     kernel.adapt_rng(nb.config.DISABLE_JIT)
 
-    input_deck.setting['target'] = target
+    input_deck.setting["target"] = target
     code_factory.make_locals(input_deck)
 
     # =========================================================================

From ace15dd475b4ab7f7206ba526536a7bc80c2595d Mon Sep 17 00:00:00 2001
From: Ilham Variansyah <variansi@oregonstate.edu>
Date: Sun, 18 Aug 2024 11:30:07 +0700
Subject: [PATCH 6/6] back in black

---
 mcdc/global_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mcdc/global_.py b/mcdc/global_.py
index b20a793f..7bc9f01e 100644
--- a/mcdc/global_.py
+++ b/mcdc/global_.py
@@ -79,7 +79,7 @@ def reset(self):
             "bank_active_buff": 100,
             "bank_census_buff": 1.0,
             # Portability
-            "target": 'cpu'
+            "target": "cpu",
         }
 
         self.technique = {