Merge pull request #4 from OSC/classroom-to-osc-app
Add node choice, cpu choice, and cluster choice
Oglopf authored Oct 6, 2023
2 parents 051e087 + 6e167f0 commit eab9206
Showing 2 changed files with 124 additions and 7 deletions.
93 changes: 86 additions & 7 deletions form.yml.erb
@@ -7,10 +7,12 @@
---
cluster:
- "pitzer"
- "owens"
form:
- account
- compute_cluster
- time
- num_cores
- node_type
attributes:
account:
label: "Project"
@@ -19,12 +21,89 @@ attributes:
<%- groups.each do |group| %>
- "<%= group %>"
<%- end %>
compute_cluster:
widget: "hidden_field"
options:
- [
"pitzer", "pitzer"
]
num_cores:
widget: "number_field"
label: "Number of cores"
value: 1
help: |
Number of cores on the chosen node type (4 GB of memory per core unless
requesting a whole node). Leave blank to request the full node.
min: 0
max: 28
step: 1
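# The data-*-for-cluster-* entries under node_type below are OnDemand dynamic
# form attributes: data-option-for-cluster-<name>: false hides that option on
# the named cluster, and data-min/max-num-cores-for-cluster-<name> retunes the
# num_cores bounds when the selection changes.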
node_type:
widget: select
label: "Node type"
help: |
- **Standard Compute** <br>
These are standard HPC machines. Owens has 648 of these nodes with 28
cores and 128 GB of memory. Pitzer has 224 of these nodes with 40 cores and
340 of these nodes with 48 cores. All Pitzer nodes have 192 GB of RAM.
Choosing "any" as the node type will decrease your wait time.
- **GPU Enabled** <br>
These are HPC machines with GPUs. Owens has 160 nodes with 1 [NVIDIA Tesla P100 GPU]
and Pitzer has 74 nodes with 2 [NVIDIA Tesla V100 GPUs]. They have the same
CPU and memory characteristics as standard compute. Pitzer's 40-core machines
have 2 GPUs with 16 GB of RAM each, and its 48-core machines have 2 GPUs with
32 GB of RAM each. Dense GPU types have 4 GPUs with 16 GB of RAM each.
- **Large Memory** <br>
These are HPC machines with very large amounts of memory. Owens has 16 hugemem nodes
with 48 cores and 1.5 TB of RAM. Pitzer has 4 hugemem nodes with 80 cores and 3 TB
of RAM, plus 12 largemem nodes with 48 cores and 768 GB of RAM.

[NVIDIA Tesla P100 GPU]: http://www.nvidia.com/object/tesla-p100.html
[NVIDIA Tesla V100 GPUs]: https://www.nvidia.com/en-us/data-center/v100/
options:
- [
"any", "any",
data-max-num-cores-for-cluster-owens: 28,
data-max-num-cores-for-cluster-pitzer: 48,
]
- [
"48 core", "any-48core",
data-max-num-cores-for-cluster-pitzer: 48,
data-option-for-cluster-owens: false,
]
- [
"40 core", "any-40core",
data-max-num-cores-for-cluster-pitzer: 40,
data-option-for-cluster-owens: false,
]
- [
"any gpu", "gpu",
data-max-num-cores-for-cluster-owens: 28,
data-max-num-cores-for-cluster-pitzer: 48,
]
- [
"40 core gpu", "gpu-40core",
data-max-num-cores-for-cluster-pitzer: 40,
data-option-for-cluster-owens: false,
]
- [
"48 core gpu", "gpu-48core",
data-max-num-cores-for-cluster-pitzer: 48,
data-option-for-cluster-owens: false,
]
- [
"largemem", "largemem",
data-min-num-cores-for-cluster-pitzer: 24,
data-max-num-cores-for-cluster-pitzer: 48,
data-option-for-cluster-owens: false,
]
- [
"hugemem", "hugemem",
data-min-num-cores-for-cluster-owens: 4,
data-max-num-cores-for-cluster-owens: 48,
data-min-num-cores-for-cluster-pitzer: 20,
data-max-num-cores-for-cluster-pitzer: 80,
]
- [
"debug", "debug",
data-max-num-cores-for-cluster-owens: 28,
data-max-num-cores-for-cluster-pitzer: 48,
data-option-for-cluster-owens: false,
data-option-for-cluster-pitzer: false,
]
time:
widget: "number_field"
value: 1
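
The data-*-for-cluster-* entries above drive OnDemand's dynamic form behavior: an option tagged data-option-for-cluster-<name>: false is hidden when that cluster is selected, and the data-min/max-num-cores-for-cluster-<name> values retune the bounds of the num_cores field as selections change. A minimal Ruby sketch of that clamping under the limits above (LIMITS and clamp_cores are illustrative names, not OnDemand APIs):

    # Illustrative model of the per-cluster core limits above; not OnDemand code.
    LIMITS = {
      "any"      => { "owens" => 0..28, "pitzer" => 0..48 },
      "largemem" => { "pitzer" => 24..48 },            # hidden on Owens
      "hugemem"  => { "owens" => 4..48, "pitzer" => 20..80 },
    }

    # A missing key means the option carries data-option-for-cluster-<name>: false.
    def clamp_cores(node_type, cluster, requested)
      range = LIMITS.fetch(node_type).fetch(cluster)
      requested.clamp(range.min, range.max)
    end

    clamp_cores("hugemem", "pitzer", 8)   # => 20
    clamp_cores("any", "owens", 99)       # => 28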
38 changes: 38 additions & 0 deletions submit.yml.erb
@@ -2,6 +2,44 @@

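# Slurm interprets a bare integer --time value as minutes, so convert the
# requested hours.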
minutes = time.to_i*60
slurm_args = [ "--nodes", "1", "--ntasks-per-node", "1", "--mem", "4096", "--cpus-per-task", "1", "--time", "#{minutes}" ]

nodes = bc_num_slots.blank? ? 1 : bc_num_slots.to_i

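# Max cores per node for each node_type/cluster pair; used as the per-node
# task count when the user leaves num_cores blank.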
cores_lookup = {
"hugemem" => {"pitzer" => "80", "owens" => "48"},
"largemem" => {"pitzer" => "48", "owens" => "28"},

"any" => {"pitzer" => "40", "owens" => "28"},
"gpu" => {"pitzer" => "48", "owens" => "28"},

"any-48core" => {"pitzer" => "48", "owens" => "28"},
"gpu-48core" => {"pitzer" => "48", "owens" => "28"},

"any-40core" => {"pitzer" => "40", "owens" => "28"},
"gpu-40core" => {"pitzer" => "40", "owens" => "28"},
}

max_cores = cores_lookup[node_type][cluster]
ppn = num_cores.blank? ? max_cores : num_cores.to_i


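# Rebuild slurm_args for node types that need a partition, GPUs, or a
# CPU-count constraint.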
case node_type
when "hugemem"
  partition = nodes > 1 ? "hugemem-parallel" : "hugemem"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--partition", partition ]
when "gpu"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--gpus-per-node", "1" ]
when "any-40core"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--constraint", "40core" ]
when "any-48core"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--constraint", "48core" ]
when "gpu-48core"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--gpus-per-node", "1", "--constraint", "48core" ]
when "gpu-40core"
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}", "--gpus-per-node", "1", "--constraint", "40core" ]
else
  slurm_args = [ "--nodes", "#{nodes}", "--ntasks-per-node", "#{ppn}" ]
end
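
# The case above rebuilds slurm_args without the user's --time request;
# re-append it so the requested walltime is still applied (this assumes
# dropping --time was unintended).
slurm_args += [ "--time", "#{minutes}" ]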
%>
---
batch_connect:
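
The node_type-to-sbatch mapping above can be exercised outside of OnDemand. A minimal Ruby sketch of the same dispatch (build_slurm_args and the sample call are illustrative, not part of this commit):

    # Illustrative stand-in for the case statement in submit.yml.erb.
    def build_slurm_args(node_type, nodes: 1, ppn: 1)
      args = ["--nodes", nodes.to_s, "--ntasks-per-node", ppn.to_s]
      case node_type
      when "hugemem"
        args + ["--partition", nodes > 1 ? "hugemem-parallel" : "hugemem"]
      when "gpu"
        args + ["--gpus-per-node", "1"]
      when "any-40core" then args + ["--constraint", "40core"]
      when "any-48core" then args + ["--constraint", "48core"]
      when "gpu-40core" then args + ["--gpus-per-node", "1", "--constraint", "40core"]
      when "gpu-48core" then args + ["--gpus-per-node", "1", "--constraint", "48core"]
      else args
      end
    end

    build_slurm_args("gpu-40core", ppn: 40)
    # => ["--nodes", "1", "--ntasks-per-node", "40", "--gpus-per-node", "1", "--constraint", "40core"]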
