diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..53e28a034d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Intel Corporation +# +# SPDX-License-Identifier: BSD-3-Clause + +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..5b13fae5a7 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: Intel Corporation +# +# SPDX-License-Identifier: BSD-3-Clause + +on: + push: + branches: + - main + pull_request: + +env: + SPHINXOPTS: -q -W + +jobs: + checks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - run: pip install -r requirements.txt + - name: Checks + run: | + pre-commit run --all + + clang: + runs-on: ubuntu-latest + env: + CXX: clang++ + steps: + - uses: actions/checkout@v3 + - name: Build & test + run: | + #source /opt/intel/oneapi/setvars.sh + cmake -B build + make -j -C build all test + - uses: actions/upload-artifact@v3 + with: + name: log-clang-${{ env.CXX }} + path: build/Testing + + gcc: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - cxx: g++-10 + - cxx: g++-11 + - cxx: g++-12 + name: ${{ matrix.cxx }} + env: + CXX: ${{ matrix.cxx }} + steps: + - uses: actions/checkout@v3 + - name: Build & test + run: | + #source /opt/intel/oneapi/setvars.sh + cmake -B 
build + make -j -C build all test + cmake -B build-nofmt + make -j -C build-nofmt/examples/cpu all test + - uses: actions/upload-artifact@v3 + with: + name: log-gcc-${{ env.CXX }} + path: build/Testing + + icpx: + runs-on: ubuntu-latest + env: + CXX: icpx + steps: + - uses: actions/checkout@v3 + - name: Build & test + run: | + #source /opt/intel/oneapi/setvars.sh + cmake -B build -DENABLE_SYCL_MPI=on + make -j -C build all test + - uses: actions/upload-artifact@v3 + with: + name: log-icpx-${{ env.CXX }} + path: build/Testing + + publish: + needs: [checks, clang, gcc, icpx] + runs-on: dds-base + if: ${{ github.ref == 'refs/heads/main' }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - run: pip install -r requirements.txt + - name: Build doc + run: | + make -C doc/spec html + - name: Checkout gh-pages + uses: actions/checkout@v3 + with: + ref: gh-pages + path: gh-pages + - name: Publish to github pages + run: | + rm -rf gh-pages/* + touch gh-pages/.nojekyll + + cp -r doc/spec/build/html gh-pages/spec + cp -r doc/spec/build/doxygen-html gh-pages/doxygen + + cd gh-pages + git config user.name github-actions + git config user.email github-actions@github.com + git add . + # Ignore errors because no updates returns an error status. 
+ git commit --reset-author --amend -m "Update from github actions" + git push --force origin gh-pages diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..16da31e05f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Intel Corporation +# +# SPDX-License-Identifier: BSD-3-Clause + +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v15.0.4 + hooks: + - id: clang-format + +- repo: https://github.com/ambv/black + rev: 22.10.0 + hooks: + - id: black + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: mixed-line-ending + - id: check-xml + - id: check-yaml + - id: check-case-conflict + - id: check-toml + - id: check-json + - id: check-added-large-files + args: ['--maxkb=800'] + +- repo: https://github.com/pycqa/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + +- repo: https://github.com/pycqa/doc8 + rev: v1.0.0 + hooks: + - id: doc8 + +- repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + +- repo: https://github.com/fsfe/reuse-tool + rev: v1.0.0 + hooks: + - id: reuse + +- repo: local + hooks: + - id: sphinx + name: sphinx + entry: make -C doc/spec spelling linkcheck html + language: system + pass_filenames: false + always_run: true diff --git a/bench/devcloud/dr-bench-05ae8920514c49deb09dee3d94648622.json b/bench/devcloud/dr-bench-05ae8920514c49deb09dee3d94648622.json new file mode 100644 index 0000000000..e32f94383e --- /dev/null +++ b/bench/devcloud/dr-bench-05ae8920514c49deb09dee3d94648622.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:48:36-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 
224, + "mhz_per_cpu": 3800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.06689,2.31787,24.0571], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "6000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect 
avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "3", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "1" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.6165553901350177e+02, + "cpu_time": 5.6165532700000040e+02, + "time_unit": "ms", + "bytes_per_second": 2.1365408451373840e+12, + "footprint": 
8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3608734270749260e+03, + "cpu_time": 2.3596445299999968e+03, + "time_unit": "ms", + "bytes_per_second": 2.0331458454963010e+12, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1467817188989820e+03, + "cpu_time": 5.1372696359999918e+03, + "time_unit": "ms", + "bytes_per_second": 4.6631081928094995e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.2316018368905516e+03, + "cpu_time": 9.0834726950000113e+03, + "time_unit": "ms", + "bytes_per_second": 1.1814815513449133e+12, + "flops": 2.1933444641304083e+09, + "footprint": 3.5996591616000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1385008794861028e+02, + "cpu_time": 5.1266339400000049e+02, + "time_unit": 
"ms", + "bytes_per_second": 1.8682491693881130e+12, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-0f326341f9d94ba29646527008f9397e.json b/bench/devcloud/dr-bench-0f326341f9d94ba29646527008f9397e.json new file mode 100644 index 0000000000..90cef0d3a8 --- /dev/null +++ b/bench/devcloud/dr-bench-0f326341f9d94ba29646527008f9397e.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:42:03-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3710, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.88379,2.87305,35.7134], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm 
pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": 
"MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.5516163100903964e+02, + "cpu_time": 5.5515669999999955e+02, + "time_unit": "ms", + "bytes_per_second": 7.2051088846499707e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3124956898975342e+03, + "cpu_time": 2.3124854639999980e+03, + "time_unit": "ms", + "bytes_per_second": 6.9189318146184106e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.1613349071019511e+03, + "cpu_time": 4.1606854099999991e+03, + "time_unit": "ms", + "bytes_per_second": 1.9224600227073248e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.8141352109033178e+03, + "cpu_time": 8.7900688860000009e+03, + "time_unit": "ms", + 
"bytes_per_second": 4.1249411816401971e+11, + "flops": 7.6576878372033358e+08, + "footprint": 3.5997811200000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0708981210029782e+02, + "cpu_time": 5.0706769800000018e+02, + "time_unit": "ms", + "bytes_per_second": 6.3105192091042615e+11, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-1255d1c30769449395d041799418a44b.json b/bench/devcloud/dr-bench-1255d1c30769449395d041799418a44b.json new file mode 100644 index 0000000000..4814090c1c --- /dev/null +++ b/bench/devcloud/dr-bench-1255d1c30769449395d041799418a44b.json @@ -0,0 +1,117 @@ +{ + "context": { + "date": "2023-12-01T03:08:33-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3619, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [142.663,118.33,79.1997], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info0": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) 
Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec 
rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8577399690809459e+03, + "cpu_time": 1.8487172580000006e+03, + "time_unit": "ms", + "bytes_per_second": 2.1531538679112689e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.3865799620094967e+03, + "cpu_time": 7.3686075740000024e+03, + "time_unit": "ms", + "bytes_per_second": 2.1660904075080569e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1631580167999262e+04, + 
"cpu_time": 1.1597553164000004e+04, + "time_unit": "ms", + "bytes_per_second": 6.8778273325317871e+10, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "BlackScholes_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "BlackScholes_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.0471538821970993e+04, + "cpu_time": 4.0396294231000000e+04, + "time_unit": "ms", + "bytes_per_second": 4.9417443917755104e+10, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-1931eb5d1d374795a0001e0bd3593fae.json b/bench/devcloud/dr-bench-1931eb5d1d374795a0001e0bd3593fae.json new file mode 100644 index 0000000000..aa448cdb68 --- /dev/null +++ b/bench/devcloud/dr-bench-1931eb5d1d374795a0001e0bd3593fae.json @@ -0,0 +1,86 @@ +{ + "context": { + "date": "2023-12-01T03:07:47-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3799, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [109.821,109.198,74.4443], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: 
GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability 
Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "MHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8118294787895607e+04, + "cpu_time": 1.7760385455999996e+04, + "time_unit": "ms", + "bytes_per_second": 2.0066893566766425e+11, + "flops": 3.7252896472957474e+08, + "footprint": 1.7998905600000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.0610982560803536e+03, + "cpu_time": 2.0606356890000015e+03, + "time_unit": "ms", + "bytes_per_second": 1.5525703301915973e+11, + "footprint": 1.6000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-1bbc05c89b3142469b1ed34d9e5c57f8.json b/bench/devcloud/dr-bench-1bbc05c89b3142469b1ed34d9e5c57f8.json new file mode 100644 index 0000000000..f60ef2186a --- /dev/null +++ b/bench/devcloud/dr-bench-1bbc05c89b3142469b1ed34d9e5c57f8.json @@ -0,0 +1,213 @@ +{ + "context": { + "date": 
"2023-12-01T03:11:36-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3000, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [92.9668,104.205,80.2827], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc 
cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "56", + "runtime": "DIRECT", + "target": "MHP_DIRECT_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.0528119340009212e+03, + "cpu_time": 2.0527000079999993e+03, + "time_unit": "ms", + "bytes_per_second": 
1.9485467391082523e+11, + "footprint": 1.4285714200000000e+08 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.8632067970901653e+03, + "cpu_time": 7.8629778639999977e+03, + "time_unit": "ms", + "bytes_per_second": 2.0347932354927905e+11, + "footprint": 5.7142857100000000e+08 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.3750288408972910e+04, + "cpu_time": 1.3749173764999994e+04, + "time_unit": "ms", + "bytes_per_second": 5.8180597832257156e+10, + "footprint": 2.8571428500000000e+08 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.4045541670832581e+03, + "cpu_time": 4.4044608720000015e+03, + "time_unit": "ms", + "bytes_per_second": 1.8163018767680823e+11, + "footprint": 2.8571428500000000e+08 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.4591662551080772e+03, + "cpu_time": 4.4590019729999995e+03, + "time_unit": "ms", 
+ "bytes_per_second": 1.7940573511552335e+11, + "footprint": 2.8571428500000000e+08 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.4642623949062536e+03, + "cpu_time": 6.4642111510000004e+03, + "time_unit": "ms", + "bytes_per_second": 1.8563602878273984e+11, + "footprint": 4.2857142800000000e+08 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.3968053419699909e+03, + "cpu_time": 6.3965907470000047e+03, + "time_unit": "ms", + "bytes_per_second": 1.8759364023893250e+11, + "footprint": 4.2857142800000000e+08 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.9712159749890143e+04, + "cpu_time": 1.9711895787000001e+04, + "time_unit": "ms", + "bytes_per_second": 1.8444345913036050e+11, + "flops": 3.4240741175190687e+08, + "footprint": 6.4281805700000000e+08 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.2683754339010302e+03, + 
"cpu_time": 2.2683492609999976e+03, + "time_unit": "ms", + "bytes_per_second": 1.4109269357127237e+11, + "footprint": 5.7152000000000000e+08 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2497487882982461e+04, + "cpu_time": 1.2497265677999991e+04, + "time_unit": "ms", + "bytes_per_second": 1.6003216156131293e+11, + "footprint": 7.1428571400000000e+08 + } + ] +} diff --git a/bench/devcloud/dr-bench-27a255afd4df46ddbdc52d067feeb958.json b/bench/devcloud/dr-bench-27a255afd4df46ddbdc52d067feeb958.json new file mode 100644 index 0000000000..ea5161eabe --- /dev/null +++ b/bench/devcloud/dr-bench-27a255afd4df46ddbdc52d067feeb958.json @@ -0,0 +1,181 @@ +{ + "context": { + "date": "2023-12-01T03:00:14-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [195.833,99.2056,53.0107], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info0": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: 
GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability 
Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8646866271095548e+03, + "cpu_time": 1.8501959899999995e+03, + "time_unit": "ms", + "bytes_per_second": 2.1451325610675870e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.3905101448925943e+03, + "cpu_time": 7.3666108349999995e+03, + "time_unit": "ms", + "bytes_per_second": 2.1649385071282553e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2791255334911728e+04, + 
"cpu_time": 1.2748943330000004e+04, + "time_unit": "ms", + "bytes_per_second": 6.2542727750616104e+10, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.3128416599641369e+03, + "cpu_time": 6.6100220000038235e+00, + "time_unit": "ms", + "bytes_per_second": 1.8549255063693951e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.3243016421108987e+03, + "cpu_time": 6.0379869999991342e+00, + "time_unit": "ms", + "bytes_per_second": 1.8500097037853299e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.2428005789869521e+03, + "cpu_time": 6.3116660000019920e+00, + "time_unit": "ms", + "bytes_per_second": 1.9222142127031223e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 
6.2416961680003942e+03, + "cpu_time": 6.4830270000015844e+00, + "time_unit": "ms", + "bytes_per_second": 1.9225543309078357e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.0670778174916413e+04, + "cpu_time": 9.4669509999931734e+00, + "time_unit": "ms", + "bytes_per_second": 4.9175356109450943e+10, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-3b67dab7a52d48728f3ac9b1c65be965.json b/bench/devcloud/dr-bench-3b67dab7a52d48728f3ac9b1c65be965.json new file mode 100644 index 0000000000..da3917516b --- /dev/null +++ b/bench/devcloud/dr-bench-3b67dab7a52d48728f3ac9b1c65be965.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:44:23-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3389, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.33691,2.49316,30.9614], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 
0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not 
affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "3", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8678983089047455e+02, + "cpu_time": 1.8677556899999990e+02, + "time_unit": "ms", + "bytes_per_second": 2.1414442001103511e+12, + "footprint": 2.6666666660000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.8309814293891316e+02, + "cpu_time": 7.8306104400000004e+02, + "time_unit": "ms", + "bytes_per_second": 2.0431666380861418e+12, + "footprint": 1.0666666666000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 
1.7300336490206105e+03, + "cpu_time": 1.7267569800000010e+03, + "time_unit": "ms", + "bytes_per_second": 4.6241875148086749e+11, + "footprint": 5.3333333330000000e+09 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.2536409489151224e+03, + "cpu_time": 3.1660738910000000e+03, + "time_unit": "ms", + "bytes_per_second": 1.1174493400730945e+12, + "flops": 2.0744727847891603e+09, + "footprint": 1.1999270400000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.7660868994339677e+02, + "cpu_time": 1.7595972499999846e+02, + "time_unit": "ms", + "bytes_per_second": 1.8119244307998706e+12, + "footprint": 1.0666720000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-3dad8cd026d44af98eb20bf90595f93c.json b/bench/devcloud/dr-bench-3dad8cd026d44af98eb20bf90595f93c.json new file mode 100644 index 0000000000..8f323d59fc --- /dev/null +++ b/bench/devcloud/dr-bench-3dad8cd026d44af98eb20bf90595f93c.json @@ -0,0 +1,117 @@ +{ + "context": { + "date": "2023-12-01T02:52:17-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": 
"Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [4.90234,3.4458,19.8945], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 
avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.5441771891002475e+02, + "cpu_time": 5.5434709100000032e+02, + "time_unit": "ms", + "bytes_per_second": 7.2147766270239844e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 1, + "real_time": 2.3062339069814975e+03, + "cpu_time": 2.3054007839999995e+03, + "time_unit": "ms", + "bytes_per_second": 6.9377177881065491e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.1594624510100275e+03, + "cpu_time": 4.1581149100000021e+03, + "time_unit": "ms", + "bytes_per_second": 1.9233254523207413e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "BlackScholes_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "BlackScholes_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.6197639669901782e+03, + "cpu_time": 2.6190965850000011e+03, + "time_unit": "ms", + "bytes_per_second": 7.6342755500136938e+11, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-47a547f0f82c4c6e881cc3d9b04de02f.json b/bench/devcloud/dr-bench-47a547f0f82c4c6e881cc3d9b04de02f.json new file mode 100644 index 0000000000..6608d68996 --- /dev/null +++ b/bench/devcloud/dr-bench-47a547f0f82c4c6e881cc3d9b04de02f.json @@ -0,0 +1,133 @@ +{ + "context": { + "date": "2023-12-01T02:38:35-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 
2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [5.06348,3.64062,44.063], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear 
serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.2767433704822122e+02, + "cpu_time": 3.5146409999999850e+00, + "time_unit": "ms", + "bytes_per_second": 8.6237159749996985e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.3459736807634465e+02, + "cpu_time": 4.1861409999999211e+00, + "time_unit": "ms", + "bytes_per_second": 8.5598357894653333e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4542426809764263e+03, + "cpu_time": 4.0649390000000452e+00, + "time_unit": "ms", + "bytes_per_second": 8.2517176513776953e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4702525659040602e+03, + "cpu_time": 4.1476529999999734e+00, + "time_unit": "ms", + "bytes_per_second": 8.1618629875481189e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.4503114609444197e+03, + "cpu_time": 4.1543390000000624e+00, + "time_unit": "ms", + "bytes_per_second": 8.1622276672906848e+11, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-4b2a5a23ba8945cb8e27660743b2b01c.json b/bench/devcloud/dr-bench-4b2a5a23ba8945cb8e27660743b2b01c.json new file mode 100644 index 0000000000..21c6060172 --- /dev/null +++ 
b/bench/devcloud/dr-bench-4b2a5a23ba8945cb8e27660743b2b01c.json @@ -0,0 +1,86 @@ +{ + "context": { + "date": "2023-12-01T03:06:38-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3036, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [159.696,113.046,72.7725], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 
erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 
1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.8323353646897627e+04, + "cpu_time": 2.8057506888000000e+04, + "time_unit": "ms", + "bytes_per_second": 1.2836682324157761e+11, + "flops": 2.3830474611679012e+08, + "footprint": 3.5997811200000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.2234370399226418e+03, + "cpu_time": 3.2130882019999945e+03, + "time_unit": "ms", + "bytes_per_second": 9.9272917707640289e+10, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-4d94d50f10a5440d9e71127a18ccb37f.json b/bench/devcloud/dr-bench-4d94d50f10a5440d9e71127a18ccb37f.json new file mode 100644 index 0000000000..3ffe60f234 --- /dev/null +++ b/bench/devcloud/dr-bench-4d94d50f10a5440d9e71127a18ccb37f.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:50:19-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3675, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [3.42627,2.70752,21.9697], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "8000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress 
sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability 
Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "4", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "1" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.6175510802643703e+02, + "cpu_time": 5.6174769899999785e+02, + "time_unit": "ms", + "bytes_per_second": 2.8482162015778262e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3643713110717772e+03, + "cpu_time": 2.3643595180000007e+03, + "time_unit": "ms", + "bytes_per_second": 2.7068506414497388e+12, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1494293889559958e+03, + "cpu_time": 5.1441228399999945e+03, + "time_unit": "ms", + "bytes_per_second": 6.2142807645116064e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.2204183599029802e+03, + "cpu_time": 8.9897945809999983e+03, + "time_unit": "ms", + "bytes_per_second": 1.5772728695309468e+12, + "flops": 2.9281055746242824e+09, + "footprint": 3.5997811200000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1413105293202989e+02, + "cpu_time": 5.1167924599999992e+02, + "time_unit": "ms", + "bytes_per_second": 2.4896375986245303e+12, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-504b793dae8f487da53338f4c4375989.json b/bench/devcloud/dr-bench-504b793dae8f487da53338f4c4375989.json new file mode 100644 index 0000000000..09d72ea72d --- /dev/null +++ b/bench/devcloud/dr-bench-504b793dae8f487da53338f4c4375989.json @@ -0,0 +1,182 @@ +{ + "context": { + "date": "2023-12-01T03:03:48-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3777, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + 
"type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [112.497,100.962,62.8032], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info0": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "device_info1": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq 
avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "SHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.5048122539442868e+03, + "cpu_time": 9.6845947799999976e+02, + "time_unit": "ms", + "bytes_per_second": 2.6581389070400894e+11, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": 
"DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0602420900029492e+03, + "cpu_time": 3.7317751040000003e+03, + "time_unit": "ms", + "bytes_per_second": 3.1619040582286987e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.0485001341903724e+04, + "cpu_time": 6.1815530639999988e+03, + "time_unit": "ms", + "bytes_per_second": 7.6299465676057495e+10, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1075174429734507e+03, + "cpu_time": 7.7651490000008039e+00, + "time_unit": "ms", + "bytes_per_second": 2.5744022831116086e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1934110898864596e+03, + "cpu_time": 7.9798829999973009e+00, + "time_unit": "ms", + "bytes_per_second": 2.5051582069518134e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + 
"per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.3504073669151376e+03, + "cpu_time": 7.4055979999982924e+00, + "time_unit": "ms", + "bytes_per_second": 2.7583623757306128e+11, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.2855591999695043e+03, + "cpu_time": 9.0602880000005825e+00, + "time_unit": "ms", + "bytes_per_second": 2.8001013263532544e+11, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.0143088417085510e+04, + "cpu_time": 1.3105503000005569e+01, + "time_unit": "ms", + "bytes_per_second": 9.9289640128054352e+10, + "footprint": 2.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-5855e60fafe3432297c3267e8844b853.json b/bench/devcloud/dr-bench-5855e60fafe3432297c3267e8844b853.json new file mode 100644 index 0000000000..80d2dce5be --- /dev/null +++ b/bench/devcloud/dr-bench-5855e60fafe3432297c3267e8844b853.json @@ -0,0 +1,136 @@ +{ + "context": { + "date": "2023-12-01T02:39:12-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3798, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + 
"num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [3.21729,3.33203,42.4629], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info1": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info2": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info3": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw 
avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "4", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3325329298617777e+02, + "cpu_time": 
1.9933230000002133e+00, + "time_unit": "ms", + "bytes_per_second": 3.4297479352088149e+12, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3419590093052662e+02, + "cpu_time": 1.8998169999999703e+00, + "time_unit": "ms", + "bytes_per_second": 3.4159436472686904e+12, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.6309831208078140e+02, + "cpu_time": 2.2632049999997683e+00, + "time_unit": "ms", + "bytes_per_second": 3.3048900533942065e+12, + "footprint": 6.0000000000000000e+09 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.6253453299445562e+02, + "cpu_time": 2.3339540000000270e+00, + "time_unit": "ms", + "bytes_per_second": 3.3100295028125010e+12, + "footprint": 6.0000000000000000e+09 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.1424283089181984e+02, 
+ "cpu_time": 5.8677320000000144e+00, + "time_unit": "ms", + "bytes_per_second": 3.2560412583020264e+12, + "footprint": 1.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-60577e0e78d947eea41d9663ea0e3669.json b/bench/devcloud/dr-bench-60577e0e78d947eea41d9663ea0e3669.json new file mode 100644 index 0000000000..f577cadc74 --- /dev/null +++ b/bench/devcloud/dr-bench-60577e0e78d947eea41d9663ea0e3669.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:45:01-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3778, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.76758,2.58887,29.791], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor 
ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + 
"ranks": "4", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4087592093710325e+02, + "cpu_time": 1.4087091599999991e+02, + "time_unit": "ms", + "bytes_per_second": 2.8393780664517373e+12, + "footprint": 2.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.9309943610959851e+02, + "cpu_time": 5.9309413599999948e+02, + "time_unit": "ms", + "bytes_per_second": 2.6976926676833613e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.3147923870565719e+03, + "cpu_time": 1.3117010009999995e+03, + "time_unit": "ms", + "bytes_per_second": 6.0846108319121130e+11, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.5449880020658366e+03, + "cpu_time": 
2.4650086589999987e+03, + "time_unit": "ms", + "bytes_per_second": 1.4286035644367434e+12, + "flops": 2.6521105775434594e+09, + "footprint": 8.9994528000000000e+09 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.3530595807127114e+02, + "cpu_time": 1.3452668200000062e+02, + "time_unit": "ms", + "bytes_per_second": 2.3650104146296577e+12, + "footprint": 8.0000000000000000e+09 + } + ] +} diff --git a/bench/devcloud/dr-bench-6250900504ea40ecaedc31dd2cb5825f.json b/bench/devcloud/dr-bench-6250900504ea40ecaedc31dd2cb5825f.json new file mode 100644 index 0000000000..c8e63f9b4a --- /dev/null +++ b/bench/devcloud/dr-bench-6250900504ea40ecaedc31dd2cb5825f.json @@ -0,0 +1,133 @@ +{ + "context": { + "date": "2023-12-01T02:41:43-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3590, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.44043,2.79443,36.4048], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 
0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not 
affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "4", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3363383195950294e+02, + "cpu_time": 4.0231070000000813e+00, + "time_unit": "ms", + "bytes_per_second": 3.4241616177346631e+12, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3287898110275810e+02, + "cpu_time": 5.4413149999996691e+00, + "time_unit": "ms", + "bytes_per_second": 3.4352606500240532e+12, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 
3.7361330898735730e+02, + "cpu_time": 6.2352160000003209e+00, + "time_unit": "ms", + "bytes_per_second": 3.2118770159780547e+12, + "footprint": 6.0000000000000000e+09 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.7350632600031844e+02, + "cpu_time": 7.7625230000002432e+00, + "time_unit": "ms", + "bytes_per_second": 3.2127969902147710e+12, + "footprint": 6.0000000000000000e+09 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.6721946799791203e+02, + "cpu_time": 3.6261122000000867e+01, + "time_unit": "ms", + "bytes_per_second": 2.9975144550282500e+12, + "footprint": 1.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-66fbf2f1650d4e3f925b6bf0688b7b84.json b/bench/devcloud/dr-bench-66fbf2f1650d4e3f925b6bf0688b7b84.json new file mode 100644 index 0000000000..dd0e715d99 --- /dev/null +++ b/bench/devcloud/dr-bench-66fbf2f1650d4e3f925b6bf0688b7b84.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:38:53-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3691, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 
110100480, + "num_sharing": 112 + } + ], + "load_avg": [4.08936,3.49414,43.3667], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info1": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize 
tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.7125676499661682e+02, + "cpu_time": 2.3728100000000030e+00, + "time_unit": "ms", + "bytes_per_second": 1.6975883624837581e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.7019346299559152e+02, + "cpu_time": 3.0029430000000357e+00, + "time_unit": "ms", + "bytes_per_second": 1.7014273122880500e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.2705783096899427e+02, + "cpu_time": 4.7765499999998795e+00, + "time_unit": "ms", + "bytes_per_second": 1.6504876900929421e+12, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.2930800494220853e+02, + "cpu_time": 4.4668640000000703e+00, + "time_unit": "ms", + "bytes_per_second": 1.6453953499318713e+12, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.3184490349126627e+03, + "cpu_time": 4.6986640000001856e+00, + "time_unit": "ms", + "bytes_per_second": 1.5169338723301389e+12, + "footprint": 2.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-7800cee608ec44d699f9a435db3e65f1.json b/bench/devcloud/dr-bench-7800cee608ec44d699f9a435db3e65f1.json new file mode 100644 index 0000000000..27ef186248 --- /dev/null +++ 
b/bench/devcloud/dr-bench-7800cee608ec44d699f9a435db3e65f1.json @@ -0,0 +1,69 @@ +{ + "context": { + "date": "2023-12-01T03:11:20-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3782, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [107.614,107.566,80.9629], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 
erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stencil2D_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stencil2D_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + 
"repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1855799399601656e+03, + "cpu_time": 3.1803619880000006e+03, + "time_unit": "ms", + "bytes_per_second": 1.0045266671411847e+11, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-7ca861e4d8f5486aa6185066cfd7d4b0.json b/bench/devcloud/dr-bench-7ca861e4d8f5486aa6185066cfd7d4b0.json new file mode 100644 index 0000000000..a6da798e9c --- /dev/null +++ b/bench/devcloud/dr-bench-7ca861e4d8f5486aa6185066cfd7d4b0.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:45:33-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3785, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [3.54297,2.79004,28.9907], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art 
arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: 
Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "1" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.5363951008610945e+02, + "cpu_time": 5.5363853499999925e+02, + "time_unit": "ms", + "bytes_per_second": 7.2249178881360291e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3046611520618158e+03, + "cpu_time": 2.3046508220000028e+03, + "time_unit": "ms", + "bytes_per_second": 6.9424522497313501e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.1552449670309843e+03, + "cpu_time": 4.1543743730000033e+03, + "time_unit": "ms", + "bytes_per_second": 1.9252775861530444e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, 
+ "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.7966737679239213e+03, + "cpu_time": 8.7705582979999981e+03, + "time_unit": "ms", + "bytes_per_second": 4.1331292112451160e+11, + "flops": 7.6728883872124672e+08, + "footprint": 3.5997811200000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0692690308589169e+02, + "cpu_time": 5.0692577499999913e+02, + "time_unit": "ms", + "bytes_per_second": 6.3125471947141943e+11, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-9736fe26172840069fdc275df840cbf9.json b/bench/devcloud/dr-bench-9736fe26172840069fdc275df840cbf9.json new file mode 100644 index 0000000000..276bbfd4b2 --- /dev/null +++ b/bench/devcloud/dr-bench-9736fe26172840069fdc275df840cbf9.json @@ -0,0 +1,133 @@ +{ + "context": { + "date": "2023-12-01T02:40:34-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3797, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [1.75879,2.82764,39.0518], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress 
sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability 
Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.7538095796464046e+02, + "cpu_time": 5.8109689999996661e+00, + "time_unit": "ms", + "bytes_per_second": 1.6828608437014954e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.7353414393531943e+02, + "cpu_time": 5.1237479999990398e+00, + "time_unit": "ms", + "bytes_per_second": 1.6894241106915256e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.3717815506481179e+02, + "cpu_time": 3.7706250000013597e+00, + "time_unit": "ms", + "bytes_per_second": 1.6278290285127854e+12, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.4532339501341619e+02, + "cpu_time": 4.3830029999973874e+00, + "time_unit": "ms", + "bytes_per_second": 1.6100393574501970e+12, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2879471159133802e+03, + "cpu_time": 6.5052462000000588e+01, + "time_unit": "ms", + "bytes_per_second": 1.5528587899990361e+12, + "footprint": 2.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-982c908512be4f42896b0f5b4890b926.json b/bench/devcloud/dr-bench-982c908512be4f42896b0f5b4890b926.json new file mode 100644 index 0000000000..98183fb955 --- /dev/null +++ b/bench/devcloud/dr-bench-982c908512be4f42896b0f5b4890b926.json @@ -0,0 +1,135 @@ +{ + "context": { + "date": "2023-12-01T02:39:04-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3786, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + 
"num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [3.53369,3.39502,42.9067], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info1": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "device_info2": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 
gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "3", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1163578898584115e+02, + "cpu_time": 1.7084939999998383e+00, + "time_unit": "ms", + "bytes_per_second": 2.5670992494265386e+12, + "footprint": 5.3333333330000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + 
"per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1133390499648874e+02, + "cpu_time": 1.8784039999997226e+00, + "time_unit": "ms", + "bytes_per_second": 2.5695884295320244e+12, + "footprint": 5.3333333330000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.8830343389668059e+02, + "cpu_time": 5.0915680000001018e+00, + "time_unit": "ms", + "bytes_per_second": 2.4574883498646587e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0738236808511351e+02, + "cpu_time": 4.5421920000001670e+00, + "time_unit": "ms", + "bytes_per_second": 2.3650802146098613e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.4333098311327967e+02, + "cpu_time": 5.5361199999999222e+00, + "time_unit": "ms", + "bytes_per_second": 2.3715481110592041e+12, + "footprint": 1.3333333333000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-9bcab8ff6d954174bfc465788bba33e9.json 
b/bench/devcloud/dr-bench-9bcab8ff6d954174bfc465788bba33e9.json new file mode 100644 index 0000000000..81ab2c00a6 --- /dev/null +++ b/bench/devcloud/dr-bench-9bcab8ff6d954174bfc465788bba33e9.json @@ -0,0 +1,86 @@ +{ + "context": { + "date": "2023-12-01T02:53:04-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3793, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.9502,3.13232,19.0088], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin 
cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Gemm_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + 
"per_family_instance_index": 0, + "run_name": "Gemm_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.7745659003653083e+02, + "cpu_time": 3.7741401400000018e+02, + "time_unit": "ms", + "bytes_per_second": 8.1386842383721189e+09, + "flops": 1.0851578984496158e+13, + "footprint": 3.0720000000000000e+09 + }, + { + "name": "SyclSortFixture/Sort_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "SyclSortFixture/Sort_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.6249401105612128e+02, + "cpu_time": 7.6124128000000019e+02, + "time_unit": "ms", + "bytes_per_second": 1.0491885685658432e+10, + "footprint": 8.0000000000000000e+09 + } + ] +} diff --git a/bench/devcloud/dr-bench-9d00be9dc79741cbbfb02f0aaf274312.json b/bench/devcloud/dr-bench-9d00be9dc79741cbbfb02f0aaf274312.json new file mode 100644 index 0000000000..76e6c57482 --- /dev/null +++ b/bench/devcloud/dr-bench-9d00be9dc79741cbbfb02f0aaf274312.json @@ -0,0 +1,133 @@ +{ + "context": { + "date": "2023-12-01T02:39:21-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3361, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.87598,3.25488,42.0181], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + 
"device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 
CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.3012359001425682e+02, + "cpu_time": 4.0234730000001662e+00, + "time_unit": "ms", + "bytes_per_second": 8.6010075283408057e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.3045015894611788e+02, + "cpu_time": 4.0667689999995815e+00, + "time_unit": "ms", + "bytes_per_second": 8.5979887510162463e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": 
"Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4788316269197792e+03, + "cpu_time": 4.1088970000018321e+00, + "time_unit": "ms", + "bytes_per_second": 8.1145140403809827e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.5049269669732180e+03, + "cpu_time": 3.9623049999946147e+00, + "time_unit": "ms", + "bytes_per_second": 7.9738088713600378e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.4765454599224972e+03, + "cpu_time": 4.3175810000022352e+00, + "time_unit": "ms", + "bytes_per_second": 8.0757653447742065e+11, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-BlackScholes-CPU.csv b/bench/devcloud/dr-bench-BlackScholes-CPU.csv new file mode 100644 index 0000000000..b6af18cc77 --- /dev/null +++ b/bench/devcloud/dr-bench-BlackScholes-CPU.csv @@ -0,0 +1,8 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime 
+76,BlackScholes_DR,BlackScholes,MHP_DIRECT_CPU,56,strong,0.16003216156131292,160.03216156131293,MHP,DIRECT,CPU,2000000000,56,56,1.0,12497.487882982461 +96,BlackScholes_DR,BlackScholes,MHP_DIRECT_CPU,112,strong,0.24000247484789544,240.00247484789546,MHP,DIRECT,CPU,2000000000,112,112,2.0,8333.247402000854 +35,BlackScholes_DR,BlackScholes,MHP_SYCL_CPU,1,strong,0.04902669621189425,49.02669621189425,MHP,SYCL,CPU,2000000000,1,56,1.0,40794.101061918685 +66,BlackScholes_DR,BlackScholes,MHP_SYCL_CPU,2,strong,0.09920903596072829,99.20903596072829,MHP,SYCL,CPU,2000000000,2,112,2.0,20159.454031905887 +142,BlackScholes_Reference,BlackScholes,Reference_CPU,1,strong,0.049417443917755106,49.4174439177551,SHP,SYCL,CPU,2000000000,1,56,1.0,40471.53882197099 +84,BlackScholes_DR,BlackScholes,SHP_SYCL_CPU,1,strong,0.049175356109450945,49.175356109450945,SHP,SYCL,CPU,2000000000,1,56,1.0,40670.77817491641 +53,BlackScholes_DR,BlackScholes,SHP_SYCL_CPU,2,strong,0.09928964012805436,99.28964012805436,SHP,SYCL,CPU,2000000000,2,112,2.0,20143.08841708551 diff --git a/bench/devcloud/dr-bench-BlackScholes-CPU.png b/bench/devcloud/dr-bench-BlackScholes-CPU.png new file mode 100644 index 0000000000..900af6113f Binary files /dev/null and b/bench/devcloud/dr-bench-BlackScholes-CPU.png differ diff --git a/bench/devcloud/dr-bench-BlackScholes-GPU.csv b/bench/devcloud/dr-bench-BlackScholes-GPU.csv new file mode 100644 index 0000000000..372960d805 --- /dev/null +++ b/bench/devcloud/dr-bench-BlackScholes-GPU.csv @@ -0,0 +1,10 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +152,BlackScholes_DR,BlackScholes,MHP_SYCL_GPU,1,strong,0.8075765344774206,807.5765344774206,MHP,SYCL,GPU,2000000000,1,56,1.0,2476.5454599224972 +22,BlackScholes_DR,BlackScholes,MHP_SYCL_GPU,2,strong,1.552858789999036,1552.8587899990362,MHP,SYCL,GPU,2000000000,2,112,2.0,1287.9471159133802 
+147,BlackScholes_DR,BlackScholes,MHP_SYCL_GPU,3,strong,2.397058117688871,2397.058117688871,MHP,SYCL,GPU,2000000000,3,168,3.0,834.3560739062532 +4,BlackScholes_DR,BlackScholes,MHP_SYCL_GPU,4,strong,2.99751445502825,2997.51445502825,MHP,SYCL,GPU,2000000000,4,224,4.0,667.219467997912 +122,BlackScholes_Reference,BlackScholes,Reference_GPU,1,strong,0.7634275550013694,763.4275550013693,SHP,SYCL,GPU,2000000000,1,56,1.0,2619.7639669901782 +15,BlackScholes_DR,BlackScholes,SHP_SYCL_GPU,1,strong,0.8162227667290685,816.2227667290684,SHP,SYCL,GPU,2000000000,1,56,1.0,2450.3114609444197 +113,BlackScholes_DR,BlackScholes,SHP_SYCL_GPU,2,strong,1.5169338723301389,1516.933872330139,SHP,SYCL,GPU,2000000000,2,112,2.0,1318.4490349126627 +133,BlackScholes_DR,BlackScholes,SHP_SYCL_GPU,3,strong,2.3715481110592043,2371.548111059204,SHP,SYCL,GPU,2000000000,3,168,3.0,843.3309831132797 +58,BlackScholes_DR,BlackScholes,SHP_SYCL_GPU,4,strong,3.256041258302026,3256.0412583020266,SHP,SYCL,GPU,2000000000,4,224,4.0,614.2428308918198 diff --git a/bench/devcloud/dr-bench-BlackScholes-GPU.png b/bench/devcloud/dr-bench-BlackScholes-GPU.png new file mode 100644 index 0000000000..cfd7b973a2 Binary files /dev/null and b/bench/devcloud/dr-bench-BlackScholes-GPU.png differ diff --git a/bench/devcloud/dr-bench-DotProduct-CPU.csv b/bench/devcloud/dr-bench-DotProduct-CPU.csv new file mode 100644 index 0000000000..4214be11a8 --- /dev/null +++ b/bench/devcloud/dr-bench-DotProduct-CPU.csv @@ -0,0 +1,8 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +68,DotProduct_DR,DotProduct,MHP_DIRECT_CPU,56,strong,0.20347932354927906,203.47932354927906,MHP,DIRECT,CPU,2000000000,56,56,1.0,7863.206797090165 +88,DotProduct_DR,DotProduct,MHP_DIRECT_CPU,112,strong,0.3145914293880514,314.5914293880514,MHP,DIRECT,CPU,2000000000,112,112,2.0,5085.96182391983 
+29,DotProduct_DR,DotProduct,MHP_SYCL_CPU,1,strong,0.21645868946359753,216.45868946359752,MHP,SYCL,CPU,2000000000,1,56,1.0,7391.710649107836 +60,DotProduct_DR,DotProduct,MHP_SYCL_CPU,2,strong,0.33470211226801505,334.70211226801507,MHP,SYCL,CPU,2000000000,2,112,2.0,4780.370189952039 +140,DotProduct_Reference,DotProduct,Reference_CPU,1,strong,0.2166090407508057,216.6090407508057,SHP,SYCL,CPU,2000000000,1,56,1.0,7386.579962009497 +78,DotProduct_DR,DotProduct,SHP_SYCL_CPU,1,strong,0.21649385071282554,216.49385071282552,SHP,SYCL,CPU,2000000000,1,56,1.0,7390.510144892594 +47,DotProduct_DR,DotProduct,SHP_SYCL_CPU,2,strong,0.3161904058228699,316.1904058228699,SHP,SYCL,CPU,2000000000,2,112,2.0,5060.242090002949 diff --git a/bench/devcloud/dr-bench-DotProduct-CPU.png b/bench/devcloud/dr-bench-DotProduct-CPU.png new file mode 100644 index 0000000000..2e1ead45f5 Binary files /dev/null and b/bench/devcloud/dr-bench-DotProduct-CPU.png differ diff --git a/bench/devcloud/dr-bench-DotProduct-GPU.csv b/bench/devcloud/dr-bench-DotProduct-GPU.csv new file mode 100644 index 0000000000..83f81ec1af --- /dev/null +++ b/bench/devcloud/dr-bench-DotProduct-GPU.csv @@ -0,0 +1,11 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +42,DotProduct_DR,DotProduct,MHP_SYCL_GPU,1,strong,0.6918931814618411,691.8931814618411,MHP,SYCL,GPU,2000000000,1,56,1.0,2312.4956898975342 +115,DotProduct_DR,DotProduct,MHP_SYCL_GPU,1,weak,0.694245224973135,694.245224973135,MHP,SYCL,GPU,2000000000,1,56,1.0,2304.6611520618158 +37,DotProduct_DR,DotProduct,MHP_SYCL_GPU,2,weak,1.3586457430666643,1358.6457430666644,MHP,SYCL,GPU,4000000000,2,112,2.0,2355.28651698207 +100,DotProduct_DR,DotProduct,MHP_SYCL_GPU,2,strong,1.3625164578560522,1362.5164578560523,MHP,SYCL,GPU,2000000000,2,112,2.0,1174.2977420747145 
+24,DotProduct_DR,DotProduct,MHP_SYCL_GPU,3,weak,2.033145845496301,2033.145845496301,MHP,SYCL,GPU,6000000000,3,168,3.0,2360.873427074926 +124,DotProduct_DR,DotProduct,MHP_SYCL_GPU,3,strong,2.043166638086142,2043.1666380861418,MHP,SYCL,GPU,2000000000,3,168,3.0,783.0981429389132 +105,DotProduct_DR,DotProduct,MHP_SYCL_GPU,4,strong,2.6976926676833615,2697.6926676833614,MHP,SYCL,GPU,2000000000,4,224,4.0,593.0994361095985 +135,DotProduct_DR,DotProduct,MHP_SYCL_GPU,4,weak,2.706850641449739,2706.8506414497388,MHP,SYCL,GPU,8000000000,4,224,4.0,2364.371311071777 +120,DotProduct_Reference,DotProduct,Reference_GPU,1,strong,0.6937717788106549,693.771778810655,SHP,SYCL,GPU,2000000000,1,56,1.0,2306.2339069814975 +7,DotProduct_DR,DotProduct,SHP_SYCL_GPU,1,strong,0.6943473283595865,694.3473283595864,SHP,SYCL,GPU,2000000000,1,56,1.0,2304.322252927856 diff --git a/bench/devcloud/dr-bench-DotProduct-GPU.png b/bench/devcloud/dr-bench-DotProduct-GPU.png new file mode 100644 index 0000000000..784a2472c1 Binary files /dev/null and b/bench/devcloud/dr-bench-DotProduct-GPU.png differ diff --git a/bench/devcloud/dr-bench-Gemm-GPU.csv b/bench/devcloud/dr-bench-Gemm-GPU.csv new file mode 100644 index 0000000000..665b88adcb --- /dev/null +++ b/bench/devcloud/dr-bench-Gemm-GPU.csv @@ -0,0 +1,3 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +16,Gemm_Reference,Gemm,Reference_GPU,1,strong,0.008138684238372119,8.138684238372118,SHP,SYCL,GPU,2000000000,1,56,1.0,377.4565900365308 +5,Gemm_DR,Gemm,SHP_SYCL_GPU,1,strong,0.007988543681173039,7.988543681173039,SHP,SYCL,GPU,2000000000,1,56,1.0,384.55069191646544 diff --git a/bench/devcloud/dr-bench-Gemm-GPU.png b/bench/devcloud/dr-bench-Gemm-GPU.png new file mode 100644 index 0000000000..035ac9f5f4 Binary files /dev/null and b/bench/devcloud/dr-bench-Gemm-GPU.png differ diff --git a/bench/devcloud/dr-bench-Inclusive_Scan-CPU.csv 
b/bench/devcloud/dr-bench-Inclusive_Scan-CPU.csv new file mode 100644 index 0000000000..b283668662 --- /dev/null +++ b/bench/devcloud/dr-bench-Inclusive_Scan-CPU.csv @@ -0,0 +1,8 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +69,Inclusive_Scan_DR,Inclusive_Scan,MHP_DIRECT_CPU,56,strong,0.05818059783225716,58.180597832257156,MHP,DIRECT,CPU,2000000000,56,56,1.0,13750.28840897291 +89,Inclusive_Scan_DR,Inclusive_Scan,MHP_DIRECT_CPU,112,strong,0.08911746410425118,89.11746410425118,MHP,DIRECT,CPU,2000000000,112,112,2.0,8976.916118979168 +30,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_CPU,1,strong,0.06267067555967894,62.670675559678934,MHP,SYCL,CPU,2000000000,1,56,1.0,12765.140839086536 +61,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_CPU,2,strong,0.08086283985697457,80.86283985697456,MHP,SYCL,CPU,2000000000,2,112,2.0,9893.295875027305 +141,Inclusive_Scan_Reference,Inclusive_Scan,Reference_CPU,1,strong,0.06877827332531787,68.77827332531787,SHP,SYCL,CPU,2000000000,1,56,1.0,11631.580167999262 +79,Inclusive_Scan_DR,Inclusive_Scan,SHP_SYCL_CPU,1,strong,0.0625427277506161,62.5427277506161,SHP,SYCL,CPU,2000000000,1,56,1.0,12791.255334911728 +48,Inclusive_Scan_DR,Inclusive_Scan,SHP_SYCL_CPU,2,strong,0.0762994656760575,76.2994656760575,SHP,SYCL,CPU,2000000000,2,112,2.0,10485.001341903724 diff --git a/bench/devcloud/dr-bench-Inclusive_Scan-CPU.png b/bench/devcloud/dr-bench-Inclusive_Scan-CPU.png new file mode 100644 index 0000000000..ea7ad8460f Binary files /dev/null and b/bench/devcloud/dr-bench-Inclusive_Scan-CPU.png differ diff --git a/bench/devcloud/dr-bench-Inclusive_Scan-GPU.csv b/bench/devcloud/dr-bench-Inclusive_Scan-GPU.csv new file mode 100644 index 0000000000..211630ca7a --- /dev/null +++ b/bench/devcloud/dr-bench-Inclusive_Scan-GPU.csv @@ -0,0 +1,11 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth 
(GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +43,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,1,strong,0.1922460022707325,192.24600227073248,MHP,SYCL,GPU,2000000000,1,56,1.0,4161.334907101951 +116,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,1,weak,0.19252775861530444,192.52775861530444,MHP,SYCL,GPU,2000000000,1,56,1.0,4155.244967030984 +38,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,2,weak,0.3105489567652584,310.5489567652584,MHP,SYCL,GPU,4000000000,2,112,2.0,5152.166720075083 +101,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,2,strong,0.31033256920851066,310.3325692085107,MHP,SYCL,GPU,2000000000,2,112,2.0,2577.8796020036316 +25,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,3,weak,0.46631081928095,466.31081928094994,MHP,SYCL,GPU,6000000000,3,168,3.0,5146.781718898982 +125,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,3,strong,0.4624187514808675,462.4187514808675,MHP,SYCL,GPU,2000000000,3,168,3.0,1730.0336490206105 +106,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,4,strong,0.6084610831912113,608.4610831912113,MHP,SYCL,GPU,2000000000,4,224,4.0,1314.792387056572 +136,Inclusive_Scan_DR,Inclusive_Scan,MHP_SYCL_GPU,4,weak,0.6214280764511606,621.4280764511607,MHP,SYCL,GPU,8000000000,4,224,4.0,5149.429388955996 +121,Inclusive_Scan_Reference,Inclusive_Scan,Reference_GPU,1,strong,0.19233254523207413,192.33254523207412,SHP,SYCL,GPU,2000000000,1,56,1.0,4159.4624510100275 +8,Inclusive_Scan_DR,Inclusive_Scan,SHP_SYCL_GPU,1,strong,0.1912648973745623,191.26489737456228,SHP,SYCL,GPU,2000000000,1,56,1.0,4182.6807269988785 diff --git a/bench/devcloud/dr-bench-Inclusive_Scan-GPU.png b/bench/devcloud/dr-bench-Inclusive_Scan-GPU.png new file mode 100644 index 0000000000..58c0cc2df0 Binary files /dev/null and b/bench/devcloud/dr-bench-Inclusive_Scan-GPU.png differ diff --git a/bench/devcloud/dr-bench-Reduce-CPU.csv b/bench/devcloud/dr-bench-Reduce-CPU.csv new file mode 100644 index 0000000000..c2eb4498af --- /dev/null +++ 
b/bench/devcloud/dr-bench-Reduce-CPU.csv @@ -0,0 +1,8 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +67,Reduce_DR,Reduce,MHP_DIRECT_CPU,56,strong,0.19485467391082523,194.85467391082523,MHP,DIRECT,CPU,2000000000,56,56,1.0,2052.811934000921 +87,Reduce_DR,Reduce,MHP_DIRECT_CPU,112,strong,0.31081941470019453,310.8194147001945,MHP,DIRECT,CPU,2000000000,112,112,2.0,1286.9208970933362 +28,Reduce_DR,Reduce,MHP_SYCL_CPU,1,strong,0.21465724602838435,214.65724602838435,MHP,SYCL,CPU,2000000000,1,56,1.0,1863.4358140750003 +59,Reduce_DR,Reduce,MHP_SYCL_CPU,2,strong,0.3283334833848912,328.3334833848912,MHP,SYCL,CPU,2000000000,2,112,2.0,1218.2735549121476 +139,Reduce_Reference,Reduce,Reference_CPU,1,strong,0.21531538679112688,215.31538679112688,SHP,SYCL,CPU,2000000000,1,56,1.0,1857.739969080946 +77,Reduce_DR,Reduce,SHP_SYCL_CPU,1,strong,0.2145132561067587,214.5132561067587,SHP,SYCL,CPU,2000000000,1,56,1.0,1864.6866271095548 +46,Reduce_DR,Reduce,SHP_SYCL_CPU,2,strong,0.26581389070400896,265.81389070400894,SHP,SYCL,CPU,2000000000,2,112,2.0,1504.8122539442868 diff --git a/bench/devcloud/dr-bench-Reduce-CPU.png b/bench/devcloud/dr-bench-Reduce-CPU.png new file mode 100644 index 0000000000..253186ee70 Binary files /dev/null and b/bench/devcloud/dr-bench-Reduce-CPU.png differ diff --git a/bench/devcloud/dr-bench-Reduce-GPU.csv b/bench/devcloud/dr-bench-Reduce-GPU.csv new file mode 100644 index 0000000000..9bed35ebf1 --- /dev/null +++ b/bench/devcloud/dr-bench-Reduce-GPU.csv @@ -0,0 +1,11 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +41,Reduce_DR,Reduce,MHP_SYCL_GPU,1,strong,0.7205108884649971,720.510888464997,MHP,SYCL,GPU,2000000000,1,56,1.0,555.1616310090396 
+114,Reduce_DR,Reduce,MHP_SYCL_GPU,1,weak,0.7224917888136029,722.4917888136029,MHP,SYCL,GPU,2000000000,1,56,1.0,553.6395100861095 +36,Reduce_DR,Reduce,MHP_SYCL_GPU,2,weak,1.4271992451952922,1427.1992451952922,MHP,SYCL,GPU,4000000000,2,112,2.0,560.5384130444459 +99,Reduce_DR,Reduce,MHP_SYCL_GPU,2,strong,1.4368887518570235,1436.8887518570234,MHP,SYCL,GPU,2000000000,2,112,2.0,278.37924090020414 +23,Reduce_DR,Reduce,MHP_SYCL_GPU,3,weak,2.136540845137384,2136.540845137384,MHP,SYCL,GPU,6000000000,3,168,3.0,561.6555390135018 +123,Reduce_DR,Reduce,MHP_SYCL_GPU,3,strong,2.141444200110351,2141.444200110351,MHP,SYCL,GPU,2000000000,3,168,3.0,186.78983089047455 +104,Reduce_DR,Reduce,MHP_SYCL_GPU,4,strong,2.8393780664517374,2839.378066451737,MHP,SYCL,GPU,2000000000,4,224,4.0,140.87592093710325 +134,Reduce_DR,Reduce,MHP_SYCL_GPU,4,weak,2.8482162015778263,2848.2162015778263,MHP,SYCL,GPU,8000000000,4,224,4.0,561.755108026437 +119,Reduce_Reference,Reduce,Reference_GPU,1,strong,0.7214776627023984,721.4776627023984,SHP,SYCL,GPU,2000000000,1,56,1.0,554.4177189100247 +6,Reduce_DR,Reduce,SHP_SYCL_GPU,1,strong,0.721602498208321,721.6024982083211,SHP,SYCL,GPU,2000000000,1,56,1.0,554.3218059709698 diff --git a/bench/devcloud/dr-bench-Reduce-GPU.png b/bench/devcloud/dr-bench-Reduce-GPU.png new file mode 100644 index 0000000000..9d1bdc2937 Binary files /dev/null and b/bench/devcloud/dr-bench-Reduce-GPU.png differ diff --git a/bench/devcloud/dr-bench-Sort-GPU.csv b/bench/devcloud/dr-bench-Sort-GPU.csv new file mode 100644 index 0000000000..9b94170e89 --- /dev/null +++ b/bench/devcloud/dr-bench-Sort-GPU.csv @@ -0,0 +1,3 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +17,Sort_Reference,Sort,Reference_GPU,1,strong,0.010491885685658432,10.491885685658431,SHP,SYCL,GPU,2000000000,1,56,1.0,762.4940110561213 
+9,Sort_DR,Sort,SHP_SYCL_GPU,1,strong,0.012196333330975877,12.196333330975877,SHP,SYCL,GPU,2000000000,1,56,1.0,655.9348439323024 diff --git a/bench/devcloud/dr-bench-Sort-GPU.png b/bench/devcloud/dr-bench-Sort-GPU.png new file mode 100644 index 0000000000..989aec74e0 Binary files /dev/null and b/bench/devcloud/dr-bench-Sort-GPU.png differ diff --git a/bench/devcloud/dr-bench-Stencil2D-CPU.csv b/bench/devcloud/dr-bench-Stencil2D-CPU.csv new file mode 100644 index 0000000000..e3910455cb --- /dev/null +++ b/bench/devcloud/dr-bench-Stencil2D-CPU.csv @@ -0,0 +1,6 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +75,Stencil2D_DR,Stencil2D,MHP_DIRECT_CPU,56,strong,0.14109269357127238,141.09269357127238,MHP,DIRECT,CPU,2000000000,56,56,1.0,2268.37543390103 +95,Stencil2D_DR,Stencil2D,MHP_DIRECT_CPU,112,strong,0.22374492609400282,223.7449260940028,MHP,DIRECT,CPU,2000000000,112,112,2.0,1430.4288619512008 +86,Stencil2D_DR,Stencil2D,MHP_SYCL_CPU,1,strong,0.0992729177076403,99.27291770764029,MHP,SYCL,CPU,2000000000,1,56,1.0,3223.437039922642 +98,Stencil2D_DR,Stencil2D,MHP_SYCL_CPU,2,strong,0.15525703301915972,155.25703301915973,MHP,SYCL,CPU,2000000000,2,112,2.0,2061.0982560803536 +10,Stencil2D_Reference,Stencil2D,Reference_CPU,1,strong,0.10045266671411847,100.45266671411846,MHP,SYCL,CPU,2000000000,1,56,1.0,3185.5799399601656 diff --git a/bench/devcloud/dr-bench-Stencil2D-CPU.png b/bench/devcloud/dr-bench-Stencil2D-CPU.png new file mode 100644 index 0000000000..d545a9fe4e Binary files /dev/null and b/bench/devcloud/dr-bench-Stencil2D-CPU.png differ diff --git a/bench/devcloud/dr-bench-Stencil2D-GPU.csv b/bench/devcloud/dr-bench-Stencil2D-GPU.csv new file mode 100644 index 0000000000..03d004ec0c --- /dev/null +++ b/bench/devcloud/dr-bench-Stencil2D-GPU.csv @@ -0,0 +1,10 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth 
(GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +45,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,1,strong,0.6310519209104262,631.0519209104261,MHP,SYCL,GPU,2000000000,1,56,1.0,507.0898121002978 +118,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,1,weak,0.6312547194714194,631.2547194714194,MHP,SYCL,GPU,2000000000,1,56,1.0,506.9269030858917 +40,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,2,weak,1.2458396210001859,1245.8396210001858,MHP,SYCL,GPU,4000000000,2,112,2.0,513.7097819109291 +103,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,2,strong,1.2312135761101592,1231.2135761101592,MHP,SYCL,GPU,2000000000,2,112,2.0,259.90616592370077 +27,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,3,weak,1.8682491693881131,1868.249169388113,MHP,SYCL,GPU,6000000000,3,168,3.0,513.8500879486103 +127,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,3,strong,1.8119244307998705,1811.9244307998706,MHP,SYCL,GPU,2000000000,3,168,3.0,176.60868994339677 +108,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,4,strong,2.3650104146296576,2365.0104146296576,MHP,SYCL,GPU,2000000000,4,224,4.0,135.30595807127114 +138,Stencil2D_DR,Stencil2D,MHP_SYCL_GPU,4,weak,2.4896375986245305,2489.6375986245303,MHP,SYCL,GPU,8000000000,4,224,4.0,514.1310529320299 +128,Stencil2D_Reference,Stencil2D,Reference_GPU,1,strong,0.6259902726287199,625.9902726287198,MHP,SYCL,GPU,2000000000,1,56,1.0,511.19005197353073 diff --git a/bench/devcloud/dr-bench-Stencil2D-GPU.png b/bench/devcloud/dr-bench-Stencil2D-GPU.png new file mode 100644 index 0000000000..c36ab9426e Binary files /dev/null and b/bench/devcloud/dr-bench-Stencil2D-GPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Add-CPU.csv b/bench/devcloud/dr-bench-Stream_Add-CPU.csv new file mode 100644 index 0000000000..222b604c05 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Add-CPU.csv @@ -0,0 +1,7 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime 
+72,Stream_Add,Stream_Add,MHP_DIRECT_CPU,56,strong,0.18563602878273983,185.63602878273983,MHP,DIRECT,CPU,2000000000,56,56,1.0,6464.262394906254 +92,Stream_Add,Stream_Add,MHP_DIRECT_CPU,112,strong,0.2842423906434117,284.24239064341174,MHP,DIRECT,CPU,2000000000,112,112,2.0,4221.748899886738 +33,Stream_Add,Stream_Add,MHP_SYCL_CPU,1,strong,0.19229622411853517,192.29622411853515,MHP,SYCL,CPU,2000000000,1,56,1.0,6240.372141994304 +64,Stream_Add,Stream_Add,MHP_SYCL_CPU,2,strong,0.29322029965250074,293.2202996525007,MHP,SYCL,CPU,2000000000,2,112,2.0,4092.4860980707545 +82,Stream_Add,Stream_Add,SHP_SYCL_CPU,1,strong,0.19222142127031222,192.22142127031222,SHP,SYCL,CPU,2000000000,1,56,1.0,6242.800578986952 +51,Stream_Add,Stream_Add,SHP_SYCL_CPU,2,strong,0.2758362375730613,275.8362375730613,SHP,SYCL,CPU,2000000000,2,112,2.0,4350.407366915138 diff --git a/bench/devcloud/dr-bench-Stream_Add-CPU.png b/bench/devcloud/dr-bench-Stream_Add-CPU.png new file mode 100644 index 0000000000..3473fe10b4 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Add-CPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Add-GPU.csv b/bench/devcloud/dr-bench-Stream_Add-GPU.csv new file mode 100644 index 0000000000..bd1c5d1399 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Add-GPU.csv @@ -0,0 +1,9 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +150,Stream_Add,Stream_Add,MHP_SYCL_GPU,1,strong,0.8114514040380982,811.4514040380983,MHP,SYCL,GPU,2000000000,1,56,1.0,1478.8316269197792 +20,Stream_Add,Stream_Add,MHP_SYCL_GPU,2,strong,1.6278290285127854,1627.8290285127855,MHP,SYCL,GPU,2000000000,2,112,2.0,737.1781550648118 +145,Stream_Add,Stream_Add,MHP_SYCL_GPU,3,strong,2.3760460629098152,2376.0460629098156,MHP,SYCL,GPU,2000000000,3,168,3.0,505.0407139541835 
+2,Stream_Add,Stream_Add,MHP_SYCL_GPU,4,strong,3.2118770159780548,3211.8770159780547,MHP,SYCL,GPU,2000000000,4,224,4.0,373.6133089873573 +13,Stream_Add,Stream_Add,SHP_SYCL_GPU,1,strong,0.8251717651377696,825.1717651377695,SHP,SYCL,GPU,2000000000,1,56,1.0,1454.2426809764263 +111,Stream_Add,Stream_Add,SHP_SYCL_GPU,2,strong,1.6504876900929422,1650.487690092942,SHP,SYCL,GPU,2000000000,2,112,2.0,727.0578309689943 +131,Stream_Add,Stream_Add,SHP_SYCL_GPU,3,strong,2.4574883498646587,2457.4883498646586,SHP,SYCL,GPU,2000000000,3,168,3.0,488.3034338966806 +56,Stream_Add,Stream_Add,SHP_SYCL_GPU,4,strong,3.3048900533942067,3304.8900533942065,SHP,SYCL,GPU,2000000000,4,224,4.0,363.0983120807814 diff --git a/bench/devcloud/dr-bench-Stream_Add-GPU.png b/bench/devcloud/dr-bench-Stream_Add-GPU.png new file mode 100644 index 0000000000..c1ae6736b4 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Add-GPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Copy-CPU.csv b/bench/devcloud/dr-bench-Stream_Copy-CPU.csv new file mode 100644 index 0000000000..b36c199892 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Copy-CPU.csv @@ -0,0 +1,7 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +70,Stream_Copy,Stream_Copy,MHP_DIRECT_CPU,56,strong,0.18163018767680822,181.63018767680822,MHP,DIRECT,CPU,2000000000,56,56,1.0,4404.554167083258 +90,Stream_Copy,Stream_Copy,MHP_DIRECT_CPU,112,strong,0.27622993638317983,276.2299363831798,MHP,DIRECT,CPU,2000000000,112,112,2.0,2896.1379438985155 +31,Stream_Copy,Stream_Copy,MHP_SYCL_CPU,1,strong,0.1854902896149902,185.49028961499022,MHP,SYCL,CPU,2000000000,1,56,1.0,4312.894231070028 +62,Stream_Copy,Stream_Copy,MHP_SYCL_CPU,2,strong,0.2821687722971565,282.1687722971565,MHP,SYCL,CPU,2000000000,2,112,2.0,2835.182623105816 
+80,Stream_Copy,Stream_Copy,SHP_SYCL_CPU,1,strong,0.1854925506369395,185.4925506369395,SHP,SYCL,CPU,2000000000,1,56,1.0,4312.841659964137 +49,Stream_Copy,Stream_Copy,SHP_SYCL_CPU,2,strong,0.25744022831116087,257.44022831116087,SHP,SYCL,CPU,2000000000,2,112,2.0,3107.5174429734507 diff --git a/bench/devcloud/dr-bench-Stream_Copy-CPU.png b/bench/devcloud/dr-bench-Stream_Copy-CPU.png new file mode 100644 index 0000000000..f906af17e2 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Copy-CPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Copy-GPU.csv b/bench/devcloud/dr-bench-Stream_Copy-GPU.csv new file mode 100644 index 0000000000..1b44536d67 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Copy-GPU.csv @@ -0,0 +1,9 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +148,Stream_Copy,Stream_Copy,MHP_SYCL_GPU,1,strong,0.8601007528340806,860.1007528340806,MHP,SYCL,GPU,2000000000,1,56,1.0,930.1235900142568 +18,Stream_Copy,Stream_Copy,MHP_SYCL_GPU,2,strong,1.6828608437014954,1682.8608437014955,MHP,SYCL,GPU,2000000000,2,112,2.0,475.38095796464046 +143,Stream_Copy,Stream_Copy,MHP_SYCL_GPU,3,strong,2.558839252763398,2558.839252763398,MHP,SYCL,GPU,2000000000,3,168,3.0,312.6417570529475 +0,Stream_Copy,Stream_Copy,MHP_SYCL_GPU,4,strong,3.424161617734663,3424.161617734663,MHP,SYCL,GPU,2000000000,4,224,4.0,233.63383195950294 +11,Stream_Copy,Stream_Copy,SHP_SYCL_GPU,1,strong,0.8623715974999698,862.3715974999699,SHP,SYCL,GPU,2000000000,1,56,1.0,927.6743370482212 +109,Stream_Copy,Stream_Copy,SHP_SYCL_GPU,2,strong,1.697588362483758,1697.5883624837581,SHP,SYCL,GPU,2000000000,2,112,2.0,471.2567649966168 +129,Stream_Copy,Stream_Copy,SHP_SYCL_GPU,3,strong,2.5670992494265388,2567.099249426539,SHP,SYCL,GPU,2000000000,3,168,3.0,311.63578898584115 
+54,Stream_Copy,Stream_Copy,SHP_SYCL_GPU,4,strong,3.429747935208815,3429.747935208815,SHP,SYCL,GPU,2000000000,4,224,4.0,233.25329298617777 diff --git a/bench/devcloud/dr-bench-Stream_Copy-GPU.png b/bench/devcloud/dr-bench-Stream_Copy-GPU.png new file mode 100644 index 0000000000..f12ddefdc9 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Copy-GPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Scale-CPU.csv b/bench/devcloud/dr-bench-Stream_Scale-CPU.csv new file mode 100644 index 0000000000..b4dfe29e6b --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Scale-CPU.csv @@ -0,0 +1,7 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +71,Stream_Scale,Stream_Scale,MHP_DIRECT_CPU,56,strong,0.17940573511552335,179.40573511552336,MHP,DIRECT,CPU,2000000000,56,56,1.0,4459.166255108077 +91,Stream_Scale,Stream_Scale,MHP_DIRECT_CPU,112,strong,0.2547190017554135,254.71900175541347,MHP,DIRECT,CPU,2000000000,112,112,2.0,3140.715826015119 +32,Stream_Scale,Stream_Scale,MHP_SYCL_CPU,1,strong,0.18490877157134647,184.90877157134648,MHP,SYCL,CPU,2000000000,1,56,1.0,4326.457815936128 +63,Stream_Scale,Stream_Scale,MHP_SYCL_CPU,2,strong,0.28015850121010816,280.15850121010817,MHP,SYCL,CPU,2000000000,2,112,2.0,2855.526412885935 +81,Stream_Scale,Stream_Scale,SHP_SYCL_CPU,1,strong,0.185000970378533,185.000970378533,SHP,SYCL,CPU,2000000000,1,56,1.0,4324.301642110899 +50,Stream_Scale,Stream_Scale,SHP_SYCL_CPU,2,strong,0.25051582069518136,250.51582069518133,SHP,SYCL,CPU,2000000000,2,112,2.0,3193.4110898864596 diff --git a/bench/devcloud/dr-bench-Stream_Scale-CPU.png b/bench/devcloud/dr-bench-Stream_Scale-CPU.png new file mode 100644 index 0000000000..7743c21df9 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Scale-CPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Scale-GPU.csv b/bench/devcloud/dr-bench-Stream_Scale-GPU.csv new file 
mode 100644 index 0000000000..3edd62cad2 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Scale-GPU.csv @@ -0,0 +1,9 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +149,Stream_Scale,Stream_Scale,MHP_SYCL_GPU,1,strong,0.8597988751016247,859.7988751016246,MHP,SYCL,GPU,2000000000,1,56,1.0,930.4501589461179 +19,Stream_Scale,Stream_Scale,MHP_SYCL_GPU,2,strong,1.6894241106915255,1689.4241106915256,MHP,SYCL,GPU,2000000000,2,112,2.0,473.53414393531943 +144,Stream_Scale,Stream_Scale,MHP_SYCL_GPU,3,strong,2.5582928699235805,2558.2928699235804,MHP,SYCL,GPU,2000000000,3,168,3.0,312.70852895896047 +1,Stream_Scale,Stream_Scale,MHP_SYCL_GPU,4,strong,3.4352606500240532,3435.260650024053,MHP,SYCL,GPU,2000000000,4,224,4.0,232.8789811027581 +12,Stream_Scale,Stream_Scale,SHP_SYCL_GPU,1,strong,0.8559835789465333,855.9835789465333,SHP,SYCL,GPU,2000000000,1,56,1.0,934.5973680763447 +110,Stream_Scale,Stream_Scale,SHP_SYCL_GPU,2,strong,1.70142731228805,1701.42731228805,SHP,SYCL,GPU,2000000000,2,112,2.0,470.1934629955915 +130,Stream_Scale,Stream_Scale,SHP_SYCL_GPU,3,strong,2.5695884295320246,2569.5884295320243,SHP,SYCL,GPU,2000000000,3,168,3.0,311.33390499648874 +55,Stream_Scale,Stream_Scale,SHP_SYCL_GPU,4,strong,3.4159436472686906,3415.9436472686903,SHP,SYCL,GPU,2000000000,4,224,4.0,234.19590093052662 diff --git a/bench/devcloud/dr-bench-Stream_Scale-GPU.png b/bench/devcloud/dr-bench-Stream_Scale-GPU.png new file mode 100644 index 0000000000..e18a08f06d Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Scale-GPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Triad-CPU.csv b/bench/devcloud/dr-bench-Stream_Triad-CPU.csv new file mode 100644 index 0000000000..6279e5dd41 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Triad-CPU.csv @@ -0,0 +1,7 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth 
(GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +73,Stream_Triad,Stream_Triad,MHP_DIRECT_CPU,56,strong,0.1875936402389325,187.5936402389325,MHP,DIRECT,CPU,2000000000,56,56,1.0,6396.805341969991 +93,Stream_Triad,Stream_Triad,MHP_DIRECT_CPU,112,strong,0.27757003651989137,277.57003651989135,MHP,DIRECT,CPU,2000000000,112,112,2.0,4323.233210058698 +34,Stream_Triad,Stream_Triad,MHP_SYCL_CPU,1,strong,0.19230421338885123,192.30421338885122,MHP,SYCL,CPU,2000000000,1,56,1.0,6240.112886000706 +65,Stream_Triad,Stream_Triad,MHP_SYCL_CPU,2,strong,0.2924331110612302,292.43311106123025,MHP,SYCL,CPU,2000000000,2,112,2.0,4103.50249205789 +83,Stream_Triad,Stream_Triad,SHP_SYCL_CPU,1,strong,0.19225543309078358,192.25543309078358,SHP,SYCL,CPU,2000000000,1,56,1.0,6241.696168000394 +52,Stream_Triad,Stream_Triad,SHP_SYCL_CPU,2,strong,0.2800101326353254,280.0101326353254,SHP,SYCL,CPU,2000000000,2,112,2.0,4285.559199969504 diff --git a/bench/devcloud/dr-bench-Stream_Triad-CPU.png b/bench/devcloud/dr-bench-Stream_Triad-CPU.png new file mode 100644 index 0000000000..b8bef57dd2 Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Triad-CPU.png differ diff --git a/bench/devcloud/dr-bench-Stream_Triad-GPU.csv b/bench/devcloud/dr-bench-Stream_Triad-GPU.csv new file mode 100644 index 0000000000..edf79252e5 --- /dev/null +++ b/bench/devcloud/dr-bench-Stream_Triad-GPU.csv @@ -0,0 +1,9 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +151,Stream_Triad,Stream_Triad,MHP_SYCL_GPU,1,strong,0.7973808871360037,797.3808871360038,MHP,SYCL,GPU,2000000000,1,56,1.0,1504.926966973218 +21,Stream_Triad,Stream_Triad,MHP_SYCL_GPU,2,strong,1.610039357450197,1610.039357450197,MHP,SYCL,GPU,2000000000,2,112,2.0,745.3233950134162 
+146,Stream_Triad,Stream_Triad,MHP_SYCL_GPU,3,strong,2.3942382892942544,2394.238289294254,MHP,SYCL,GPU,2000000000,3,168,3.0,501.203245042799 +3,Stream_Triad,Stream_Triad,MHP_SYCL_GPU,4,strong,3.212796990214771,3212.796990214771,MHP,SYCL,GPU,2000000000,4,224,4.0,373.50632600031844 +14,Stream_Triad,Stream_Triad,SHP_SYCL_GPU,1,strong,0.8161862987548119,816.1862987548119,SHP,SYCL,GPU,2000000000,1,56,1.0,1470.2525659040602 +112,Stream_Triad,Stream_Triad,SHP_SYCL_GPU,2,strong,1.6453953499318714,1645.3953499318714,SHP,SYCL,GPU,2000000000,2,112,2.0,729.3080049422085 +132,Stream_Triad,Stream_Triad,SHP_SYCL_GPU,3,strong,2.3650802146098613,2365.0802146098613,SHP,SYCL,GPU,2000000000,3,168,3.0,507.3823680851135 +57,Stream_Triad,Stream_Triad,SHP_SYCL_GPU,4,strong,3.310029502812501,3310.029502812501,SHP,SYCL,GPU,2000000000,4,224,4.0,362.5345329944556 diff --git a/bench/devcloud/dr-bench-Stream_Triad-GPU.png b/bench/devcloud/dr-bench-Stream_Triad-GPU.png new file mode 100644 index 0000000000..60dbf0518c Binary files /dev/null and b/bench/devcloud/dr-bench-Stream_Triad-GPU.png differ diff --git a/bench/devcloud/dr-bench-WaveEquation-CPU.csv b/bench/devcloud/dr-bench-WaveEquation-CPU.csv new file mode 100644 index 0000000000..72747489a3 --- /dev/null +++ b/bench/devcloud/dr-bench-WaveEquation-CPU.csv @@ -0,0 +1,5 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +74,WaveEquation_DR,WaveEquation,MHP_DIRECT_CPU,56,strong,0.1844434591303605,184.4434591303605,MHP,DIRECT,CPU,2000000000,56,56,1.0,19712.159749890143 +94,WaveEquation_DR,WaveEquation,MHP_DIRECT_CPU,112,strong,0.27535465652306274,275.3546565230628,MHP,DIRECT,CPU,2000000000,112,112,2.0,13203.985642042264 +85,WaveEquation_DR,WaveEquation,MHP_SYCL_CPU,1,strong,0.1283668232415776,128.3668232415776,MHP,SYCL,CPU,2000000000,1,56,1.0,28323.353646897627 
+97,WaveEquation_DR,WaveEquation,MHP_SYCL_CPU,2,strong,0.20066893566766425,200.66893566766424,MHP,SYCL,CPU,2000000000,2,112,2.0,18118.294787895607 diff --git a/bench/devcloud/dr-bench-WaveEquation-CPU.png b/bench/devcloud/dr-bench-WaveEquation-CPU.png new file mode 100644 index 0000000000..62174387fb Binary files /dev/null and b/bench/devcloud/dr-bench-WaveEquation-CPU.png differ diff --git a/bench/devcloud/dr-bench-WaveEquation-GPU.csv b/bench/devcloud/dr-bench-WaveEquation-GPU.csv new file mode 100644 index 0000000000..f2a7ad870e --- /dev/null +++ b/bench/devcloud/dr-bench-WaveEquation-GPU.csv @@ -0,0 +1,9 @@ +,bench_name,Benchmark,Target,Ranks,Scaling,Bandwidth (TB/s),Bandwidth (GB/s),model,runtime,device,vsize,Number of GPU Tiles,Number of CPU Cores,Number of CPU Sockets,rtime +44,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,1,strong,0.4124941181640197,412.4941181640197,MHP,SYCL,GPU,2000000000,1,56,1.0,8814.135210903318 +117,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,1,weak,0.4133129211245116,413.3129211245116,MHP,SYCL,GPU,2000000000,1,56,1.0,8796.673767923921 +39,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,2,weak,0.7782218555805628,778.2218555805628,MHP,SYCL,GPU,4000000000,2,112,2.0,9343.920086931083 +102,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,2,strong,0.7958779822243652,795.8779822243653,MHP,SYCL,GPU,2000000000,2,112,2.0,4568.261734089587 +26,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,3,weak,1.1814815513449133,1181.4815513449134,MHP,SYCL,GPU,6000000000,3,168,3.0,9231.601836890552 +126,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,3,strong,1.1174493400730945,1117.4493400730944,MHP,SYCL,GPU,2000000000,3,168,3.0,3253.6409489151224 +107,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,4,strong,1.4286035644367434,1428.6035644367435,MHP,SYCL,GPU,2000000000,4,224,4.0,2544.9880020658366 +137,WaveEquation_DR,WaveEquation,MHP_SYCL_GPU,4,weak,1.5772728695309468,1577.2728695309468,MHP,SYCL,GPU,8000000000,4,224,4.0,9220.41835990298 diff --git 
a/bench/devcloud/dr-bench-WaveEquation-GPU.png b/bench/devcloud/dr-bench-WaveEquation-GPU.png new file mode 100644 index 0000000000..27e8ef6e44 Binary files /dev/null and b/bench/devcloud/dr-bench-WaveEquation-GPU.png differ diff --git a/bench/devcloud/dr-bench-a30b94d525ae4cee8aedceedddca35ce.json b/bench/devcloud/dr-bench-a30b94d525ae4cee8aedceedddca35ce.json new file mode 100644 index 0000000000..ae82d91e39 --- /dev/null +++ b/bench/devcloud/dr-bench-a30b94d525ae4cee8aedceedddca35ce.json @@ -0,0 +1,181 @@ +{ + "context": { + "date": "2023-12-01T02:54:28-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3798, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [1.45947,2.59961,17.4307], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology 
nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: 
Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.8634358140750003e+03, + "cpu_time": 1.8550503020000003e+03, + "time_unit": "ms", + "bytes_per_second": 2.1465724602838434e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 7.3917106491078357e+03, + "cpu_time": 7.3764912130000012e+03, + "time_unit": "ms", + "bytes_per_second": 2.1645868946359753e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2765140839086536e+04, + "cpu_time": 1.2740335561000002e+04, + "time_unit": "ms", + "bytes_per_second": 6.2670675559678932e+10, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 1, + "real_time": 4.3128942310700277e+03, + "cpu_time": 6.9288480000011532e+00, + "time_unit": "ms", + "bytes_per_second": 1.8549028961499020e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.3264578159361281e+03, + "cpu_time": 6.8725500000113016e+00, + "time_unit": "ms", + "bytes_per_second": 1.8490877157134647e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.2403721419943040e+03, + "cpu_time": 7.4926879999992479e+00, + "time_unit": "ms", + "bytes_per_second": 1.9229622411853516e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.2401128860007057e+03, + "cpu_time": 6.5171560000010231e+00, + "time_unit": "ms", + "bytes_per_second": 1.9230421338885123e+11, + "footprint": 2.4000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 1, + "real_time": 4.0794101061918685e+04, + "cpu_time": 1.1052277999993976e+01, + "time_unit": "ms", + "bytes_per_second": 4.9026696211894249e+10, + "footprint": 4.0000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-a3f4821ec91c4d4c8ae2603dd15b4f11.json b/bench/devcloud/dr-bench-a3f4821ec91c4d4c8ae2603dd15b4f11.json new file mode 100644 index 0000000000..99a9993d38 --- /dev/null +++ b/bench/devcloud/dr-bench-a3f4821ec91c4d4c8ae2603dd15b4f11.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:47:00-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3745, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [1.87012,2.40039,26.4268], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "4000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology 
nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: 
Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "1" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.6053841304444586e+02, + "cpu_time": 5.6053320200000019e+02, + "time_unit": "ms", + "bytes_per_second": 1.4271992451952922e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3552865169820702e+03, + "cpu_time": 2.3551524550000026e+03, + "time_unit": "ms", + "bytes_per_second": 1.3586457430666643e+12, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1521667200750826e+03, + "cpu_time": 5.1448086500000018e+03, + "time_unit": "ms", + "bytes_per_second": 3.1054895676525842e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 1, + "real_time": 9.3439200869310826e+03, + "cpu_time": 9.2250372820000030e+03, + "time_unit": "ms", + "bytes_per_second": 7.7822185558056274e+11, + "flops": 1.4447187913005497e+09, + "footprint": 3.5998231824000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1370978191092911e+02, + "cpu_time": 5.1277019999999141e+02, + "time_unit": "ms", + "bytes_per_second": 1.2458396210001858e+12, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-cfc91caadee849f29be3cb9102189519.json b/bench/devcloud/dr-bench-cfc91caadee849f29be3cb9102189519.json new file mode 100644 index 0000000000..68923aa04c --- /dev/null +++ b/bench/devcloud/dr-bench-cfc91caadee849f29be3cb9102189519.json @@ -0,0 +1,69 @@ +{ + "context": { + "date": "2023-12-01T02:53:02-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3789, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.9502,3.13232,19.0088], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte 
Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not 
affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stencil2D_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stencil2D_Reference/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.1119005197353073e+02, + "cpu_time": 5.1118343600000003e+02, + "time_unit": "ms", + "bytes_per_second": 6.2599027262871985e+11, + "footprint": 3.2000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-d0c2ccea5b0c459ebde79e341a1d68d8.json b/bench/devcloud/dr-bench-d0c2ccea5b0c459ebde79e341a1d68d8.json new file mode 100644 index 0000000000..2e5ef863f1 --- /dev/null +++ b/bench/devcloud/dr-bench-d0c2ccea5b0c459ebde79e341a1d68d8.json @@ -0,0 +1,213 @@ +{ + "context": { + "date": "2023-12-01T03:14:46-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3000, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + 
"level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [66.4565,83.0801,76.3306], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB 
(112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "112", + "runtime": "DIRECT", + "target": "MHP_DIRECT_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2869208970933362e+03, + "cpu_time": 1.2868947490000000e+03, + "time_unit": "ms", + "bytes_per_second": 3.1081941470019452e+11, + "footprint": 7.1428571000000000e+07 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0859618239198298e+03, + "cpu_time": 5.0858296690000025e+03, + "time_unit": 
"ms", + "bytes_per_second": 3.1459142938805139e+11, + "footprint": 2.8571428500000000e+08 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.9769161189791685e+03, + "cpu_time": 8.9727574059999988e+03, + "time_unit": "ms", + "bytes_per_second": 8.9117464104251190e+10, + "footprint": 1.4285714200000000e+08 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.8961379438985155e+03, + "cpu_time": 2.8959479829999991e+03, + "time_unit": "ms", + "bytes_per_second": 2.7622993638317981e+11, + "footprint": 1.4285714200000000e+08 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1407158260151191e+03, + "cpu_time": 3.1388039919999997e+03, + "time_unit": "ms", + "bytes_per_second": 2.5471900175541348e+11, + "footprint": 1.4285714200000000e+08 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.2217488998867384e+03, + "cpu_time": 
4.2216444820000024e+03, + "time_unit": "ms", + "bytes_per_second": 2.8424239064341174e+11, + "footprint": 2.1428571400000000e+08 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.3232332100586982e+03, + "cpu_time": 4.3231834630000067e+03, + "time_unit": "ms", + "bytes_per_second": 2.7757003651989136e+11, + "footprint": 2.1428571400000000e+08 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.3203985642042264e+04, + "cpu_time": 1.3203679198999993e+04, + "time_unit": "ms", + "bytes_per_second": 2.7535465652306274e+11, + "flops": 5.1117819899083436e+08, + "footprint": 3.2140902800000000e+08 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.4304288619512008e+03, + "cpu_time": 1.4279558479999964e+03, + "time_unit": "ms", + "bytes_per_second": 2.2374492609400281e+11, + "footprint": 2.8576000000000000e+08 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 1, + "real_time": 8.3332474020008540e+03, + "cpu_time": 8.3297003160000004e+03, + "time_unit": "ms", + "bytes_per_second": 2.4000247484789545e+11, + "footprint": 3.5714285700000000e+08 + } + ] +} diff --git a/bench/devcloud/dr-bench-dc6b76bd54834580a8fccdc353f49de1.json b/bench/devcloud/dr-bench-dc6b76bd54834580a8fccdc353f49de1.json new file mode 100644 index 0000000000..57da20cf8b --- /dev/null +++ b/bench/devcloud/dr-bench-dc6b76bd54834580a8fccdc353f49de1.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:53:39-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "shp/shp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3798, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.08643,2.89502,18.3413], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info0": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc 
cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not 
affected\n", + "model": "SHP", + "numactl": "", + "rank": "0", + "ranks": "1", + "runtime": "SYCL", + "target": "SHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Gemm_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Gemm_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.8455069191646544e+02, + "cpu_time": 6.0797000000001322e-02, + "time_unit": "ms", + "bytes_per_second": 7.9885436811730385e+09, + "flops": 1.0651391574897385e+13, + "footprint": 3.0720000000000000e+09 + }, + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.5432180597096976e+02, + "cpu_time": 5.5375251300000002e+02, + "time_unit": "ms", + "bytes_per_second": 7.2160249820832104e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.3043222529278560e+03, + "cpu_time": 2.3034746080000000e+03, + "time_unit": "ms", + "bytes_per_second": 6.9434732835958643e+11, + "footprint": 3.2000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 1, + "real_time": 4.1826807269988785e+03, + "cpu_time": 4.1751754369999999e+03, + "time_unit": "ms", + "bytes_per_second": 1.9126489737456229e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "DRSortFixture/Sort_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "DRSortFixture/Sort_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 6.5593484393230244e+02, + "cpu_time": 6.5548174799999265e+02, + "time_unit": "ms", + "bytes_per_second": 1.2196333330975878e+10, + "footprint": 8.0000000000000000e+09 + } + ] +} diff --git a/bench/devcloud/dr-bench-ee56a4c8add24f4b9b6777eb1a3db39b.json b/bench/devcloud/dr-bench-ee56a4c8add24f4b9b6777eb1a3db39b.json new file mode 100644 index 0000000000..788af94380 --- /dev/null +++ b/bench/devcloud/dr-bench-ee56a4c8add24f4b9b6777eb1a3db39b.json @@ -0,0 +1,134 @@ +{ + "context": { + "date": "2023-12-01T02:43:32-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3797, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [1.56152,2.44971,32.5269], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte 
Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not 
affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.7837924090020414e+02, + "cpu_time": 2.7828499700000009e+02, + "time_unit": "ms", + "bytes_per_second": 1.4368887518570234e+12, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.1742977420747145e+03, + "cpu_time": 1.1742842789999984e+03, + "time_unit": "ms", + "bytes_per_second": 1.3625164578560522e+12, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 
1, + "iterations": 1, + "real_time": 2.5778796020036316e+03, + "cpu_time": 2.5765050490000012e+03, + "time_unit": "ms", + "bytes_per_second": 3.1033256920851068e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "WaveEquation_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.5682617340895868e+03, + "cpu_time": 4.5051493720000053e+03, + "time_unit": "ms", + "bytes_per_second": 7.9587798222436523e+11, + "flops": 1.4774962541293907e+09, + "footprint": 1.7998905600000000e+10 + }, + { + "name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stencil2D_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.5990616592370077e+02, + "cpu_time": 2.5885902199999578e+02, + "time_unit": "ms", + "bytes_per_second": 1.2312135761101592e+12, + "footprint": 1.6000000000000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-f4eaa5a34ce94d35ab3bd5bdcf44ed6b.json b/bench/devcloud/dr-bench-f4eaa5a34ce94d35ab3bd5bdcf44ed6b.json new file mode 100644 index 0000000000..c20f1feabd --- /dev/null +++ b/bench/devcloud/dr-bench-f4eaa5a34ce94d35ab3bd5bdcf44ed6b.json @@ -0,0 +1,133 @@ +{ + "context": { + "date": "2023-12-01T02:41:14-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3793, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + 
"num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [2.07471,2.771,37.5034], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "GPU", + "device_info": "Intel(R) Data Center GPU Max 1100, max_compute_units: 448", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk 
pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "3", + "runtime": "SYCL", + "target": "MHP_SYCL_GPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 3.1264175705294753e+02, + "cpu_time": 3.9651670000000472e+00, + "time_unit": "ms", + "bytes_per_second": 2.5588392527633979e+12, + "footprint": 5.3333333330000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 1, + "real_time": 3.1270852895896047e+02, + "cpu_time": 4.4775960000000836e+00, + "time_unit": "ms", + "bytes_per_second": 2.5582928699235806e+12, + "footprint": 5.3333333330000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0504071395418350e+02, + "cpu_time": 1.0612863000000416e+01, + "time_unit": "ms", + "bytes_per_second": 2.3760460629098154e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.0120324504279898e+02, + "cpu_time": 6.1936550000005752e+00, + "time_unit": "ms", + "bytes_per_second": 2.3942382892942544e+12, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 8.3435607390625319e+02, + "cpu_time": 4.6766060000003051e+00, + "time_unit": "ms", + "bytes_per_second": 2.3970581176888711e+12, + "footprint": 1.3333333333000000e+10 + } + ] +} diff --git a/bench/devcloud/dr-bench-fd64e9be192a4f1687d8136fe6bc4640.json b/bench/devcloud/dr-bench-fd64e9be192a4f1687d8136fe6bc4640.json new file mode 100644 index 0000000000..f5f3ce2cb1 --- /dev/null +++ 
b/bench/devcloud/dr-bench-fd64e9be192a4f1687d8136fe6bc4640.json @@ -0,0 +1,181 @@ +{ + "context": { + "date": "2023-12-01T02:58:00-08:00", + "host_name": "idc-beta-batch-pvc-node-13", + "executable": "mhp/mhp-bench", + "num_cpus": 224, + "mhz_per_cpu": 3800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 110100480, + "num_sharing": 112 + } + ], + "load_avg": [92.4893,49.0439,32.8345], + "library_build_type": "debug", + "default_repetitions": "50", + "default_vector_size": "2000000000", + "device": "CPU", + "device_info": "Intel(R) Xeon(R) Platinum 8480+, max_compute_units: 112", + "hostname": "idc-beta-batch-pvc-node-13\n", + "lscpu": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 52 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 224\nOn-line CPU(s) list: 0-223\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8480+\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 56\nSocket(s): 2\nStepping: 8\nCPU max MHz: 3800.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 
erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hwp hwp_act_window hwp_epp hwp_pkg_req avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 5.3 MiB (112 instances)\nL1i cache: 3.5 MiB (112 instances)\nL2 cache: 224 MiB (112 instances)\nL3 cache: 210 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-55,112-167\nNUMA node1 CPU(s): 56-111,168-223\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n", + "model": "MHP", + "numactl": "", + "rank": "0", + "ranks": "2", + "runtime": "SYCL", + "target": "MHP_SYCL_CPU", + "weak-scaling": "0" + }, + "benchmarks": [ + { + "name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "Reduce_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 1.2182735549121476e+03, + "cpu_time": 1.2036727840000001e+03, + "time_unit": "ms", + "bytes_per_second": 3.2833348338489124e+11, + "footprint": 4.0000000000000000e+09 + }, + { + "name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "DotProduct_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.7803701899520393e+03, + "cpu_time": 4.7243844870000021e+03, + "time_unit": "ms", + "bytes_per_second": 3.3470211226801508e+11, + "footprint": 1.6000000000000000e+10 + }, + { + "name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "Inclusive_Scan_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 9.8932958750273046e+03, + "cpu_time": 9.8681771910000007e+03, + "time_unit": "ms", + "bytes_per_second": 8.0862839856974564e+10, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "Stream_Copy/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.8351826231058162e+03, + "cpu_time": 6.6812887300000057e+02, + "time_unit": "ms", + "bytes_per_second": 2.8216877229715649e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "Stream_Scale/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", 
+ "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.8555264128859349e+03, + "cpu_time": 6.7897184599999605e+02, + "time_unit": "ms", + "bytes_per_second": 2.8015850121010815e+11, + "footprint": 8.0000000000000000e+09 + }, + { + "name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "Stream_Add/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.0924860980707545e+03, + "cpu_time": 9.6872324499999968e+02, + "time_unit": "ms", + "bytes_per_second": 2.9322029965250073e+11, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "Stream_Triad/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 4.1035024920578899e+03, + "cpu_time": 9.4834765600000287e+02, + "time_unit": "ms", + "bytes_per_second": 2.9243311106123022e+11, + "footprint": 1.2000000000000000e+10 + }, + { + "name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "BlackScholes_DR/min_time:0.100/min_warmup_time:0.100/real_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 2.0159454031905887e+04, + "cpu_time": 1.5916687900000426e+02, + "time_unit": "ms", + "bytes_per_second": 9.9209035960728287e+10, + "footprint": 2.0000000000000000e+10 + } + ] +} diff --git a/doxygen/algorithms_8hpp_source.html b/doxygen/algorithms_8hpp_source.html new file mode 100644 index 0000000000..67e23517ee --- /dev/null +++ b/doxygen/algorithms_8hpp_source.html @@ -0,0 +1,103 @@ + + +
+ + + + +
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::direct_iterator< Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = std::iter_value_t< Iter > |
+using | difference_type = std::iter_difference_t< Iter > |
+using | reference = std::iter_reference_t< Iter > |
+using | iterator = direct_iterator< Iter > |
+using | pointer = iterator |
+using | iterator_category = std::random_access_iterator_tag |
+using | is_passed_directly = ::std::true_type |
+Public Member Functions | |
+ | direct_iterator (Iter iter) noexcept |
+ | direct_iterator (const direct_iterator &) noexcept=default |
+direct_iterator & | operator= (const direct_iterator &) noexcept=default |
+bool | operator== (const direct_iterator &) const noexcept=default |
+bool | operator!= (const direct_iterator &) const noexcept=default |
+iterator | operator+ (difference_type offset) const noexcept |
+iterator | operator- (difference_type offset) const noexcept |
+difference_type | operator- (iterator other) const noexcept |
+bool | operator< (iterator other) const noexcept |
+bool | operator> (iterator other) const noexcept |
+bool | operator<= (iterator other) const noexcept |
+bool | operator>= (iterator other) const noexcept |
+iterator & | operator++ () noexcept |
+iterator | operator++ (int) noexcept |
+iterator & | operator-- () noexcept |
+iterator | operator-- (int) noexcept |
+iterator & | operator+= (difference_type offset) noexcept |
+iterator & | operator-= (difference_type offset) noexcept |
+reference | operator* () const noexcept |
+reference | operator[] (difference_type offset) const noexcept |
+Iter | base () const noexcept |
+Friends | |
+iterator | operator+ (difference_type n, iterator iter) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::enumerate_adapter_closure, including all inherited members.
+operator()(R &&r) const (defined in dr::__detail::enumerate_adapter_closure) | dr::__detail::enumerate_adapter_closure | inline |
operator| (defined in dr::__detail::enumerate_adapter_closure) | dr::__detail::enumerate_adapter_closure | friend |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const enumerate_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::enumerate_fn_, including all inherited members.
+enumerate() const (defined in dr::__detail::enumerate_fn_) | dr::__detail::enumerate_fn_ | inline |
operator()(R &&r) const (defined in dr::__detail::enumerate_fn_) | dr::__detail::enumerate_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> | |
constexpr auto | operator() (R &&r) const |
+auto | enumerate () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::event, including all inherited members.
+wait() (defined in dr::__detail::event) | dr::__detail::event | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+void | wait () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::mdspan_iter_accessor< Iter >, including all inherited members.
+access(Iter iter, std::size_t index) const (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > | inline |
data_handle_type typedef (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > | |
mdspan_iter_accessor() noexcept=default (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > | |
offset(Iter iter, std::size_t index) const noexcept (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > | inline |
offset_policy typedef (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > | |
reference typedef (defined in dr::__detail::mdspan_iter_accessor< Iter >) | dr::__detail::mdspan_iter_accessor< Iter > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | data_handle_type = Iter |
+using | reference = std::iter_reference_t< Iter > |
+using | offset_policy = mdspan_iter_accessor |
+Public Member Functions | |
+constexpr auto | access (Iter iter, std::size_t index) const |
+constexpr auto | offset (Iter iter, std::size_t index) const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::mdtranspose< Mdspan, Is >, including all inherited members.
+extent(std::size_t d) const (defined in dr::__detail::mdtranspose< Mdspan, Is >) | dr::__detail::mdtranspose< Mdspan, Is > | inline |
extents() const (defined in dr::__detail::mdtranspose< Mdspan, Is >) | dr::__detail::mdtranspose< Mdspan, Is > | inline |
mdtranspose(Mdspan &mdspan) (defined in dr::__detail::mdtranspose< Mdspan, Is >) | dr::__detail::mdtranspose< Mdspan, Is > | inline |
operator()(Indexes... indexes) const (defined in dr::__detail::mdtranspose< Mdspan, Is >) | dr::__detail::mdtranspose< Mdspan, Is > | inline |
operator()(std::array< std::size_t, rank_ > index) const (defined in dr::__detail::mdtranspose< Mdspan, Is >) | dr::__detail::mdtranspose< Mdspan, Is > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | mdtranspose (Mdspan &mdspan) |
+template<std::integral... Indexes> | |
auto & | operator() (Indexes... indexes) const |
+auto & | operator() (std::array< std::size_t, rank_ > index) const |
+auto | extents () const |
+auto | extent (std::size_t d) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::__detail::owning_view< R >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | owning_view (R &&range) |
+ | owning_view (owning_view &&other)=default |
+ | owning_view (const owning_view &other)=default |
+owning_view & | operator= (owning_view &&other)=default |
+owning_view & | operator= (const owning_view &other)=default |
+auto | size () const |
+auto | empty () const |
+auto | begin () |
+auto | begin () const |
+auto | end () |
+auto | end () const |
+decltype(auto) | base () |
+decltype(auto) | base () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::communicator, including all inherited members.
+all_gather(const T *src, T *dst, std::size_t count) const (defined in dr::communicator) | dr::communicator | inline |
all_gather(const T &src, std::vector< T > &dst) const (defined in dr::communicator) | dr::communicator | inline |
all_gather(const R &src, R &dst) const (defined in dr::communicator) | dr::communicator | inline |
alltoall(const R &sendr, R &recvr, std::size_t count) (defined in dr::communicator) | dr::communicator | inline |
alltoall(const T *send, T *receive, std::size_t count) (defined in dr::communicator) | dr::communicator | inline |
alltoallv(const SendR &sendbuf, const std::vector< std::size_t > &sendcnt, const std::vector< std::size_t > &senddsp, RecvR &recvbuf, const std::vector< std::size_t > &recvcnt, const std::vector< std::size_t > &recvdsp) (defined in dr::communicator) | dr::communicator | inline |
barrier() const (defined in dr::communicator) | dr::communicator | inline |
bcast(void *src, std::size_t count, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
communicator(MPI_Comm comm=MPI_COMM_WORLD) (defined in dr::communicator) | dr::communicator | inline |
first() const (defined in dr::communicator) | dr::communicator | inline |
gather(const void *src, void *dst, std::size_t count, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
gather(const T &src, std::span< T > dst, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
gatherv(const void *src, int *counts, int *offsets, void *dst, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
i_all_gather(const T *src, T *dst, std::size_t count, MPI_Request *req) const (defined in dr::communicator) | dr::communicator | inline |
i_all_gather(const T &src, std::vector< T > &dst, MPI_Request *req) const (defined in dr::communicator) | dr::communicator | inline |
irecv(T *data, std::size_t size, std::size_t src_rank, auto tag, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
irecv(T *data, std::size_t size, std::size_t src_rank, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
irecv(R &data, std::size_t src_rank, int tag, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
irecv(R &data, std::size_t src_rank, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
isend(const T *data, std::size_t count, std::size_t dst_rank, auto tag, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
isend(const T *data, std::size_t count, std::size_t dst_rank, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
isend(const R &data, std::size_t dst_rank, auto tag, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
isend(const R &data, std::size_t dst_rank, MPI_Request *request) const (defined in dr::communicator) | dr::communicator | inline |
last() const (defined in dr::communicator) | dr::communicator | inline |
mpi_comm() const (defined in dr::communicator) | dr::communicator | inline |
next() const (defined in dr::communicator) | dr::communicator | inline |
operator==(const communicator &other) const (defined in dr::communicator) | dr::communicator | inline |
prev() const (defined in dr::communicator) | dr::communicator | inline |
rank() const (defined in dr::communicator) | dr::communicator | inline |
scatter(const void *src, void *dst, std::size_t count, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
scatter(const std::span< T > src, T &dst, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
scatterv(const void *src, int *counts, int *offsets, void *dst, int dst_count, std::size_t root) const (defined in dr::communicator) | dr::communicator | inline |
size() const (defined in dr::communicator) | dr::communicator | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | communicator (MPI_Comm comm=MPI_COMM_WORLD) |
+auto | size () const |
+auto | rank () const |
+auto | prev () const |
+auto | next () const |
+auto | first () const |
+auto | last () const |
+MPI_Comm | mpi_comm () const |
+void | barrier () const |
+void | bcast (void *src, std::size_t count, std::size_t root) const |
+void | scatter (const void *src, void *dst, std::size_t count, std::size_t root) const |
+template<typename T > | |
void | scatter (const std::span< T > src, T &dst, std::size_t root) const |
+void | scatterv (const void *src, int *counts, int *offsets, void *dst, int dst_count, std::size_t root) const |
+void | gather (const void *src, void *dst, std::size_t count, std::size_t root) const |
+template<typename T > | |
void | gather (const T &src, std::span< T > dst, std::size_t root) const |
+template<typename T > | |
void | all_gather (const T *src, T *dst, std::size_t count) const |
+template<typename T > | |
void | all_gather (const T &src, std::vector< T > &dst) const |
+template<rng::contiguous_range R> | |
void | all_gather (const R &src, R &dst) const |
+template<typename T > | |
void | i_all_gather (const T *src, T *dst, std::size_t count, MPI_Request *req) const |
+template<typename T > | |
void | i_all_gather (const T &src, std::vector< T > &dst, MPI_Request *req) const |
+void | gatherv (const void *src, int *counts, int *offsets, void *dst, std::size_t root) const |
+template<typename T > | |
void | isend (const T *data, std::size_t count, std::size_t dst_rank, auto tag, MPI_Request *request) const |
+template<typename T > | |
void | isend (const T *data, std::size_t count, std::size_t dst_rank, MPI_Request *request) const |
+template<rng::contiguous_range R> | |
void | isend (const R &data, std::size_t dst_rank, auto tag, MPI_Request *request) const |
+template<rng::contiguous_range R> | |
void | isend (const R &data, std::size_t dst_rank, MPI_Request *request) const |
+template<typename T > | |
void | irecv (T *data, std::size_t size, std::size_t src_rank, auto tag, MPI_Request *request) const |
+template<typename T > | |
void | irecv (T *data, std::size_t size, std::size_t src_rank, MPI_Request *request) const |
+template<rng::contiguous_range R> | |
void | irecv (R &data, std::size_t src_rank, int tag, MPI_Request *request) const |
+template<rng::contiguous_range R> | |
void | irecv (R &data, std::size_t src_rank, MPI_Request *request) const |
+template<rng::contiguous_range R> | |
void | alltoall (const R &sendr, R &recvr, std::size_t count) |
+template<typename T > | |
void | alltoall (const T *send, T *receive, std::size_t count) |
+template<rng::contiguous_range SendR, rng::contiguous_range RecvR> | |
void | alltoallv (const SendR &sendbuf, const std::vector< std::size_t > &sendcnt, const std::vector< std::size_t > &senddsp, RecvR &recvbuf, const std::vector< std::size_t > &recvcnt, const std::vector< std::size_t > &recvdsp) |
+bool | operator== (const communicator &other) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::index< T >, including all inherited members.
+first (defined in dr::index< T >) | dr::index< T > | |
first_type typedef (defined in dr::index< T >) | dr::index< T > | |
get() const noexcept (defined in dr::index< T >) | dr::index< T > | inline |
index(index_type first, index_type second) (defined in dr::index< T >) | dr::index< T > | inline |
index(Tuple tuple) (defined in dr::index< T >) | dr::index< T > | inline |
index(std::initializer_list< U > tuple) (defined in dr::index< T >) | dr::index< T > | inline |
index()=default (defined in dr::index< T >) | dr::index< T > | |
index(const index &)=default (defined in dr::index< T >) | dr::index< T > | |
index(index &&)=default (defined in dr::index< T >) | dr::index< T > | |
index_type typedef (defined in dr::index< T >) | dr::index< T > | |
operator index< U >() const noexcept (defined in dr::index< T >) | dr::index< T > | inline |
operator index< U >() const noexcept (defined in dr::index< T >) | dr::index< T > | inlineexplicit |
operator<(const index &other) const noexcept (defined in dr::index< T >) | dr::index< T > | inline |
operator=(const index &)=default (defined in dr::index< T >) | dr::index< T > | |
operator=(index &&)=default (defined in dr::index< T >) | dr::index< T > | |
operator==(const index &) const noexcept=default (defined in dr::index< T >) | dr::index< T > | |
operator[](index_type dim) const noexcept (defined in dr::index< T >) | dr::index< T > | inline |
second (defined in dr::index< T >) | dr::index< T > | |
second_type typedef (defined in dr::index< T >) | dr::index< T > | |
~index()=default (defined in dr::index< T >) | dr::index< T > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | index_type = T |
+using | first_type = T |
+using | second_type = T |
+Public Member Functions | |
+constexpr index_type | operator[] (index_type dim) const noexcept |
+template<std::integral U> +requires (std::numeric_limits<U>::max() >= std::numeric_limits<T>::max()) | |
constexpr | operator index< U > () const noexcept |
+template<std::integral U> +requires (std::numeric_limits<U>::max() < std::numeric_limits<T>::max()) | |
constexpr | operator index< U > () const noexcept |
+constexpr | index (index_type first, index_type second) |
+template<TupleLike< T, T > Tuple> | |
constexpr | index (Tuple tuple) |
+template<std::integral U> | |
constexpr | index (std::initializer_list< U > tuple) |
+constexpr bool | operator== (const index &) const noexcept=default |
+constexpr bool | operator< (const index &other) const noexcept |
+template<std::size_t Index> +requires (Index <= 1) | |
constexpr T | get () const noexcept |
+ | index (const index &)=default |
+index & | operator= (const index &)=default |
+ | index (index &&)=default |
+index & | operator= (index &&)=default |
+Public Attributes | |
+index_type | first |
+index_type | second |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::iterator_adaptor< Accessor >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | accessor_type = Accessor |
+using | const_accessor_type = typename Accessor::const_iterator_accessor |
+using | nonconst_accessor_type = typename Accessor::nonconst_iterator_accessor |
+using | difference_type = typename Accessor::difference_type |
+using | value_type = typename Accessor::value_type |
+using | iterator = iterator_adaptor< accessor_type > |
+using | const_iterator = iterator_adaptor< const_accessor_type > |
+using | reference = typename Accessor::reference |
+using | iterator_category = typename Accessor::iterator_category |
+using | nonconst_iterator = iterator_adaptor< nonconst_accessor_type > |
+Public Member Functions | |
+ | iterator_adaptor (const iterator_adaptor &)=default |
+iterator_adaptor & | operator= (const iterator_adaptor &)=default |
+template<typename... Args> +requires ( sizeof...(Args) >= 1 && !((sizeof...(Args) == 1 && (std::is_same_v<nonconst_iterator, std::decay_t<Args>> || ...)) || (std::is_same_v<const_iterator, std::decay_t<Args>> || ...) || (std::is_same_v<nonconst_accessor_type, std::decay_t<Args>> || ...) || (std::is_same_v<const_accessor_type, std::decay_t<Args>> || ...)) && std::is_constructible_v<accessor_type, Args...>) | |
iterator_adaptor (Args &&...args) | |
+ | iterator_adaptor (const accessor_type &accessor) |
+ | iterator_adaptor (const const_accessor_type &accessor) |
+ | operator const_iterator () const |
+bool | operator== (const_iterator other) const |
+bool | operator!= (const_iterator other) const |
+bool | operator< (const_iterator other) const |
+bool | operator<= (const_iterator other) const |
+bool | operator> (const_iterator other) const |
+bool | operator>= (const_iterator other) const |
+reference | operator* () const |
+reference | operator[] (difference_type offset) const |
+iterator & | operator+= (difference_type offset) noexcept |
+iterator & | operator-= (difference_type offset) noexcept |
+iterator | operator+ (difference_type offset) const |
+iterator | operator- (difference_type offset) const |
+difference_type | operator- (const_iterator other) const |
+iterator & | operator++ () noexcept |
+iterator & | operator++ () noexcept |
+iterator | operator++ (int) noexcept |
+iterator & | operator-- () noexcept |
+iterator | operator-- (int) noexcept |
+auto | segments () const noexcept |
+Friends | |
+iterator | operator+ (difference_type n, iterator iter) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::logger, including all inherited members.
+base enum value (defined in dr::logger) | dr::logger | |
debug(const nostd::source_location &location, std::string format, Args &&...args) (defined in dr::logger) | dr::logger | inline |
debug(std::string format, Args &&...args) (defined in dr::logger) | dr::logger | inline |
debug(filters filter, std::string format, Args &&...args) (defined in dr::logger) | dr::logger | inline |
filter(const std::vector< std::string > &names) (defined in dr::logger) | dr::logger | inline |
filters enum name (defined in dr::logger) | dr::logger | |
for_each enum value (defined in dr::logger) | dr::logger | |
last enum value (defined in dr::logger) | dr::logger | |
logger() (defined in dr::logger) | dr::logger | inline |
mdspan_view enum value (defined in dr::logger) | dr::logger | |
mpi enum value (defined in dr::logger) | dr::logger | |
set_file(std::ofstream &fout) (defined in dr::logger) | dr::logger | inline |
transpose enum value (defined in dr::logger) | dr::logger |
+ Distributed Ranges
+
+ |
+
+Public Types | |
enum | filters { + base +, for_each +, transpose +, mdspan_view +, + mpi +, last + + } |
+Public Member Functions | |
+void | set_file (std::ofstream &fout) |
+void | filter (const std::vector< std::string > &names) |
+template<typename... Args> | |
void | debug (const nostd::source_location &location, std::string format, Args &&...args) |
+template<typename... Args> | |
void | debug (std::string format, Args &&...args) |
+template<typename... Args> | |
void | debug (filters filter, std::string format, Args &&...args) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::MpiBackend, including all inherited members.
+allocate(std::size_t data_size) (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
deallocate(void *data, std::size_t data_size) (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
fence() (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
getmem(void *dst, std::size_t offset, std::size_t datalen, int segment_index) (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
getrank() (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
putmem(void const *src, std::size_t offset, std::size_t datalen, int segment_index) (defined in dr::mp::MpiBackend) | dr::mp::MpiBackend | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::__detail::allocator< T >, including all inherited members.
+allocate(std::size_t sz) (defined in dr::mp::__detail::allocator< T >) | dr::mp::__detail::allocator< T > | inline |
deallocate(T *ptr, std::size_t sz) (defined in dr::mp::__detail::allocator< T >) | dr::mp::__detail::allocator< T > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+T * | allocate (std::size_t sz) |
+void | deallocate (T *ptr, std::size_t sz) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::__detail::buffer< T >, including all inherited members.
+begin() (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
buffer(std::size_t cnt) (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
data() (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
end() (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
replace(buffer &other) (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
resize(std::size_t cnt) (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
size() (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
value_type typedef (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | |
~buffer() (defined in dr::mp::__detail::buffer< T >) | dr::mp::__detail::buffer< T > | inline |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = T |
+Public Member Functions | |
+std::size_t | size () |
+ | buffer (std::size_t cnt) |
+T * | resize (std::size_t cnt) |
+void | replace (buffer &other) |
+T * | data () |
+T * | begin () |
+T * | end () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::__detail::md_segment< BaseSegment, Rank >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | index_type = dr::__detail::dr_extents< Rank > |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >, including all inherited members.
+index_type typedef (defined in dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >) | dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout > | |
mdspan() const (defined in dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >) | dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout > | inline |
mdsub_segment() (defined in dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >) | dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout > | inline |
mdsub_segment(BaseSegment segment, const index_type &slice_starts, const index_type &slice_ends) (defined in dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >) | dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout > | inline |
root_mdspan() const (defined in dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout >) | dr::mp::__detail::mdsub_segment< BaseSegment, Rank, Layout > | inline |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | index_type = dr::__detail::dr_extents< Rank > |
+Public Member Functions | |
+ | mdsub_segment (BaseSegment segment, const index_type &slice_starts, const index_type &slice_ends) |
+auto | mdspan () const |
+auto | root_mdspan () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::__detail::tmp_buffer< T >, including all inherited members.
+data() (defined in dr::mp::__detail::tmp_buffer< T >) | dr::mp::__detail::tmp_buffer< T > | inline |
tmp_buffer(std::size_t size, auto &&candidate) (defined in dr::mp::__detail::tmp_buffer< T >) | dr::mp::__detail::tmp_buffer< T > | inline |
~tmp_buffer() (defined in dr::mp::__detail::tmp_buffer< T >) | dr::mp::__detail::tmp_buffer< T > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | tmp_buffer (std::size_t size, auto &&candidate) |
+T * | data () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::count_fn_, including all inherited members.
+operator()(std::size_t root, DR &&dr, const T &value) const (defined in dr::mp::count_fn_) | dr::mp::count_fn_ | inline |
operator()(DR &&dr, const T &value) const (defined in dr::mp::count_fn_) | dr::mp::count_fn_ | inline |
operator()(std::size_t root, DI first, DI last, const T &value) const (defined in dr::mp::count_fn_) | dr::mp::count_fn_ | inline |
operator()(DI first, DI last, const T &value) const (defined in dr::mp::count_fn_) | dr::mp::count_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<typename T , dr::distributed_range DR> | |
auto | operator() (std::size_t root, DR &&dr, const T &value) const |
+template<typename T , dr::distributed_range DR> | |
auto | operator() (DR &&dr, const T &value) const |
+template<typename T , dr::distributed_iterator DI> | |
auto | operator() (std::size_t root, DI first, DI last, const T &value) const |
+template<typename T , dr::distributed_iterator DI> | |
auto | operator() (DI first, DI last, const T &value) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::count_if_fn_, including all inherited members.
+operator()(std::size_t root, DR &&dr, auto &&pred) const (defined in dr::mp::count_if_fn_) | dr::mp::count_if_fn_ | inline |
operator()(DR &&dr, auto &&pred) const (defined in dr::mp::count_if_fn_) | dr::mp::count_if_fn_ | inline |
operator()(std::size_t root, DI first, DI last, auto &&pred) const (defined in dr::mp::count_if_fn_) | dr::mp::count_if_fn_ | inline |
operator()(DI first, DI last, auto &&pred) const (defined in dr::mp::count_if_fn_) | dr::mp::count_if_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<dr::distributed_range DR> | |
auto | operator() (std::size_t root, DR &&dr, auto &&pred) const |
+template<dr::distributed_range DR> | |
auto | operator() (DR &&dr, auto &&pred) const |
+template<dr::distributed_iterator DI> | |
auto | operator() (std::size_t root, DI first, DI last, auto &&pred) const |
+template<dr::distributed_iterator DI> | |
auto | operator() (DI first, DI last, auto &&pred) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::distributed_mdarray< T, Rank >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | shape_type = dr::__detail::dr_extents< Rank > |
+Public Member Functions | |
+ | distributed_mdarray (dr::__detail::dr_extents< Rank > shape, distribution dist=distribution()) |
+auto | begin () const |
+auto | end () const |
+auto | size () const |
+auto | operator[] (auto n) |
+auto | segments () |
+auto & | halo () const |
+auto | mdspan () const |
+auto | extent (std::size_t r) const |
+auto | grid () |
+auto | view () const |
+auto | operator== (const distributed_mdarray &other) const |
+Static Public Member Functions | |
+static constexpr auto | rank () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::distributed_vector< T, BackendT >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
distributed vector + More...
+ +#include <distributed_vector.hpp>
+Classes | |
class | iterator |
+Public Types | |
+using | value_type = T |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | backend_type = BackendT |
+Public Member Functions | |
+ | distributed_vector (const distributed_vector &)=delete |
+distributed_vector & | operator= (const distributed_vector &)=delete |
+ | distributed_vector (distributed_vector &&) |
+ | distributed_vector (std::size_t size=0, distribution dist=distribution()) |
Constructor. | |
+ | distributed_vector (std::size_t size, value_type fill_value, distribution dist=distribution()) |
Constructor. | |
+auto | begin () const |
Returns iterator to beginning. | |
+auto | end () const |
Returns iterator to end. | |
+auto | size () const |
Returns size. | |
+auto | operator[] (difference_type n) const |
Returns reference using index. | |
+auto & | halo () const |
+auto | segments () const |
+void | fence () |
distributed vector
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::distributed_vector< T, BackendT >::iterator, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | iterator (const distributed_vector *parent, difference_type offset) |
+auto | operator+ (difference_type n) const |
+auto | operator- (difference_type n) const |
+auto | operator- (iterator other) const |
+auto & | operator+= (difference_type n) |
+auto & | operator-= (difference_type n) |
+auto & | operator++ () |
+auto | operator++ (int) |
+auto & | operator-- () |
+auto | operator-- (int) |
+bool | operator== (iterator other) const |
+auto | operator<=> (iterator other) const |
+auto | operator* () const |
+auto | operator[] (difference_type n) const |
+auto | local () |
+auto | segments () |
+Friends | |
+auto | operator+ (difference_type n, const iterator &other) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::dv_segment< DV >, including all inherited members.
+begin() const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
difference_type typedef (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | |
dv_segment()=default (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | |
dv_segment(DV *dv, std::size_t segment_index, std::size_t size, std::size_t reserved) (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
end() const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
is_local() const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
operator[](difference_type n) const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
reserved() const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
size() const (defined in dr::mp::dv_segment< DV >) | dr::mp::dv_segment< DV > | inline |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | difference_type = std::ptrdiff_t |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::dv_segment_iterator< DV >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = typename DV::value_type |
+using | size_type = typename DV::size_type |
+using | difference_type = typename DV::difference_type |
+Public Member Functions | |
+ | dv_segment_iterator (DV *dv, std::size_t segment_index, std::size_t index) |
+auto | operator<=> (const dv_segment_iterator &other) const noexcept |
+bool | operator== (const dv_segment_iterator &other) const noexcept |
+auto & | operator+= (difference_type n) |
+auto & | operator-= (difference_type n) |
+difference_type | operator- (const dv_segment_iterator &other) const noexcept |
+auto & | operator++ () |
+auto & | operator-- () |
+auto | operator++ (int) |
+auto | operator-- (int) |
+auto | operator+ (difference_type n) const |
+auto | operator- (difference_type n) const |
+auto | operator* () const |
+auto | operator[] (difference_type n) const |
+void | get (value_type *dst, std::size_t size) const |
+value_type | get () const |
+void | put (const value_type *dst, std::size_t size) const |
+void | put (const value_type &value) const |
+auto | rank () const |
+auto | local () const |
+auto | segments () const |
+auto & | halo () const |
+auto | halo_bounds () const |
+Friends | |
+auto | operator+ (difference_type n, const dv_segment_iterator &other) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::dv_segment_reference< DV >, including all inherited members.
+dv_segment_reference(const iterator it) (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > | inline |
operator value_type() const (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > | inline |
operator&() const (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > | inline |
operator=(const value_type &value) const (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > | inline |
operator=(const dv_segment_reference &other) const (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > | inline |
value_type typedef (defined in dr::mp::dv_segment_reference< DV >) | dr::mp::dv_segment_reference< DV > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = typename DV::value_type |
+Public Member Functions | |
+ | dv_segment_reference (const iterator it) |
+ | operator value_type () const |
+auto | operator= (const value_type &value) const |
+auto | operator= (const dv_segment_reference &other) const |
+auto | operator& () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::halo_impl< Group >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Classes | |
struct | max_op |
struct | min_op |
struct | multiplies_op |
struct | plus_op |
struct | second_op |
+Public Types | |
+using | group_type = Group |
+Public Member Functions | |
+ | halo_impl (const halo_impl &)=delete |
+halo_impl | operator= (const halo_impl &)=delete |
+ | halo_impl (communicator comm, const std::vector< Group > &owned_groups, const std::vector< Group > &halo_groups, const Memory &memory=Memory()) |
halo constructor | |
+void | exchange_begin () |
Begin a halo exchange. | |
+void | exchange_finalize () |
Complete a halo exchange. | |
+void | exchange () |
+void | reduce_begin () |
Begin a halo reduction. | |
+void | reduce_finalize (const auto &op) |
Complete a halo reduction. | |
+void | reduce_finalize () |
Complete a halo reduction. | |
+Public Attributes | |
+struct dr::mp::halo_impl::second_op | second |
+struct dr::mp::halo_impl::plus_op | plus |
+struct dr::mp::halo_impl::max_op | max |
+struct dr::mp::halo_impl::min_op | min |
+struct dr::mp::halo_impl::multiplies_op | multiplies |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::index_group< T, Memory >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = T |
+using | memory_type = Memory |
+Public Member Functions | |
+ | index_group (T *data, std::size_t rank, const std::vector< std::size_t > &indices, const Memory &memory) |
Constructor. | |
+ | index_group (const index_group &o) |
+void | unpack (const auto &op) |
+void | pack () |
+std::size_t | buffer_size () |
+T * | data_pointer () |
+std::size_t | data_size () |
+std::size_t | rank () |
+auto | tag () |
+Public Attributes | |
+T * | buffer = nullptr |
+std::size_t | request_index |
+bool | receive |
+bool | buffered |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::segmented_view< R, SegTpl >, including all inherited members.
+begin() const (defined in dr::mp::segmented_view< R, SegTpl >) | dr::mp::segmented_view< R, SegTpl > | inline |
end() const (defined in dr::mp::segmented_view< R, SegTpl >) | dr::mp::segmented_view< R, SegTpl > | inline |
segmented_view(V1 &&r, V2 &&tpl) (defined in dr::mp::segmented_view< R, SegTpl >) | dr::mp::segmented_view< R, SegTpl > | inline |
size() const (defined in dr::mp::segmented_view< R, SegTpl >) | dr::mp::segmented_view< R, SegTpl > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<typename V1 , typename V2 > | |
segmented_view (V1 &&r, V2 &&tpl) | |
+auto | begin () const |
+auto | end () const |
+auto | size () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::segmented_view_iterator< BaseIter, SegTplIter, SegTplSentinel >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | iterator_category = std::forward_iterator_tag |
+using | difference_type = rng::iter_difference_t< SegTplIter > |
+using | value_type = dr::remote_subrange< BaseIter > |
+Public Member Functions | |
+ | segmented_view_iterator (BaseIter base_begin, SegTplIter tpl_begin, SegTplSentinel tpl_end) |
+auto | operator== (segmented_view_iterator other) const |
+auto & | operator++ () |
+auto | operator++ (int) |
+auto | operator* () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::span_group< T, Memory >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = T |
+using | memory_type = Memory |
+Public Attributes | |
+T * | buffer = nullptr |
+std::size_t | request_index = 0 |
+bool | receive = false |
+bool | buffered = false |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::span_halo< T, Memory >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | group_type = span_group< T, Memory > |
Public Types inherited from dr::mp::halo_impl< Group > | |
+using | group_type = Group |
+Public Member Functions | |
+ | span_halo (communicator comm, T *data, std::size_t size, halo_bounds hb) |
+ | span_halo (communicator comm, std::span< T > span, halo_bounds hb) |
Public Member Functions inherited from dr::mp::halo_impl< Group > | |
+ | halo_impl (const halo_impl &)=delete |
+halo_impl | operator= (const halo_impl &)=delete |
+ | halo_impl (communicator comm, const std::vector< Group > &owned_groups, const std::vector< Group > &halo_groups, const Memory &memory=Memory()) |
halo constructor | |
+void | exchange_begin () |
Begin a halo exchange. | |
+void | exchange_finalize () |
Complete a halo exchange. | |
+void | exchange () |
+void | reduce_begin () |
Begin a halo reduction. | |
+void | reduce_finalize (const auto &op) |
Complete a halo reduction. | |
+void | reduce_finalize () |
Complete a halo reduction. | |
+Additional Inherited Members | |
Public Attributes inherited from dr::mp::halo_impl< Group > | |
+struct dr::mp::halo_impl::second_op | second |
+struct dr::mp::halo_impl::plus_op | plus |
+struct dr::mp::halo_impl::max_op | max |
+struct dr::mp::halo_impl::min_op | min |
+struct dr::mp::halo_impl::multiplies_op | multiplies |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::subrange< DM >, including all inherited members.
+begin() const (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
end() const (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
halo() const (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
iterator typedef (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | |
segments() const (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
size() (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
subrange(DM &dm, std::pair< std::size_t, std::size_t > row_rng, std::pair< std::size_t, std::size_t > col_rng) (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > | inline |
value_type typedef (defined in dr::mp::subrange< DM >) | dr::mp::subrange< DM > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | iterator = subrange_iterator< DM > |
+using | value_type = typename DM::value_type |
+Public Member Functions | |
+ | subrange (DM &dm, std::pair< std::size_t, std::size_t > row_rng, std::pair< std::size_t, std::size_t > col_rng) |
+iterator | begin () const |
+iterator | end () const |
+auto | size () |
+auto & | halo () const |
+auto | segments () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::subrange_iterator< DM >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = typename DM::value_type |
+using | difference_type = typename DM::difference_type |
+Public Member Functions | |
+ | subrange_iterator (DM *dm, std::pair< std::size_t, std::size_t > row_rng, std::pair< std::size_t, std::size_t > col_rng, difference_type index=0) noexcept |
+value_type & | operator* () const |
+dm_row< value_type > | operator[] (int n) |
+value_type & | operator[] (std::pair< int, int > p) |
+auto | operator<=> (const subrange_iterator &other) const noexcept |
+auto & | operator-= (difference_type n) |
+auto & | operator+= (difference_type n) |
+difference_type | operator- (const subrange_iterator &other) const noexcept |
+auto & | operator++ () |
+auto & | operator-- () |
+auto | operator++ (int) |
+auto | operator-- (int) |
+auto | operator+ (difference_type n) const |
+auto | operator- (difference_type n) const |
+auto & | halo () |
+auto | segments () |
+bool | is_local () |
+std::size_t | find_dm_offset () const |
+Friends | |
+bool | operator== (subrange_iterator &first, subrange_iterator &second) |
+bool | operator!= (subrange_iterator &first, subrange_iterator &second) |
+bool | operator== (subrange_iterator first, subrange_iterator second) |
+bool | operator!= (subrange_iterator first, subrange_iterator second) |
+auto | operator+ (difference_type n, const subrange_iterator &other) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::unstructured_halo< T, Memory >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | group_type = index_group< T, Memory > |
+using | index_map = std::pair< std::size_t, std::vector< std::size_t > > |
Public Types inherited from dr::mp::halo_impl< Group > | |
+using | group_type = Group |
+Public Member Functions | |
unstructured_halo (communicator comm, T *data, const std::vector< index_map > &owned, const std::vector< index_map > &halo, const Memory &memory=Memory()) | |
Public Member Functions inherited from dr::mp::halo_impl< Group > | |
+ | halo_impl (const halo_impl &)=delete |
+halo_impl | operator= (const halo_impl &)=delete |
+ | halo_impl (communicator comm, const std::vector< Group > &owned_groups, const std::vector< Group > &halo_groups, const Memory &memory=Memory()) |
halo constructor | |
+void | exchange_begin () |
Begin a halo exchange. | |
+void | exchange_finalize () |
Complete a halo exchange. | |
+void | exchange () |
+void | reduce_begin () |
Begin a halo reduction. | |
+void | reduce_finalize (const auto &op) |
Complete a halo reduction. | |
+void | reduce_finalize () |
Complete a halo reduction. | |
+Additional Inherited Members | |
Public Attributes inherited from dr::mp::halo_impl< Group > | |
+struct dr::mp::halo_impl::second_op | second |
+struct dr::mp::halo_impl::plus_op | plus |
+struct dr::mp::halo_impl::max_op | max |
+struct dr::mp::halo_impl::min_op | min |
+struct dr::mp::halo_impl::multiplies_op | multiplies |
+
|
+ +inline | +
Constructor
+ +
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::enumerate_adapter_closure, including all inherited members.
+operator()(R &&r) const (defined in dr::mp::views::enumerate_adapter_closure) | dr::mp::views::enumerate_adapter_closure | inline |
operator| (defined in dr::mp::views::enumerate_adapter_closure) | dr::mp::views::enumerate_adapter_closure | friend |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> +requires (rng::sized_range<R>) | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const enumerate_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::enumerate_fn_, including all inherited members.
+enumerate() const (defined in dr::mp::views::enumerate_fn_) | dr::mp::views::enumerate_fn_ | inline |
operator()(R &&r) const (defined in dr::mp::views::enumerate_fn_) | dr::mp::views::enumerate_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> | |
constexpr auto | operator() (R &&r) const |
+auto | enumerate () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::mdspan_adapter_closure< Rank >, including all inherited members.
+mdspan_adapter_closure(dr::__detail::dr_extents< Rank > full_shape, dr::__detail::dr_extents< Rank > tile_shape) (defined in dr::mp::views::mdspan_adapter_closure< Rank >) | dr::mp::views::mdspan_adapter_closure< Rank > | inline |
mdspan_adapter_closure(dr::__detail::dr_extents< Rank > full_shape) (defined in dr::mp::views::mdspan_adapter_closure< Rank >) | dr::mp::views::mdspan_adapter_closure< Rank > | inline |
operator()(R &&r) const (defined in dr::mp::views::mdspan_adapter_closure< Rank >) | dr::mp::views::mdspan_adapter_closure< Rank > | inline |
operator| (defined in dr::mp::views::mdspan_adapter_closure< Rank >) | dr::mp::views::mdspan_adapter_closure< Rank > | friend |
+ Distributed Ranges
+
+ |
+
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const mdspan_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::mdspan_fn_, including all inherited members.
+operator()(R &&r, Shape &&full_shape, Shape &&tile_shape) const (defined in dr::mp::views::mdspan_fn_) | dr::mp::views::mdspan_fn_ | inline |
operator()(R &&r, Shape &&full_shape) const (defined in dr::mp::views::mdspan_fn_) | dr::mp::views::mdspan_fn_ | inline |
operator()(Shape &&full_shape, Shape &&tile_shape) const (defined in dr::mp::views::mdspan_fn_) | dr::mp::views::mdspan_fn_ | inline |
operator()(Shape &&full_shape) const (defined in dr::mp::views::mdspan_fn_) | dr::mp::views::mdspan_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::submdspan_adapter_closure< Extents >, including all inherited members.
+operator()(R &&r) const (defined in dr::mp::views::submdspan_adapter_closure< Extents >) | dr::mp::views::submdspan_adapter_closure< Extents > | inline |
operator| (defined in dr::mp::views::submdspan_adapter_closure< Extents >) | dr::mp::views::submdspan_adapter_closure< Extents > | friend |
submdspan_adapter_closure(Extents slice_starts, Extents slice_ends) (defined in dr::mp::views::submdspan_adapter_closure< Extents >) | dr::mp::views::submdspan_adapter_closure< Extents > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | submdspan_adapter_closure (Extents slice_starts, Extents slice_ends) |
+template<rng::viewable_range R> | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const submdspan_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::views::submdspan_fn_, including all inherited members.
+operator()(R r, Extents &&slice_starts, Extents &&slice_ends) const (defined in dr::mp::views::submdspan_fn_) | dr::mp::views::submdspan_fn_ | inline |
operator()(Extents &&slice_starts, Extents &&slice_ends) const (defined in dr::mp::views::submdspan_fn_) | dr::mp::views::submdspan_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::zip_iterator< RngIter, BaseIters >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = rng::iter_value_t< RngIter > |
+using | difference_type = rng::iter_difference_t< RngIter > |
+using | iterator_category = std::random_access_iterator_tag |
+Public Member Functions | |
+ | zip_iterator (RngIter rng_iter, BaseIters... base_iters) |
+auto | operator+ (difference_type n) const |
+auto | operator- (difference_type n) const |
+auto | operator- (zip_iterator other) const |
+auto & | operator+= (difference_type n) |
+auto & | operator-= (difference_type n) |
+auto & | operator++ () |
+auto | operator++ (int) |
+auto & | operator-- () |
+auto | operator-- (int) |
+bool | operator== (zip_iterator other) const |
+auto | operator<=> (zip_iterator other) const |
+auto | operator* () const |
+auto | operator[] (difference_type n) const |
+auto | segments () const |
+auto | rank () const |
+auto | local () const |
+Friends | |
+auto | operator+ (difference_type n, const zip_iterator &other) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::mp::zip_view< Rs >, including all inherited members.
+base() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
begin() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
end() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
local() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
operator[](difference_type n) const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
rank() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
segments() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
size() const (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
zip_view(Rs... rs) (defined in dr::mp::zip_view< Rs >) | dr::mp::zip_view< Rs > | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::normal_distributed_iterator_accessor< V >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = rng::range_value_t< rng::range_reference_t< V > > |
+using | segment_type = rng::range_value_t< V > |
+using | size_type = rng::range_size_t< segment_type > |
+using | difference_type = rng::range_difference_t< segment_type > |
+using | reference = rng::range_reference_t< segment_type > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = normal_distributed_iterator_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | normal_distributed_iterator_accessor (const normal_distributed_iterator_accessor &) noexcept=default |
+constexpr normal_distributed_iterator_accessor & | operator= (const normal_distributed_iterator_accessor &) noexcept=default |
+constexpr | normal_distributed_iterator_accessor (V segments, size_type segment_id, size_type idx) noexcept |
+constexpr normal_distributed_iterator_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+auto | segments () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::remote_subrange< I >, including all inherited members.
+rank() const noexcept (defined in dr::remote_subrange< I >) | dr::remote_subrange< I > | inline |
remote_subrange()=default (defined in dr::remote_subrange< I >) | dr::remote_subrange< I > | |
remote_subrange(I first, I last, std::size_t rank) (defined in dr::remote_subrange< I >) | dr::remote_subrange< I > | inline |
remote_subrange(R &&r, std::size_t rank) (defined in dr::remote_subrange< I >) | dr::remote_subrange< I > | inline |
remote_subrange(R &&r) (defined in dr::remote_subrange< I >) | dr::remote_subrange< I > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+constexpr | remote_subrange (I first, I last, std::size_t rank) |
+template<rng::forward_range R> | |
constexpr | remote_subrange (R &&r, std::size_t rank) |
+template<dr::remote_range R> | |
constexpr | remote_subrange (R &&r) |
+constexpr std::size_t | rank () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::rma_window, including all inherited members.
+communicator() const (defined in dr::rma_window) | dr::rma_window | inline |
create(communicator comm, void *data, std::size_t size) (defined in dr::rma_window) | dr::rma_window | inline |
fence() const (defined in dr::rma_window) | dr::rma_window | inline |
flush(std::size_t rank) const (defined in dr::rma_window) | dr::rma_window | inline |
free() (defined in dr::rma_window) | dr::rma_window | inline |
get(std::size_t rank, std::size_t disp) const (defined in dr::rma_window) | dr::rma_window | inline |
get(void *dst, std::size_t size, std::size_t rank, std::size_t disp) const (defined in dr::rma_window) | dr::rma_window | inline |
local_data() (defined in dr::rma_window) | dr::rma_window | inline |
mpi_win() (defined in dr::rma_window) | dr::rma_window | inline |
null() const noexcept (defined in dr::rma_window) | dr::rma_window | inline |
operator==(const rma_window other) const noexcept (defined in dr::rma_window) | dr::rma_window | inline |
put(const auto &src, std::size_t rank, std::size_t disp) const (defined in dr::rma_window) | dr::rma_window | inline |
put(const void *src, std::size_t size, std::size_t rank, std::size_t disp) const (defined in dr::rma_window) | dr::rma_window | inline |
set_null() (defined in dr::rma_window) | dr::rma_window | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+void | create (communicator comm, void *data, std::size_t size) |
+template<typename T > | |
auto | local_data () |
+void | free () |
+bool | operator== (const rma_window other) const noexcept |
+void | set_null () |
+bool | null () const noexcept |
+template<typename T > | |
T | get (std::size_t rank, std::size_t disp) const |
+void | get (void *dst, std::size_t size, std::size_t rank, std::size_t disp) const |
+void | put (const auto &src, std::size_t rank, std::size_t disp) const |
+void | put (const void *src, std::size_t size, std::size_t rank, std::size_t disp) const |
+void | fence () const |
+void | flush (std::size_t rank) const |
+const auto & | communicator () const |
+auto | mpi_win () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::__detail::coo_matrix< T, I, Allocator >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = dr::sp::matrix_entry< T, I > |
+using | scalar_type = T |
+using | index_type = I |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | allocator_type = Allocator |
+using | key_type = dr::index< I > |
+using | map_type = T |
+using | backend_allocator_type = typename std::allocator_traits< allocator_type >::template rebind_alloc< value_type > |
+using | backend_type = std::vector< value_type, backend_allocator_type > |
+using | iterator = typename backend_type::iterator |
+using | const_iterator = typename backend_type::const_iterator |
+using | reference = dr::sp::matrix_ref< T, I > |
+using | const_reference = dr::sp::matrix_ref< std::add_const_t< T >, I > |
+using | scalar_reference = T & |
+Public Member Functions | |
+ | coo_matrix (dr::index< I > shape) |
+dr::index< I > | shape () const noexcept |
+size_type | size () const noexcept |
+void | reserve (size_type new_cap) |
+iterator | begin () noexcept |
+const_iterator | begin () const noexcept |
+iterator | end () noexcept |
+const_iterator | end () const noexcept |
+template<typename InputIt > | |
void | insert (InputIt first, InputIt last) |
+template<typename InputIt > | |
void | push_back (InputIt first, InputIt last) |
+void | push_back (const value_type &value) |
+template<typename InputIt > | |
void | assign_tuples (InputIt first, InputIt last) |
+std::pair< iterator, bool > | insert (value_type &&value) |
+std::pair< iterator, bool > | insert (const value_type &value) |
+template<class M > | |
std::pair< iterator, bool > | insert_or_assign (key_type k, M &&obj) |
+iterator | find (key_type key) noexcept |
+const_iterator | find (key_type key) const noexcept |
+void | reshape (dr::index< I > shape) |
+ | coo_matrix (const coo_matrix &)=default |
+ | coo_matrix (coo_matrix &&)=default |
+coo_matrix & | operator= (const coo_matrix &)=default |
+coo_matrix & | operator= (coo_matrix &&)=default |
+std::size_t | nbytes () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::block_cyclic, including all inherited members.
+block_cyclic(dr::index<> tile_shape={dr::sp::tile::div, dr::sp::tile::div}, dr::index<> grid_shape=detail::factor(dr::sp::nprocs())) (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline |
block_cyclic(const block_cyclic &) noexcept=default (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | |
clone() const noexcept (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline virtual |
grid_shape(dr::index<> matrix_shape) const (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline virtual |
tile_rank(dr::index<> matrix_shape, dr::index<> tile_id) const (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline virtual |
tile_shape() const (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline |
tile_shape(dr::index<> matrix_shape) const (defined in dr::sp::block_cyclic) | dr::sp::block_cyclic | inline virtual |
~matrix_partition() (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | inline virtual |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | block_cyclic (dr::index<> tile_shape={dr::sp::tile::div, dr::sp::tile::div}, dr::index<> grid_shape=detail::factor(dr::sp::nprocs())) |
+ | block_cyclic (const block_cyclic &) noexcept=default |
+dr::index | tile_shape () const |
std::size_t | tile_rank (dr::index<> matrix_shape, dr::index<> tile_id) const |
dr::index | grid_shape (dr::index<> matrix_shape) const |
dr::index | tile_shape (dr::index<> matrix_shape) const |
std::unique_ptr< matrix_partition > | clone () const noexcept |
+virtual std::size_t | tile_rank (dr::index<> matrix_shape, dr::index<> tile_id) const =0 |
+virtual dr::index | grid_shape (dr::index<> matrix_shape) const =0 |
+virtual dr::index | tile_shape (dr::index<> matrix_shape) const =0 |
+virtual std::unique_ptr< matrix_partition > | clone () const =0 |
+
|
+ +inline virtual noexcept | +
Implements dr::sp::matrix_partition.
+ +
+
|
+ +inline virtual | +
Implements dr::sp::matrix_partition.
+ +
+
|
+ +inline virtual | +
Implements dr::sp::matrix_partition.
+ +
+
|
+ +inline virtual | +
Implements dr::sp::matrix_partition.
+ +
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::buffered_allocator< Allocator >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | buffered_allocator (const Allocator &alloc, std::size_t buffer_size, std::size_t n_buffers) |
+pointer | allocate (std::size_t size) |
+void | deallocate (pointer ptr, std::size_t n) |
+bool | operator== (const buffered_allocator &) const =default |
+bool | operator!= (const buffered_allocator &) const =default |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::csr_matrix_view< T, I, TIter, IIter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_reference = std::iter_reference_t< TIter > |
+using | reference = dr::sp::matrix_ref< T, I, scalar_reference > |
+using | scalar_type = T |
+using | index_type = I |
+using | key_type = dr::index< I > |
+using | map_type = T |
+using | iterator = csr_matrix_view_iterator< T, I, TIter, IIter > |
+Public Member Functions | |
+ | csr_matrix_view (TIter values, IIter rowptr, IIter colind, key_type shape, size_type nnz, size_type rank) |
+ | csr_matrix_view (TIter values, IIter rowptr, IIter colind, key_type shape, size_type nnz, size_type rank, key_type idx_offset) |
+key_type | shape () const noexcept |
+size_type | size () const noexcept |
+std::size_t | rank () const |
+iterator | begin () const |
+iterator | end () const |
+auto | row (I row_index) const |
+auto | submatrix (key_type rows, key_type columns) const |
+auto | values_data () const |
+auto | rowptr_data () const |
+auto | colind_data () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::csr_matrix_view_accessor< T, I, TIter, IIter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_type = std::iter_value_t< TIter > |
+using | scalar_reference = std::iter_reference_t< TIter > |
+using | index_type = I |
+using | value_type = dr::sp::matrix_entry< scalar_type, I > |
+using | reference = dr::sp::matrix_ref< T, I, scalar_reference > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = csr_matrix_view_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+using | key_type = dr::index< I > |
+Public Member Functions | |
+constexpr | csr_matrix_view_accessor (const csr_matrix_view_accessor &) noexcept=default |
+constexpr csr_matrix_view_accessor & | operator= (const csr_matrix_view_accessor &) noexcept=default |
+constexpr | csr_matrix_view_accessor (TIter values, IIter rowptr, IIter colind, size_type idx, index_type row, size_type row_dim) noexcept |
+constexpr | csr_matrix_view_accessor (TIter values, IIter rowptr, IIter colind, size_type idx, index_type row, size_type row_dim, key_type idx_offset) noexcept |
+void | fast_forward_row () noexcept |
+void | fast_backward_row () noexcept |
+constexpr csr_matrix_view_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix< T, Allocator >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | allocator_type = Allocator |
+using | scalar_pointer = typename std::allocator_traits< Allocator >::pointer |
+using | scalar_reference = std::iter_reference_t< scalar_pointer > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | key_type = dr::index<> |
+using | map_type = T |
+using | iterator = dense_matrix_iterator< T, scalar_pointer > |
+Public Member Functions | |
+ | dense_matrix (key_type shape) |
+ | dense_matrix (key_type shape, std::size_t ld) |
+ | dense_matrix (key_type shape, std::size_t ld, const Allocator &alloc) |
+ | dense_matrix (dense_matrix &&other) |
+dense_matrix & | operator= (dense_matrix &&other) |
+ | dense_matrix (const dense_matrix &other)=delete |
+dense_matrix & | operator= (const dense_matrix &other)=delete |
+key_type | shape () const noexcept |
+size_type | size () const noexcept |
+scalar_reference | operator[] (key_type idx) const |
+iterator | begin () const |
+iterator | end () const |
+auto | row (size_type row_index) const |
+auto | column (size_type column_index) const |
+scalar_pointer | data () const |
+size_type | ld () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_accessor< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_type = std::iter_value_t< Iter > |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | value_type = dr::sp::matrix_entry< scalar_type, std::size_t > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = dense_matrix_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+using | key_type = dr::index<> |
+Public Member Functions | |
+constexpr | dense_matrix_accessor (const dense_matrix_accessor &) noexcept=default |
+constexpr dense_matrix_accessor & | operator= (const dense_matrix_accessor &) noexcept=default |
+constexpr | dense_matrix_accessor (Iter data, key_type idx, key_type matrix_shape, size_type ld) noexcept |
+constexpr | dense_matrix_accessor (Iter data, key_type idx, key_type idx_offset, key_type matrix_shape, size_type ld) noexcept |
+constexpr dense_matrix_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+Iter | data () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_column_accessor< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_value_type = std::iter_value_t< Iter > |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | value_type = dr::sp::matrix_entry< scalar_value_type, std::size_t > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = dense_matrix_column_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+using | key_type = dr::index<> |
+Public Member Functions | |
+constexpr | dense_matrix_column_accessor (const dense_matrix_column_accessor &) noexcept=default |
+constexpr dense_matrix_column_accessor & | operator= (const dense_matrix_column_accessor &) noexcept=default |
+constexpr | dense_matrix_column_accessor (Iter data, std::size_t i, std::size_t j, std::size_t ld) noexcept |
+constexpr dense_matrix_column_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_column_view< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | key_type = dr::index<> |
+using | map_type = T |
+using | iterator = dense_matrix_column_iterator< T, Iter > |
+Public Member Functions | |
+ | dense_matrix_column_view (Iter data, size_type column_idx, size_type size, size_type ld) |
+scalar_reference | operator[] (size_type idx) |
+iterator | begin () const |
+iterator | end () const |
+size_type | size () const noexcept |
+Public Attributes | |
+Iter | data_ |
+size_type | column_idx_ |
+size_type | size_ |
+size_type | ld_ |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_row_accessor< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_value_type = std::iter_value_t< Iter > |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | value_type = dr::sp::matrix_entry< scalar_value_type, std::size_t > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = dense_matrix_row_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+using | key_type = dr::index<> |
+Public Member Functions | |
+constexpr | dense_matrix_row_accessor (const dense_matrix_row_accessor &) noexcept=default |
+constexpr dense_matrix_row_accessor & | operator= (const dense_matrix_row_accessor &) noexcept=default |
+constexpr | dense_matrix_row_accessor (Iter data, std::size_t i, std::size_t j) noexcept |
+constexpr dense_matrix_row_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_row_view< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | key_type = dr::index<> |
+using | map_type = T |
+using | iterator = dense_matrix_row_iterator< T, Iter > |
+Public Member Functions | |
+ | dense_matrix_row_view (Iter data, size_type row_idx, size_type size) |
+scalar_reference | operator[] (size_type idx) |
+iterator | begin () const |
+iterator | end () const |
+size_type | size () const noexcept |
+Public Attributes | |
+Iter | data_ |
+size_type | row_idx_ |
+size_type | size_ |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::dense_matrix_view< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_reference = std::iter_reference_t< Iter > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | key_type = dr::index<> |
+using | map_type = T |
+using | iterator = dense_matrix_view_iterator< T, Iter > |
+Public Member Functions | |
+ | dense_matrix_view (Iter data, key_type shape, size_type ld, size_type rank) |
+ | dense_matrix_view (Iter data, key_type shape, key_type idx_offset, size_type ld, size_type rank) |
+template<typename Allocator > +requires (std::is_same_v<typename std::allocator_traits<Allocator>::pointer, Iter>) | |
dense_matrix_view (dense_matrix< T, Allocator > &m) | |
+key_type | shape () const noexcept |
+size_type | size () const noexcept |
+scalar_reference | operator[] (key_type idx) const |
+iterator | begin () const |
+iterator | end () const |
+auto | row (size_type row_index) const |
+auto | column (size_type column_index) const |
+Iter | data () const |
+std::size_t | rank () const |
+size_type | ld () const |
+auto | local () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::device_allocator< T, Alignment >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Classes | |
struct | rebind |
+Public Types | |
+using | value_type = T |
+using | pointer = device_ptr< T > |
+using | const_pointer = device_ptr< T > |
+using | reference = device_ref< T > |
+using | const_reference = device_ref< const T > |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | is_always_equal = std::false_type |
+Public Member Functions | |
+template<typename U > | |
device_allocator (const device_allocator< U, Alignment > &other) noexcept | |
+ | device_allocator (const sycl::queue &q) noexcept |
+ | device_allocator (const sycl::context &ctxt, const sycl::device &dev) noexcept |
+ | device_allocator (const device_allocator &)=default |
+device_allocator & | operator= (const device_allocator &)=default |
+pointer | allocate (std::size_t size) |
+void | deallocate (pointer ptr, std::size_t n) |
+bool | operator== (const device_allocator &) const =default |
+bool | operator!= (const device_allocator &) const =default |
+sycl::device | get_device () const noexcept |
+sycl::context | get_context () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::device_ptr< T >, including all inherited members.
+const_pointer typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
const_pointer (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
device_ptr(T *pointer) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
device_ptr() noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
device_ptr(const device_ptr &) noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
device_ptr(std::nullptr_t) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
difference_type typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
get_raw_pointer() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
iterator_category typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
local() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
nonconst_pointer typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
nonconst_pointer (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
operator const_pointer() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator device_ptr< const void >() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator device_ptr< void >() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator!=(std::nullptr_t) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator!=(const device_ptr &) const noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
operator*() const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator+(difference_type offset) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator+ (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | friend |
operator++() noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator++(int) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator+=(difference_type offset) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator-(difference_type offset) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator-(const_pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator-(pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator--() noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator--(int) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator-=(difference_type offset) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator<(const_pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator<=(const_pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator=(const device_ptr &) noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
operator=(std::nullptr_t) noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator==(std::nullptr_t) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator==(const device_ptr &) const noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
operator>(const_pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator>=(const_pointer other) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
operator[](difference_type offset) const noexcept (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | inline |
pointer typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
reference typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
value_type typedef (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > | |
~device_ptr() noexcept=default (defined in dr::sp::device_ptr< T >) | dr::sp::device_ptr< T > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = T |
+using | difference_type = std::ptrdiff_t |
+using | pointer = device_ptr< T > |
+using | const_pointer = device_ptr< std::add_const_t< T > > |
+using | nonconst_pointer = device_ptr< std::remove_const_t< T > > |
+using | iterator_category = std::random_access_iterator_tag |
+using | reference = device_ref< T > |
+Public Member Functions | |
+ | device_ptr (T *pointer) noexcept |
+ | device_ptr (const device_ptr &) noexcept=default |
+device_ptr & | operator= (const device_ptr &) noexcept=default |
+ | device_ptr (std::nullptr_t) noexcept |
+device_ptr & | operator= (std::nullptr_t) noexcept |
+ | operator device_ptr< void > () const noexcept |
+ | operator device_ptr< const void > () const noexcept |
+ | operator const_pointer () const noexcept |
+bool | operator== (std::nullptr_t) const noexcept |
+bool | operator!= (std::nullptr_t) const noexcept |
+bool | operator== (const device_ptr &) const noexcept=default |
+bool | operator!= (const device_ptr &) const noexcept=default |
+pointer | operator+ (difference_type offset) const noexcept |
+pointer | operator- (difference_type offset) const noexcept |
+difference_type | operator- (const_pointer other) const noexcept |
+difference_type | operator- (pointer other) const noexcept |
+bool | operator< (const_pointer other) const noexcept |
+bool | operator> (const_pointer other) const noexcept |
+bool | operator<= (const_pointer other) const noexcept |
+bool | operator>= (const_pointer other) const noexcept |
+pointer & | operator++ () noexcept |
+pointer | operator++ (int) noexcept |
+pointer & | operator-- () noexcept |
+pointer | operator-- (int) noexcept |
+pointer & | operator+= (difference_type offset) noexcept |
+pointer & | operator-= (difference_type offset) noexcept |
+reference | operator* () const noexcept |
+reference | operator[] (difference_type offset) const noexcept |
+T * | get_raw_pointer () const noexcept |
+T * | local () const noexcept |
+Public Attributes | |
+friend | const_pointer |
+friend | nonconst_pointer |
+Friends | |
+pointer | operator+ (difference_type n, pointer iter) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::device_ref< T >, including all inherited members.
+device_ref()=delete (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | |
device_ref(const device_ref &)=default (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | |
device_ref(T *pointer) (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | inline |
operator T() const (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | inline |
operator=(const T &value) const (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | inline |
operator=(const device_ref &other) const (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > | inline |
~device_ref()=default (defined in dr::sp::device_ref< T >) | dr::sp::device_ref< T > |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | device_ref (const device_ref &)=default |
+ | device_ref (T *pointer) |
+ | operator T () const |
+device_ref | operator= (const T &value) const |
+device_ref | operator= (const device_ref &other) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::device_span< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = T |
+using | size_type = std::size_t |
+using | difference_type = std::size_t |
+using | reference = std::iter_reference_t< Iter > |
Public Types inherited from dr::sp::span< T, Iter > | |
+using | value_type = std::iter_value_t< Iter > |
+using | size_type = std::size_t |
+using | difference_type = std::iter_difference_t< Iter > |
+using | reference = std::iter_reference_t< Iter > |
+using | iterator = Iter |
+Public Member Functions | |
+template<rng::random_access_range R> +requires (dr::remote_range<R>) | |
device_span (R &&r) | |
+template<rng::random_access_range R> | |
device_span (R &&r, std::size_t rank) | |
+template<class It > | |
constexpr | device_span (It first, std::size_t count, std::size_t rank) |
+template<class It , class End > | |
constexpr | device_span (It first, End last, std::size_t rank) |
+constexpr std::size_t | rank () const noexcept |
+device_span | first (std::size_t n) const |
+device_span | last (std::size_t n) const |
+device_span | subspan (std::size_t offset, std::size_t count) const |
Public Member Functions inherited from dr::sp::span< T, Iter > | |
+template<rng::random_access_range R> | |
span (R &&r) | |
+ | span (Iter first, Iter last) |
+ | span (Iter first, std::size_t count) |
+ | span (const span &) noexcept=default |
+span & | operator= (const span &) noexcept=default |
+std::size_t | size () const noexcept |
+bool | empty () const noexcept |
+Iter | begin () const noexcept |
+Iter | end () const noexcept |
+reference | operator[] (size_type index) const |
+span | first (size_type n) const |
+span | last (size_type n) const |
+span | subspan (std::size_t offset, std::size_t count) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::device_vector< T, Allocator >, including all inherited members.
+base typedef (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | |
device_vector() noexcept (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | inline |
device_vector(size_type count, const Allocator &alloc, size_type rank) (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | inline |
difference_type typedef (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | |
rank() const noexcept (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | inline |
size_type typedef (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > | |
value_type typedef (defined in dr::sp::device_vector< T, Allocator >) | dr::sp::device_vector< T, Allocator > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | base = dr::sp::vector< T, Allocator > |
+using | value_type = T |
+using | size_type = std::size_t |
+using | difference_type = std::size_t |
Public Types inherited from dr::sp::vector< T, Allocator > | |
+using | value_type = T |
+using | allocator_type = Allocator |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | pointer = typename std::allocator_traits< allocator_type >::pointer |
+using | const_pointer = typename std::allocator_traits< allocator_type >::const_pointer |
+using | reference = decltype(*std::declval< pointer >()) |
+using | const_reference = decltype(*std::declval< const_pointer >()) |
+using | iterator = pointer |
+using | const_iterator = const_pointer |
+Public Member Functions | |
+constexpr | device_vector (size_type count, const Allocator &alloc, size_type rank) |
+constexpr std::size_t | rank () const noexcept |
Public Member Functions inherited from dr::sp::vector< T, Allocator > | |
+ | vector (const Allocator &allocator) noexcept |
+ | vector (size_type count, const T &value, const Allocator &alloc=Allocator()) |
+ | vector (size_type count, const Allocator &alloc=Allocator()) |
+constexpr | vector (Iter first, Iter last, const Allocator &alloc=Allocator()) |
+ | vector (const vector &other) |
+ | vector (const vector &other, const Allocator &alloc) |
+ | vector (vector &&other) noexcept |
+ | vector (vector &&other, const Allocator &alloc) noexcept |
+ | vector (std::initializer_list< T > init, const Allocator &alloc=Allocator()) |
+vector & | operator= (const vector &other) |
+void | assign (Iter first, Iter last) |
+size_type | size () const noexcept |
+bool | empty () const noexcept |
+size_type | capacity () const noexcept |
+pointer | data () noexcept |
+const_pointer | data () const noexcept |
+allocator_type | get_allocator () const noexcept |
+iterator | begin () noexcept |
+const_iterator | begin () const noexcept |
+iterator | end () noexcept |
+const_iterator | end () const noexcept |
+reference | operator[] (size_type pos) |
+const_reference | operator[] (size_type pos) const |
+void | reserve (size_type new_cap) |
+void | push_back (const T &value) |
+void | push_back (T &&value) |
+bool | try_push_back (const T &value) |
+void | resize (size_type count) |
+void | resize (size_type count, const value_type &value) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_dense_matrix< T >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | value_type = dr::sp::matrix_entry< T > |
+using | scalar_reference = rng::range_reference_t< dr::sp::device_vector< T, dr::sp::device_allocator< T > > > |
+using | const_scalar_reference = rng::range_reference_t< const dr::sp::device_vector< T, dr::sp::device_allocator< T > > > |
+using | reference = dr::sp::matrix_ref< T, scalar_reference > |
+using | const_reference = dr::sp::matrix_ref< const T, const_scalar_reference > |
+using | key_type = dr::index<> |
+using | iterator = distributed_dense_matrix_iterator< T, dr::sp::device_vector< T, dr::sp::device_allocator< T > > > |
+Public Member Functions | |
+ | distributed_dense_matrix (key_type shape) |
+ | distributed_dense_matrix (key_type shape, const matrix_partition &partition) |
+size_type | size () const noexcept |
+key_type | shape () const noexcept |
+scalar_reference | operator[] (key_type index) |
+const_scalar_reference | operator[] (key_type index) const |
+iterator | begin () |
+iterator | end () |
+key_type | tile_shape () const noexcept |
+key_type | grid_shape () const noexcept |
+auto | tile (key_type tile_index) |
+std::vector< dense_matrix_view< T, rng::iterator_t< dr::sp::device_vector< T, dr::sp::device_allocator< T > > > > > | tiles () |
+template<typename Allocator = std::allocator<T>> | |
auto | get_tile (key_type tile_index, const Allocator &alloc=Allocator{}) |
+template<typename Allocator = std::allocator<T>> | |
auto | get_tile_async (key_type tile_index, const Allocator &alloc=Allocator{}) |
+auto | segments () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_dense_matrix_accessor< T, L >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | scalar_value_type = rng::range_value_t< L > |
+using | scalar_reference = rng::range_reference_t< L > |
+using | value_type = dr::sp::matrix_entry< scalar_value_type, std::size_t > |
+using | reference = dr::sp::matrix_ref< T, std::size_t, scalar_reference > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = distributed_dense_matrix_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+using | tile_type = L |
+using | key_type = dr::index<> |
+Public Member Functions | |
+constexpr | distributed_dense_matrix_accessor (const distributed_dense_matrix_accessor &) noexcept=default |
+constexpr distributed_dense_matrix_accessor & | operator= (const distributed_dense_matrix_accessor &) noexcept=default |
+constexpr | distributed_dense_matrix_accessor (std::span< tile_type > tiles, key_type grid_idx, key_type tile_idx, key_type grid_shape, key_type tile_shape, key_type matrix_shape) noexcept |
+constexpr distributed_dense_matrix_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_range_accessor< Segments >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | segment_type = rng::range_value_t< Segments > |
+using | value_type = rng::range_value_t< segment_type > |
+using | size_type = rng::range_size_t< segment_type > |
+using | difference_type = rng::range_difference_t< segment_type > |
+using | reference = rng::range_reference_t< segment_type > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = distributed_range_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | distributed_range_accessor (const distributed_range_accessor &) noexcept=default |
+constexpr distributed_range_accessor & | operator= (const distributed_range_accessor &) noexcept=default |
+constexpr | distributed_range_accessor (Segments segments, size_type segment_id, size_type idx) noexcept |
+constexpr distributed_range_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_span< T, L >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = T |
+using | value_type = std::remove_cv_t< T > |
+using | segment_type = dr::sp::device_span< T, L > |
+using | size_type = rng::range_size_t< segment_type > |
+using | difference_type = rng::range_difference_t< segment_type > |
+using | reference = rng::range_reference_t< segment_type > |
+using | iterator = distributed_span_iterator< T, segment_type > |
+Public Member Functions | |
+constexpr | distributed_span (const distributed_span &) noexcept=default |
+constexpr distributed_span & | operator= (const distributed_span &) noexcept=default |
+template<rng::input_range R> +requires (dr::remote_range<rng::range_reference_t<R>>) | |
constexpr | distributed_span (R &&segments) |
+template<dr::distributed_range R> | |
constexpr | distributed_span (R &&r) |
+constexpr size_type | size () const noexcept |
+constexpr size_type | size_bytes () const noexcept |
+constexpr reference | operator[] (size_type idx) const |
+constexpr bool | empty () const noexcept |
+constexpr distributed_span | subspan (size_type Offset, size_type Count=std::dynamic_extent) const |
+constexpr distributed_span | first (size_type Count) const |
+constexpr distributed_span | last (size_type Count) const |
+iterator | begin () |
+iterator | end () |
+constexpr reference | front () |
+constexpr reference | back () |
+std::span< segment_type > | segments () |
+std::span< const segment_type > | segments () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_span_accessor< T, L >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = T |
+using | value_type = std::remove_cv_t< T > |
+using | segment_type = L |
+using | size_type = rng::range_size_t< segment_type > |
+using | difference_type = rng::range_difference_t< segment_type > |
+using | reference = rng::range_reference_t< segment_type > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = distributed_span_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | distributed_span_accessor (const distributed_span_accessor &) noexcept=default |
+constexpr distributed_span_accessor & | operator= (const distributed_span_accessor &) noexcept=default |
+constexpr | distributed_span_accessor (std::span< segment_type > segments, size_type segment_id, size_type idx) noexcept |
+constexpr distributed_span_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+auto | segments () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::distributed_vector_accessor< T, L >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = T |
+using | value_type = std::remove_cv_t< T > |
+using | segment_type = L |
+using | const_segment_type = std::add_const_t< L > |
+using | nonconst_segment_type = std::remove_const_t< L > |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | reference = rng::range_reference_t< segment_type > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = distributed_vector_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | distributed_vector_accessor (const distributed_vector_accessor &) noexcept=default |
+constexpr distributed_vector_accessor & | operator= (const distributed_vector_accessor &) noexcept=default |
+constexpr | distributed_vector_accessor (std::span< segment_type > segments, size_type segment_id, size_type idx, size_type segment_size) noexcept |
+constexpr distributed_vector_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+auto | segments () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::duplicated_vector< T, Allocator >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | segment_type = dr::sp::device_vector< T, Allocator > |
+using | value_type = T |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | allocator_type = Allocator |
+Public Member Functions | |
+ | duplicated_vector (std::size_t count=0) |
+size_type | size () const noexcept |
+segment_type & | local_vector (std::size_t rank) |
+const segment_type & | local_vector (std::size_t rank) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::future< T, Event >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | event_type = Event |
+Public Member Functions | |
+ | future (std::unique_ptr< T > &&value, const std::vector< Event > &events) |
+ | future (T &&value, const std::vector< Event > &events) |
+void | update (const Event &event) |
+ | future (future &&)=default |
+future & | operator= (future &&)=default |
+ | future (const future &)=delete |
+future & | operator= (const future &)=delete |
+T | get () |
+std::vector< Event > | events () const |
+T & | value () const |
+void | wait () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::id< dimensions >, including all inherited members.
+get(int dimension) const (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | inline |
id() noexcept=default (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | |
id(std::size_t segment_id, std::size_t local_id, std::size_t global_id) (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | inline |
local_id() const (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | inline |
operator std::size_t() const (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | inline |
segment() const (defined in dr::sp::id< dimensions >) | dr::sp::id< dimensions > | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::matrix_entry< T, I >, including all inherited members.
+get() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
index() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
index_type typedef (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
map_type typedef (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
matrix_entry(dr::index< I > index, const map_type &value) (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
matrix_entry(dr::index< I > index, map_type &&value) (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
matrix_entry(dr::index< I > index, U &&value) (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
matrix_entry(Entry &&entry) (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
matrix_entry()=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
matrix_entry(const matrix_entry &)=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
matrix_entry(matrix_entry &&)=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
operator matrix_entry< std::add_const_t< T >, U >() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
operator matrix_entry< T, U >() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
operator std::pair< std::pair< I, I >, T >() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
operator<(const matrix_entry &other) const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
operator=(const matrix_entry &)=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
operator=(matrix_entry &&)=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | |
value() const noexcept (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > | inline |
~matrix_entry()=default (defined in dr::sp::matrix_entry< T, I >) | dr::sp::matrix_entry< T, I > |
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | index_type = I |
+using | map_type = T |
+Public Member Functions | |
+ | matrix_entry (dr::index< I > index, const map_type &value) |
+ | matrix_entry (dr::index< I > index, map_type &&value) |
+template<typename U > +requires (std::is_constructible_v<T, U>) | |
matrix_entry (dr::index< I > index, U &&value) | |
+template<typename Entry > | |
matrix_entry (Entry &&entry) | |
+template<std::size_t Index> | |
auto | get () const noexcept |
+ | operator std::pair< std::pair< I, I >, T > () const noexcept |
+dr::index< I > | index () const noexcept |
+map_type | value () const noexcept |
+template<std::integral U> +requires (!std::is_same_v<I, U> && std::numeric_limits<U>::max() >= std::numeric_limits<I>::max()) | |
operator matrix_entry< T, U > () const noexcept | |
+template<std::integral U> +requires (!std::is_const_v<T> && !std::is_same_v<I, U> && std::numeric_limits<U>::max() >= std::numeric_limits<I>::max()) | |
operator matrix_entry< std::add_const_t< T >, U > () const noexcept | |
+bool | operator< (const matrix_entry &other) const noexcept |
+ | matrix_entry (const matrix_entry &)=default |
+ | matrix_entry (matrix_entry &&)=default |
+matrix_entry & | operator= (const matrix_entry &)=default |
+matrix_entry & | operator= (matrix_entry &&)=default |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::matrix_partition, including all inherited members.
+clone() const =0 (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | pure virtual |
grid_shape(dr::index<> matrix_shape) const =0 (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | pure virtual |
tile_rank(dr::index<> matrix_shape, dr::index<> tile_id) const =0 (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | pure virtual |
tile_shape(dr::index<> matrix_shape) const =0 (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | pure virtual |
~matrix_partition() (defined in dr::sp::matrix_partition) | dr::sp::matrix_partition | inlinevirtual |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+virtual std::size_t | tile_rank (dr::index<> matrix_shape, dr::index<> tile_id) const =0 |
+virtual dr::index | grid_shape (dr::index<> matrix_shape) const =0 |
+virtual dr::index | tile_shape (dr::index<> matrix_shape) const =0 |
+virtual std::unique_ptr< matrix_partition > | clone () const =0 |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::matrix_ref< T, I, TRef >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | scalar_type = T |
+using | index_type = I |
+using | key_type = dr::index< I > |
+using | map_type = T |
+using | scalar_reference = TRef |
+using | value_type = dr::sp::matrix_entry< T, I > |
+Public Member Functions | |
+ | matrix_ref (dr::index< I > index, scalar_reference value) |
+ | operator value_type () const noexcept |
+ | operator std::pair< std::pair< I, I >, T > () const noexcept |
+template<std::size_t Index> +requires (Index <= 1) | |
decltype(auto) | get () const noexcept |
+dr::index< I > | index () const noexcept |
+scalar_reference | value () const noexcept |
+template<std::integral U> +requires (!std::is_same_v<I, U> && std::numeric_limits<U>::max() >= std::numeric_limits<I>::max()) | |
operator matrix_ref< T, U, TRef > () const noexcept | |
+template<std::integral U> +requires (!std::is_const_v<T> && !std::is_same_v<I, U> && std::numeric_limits<U>::max() >= std::numeric_limits<I>::max()) | |
operator matrix_ref< std::add_const_t< T >, U, TRef > () const noexcept | |
+bool | operator< (matrix_entry< T, I > other) const noexcept |
+ | matrix_ref (const matrix_ref &)=default |
+matrix_ref & | operator= (const matrix_ref &)=delete |
+ | matrix_ref (matrix_ref &&)=default |
+matrix_ref & | operator= (matrix_ref &&)=default |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::segment_range< dimensions >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = id< dimensions > |
+using | size_type = std::size_t |
+using | different_type = std::ptrdiff_t |
+using | reference = value_type |
+using | iterator = segment_range_iterator |
+Public Member Functions | |
+ | segment_range (std::size_t segment_id, std::size_t segment_size, std::size_t global_offset) |
+iterator | begin () const |
+iterator | end () const |
+size_type | size () const noexcept |
+value_type | operator[] (std::size_t idx) |
+size_type | rank () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::segment_range_accessor, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = id< 1 > |
+using | value_type = element_type |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | reference = value_type |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = segment_range_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | segment_range_accessor (const segment_range_accessor &) noexcept=default |
+constexpr segment_range_accessor & | operator= (const segment_range_accessor &) noexcept=default |
+constexpr | segment_range_accessor (size_type segment_id, size_type idx, size_type global_offset) noexcept |
+constexpr segment_range_accessor & | operator+= (difference_type offset) noexcept |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::span< T, Iter >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::random_access_range R> | |
span (R &&r) | |
+ | span (Iter first, Iter last) |
+ | span (Iter first, std::size_t count) |
+ | span (const span &) noexcept=default |
+span & | operator= (const span &) noexcept=default |
+std::size_t | size () const noexcept |
+bool | empty () const noexcept |
+Iter | begin () const noexcept |
+Iter | end () const noexcept |
+reference | operator[] (size_type index) const |
+span | first (size_type n) const |
+span | last (size_type n) const |
+span | subspan (std::size_t offset, std::size_t count) const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::sparse_matrix< T, I >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | value_type = dr::sp::matrix_entry< T > |
+using | scalar_reference = rng::range_reference_t< dr::sp::device_vector< T, dr::sp::device_allocator< T > > > |
+using | const_scalar_reference = rng::range_reference_t< const dr::sp::device_vector< T, dr::sp::device_allocator< T > > > |
+using | reference = dr::sp::matrix_ref< T, scalar_reference > |
+using | const_reference = dr::sp::matrix_ref< const T, const_scalar_reference > |
+using | key_type = dr::index< I > |
+using | segment_type = dr::sp::csr_matrix_view< T, I, rng::iterator_t< dr::sp::device_vector< T, dr::sp::device_allocator< T > > >, rng::iterator_t< dr::sp::device_vector< I, dr::sp::device_allocator< I > > > > |
+using | iterator = distributed_sparse_matrix_iterator< std::span< segment_type > && > |
+Public Member Functions | |
+ | sparse_matrix (key_type shape) |
+ | sparse_matrix (key_type shape, double density) |
+ | sparse_matrix (key_type shape, double density, const matrix_partition &partition) |
+ | sparse_matrix (key_type shape, const matrix_partition &partition) |
+size_type | size () const noexcept |
+key_type | shape () const noexcept |
+iterator | begin () |
+iterator | end () |
+segment_type | tile (key_type tile_index) |
+template<typename... Args> | |
auto | copy_tile_async (key_type tile_index, csr_matrix_view< T, I, Args... > tile_view) |
+template<typename... Args> | |
void | copy_tile (key_type tile_index, csr_matrix_view< T, I, Args... > tile_view) |
+key_type | tile_shape () const noexcept |
+key_type | grid_shape () const noexcept |
+std::span< segment_type > | tiles () |
+std::span< segment_type > | segments () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::vector< T, Allocator >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | vector (const Allocator &allocator) noexcept |
+ | vector (size_type count, const T &value, const Allocator &alloc=Allocator()) |
+ | vector (size_type count, const Allocator &alloc=Allocator()) |
+template<std::forward_iterator Iter> | |
constexpr | vector (Iter first, Iter last, const Allocator &alloc=Allocator()) |
+ | vector (const vector &other) |
+ | vector (const vector &other, const Allocator &alloc) |
+ | vector (vector &&other) noexcept |
+ | vector (vector &&other, const Allocator &alloc) noexcept |
+ | vector (std::initializer_list< T > init, const Allocator &alloc=Allocator()) |
+vector & | operator= (const vector &other) |
+template<std::forward_iterator Iter> | |
void | assign (Iter first, Iter last) |
+size_type | size () const noexcept |
+bool | empty () const noexcept |
+size_type | capacity () const noexcept |
+pointer | data () noexcept |
+const_pointer | data () const noexcept |
+allocator_type | get_allocator () const noexcept |
+iterator | begin () noexcept |
+iterator | end () noexcept |
+const_iterator | begin () const noexcept |
+const_iterator | end () const noexcept |
+reference | operator[] (size_type pos) |
+const_reference | operator[] (size_type pos) const |
+void | reserve (size_type new_cap) |
+void | push_back (const T &value) |
+void | push_back (T &&value) |
+bool | try_push_back (const T &value) |
+void | resize (size_type count) |
+void | resize (size_type count, const value_type &value) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::views::enumerate_adapter_closure, including all inherited members.
+operator()(R &&r) const (defined in dr::sp::views::enumerate_adapter_closure) | dr::sp::views::enumerate_adapter_closure | inline |
operator| (defined in dr::sp::views::enumerate_adapter_closure) | dr::sp::views::enumerate_adapter_closure | friend |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> +requires (rng::sized_range<R>) | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const enumerate_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::views::enumerate_fn_, including all inherited members.
+enumerate() const (defined in dr::sp::views::enumerate_fn_) | dr::sp::views::enumerate_fn_ | inline |
operator()(R &&r) const (defined in dr::sp::views::enumerate_fn_) | dr::sp::views::enumerate_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+template<rng::viewable_range R> | |
constexpr auto | operator() (R &&r) const |
+auto | enumerate () const |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::views::slice_adaptor_closure, including all inherited members.
+operator()(R &&r) const (defined in dr::sp::views::slice_adaptor_closure) | dr::sp::views::slice_adaptor_closure | inline |
operator| (defined in dr::sp::views::slice_adaptor_closure) | dr::sp::views::slice_adaptor_closure | friend |
slice_adaptor_closure(dr::index<> slice_indices) (defined in dr::sp::views::slice_adaptor_closure) | dr::sp::views::slice_adaptor_closure | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | slice_adaptor_closure (dr::index<> slice_indices) |
+template<rng::random_access_range R> | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::random_access_range R> | |
auto | operator| (R &&r, const slice_adaptor_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::zip_accessor< Iters >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | element_type = __detail::tuple_or_pair_t< std::iter_value_t< Iters >... > |
+using | value_type = element_type |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+using | reference = __detail::tuple_or_pair_t< std::iter_reference_t< Iters >... > |
+using | iterator_category = std::random_access_iterator_tag |
+using | iterator_accessor = zip_accessor |
+using | const_iterator_accessor = iterator_accessor |
+using | nonconst_iterator_accessor = iterator_accessor |
+Public Member Functions | |
+constexpr | zip_accessor (const zip_accessor &) noexcept=default |
+constexpr zip_accessor & | operator= (const zip_accessor &) noexcept=default |
+constexpr | zip_accessor (Iters... iters) |
+zip_accessor & | operator+= (difference_type offset) |
+constexpr bool | operator== (const iterator_accessor &other) const noexcept |
+constexpr difference_type | operator- (const iterator_accessor &other) const noexcept |
+constexpr bool | operator< (const iterator_accessor &other) const noexcept |
+constexpr reference | operator* () const noexcept |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::sp::zip_view< Rs >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
zip + More...
+ +#include <zip_view.hpp>
+Public Types | |
+using | size_type = std::size_t |
+using | difference_type = std::ptrdiff_t |
+Static Public Attributes | |
+static constexpr bool | num_views = sizeof...(Rs) |
zip
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::timer, including all inherited members.
+elapsed() (defined in dr::timer) | dr::timer | inline |
timer() (defined in dr::timer) | dr::timer | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+auto | elapsed () |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::transform_iterator< Iter, F >, including all inherited members.
+
+ Distributed Ranges
+
+ |
+
+Public Types | |
+using | value_type = std::invoke_result_t< F, std::iter_value_t< Iter > > |
+using | difference_type = std::iter_difference_t< Iter > |
+using | iterator = transform_iterator< Iter, F > |
+using | reference = value_type |
+using | pointer = iterator |
+using | iterator_category = std::random_access_iterator_tag |
+Public Member Functions | |
+ | transform_iterator (Iter iter, F fn) noexcept |
+ | transform_iterator (const transform_iterator &) noexcept=default |
+transform_iterator & | operator= (const transform_iterator &) noexcept=default |
+bool | operator== (const transform_iterator &other) const noexcept |
+bool | operator!= (const transform_iterator &other) const noexcept |
+iterator | operator+ (difference_type offset) const noexcept |
+iterator | operator- (difference_type offset) const noexcept |
+difference_type | operator- (iterator other) const noexcept |
+bool | operator< (iterator other) const noexcept |
+bool | operator> (iterator other) const noexcept |
+bool | operator<= (iterator other) const noexcept |
+bool | operator>= (iterator other) const noexcept |
+iterator & | operator++ () noexcept |
+iterator | operator++ (int) noexcept |
+iterator & | operator-- () noexcept |
+iterator | operator-- (int) noexcept |
+iterator & | operator+= (difference_type offset) noexcept |
+iterator & | operator-= (difference_type offset) noexcept |
+reference | operator* () const noexcept |
+reference | operator[] (difference_type offset) const noexcept |
+auto | local () const |
+Friends | |
+iterator | operator+ (difference_type n, iterator iter) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::transform_view< V, F >, including all inherited members.
+base() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
begin() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
end() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
rank() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
segments() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
size() const (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
transform_view(R &&r, F fn) (defined in dr::transform_view< V, F >) | dr::transform_view< V, F > | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::views::transform_adapter_closure< F >, including all inherited members.
+operator()(R &&r) const (defined in dr::views::transform_adapter_closure< F >) | dr::views::transform_adapter_closure< F > | inline |
operator| (defined in dr::views::transform_adapter_closure< F >) | dr::views::transform_adapter_closure< F > | friend |
transform_adapter_closure(F fn) (defined in dr::views::transform_adapter_closure< F >) | dr::views::transform_adapter_closure< F > | inline |
+ Distributed Ranges
+
+ |
+
+Public Member Functions | |
+ | transform_adapter_closure (F fn) |
+template<rng::viewable_range R> | |
auto | operator() (R &&r) const |
+Friends | |
+template<rng::viewable_range R> | |
auto | operator| (R &&r, const transform_adapter_closure &closure) |
+ Distributed Ranges
+
+ |
+
This is the complete list of members for dr::views::transform_fn_, including all inherited members.
+operator()(R &&r, F &&f) const (defined in dr::views::transform_fn_) | dr::views::transform_fn_ | inline |
operator()(F &&fn) const (defined in dr::views::transform_fn_) | dr::views::transform_fn_ | inline |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
▼Ndr | |
▼N__detail | |
Rmdspan_like | |
Rmdarray_like | |
▼Nmp | |
▼N__detail | |
Rone_argument | |
Rtwo_arguments | |
Rzipable | |
Ris_distributed | |
Rhas_segments | |
Rno_segments | |
Rhas_halo_method | |
Ris_mdspan_view | |
▼Nranges | |
▼N__detail | |
Rhas_local | |
▼Nsp | |
▼N__detail | |
Ris_syclmemcopyable | |
Rsycl_device_selector | |
Rremote_iterator | |
Rremote_range | |
Rdistributed_range | |
Rremote_contiguous_iterator | |
Rdistributed_iterator | |
Rremote_contiguous_range | |
Rdistributed_contiguous_range | |
Rdistributed_contiguous_iterator | |
RTupleLike | |
Rdistributed_mdspan_range |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+Files | |
file | iota.hpp [code] |
file | transform.hpp [code] |
file | views.hpp [code] |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | concepts |
directory | detail |
directory | mp |
directory | sp |
directory | views |
+Files | |
file | mp.hpp [code] |
file | sp.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | matrix |
+Files | |
file | algorithms.hpp [code] |
file | copy.hpp [code] |
file | equal.hpp [code] |
file | exclusive_scan.hpp [code] |
file | execution_policy.hpp [code] |
file | fill.hpp [code] |
file | for_each.hpp [code] |
file | inclusive_scan.hpp [code] |
file | iota.hpp [code] |
file | reduce.hpp [code] |
file | sort.hpp [code] |
file | transform.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | communicator.hpp [code] |
file | enumerate.hpp [code] |
file | format_shim.hpp [code] |
file | index.hpp [code] |
file | iterator_adaptor.hpp [code] |
file | logger.hpp [code] |
file | mdarray_shim.hpp [code] |
file | mdspan_shim.hpp [code] |
file | mdspan_utils.hpp [code] |
file | memory.hpp [code] |
file | normal_distributed_iterator.hpp [code] |
file | onedpl_direct_iterator.hpp [code] |
file | owning_view.hpp [code] |
file | ranges.hpp [code] |
file | ranges_shim.hpp [code] |
file | ranges_utils.hpp [code] |
file | remote_subrange.hpp [code] |
file | segments_tools.hpp [code] |
file | sycl_utils.hpp [code] |
file | tuple_utils.hpp [code] |
file | utils.hpp [code] |
file | view_detectors.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | gemm.hpp [code] |
file | gemv.hpp [code] |
file | local_gemm.hpp [code] |
file | local_gemv.hpp [code] |
file | matrix_algorithms.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | algorithms |
directory | containers |
directory | views |
+Files | |
file | alignment.hpp [code] |
file | allocator.hpp [code] |
file | common_support.hpp [code] |
file | global.hpp [code] |
file | halo.hpp [code] |
file | sycl_support.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | distributed_mdarray.hpp [code] |
file | distributed_vector.hpp [code] |
file | distribution.hpp [code] |
file | segment.hpp [code] |
file | subrange.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | coo_matrix.hpp [code] |
file | generate_random.hpp [code] |
file | matrix_io.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | enumerate.hpp [code] |
file | mdspan_view.hpp [code] |
file | segmented.hpp [code] |
file | sliding.hpp [code] |
file | submdspan_view.hpp [code] |
file | views.hpp [code] |
file | zip.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | source_location.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | copy.hpp [code] |
file | count.hpp [code] |
file | equal.hpp [code] |
file | exclusive_scan.hpp [code] |
file | fill.hpp [code] |
file | for_each.hpp [code] |
file | inclusive_exclusive_scan_impl.hpp [code] |
file | inclusive_scan.hpp [code] |
file | iota.hpp [code] |
file | md_for_each.hpp [code] |
file | reduce.hpp [code] |
file | sort.hpp [code] |
file | transform.hpp [code] |
file | transpose.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | csr_matrix_view.hpp [code] |
file | dense_column_view.hpp [code] |
file | dense_matrix_iterator.hpp [code] |
file | dense_matrix_view.hpp [code] |
file | dense_row_view.hpp [code] |
file | enumerate.hpp [code] |
file | standard_views.hpp [code] |
file | views.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Files | |
file | dense_matrix.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | dr |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | algorithms |
directory | containers |
directory | util |
directory | views |
+Files | |
file | allocators.hpp [code] |
file | detail.hpp [code] |
file | device_ptr.hpp [code] |
file | device_ref.hpp [code] |
file | device_span.hpp [code] |
file | device_vector.hpp [code] |
file | distributed_span.hpp [code] |
file | distributed_vector.hpp [code] |
file | future.hpp [code] |
file | init.hpp [code] |
file | range.hpp [code] |
file | range_adaptors.hpp [code] |
file | span.hpp [code] |
file | util.hpp [code] |
file | vector.hpp [code] |
file | zip_view.hpp [code] |
+ Distributed Ranges
+
+ |
+
+Directories | |
directory | sequential |
+Files | |
file | detail.hpp [code] |
file | distributed_dense_matrix.hpp [code] |
file | duplicated_vector.hpp [code] |
file | matrix_entry.hpp [code] |
file | matrix_partition.hpp [code] |
file | sparse_matrix.hpp [code] |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
▼ include | |
▼ dr | |
▼ concepts | |
concepts.hpp | |
▼ detail | |
communicator.hpp | |
enumerate.hpp | |
format_shim.hpp | |
index.hpp | |
iterator_adaptor.hpp | |
logger.hpp | |
mdarray_shim.hpp | |
mdspan_shim.hpp | |
mdspan_utils.hpp | |
memory.hpp | |
normal_distributed_iterator.hpp | |
onedpl_direct_iterator.hpp | |
owning_view.hpp | |
ranges.hpp | |
ranges_shim.hpp | |
ranges_utils.hpp | |
remote_subrange.hpp | |
segments_tools.hpp | |
sycl_utils.hpp | |
tuple_utils.hpp | |
utils.hpp | |
view_detectors.hpp | |
▼ mp | |
► algorithms | |
► containers | |
► views | |
alignment.hpp | |
allocator.hpp | |
common_support.hpp | |
global.hpp | |
halo.hpp | |
sycl_support.hpp | |
▼ sp | |
► algorithms | |
► containers | |
► util | |
► views | |
allocators.hpp | |
detail.hpp | |
device_ptr.hpp | |
device_ref.hpp | |
device_span.hpp | |
device_vector.hpp | |
distributed_span.hpp | |
distributed_vector.hpp | |
future.hpp | |
init.hpp | |
range.hpp | |
range_adaptors.hpp | |
span.hpp | |
util.hpp | |
vector.hpp | |
zip_view.hpp | |
▼ views | |
iota.hpp | |
transform.hpp | |
views.hpp | |
mp.hpp | |
sp.hpp | |
▼ vendor | |
▼ source_location | |
source_location.hpp |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
This page explains how to interpret the graphs that are generated by doxygen.
+Consider the following example:
This will result in the following graph:
+The boxes in the above graph have the following meaning:
+The arrows have the following meaning:
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ + |
+ Distributed Ranges
+
+ |
+
+ Distributed Ranges
+
+ |
+