From 9f7f10326a3b6c30e5a1f11f87585a45b59218fe Mon Sep 17 00:00:00 2001 From: PhilipDeegan Date: Sat, 9 Nov 2024 22:55:25 +0100 Subject: [PATCH 1/5] pfm parsing for perf stat codes per instruction --- .gitignore | 4 +- mkn.yaml | 24 +++++++++ phlop/app/perf.py | 3 ++ phlop/app/pfm/__init__.py | 5 ++ phlop/app/pfm/check_events.py | 48 +++++++++++++++++ phlop/app/pfm/showevtinfo.py | 81 +++++++++++++++++++++++++++++ phlop/app/stats_man.py | 1 - phlop/run/mpirun_perf.py | 1 - phlop/run/mpirun_stats_man.py | 1 - phlop/run/perf.py | 10 ++-- phlop/testing/parallel_processor.py | 3 +- sh/setup_pfm.sh | 11 ++++ sh/test.sh | 8 +-- tests/_phlop/app/pfm/test_pfm.py | 27 ++++++++++ 14 files changed, 215 insertions(+), 12 deletions(-) create mode 100644 phlop/app/pfm/__init__.py create mode 100644 phlop/app/pfm/check_events.py create mode 100644 phlop/app/pfm/showevtinfo.py create mode 100644 sh/setup_pfm.sh create mode 100644 tests/_phlop/app/pfm/test_pfm.py diff --git a/.gitignore b/.gitignore index c37575b..5ab9f94 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ bin __pycache__ dist phlop.egg-info/ -scope_timer.txt + +*scope_timer.txt +tpp diff --git a/mkn.yaml b/mkn.yaml index 28cc1ba..d633af4 100644 --- a/mkn.yaml +++ b/mkn.yaml @@ -7,6 +7,28 @@ profile: - name: base inc: inc +- name: pfm + inc: tpp/pfm/include + sub: pfm&tpp/pfm(git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4) + +- name: pfm_lib + parent: pfm + mode: static + inc: | + tpp/pfm/include + tpp/pfm/lib, 0 + src: tpp/pfm/lib + +- name: pfm_events + self: pfm_lib + main: tpp/pfm/examples/check_events.c + out: check_events + +- name: pfm_info + self: pfm_lib + main: tpp/pfm/examples/showevtinfo.c + out: showevtinfo + - name: scope_timer parent: base src: src/phlop/timing/scope_timer.cpp @@ -18,3 +40,5 @@ profile: src: src/phlop/timing/threaded_scope_timer.cpp mode: shared test: tests/timing/test_threaded_scope_timer.cpp + + diff --git a/phlop/app/perf.py b/phlop/app/perf.py index 0a03294..cdd3537 100644 --- a/phlop/app/perf.py +++ b/phlop/app/perf.py @@ -117,6 +117,7 @@ def cli_args_parser(description="Perf tool"): logging="0=off, 1=on non zero exit code, 2=always", outfile="path for saved file if active", tool="stat/record/etc", + extra="forward string to perf command", ) parser = argparse.ArgumentParser( @@ -133,6 +134,8 @@ def cli_args_parser(description="Perf tool"): parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) parser.add_argument("-t", "--tool", default="stat", help=_help.tool) parser.add_argument("--logging", type=int, default=1, help=_help.logging) + parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra) + return parser diff --git a/phlop/app/pfm/__init__.py b/phlop/app/pfm/__init__.py new file mode 100644 index 0000000..b117618 --- /dev/null +++ b/phlop/app/pfm/__init__.py @@ -0,0 +1,5 @@ +# +# +# +# +# diff --git a/phlop/app/pfm/check_events.py b/phlop/app/pfm/check_events.py new file mode 100644 index 0000000..26fe30c --- /dev/null +++ b/phlop/app/pfm/check_events.py @@ -0,0 +1,48 @@ +# +# +# +# +# + + +import logging +from pathlib import Path + +from phlop.os import pushd +from phlop.proc import run +from phlop.string import decode_bytes + +FILE_DIR = Path(__file__).resolve().parent + +logger = logging.getLogger(__name__) +check_events_start = "Total events:" + + +def parse_check_events_output(lines): + return lines[-1].split(":")[1].strip().replace("0x", "r") + + +def run_check_events(code): + with pushd(FILE_DIR.parent.parent.parent): + return decode_bytes( + run(f"./tpp/pfm/examples/check_events {code}").stdout + ).splitlines() + + +def get_evt_perf_code(code): + return parse_check_events_output(run_check_events(code)) + + +if __name__ == "__main__": + from phlop.app.pfm.showevtinfo import get_evt_info + + key, code = "[MULT_FLOPS]", "" + for info in get_evt_info(): + if key in info.umask: + code = f"{info.name}:{info.umask[key].code}" + break + + assert code != "" + + # print("get_evt_perf_code", get_evt_perf_code(code)) + print(run(f"perf stat -e {get_evt_perf_code(code)} sleep 5")) diff --git a/phlop/app/pfm/showevtinfo.py b/phlop/app/pfm/showevtinfo.py new file mode 100644 index 0000000..cdfa660 --- /dev/null +++ b/phlop/app/pfm/showevtinfo.py @@ -0,0 +1,81 @@ +# +# +# +# +# + + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +from phlop.os import pushd +from phlop.proc import run +from phlop.string import decode_bytes + +FILE_DIR = Path(__file__).resolve().parent + +logger = logging.getLogger(__name__) +EVTINFO_delimiter = "#-----------------------------" + + +@dataclass +class EVTUMask: + id: str + desc: str + code: str + + +@dataclass +class EVTInfo: + idx: str + pmu: str + name: str + umask: dict = field(default_factory=lambda: {}) + ect: dict = field(default_factory=lambda: {}) + + +def _parse_evtinfo(bits_list): + assert len(bits_list) >= 7 + info = EVTInfo( + idx=bits_list[0][1].strip(), + pmu=bits_list[1][1].strip(), + name=bits_list[2][1].strip(), + ) + for bits in bits_list[7:]: + if bits[0].strip().startswith("Umask"): + info.umask[bits[3].strip()] = EVTUMask( + id=bits[3].strip(), desc=bits[5].strip(), code=bits[1].strip() + ) + return info + + +def parse_evtinfo_output(lines): + start_idx = 0 + for line in lines: + start_idx += 1 + if line.strip() == EVTINFO_delimiter: + break + + bits_list, results = [], [] + for line in lines[start_idx:]: + if line == EVTINFO_delimiter: + results.append(_parse_evtinfo(bits_list)) + bits_list = [] + continue + bits_list.append(line.strip().split(":")) + + return results + + +def run_evtinfo(): + with pushd(FILE_DIR.parent.parent.parent): + return decode_bytes(run("./tpp/pfm/examples/showevtinfo").stdout).splitlines() + + +def get_evt_info(): + return parse_evtinfo_output(run_evtinfo()) + + +if __name__ == "__main__": + print(get_evt_info()) diff --git a/phlop/app/stats_man.py b/phlop/app/stats_man.py index 8ed7edb..945439e 100644 --- a/phlop/app/stats_man.py +++ b/phlop/app/stats_man.py @@ -20,7 +20,6 @@ from phlop.dict import ValDict from phlop.proc import run_raw -logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) _default_interval = 2 diff --git a/phlop/run/mpirun_perf.py b/phlop/run/mpirun_perf.py index f4a7141..fb996d0 100644 --- a/phlop/run/mpirun_perf.py +++ b/phlop/run/mpirun_perf.py @@ -7,7 +7,6 @@ from phlop.app import perf as p -logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) MPI_RANK = os.environ.get("OMPI_COMM_WORLD_RANK") diff --git a/phlop/run/mpirun_stats_man.py b/phlop/run/mpirun_stats_man.py index acbe2af..712649c 100644 --- a/phlop/run/mpirun_stats_man.py +++ b/phlop/run/mpirun_stats_man.py @@ -7,7 +7,6 @@ from phlop.app import stats_man as sman -logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) MPI_RANK = os.environ.get("OMPI_COMM_WORLD_RANK") diff --git a/phlop/run/perf.py b/phlop/run/perf.py index 2529fc8..2196939 100644 --- a/phlop/run/perf.py +++ b/phlop/run/perf.py @@ -19,11 +19,11 @@ """ -def perf_stat_cmd(cli_args, path, line): +def perf_stat_cmd(cli_args, path, line, options): file = Path(line.split(" ")[-1]).stem outpath = logpath / path.stem outpath.mkdir(parents=True, exist_ok=True) - return p.stat_cmd(line, p.stat_events, outpath / f"{file}.json") + return p.stat_cmd(line, p.stat_events, outpath / f"{file}.json", options) def get_from_files(cli_args): @@ -50,7 +50,11 @@ def get_remaining(cli_args): test_batches = {} path = Path(cli_args.remaining[-1]).parent test_case = tc.determine_cores_for_test_case( - tc.TestCase(cmd=perf_stat_cmd(cli_args, path, " ".join(cli_args.remaining))) + tc.TestCase( + cmd=perf_stat_cmd( + cli_args, path, " ".join(cli_args.remaining), cli_args.extra + ) + ) ) test_batches[test_case.cores] = [test_case] return [tc.TestBatch(v, k) for k, v in test_batches.items()] diff --git a/phlop/testing/parallel_processor.py b/phlop/testing/parallel_processor.py index de51838..f2f0f91 100644 --- a/phlop/testing/parallel_processor.py +++ b/phlop/testing/parallel_processor.py @@ -16,8 +16,7 @@ logger = getLogger(__name__) -class TestCaseFailure(Exception): - ... +class TestCaseFailure(Exception): ... class LoggingMode(Enum): diff --git a/sh/setup_pfm.sh b/sh/setup_pfm.sh new file mode 100644 index 0000000..4b07bc5 --- /dev/null +++ b/sh/setup_pfm.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && cd "$CWD"/.. + +set -ex + +[ ! -d "tpp/pfm" ] && ( + git clone git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4 tpp/pfm + cd tpp/pfm + make +) diff --git a/sh/test.sh b/sh/test.sh index 0db08df..84da0fb 100755 --- a/sh/test.sh +++ b/sh/test.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$CWD"/.. +CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && cd "$CWD"/.. set -ex @@ -19,4 +18,7 @@ py -O tests/timing/test_scope_timer.py test_scope_timer -f scope_timer.txt py -Om phlop.run.valgrind echo yes py -Om phlop.run.valgrind --tool=massif echo yes -py -Om phlop.run.perf echo yes || echo "perf failed, assumed CI" +py -Om phlop.run.perf -e="--all-user" echo yes || echo "perf failed, assumed CI" + +# install via ./sh/setup_pfm.sh +[ -d "tpp/pfm" ] && py -O tests/_phlop/app/pfm/test_pfm.py || echo "pfm missing, skipped" diff --git a/tests/_phlop/app/pfm/test_pfm.py b/tests/_phlop/app/pfm/test_pfm.py new file mode 100644 index 0000000..4d709eb --- /dev/null +++ b/tests/_phlop/app/pfm/test_pfm.py @@ -0,0 +1,27 @@ +# +# +# + + +from phlop.proc import run +from phlop.string import decode_bytes + +if __name__ == "__main__": + from phlop.app.pfm.check_events import get_evt_perf_code + from phlop.app.pfm.showevtinfo import get_evt_info + + code = "" + key0, key1 = "[MULT_FLOPS]", "[ADD_SUB_FLOPS]" + for info in get_evt_info(): + if key0 in info.umask: + for key, umask in info.umask.items(): + code += f"{info.name}:{umask.code} " + break + # if key1 in info.umask: + # code += f"{info.name}:{info.umask[key1].code} " + + code = code.strip() + assert code != "" + + events = " ".join([f"-e {get_evt_perf_code(ev)}" for ev in code.split(" ")]) + print(decode_bytes(run(f"perf stat {events} sleep 5").stderr)) From e5a03fe75defa381198e9e5d7079a95544cba712 Mon Sep 17 00:00:00 2001 From: deegan Date: Tue, 3 Dec 2024 15:03:58 +0100 Subject: [PATCH 2/5] nvidia profiling --- phlop/app/nvidia/__init__.py | 5 ++++ phlop/app/nvidia/csan.py | 56 +++++++++++++++++++++++++++++++++++ phlop/app/nvidia/ncu.py | 57 ++++++++++++++++++++++++++++++++++++ phlop/app/perf.py | 3 -- phlop/app/pfm/showevtinfo.py | 6 ++-- sh/setup_pfm.sh | 0 6 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 phlop/app/nvidia/__init__.py create mode 100644 phlop/app/nvidia/csan.py create mode 100644 phlop/app/nvidia/ncu.py mode change 100644 => 100755 sh/setup_pfm.sh diff --git a/phlop/app/nvidia/__init__.py b/phlop/app/nvidia/__init__.py new file mode 100644 index 0000000..b117618 --- /dev/null +++ b/phlop/app/nvidia/__init__.py @@ -0,0 +1,5 @@ +# +# +# +# +# diff --git a/phlop/app/nvidia/csan.py b/phlop/app/nvidia/csan.py new file mode 100644 index 0000000..d425624 --- /dev/null +++ b/phlop/app/nvidia/csan.py @@ -0,0 +1,56 @@ +# compute sanitizer frontend + +# https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html + +## samples +# compute-sanitizer --tool memcheck [sanitizer_options] app_name [app_options] +# compute-sanitizer --tool racecheck [sanitizer_options] app_name [app_options] +# +# + +import os + +from phlop.dict import ValDict +from phlop.proc import run, run_mp + +metrics = [ + "all", + "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum", # read + "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum", # wrte +] + + +def run(exe, events, output_file=None): + env = {} + cmd = f"" + return run(record_cmd(exe, events, output_file), check=True) + + +def cli_args_parser(description="Perf tool"): + import argparse + + _help = ValDict( + dir="working directory", + quiet="Redirect output to /dev/null", + logging="0=off, 1=on non zero exit code, 2=always", + outfile="path for saved file if active", + tool="", + extra="forward string to csan command", + ) + + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("remaining", nargs=argparse.REMAINDER) + parser.add_argument("-d", "--dir", default=".", help=_help.dir) + parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) + parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) + parser.add_argument("-t", "--tool", default="stat", help=_help.tool) + parser.add_argument("--logging", type=int, default=1, help=_help.logging) + parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra) + + return parser + + +def verify_cli_args(cli_args): + return cli_args diff --git a/phlop/app/nvidia/ncu.py b/phlop/app/nvidia/ncu.py new file mode 100644 index 0000000..00743e5 --- /dev/null +++ b/phlop/app/nvidia/ncu.py @@ -0,0 +1,57 @@ +# Nsight Compute CLI + +# https://docs.nvidia.com/nsight-compute/ProfilingGuide/index.html + +## samples +# ncu --metrics all +# ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum +# +# + +import os + +from phlop.dict import ValDict +from phlop.proc import run, run_mp + +metrics = [ + "all", + "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum", # read + "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum", # wrte +] + + +def run(exe, events, output_file=None): + env = {} + cmd = f"" + # ncu --target-processes all -o mpirun [mpi arguments] [app arguments] + return run(cmd, check=True) + + +def cli_args_parser(description="Perf tool"): + import argparse + + _help = ValDict( + dir="working directory", + quiet="Redirect output to /dev/null", + logging="0=off, 1=on non zero exit code, 2=always", + outfile="path for saved file if active", + tool="", + extra="forward string to csan command", + ) + + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("remaining", nargs=argparse.REMAINDER) + parser.add_argument("-d", "--dir", default=".", help=_help.dir) + parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) + parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) + parser.add_argument("-t", "--tool", default="stat", help=_help.tool) + parser.add_argument("--logging", type=int, default=1, help=_help.logging) + parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra) + + return parser + + +def verify_cli_args(cli_args): + return cli_args diff --git a/phlop/app/perf.py b/phlop/app/perf.py index cdd3537..790a176 100644 --- a/phlop/app/perf.py +++ b/phlop/app/perf.py @@ -112,8 +112,6 @@ def cli_args_parser(description="Perf tool"): quiet="Redirect output to /dev/null", cores="Parallism core/thread count", infiles="infiles", - print_only="Print only, no execution", - regex="Filter out non-matching execution strings", logging="0=off, 1=on non zero exit code, 2=always", outfile="path for saved file if active", tool="stat/record/etc", @@ -130,7 +128,6 @@ def cli_args_parser(description="Perf tool"): "-p", "--print_only", action="store_true", default=False, help=_help.print_only ) parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) - parser.add_argument("-r", "--regex", default=None, help=_help.regex) parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) parser.add_argument("-t", "--tool", default="stat", help=_help.tool) parser.add_argument("--logging", type=int, default=1, help=_help.logging) diff --git a/phlop/app/pfm/showevtinfo.py b/phlop/app/pfm/showevtinfo.py index cdfa660..0dfb389 100644 --- a/phlop/app/pfm/showevtinfo.py +++ b/phlop/app/pfm/showevtinfo.py @@ -6,7 +6,7 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass, field, as_dict from pathlib import Path from phlop.os import pushd @@ -78,4 +78,6 @@ def get_evt_info(): if __name__ == "__main__": - print(get_evt_info()) + import json + + print(json.dumps(as_dict(get_evt_info()), tabs=2)) diff --git a/sh/setup_pfm.sh b/sh/setup_pfm.sh old mode 100644 new mode 100755 From 2176713b97aaeba691f7382a949e2999807d92c8 Mon Sep 17 00:00:00 2001 From: deegan Date: Wed, 4 Dec 2024 13:36:40 +0100 Subject: [PATCH 3/5] ... --- inc/phlop/timing/threaded_scope_timer.hpp | 32 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/inc/phlop/timing/threaded_scope_timer.hpp b/inc/phlop/timing/threaded_scope_timer.hpp index b11fc78..c367327 100644 --- a/inc/phlop/timing/threaded_scope_timer.hpp +++ b/inc/phlop/timing/threaded_scope_timer.hpp @@ -56,6 +56,7 @@ struct ScopeTimerMan } _headers.clear(); thread_storage.clear(); + thread_reports.clear(); active = false; } @@ -132,10 +133,16 @@ struct ScopeTimerMan std::unique_lock lk(work_); thread_storage.emplace_back(std::move(pt.reports), std::move(pt.traces)); } + void move(std::shared_ptr& report) + { + std::unique_lock lk(work_); + thread_reports.emplace_back(std::move(report)); + } std::mutex work_; std::vector, std::vector>> thread_storage; + std::vector> thread_reports; // keep alive }; @@ -159,9 +166,11 @@ struct RunTimerReportSnapshot std::vector childs; }; + struct RunTimerReport { - std::string_view k, f; + std::string const k; // key + std::string const f; // function std::uint32_t l = 0; RunTimerReport(std::string_view const& _k, std::string_view const& _f, std::uint32_t const& _l) @@ -175,6 +184,7 @@ struct RunTimerReport ~RunTimerReport() {} + auto operator()(std::size_t i) { return snapshots[i].get(); } auto size() { return snapshots.size(); } @@ -182,6 +192,12 @@ struct RunTimerReport }; +struct ThreadLifeWatcher +{ + ~ThreadLifeWatcher() { ScopeTimerMan::INSTANCE().move(report); } + + std::shared_ptr report; +}; struct scope_timer @@ -264,7 +280,9 @@ struct BinaryTimerFile template void recurse_traces_for_keys(Trace const& c) { - std::string s{c->self->k}; + assert(c); + assert(c->self); + auto const& s = c->self->k; if (!key_ids.count(s)) { auto [it, b] = key_ids.emplace(s, key_ids.size()); @@ -359,11 +377,13 @@ namespace detail #endif #define PHLOP_SCOPE_TIMER(key) \ - static phlop::threaded::RunTimerReport PHLOP_STR_CAT(ridx_, __LINE__){key, __FILE__, \ - __LINE__}; \ + static thread_local auto PHLOP_STR_CAT(ridx_, __LINE__) \ + = std::make_shared(key, __FILE__, __LINE__); \ + static thread_local phlop::threaded::ThreadLifeWatcher PHLOP_STR_CAT(_watcher_, __LINE__){ \ + PHLOP_STR_CAT(ridx_, __LINE__)}; \ phlop::threaded::scope_timer PHLOP_STR_CAT(_scope_timer_, \ - __LINE__){PHLOP_STR_CAT(ridx_, __LINE__)}; \ - phlop::threaded::ScopeTimerMan::local().report_stack_ptr = &PHLOP_STR_CAT(ridx_, __LINE__); + __LINE__){*PHLOP_STR_CAT(ridx_, __LINE__)}; \ + phlop::threaded::ScopeTimerMan::local().report_stack_ptr = PHLOP_STR_CAT(ridx_, __LINE__).get(); #endif /*_PHLOP_TIMING_THREADED_SCOPE_TIMER_HPP_*/ From aa3b3253860eb05db2cffda08a942989d01d3754 Mon Sep 17 00:00:00 2001 From: PhilipDeegan Date: Sun, 8 Dec 2024 15:03:40 +0100 Subject: [PATCH 4/5] ++ --- phlop/app/__main__.py | 2 ++ phlop/app/nvidia/__main__.py | 12 ++++++++++++ phlop/app/nvidia/csan.py | 18 ++++++++++-------- phlop/app/nvidia/ncu.py | 19 ++++++++++--------- phlop/app/pfm/__main__.py | 12 ++++++++++++ phlop/app/pfm/showevtinfo.py | 27 +++++++++++++++++++++++---- sh/clean.sh | 7 +++---- 7 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 phlop/app/nvidia/__main__.py create mode 100644 phlop/app/pfm/__main__.py diff --git a/phlop/app/__main__.py b/phlop/app/__main__.py index c7cc76d..32d4f1b 100644 --- a/phlop/app/__main__.py +++ b/phlop/app/__main__.py @@ -9,6 +9,8 @@ phlop.app.cmake phlop.app.test_cases phlop.app.git + phlop.app.nvidia + phlop.app.pfm phlop.app.perf""" print(available_modules) diff --git a/phlop/app/nvidia/__main__.py b/phlop/app/nvidia/__main__.py new file mode 100644 index 0000000..318c6a8 --- /dev/null +++ b/phlop/app/nvidia/__main__.py @@ -0,0 +1,12 @@ +# +# +# +# +# + + +available_modules = """Available: + phlop.app.nvidia.csan + phlop.app.nvidia.ncucsan""" + +print(available_modules) diff --git a/phlop/app/nvidia/csan.py b/phlop/app/nvidia/csan.py index d425624..5bde740 100644 --- a/phlop/app/nvidia/csan.py +++ b/phlop/app/nvidia/csan.py @@ -7,11 +7,11 @@ # compute-sanitizer --tool racecheck [sanitizer_options] app_name [app_options] # # +# -import os from phlop.dict import ValDict -from phlop.proc import run, run_mp +from phlop.proc import run metrics = [ "all", @@ -20,13 +20,15 @@ ] -def run(exe, events, output_file=None): - env = {} - cmd = f"" - return run(record_cmd(exe, events, output_file), check=True) +def build_command(cli_args): + return f"compute-sanitizer --tool {cli_args.tool} {cli_args.remaining}" + + +def exec(cli_args): + return run(build_command(cli_args), check=True) -def cli_args_parser(description="Perf tool"): +def cli_args_parser(description="compute-sanitizer tool"): import argparse _help = ValDict( @@ -45,7 +47,7 @@ def cli_args_parser(description="Perf tool"): parser.add_argument("-d", "--dir", default=".", help=_help.dir) parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) - parser.add_argument("-t", "--tool", default="stat", help=_help.tool) + parser.add_argument("-t", "--tool", default="memcheck", help=_help.tool) parser.add_argument("--logging", type=int, default=1, help=_help.logging) parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra) diff --git a/phlop/app/nvidia/ncu.py b/phlop/app/nvidia/ncu.py index 00743e5..d0920e0 100644 --- a/phlop/app/nvidia/ncu.py +++ b/phlop/app/nvidia/ncu.py @@ -3,15 +3,15 @@ # https://docs.nvidia.com/nsight-compute/ProfilingGuide/index.html ## samples +# ncu --help # ncu --metrics all # ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum -# +# ncu --target-processes all -o mpirun [mpi arguments] [app arguments] # -import os from phlop.dict import ValDict -from phlop.proc import run, run_mp +from phlop.proc import run metrics = [ "all", @@ -20,14 +20,15 @@ ] -def run(exe, events, output_file=None): - env = {} - cmd = f"" - # ncu --target-processes all -o mpirun [mpi arguments] [app arguments] - return run(cmd, check=True) +def build_command(cli_args): + return f"ncu {cli_args.remaining}" + + +def exec(cli_args): + return run(build_command(cli_args), check=True) -def cli_args_parser(description="Perf tool"): +def cli_args_parser(description="ncu tool"): import argparse _help = ValDict( diff --git a/phlop/app/pfm/__main__.py b/phlop/app/pfm/__main__.py new file mode 100644 index 0000000..f9d7228 --- /dev/null +++ b/phlop/app/pfm/__main__.py @@ -0,0 +1,12 @@ +# +# +# +# +# + + +available_modules = """Available: + phlop.app.pfm.check_events + phlop.app.pfm.showevtinfo""" + +print(available_modules) diff --git a/phlop/app/pfm/showevtinfo.py b/phlop/app/pfm/showevtinfo.py index 0dfb389..4e33c15 100644 --- a/phlop/app/pfm/showevtinfo.py +++ b/phlop/app/pfm/showevtinfo.py @@ -6,7 +6,7 @@ import logging -from dataclasses import dataclass, field, as_dict +from dataclasses import asdict, dataclass, field from pathlib import Path from phlop.os import pushd @@ -32,7 +32,26 @@ class EVTInfo: pmu: str name: str umask: dict = field(default_factory=lambda: {}) - ect: dict = field(default_factory=lambda: {}) + etc: dict = field(default_factory=lambda: {}) + + +@dataclass +class EVTInfos: + data: list = field(default_factory=lambda: []) + + def __iter__(self): + return self.data.__iter__() + + def umasks(self): + return EVTInfos(data=[d for d in self.data if d.umask]) + + def umasks_in(self, needle): + return EVTInfos( + data=[d for d in self.data if any(needle in k for k in d.umask)] + ) + + def append(self, ev: EVTInfo): + self.data.append(ev) def _parse_evtinfo(bits_list): @@ -57,7 +76,7 @@ def parse_evtinfo_output(lines): if line.strip() == EVTINFO_delimiter: break - bits_list, results = [], [] + bits_list, results = [], EVTInfos() for line in lines[start_idx:]: if line == EVTINFO_delimiter: results.append(_parse_evtinfo(bits_list)) @@ -80,4 +99,4 @@ def get_evt_info(): if __name__ == "__main__": import json - print(json.dumps(as_dict(get_evt_info()), tabs=2)) + print(json.dumps(asdict(get_evt_info()), tabs=2)) diff --git a/sh/clean.sh b/sh/clean.sh index c94f4b5..d47de06 100755 --- a/sh/clean.sh +++ b/sh/clean.sh @@ -1,10 +1,9 @@ #!/usr/bin/env bash -set -e -CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$CWD"/.. +set -ex +CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && cd "$CWD"/.. RM_RF=( - __pycache__ + __pycache__ .ruff_cache phlop.egg-info dist ) for RM in ${RM_RF[@]}; do From 940c8ceadca46183047980fb34157a1144b129e9 Mon Sep 17 00:00:00 2001 From: PhilipDeegan Date: Sun, 8 Dec 2024 15:55:33 +0100 Subject: [PATCH 5/5] ++ --- mkn.pfm.yaml | 29 +++++++++++++++++++++++++++++ mkn.yaml | 24 ------------------------ phlop/app/nvidia/__main__.py | 2 +- phlop/app/nvidia/csan.py | 13 +++++++++---- phlop/app/nvidia/ncu.py | 1 - sh/setup_pfm.sh | 2 +- 6 files changed, 40 insertions(+), 31 deletions(-) create mode 100644 mkn.pfm.yaml diff --git a/mkn.pfm.yaml b/mkn.pfm.yaml new file mode 100644 index 0000000..abd7ee9 --- /dev/null +++ b/mkn.pfm.yaml @@ -0,0 +1,29 @@ +#! clean build test run -p scope_timer,threaded_scope_timer -Oa "-fPIC -std=c++20" -W 9 + +# run script first: ./sh/setup_pfm.sh + +name: phlop.pfm +parent: base + +profile: +- name: base + inc: inc + +- name: pfm + inc: tpp/pfm/include + +- name: pfm_lib + parent: pfm + mode: static + inc: tpp/pfm/lib, 0 + src: tpp/pfm/lib + +- name: pfm_events + self: pfm_lib + main: tpp/pfm/examples/check_events.c + out: check_events + +- name: pfm_info + self: pfm_lib + main: tpp/pfm/examples/showevtinfo.c + out: showevtinfo diff --git a/mkn.yaml b/mkn.yaml index d633af4..28cc1ba 100644 --- a/mkn.yaml +++ b/mkn.yaml @@ -7,28 +7,6 @@ profile: - name: base inc: inc -- name: pfm - inc: tpp/pfm/include - sub: pfm&tpp/pfm(git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4) - -- name: pfm_lib - parent: pfm - mode: static - inc: | - tpp/pfm/include - tpp/pfm/lib, 0 - src: tpp/pfm/lib - -- name: pfm_events - self: pfm_lib - main: tpp/pfm/examples/check_events.c - out: check_events - -- name: pfm_info - self: pfm_lib - main: tpp/pfm/examples/showevtinfo.c - out: showevtinfo - - name: scope_timer parent: base src: src/phlop/timing/scope_timer.cpp @@ -40,5 +18,3 @@ profile: src: src/phlop/timing/threaded_scope_timer.cpp mode: shared test: tests/timing/test_threaded_scope_timer.cpp - - diff --git a/phlop/app/nvidia/__main__.py b/phlop/app/nvidia/__main__.py index 318c6a8..bc16f88 100644 --- a/phlop/app/nvidia/__main__.py +++ b/phlop/app/nvidia/__main__.py @@ -7,6 +7,6 @@ available_modules = """Available: phlop.app.nvidia.csan - phlop.app.nvidia.ncucsan""" + phlop.app.nvidia.ncu""" print(available_modules) diff --git a/phlop/app/nvidia/csan.py b/phlop/app/nvidia/csan.py index 5bde740..d0ba3e5 100644 --- a/phlop/app/nvidia/csan.py +++ b/phlop/app/nvidia/csan.py @@ -21,11 +21,17 @@ def build_command(cli_args): - return f"compute-sanitizer --tool {cli_args.tool} {cli_args.remaining}" + cmd_parts = [ + "compute-sanitizer", + f"--tool {cli_args.tool}", + cli_args.extra if cli_args.extra else "", + " ".join(cli_args.remaining) if cli_args.remaining else "", + ] + return " ".join(filter(None, cmd_parts)) def exec(cli_args): - return run(build_command(cli_args), check=True) + return run(build_command(cli_args), check=True, cwd=cli_args.dir) def cli_args_parser(description="compute-sanitizer tool"): @@ -36,7 +42,7 @@ def cli_args_parser(description="compute-sanitizer tool"): quiet="Redirect output to /dev/null", logging="0=off, 1=on non zero exit code, 2=always", outfile="path for saved file if active", - tool="", + tool="Sanitizer tool to use (memcheck, racecheck, initcheck, synccheck)", extra="forward string to csan command", ) @@ -45,7 +51,6 @@ def cli_args_parser(description="compute-sanitizer tool"): ) parser.add_argument("remaining", nargs=argparse.REMAINDER) parser.add_argument("-d", "--dir", default=".", help=_help.dir) - parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) parser.add_argument("-t", "--tool", default="memcheck", help=_help.tool) parser.add_argument("--logging", type=int, default=1, help=_help.logging) diff --git a/phlop/app/nvidia/ncu.py b/phlop/app/nvidia/ncu.py index d0920e0..482c57a 100644 --- a/phlop/app/nvidia/ncu.py +++ b/phlop/app/nvidia/ncu.py @@ -45,7 +45,6 @@ def cli_args_parser(description="ncu tool"): ) parser.add_argument("remaining", nargs=argparse.REMAINDER) parser.add_argument("-d", "--dir", default=".", help=_help.dir) - parser.add_argument("-i", "--infiles", default=None, help=_help.infiles) parser.add_argument("-o", "--outfile", default=None, help=_help.outfile) parser.add_argument("-t", "--tool", default="stat", help=_help.tool) parser.add_argument("--logging", type=int, default=1, help=_help.logging) diff --git a/sh/setup_pfm.sh b/sh/setup_pfm.sh index 4b07bc5..2600b73 100755 --- a/sh/setup_pfm.sh +++ b/sh/setup_pfm.sh @@ -5,7 +5,7 @@ CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" && cd "$CWD"/.. set -ex [ ! -d "tpp/pfm" ] && ( - git clone git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4 tpp/pfm + git clone https://github.com/wcohen/libpfm4 tpp/pfm --depth 4 --shallow-submodules --recursive cd tpp/pfm make )