Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing compiler name & version from Spack environment to include in the performance log file #262

Merged
merged 25 commits into from
Apr 11, 2024
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3d72c50
Added compiler name & version to the performance log file
kaanolgu Jan 31, 2024
323ae11
Moving all changes to utils.py
kaanolgu Feb 2, 2024
b4f3658
spack_spec keys and values are printed apart from the compiler name a…
kaanolgu Feb 13, 2024
8ba7561
[NEW] Dictionary for spack_spec
kaanolgu Feb 26, 2024
b79f781
[NEW] MPI output
kaanolgu Feb 29, 2024
7b5e51e
Attempt to solve CI/test failed error
kaanolgu Feb 29, 2024
7a02712
Merge branch 'main' into ko/spack-spec-parser
kaanolgu Feb 29, 2024
f696cd5
[NEW] Concretized multiple spack_spec all in one dictionary
kaanolgu Mar 10, 2024
dd066c1
Merge branch 'ko/spack-spec-parser' of https://github.com/ukri-excali…
kaanolgu Mar 11, 2024
51c5713
[FIX] CI failing
kaanolgu Mar 11, 2024
49f2286
* MPI is moved into dictionary of each spec
kaanolgu Mar 21, 2024
4850d38
Merge branch 'main' into ko/spack-spec-parser
kaanolgu Mar 22, 2024
aed6bc2
[FIX] MPI to be string if present
kaanolgu Mar 25, 2024
5ac953f
Merge branch 'ko/spack-spec-parser' of github.com:ukri-excalibur/exca…
kaanolgu Mar 25, 2024
878aa5e
Update expected fields with unpacked names
tkoskela Mar 25, 2024
493c7bf
Install with -e
tkoskela Mar 25, 2024
3559fec
Minor formatting fixes.
pineapple-cat Mar 27, 2024
57767c7
Modified new key column insertion and added recursive flattening of r…
pineapple-cat Mar 27, 2024
c17b12e
Merge branch 'main' into ko/spack-spec-parser
pineapple-cat Mar 27, 2024
7c871e5
Fixed key columns return bug.
pineapple-cat Apr 9, 2024
9fbc4f1
Merge branch 'main' into ko/spack-spec-parser
kaanolgu Apr 9, 2024
cbcaeeb
Fix bug with unpacking nested dicts in column names - there might be …
ilectra Apr 10, 2024
2e46df8
Added unit test for key column unpacking.
pineapple-cat Apr 10, 2024
1177f71
Merge branch 'main' into ko/spack-spec-parser
pineapple-cat Apr 10, 2024
1272d8c
Debug unit test for recursive dictionary unpacking.
ilectra Apr 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -62,7 +62,7 @@ jobs:
shell: bash
run: |
python -m pip install --upgrade pip
pip install .[test]
pip install -e .[test]
- name: Install Spack
shell: bash
working-directory: ${{runner.workspace}}
22 changes: 19 additions & 3 deletions benchmarks/modules/utils.py
Original file line number Diff line number Diff line change
@@ -10,8 +10,8 @@
from reframe.core.exceptions import BuildSystemError
from reframe.core.logging import getlogger
from reframe.utility.osext import run_command


import reframe.utility.osext as osext
import reframe.utility.sanity as sn
SYSFILE = 'systems/sysinfo.json' # interpreted relative to jupyter root

def get_jupyter_root():
@@ -243,6 +243,7 @@ def identify_build_environment(current_partition):
class SpackTest(rfm.RegressionTest):
build_system = 'Spack'
spack_spec = variable(str, value='', loggable=True)
spack_spec_dict = variable(str, value='', loggable=True)

@run_before('compile')
def setup_spack_environment(self):
@@ -267,11 +268,25 @@ def setup_spack_environment(self):
f'(cd {cp_dir}; find . \( -name "spack.yaml" -o -name "compilers.yaml" -o -name "packages.yaml" \) -print0 | xargs -0 tar cf - | tar -C {dest} -xvf -)',
f'spack -e {self.build_system.environment} config add "config:install_tree:root:{env_dir}/opt"',
]

cmd_spack_spec_dict = 'from spack import environment;\
spec_list = environment.active_environment().concrete_roots();\
key_list_for_each = [spec.variants.dict.keys() for spec in spec_list];\
result_dict = {spec.name: {"compiler": {"name": spec.compiler.name, "version": str(spec.compiler.versions).lstrip("=")}, "variants": {key: str(spec.variants.dict[key].value) if isinstance(spec.variants.dict[key].value, bool) else "" if spec.variants.dict[key].value is None else list(spec.variants.dict[key].value) if isinstance(spec.variants.dict[key].value, tuple) else spec.variants.dict[key].value for key in key_list_for_each[i]},"mpi":str(spec["mpi"]) if "mpi" in spec else "" } for i, spec in enumerate(spec_list)};\
print(result_dict)'
self.postrun_cmds.append(f'echo "spack_spec_dict: $(spack -e {self.build_system.environment} python -c \'{cmd_spack_spec_dict}\')"')

# Keep the `spack.lock` file in the output directory so that the Spack
# environment can be faithfully reproduced later.
self.keep_files.append(os.path.realpath(os.path.join(self.build_system.environment, 'spack.lock')))


@run_after('run')
def get_full_variants(self):
    """Recover the Spack spec dictionary echoed to stdout during the run.

    The build step echoes a line of the form ``spack_spec_dict: {...}``;
    this hook extracts that payload and stores it in ``self.spack_spec_dict``
    so it can be written to the performance log.
    """
    with osext.change_dir(self.stagedir):
        raw = sn.extractsingle(r'spack_spec_dict: \s*(.*)', self.stdout, 1).evaluate()
        # convert all single quotes to double quotes since JSON does not recognise it
        self.spack_spec_dict = raw.replace("'", "\"")

@run_before('compile')
def setup_build_system(self):
# The `self.spack_spec` attribute is the user-facing and loggable
@@ -309,6 +324,7 @@ def setup_build_job_num_cpus(self):
self.build_job.num_cpus_per_task = min(16, self.current_partition.processor.num_cpus)



if __name__ == '__main__':

#v = get_sysinfo(sys.argv[-1])
1 change: 1 addition & 0 deletions benchmarks/reframe_config.py
Original file line number Diff line number Diff line change
@@ -781,6 +781,7 @@ def spack_root_to_path():
'%(check_environ)s|'
'%(check_extra_resources)s|'
'%(check_env_vars)s|'
'%(check_spack_spec_dict)s|'
'%(check_tags)s'
),
'format_perfvars': (
52 changes: 41 additions & 11 deletions post-processing/perflog_handler.py
Original file line number Diff line number Diff line change
@@ -127,9 +127,9 @@ def read_perflog(path: Path):
df.drop("display_name", axis=1, inplace=True)

# replace other columns with dictionary contents
dict_cols = [c for c in ["extra_resources", "env_vars"] if c in df.columns]
dict_cols = [c for c in ["extra_resources", "env_vars", "spack_spec_dict"] if c in df.columns]
for col in dict_cols:
results = df[col].apply(lambda x: json.loads(x))
results = df[col].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
# insert new columns and contents
insert_key_cols(df, df.columns.get_loc(col), results)
# drop old column
@@ -156,19 +156,49 @@ def get_display_name_info(display_name: str):
return test_name, dict(params)


def insert_key_cols(df: pd.DataFrame, index: int, results: 'list[dict]'):
def find_key_cols(row_info: 'dict | None', key_cols=None, col_name=None):
    """
    Return key columns and their values by recursively finding the innermost
    dictionary contents of given row information.

    Nested keys are flattened into underscore-joined column names, e.g.
    ``{"compiler": {"name": "gcc"}}`` becomes ``{"compiler_name": "gcc"}``.

    Args:
        row_info: dict | None, contains key-value mapping information from one row.
        key_cols: dict | None, flattened dictionary contents accumulated so far.
            A fresh dict is created when omitted.
        col_name: str | None, the name of a previous column key to be used as a
            prefix for new column keys.
    """

    # FIX: the previous default `key_cols={}` was a mutable default argument
    # shared across calls, so keys from earlier rows leaked into later calls
    # whenever the caller omitted the argument. Create a fresh dict instead.
    if key_cols is None:
        key_cols = {}

    if isinstance(row_info, dict):
        for k in row_info.keys():
            # determine new key column name (prefix with parent key, if any)
            new_col_name = "{0}_{1}".format(col_name, k) if col_name else k
            # recurse if key value is also a dict
            if isinstance(row_info.get(k), dict):
                find_key_cols(row_info.get(k), key_cols, col_name=new_col_name)
            # otherwise add key-value pair to key columns dict
            else:
                key_cols[new_col_name] = row_info.get(k)
    return key_cols


def insert_key_cols(df: pd.DataFrame, index: int, results: 'list[dict]'):
    """
    Modify a dataframe to include new columns (extracted from results) inserted at
    a given index, with names optionally prefixed by the original column name and each key.

    Args:
        df: DataFrame, to be modified by this function.
        index: int, index at which to insert new columns into the dataframe.
        results: 'list[dict]', contains key-value mapping information from all rows.
    """

    # flatten each row's (possibly nested) dict into a single-level dict
    flattened_rows = [find_key_cols(r, key_cols={}) for r in results]
    # union of key names across all flattened rows
    all_keys = set(chain.from_iterable(row.keys() for row in flattened_rows))

    for key in all_keys:
        # skip keys that already exist as columns in the dataframe
        if key in df.columns:
            continue
        # insert key as a new column; rows lacking the key contribute None
        df.insert(index, key, [row.get(key) for row in flattened_rows])
        # increment index for next column insertion to maintain order
        index += 1
21 changes: 20 additions & 1 deletion post-processing/test_post_processing.py
Original file line number Diff line number Diff line change
@@ -62,6 +62,23 @@ def test_display_name_parsing():
assert len(params) == 0


# Test that recursive unpacking of key columns works as expected
def test_key_col_unpacking():

    input_rows = [
        {"benchmark": "bench1",
         "bench1": {"compiler": {"name": "compiler1", "version": 9.2}}},
        {"benchmark": "bench2",
         "compiler": {"name": "compiler2", "version": 12.1},
         "variants": {"cuda": True},
         "mpi": ""},
    ]

    # flatten test dicts into key columns dicts
    actual = [log_hand.find_key_cols(r, key_cols={}) for r in input_rows]

    # expected results: nested keys flattened with underscore-joined prefixes
    expected = [
        {"benchmark": "bench1",
         "bench1_compiler_name": "compiler1",
         "bench1_compiler_version": 9.2},
        {"benchmark": "bench2",
         "compiler_name": "compiler2",
         "compiler_version": 12.1,
         "variants_cuda": True,
         "mpi": ""},
    ]
    assert actual == expected


@pytest.fixture(scope="module")
# Fixture to run sombrero benchmark example, generate perflogs, and clean up after test
def run_sombrero():
@@ -159,7 +176,9 @@ def test_read_perflog(run_sombrero):
"num_cpus_per_task", "num_tasks_per_node", "num_gpus_per_node",
"flops_value", "flops_unit", "flops_ref", "flops_lower_thres",
"flops_upper_thres", "spack_spec", "test_name", "tasks", "cpus_per_task",
"system", "partition", "job_nodelist", "environ", "OMP_NUM_THREADS", "tags"]
"system", "partition", "job_nodelist", "environ", "OMP_NUM_THREADS",
"sombrero_compiler_name", "sombrero_compiler_version",
"sombrero_variants_build_system", "sombrero_mpi", "tags"]

# check example perflog file is read appropriately
# check all expected columns are present