From c6a7d45b36368e9c41222f40b642ab5dceafed38 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 13 Nov 2024 19:38:56 -0700 Subject: [PATCH] Allow the dpm to query the server for local peer info The local PMIx server may have info on the local peers for a job - so allow the client to request that info if it doesn't already possess it. Also clean up some diagnostic output so it shows the PMIx attribute name (or just the provided string if it isn't an attribute name) to make it easier to correlate to the code. Signed-off-by: Ralph Castain --- ompi/dpm/dpm.c | 2 +- opal/mca/pmix/pmix-internal.h | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 8e4057daffb..719b0c4a735 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -430,7 +430,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, wildcard_rank.jobid = proc->super.proc_name.jobid; wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; /* retrieve the local peers for the specified jobid */ - OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS, + OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCAL_PEERS, &wildcard_rank, &val, PMIX_STRING); if (OPAL_SUCCESS == rc && NULL != val) { char **peers = opal_argv_split(val, ','); diff --git a/opal/mca/pmix/pmix-internal.h b/opal/mca/pmix/pmix-internal.h index 4e10393f60f..3c00306f501 100644 --- a/opal/mca/pmix/pmix-internal.h +++ b/opal/mca/pmix/pmix-internal.h @@ -9,7 +9,7 @@ * reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * Copyright (c) 2021 Argonne National Laboratory. All rights * reserved. * $COPYRIGHT$ @@ -293,7 +293,7 @@ typedef struct { OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \ OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, \ - OPAL_NAME_PRINT(*(p)), (s))); \ + OPAL_NAME_PRINT(*(p)), PMIx_Get_attribute_name(s))); \ OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \ PMIX_INFO_LOAD(&_info, PMIX_OPTIONAL, NULL, PMIX_BOOL); \ (r) = PMIx_Get(&(_proc), (s), &(_info), 1, &(_kv)); \ @@ -334,7 +334,7 @@ typedef struct { OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ "%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \ OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, \ - OPAL_NAME_PRINT(*(p)), (s))); \ + OPAL_NAME_PRINT(*(p)), PMIx_Get_attribute_name(s))); \ OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \ PMIX_INFO_LOAD(&_info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); \ (r) = PMIx_Get(&(_proc), (s), &(_info), 1, &(_kv)); \ @@ -370,7 +370,8 @@ typedef struct { size_t _sz; \ OPAL_OUTPUT_VERBOSE( \ (1, opal_pmix_verbose_output, "%s[%s:%d] MODEX RECV VALUE FOR PROC %s KEY %s", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, OPAL_NAME_PRINT(*(p)), (s))); \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, OPAL_NAME_PRINT(*(p)), \ + PMIx_Get_attribute_name(s))); \ OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \ (r) = PMIx_Get(&(_proc), (s), NULL, 0, &(_kv)); \ if (NULL == _kv) { \ @@ -406,7 +407,7 @@ typedef struct { OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ "%s[%s:%d] MODEX RECV STRING OPTIONAL FOR PROC %s KEY %s", \ OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, \ - OPAL_NAME_PRINT(*(p)), (s))); \ + OPAL_NAME_PRINT(*(p)), PMIx_Get_attribute_name(s))); \ *(d) = NULL; \ *(sz) = 0; \ OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \ @@ -444,7 +445,8 @@ typedef struct { pmix_info_t _info; \ OPAL_OUTPUT_VERBOSE( \ (1, opal_pmix_verbose_output, "%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, OPAL_NAME_PRINT(*(p)), (s))); \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), __FILE__, __LINE__, OPAL_NAME_PRINT(*(p)), \ + PMIx_Get_attribute_name(s))); \ *(d) = NULL; \ *(sz) = 0; \ OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \