Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

redfishpower: add extra timeout debug information #154

Merged
merged 4 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions man/powerman.dev.5.in
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,14 @@ Get device in a state so login script will work
.TP
.I "status_all, status[%s]"
Obtain plug state for all plugs or only the specified plug.
When all plugs of a device are involved in a plug status query,
the status_all script, if defined, will be called in preference to the
status script; otherwise the status script is called for each plug.
.TP
.I "on_all, on_range[%s], on[%s]"
.I "on_all, on_ranged[%s], on[%s]"
Power on all plugs, a range of plugs, or the specified plug.
.TP
.I "off_all, off_range[%s], off[%s]"
.I "off_all, off_ranged[%s], off[%s]"
Power off all plugs, a range of plugs, or the specified plug.
.TP
.I "cycle_all, cycle_range[%s], cycle[%s]"
.I "cycle_all, cycle_ranged[%s], cycle[%s]"
Power cycle all plugs, a range of plugs, or the specified plug.
The intent of this command was to map to the RPC's cycle command;
however, device script are increasingly implementing this in terms of
Expand Down Expand Up @@ -122,7 +119,7 @@ Flash beacon on the specified plug.
.I "beacon_off[%s]"
Clear beacon on the specified plug.
.TP
.I "reset_all, reset_range[%s], reset[%s]"
.I "reset_all, reset_ranged[%s], reset[%s]"
Reset all plugs, a range of plugs, or only the specified plug.
Reset refers to signaling a motherboard reset butten header, not a plug cycle.
.LP
Expand Down
101 changes: 78 additions & 23 deletions src/redfishpower/redfishpower.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,12 @@ static zhashx_t *test_power_status;
#define CMD_ON "on"
#define CMD_OFF "off"

#define STATUS_ON "on"
#define STATUS_OFF "off"
#define STATUS_UNKNOWN "unknown"
#define STATUS_ON "on"
#define STATUS_OFF "off"
#define STATUS_PAUSED "Paused"
#define STATUS_POWERING_ON "PoweringOn"
#define STATUS_POWERING_OFF "PoweringOff"
#define STATUS_UNKNOWN "unknown"

enum {
STATE_SEND_POWERCMD, /* stat, on, off */
Expand Down Expand Up @@ -368,14 +371,21 @@ static void stat_cmd(zlistx_t *activecmds, CURLM *mh, char **av)
hostlist_destroy(lhosts);
}

static void parse_onoff_response(struct powermsg *pm, const char **strp)
/* status_strp - on, off, unknown
* rstatus_strp - on, off, paused, poweringoff, poweringon, unknown
*/
static void parse_onoff_response(struct powermsg *pm,
const char **status_strp,
const char **rstatus_strp)
{
if (pm->output) {
json_error_t error;
json_t *o;

if (!(o = json_loads(pm->output, 0, &error))) {
(*strp) = "parse error";
(*status_strp) = "parse error";
if (rstatus_strp)
(*rstatus_strp) = "parse error";
if (verbose)
printf("%s: parse response error %s\n",
pm->hostname,
Expand All @@ -384,49 +394,69 @@ static void parse_onoff_response(struct powermsg *pm, const char **strp)
else {
json_t *val = json_object_get(o, "PowerState");
if (!val) {
(*strp) = "no powerstate";
(*status_strp) = "no powerstate";
if (verbose)
printf("%s: no PowerState\n", pm->hostname);
}
else {
const char *str = json_string_value(val);
if (strcasecmp(str, "On") == 0)
(*strp) = STATUS_ON;
else if (strcasecmp(str, "Off") == 0)
(*strp) = STATUS_OFF;
if (strcasecmp(str, "On") == 0) {
(*status_strp) = STATUS_ON;
if (rstatus_strp)
(*rstatus_strp) = STATUS_ON;
}
else if (strcasecmp(str, "Off") == 0) {
(*status_strp) = STATUS_OFF;
if (rstatus_strp)
(*rstatus_strp) = STATUS_OFF;
}
else {
(*strp) = STATUS_UNKNOWN;
(*status_strp) = STATUS_UNKNOWN;
if (rstatus_strp) {
if (strcasecmp(str, "Paused") == 0)
(*rstatus_strp) = STATUS_PAUSED;
else if (strcasecmp(str, "PoweringOff") == 0)
(*rstatus_strp) = STATUS_POWERING_OFF;
else if (strcasecmp(str, "PoweringOn") == 0)
(*rstatus_strp) = STATUS_POWERING_ON;
else
(*rstatus_strp) = STATUS_UNKNOWN;
}
if (verbose)
printf("%s: unknown status - %s\n",
pm->hostname,str);
pm->hostname, str);
}
}
}
json_decref(o);
}
else
(*strp) = "no output error";
(*status_strp) = "no output error";
}

static void parse_onoff(struct powermsg *pm, const char **strp)
static void parse_onoff(struct powermsg *pm,
const char **status_strp,
const char **rstatus_strp)
{
if (!test_mode) {
parse_onoff_response(pm, strp);
parse_onoff_response(pm, status_strp, rstatus_strp);
return;
}
else {
char *tmp = zhashx_lookup(test_power_status, pm->hostname);
if (!tmp)
err_exit(false, "zhashx_lookup on test status failed");
(*strp) = tmp;
(*status_strp) = tmp;
if (rstatus_strp)
(*rstatus_strp) = tmp;
}
}

static void stat_process(struct powermsg *pm)
{
const char *str;
parse_onoff(pm, &str);
printf("%s: %s\n", pm->hostname, str);
const char *status_str;
parse_onoff(pm, &status_str, NULL);
printf("%s: %s\n", pm->hostname, status_str);
}

static void stat_cleanup(struct powermsg *pm)
Expand Down Expand Up @@ -556,19 +586,44 @@ static void on_off_process(zlistx_t *delayedcmds, struct powermsg *pm)
}
}
else if (pm->state == STATE_WAIT_UNTIL_ON_OFF) {
const char *str;
const char *status_str;
const char *rstatus_str;
struct timeval now;

parse_onoff(pm, &str);
if (strcmp(str, pm->cmd) == 0) {
parse_onoff(pm, &status_str, &rstatus_str);
if (strcmp(status_str, pm->cmd) == 0) {
printf("%s: %s\n", pm->hostname, "ok");
return;
}

/* check if we've timed out */
gettimeofday(&now, NULL);
if (timercmp(&now, &pm->timeout, >)) {
printf("%s: %s\n", pm->hostname, "timeout");
/* if target is not what it should be, this is unexpected, likely
* hardware problem */
if ((strcmp(pm->cmd, CMD_ON) == 0
&& strcmp(status_str, STATUS_OFF) == 0)
|| (strcmp(pm->cmd, CMD_OFF) == 0
&& strcmp(status_str, STATUS_ON) == 0))
printf("%s: timeout - unexpected %s\n",
pm->hostname, status_str);
/* if still powering on/off, this is the "normal" timeout
* scenario, timeout should be increased
*/
else if ((strcmp(pm->cmd, CMD_ON) == 0
&& strcmp(rstatus_str, STATUS_POWERING_ON) == 0)
|| (strcmp(pm->cmd, CMD_OFF) == 0
&& strcmp(rstatus_str, STATUS_POWERING_OFF) == 0))
printf("%s: timeout - %s still in progress\n",
pm->hostname, pm->cmd);
else {
if (verbose)
printf("%s: timeout - unknown status %s\n",
pm->hostname, rstatus_str);
else
printf("%s: timeout\n",
pm->hostname);
}
return;
}

Expand Down
Loading