Skip to content

Commit

Permalink
Merge pull request #172 from chu11/powerman_diag
Browse files Browse the repository at this point in the history
powerman: support error diagnostics with setresult
  • Loading branch information
mergify[bot] authored Apr 11, 2024
2 parents 00cd6c3 + 4db0fc6 commit 598d70d
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 37 deletions.
21 changes: 20 additions & 1 deletion src/powerman/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ static void _create_client_socket(int fd);
static void _create_client_stdio(void);
static void _act_finish(int client_id, ActError acterr, const char *fmt, ...);
static void _telemetry_printf(int client_id, const char *fmt, ...);
static void _diag_printf(int client_id, const char *fmt, ...);
#if HAVE_TCP_WRAPPERS
/* tcp wrappers support */
extern int hosts_ctl(char *daemon, char *client_name, char *client_addr,
Expand Down Expand Up @@ -636,7 +637,7 @@ static void _parse_input(Client * c, char *input)
dbg(DBG_CLIENT, "_parse_input: enqueuing actions");
cmd->pending = dev_enqueue_actions(cmd->com, cmd->hl, _act_finish,
c->telemetry ? _telemetry_printf : NULL,
c->client_id, cmd->arglist);
_diag_printf, c->client_id, cmd->arglist);
if (cmd->pending == 0) {
_client_printf(c, CP_ERR_UNIMPL);
_destroy_command(cmd);
Expand Down Expand Up @@ -669,6 +670,24 @@ static void _telemetry_printf(int client_id, const char *fmt, ...)
}
}

/*
 * Device diagnostics callback: format the printf-style message and
 * deliver it to the client identified by client_id as a CP_INFO_DIAG
 * response.  Nothing is sent if _find_client() does not return a client.
 */
static void _diag_printf(int client_id, const char *fmt, ...)
{
    Client *client = _find_client(client_id);
    va_list args;
    char *msg;

    /* no matching client: drop the message */
    if (!client) {
        return;
    }

    va_start(args, fmt);
    msg = hvsprintf(fmt, args);
    va_end(args);

    _client_printf(client, CP_INFO_DIAG, msg);
    /* the formatted string is released here once it has been sent */
    xfree(msg);
}

/* See chaos/powerman#138.
* We don't want these messages syslogged when we run the test suite,
* so send them to stderr and when powerman is run as a system service,
Expand Down
1 change: 1 addition & 0 deletions src/powerman/client_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
#define CP_INFO_NODES "306 %s" CP_EOL
#define CP_INFO_XNODES "307 %s" CP_EOL
#define CP_INFO_ACTERROR "308 %s" CP_EOL
#define CP_INFO_DIAG "309 %s" CP_EOL

#endif /* PM_CLIENT_PROTO_H */

Expand Down
43 changes: 28 additions & 15 deletions src/powerman/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ typedef struct {
List exec; /* stack of ExecCtxs (outer block is first) */
ActionCB complete_fun; /* callback for action completion */
VerbosePrintf vpf_fun; /* callback for device telemetry */
DiagPrintf dpf_fun; /* callback for device diagnostics */
int client_id; /* client id so completion can find client */
ActError errnum; /* errno for action */
struct timeval time_stamp; /* time stamp for timeouts */
Expand Down Expand Up @@ -120,13 +121,14 @@ static int _get_all_script(Device * dev, int com);
static int _get_ranged_script(Device * dev, int com);
static int _enqueue_actions(Device * dev, int com, hostlist_t hl,
ActionCB complete_fun, VerbosePrintf vpf_fun,
int client_id, ArgList arglist);
DiagPrintf dpf_fun, int client_id, ArgList arglist);
static Action *_create_action(Device * dev, int com, List plugs,
ActionCB complete_fun, VerbosePrintf vpf_fun,
int client_id, ArgList arglist);
DiagPrintf dpf_fun, int client_id, ArgList arglist);
static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,
ActionCB complete_fun,
VerbosePrintf vpf_fun,
DiagPrintf dpf_fun,
int client_id, ArgList arglist);
static char *_getregex_buf(cbuf_t b, xregex_t re, xregex_match_t xm);
static bool _command_needs_device(Device * dev, hostlist_t hl);
Expand Down Expand Up @@ -258,7 +260,7 @@ static void _rewind_action(Action *act)

static Action *_create_action(Device * dev, int com, List plugs,
ActionCB complete_fun, VerbosePrintf vpf_fun,
int client_id, ArgList arglist)
DiagPrintf dpf_fun, int client_id, ArgList arglist)
{
Action *act;
ExecCtx *e;
Expand All @@ -268,6 +270,7 @@ static Action *_create_action(Device * dev, int com, List plugs,
act->com = com;
act->complete_fun = complete_fun;
act->vpf_fun = vpf_fun;
act->dpf_fun = dpf_fun;
act->client_id = client_id;

act->exec = list_create((ListDelF)_destroy_exec_ctx);
Expand Down Expand Up @@ -465,7 +468,8 @@ bool dev_check_actions(int com, hostlist_t hl)
* actions "check in".
*/
int dev_enqueue_actions(int com, hostlist_t hl, ActionCB complete_fun,
VerbosePrintf vpf_fun, int client_id, ArgList arglist)
VerbosePrintf vpf_fun, DiagPrintf dpf_fun,
int client_id, ArgList arglist)
{
Device *dev;
ListIterator itr;
Expand All @@ -480,7 +484,7 @@ int dev_enqueue_actions(int com, hostlist_t hl, ActionCB complete_fun,
continue; /* unimplemented script */
if (hl && !_command_needs_device(dev, hl))
continue; /* uninvolved device */
count = _enqueue_actions(dev, com, hl, complete_fun, vpf_fun,
count = _enqueue_actions(dev, com, hl, complete_fun, vpf_fun, dpf_fun,
client_id, arglist);
if (count > 0 && dev->connect_state != DEV_CONNECTED)
dev->retry_count = 0; /* expedite retries on this device since */
Expand All @@ -493,7 +497,7 @@ int dev_enqueue_actions(int com, hostlist_t hl, ActionCB complete_fun,

static int _enqueue_actions(Device * dev, int com, hostlist_t hl,
ActionCB complete_fun, VerbosePrintf vpf_fun,
int client_id, ArgList arglist)
DiagPrintf dpf_fun, int client_id, ArgList arglist)
{
Action *act;
int count = 0;
Expand All @@ -506,15 +510,15 @@ static int _enqueue_actions(Device * dev, int com, hostlist_t hl,
_rewind_action(act);
dbg(DBG_ACTION, "resetting iterator for non-login action");
}
act = _create_action(dev, com, NULL, complete_fun, vpf_fun,
act = _create_action(dev, com, NULL, complete_fun, vpf_fun, dpf_fun,
client_id, arglist);
list_prepend(dev->acts, act);
count++;
break;
case PM_LOG_OUT:
case PM_PING:
act = _create_action(dev, com, NULL, complete_fun, vpf_fun, client_id,
arglist);
act = _create_action(dev, com, NULL, complete_fun, vpf_fun, dpf_fun,
client_id, arglist);
list_append(dev->acts, act);
count++;
break;
Expand All @@ -529,7 +533,7 @@ static int _enqueue_actions(Device * dev, int com, hostlist_t hl,
case PM_STATUS_TEMP:
case PM_STATUS_BEACON:
count += _enqueue_targeted_actions(dev, com, hl, complete_fun,
vpf_fun, client_id, arglist);
vpf_fun, dpf_fun, client_id, arglist);
break;
default:
assert(false);
Expand Down Expand Up @@ -634,6 +638,7 @@ static bool _is_query_action(int com)
static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,
ActionCB complete_fun,
VerbosePrintf vpf_fun,
DiagPrintf dpf_fun,
int client_id, ArgList arglist)
{
List new_acts = list_create((ListDelF) _destroy_action);
Expand Down Expand Up @@ -680,7 +685,7 @@ static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,
}

act = _create_action(dev, com, plugs, complete_fun, vpf_fun,
client_id, arglist);
dpf_fun, client_id, arglist);
list_append(new_acts, act);
}
}
Expand Down Expand Up @@ -709,7 +714,7 @@ static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,

if (ncom != -1) {
act = _create_action(dev, ncom, NULL, complete_fun,
vpf_fun, client_id, arglist);
vpf_fun, dpf_fun, client_id, arglist);
list_append(dev->acts, act);
count++;
}
Expand All @@ -723,7 +728,7 @@ static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,

if (ncom != -1) {
act = _create_action(dev, ncom, ranged_plugs, complete_fun,
vpf_fun, client_id, arglist);
vpf_fun, dpf_fun, client_id, arglist);
list_append(dev->acts, act);
used_ranged_plugs++;
count++;
Expand All @@ -750,7 +755,7 @@ static int _enqueue_targeted_actions(Device * dev, int com, hostlist_t hl,
*/
static void _enqueue_login(Device *dev)
{
_enqueue_actions(dev, PM_LOG_IN, NULL, NULL, NULL, 0, NULL);
_enqueue_actions(dev, PM_LOG_IN, NULL, NULL, NULL, NULL, 0, NULL);
}


Expand Down Expand Up @@ -1172,6 +1177,14 @@ static bool _process_setresult(Device *dev, Action *act, ExecCtx *e)
xfree(arg->val);
arg->val = xstrdup(str);
}

if (result != RT_SUCCESS) {
char strbuf[1024];
snprintf(strbuf, sizeof(strbuf), "%s", arg->val);
/* remove trailing carriage return or newline */
strbuf[strcspn(strbuf, "\r\n")] = '\0';
act->dpf_fun(act->client_id, "%s: %s", arg->node, strbuf);
}
}
xfree(str);
/* if no match, do nothing */
Expand Down Expand Up @@ -1410,7 +1423,7 @@ static void _enqueue_ping(Device * dev, struct timeval *timeout)

if (dev->scripts[PM_PING] != NULL && timerisset(&dev->ping_period)) {
if (_timeout(&dev->last_ping, &dev->ping_period, &timeleft)) {
_enqueue_actions(dev, PM_PING, NULL, NULL, NULL, 0, NULL);
_enqueue_actions(dev, PM_PING, NULL, NULL, NULL, NULL, 0, NULL);
if (gettimeofday(&dev->last_ping, NULL) < 0)
err_exit(true, "gettimeofday");
dbg(DBG_ACTION, "%s: enqeuuing ping", dev->name);
Expand Down
11 changes: 8 additions & 3 deletions src/powerman/device_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,20 @@ typedef struct _device {

typedef enum { ACT_ESUCCESS, ACT_EEXPFAIL, ACT_EABORT, ACT_ECONNECTTIMEOUT,
ACT_ELOGINTIMEOUT } ActError;
typedef void (*ActionCB) (int client_id, ActError acterr, const char *fmt, ...);
typedef void (*VerbosePrintf) (int client_id, const char *fmt, ...);
typedef void (*ActionCB) (int client_id, ActError acterr, const char *fmt, ...)
__attribute__ ((format (printf, 3, 4)));
typedef void (*VerbosePrintf) (int client_id, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
typedef void (*DiagPrintf) (int client_id, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));

#define MIN_DEV_BUF 1024
#define MAX_DEV_BUF 1024*64

void dev_add(Device * dev);
int dev_enqueue_actions(int com, hostlist_t hl, ActionCB complete_fun,
VerbosePrintf vpf_fun, int client_id, ArgList arglist);
VerbosePrintf vpf_fun, DiagPrintf dpf_fun,
int client_id, ArgList arglist);
bool dev_check_actions(int com, hostlist_t hl);

Device *dev_create(const char *name);
Expand Down
8 changes: 7 additions & 1 deletion src/powerman/powerman.c
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,12 @@ static bool _suppress(int num)
return false;
}

/* Select the output stream for a server response with numeric code num:
 * diagnostic responses (code 309) go to stderr, every other code goes
 * to stdout.
 */
static FILE *getstream(int num)
{
    if (num != 309) {
        return stdout;
    }
    return stderr;
}

/* Get a line from the socket and display on stdout.
* Return the numerical portion of the response.
*/
Expand All @@ -419,7 +425,7 @@ static int _process_line(int fd)
num = -1;
if (strlen(buf) > 4) {
if (!_suppress(num))
printf("%s\n", buf + 4);
fprintf(getstream(num), "%s\n", buf + 4);
} else
err_exit(false, "unexpected response from server");
xfree(buf);
Expand Down
3 changes: 2 additions & 1 deletion t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ TESTSCRIPTS = \
t0032-list.t \
t0033-valgrind.t \
t0034-redfishpower.t \
t0035-power-result.t
t0035-power-result.t \
t0036-diagnostics.t

# make check runs these TAP tests directly (both scripts and programs)
TESTS = \
Expand Down
Loading

0 comments on commit 598d70d

Please sign in to comment.