diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 032091a9ee..a8902d3df3 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,527 +1,517 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright (C) 2004-2016 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #include #include #include #include #include #include #include #include #include #define STORM_INTERVAL 2 /* in seconds */ #define STORM_MULTIPLIER 5 /* multiplied by the number of nodes */ struct election_s { enum election_result state; guint count; char *name; char *uname; GSourceFunc cb; GHashTable *voted; mainloop_timer_t *timeout; /* When to stop if not everyone casts a vote */ }; static void election_complete(election_t *e) { crm_info("Election %s complete", e->name); e->state = election_won; if(e->cb) { e->cb(e); } election_reset(e); } static gboolean election_timer_cb(gpointer user_data) { election_t *e = user_data; crm_info("Election %s %p timed out", e->name, e); election_complete(e); return FALSE; } enum election_result election_state(election_t *e) { if(e) { return e->state; } return election_error; } election_t * election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb) { static guint count = 0; election_t *e = calloc(1, sizeof(election_t)); if(e != NULL) { if(name) { e->name = crm_strdup_printf("election-%s", name); } else { e->name = crm_strdup_printf("election-%u", count++); } e->cb = cb; e->uname = strdup(uname); e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e); crm_trace("Created %s %p", e->name, e); } return e; } void election_remove(election_t *e, const char *uname) { if(e && uname && e->voted) { g_hash_table_remove(e->voted, uname); } } void election_reset(election_t *e) { crm_trace("Resetting election %s", e->name); if(e) { mainloop_timer_stop(e->timeout); } if (e && e->voted) { crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted)); g_hash_table_destroy(e->voted); e->voted = NULL; } } void election_fini(election_t *e) { if(e) { election_reset(e); crm_trace("Destroying %s", e->name); mainloop_timer_del(e->timeout); free(e->uname); free(e->name); free(e); } } static void election_timeout_start(election_t *e) { if(e) { mainloop_timer_start(e->timeout); } } void election_timeout_stop(election_t *e) { if(e) { mainloop_timer_stop(e->timeout); } } void election_timeout_set_period(election_t *e, guint period) { if(e) { mainloop_timer_set_period(e->timeout, period); } else { crm_err("No election defined"); } } static int crm_uptime(struct timeval *output) { static time_t expires = 0; static struct rusage info; time_t tm_now = time(NULL); if (expires < tm_now) { int rc = 0; info.ru_utime.tv_sec = 0; info.ru_utime.tv_usec = 0; rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); expires = 0; return -1; } crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); } expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */ output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; return 1; } static int crm_compare_age(struct timeval your_age) { struct timeval our_age; crm_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld.%ld vs %ld.%ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Lose: %ld.%ld vs %ld.%ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return -1; } return 0; } void election_vote(election_t *e) { struct timeval age; xmlNode *vote = NULL; crm_node_t *our_node; if(e == NULL) { crm_trace("Not voting in election: not initialized"); return; } our_node = crm_get_peer(0, e->uname); if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { crm_trace("Cannot vote yet: %p", our_node); return; } e->state = election_in_progress; vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); e->count++; crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid); crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count); crm_uptime(&age); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec); send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); free_xml(vote); crm_debug("Started election %d", e->count); if (e->voted) { g_hash_table_destroy(e->voted); e->voted = NULL; } election_timeout_start(e); return; } bool election_check(election_t *e) { int voted_size = 0; int num_members = crm_active_peers(); if(e == NULL) { crm_trace("not initialized"); return FALSE; } if (e->voted) { voted_size = g_hash_table_size(e->voted); } /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (voted_size >= num_members) { /* we won and everyone has voted */ election_timeout_stop(e); if (voted_size > num_members) { GHashTableIter gIter; const crm_node_t *node; char *key = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { if (crm_is_peer_active(node)) { crm_err("member: %s proc=%.32x", node->uname, node->processes); } } g_hash_table_iter_init(&gIter, e->voted); while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { crm_err("voted: %s", key); } } election_complete(e); return TRUE; } else { crm_debug("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return FALSE; } #define loss_dampen 2 /* in seconds */ /* A_ELECTION_COUNT */ enum election_result election_count_vote(election_t *e, xmlNode *vote, bool can_win) { int age = 0; int election_id = -1; int log_level = LOG_INFO; gboolean use_born_on = FALSE; gboolean done = FALSE; gboolean we_lose = FALSE; const char *op = NULL; const char *from = NULL; const char *reason = "unknown"; const char *election_owner = NULL; crm_node_t *our_node = NULL, *your_node = NULL; static int election_wins = 0; xmlNode *novote = NULL; time_t tm_now = time(NULL); static time_t expires = 0; static time_t last_election_loss = 0; /* if the membership copy is NULL we REALLY shouldn't be voting * the question is how we managed to get here. */ CRM_CHECK(vote != NULL, return election_error); if(e == NULL) { crm_info("Not voting in election: not initialized"); return election_lost; } else if(crm_peer_cache == NULL) { crm_info("Not voting in election: no peer cache"); return election_lost; } op = crm_element_value(vote, F_CRM_TASK); from = crm_element_value(vote, F_CRM_HOST_FROM); election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER); crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id); your_node = crm_get_peer(0, from); our_node = crm_get_peer(0, e->uname); if (e->voted == NULL) { crm_debug("Created voted hash"); e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } if (is_heartbeat_cluster()) { use_born_on = TRUE; } else if (is_classic_ais_cluster()) { use_born_on = TRUE; } if(can_win == FALSE) { reason = "Not eligible"; we_lose = TRUE; } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_lose = TRUE; } else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) { log_level = LOG_TRACE; reason = "Superseded"; done = TRUE; } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(from); CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE)); /* update the list of nodes that have voted */ g_hash_table_replace(e->voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else { struct timeval your_age; const char *your_version = crm_element_value(vote, F_CRM_VERSION); int tv_sec = 0; int tv_usec = 0; crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec); crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec); your_age.tv_sec = tv_sec; your_age.tv_usec = tv_usec; age = crm_compare_age(your_age); if (crm_str_eq(from, e->uname, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(from); CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE)); /* update ourselves in the list of nodes that have voted */ g_hash_table_replace(e->voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) { reason = "Version"; we_lose = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) { reason = "Version"; } else if (age < 0) { reason = "Uptime"; we_lose = TRUE; } else if (age > 0) { reason = "Uptime"; /* TODO: Check for y(our) born < 0 */ } else if (use_born_on && your_node->born < our_node->born) { reason = "Born"; we_lose = TRUE; } else if (use_born_on && your_node->born > our_node->born) { reason = "Born"; } else if (e->uname == NULL) { reason = "Unknown host name"; we_lose = TRUE; } else if (strcasecmp(e->uname, from) > 0) { reason = "Host name"; we_lose = TRUE; } else { reason = "Host name"; CRM_ASSERT(strcasecmp(e->uname, from) < 0); /* can't happen... * } else if(strcasecmp(e->uname, from) == 0) { * */ } } if (expires < tm_now) { election_wins = 0; expires = tm_now + STORM_INTERVAL; } else if (done == FALSE && we_lose == FALSE) { int peers = 1 + g_hash_table_size(crm_peer_cache); /* If every node has to vote down every other node, thats N*(N-1) total elections * Allow some leway before _really_ complaining */ election_wins++; if (election_wins > (peers * peers)) { crm_warn("Election storm detected: %d elections in %d seconds", election_wins, STORM_INTERVAL); election_wins = 0; expires = tm_now + STORM_INTERVAL; crm_write_blackbox(0, NULL); } } if (done) { do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)", election_id, e->count, election_owner, op, from, reason); return e->state; } else if (we_lose == FALSE) { do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)", election_id, election_owner, op, from, reason); if (last_election_loss == 0 || tm_now - last_election_loss > (time_t) loss_dampen) { last_election_loss = 0; election_timeout_stop(e); /* Start a new election by voting down this, and other, peers */ e->state = election_start; return e->state; } crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)", election_id, loss_dampen, ctime(&last_election_loss)); } novote = create_request(CRM_OP_NOVOTE, NULL, from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)", election_id, election_owner, op, from, reason); election_timeout_stop(e); crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner); crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id); send_cluster_message(your_node, crm_msg_crmd, novote, TRUE); free_xml(novote); last_election_loss = tm_now; e->state = election_lost; return e->state; } diff --git a/lib/services/services.c b/lib/services/services.c index a3c99a65eb..4be425c8ee 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,861 +1,850 @@ /* - * Copyright (C) 2010 Andrew Beekhof + * Copyright (C) 2010-2016 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops*/ GList *blocked_ops = NULL; /* ops currently active (in-flight) */ GList *inflight_ops = NULL; svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { return resources_action_create(name, "lsb", NULL, name, action, interval, timeout, NULL, 0); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); return "lsb"; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return "systemd"; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return "upstart"; } #endif return NULL; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { crm_err("A service or resource action must have a name."); goto return_error; } if (crm_strlen_zero(standard)) { crm_err("A service action must have a valid standard."); goto return_error; } if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { crm_err("An OCF resource action must have a provider."); goto return_error; } if (crm_strlen_zero(agent)) { crm_err("A service or resource action must have an agent."); goto return_error; } if (crm_strlen_zero(action)) { crm_err("A service or resource action must specify an action."); goto return_error; } if (safe_str_eq(action, "monitor") && ( #if SUPPORT_HEARTBEAT safe_str_eq(standard, "heartbeat") || #endif safe_str_eq(standard, "lsb") || safe_str_eq(standard, "service"))) { action = "status"; } /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); op->action = strdup(action); op->interval = interval; op->timeout = timeout; op->standard = strdup(standard); op->agent = strdup(agent); op->sequence = ++operations; op->flags = flags; if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) { goto return_error; } if (strcasecmp(op->standard, "service") == 0) { const char *expanded = resources_find_service_class(op->agent); if(expanded) { crm_debug("Found a %s agent for %s/%s", expanded, op->rsc, op->agent); free(op->standard); op->standard = strdup(expanded); } else { crm_info("Cannot determine the standard for %s (%s)", op->rsc, op->agent); free(op->standard); op->standard = strdup("lsb"); } CRM_ASSERT(op->standard); } if (strcasecmp(op->standard, "ocf") == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, "lsb") == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_HEARTBEAT } else if (strcasecmp(op->standard, "heartbeat") == 0) { int index; int param_num; char buf_tmp[20]; void *value_tmp; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the HB_RA_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); value_tmp = g_hash_table_lookup(params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ continue; } op->opaque->args[param_num++] = strdup(value_tmp); } /* Add operation code as the last argument, */ /* and the teminating NULL pointer */ op->opaque->args[param_num++] = strdup(op->action); op->opaque->args[param_num] = NULL; #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, "systemd") == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, "upstart") == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif } else if (strcasecmp(op->standard, "service") == 0) { op->opaque->exec = strdup(SERVICE_SCRIPT); op->opaque->args[0] = strdup(SERVICE_SCRIPT); op->opaque->args[1] = strdup(agent); op->opaque->args[2] = strdup(action); #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, "nagios") == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } #if SUPPORT_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if(op->opaque == NULL) { return; } #if SUPPORT_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc); if(dbus_pending_call_get_completed(op->opaque->pending)) { crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc); } dbus_pending_call_cancel(op->opaque->pending); dbus_pending_call_unref(op->opaque->pending); op->opaque->pending = NULL; } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } gboolean services_action_cancel(const char *name, const char *action, int interval) { svc_action_t *op = NULL; char id[512]; snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval); if (!(op = g_hash_table_lookup(recurring_actions, id))) { return FALSE; } /* Always kill the recurring timer */ cancel_recurring_action(op); if (op->pid == 0) { op->status = PCMK_LRM_OP_CANCELLED; if (op->opaque->callback) { op->opaque->callback(op); } blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); } else { crm_info("Cancelling in-flight op: performing early termination of %s (pid=%d)", id, op->pid); op->cancel = 1; if (mainloop_child_kill(op->pid) == FALSE) { /* even though the early termination failed, * the op will be marked as cancelled once it completes. */ crm_err("Termination of %s (pid=%d) failed", id, op->pid); return FALSE; } } return TRUE; } gboolean services_action_kick(const char *name, const char *action, int interval /* ms */) { svc_action_t * op = NULL; char *id = NULL; if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) { return FALSE; } op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /* add new recurring operation, check for duplicates. * - if duplicate found, return TRUE, immediately reschedule op. * - if no dup, return FALSE, inserve into recurring op list.*/ static gboolean handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *)) { svc_action_t * dup = NULL; if (recurring_actions == NULL) { recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); return FALSE; } /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the dup. */ services_action_free(op); return TRUE; } return FALSE; } static gboolean action_async_helper(svc_action_t * op) { if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART return upstart_job_exec(op, FALSE); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD return systemd_unit_exec(op); #endif } else { return services_os_action_execute(op, FALSE); } /* The 'op' has probably been freed if the execution functions return TRUE. */ /* Avoid using the 'op' in here. */ return FALSE; } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } CRM_ASSERT(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { if (handle_duplicate_recurring(op, action_callback) == TRUE) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (op->rsc && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return action_async_helper(op); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (safe_str_eq(op->rsc, rsc)) { return TRUE; } } return FALSE; } void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; gboolean res = FALSE; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); res = action_async_helper(op); if (res == FALSE) { op->status = PCMK_LRM_OP_ERROR; /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ operation_finalize(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART rc = upstart_job_exec(op, TRUE); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD rc = systemd_unit_exec(op); #endif } else { rc = services_os_action_execute(op, TRUE); } crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { return resources_list_agents("lsb", NULL); } #if SUPPORT_HEARTBEAT static GList * resources_os_list_hb_agents(void) { return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE); } #endif GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; standards = g_list_append(standards, strdup("ocf")); standards = g_list_append(standards, strdup("lsb")); standards = g_list_append(standards, strdup("service")); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); if (agents) { standards = g_list_append(standards, strdup("systemd")); g_list_free_full(agents, free); } #endif #if SUPPORT_UPSTART agents = upstart_job_listall(); if (agents) { standards = g_list_append(standards, strdup("upstart")); g_list_free_full(agents, free); } #endif #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { standards = g_list_append(standards, strdup("nagios")); g_list_free_full(agents, free); } #endif #if SUPPORT_HEARTBEAT standards = g_list_append(standards, strdup("heartbeat")); #endif return standards; } GList * resources_list_providers(const char *standard) { if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if (standard == NULL || strcasecmp(standard, "service") == 0) { GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, "lsb") == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_HEARTBEAT } else if (strcasecmp(standard, "heartbeat") == 0) { return resources_os_list_hb_agents(); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, "systemd") == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, "upstart") == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, "nagios") == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 398a36b602..cd7fd3f213 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,913 +1,902 @@ /* - * Copyright (C) 2010 Andrew Beekhof + * Copyright (C) 2010-2016 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SIGNALFD_H #include #endif #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" #if SUPPORT_CIBSECRETS # include "crm/common/cib_secrets.h" #endif /* ops currently active (in-flight) */ extern GList *inflight_ops; static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err("fcntl() write failed"); } } else { crm_err("fcntl() read failed"); } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { crm_trace("Got %d chars: %.80s", rc, buf); buf[rc] = 0; data = realloc_safe(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void add_OCF_env_vars(svc_action_t * op) { if (!op->standard || strcasecmp("ocf", op->standard) != 0) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean operation_finalize(svc_action_t * op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *)op); } } if (op->opaque->callback) { op->opaque->callback(op); } op->pid = 0; inflight_ops = g_list_remove(inflight_ops, op); handle_blocked_ops(); if (!recurring && op->synchronous == FALSE) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); return TRUE; } services_action_cleanup(op); return FALSE; } static void operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stderr", prefix); dispatch_stderr(op); crm_trace("%s: %p", op->id, op->stderr_data); mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stdout", prefix); dispatch_stdout(op); crm_trace("%s: %p", op->id, op->stdout_data); mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING, "%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; crm_debug("%s - exited with rc=%d", prefix, exitcode); } free(prefix); prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); free(prefix); prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); operation_finalize(op); } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ static void services_handle_exec_error(svc_action_t * op, int error) { int rc_not_installed, rc_insufficient_priv, rc_exec_error; /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ if (safe_str_eq(op->standard, "lsb") && safe_str_eq(op->action, "status")) { rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS } else if (safe_str_eq(op->standard, "nagios")) { rc_not_installed = NAGIOS_NOT_INSTALLED; rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { rc_not_installed = PCMK_OCF_NOT_INSTALLED; rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ op->rc = rc_not_installed; op->status = PCMK_LRM_OP_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ op->rc = rc_insufficient_priv; op->status = PCMK_LRM_OP_ERROR; break; default: op->rc = rc_exec_error; op->status = PCMK_LRM_OP_ERROR; } } static void action_launch_child(svc_action_t *op) { int lpc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } #if SUPPORT_CIBSECRETS if (replace_secret_params(op->rsc, op->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(op->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif /* Setup environment correctly */ add_OCF_env_vars(op); /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services_handle_exec_error(op, errno); _exit(op->rc); } #ifndef HAVE_SYS_SIGNALFD_H static int sigchld_pipe[2] = { -1, -1 }; static void sigchld_handler() { if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) { crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe"); } } #endif static void action_synced_wait(svc_action_t * op, sigset_t *mask) { int status = 0; int timeout = op->timeout; int sfd = -1; time_t start = -1; struct pollfd fds[3]; int wait_rc = 0; #ifdef HAVE_SYS_SIGNALFD_H sfd = signalfd(-1, mask, SFD_NONBLOCK); if (sfd < 0) { crm_perror(LOG_ERR, "signalfd() failed"); } #else sfd = sigchld_pipe[0]; #endif fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sfd; fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %d", op->pid); start = time(NULL); do { int poll_rc = poll(fds, 3, timeout); if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if (fds[2].revents & POLLIN) { #ifdef HAVE_SYS_SIGNALFD_H struct signalfd_siginfo fdsi; ssize_t s; s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); } else if (fdsi.ssi_signo == SIGCHLD) { #else if (1) { /* Clear out the sigchld pipe. */ char ch; while (read(sfd, &ch, 1) == 1); #endif wait_rc = waitpid(op->pid, &status, WNOHANG); if (wait_rc < 0){ crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); } else if (wait_rc > 0) { break; } } } } else if (poll_rc == 0) { timeout = 0; break; } else if (poll_rc < 0) { if (errno != EINTR) { crm_perror(LOG_ERR, "poll() failed"); break; } } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Child done: %d", op->pid); if (wait_rc <= 0) { int killrc = kill(op->pid, SIGKILL); op->rc = PCMK_OCF_UNKNOWN_ERROR; if (op->timeout > 0 && timeout <= 0) { op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); } else { op->status = PCMK_LRM_OP_ERROR; } if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } /* * From sigprocmask(2): * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. * * This makes it safe to skip WNOHANG here */ waitpid(op->pid, &status, 0); } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif svc_read_output(op->opaque->stdout_fd, op, FALSE); svc_read_output(op->opaque->stderr_fd, op, TRUE); close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); #ifdef HAVE_SYS_SIGNALFD_H close(sfd); #endif } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous) { int stdout_fd[2]; int stderr_fd[2]; struct stat st; sigset_t *pmask; #ifdef HAVE_SYS_SIGNALFD_H sigset_t mask; sigset_t old_mask; #define sigchld_cleanup() do { \ if (sigismember(&old_mask, SIGCHLD) == 0) { \ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \ crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \ } \ } \ } while (0) #else struct sigaction sa; struct sigaction old_sa; #define sigchld_cleanup() do { \ if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \ crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \ } \ close(sigchld_pipe[0]); \ close(sigchld_pipe[1]); \ sigchld_pipe[0] = sigchld_pipe[1] = -1; \ } while(0) #endif /* Fail fast */ if(stat(op->opaque->exec, &st) != 0) { int rc = errno; crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stdout_fd) < 0) { int rc = errno; crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stderr_fd) < 0) { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } return FALSE; } if (synchronous) { #ifdef HAVE_SYS_SIGNALFD_H sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigemptyset(&old_mask); if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld"); } pmask = &mask; #else if(pipe(sigchld_pipe) == -1) { crm_perror(LOG_ERR, "pipe() failed"); } set_fd_opts(sigchld_pipe[0], O_NONBLOCK); set_fd_opts(sigchld_pipe[1], O_NONBLOCK); sa.sa_handler = sigchld_handler; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); if (sigaction(SIGCHLD, &sa, &old_sa) < 0) { crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler"); } pmask = NULL; #endif } op->pid = fork(); switch (op->pid) { case -1: { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } sigchld_cleanup(); return FALSE; } case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err("dup2() failed (stderr)"); } close(stderr_fd[1]); } if (synchronous) { sigchld_cleanup(); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (synchronous) { action_synced_wait(op, pmask); sigchld_cleanup(); } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, (op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } #if SUPPORT_NAGIOS GList * resources_os_list_nagios_agents(void) { GList *plugin_list = NULL; GList *result = NULL; GList *gIter = NULL; plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE); /* Make sure both the plugin and its metadata exist */ for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) { const char *plugin = gIter->data; char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin); struct stat st; if (stat(metadata, &st) == 0) { result = g_list_append(result, strdup(plugin)); } free(metadata); } g_list_free_full(plugin_list, free); return result; } #endif diff --git a/lib/services/systemd.c b/lib/services/systemd.c index c3d9c01c02..e6e11147b9 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -1,770 +1,759 @@ /* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * Copyright (C) 2012-2016 Andrew Beekhof * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (C) 2012 Andrew Beekhof + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include gboolean systemd_unit_exec_with_unit(svc_action_t * op, const char *unit); #define BUS_NAME "org.freedesktop.systemd1" #define BUS_NAME_MANAGER BUS_NAME ".Manager" #define BUS_NAME_UNIT BUS_NAME ".Unit" #define BUS_PATH "/org/freedesktop/systemd1" static inline DBusMessage * systemd_new_method(const char *method) { crm_trace("Calling: %s on " BUS_NAME_MANAGER, method); return dbus_message_new_method_call(BUS_NAME, BUS_PATH, BUS_NAME_MANAGER, method); } /* * Functions to manage a static DBus connection */ static DBusConnection* systemd_proxy = NULL; static inline DBusPendingCall * systemd_send(DBusMessage *msg, void(*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout) { return pcmk_dbus_send(msg, systemd_proxy, done, user_data, timeout); } static inline DBusMessage * systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout) { return pcmk_dbus_send_recv(msg, systemd_proxy, error, timeout); } /*! * \internal * \brief Send a method to systemd without arguments, and wait for reply * * \param[in] method Method to send * * \return Systemd reply on success, NULL (and error will be logged) otherwise * * \note The caller must call dbus_message_unref() on the reply after * handling it. */ static DBusMessage * systemd_call_simple_method(const char *method) { DBusMessage *msg = systemd_new_method(method); DBusMessage *reply = NULL; DBusError error; /* Don't call systemd_init() here, because that calls this */ CRM_CHECK(systemd_proxy, return NULL); if (msg == NULL) { crm_err("Could not create message to send %s to systemd", method); return NULL; } dbus_error_init(&error); reply = systemd_send_recv(msg, &error, DBUS_TIMEOUT_USE_DEFAULT); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Could not send %s to systemd: %s (%s)", method, error.message, error.name); dbus_error_free(&error); return NULL; } else if (reply == NULL) { crm_err("Could not send %s to systemd: no reply received", method); return NULL; } return reply; } static gboolean systemd_init(void) { static int need_init = 1; /* http://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html */ if (systemd_proxy && dbus_connection_get_is_connected(systemd_proxy) == FALSE) { crm_warn("Connection to System DBus is closed. Reconnecting..."); pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; need_init = 1; } if (need_init) { need_init = 0; systemd_proxy = pcmk_dbus_connect(); } if (systemd_proxy == NULL) { return FALSE; } return TRUE; } static inline char * systemd_get_property(const char *unit, const char *name, void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending, int timeout) { return systemd_proxy? pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME_UNIT, name, callback, userdata, pending, timeout) : NULL; } void systemd_cleanup(void) { if (systemd_proxy) { pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; } } /* * end of systemd_proxy functions */ /*! * \internal * \brief Check whether a file name represents a systemd unit * * \param[in] name File name to check * * \return Pointer to "dot" before filename extension if so, NULL otherwise */ static const char * systemd_unit_extension(const char *name) { if (name) { const char *dot = strrchr(name, '.'); if (dot && (!strcmp(dot, ".service") || !strcmp(dot, ".socket"))) { return dot; } } return NULL; } static char * systemd_service_name(const char *name) { if (name == NULL) { return NULL; } if (systemd_unit_extension(name)) { return strdup(name); } return crm_strdup_printf("%s.service", name); } static void systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data) { DBusError error; DBusMessage *reply = NULL; unsigned int reload_count = GPOINTER_TO_UINT(user_data); dbus_error_init(&error); if(pending) { reply = dbus_pending_call_steal_reply(pending); } if (pcmk_dbus_find_error(pending, reply, &error)) { crm_err("Could not issue systemd reload %d: %s", reload_count, error.message); dbus_error_free(&error); } else { crm_trace("Reload %d complete", reload_count); } if(pending) { dbus_pending_call_unref(pending); } if(reply) { dbus_message_unref(reply); } } static bool systemd_daemon_reload(int timeout) { static unsigned int reload_count = 0; DBusMessage *msg = systemd_new_method("Reload"); reload_count++; CRM_ASSERT(msg != NULL); systemd_send(msg, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count), timeout); dbus_message_unref(msg); return TRUE; } static bool systemd_mask_error(svc_action_t *op, const char *error) { crm_trace("Could not issue %s for %s: %s", op->action, op->rsc, error); if(strstr(error, "org.freedesktop.systemd1.InvalidName") || strstr(error, "org.freedesktop.systemd1.LoadFailed") || strstr(error, "org.freedesktop.systemd1.NoSuchUnit")) { if (safe_str_eq(op->action, "stop")) { crm_trace("Masking %s failure for %s: unknown services are stopped", op->action, op->rsc); op->rc = PCMK_OCF_OK; return TRUE; } else { crm_trace("Mapping %s failure for %s: unknown services are not installed", op->action, op->rsc); op->rc = PCMK_OCF_NOT_INSTALLED; op->status = PCMK_LRM_OP_NOT_INSTALLED; return FALSE; } } return FALSE; } static const char * systemd_loadunit_result(DBusMessage *reply, svc_action_t * op) { const char *path = NULL; DBusError error; if (pcmk_dbus_find_error((void*)&path, reply, &error)) { if(op && !systemd_mask_error(op, error.name)) { crm_err("Could not load systemd unit %s for %s: %s", op->agent, op->id, error.message); } dbus_error_free(&error); } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) { dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); } if(op) { if (path) { systemd_unit_exec_with_unit(op, path); } else if (op->synchronous == FALSE) { operation_finalize(op); } } return path; } static void systemd_loadunit_cb(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t * op = user_data; if(pending) { reply = dbus_pending_call_steal_reply(pending); } crm_trace("Got result: %p for %p / %p for %s", reply, pending, op->opaque->pending, op->id); CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); systemd_loadunit_result(reply, user_data); if(reply) { dbus_message_unref(reply); } } static char * systemd_unit_by_name(const gchar * arg_name, svc_action_t *op) { DBusMessage *msg; DBusMessage *reply = NULL; DBusPendingCall* pending = NULL; char *name = NULL; /* Equivalent to GetUnit if it's already loaded */ if (systemd_init() == FALSE) { return FALSE; } msg = systemd_new_method("LoadUnit"); CRM_ASSERT(msg != NULL); name = systemd_service_name(arg_name); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); free(name); if(op == NULL || op->synchronous) { const char *unit = NULL; char *munit = NULL; reply = systemd_send_recv(msg, NULL, (op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT)); dbus_message_unref(msg); unit = systemd_loadunit_result(reply, op); if(unit) { munit = strdup(unit); } if(reply) { dbus_message_unref(reply); } return munit; } pending = systemd_send(msg, systemd_loadunit_cb, op, op->timeout); if(pending) { services_set_op_pending(op, pending); } dbus_message_unref(msg); return NULL; } GList * systemd_unit_listall(void) { int lpc = 0; GList *units = NULL; DBusMessageIter args; DBusMessageIter unit; DBusMessageIter elem; DBusMessage *reply = NULL; if (systemd_init() == FALSE) { return NULL; } /* " \n" \ " \n" \ " \n" \ */ reply = systemd_call_simple_method("ListUnits"); if (reply == NULL) { return NULL; } if (!dbus_message_iter_init(reply, &args)) { crm_err("Could not list systemd units: systemd reply has no arguments"); dbus_message_unref(reply); return NULL; } if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) { crm_err("Could not list systemd units: systemd reply has invalid arguments"); dbus_message_unref(reply); return NULL; } dbus_message_iter_recurse(&args, &unit); while (dbus_message_iter_get_arg_type (&unit) != DBUS_TYPE_INVALID) { DBusBasicValue value; if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __FUNCTION__, __LINE__)) { continue; } dbus_message_iter_recurse(&unit, &elem); if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) { continue; } dbus_message_iter_get_basic(&elem, &value); crm_trace("DBus ListUnits listed: %s", value.str); if(value.str) { const char *match = systemd_unit_extension(value.str); if (match) { char *unit_name; if (!strcmp(match, ".service")) { /* service is the "default" unit type, so strip it */ unit_name = strndup(value.str, match - value.str); } else { unit_name = strdup(value.str); } lpc++; units = g_list_append(units, unit_name); } } dbus_message_iter_next (&unit); } dbus_message_unref(reply); crm_trace("Found %d systemd services", lpc); return units; } gboolean systemd_unit_exists(const char *name) { char *unit = NULL; /* Note: Makes a blocking dbus calls * Used by resources_find_service_class() when resource class=service */ unit = systemd_unit_by_name(name, NULL); if(unit) { free(unit); return TRUE; } return FALSE; } static char * systemd_unit_metadata(const char *name, int timeout) { char *meta = NULL; char *desc = NULL; char *path = systemd_unit_by_name(name, NULL); if (path) { /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */ desc = systemd_get_property(path, "Description", NULL, NULL, NULL, timeout); } else { desc = crm_strdup_printf("Systemd unit file for %s", name); } meta = crm_strdup_printf("\n" "\n" "\n" " 1.0\n" " \n" " %s\n" " \n" " systemd unit file for %s\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" "\n", name, desc, name); free(desc); free(path); return meta; } static void systemd_exec_result(DBusMessage *reply, svc_action_t *op) { DBusError error; if (pcmk_dbus_find_error((void*)&error, reply, &error)) { /* ignore "already started" or "not running" errors */ if (!systemd_mask_error(op, error.name)) { crm_err("Could not issue %s for %s: %s", op->action, op->rsc, error.message); } dbus_error_free(&error); } else { if(!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) { crm_warn("Call to %s passed but return type was unexpected", op->action); op->rc = PCMK_OCF_OK; } else { const char *path = NULL; dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_info("Call to %s passed: %s", op->action, path); op->rc = PCMK_OCF_OK; } } operation_finalize(op); } static void systemd_async_dispatch(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t *op = user_data; if(pending) { reply = dbus_pending_call_steal_reply(pending); } crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); systemd_exec_result(reply, op); if(reply) { dbus_message_unref(reply); } } #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/" static void systemd_unit_check(const char *name, const char *state, void *userdata) { svc_action_t * op = userdata; crm_trace("Resource %s has %s='%s'", op->rsc, name, state); if(state == NULL) { op->rc = PCMK_OCF_NOT_RUNNING; } else if (g_strcmp0(state, "active") == 0) { op->rc = PCMK_OCF_OK; } else if (g_strcmp0(state, "activating") == 0) { op->rc = PCMK_OCF_PENDING; } else if (g_strcmp0(state, "deactivating") == 0) { op->rc = PCMK_OCF_PENDING; } else { op->rc = PCMK_OCF_NOT_RUNNING; } if (op->synchronous == FALSE) { services_set_op_pending(op, NULL); operation_finalize(op); } } gboolean systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) { const char *method = op->action; DBusMessage *msg = NULL; DBusMessage *reply = NULL; CRM_ASSERT(unit); if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) { DBusPendingCall *pending = NULL; char *state; state = systemd_get_property(unit, "ActiveState", (op->synchronous? NULL : systemd_unit_check), op, (op->synchronous? NULL : &pending), op->timeout); if (op->synchronous) { systemd_unit_check("ActiveState", state, op); free(state); return op->rc == PCMK_OCF_OK; } else if (pending) { services_set_op_pending(op, pending); return TRUE; } else { return operation_finalize(op); } } else if (g_strcmp0(method, "start") == 0) { FILE *file_strm = NULL; char *override_dir = crm_strdup_printf("%s/%s.service.d", SYSTEMD_OVERRIDE_ROOT, op->agent); char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent); mode_t orig_umask; method = "StartUnit"; crm_build_path(override_dir, 0755); /* Ensure the override file is world-readable. This is not strictly * necessary, but it avoids a systemd warning in the logs. */ orig_umask = umask(S_IWGRP | S_IWOTH); file_strm = fopen(override_file, "w"); umask(orig_umask); if (file_strm != NULL) { /* TODO: Insert the start timeout in too */ char *override = crm_strdup_printf( "[Unit]\n" "Description=Cluster Controlled %s\n" "Before=pacemaker.service\n" "\n" "[Service]\n" "Restart=no\n", op->agent); int rc = fprintf(file_strm, "%s\n", override); free(override); if (rc < 0) { crm_perror(LOG_ERR, "Cannot write to systemd override file %s", override_file); } } else { crm_err("Cannot open systemd override file %s for writing", override_file); } if (file_strm != NULL) { fflush(file_strm); fclose(file_strm); } systemd_daemon_reload(op->timeout); free(override_file); free(override_dir); } else if (g_strcmp0(method, "stop") == 0) { char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent); method = "StopUnit"; unlink(override_file); free(override_file); systemd_daemon_reload(op->timeout); } else if (g_strcmp0(method, "restart") == 0) { method = "RestartUnit"; } else { op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE; goto cleanup; } crm_debug("Calling %s for %s: %s", method, op->rsc, unit); msg = systemd_new_method(method); CRM_ASSERT(msg != NULL); /* (ss) */ { const char *replace_s = "replace"; char *name = systemd_service_name(op->agent); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); free(name); } if (op->synchronous == FALSE) { DBusPendingCall *pending = systemd_send(msg, systemd_async_dispatch, op, op->timeout); dbus_message_unref(msg); if(pending) { services_set_op_pending(op, pending); return TRUE; } else { return operation_finalize(op); } } else { reply = systemd_send_recv(msg, NULL, op->timeout); dbus_message_unref(msg); systemd_exec_result(reply, op); if(reply) { dbus_message_unref(reply); } return FALSE; } cleanup: if (op->synchronous == FALSE) { return operation_finalize(op); } return op->rc == PCMK_OCF_OK; } static gboolean systemd_timeout_callback(gpointer p) { svc_action_t * op = p; op->opaque->timerid = 0; crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc); operation_finalize(op); return FALSE; } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean systemd_unit_exec(svc_action_t * op) { char *unit = NULL; CRM_ASSERT(op); CRM_ASSERT(systemd_init()); op->rc = PCMK_OCF_UNKNOWN_ERROR; crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'", op->synchronous ? "" : "a", op->action, op->agent, op->rsc); if (safe_str_eq(op->action, "meta-data")) { /* TODO: See if we can teach the lrmd not to make these calls synchronously */ op->stdout_data = systemd_unit_metadata(op->agent, op->timeout); op->rc = PCMK_OCF_OK; if (op->synchronous == FALSE) { return operation_finalize(op); } return TRUE; } unit = systemd_unit_by_name(op->agent, op); free(unit); if (op->synchronous == FALSE) { if (op->opaque->pending) { op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); services_add_inflight_op(op); return TRUE; } else { return operation_finalize(op); } } return op->rc == PCMK_OCF_OK; }