diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c index f5552905dd..ed96c1f17f 100644 --- a/daemons/controld/controld_throttle.c +++ b/daemons/controld/controld_throttle.c @@ -1,554 +1,554 @@ /* - * Copyright 2013-2019 the Pacemaker project contributors + * Copyright 2013-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include /* These values don't need to be bits, but these particular values must be kept * for backward compatibility during rolling upgrades. */ enum throttle_state_e { throttle_none = 0x0000, throttle_low = 0x0001, throttle_med = 0x0010, throttle_high = 0x0100, throttle_extreme = 0x1000, }; struct throttle_record_s { int max; enum throttle_state_e mode; char *node; }; static int throttle_job_max = 0; static float throttle_load_target = 0.0; #define THROTTLE_FACTOR_LOW 1.2 #define THROTTLE_FACTOR_MEDIUM 1.6 #define THROTTLE_FACTOR_HIGH 2.0 static GHashTable *throttle_records = NULL; static mainloop_timer_t *throttle_timer = NULL; static const char * load2str(enum throttle_state_e mode) { switch (mode) { case throttle_extreme: return "extreme"; case throttle_high: return "high"; case throttle_med: return "medium"; case throttle_low: return "low"; case throttle_none: return "negligible"; default: return "undetermined"; } } #if SUPPORT_PROCFS /*! * \internal * \brief Return name of /proc file containing the CIB daemon's load statistics * * \return Newly allocated memory with file name on success, NULL otherwise * * \note It is the caller's responsibility to free the return value. * This will return NULL if the daemon is being run via valgrind. * This should be called only on Linux systems. */ static char * find_cib_loadfile(void) { - int pid = crm_procfs_pid_of("pacemaker-based"); + pid_t pid = pcmk__procfs_pid_of("pacemaker-based"); - return pid? crm_strdup_printf("/proc/%d/stat", pid) : NULL; + return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL; } static bool throttle_cib_load(float *load) { /* /proc/[pid]/stat Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c. The fields, in order, with their proper scanf(3) format specifiers, are: pid %d (1) The process ID. comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out. state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging. ppid %d (4) The PID of the parent. pgrp %d (5) The process group ID of the process. session %d (6) The session ID of the process. tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.) tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process. flags %u (%lu before Linux 2.6.22) (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version. minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk. cminflt %lu (11) The number of minor faults that the process's waited-for children have made. majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk. cmajflt %lu (13) The number of major faults that the process's waited-for children have made. utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations. stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). */ static char *loadfile = NULL; static time_t last_call = 0; static long ticks_per_s = 0; static unsigned long last_utime, last_stime; char buffer[64*1024]; FILE *stream = NULL; time_t now = time(NULL); if(load == NULL) { return FALSE; } else { *load = 0.0; } if(loadfile == NULL) { last_call = 0; last_utime = 0; last_stime = 0; loadfile = find_cib_loadfile(); if (loadfile == NULL) { crm_warn("Couldn't find CIB load file"); return FALSE; } ticks_per_s = sysconf(_SC_CLK_TCK); crm_trace("Found %s", loadfile); } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); free(loadfile); loadfile = NULL; return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *comm = calloc(1, 256); char state = 0; int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0; unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0; rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid, &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime); free(comm); if(rc != 15) { crm_err("Only %d of 15 fields found in %s", rc, loadfile); fclose(stream); return FALSE; } else if(last_call > 0 && last_call < now && last_utime <= utime && last_stime <= stime) { time_t elapsed = now - last_call; unsigned long delta_utime = utime - last_utime; unsigned long delta_stime = stime - last_stime; *load = (delta_utime + delta_stime); /* Cast to a float before division */ *load /= ticks_per_s; *load /= elapsed; crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed); } else { crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s); } last_call = now; last_utime = utime; last_stime = stime; fclose(stream); return TRUE; } fclose(stream); return FALSE; } static bool throttle_load_avg(float *load) { char buffer[256]; FILE *stream = NULL; const char *loadfile = "/proc/loadavg"; if(load == NULL) { return FALSE; } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc); return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *nl = strstr(buffer, "\n"); /* Grab the 1-minute average, ignore the rest */ *load = strtof(buffer, NULL); if(nl) { nl[0] = 0; } fclose(stream); return TRUE; } fclose(stream); return FALSE; } /*! * \internal * \brief Check a load value against throttling thresholds * * \param[in] load Load value to check * \param[in] desc Description of metric (for logging) * \param[in] thresholds Low/medium/high/extreme thresholds * * \return Throttle mode corresponding to load value */ static enum throttle_state_e throttle_check_thresholds(float load, const char *desc, float thresholds[4]) { if (load > thresholds[3]) { crm_notice("Extreme %s detected: %f", desc, load); return throttle_extreme; } else if (load > thresholds[2]) { crm_notice("High %s detected: %f", desc, load); return throttle_high; } else if (load > thresholds[1]) { crm_info("Moderate %s detected: %f", desc, load); return throttle_med; } else if (load > thresholds[0]) { crm_debug("Noticeable %s detected: %f", desc, load); return throttle_low; } crm_trace("Negligible %s detected: %f", desc, load); return throttle_none; } static enum throttle_state_e throttle_handle_load(float load, const char *desc, int cores) { float normalize; float thresholds[4]; if (cores == 1) { /* On a single core machine, a load of 1.0 is already too high */ normalize = 0.6; } else { /* Normalize the load to be per-core */ normalize = cores; } thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; thresholds[3] = load + 1.0; /* never extreme */ return throttle_check_thresholds(load, desc, thresholds); } #endif static enum throttle_state_e throttle_mode(void) { enum throttle_state_e mode = throttle_none; #if SUPPORT_PROCFS unsigned int cores; float load; float thresholds[4]; - cores = crm_procfs_num_cores(); + cores = pcmk__procfs_num_cores(); if(throttle_cib_load(&load)) { float cib_max_cpu = 0.95; /* The CIB is a single-threaded task and thus cannot consume * more than 100% of a CPU (and 1/cores of the overall system * load). * * On a many-cored system, the CIB might therefore be maxed out * (causing operations to fail or appear to fail) even though * the overall system load is still reasonable. * * Therefore, the 'normal' thresholds can not apply here, and we * need a special case. */ if(cores == 1) { cib_max_cpu = 0.4; } if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) { cib_max_cpu = throttle_load_target; } thresholds[0] = cib_max_cpu * 0.8; thresholds[1] = cib_max_cpu * 0.9; thresholds[2] = cib_max_cpu; /* Can only happen on machines with a low number of cores */ thresholds[3] = cib_max_cpu * 1.5; mode = throttle_check_thresholds(load, "CIB load", thresholds); } if(throttle_load_target <= 0) { /* If we ever make this a valid value, the cluster will at least behave as expected */ return mode; } if(throttle_load_avg(&load)) { enum throttle_state_e cpu_load; cpu_load = throttle_handle_load(load, "CPU load", cores); if (cpu_load > mode) { mode = cpu_load; } crm_debug("Current load is %f across %u core(s)", load, cores); } #endif // SUPPORT_PROCFS return mode; } static void throttle_send_command(enum throttle_state_e mode) { xmlNode *xml = NULL; static enum throttle_state_e last = -1; if(mode != last) { crm_info("New throttle mode: %s load (was %s)", load2str(mode), load2str(last)); last = mode; xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode); crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max); send_cluster_message(NULL, crm_msg_crmd, xml, TRUE); free_xml(xml); } } static gboolean throttle_timer_cb(gpointer data) { throttle_send_command(throttle_mode()); return TRUE; } static void throttle_record_free(gpointer p) { struct throttle_record_s *r = p; free(r->node); free(r); } void throttle_set_load_target(float target) { throttle_load_target = target; } void throttle_update_job_max(const char *preference) { int max = 0; - throttle_job_max = 2 * crm_procfs_num_cores(); + throttle_job_max = 2 * pcmk__procfs_num_cores(); if(preference) { /* Global preference from the CIB */ max = crm_int_helper(preference, NULL); if(max > 0) { throttle_job_max = max; } } preference = getenv("PCMK_node_action_limit"); if(preference) { /* Per-node override */ max = crm_int_helper(preference, NULL); if(max > 0) { throttle_job_max = max; } } } void throttle_init(void) { if(throttle_records == NULL) { throttle_records = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, throttle_record_free); throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); } throttle_update_job_max(NULL); mainloop_timer_start(throttle_timer); } void throttle_fini(void) { if (throttle_timer != NULL) { mainloop_timer_del(throttle_timer); throttle_timer = NULL; } if (throttle_records != NULL) { g_hash_table_destroy(throttle_records); throttle_records = NULL; } } int throttle_get_total_job_limit(int l) { /* Cluster-wide limit */ GHashTableIter iter; int limit = l; int peers = crm_active_peers(); struct throttle_record_s *r = NULL; g_hash_table_iter_init(&iter, throttle_records); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { switch(r->mode) { case throttle_extreme: if(limit == 0 || limit > peers/4) { limit = QB_MAX(1, peers/4); } break; case throttle_high: if(limit == 0 || limit > peers/2) { limit = QB_MAX(1, peers/2); } break; default: break; } } if(limit == l) { /* crm_trace("No change to batch-limit=%d", limit); */ } else if(l == 0) { crm_trace("Using batch-limit=%d", limit); } else { crm_trace("Using batch-limit=%d instead of %d", limit, l); } return limit; } int throttle_get_job_limit(const char *node) { int jobs = 1; struct throttle_record_s *r = NULL; r = g_hash_table_lookup(throttle_records, node); if(r == NULL) { r = calloc(1, sizeof(struct throttle_record_s)); r->node = strdup(node); r->mode = throttle_low; r->max = throttle_job_max; crm_trace("Defaulting to local values for unknown node %s", node); g_hash_table_insert(throttle_records, r->node, r); } switch(r->mode) { case throttle_extreme: case throttle_high: jobs = 1; /* At least one job must always be allowed */ break; case throttle_med: jobs = QB_MAX(1, r->max / 4); break; case throttle_low: jobs = QB_MAX(1, r->max / 2); break; case throttle_none: jobs = QB_MAX(1, r->max); break; default: crm_err("Unknown throttle mode %.4x on %s", r->mode, node); break; } return jobs; } void throttle_update(xmlNode *xml) { int max = 0; int mode = 0; struct throttle_record_s *r = NULL; const char *from = crm_element_value(xml, F_CRM_HOST_FROM); crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode); crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max); r = g_hash_table_lookup(throttle_records, from); if(r == NULL) { r = calloc(1, sizeof(struct throttle_record_s)); r->node = strdup(from); g_hash_table_insert(throttle_records, r->node, r); } r->max = max; r->mode = (enum throttle_state_e) mode; crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", from, load2str((enum throttle_state_e) mode), max, throttle_get_job_limit(from)); } diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index bb5219ce02..53add3071e 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,217 +1,216 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_INTERNAL__H #define CRM_COMMON_INTERNAL__H #include /* for gboolean */ #include /* for struct dirent */ #include /* for getpid() */ #include /* for bool */ #include // uid_t, gid_t, pid_t #include #if SUPPORT_CIBSECRETS // Internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal I/O utilities (from io.c) */ int pcmk__real_path(const char *path, char **resolved_path); char *pcmk__series_filename(const char *directory, const char *series, int sequence, bool bzip); int pcmk__read_series_sequence(const char *directory, const char *series, unsigned int *seq); void pcmk__write_series_sequence(const char *directory, const char *series, unsigned int sequence, int max); int pcmk__chown_series_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); bool pcmk__daemon_can_write(const char *dir, const char *file); void pcmk__sync_directory(const char *name); int pcmk__file_contents(const char *filename, char **contents); int pcmk__write_sync(int fd, const char *contents); int pcmk__set_nonblocking(int fd); const char *pcmk__get_tmpdir(void); void pcmk__close_fds_in_child(bool); /* internal procfs utilities (from procfs.c) */ -int crm_procfs_process_info(struct dirent *entry, char *name, int *pid); -int crm_procfs_pid_of(const char *name); -unsigned int crm_procfs_num_cores(void); +pid_t pcmk__procfs_pid_of(const char *name); +unsigned int pcmk__procfs_num_cores(void); /* internal XML schema functions (from xml.c) */ void crm_schema_init(void); void crm_schema_cleanup(void); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. */ int pcmk__pid_active(pid_t pid, const char *daemon); int pcmk__read_pidfile(const char *filename, pid_t *pid); int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, const char *expected_name, pid_t *pid); int pcmk__lock_pidfile(const char *filename, const char *name); /* interal functions related to resource operations (from operations.c) */ char *pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms); char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); void filter_action_parameters(xmlNode *param_set, const char *version); // miscellaneous utilities (from utils.c) const char *pcmk_message_name(const char *name); /* internal generic string functions (from strings.c) */ long long crm_int_helper(const char *text, char **end_text); guint crm_parse_ms(const char *text); bool crm_starts_with(const char *str, const char *prefix); gboolean crm_ends_with(const char *s, const char *match); gboolean crm_ends_with_ext(const char *s, const char *match); char *add_list_element(char *list, const char *value); bool crm_compress_string(const char *data, int length, int max, char **result, unsigned int *result_len); gint crm_alpha_sort(gconstpointer a, gconstpointer b); /* Correctly displaying singular or plural is complicated; consider "1 node has" * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. * * if (a == 1) { * crm_info("singular message"): * } else { * crm_info("plural message"); * } * * though even that's not sufficient for all languages besides English (if we * ever desire to do translations of output and log messages). But the following * convenience macros are "good enough" and more concise for many cases. */ /* Example: * crm_info("Found %d %s", nentries, * pcmk__plural_alt(nentries, "entry", "entries")); */ #define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) // Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); #define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") static inline char * crm_concat(const char *prefix, const char *suffix, char join) { CRM_ASSERT(prefix && suffix); return crm_strdup_printf("%s%c%s", prefix, join, suffix); } static inline int crm_strlen_zero(const char *s) { return !s || *s == '\0'; } static inline char * crm_getpid_s() { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define CRM_FAIL_COUNT_PREFIX "fail-count" #define CRM_LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". */ static inline char * crm_fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * crm_failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return crm_fail_attr_name(CRM_FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * crm_lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return crm_fail_attr_name(CRM_LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } #endif /* CRM_COMMON_INTERNAL__H */ diff --git a/lib/common/procfs.c b/lib/common/procfs.c index 891dc87561..a36ef2b826 100644 --- a/lib/common/procfs.c +++ b/lib/common/procfs.c @@ -1,166 +1,165 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include /*! * \internal * \brief Get process ID and name associated with a /proc directory entry * * \param[in] entry Directory entry (must be result of readdir() on /proc) * \param[out] name If not NULL, a char[16] to hold the process name * \param[out] pid If not NULL, will be set to process ID of entry * - * \return 0 on success, -1 if entry is not for a process or info not found - * + * \return Standard Pacemaker return code * \note This should be called only on Linux systems, as not all systems that * support /proc store process names and IDs in the same way. The kernel * limits the process name to the first 15 characters (plus terminator). * It would be nice if there were a public kernel API constant for that * limit, but there isn't. */ -int -crm_procfs_process_info(struct dirent *entry, char *name, int *pid) +static int +pcmk__procfs_process_info(struct dirent *entry, char *name, pid_t *pid) { int fd, local_pid; FILE *file; struct stat statbuf; char procpath[128] = { 0 }; /* We're only interested in entries whose name is a PID, * so skip anything non-numeric or that is too long. * * 114 = 128 - strlen("/proc/") - strlen("/status") - 1 */ local_pid = atoi(entry->d_name); if ((local_pid <= 0) || (strlen(entry->d_name) > 114)) { return -1; } if (pid) { - *pid = local_pid; + *pid = (pid_t) local_pid; } /* Get this entry's file information */ strcpy(procpath, "/proc/"); strcat(procpath, entry->d_name); fd = open(procpath, O_RDONLY); if (fd < 0 ) { return -1; } if (fstat(fd, &statbuf) < 0) { close(fd); return -1; } close(fd); /* We're only interested in subdirectories */ if (!S_ISDIR(statbuf.st_mode)) { return -1; } /* Read the first entry ("Name:") from the process's status file. * We could handle the valgrind case if we parsed the cmdline file * instead, but that's more of a pain than it's worth. */ if (name != NULL) { strcat(procpath, "/status"); file = fopen(procpath, "r"); if (!file) { return -1; } if (fscanf(file, "Name:\t%15[^\n]", name) != 1) { fclose(file); return -1; } name[15] = 0; fclose(file); } return 0; } /*! * \internal * \brief Return process ID of a named process * * \param[in] name Process name (as used in /proc/.../status) * * \return Process ID of named process if running, 0 otherwise * * \note This will return 0 if the process is being run via valgrind. * This should be called only on Linux systems. */ -int -crm_procfs_pid_of(const char *name) +pid_t +pcmk__procfs_pid_of(const char *name) { DIR *dp; struct dirent *entry; - int pid = 0; + pid_t pid = 0; char entry_name[64] = { 0 }; dp = opendir("/proc"); if (dp == NULL) { crm_notice("Can not read /proc directory to track existing components"); return 0; } while ((entry = readdir(dp)) != NULL) { - if ((crm_procfs_process_info(entry, entry_name, &pid) == 0) + if ((pcmk__procfs_process_info(entry, entry_name, &pid) == pcmk_rc_ok) && safe_str_eq(entry_name, name) - && (pcmk__pid_active((pid_t) pid, NULL) == pcmk_rc_ok)) { + && (pcmk__pid_active(pid, NULL) == pcmk_rc_ok)) { - crm_info("Found %s active as process %d", name, pid); + crm_info("Found %s active as process %lld", name, (long long) pid); break; } pid = 0; } closedir(dp); return pid; } /*! * \internal * \brief Calculate number of logical CPU cores from procfs * * \return Number of cores (or 1 if unable to determine) */ unsigned int -crm_procfs_num_cores(void) +pcmk__procfs_num_cores(void) { int cores = 0; FILE *stream = NULL; /* Parse /proc/stat instead of /proc/cpuinfo because it's smaller */ stream = fopen("/proc/stat", "r"); if (stream == NULL) { crm_perror(LOG_INFO, "Could not open /proc/stat"); } else { char buffer[2048]; while (fgets(buffer, sizeof(buffer), stream)) { if (crm_starts_with(buffer, "cpu") && isdigit(buffer[3])) { ++cores; } } fclose(stream); } return cores? cores : 1; } diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index 23155c5245..848236cebc 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,274 +1,278 @@ /* - * Copyright 2013-2019 the Pacemaker project contributors + * Copyright 2013-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #ifdef _POSIX_MEMLOCK # include #endif static pid_t sbd_pid = 0; enum pcmk_panic_flags { pcmk_panic_none = 0x00, pcmk_panic_delay = 0x01, pcmk_panic_kdump = 0x02, pcmk_panic_shutdown = 0x04, }; static void sysrq_trigger(char t) { #if SUPPORT_PROCFS FILE *procf; // Root can always write here, regardless of kernel.sysrq value procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { crm_perror(LOG_WARNING, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); #endif // SUPPORT_PROCFS return; } static void pcmk_panic_local(void) { int rc = pcmk_ok; uid_t uid = geteuid(); pid_t ppid = getppid(); if(uid != 0 && ppid > 1) { /* We're a non-root pacemaker daemon (pacemaker-based, * pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with * the original pacemakerd parent. * * Of these, only the controller is likely to be initiating resets. */ - do_crm_log_always(LOG_EMERG, "Signaling parent %d to panic", ppid); + do_crm_log_always(LOG_EMERG, "Signaling parent %lld to panic", + (long long) ppid); crm_exit(CRM_EX_PANIC); return; } else if (uid != 0) { #if SUPPORT_PROCFS /* * No permissions, and no pacemakerd parent to escalate to. * Track down the new pacemakerd process and send a signal instead. */ union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); - ppid = crm_procfs_pid_of("pacemakerd"); - do_crm_log_always(LOG_EMERG, "Signaling pacemakerd(%d) to panic", ppid); + ppid = pcmk__procfs_pid_of("pacemakerd"); + do_crm_log_always(LOG_EMERG, "Signaling pacemakerd[%lld] to panic", + (long long) ppid); if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { - crm_perror(LOG_EMERG, "Cannot signal pacemakerd(%d) to panic", ppid); + crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic", + (long long) ppid); } #endif // SUPPORT_PROCFS /* The best we can do now is die */ crm_exit(CRM_EX_PANIC); return; } /* We're either pacemakerd, or a pacemaker daemon running as root */ if (safe_str_eq("crash", getenv("PCMK_panic_action"))) { sysrq_trigger('c'); } else { sysrq_trigger('b'); } /* reboot(RB_HALT_SYSTEM); rc = errno; */ reboot(RB_AUTOBOOT); rc = errno; - do_crm_log_always(LOG_EMERG, "Reboot failed, escalating to %d: %s (%d)", ppid, pcmk_strerror(rc), rc); + do_crm_log_always(LOG_EMERG, "Reboot failed, escalating to parent %lld: %s " + CRM_XS " rc=%d", (long long) ppid, pcmk_rc_str(rc), rc); if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } static void pcmk_panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); do_crm_log_always(LOG_EMERG, "Signaling sbd[%lld] to panic", (long long) sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate", (long long) sbd_pid); pcmk_panic_local(); } if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } void pcmk_panic(const char *origin) { static struct qb_log_callsite *panic_cs = NULL; if (panic_cs == NULL) { panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay", LOG_TRACE, __LINE__, crm_trace_nonlog); } /* Ensure sbd_pid is set */ (void)pcmk_locate_sbd(); if (panic_cs && panic_cs->targets) { /* getppid() == 1 means our original parent no longer exists */ do_crm_log_always(LOG_EMERG, "Shutting down instead of panicking the node " CRM_XS " origin=%s sbd=%lld parent=%d", origin, (long long) sbd_pid, getppid()); crm_exit(CRM_EX_FATAL); return; } if(sbd_pid > 1) { do_crm_log_always(LOG_EMERG, "Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, origin); pcmk_panic_sbd(); } else { do_crm_log_always(LOG_EMERG, "Panicking the system directly: %s", origin); pcmk_panic_local(); } } pid_t pcmk_locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; int rc; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid"); sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid); if (rc == pcmk_rc_ok) { crm_trace("SBD detected at pid %lld (via PID file %s)", (long long) sbd_pid, pidfile); #if SUPPORT_PROCFS } else { /* Fall back to /proc for systems that support it */ - sbd_pid = (pid_t) crm_procfs_pid_of("sbd"); + sbd_pid = pcmk__procfs_pid_of("sbd"); crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); #endif // SUPPORT_PROCFS } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } long crm_get_sbd_timeout(void) { static long sbd_timeout = -2; if (sbd_timeout == -2) { sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT")); } return sbd_timeout; } long crm_auto_watchdog_timeout() { long sbd_timeout = crm_get_sbd_timeout(); return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout); } gboolean check_sbd_timeout(const char *value) { long st_timeout = value? crm_get_msec(value) : 0; if (st_timeout < 0) { st_timeout = crm_auto_watchdog_timeout(); crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)", st_timeout, value); } if (st_timeout == 0) { crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)", value? value : "default"); } else if (pcmk_locate_sbd() == 0) { do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout configured (%s) but SBD not active", (value? value : "auto")); crm_exit(CRM_EX_FATAL); return FALSE; } else { long sbd_timeout = crm_get_sbd_timeout(); if (st_timeout < sbd_timeout) { do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout (%s) too short (must be >%ldms)", value, sbd_timeout); crm_exit(CRM_EX_FATAL); return FALSE; } crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms", value, sbd_timeout); } return TRUE; }