diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 57d74db322..9ca3eae787 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1709 +1,1740 @@ /* * Copyright 2009-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; static gboolean no_cib_connect = FALSE; static gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static xmlNode *local_cib = NULL; static pe_working_set_t *fenced_data_set = NULL; static const unsigned long long data_set_flags = pe_flag_quick_location | pe_flag_no_compat | pe_flag_no_counts; static cib_t *cib_api = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_LOG, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, { NULL, NULL, NULL } }; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } /* Exit code means? 
*/ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, F_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? 
*/ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (pcmk__str_eq(op, "poke", pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { 
crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { 
return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } /*! 
* \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (STONITH_OP_DEVICE_ADD, * STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or * STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed * \param[in] active Current number of devices or topologies in use */ static void send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); fenced_send_notification(op, result, notify_data); free_xml(notify_data); } /*! * \internal * \brief Send notifications for a device change to subscribed clients * * \param[in] op Notification type (STONITH_OP_DEVICE_ADD or * STONITH_OP_DEVICE_DEL) * \param[in] result Operation result * \param[in] desc ID of device that changed */ void fenced_send_device_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { send_config_notification(op, result, desc, g_hash_table_size(device_list)); } /*! 
* \internal * \brief Send notifications for a topology level change to subscribed clients * * \param[in] op Notification type (STONITH_OP_LEVEL_ADD or * STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc String representation of level ([]) */ void fenced_send_level_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { send_config_notification(op, result, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); fenced_unregister_level(data, &desc, &result); fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); pcmk__reset_result(&result); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, true); } } static void remove_topology_level(xmlNode *match) { int index = 0; char *key = NULL; CRM_CHECK(match != NULL, return); key = stonith_level_key(match, fenced_target_by_unknown); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } static void add_topology_level(xmlNode *match) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(match != NULL, return); fenced_register_level(match, &desc, 
&result); fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); pcmk__reset_result(&result); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, fenced_target_by_unknown); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); remove_topology_level(match); add_topology_level(match); } } /* Fencing */ static void fencing_topology_init(void) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! 
* \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static pe_node_t * our_node_allowed_for(const pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, TRUE); free_xml(xml); if (rc != pcmk_ok) { crm_crit("Cannot register watchdog pseudo fence agent"); crm_exit(CRM_EX_FATAL); } } } else { /* be silent if no device - todo parameter to stonith_device_remove */ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { stonith_device_remove(STONITH_WATCHDOG_ID, true); } } } static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { long timeout_ms = 0; xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", cib, LOG_NEVER); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if (value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_watchdog_timeout(); } stonith_watchdog_timeout_ms = timeout_ms; } /*! 
* \internal
 * \brief Register a resource (or its children) as a fencing device if it is
 *        an enabled STONITH resource that this node may run
 *
 * The function returns without registering anything when the resource is not
 * of class stonith, is disabled, is the watchdog device while watchdog
 * fencing is off, or is not allowed on the local node.
 *
 * \param[in,out] rsc       Resource to check
 * \param[in,out] data_set  Cluster working set with device information
 */
static void
cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
{
    pe_node_t *node = NULL;
    pe_node_t *group_node = NULL;
    const char *standard = NULL;

    /* Complex resource: look at the children instead of the resource itself */
    if (rsc->children != NULL) {
        GList *child = rsc->children;

        while (child != NULL) {
            cib_device_update(child->data, data_set);
            if (pe_rsc_is_clone(rsc)) {
                crm_trace("Only processing one copy of the clone %s", rsc->id);
                break;
            }
            child = child->next;
        }
        return;
    }

    /* Anything that is not a STONITH resource is of no interest */
    standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
    if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH,
                      pcmk__str_casei)) {
        return;
    }

    /* A disabled resource is not registered */
    if (pe__resource_is_disabled(rsc)) {
        crm_info("Device %s has been disabled", rsc->id);
        return;
    }

    /* With watchdog fencing off, a watchdog fence resource counts as
     * disabled
     */
    if ((stonith_watchdog_timeout_ms <= 0)
        && pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {

        crm_info("Watchdog-fencing disabled thus handling "
                 "device %s as disabled", rsc->id);
        return;
    }

    /* Is the local node allowed for the resource, and for its group? */
    node = our_node_allowed_for(rsc);
    if ((rsc->parent != NULL) && (rsc->parent->variant == pe_group)) {
        group_node = our_node_allowed_for(rsc->parent);
    }

    if (node == NULL) {
        /* Local node is not in the allowed list at all */
        GHashTableIter allowed;

        crm_info("Device %s has been disabled on %s: unknown",
                 rsc->id, stonith_our_uname);

        g_hash_table_iter_init(&allowed, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&allowed, NULL, (void **) &node)) {
            crm_trace("Available: %s = %d",
                      pe__node_name(node), node->weight);
        }
        return;
    }

    if ((node->weight < 0)
        || ((group_node != NULL) && (group_node->weight < 0))) {
        /* Local node (or its group) is ruled out by a negative score */
        int score = group_node->weight;

        if (node->weight < 0) {
            score = node->weight;
        }
        crm_info("Device %s has been disabled on %s: score=%s",
                 rsc->id, stonith_our_uname, pcmk_readable_score(score));
        return;
    }

    /* Local node is allowed: (re-)register the device with current values */
    {
        int rc;
        xmlNode *registration = NULL;
        GHashTable *instance_params = NULL;
        GHashTableIter param_iter;
        stonith_key_value_t *param_list = NULL;
        const char *key = NULL;
        const char *val = NULL;
        const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
        const char *provides = NULL;

        crm_debug("Device %s is allowed on %s: score=%d",
                  rsc->id, stonith_our_uname, node->weight);

        instance_params = pe_rsc_params(rsc, node, data_set);
        get_meta_attributes(rsc->meta, rsc, node, data_set);

        provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES);

        g_hash_table_iter_init(&param_iter, instance_params);
        while (g_hash_table_iter_next(&param_iter, (gpointer *) &key,
                                      (gpointer *) &val)) {
            if ((key == NULL) || (val == NULL)) {
                continue;
            }
            param_list = stonith_key_value_add(param_list, key, val);
            crm_trace(" %s=%s", key, val);
        }

        registration = create_device_registration_xml(rsc_name(rsc),
                                                      st_namespace_any, agent,
                                                      param_list, provides);
        stonith_key_value_freeall(param_list, 1, 1);
        rc = stonith_device_register(registration, TRUE);
        CRM_ASSERT(rc == pcmk_ok);
        free_xml(registration);
    }
}

/*!
* \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); if (fenced_data_set->now != NULL) { crm_time_free(fenced_data_set->now); fenced_data_set->now = NULL; } fenced_data_set->localhost = stonith_our_uname; pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(fenced_data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, 
"/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; reason = strdup("(meta) attribute deleted from resource"); break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, true); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in CIB"); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } 
freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. 
While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_NEVER); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = 
crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { add_topology_level(change->children); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { remove_topology_level(match->children); add_topology_level(match->children); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; long timeout_ms_saved = stonith_watchdog_timeout_ms; 
gboolean need_full_refresh = FALSE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); need_full_refresh = TRUE; } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = TRUE; } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); watchdog_device_update(); } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = copy_xml(output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); 
fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { stonith_cleanup(); crm_exit(CRM_EX_OK); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); free(stonith_our_uname); stonith_our_uname = NULL; free_xml(local_cib); local_cib = NULL; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "stand-alone", no_argument, 0, 's', "\tDeprecated (will be removed in a future release)", pcmk__option_default }, { "stand-alone-w-cpg", no_argument, 0, 'c', "\tIntended for use in regression testing only", pcmk__option_default }, { "logfile", required_argument, 0, 'l', NULL, pcmk__option_default }, { "verbose", no_argument, 0, 'V', NULL, pcmk__option_default }, { "version", no_argument, 0, '$', NULL, pcmk__option_default }, { "help", no_argument, 0, '?', NULL, pcmk__option_default }, { 0, 0, 0, 0 } }; static void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == 
-ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } +static pcmk__cluster_option_t fencer_options[] = { + /* name, old name, type, allowed values, + * default value, validator, + * short description, + * long description + */ + { + PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, + N_("Advanced use only: An alternate parameter to supply instead 
of 'port'"), + N_("Some devices do not support the " + "standard 'port' parameter or may provide additional ones. Use " + "this to specify an alternate, device-specific, parameter " + "that should indicate the machine to be fenced. A value of " + "'none' can be used to tell the cluster not to supply any " + "additional parameters.") + }, + { + PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL, + N_("A mapping of host names to ports numbers for devices that do not support host names."), + N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2") + }, + { + PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL, + N_("A list of machines controlled by " + "this device (Optional unless pcmk_host_check=static-list)"), + N_("Eg. node1,node2,node3") + }, + { + PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL, + N_("How to determine which machines are controlled by the device."), + N_("Allowed values: dynamic-list " + "(query the device via the 'list' command), static-list " + "(check the pcmk_host_list attribute), status " + "(query the device via the 'status' command), " + "none (assume every device can fence every " + "machine)") + }, + { + PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL, + N_("Enable a delay of no more than the time specified before executing fencing actions."), + N_("Enable a delay of no more than the " + "time specified before executing fencing actions. Pacemaker " + "derives the overall delay by taking the value of " + "pcmk_delay_base and adding a random delay value such " + "that the sum is kept below this maximum.") + }, + { + PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL, + N_("Enable a base delay for " + "fencing actions and specify base delay value."), + N_("This enables a static delay for " + "fencing actions, which can help avoid \"death matches\" where " + "two nodes try to fence each other at the same time.
If " + "pcmk_delay_max is also used, a random delay will be " + "added such that the total delay is kept below that value.\n" + "This can be set to a single time value to apply to any node " + "targeted by this device (useful if a separate device is " + "configured for each target), or to a node map (for example, " + "\"node1:1s;node2:5\") to set a different value per target.") + }, + { + PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL, + N_("The maximum number of actions can be performed in parallel on this device"), + N_("Cluster property concurrent-fencing=true needs to be configured first.\n" + "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.") + }, + { + "pcmk_reboot_action",NULL, "string", NULL, "reboot", NULL, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + }, + { + "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.") + }, + { + "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.") + }, + { + "pcmk_off_action",NULL, "string", NULL, "off", NULL, + N_("Advanced use only: An alternate command to run instead of \'off\'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + }, + { + "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'off\' actions.") + }, + { + "pcmk_off_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.") + }, + { + "pcmk_on_action",NULL, "string", NULL, "on", NULL, + N_("Advanced use only: An alternate command to run instead of 'on'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + }, + { + "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n"
+ "Use this to specify an alternate, device-specific, timeout for \'on\' actions.") + }, + { + "pcmk_on_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.") + }, + { + "pcmk_list_action",NULL, "string", NULL, "list", NULL, + N_("Advanced use only: An alternate command to run instead of \'list\'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + }, + { + "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'list\' actions.") + }, + { + "pcmk_list_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.") + }, + { + "pcmk_monitor_action",NULL, "string", NULL, "monitor", NULL, + N_("Advanced use only: An alternate command to run instead of \'monitor\'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n"
+ "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") + }, + { + "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.") + }, + { + "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." + " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.") + }, + { + "pcmk_status_action",NULL, "string", NULL, "status", NULL, + N_("Advanced use only: An alternate command to run instead of \'status\'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + }, + { + "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, + N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'status\' actions.") + }, + { + "pcmk_status_retries",NULL, "integer", NULL, "2", NULL, + N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
+ " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.") + }, +}; + +/*! + * \internal + * \brief Print metadata for the fencer's options to stdout + * + * Formats the fencer_options table with pcmk__format_option_metadata(), + * writes the resulting string with printf(), then frees it. + */ +void +fencer_metadata(void) +{ + char *s = pcmk__format_option_metadata("pacemaker-fenced", + "Instance attributes available for all \"stonith\"-class resources", + "Instance attributes available for all \"stonith\"-class resources" + " and used by Pacemaker's fence daemon, formerly known as stonithd", + fencer_options, + PCMK__NELEM(fencer_options)); + printf("%s", s); + free(s); +} + int main(int argc, char **argv) { int flag; - int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t *cluster = NULL; - const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; crm_ipc_t *old_instance = NULL; int rc = pcmk_rc_ok; crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for executing fencing devices in a " "Pacemaker cluster"); while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': { int rc = pcmk__add_logfile(optarg); if (rc != pcmk_rc_ok) { /* Logging has not yet been initialized, so stderr is * the only way to get information out */ fprintf(stderr, "Logging to %s is disabled: %s\n", optarg, pcmk_rc_str(rc)); } } break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) { - printf("\n"); - printf("\n"); - printf(" 1.0\n"); - printf(" Instance attributes available for all \"stonith\"-class resources" - " and used by Pacemaker's fence daemon, formerly known as
stonithd\n"); -#ifdef ENABLE_NLS - printf(_(" Instance attributes available for all \"stonith\"-class resources" - " and used by Pacemaker's fence daemon, formerly known as stonithd\n")); -#endif - printf(" Instance attributes available for all \"stonith\"-class resources\n"); -#ifdef ENABLE_NLS - printf(_(" Instance attributes available for all \"stonith\"-class resources\n")); -#endif - printf(" \n"); - -#if 0 - // priority is not implemented yet - printf(" \n"); - printf(" Devices that are not in a topology " - "are tried in order of highest to lowest integer priority\n"); - printf(" \n"); - printf(" \n"); -#endif - - printf(" \n", - PCMK_STONITH_HOST_ARGUMENT); - printf(" Some devices do not support the " - "standard 'port' parameter or may provide additional ones. Use " - "this to specify an alternate, device-specific, parameter " - "that should indicate the machine to be fenced. A value of " - "'%s' can be used to tell the cluster not to supply any " - "additional parameters.\n" - " \n", PCMK__VALUE_NONE); -#ifdef ENABLE_NLS - printf(_(" Some devices do not support the " - "standard 'port' parameter or may provide additional ones. Use " - "this to specify an alternate, device-specific, parameter " - "that should indicate the machine to be fenced. A value of " - "'%s' can be used to tell the cluster not to supply any " - "additional parameters.\n" - " \n"), PCMK__VALUE_NONE); -#endif - printf - (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); -#ifdef ENABLE_NLS - printf - (_(" Advanced use only: An alternate parameter to supply instead of 'port'\n")); -#endif - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_HOST_MAP); - printf - (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); -#ifdef ENABLE_NLS - printf - (_(" Eg. 
node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n")); -#endif - printf - (" A mapping of host names to ports numbers for devices that do not support host names.\n"); -#ifdef ENABLE_NLS - printf - (_(" A mapping of host names to ports numbers for devices that do not support host names.\n")); -#endif - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_HOST_LIST); - printf(" Eg. node1,node2,node3\n"); - printf(" A list of machines controlled by " - "this device (Optional unless %s=static-list).\n", - PCMK_STONITH_HOST_CHECK); - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_HOST_CHECK); - printf(" Allowed values: dynamic-list " - "(query the device via the 'list' command), static-list " - "(check the " PCMK_STONITH_HOST_LIST " attribute), status " - "(query the device via the 'status' command), " - PCMK__VALUE_NONE " (assume every device can fence every " - "machine)\n"); - printf - (" How to determine which machines are controlled by the device.\n"); - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_DELAY_MAX); - printf(" This prevents double fencing when " - "using slow devices such as sbd.\nUse this to enable a random " - "delay for fencing actions.\nThe overall delay is derived from " - "this random delay value adding a static delay so that the sum " - "is kept below the maximum delay.\n"); - printf(" Enable a delay of no more than the " - "time specified before executing fencing actions. Pacemaker " - "derives the overall delay by taking the value of " - PCMK_STONITH_DELAY_BASE " and adding a random delay value such " - "that the sum is kept below this maximum.\n"); - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_DELAY_BASE); - printf(" This enables a static delay for " - "fencing actions, which can help avoid \"death matches\" where " - "two nodes try to fence each other at the same time. 
If " - PCMK_STONITH_DELAY_MAX " is also used, a random delay will be " - "added such that the total delay is kept below that value.\n" - "This can be set to a single time value to apply to any node " - "targeted by this device (useful if a separate device is " - "configured for each target), or to a node map (for example, " - "\"node1:1s;node2:5\") to set a different value per target.\n" - " \n"); - printf(" Enable a base delay for " - "fencing actions and specify base delay value.\n"); - printf(" \n"); - printf(" \n"); - - printf(" \n", - PCMK_STONITH_ACTION_LIMIT); - printf - (" Cluster property concurrent-fencing=true needs to be configured first.\n" - "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); - printf - (" The maximum number of actions can be performed in parallel on this device\n"); - printf(" \n"); - printf(" \n"); - - - for (lpc = 0; lpc < PCMK__NELEM(actions); lpc++) { - printf(" \n", actions[lpc]); - printf - (" Some devices do not support the standard commands or may provide additional ones.\n" - "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", - actions[lpc]); - printf - (" Advanced use only: An alternate command to run instead of '%s'\n", - actions[lpc]); - printf(" \n", actions[lpc]); - printf(" \n"); - - printf(" \n", actions[lpc]); - printf - (" Some devices need much more/less time to complete than normal.\n" - "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", - actions[lpc]); - printf - (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", - actions[lpc]); - printf(" \n"); - printf(" \n"); - - printf(" \n", actions[lpc]); - printf(" Some devices do not support multiple connections." - " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." 
- " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." - "\n", actions[lpc]); - printf - (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", - actions[lpc]); - printf(" \n"); - printf(" \n"); - } - - printf(" \n"); - printf("\n"); + fencer_metadata(); return CRM_EX_OK; - } - + } if (optind != argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (crm_ipc_connect(old_instance)) { - /* IPC end-point already up */ + // IPC end-point already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { - /* not up or not authentic, we'll proceed either way */ + // not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); cluster = calloc(1, sizeof(crm_cluster_t)); CRM_ASSERT(cluster != NULL); if (stand_alone == FALSE) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... 
terminating"); crm_exit(CRM_EX_FATAL); } stonith_our_uname = strdup(cluster->uname); if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); pcmk__register_formats(NULL, formats); rc = pcmk__output_new(&out, "log", NULL, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { crm_err("Can't log resource details due to internal error: %s\n", pcmk_rc_str(rc)); crm_exit(CRM_EX_FATAL); } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_TRACE); fenced_data_set->priv = out; - /* Create the mainloop and run it... */ + // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); stonith_cleanup(); free(cluster->uuid); free(cluster->uname); free(cluster); pe_free_working_set(fenced_data_set); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); pcmk__unregister_formats(); crm_exit(CRM_EX_OK); } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index 18e8da48f9..546799306a 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,293 +1,294 @@ /* * Copyright 2009-2022 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include // uint32_t, uint64_t #include /*! 
* \internal * \brief Check whether target has already been fenced recently * * \param[in] tolerance Number of seconds to look back in time * \param[in] target Name of node to search for * \param[in] action Action we want to match * * \return TRUE if an equivalent fencing operation took place in the last * \p tolerance seconds, FALSE otherwise */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; /*! list of actions that must execute on the target node. Used for unfencing */ char *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; // Whether target's nodeid should be passed as a parameter to the agent gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! 
Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! Requested fencing delay. * Value -1 means disable any static/random fencing delays. */ int delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! 
The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; /*! The (potentially intermediate) result of the operation */ pcmk__action_result_t result; } remote_fencing_op_t; void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; // How the user specified the target of a topology level enum fenced_target_by { fenced_target_by_unknown = -1, // Invalid or not yet parsed fenced_target_by_name, // By target name fenced_target_by_pattern, // By a pattern matching target names fenced_target_by_attribute, // By a node attribute/value on target }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. 
When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { enum fenced_target_by kind; // How target was specified /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! Names of fencing devices at each topology level */ GList *levels[ST_LEVEL_MAX]; } stonith_topology_t; void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); void fenced_unregister_handlers(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode *msg, gboolean from_cib); void stonith_device_remove(const char *id, bool from_cib); char *stonith_level_key(const xmlNode *msg, int mode); void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result); void do_stonith_async_timeout_update(const char *client, const char 
*call_id, int timeout); void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data); void fenced_send_device_notification(const char *op, const pcmk__action_result_t *result, const char *desc); void fenced_send_level_notification(const char *op, const pcmk__action_result_t *result, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack); void fenced_process_fencing_reply(xmlNode *msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(crm_node_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg); +void fencer_metadata(void); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; extern GHashTable *stonith_remote_op_list; diff --git a/po/zh_CN.po b/po/zh_CN.po index a709950685..74abd9ca13 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1,289 +1,503 @@ # # Copyright 2003-2022 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU Lesser General Public License # version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
# #, fuzzy msgid "" msgstr "" "Project-Id-Version: Pacemaker 2\n" "Report-Msgid-Bugs-To: developers@clusterlabs.org\n" -"POT-Creation-Date: 2022-02-11 13:46+0800\n" +"POT-Creation-Date: 2022-09-13 22:43+0800\n" "PO-Revision-Date: 2021-11-08 11:04+0800\n" "Last-Translator: Vivi \n" "Language-Team: CHINESE \n" "Language: zh_CN\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" #: daemons/controld/controld_control.c:525 msgid "Pacemaker version on cluster node elected Designated Controller (DC)" msgstr "集群选定的控制器节点(DC)的 Pacemaker 版本" #: daemons/controld/controld_control.c:526 msgid "" "Includes a hash which identifies the exact changeset the code was built " "from. Used for diagnostic purposes." msgstr "它包含一个标识所构建代码变更版本的哈希值,其可用于诊断。" #: daemons/controld/controld_control.c:531 msgid "The messaging stack on which Pacemaker is currently running" msgstr "Pacemaker 正在使用的消息传输引擎" #: daemons/controld/controld_control.c:532 msgid "Used for informational and diagnostic purposes." msgstr "用于提供信息和诊断。" #: daemons/controld/controld_control.c:536 msgid "An arbitrary name for the cluster" msgstr "任意的集群名称" #: daemons/controld/controld_control.c:537 msgid "" "This optional value is mostly for users' convenience as desired in " "administration, but may also be used in Pacemaker configuration rules via " "the #cluster-name node attribute, and by higher-level tools and resource " "agents." -msgstr "该可选值主要是为了方便用户管理使用," -"也可以在pacemaker 配置规则中通过 #cluster-name 节点属性配置使用," -"也可以通过高级工具和资源代理使用。" +msgstr "" +"该可选值主要是为了方便用户管理使用,也可以在pacemaker 配置规则中通过 " +"#cluster-name 节点属性配置使用,也可以通过高级工具和资源代理使用。" #: daemons/controld/controld_control.c:545 msgid "How long to wait for a response from other nodes during start-up" msgstr "启动过程中等待其他节点响应的时间" #: daemons/controld/controld_control.c:546 msgid "" "The optimal value will depend on the speed and load of your network and the " "type of switches used." 
msgstr "其最佳值将取决于你的网络速度和负载以及所用交换机的类型。" #: daemons/controld/controld_control.c:551 msgid "" "Zero disables polling, while positive values are an interval in " "seconds(unless other units are specified, for example \"5min\")" msgstr "" "设置为0将禁用轮询,设置为正数将是以秒为单位的时间间隔(除非使用了其他单位,比" "如\"5min\"表示5分钟)" #: daemons/controld/controld_control.c:554 msgid "" "Polling interval to recheck cluster state and evaluate rules with date " "specifications" msgstr "重新检查集群状态并且评估具有日期规格的配置规则的轮询间隔" +#: daemons/controld/controld_control.c:556 +msgid "" +"Pacemaker is primarily event-driven, and looks ahead to know when to recheck " +"cluster state for failure timeouts and most time-based rules. However, it " +"will also recheck the cluster after this amount of inactivity, to evaluate " +"rules with date specifications and serve as a fail-safe for certain types of " +"scheduler bugs." +msgstr "" +"Pacemaker 主要是通过事件驱动的,并能预期重新检查集群状态以评估大多数基于时间" +"的规则以及过期的错误。然而无论如何,在集群经过该时间间隔的不活动状态后,它还" +"将重新检查集群,以评估具有日期规格的规则,并为某些类型的调度程序缺陷提供故障" +"保护。" + #: daemons/controld/controld_control.c:565 msgid "Maximum amount of system load that should be used by cluster nodes" msgstr "集群节点应该使用的最大系统负载量" #: daemons/controld/controld_control.c:566 msgid "" "The cluster will slow down its recovery process when the amount of system " "resources used (currently CPU) approaches this limit" msgstr "当使用的系统资源量(当前为CPU)接近此限制时,集群将减慢其恢复过程" #: daemons/controld/controld_control.c:572 msgid "" "Maximum number of jobs that can be scheduled per node (defaults to 2x cores)" msgstr "每个节点可以调度的最大作业数(默认为2x内核数)" #: daemons/controld/controld_control.c:576 msgid "How a cluster node should react if notified of its own fencing" msgstr "集群节点在收到针对自己的 fence 操作结果通知时应如何反应" #: daemons/controld/controld_control.c:577 msgid "" "A cluster node may receive notification of its own fencing if fencing is " "misconfigured, or if fabric fencing is in use that doesn't cut cluster " "communication. 
Allowed values are \"stop\" to attempt to immediately stop " "Pacemaker and stay stopped, or \"panic\" to attempt to immediately reboot " "the local node, falling back to stop on failure." -msgstr "如果有错误的 fence 配置,或者在使用 fabric fence 机制 (并不会切断集群通信)," -"则集群节点可能会收到针对自己的 fence 结果通知。允许的值为 \"stop\" 尝试立即停止 pacemaker " -"并保持停用状态,或者 \"panic\" 尝试立即重新启动本地节点,并在失败时返回执行stop。" +msgstr "" +"如果有错误的 fence 配置,或者在使用 fabric fence 机制 (并不会切断集群通信)," +"则集群节点可能会收到针对自己的 fence 结果通知。允许的值为 \"stop\" 尝试立即停" +"止 pacemaker 并保持停用状态,或者 \"panic\" 尝试立即重新启动本地节点,并在失败" +"时返回执行stop。" #: daemons/controld/controld_control.c:587 msgid "" "Declare an election failed if it is not decided within this much time. If " "you need to adjust this value, it probably indicates the presence of a bug." msgstr "" -"如果集群在本项设置时间内没有作出决定则宣布选举失败。如果您需要调整该值,这可能代表" -"存在某些缺陷。" +"如果集群在本项设置时间内没有作出决定则宣布选举失败。如果您需要调整该值,这可" +"能代表存在某些缺陷。" #: daemons/controld/controld_control.c:595 msgid "" "Exit immediately if shutdown does not complete within this much time. If you " "need to adjust this value, it probably indicates the presence of a bug." -msgstr "如果在这段时间内关机仍未完成,则立即退出。如果您需要调整该值,这可能代表" -"存在某些缺陷。" +msgstr "" +"如果在这段时间内关机仍未完成,则立即退出。如果您需要调整该值,这可能代表存在" +"某些缺陷。" #: daemons/controld/controld_control.c:603 #: daemons/controld/controld_control.c:610 msgid "" "If you need to adjust this value, it probably indicates the presence of a " "bug." msgstr "如果您需要调整该值,这可能代表存在某些缺陷。" #: daemons/controld/controld_control.c:616 msgid "" "*** Advanced Use Only *** Enabling this option will slow down cluster " "recovery under all conditions" -msgstr "" -"*** Advanced Use Only *** 启用此选项将在所有情况下减慢集群恢复的速度" +msgstr "*** Advanced Use Only *** 启用此选项将在所有情况下减慢集群恢复的速度" #: daemons/controld/controld_control.c:618 msgid "" "Delay cluster recovery for this much time to allow for additional events to " "occur. Useful if your configuration is sensitive to the order in which ping " "updates arrive." 
-msgstr "集群恢复将被推迟指定的时间间隔,以等待更多事件发生。" -"如果您的配置对 ping 更新到达的顺序很敏感,这就很有用" +msgstr "" +"集群恢复将被推迟指定的时间间隔,以等待更多事件发生。如果您的配置对 ping 更新" +"到达的顺序很敏感,这就很有用" #: daemons/controld/controld_control.c:625 +#, fuzzy msgid "" -"How long to wait before we can assume nodes are safely down when watchdog-" -"based self-fencing via SBD is in use" -msgstr "当基于 watchdog 的自我 fence 机制通过SBD 被执行时," -"我们可以假设节点安全关闭之前需要等待多长时间" +"How long before nodes can be assumed to be safely down when watchdog-based " +"self-fencing via SBD is in use" +msgstr "" +"当基于 watchdog 的自我 fence 机制通过SBD 被执行时,我们可以假设节点安全关闭之" +"前需要等待多长时间" #: daemons/controld/controld_control.c:627 msgid "" -"If nonzero, along with `have-watchdog=true` automatically set by the " -"cluster, when fencing is required, watchdog-based self-fencing will be " -"performed via SBD without requiring a fencing resource explicitly " -"configured. If `stonith-watchdog-timeout` is set to a positive value, unseen " -"nodes are assumed to self-fence within this much time. +WARNING:+ It must be " -"ensured that this value is larger than the `SBD_WATCHDOG_TIMEOUT` " -"environment variable on all nodes. Pacemaker verifies the settings " -"individually on all nodes and prevents startup or shuts down if configured " -"wrongly on the fly. It's strongly recommended that `SBD_WATCHDOG_TIMEOUT` is " -"set to the same value on all nodes. If `stonith-watchdog-timeout` is set to " -"a negative value, and `SBD_WATCHDOG_TIMEOUT` is set, twice that value will " -"be used. +WARNING:+ In this case, it's essential (currently not verified by " -"Pacemaker) that `SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes." 
-msgstr "" -"如果值非零,且集群设置了 `have-watchdog=true` ,当需要 fence 操作时,基于 watchdog 的自我 fence 机制将通过SBD执行," -"而不需要显式配置 fence 资源。如果 `stonith-watchdog-timeout` 被设为正值,则假定不可见的节点在这段时间内自我fence。" -" +WARNING:+ 必须确保该值大于所有节点上的`SBD_WATCHDOG_TIMEOUT` 环境变量。Pacemaker将在所有节点上单独验证设置," -"如发现有错误的动态配置,将防止节点启动或关闭。强烈建议在所有节点上将 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值。" -"如果 `stonith-watchdog-timeout` 设置为负值。并且设置了 `SBD_WATCHDOG_TIMEOUT` ,则将使用该值的两倍," -" +WARNING:+ 在这种情况下,必须将所有节点上 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值(目前没有通过pacemaker验证)。" - -#: daemons/controld/controld_control.c:648 +"If this is set to a positive value, lost nodes are assumed to self-fence " +"using watchdog-based SBD within this much time. This does not require a " +"fencing resource to be explicitly configured, though a fence_watchdog " +"resource can be configured, to limit use to specific nodes. If this is set " +"to 0 (the default), the cluster will never assume watchdog-based self-" +"fencing. If this is set to a negative value, the cluster will use twice the " +"local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is " +"positive, or otherwise treat this as 0. WARNING: When used, this timeout " +"must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-" +"based SBD, and Pacemaker will refuse to start on any of those nodes where " +"this is not true for the local value or SBD is not active. When this is set " +"to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on " +"all nodes that use SBD, otherwise data corruption or loss could occur." +msgstr "" + +#: daemons/controld/controld_control.c:646 msgid "" "How many times fencing can fail before it will no longer be immediately re-" "attempted on a target" msgstr "fence操作失败多少次会停止立即尝试" -#: daemons/controld/controld_control.c:556 + +#: daemons/fenced/pacemaker-fenced.c:1403 +msgid "Eg. 
node1,node2,node3" +msgstr "例如 node1,node2,node3" + +#: daemons/fenced/pacemaker-fenced.c:1404 msgid "" -"Pacemaker is primarily event-driven, and looks ahead to know when to recheck " -"cluster state for failure timeouts and most time-based rules. However, it " -"will also recheck the cluster after this amount of inactivity, to evaluate " -"rules with date specifications and serve as a fail-safe for certain types of " -"scheduler bugs." +"A list of machines controlled by this device (Optional unless " +"pcmk_host_list=static-list)" msgstr "" -"Pacemaker 主要是通过事件驱动的,并能预期重新检查集群状态以评估大多数基于时间" -"的规则以及过期的错误。然而无论如何,在集群经过该时间间隔的不活动状态后,它还" -"将重新检查集群,以评估具有日期规格的规则,并为某些类型的调度程序缺陷提供故障保护。" +"该设备控制的机器列表(可选参数,除非 pcmk_host_list 设置为 static-list)" -#: daemons/fenced/pacemaker-fenced.c:1464 -#, c-format +#: daemons/fenced/pacemaker-fenced.c:1409 +msgid "How to determine which machines are controlled by the device." +msgstr "如何确定设备控制哪些机器。" + +#: daemons/fenced/pacemaker-fenced.c:1410 msgid "" -" Instance attributes available for all \"stonith\"-" -"class resources and used by Pacemaker's fence daemon, formerly known as " -"stonithd\n" +"Allowed values: dynamic-list (query the device via the 'list' command), " +"static-list (check the pcmk_host_list attribute), status (query the device " +"via the 'status' command), none (assume every device can fence every machine)" msgstr "" -" 实例属性可用于所有stonith类资源,并由Pacemaker的fence" -"守护程序使用(以前称为stonithd)\n" +"允许的值:dynamic-list(通过'list'命令查询设备)," +"static-list(检查pcmk_host_list属性),status(通过'status'命令查询设备)," +"none(假设每个设备都可fence 每台机器 )" -#: daemons/fenced/pacemaker-fenced.c:1469 -#, c-format +#: daemons/fenced/pacemaker-fenced.c:1419 +#: daemons/fenced/pacemaker-fenced.c:1428 +msgid "Enable a base delay for fencing actions and specify base delay value." +msgstr "为 fencing 操作启用基础延迟并指定基础延迟值。" + +#: daemons/fenced/pacemaker-fenced.c:1420 +msgid "" +"Enable a delay of no more than the time specified before executing fencing " +"actions.
Pacemaker derives the overall delay by taking the value of " +"pcmk_delay_base and adding a random delay value such that the sum is kept " +"below this maximum." +msgstr "" +"在执行 fencing 操作前启用不超过指定时间的延迟。" +" Pacemaker通过获取pcmk_delay_base的值并添加随机延迟值来得出总体延迟," +"从而使总和保持在此最大值以下。" + +#: daemons/fenced/pacemaker-fenced.c:1430 +msgid "" +"This enables a static delay for fencing actions, which can help avoid " +"\"death matches\" where two nodes try to fence each other at the same time. " +"If pcmk_delay_max is also used, a random delay will be added such that the " +"total delay is kept below that value.This can be set to a single time value " +"to apply to any node targeted by this device (useful if a separate device is " +"configured for each target), or to a node map (for example, \"node1:1s;" +"node2:5\") to set a different value per target." +msgstr "" +"这使fencing 操作启用静态延迟,这可以帮助避免" +"\"death matches\"即两个节点试图同时互相fence。" +"如果还使用了pcmk_delay_max,则将添加随机延迟," +"以使总延迟保持在该值以下。可以将其设置为单个时间值," +"以应用于该设备针对的任何节点(适用于为每个目标分别配置了各自的设备的情况)" +",或者设置为一个节点映射 (例如,\"node1:1s;node2:5\")从而为每个目标设置不同值。" + +#: daemons/fenced/pacemaker-fenced.c:1442 +msgid "" +"The maximum number of actions can be performed in parallel on this device" +msgstr "" +"可以在该设备上并发执行的最多操作数量" + +#: daemons/fenced/pacemaker-fenced.c:1443 +msgid "" +"Cluster property concurrent-fencing=true needs to be configured first.Then " +"use this to specify the maximum number of actions can be performed in " +"parallel on this device. -1 is unlimited."
+msgstr "" +"需要首先配置集群属性 concurrent-fencing=true 。然后使用此参数指定可以在该设备上并发执行的最多操作数量。 -1 代表没有限制" + +#: daemons/fenced/pacemaker-fenced.c:1454 +msgid "" +"Advanced use only: Specify an alternate timeout to use for reboot actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于'reboot' 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1460 +msgid "" +"Advanced use only: The maximum number of times to retry the 'reboot' command " +"within the timeout period" +msgstr "" +"仅高级使用:在超时前重试'reboot'命令的最大次数" + +#: daemons/fenced/pacemaker-fenced.c:1461 msgid "" -" Instance attributes available for all \"stonith\"-" -"class resources\n" +"Some devices do not support multiple connections. Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'reboot' actions before giving up." msgstr "" -" 可用于所有stonith类资源的实例属性\n" +"一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +"因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'reboot' 操作的次数." + +#: daemons/fenced/pacemaker-fenced.c:1473 +msgid "" +"Advanced use only: Specify an alternate timeout to use for off actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于off 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1479 +msgid "" +"Advanced use only: The maximum number of times to retry the 'off' command " +"within the timeout period" +msgstr "" +"仅高级使用:在超时前重试'off'命令的最大次数" + +#: daemons/fenced/pacemaker-fenced.c:1480 +msgid "" +"Some devices do not support multiple connections. Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'off' actions before giving up." 
+msgstr "" +" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +" 因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'off' 操作的次数." #: daemons/fenced/pacemaker-fenced.c:1492 -#, fuzzy, c-format msgid "" -" Some devices do not support the standard 'port' " -"parameter or may provide additional ones. Use this to specify an alternate, " -"device-specific, parameter that should indicate the machine to be fenced. A " -"value of '%s' can be used to tell the cluster not to supply any additional " -"parameters.\n" -" \n" -msgstr "" -" 某些设备可能不支持使用标准的'port'(端口)参数,也可" -"能会提供额外的端口参数。\n" -"使用此参数可以为需要fence(防护)的机器指定一个备用的,专用于该设备的参数,该参" -"数应指出要fence的机器。\n" -"使用值'%s'可用来告诉集群不提供任何额外的参数\n" -" \n" - -#: daemons/fenced/pacemaker-fenced.c:1504 -#, c-format +"Advanced use only: Specify an alternate timeout to use for on actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于on 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1498 msgid "" -" Advanced use only: An alternate parameter to " -"supply instead of 'port'\n" +"Advanced use only: The maximum number of times to retry the 'on' command " +"within the timeout period" msgstr "" -" Advanced use only:(仅限高级使用)备用参数可替" -"代'port'\n" +"仅高级使用:在超时前重试'on'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1515 -#, c-format +#: daemons/fenced/pacemaker-fenced.c:1499 msgid "" -" Eg. node1:1;node2:2,3 would tell the cluster to " -"use port 1 for node1 and ports 2 and 3 for node2\n" +"Some devices do not support multiple connections. Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'on' actions before giving up." msgstr "" -" 例如:pcmk_host_map=\"node:1;node2:2,3\"表示让集群" -"的节点node1使用端口1,节点node2使用端口2和端口3。\n" +" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +" 因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'on' 操作的次数." 
-#: daemons/fenced/pacemaker-fenced.c:1521 -#, c-format +#: daemons/fenced/pacemaker-fenced.c:1511 +msgid "" +"Advanced use only: Specify an alternate timeout to use for list actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于list 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1517 +msgid "" +"Advanced use only: The maximum number of times to retry the 'list' command " +"within the timeout period" +msgstr "" +"仅高级使用:在超时前重试'list'命令的最大次数" + +#: daemons/fenced/pacemaker-fenced.c:1518 msgid "" -" A mapping of host names to ports numbers for " -"devices that do not support host names.\n" +"Some devices do not support multiple connections. Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'list' actions before giving up." msgstr "" -" 为不支持主机名的设备提供主机名和端口号的映射\n" +" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +" 因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'list' 操作的次数." + -#: lib/cib/cib_utils.c:558 +#: daemons/fenced/pacemaker-fenced.c:1530 +msgid "" +"Advanced use only: Specify an alternate timeout to use for monitor actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于monitor 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1531 +msgid "" +"Some devices need much more/less time to complete than normal.\n" +"Use this to specify an alternate, device-specific, timeout for 'monitor' " +"actions." +msgstr "" +"一些设备需要比正常情况下更多或更少的时间来完成操作。\n" +"使用此选项为'monitor' 操作指定一个替代的、" +"特定于设备的超时。" + +#: daemons/fenced/pacemaker-fenced.c:1536 +msgid "" +"Advanced use only: The maximum number of times to retry the 'monitor' " +"command within the timeout period" +msgstr "" +"仅高级使用:在超时前重试'monitor'命令的最大次数" + +#: daemons/fenced/pacemaker-fenced.c:1537 +msgid "" +"Some devices do not support multiple connections.
Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'monitor' actions before giving up." +msgstr "" +" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +" 因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'monitor' 操作的次数." + + +#: daemons/fenced/pacemaker-fenced.c:1549 +msgid "" +"Advanced use only: Specify an alternate timeout to use for status actions " +"instead of stonith-timeout" +msgstr "" +"仅高级使用:指定用于status 操作的替代超时,而不是stonith-timeout" + +#: daemons/fenced/pacemaker-fenced.c:1555 +msgid "" +"Advanced use only: The maximum number of times to retry the 'status' command " +"within the timeout period" +msgstr "" +"仅高级使用:在超时前重试'status'命令的最大次数" + +#: daemons/fenced/pacemaker-fenced.c:1556 +msgid "" +"Some devices do not support multiple connections. Operations may 'fail' if " +"the device is busy with another task so Pacemaker will automatically retry " +"the operation, if there is time remaining. Use this option to alter the " +"number of times Pacemaker retries 'status' actions before giving up." +msgstr "" +" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ," +" 因此Pacemaker将自动重试(如果时间允许)。" +" 使用此选项更改Pacemaker在放弃之前重试'status' 操作的次数." + +#: lib/cib/cib_utils.c:559 msgid "Enable Access Control Lists (ACLs) for the CIB" msgstr "为CIB启用访问控制列表(ACL)" -#: lib/cib/cib_utils.c:564 +#: lib/cib/cib_utils.c:565 msgid "Maximum IPC message backlog before disconnecting a cluster daemon" msgstr "断开集群守护程序之前的最大IPC消息积压" -#: lib/cib/cib_utils.c:565 +#: lib/cib/cib_utils.c:566 msgid "" "Raise this if log has \"Evicting client\" messages for cluster daemon PIDs " "(a good value is the number of resources in the cluster multiplied by the " "number of nodes)." 
msgstr "" "如果日志中有针对集群守护程序PID的消息“Evicting client”,(则建议将值设为集群" "中的资源数量乘以节点数量)" -#: lib/common/options.c:591 +#: lib/common/options.c:621 msgid " Allowed values: " msgstr "" #: lib/pengine/common.c:119 msgid "Whether watchdog integration is enabled" msgstr "是否启用看门狗集成设置" -#: tools/crm_resource.c:1405 +#: tools/crm_resource.c:1408 #, fuzzy, c-format msgid "Metadata query for %s failed: %s" msgstr ",查询%s的元数据失败: %s\n" -#: tools/crm_resource.c:1411 +#: tools/crm_resource.c:1414 #, c-format msgid "'%s' is not a valid agent specification" msgstr "'%s' 是一个无效的代理" + +#~ msgid "" +#~ "If nonzero, along with `have-watchdog=true` automatically set by the " +#~ "cluster, when fencing is required, watchdog-based self-fencing will be " +#~ "performed via SBD without requiring a fencing resource explicitly " +#~ "configured. If `stonith-watchdog-timeout` is set to a positive value, " +#~ "unseen nodes are assumed to self-fence within this much time. +WARNING:+ " +#~ "It must be ensured that this value is larger than the " +#~ "`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. Pacemaker " +#~ "verifies the settings individually on all nodes and prevents startup or " +#~ "shuts down if configured wrongly on the fly. It's strongly recommended " +#~ "that `SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes. If " +#~ "`stonith-watchdog-timeout` is set to a negative value, and " +#~ "`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. +WARNING:+ " +#~ "In this case, it's essential (currently not verified by Pacemaker) that " +#~ "`SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes." 
+#~ msgstr "" +#~ "如果值非零,且集群设置了 `have-watchdog=true` ,当需要 fence 操作时,基于 " +#~ "watchdog 的自我 fence 机制将通过SBD执行,而不需要显式配置 fence 资源。如" +#~ "果 `stonith-watchdog-timeout` 被设为正值,则假定不可见的节点在这段时间内自" +#~ "我fence。 +WARNING:+ 必须确保该值大于所有节点上的`SBD_WATCHDOG_TIMEOUT` 环" +#~ "境变量。Pacemaker将在所有节点上单独验证设置,如发现有错误的动态配置,将防" +#~ "止节点启动或关闭。强烈建议在所有节点上将 `SBD_WATCHDOG_TIMEOUT` 设置为相同" +#~ "的值。如果 `stonith-watchdog-timeout` 设置为负值。并且设置了 " +#~ "`SBD_WATCHDOG_TIMEOUT` ,则将使用该值的两倍, +WARNING:+ 在这种情况下,必" +#~ "须将所有节点上 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值(目前没有通过pacemaker" +#~ "验证)。" + +#~ msgid "" +#~ " Instance attributes available for all \"stonith\"-" +#~ "class resources and used by Pacemaker's fence daemon, formerly known as " +#~ "stonithd\n" +#~ msgstr "" +#~ " 可用于所有stonith类资源的实例属性,并由Pacemaker的" +#~ "fence守护程序使用(以前称为stonithd)\n" + +#~ msgid "" +#~ " Instance attributes available for all \"stonith\"-" +#~ "class resources\n" +#~ msgstr "" +#~ " 可用于所有stonith类资源的实例属性\n"