diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
index 7ba1f6de2a..495bf0f0dd 100644
--- a/cts/lab/patterns.py
+++ b/cts/lab/patterns.py
@@ -1,414 +1,415 @@
""" Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS)
"""
__copyright__ = "Copyright 2008-2022 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import sys, os
from cts.CTSvars import *
patternvariants = {}
class BasePatterns(object):
def __init__(self, name):
self.name = name
patternvariants[name] = self
self.ignore = [
"avoid confusing Valgrind",
# Logging bug in some versions of libvirtd
r"libvirtd.*: internal error: Failed to parse PCI config address",
# pcs can log this when node is fenced, but fencing is OK in some
# tests (and we will catch it in pacemaker logs when not OK)
r"pcs.daemon:No response from: .* request: get_configs, error:",
]
self.BadNews = []
self.components = {}
self.commands = {
"StatusCmd" : "crmadmin -t 60 -S %s 2>/dev/null",
"CibQuery" : "cibadmin -Ql",
"CibAddXml" : "cibadmin --modify -c --xml-text %s",
"CibDelXpath" : "cibadmin --delete --xpath %s",
# 300,000 == 5 minutes
"RscRunning" : CTSvars.CRM_DAEMON_DIR + "/cts-exec-helper -R -r %s",
"CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
# tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
# tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
# tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
# tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
"ReduceCommCmd" : "",
"RestoreCommCmd" : "tc qdisc del dev lo root",
"MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''",
"MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
"StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null",
}
self.search = {
"Pat:DC_IDLE" : "pacemaker-controld.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
"Pat:Local_started" : "%s\W.*controller successfully started",
"Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC",
"Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE",
"Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN",
"Pat:They_stopped" : "%s\W.*LOST:.* %s ",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:They_up" : "%s %s\W.*OVERRIDE THIS PATTERN",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
"Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s",
"Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
"Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s",
"Pat:Fencing_active" : r"stonith resource .* is active on 2 nodes (attempting recovery)",
"Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
"Pat:RscOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok",
"Pat:RscOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ",
"Pat:CloneOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ",
"Pat:RscRemoteOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok",
"Pat:NodeFenced" : r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK",
}
def get_component(self, key):
if key in self.components:
return self.components[key]
print("Unknown component '%s' for %s" % (key, self.name))
return []
def get_patterns(self, key):
if key == "BadNews":
return self.BadNews
elif key == "BadNewsIgnore":
return self.ignore
elif key == "Commands":
return self.commands
elif key == "Search":
return self.search
elif key == "Components":
return self.components
def __getitem__(self, key):
if key == "Name":
return self.name
elif key in self.commands:
return self.commands[key]
elif key in self.search:
return self.search[key]
else:
print("Unknown template '%s' for %s" % (key, self.name))
return None
class crm_corosync(BasePatterns):
'''
Patterns for Corosync version 2 cluster manager class
'''
def __init__(self, name):
BasePatterns.__init__(self, name)
self.commands.update({
"StartCmd" : "service corosync start && service pacemaker start",
"StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop",
"EpochCmd" : "crm_node -e",
"QuorumCmd" : "crm_node -q",
"PartitionCmd" : "crm_node -p",
})
self.search.update({
# Close enough ... "Corosync Cluster Engine exiting normally" isn't
# printed reliably.
"Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines",
"Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_up" : "\W%s\W.*pacemaker-controld.*Node %s state is now member",
"Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(",
# "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes()
"Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)",
"Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit",
"Pat:InfraUp" : "%s\W.*corosync.*Initializing transport",
"Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker",
})
self.ignore = self.ignore + [
r"crm_mon:",
r"crmadmin:",
r"update_trace_data",
r"async_notify:.*strange, client not found",
r"Parse error: Ignoring unknown option .*nodename",
r"error.*: Operation 'reboot' .* using FencingFail returned ",
r"getinfo response error: 1$",
r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)",
]
self.BadNews = [
r"[^(]error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting",
r"schedulerd.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)",
r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
r"Peer is not part of our cluster",
r"We appear to be in an election loop",
r"Unknown node -> we will not deliver message",
r"(Blackbox dump requested|Problem detected)",
r"pacemakerd.*Could not connect to Cluster Configuration Database API",
r"Receiving messages from a node we think is dead",
r"share the same cluster nodeid",
r"share the same name",
#r"crm_ipc_send:.*Request .* failed",
#r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
# Not inherently bad, but worth tracking
#r"No need to invoke the TE",
#r"ping.*: DEBUG: Updated connected = 0",
#r"Digest mis-match:",
r"pacemaker-controld:.*Transition failed: terminated",
r"Local CIB .* differs from .*:",
r"warn.*:\s*Continuing but .* will NOT be used",
r"warn.*:\s*Cluster configuration file .* is corrupt",
#r"Executing .* fencing operation",
r"Election storm",
r"stalled the FSA with pending inputs",
]
self.components["common-ignore"] = [
r"Pending action:",
r"resource( was|s were) active at shutdown",
r"pending LRM operations at shutdown",
r"Lost connection to the CIB manager",
r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported",
r"pacemaker-controld.*:\s*Performing A_EXIT_1 - forcefully exiting ",
r".*:\s*Requesting fencing \([^)]+\) of node ",
r"(Blackbox dump requested|Problem detected)",
]
self.components["corosync-ignore"] = [
r"Could not connect to Corosync CFG: CS_ERR_LIBRARY",
r"error:.*Connection to the CPG API failed: Library error",
r"\[[0-9]+\] exited with status [0-9]+ \(",
r"\[[0-9]+\] terminated with signal 15",
r"pacemaker-based.*error:.*Corosync connection lost",
r"pacemaker-fenced.*error:.*Corosync connection terminated",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state",
r"pacemaker-controld.*error:.*Could not recover from internal error",
r"error:.*Connection to cib_(shm|rw).* (failed|closed)",
r"error:.*cib_(shm|rw) IPC provider disconnected while waiting",
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"crit: Fencing daemon connection failed",
# This is overbroad, but we don't have a way to say that only
# certain transition errors are acceptable (if the fencer respawns,
# fence devices may appear multiply active). We have to rely on
# other causes of a transition error logging their own error
# message, which is the usual practice.
r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["corosync"] = [
# We expect each daemon to lose its cluster connection.
# However, if the CIB manager loses its connection first,
# it's possible for another daemon to lose that connection and
# exit before losing the cluster connection.
r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer",
r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer",
r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"schedulerd.*Scheduling node .* for fencing",
r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK",
]
self.components["pacemaker-based"] = [
r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102",
r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1",
r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit",
r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit",
r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit",
r"pacemaker-.* Connection to cib_.* (failed|closed)",
r"pacemaker-attrd.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy",
r"pacemaker-controld.* State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self.components["pacemaker-based-ignore"] = [
r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error (Lost connection to fencer)",
+ r"pacemaker-controld.*:Could not connect to attrd: Connection refused",
# This is overbroad, but we don't have a way to say that only
# certain transition errors are acceptable (if the fencer respawns,
# fence devices may appear multiply active). We have to rely on
# other causes of a transition error logging their own error
# message, which is the usual practice.
r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["pacemaker-execd"] = [
r"pacemaker-controld.*Connection to executor failed",
r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1",
r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit",
r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
]
self.components["pacemaker-execd-ignore"] = [
r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)",
r"pacemaker-(attrd|controld).*Could not execute alert",
]
self.components["pacemaker-controld"] = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling node .* for fencing",
# Only if the node wasn't the DC: "State transition S_IDLE",
"State transition .* -> S_IDLE",
]
self.components["pacemaker-controld-ignore"] = []
self.components["pacemaker-attrd"] = []
self.components["pacemaker-attrd-ignore"] = []
self.components["pacemaker-schedulerd"] = [
"State transition .* S_RECOVERY",
r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit",
r"pacemaker-controld\[[0-9]+\] exited with status 1 \(",
r"Connection to the scheduler failed",
"pacemaker-controld.*I_ERROR.*save_cib_contents",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
"pacemaker-controld.*Could not recover from internal error",
]
self.components["pacemaker-schedulerd-ignore"] = [
r"Connection to pengine.* (failed|closed)",
]
self.components["pacemaker-fenced"] = [
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"Fencing daemon connection failed",
r"pacemaker-controld.*Fencer successfully connected",
]
self.components["pacemaker-fenced-ignore"] = [
r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"crit:.*Fencing daemon connection failed",
r"error:.*Fencer connection failed \(will retry\)",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error (Lost connection to fencer)",
# This is overbroad, but we don't have a way to say that only
# certain transition errors are acceptable (if the fencer respawns,
# fence devices may appear multiply active). We have to rely on
# other causes of a transition error logging their own error
# message, which is the usual practice.
r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["pacemaker-fenced-ignore"].extend(self.components["common-ignore"])
class crm_corosync_docker(crm_corosync):
'''
Patterns for Corosync version 2 cluster manager class, running in Docker containers
'''
def __init__(self, name):
crm_corosync.__init__(self, name)
self.commands.update({
"StartCmd" : "pcmk_start",
"StopCmd" : "pcmk_stop",
})
class PatternSelector(object):
def __init__(self, name=None):
self.name = name
self.base = BasePatterns("crm-base")
if not name:
crm_corosync("crm-corosync")
elif name == "crm-corosync":
crm_corosync(name)
elif name == "crm-corosync-docker":
crm_corosync_docker(name)
def get_variant(self, variant):
if variant in patternvariants:
return patternvariants[variant]
print("defaulting to crm-base for %s" % variant)
return self.base
def get_patterns(self, variant, kind):
return self.get_variant(variant).get_patterns(kind)
def get_template(self, variant, key):
v = self.get_variant(variant)
return v[key]
def get_component(self, variant, kind):
return self.get_variant(variant).get_component(kind)
def __getitem__(self, key):
return self.get_template(self.name, key)
# python cts/lab/patterns.py -k crm-corosync -t StartCmd
if __name__ == '__main__':
pdir=os.path.dirname(sys.path[0])
sys.path.insert(0, pdir) # So that things work from the source directory
kind=None
template=None
skipthis=None
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-k" or args[i] == "--kind":
skipthis=1
kind = args[i+1]
elif args[i] == "-t" or args[i] == "--template":
skipthis=1
template = args[i+1]
else:
print("Illegal argument " + args[i])
print(PatternSelector(kind)[template])
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
index 424dbbcc5d..b2a6864f4e 100644
--- a/daemons/pacemakerd/pacemakerd.h
+++ b/daemons/pacemakerd/pacemakerd.h
@@ -1,37 +1,35 @@
/*
* Copyright 2010-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#define MAX_RESPAWN 100
extern GMainLoop *mainloop;
extern struct qb_ipcs_service_handlers mcp_ipc_callbacks;
extern const char *pacemakerd_state;
extern gboolean running_with_sbd;
extern unsigned int shutdown_complete_state_reported_to;
extern gboolean shutdown_complete_state_reported_client_closed;
extern crm_trigger_t *shutdown_trigger;
extern crm_trigger_t *startup_trigger;
extern time_t subdaemon_check_progress;
gboolean mcp_read_config(void);
gboolean cluster_connect_cfg(void);
void cluster_disconnect_cfg(void);
int find_and_track_existing_processes(void);
gboolean init_children_processes(void *user_data);
void restart_cluster_subdaemons(void);
void pcmk_shutdown(int nsig);
-void pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id);
-void pcmk_handle_shutdown_request(pcmk__client_t *c, xmlNode *msg, uint32_t id, uint32_t flags);
void pcmkd_shutdown_corosync(void);
bool pcmkd_corosync_connected(void);
diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c
index 6f0e48bec2..7ed9899d67 100644
--- a/daemons/pacemakerd/pcmkd_messages.c
+++ b/daemons/pacemakerd/pcmkd_messages.c
@@ -1,202 +1,278 @@
/*
* Copyright 2010-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include "pacemakerd.h"
+#include
#include
#include
#include
#include
#include
#include
#include
-void
-pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
+static GHashTable *pcmkd_handlers = NULL;
+
+static xmlNode *
+handle_node_cache_request(pcmk__request_t *request)
+{
+ crm_trace("Ignoring request from client %s to purge node "
+ "because peer cache is not used",
+ pcmk__client_name(request->ipc_client));
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_OK);
+ return NULL;
+}
+
+static xmlNode *
+handle_ping_request(pcmk__request_t *request)
{
+ xmlNode *msg = request->xml;
+
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
/* Pinged for status */
crm_trace("Pinged from " F_CRM_SYS_FROM "='%s' " F_CRM_ORIGIN "='%s'",
pcmk__s(from, ""),
pcmk__s(crm_element_value(msg, F_CRM_ORIGIN), ""));
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
+
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
crm_xml_add_ll(ping, XML_ATTR_TSTAMP,
(long long) subdaemon_check_progress);
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
reply = create_reply(msg, ping);
+
free_xml(ping);
- if (reply) {
- if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
- pcmk_rc_ok) {
- crm_err("Failed sending ping reply to client %s",
- pcmk__client_name(c));
- }
- free_xml(reply);
+
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Failed building ping reply for client %s",
+ pcmk__client_name(request->ipc_client));
} else {
- crm_err("Failed building ping reply for client %s",
- pcmk__client_name(c));
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
+
/* Advance the startup/shutdown state machine when SBD pings us */
if (from && strstr(from, "sbd")) {
if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) {
if (pcmk__get_sbd_sync_resource_startup()) {
crm_notice("Shutdown-complete-state passed to SBD.");
}
- shutdown_complete_state_reported_to = c->pid;
+
+ shutdown_complete_state_reported_to = request->ipc_client->pid;
+
} else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) {
crm_notice("Received startup-trigger from SBD.");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
mainloop_set_trigger(startup_trigger);
}
}
+
+ return reply;
}
-void
-pcmk_handle_shutdown_request(pcmk__client_t *c, xmlNode *msg, uint32_t id, uint32_t flags)
+static xmlNode *
+handle_shutdown_request(pcmk__request_t *request)
{
+ xmlNode *msg = request->xml;
+
xmlNode *shutdown = NULL;
xmlNode *reply = NULL;
/* Only allow privileged users (i.e. root or hacluster) to shut down
* Pacemaker from the command line (or direct IPC), so that other users
* are forced to go through the CIB and have ACLs applied.
*/
- bool allowed = pcmk_is_set(c->flags, pcmk__client_privileged);
+ bool allowed = pcmk_is_set(request->ipc_client->flags, pcmk__client_privileged);
+
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INDETERMINATE);
shutdown = create_xml_node(NULL, XML_CIB_ATTR_SHUTDOWN);
if (allowed) {
crm_notice("Shutting down in response to IPC request %s from %s",
crm_element_value(msg, F_CRM_REFERENCE),
crm_element_value(msg, F_CRM_ORIGIN));
crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_OK);
} else {
crm_warn("Ignoring shutdown request from unprivileged client %s",
- pcmk__client_name(c));
+ pcmk__client_name(request->ipc_client));
crm_xml_add_int(shutdown, XML_LRM_ATTR_OPSTATUS, CRM_EX_INSUFFICIENT_PRIV);
}
reply = create_reply(msg, shutdown);
free_xml(shutdown);
- if (reply) {
- if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) != pcmk_rc_ok) {
- crm_err("Failed sending shutdown reply to client %s",
- pcmk__client_name(c));
- }
- free_xml(reply);
+
+ if (reply == NULL) {
+ pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "Failed building shutdown reply for client %s",
+ pcmk__client_name(request->ipc_client));
} else {
- crm_err("Failed building shutdown reply for client %s",
- pcmk__client_name(c));
+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
if (allowed) {
pcmk_shutdown(15);
}
+
+ return reply;
+}
+
+static xmlNode *
+handle_unknown_request(pcmk__request_t *request)
+{
+ pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
+ "ack", NULL, CRM_EX_INVALID_PARAM);
+
+ pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Unknown IPC request type '%s' (bug?)",
+ pcmk__client_name(request->ipc_client));
+ return NULL;
+}
+
+static void
+pcmkd_register_handlers(void)
+{
+ pcmk__server_command_t handlers[] = {
+ { CRM_OP_RM_NODE_CACHE, handle_node_cache_request },
+ { CRM_OP_PING, handle_ping_request },
+ { CRM_OP_QUIT, handle_shutdown_request },
+ { NULL, handle_unknown_request },
+ };
+
+ pcmkd_handlers = pcmk__register_handlers(handlers);
}
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
/* Error code means? */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
if (shutdown_complete_state_reported_to == client->pid) {
shutdown_complete_state_reported_client_closed = TRUE;
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
}
}
pcmk__free_client(client);
return 0;
}
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
pcmk_ipc_closed(c);
}
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
- const char *task = NULL;
xmlNode *msg = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
CRM_CHECK(c != NULL, return 0);
+ if (pcmkd_handlers == NULL) {
+ pcmkd_register_handlers();
+ }
+
msg = pcmk__client_data2xml(c, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
return 0;
- }
- task = crm_element_value(msg, F_CRM_TASK);
- if (pcmk__str_empty(task)) {
- crm_debug("IPC command from client %s is missing task",
- pcmk__client_name(c));
- pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INVALID_PARAM);
+ } else {
+ char *log_msg = NULL;
+ const char *reason = NULL;
+ xmlNode *reply = NULL;
- } else if (pcmk__str_eq(task, CRM_OP_QUIT, pcmk__str_none)) {
- pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
- pcmk_handle_shutdown_request(c, msg, id, flags);
+ pcmk__request_t request = {
+ .ipc_client = c,
+ .ipc_id = id,
+ .ipc_flags = flags,
+ .peer = NULL,
+ .xml = msg,
+ .call_options = 0,
+ .result = PCMK__UNKNOWN_RESULT,
+ };
- } else if (pcmk__str_eq(task, CRM_OP_RM_NODE_CACHE, pcmk__str_none)) {
- crm_trace("Ignoring request from client %s to purge node "
- "because peer cache is not used", pcmk__client_name(c));
- pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_OK);
+ request.op = crm_element_value_copy(request.xml, F_CRM_TASK);
+ CRM_CHECK(request.op != NULL, return 0);
- } else if (pcmk__str_eq(task, CRM_OP_PING, pcmk__str_none)) {
- pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
- pcmk_handle_ping_request(c, msg, id);
+ reply = pcmk__process_request(&request, pcmkd_handlers);
- } else {
- crm_debug("Unrecognized IPC command '%s' from client %s",
- task, pcmk__client_name(c));
- pcmk__ipc_send_ack(c, id, flags, "ack", NULL, CRM_EX_INVALID_PARAM);
+ if (reply != NULL) {
+ pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event);
+ free_xml(reply);
+ }
+
+ reason = request.result.exit_reason;
+
+ log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
+ request.op, pcmk__request_origin_type(&request),
+ pcmk__request_origin(&request),
+ pcmk_exec_status_str(request.result.execution_status),
+ (reason == NULL)? "" : " (",
+ (reason == NULL)? "" : reason,
+ (reason == NULL)? "" : ")");
+
+ if (!pcmk__result_ok(&request.result)) {
+ crm_warn("%s", log_msg);
+ } else {
+ crm_debug("%s", log_msg);
+ }
+
+ free(log_msg);
+ pcmk__reset_request(&request);
}
free_xml(msg);
return 0;
}
struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = NULL,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
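
Aside (not part of the patch): the change above replaces pcmkd's open-coded if/else chain on F_CRM_TASK with a handler table that pcmk__register_handlers() turns into a hash table and pcmk__process_request() consults, falling back to the NULL-keyed entry for unrecognized operations. Below is a minimal standalone sketch of that table-driven dispatch idea; every name in it (demo_handler_t, demo_dispatch, the handlers) is hypothetical, and it deliberately avoids the libcrmcommon API:

    #include <stdio.h>
    #include <string.h>

    typedef struct {
        const char *op;                 /* request type; NULL marks the fallback */
        int (*handler)(const char *op);
    } demo_handler_t;

    static int handle_ping(const char *op) { printf("pong (op=%s)\n", op); return 0; }
    static int handle_quit(const char *op) { printf("quitting (op=%s)\n", op); return 0; }
    static int handle_unknown(const char *op) { printf("unknown op '%s'\n", op); return -1; }

    static const demo_handler_t handlers[] = {
        { "ping", handle_ping },
        { "quit", handle_quit },
        { NULL,   handle_unknown },     /* fallback, like handle_unknown_request() */
    };

    /* Scan the table; the NULL sentinel entry guarantees the loop terminates */
    static int demo_dispatch(const char *op) {
        for (const demo_handler_t *h = handlers; ; h++) {
            if ((h->op == NULL) || (strcmp(h->op, op) == 0)) {
                return h->handler(op);
            }
        }
    }

    int main(void) {
        demo_dispatch("ping");        /* hits the "ping" entry */
        demo_dispatch("frobnicate");  /* falls through to handle_unknown() */
        return 0;
    }

The payoff of this structure, as in pcmkd_messages.c, is that adding a new IPC operation means adding one table entry and one handler function, while acking, logging, and result handling stay centralized in the dispatcher.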
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
index 5496cd12c0..3d7e7ff904 100644
--- a/lib/common/ipc_attrd.c
+++ b/lib/common/ipc_attrd.c
@@ -1,569 +1,569 @@
/*
* Copyright 2011-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include
#include
#include
#include
#include
#include
#include
#include "crmcommon_private.h"
static void
set_pairs_data(pcmk__attrd_api_reply_t *data, xmlNode *msg_data)
{
const char *name = NULL;
pcmk__attrd_query_pair_t *pair;
name = crm_element_value(msg_data, PCMK__XA_ATTR_NAME);
for (xmlNode *node = first_named_child(msg_data, XML_CIB_TAG_NODE);
node != NULL; node = crm_next_same_xml(node)) {
pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
CRM_ASSERT(pair != NULL);
pair->node = crm_element_value(node, PCMK__XA_ATTR_NODE_NAME);
pair->name = name;
pair->value = crm_element_value(node, PCMK__XA_ATTR_VALUE);
data->data.pairs = g_list_prepend(data->data.pairs, pair);
}
}
static bool
reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
{
const char *command = crm_element_value(request, PCMK__XA_TASK);
return pcmk__str_any_of(command, PCMK__ATTRD_CMD_UPDATE,
PCMK__ATTRD_CMD_UPDATE_BOTH, PCMK__ATTRD_CMD_UPDATE_DELAY,
PCMK__ATTRD_CMD_QUERY, NULL);
}
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
const char *value = NULL;
crm_exit_t status = CRM_EX_OK;
pcmk__attrd_api_reply_t reply_data = {
pcmk__attrd_reply_unknown
};
if (pcmk__str_eq((const char *) reply->name, "ack", pcmk__str_none)) {
return false;
}
/* Do some basic validation of the reply */
value = crm_element_value(reply, F_TYPE);
if (pcmk__str_empty(value)
|| !pcmk__str_eq(value, T_ATTRD, pcmk__str_none)) {
crm_info("Unrecognizable message from attribute manager: "
"message type '%s' not '" T_ATTRD "'", pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
value = crm_element_value(reply, F_SUBTYPE);
/* Only the query command gets a reply for now. NULL counts as query for
* backward compatibility with attribute managers <2.1.3 that didn't set it.
*/
if (pcmk__str_eq(value, PCMK__ATTRD_CMD_QUERY, pcmk__str_null_matches)) {
if (!xmlHasProp(reply, (pcmkXmlStr) PCMK__XA_ATTR_NAME)) {
status = ENXIO; // Most likely, the attribute doesn't exist
goto done;
}
reply_data.reply_type = pcmk__attrd_reply_query;
set_pairs_data(&reply_data, reply);
} else {
crm_info("Unrecognizable message from attribute manager: "
"message subtype '%s' unknown", pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
done:
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
/* Free any reply data that was allocated */
if (reply_data.data.pairs) {
g_list_free_full(reply_data.data.pairs, free);
}
return false;
}
pcmk__ipc_methods_t *
pcmk__attrd_api_methods()
{
pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
if (cmds != NULL) {
cmds->new_data = NULL;
cmds->free_data = NULL;
cmds->post_connect = NULL;
cmds->reply_expected = reply_expected;
cmds->dispatch = dispatch;
}
return cmds;
}
/*!
* \internal
* \brief Create a generic pacemaker-attrd operation
*
* \param[in] user_name If not NULL, ACL user to set for operation
*
* \return XML of pacemaker-attrd operation
*/
static xmlNode *
create_attrd_op(const char *user_name)
{
xmlNode *attrd_op = create_xml_node(NULL, __func__);
crm_xml_add(attrd_op, F_TYPE, T_ATTRD);
crm_xml_add(attrd_op, F_ORIG, (crm_system_name? crm_system_name: "unknown"));
crm_xml_add(attrd_op, PCMK__XA_ATTR_USER, user_name);
return attrd_op;
}
static int
create_api(pcmk_ipc_api_t **api)
{
int rc = pcmk_new_ipc_api(api, pcmk_ipc_attrd);
if (rc != pcmk_rc_ok) {
- crm_err("error: Could not connect to attrd: %s", pcmk_rc_str(rc));
+ crm_err("Could not connect to attrd: %s", pcmk_rc_str(rc));
}
return rc;
}
static void
destroy_api(pcmk_ipc_api_t *api)
{
pcmk_disconnect_ipc(api);
pcmk_free_ipc_api(api);
api = NULL;
}
static int
connect_and_send_attrd_request(pcmk_ipc_api_t *api, xmlNode *request)
{
int rc = pcmk_rc_ok;
int max = 5;
while (max > 0) {
crm_info("Connecting to cluster... %d retries remaining", max);
rc = pcmk_connect_ipc(api, pcmk_ipc_dispatch_sync);
if (rc == pcmk_rc_ok) {
rc = pcmk__send_ipc_request(api, request);
break;
} else if (rc == EAGAIN || rc == EALREADY) {
sleep(5 - max);
max--;
} else {
- crm_err("error: Could not connect to attrd: %s", pcmk_rc_str(rc));
+ crm_err("Could not connect to attrd: %s", pcmk_rc_str(rc));
break;
}
}
return rc;
}
static int
send_attrd_request(pcmk_ipc_api_t *api, xmlNode *request)
{
return pcmk__send_ipc_request(api, request);
}
int
pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
const char *resource, const char *operation,
const char *interval_spec, const char *user_name,
uint32_t options)
{
int rc = pcmk_rc_ok;
xmlNode *request = create_attrd_op(user_name);
const char *interval_desc = NULL;
const char *op_desc = NULL;
const char *target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE);
crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node);
crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource);
crm_xml_add(request, PCMK__XA_ATTR_OPERATION, operation);
crm_xml_add(request, PCMK__XA_ATTR_INTERVAL, interval_spec);
crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE,
pcmk_is_set(options, pcmk__node_attr_remote));
if (api == NULL) {
rc = create_api(&api);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = connect_and_send_attrd_request(api, request);
destroy_api(api);
} else if (!pcmk_ipc_is_connected(api)) {
rc = connect_and_send_attrd_request(api, request);
} else {
rc = send_attrd_request(api, request);
}
free_xml(request);
if (operation) {
interval_desc = interval_spec? interval_spec : "nonrecurring";
op_desc = operation;
} else {
interval_desc = "all";
op_desc = "operations";
}
crm_debug("Asked pacemaker-attrd to clear failure of %s %s for %s on %s: %s (%d)",
interval_desc, op_desc, (resource? resource : "all resources"),
(node? node : "all nodes"), pcmk_rc_str(rc), rc);
return rc;
}
int
pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name,
uint32_t options)
{
const char *target = NULL;
if (name == NULL) {
return EINVAL;
}
target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
/* Make sure the right update option is set. */
options &= ~pcmk__node_attr_delay;
options |= pcmk__node_attr_value;
return pcmk__attrd_api_update(api, node, name, NULL, NULL, NULL, NULL, options);
}
int
pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node)
{
int rc = pcmk_rc_ok;
xmlNode *request = NULL;
const char *display_host = (node ? node : "localhost");
const char *target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
request = create_attrd_op(NULL);
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node);
if (api == NULL) {
rc = create_api(&api);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = connect_and_send_attrd_request(api, request);
destroy_api(api);
} else if (!pcmk_ipc_is_connected(api)) {
rc = connect_and_send_attrd_request(api, request);
} else {
rc = send_attrd_request(api, request);
}
free_xml(request);
crm_debug("Asked pacemaker-attrd to purge %s: %s (%d)",
display_host, pcmk_rc_str(rc), rc);
return rc;
}
int
pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
uint32_t options)
{
int rc = pcmk_rc_ok;
xmlNode *request = NULL;
const char *target = NULL;
if (name == NULL) {
return EINVAL;
}
target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
request = create_attrd_op(NULL);
crm_xml_add(request, PCMK__XA_ATTR_NAME, name);
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node);
rc = send_attrd_request(api, request);
free_xml(request);
if (node) {
crm_debug("Queried pacemaker-attrd for %s on %s: %s (%d)",
name, node, pcmk_rc_str(rc), rc);
} else {
crm_debug("Queried pacemaker-attrd for %s: %s (%d)",
name, pcmk_rc_str(rc), rc);
}
return rc;
}
int
pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
{
int rc = pcmk_rc_ok;
xmlNode *request = NULL;
const char *display_host = (node ? node : "localhost");
const char *target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
request = create_attrd_op(NULL);
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH);
crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node);
if (api == NULL) {
rc = create_api(&api);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = connect_and_send_attrd_request(api, request);
destroy_api(api);
} else if (!pcmk_ipc_is_connected(api)) {
rc = connect_and_send_attrd_request(api, request);
} else {
rc = send_attrd_request(api, request);
}
free_xml(request);
crm_debug("Asked pacemaker-attrd to refresh %s: %s (%d)",
display_host, pcmk_rc_str(rc), rc);
return rc;
}
static void
add_op_attr(xmlNode *op, uint32_t options)
{
if (pcmk_all_flags_set(options, pcmk__node_attr_value | pcmk__node_attr_delay)) {
crm_xml_add(op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE_BOTH);
} else if (pcmk_is_set(options, pcmk__node_attr_value)) {
crm_xml_add(op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
} else if (pcmk_is_set(options, pcmk__node_attr_delay)) {
crm_xml_add(op, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE_DELAY);
}
}
static void
populate_update_op(xmlNode *op, const char *node, const char *name, const char *value,
const char *dampen, const char *set, uint32_t options)
{
if (pcmk_is_set(options, pcmk__node_attr_pattern)) {
crm_xml_add(op, PCMK__XA_ATTR_PATTERN, name);
} else {
crm_xml_add(op, PCMK__XA_ATTR_NAME, name);
}
add_op_attr(op, options);
crm_xml_add(op, PCMK__XA_ATTR_VALUE, value);
crm_xml_add(op, PCMK__XA_ATTR_DAMPENING, dampen);
crm_xml_add(op, PCMK__XA_ATTR_NODE_NAME, node);
crm_xml_add(op, PCMK__XA_ATTR_SET, set);
crm_xml_add_int(op, PCMK__XA_ATTR_IS_REMOTE,
pcmk_is_set(options, pcmk__node_attr_remote));
crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE,
pcmk_is_set(options, pcmk__node_attr_private));
}
int
pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
const char *value, const char *dampen, const char *set,
const char *user_name, uint32_t options)
{
int rc = pcmk_rc_ok;
xmlNode *request = NULL;
const char *display_host = (node ? node : "localhost");
const char *target = NULL;
if (name == NULL) {
return EINVAL;
}
target = pcmk__node_attr_target(node);
if (target != NULL) {
node = target;
}
request = create_attrd_op(user_name);
populate_update_op(request, node, name, value, dampen, set, options);
if (api == NULL) {
rc = create_api(&api);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = connect_and_send_attrd_request(api, request);
destroy_api(api);
} else if (!pcmk_ipc_is_connected(api)) {
rc = connect_and_send_attrd_request(api, request);
} else {
rc = send_attrd_request(api, request);
}
free_xml(request);
crm_debug("Asked pacemaker-attrd to update %s on %s: %s (%d)",
name, display_host, pcmk_rc_str(rc), rc);
return rc;
}
int
pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampen,
const char *set, const char *user_name,
uint32_t options)
{
int rc = pcmk_rc_ok;
xmlNode *request = NULL;
if (attrs == NULL) {
return EINVAL;
}
/* There are two different ways of handling a list of attributes:
*
* (1) For messages originating from some command line tool, we have to send
* them one at a time. In this loop, we just call pcmk__attrd_api_update
* for each, letting it deal with creating the API object if it doesn't
* already exist.
*
* The reason we can't use a single message in this case is that we can't
* trust that the server supports it. Remote nodes could be involved
* here, and there's no guarantee that a newer client running on a remote
* node is talking to (or proxied through) a cluster node with a newer
* attrd. We also can't just try sending a single message and then falling
* back on multiple. There's no handshake with the attrd server to
* determine its version. And then we would need to do that fallback in the
* dispatch function for this to work for all connection types (mainloop in
* particular), and at that point we won't know what the original message
* was in order to break it apart and resend as individual messages.
*
* (2) For messages between daemons, we can be assured that the local attrd
* will support the new message and that it can send to the other attrds
* as one request or split up according to the minimum supported version.
*/
for (GList *iter = attrs; iter != NULL; iter = iter->next) {
pcmk__attrd_query_pair_t *pair = (pcmk__attrd_query_pair_t *) iter->data;
if (pcmk__is_daemon) {
const char *target = NULL;
xmlNode *child = NULL;
/* First time through this loop - create the basic request. */
if (request == NULL) {
request = create_attrd_op(user_name);
add_op_attr(request, options);
}
/* Add a child node for this operation. We add the task to the top
* level XML node so attrd_ipc_dispatch doesn't need changes. And
* then we also add the task to each child node in populate_update_op
* so attrd_client_update knows what form of update is taking place.
*/
child = create_xml_node(request, XML_ATTR_OP);
target = pcmk__node_attr_target(pair->node);
if (target != NULL) {
pair->node = target;
}
populate_update_op(child, pair->node, pair->name, pair->value, dampen,
set, options);
} else {
rc = pcmk__attrd_api_update(api, pair->node, pair->name, pair->value,
dampen, set, user_name, options);
}
}
/* If we were doing multiple attributes at once, we still need to send the
* request. Do that now, creating and destroying the API object if needed.
*/
if (pcmk__is_daemon) {
bool created_api = false;
if (api == NULL) {
rc = create_api(&api);
if (rc != pcmk_rc_ok) {
return rc;
}
created_api = true;
}
rc = connect_and_send_attrd_request(api, request);
free_xml(request);
if (created_api) {
destroy_api(api);
}
}
return rc;
}
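
Usage note (not part of the patch): each attrd API entry point above accepts api == NULL, in which case create_api(), connect_and_send_attrd_request(), and destroy_api() manage a transient IPC connection internally. A minimal caller sketch under that assumption follows; the header paths are guesses, while the function, flag, and result helpers are the ones visible in this diff:

    #include <crm/crm.h>                    /* logging and return-code helpers */
    #include <crm/common/attrd_internal.h>  /* assumed header for these prototypes */

    /* Set a transient "test-attr" attribute on one node, letting the API
     * create and tear down its own IPC connection (api == NULL).
     */
    int
    set_test_attribute(const char *node)
    {
        int rc = pcmk__attrd_api_update(NULL /* api */, node, "test-attr", "42",
                                        NULL /* dampen */, NULL /* set */,
                                        NULL /* user_name */,
                                        pcmk__node_attr_value);

        if (rc != pcmk_rc_ok) {
            crm_err("Could not update test-attr on %s: %s",
                    (node? node : "localhost"), pcmk_rc_str(rc));
        }
        return rc;
    }

Passing an existing, connected api object instead skips the connect/retry path in connect_and_send_attrd_request() and sends the request directly, which is the mode the daemons use.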