diff --git a/crmd/fsa_defines.h b/crmd/fsa_defines.h old mode 100644 new mode 100755 index db82801972..a1ed3e5295 --- a/crmd/fsa_defines.h +++ b/crmd/fsa_defines.h @@ -1,479 +1,479 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FSA_DEFINES__H #define FSA_DEFINES__H /*====================================== * States the DC/CRMd can be in *======================================*/ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_FINALIZE_JOIN,/* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_NOT_DC, /* we are in crmd/slave mode */ S_POLICY_ENGINE,/* Determin the next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RELEASE_DC, /* we were the DC, but now we arent anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_STARTING, /* we are just starting out */ S_PENDING, /* we are not a full/active member yet */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shutdown, this is the equiv of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE,/* Attempt to make the calculated next stable * state of the cluster a reality */ S_HALT, /* Freeze - dont do anything * Something ad happened that needs the admin to fix * Wait for I_ELECTION */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL /* This is an illegal FSA state */ /* (must be last) */ }; #define MAXSTATE S_ILLEGAL /* A state diagram can be constructed from the dc_fsa.dot with the following command: dot -Tpng crmd_fsa.dot > crmd_fsa.png Description: Once we start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the CCM which indicates the election algorithm needs to run. If the election algorithm is triggered we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but arent anymore). The election algorithm has been adapted from http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR521 Loosly known as the Bully Algorithm, its major points are: - Election is initiated by any node (N) notices that the coordinator is no longer responding - Concurrent multiple elections are possible - Algorithm + N sends ELECTION messages to all nodes that occur earlier in the CCM's membership list. + If no one responds, N wins and becomes coordinator + N sends out COORDINATOR messages to all other nodes in the partition + If one of higher-ups answers, it takes over. N is done. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldnt do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authorative on itself and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where invoke the Policy Engine. It is possible that between invoking the Policy Engine and recieving an answer, that we recieve more input. In this case we would discard the orginal result and invoke it again. Once we are satisfied with the output from the Policy Engine we enter S_TRANSITION_ENGINE and feed the Policy Engine's output to the Transition Engine who attempts to make the Policy Engine's calculation a reality. If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course we may be asked to shutdown at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is by asking the DC for permission and waiting it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the CRMd/DC can relay messages for its sub-systems, but outbound messages (from sub-systems) should probably be blocked until S_INTEGRATION (for the DC case) or the join protocol has completed (for the CRMd case) */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these a true events, and others a synthesised based on * the "register" (see below) and the contents or source of messages. * * At this point, my plan is to have a loop of some sort that keeps * going until recieving I_NULL * *======================================*/ enum crmd_fsa_input { /* 0 */ I_NULL, /* Nothing happened */ /* 1 */ I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_PE_CALC, /* The Policy Engine needs to be invoked */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB. */ /* 9 */ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATED, I_FINALIZED, I_NODE_JOIN, /* A node has entered the cluster */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_TE_SUCCESS, /* Some non-resource, non-ccm action is required * of us, eg. ping */ /* 20 */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shutdown */ I_STOP, /* We have been told to shutdown */ I_TERMINATE, /* Actually exit */ I_STARTUP, I_PE_SUCCESS, /* The action completed successfully */ I_JOIN_OFFER, /* The DC is offering membership */ I_JOIN_REQUEST, /* The client is requesting membership */ I_JOIN_RESULT, /* If not the DC: The result of a join request * Else: A client is responding with its local state info */ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" * and until it does, we cant do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ I_LRM_EVENT, /* 30 */ I_PENDING, I_HALT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL /* This is an illegal value for an FSA input */ /* (must be last) */ }; #define MAXINPUT I_ILLEGAL #define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but I can * forsee that this may not always be the case. So I've spilt them up so * that if they ever do need to be called independantly in the future, it * wont be a problem. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected LRM. * *======================================*/ /* Dont do anything */ #define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than starting the CIB, * connecting to HA or the CCM) that might be needed as part * of the startup. */ #define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed as part * after startup is successful. */ #define A_STARTED 0x0000000000000002ULL /* Connect to Heartbeat */ #define A_HA_CONNECT 0x0000000000000004ULL #define A_HA_DISCONNECT 0x0000000000000008ULL #define A_INTEGRATE_TIMER_START 0x0000000000000010ULL #define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL #define A_FINALIZE_TIMER_START 0x0000000000000040ULL #define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL /* -- Election actions -- */ #define A_DC_TIMER_START 0x0000000000000100ULL #define A_DC_TIMER_STOP 0x0000000000000200ULL #define A_ELECTION_COUNT 0x0000000000000400ULL #define A_ELECTION_VOTE 0x0000000000000800ULL #define A_ELECTION_START 0x0000000000001000ULL /* -- Message processing -- */ /* Process the queue of requests */ #define A_MSG_PROCESS 0x0000000000002000ULL /* Send the message to the correct recipient */ #define A_MSG_ROUTE 0x0000000000004000ULL /* Send a welcome message to new node(s) */ #define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL /* -- Server Join protocol actions -- */ /* Send a welcome message to all nodes */ #define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL /* Process the remote node's ack of our join message */ #define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL /* Send out the reults of the Join phase */ #define A_DC_JOIN_FINALIZE 0x0000000000040000ULL /* Send out the reults of the Join phase */ #define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL /* -- Client Join protocol actions -- */ #define A_CL_JOIN_QUERY 0x0000000000100000ULL #define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL /* Request membership to the DC list */ #define A_CL_JOIN_REQUEST 0x0000000000400000ULL /* Did the DC accept or reject the request */ #define A_CL_JOIN_RESULT 0x0000000000800000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ #define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC. */ #define A_DC_RELEASE 0x0000000002000000ULL /* */ #define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ #define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ #define A_SHUTDOWN 0x0000000010000000ULL #define A_STOP 0x0000000020000000ULL #define A_EXIT_0 0x0000000040000000ULL #define A_EXIT_1 0x0000000080000000ULL #define A_SHUTDOWN_REQ 0x0000000100000000ULL #define A_ELECTION_CHECK 0x0000000200000000ULL #define A_DC_JOIN_FINAL 0x0000000400000000ULL /* -- CCM actions -- */ #define A_CCM_CONNECT 0x0000001000000000ULL #define A_CCM_DISCONNECT 0x0000002000000000ULL /* -- CIB actions -- */ #define A_CIB_START 0x0000020000000000ULL #define A_CIB_STOP 0x0000040000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state. This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determin * tasks that can be performed in parallel, execute them, wait * for replies and then determin the next set until the new * state is reached or no further tasks can be taken. */ #define A_TE_INVOKE 0x0000100000000000ULL #define A_TE_START 0x0000200000000000ULL #define A_TE_STOP 0x0000400000000000ULL #define A_TE_CANCEL 0x0000800000000000ULL #define A_TE_HALT 0x0001000000000000ULL /* -- Policy Engine actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ #define A_PE_INVOKE 0x0002000000000000ULL #define A_PE_START 0x0004000000000000ULL #define A_PE_STOP 0x0008000000000000ULL /* -- Misc actions -- */ /* Add a system generate "block" so that resources arent moved * to or are activly moved away from the affected node. This * way we can return quickly even if busy with other things. */ #define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ #define A_UPDATE_NODESTATUS 0x0020000000000000ULL #define A_CIB_BUMPGEN 0x0040000000000000ULL #define A_READCONFIG 0x0080000000000000ULL /* -- LRM Actions -- */ /* Connect to the Local Resource Manager */ #define A_LRM_CONNECT 0x0100000000000000ULL /* Disconnect from the Local Resource Manager */ #define A_LRM_DISCONNECT 0x0200000000000000ULL #define A_LRM_INVOKE 0x0400000000000000ULL #define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ #define A_LOG 0x1000000000000000ULL #define A_ERROR 0x2000000000000000ULL #define A_WARN 0x4000000000000000ULL #define O_EXIT (A_SHUTDOWN|A_STOP|A_CCM_DISCONNECT|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) #define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) #define O_DC_TIMER_RESTART (A_NOTHING) #define O_PE_RESTART (A_PE_START|A_PE_STOP) #define O_TE_RESTART (A_TE_START|A_TE_STOP) #define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) - +#define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ #define R_THE_DC 0x00000001ULL /* Are we the DC? */ #define R_STARTING 0x00000002ULL /* Are we starting up? */ #define R_SHUTDOWN 0x00000004ULL /* Are we trying to shut down? */ #define R_STAYDOWN 0x00000008ULL /* Should we restart? */ #define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ #define R_READ_CONFIG 0x00000040ULL #define R_INVOKE_PE 0x00000080ULL /* Does the PE needed to be invoked at the next appropriate point? */ #define R_CIB_CONNECTED 0x00000100ULL /* Is the CIB connected? */ #define R_PE_CONNECTED 0x00000200ULL /* Is the Policy Engine connected? */ #define R_TE_CONNECTED 0x00000400ULL /* Is the Transition Engine connected? */ #define R_LRM_CONNECTED 0x00000800ULL /* Is the Local Resource Manager connected? */ #define R_CIB_REQUIRED 0x00001000ULL /* Is the CIB required? */ #define R_PE_REQUIRED 0x00002000ULL /* Is the Policy Engine required? */ #define R_TE_REQUIRED 0x00004000ULL /* Is the Transition Engine required? */ #define R_ST_REQUIRED 0x00008000ULL /* Is the Stonith daemon required? */ #define R_CIB_DONE 0x00010000ULL /* Have we calculated the CIB? */ #define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ #define R_CIB_ASKED 0x00040000ULL /* Have we asked for an up-to-date CIB */ #define R_CCM_DATA 0x00100000ULL /* Have we got CCM data yet */ #define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */ #define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ #define R_CCM_DISCONNECTED 0x00800000ULL /* did we sign out of our own accord */ #define R_REQ_PEND 0x01000000ULL /* Are there Requests waiting for processing? */ #define R_PE_PEND 0x02000000ULL /* Has the PE been invoked and we're awaiting a reply? */ #define R_TE_PEND 0x04000000ULL /* Has the TE been invoked and we're awaiting completion? */ #define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a response? if so perhaps we shouldnt stop yet */ #define R_IN_TRANSITION 0x10000000ULL /* */ #define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all * resources in preparation for * shutting down */ #define R_IN_RECOVERY 0x80000000ULL enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CCM_CALLBACK, C_CRMD_STATUS_CALLBACK, C_LRM_OP_CALLBACK, C_LRM_MONITOR_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_HEARTBEAT_FAILED, C_SUBSYSTEM_CONNECT, C_HA_DISCONNECT, C_FSA_INTERNAL, C_ILLEGAL }; extern const char *fsa_input2string(enum crmd_fsa_input input); extern const char *fsa_state2string(enum crmd_fsa_state state); extern const char *fsa_cause2string(enum crmd_fsa_cause cause); extern const char *fsa_action2string(long long action); #endif diff --git a/crmd/fsa_matrix.h b/crmd/fsa_matrix.h index 7b95766277..75fab7112c 100644 --- a/crmd/fsa_matrix.h +++ b/crmd/fsa_matrix.h @@ -1,1229 +1,1230 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef XML_FSA_MATRIX__H #define XML_FSA_MATRIX__H /* * The state transition table. The rows are inputs, and * the columns are states. */ const enum crmd_fsa_state crmd_fsa_state [MAXINPUT][MAXSTATE] = { /* Got an I_NULL */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_CIB_OP */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_CIB_UPDATE */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_DC_TIMEOUT */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_ELECTION, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RECOVERY, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_ELECTION, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_ELECTION, }, /* Got an I_ELECTION */ { /* S_IDLE ==> */ S_ELECTION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_ELECTION, /* S_FINALIZE_JOIN ==> */ S_ELECTION, /* S_NOT_DC ==> */ S_ELECTION, /* S_POLICY_ENGINE ==> */ S_ELECTION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_ELECTION, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_ELECTION, /* S_HALT ==> */ S_HALT, }, /* Got an I_PE_CALC */ { /* S_IDLE ==> */ S_POLICY_ENGINE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_DC */ { /* S_IDLE ==> */ S_RELEASE_DC, /* S_ELECTION ==> */ S_RELEASE_DC, /* S_INTEGRATION ==> */ S_RELEASE_DC, /* S_FINALIZE_JOIN ==> */ S_RELEASE_DC, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RELEASE_DC, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RELEASE_DC, /* S_HALT ==> */ S_HALT, }, /* Got an I_ELECTION_DC */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_INTEGRATION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_INTEGRATION, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_ERROR */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RECOVERY, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RECOVERY, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_TERMINATE, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_RECOVERY, }, /* Got an I_FAIL */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RELEASE_DC, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_STOPPING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_RELEASE_DC, }, /* Got an I_INTEGRATED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_FINALIZE_JOIN, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_FINALIZED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_POLICY_ENGINE, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_RECOVERY, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_NODE_JOIN */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_NOT_DC */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_NOT_DC, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_RECOVERED */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_PENDING, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_FAIL */ { /* S_IDLE ==> */ S_STOPPING, /* S_ELECTION ==> */ S_STOPPING, /* S_INTEGRATION ==> */ S_STOPPING, /* S_FINALIZE_JOIN ==> */ S_STOPPING, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_STOPPING, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_STOPPING, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_STOPPING, /* S_HALT ==> */ S_HALT, }, /* Got an I_RELEASE_SUCCESS */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_PENDING, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_RESTART */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_TE_SUCCESS */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_IDLE, /* S_HALT ==> */ S_HALT, }, /* Got an I_ROUTER */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_SHUTDOWN */ { /* S_IDLE ==> */ S_POLICY_ENGINE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_POLICY_ENGINE, /* S_HALT ==> */ S_ELECTION, }, /* Got an I_STOP */ { /* S_IDLE ==> */ S_STOPPING, /* S_ELECTION ==> */ S_STOPPING, /* S_INTEGRATION ==> */ S_STOPPING, /* S_FINALIZE_JOIN ==> */ S_STOPPING, /* S_NOT_DC ==> */ S_STOPPING, /* S_POLICY_ENGINE ==> */ S_STOPPING, /* S_RECOVERY ==> */ S_STOPPING, /* S_RELEASE_DC ==> */ S_STOPPING, /* S_STARTING ==> */ S_STOPPING, /* S_PENDING ==> */ S_STOPPING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_STOPPING, /* S_HALT ==> */ S_STOPPING, }, /* Got an I_TERMINATE */ { /* S_IDLE ==> */ S_TERMINATE, /* S_ELECTION ==> */ S_TERMINATE, /* S_INTEGRATION ==> */ S_TERMINATE, /* S_FINALIZE_JOIN ==> */ S_TERMINATE, /* S_NOT_DC ==> */ S_TERMINATE, /* S_POLICY_ENGINE ==> */ S_TERMINATE, /* S_RECOVERY ==> */ S_TERMINATE, /* S_RELEASE_DC ==> */ S_TERMINATE, /* S_STARTING ==> */ S_TERMINATE, /* S_PENDING ==> */ S_TERMINATE, /* S_STOPPING ==> */ S_TERMINATE, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TERMINATE, /* S_HALT ==> */ S_TERMINATE, }, /* Got an I_STARTUP */ { /* S_IDLE ==> */ S_RECOVERY, /* S_ELECTION ==> */ S_RECOVERY, /* S_INTEGRATION ==> */ S_RECOVERY, /* S_FINALIZE_JOIN ==> */ S_RECOVERY, /* S_NOT_DC ==> */ S_RECOVERY, /* S_POLICY_ENGINE ==> */ S_RECOVERY, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_RECOVERY, /* S_HALT ==> */ S_HALT, }, /* Got an I_PE_SUCCESS */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_OFFER */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_REQUEST */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_INTEGRATION, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_JOIN_RESULT */ { /* S_IDLE ==> */ S_INTEGRATION, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_INTEGRATION, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_RECOVERY, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_INTEGRATION, /* S_HALT ==> */ S_HALT, }, /* Got an I_WAIT_FOR_EVENT */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_DC_HEARTBEAT */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_LRM_EVENT */ { /* S_IDLE ==> */ S_IDLE, /* S_ELECTION ==> */ S_ELECTION, /* S_INTEGRATION ==> */ S_INTEGRATION, /* S_FINALIZE_JOIN ==> */ S_FINALIZE_JOIN, /* S_NOT_DC ==> */ S_NOT_DC, /* S_POLICY_ENGINE ==> */ S_POLICY_ENGINE, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_TRANSITION_ENGINE, /* S_HALT ==> */ S_HALT, }, /* Got an I_PENDING */ { /* S_IDLE ==> */ S_PENDING, /* S_ELECTION ==> */ S_PENDING, /* S_INTEGRATION ==> */ S_PENDING, /* S_FINALIZE_JOIN ==> */ S_PENDING, /* S_NOT_DC ==> */ S_PENDING, /* S_POLICY_ENGINE ==> */ S_PENDING, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_PENDING, /* S_PENDING ==> */ S_PENDING, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_PENDING, /* S_HALT ==> */ S_HALT, }, /* Got an I_HALT */ { /* S_IDLE ==> */ S_HALT, /* S_ELECTION ==> */ S_HALT, /* S_INTEGRATION ==> */ S_HALT, /* S_FINALIZE_JOIN ==> */ S_HALT, /* S_NOT_DC ==> */ S_HALT, /* S_POLICY_ENGINE ==> */ S_HALT, /* S_RECOVERY ==> */ S_RECOVERY, /* S_RELEASE_DC ==> */ S_RELEASE_DC, /* S_STARTING ==> */ S_STARTING, /* S_PENDING ==> */ S_HALT, /* S_STOPPING ==> */ S_STOPPING, /* S_TERMINATE ==> */ S_TERMINATE, /* S_TRANSITION_ENGINE ==> */ S_HALT, /* S_HALT ==> */ S_HALT, }, }; /* * The action table. Each entry is a set of actions to take or-ed * together. Like the state table, the rows are inputs, and * the columns are states. */ /* NOTE: In the fsa, the actions are extracted then state is updated. */ const long long crmd_fsa_actions [MAXINPUT][MAXSTATE] = { /* Got an I_NULL */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_NOTHING, /* S_INTEGRATION ==> */ A_NOTHING, /* S_FINALIZE_JOIN ==> */ A_NOTHING, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_NOTHING, /* S_PENDING ==> */ A_NOTHING, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_NOTHING, }, /* Got an I_CIB_OP */ { /* S_IDLE ==> */ A_ERROR, /* S_ELECTION ==> */ A_ERROR, /* S_INTEGRATION ==> */ A_ERROR, /* S_FINALIZE_JOIN ==> */ A_ERROR, /* S_NOT_DC ==> */ A_ERROR, /* S_POLICY_ENGINE ==> */ A_ERROR, /* S_RECOVERY ==> */ A_ERROR, /* S_RELEASE_DC ==> */ A_ERROR, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_ERROR, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_ERROR, /* S_HALT ==> */ A_ERROR, }, /* Got an I_CIB_UPDATE */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_LOG, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_DC_TIMEOUT */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_ELECTION_VOTE|A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_ELECTION_VOTE|A_WARN, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_TE_CANCEL|A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_ELECTION */ { /* S_IDLE ==> */ A_ELECTION_VOTE, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_ELECTION_VOTE, /* S_FINALIZE_JOIN ==> */ A_ELECTION_VOTE, /* S_NOT_DC ==> */ A_ELECTION_VOTE, /* S_POLICY_ENGINE ==> */ A_ELECTION_VOTE, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_ELECTION_VOTE, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_ELECTION_VOTE, /* S_HALT ==> */ A_ELECTION_VOTE, }, /* Got an I_PE_CALC */ { /* S_IDLE ==> */ A_PE_INVOKE, /* S_ELECTION ==> */ A_NOTHING, /* S_INTEGRATION ==> */ A_NOTHING, /* S_FINALIZE_JOIN ==> */ A_NOTHING, /* S_NOT_DC ==> */ A_ERROR, /* S_POLICY_ENGINE ==> */ A_PE_INVOKE, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_ERROR, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_PE_INVOKE, /* S_HALT ==> */ A_ERROR, }, /* Got an I_RELEASE_DC */ { /* S_IDLE ==> */ O_RELEASE, /* S_ELECTION ==> */ O_RELEASE, /* S_INTEGRATION ==> */ O_RELEASE|A_WARN, /* S_FINALIZE_JOIN ==> */ O_RELEASE|A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ O_RELEASE|A_WARN, /* S_RECOVERY ==> */ O_RELEASE, /* S_RELEASE_DC ==> */ O_RELEASE|A_WARN, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ O_RELEASE|A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_ELECTION_DC */ { /* S_IDLE ==> */ A_WARN|A_ELECTION_VOTE, /* S_ELECTION ==> */ A_LOG|A_DC_TAKEOVER|A_PE_START|A_TE_START|A_DC_JOIN_OFFER_ALL|A_DC_TIMER_STOP, /* S_INTEGRATION ==> */ A_WARN|A_ELECTION_VOTE|A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_WARN|A_ELECTION_VOTE|A_DC_JOIN_OFFER_ALL, /* S_NOT_DC ==> */ A_LOG|A_ELECTION_VOTE, /* S_POLICY_ENGINE ==> */ A_WARN|A_ELECTION_VOTE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN|A_ELECTION_VOTE, /* S_STARTING ==> */ A_LOG|A_WARN, /* S_PENDING ==> */ A_LOG|A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN|A_ELECTION_VOTE, /* S_HALT ==> */ A_WARN, }, /* Got an I_ERROR */ { /* S_IDLE ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, /* S_ELECTION ==> */ A_ERROR|A_RECOVER|O_RELEASE, /* S_INTEGRATION ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, /* S_FINALIZE_JOIN ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, /* S_NOT_DC ==> */ A_ERROR|A_RECOVER, /* S_POLICY_ENGINE ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, /* S_RECOVERY ==> */ A_ERROR|O_RELEASE, /* S_RELEASE_DC ==> */ A_ERROR|A_RECOVER, /* S_STARTING ==> */ A_ERROR|A_RECOVER, /* S_PENDING ==> */ A_ERROR|A_RECOVER, /* S_STOPPING ==> */ A_ERROR|A_EXIT_1, /* S_TERMINATE ==> */ A_ERROR|A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, /* S_HALT ==> */ A_ERROR|A_RECOVER|O_RELEASE|A_ELECTION_START, }, /* Got an I_FAIL */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN|A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_WARN|A_DC_JOIN_OFFER_ALL, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN|A_DC_JOIN_OFFER_ALL|A_TE_CANCEL, /* S_RECOVERY ==> */ A_WARN|O_RELEASE, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN|A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_WARN|O_TE_RESTART|A_RECOVER, /* S_HALT ==> */ A_WARN, }, /* Got an I_INTEGRATED */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_FINALIZE, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_WARN, }, /* Got an I_FINALIZED */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_FINAL|A_TE_CANCEL, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_NOTHING, /* S_HALT ==> */ A_WARN, }, /* Got an I_NODE_JOIN */ { /* S_IDLE ==> */ A_TE_HALT|A_DC_JOIN_OFFER_ONE, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_OFFER_ONE, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_TE_HALT|A_DC_JOIN_OFFER_ONE, /* S_HALT ==> */ A_WARN, }, /* Got an I_NOT_DC */ { /* S_IDLE ==> */ A_WARN|O_RELEASE, /* S_ELECTION ==> */ A_ERROR|A_ELECTION_START|A_DC_TIMER_STOP, /* S_INTEGRATION ==> */ A_ERROR|O_RELEASE, /* S_FINALIZE_JOIN ==> */ A_ERROR|O_RELEASE, /* S_NOT_DC ==> */ A_LOG, /* S_POLICY_ENGINE ==> */ A_ERROR|O_RELEASE, /* S_RECOVERY ==> */ A_ERROR|O_RELEASE, /* S_RELEASE_DC ==> */ A_ERROR|O_RELEASE, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_LOG|A_DC_TIMER_STOP, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_ERROR|O_RELEASE, /* S_HALT ==> */ A_WARN, }, /* Got an I_RECOVERED */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RELEASE_FAIL */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_NOTHING, /* S_RECOVERY ==> */ A_WARN|A_SHUTDOWN_REQ, /* S_RELEASE_DC ==> */ A_NOTHING, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RELEASE_SUCCESS */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_RESTART */ { /* S_IDLE ==> */ A_NOTHING, /* S_ELECTION ==> */ A_LOG|A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_LOG|A_DC_JOIN_OFFER_ALL, /* S_FINALIZE_JOIN ==> */ A_LOG|A_DC_JOIN_FINALIZE, /* S_NOT_DC ==> */ A_LOG|A_NOTHING, /* S_POLICY_ENGINE ==> */ A_LOG|A_PE_INVOKE, /* S_RECOVERY ==> */ A_LOG|A_RECOVER|O_RELEASE, /* S_RELEASE_DC ==> */ A_LOG|O_RELEASE, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_LOG|A_TE_INVOKE, /* S_HALT ==> */ A_WARN, }, /* Got an I_TE_SUCCESS */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_ERROR, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_RECOVER|A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_ERROR, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_ROUTER */ { /* S_IDLE ==> */ A_MSG_ROUTE, /* S_ELECTION ==> */ A_MSG_ROUTE, /* S_INTEGRATION ==> */ A_MSG_ROUTE, /* S_FINALIZE_JOIN ==> */ A_MSG_ROUTE, /* S_NOT_DC ==> */ A_MSG_ROUTE, /* S_POLICY_ENGINE ==> */ A_MSG_ROUTE, /* S_RECOVERY ==> */ A_MSG_ROUTE, /* S_RELEASE_DC ==> */ A_MSG_ROUTE, /* S_STARTING ==> */ A_MSG_ROUTE, /* S_PENDING ==> */ A_MSG_ROUTE, /* S_STOPPING ==> */ A_MSG_ROUTE, /* S_TERMINATE ==> */ A_MSG_ROUTE, /* S_TRANSITION_ENGINE ==> */ A_MSG_ROUTE, /* S_HALT ==> */ A_WARN|A_MSG_ROUTE, }, /* Got an I_SHUTDOWN */ { /* S_IDLE ==> */ A_LOG|A_SHUTDOWN_REQ, /* S_ELECTION ==> */ A_LOG|A_SHUTDOWN_REQ|A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_LOG|A_SHUTDOWN_REQ, /* S_FINALIZE_JOIN ==> */ A_LOG|A_SHUTDOWN_REQ, /* S_NOT_DC ==> */ A_SHUTDOWN_REQ, /* S_POLICY_ENGINE ==> */ A_LOG|A_SHUTDOWN_REQ, /* S_RECOVERY ==> */ A_WARN|O_EXIT|O_RELEASE, /* S_RELEASE_DC ==> */ A_WARN|A_SHUTDOWN_REQ, /* S_STARTING ==> */ A_WARN|O_EXIT, /* S_PENDING ==> */ A_SHUTDOWN_REQ, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_WARN|A_SHUTDOWN_REQ, /* S_HALT ==> */ A_WARN|A_ELECTION_START|A_SHUTDOWN_REQ, }, /* Got an I_STOP */ { /* S_IDLE ==> */ A_ERROR|O_RELEASE|O_EXIT, /* S_ELECTION ==> */ O_RELEASE|O_EXIT, /* S_INTEGRATION ==> */ A_ERROR|O_RELEASE|O_EXIT, /* S_FINALIZE_JOIN ==> */ A_ERROR|O_RELEASE|O_EXIT, /* S_NOT_DC ==> */ O_EXIT, /* S_POLICY_ENGINE ==> */ A_WARN|O_RELEASE|O_EXIT, /* S_RECOVERY ==> */ A_ERROR|O_RELEASE|O_EXIT, /* S_RELEASE_DC ==> */ A_ERROR|O_RELEASE|O_EXIT, /* S_STARTING ==> */ O_EXIT, /* S_PENDING ==> */ O_EXIT, /* S_STOPPING ==> */ O_EXIT, /* S_TERMINATE ==> */ A_ERROR|A_EXIT_1, /* S_TRANSITION_ENGINE ==> */ A_LOG|O_RELEASE|O_EXIT, /* S_HALT ==> */ O_RELEASE|O_EXIT|A_WARN, }, /* Got an I_TERMINATE */ { /* S_IDLE ==> */ A_ERROR|O_EXIT, /* S_ELECTION ==> */ A_ERROR|O_EXIT, /* S_INTEGRATION ==> */ A_ERROR|O_EXIT, /* S_FINALIZE_JOIN ==> */ A_ERROR|O_EXIT, /* S_NOT_DC ==> */ A_ERROR|O_EXIT, /* S_POLICY_ENGINE ==> */ A_ERROR|O_EXIT, /* S_RECOVERY ==> */ A_ERROR|O_EXIT, /* S_RELEASE_DC ==> */ A_ERROR|O_EXIT, /* S_STARTING ==> */ O_EXIT, /* S_PENDING ==> */ A_ERROR|O_EXIT, /* S_STOPPING ==> */ O_EXIT, /* S_TERMINATE ==> */ O_EXIT, /* S_TRANSITION_ENGINE ==> */ A_ERROR|O_EXIT, /* S_HALT ==> */ A_ERROR|O_EXIT, }, /* Got an I_STARTUP */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_LOG|A_STARTUP|A_CIB_START|A_LRM_CONNECT|A_CCM_CONNECT|A_HA_CONNECT|A_READCONFIG|A_STARTED, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, /* Got an I_PE_SUCCESS */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_LOG|A_TE_INVOKE, /* S_RECOVERY ==> */ A_RECOVER|A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_OFFER */ { /* S_IDLE ==> */ A_WARN|A_CL_JOIN_REQUEST, /* S_ELECTION ==> */ A_WARN|A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_CL_JOIN_REQUEST, /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_REQUEST, /* S_NOT_DC ==> */ A_CL_JOIN_REQUEST|A_DC_TIMER_STOP, /* S_POLICY_ENGINE ==> */ A_WARN|A_CL_JOIN_REQUEST, /* S_RECOVERY ==> */ A_WARN|A_CL_JOIN_REQUEST|A_DC_TIMER_STOP, /* S_RELEASE_DC ==> */ A_WARN|A_CL_JOIN_REQUEST|A_DC_TIMER_STOP, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_CL_JOIN_REQUEST|A_DC_TIMER_STOP, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_WARN|A_CL_JOIN_REQUEST, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_REQUEST */ { /* S_IDLE ==> */ A_DC_JOIN_OFFER_ONE, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_DC_JOIN_PROCESS_REQ, /* S_FINALIZE_JOIN ==> */ A_DC_JOIN_OFFER_ONE, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_DC_JOIN_OFFER_ONE, /* S_HALT ==> */ A_WARN, }, /* Got an I_JOIN_RESULT */ { /* S_IDLE ==> */ A_ERROR|A_TE_HALT|A_DC_JOIN_OFFER_ALL, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_LOG, /* S_FINALIZE_JOIN ==> */ A_CL_JOIN_RESULT|A_DC_JOIN_PROCESS_ACK, /* S_NOT_DC ==> */ A_ERROR|A_CL_JOIN_ANNOUNCE, /* S_POLICY_ENGINE ==> */ A_ERROR|A_TE_HALT|A_DC_JOIN_OFFER_ALL, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_ERROR, /* S_PENDING ==> */ A_CL_JOIN_RESULT, /* S_STOPPING ==> */ A_ERROR, /* S_TERMINATE ==> */ A_ERROR, /* S_TRANSITION_ENGINE ==> */ A_ERROR|A_TE_HALT|A_DC_JOIN_OFFER_ALL, /* S_HALT ==> */ A_WARN, }, /* Got an I_WAIT_FOR_EVENT */ { /* S_IDLE ==> */ A_LOG, /* S_ELECTION ==> */ A_LOG, /* S_INTEGRATION ==> */ A_LOG, /* S_FINALIZE_JOIN ==> */ A_LOG, /* S_NOT_DC ==> */ A_LOG, /* S_POLICY_ENGINE ==> */ A_LOG, /* S_RECOVERY ==> */ A_LOG, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG, /* S_STOPPING ==> */ A_LOG, /* S_TERMINATE ==> */ A_LOG, /* S_TRANSITION_ENGINE ==> */ A_LOG, /* S_HALT ==> */ A_WARN, }, /* Got an I_DC_HEARTBEAT */ { /* S_IDLE ==> */ A_ERROR, /* S_ELECTION ==> */ A_WARN|A_ELECTION_VOTE, /* S_INTEGRATION ==> */ A_ERROR, /* S_FINALIZE_JOIN ==> */ A_ERROR, /* S_NOT_DC ==> */ A_NOTHING, /* S_POLICY_ENGINE ==> */ A_ERROR, /* S_RECOVERY ==> */ A_NOTHING, /* S_RELEASE_DC ==> */ A_LOG, /* S_STARTING ==> */ A_LOG, /* S_PENDING ==> */ A_LOG|A_CL_JOIN_ANNOUNCE, /* S_STOPPING ==> */ A_NOTHING, /* S_TERMINATE ==> */ A_NOTHING, /* S_TRANSITION_ENGINE ==> */ A_ERROR, /* S_HALT ==> */ A_WARN, }, /* Got an I_LRM_EVENT */ { /* S_IDLE ==> */ A_LRM_EVENT, /* S_ELECTION ==> */ A_LRM_EVENT, /* S_INTEGRATION ==> */ A_LRM_EVENT, /* S_FINALIZE_JOIN ==> */ A_LRM_EVENT, /* S_NOT_DC ==> */ A_LRM_EVENT, /* S_POLICY_ENGINE ==> */ A_LRM_EVENT, /* S_RECOVERY ==> */ A_LRM_EVENT, /* S_RELEASE_DC ==> */ A_LRM_EVENT, /* S_STARTING ==> */ A_LRM_EVENT, /* S_PENDING ==> */ A_LRM_EVENT, /* S_STOPPING ==> */ A_LRM_EVENT, /* S_TERMINATE ==> */ A_LRM_EVENT, /* S_TRANSITION_ENGINE ==> */ A_LRM_EVENT, /* S_HALT ==> */ A_WARN, }, +/* For everyone ending up in S_PENDING, (re)start the DC timer and wait for I_JOIN_OFFER or I_NOT_DC */ /* Got an I_PENDING */ { - /* S_IDLE ==> */ O_RELEASE, - /* S_ELECTION ==> */ O_RELEASE|A_DC_TIMER_STOP, - /* S_INTEGRATION ==> */ O_RELEASE, - /* S_FINALIZE_JOIN ==> */ O_RELEASE, - /* S_NOT_DC ==> */ A_LOG, - /* S_POLICY_ENGINE ==> */ O_RELEASE, + /* S_IDLE ==> */ O_RELEASE|O_DC_TIMER_RESTART, + /* S_ELECTION ==> */ O_RELEASE|O_DC_TIMER_RESTART, + /* S_INTEGRATION ==> */ O_RELEASE|O_DC_TIMER_RESTART, + /* S_FINALIZE_JOIN ==> */ O_RELEASE|O_DC_TIMER_RESTART, + /* S_NOT_DC ==> */ A_LOG|O_DC_TIMER_RESTART, + /* S_POLICY_ENGINE ==> */ O_RELEASE|O_DC_TIMER_RESTART, /* S_RECOVERY ==> */ A_WARN, - /* S_RELEASE_DC ==> */ A_WARN, + /* S_RELEASE_DC ==> */ A_WARN|O_DC_TIMER_RESTART, /* S_STARTING ==> */ A_LOG|A_DC_TIMER_START|A_CL_JOIN_QUERY, - /* S_PENDING ==> */ A_LOG, + /* S_PENDING ==> */ A_LOG|O_DC_TIMER_RESTART, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, - /* S_TRANSITION_ENGINE ==> */ O_RELEASE, + /* S_TRANSITION_ENGINE ==> */ O_RELEASE|O_DC_TIMER_RESTART, /* S_HALT ==> */ A_WARN, }, /* Got an I_HALT */ { /* S_IDLE ==> */ A_WARN, /* S_ELECTION ==> */ A_WARN, /* S_INTEGRATION ==> */ A_WARN, /* S_FINALIZE_JOIN ==> */ A_WARN, /* S_NOT_DC ==> */ A_WARN, /* S_POLICY_ENGINE ==> */ A_WARN, /* S_RECOVERY ==> */ A_WARN, /* S_RELEASE_DC ==> */ A_WARN, /* S_STARTING ==> */ A_WARN, /* S_PENDING ==> */ A_WARN, /* S_STOPPING ==> */ A_WARN, /* S_TERMINATE ==> */ A_WARN, /* S_TRANSITION_ENGINE ==> */ A_WARN, /* S_HALT ==> */ A_WARN, }, }; #endif diff --git a/crmd/pengine.c b/crmd/pengine.c old mode 100755 new mode 100644 index 3eff403337..8a0ff6a1d4 --- a/crmd/pengine.c +++ b/crmd/pengine.c @@ -1,328 +1,340 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if(rc == cib_ok) { int len = 15; char *filename = NULL; len += strlen(id); len += strlen(PE_STATE_DIR); crm_malloc0(filename, len); CRM_CHECK(filename != NULL, return); sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id); if(write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save CIB contents after PE crash to %s", filename); } else { crm_notice("Saved CIB contents after PE crash to %s", filename); } crm_free(filename); } crm_free(id); } static void pe_connection_destroy(gpointer user_data) { clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); if(is_set(fsa_input_register, pe_subsystem->flag_required)) { int rc = cib_ok; cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)", pe_subsystem->pid, uuid_str); /* *The PE died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_cib_conn->cmds->register_callback( fsa_cib_conn, rc, 5, FALSE, crm_strdup(uuid_str), "save_cib_contents", save_cib_contents); } else { crm_info("Connection to the Policy Engine released"); } pe_subsystem->pid = -1; pe_subsystem->ipc = NULL; pe_subsystem->client = NULL; mainloop_set_trigger(fsa_source); return; } static gboolean pe_msg_dispatch(IPC_Channel *client, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while(IPC_ISRCONN(client) && client->ops->is_message_pending(client)) { msg = xmlfromIPC(client, MAX_IPC_DELAY); if (msg != NULL) { route_message(C_IPC_MESSAGE, msg); free_xml(msg); } } if (client->ch_status != IPC_CONNECT) { crm_info("Received HUP from %s:[%d]", pe_subsystem->name, pe_subsystem->pid); stay_connected = FALSE; } mainloop_set_trigger(fsa_source); return stay_connected; } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { static GCHSource *pe_source = NULL; struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; if(action & stop_actions) { clear_bit_inplace(fsa_input_register, pe_subsystem->flag_required); if(is_heartbeat_cluster()) { stop_subsystem(this_subsys, FALSE); } else { if(pe_source) { G_main_del_IPC_Channel(pe_source); pe_source = NULL; } if(pe_subsystem->ipc) { pe_subsystem->ipc->ops->destroy(pe_subsystem->ipc); pe_subsystem->ipc = NULL; } clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); } } if(action & start_actions) { if(cur_state != S_STOPPING) { if(is_openais_cluster()) { set_bit_inplace(fsa_input_register, pe_subsystem->flag_required); } else if(is_heartbeat_cluster()) { if(start_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } sleep(3); } pe_subsystem->ipc = init_client_ipc_comms_nodispatch(CRM_SYSTEM_PENGINE); if(pe_subsystem->ipc == NULL) { crm_warn("Setup of client connection failed," " not adding channel to mainloop"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if(is_openais_cluster()) { pe_subsystem->pid = pe_subsystem->ipc->farside_pid;; } set_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); pe_source = G_main_add_IPC_Channel( G_PRIORITY_HIGH, pe_subsystem->ipc, FALSE, pe_msg_dispatch, NULL, pe_connection_destroy); } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { + if(AM_I_DC == FALSE) { + crm_err("Not DC: No need to invoke the PE (anymore): %s", + fsa_action2string(action)); + return; + } + if(is_set(fsa_input_register, R_PE_CONNECTED) == FALSE){ + if(is_set(fsa_input_register, R_SHUTDOWN)) { + crm_err("Cannot shut down gracefully without the PE"); + register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); + + } else { crm_info("Waiting for the PE to connect"); crmd_fsa_stall(NULL); - return; + } + return; } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke the PE without a consistent" " copy of the CIB!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); crm_info("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ crm_free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_cib_conn->cmds->register_callback( fsa_cib_conn, fsa_pe_query, 60, FALSE, NULL, "do_pe_invoke_callback", do_pe_invoke_callback); } void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { xmlNode *cmd = NULL; if(rc != cib_ok) { crm_err("Cant retrieve the CIB: %s", cib_error2string(rc)); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if(call_id != fsa_pe_query) { crm_debug_2("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if(AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the PE anymore"); return; } else if(fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); return; } else if(last_peer_update != 0) { crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if(fsa_state != S_POLICY_ENGINE) { crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state)); return; } CRM_DEV_ASSERT(output != NULL); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); if(ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); crm_free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); if(send_ipc_message(pe_subsystem->ipc, cmd) == FALSE) { crm_err("Could not contact the pengine"); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_info("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/doc/Pacemaker_Explained/en-US/Ap-LSB.xml b/doc/Pacemaker_Explained/en-US/Ap-LSB.xml index 669ec906d6..7d8b2a4161 100644 --- a/doc/Pacemaker_Explained/en-US/Ap-LSB.xml +++ b/doc/Pacemaker_Explained/en-US/Ap-LSB.xml @@ -1,64 +1,64 @@ Is This init Script LSB Compatible? The relevant part of LSB spec can be found at: http://refspecs.freestandards.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html. It includes a description of all the return codes listed here. Assuming some_service is configured correctly and currently not active, the following sequence will help you determine if it is LSB compatible: Start (stopped): /etc/init.d/some_service start ; echo "result: $?" Did the service start? Did the command print result: 0 (in addition to the regular output)? - para>Status (running): + Status (running): /etc/init.d/some_service status ; echo "result: $?" Did the script accept the command? Did the script indicate the service was running? Did the command print result: 0 (in addition to the regular output)? - para>Start (running): + Start (running): /etc/init.d/some_service start ; echo "result: $?" Is the service still running? Did the command print result: 0 (in addition to the regular output)? - para>Stop (running): + Stop (running): /etc/init.d/some_service stop ; echo "result: $?" Was the service stopped? Did the command print result: 0 (in addition to the regular output)? - para>Status (stopped): + Status (stopped): /etc/init.d/some_service status ; echo "result: $?" Did the script accept the command? Did the script indicate the service was not running? Did the command print result: 3 (in addition to the regular output)? Stop (stopped): /etc/init.d/some_service stop ; echo "result: $?" Is the service still stopped? Did the command print result: 0 (in addition to the regular output)? Status (failed): This step is not readily testable and relies on manual inspection of the script. The script can use one of the error codes (other than 3) listed in the LSB spec to indicate that it is active but failed. This tells the cluster that before moving the resource to another node, it needs to stop it on the existing one first. The script can use one of the error codes (other than 3) listed in the LSB spec to indicate that it is active but failed. This tells the cluster that before moving the resource to another node, it needs to stop it on the existing one first. If the answer to any of the above questions is no, then the script is not LSB compliant. Your options are then to either fix the script or write an OCF agent based on the existing script. diff --git a/extra/resources/SystemHealth b/extra/resources/SystemHealth index 94386db82c..e09303c6a9 100644 --- a/extra/resources/SystemHealth +++ b/extra/resources/SystemHealth @@ -1,236 +1,250 @@ #!/bin/sh # # SystemHealth OCF RA. # # Copyright (c) 2009 International Business Machines (IBM), Mark Hamzy # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### meta_data() { cat < 0.1 This is a SystemHealth Resource Agent. It is used to monitor the health of a system via IPMI. SystemHealth resource agent END } ####################################################################### SystemHealth_usage() { cat < /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "servicelog_notify not found!" - return $RC + return $OCF_ERR_INSTALLED fi which ipmiservicelogd > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "ipmiservicelogd not found!" - return $RC + return $OCF_ERR_INSTALLED + fi + + test -x $OCF_RESKEY_program + RC=$? + + if [ $RC != 0 ]; then + ocf_log err "$OCF_RESKEY_program not found!" + return $OCF_ERR_INSTALLED fi } SystemHealth_start() { SystemHealth_monitor RC=$? if [ $RC = $OCF_ERR_GENERIC ]; then return $OCF_ERR_GENERIC elif [ $RC = $OCF_SUCCESS ]; then ocf_log warn "starting an already started SystemHealth" return $OCF_SUCCESS fi service ipmi start > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "Could not start service IPMI!" return $OCF_ERR_GENERIC fi ipmiservicelogd smi 0 > /dev/null 2>&1 & RC=$? if [ $RC != 0 ]; then ocf_log err "Could not start ipmiservicelogd!" return $OCF_ERR_GENERIC fi servicelog_notify --add --type=EVENT --command="$OCF_RESKEY_program" --method=num_arg --match='type=4' > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "servicelog_notify register handler failed!" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } SystemHealth_stop() { SystemHealth_monitor RC=$? if [ $RC = $OCF_ERR_GENERIC ]; then return $OCF_ERR_GENERIC elif [ $RC = $OCF_SUCCESS ]; then killall ipmiservicelogd RC1=$? if [ $RC1 != 0 ]; then ocf_log err "Could not stop ipmiservicelogd!" fi servicelog_notify --remove --command="$OCF_RESKEY_program" > /dev/null 2>&1 RC2=$? if [ $RC2 != 0 ]; then ocf_log err "servicelog_notify remove handler failed!" fi if [ $RC1 = 0 -a $RC2 = 0 ]; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi elif [ $RC = $OCF_NOT_RUNNING ]; then ocf_log warn "stopping an already stopped SystemHealth" return $OCF_SUCCESS else ocf_log err "SystemHealth_stop: should not be here!" return $OCF_ERR_GENERIC fi } SystemHealth_monitor() { # Monitor _MUST!_ differentiate correctly between running # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING). # That is THREE states, not just yes/no. - SystemHealth_check_tools - RC=$? - - if [ $RC != 0 ]; then - return $OCF_ERR_GENERIC - fi - if [ ! -f /var/run/ipmiservicelogd.pid0 ]; then ocf_log debug "ipmiservicelogd is not running!" return $OCF_NOT_RUNNING fi ps -p `cat /var/run/ipmiservicelogd.pid0` > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log debug "ipmiservicelogd's pid `cat /var/run/ipmiservicelogd.pid0` is not running!" rm /var/run/ipmiservicelogd.pid0 return $OCF_ERR_GENERIC fi servicelog_notify --list --command="$OCF_RESKEY_program" > /dev/null 2>&1 RC=$? if [ $RC = 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } SystemHealth_validate() { SystemHealth_check_tools RC=$? if [ $RC != 0 ]; then - return $OCF_ERR_ARGS + return $RC fi return $OCF_SUCCESS } : ${OCF_RESKEY_program=/usr/sbin/notifyServicelogEvent} case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; +usage|help) SystemHealth_usage + exit $OCF_SUCCESS + ;; +esac + +SystemHealth_check_tools +RC=$? + +if [ $RC != 0 ]; then + case $__OCF_ACTION in + stop) exit $OCF_SUCCESS;; + *) exit $RC;; + esac +fi + +case $__OCF_ACTION in start) SystemHealth_start;; stop) SystemHealth_stop;; monitor) SystemHealth_monitor;; -reload) ocf_log err "Reloading..." +reload) ocf_log info "Reloading..." SystemHealth_start ;; -validate-all) SystemHealth_validate;; -usage|help) SystemHealth_usage - exit $OCF_SUCCESS - ;; +validate-all) ;; *) SystemHealth_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c index 3bf46808e1..f1cd7bafb3 100644 --- a/lib/ais/plugin.c +++ b/lib/ais/plugin.c @@ -1,1666 +1,1666 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef AIS_COROSYNC # include # include # include #endif #include #include #include "plugin.h" #include "utils.h" #include #include #include #include #include #include #include #include #include struct corosync_api_v1 *pcmk_api = NULL; uint32_t plugin_has_votes = 0; uint32_t plugin_expected_votes = 2; int use_mgmtd = 0; int plugin_log_level = LOG_DEBUG; char *local_uname = NULL; int local_uname_len = 0; char *local_cname = NULL; int local_cname_len = 0; uint32_t local_nodeid = 0; char *ipc_channel_name = NULL; static uint64_t local_born_on = 0; uint64_t membership_seq = 0; pthread_t pcmk_wait_thread; gboolean wait_active = TRUE; gboolean have_reliable_membership_id = FALSE; GHashTable *ipc_client_list = NULL; GHashTable *membership_list = NULL; GHashTable *membership_notify_list = NULL; #define MAX_RESPAWN 100 #define LOOPBACK_ID 16777343 #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 struct crm_identify_msg_s { coroipc_request_header_t header __attribute__((aligned(8))); uint32_t id; uint32_t pid; int32_t votes; uint32_t processes; char uname[256]; char version[256]; uint64_t born_on; } __attribute__((packed)); static crm_child_t pcmk_children[] = { { 0, crm_proc_none, crm_flag_none, 0, 0, FALSE, "none", NULL, NULL, NULL, NULL }, { 0, crm_proc_ais, crm_flag_none, 0, 0, FALSE, "ais", NULL, NULL, NULL, NULL }, { 0, crm_proc_lrmd, crm_flag_none, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd", NULL, NULL }, { 0, crm_proc_cib, crm_flag_members, 2, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib", NULL, NULL }, { 0, crm_proc_crmd, crm_flag_members, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd", NULL, NULL }, { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd", NULL, NULL }, { 0, crm_proc_stonithd, crm_flag_none, 0, 0, TRUE, "stonithd", NULL, "/bin/false", NULL, NULL }, { 0, crm_proc_pe, crm_flag_none, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine", NULL, NULL }, { 0, crm_proc_mgmtd, crm_flag_none, 7, 0, TRUE, "mgmtd", NULL, CRM_DAEMON_DIR"/mgmtd", NULL, NULL }, { 0, crm_proc_stonith_ng, crm_flag_none, 1, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd", NULL, NULL }, }; void send_cluster_id(void); int send_cluster_msg_raw(const AIS_Message *ais_msg); char *pcmk_generate_membership_data(void); gboolean check_message_sanity(const AIS_Message *msg, const char *data); #ifdef AIS_COROSYNC typedef const void ais_void_ptr; int pcmk_shutdown(void); void pcmk_peer_update(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); #else typedef void ais_void_ptr; extern totempg_groups_handle openais_group_handle; int pcmk_shutdown(struct objdb_iface_ver0 *objdb); void pcmk_peer_update(enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id); #endif int pcmk_startup (struct corosync_api_v1 *corosync_api); int pcmk_config_init(struct corosync_api_v1 *corosync_api); int pcmk_ipc_exit (void *conn); int pcmk_ipc_connect (void *conn); void pcmk_ipc(void *conn, ais_void_ptr *msg); void pcmk_exec_dump(void); void pcmk_cluster_swab(void *msg); void pcmk_cluster_callback(ais_void_ptr *message, unsigned int nodeid); void pcmk_nodeid(void *conn, ais_void_ptr *msg); void pcmk_nodes(void *conn, ais_void_ptr *msg); void pcmk_notify(void *conn, ais_void_ptr *msg); void pcmk_remove_member(void *conn, ais_void_ptr *msg); void pcmk_quorum(void *conn, ais_void_ptr *msg); void pcmk_cluster_id_swab(void *msg); void pcmk_cluster_id_callback(ais_void_ptr *message, unsigned int nodeid); void ais_remove_peer(char *node_id); static struct corosync_lib_handler pcmk_lib_service[] = { { /* 0 */ .lib_handler_fn = pcmk_ipc, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (coroipc_response_header_t), .response_id = CRM_MESSAGE_IPC_ACK, #endif }, { /* 1 */ .lib_handler_fn = pcmk_nodes, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (coroipc_response_header_t), .response_id = CRM_MESSAGE_IPC_ACK, #endif }, { /* 2 */ .lib_handler_fn = pcmk_notify, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (coroipc_response_header_t), .response_id = CRM_MESSAGE_IPC_ACK, #endif }, { /* 3 */ .lib_handler_fn = pcmk_nodeid, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (struct crm_ais_nodeid_resp_s), .response_id = crm_class_nodeid, #endif }, { /* 4 */ .lib_handler_fn = pcmk_remove_member, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (coroipc_response_header_t), .response_id = CRM_MESSAGE_IPC_ACK, #endif }, { /* 5 */ .lib_handler_fn = pcmk_quorum, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, #ifdef AIS_WHITETANK .response_size = sizeof (coroipc_response_header_t), .response_id = CRM_MESSAGE_IPC_ACK, #endif }, }; static struct corosync_exec_handler pcmk_exec_service[] = { { /* 0 */ .exec_handler_fn = pcmk_cluster_callback, .exec_endian_convert_fn = pcmk_cluster_swab }, { /* 1 */ .exec_handler_fn = pcmk_cluster_id_callback, .exec_endian_convert_fn = pcmk_cluster_id_swab } }; /* * Exports the interface for the service */ struct corosync_service_engine pcmk_service_handler = { .name = (unsigned char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, .id = PCMK_SERVICE_ID, .private_data_size = 0, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, .lib_init_fn = pcmk_ipc_connect, .lib_exit_fn = pcmk_ipc_exit, .exec_init_fn = pcmk_startup, .exec_exit_fn = pcmk_shutdown, .config_init_fn = pcmk_config_init, #ifdef AIS_COROSYNC .priority = 50, .lib_engine = pcmk_lib_service, .lib_engine_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), .exec_engine = pcmk_exec_service, .exec_engine_count = sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler), #else .lib_service = pcmk_lib_service, .lib_service_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), .exec_service = pcmk_exec_service, .exec_service_count = sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler), #endif .confchg_fn = pcmk_peer_update, .exec_dump_fn = pcmk_exec_dump, /* void (*sync_init) (void); */ /* int (*sync_process) (void); */ /* void (*sync_activate) (void); */ /* void (*sync_abort) (void); */ }; /* * Dynamic Loader definition */ struct corosync_service_engine *pcmk_get_handler_ver0 (void); #ifdef AIS_COROSYNC struct corosync_service_engine_iface_ver0 pcmk_service_handler_iface = { .corosync_get_service_engine_ver0 = pcmk_get_handler_ver0 }; #else struct openais_service_handler_iface_ver0 pcmk_service_handler_iface = { .openais_get_service_handler_ver0 = pcmk_get_handler_ver0 }; #endif static struct lcr_iface openais_pcmk_ver0[1] = { { .name = "pacemaker", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp pcmk_comp_ver0 = { .iface_count = 1, .ifaces = openais_pcmk_ver0 }; struct corosync_service_engine *pcmk_get_handler_ver0 (void) { return (&pcmk_service_handler); } __attribute__ ((constructor)) static void register_this_component (void) { lcr_interfaces_set (&openais_pcmk_ver0[0], &pcmk_service_handler_iface); lcr_component_register (&pcmk_comp_ver0); } static int plugin_has_quorum(void) { if((plugin_expected_votes >> 1) < plugin_has_votes) { return 1; } return 0; } static void update_expected_votes(int value) { if(value < plugin_has_votes) { /* Never drop below the number of connected nodes */ ais_info("Cannot update expected quorum votes %d -> %d:" " value cannot be less that the current number of votes", plugin_expected_votes, value); } else if(plugin_expected_votes != value) { ais_info("Expected quorum votes %d -> %d", plugin_expected_votes, value); plugin_expected_votes = value; } } /* Create our own local copy of the config so we can navigate it */ static void process_ais_conf(void) { char *value = NULL; gboolean any_log = FALSE; hdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; ais_info("Reading configure"); top_handle = config_find_init(pcmk_api, "logging"); local_handle = config_find_next(pcmk_api, "logging", top_handle); get_config_opt(pcmk_api, local_handle, "debug", &value, "on"); if(ais_get_boolean(value)) { plugin_log_level = LOG_DEBUG; pcmk_env.debug = "1"; } else { plugin_log_level = LOG_INFO; pcmk_env.debug = "0"; } - get_config_opt(pcmk_api, local_handle, "to_file", &value, "off"); + get_config_opt(pcmk_api, local_handle, "to_logfile", &value, "off"); if(ais_get_boolean(value)) { get_config_opt(pcmk_api, local_handle, "logfile", &value, NULL); if(value == NULL) { ais_err("Logging to a file requested but no log file specified"); } else { struct passwd *superuser = getpwnam("root"); struct passwd *user = getpwnam(CRM_DAEMON_USER); AIS_CHECK(user != NULL, ais_err("Cluster user %s does not exist", CRM_DAEMON_USER)); AIS_CHECK(superuser != NULL, ais_err("Superuser %s does not exist", "root")); pcmk_env.logfile = value; if(superuser && user) { /* Ensure the file has the correct permissions */ FILE *logfile = fopen(value, "a"); int logfd = fileno(logfile); fchown(logfd, superuser->pw_uid, user->pw_gid); fchmod(logfd, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP); fprintf(logfile, "Set r/w permissions for "CRM_DAEMON_USER"\n"); fflush(logfile); fsync(fileno(logfile)); fclose(logfile); any_log = TRUE; } } } get_config_opt(pcmk_api, local_handle, "to_syslog", &value, "on"); if(any_log && ais_get_boolean(value) == FALSE) { ais_info("User configured file based logging and explicitly disabled syslog."); value = "none"; } else { if(ais_get_boolean(value) == FALSE) { ais_err("Please enable some sort of logging, either 'to_file: on' or 'to_syslog: on'."); ais_err("If you use file logging, be sure to also define a value for 'logfile'"); } get_config_opt(pcmk_api, local_handle, "syslog_facility", &value, "daemon"); } pcmk_env.syslog = value; config_find_done(pcmk_api, local_handle); top_handle = config_find_init(pcmk_api, "service"); local_handle = config_find_next(pcmk_api, "service", top_handle); while(local_handle) { value = NULL; pcmk_api->object_key_get(local_handle, "name", strlen("name"), (void**)&value, NULL); if(ais_str_eq("pacemaker", value)) { break; } local_handle = config_find_next(pcmk_api, "service", top_handle); } get_config_opt(pcmk_api, local_handle, "clustername", &local_cname, "pcmk"); local_cname_len = strlen(local_cname); get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no"); pcmk_env.use_logd = value; get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no"); if(ais_get_boolean(value) == FALSE) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(crm_proc_mgmtd & pcmk_children[lpc].flag) { /* Disable mgmtd startup */ pcmk_children[lpc].start_seq = 0; break; } } } config_find_done(pcmk_api, local_handle); } int pcmk_config_init(struct corosync_api_v1 *unused) { return 0; } static void *pcmk_wait_dispatch (void *arg) { struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; while(wait_active) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid > 0) { int status; pid_t pid = wait4( pcmk_children[lpc].pid, &status, WNOHANG, NULL); if(pid == 0) { continue; } else if(pid < 0) { ais_perror("Call to wait4(%s) failed", pcmk_children[lpc].name); continue; } /* cleanup */ pcmk_children[lpc].pid = 0; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; if(WIFSIGNALED(status)) { int sig = WTERMSIG(status); ais_err("Child process %s terminated with signal %d" " (pid=%d, core=%s)", pcmk_children[lpc].name, sig, pid, WCOREDUMP(status)?"true":"false"); } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); do_ais_log(rc==0?LOG_NOTICE:LOG_ERR, "Child process %s exited (pid=%d, rc=%d)", pcmk_children[lpc].name, pid, rc); if(rc == 100) { ais_notice("Child process %s no longer wishes" " to be respawned", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } } pcmk_children[lpc].respawn_count += 1; if(pcmk_children[lpc].respawn_count > MAX_RESPAWN) { ais_err("Child respawn count exceeded by %s", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } if(pcmk_children[lpc].respawn) { ais_notice("Respawning failed child process: %s", pcmk_children[lpc].name); spawn_child(&(pcmk_children[lpc])); } else { send_cluster_id(); } } } sched_yield (); nanosleep (&waitsleep, 0); } return 0; } static uint32_t pcmk_update_nodeid(void) { int last = local_nodeid; #if AIS_COROSYNC local_nodeid = pcmk_api->totem_nodeid_get(); #else local_nodeid = totempg_my_nodeid_get(); #endif if(last != local_nodeid) { if(last == 0) { ais_info("Local node id: %u", local_nodeid); } else { char *last_s = NULL; ais_malloc0(last_s, 32); ais_warn("Detected local node id change: %u -> %u", last, local_nodeid); snprintf(last_s, 31, "%u", last); ais_remove_peer(last_s); ais_free(last_s); } update_member(local_nodeid, 0, 0, 1, 0, local_uname, CRM_NODE_MEMBER, NULL); } return local_nodeid; } int pcmk_startup(struct corosync_api_v1 *init_with) { int rc = 0; int lpc = 0; int start_seq = 1; struct utsname us; struct rlimit cores; static int max = SIZEOF(pcmk_children); struct passwd *pwentry = getpwnam(CRM_DAEMON_USER); pcmk_api = init_with; #ifdef AIS_WHITETANK log_init ("crm"); #endif pcmk_env.debug = "0"; pcmk_env.logfile = NULL; pcmk_env.use_logd = "false"; pcmk_env.syslog = "daemon"; process_ais_conf(); membership_list = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, destroy_ais_node); membership_notify_list = g_hash_table_new(g_direct_hash, g_direct_equal); ipc_client_list = g_hash_table_new(g_direct_hash, g_direct_equal); ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = getrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Cannot determine current maximum core size."); } if(cores.rlim_max <= 0) { cores.rlim_max = RLIM_INFINITY; rc = setrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } } else { ais_info("Maximum core file size is: %lu", cores.rlim_max); if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } } AIS_CHECK(pwentry != NULL, ais_err("Cluster user %s does not exist", CRM_DAEMON_USER); return TRUE); mkdir(CRM_STATE_DIR, 0750); chown(CRM_STATE_DIR, pwentry->pw_uid, pwentry->pw_gid); mkdir(HA_STATE_DIR"/heartbeat", 0755); /* Used by RAs - Leave owned by root */ mkdir(HA_STATE_DIR"/heartbeat/rsctmp", 0755); /* Used by RAs - Leave owned by root */ rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Service: %d", PCMK_SERVICE_ID); ais_info("Local hostname: %s", local_uname); pcmk_update_nodeid(); pthread_create (&pcmk_wait_thread, NULL, pcmk_wait_dispatch, NULL); for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if(start_seq == pcmk_children[lpc].start_seq) { spawn_child(&(pcmk_children[lpc])); } } } return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s), ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead( gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if(node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, 0, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST, NULL); } } void pcmk_peer_update ( enum totem_configuration_type configuration_type, #ifdef AIS_COROSYNC const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id #else unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id #endif ) { int lpc = 0; int changed = 0; int do_update = 0; AIS_ASSERT(ring_id != NULL); switch(configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%ld, new=%ld, lost=%ld", do_update?"Stable":"Transitional", ring_id->seq, (long)member_list_entries, (long)joined_list_entries, (long)left_list_entries); if(do_update == 0) { for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if(node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_LOST, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if(changed && joined_list_entries == 0 && left_list_entries == 0) { ais_err("Something strange happened: %d", changed); changed = 0; } ais_debug_2("Reaping unseen nodes..."); g_hash_table_foreach(membership_list, ais_mark_unseen_peer_dead, &changed); if(member_list_entries > 1) { /* Used to set born-on in send_cluster_id()) * We need to wait until we have at least one peer since first * membership id is based on the one before we stopped and isn't reliable */ have_reliable_membership_id = TRUE; } if(changed) { ais_debug("%d nodes changed", changed); pcmk_update_nodeid(); send_member_notification(); } send_cluster_id(); } int pcmk_ipc_exit (void *conn) { int lpc = 0; const char *client = NULL; void *async_conn = conn; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].conn == conn) { if(wait_active == FALSE) { /* Make sure the shutdown loop exits */ pcmk_children[lpc].pid = 0; } pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; client = pcmk_children[lpc].name; break; } } g_hash_table_remove(membership_notify_list, async_conn); g_hash_table_remove(ipc_client_list, async_conn); do_ais_log(client?LOG_INFO:(LOG_DEBUG+1), "Client %s (conn=%p, async-conn=%p) left", client?client:"unknown-transient", conn, async_conn); return (0); } int pcmk_ipc_connect (void *conn) { /* OpenAIS hasn't finished setting up the connection at this point * Sending messages now messes up the protocol! */ return (0); } /* * Executive message handlers */ void pcmk_cluster_swab(void *msg) { AIS_Message *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->size = swab32 (ais_msg->size); ais_msg->is_compressed = swab32 (ais_msg->is_compressed); ais_msg->compressed_size = swab32 (ais_msg->compressed_size); ais_msg->host.id = swab32 (ais_msg->host.id); ais_msg->host.pid = swab32 (ais_msg->host.pid); ais_msg->host.type = swab32 (ais_msg->host.type); ais_msg->host.size = swab32 (ais_msg->host.size); ais_msg->host.local = swab32 (ais_msg->host.local); ais_msg->sender.id = swab32 (ais_msg->sender.id); ais_msg->sender.pid = swab32 (ais_msg->sender.pid); ais_msg->sender.type = swab32 (ais_msg->sender.type); ais_msg->sender.size = swab32 (ais_msg->sender.size); ais_msg->sender.local = swab32 (ais_msg->sender.local); } void pcmk_cluster_callback ( ais_void_ptr *message, unsigned int nodeid) { const AIS_Message *ais_msg = message; ais_debug_2("Message from node %u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); /* Shouldn't be required... update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if(ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_debug_3("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } } void pcmk_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->pid = swab32 (ais_msg->pid); ais_msg->votes = swab32 (ais_msg->votes); ais_msg->processes = swab32 (ais_msg->processes); } void pcmk_cluster_id_callback (ais_void_ptr *message, unsigned int nodeid) { int changed = 0; const struct crm_identify_msg_s *msg = message; if(nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member( nodeid, msg->born_on, membership_seq, msg->votes, msg->processes, msg->uname, NULL, msg->version); if(changed) { send_member_notification(); } } struct res_overlay { coroipc_response_header_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn) { if(res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } res_overlay->header.id = CRM_MESSAGE_IPC_ACK; res_overlay->header.size = sizeof (coroipc_response_header_t); res_overlay->header.error = CS_OK; #ifdef AIS_COROSYNC pcmk_api->ipc_response_send (conn, res_overlay, res_overlay->header.size); #else openais_response_send (conn, res_overlay, res_overlay->header.size); #endif } /* local callbacks */ void pcmk_ipc(void *conn, ais_void_ptr *msg) { AIS_Message *mutable; int type = 0, size = 0; gboolean transient = TRUE; const AIS_Message *ais_msg = (const AIS_Message*)msg; void *async_conn = conn; ais_debug_2("Message from client %p", conn); if(check_message_sanity(msg, ((const AIS_Message*)msg)->data) == FALSE) { /* The message is corrupted - ignore */ send_ipc_ack(conn); msg = NULL; return; } /* Make a copy of the message here and ACK it * The message is only valid until a response is sent * but the response must also be sent _before_ we send anything else */ mutable = ais_msg_copy(ais_msg); AIS_ASSERT(check_message_sanity(mutable, mutable->data)); size = mutable->header.size; /* ais_malloc0(ais_msg, size); */ /* memcpy(ais_msg, msg, size); */ type = mutable->sender.type; ais_debug_3("type: %d local: %d conn: %p host type: %d ais: %d sender pid: %d child pid: %d size: %d", type, mutable->host.local, pcmk_children[type].conn, mutable->host.type, crm_msg_ais, mutable->sender.pid, pcmk_children[type].pid, ((int)SIZEOF(pcmk_children))); if(type > crm_msg_none && type < SIZEOF(pcmk_children)) { /* known child process */ transient = FALSE; } /* If this check fails, the order of pcmk_children probably * doesn't match that of the crm_ais_msg_types enum */ AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, ais_err("Sender: %d, child[%d]: %d", mutable->sender.pid, type, pcmk_children[type].pid); return); if(transient == FALSE && type > crm_msg_none && mutable->host.local && pcmk_children[type].conn == NULL && mutable->host.type == crm_msg_ais) { AIS_CHECK(mutable->sender.type != mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); ais_info("Recorded connection %p for %s/%d", conn, pcmk_children[type].name, pcmk_children[type].pid); pcmk_children[type].conn = conn; pcmk_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if(pcmk_children[type].flags & crm_flag_members) { char *update = pcmk_generate_membership_data(); g_hash_table_replace(membership_notify_list, async_conn, async_conn); ais_info("Sending membership update "U64T" to %s", membership_seq, pcmk_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none,update); } } else if(transient) { AIS_CHECK(mutable->sender.type == mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); g_hash_table_replace(ipc_client_list, async_conn, GUINT_TO_POINTER(mutable->sender.pid)); } mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); route_ais_message(mutable, TRUE); send_ipc_ack(conn); msg = NULL; ais_free(mutable); } int pcmk_shutdown ( #ifdef AIS_COROSYNC void #else struct objdb_iface_ver0 *objdb #endif ) { int lpc = 0; static int phase = 0; static int max_wait = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); if(phase == 0) { ais_notice("Shuting down Pacemaker"); phase = max; } wait_active = FALSE; /* stop the wait loop */ for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { if(phase != pcmk_children[lpc].start_seq) { continue; } #ifdef AIS_WHITETANK retry: #endif if(pcmk_children[lpc].pid) { pid_t pid = 0; int status = 0; time_t now = time(NULL); if(pcmk_children[lpc].respawn) { max_wait = 5; /* 5 * 30s = 2.5 minutes... plenty once the crmd is gone */ next_log = now + 30; pcmk_children[lpc].respawn = FALSE; stop_child(&(pcmk_children[lpc]), SIGTERM); } pid = wait4(pcmk_children[lpc].pid, &status, WNOHANG, NULL); if(pid < 0) { ais_perror("Call to wait4(%s/%d) failed - treating it as stopped", pcmk_children[lpc].name, pcmk_children[lpc].pid); } else if(pid == 0) { if(now >= next_log) { max_wait--; next_log = now + 30; ais_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", pcmk_children[lpc].name, pcmk_children[lpc].pid, pcmk_children[lpc].start_seq); if(max_wait <= 0 && phase < pcmk_children[crm_msg_crmd].start_seq) { ais_err("Child %s taking too long to terminate, sending SIGKILL", pcmk_children[lpc].name); stop_child(&(pcmk_children[lpc]), SIGKILL); } } #ifdef AIS_WHITETANK { struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; sched_yield (); nanosleep (&waitsleep, 0); goto retry; } #else /* Return control to corosync */ return -1; #endif } } /* cleanup */ ais_notice("%s confirmed stopped", pcmk_children[lpc].name); pcmk_children[lpc].async_conn = NULL; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].pid = 0; } } send_cluster_id(); ais_notice("Shutdown complete"); /* TODO: Add back the logsys flush call once its written */ #ifdef AIS_WHITETANK /* Bug bnc#482847, bnc#482905 * * All cluster services are now down, we could allow OpenAIS to continue * unloading plugins, but its kinda new at that and there are a bunch of * race conditions that get exercised. * * Take the easy way out for now (on whitetank) and eventually fix for * CoroSync which is where everyone wants to be eventually anyway */ ais_notice("Forcing clean exit of OpenAIS"); exit(0); #endif return 0; } struct member_loop_data { char *string; }; void member_vote_count_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (ais_str_eq(CRM_NODE_MEMBER, node->state)) { plugin_has_votes += node->votes; } } void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_debug_2("Dumping node %u", node->id); data->string = append_member(data->string, node); } char *pcmk_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 256; ais_malloc0(data.string, size); plugin_has_votes = 0; g_hash_table_foreach(membership_list, member_vote_count_fn, NULL); if(plugin_has_votes > plugin_expected_votes) { update_expected_votes(plugin_has_votes); } snprintf(data.string, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9) ;/* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void pcmk_nodes(void *conn, ais_void_ptr *msg) { char *data = pcmk_generate_membership_data(); void *async_conn = conn; /* send the ACK before we send any other messages * - but after we no longer need to access the message */ send_ipc_ack(conn); msg = NULL; if(async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); } void pcmk_remove_member(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; if(data != NULL) { char *bcast = ais_concat("remove-peer", data, ':'); send_cluster_msg(crm_msg_ais, NULL, bcast); ais_info("Sent: %s", bcast); ais_free(bcast); } ais_free(data); } static void send_quorum_details(void *conn) { int size = 256; char *data = NULL; ais_malloc0(data, size); snprintf(data, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); send_client_msg(conn, crm_class_quorum, crm_msg_none, data); ais_free(data); } void pcmk_quorum(void *conn, ais_void_ptr *msg) { char *dummy = NULL; const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; /* Make sure the current number of votes is accurate */ dummy = pcmk_generate_membership_data(); ais_free(dummy); /* Calls without data just want the current quorum details */ if(data != NULL && strlen(data) > 0) { int value = ais_get_int(data, NULL); update_expected_votes(value); } send_quorum_details(conn); ais_free(data); } void pcmk_notify(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); void *async_conn = conn; int enable = 0; int sender = ais_msg->sender.pid; send_ipc_ack(conn); msg = NULL; if(ais_str_eq("true", data)) { enable = 1; } ais_info("%s node notifications for child %d (%p)", enable?"Enabling":"Disabling", sender, async_conn); if(enable) { g_hash_table_replace(membership_notify_list, async_conn, async_conn); } else { g_hash_table_remove(membership_notify_list, async_conn); } ais_free(data); } void pcmk_nodeid(void *conn, ais_void_ptr *msg) { static int counter = 0; struct crm_ais_nodeid_resp_s resp; ais_debug_2("Sending local nodeid: %d to %p[%d]", local_nodeid, conn, counter); resp.header.id = crm_class_nodeid; resp.header.size = sizeof (struct crm_ais_nodeid_resp_s); resp.header.error = CS_OK; resp.id = local_nodeid; resp.counter = counter++; memset(resp.uname, 0, MAX_NAME); memcpy(resp.uname, local_uname, local_uname_len); memset(resp.cname, 0, MAX_NAME); memcpy(resp.cname, local_cname, local_cname_len); #ifdef AIS_COROSYNC pcmk_api->ipc_response_send (conn, &resp, resp.header.size); #else openais_response_send (conn, &resp, resp.header.size); #endif } static gboolean ghash_send_update(gpointer key, gpointer value, gpointer data) { if(send_client_msg(value, crm_class_members, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void send_member_notification(void) { char *update = pcmk_generate_membership_data(); ais_info("Sending membership update "U64T" to %d children", membership_seq, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_update, update); ais_free(update); } gboolean check_message_sanity(const AIS_Message *msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { ais_err("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { ais_err("Message header contains an error: %d", msg->header.error); sane = FALSE; } AIS_CHECK(msg->header.size > sizeof(AIS_Message), ais_err("Message %d size too small: %d < %zu", msg->header.id, msg->header.size, sizeof(AIS_Message)); return FALSE); if(sane && ais_data_len(msg) != tmp_size) { ais_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { ais_err("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; ais_err("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } ais_debug_2("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { AIS_CHECK(sane, ais_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size)); } else if(repaired) { ais_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_debug_3("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } static int delivered_transient = 0; static void deliver_transient_msg(gpointer key, gpointer value, gpointer user_data) { int pid = GPOINTER_TO_INT(value); AIS_Message *mutable = user_data; if(pid == mutable->host.type) { int rc = send_client_ipc(key, mutable); delivered_transient++; ais_info("Sent message to %s.%d (rc=%d)", ais_dest(&(mutable->host)), pid, rc); if(rc != 0) { ais_warn("Sending message to %s.%d failed (rc=%d)", ais_dest(&(mutable->host)), pid, rc); log_ais_message(LOG_DEBUG, mutable); } } } gboolean route_ais_message(const AIS_Message *msg, gboolean local_origin) { int rc = 0; int dest = msg->host.type; const char *reason = "unknown"; AIS_Message *mutable = ais_msg_copy(msg); static int service_id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); ais_debug_3("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", mutable->id, ais_dest(&(mutable->host)), msg_type2text(dest), ais_dest(&(mutable->sender)), msg_type2text(mutable->sender.type), mutable->sender.pid, local_origin?"false":"true", ais_data_len((mutable))); if(local_origin == FALSE) { if(mutable->host.size == 0 || ais_str_eq(local_uname, mutable->host.uname)) { mutable->host.local = TRUE; } } if(check_message_sanity(mutable, mutable->data) == FALSE) { /* Dont send this message to anyone */ rc = 1; goto bail; } if(mutable->host.local) { void *conn = NULL; const char *lookup = NULL; if(dest == crm_msg_ais) { process_ais_message(mutable); goto bail; } else if(dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest == crm_msg_te) { /* te messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest >= SIZEOF(pcmk_children)) { /* Transient client */ delivered_transient = 0; g_hash_table_foreach(ipc_client_list, deliver_transient_msg, mutable); if(delivered_transient) { ais_debug_2("Sent message to %d transient clients: %d", delivered_transient, dest); goto bail; } else { /* try the crmd */ ais_debug_2("Sending message to transient client %d via crmd", dest); dest = crm_msg_crmd; } } else if(dest == 0) { ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, mutable); log_printf(LOG_ERR, "%s", get_ais_data(mutable)); rc = 1; goto bail; } lookup = msg_type2text(dest); conn = pcmk_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); if(mutable->header.id == service_id) { mutable->header.id = 0; /* reset this back to zero for IPC messages */ } else if(mutable->header.id != 0) { ais_err("reset header id back to zero from %d", mutable->header.id); mutable->header.id = 0; /* reset this back to zero for IPC messages */ } reason = "ipc delivery failed"; rc = send_client_ipc(conn, mutable); } else if(local_origin) { /* forward to other hosts */ ais_debug_3("Forwarding to cluster"); reason = "cluster delivery failed"; rc = send_cluster_msg_raw(mutable); } if(rc != 0) { ais_warn("Sending message to %s.%s failed: %s (rc=%d)", ais_dest(&(mutable->host)), msg_type2text(dest), reason, rc); log_ais_message(LOG_DEBUG, mutable); } bail: ais_free(mutable); return rc==0?TRUE:FALSE; } int send_cluster_msg_raw(const AIS_Message *ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *mutable = ais_msg_copy(ais_msg); AIS_ASSERT(local_nodeid != 0); AIS_ASSERT(ais_msg->header.size == (sizeof(AIS_Message) + ais_data_len(ais_msg))); if(mutable->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */, msg_id++; ais_err("Message ID wrapped around")); mutable->id = msg_id; } mutable->header.error = CS_OK; mutable->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); iovec.iov_base = (char *)mutable; iovec.iov_len = mutable->header.size; ais_debug_3("Sending message (size=%u)", (unsigned int)iovec.iov_len); #if AIS_COROSYNC rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); #else rc = totempg_groups_mcast_joined(openais_group_handle, &iovec, 1, TOTEMPG_SAFE); #endif if(rc == 0 && mutable->is_compressed == FALSE) { ais_debug_2("Message sent: %.80s", mutable->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d): %.120s", rc, mutable->data)); ais_free(mutable); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int lpc = 0; int len = 0; struct iovec iovec; struct crm_identify_msg_s *msg = NULL; AIS_ASSERT(local_nodeid != 0); if(local_born_on == 0 && have_reliable_membership_id) { local_born_on = membership_seq; } ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; /* msg->header.error = CS_OK; */ msg->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 1); len = min(local_uname_len, MAX_NAME-1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME-1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = crm_proc_ais; msg->born_on = local_born_on; for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid != 0) { msg->processes |= pcmk_children[lpc].flag; } } ais_debug("Local update: id=%u, born="U64T", seq="U64T"", local_nodeid, local_born_on, membership_seq); update_member( local_nodeid, local_born_on, membership_seq, msg->votes, msg->processes, NULL, NULL, VERSION); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; #if AIS_COROSYNC rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); #else rc = totempg_groups_mcast_joined(openais_group_handle, &iovec, 1, TOTEMPG_SAFE); #endif AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); } static gboolean ghash_send_removal(gpointer key, gpointer value, gpointer data) { send_quorum_details(value); if(send_client_msg(value, crm_class_rmpeer, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void ais_remove_peer(char *node_id) { uint32_t id = ais_get_int(node_id, NULL); crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if(node == NULL) { ais_info("Peer %u is unknown", id); } else if(ais_str_eq(CRM_NODE_MEMBER, node->state)) { ais_warn("Peer %u/%s is still active", id, node->uname); } else if(g_hash_table_remove(membership_list, GUINT_TO_POINTER(id))) { plugin_expected_votes--; ais_notice("Removed dead peer %u from the membership list", id); ais_info("Sending removal of %u to %d children", id, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_removal, node_id); } else { ais_warn("Peer %u/%s was not removed", id, node->uname); } } gboolean process_ais_message(const AIS_Message *msg) { int len = ais_data_len(msg); char *data = get_ais_data(msg); do_ais_log(LOG_DEBUG, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname==local_uname?"false":"true", ais_data_len(msg), data); if(data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { char *node = data+12; ais_remove_peer(node); } ais_free(data); return TRUE; } static void member_dump_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; ais_info(" node id:%u, uname=%s state=%s processes=%.16x born="U64T" seen="U64T" addr=%s version=%s", node->id, node->uname?node->uname:"-unknown-", node->state, node->processes, node->born, node->last_seen, node->addr?node->addr:"-unknown-", node->version?node->version:"-unknown-"); } void pcmk_exec_dump(void) { /* Called after SIG_USR2 */ process_ais_conf(); ais_info("Local id: %u, uname: %s, born: "U64T, local_nodeid, local_uname, local_born_on); ais_info("Membership id: "U64T", quorate: %s, expected: %u, actual: %u", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_dump_fn, NULL); }