diff --git a/cts/agents/sam_test_agent.c b/cts/agents/sam_test_agent.c index 43c05dfd..17a91e2e 100644 --- a/cts/agents/sam_test_agent.c +++ b/cts/agents/sam_test_agent.c @@ -1,417 +1,1505 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * Provides test of SAM API */ #include -#include #include #include #include #include -#include +#include #include #include #include #include #include #include -#include -#include - #include +#include #include +#include +#include +#include +#include #include "common_test_agent.h" +extern const char *__progname; static int test2_sig_delivered = 0; -static int test4_hc_cb_count = 0; +static int test5_hc_cb_count = 0; +static int test6_sig_delivered = 0; /* * First test will just register SAM, with policy restart. First instance will * sleep one second, send hc and sleep another 3 seconds. This should force restart. * Second instance will sleep one second, send hc, stop hc and sleep 3 seconds. * Then start hc again and sleep 3 seconds. This should force restart again. * Last instance just calls initialize again. This should end with error. * Then call start, followed by stop and start again. Finally, we will call finalize * twice. One should succeed, second should fail. After this, we will call every function * (none should succeed). */ static int test1 (void) { cs_error_t error; unsigned int instance_id; int i; - syslog (LOG_INFO,"%s: initialize\n", __FUNCTION__); + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s: register\n", __FUNCTION__); + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { syslog (LOG_ERR, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1 || instance_id == 2) { - syslog (LOG_INFO,"%s iid %d: start\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { syslog (LOG_ERR, "Can't start hc. Error %d\n", error); return 1; } for (i = 0; i < 10; i++) { - syslog (LOG_INFO,"%s iid %d: sleep 1\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); - syslog (LOG_INFO,"%s iid %d: hc send\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { syslog (LOG_ERR, "Can't send hc. Error %d\n", error); return 1; } } if (instance_id == 2) { - syslog (LOG_INFO,"%s iid %d: stop\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: stop\n", __FUNCTION__, instance_id); error = sam_stop (); if (error != CS_OK) { syslog (LOG_ERR, "Can't send hc. Error %d\n", error); return 1; } } - syslog (LOG_INFO,"%s iid %d: sleep 3\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: sleep 3\n", __FUNCTION__, instance_id); sleep (3); - syslog (LOG_INFO,"%s iid %d: start\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { syslog (LOG_ERR, "Can't start hc. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s iid %d: sleep 3\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: sleep 3\n", __FUNCTION__, instance_id); sleep (3); return 0; } if (instance_id == 3) { error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); if (error == CS_OK) { syslog (LOG_ERR, "Can initialize SAM API after initialization"); return 1; } error = sam_start (); if (error != CS_OK) { syslog (LOG_ERR, "Can't start hc. Error %d\n", error); return 1; } error = sam_stop (); if (error != CS_OK) { syslog (LOG_ERR, "Can't stop hc. Error %d\n", error); return 1; } error = sam_finalize (); if (error != CS_OK) { syslog (LOG_ERR, "Can't finalize sam. Error %d\n", error); return 1; } error = sam_finalize (); if (error == CS_OK) { syslog (LOG_ERR, "Can finalize sam after finalization!\n"); return 1; } if (sam_initialize (2, SAM_RECOVERY_POLICY_RESTART) == CS_OK || sam_start () == CS_OK || sam_stop () == CS_OK || sam_register (NULL) == CS_OK || sam_hc_send () == CS_OK || sam_hc_callback_register (NULL) == CS_OK) { syslog (LOG_ERR, "Can call one of function after finalization!\n"); return 1; } return 0; } return 1; } static void test2_signal (int sig) { - syslog (LOG_INFO,"%s\n", __FUNCTION__); + syslog (LOG_INFO, "%s\n", __FUNCTION__); test2_sig_delivered = 1; } /* * This tests recovery policy quit and callback. */ static int test2 (void) { cs_error_t error; unsigned int instance_id; - syslog (LOG_INFO,"%s: initialize\n", __FUNCTION__); + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); error = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT); if (error != CS_OK) { syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s: register\n", __FUNCTION__); + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { syslog (LOG_ERR, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1) { signal (SIGTERM, test2_signal); - syslog (LOG_INFO,"%s iid %d: start\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { syslog (LOG_ERR, "Can't start hc. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s iid %d: sleep 1\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: sleep 1\n", __FUNCTION__, instance_id); sleep (1); - syslog (LOG_INFO,"%s iid %d: hc send\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: hc send\n", __FUNCTION__, instance_id); error = sam_hc_send (); if (error != CS_OK) { syslog (LOG_ERR, "Can't send hc. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); while (!test2_sig_delivered) { sleep (1); } - syslog (LOG_INFO,"%s iid %d: wait for real kill\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: wait for real kill\n", __FUNCTION__, instance_id); sleep (3); } return 1; } /* - * Smoke test. Better to turn off coredump ;) This has no time limit, just restart process - * when it dies. + * Test sam_data_store, sam_data_restore and sam_data_getsize */ -static int test3 (void) { - cs_error_t error; +static int test4 (void) +{ + size_t size; + cs_error_t err; + int i; unsigned int instance_id; - int tmp1, tmp2, tmp3; + char saved_data[128]; + char saved_data2[128]; - syslog (LOG_INFO,"%s: initialize\n", __FUNCTION__); - error = sam_initialize (0, SAM_RECOVERY_POLICY_RESTART); - if (error != CS_OK) { - syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", error); + syslog (LOG_INFO, "%s: sam_data_getsize 1\n", __FUNCTION__); + err = sam_data_getsize (&size); + if (err != CS_ERR_BAD_HANDLE) { + syslog (LOG_ERR, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); return 1; } - syslog (LOG_INFO,"%s: register\n", __FUNCTION__); - error = sam_register (&instance_id); - if (error != CS_OK) { - syslog (LOG_ERR, "Can't register. Error %d\n", error); + + syslog (LOG_INFO, "%s: sam_data_getsize 2\n", __FUNCTION__); + err = sam_data_getsize (NULL); + if (err != CS_ERR_INVALID_PARAM) { + syslog (LOG_ERR, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); return 1; } - if (instance_id < 100) { - syslog (LOG_INFO,"%s iid %d: start\n", __FUNCTION__, instance_id); - error = sam_start (); - if (error != CS_OK) { - syslog (LOG_ERR, "Can't start hc. Error %d\n", error); + syslog (LOG_INFO, "%s: sam_data_store 1\n", __FUNCTION__); + err = sam_data_store (NULL, 0); + if (err != CS_ERR_BAD_HANDLE) { + syslog (LOG_ERR, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_restore 1\n", __FUNCTION__); + err = sam_data_restore (saved_data, sizeof (saved_data)); + if (err != CS_ERR_BAD_HANDLE) { + syslog (LOG_ERR, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_initialize\n", __FUNCTION__); + err = sam_initialize (0, SAM_RECOVERY_POLICY_RESTART); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_getsize 3\n", __FUNCTION__); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_ERR_BAD_HANDLE. Error returned %d\n", err); + return 1; + } + if (size != 0) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_restore 2\n", __FUNCTION__); + err = sam_data_restore (NULL, sizeof (saved_data)); + if (err != CS_ERR_INVALID_PARAM) { + syslog (LOG_ERR, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); + return 1; + } + + /* + * Store some real data + */ + for (i = 0; i < sizeof (saved_data); i++) { + saved_data[i] = (char)(i + 5); + } + + syslog (LOG_INFO, "%s: sam_data_store 2\n", __FUNCTION__); + err = sam_data_store (saved_data, sizeof (saved_data)); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_getsize 4\n", __FUNCTION__); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + if (size != sizeof (saved_data)) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_restore 3\n", __FUNCTION__); + err = sam_data_restore (saved_data2, sizeof (saved_data2) - 1); + if (err != CS_ERR_INVALID_PARAM) { + syslog (LOG_ERR, "Test should return CS_ERR_INVALID_PARAM. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_restore 4\n", __FUNCTION__); + err = sam_data_restore (saved_data2, sizeof (saved_data2)); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + if (memcmp (saved_data, saved_data2, sizeof (saved_data2)) != 0) { + syslog (LOG_ERR, "Retored data are not same\n"); + return 1; + } + + memset (saved_data2, 0, sizeof (saved_data2)); + + syslog (LOG_INFO, "%s: sam_data_store 3\n", __FUNCTION__); + err = sam_data_store (NULL, 1); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_getsize 5\n", __FUNCTION__); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + if (size != 0) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } + + syslog (LOG_INFO, "%s: sam_data_store 4\n", __FUNCTION__); + err = sam_data_store (saved_data, sizeof (saved_data)); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", err); + return 1; + } + + if (instance_id == 1) { + syslog (LOG_INFO, "%s iid %d: sam_start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); return 1; } - syslog (LOG_INFO,"%s iid %d: divide by zero\n", __FUNCTION__, instance_id); - tmp2 = rand (); - tmp3 = 0; - tmp1 = tmp2 / tmp3; + syslog (LOG_INFO, "%s iid %d: sam_data_getsize 6\n", __FUNCTION__, instance_id); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + if (size != sizeof (saved_data2)) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } - return 1; + syslog (LOG_INFO, "%s iid %d: sam_data_restore 5\n", __FUNCTION__, instance_id); + err = sam_data_restore (saved_data2, sizeof (saved_data2)); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + if (memcmp (saved_data, saved_data2, sizeof (saved_data2)) != 0) { + syslog (LOG_ERR, "Retored data are not same\n"); + return 1; + } + + for (i = 0; i < sizeof (saved_data); i++) { + saved_data[i] = (char)(i - 5); + } + + syslog (LOG_INFO, "%s iid %d: sam_data_store 5\n", __FUNCTION__, instance_id); + err = sam_data_store (saved_data, sizeof (saved_data) - 7); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + exit (1); } - return 0; + if (instance_id == 2) { + syslog (LOG_INFO, "%s iid %d: sam_start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 1; + } + syslog (LOG_INFO, "%s iid %d: sam_data_getsize 7\n", __FUNCTION__, instance_id); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + if (size != sizeof (saved_data2) - 7) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } + + syslog (LOG_INFO, "%s iid %d: sam_data_restore 6\n", __FUNCTION__, instance_id); + err = sam_data_restore (saved_data2, sizeof (saved_data2)); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + for (i = 0; i < sizeof (saved_data); i++) { + saved_data[i] = (char)(i - 5); + } + + if (memcmp (saved_data, saved_data2, sizeof (saved_data2) - 7) != 0) { + syslog (LOG_ERR, "Retored data are not same\n"); + return 1; + } + + syslog (LOG_INFO, "%s iid %d: sam_data_store 6\n", __FUNCTION__, instance_id); + err = sam_data_store (NULL, 0); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + + exit (1); + } + + if (instance_id == 3) { + syslog (LOG_INFO, "%s iid %d: sam_data_getsize 8\n", __FUNCTION__, instance_id); + err = sam_data_getsize (&size); + if (err != CS_OK) { + syslog (LOG_ERR, "Test should return CS_OK. Error returned %d\n", err); + return 1; + } + if (size != 0) { + syslog (LOG_ERR, "Test should return size of 0. Returned %zx\n", size); + return 1; + } + } + + return (0); } -static int test4_hc_cb (void) +static int test5_hc_cb (void) { - syslog (LOG_INFO,"%s %d\n", __FUNCTION__, ++test4_hc_cb_count); + syslog (LOG_INFO, "%s %d\n", __FUNCTION__, ++test5_hc_cb_count); + + sam_data_store (&test5_hc_cb_count, sizeof (test5_hc_cb_count)); - if (test4_hc_cb_count > 10) + if (test5_hc_cb_count > 10) return 1; return 0; } /* * Test event driven healtchecking. */ -static int test4 (void) +static int test5 (void) { cs_error_t error; unsigned int instance_id; + int hc_cb_count; - syslog (LOG_INFO,"%s: initialize\n", __FUNCTION__); + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); error = sam_initialize (100, SAM_RECOVERY_POLICY_RESTART); if (error != CS_OK) { syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s: register\n", __FUNCTION__); + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); error = sam_register (&instance_id); if (error != CS_OK) { syslog (LOG_ERR, "Can't register. Error %d\n", error); return 1; } if (instance_id == 1) { - syslog (LOG_INFO,"%s iid %d: hc callback register\n", __FUNCTION__, instance_id); - error = sam_hc_callback_register (test4_hc_cb); + syslog (LOG_INFO, "%s iid %d: hc callback register\n", __FUNCTION__, instance_id); + error = sam_hc_callback_register (test5_hc_cb); if (error != CS_OK) { syslog (LOG_ERR, "Can't register hc cb. Error %d\n", error); return 1; } - syslog (LOG_INFO,"%s iid %d: start\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); error = sam_start (); if (error != CS_OK) { syslog (LOG_ERR, "Can't start hc. Error %d\n", error); return 1; } sleep (2); - syslog (LOG_INFO,"%s iid %d: Failed. Wasn't killed.\n", __FUNCTION__, instance_id); + syslog (LOG_INFO, "%s iid %d: Failed. Wasn't killed.\n", __FUNCTION__, instance_id); return 1; } if (instance_id == 2) { + error = sam_data_restore (&hc_cb_count, sizeof (hc_cb_count)); + if (error != CS_OK) { + syslog (LOG_ERR, "sam_data_restore should return CS_OK. Error returned %d\n", error); + return 1; + } + + if (hc_cb_count != 11) { + syslog (LOG_ERR, "%s iid %d: Premature killed. hc_cb_count should be 11 and it is %d\n", + __FUNCTION__, instance_id - 1, hc_cb_count); + return 1; + + } return 0; } return 1; } -static void do_command (int sock, char* func, char*args[], int num_args) +static void test6_signal (int sig) { + cs_error_t error; + + syslog (LOG_INFO, "%s\n", __FUNCTION__); + test6_sig_delivered++; + + if ((error = sam_data_store (&test6_sig_delivered, sizeof (test6_sig_delivered))) != CS_OK) { + syslog (LOG_ERR, "Can't store data! Error : %d\n", error); + } +} + +/* + * Test warn signal set. + */ +static int test6 (void) { + cs_error_t error; + unsigned int instance_id; + int test6_sig_del; + + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); + error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", error); + return 1; + } + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + error = sam_register (&instance_id); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", error); + return 1; + } + + if (instance_id == 1) { + error = sam_warn_signal_set (SIGUSR1); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't set warn signal. Error %d\n", error); + return 1; + } + + signal (SIGUSR1, test6_signal); + + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); + error = sam_start (); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", error); + return 1; + } + + syslog (LOG_INFO, "%s iid %d: sleep 1\n", __FUNCTION__, instance_id); + sleep (1); + + syslog (LOG_INFO, "%s iid %d: hc send\n", __FUNCTION__, instance_id); + error = sam_hc_send (); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't send hc. Error %d\n", error); + return 1; + } + + + syslog (LOG_INFO, "%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); + while (!test6_sig_delivered) { + sleep (1); + } + + syslog (LOG_INFO, "%s iid %d: wait for real kill\n", __FUNCTION__, instance_id); + + sleep (3); + + syslog (LOG_INFO, "%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); + return (1); + } + + if (instance_id == 2) { + error = sam_data_restore (&test6_sig_del, sizeof (test6_sig_del)); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't restore data. Error %d\n", error); + return 1; + } + + if (test6_sig_del != 1) { + syslog (LOG_ERR, "Previous test failed. Signal was not delivered\n"); + return 1; + } + + error = sam_warn_signal_set (SIGKILL); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't set warn signal. Error %d\n", error); + return 1; + } + + signal (SIGUSR1, test6_signal); + + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); + error = sam_start (); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", error); + return 1; + } + + syslog (LOG_INFO, "%s iid %d: sleep 1\n", __FUNCTION__, instance_id); + sleep (1); + + syslog (LOG_INFO, "%s iid %d: hc send\n", __FUNCTION__, instance_id); + error = sam_hc_send (); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't send hc. Error %d\n", error); + return 1; + } + + + syslog (LOG_INFO, "%s iid %d: wait for delivery of signal\n", __FUNCTION__, instance_id); + while (!test6_sig_delivered) { + sleep (1); + } + + syslog (LOG_INFO, "%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); + return (1); + } + + if (instance_id == 3) { + error = sam_data_restore (&test6_sig_del, sizeof (test6_sig_del)); + if (error != CS_OK) { + syslog (LOG_ERR, "Can't restore data. Error %d\n", error); + return 1; + } + + if (test6_sig_del != 1) { + syslog (LOG_ERR, "Previous test failed. Signal WAS delivered\n"); + return 1; + } + + return (0); + } + + return 1; +} + +static void *test7_thread (void *arg) { - char response[100]; - pid_t pid; - int err; - int stat; - int please_wait = 1; + /* Wait 5s */ + sleep (5); + exit (0); +} - snprintf (response, 100, "%s", FAIL_STR); +/* + * Test quorum + */ +static int test_quorum (void) { + confdb_handle_t cdb_handle; + cs_error_t err; + hdb_handle_t quorum_handle; + size_t value_len; + char key_value[256]; + unsigned int instance_id; + pthread_t kill_thread; - if (parse_debug) - syslog (LOG_INFO,"RPC:%s() called.", func); + err = confdb_initialize (&cdb_handle, NULL); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err); + return (1); + } + + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d. Test skipped\n", err); + return (1); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "quorum", strlen("quorum"), &quorum_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"quorum\": %d. Test skipped\n", err); + return (1); + } + + err = confdb_key_get(cdb_handle, quorum_handle, "provider", strlen("provider"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"provider\" key: %d. Test skipped\n", err); + return (1); + } + + if (!(value_len - 1 == strlen ("testquorum") && memcmp (key_value, "testquorum", value_len - 1) == 0)) { + syslog (LOG_INFO, "Provider is not testquorum. Test skipped\n"); + return (1); + } + + /* + * Set to not quorate + */ + err = confdb_key_create(cdb_handle, quorum_handle, "quorate", strlen("quorate"), "0", strlen("0")); + if (err != CS_OK) { + syslog (LOG_INFO, "Can't create confdb key. Error %d\n", err); + return (2); + } + + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); + err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUORUM_RESTART); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", err); + return 2; + } + + if (instance_id == 1) { + /* + * Sam start should block forever, but 10s for us should be enough + */ + pthread_create (&kill_thread, NULL, test7_thread, NULL); + + syslog (LOG_INFO, "%s iid %d: start - should block forever (waiting 5s)\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + syslog (LOG_INFO, "%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); + return (2); + } + + if (instance_id == 2) { + /* + * Set to quorate + */ + err = confdb_key_create(cdb_handle, quorum_handle, "quorate", strlen("quorate"), "1", strlen("1")); + if (err != CS_OK) { + syslog (LOG_INFO, "Can't create confdb key. Error %d\n", err); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + /* + * Set corosync unquorate + */ + err = confdb_key_create(cdb_handle, quorum_handle, "quorate", strlen("quorate"), "0", strlen("0")); + if (err != CS_OK) { + syslog (LOG_INFO, "Can't create confdb key. Error %d\n", err); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: sleep 3\n", __FUNCTION__, instance_id); + sleep (3); + + syslog (LOG_INFO, "%s iid %d: wasn't killed\n", __FUNCTION__, instance_id); + return (2); + } + + if (instance_id == 3) { + return (0); + } + + return (2); +} + +/* + * Test confdb integration + quit policy + */ +static int test8 (pid_t pid, pid_t old_pid, int test_n) { + confdb_handle_t cdb_handle; + cs_error_t err; + hdb_handle_t res_handle, proc_handle, pid_handle; + size_t value_len; + uint64_t tstamp1, tstamp2; + int32_t msec_diff; + char key_value[256]; + unsigned int instance_id; + char tmp_obj[PATH_MAX]; + confdb_value_types_t cdbtype; + + err = confdb_initialize (&cdb_handle, NULL); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err); + return (1); + } + + syslog (LOG_INFO, "%s test %d\n", __FUNCTION__, test_n); + + if (test_n == 2) { + /* + * Object should not exist + */ + syslog (LOG_INFO, "%s Testing if object exists (it shouldn't)\n", __FUNCTION__); + + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err == CS_OK) { + syslog (LOG_INFO, "Could find object \"%s\": %d.\n", tmp_obj, err); + return (2); + } + } + + if (test_n == 1 || test_n == 2) { + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); + err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", err); + return 2; + } + + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"recovery\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("quit") || memcmp (key_value, "quit", value_len) != 0) { + syslog (LOG_INFO, "Recovery key \"%s\" is not \"watchdog\".\n", key_value); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("stopped") || memcmp (key_value, "stopped", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"stopped\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("running") || memcmp (key_value, "running", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"running\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: stop\n", __FUNCTION__, instance_id); + err = sam_stop (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't stop hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("stopped") || memcmp (key_value, "stopped", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"stopped\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: sleeping 5\n", __FUNCTION__, instance_id); + sleep (5); + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("stopped") || memcmp (key_value, "stopped", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"stopped\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: start 2\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("running") || memcmp (key_value, "running", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"running\".\n"); + return (2); + } + + if (test_n == 2) { + syslog (LOG_INFO, "%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id); + sleep (5); + + return (2); + } else { + syslog (LOG_INFO, "%s iid %d: Test HC\n", __FUNCTION__, instance_id); + err = sam_hc_send (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't send hc. Error %d\n", err); + return 2; + } + err = confdb_key_get_typed (cdb_handle, pid_handle, "last_updated", &tstamp1, &value_len, &cdbtype); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + syslog (LOG_INFO, "%s iid %d: Sleep 1\n", __FUNCTION__, instance_id); + sleep (1); + err = sam_hc_send (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't send hc. Error %d\n", err); + return 2; + } + sleep (1); + err = confdb_key_get_typed (cdb_handle, pid_handle, "last_updated", &tstamp2, &value_len, &cdbtype); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + msec_diff = (tstamp2 - tstamp1)/CS_TIME_NS_IN_MSEC; + + if (msec_diff < 500 || msec_diff > 2000) { + syslog (LOG_INFO, "Difference %d is not within <500, 2000> interval.\n", msec_diff); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: stop 2\n", __FUNCTION__, instance_id); + err = sam_stop (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't stop hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("stopped") || memcmp (key_value, "stopped", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"stopped\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: exiting\n", __FUNCTION__, instance_id); + return (0); + } + } + + if (test_n == 3) { + syslog (LOG_INFO, "%s Testing if status is failed\n", __FUNCTION__); + + /* + * Previous should be FAILED + */ + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"failed\".\n"); + return (2); + } + + return (0); + } + + return (2); +} + +/* + * Test confdb integration + restart policy + */ +static int test9 (pid_t pid, pid_t old_pid, int test_n) { + confdb_handle_t cdb_handle; + cs_error_t err; + hdb_handle_t res_handle, proc_handle, pid_handle; + size_t value_len; + char key_value[256]; + unsigned int instance_id; + char tmp_obj[PATH_MAX]; + + err = confdb_initialize (&cdb_handle, NULL); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err); + return (1); + } + + syslog (LOG_INFO, "%s test %d\n", __FUNCTION__, test_n); + + if (test_n == 1) { + syslog (LOG_INFO, "%s: initialize\n", __FUNCTION__); + err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CONFDB); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", err); + return 2; + } + syslog (LOG_INFO, "%s: iid %d\n", __FUNCTION__, instance_id); + + if (instance_id < 3) { + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), + &res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"recovery\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("restart") || memcmp (key_value, "restart", value_len) != 0) { + syslog (LOG_INFO, "Recovery key \"%s\" is not \"restart\".\n", key_value); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("stopped") || memcmp (key_value, "stopped", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"stopped\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("running") || memcmp (key_value, "running", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"running\".\n"); + return (2); + } + + syslog (LOG_INFO, "%s iid %d: waiting for kill\n", __FUNCTION__, instance_id); + sleep (10); + + return (2); + } + + if (instance_id == 3) { + syslog (LOG_INFO, "%s iid %d: mark failed\n", __FUNCTION__, instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + err = sam_mark_failed (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't mark failed. Error %d\n", err); + return 2; + } + + sleep (10); + + return (2); + } + + return (2); + } + + if (test_n == 2) { + syslog (LOG_INFO, "%s Testing if status is failed\n", __FUNCTION__); + + /* + * Previous should be FAILED + */ + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + syslog (LOG_INFO, "Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) { + syslog (LOG_INFO, "State key is not \"failed\".\n"); + return (2); + } + + return (0); + } + + return (2); +} + +static int hc_allways_respond_cb(void) +{ + syslog (LOG_INFO, "%s() -> health check OK.", __FUNCTION__); + return 0; +} + +static int setup_hc (void) +{ + cs_error_t err; + unsigned int instance_id; + + err = sam_initialize (1000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + syslog (LOG_INFO, "%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't register. Error %d\n", err); + return 2; + } + err = sam_hc_callback_register (hc_allways_respond_cb); + + syslog (LOG_INFO, "%s instance id %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + syslog (LOG_ERR, "Can't start hc. Error %d\n", err); + return 2; + } + + return (0); +} + +static int do_test_command(int sock, char* func, char*args[], int num_args) +{ + int err = 0; + pid_t pid; + int stat; pid = fork (); if (pid == -1) { syslog (LOG_ERR, "Can't fork\n"); - send (sock, response, strlen (response) + 1, 0); - return; + return -1; } if (pid == 0) { if (strcmp ("test1", func) == 0) { err = test1 (); } else if (strcmp ("test2", func) == 0) { err = test2 (); - } else if (strcmp ("test3", func) == 0) { - err = test3 (); } else if (strcmp ("test4", func) == 0) { err = test4 (); - } else { - err = -1; - please_wait = 0; - syslog (LOG_ERR,"%s RPC:%s not supported!", __func__, func); - snprintf (response, 100, "%s", NOT_SUPPORTED_STR); + } else if (strcmp ("test5", func) == 0) { + err = test5 (); + } else if (strcmp ("test6", func) == 0) { + err = test6 (); + } else if (strcmp ("test_quorum", func) == 0) { + err = test_quorum (); + } else if (strcmp ("test8", func) == 0) { + err = test8 (getpid(), 0, 1); + } else if (strcmp ("test9", func) == 0) { + err = test9 (getpid(), 0, 1); } + sam_finalize (); + exit(err); } - if (please_wait) { + if (pid > 0) { waitpid (pid, &stat, 0); - if (WEXITSTATUS (stat) == 0) { - snprintf (response, 100, "%s", OK_STR); - } else { - snprintf (response, 100, "%s", FAIL_STR); - } + return WEXITSTATUS (stat); } - send (sock, response, strlen (response) + 1, 0); + return -1; } +static void do_command (int sock, char* func, char*args[], int num_args) +{ + char response[100]; + int err = 0; + + if (parse_debug) { + syslog (LOG_INFO, "RPC:%s() called.", func); + } + if (strncmp ("test", func, 4) == 0) { + err = do_test_command(sock, func, args, num_args); + } else if (strcmp ("setup_hc", func) == 0) { + err = setup_hc (); + } else if (strcmp ("sam_stop", func) == 0) { + sam_stop (); + sam_finalize(); + } else { + err = -1; + syslog (LOG_ERR,"%s RPC:%s not supported!", __func__, func); + snprintf (response, 100, "%s", NOT_SUPPORTED_STR); + } + + if (err == 0) { + snprintf (response, 100, "%s", OK_STR); + } else if (err == 1) { + snprintf (response, 100, "%s", FAIL_STR); + syslog (LOG_ERR, "%s() test skipped?! (%d).", func, err); + } else { + snprintf (response, 100, "%s", FAIL_STR); + syslog (LOG_ERR, "%s() failed (%d).", func, err); + } + send (sock, response, strlen (response) + 1, 0); +} + int main (int argc, char *argv[]) { int ret; openlog (NULL, LOG_CONS|LOG_PID, LOG_DAEMON); syslog (LOG_ERR, "sam_test_agent STARTING"); parse_debug = 1; ret = test_agent_run (9036, do_command); syslog (LOG_ERR, "sam_test_agent EXITING"); return ret; } - - - - - - - - diff --git a/cts/agents/votequorum_test_agent.c b/cts/agents/votequorum_test_agent.c index bbfb52e9..7ebd12f5 100644 --- a/cts/agents/votequorum_test_agent.c +++ b/cts/agents/votequorum_test_agent.c @@ -1,335 +1,339 @@ /* * Copyright (c) 2010 Red Hat Inc * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common_test_agent.h" static quorum_handle_t q_handle = 0; static votequorum_handle_t vq_handle = 0; static void votequorum_notification_fn( votequorum_handle_t handle, uint64_t context, uint32_t quorate, uint32_t node_list_entries, votequorum_node_t node_list[]) { syslog (LOG_INFO, "VQ notification quorate: %d", quorate); } static void quorum_notification_fn( quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { syslog (LOG_INFO, "NQ notification quorate: %d", quorate); } static int vq_dispatch_wrapper_fn (hdb_handle_t handle, int fd, int revents, void *data) { cs_error_t error = votequorum_dispatch (vq_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { syslog (LOG_ERR, "%s() got LIB error disconnecting from corosync.", __func__); poll_dispatch_delete (ta_poll_handle_get(), fd); close (fd); } return 0; } static int q_dispatch_wrapper_fn (hdb_handle_t handle, int fd, int revents, void *data) { cs_error_t error = quorum_dispatch (q_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { syslog (LOG_ERR, "%s() got LIB error disconnecting from corosync.", __func__); poll_dispatch_delete (ta_poll_handle_get(), fd); close (fd); } return 0; } static int q_lib_init(void) { votequorum_callbacks_t vq_callbacks; quorum_callbacks_t q_callbacks; int ret = 0; + int retry = 3; int fd; if (vq_handle == 0) { syslog (LOG_INFO, "votequorum_initialize"); vq_callbacks.votequorum_notify_fn = votequorum_notification_fn; vq_callbacks.votequorum_expectedvotes_notify_fn = NULL; ret = CS_ERR_NOT_EXIST; - while (ret == CS_ERR_NOT_EXIST) { + while (ret == CS_ERR_NOT_EXIST && retry > 0) { ret = votequorum_initialize (&vq_handle, &vq_callbacks); - sleep (1); + if (ret == CS_ERR_NOT_EXIST) { + sleep (1); + retry--; + } } if (ret != CS_OK) { syslog (LOG_ERR, "votequorum_initialize FAILED: %d\n", ret); vq_handle = 0; } else { ret = votequorum_trackstart (vq_handle, vq_handle, CS_TRACK_CHANGES); if (ret != CS_OK) { syslog (LOG_ERR, "votequorum_trackstart FAILED: %d\n", ret); } votequorum_fd_get (vq_handle, &fd); poll_dispatch_add (ta_poll_handle_get(), fd, POLLIN|POLLNVAL, NULL, vq_dispatch_wrapper_fn); } } if (q_handle == 0) { syslog (LOG_INFO, "quorum_initialize"); q_callbacks.quorum_notify_fn = quorum_notification_fn; ret = quorum_initialize (&q_handle, &q_callbacks); if (ret != CS_OK) { syslog (LOG_ERR, "quorum_initialize FAILED: %d\n", ret); q_handle = 0; } else { ret = quorum_trackstart (q_handle, CS_TRACK_CHANGES); if (ret != CS_OK) { syslog (LOG_ERR, "quorum_trackstart FAILED: %d\n", ret); } quorum_fd_get (q_handle, &fd); poll_dispatch_add (ta_poll_handle_get(), fd, POLLIN|POLLNVAL, NULL, q_dispatch_wrapper_fn); } } return ret; } static void lib_init (int sock) { int ret; char response[100]; snprintf (response, 100, "%s", OK_STR); ret = q_lib_init (); if (ret != CS_OK) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "q_lib_init FAILED: %d\n", ret); } send (sock, response, strlen (response), 0); } static void getinfo (int sock) { int ret; struct votequorum_info info; char response[100]; q_lib_init (); ret = votequorum_getinfo(vq_handle, 0, &info); if (ret != CS_OK) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "votequorum_getinfo FAILED: %d\n", ret); goto send_response; } snprintf (response, 100, "%d:%d:%d:%d:%d", info.node_votes, info.node_expected_votes, info.highest_expected, info.total_votes, info.quorum); send_response: send (sock, response, strlen (response), 0); } static void setexpected (int sock, char *arg) { int ret; char response[100]; q_lib_init (); ret = votequorum_setexpected (vq_handle, atoi(arg)); if (ret != CS_OK) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "set expected votes FAILED: %d\n", ret); goto send_response; } snprintf (response, 100, "%s", OK_STR); send_response: votequorum_finalize (vq_handle); send (sock, response, strlen (response) + 1, 0); } static void setvotes (int sock, char *arg) { int ret; char response[100]; q_lib_init (); ret = votequorum_setvotes (vq_handle, 0, atoi(arg)); if (ret != CS_OK) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "set votes FAILED: %d\n", ret); goto send_response; } snprintf (response, 100, "%s", OK_STR); send_response: votequorum_finalize (vq_handle); send (sock, response, strlen (response), 0); } static void getquorate (int sock) { int ret; int quorate; char response[100]; q_lib_init (); ret = quorum_getquorate (q_handle, &quorate); if (ret != CS_OK) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "getquorate FAILED: %d\n", ret); goto send_response; } snprintf (response, 100, "%d", quorate); send_response: send (sock, response, strlen (response), 0); } static void context_test (int sock) { char response[100]; char *cmp; snprintf (response, 100, "%s", OK_STR); votequorum_context_set (vq_handle, response); votequorum_context_get (vq_handle, (void**)&cmp); if (response != cmp) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "votequorum context not the same"); } quorum_context_set (q_handle, response); quorum_context_get (q_handle, (const void**)&cmp); if (response != cmp) { snprintf (response, 100, "%s", FAIL_STR); syslog (LOG_ERR, "quorum context not the same"); } send (sock, response, strlen (response) + 1, 0); } static void do_command (int sock, char* func, char*args[], int num_args) { char response[100]; if (parse_debug) syslog (LOG_DEBUG,"RPC:%s() called.", func); if (strcmp ("votequorum_getinfo", func) == 0) { getinfo (sock); } else if (strcmp ("votequorum_setvotes", func) == 0) { setvotes (sock, args[0]); } else if (strcmp ("votequorum_setexpected", func) == 0) { setexpected (sock, args[0]); } else if (strcmp ("quorum_getquorate", func) == 0) { getquorate (sock); } else if (strcmp ("init", func) == 0) { lib_init (sock); } else if (strcmp ("context_test", func) == 0) { context_test (sock); } else if (strcmp ("are_you_ok_dude", func) == 0) { snprintf (response, 100, "%s", OK_STR); send (sock, response, strlen (response) + 1, 0); } else { syslog (LOG_ERR,"%s RPC:%s not supported!", __func__, func); snprintf (response, 100, "%s", NOT_SUPPORTED_STR); send (sock, response, strlen (response), 0); } } int main (int argc, char *argv[]) { int ret; openlog (NULL, LOG_CONS|LOG_PID, LOG_DAEMON); syslog (LOG_ERR, "votequorum_test_agent STARTING"); parse_debug = 1; ret = test_agent_run (9037, do_command); syslog (LOG_ERR, "votequorum_test_agent EXITING"); return ret; } diff --git a/cts/corosync.py b/cts/corosync.py index 84323739..70dff3c7 100644 --- a/cts/corosync.py +++ b/cts/corosync.py @@ -1,619 +1,630 @@ '''CTS: Cluster Testing System: corosync... ''' __copyright__=''' Copyright (c) 2010 Red Hat, Inc. ''' # All rights reserved. # # Author: Angus Salkeld # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. import os import sys import time import socket import shutil import string import augeas from cts.CTS import ClusterManager from cts.CTSscenarios import ScenarioComponent from cts.CTS import RemoteExec from cts.CTSvars import CTSvars ################################################################### class CoroConfig(object): def __init__(self, corobase=None): self.base = "/files/etc/corosync/corosync.conf/" self.new_root = "/tmp/aug-root/" if corobase == None: self.corobase = os.getcwd() + "/.." else: self.corobase = corobase example = self.corobase + "/conf/corosync.conf.example" if os.path.isdir(self.new_root): shutil.rmtree (self.new_root) os.makedirs (self.new_root + "/etc/corosync") shutil.copy (example, self.new_root + "/etc/corosync/corosync.conf") self.aug = augeas.Augeas (root=self.new_root, loadpath=self.corobase + "/conf/lenses") self.original = {} # store the original values (of totem), so we can restore them in # apply_default_config() totem = self.aug.match('/files/etc/corosync/corosync.conf/totem/*') for c in totem: # /files/etc/corosync/corosync.conf/ short_name = c[len(self.base):] self.original[short_name] = self.aug.get(c) interface = self.aug.match('/files/etc/corosync/corosync.conf/totem/interface/*') for c in interface: short_name = c[len(self.base):] self.original[short_name] = self.aug.get(c) def get (self, name): return self.aug.get (self.base + name) def set (self, name, value): token = self.aug.set (self.base + name, str(value)) def save (self): self.aug.save() def get_filename(self): return self.new_root + "/etc/corosync/corosync.conf" ################################################################### class corosync_flatiron(ClusterManager): ''' bla ''' def __init__(self, Environment, randseed=None): ClusterManager.__init__(self, Environment, randseed) self.update({ "Name" : "corosync(flatiron)", "StartCmd" : CTSvars.INITDIR+"/corosync start", "StopCmd" : CTSvars.INITDIR+"/corosync stop", "RereadCmd" : CTSvars.INITDIR+"/corosync reload", "StatusCmd" : CTSvars.INITDIR+"/corosync status %s", "DeadTime" : 30, "StartTime" : 15, # Max time to start up "StableTime" : 10, "BreakCommCmd" : "/usr/share/corosync/tests/net_breaker.sh BreakCommCmd %s", "FixCommCmd" : "/usr/share/corosync/tests/net_breaker.sh FixCommCmd %s", "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status.*", "Pat:They_stopped" : "%s.*Member left:.*%s.*", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:Local_starting" : "%s.*started and ready to provide service.", "Pat:Local_started" : "%s.*started and ready to provide service.", "Pat:Master_started" : "%s.*Completed service synchronization, ready to provide service.", "Pat:Slave_started" : "%s.*Completed service synchronization, ready to provide service.", "Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s", "Pat:ChildExit" : "Child process .* exited", "Pat:DC_IDLE" : ".*A processor joined or left the membership and a new membership was formed.", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"core dump", r"Could not bind AF_UNIX", ), "LogFileName" : Environment["LogFileName"], }) self.start_cpg = True self.cpg_agent = {} self.confdb_agent = {} self.sam_agent = {} self.votequorum_agent = {} self.config = CoroConfig () self.node_to_ip = {} self.new_config = {} self.new_config['service[1]/name'] = 'corosync_tst_sv2' self.new_config['service[1]/ver'] = '0' self.applied_config = {} for n in self.Env["nodes"]: ip = socket.gethostbyname(n) ips = ip.split('.') ips[3] = '0' ip_mask = '.'.join(ips) self.new_config['totem/interface/bindnetaddr'] = str(ip_mask) return def apply_default_config(self): for c in self.applied_config: if 'bindnetaddr' in c: continue elif not self.config.original.has_key(c): # new config option (non default) pass elif self.applied_config[c] is not self.config.original[c]: # reset to the original self.new_config[c] = self.config.original[c] if len(self.new_config) > 0: self.debug('applying default config') self.stopall() def apply_new_config(self): if len(self.new_config) > 0: self.debug('applying new config') self.stopall() self.startall() def install_all_config(self): tmp1 = {} for c in self.new_config: self.log('configuring: ' + c + ' = '+ str(self.new_config[c])) self.config.set (c, self.new_config[c]) self.applied_config[c] = self.new_config[c] tmp1[c] = self.new_config[c] for c in tmp1: del self.new_config[c] self.config.save() src_file = self.config.get_filename() for node in self.Env["nodes"]: self.rsh.cp(src_file, "%s:%s" % (node, "/etc/corosync/")) def install_config(self, node): # install gets new_config and installs it, then moves the # config to applied_config if len(self.new_config) > 0: self.install_all_config() def key_for_node(self, node): if not self.node_to_ip.has_key(node): self.node_to_ip[node] = socket.gethostbyname (node) return self.node_to_ip[node] def StartaCM(self, node): if not self.ShouldBeStatus.has_key(node): self.ShouldBeStatus[node] = "down" if self.ShouldBeStatus[node] != "down": return 1 self.debug('starting corosync on : ' + node) ret = ClusterManager.StartaCM(self, node) if self.start_cpg: if self.cpg_agent.has_key(node): self.cpg_agent[node].restart() else: self.cpg_agent[node] = CpgTestAgent(node, self.Env) self.cpg_agent[node].start() if self.confdb_agent.has_key(node): self.confdb_agent[node].restart() if self.sam_agent.has_key(node): self.sam_agent[node].restart() # votequorum agent started as needed. if self.applied_config.has_key('quorum/provider'): - if self.votequorum_agent.has_key(node): - self.votequorum_agent[node].restart() - else: - self.votequorum_agent[node] = VoteQuorumTestAgent(node, self.Env) - self.votequorum_agent[node].start() + if self.applied_config['quorum/provider'] is 'corosync_votequorum': + if self.votequorum_agent.has_key(node): + self.votequorum_agent[node].restart() + else: + self.votequorum_agent[node] = VoteQuorumTestAgent(node, self.Env) + self.votequorum_agent[node].start() return ret def StopaCM(self, node): if self.ShouldBeStatus[node] != "up": return 1 self.debug('stoping corosync on : ' + node) if self.cpg_agent.has_key(node): self.cpg_agent[node].stop() if self.sam_agent.has_key(node): self.sam_agent[node].stop() if self.votequorum_agent.has_key(node): self.votequorum_agent[node].stop() return ClusterManager.StopaCM(self, node) def test_node_CM(self, node): # 2 - up and stable # 1 - unstable # 0 - down out = self.rsh(node, self["StatusCmd"], 1) is_stopped = string.find(out, 'stopped') is_dead = string.find(out, 'dead') ret = (is_dead is -1 and is_stopped is -1) try: if ret: ret = 2 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s" % (node, "up", self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node] = "up" else: self.ShouldBeStatus[node] = "down" return ret def StataCM(self, node): '''Report the status of corosync on a given node''' if self.test_node_CM(node) > 0: return 1 else: return None def RereadCM(self, node): self.log('reloading corosync on : ' + node) return ClusterManager.RereadCM(self, node) def find_partitions(self): ccm_partitions = [] return ccm_partitions def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.log("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): return None ################################################################### class TestAgentComponent(ScenarioComponent): def __init__(self, Env): self.Env = Env def IsApplicable(self): '''Return TRUE if the current ScenarioComponent is applicable in the given LabEnvironment given to the constructor. ''' return True def SetUp(self, CM): '''Set up the given ScenarioComponent''' self.CM = CM for node in self.Env["nodes"]: if not CM.StataCM(node): raise RuntimeError ("corosync not up") if self.CM.start_cpg: self.CM.cpg_agent[node] = CpgTestAgent(node, CM.Env) self.CM.cpg_agent[node].start() self.CM.confdb_agent[node] = ConfdbTestAgent(node, CM.Env) self.CM.confdb_agent[node].start() self.CM.sam_agent[node] = SamTestAgent(node, CM.Env) self.CM.sam_agent[node].start() # votequorum agent started as needed. - if CM.applied_config.has_key('quorum/provider'): - self.CM.votequorum_agent[node] = VoteQuorumTestAgent(node, CM.Env) - self.CM.votequorum_agent[node].start() + if self.CM.applied_config.has_key('quorum/provider'): + if CM.applied_config['quorum/provider'] is 'corosync_votequorum': + self.CM.votequorum_agent[node] = VoteQuorumTestAgent(node, CM.Env) + self.CM.votequorum_agent[node].start() return 1 def TearDown(self, CM): '''Tear down (undo) the given ScenarioComponent''' self.CM = CM for node in self.Env["nodes"]: if self.CM.cpg_agent.has_key(node): self.CM.cpg_agent[node].stop() self.CM.confdb_agent[node].stop() self.CM.sam_agent[node].stop() if self.CM.votequorum_agent.has_key(node): self.CM.votequorum_agent[node].stop() ################################################################### class TestAgent(object): def __init__(self, binary, node, port, env=None): self.node = node self.node_address = None self.port = port self.sock = None self.binary = binary self.started = False self.rsh = RemoteExec(Env=env) self.func_name = None self.used = False self.env = env self.send_recv = False def restart(self): self.stop() self.start() def clean_start(self): if self.used or not self.status(): self.env.debug('test agent: cleaning %s on node %s' % (self.binary, self.node)) self.stop() self.start() def status(self): if not self.started: return False try: self.send (["are_you_ok_dude"]) self.read () self.started = True return True except RuntimeError, msg: self.started = False return False def start(self): '''Set up the given ScenarioComponent''' self.env.debug('test agent: starting %s on node %s' % (self.binary, self.node)) self.sock = socket.socket (socket.AF_INET, socket.SOCK_STREAM) ip = socket.gethostbyname(self.node) self.rsh(self.node, self.binary, blocking=0) is_connected = False retries = 0 while not is_connected: try: retries = retries + 1 self.sock.connect ((ip, self.port)) is_connected = True except socket.error, msg: if retries > 5: self.env.log("Retried " + str(retries) + " times. Error: " + str(msg)) time.sleep(1) self.started = True self.used = False def stop(self): '''Tear down (undo) the given ScenarioComponent''' self.env.debug('test agent: stopping %s on node %s' % (self.binary, self.node)) self.sock.close () self.rsh(self.node, "killall " + self.binary + " 2>/dev/null") self.started = False + def kill(self): + '''Tear down (undo) the given ScenarioComponent''' + self.env.debug('test agent: killing %s on node %s' % (self.binary, self.node)) + self.rsh(self.node, "killall -9 " + self.binary + " 2>/dev/null") + self.started = False + + def getpid(self): + return self.rsh(self.node, 'pidof ' + self.binary, 1) + def send (self, args): if not self.started: self.start() real_msg = str (len (args)) for a in args: a_str = str(a) real_msg += ":" + str (len (a_str)) + ":" + a_str real_msg += ";" sent = 0 try: sent = self.sock.send (real_msg) except socket.error, msg: self.env.debug("send(%s): %s; error: %s" % (self.node, real_msg, msg)) if sent == 0: raise RuntimeError ("socket connection broken") self.used = True def __getattribute__(self,name): try: return object.__getattribute__(self, name) except: self.func_name = name if self.send_recv: return self.send_recv_dynamic else: return self.send_dynamic def send_recv_dynamic (self, *args): self.send_dynamic (args) try: res = self.read () except RuntimeError, msg: - self.env.log("send_recv_dynamic: %s; error: %s" % (str(real_msg), msg)) + self.env.log("send_recv_dynamic: %s(); error: %s" % (self.func_name, msg)) return res def send_dynamic (self, *args): if not self.started: self.start() # number of args+func real_msg = str (len (args) + 1) + ":" + str(len(self.func_name)) + ":" + self.func_name for a in args: a_str = str(a) real_msg += ":" + str (len (a_str)) + ":" + a_str real_msg += ";" sent = 0 try: sent = self.sock.send (real_msg) except socket.error, msg: self.env.debug("send_dynamic(%s): %s; error: %s" % (self.node, real_msg, msg)) if sent == 0: raise RuntimeError ("socket connection broken") self.used = True def read (self): msg = self.sock.recv (4096) if msg == '': raise RuntimeError("socket connection broken") return msg class CpgConfigEvent: def __init__(self, msg): info = msg.split(',') self.group_name = info[0] self.node_id = info[1] self.node = None self.pid = info[2] if "left" in info[3]: self.is_member = False else: self.is_member = True def __str__ (self): str = self.group_name + "," + self.node_id + "," + self.pid + "," if self.is_member: return str + "joined" else: return str + "left" ################################################################### class CpgTestAgent(TestAgent): def __init__(self, node, Env=None): TestAgent.__init__(self, "cpg_test_agent", node, 9034, env=Env) self.initialized = False self.nodeid = None def start(self): if not self.started: TestAgent.start(self) self.cpg_initialize() self.used = False def stop(self): try: if self.started: self.cpg_finalize() except RuntimeError, msg: # if cpg_agent is down, we are not going to stress self.env.debug("CpgTestAgent::cpg_finalize() - %s" % msg) TestAgent.stop(self) def cpg_local_get(self): if self.nodeid == None: self.send (["cpg_local_get"]) self.nodeid = self.read () return self.nodeid def record_config_events(self, truncate=True): if truncate: self.send (["record_config_events", "truncate"]) else: self.send (["record_config_events", "append"]) return self.read () def read_config_event(self): self.send (["read_config_event"]) msg = self.read () if "None" in msg: return None else: return CpgConfigEvent(msg) def read_messages(self, atmost): self.send (["read_messages", atmost]) msg = self.read () if "None" in msg: return None else: return msg def context_test(self): self.send (["context_test"]) return self.read () ################################################################### class ConfdbTestAgent(TestAgent): def __init__(self, node, Env=None): TestAgent.__init__(self, "confdb_test_agent", node, 9035, env=Env) self.initialized = False self.nodeid = None self.send_recv = True ################################################################### class SamTestAgent(TestAgent): def __init__(self, node, Env=None): TestAgent.__init__(self, "sam_test_agent", node, 9036, env=Env) self.initialized = False self.nodeid = None self.send_recv = True ################################################################### class VoteQuorumTestAgent(TestAgent): def __init__(self, node, Env=None): TestAgent.__init__(self, "votequorum_test_agent", node, 9037, env=Env) self.initialized = False self.nodeid = None self.send_recv = True def start(self): if not self.started: TestAgent.start(self) self.init() self.used = False diff --git a/cts/corotests.py b/cts/corotests.py index 22d8c79f..514ca3af 100644 --- a/cts/corotests.py +++ b/cts/corotests.py @@ -1,1208 +1,1646 @@ __copyright__=''' Copyright (c) 2010 Red Hat, Inc. ''' # All rights reserved. # # Author: Angus Salkeld # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. +import random from UserDict import UserDict from cts.CTStests import * from corosync import CpgTestAgent ################################################################### class CoroTest(CTSTest): ''' basic class to make sure that new configuration is applied and old configuration is removed. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.start = StartTest(cm) self.stop = StopTest(cm) self.config = {} self.need_all_up = True self.CM.start_cpg = True def setup(self, node): ret = CTSTest.setup(self, node) # setup the authkey localauthkey = '/tmp/authkey' if not os.path.exists(localauthkey): self.CM.rsh(node, 'corosync-keygen') self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey) for n in self.CM.Env["nodes"]: if n is not node: #copy key onto other nodes self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey")) # copy over any new config for c in self.config: self.CM.new_config[c] = self.config[c] # apply the config self.CM.apply_new_config() # start/stop all corosyncs' for n in self.CM.Env["nodes"]: if self.need_all_up and not self.CM.StataCM(n): self.incr("started") self.start(n) if self.need_all_up and self.CM.start_cpg: self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.name) self.CM.cpg_agent[n].cfg_initialize() if not self.need_all_up and self.CM.StataCM(n): self.incr("stopped") self.stop(n) return ret def config_valid(self, config): return True def teardown(self, node): self.CM.apply_default_config() return CTSTest.teardown(self, node) ################################################################### class CpgContextTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="CpgContextTest" self.CM.start_cpg = True def __call__(self, node): self.incr("calls") res = self.CM.cpg_agent[node].context_test() if 'OK' in res: return self.success() else: return self.failure('context_test failed') ################################################################### class CpgConfigChangeBase(CoroTest): ''' join a cpg group on each node, and test that the following causes a leave event: - a call to cpg_leave() - app exit - node leave - node leave (with large token timeout) ''' def setup(self, node): ret = CoroTest.setup(self, node) self.listener = None self.wobbly = None for n in self.CM.Env["nodes"]: if self.wobbly is None: self.wobbly = n elif self.listener is None: self.listener = n if self.CM.cpg_agent.has_key(self.wobbly): self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get() if self.CM.cpg_agent.has_key(self.listener): self.CM.cpg_agent[self.listener].record_config_events(truncate=True) return ret def wait_for_config_change(self): found = False max_timeout = 30 * 60 waited = 0 printit = 0 self.CM.log("Waiting for config change on " + self.listener) while not found: try: event = self.CM.cpg_agent[self.listener].read_config_event() except: return self.failure('connection to test cpg_agent failed.') if not event == None: self.CM.debug("RECEIVED: " + str(event)) if event == None: if waited >= max_timeout: return self.failure("timedout(" + str(waited) + " sec) == no event!") else: time.sleep(1) waited = waited + 1 printit = printit + 1 if printit is 60: print 'waited ' + str(waited) + ' seconds' printit = 0 elif str(event.node_id) in str(self.wobbly_id) and not event.is_member: self.CM.log("Got the config change in " + str(waited) + " seconds") found = True else: self.CM.debug("No match") self.CM.debug("wobbly nodeid:" + str(self.wobbly_id)) self.CM.debug("event nodeid:" + str(event.node_id)) self.CM.debug("event.is_member:" + str(event.is_member)) if found: return self.success() ################################################################### class CpgCfgChgOnGroupLeave(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnGroupLeave" def failure_action(self): self.CM.log("calling cpg_leave() on " + self.wobbly) self.CM.cpg_agent[self.wobbly].cpg_leave(self.name) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnNodeLeave(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeLeave" def failure_action(self): self.CM.log("stopping corosync on " + self.wobbly) self.stop(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnLowestNodeJoin(CTSTest): ''' 1) stop all nodes 2) start all but the node with the smallest ip address 3) start recording events 4) start the last node ''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="CpgCfgChgOnLowestNodeJoin" self.start = StartTest(cm) self.stop = StopTest(cm) self.config = {} self.need_all_up = False self.config['compatibility'] = 'none' def config_valid(self, config): return True def lowest_ip_set(self): self.lowest = None for n in self.CM.Env["nodes"]: if self.lowest is None: self.lowest = n self.CM.log("lowest node is " + self.lowest) def setup(self, node): # stop all nodes for n in self.CM.Env["nodes"]: self.CM.StopaCM(n) self.lowest_ip_set() # copy over any new config for c in self.config: self.CM.new_config[c] = self.config[c] # install the config self.CM.install_all_config() # start all but lowest self.listener = None for n in self.CM.Env["nodes"]: if n is not self.lowest: if self.listener is None: self.listener = n self.incr("started") self.CM.log("starting " + n) self.start(n) self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.name) # start recording events pats = [] pats.append("%s .*sync: node joined.*" % self.listener) pats.append("%s .*sync: activate correctly.*" % self.listener) self.sync_log = self.create_watch(pats, 60) self.sync_log.setwatch() self.CM.log("setup done") return CTSTest.setup(self, node) def __call__(self, node): self.incr("calls") self.start(self.lowest) self.CM.cpg_agent[self.lowest].clean_start() self.CM.cpg_agent[self.lowest].cpg_join(self.name) self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get() self.CM.log("waiting for sync events") if not self.sync_log.lookforall(): return self.failure("Patterns not found: " + repr(self.sync_log.unmatched)) else: return self.success() ################################################################### class CpgCfgChgOnExecCrash(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnExecCrash" def failure_action(self): self.CM.log("sending KILL to corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeIsolate" def config_valid(self, config): if config.has_key('totem/rrp_mode'): return False else: return True def failure_action(self): self.CM.log("isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() def teardown(self, node): self.CM.unisolate_node (self.wobbly) return CpgConfigChangeBase.teardown(self, node) ################################################################### class CpgCfgChgOnNodeRestart(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeRestart" self.CM.start_cpg = False def config_valid(self, config): if config.has_key('totem/secauth'): if config['totem/secauth'] is 'on': return False else: return True if config.has_key('totem/rrp_mode'): return False else: return True def failure_action(self): self.CM.log("2: isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) self.CM.log("3: Killing corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" self.CM.log("4: unisolating node " + self.wobbly) self.CM.unisolate_node (self.wobbly) self.CM.log("5: starting corosync on " + self.wobbly) self.CM.StartaCM(self.wobbly) time.sleep(5) self.CM.log("6: starting cpg on all nodes") self.CM.start_cpg = True for node in self.CM.Env["nodes"]: self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env) self.CM.cpg_agent[node].start() self.CM.cpg_agent[node].cpg_join(self.name) self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get() self.CM.cpg_agent[self.listener].record_config_events(truncate=True) self.CM.log("7: isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) self.CM.log("8: Killing corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" self.CM.log("9: unisolating node " + self.wobbly) self.CM.unisolate_node (self.wobbly) self.CM.log("10: starting corosync on " + self.wobbly) self.CM.StartaCM(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() def teardown(self, node): self.CM.unisolate_node (self.wobbly) return CpgConfigChangeBase.teardown(self, node) ################################################################### class CpgMsgOrderBase(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.num_msgs_per_node = 0 self.total_num_msgs = 0 def setup(self, node): ret = CoroTest.setup(self, node) for n in self.CM.Env["nodes"]: self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.name) self.CM.cpg_agent[n].record_messages() time.sleep(1) return ret def cpg_msg_blaster(self): for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node) def wait_and_validate_order(self): msgs = {} for n in self.CM.Env["nodes"]: msgs[n] = [] stopped = False waited = 0 while len(msgs[n]) < self.total_num_msgs and waited < 360: msg = self.CM.cpg_agent[n].read_messages(50) if not msg == None: msgl = msg.split(";") # remove empty entries not_done=True while not_done: try: msgl.remove('') except: not_done = False msgs[n].extend(msgl) elif msg == None: time.sleep(2) waited = waited + 2 if len(msgs[n]) < self.total_num_msgs: return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n]))) fail = False error_message = '' for i in range(0, self.total_num_msgs): first = None for n in self.CM.Env["nodes"]: # first test for errors params = msgs[n][i].split(":") if not 'OK' in params[3]: fail = True error_message = 'error: ' + params[3] + ' in received message' self.CM.log(str(params)) # then look for out of order messages if first == None: first = n else: if not msgs[first][i] == msgs[n][i]: # message order not the same! fail = True error_message = 'message out of order' self.CM.log(msgs[first][i] + " != " + msgs[n][i]) if fail: return self.failure(error_message) else: return self.success() ################################################################### class CpgMsgOrderBasic(CpgMsgOrderBase): ''' each sends & logs lots of messages ''' def __init__(self, cm): CpgMsgOrderBase.__init__(self,cm) self.name="CpgMsgOrderBasic" self.num_msgs_per_node = 9000 def __call__(self, node): self.incr("calls") for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node) return self.wait_and_validate_order() ################################################################### class CpgMsgOrderZcb(CpgMsgOrderBase): ''' each sends & logs lots of messages ''' def __init__(self, cm): CpgMsgOrderBase.__init__(self,cm) self.name="CpgMsgOrderZcb" self.num_msgs_per_node = 9000 def __call__(self, node): self.incr("calls") for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node) return self.wait_and_validate_order() ################################################################### class MemLeakObject(CoroTest): ''' run mem_leak_test.sh -1 ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="MemLeakObject" def __call__(self, node): self.incr("calls") mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1") if mem_leaked is 0: return self.success() else: return self.failure(str(mem_leaked) + 'kB memory leaked.') ################################################################### class MemLeakSession(CoroTest): ''' run mem_leak_test.sh -2 ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="MemLeakSession" def __call__(self, node): self.incr("calls") mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2") if mem_leaked is 0: return self.success() else: return self.failure(str(mem_leaked) + 'kB memory leaked.') ################################################################### class ServiceLoadTest(CoroTest): ''' Test loading and unloading of service engines ''' def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ServiceLoadTest" def is_loaded(self, node): check = 'corosync-objctl runtime.services. | grep evs' (res, out) = self.CM.rsh(node, check, stdout=2) if res is 0: return True else: return False def service_unload(self, node): # unload evs pats = [] pats.append("%s .*Service engine unloaded: corosync extended.*" % node) unloaded = self.create_watch(pats, 60) unloaded.setwatch() self.CM.rsh(node, 'corosync-cfgtool -u corosync_evs') if not unloaded.lookforall(): self.CM.log("Patterns not found: " + repr(unloaded.unmatched)) self.error_message = "evs service not unloaded" return False if self.is_loaded(node): self.error_message = "evs has been unload, why are it's session objects are still there?" return False return True def service_load(self, node): # now reload it. pats = [] pats.append("%s .*Service engine loaded.*" % node) loaded = self.create_watch(pats, 60) loaded.setwatch() self.CM.rsh(node, 'corosync-cfgtool -l corosync_evs') if not loaded.lookforall(): self.CM.log("Patterns not found: " + repr(loaded.unmatched)) self.error_message = "evs service not unloaded" return False return True def __call__(self, node): self.incr("calls") should_be_loaded = True if self.is_loaded(node): ret = self.service_unload(node) should_be_loaded = False else: ret = self.service_load(node) should_be_loaded = True if not ret: return self.failure(self.error_message) if self.is_loaded(node): ret = self.service_unload(node) else: ret = self.service_load(node) if not ret: return self.failure(self.error_message) return self.success() ################################################################### class ConfdbReplaceTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ConfdbReplaceTest" def __call__(self, node): self.incr("calls") res = self.CM.confdb_agent[node].set_get_test() if 'OK' in res: return self.success() else: return self.failure('set_get_test failed') ################################################################### class ConfdbContextTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ConfdbContextTest" def __call__(self, node): self.incr("calls") res = self.CM.confdb_agent[node].context_test() if 'OK' in res: return self.success() else: return self.failure('context_test failed') ################################################################### class ConfdbIncrementTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ConfdbIncrementTest" def __call__(self, node): self.incr("calls") res = self.CM.confdb_agent[node].increment_decrement_test() if 'OK' in res: return self.success() else: return self.failure('increment_decrement_test failed') ################################################################### class ConfdbObjectFindTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ConfdbObjectFindTest" def __call__(self, node): self.incr("calls") res = self.CM.confdb_agent[node].object_find_test() if 'OK' in res: return self.success() else: return self.failure('object_find_test failed') ################################################################### class ConfdbNotificationTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="ConfdbNotificationTest" def __call__(self, node): self.incr("calls") res = self.CM.confdb_agent[node].notification_test() if 'OK' in res: return self.success() else: return self.failure('notification_test failed') ################################################################### class SamTest1(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest1" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test1() if 'OK' in res: return self.success() else: - return self.failure('sam test 1 failed') + return self.failure(self.name + ' failed') ################################################################### class SamTest2(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest2" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test2() if 'OK' in res: return self.success() else: - return self.failure('sam test 2 failed') + return self.failure(self.name + ' failed') ################################################################### -class SamTest3(CoroTest): +class SamTest4(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) - self.name="SamTest3" + self.name="SamTest4" def __call__(self, node): self.incr("calls") - res = self.CM.sam_agent[node].test3() + res = self.CM.sam_agent[node].test4() if 'OK' in res: return self.success() else: - return self.failure('sam test 3 failed') + return self.failure(self.name + ' failed') ################################################################### -class SamTest4(CoroTest): +class SamTest5(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) - self.name="SamTest4" + self.name="SamTest5" def __call__(self, node): self.incr("calls") - res = self.CM.sam_agent[node].test4() + res = self.CM.sam_agent[node].test5() if 'OK' in res: return self.success() else: - return self.failure('sam test 4 failed') + return self.failure(self.name + ' failed') +################################################################### +class SamTest6(CoroTest): + def __init__(self, cm): + CoroTest.__init__(self, cm) + self.name="SamTest6" + + def __call__(self, node): + self.incr("calls") + res = self.CM.sam_agent[node].test6() + if 'OK' in res: + return self.success() + else: + return self.failure(self.name + ' failed') + +################################################################### +class SamTestQuorum(CoroTest): + def __init__(self, cm): + CoroTest.__init__(self, cm) + self.name="SamTestQuorum" + self.config['quorum/provider'] = 'testquorum' + self.config['quorum/quorate'] = '1' + + def __call__(self, node): + self.incr("calls") + res = self.CM.sam_agent[node].test_quorum() + if 'OK' in res: + return self.success() + else: + return self.failure(self.name + ' failed') + +################################################################### +class SamTest8(CoroTest): + def __init__(self, cm): + CoroTest.__init__(self, cm) + self.name="SamTest8" + + def __call__(self, node): + self.incr("calls") + res = self.CM.sam_agent[node].test8() + if 'OK' in res: + return self.success() + else: + return self.failure(self.name + ' failed') + +################################################################### +class SamTest9(CoroTest): + def __init__(self, cm): + CoroTest.__init__(self, cm) + self.name="SamTest9" + + def __call__(self, node): + self.incr("calls") + res = self.CM.sam_agent[node].test9() + if 'OK' in res: + return self.success() + else: + return self.failure(self.name + ' failed') class QuorumState(object): def __init__(self, cm, node): self.node = node self.CM = cm self.CM.votequorum_agent[self.node].init() def refresh(self): info = self.CM.votequorum_agent[self.node].votequorum_getinfo() assert(info != 'FAIL') assert(info != 'NOT_SUPPORTED') #self.CM.log('refresh: ' + info) params = info.split(':') self.node_votes = int(params[0]) self.expected_votes = int(params[1]) self.highest_expected = int(params[2]) self.total_votes = int(params[3]) self.quorum = int(params[4]) self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate() assert(self.quorate != 'FAIL') assert(self.quorate != 'NOT_SUPPORTED') #self.CM.log('quorate: ' + str(self.quorate)) ################################################################### class VoteQuorumBase(CoroTest): def setup(self, node): ret = CoroTest.setup(self, node) self.id_map = {} self.listener = None for n in self.CM.Env["nodes"]: if self.listener is None: self.listener = n if self.need_all_up: self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.name) self.id_map[n] = self.CM.cpg_agent[n].cpg_local_get() return ret def config_valid(self, config): if config.has_key('totem/rrp_mode'): return False if config.has_key('quorum/provider'): return False return True ################################################################### class VoteQuorumGoDown(VoteQuorumBase): # all up # calc min expected votes to get Q # bring nodes down one-by-one # confirm cluster looses Q when V < EV # def __init__(self, cm): VoteQuorumBase.__init__(self, cm) self.name="VoteQuorumGoDown" self.victims = [] self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected #self.CM.log('set expected to %d' % (self.expected)) def __call__(self, node): self.incr("calls") pats = [] pats.append("%s .*VQ notification quorate: 0" % self.listener) pats.append("%s .*NQ notification quorate: 0" % self.listener) quorum = self.create_watch(pats, 30) quorum.setwatch() state = QuorumState(self.CM, self.listener) state.refresh() for n in self.CM.Env["nodes"]: if n is self.listener: continue self.victims.append(n) self.CM.StopaCM(n) #if not self.wait_for_quorum_change(): # return self.failure(self.error_message) nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims) state.refresh() #self.expected = self.expected - 1 if state.node_votes != 1: self.failure('unexpected number of node_votes') if state.expected_votes != self.expected: self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected)) self.failure('unexpected number of expected_votes') if state.total_votes != nodes_alive: self.failure('unexpected number of total votes:%d, nodes_alive:%d', (state.total_votes, nodes_alive)) min = ((len(self.CM.Env["nodes"]) + 2) / 2) if min != state.quorum: self.failure('we should have %d (not %d) as quorum' % (min, state.quorum)) if nodes_alive < state.quorum: if state.quorate == 1: self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive)) else: if state.quorate == 0: self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive)) if not quorum.lookforall(): self.CM.log("Patterns not found: " + repr(quorum.unmatched)) return self.failure('quorm event not found') return self.success() # all down # calc min expected votes to get Q # bring nodes up one-by-one # confirm cluster gains Q when V >= EV # ################################################################### class VoteQuorumGoUp(VoteQuorumBase): # all up # calc min expected votes to get Q # bring nodes down one-by-one # confirm cluster looses Q when V < EV # def __init__(self, cm): VoteQuorumBase.__init__(self, cm) self.name="VoteQuorumGoUp" self.need_all_up = False self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected #self.CM.log('set expected to %d' % (self.expected)) def __call__(self, node): self.incr("calls") pats = [] pats.append("%s .*VQ notification quorate: 1" % self.listener) pats.append("%s .*NQ notification quorate: 1" % self.listener) quorum = self.create_watch(pats, 30) quorum.setwatch() self.CM.StartaCM(self.listener) nodes_alive = 1 state = QuorumState(self.CM, self.listener) state.refresh() for n in self.CM.Env["nodes"]: if n is self.listener: continue #if not self.wait_for_quorum_change(): # return self.failure(self.error_message) if state.node_votes != 1: self.failure('unexpected number of node_votes') if state.expected_votes != self.expected: self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected)) self.failure('unexpected number of expected_votes') if state.total_votes != nodes_alive: self.failure('unexpected number of total votes') min = ((len(self.CM.Env["nodes"]) + 2) / 2) if min != state.quorum: self.failure('we should have %d (not %d) as quorum' % (min, state.quorum)) if nodes_alive < state.quorum: if state.quorate == 1: self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive)) else: if state.quorate == 0: self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive)) self.CM.StartaCM(n) nodes_alive = nodes_alive + 1 state.refresh() if not quorum.lookforall(): self.CM.log("Patterns not found: " + repr(quorum.unmatched)) return self.failure('quorm event not found') return self.success() ################################################################### class VoteQuorumContextTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="VoteQuorumContextTest" self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected def __call__(self, node): self.incr("calls") res = self.CM.votequorum_agent[node].context_test() if 'OK' in res: return self.success() else: return self.failure('context_test failed') ################################################################### class GenSimulStart(CoroTest): '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenSimulStart" self.need_all_up = False self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() ################################################################### class GenSimulStop(CoroTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenSimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.need_all_up = True def __call__(self, dummy): '''Perform the 'GenSimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() ################################################################### class GenStopAllBeekhof(CoroTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenStopAllBeekhof" self.need_all_up = True def __call__(self, node): '''Perform the 'GenStopAllBeekhof' test. ''' self.incr("calls") stopping = int(time.time()) for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].pcmk_test() self.CM.cpg_agent[n].msg_blaster(10000) self.CM.cpg_agent[n].cfg_shutdown() self.CM.ShouldBeStatus[n] = "down" waited = 0 max_wait = 60 still_up = list(self.CM.Env["nodes"]) while len(still_up) > 0: waited = int(time.time()) - stopping self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited)) if waited > max_wait: break time.sleep(3) for v in self.CM.Env["nodes"]: if v in still_up: self.CM.ShouldBeStatus[n] = "down" if not self.CM.StataCM(v): still_up.remove(v) waited = int(time.time()) - stopping if waited > max_wait: for v in still_up: self.CM.log("%s killing corosync on %s" % (self.name, v)) self.CM.rsh(v, 'killall -SIGSEGV corosync cpg_test_agent') return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up))) self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited)) return self.success() +################################################################### +class NoWDConfig(CoroTest): + '''Assertion: no config == no watchdog +Setup: no config, kmod inserted +1] make sure watchdog is not enabled +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="NoWDConfig" + self.need_all_up = False + + def config_valid(self, config): + return not config.has_key('resources') + + def __call__(self, node): + '''Perform the 'NoWDConfig' test. ''' + self.incr("calls") + + self.CM.StopaCM(node) + pats = [] + pats.append("%s .*no resources configured." % node) + w = self.create_watch(pats, 60) + w.setwatch() + + self.CM.StartaCM(node) + if not w.lookforall(): + return self.failure("Patterns not found: " + repr(w.unmatched)) + else: + return self.success() + +################################################################### +class WDConfigNoWd(CoroTest): + '''Assertion: watchdog config but no watchdog kmod will emit a log +Setup: config watchdog, but no kmod +1] look in the log for warning that there is no kmod +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="WDConfigNoWd" + self.need_all_up = False + + def __call__(self, node): + '''Perform the 'WDConfigNoWd' test. ''' + self.incr("calls") + + self.CM.StopaCM(node) + self.CM.rsh(node, 'rmmod softdog') + pats = [] + pats.append("%s .*No Watchdog, try modprobe.*" % node) + w = self.create_watch(pats, 60) + w.setwatch() + + self.CM.StartaCM(node) + if not w.lookforall(): + return self.failure("Patterns not found: " + repr(w.unmatched)) + else: + return self.success() + + +################################################################### +class NoWDOnCorosyncStop(CoroTest): + '''Configure WD then /etc/init.d/corosync stop +must stay up for > 60 secs +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="NoWDOnCorosyncStop" + self.need_all_up = False + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + + self.CM.StopaCM(node) + self.CM.rsh(node, 'modprobe softdog') + self.CM.StartaCM(node) + pats = [] + pats.append("%s .*Unexpected close, not stopping watchdog.*" % node) + w = self.create_watch(pats, 60) + w.setwatch() + self.CM.StopaCM(node) + + if w.lookforall(): + return self.failure("Should have closed the WD better: " + repr(w.matched)) + else: + return self.success() + + +################################################################### +class WDOnForkBomb(CoroTest): + '''Configure memory resource +run memory leaker / forkbomb +confirm watchdog action +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="WDOnForkBomb" + self.need_all_up = False + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + self.config['resources/system/memory_used/recovery'] = 'watchdog' + self.config['resources/system/memory_used/max'] = '80' + self.config['resources/system/memory_used/poll_period'] = '800' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + + # get the uptime + up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() + self.CM.StopaCM(node) + self.CM.rsh(node, 'modprobe softdog') + self.CM.StartaCM(node) + + self.CM.rsh(node, ':(){ :|:& };:', blocking=0) + + self.CM.log("wait for it to watchdog") + time.sleep(60 * 3) + + ping_able = False + while not ping_able: + if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0: + ping_able = True + self.CM.log("can ping 10 in 10secs.") + else: + self.CM.log("not yet responding to pings.") + + self.CM.ShouldBeStatus[node] = "down" + # wait for the node to come back up + self.CM.log("waiting for node to come back up.") + if self.CM.ns.WaitForNodeToComeUp(node): + up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() + if int(up_after) < int(up_before): + return self.success() + else: + return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after)) + else: + return self.failure("node didn't seem to come back up") + + +################################################################### +class SamWdIntegration1(CoroTest): + '''start sam hc +kill agent +confirm action +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="SamWdIntegration1" + self.need_all_up = True + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + self.CM.sam_agent[node].setup_hc() + pids = self.CM.sam_agent[node].getpid().rstrip().split(" ") + + pats = [] + for pid in pids: + pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid)) + + w = self.create_watch(pats, 60) + w.setwatch() + + self.CM.sam_agent[node].kill() + + look_result = w.look() + if not look_result: + return self.failure("Patterns not found: " + repr(w.regexes)) + else: + return self.success() + +################################################################### +class SamWdIntegration2(CoroTest): + '''start sam hc +call sam_stop() +confirm resource "stopped" and no watchdog action. +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="SamWdIntegration2" + self.need_all_up = True + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + self.CM.sam_agent[node].setup_hc() + pids = self.CM.sam_agent[node].getpid().rstrip().split(" ") + + no_pats = [] + yes_pats = [] + for pid in pids: + no_pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid)) + yes_pats.append('%s .*Fsm:sam_test_agent:%s event "config_changed", state "running" --> "stopped"' % (node, pid)) + + yes_w = self.create_watch(yes_pats, 10) + no_w = self.create_watch(no_pats, 10) + yes_w.setwatch() + no_w.setwatch() + time.sleep(2) + + self.CM.sam_agent[node].sam_stop() + + yes_matched = yes_w.look() + no_matched = no_w.look() + if no_matched: + return self.failure("Patterns found: " + repr(no_matched)) + else: + if not yes_matched: + return self.failure("Patterns NOT found: " + repr(yes_w.regexes)) + + return self.success() + +################################################################### +class WdDeleteResource(CoroTest): + '''config resource & start corosync +check that it is getting checked +delete the object resource object +check that we do NOT get watchdog'ed +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="WdDeleteResource" + self.need_all_up = True + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + self.config['logging/logger_subsys[2]/subsys'] = 'MON' + self.config['logging/logger_subsys[2]/debug'] = 'on' + self.config['resources/system/memory_used/recovery'] = 'watchdog' + self.config['resources/system/memory_used/max'] = '80' + self.config['resources/system/memory_used/poll_period'] = '800' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + + no_pats = [] + yes_pats = [] + no_pats.append('%s .*resource "memory_used" failed!' % node) + yes_pats.append('%s .*resource "memory_used" deleted from objdb!' % node) + yes_w = self.create_watch(yes_pats, 10) + no_w = self.create_watch(no_pats, 10) + yes_w.setwatch() + no_w.setwatch() + time.sleep(2) + + self.CM.rsh(node, 'corosync-objctl -d resources.system.memory_used') + + yes_matched = yes_w.look() + no_matched = no_w.look() + if no_matched: + return self.failure("Patterns found: " + repr(no_matched)) + else: + if not yes_matched: + return self.failure("Patterns NOT found: " + repr(yes_w.regexes)) + + return self.success() + + +################################################################### +class ResourcePollAdjust(CoroTest): + '''config resource & start corosync +change the poll_period +check that we do NOT get watchdog'ed +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="ResourcePollAdjust" + self.need_all_up = True + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + self.config['logging/logger_subsys[2]/subsys'] = 'MON' + self.config['logging/logger_subsys[2]/debug'] = 'on' + self.config['resources/system/memory_used/recovery'] = 'none' + self.config['resources/system/memory_used/max'] = '80' + self.config['resources/system/memory_used/poll_period'] = '800' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + + no_pats = [] + no_pats.append('%s .*resource "memory_used" failed!' % node) + no_pats.append('%s .*Could NOT use poll_period.*' % node) + no_w = self.create_watch(no_pats, 10) + no_w.setwatch() + changes = 0 + while changes < 50: + changes = changes + 1 + poll_period = int(random.random() * 5000) + if poll_period < 500: + poll_period = 500 + self.CM.log("setting poll_period to: %d" % poll_period) + self.CM.rsh(node, 'corosync-objctl -w resources.system.memory_used.poll_period=%d' % poll_period) + sleep_time = poll_period * 2 / 1000 + if sleep_time < 1: + sleep_time = 1 + time.sleep(sleep_time) + + no_matched = no_w.look() + if no_matched: + return self.failure("Patterns found: " + repr(no_matched)) + + return self.success() + + +################################################################### +class RebootOnHighMem(CoroTest): + '''Configure memory resource +run memory leaker / forkbomb +confirm reboot action +''' + def __init__(self, cm): + CoroTest.__init__(self,cm) + self.name="RebootOnHighMem" + self.need_all_up = True + self.config['logging/logger_subsys[1]/subsys'] = 'WD' + self.config['logging/logger_subsys[1]/debug'] = 'on' + self.config['resources/system/memory_used/recovery'] = 'reboot' + self.config['resources/system/memory_used/max'] = '80' + self.config['resources/system/memory_used/poll_period'] = '800' + + def __call__(self, node): + '''Perform the test. ''' + self.incr("calls") + + # get the uptime + up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() + cmd = 'corosync-objctl resources.system.memory_used. | grep current | cut -d= -f2' + mem_current_str = self.CM.rsh(node, cmd, 1).rstrip() + mem_new_max = int(mem_current_str) + 5 + + self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max)) + cmd = 'corosync-objctl -w resources.system.memory_used.max=' + str(mem_new_max) + self.CM.rsh(node, cmd) + + self.CM.rsh(node, 'memhog -r10000 200m', blocking=0) + + self.CM.log("wait for it to reboot") + time.sleep(60 * 3) + cmd = 'corosync-objctl resources.system.memory_used. | grep current | cut -d= -f2' + mem_current_str = self.CM.rsh(node, cmd, 1).rstrip() + self.CM.log("current mem usage: %s" % (mem_current_str)) + + ping_able = False + while not ping_able: + if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0: + ping_able = True + self.CM.log("can ping 10 in 10secs.") + else: + self.CM.log("not yet responding to pings.") + + self.CM.ShouldBeStatus[node] = "down" + # wait for the node to come back up + self.CM.log("waiting for node to come back up.") + if self.CM.ns.WaitForNodeToComeUp(node): + up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() + if int(up_after) < int(up_before): + return self.success() + else: + return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after)) + else: + return self.failure("node didn't seem to come back up") GenTestClasses = [] GenTestClasses.append(GenSimulStart) GenTestClasses.append(GenSimulStop) GenTestClasses.append(GenStopAllBeekhof) GenTestClasses.append(CpgMsgOrderBasic) GenTestClasses.append(CpgMsgOrderZcb) GenTestClasses.append(CpgCfgChgOnExecCrash) GenTestClasses.append(CpgCfgChgOnGroupLeave) GenTestClasses.append(CpgCfgChgOnNodeLeave) GenTestClasses.append(CpgCfgChgOnNodeIsolate) #GenTestClasses.append(CpgCfgChgOnNodeRestart) GenTestClasses.append(CpgCfgChgOnLowestNodeJoin) GenTestClasses.append(VoteQuorumGoDown) GenTestClasses.append(VoteQuorumGoUp) AllTestClasses = [] AllTestClasses.append(ConfdbReplaceTest) AllTestClasses.append(ConfdbIncrementTest) AllTestClasses.append(ConfdbObjectFindTest) AllTestClasses.append(ConfdbNotificationTest) AllTestClasses.append(ConfdbContextTest) AllTestClasses.append(CpgContextTest) AllTestClasses.append(VoteQuorumContextTest) AllTestClasses.append(SamTest1) AllTestClasses.append(SamTest2) -AllTestClasses.append(SamTest3) AllTestClasses.append(SamTest4) +AllTestClasses.append(SamTest5) +AllTestClasses.append(SamTest6) +AllTestClasses.append(SamTestQuorum) +AllTestClasses.append(SamTest8) +AllTestClasses.append(SamTest9) +AllTestClasses.append(SamWdIntegration1) +AllTestClasses.append(SamWdIntegration2) +AllTestClasses.append(NoWDConfig) +AllTestClasses.append(WDConfigNoWd) +AllTestClasses.append(NoWDOnCorosyncStop) +AllTestClasses.append(WDOnForkBomb) +AllTestClasses.append(WdDeleteResource) +AllTestClasses.append(RebootOnHighMem) +AllTestClasses.append(ResourcePollAdjust) AllTestClasses.append(ServiceLoadTest) AllTestClasses.append(MemLeakObject) AllTestClasses.append(MemLeakSession) - AllTestClasses.append(FlipTest) AllTestClasses.append(RestartTest) AllTestClasses.append(StartOnebyOne) AllTestClasses.append(StopOnebyOne) AllTestClasses.append(RestartOnebyOne) class ConfigContainer(UserDict): def __init__ (self, name): self.name = name UserDict.__init__(self) def CoroTestList(cm, audits): result = [] configs = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) default = ConfigContainer('default') + default['logging/fileline'] = 'on' default['logging/function_name'] = 'off' default['logging/logfile_priority'] = 'info' default['logging/syslog_priority'] = 'info' default['logging/syslog_facility'] = 'daemon' default['uidgid/uid'] = '0' default['uidgid/gid'] = '0' configs.append(default) a = ConfigContainer('none_5min') a['compatibility'] = 'none' a['totem/token'] = (5 * 60 * 1000) a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1 configs.append(a) b = ConfigContainer('pcmk_basic') b['compatibility'] = 'whitetank' b['totem/token'] = 5000 b['totem/token_retransmits_before_loss_const'] = 10 b['totem/join'] = 1000 b['totem/consensus'] = 7500 configs.append(b) c = ConfigContainer('pcmk_sec_nss') c['totem/secauth'] = 'on' c['totem/crypto_accept'] = 'new' c['totem/crypto_type'] = 'nss' c['totem/token'] = 5000 c['totem/token_retransmits_before_loss_const'] = 10 c['totem/join'] = 1000 c['totem/consensus'] = 7500 configs.append(c) s = ConfigContainer('pcmk_vq') s['quorum/provider'] = 'corosync_votequorum' s['quorum/expected_votes'] = len(cm.Env["nodes"]) s['totem/token'] = 5000 s['totem/token_retransmits_before_loss_const'] = 10 s['totem/join'] = 1000 s['totem/vsftype'] = 'none' s['totem/consensus'] = 7500 s['totem/max_messages'] = 20 configs.append(s) d = ConfigContainer('sec_sober') d['totem/secauth'] = 'on' d['totem/crypto_type'] = 'sober' configs.append(d) e = ConfigContainer('threads_4') e['totem/threads'] = 4 configs.append(e) if not cm.Env["RrpBindAddr"] is None: g = ConfigContainer('rrp_passive') g['totem/rrp_mode'] = 'passive' g['totem/interface[2]/ringnumber'] = '1' g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"] g['totem/interface[2]/mcastaddr'] = '226.94.1.2' g['totem/interface[2]/mcastport'] = '5405' configs.append(g) h = ConfigContainer('rrp_active') h['totem/rrp_mode'] = 'active' h['totem/interface[2]/ringnumber'] = '1' h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"] h['totem/interface[2]/mcastaddr'] = '226.94.1.2' h['totem/interface[2]/mcastport'] = '5405' configs.append(h) else: print 'Not including rrp tests. Use --rrp-binaddr to enable them.' num=1 for cfg in configs: for testclass in GenTestClasses: bound_test = testclass(cm) if bound_test.is_applicable() and bound_test.config_valid(cfg): bound_test.Audits = audits for c in cfg.keys(): bound_test.config[c] = cfg[c] bound_test.name = bound_test.name + '_' + cfg.name result.append(bound_test) num = num + 1 return result