diff --git a/tools/hb2openais.sh.in b/tools/hb2openais.sh.in index 03f639a299..b54fc32324 100755 --- a/tools/hb2openais.sh.in +++ b/tools/hb2openais.sh.in @@ -1,698 +1,706 @@ #!/bin/sh # Copyright (C) 2008 Dejan Muhamedagic <dmuhamedagic@suse.de> # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # . @sysconfdir@/ha.d/shellfuncs . $HA_NOARCHBIN/utillib.sh . $HA_NOARCHBIN/ha_cf_support.sh PROG=`basename $0` # FIXME: once this is part of the package! PROGDIR=`dirname $0` echo "$PROGDIR" | grep -qs '^/' || { test -f @sbindir@/$PROG && PROGDIR=@sbindir@ test -f $HA_NOARCHBIN/$PROG && PROGDIR=$HA_NOARCHBIN } # the default syslog facility is not (yet) exported by heartbeat # to shell scripts # DEFAULT_HA_LOGFACILITY="daemon" export DEFAULT_HA_LOGFACILITY AIS_CONF=/etc/ais/openais.conf AIS_KEYF=/etc/ais/authkey PY_HELPER=$HA_BIN/hb2openais-helper.py CRM_VARLIB=$HA_VARLIB/crm CIB=$CRM_VARLIB/cib.xml CIBSIG=$CRM_VARLIB/cib.xml.sig CIBLAST=$CRM_VARLIB/cib.xml.last CIBLAST_SIG=$CRM_VARLIB/cib.xml.sig.last HOSTCACHE=$HA_VARLIB/hostcache HB_UUID=$HA_VARLIB/hb_uuid DONE_F=$HA_VARRUN/heartbeat/.$PROG.conv_done BACKUPDIR=/var/tmp/`basename $PROG .sh`.backup RM_FILES=" $CIBSIG $HOSTCACHE $HB_UUID $CIBLAST $CIBLAST_SIG" REMOTE_RM_FILES=" $CIB $RM_FILES" BACKUP_FILES=" $AIS_CONF $AIS_KEYF $REMOTE_RM_FILES " DIST_FILES=" $AIS_CONF $AIS_KEYF $DONE_F " MAN_TARF=/var/tmp/`basename $PROG .sh`.tar.gz : ${SSH_OPTS="-T"} usage() { cat<<EOF usage: $PROG [-F] [-u user] [-T directory] [revert] -U: upgrade the CIB to v1.0 -F: force conversion despite it being done beforehand -u user: a user to sudo with (otherwise, you'd have to run this as root) -T directory: a directory containing ha.cf/logd.cf/cib.xml/hostcache (use for testing); with this option files are not copied to other nodes and there are no destructive commands executed; you may run as unprivileged uid EOF exit } SUDO_USER="" MYSUDO="" TEST_DIR="" FORCE="" UPGRADE="" while getopts UFu:T:h o; do case "$o" in h) usage;; U) UPGRADE=1;; F) FORCE=1;; u) SUDO_USER="$OPTARG";; T) TEST_DIR="$OPTARG";; [?]) usage;; esac done shift $(($OPTIND-1)) [ $# -gt 1 ] && usage [ "$TEST_DIR" ] && [ $# -ne 0 ] && usage if [ "$TEST_DIR" ]; then cp $TEST_DIR/cib.xml $TEST_DIR/cib-out.xml CIB=$TEST_DIR/cib-out.xml HOSTCACHE=$TEST_DIR/hostcache HA_CF=$TEST_DIR/ha.cf AIS_CONF=$TEST_DIR/openais.conf if [ "$SUDO_USER" ]; then info "-u option ignored when used with -T" fi else ps -ef | grep -wqs [c]rmd && fatal "you must first stop heartbeat on _all_ nodes" if [ "$SUDO_USER" ]; then MYSUDO="sudo -u $SUDO_USER" fi fi CONF=$HA_CF LOGD_CF=`findlogdcf $TEST_DIR /etc $HA_DIR` export LOGD_CF prerequisites() { test -f $HA_CF || fatal "$HA_CF does not exist: cannot proceed" iscfvartrue crm || grep -w "^crm" $HA_CF | grep -wqs respawn || fatal "crm is not enabled: we cannot convert v1 configurations" $DRY test -f $CIB || fatal "CIB $CIB does not exist: cannot proceed" [ "$FORCE" ] && rm -f "$DONE_F" if [ -f "$DONE_F" ]; then info "Conversion to OpenAIS already done, exiting" exit 0 fi } # some notes about unsupported stuff unsupported() { respawned_progs=`awk '/^respawn/{print $3}' $HA_CF |while read p; do basename $p; done` grep -qs "^serial" $HA_CF && warning "serial media is not supported by OpenAIS" for prog in $respawned_progs; do case $prog in mgmtd|pingd|evmsd) : these are fine ;; *) warning "program $prog is being controlled by heartbeat (thru respawn)" warning "you have to find another way of running it" ;; esac done } # # find nodes for this cluster # getnodes() { # 1. hostcache if [ -f $HOSTCACHE ]; then awk '{print $1}' $HOSTCACHE return fi # 2. ha.cf getcfvar node } # # does ssh work? # testsshuser() { if [ "$2" ]; then ssh -T -o Batchmode=yes $2@$1 true 2>/dev/null else ssh -T -o Batchmode=yes $1 true 2>/dev/null fi } findsshuser() { for u in "" $TRY_SSH; do rc=0 for n in `getnodes`; do [ "$node" = "$WE" ] && continue testsshuser $n $u || { rc=1 break } done if [ $rc -eq 0 ]; then echo $u return 0 fi done return 1 } newportinfo() { info "the port number for the multicast is set to 5405" info "please update your firewall rules (if any)" } changemediainfo() { info "openais uses multicast for communication" info "please make sure that your network infrastructure supports it" } multicastinfo() { info "multicast for openais ring $1 set to $2:$3" } netaddrinfo() { info "network address for openais ring $1 set to $2" } backup_files() { [ "$TEST_DIR" ] && return info "backing up $BACKUP_FILES to $BACKUPDIR" $DRY mkdir $BACKUPDIR || { echo sorry, could not create $BACKUPDIR directory echo please cleanup exit 1 } if [ -z "$DRY" ]; then tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz || { echo sorry, could not create $BACKUPDIR/$WE.tar.gz exit 1 } else $DRY "tar cf - $BACKUP_FILES | gzip > $BACKUPDIR/$WE.tar.gz" fi } revert() { [ "$TEST_DIR" ] && return test -d $BACKUPDIR || { echo sorry, there is no $BACKUPDIR directory echo cannot revert exit 1 } info "restoring $BACKUP_FILES from $BACKUPDIR/$WE.tar.gz" gzip -dc $BACKUPDIR/$WE.tar.gz | (cd / && tar xf -) || { echo sorry, could not unpack $BACKUPDIR/$WE.tar.gz exit 1 } } pls_press_enter() { [ "$TEST_DIR" ] && return cat<<EOF Please press enter to continue or ^C to exit ... EOF read junk echo "" } introduction() { cat<<EOF This is a Heartbeat to OpenAIS conversion tool. * IMPORTANT * IMPORTANT * IMPORTANT * IMPORTANT * IMPORTANT * Please read this and don't proceed before understanding what we try to do and what is required. 1. You need to know your cluster in detail. This program will inform you on changes it makes. It is up to you to verify that the changes are meaningful. We will also probably ask some questions now and again. 2. This procedure is supposed to be run on one node only. Although the main cluster configuration (the CIB) is automatically replicated, there are some things which have to be copied by other means. For that to work, we need sshd running on all nodes and root access working. 3. Do not run this procedure on more than one node! EOF pls_press_enter cat<<EOF The procedure consists of two parts: the OpenAIS configuration and the Pacemaker/CRM CIB configuration. The first part is obligatory. The second part may be skipped unless your cluster configuration requires changes due to the change from Heartbeat to OpenAIS. We will try to analyze your configuration and let you know whether the CIB configuration should be changed as well. However, you will still have a choice to skip the CIB mangling part in case you want to do that yourself. The next step is to create the OpenAIS configuration. If you want to leave, now is the time to interrupt the program. EOF pls_press_enter } confirm() { while :; do printf "$1 (y/n) " read ans if echo $ans | grep -iqs '^[yn]'; then echo $ans | grep -iqs '^y' return $? else echo Please answer with y or n fi done } want_to_proceed() { [ "$TEST_DIR" ] && return 0 confirm "Do you want to proceed?" } intro_part2() { cat<<EOF The second part of the configuration deals with the CIB. According to our analysis (you should have seen some messages), this step is necessary. EOF want_to_proceed || return } gethbmedia() { grep "^[bum]cast" $HA_CF } pl_ipcalc() { perl -e ' # stolen from internet! my $ipaddr=$ARGV[0]; my $nmask=$ARGV[1]; my @addrarr=split(/\./,$ipaddr); my ( $ipaddress ) = unpack( "N", pack( "C4",@addrarr ) ); my @maskarr=split(/\./,$nmask); my ( $netmask ) = unpack( "N", pack( "C4",@maskarr ) ); # Calculate network address by logical AND operation of addr & # netmask # and convert network address to IP address format my $netadd = ( $ipaddress & $netmask ); my @netarr=unpack( "C4", pack( "N",$netadd ) ); my $netaddress=join(".",@netarr); print "$netaddress\n"; ' $1 $2 } get_if_val() { test "$1" || return awk -v key=$1 ' { for( i=1; i<=NF; i++ ) if( match($i,key) ) { sub(key,"",$i); print $i exit } }' } netaddress() { ip=`ifconfig $1 | grep 'inet addr:' | get_if_val addr:` mask=`ifconfig $1 | grep 'Mask:' | get_if_val Mask:` test "$mask" || fatal "could not get the network mask for interface $1" pl_ipcalc $ip $mask } sw=0 do_tabs() { for i in `seq $sw`; do printf "\t"; done } newstanza() { do_tabs printf "%s {\n" $1 let sw=sw+1 } endstanza() { let sw=sw-1 do_tabs printf "}\n" } setvalue() { name=$1 val=$2 test "$val" || { fatal "sorry, no value set for $name" } do_tabs echo "$name: $val" } setcomment() { do_tabs echo "# $*" } setdebug() { [ "$HA_LOGLEVEL" = debug ] && echo "on" || echo "off" } WE=`uname -n` # who am i? if [ "$1" = revert ]; then revert exit fi test -d $BACKUPDIR && fatal "please remove the backup directory: $BACKUPDIR" prerequisites introduction backup_files unsupported # 1. Generate the openais.conf openaisconf() { info "Generating $AIS_CONF from $HA_CF ..." # the totem stanza cpunum=`grep -c ^processor /proc/cpuinfo` setcomment "Generated by hb2openais on `date`" setcomment "Please read the openais.conf.5 manual page" newstanza aisexec setcomment "Run as root - this is necessary to be able to manage resources with Pacemaker" setvalue user root setvalue group root endstanza newstanza service setcomment "Load the Pacemaker Cluster Resource Manager" setvalue name pacemaker setvalue ver 0 if uselogd; then setvalue use_logd yes info "Make sure that the logd service is started (chkconfig logd on)" fi if grep -qs "^respawn.*mgmtd" $HA_CF; then setvalue use_mgmtd yes fi endstanza newstanza totem setvalue version 2 setcomment "How long before declaring a token lost (ms)" setvalue token 10000 setcomment "How many token retransmits before forming a new configuration" setvalue token_retransmits_before_loss_const 20 setcomment "How long to wait for join messages in the membership protocol (ms)" setvalue join 60 setcomment "How long to wait for consensus to be achieved before" setcomment "starting a new round of membership configuration (ms)" setvalue consensus 4800 setcomment "Turn off the virtual synchrony filter" setvalue vsftype none setcomment "Number of messages that may be sent by one processor on receipt of the token" setvalue max_messages 20 setcomment "Limit generated nodeids to 31-bits (positive signed integers)" setvalue clear_node_high_bit yes setcomment "Disable encryption" setvalue secauth off setvalue threads 0 setcomment "Optionally assign a fixed node id (integer)" setcomment "nodeid: 1234" ring=0 gethbmedia | while read media_type iface address rest; do info "Processing interface $iface of type $media_type ..." case "$media_type" in ucast|bcast) mcastaddr=226.94.1.1 ;; mcast) mcastaddr=$address ;; esac mcastport=5405 echo `netaddress $iface` $mcastaddr $mcastport done | sort -u | while read network addr port; do + if [ $ring -ge 2 ]; then + warning "openais supports only two rings!" + info "consider bonding interfaces" + warning "skipping communication link on $network" + setcomment "$network skipped: too many rings" + continue + fi + setcomment "from heartbeat's $media_type:$iface:$address" newstanza interface setvalue ringnumber $ring setvalue bindnetaddr $network netaddrinfo $ring $network multicastinfo $ring $addr $port setvalue mcastport $port setvalue mcastaddr $addr let ring=$ring+1 endstanza done changemediainfo endstanza # the logging stanza getlogvars # enforce some syslog facility debugsetting=`setdebug` newstanza logging setvalue debug $debugsetting setvalue fileline off setvalue to_stderr no if [ "$HA_LOGFILE" ]; then setvalue to_file yes setvalue logfile $HA_LOGFILE else setvalue to_file no fi if [ "$HA_LOGFACILITY" ]; then setvalue to_syslog yes setvalue syslog_facility $HA_LOGFACILITY else setvalue to_syslog no fi endstanza newstanza amf setvalue mode disabled endstanza } if [ -z "$DRY" ]; then openaisconf > $AIS_CONF || fatal "cannot create $AIS_CONF" grep -wqs interface $AIS_CONF || fatal "no media found in $HA_CF" else openaisconf fi [ "$AIS_KEYF" ] && if [ "$TEST_DIR" ]; then info "Skipping OpenAIS authentication key generation ..." else info "Generating a key for OpenAIS authentication ..." $DRY ais-keygen || fatal "cannot generate the key using ais-keygen" fi # remove various files which could get in a way if [ -z "$TEST_DIR" ]; then $DRY rm -f $RM_FILES fi fixcibperms() { [ "$TEST_DIR" ] && return uid=`ls -ldn $CRM_VARLIB | awk '{print $3}'` gid=`ls -ldn $CRM_VARLIB | awk '{print $4}'` $DRY $MYSUDO chown $uid:$gid $CIB } upgrade_cib() { $DRY $MYSUDO CIB_file=$CIB cibadmin --upgrade --force } # remove the nodes section from the CIB tmpfile=`maketempfile` $MYSUDO sh -c "python $PY_HELPER zap_nodes <$CIB >$tmpfile" || fatal "cannot clear the nodes section from the CIB" $DRY $MYSUDO mv $tmpfile $CIB info "Cleared the nodes section in the CIB" if [ `getnodes | wc -w` -eq 2 ]; then # set tmpfile=`maketempfile` $MYSUDO sh -c "python $PY_HELPER ignore_quorum <$CIB >$tmpfile" || fatal "cannot set the no-quorum-policy property" $DRY $MYSUDO mv $tmpfile $CIB info "Since this is a 2-node cluster," info "the no-quorum-policy cluster property was set to ignore." fi info "Done converting ha.cf to openais.conf" info "Please check the resulting $AIS_CONF" info "and in particular interface stanzas and logging." info "If you find problems, please edit $AIS_CONF now!" # # first part done (openais), on to the CIB analyze_cib() { info "Analyzing the CIB..." $MYSUDO sh -c "python $PY_HELPER analyze_cib <$CIB" } part2() { intro_part2 || return 0 tmpfile=`maketempfile` opts="-c $HA_CF" [ "$TEST_DIR" ] && opts="-T $opts" $MYSUDO sh -c "python $PY_HELPER $opts convert_cib <$CIB >$tmpfile" || fatal "failed to process the CIB" $DRY $MYSUDO mv $tmpfile $CIB info "Processed the CIB successfully" } # make the user believe that something's happening :) some_dots_idle() { cnt=0 printf "$2 ." while [ $cnt -lt $1 ]; do sleep 1 printf "." ctn=$((cnt+1)) done echo } print_dc() { crm_mon -1 | awk '/Current DC/{print $3}' } dcidle() { dc=`$MYSUDO print_dc` if [ "$dc" = "$WE" ]; then maxcnt=60 cnt=0 while [ $cnt -lt $maxcnt ]; do stat=`$MYSUDO crmadmin -S $dc` echo $stat | grep -qs S_IDLE && break [ "$1" = "-v" ] && echo $stat sleep 1 printf "." cnt=$((cnt+1)) done echo $stat | grep -qs S_IDLE else some_dots_idle 10 #just wait for 10 seconds fi } wait_crm() { cnt=10 dc="" while [ -z "$dc" -a $cnt -gt 0 ]; do dc=`$MYSUDO print_dc` cnt=$((cnt-1)) done if [ x = x"$dc" ]; then echo "sorry, no dc found/elected" exit 1 fi dcidle } start_cluster() { $DRY /etc/init.d/openais start } tune_ocfs2() { [ "$TEST_DIR" ] && return cat<<EOF The ocfs2 metadata has to change to reflect the cluster stack change. To do that, we have to start the cluster stack on this node. EOF pls_press_enter $DRY start_cluster some_dots_idle 10 "waiting for crm to start" if $DRY wait_crm; then info "starting the tunefs.ocfs2" $DRY tunefs.ocfs2 --update-cluster-stack else fatal "could not start pacemaker; please check the logs" fi } analyze_cib rc=$? [ $rc -gt 1 ] && fatal "error while analyzing CIB" if [ $rc -eq 1 ] || grep -qs "^respawn.*pingd" $HA_CF && info "a pingd resource has to be created"; then part2 if grep -qs '<primitive.*type="ocfs2"' $CIB; then tune_ocfs2 fi else info "No need to process CIB further" fi # upgrade the CIB to v1.0 if [ "$UPGRADE" ] || confirm "Do you want to upgrade the CIB to v1.0?"; then upgrade_cib info "Upgraded the CIB to v1.0" else info "Skipped upgrading the CIB to v1.0" info "You should do this sooner rather than later!" fi fixcibperms [ "$TEST_DIR" ] && exit $DRY touch $DONE_F # finally, copy files to all nodes info "Copying files to other nodes ..." info "(please provide root password if prompted)" ssh_opts="-l root $SSH_OPTS" rc=0 for node in `getnodes`; do [ "$node" = "$WE" ] && continue if [ "$DRY" ]; then $DRY "(cd / && tar cf - $DIST_FILES) | ssh $ssh_opts $node \"rm -f $REMOTE_RM_FILES && cd / && tar xf -\"" else echo "Copying to node $node ..." (cd / && tar cf - $DIST_FILES) | ssh $ssh_opts $node "rm -f $REMOTE_RM_FILES && cd / && tar xf -" let rc=$rc+$? fi done info "Done transfering files" if [ $rc -ne 0 ]; then warning "we could not update some ssh nodes" info "before starting the cluster stack on those nodes:" info "copy and unpack $MAN_TARF (from the / directory)" info "and execute: rm -f $REMOTE_RM_FILES" (cd / && tar cf - $DIST_FILES | gzip > $MAN_TARF) fi