Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1701671
CTSaudits.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
24 KB
Referenced Files
None
Subscribers
None
CTSaudits.py
View Options
'''CTS: Cluster Testing System: Audit module
'''
__copyright__
=
'''
Copyright (C) 2000, 2001,2005 Alan Robertson <alanr@unix.sh>
Licensed under the GNU GPL.
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import
time
,
os
,
string
,
re
from
cts
import
CTS
class ClusterAudit:
    '''Abstract base class for the CTS cluster audits.

    Subclasses are expected to override __call__(), is_applicable() and
    name(); the versions defined here only raise ValueError.  log() and
    debug() forward a message to the cluster manager with an "audit: "
    prefix.
    '''

    def __init__(self, cm):
        '''Remember the cluster manager object (cm) the audit works on.'''
        self.CM = cm

    def __call__(self):
        '''Run the audit.  Abstract here: always raises ValueError.'''
        raise ValueError("Abstract Class member (__call__)")

    def is_applicable(self):
        '''Return TRUE if we are applicable in the current test configuration'''
        # Abstract here: always raises ValueError.  (A "return 1" used to
        # follow the raise; it could never execute and has been dropped.)
        raise ValueError("Abstract Class member (is_applicable)")

    def log(self, args):
        '''Send args to the cluster manager log, prefixed with "audit: ".'''
        self.CM.log("audit: %s" % args)

    def debug(self, args):
        '''Send args to the cluster manager debug log, prefixed with "audit: ".'''
        self.CM.debug("audit: %s" % args)

    def name(self):
        '''Return the audit name.  Abstract here: always raises ValueError.'''
        raise ValueError("Abstract Class member (name)")
# Registry of audit classes; it starts out empty and is filled in once the
# classes themselves have been defined.
AllAuditClasses = list()
class LogAudit(ClusterAudit):
    '''Audit that messages logged on every node show up in the cluster log.'''

    def name(self):
        '''Return the name of this audit.'''
        return "LogAudit"

    def __init__(self, cm):
        '''Remember the cluster manager object (cm) the audit works on.'''
        self.CM = cm

    def RestartClusterLogging(self, nodes=None):
        '''Run the configured "logrestartcmd" on each of the given nodes.

        nodes -- nodes to act on; when None or empty, every node listed in
                 the environment is used.

        A non-zero result from the remote command is logged as an error but
        does not stop the remaining nodes from being processed.
        '''
        if not nodes:
            nodes = self.CM.Env["nodes"]

        self.CM.log("Restarting logging on: %s" % repr(nodes))

        for node in nodes:
            cmd = self.CM.Env["logrestartcmd"]
            if self.CM.rsh(node, cmd, blocking=0) != 0:
                self.CM.log("ERROR: Cannot restart logging on %s [%s failed]"
                            % (node, cmd))

    def TestLogging(self):
        '''Log a test message on every node and look for all of them.

        Returns 1 when the log watcher reports no unmatched pattern and 0
        otherwise; every unmatched pattern is written to the log.
        '''
        prefix = "Test message from"

        # Look for the node name in two places to make sure
        # that syslog is logging with the correct hostname
        patterns = ["%s.*%s %s" % (node, prefix, node)
                    for node in self.CM.Env["nodes"]]

        # NOTE(review): 60 is presumably the watcher timeout in seconds --
        # confirm against CTS.LogWatcher.
        watch = CTS.LogWatcher(self.CM.Env["LogFileName"], patterns, 60)
        watch.setwatch()

        for node in self.CM.Env["nodes"]:
            cmd = "logger -p %s.info %s %s" % (
                self.CM.Env["SyslogFacility"], prefix, node)
            if self.CM.rsh(node, cmd, blocking=0) != 0:
                self.CM.log("ERROR: Cannot execute remote command [%s] on %s"
                            % (cmd, node))

        # Only the watcher's "unmatched" attribute is inspected afterwards,
        # so the return value of lookforall() is not kept.
        watch.lookforall()
        if watch.unmatched:
            for regex in watch.unmatched:
                self.CM.log("Test message [%s] not found in logs." % (regex))
            return 0

        return 1

    def __call__(self):
        '''Run the audit, restarting logging and retrying on failure.

        TestLogging() is tried up to four times.  After each failure the
        cluster logging is restarted and the audit sleeps for 60 seconds
        times the number of failures so far.  Returns 1 as soon as a test
        passes, 0 (after logging an error) when all tries failed.
        '''
        max_attempts = 3
        attempt = 0

        self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"])
        while attempt <= max_attempts and self.TestLogging() == 0:
            attempt = attempt + 1
            self.RestartClusterLogging()
            time.sleep(60 * attempt)

        if attempt > max_attempts:
            self.CM.log("ERROR: Cluster logging unrecoverable.")
            return 0

        return 1

    def is_applicable(self):
        '''Return 0 when the "DoBSC" environment setting is true, else 1.'''
        if self.CM.Env["DoBSC"]:
            return 0
        return 1
class DiskAudit(ClusterAudit):
    '''Audit the free disk space under /var/log on every node.'''

    def name(self):
        '''Return the name of this audit.'''
        return "DiskspaceAudit"

    def __init__(self, cm):
        '''Remember the cluster manager object (cm) the audit works on.'''
        self.CM = cm

    def __call__(self):
        '''Check the space available for logs on each node.

        Returns 1 unless some node reports no available space at all, in
        which case None is returned (a false value, kept as None for
        compatibility with existing callers).  Low space (<= 1000 Mbytes),
        failure to run df and unparsable output are logged only.
        '''
        result = 1

        # The previous command took the second column of "df -k", which is
        # the total size in 1K blocks rather than the free space, while the
        # warning below reports Mbytes.  The POSIX format (-P) keeps each
        # filesystem on a single line, so column 4 is the available space
        # in 1K blocks; awk converts it to whole megabytes.
        dfcmd = "df -Pk /var/log | tail -1 | awk '{print int($4/1024)}'"

        self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"])
        for node in self.CM.Env["nodes"]:
            dfout = self.CM.rsh(node, dfcmd, 1)
            if not dfout:
                self.CM.log("ERROR: Cannot execute remote df command [%s] on %s"
                            % (dfcmd, node))
                continue

            try:
                idfout = int(dfout)
            except (ValueError, TypeError):
                self.CM.log("Warning: df output from %s was invalid [%s]"
                            % (node, dfout))
                continue

            if idfout == 0:
                self.CM.log("CRIT: Completely out of log disk space on %s"
                            % node)
                result = None
            elif idfout <= 1000:
                self.CM.log("WARN: Low on log disk space (%d Mbytes) on %s"
                            % (idfout, node))
        return result

    def is_applicable(self):
        '''Return 0 when the "DoBSC" environment setting is true, else 1.'''
        if self.CM.Env["DoBSC"]:
            return 0
        return 1
class AuditResource:
    '''One resource record, built from a whitespace-separated text line.

    Field 0 of the line (the record tag) is ignored; fields 1 to 11 are
    stored as attributes in the order listed in __init__.  A line with
    fewer than 12 fields raises IndexError.
    '''

    # Bit masks tested against the numeric "flags" field.
    FLAG_ORPHAN = 0x00000001
    FLAG_MANAGED = 0x00000002
    FLAG_UNIQUE = 0x00000020

    def __init__(self, cm, line):
        '''Parse line into attributes; cm is kept as self.CM.'''
        fields = line.split()
        self.CM = cm
        self.line = line
        self.type = fields[1]
        self.id = fields[2]
        self.clone_id = fields[3]
        self.parent = fields[4]
        self.rprovider = fields[5]
        self.rclass = fields[6]
        self.rtype = fields[7]
        self.host = fields[8]

        # The audits in this file test needs_quorum with "== 1" and "not",
        # which never behaves as intended while the value is raw text
        # ("1" != 1, and "0" is true).  Store it as an integer whenever the
        # field is numeric.
        try:
            self.needs_quorum = int(fields[9])
        except ValueError:
            # Not a number: keep the raw text, as before.
            self.needs_quorum = fields[9]

        self.flags = int(fields[10])
        self.flags_s = fields[11]

        # "NA" marks a resource without a parent.
        if self.parent == "NA":
            self.parent = None

    def unique(self):
        '''Return 1 if the 0x20 bit is set in flags, else 0.'''
        if self.flags & self.FLAG_UNIQUE:
            return 1
        return 0

    def orphan(self):
        '''Return 1 if the 0x01 bit is set in flags, else 0.'''
        if self.flags & self.FLAG_ORPHAN:
            return 1
        return 0

    def managed(self):
        '''Return 1 if the 0x02 bit is set in flags, else 0.'''
        if self.flags & self.FLAG_MANAGED:
            return 1
        return 0
class AuditConstraint:
    '''One constraint record, built from a whitespace-separated text line.

    Field 0 of the line (the record tag) is ignored; fields 1 to 7 become
    the attributes type, id, rsc, target, score, rsc_role and target_role.
    A role given as "NA" is stored as None.  A line with fewer than 8
    fields raises IndexError.
    '''

    # Attribute names, in the order their values follow the record tag.
    _FIELD_NAMES = ("type", "id", "rsc", "target", "score",
                    "rsc_role", "target_role")

    def __init__(self, cm, line):
        '''Parse line into attributes; cm is kept as self.CM.'''
        tokens = line.split()
        self.CM = cm
        self.line = line

        for position, attribute in enumerate(self._FIELD_NAMES, 1):
            setattr(self, attribute, tokens[position])

        # "NA" means that no role was given.
        for attribute in ("rsc_role", "target_role"):
            if getattr(self, attribute) == "NA":
                setattr(self, attribute, None)
class PrimitiveAudit(ClusterAudit):
    '''Audit where each primitive resource is active.

    setup() also serves the audits derived from this class: it collects
    the node lists and parses the "crm_resource -c" output into
    self.resources and self.constraints.
    '''

    def name(self):
        '''Return the name of this audit.'''
        return "PrimitiveAudit"

    def __init__(self, cm):
        '''Remember the cluster manager object (cm) the audit works on.'''
        self.CM = cm

    def doResourceAudit(self, resource):
        '''Audit a single resource; return 1 if it passes, 0 if it fails.

        The audit fails when the resource is active on one node without
        quorum although it needs quorum, when a managed unique resource is
        active more than once, or when a managed, unique, non-orphan
        resource is inactive although no node is expected to be down.
        Every other case is only logged.
        '''
        # NOTE(review): the needs_quorum tests below assume an integer
        # value -- verify against what AuditResource stores.
        rc = 1
        active = self.CM.ResourceLocation(resource.id)

        if len(active) == 1:
            if self.CM.HasQuorum(None):
                self.debug("Resource %s active on %s"
                           % (resource.id, repr(active)))

            elif resource.needs_quorum == 1:
                self.CM.log("Resource %s active without quorum: %s"
                            % (resource.id, repr(active)))
                rc = 0

        elif not resource.managed():
            self.CM.log("Resource %s not managed. Active on %s"
                        % (resource.id, repr(active)))

        elif not resource.unique():
            # TODO: Figure out a clever way to actually audit these resource types
            if len(active) > 1:
                self.debug("Non-unique resource %s is active on: %s"
                           % (resource.id, repr(active)))
            else:
                self.debug("Non-unique resource %s is not active"
                           % resource.id)

        elif len(active) > 1:
            self.CM.log("Resource %s is active multiple times: %s"
                        % (resource.id, repr(active)))
            rc = 0

        elif resource.orphan():
            self.debug("Resource %s is an inactive orphan" % resource.id)

        elif len(self.inactive_nodes) == 0:
            self.CM.log("WARN: Resource %s not served anywhere" % resource.id)
            rc = 0

        elif self.CM.Env["warn-inactive"] == 1:
            if self.CM.HasQuorum(None) or not resource.needs_quorum:
                self.CM.log("WARN: Resource %s not served anywhere (Inactive nodes: %s)"
                            % (resource.id, repr(self.inactive_nodes)))
            else:
                self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
                           % (resource.id, repr(self.inactive_nodes)))

        elif self.CM.HasQuorum(None) or not resource.needs_quorum:
            self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
                       % (resource.id, repr(self.inactive_nodes)))

        return rc

    def setup(self):
        '''Collect node, resource and constraint data for an audit run.

        Returns 0 when no node is expected to be up (nothing can be
        queried), otherwise 1 after self.resources and self.constraints
        have been filled from the "crm_resource -c" output of the first
        node expected to be up.
        '''
        self.target = None
        self.resources = []
        self.constraints = []
        self.active_nodes = []
        self.inactive_nodes = []

        self.debug("Do Audit %s" % self.name())

        # One pass sorts the nodes by expected state; the first node that
        # should be up is the one queried below.
        for node in self.CM.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] == "up":
                self.active_nodes.append(node)
                if self.target is None:
                    self.target = node
            else:
                self.inactive_nodes.append(node)

        if not self.target:
            # TODO: In Pacemaker 1.0 clusters we'll be able to run crm_resource
            # with CIB_file=/path/to/cib.xml even when the cluster isn't running
            self.debug("No nodes active - skipping %s" % self.name())
            return 0

        # The exit status of the command is not examined.
        (_rc, lines) = self.CM.rsh(self.target, "crm_resource -c", None)

        for line in lines:
            if line.startswith("Resource"):
                self.resources.append(AuditResource(self.CM, line))
            elif line.startswith("Constraint"):
                self.constraints.append(AuditConstraint(self.CM, line))
            else:
                self.CM.log("Unknown entry: %s" % line)

        return 1

    def __call__(self):
        '''Audit every primitive resource; return 1 if all pass, else 0.

        Returns 1 without auditing anything when setup() reports that
        there is nothing to query.
        '''
        rc = 1

        if not self.setup():
            return 1

        for resource in self.resources:
            if resource.type == "primitive":
                if self.doResourceAudit(resource) == 0:
                    rc = 0
        return rc

    def is_applicable(self):
        '''Return 1 for the "crm-lha" and "crm-ais" cluster managers, else 0.'''
        if self.CM["Name"] in ("crm-lha", "crm-ais"):
            return 1
        return 0
class GroupAudit(PrimitiveAudit):
    '''Audit that the members of each resource group run on one node.'''

    def name(self):
        '''Return the name of this audit.'''
        return "GroupAudit"

    def __call__(self):
        '''Check every group; return 1 if all pass, 0 otherwise.

        A group fails when one of its children is active more than once or
        is active on a node other than the expected one.  Returns 1 without
        checking anything when setup() reports nothing to query.
        '''
        if not self.setup():
            return 1

        verdict = 1
        for group in self.resources:
            if group.type != "group":
                continue

            seen_member = False
            expected_node = None
            for member in self.resources:
                if member.parent != group.id:
                    continue

                where = self.CM.ResourceLocation(member.id)
                running = len(where)

                # Only the first child of the group may set the expected
                # location.
                if not seen_member and running > 0:
                    expected_node = where[0]
                seen_member = True

                if running > 1:
                    verdict = 0
                    self.CM.log("Child %s of %s is active more than once: %s"
                                % (member.id, group.id, repr(where)))
                    continue

                if running == 0:
                    # Groups are allowed to be partially active
                    # However we do need to make sure later children aren't running
                    expected_node = None
                    self.debug("Child %s of %s is stopped"
                               % (member.id, group.id))
                    continue

                current = where[0]
                if current == expected_node:
                    self.debug("Child %s of %s is active on %s"
                               % (member.id, group.id, current))
                else:
                    verdict = 0
                    self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s"
                                % (member.id, group.id, current, expected_node))

        return verdict
class CloneAudit(PrimitiveAudit):
    '''Placeholder audit for clones: it only lists their primitive children.'''

    def name(self):
        '''Return the name of this audit.'''
        return "CloneAudit"

    def __call__(self):
        '''Write a debug line per primitive child of each clone; return 1.

        No real check is made yet, so the result is always 1 (also when
        setup() reports that there is nothing to query).
        '''
        if not self.setup():
            return 1

        outcome = 1
        for clone in self.resources:
            if clone.type != "clone":
                continue

            for member in self.resources:
                if member.parent != clone.id:
                    continue
                if member.type != "primitive":
                    continue

                self.debug("Checking child %s of %s..."
                           % (member.id, clone.id))
                # Check max and node_max
                # Obtain with:
                #    crm_resource -g clone_max --meta -r child.id
                #    crm_resource -g clone_node_max --meta -r child.id

        return outcome
class ColocationAudit(PrimitiveAudit):
    '''Audit that colocated resources run where their target runs.'''

    def name(self):
        '''Return the name of this audit.'''
        return "ColocationAudit"

    def crm_location(self, resource):
        '''Return the hosts reported by "crm_resource -W -r <resource> -Q".

        The command runs on self.target.  The first word of every output
        line is taken as a host name; the list is empty when the command
        exits non-zero.
        '''
        (rc, lines) = self.CM.rsh(
            self.target, "crm_resource -W -r %s -Q" % resource, None)
        hosts = []
        if rc == 0:
            for line in lines:
                fields = line.split()
                # A blank line has no first word; skip it instead of
                # failing with IndexError.
                if fields:
                    hosts.append(fields[0])

        return hosts

    def __call__(self):
        '''Check every "rsc_colocation" constraint; return 1 if all hold.

        A constraint fails (result 0) when its resource runs on a node on
        which its target is not running.  A resource that is not running
        at all is only noted in the debug log.  Returns 1 without checking
        anything when setup() reports nothing to query.
        '''
        rc = 1
        if not self.setup():
            return 1

        for coloc in self.constraints:
            if coloc.type != "rsc_colocation":
                continue

            source = self.crm_location(coloc.rsc)
            target = self.crm_location(coloc.target)
            if len(source) == 0:
                self.debug("Colocation audit (%s): %s not running"
                           % (coloc.id, coloc.rsc))
                continue

            for node in source:
                if node not in target:
                    rc = 0
                    self.CM.log("Colocation audit (%s): %s running on %s (not in %s)"
                                % (coloc.id, coloc.rsc, node, repr(target)))
                else:
                    self.debug("Colocation audit (%s): %s running on %s (in %s)"
                               % (coloc.id, coloc.rsc, node, repr(target)))

        return rc
class CrmdStateAudit(ClusterAudit):
    '''Audit that every node is in the state it is expected to be in.

    The object also keeps a small statistics dictionary (self.Stats) that
    can be read and written through item access, has_key() and incr().
    '''

    def __init__(self, cm):
        '''Remember the cluster manager (cm) and reset the statistics.'''
        self.CM = cm
        self.Stats = {"calls": 0,
                      "success": 0,
                      "failure": 0,
                      "skipped": 0,
                      "auditfail": 0}

    def has_key(self, key):
        '''Return whether key is present in the statistics.'''
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        return key in self.Stats

    def __setitem__(self, key, value):
        '''Set the statistic stored under key.'''
        self.Stats[key] = value

    def __getitem__(self, key):
        '''Return the statistic stored under key (KeyError if missing).'''
        return self.Stats[key]

    def incr(self, name):
        '''Increment (or initialize) the value associated with the given name'''
        if name not in self.Stats:
            self.Stats[name] = 0
        self.Stats[name] = self.Stats[name] + 1

    def __call__(self):
        '''Compare the tested state of each node with its expected state.

        Returns 1 when everything matches and 0 when a node for which
        test_node_CM() returned exactly 1 is found (reported as unstable),
        when a node expected to be up gave a result <= 0, or when a node
        expected to be down gave a result > 0.
        '''
        passed = 1
        up_are_down = 0
        down_are_up = 0
        unstable_list = []
        self.debug("Do Audit %s" % self.name())

        for node in self.CM.Env["nodes"]:
            should_be = self.CM.ShouldBeStatus[node]
            rc = self.CM.test_node_CM(node)
            if rc > 0:
                if should_be == "down":
                    down_are_up = down_are_up + 1
                if rc == 1:
                    unstable_list.append(node)
            elif should_be == "up":
                up_are_down = up_are_down + 1

        if len(unstable_list) > 0:
            passed = 0
            self.CM.log("Cluster is not stable: %d (of %d): %s"
                        % (len(unstable_list), self.CM.upcount(),
                           repr(unstable_list)))

        if up_are_down > 0:
            passed = 0
            self.CM.log("%d (of %d) nodes expected to be up were down."
                        % (up_are_down, len(self.CM.Env["nodes"])))

        if down_are_up > 0:
            passed = 0
            self.CM.log("%d (of %d) nodes expected to be down were up."
                        % (down_are_up, len(self.CM.Env["nodes"])))

        return passed

    def name(self):
        '''Return the name of this audit.'''
        return "CrmdStateAudit"

    def is_applicable(self):
        '''Return 1 for the "crm-lha" and "crm-ais" cluster managers, else 0.'''
        if self.CM["Name"] in ("crm-lha", "crm-ais"):
            return 1
        return 0
class CIBAudit(ClusterAudit):
    '''Audit that all nodes of a partition hold the same CIB.

    The object also keeps a small statistics dictionary (self.Stats) that
    can be read and written through item access, has_key() and incr().
    '''

    def __init__(self, cm):
        '''Remember the cluster manager (cm) and reset the statistics.'''
        self.CM = cm
        self.Stats = {"calls": 0,
                      "success": 0,
                      "failure": 0,
                      "skipped": 0,
                      "auditfail": 0}

    def has_key(self, key):
        '''Return whether key is present in the statistics.'''
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        return key in self.Stats

    def __setitem__(self, key, value):
        '''Set the statistic stored under key.'''
        self.Stats[key] = value

    def __getitem__(self, key):
        '''Return the statistic stored under key (KeyError if missing).'''
        return self.Stats[key]

    def incr(self, name):
        '''Increment (or initialize) the value associated with the given name'''
        if name not in self.Stats:
            self.Stats[name] = 0
        self.Stats[name] = self.Stats[name] + 1

    def __call__(self):
        '''Audit the CIB of every partition; return 1 if all pass, else 0.

        Returns 1 when there is no partition to audit.
        '''
        self.debug("Do Audit %s" % self.name())
        passed = 1
        ccm_partitions = self.CM.find_partitions()

        # Covers both an empty result and None (PartitionAudit guards
        # against None from the same call).
        if not ccm_partitions:
            self.debug("\tNo partitions to audit")
            return 1

        for partition in ccm_partitions:
            self.debug("\tAuditing CIB consistency for: %s" % partition)
            if self.audit_cib_contents(partition) == 0:
                passed = 0

        return passed

    def audit_cib_contents(self, hostlist):
        '''Compare the CIB of each host with that of the first usable host.

        hostlist -- whitespace-separated host names of one partition.

        The first host whose configuration could be stored becomes the
        reference (node0); each later host is compared with it by running
        crm_diff on node0.  Returns 1 when every configuration could be
        fetched and every diff succeeded with only "<diff/>" lines as
        output, otherwise 0.
        '''
        passed = 1
        node0 = None
        node0_xml = None

        for node in hostlist.split():
            node_xml = self.store_remote_cib(node, node0)

            if node_xml is None:
                self.CM.log("Could not perform audit: No configuration from %s"
                            % node)
                passed = 0

            elif node0 is None:
                node0 = node
                node0_xml = node_xml

            elif node0_xml is None:
                self.CM.log("Could not perform audit: No configuration from %s"
                            % node0)
                passed = 0

            else:
                (rc, result) = self.CM.rsh(
                    node0,
                    "crm_diff -VV -cf --new %s --original %s"
                    % (node_xml, node0_xml),
                    None)

                if rc != 0:
                    self.CM.log("Diff between %s and %s failed: %d"
                                % (node0_xml, node_xml, rc))
                    passed = 0

                for line in result:
                    if not re.search("<diff/>", line):
                        passed = 0
                        self.debug("CibDiff[%s-%s]: %s" % (node0, node, line))
                    else:
                        self.debug("CibDiff[%s-%s] Ignoring: %s"
                                   % (node0, node, line))

        # The stored files are left in place; nothing removes them here.
        return passed

    def store_remote_cib(self, node, target):
        '''Fetch the CIB of node and copy it to target as a file.

        node   -- host to run the "CibQuery" command on.
        target -- host to receive the file; node itself when false.

        Returns the file name (/tmp/ctsaudit.<node>.xml), or None when the
        query or the copy failed.
        '''
        filename = "/tmp/ctsaudit.%s.xml" % node

        if not target:
            target = node

        (rc, lines) = self.CM.rsh(node, self.CM["CibQuery"], None)
        if rc != 0:
            self.CM.log("Could not retrieve configuration")
            return None

        self.CM.rsh("localhost", "rm -f %s" % filename)
        for line in lines:
            # The last character of each line is cut off before it is
            # appended to the file.
            # NOTE(review): a line containing a single quote would break
            # the quoting of this shell command -- consider writing the
            # file without going through a shell.
            self.CM.rsh("localhost",
                        "echo \'%s\' >> %s" % (line[:-1], filename))

        if self.CM.rsh.cp(filename, "root@%s:%s" % (target, filename)) != 0:
            self.CM.log("Could not store configuration")
            return None
        return filename

    def name(self):
        '''Return the name of this audit.'''
        return "CibAudit"

    def is_applicable(self):
        '''Return 1 for the "crm-lha" and "crm-ais" cluster managers, else 0.'''
        if self.CM["Name"] in ("crm-lha", "crm-ais"):
            return 1
        return 0
class PartitionAudit(ClusterAudit):
    '''Audit the number of cluster partitions and the DC of each one.

    The object also keeps a small statistics dictionary (self.Stats) that
    can be read and written through item access, has_key() and incr().
    '''

    def __init__(self, cm):
        '''Remember the cluster manager (cm) and reset all bookkeeping.'''
        self.CM = cm
        self.Stats = {"calls": 0,
                      "success": 0,
                      "failure": 0,
                      "skipped": 0,
                      "auditfail": 0}
        # Per-node results of the status, epoche and quorum commands.
        self.NodeEpoche = {}
        self.NodeState = {}
        self.NodeQuorum = {}

    def has_key(self, key):
        '''Return whether key is present in the statistics.'''
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        return key in self.Stats

    def __setitem__(self, key, value):
        '''Set the statistic stored under key.'''
        self.Stats[key] = value

    def __getitem__(self, key):
        '''Return the statistic stored under key (KeyError if missing).'''
        return self.Stats[key]

    def incr(self, name):
        '''Increment (or initialize) the value associated with the given name'''
        if name not in self.Stats:
            self.Stats[name] = 0
        self.Stats[name] = self.Stats[name] + 1

    def __call__(self):
        '''Audit all partitions; return 1 if everything passes, else 0.

        Returns 1 when no partition is found.  The audit fails when the
        number of partitions differs from CM.partitions_expected or when
        audit_partition() fails for any partition.
        '''
        self.debug("Do Audit %s" % self.name())
        passed = 1
        ccm_partitions = self.CM.find_partitions()

        if not ccm_partitions:
            return 1

        if len(ccm_partitions) != self.CM.partitions_expected:
            self.CM.log("ERROR: %d cluster partitions detected:"
                        % len(ccm_partitions))
            passed = 0
            for partition in ccm_partitions:
                self.CM.log("\t%s" % partition)

        for partition in ccm_partitions:
            if self.audit_partition(partition) == 0:
                passed = 0

        return passed

    def trim_string(self, avalue):
        '''Return avalue without its last character.

        Returns None when avalue is false (None, empty) or only one
        character long.
        '''
        if not avalue:
            return None
        if len(avalue) > 1:
            return avalue[:-1]
        return None

    def trim2int(self, avalue):
        '''Return avalue, without its last character, converted to int.

        Returns None when avalue is false (None, empty) or only one
        character long.  Raises ValueError when the rest is not a number.
        '''
        if not avalue:
            return None
        if len(avalue) > 1:
            return int(avalue[:-1])
        return None

    def audit_partition(self, partition):
        '''Audit one partition; return 1 if it passes, 0 otherwise.

        partition -- whitespace-separated names of the nodes in it.

        The audit fails when no lowest epoche could be determined, when a
        DC has an epoche other than the lowest one, or when more than one
        DC is found.  Finding no DC at all is logged only.  As a side
        effect CM.ShouldBeStatus is set to "up" for every listed node and
        back to "down" for nodes that give no epoche.
        '''
        passed = 1
        dc_found = []
        dc_allowed_list = []
        lowest_epoche = None
        node_list = partition.split()

        self.debug("Auditing partition: %s" % (partition))
        for node in node_list:
            if self.CM.ShouldBeStatus[node] != "up":
                self.CM.log("Warn: Node %s appeared out of nowhere" % (node))
                self.CM.ShouldBeStatus[node] = "up"
                # not in itself a reason to fail the audit (not what we're
                #  checking for in this audit)

            self.NodeState[node] = self.CM.rsh(
                node, self.CM["StatusCmd"] % node, 1)
            self.NodeEpoche[node] = self.CM.rsh(
                node, self.CM["EpocheCmd"], 1)
            self.NodeQuorum[node] = self.CM.rsh(
                node, self.CM["QuorumCmd"], 1)

            self.debug("Node %s: %s - %s - %s."
                       % (node, self.NodeState[node],
                          self.NodeEpoche[node], self.NodeQuorum[node]))
            self.NodeState[node] = self.trim_string(self.NodeState[node])
            self.NodeEpoche[node] = self.trim2int(self.NodeEpoche[node])
            self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node])

            if not self.NodeEpoche[node]:
                self.CM.log("Warn: Node %s dissappeared: cant determin epoche"
                            % (node))
                self.CM.ShouldBeStatus[node] = "down"
                # not in itself a reason to fail the audit (not what we're
                #  checking for in this audit)
            elif (lowest_epoche is None
                  or self.NodeEpoche[node] < lowest_epoche):
                lowest_epoche = self.NodeEpoche[node]

        if not lowest_epoche:
            self.CM.log("Lowest epoche not determined in %s" % (partition))
            passed = 0

        for node in node_list:
            if self.CM.ShouldBeStatus[node] != "up":
                continue

            # Record the nodes that were examined for the DC role; this
            # list used to stay empty, so the "DC not found" message
            # below always reported 0 nodes.
            dc_allowed_list.append(node)

            if not self.CM.is_node_dc(node, self.NodeState[node]):
                continue

            dc_found.append(node)
            if self.NodeEpoche[node] == lowest_epoche:
                self.debug("%s: OK" % node)
            elif not self.NodeEpoche[node]:
                self.debug("Check on %s ignored: no node epoche" % node)
            elif not lowest_epoche:
                self.debug("Check on %s ignored: no lowest epoche" % node)
            else:
                self.CM.log("DC %s is not the oldest node (%d vs. %d)"
                            % (node, self.NodeEpoche[node], lowest_epoche))
                passed = 0

        if len(dc_found) == 0:
            self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)"
                        % (len(dc_allowed_list), str(dc_allowed_list),
                           str(node_list)))

        elif len(dc_found) > 1:
            self.CM.log("%d DCs (%s) found in cluster partition: %s"
                        % (len(dc_found), str(dc_found), str(node_list)))
            passed = 0

        if passed == 0:
            # Dump what was collected to help with the diagnosis.
            for node in node_list:
                if self.CM.ShouldBeStatus[node] == "up":
                    self.CM.log("epoche %s : %s"
                                % (self.NodeEpoche[node],
                                   self.NodeState[node]))

        return passed

    def name(self):
        '''Return the name of this audit.'''
        return "PartitionAudit"

    def is_applicable(self):
        '''Return 1 for the "crm-lha" and "crm-ais" cluster managers, else 0.'''
        if self.CM["Name"] in ("crm-lha", "crm-ais"):
            return 1
        return 0
# Register the audit classes.  AuditList() instantiates them in exactly
# this order.
AllAuditClasses.extend([
    DiskAudit,
    LogAudit,
    CrmdStateAudit,
    PartitionAudit,
    PrimitiveAudit,
    GroupAudit,
    CloneAudit,
    ColocationAudit,
    CIBAudit,
])
def AuditList(cm):
    '''Return a new list with one instance of every registered audit class.

    Each class in AllAuditClasses is called with cm as its only argument;
    the instances keep the order of the registry.
    '''
    return [audit_class(cm) for audit_class in AllAuditClasses]
File Metadata
Details
Attached
Mime Type
text/x-script.python
Expires
Tue, Oct 29, 7:09 PM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
942496
Default Alt Text
CTSaudits.py (24 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment