RAID monitor

Fujitsu.Servers.PRIMERGY.Linux.Raid.ShellScript.LessThan.ThreeState.Monitor (UnitMonitor)

Monitors the SV RAID daemon (amDaemon) process using a shell script and generates an alert if the process appears to malfunction.

Knowledge Base article:

Summary

This monitor checks the health of the SV RAID daemon (amDaemon).

Causes

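An unhealthy state for this monitor indicates that the SV RAID daemon (amDaemon) does not respond: the monitoring script found that it is not installed, not running, or not listening on TCP port 3173.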

Resolutions

Restart the failed SV RAID daemon (amDaemon) on the target server.

Element properties:

Target: Fujitsu.Servers.PRIMERGY.Linux.RaidManagement
Parent Monitor: System.Health.AvailabilityState
Category: AvailabilityHealth
Enabled: True
Alert Generate: True
Alert Severity: MatchMonitorHealth
Alert Priority: Normal
Alert Auto Resolve: True
Monitor Type: Fujitsu.Servers.PRIMERGY.Linux.ShellScript.LessThanThreshold.ThreeState.MonitorType
Remotable: True
Accessibility: Public
Alert Message
RAID shell script found a problem!
Script on host {0} returned with value {3}, output (if any): {1} {2}
RunAs: Default

Source Code:

<UnitMonitor ID="Fujitsu.Servers.PRIMERGY.Linux.Raid.ShellScript.LessThan.ThreeState.Monitor" Accessibility="Public" Enabled="true" Target="Fujitsu.Servers.PRIMERGY.Linux.RaidManagement" ParentMonitorID="SystemHealth!System.Health.AvailabilityState" Remotable="true" Priority="Normal" TypeID="Fujitsu.Servers.PRIMERGY.Linux.ShellScript.LessThanThreshold.ThreeState.MonitorType" ConfirmDelivery="false">
<Category>AvailabilityHealth</Category>
<!-- Alert configuration for this monitor. The alert text is the message referenced by the AlertMessage attribute. -->
<AlertSettings AlertMessage="Fujitsu.Servers.PRIMERGY.Linux.Raid.ShellScript.LessThan.ThreeState.Monitor.AlertMessage">
<AlertOnState>Warning</AlertOnState>
<AutoResolve>true</AutoResolve>
<AlertPriority>Normal</AlertPriority>
<AlertSeverity>MatchMonitorHealth</AlertSeverity>
<!-- Values handed to the alert message, in order: the target's NetworkName, then the script's StdOut, StdErr and ReturnCode.
     NOTE(review): presumably AlertParameter1..4 fill the placeholders {0}..{3} of the message text; confirm against the message definition. -->
<AlertParameters>
<AlertParameter1>$Target/Property[Type="Fujitsu.Servers.PRIMERGY.Linux.ManagementClientSoftware"]/NetworkName$</AlertParameter1>
<AlertParameter2>$Data/Context///*[local-name()="StdOut"]$</AlertParameter2>
<AlertParameter3>$Data/Context///*[local-name()="StdErr"]$</AlertParameter3>
<AlertParameter4>$Data/Context///*[local-name()="ReturnCode"]$</AlertParameter4>
</AlertParameters>
</AlertSettings>
<!-- Maps each state ID of the monitor type (MonitorTypeStateID) to the health state this monitor reports for it. -->
<OperationalStates>
<OperationalState ID="StatusOK" MonitorTypeStateID="StatusOK" HealthState="Success"/>
<OperationalState ID="StatusWarning" MonitorTypeStateID="StatusWarning" HealthState="Warning"/>
<OperationalState ID="StatusError" MonitorTypeStateID="StatusError" HealthState="Error"/>
</OperationalStates>
<Configuration>
<Interval>300</Interval>
<TargetSystem>$Target/Property[Type="Fujitsu.Servers.PRIMERGY.Linux.ManagementClientSoftware"]/NetworkName$</TargetSystem>
<ScriptName>amDaemonHealth.sh</ScriptName>
<ShellScript>#!/bin/sh
# Fujitsu
# Copyright 2014-2018 FUJITSU LIMITED
#
# amDaemonHealth.sh - monitors the health of ServerView RAID daemon
#
# Usage: amDaemonHealth.sh [dbg]
#   dbg (any letter case) enables progress messages on stdout and a short
#   run log under /tmp/Fujitsu/.
#
# Exit codes:
#   0 - all checks passed, or 'eecd' was not found (system is skipped)
#   2 - amDaemon is not installed, not running, or not listening on
#       TCP port 3173; the reason is printed on stdout

OK=0
WARN=1 # defined for completeness; no check below reports a warning
ERR=2
RETVALUE=$OK # we assume all is well
AMDAEMONPORT=3173
# Use only the base name of the script: $0 may carry a directory part, which
# would point the log into a sub-directory of /tmp/Fujitsu that is never created.
SCRIPTLOG="/tmp/Fujitsu/${0##*/}.log"

# arguments check: only the first argument is looked at, compared case-insensitively
case "$1" in
  [Dd][Bb][Gg]) DBG="dbg" ;;
  *) DBG="" ;;
esac

# Print the given message only when the script runs in debug mode.
debug() {
  if [ "$DBG" = "dbg" ]
  then
    echo "$*"
  fi
}

debug "Running $0 in debug mode"

# In DEBUG mode: generate some files under "/tmp" to see, the script is running
if [ "$DBG" = "dbg" ]
then
  mkdir -p /tmp/Fujitsu/
  # keep only the tail of the previous log so the file cannot grow without bound
  tail "$SCRIPTLOG" 2&gt;/dev/null &gt; "$SCRIPTLOG.bk"
  mv -f "$SCRIPTLOG.bk" "$SCRIPTLOG"
  echo "$0 V__MP_VERSION__ last ran at $(date)" &gt;&gt; "$SCRIPTLOG"
fi

EECD=$(whereis eecd | awk 'NF&gt;1{print $2}')
if [ -z "$EECD" ]
then
  # either not a system from FUJITSU or no ServerView agents installed - do nothing!
  debug "Not a system from FUJITSU or ServerView Agents not installed."
  exit $OK
fi

# test that amDaemon is installed
if ! ls /usr/sbin/amDaemon &gt;/dev/null 2&gt;&amp;1
then
  # A "permission denied" failure means the path could not be inspected by
  # this account; that case is not reported as "not installed".
  if ! LC_ALL=C ls /usr/sbin/amDaemon 2&gt;&amp;1 | grep -qi 'permission denied'
  then
    echo "ServerView RAID (amDaemon) is *NOT* installed! Abort with Error."
    exit $ERR
  fi
fi
debug "ServerView RAID (amDaemon) is installed at: /usr/sbin/amDaemon"

# test that ServerView RAID (amDaemon) is running
# ('[a]mdaemon' keeps the grep process itself out of the match)
AMDAEMONRUNNING=$(ps -A | grep -i '[a]mdaemon')
if [ -z "$AMDAEMONRUNNING" ]
then
  echo "ServerView RAID (amDaemon) is *NOT* running! Abort with Error."
  exit $ERR
fi
debug "ServerView RAID (amDaemon) is running"

# test if ServerView RAID (amDaemon) is listening
# netstat is not installed on every distribution; ss prints the local
# address in the same (4th) column, so it serves as a fallback.
if command -v netstat &gt;/dev/null 2&gt;&amp;1
then
  LISTENERS=$(netstat -nlt 2&gt;/dev/null)
else
  LISTENERS=$(ss -nlt 2&gt;/dev/null)
fi
# Count only sockets whose local address ends in ":3173"; a plain text search
# for 3173 would also hit other ports or unrelated columns containing the digits.
AMDAEMONLISTENING=$(printf '%s\n' "$LISTENERS" | awk -v port="$AMDAEMONPORT" '$4 ~ (":" port "$") { n++ } END { print n + 0 }')
if [ "$AMDAEMONLISTENING" -lt 1 ]
then
  echo "ServerView RAID (amDaemon) is *NOT* listening! Abort with Error."
  exit $ERR
fi
debug "ServerView RAID (amDaemon) is listening"

exit $RETVALUE
</ShellScript>
<ScriptArguments>3_chars_DBG_for_DEBUG</ScriptArguments>
<Timeout>60</Timeout>
<UserName>$RunAs[Name="Unix!Microsoft.Unix.ActionAccount"]/UserName$</UserName>
<Password>$RunAs[Name="Unix!Microsoft.Unix.ActionAccount"]/Password$</Password>
<!-- Matches when the ReturnCode element of the script result is exactly one digit: 0, 1 or 2.
     NOTE(review): how the monitor type applies this filter is not visible here; presumably results with any other return code are disregarded. Confirm in the MonitorType definition. -->
<FilterExpression>
<RegExExpression>
<ValueExpression>
<XPathQuery>//*[local-name()="ReturnCode"]</XPathQuery>
</ValueExpression>
<Operator>MatchesRegularExpression</Operator>
<Pattern>^[0-2]$</Pattern>
</RegExExpression>
</FilterExpression>
<WarningThreshold>1</WarningThreshold>
<ErrorThreshold>2</ErrorThreshold>
</Configuration>
</UnitMonitor>