if __name__ == '__main__':
    # Script entry point: validate the invocation, then poll every component
    # and remember the worst non-OK state in the module-level
    # ``components_health``.
    if len(sys.argv) < 7:
        # sys.exit() instead of the site-provided exit(), which is only
        # meant for the interactive shell and may be missing (python -S).
        sys.exit(1)
    if fts_version() != "__MP_VERSION__":
        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print("FTSModule version mismatch. Wait for next discovery run.")
        sys.exit(1)

    # Components that are only meaningful when RAID controllers were found.
    raid_components = (RAIDSUBSYSTEM, RAIDCONTROLLER, PHYSICALDISK, LOGICALDRIVE)
    # Components collected through snmp_components_health_status(); all
    # others go through cim_components_health_status().
    snmp_components = (STORAGE, NETWORK) + raid_components

    # Monitoring of the components
    for i in range(15):  # Enumerate through all components
        if check_component != ALL_COMPONENTS and check_component != components_name[i]:
            continue
        if i in raid_components and not raid_info['controllers']:
            # No controllers (None or empty): nothing RAID-related to report.
            continue

        if i in snmp_components:
            status = snmp_components_health_status(i)
        else:
            status = cim_components_health_status(i)

        # Save worst state for Other status: ERROR is sticky, any other
        # non-OK state is recorded only while no ERROR has been seen.
        if status == STATUS_ERROR:
            components_health = status
        elif status != STATUS_OK and components_health != STATUS_ERROR:
            components_health = status
def cim_components_health_status(subsystem_number):
    """Collect and report the health of one CIM-backed subsystem.

    MGMTCTRL and POWERCONSUMP are delegated to get_management_health() and
    get_power_consumption_heatlh(). For PYOVERALLSTATE the entry
    'ServerView Health State' is built from the matching health_list
    instance, together with 'ServerView Agents Version' and any
    'BIOS Selftest' entries. For every other subsystem the matching
    health_list instance is expanded into its sub-instances.

    Args:
        subsystem_number: index used to look up components_name and
            cim_health_subsystem.

    Returns:
        The delegated helper's result, or the result of
        scom.append_status() for the collected names, statuses and
        failure reasons.
    """
    component_name, component_status, failure_reasons = [], [], []
    component = components_name[subsystem_number]
    subsystem_name = cim_health_subsystem[subsystem_number]
    # NOTE(review): overall_health is assigned below but never read in this
    # function.
    overall_health = STATUS_OK
    # Subsystems with a dedicated collector.
    if subsystem_number == MGMTCTRL:
        return get_management_health()
    if subsystem_number == POWERCONSUMP:
        return get_power_consumption_heatlh()
    if subsystem_number == PYOVERALLSTATE:
        for instance in health_list:
            if str(instance['elementname']) == subsystem_name:
                component_name.append('ServerView Health State')
                # components_health is the worst state recorded so far by
                # the component loop in the script entry point.
                if components_health != None:
                    tmpstatus = get_health(instance['healthstate'])
                    if tmpstatus == components_health:
                        # One of the components already has the same error
                        # status; the user needs to solve that error first,
                        # so the overall entry is reported as OK.
                        component_status.append(STATUS_OK)
                        failure_reasons.append("")
                    else:
                        # The overall state is not explained by a component
                        # already reported: look up a separate reason.
                        component_status.append(tmpstatus)
                        log.debug("Trying to get reason for overall status %s..." % tmpstatus)
                        reason = get_other_failure_reason(health_list)
                        log.debug("Reason: %s." % reason)
                        failure_reasons.append(reason)
                else:
                    # NOTE(review): no failure_reasons entry is appended on
                    # this path - confirm append_status tolerates a shorter
                    # reasons list.
                    component_status.append(get_health(instance['healthstate']))
                component_name.append('ServerView Agents Version')
                component_status.append(STATUS_OK) # Assume OK
            # Substring match, unlike the exact comparison above.
            if 'BIOS Selftest' in str(instance['elementname']):
                component_name.append('BIOS Selftest')
                component_status.append(get_health(instance['healthstate']))
    else:
        for instance in health_list:
            if str(instance['elementname']) == subsystem_name:
                overall_health = get_health(instance['healthstate'])
                instance_id = instance['instanceid']
                # Sub-instances are the entries whose instanceid contains
                # the parent's id followed by '-'; presence value 2 is
                # skipped entirely.
                for subinstance in health_list:
                    if instance_id+"-" in subinstance['instanceid'] and int(subinstance['presence']) != 2:
                        component_name.append(scom.get_component_name(subsystem_number, \
                            str(subinstance['elementname']), str(subinstance['associationkey']), str(subinstance['instanceid'])))
                        status = get_health(subinstance['healthstate'])
                        if status == STATUS_OK:
                            failure_reasons.append("")
                        else:
                            log.debug("Trying to get reason for status %s..." % status)
                            reason = get_failure_reason(cim, str(subinstance['elementname']), subsystem_number)
                            log.debug("Reason: %s." % reason)
                            failure_reasons.append(reason)
                        # Presence 3 downgrades anything that is not already
                        # an error to DEGRADED.
                        # NOTE(review): this appends a second failure_reasons
                        # entry for the same component, so the list can grow
                        # longer than component_name - confirm how
                        # append_status pairs reasons with components.
                        if int(subinstance['presence']) == 3 and status != STATUS_ERROR:
                            status = STATUS_DEGRADED
                            failure_reasons.append("Presence is set to 'not manageable'")
                        # Only presence values 1 and 3 are accepted here;
                        # anything else is recorded as a communication error.
                        if int(subinstance['presence']) != 1 and int(subinstance['presence']) != 3:
                            scom.communication_errors.append("Invalid presence value (%d) for '%s'" % \
                                (int(subinstance['presence']), str(subinstance['elementname'])))
                        component_status.append(status)
    return scom.append_status(component, component_name, component_status, subsystem_number, failure_reasons)
def snmp_components_health_status(subsystem_number):
    """Run the health collector for one storage, network or RAID subsystem.

    Args:
        subsystem_number: one of STORAGE, NETWORK, RAIDSUBSYSTEM,
            RAIDCONTROLLER, PHYSICALDISK or LOGICALDRIVE.

    Returns:
        The collector's return value, or STATUS_UNKNOWN when the collector
        raised or the subsystem is not handled here.
    """
    try:
        if subsystem_number == STORAGE:
            return get_storage_health()
        if subsystem_number == NETWORK:
            return get_network_health()
        if subsystem_number == RAIDSUBSYSTEM:
            return get_overall_raid_health()
        if subsystem_number == RAIDCONTROLLER:
            return get_raid_health()
        if subsystem_number == PHYSICALDISK:
            return get_raid_phys_dev_health()
        if subsystem_number == LOGICALDRIVE:
            return get_raid_log_drv_health()
    except Exception:
        # Best effort: a failing collector is reported and mapped to UNKNOWN.
        # Catch Exception rather than everything so that SystemExit and
        # KeyboardInterrupt are not swallowed and misreported as SNMP errors.
        log.scom_alert(SCOM_INFO, '260 Error gathering %s component info from SNMP: %s' % (cim_health_subsystem[subsystem_number], sys.exc_info()))
    return STATUS_UNKNOWN
def get_power_consumption_heatlh():
    """Report the power-consumption health found in health_list.

    Every health_list entry whose element name equals the POWERCONSUMP
    subsystem name contributes one status, labelled with the POWERCONSUMP
    component name.

    Returns:
        The result of scom.append_status() for the collected entries.
    """
    statuses = [
        get_health(entry['healthstate'])
        for entry in health_list
        if str(entry['elementname']) == cim_health_subsystem[POWERCONSUMP]
    ]
    label = components_name[POWERCONSUMP]
    # One name per collected status, all carrying the same label.
    names = [label] * len(statuses)
    return scom.append_status(label, names, statuses, POWERCONSUMP)
def get_storage_health():
    """Report the state of every SNMP device whose type contains "DiskStorage".

    Returns:
        STATUS_UNKNOWN when SNMP is disabled, otherwise the result of
        scom.append_status() for the collected devices.
    """
    # Storage is reported regardless of RAID presence: we should monitor
    # both Storage and RAID subsystems.
    if not snmp.enabled:
        return STATUS_UNKNOWN

    names, states = [], []
    for dev in snmp.dev_idxs:  # cached
        if "DiskStorage" not in snmp.get_var(SNMP_DEVICE_TYPE, dev):
            continue
        names.append(snmp.get_var(SNMP_DEVICE_DESCR, dev))
        # Translate the numeric device status through SNMP_StorState.
        states.append(SNMP_StorState[snmp.get_var_i(SNMP_DEVICE_STATUS, dev)])
    return scom.append_status(components_name[STORAGE], names, states, STORAGE)
def get_network_health():
    """Report the state of every interface whose type contains 'ethernet'.

    The state is derived from the interface's admin/oper status and then
    combined with the status of any "Network" device whose description
    contains the interface description; the worse of the two wins.

    Returns:
        STATUS_UNKNOWN when SNMP is disabled, otherwise the result of
        scom.append_status() for the collected interfaces.
    """
    if not snmp.enabled:
        return STATUS_UNKNOWN

    def _enum_value(text):
        # Keep what follows the first '(' minus the final character,
        # presumably turning a 'label(N)' rendering into N.
        return int(text.split('(')[1][:-1])

    names, states = [], []
    for if_idx in snmp.get_idx(SNMP_NET_IF_INDEX, SNMP_NET_IF_INDEX_LEN):
        if 'ethernet' not in snmp.get_var(SNMP_NET_IF_TYPE, if_idx):
            continue

        descr = snmp.get_var(SNMP_NET_IF_DESCR, if_idx)
        admin = _enum_value(snmp.get_var(SNMP_NET_IF_ADMIN_STATUS, if_idx))
        oper = _enum_value(snmp.get_var(SNMP_NET_IF_OPER_STATUS, if_idx))

        if admin == 2 and oper == 1:
            # ifAdminStatus=down and ifOperStatus=up then something is wrong
            status = STATUS_ERROR
        elif oper in (4, 6):
            # ifOperStatus=unknown or notPresent
            status = STATUS_DEGRADED
        else:
            # we assume everything is ok
            status = STATUS_OK

        # status from device
        for dev in snmp.dev_idxs:  # cached
            if ("Network" in snmp.get_var(SNMP_DEVICE_TYPE, dev)
                    and descr in snmp.get_var(SNMP_DEVICE_DESCR, dev)):
                dev_status = SNMP_NetState[snmp.get_var_i(SNMP_DEVICE_STATUS, dev)]
                worst = max(status_to_int[status], status_to_int[dev_status])
                status = int_to_status[worst]

        names.append(descr)
        states.append(status)
    return scom.append_status(components_name[NETWORK], names, states, NETWORK)
def get_raid_info():
    """Fill the module-level raid_info dict with what is known about RAID.

    Nothing is gathered unless check_amDaemon() succeeds. When the CIM
    enumeration of "SVS_PGYHostRaidController" yields a result, version and
    controllers come from CIM and the protocol is set to 'CIM'; otherwise,
    if SNMP is enabled, version and controller indexes come from SNMP.
    The gathered values are written to the debug log in either case.
    """
    global raid_info
    log.debug('Getting RAID info')
    if check_amDaemon():
        raid_info['daemon'] = True
        controllers = cim.enumerate_instances("SVS_PGYHostRaidController", True)
        if controllers is not None:
            # CIM path: the version is taken from the first software
            # identity whose InstanceID entry mentions 'RAID Manager'.
            for identity in cim.enumerate_instance_names("SVS_PGYSoftwareIdentity"):
                if 'RAID Manager' in identity['InstanceID'][1]:
                    details = cim.get_instance(identity)
                    raid_info['version'] = details[0]['versionstring']
                    break
            raid_info['avail'] = True
            raid_info['protocol'] = 'CIM'
            raid_info['controllers'] = controllers
        elif snmp.enabled:
            # SNMP fallback; raid_info['protocol'] is left untouched here.
            raid_info['version'] = snmp.get_var(SNMP_SV_RAID_AGENT_VER)
            raid_info['avail'] = True
            raid_info['controllers'] = snmp.get_idx(SNMP_CONTROL_CTRLNR, SNMP_CONTROL_INDEX_LEN)
    summary = (raid_info['daemon'], raid_info['avail'], raid_info['version'], raid_info['protocol'])
    log.debug(" gathered RAID info: daemon %s, avail %s, version %s, protocol %s" % summary)
def get_raid_version():
    """Rate the installed ServerView RAID version against the thresholds.

    A version below MIN_RD_VER is an error, a version below WAR_RD_VER (but
    not below MIN_RD_VER) is degraded, anything else is OK.

    Returns:
        STATUS_UNKNOWN when no RAID information is available, otherwise the
        result of scom.append_status() for the single version entry.
    """
    if not raid_info['avail']:
        return STATUS_UNKNOWN

    version = raid_info['version']
    # The error threshold must take precedence. With two independent checks
    # a version below both thresholds was always downgraded to DEGRADED, so
    # ERROR could never be reported and the "can we monitor RAID" guard in
    # get_raid_health() never fired.
    # NOTE(review): the comparison uses the values as stored; if they are
    # strings it is lexicographic - confirm the version format.
    if version < MIN_RD_VER:
        svstatus = STATUS_ERROR
    elif version < WAR_RD_VER:
        svstatus = STATUS_DEGRADED
    else:
        svstatus = STATUS_OK

    label = "ServerView RAID Version"
    return scom.append_status(label, [label], [svstatus], -1)
def get_overall_raid_health():
    """Report the overall state of the RAID subsystem.

    With the 'CIM' protocol the state comes from the health_list entries
    whose element name contains 'ServerView RAID System' (the last match
    wins, STATUS_UNKNOWN if there is none); otherwise it is read via SNMP.

    Returns:
        STATUS_UNKNOWN when no RAID information is available, otherwise the
        result of scom.append_status() for the single subsystem entry.
    """
    if not raid_info['avail']:
        return STATUS_UNKNOWN

    if raid_info['protocol'] != 'CIM':
        overall = SNMP_CompState[snmp.get_var_i(SNMP_SV_RAID_STATUS)]
    else:
        overall = STATUS_UNKNOWN
        for entry in health_list:
            element = entry['elementname']
            if element is not None and 'ServerView RAID System' in element:
                overall = get_health(entry['healthstate'])

    label = components_name[RAIDSUBSYSTEM]
    return scom.append_status(label, [label], [overall], RAIDSUBSYSTEM)
def get_raid_health():
    """Report the state of every RAID controller.

    Returns:
        STATUS_UNKNOWN when no RAID information is available, when
        get_raid_version() reports STATUS_ERROR, or when no controller was
        collected; otherwise the result of scom.append_status().
    """
    def get_cim():
        # CIM path: raid_info['controllers'] holds the controller instances.
        controllers = raid_info['controllers']
        if controllers is None or len(controllers) == 0:
            return STATUS_UNKNOWN
        names, states = [], []
        for ctrl in controllers:
            names.append(ctrl['elementname'])
            states.append(get_health(ctrl['healthstate']))
        return scom.append_status(components_name[RAIDCONTROLLER], names, states, RAIDCONTROLLER)

    def get_snmp():
        # SNMP path: raid_info['controllers'] holds the controller indexes.
        idxs = raid_info['controllers']
        if idxs is None or len(idxs) == 0:
            return STATUS_UNKNOWN
        overall = SNMP_CompState[snmp.get_var_i(SNMP_CONTROL_SV_STATUS)]
        names, states = [], []
        for idx in idxs:
            # Controllers driven by 'md' are left out.
            if snmp.get_var(SNMP_CONTROL_DRIVER_NAME, idx).lower() == 'md':
                continue
            names.append(snmp.get_var(SNMP_CONTROL_DESCR, idx))
            # Per-controller status is only queried when the overall
            # controller status is not OK.
            if overall != STATUS_OK:
                states.append(SNMP_CompState[snmp.get_var_i(SNMP_CONTROL_STATUS, idx)])
            else:
                states.append(STATUS_OK)
        if not names:
            return STATUS_UNKNOWN
        return scom.append_status(components_name[RAIDCONTROLLER], names, states, RAIDCONTROLLER)

    if not raid_info['avail']:
        return STATUS_UNKNOWN
    # checks if we can monitor raid
    if get_raid_version() == STATUS_ERROR:
        return STATUS_UNKNOWN
    if raid_info['protocol'] == 'SNMP':
        return get_snmp()
    return get_cim()
def get_raid_drive_health(cimclass, component_no):
    """Report the state of every drive instance of a CIM class.

    Args:
        cimclass: name of the CIM class to enumerate.
        component_no: index into components_name, also passed on to
            scom.append_status().

    Returns:
        STATUS_UNKNOWN when no RAID information is available or the
        enumeration yields nothing, otherwise the result of
        scom.append_status() for the collected drives.
    """
    if not raid_info['avail']:
        return STATUS_UNKNOWN

    drives = cim.enumerate_instances(cimclass)
    if not drives:
        # get_raid_info() checks enumerate_instances() for None, so a
        # missing result is treated like an empty one instead of letting
        # len(None) raise a TypeError.
        return STATUS_UNKNOWN

    component_name, component_status = [], []
    for drive in drives:
        component_name.append("%s (%s)" % (drive['elementname'], drive['deviceid']))
        component_status.append(get_health(drive['healthstate']))
    return scom.append_status(components_name[component_no], component_name, component_status, component_no)
def get_raid_phys_dev_health():
    """Report the state of every physical RAID disk.

    Returns:
        STATUS_UNKNOWN when no RAID information is available or no disk was
        found; otherwise the result of scom.append_status() (SNMP) or of
        get_raid_drive_health() (any other protocol).
    """
    def get_snmp():
        idxs = snmp.get_idx(SNMP_PHYS_DEV_CTRLNR, SNMP_PHYS_DEV_INDEX_LEN)
        if len(idxs) == 0:
            return STATUS_UNKNOWN
        overall = SNMP_CompState[snmp.get_var_i(SNMP_PHYS_DEV_SV_STATUS)]
        names, states = [], []
        for idx in idxs:
            descr = add_controller_number(snmp.get_var(SNMP_PHYS_DEV_CTRLNR, idx), snmp.get_var(SNMP_PHYS_DEV_DESCR, idx))
            status = STATUS_OK
            # Individual disks are only inspected when the overall physical
            # device status is not OK; a disk rated worse than OK is then
            # reported with that overall status.
            if overall != STATUS_OK:
                disk_state = SNMP_DevState[snmp.get_var_i(SNMP_PHYS_DEV_STATUS, idx)]
                if status_to_int[disk_state] > status_to_int[STATUS_OK]:
                    status = overall
            names.append(descr)
            states.append(status)
        return scom.append_status(components_name[PHYSICALDISK], names, states, PHYSICALDISK)

    if not raid_info['avail']:
        return STATUS_UNKNOWN
    if raid_info['protocol'] == 'SNMP':
        return get_snmp()
    return get_raid_drive_health("SVS_PGYDiskDrive", PHYSICALDISK)
def get_raid_log_drv_health():
    """Report the state of every logical RAID drive.

    Returns:
        STATUS_UNKNOWN when no RAID information is available or no drive
        was found; otherwise the result of scom.append_status() (SNMP) or
        of get_raid_drive_health() (any other protocol).
    """
    def get_snmp():
        idxs = snmp.get_idx(SNMP_LOG_DRV_CTRLNR, SNMP_LOG_DRV_INDEX_LEN)
        if len(idxs) == 0:
            return STATUS_UNKNOWN
        overall = SNMP_CompState[snmp.get_var_i(SNMP_LOG_DRV_SV_STATUS)]
        names, states = [], []
        for idx in idxs:
            descr = add_controller_number(snmp.get_var(SNMP_LOG_DRV_CTRLNR, idx), snmp.get_var(SNMP_LOG_DRV_NAME, idx))
            status = STATUS_OK
            # Individual drives are only inspected when the overall logical
            # drive status is not OK; a drive rated worse than OK is then
            # reported with that overall status.
            if overall != STATUS_OK:
                drive_state = SNMP_DrvState[snmp.get_var_i(SNMP_LOG_DRV_STATUS, idx)]
                if status_to_int[drive_state] > status_to_int[STATUS_OK]:
                    status = overall
            names.append(descr)
            states.append(status)
        return scom.append_status(components_name[LOGICALDRIVE], names, states, LOGICALDRIVE)

    if not raid_info['avail']:
        return STATUS_UNKNOWN
    if raid_info['protocol'] == 'SNMP':
        return get_snmp()
    return get_raid_drive_health("SVS_PGYStoragePoolCompositeExtent", LOGICALDRIVE)