if __name__ == '__main__':
    # Entry point: sanity-check the invocation, then poll the requested
    # components and fold their results into the module-level
    # ``components_health``.
    #
    # sys.exit() is used instead of the bare exit() helper: the latter is only
    # injected by the ``site`` module for interactive use and is not guaranteed
    # to exist in every interpreter start-up mode.
    if len(sys.argv) < 7:
        # Fewer than seven argv entries: nothing sensible can be done.
        sys.exit(1)
    if fts_version() != "__MP_VERSION__":
        # Parenthesised form prints the same text under Python 2 and 3.
        print("FTSModule version mismatch. Wait for next discovery run.")
        sys.exit(1)

    # Monitoring of the components
    for i in range(15):  # Enumerate through all components
        # Skip components that were not requested on this run.
        if check_component != ALL_COMPONENTS and check_component != components_name[i]:
            continue

        # These subsystems are read over SNMP; every other one goes via CIM.
        if i in (STORAGE, NETWORK, RAIDSUBSYSTEM, RAIDCONTROLLER, PHYSICALDISK, LOGICALDRIVE):
            status = snmp_components_health_status(i)
        else:
            status = cim_components_health_status(i)

        # Save worst state for Other status: an error is sticky, any other
        # non-OK result replaces the stored state unless that already is an
        # error.
        if status == STATUS_ERROR:
            components_health = status
        elif status != STATUS_OK and components_health != STATUS_ERROR:
            components_health = status
def cim_components_health_status(subsystem_number):
    """Collect the health of one CIM-monitored subsystem and report it.

    MGMTCTRL and POWERCONSUMP are handed to their dedicated collectors.
    For PYOVERALLSTATE the matching entry of ``health_list`` becomes the
    'ServerView Health State' row (plus a fixed 'ServerView Agents Version'
    row), and every entry whose element name contains 'BIOS Selftest' becomes
    a 'BIOS Selftest' row.  For any other subsystem, every ``health_list``
    entry whose instance id contains "<parent instance id>-" is reported as a
    component of that subsystem.

    ``failure_reasons`` is kept index-aligned with the name/status lists: a
    row without a known reason gets an empty string, as the generic branch
    already does for healthy components.

    subsystem_number -- index into ``components_name`` / ``cim_health_subsystem``.

    Returns the value of the delegated collector or of ``scom.append_status``.
    """
    component_name, component_status, failure_reasons = [], [], []
    component = components_name[subsystem_number]
    subsystem_name = cim_health_subsystem[subsystem_number]

    if subsystem_number == MGMTCTRL:
        return get_management_health()
    if subsystem_number == POWERCONSUMP:
        return get_power_consumption_heatlh()

    if subsystem_number == PYOVERALLSTATE:
        for instance in health_list:
            element_name = str(instance['elementname'])
            if element_name == subsystem_name:
                state = get_health(instance['healthstate'])
                reason = ""
                if components_health is not None:
                    if state == components_health:
                        # One of the components has the same error status,
                        # the user needs to solve that error first.
                        state = STATUS_OK
                    else:
                        log.debug("Trying to get reason for overall status %s..." % state)
                        reason = get_other_failure_reason(health_list)
                        log.debug("Reason: %s." % reason)
                component_name.append('ServerView Health State')
                component_status.append(state)
                failure_reasons.append(reason)

                component_name.append('ServerView Agents Version')
                component_status.append(STATUS_OK)  # Assume OK
                failure_reasons.append("")
            if 'BIOS Selftest' in element_name:
                component_name.append('BIOS Selftest')
                component_status.append(get_health(instance['healthstate']))
                # No reason lookup exists for this row; the placeholder keeps
                # the three lists the same length.
                failure_reasons.append("")
    else:
        for instance in health_list:
            if str(instance['elementname']) != subsystem_name:
                continue
            # Children are recognised by the parent's instance id followed by
            # a dash occurring inside their own instance id.
            child_marker = instance['instanceid'] + "-"
            for subinstance in health_list:
                if child_marker not in subinstance['instanceid']:
                    continue
                child_name = str(subinstance['elementname'])
                status = get_health(subinstance['healthstate'])
                component_name.append(child_name)
                component_status.append(status)
                if status == STATUS_OK:
                    failure_reasons.append("")
                else:
                    log.debug("Trying to get reason for status %s..." % status)
                    reason = get_failure_reason(cim, child_name, subsystem_number)
                    log.debug("Reason: %s." % reason)
                    failure_reasons.append(reason)

    return scom.append_status(component, component_name, component_status, subsystem_number, failure_reasons)
def snmp_components_health_status(subsystem_number):
    """Run the SNMP collector that belongs to ``subsystem_number``.

    subsystem_number -- one of STORAGE, NETWORK, RAIDSUBSYSTEM,
                        RAIDCONTROLLER, PHYSICALDISK or LOGICALDRIVE.

    Returns the collector's result.  If the collector raises, an
    informational SCOM alert is logged and STATUS_UNKNOWN is returned;
    STATUS_UNKNOWN is also returned for a subsystem without a collector.
    """
    try:
        # Ordered pairs keep the first-match semantics of an if-chain.
        collectors = (
            (STORAGE, get_storage_health),
            (NETWORK, get_network_health),
            (RAIDSUBSYSTEM, get_overall_raid_health),
            (RAIDCONTROLLER, get_raid_health),
            (PHYSICALDISK, get_raid_phys_dev_health),
            (LOGICALDRIVE, get_raid_log_drv_health),
        )
        for subsystem, collect in collectors:
            if subsystem_number == subsystem:
                return collect()
    except Exception:
        # Only genuine errors are turned into an alert; SystemExit and
        # KeyboardInterrupt are allowed to propagate.
        log.scom_alert(SCOM_INFO, '260 Error gathering %s component info from SNMP: %s' % (cim_health_subsystem[subsystem_number], sys.exc_info()))
    return STATUS_UNKNOWN
def get_power_consumption_heatlh():
    """Report the 'Power Level' status taken from the 'Total Power' sensor.

    Only the first SVS_PGYPowerConsumptionSensor instance whose device id
    contains 'e0' and whose element name is "Total Power" is evaluated.

    Status rules for that sensor:
      * CurrentState "Unknown"          -> STATUS_OK
      * health state not OK             -> that health state
      * health OK, known CurrentState   -> Normal/Non-Critical/Critical mapped
                                           to OK/WARNING/ERROR
      * health OK, any other state      -> the (OK) health state, so that a
                                           status is always recorded for the
                                           appended name

    Returns the result of ``scom.append_status``.
    """
    component_name, component_status = [], []
    # Refinement of a healthy sensor by its 'CurrentState' text.
    state_to_status = {
        "Normal": STATUS_OK,
        "Non-Critical": STATUS_WARNING,
        "Critical": STATUS_ERROR,
    }

    for instance in cim.enumerate_instances("SVS_PGYPowerConsumptionSensor"):
        # Info: SV Agents SCCI only supply the 'Total Power' sensor (#E0) with sensible values.
        is_total_power = ('deviceid' in instance
                          and 'e0' in instance['deviceid'].lower()
                          and instance['elementname'] == "Total Power")
        if not is_total_power:
            continue

        current_state = instance['currentstate']
        if current_state == "Unknown":
            # If 'CurrentState' is unknown power control mode is not set to 'Power Limit'.
            status = STATUS_OK
        else:
            status = get_health(instance['healthstate'])
            if status == STATUS_OK:
                # Unrecognised states fall back to the sensor's own health
                # instead of leaving the name without a status.
                status = state_to_status.get(current_state, status)

        component_name.append("Power Level")
        component_status.append(status)
        break

    return scom.append_status(components_name[POWERCONSUMP], component_name, component_status, POWERCONSUMP)
def get_storage_health():
    """Report the status of every SNMP device whose type contains "DiskStorage".

    Returns STATUS_UNKNOWN when SNMP is disabled, otherwise the result of
    ``scom.append_status`` for the STORAGE component.
    """
    names, states = [], []
    # The presence of a RAID deliberately does not disable this check: both
    # the Storage and the RAID subsystems are to be monitored.
    if not snmp.enabled:
        return STATUS_UNKNOWN

    for dev_idx in snmp.dev_idxs:  # cached
        if "DiskStorage" not in snmp.get_var(SNMP_DEVICE_TYPE, dev_idx):
            continue
        names.append(snmp.get_var(SNMP_DEVICE_DESCR, dev_idx))
        states.append(SNMP_StorState[snmp.get_var_i(SNMP_DEVICE_STATUS, dev_idx)])

    return scom.append_status(components_name[STORAGE], names, states, STORAGE)
def get_network_health():
    """Report the status of every interface whose type contains 'ethernet'.

    The interface's own admin/oper state gives a first verdict, which is then
    raised to the worst status of any "Network" device whose description
    contains the interface description.

    Returns STATUS_UNKNOWN when SNMP is disabled, otherwise the result of
    ``scom.append_status`` for the NETWORK component.
    """
    if not snmp.enabled:
        return STATUS_UNKNOWN

    def enum_number(text):
        # Expects text shaped like "name(N)" and yields N as an int.
        return int(text.split('(')[1][:-1])

    names, states = [], []
    for if_idx in snmp.get_idx(SNMP_NET_IF_INDEX, SNMP_NET_IF_INDEX_LEN):
        if 'ethernet' not in snmp.get_var(SNMP_NET_IF_TYPE, if_idx):
            continue

        descr = snmp.get_var(SNMP_NET_IF_DESCR, if_idx)
        admin_state = enum_number(snmp.get_var(SNMP_NET_IF_ADMIN_STATUS, if_idx))
        oper_state = enum_number(snmp.get_var(SNMP_NET_IF_OPER_STATUS, if_idx))

        if admin_state == 2 and oper_state == 1:
            # ifAdminStatus=down and ifOperStatus=up then something is wrong
            status = STATUS_ERROR
        elif oper_state in (4, 6):
            # ifOperStatus=unknown or notPresent
            status = STATUS_DEGRADED
        else:
            # we assume everything is ok
            status = STATUS_OK

        # status from device
        for d_idx in snmp.dev_idxs:  # cached
            if "Network" not in snmp.get_var(SNMP_DEVICE_TYPE, d_idx):
                continue
            if descr not in snmp.get_var(SNMP_DEVICE_DESCR, d_idx):
                continue
            dev_status = SNMP_NetState[snmp.get_var_i(SNMP_DEVICE_STATUS, d_idx)]
            worst = max(status_to_int[status], status_to_int[dev_status])
            status = int_to_status[worst]

        names.append(descr)
        states.append(status)

    return scom.append_status(components_name[NETWORK], names, states, NETWORK)
def get_raid_version():
    """Check the ServerView RAID agent version against the two thresholds.

    Stores the version read over SNMP in the global ``raid_version``.

    A version below MIN_RD_VER is reported as STATUS_ERROR; otherwise a
    version below WAR_RD_VER is reported as STATUS_DEGRADED; anything else is
    STATUS_OK.  The error check takes precedence so that it cannot be
    overwritten by the warning check.

    Returns STATUS_UNKNOWN when SNMP is disabled or no RAID is present,
    otherwise the result of ``scom.append_status``.
    """
    global raid_version
    if not snmp.enabled or not raid_present:
        return STATUS_UNKNOWN

    raid_version = snmp.get_var(SNMP_SV_RAID_AGENT_VER)
    # NOTE(review): the comparison uses the values as delivered; if they are
    # strings this is a lexicographic comparison -- confirm that is intended.
    if raid_version < MIN_RD_VER:
        svstatus = STATUS_ERROR
    elif raid_version < WAR_RD_VER:
        svstatus = STATUS_DEGRADED
    else:
        svstatus = STATUS_OK

    component_name = ["ServerView RAID Version"]
    component_status = [svstatus]
    return scom.append_status("ServerView RAID Version", component_name, component_status, -1)
def get_overall_raid_health():
    """Report the overall ServerView RAID status as a single component.

    Returns STATUS_UNKNOWN when SNMP is disabled or no RAID is present,
    otherwise the result of ``scom.append_status`` for RAIDSUBSYSTEM.
    """
    if not (snmp.enabled and raid_present):
        return STATUS_UNKNOWN

    overall = SNMP_CompState[snmp.get_var_i(SNMP_SV_RAID_STATUS)]
    names = [components_name[RAIDSUBSYSTEM]]
    states = [overall]
    return scom.append_status(components_name[RAIDSUBSYSTEM], names, states, RAIDSUBSYSTEM)
def get_raid_health():
    """Report the status of every RAID controller not driven by 'md'.

    Per-controller states are only queried when the controllers' overall
    ServerView status is not OK; otherwise each controller is reported OK.
    When at least one controller was collected, the global ``raid_present``
    is set and the result is cached in the global ``raid_controllers``.

    Returns STATUS_UNKNOWN when SNMP is disabled, when no controller was
    collected, or when ``get_raid_version`` reports STATUS_ERROR; otherwise
    the result of ``scom.append_status`` for RAIDCONTROLLER.
    """
    global raid_present, raid_controllers
    if not snmp.enabled:
        return STATUS_UNKNOWN

    ctrl_idxs = snmp.get_idx(SNMP_CONTROL_CTRLNR, SNMP_CONTROL_INDEX_LEN)
    if len(ctrl_idxs) > 0:
        overall = SNMP_CompState[snmp.get_var_i(SNMP_CONTROL_SV_STATUS)]
    else:
        overall = STATUS_OK

    names, states = [], []
    for ctrl_idx in ctrl_idxs:
        # Controllers whose driver name is 'md' are left out.
        if snmp.get_var(SNMP_CONTROL_DRIVER_NAME, ctrl_idx).lower() == 'md':
            continue
        names.append(snmp.get_var(SNMP_CONTROL_DESCR, ctrl_idx))
        if overall == STATUS_OK:
            states.append(STATUS_OK)
        else:
            states.append(SNMP_CompState[snmp.get_var_i(SNMP_CONTROL_STATUS, ctrl_idx)])

    if not names:
        return STATUS_UNKNOWN

    raid_present = True
    if get_raid_version() == STATUS_ERROR:
        return STATUS_UNKNOWN  # checks if we can monitor raid
    raid_controllers = scom.append_status(components_name[RAIDCONTROLLER], names, states, RAIDCONTROLLER)
    return raid_controllers
def get_raid_phys_dev_health():
    """Report the status of every RAID physical device.

    Per-device states are only queried when the devices' overall ServerView
    status is not OK.  A device whose own state ranks above OK is then
    reported with that overall status (not with its own state); every other
    device is reported OK.  The result is cached in the global
    ``raid_phys_devs``.

    Returns STATUS_UNKNOWN when SNMP is disabled or no device exists,
    otherwise the result of ``scom.append_status`` for PHYSICALDISK.
    """
    global raid_phys_devs
    if not snmp.enabled:
        return STATUS_UNKNOWN

    dev_idxs = snmp.get_idx(SNMP_PHYS_DEV_CTRLNR, SNMP_PHYS_DEV_INDEX_LEN)
    overall = STATUS_OK
    if len(dev_idxs) > 0:
        overall = SNMP_CompState[snmp.get_var_i(SNMP_PHYS_DEV_SV_STATUS)]

    names, states = [], []
    for dev_idx in dev_idxs:
        ctrl_nr = snmp.get_var(SNMP_PHYS_DEV_CTRLNR, dev_idx)
        dev_descr = snmp.get_var(SNMP_PHYS_DEV_DESCR, dev_idx)
        names.append(add_controller_number(ctrl_nr, dev_descr))

        dev_state = STATUS_OK
        if overall != STATUS_OK:
            own_state = SNMP_DevState[snmp.get_var_i(SNMP_PHYS_DEV_STATUS, dev_idx)]
            if status_to_int[own_state] > status_to_int[STATUS_OK]:
                dev_state = overall
        states.append(dev_state)

    if not names:
        return STATUS_UNKNOWN

    raid_phys_devs = scom.append_status(components_name[PHYSICALDISK], names, states, PHYSICALDISK)
    return raid_phys_devs
def get_raid_log_drv_health():
    """Report the status of every RAID logical drive.

    Per-drive states are only queried when the drives' overall ServerView
    status is not OK.  A drive whose own state ranks above OK is then
    reported with that overall status (not with its own state); every other
    drive is reported OK.  The result is cached in the global
    ``raid_log_drvs``.

    Returns STATUS_UNKNOWN when SNMP is disabled or no drive exists,
    otherwise the result of ``scom.append_status`` for LOGICALDRIVE.
    """
    global raid_log_drvs
    if not snmp.enabled:
        return STATUS_UNKNOWN

    drv_idxs = snmp.get_idx(SNMP_LOG_DRV_CTRLNR, SNMP_LOG_DRV_INDEX_LEN)
    overall = STATUS_OK
    if len(drv_idxs) > 0:
        overall = SNMP_CompState[snmp.get_var_i(SNMP_LOG_DRV_SV_STATUS)]

    names, states = [], []
    for drv_idx in drv_idxs:
        ctrl_nr = snmp.get_var(SNMP_LOG_DRV_CTRLNR, drv_idx)
        drv_name = snmp.get_var(SNMP_LOG_DRV_NAME, drv_idx)
        names.append(add_controller_number(ctrl_nr, drv_name))

        drv_state = STATUS_OK
        if overall != STATUS_OK:
            own_state = SNMP_DrvState[snmp.get_var_i(SNMP_LOG_DRV_STATUS, drv_idx)]
            if status_to_int[own_state] > status_to_int[STATUS_OK]:
                drv_state = overall
        states.append(drv_state)

    if not names:
        return STATUS_UNKNOWN

    raid_log_drvs = scom.append_status(components_name[LOGICALDRIVE], names, states, LOGICALDRIVE)
    return raid_log_drvs