This monitor tracks timeouts of the submission and activation filters by watching for timeout events. The HPC Job Scheduler Service gives these two filters a limited time to complete before it times them out. (By default, each filter timeout value is 15 seconds.) When either filter times out, an event is generated in the Windows HPC Server event log. If the event was generated within 24 hours of the monitor's health calculation time, the health state changes to Warning.
Note: Submission filter timeout is not tracked on a failover cluster.
The status of the monitor will be automatically reset to Green if no error events are logged for 24 hours.
This condition occurs when timeout events with ID 10 or 11 are generated in the Windows HPC Server event log.
To troubleshoot and fix this problem:
Click on the State Change Events tab.
Check the description field in the Details pane for an event description.
Make sure that the program referred to by the event can complete within the configured timeout period. The timeout period can be set using the following commands in HPC PowerShell:
Set-HpcClusterProperty -SubmissionFilterTimeout <timeout value>
Set-HpcClusterProperty -ActivationFilterTimeout <timeout value>
Target | Microsoft.HPC.2008.HeadNode.HPCPack.JobScheduler | ||
Parent Monitor | System.Health.PerformanceState | ||
Category | PerformanceHealth | ||
Enabled | True | ||
Alert Generate | True | ||
Alert Severity | Warning | ||
Alert Priority | Normal | ||
Alert Auto Resolve | True | ||
Monitor Type | Microsoft.Windows.SingleEventLogTimer2StateMonitorType | ||
Remotable | True | ||
Accessibility | Public | ||
Alert Message |
| ||
RunAs | Default |
<!--
  Job filter timeout monitor.
  Goes to Warning when a filter timeout event (ID 10 or 11) is written to the
  "Windows HPC Server" event log, and returns to Success once the timer
  interval configured below elapses.
-->
<UnitMonitor ID="Microsoft.HPC.2008.Monitor.JobScheduler.JobFiltersTimeout"
             TypeID="Windows!Microsoft.Windows.SingleEventLogTimer2StateMonitorType"
             Target="Microsoft.HPC.2008.HeadNode.HPCPack.JobScheduler"
             ParentMonitorID="Health!System.Health.PerformanceState"
             Accessibility="Public"
             Enabled="true"
             Remotable="true"
             Priority="Normal"
             ConfirmDelivery="true">
  <Category>PerformanceHealth</Category>

  <!-- Alert is raised while the monitor is in the Warning state and is resolved automatically. -->
  <AlertSettings AlertMessage="Microsoft.HPC.2008.Monitor.JobScheduler.JobFiltersTimeout_AlertMessageResourceID">
    <AlertOnState>Warning</AlertOnState>
    <AutoResolve>true</AutoResolve>
    <AlertPriority>Normal</AlertPriority>
    <AlertSeverity>Warning</AlertSeverity>
  </AlertSettings>

  <!-- Maps the two states of the monitor type onto health states. -->
  <OperationalStates>
    <!-- A matching event was logged. -->
    <OperationalState ID="EventRaised" MonitorTypeStateID="EventRaised" HealthState="Warning" />
    <!-- The timer fired; the monitor goes back to healthy. -->
    <OperationalState ID="TimerEventRaised" MonitorTypeStateID="TimerEventRaised" HealthState="Success" />
  </OperationalStates>

  <Configuration>
    <!-- Computer whose event log is read: NetworkName of the Windows.Computer reached via Host/Host from the target. -->
    <ComputerName>$Target/Host/Host/Property[Type="Windows!Microsoft.Windows.Computer"]/NetworkName$</ComputerName>
    <LogName>Windows HPC Server</LogName>

    <!-- Match criteria: (event ID is 10 OR event ID is 11) AND publisher name contains "HPCServer". -->
    <Expression>
      <And>
        <Expression>
          <Or>
            <!-- Event ID 10 -->
            <Expression>
              <SimpleExpression>
                <ValueExpression>
                  <XPathQuery Type="UnsignedInteger">EventDisplayNumber</XPathQuery>
                </ValueExpression>
                <Operator>Equal</Operator>
                <ValueExpression>
                  <Value Type="UnsignedInteger">10</Value>
                </ValueExpression>
              </SimpleExpression>
            </Expression>
            <!-- Event ID 11 -->
            <Expression>
              <SimpleExpression>
                <ValueExpression>
                  <XPathQuery Type="UnsignedInteger">EventDisplayNumber</XPathQuery>
                </ValueExpression>
                <Operator>Equal</Operator>
                <ValueExpression>
                  <Value Type="UnsignedInteger">11</Value>
                </ValueExpression>
              </SimpleExpression>
            </Expression>
          </Or>
        </Expression>
        <!-- Restrict the match to events whose publisher name contains "HPCServer". -->
        <Expression>
          <RegExExpression>
            <ValueExpression>
              <XPathQuery Type="String">PublisherName</XPathQuery>
            </ValueExpression>
            <Operator>ContainsSubstring</Operator>
            <Pattern>HPCServer</Pattern>
          </RegExExpression>
        </Expression>
      </And>
    </Expression>

    <!-- 86400 seconds = 24 hours. -->
    <TimerWaitInSeconds>86400</TimerWaitInSeconds>
  </Configuration>
</UnitMonitor>