HPC 2008 R2 Consumption Based Power Management Module

Microsoft.HPC.2008R2.Module.ConsumptionPower (WriteActionModuleType)

Element properties:


Member Modules:

ID Module Type TypeId RunAs 
Action WriteAction Microsoft.HPC.2008R2.Module.PowershellExecuter Default

Overrideable Parameters:

ID | Parameter Type | Selector | Display Name | Description
TimeoutSeconds | int | $Config/TimeoutSeconds$ | Action Timeout Seconds |
HighCapacity | int | $Config/HighCapacity$ | High Capacity Level |
MediumCapacity | int | $Config/MediumCapacity$ | Medium Capacity Level |
LowCapacity | int | $Config/LowCapacity$ | Low Capacity Level |

Source Code:

<!-- Write-action module type for consumption-based power management. It is implemented by a single member module of type Microsoft.HPC.2008R2.Module.PowershellExecuter that runs the script below. -->
<!-- NOTE(review): this looks like a flattened listing; the elements that normally wrap the xsd:element and OverrideableParameter entries, and the closing tags, are not shown here. -->
<WriteActionModuleType ID="Microsoft.HPC.2008R2.Module.ConsumptionPower" Accessibility="Public" RunAs="HPCLibrary!Microsoft.HPC.RunAsProfile.AdminActionAccount" Batching="false">
<!-- Configuration elements. All six are mandatory (minOccurs="1"). -->
<xsd:element minOccurs="1" name="TimeoutSeconds" type="xsd:integer"/>
<xsd:element minOccurs="1" name="ClusterName" type="xsd:string"/>
<xsd:element minOccurs="1" name="Direction" type="xsd:integer"/>
<xsd:element minOccurs="1" name="HighCapacity" type="xsd:integer"/>
<xsd:element minOccurs="1" name="MediumCapacity" type="xsd:integer"/>
<xsd:element minOccurs="1" name="LowCapacity" type="xsd:integer"/>
<!-- Only these four configuration values can be overridden; ClusterName and Direction have no OverrideableParameter entry. -->
<OverrideableParameter ID="TimeoutSeconds" Selector="$Config/TimeoutSeconds$" ParameterType="int"/>
<OverrideableParameter ID="HighCapacity" Selector="$Config/HighCapacity$" ParameterType="int"/>
<OverrideableParameter ID="MediumCapacity" Selector="$Config/MediumCapacity$" ParameterType="int"/>
<OverrideableParameter ID="LowCapacity" Selector="$Config/LowCapacity$" ParameterType="int"/>
<ModuleImplementation Isolation="Any">
<WriteAction ID="Action" TypeID="Microsoft.HPC.2008R2.Module.PowershellExecuter">

# Consumption based power management script.
# Arguments are positional:
#   $clusterName    - cluster name; used as the -Scheduler argument and as the host of the \CcpSpoolDir share
#   $direction      - 0: nothing to do; greater than 0: increase capacity; otherwise: decrease capacity
#   $highCapacity, $mediumCapacity, $lowCapacity - capacity levels, compared against the percentage of nodes online
#   $timeoutSeconds - how long to wait for nodes to go offline or to come back up
param ([string]$clusterName, [int]$direction, [int]$highCapacity, [int]$mediumCapacity, [int]$lowCapacity, [int]$timeoutSeconds)

# NOTE(review): the statement that ends the script for direction 0 is not present in this listing.
if ($direction -eq 0)
# no need to change capacity level

# global variables
# $logPath holds the list of hibernated nodes; it is written with Export-Clixml and read back with Import-Clixml.
$logPath = "\\" + $clusterName + "\CcpSpoolDir\SCOM.consumption.xml"
$eventLogName = "Application"
$eventLogSource = "HPCManagementPack"
# Event IDs: 311 = an error occurred while hibernating, 312 = nodes did not go offline in time,
# 313 = nodes failed to start, 314 = nodes failed to be brought online.
$failedToHibernateEvent = 311
$timeoutEvent = 312
$failedToStartEvent = 313
$failedToOnlineEvent = 314

Add-PSSnapin Microsoft.HPC
# Register the event source; errors are suppressed (presumably because the source may already exist - confirm).
New-EventLog -LogName $eventLogName -Source $eventLogSource -EA:SilentlyContinue

# Get node count difference according to capacity #
#
# Converts the gap between two capacity levels (percentages) into a number of nodes.
#   $currentCapacity - current capacity, in percent of all nodes
#   $targetCapacity  - desired capacity, in percent of all nodes
#   $totalCount      - total number of nodes the percentages refer to
# Returns a positive number of nodes to hibernate when the current capacity is above
# the target, a negative number (nodes to wake up) when it is below, and 0 when equal.
Function GetNodesToHibernate($currentCapacity, $targetCapacity, $totalCount)
{
    if ($currentCapacity -ge $targetCapacity)
    {
        # Capacity has to drop (or stay): the [int] cast rounds to the nearest whole node.
        return [int](($currentCapacity - $targetCapacity) * $totalCount / 100)
    }

    # Capacity has to rise: the value is negative and Floor rounds it away from zero,
    # so a fractional node to wake up counts as a whole node.
    return [System.Math]::Floor(($currentCapacity - $targetCapacity) * $totalCount / 100)
}

# Hibernate nodes #
#
# Takes up to $countToHibernate idle nodes offline and then hibernates them.
#   $onlineNodes      - candidate nodes
#   $hibernatedNodes  - list of hibernated nodes; exported to $logPath once the targets are offline
#   $countToHibernate - maximum number of nodes to hibernate
# NOTE(review): this listing has no block delimiters or here-string terminators, and some
# statements (the loop exit and the ones that fill $targets) appear to be missing - confirm
# against the shipped management pack.
Function Hibernate($onlineNodes, $hibernatedNodes, $countToHibernate)
$targets = New-Object System.Collections.ArrayList
# A node is treated as free when its HPCCoresInUse metric value is 0.
$counters = $onlineNodes | Get-HpcMetricValue -Name HPCCoresInUse -EA:SilentlyContinue
$freeNodes = @{}
$counters | % {
if ($_.Value -eq 0)
$freeNodes.Add($_.NodeName, $null) # used as a set

# Choose the target nodes; no more are needed once $countToHibernate have been selected.
foreach ($node in $onlineNodes)
if ($targets.Count -ge $countToHibernate)

# Eligible nodes are not the head node, have no "BrokerNode" in their role, and are free.
if ((-not $node.IsHeadNode) -and (-not ([string]$node.NodeRole).Contains("BrokerNode")) -and ($freeNodes.ContainsKey($node.NetBiosName))) # installed as cn and is free

if ($targets.Count -gt 0)
# Take node offline
# The request is asynchronous: poll every 2 seconds until it completes or $timeoutSeconds have elapsed.
$now = Get-Date
$workitem = Set-HpcNodeState -Name $targets -State Offline -Async -Scheduler $clusterName
while ((-not $workitem.IsCompleted) -and ($now.AddSeconds($timeoutSeconds) -ge (Get-Date)))
Start-Sleep 2

# The request counts as successful only if it completed successfully and every operation is 'Committed'.
$offline = $true
if ((-not $workitem.IsCompleted) -or (-not $workitem.CompletedSuccessfully))
$offline = $false

foreach ($op in $workitem.Operations)
if ($op.State -ne 'Committed')
$offline = $false

if (-not $offline) # failed to offline or timeout
Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $timeoutEvent -EntryType Warning -Message "Consumption based power management rule: Nodes are not offline within the time limit. Operations will be canceled."

# Cancel the pending offline operations.
$workitem.Operations | Stop-HpcOperation -Scheduler $clusterName -EA:SilentlyContinue
else # nodes are offline

# Persist the hibernated-node list to $logPath
Export-Clixml -Path $logPath -InputObject $hibernatedNodes

# Hibernate each target by creating a remote process through WMI that runs "shutdown /h"
$targets | % {
$wmi = [WMICLASS]"\\$_\ROOT\CIMV2:Win32_process"
if ($wmi -ne $null)
$wmi.Create("cmd.exe /c shutdown /h")

# Report anything recorded in the automatic $error collection.
# NOTE(review): $error presumably also holds errors from earlier commands run with
# -EA:SilentlyContinue, so this event may be raised for unrelated errors - confirm.
if ($error)
$message = @"
Failure occured when executing consumption based power management rule:
$message += $error
Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $failedToHibernateEvent -EntryType Error -Message $message

# Wake up nodes #
#
# Starts previously hibernated nodes and brings them back online.
#   $hibernatedNodes - list of hibernated nodes
#   $countToWakeup   - number of nodes to wake up
# Raises event $failedToStartEvent for nodes that are still unreachable after $timeoutSeconds
# and event $failedToOnlineEvent for nodes that could not be brought online, then exports
# $nodesToHibernate to $logPath.
# NOTE(review): this listing has no block delimiters or here-string terminators, and the
# statements that fill $nodesToWakeup and $nodesToHibernate are missing - restore them from
# the shipped management pack before running.
Function Wakeup($hibernatedNodes, $countToWakeup)
$nodesToWakeup = New-Object System.Collections.ArrayList
$nodesToHibernate = New-Object System.Collections.ArrayList
# Presumably the first $countToWakeup entries go to $nodesToWakeup and the rest stay in $nodesToHibernate - confirm.
foreach ($node in $hibernatedNodes)
if ($nodesToWakeup.Count -lt $countToWakeup)

$now = Get-Date
Start-HpcNode -Name $nodesToWakeup -Scheduler $clusterName
$errorMsg = $error
# wait for nodes to come up
# Poll every 15 seconds until none of the nodes is reported as unreachable or $timeoutSeconds have elapsed.
while ($now.AddSeconds($timeoutSeconds) -ge (Get-Date))
$unreachableNodes = Get-HpcNode -Name $nodesToWakeup -Health Unreachable -Scheduler $clusterName -EA:SilentlyContinue
if ($unreachableNodes -eq $null)
break # all nodes are reachable

Start-Sleep 15

# Final check after the wait: whatever is still unreachable failed to start.
$unreachableNodes = Get-HpcNode -Name $nodesToWakeup -Health Unreachable -Scheduler $clusterName -EA:SilentlyContinue
if ($unreachableNodes -ne $null) # some nodes failed to start
$message = @"
Some nodes failed to start by consumption based power management rule. These nodes include:
$unreachableNodes | % {
$message += $_.NetBiosName + " "
$message += @"

$message += $errorMsg
Write-EventLog -LogName $eventLogName -Source $eventLogSource -Id $failedToStartEvent -EntryType Error -Message $message

# Woken nodes that are healthy but still offline are brought online.
$nodes = Get-HpcNode -Name $nodesToWakeup -State Offline -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
if ($nodes -ne $null) # some nodes need to be brought online
$nodes | Set-HpcNodeState -State Online -Scheduler $clusterName
$errorMsg = $error

# Check which of the woken nodes are still offline. The lookup must use $nodesToWakeup:
# $targets is a local variable of Hibernate and is not defined in this function.
if ($error)
$nodes = Get-HpcNode -Name $nodesToWakeup -State Offline -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
if ($nodes -ne $null) # some nodes failed to be brought online even after retrial
$message = @"
Some nodes failed to be brought online by consumption based power management rule. These nodes include:
$nodes | % {
$message += $_.NetBiosName + " "
$message += @"

$message += $errorMsg
Write-EventLog -LogName $eventLogName -Source $eventLogSource -Id $failedToOnlineEvent -EntryType Error -Message $message

# Persist the nodes that remain hibernated.
Export-Clixml -Path $logPath -InputObject $nodesToHibernate

# Main workflow #
# Works out the current capacity, picks the next capacity level in the requested direction,
# and hibernates or wakes up the corresponding number of nodes.
# NOTE(review): this listing has no block delimiters and appears to be missing some
# statements (else branches, loop exits, the early exit when there are no nodes) - confirm
# against the shipped management pack.

# Get current capacity
$onlineCount = 0
$onlineNodes = Get-HpcNode -GroupName ComputeNodes -State Online -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
if ($onlineNodes -ne $null)
# A result whose type is not HpcNode is counted through its Count property;
# presumably the assignment of 1 below is the branch for a single HpcNode - confirm.
if ($onlineNodes.GetType().Name -ne "HpcNode")
$onlineCount = $onlineNodes.Count
$onlineCount = 1

# Load the list of hibernated nodes from $logPath if it exists; otherwise start with an empty list.
$hibernatedNodes = $null
if (Test-Path $logPath)
$hibernatedNodes = Import-Clixml -Path $logPath

if ($hibernatedNodes -eq $null)
$hibernatedNodes = New-Object System.Collections.ArrayList

# Presumably the script stops here when there are no nodes, which would also avoid the division by zero below - confirm.
if ($onlineCount + $hibernatedNodes.Count -eq 0)
# there are no nodes

# Current capacity: online nodes as a percentage of online plus hibernated nodes.
$capacity = $onlineCount * 100 / ($onlineCount + $hibernatedNodes.Count)

# Get target capacity
# $nodesToHibernate is positive to hibernate nodes, negative to wake nodes up, 0 for no change.
$nodesToHibernate = 0
if ($direction -gt 0) #increase capacity
# Levels are tried from low to high; a negative difference means the level is above the current capacity.
# Presumably the loop stops at the first such level - confirm.
$capacities = $lowCapacity, $mediumCapacity, $highCapacity
foreach ($targetCapacity in $capacities)
$difference = GetNodesToHibernate $capacity $targetCapacity ($onlineCount + $hibernatedNodes.Count)
if ($difference -lt 0)
$nodesToHibernate = $difference
else # decrease capacity
# Levels are tried from high to low; a positive difference means the level is below the current capacity.
# Presumably the loop stops at the first such level - confirm.
$capacities = $highCapacity, $mediumCapacity, $lowCapacity
foreach ($targetCapacity in $capacities)
$difference = GetNodesToHibernate $capacity $targetCapacity ($onlineCount + $hibernatedNodes.Count)
if ($difference -gt 0)
$nodesToHibernate = $difference

# Do action
# Hibernate requires at least one online node; Wakeup requires at least one hibernated node
# and receives the count as a positive number.
if ($nodesToHibernate -eq 0)
elseif (($nodesToHibernate -gt 0) -and ($onlineCount -gt 0))
Hibernate $onlineNodes $hibernatedNodes $nodesToHibernate
elseif (($nodesToHibernate -lt 0) -and ($hibernatedNodes.Count -gt 0))
Wakeup $hibernatedNodes (-$nodesToHibernate)

<!-- Arguments are listed in the same order as the script's param() declaration: clusterName, direction, highCapacity, mediumCapacity, lowCapacity, timeoutSeconds. Only ClusterName is wrapped in single quotes. -->
<Parameters>'$Config/ClusterName$' $Config/Direction$ $Config/HighCapacity$ $Config/MediumCapacity$ $Config/LowCapacity$ $Config/TimeoutSeconds$</Parameters>
<!-- Refers to the WriteAction member module declared with ID="Action". -->
<Node ID="Action"/>