HPC 2008 R2 Consumption Based Power Management Module

Microsoft.HPC.2008R2.Module.ConsumptionPower (WriteActionModuleType)

Element properties:

Type: WriteActionModuleType
Isolation: Any
Accessibility: Public
RunAs: Microsoft.HPC.RunAsProfile.AdminActionAccount
InputType: System.BaseData

Member Modules:

ID Module Type TypeId RunAs 
Action WriteAction Microsoft.HPC.2008R2.Module.PowershellExecuter Default

Overrideable Parameters:

ID | Parameter Type | Selector | Display Name | Description
TimeoutSeconds | int | $Config/TimeoutSeconds$ | Action Timeout Seconds |
HighCapacity | int | $Config/HighCapacity$ | High Capacity Level |
MediumCapacity | int | $Config/MediumCapacity$ | Medium Capacity Level |
LowCapacity | int | $Config/LowCapacity$ | Low Capacity Level |

Source Code:

<WriteActionModuleType ID="Microsoft.HPC.2008R2.Module.ConsumptionPower" Accessibility="Public" RunAs="HPCLibrary!Microsoft.HPC.RunAsProfile.AdminActionAccount" Batching="false">
<Configuration>
<!-- Configuration schema of the module. All six values are handed to the embedded script as positional arguments. -->
<!-- Timeout in seconds: passed to the script and also used as the timeout of the write action itself. -->
<xsd:element minOccurs="1" name="TimeoutSeconds" type="xsd:integer"/>
<!-- Cluster name: used by the script as the -Scheduler argument and as the host of the \\<name>\CcpSpoolDir share. -->
<xsd:element minOccurs="1" name="ClusterName" type="xsd:string"/>
<!-- Direction of the capacity change: 0 = no change, greater than 0 = increase capacity, otherwise decrease capacity. -->
<xsd:element minOccurs="1" name="Direction" type="xsd:integer"/>
<!-- Capacity levels; the script treats them as percentages of the total node count (online + hibernated). -->
<xsd:element minOccurs="1" name="HighCapacity" type="xsd:integer"/>
<xsd:element minOccurs="1" name="MediumCapacity" type="xsd:integer"/>
<xsd:element minOccurs="1" name="LowCapacity" type="xsd:integer"/>
</Configuration>
<OverrideableParameters>
<!-- Only the timeout and the three capacity levels can be overridden; ClusterName and Direction are not exposed here. -->
<OverrideableParameter ID="TimeoutSeconds" Selector="$Config/TimeoutSeconds$" ParameterType="int"/>
<OverrideableParameter ID="HighCapacity" Selector="$Config/HighCapacity$" ParameterType="int"/>
<OverrideableParameter ID="MediumCapacity" Selector="$Config/MediumCapacity$" ParameterType="int"/>
<OverrideableParameter ID="LowCapacity" Selector="$Config/LowCapacity$" ParameterType="int"/>
</OverrideableParameters>
<ModuleImplementation Isolation="Any">
<Composite>
<MemberModules>
<WriteAction ID="Action" TypeID="Microsoft.HPC.2008R2.Module.PowershellExecuter">
<ScriptName>ConsumptionPowerSavingModule</ScriptName>
<ScriptBody><Script>

param ([string]$clusterName, [int]$direction, [int]$highCapacity, [int]$mediumCapacity, [int]$lowCapacity, [int]$timeoutSeconds)

# A direction of zero means the capacity level stays as it is, so leave immediately.
if ($direction -eq 0) { return }

# Script-wide settings.
# The list of hibernated nodes is persisted on the cluster's CcpSpoolDir share.
$logPath = "\\${clusterName}\CcpSpoolDir\SCOM.consumption.xml"

# Event log target
$eventLogName = "Application"
$eventLogSource = "HPCManagementPack"

# Event IDs, one per failure category
$failedToHibernateEvent = 311
$timeoutEvent = 312
$failedToStartEvent = 313
$failedToOnlineEvent = 314

# Load the HPC cmdlets and register the event source.
# Errors from New-EventLog are silenced (e.g. when the source is already registered).
Add-PSSnapin Microsoft.HPC
New-EventLog -LogName $eventLogName -Source $eventLogSource -EA:SilentlyContinue

###################################################################
# Get node count difference according to capacity #
###################################################################
Function GetNodesToHibernate($currentCapacity, $targetCapacity, $totalCount)
{
    # Converts the gap between two capacity percentages into a node count.
    #   $currentCapacity - current capacity, in percent
    #   $targetCapacity  - desired capacity, in percent
    #   $totalCount      - total number of nodes the percentages refer to
    # Returns a positive number when nodes should be hibernated (current >= target)
    # and a negative number when nodes should be woken up (current < target).
    $nodeDelta = ($currentCapacity - $targetCapacity) * $totalCount / 100

    if (-not ($currentCapacity -ge $targetCapacity))
    {
        # Below target: round towards negative infinity.
        return [System.Math]::Floor($nodeDelta)
    }

    # At or above target: the [int] conversion rounds to the nearest integer.
    return [int]$nodeDelta
}

###################################################################
# Hibernate nodes #
###################################################################
Function Hibernate($onlineNodes, $hibernatedNodes, $countToHibernate)
{
    # Takes up to $countToHibernate idle compute nodes offline and hibernates them.
    #   $onlineNodes      - node objects that are currently online
    #   $hibernatedNodes  - ArrayList of node names already hibernated; the new targets are
    #                       appended and the list is written to $logPath
    #   $countToHibernate - maximum number of nodes to hibernate
    # Reads $clusterName, $timeoutSeconds, $logPath and the event log settings from the script scope.
    $targets = New-Object System.Collections.ArrayList

    # Collect the names of the nodes that report zero cores in use.
    $counters = $onlineNodes | Get-HpcMetricValue -Name HPCCoresInUse -EA:SilentlyContinue
    $freeNodes = @{}
    $counters | % {
        if ($_.Value -eq 0)
        {
            # Used as a set. Indexer assignment (not Add) so that a repeated node name does not throw.
            $freeNodes[$_.NodeName] = $null
        }
    }

    foreach ($node in $onlineNodes)
    {
        if ($targets.Count -ge $countToHibernate)
        {
            break
        }

        # Eligible nodes: not the head node, not a broker node, and currently free.
        if ((-not $node.IsHeadNode) -and (-not ([string]$node.NodeRole).Contains("BrokerNode")) -and ($freeNodes.ContainsKey($node.NetBiosName)))
        {
            # ArrayList.Add returns the new index; discard it so it does not leak into the script output.
            [void]$targets.Add($node.NetBiosName)
        }
    }

    if ($targets.Count -gt 0)
    {
        # Take the nodes offline and wait for the asynchronous work item, up to $timeoutSeconds.
        $now = Get-Date
        $workitem = Set-HpcNodeState -Name $targets -State Offline -Async -Scheduler $clusterName
        # If no work item came back there is nothing to wait for; it is handled as a failure below.
        while (($workitem -ne $null) -and (-not $workitem.IsCompleted) -and ($now.AddSeconds($timeoutSeconds) -ge (Get-Date)))
        {
            Start-Sleep 2
        }

        $offline = $true
        if ((-not $workitem.IsCompleted) -or (-not $workitem.CompletedSuccessfully))
        {
            $offline = $false
        }

        # Every individual operation must have been committed.
        foreach ($op in $workitem.Operations)
        {
            if ($op.State -ne 'Committed')
            {
                $offline = $false
                break
            }
        }

        if (-not $offline) # failed to offline or timeout
        {
            Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $timeoutEvent -EntryType Warning -Message "Consumption based power management rule: Nodes are not offline within the time limit. Operations will be canceled."

            $workitem.Operations | Stop-HpcOperation -Scheduler $clusterName -EA:SilentlyContinue
        }
        else # nodes are offline
        {
            $error.Clear()

            # Record the node names in the persisted list before hibernating them.
            $hibernatedNodes.AddRange($targets)
            Export-Clixml -Path $logPath -InputObject $hibernatedNodes

            # Hibernate each node by starting "shutdown /h" on it through WMI.
            $targets | % {
                # Reset first: if the conversion below fails, the assignment is skipped and the
                # (already disposed) object of the previous node must not be reused.
                $wmi = $null
                $wmi = [WMICLASS]"\\$_\ROOT\CIMV2:Win32_process"
                if ($wmi -ne $null)
                {
                    $wmi.Create("cmd.exe /c shutdown /h")
                    $wmi.Dispose()
                }
            }

            # Report everything that went wrong since $error was cleared above.
            if ($error)
            {
                $message = @"
Failure occured when executing consumption based power management rule:
"@
                $message += $error
                Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $failedToHibernateEvent -EntryType Error -Message $message
            }
        }
    }
}


###################################################################
# Wake up nodes #
###################################################################
Function Wakeup($hibernatedNodes, $countToWakeup)
{
    # Starts up to $countToWakeup hibernated nodes and brings them back online.
    #   $hibernatedNodes - names of the nodes that are currently hibernated
    #   $countToWakeup   - number of nodes to wake up
    # Nodes that are not woken up, fail to start, or fail to come online are written back
    # to $logPath as the new list of hibernated nodes.
    # Reads $clusterName, $timeoutSeconds, $logPath and the event log settings from the script scope.
    $nodesToWakeup = New-Object System.Collections.ArrayList
    $nodesToHibernate = New-Object System.Collections.ArrayList
    foreach ($node in $hibernatedNodes)
    {
        # ArrayList.Add returns the new index; discard it so it does not leak into the script output.
        if ($nodesToWakeup.Count -lt $countToWakeup)
        {
            [void]$nodesToWakeup.Add($node)
        }
        else
        {
            [void]$nodesToHibernate.Add($node)
        }
    }

    $now = Get-Date
    $error.Clear()
    Start-HpcNode -Name $nodesToWakeup -Scheduler $clusterName
    # Snapshot the errors: $error is a live collection that later commands keep modifying.
    $errorMsg = @($error)

    # wait for nodes to come up
    while ($now.AddSeconds($timeoutSeconds) -ge (Get-Date))
    {
        $unreachableNodes = Get-HpcNode -Name $nodesToWakeup -Health Unreachable -Scheduler $clusterName -EA:SilentlyContinue
        if ($unreachableNodes -eq $null)
        {
            break # all nodes are reachable
        }

        Start-Sleep 15
    }

    $unreachableNodes = Get-HpcNode -Name $nodesToWakeup -Health Unreachable -Scheduler $clusterName -EA:SilentlyContinue
    if ($unreachableNodes -ne $null) # some nodes failed to start
    {
        $message = @"
Some nodes failed to start by consumption based power management rule. These nodes include:
"@
        $unreachableNodes | % {
            $message += $_.NetBiosName + " "
            # Still down, so keep the node in the hibernated list.
            [void]$nodesToHibernate.Add($_.NetBiosName)
        }
        $message += @"


"@
        $message += $errorMsg
        Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $failedToStartEvent -EntryType Error -Message $message
    }

    $nodes = Get-HpcNode -Name $nodesToWakeup -State Offline -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
    if ($nodes -ne $null) # some nodes need to be brought online
    {
        $error.Clear()
        $nodes | Set-HpcNodeState -State Online -Scheduler $clusterName
        # Snapshot again, for the same reason as above.
        $errorMsg = @($error)

        if ($errorMsg.Count -gt 0)
        {
            # Re-check the nodes woken up by this call (previously queried an undefined $targets variable).
            $nodes = Get-HpcNode -Name $nodesToWakeup -State Offline -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
            if ($nodes -ne $null) # some nodes are still offline after the online attempt
            {
                $message = @"
Some nodes failed to be brought online by consumption based power management rule. These nodes include:
"@
                $nodes | % {
                    $message += $_.NetBiosName + " "
                    [void]$nodesToHibernate.Add($_.NetBiosName)
                }
                $message += @"


"@
                $message += $errorMsg
                Write-EventLog -LogName $eventLogName -Source $eventLogSource -EventId $failedToOnlineEvent -EntryType Error -Message $message
            }
        }
    }

    # Persist the nodes that remain hibernated.
    Export-Clixml -Path $logPath -InputObject $nodesToHibernate
}


###################################################################
# Main workflow #
###################################################################

# Count the compute nodes that are online and healthy
$onlineNodes = Get-HpcNode -GroupName ComputeNodes -State Online -HealthState OK, Warning -Scheduler $clusterName -EA:SilentlyContinue
$onlineCount = 0
if ($onlineNodes -ne $null)
{
    # A single result comes back as a bare HpcNode rather than a collection.
    if ($onlineNodes.GetType().Name -eq "HpcNode")
    {
        $onlineCount = 1
    }
    else
    {
        $onlineCount = $onlineNodes.Count
    }
}

# Load the persisted list of hibernated nodes; fall back to an empty list
$hibernatedNodes = $null
if (Test-Path $logPath)
{
    $hibernatedNodes = Import-Clixml -Path $logPath
}

if ($hibernatedNodes -eq $null)
{
    $hibernatedNodes = New-Object System.Collections.ArrayList
}

$allNodesCount = $onlineCount + $hibernatedNodes.Count
if ($allNodesCount -eq 0)
{
    # there are no nodes
    return
}

# Current capacity, as a percentage of all known nodes
$capacity = $onlineCount * 100 / $allNodesCount

# Pick the capacity levels in the order they must be tried:
# ascending when capacity is to be increased, descending when it is to be decreased.
$increasing = ($direction -gt 0)
if ($increasing)
{
    $levels = $lowCapacity, $mediumCapacity, $highCapacity
}
else
{
    $levels = $highCapacity, $mediumCapacity, $lowCapacity
}

# The first level that requires a change in the requested direction wins.
# A positive result means nodes to hibernate, a negative one means nodes to wake up.
$nodesToHibernate = 0
foreach ($level in $levels)
{
    $delta = GetNodesToHibernate $capacity $level $allNodesCount
    if (($increasing -and ($delta -lt 0)) -or ((-not $increasing) -and ($delta -gt 0)))
    {
        $nodesToHibernate = $delta
        break
    }
}

# Do action (nothing happens when the node difference is zero)
if ($nodesToHibernate -gt 0)
{
    if ($onlineCount -gt 0)
    {
        Hibernate $onlineNodes $hibernatedNodes $nodesToHibernate
    }
}
elseif ($nodesToHibernate -lt 0)
{
    if ($hibernatedNodes.Count -gt 0)
    {
        Wakeup $hibernatedNodes (-$nodesToHibernate)
    }
}

</Script></ScriptBody>
<Parameters>'$Config/ClusterName$' $Config/Direction$ $Config/HighCapacity$ $Config/MediumCapacity$ $Config/LowCapacity$ $Config/TimeoutSeconds$</Parameters>
<TimeoutSeconds>$Config/TimeoutSeconds$</TimeoutSeconds>
</WriteAction>
</MemberModules>
<Composition>
<Node ID="Action"/>
</Composition>
</Composite>
</ModuleImplementation>
<InputType>System!System.BaseData</InputType>
</WriteActionModuleType>