TransformCloudServicesVirtualMachineDailyFactResource (Resource)

Element properties:

TypeResource
File NameTransformCloudServicesVirtualMachineDailyFact.sql
AccessibilityPublic

Source Code:

<Resource ID="TransformCloudServicesVirtualMachineDailyFactResource" Accessibility="Public" FileName="TransformCloudServicesVirtualMachineDailyFact.sql"/>

File Content: TransformCloudServicesVirtualMachineDailyFact.sql

USE [DWRepository]

GO

/****** Object: StoredProcedure [dbo].[TransformCloudServicesVirtualMachineDailyFactProc] Script Date: 11/22/2011 16:52:15 ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

IF NOT EXISTS (SELECT * FROM sysobjects WHERE type = 'P' AND name = N'TransformCloudServicesVirtualMachineDailyFactProc')
BEGIN
EXECUTE ('CREATE PROCEDURE dbo.[TransformCloudServicesVirtualMachineDailyFactProc] @WaterMark xml AS RETURN 1')
END
GO

ALTER PROCEDURE [dbo].[TransformCloudServicesVirtualMachineDailyFactProc](@WaterMark XML)
AS BEGIN
SET NOCOUNT ON
SET XACT_ABORT ON

DECLARE @utc DATETIME = GETUTCDATE()
DECLARE @errorNumber INT,
@errorSeverity INT,
@errorState INT,
@errorLine INT,
@errorProcedure NVARCHAR(256),
@errorMessage NVARCHAR(MAX),
@task NVARCHAR(512)

DECLARE @batchId INT = 0,
@WaterMarkBatchId INT,
@Updated INT = 0,
@Inserted INT = 0,
@retval INT = -1,
@err INT = 0,
@startTranCount INT = @@TRANCOUNT,
@source1WM DATETIME,
@source1MaxWM DATETIME,
@startTime DATETIME,
@batchSize INT,
@processedRowCount INT = 0,
@rejectedRowCount INT = 0,
@rowsToProcess INT = 0

DECLARE @loopCount INT = 0,
@canContinue BIT = 1,
@rc INT = 1,
@level INT = 1

DECLARE
@todayKey INT = CONVERT(NVARCHAR(8), @utc, 112),
@CPUCountMeasure FLOAT = NULL,
@MemoryGBMeasure FLOAT = NULL,
@StorageGBMeasure FLOAT = NULL,
@IsActive INT = NULL

DECLARE @transformName SYSNAME = 'TransformCloudServicesVirtualMachineDailyFactProc'
SELECT @source1MaxWM = @utc,
@startTime = @utc

BEGIN TRY


--Shred Watermark and get BatchId and present watermarked BatchId
SELECT @task = 'Shredding WaterMark'
SELECT * INTO #tempTable
FROM etl.ShredWaterMark(@WaterMark)

SELECT @task = 'Reading WaterMark for @WaterMarkBatchId and @BatchId'
SELECT @WaterMarkBatchId = WaterMark, @BatchId = BatchId
FROM #tempTable
WHERE WarehouseEntityName = N'CloudServicesVirtualMachineSnapshotFact' AND WaterMarkType = 'BatchId'

SELECT @task = 'Dropping DWTemp tables'

IF OBJECT_ID('DWTemp.MaxDayKeyTable') IS NOT NULL
DROP TABLE DWTemp.MaxDayKeyTable

IF OBJECT_ID('DWTemp.MaxDayKeyTable2') IS NOT NULL
DROP TABLE DWTemp.MaxDayKeyTable2

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2

-- The grain of the fact table is 1 row per day.
-- The logic of the transform is that it takes the row from the max datekey (from DailyFact)
-- and replicate that value to all the day's for which the records are missing.
-- IE, say, today is 10th of Jan and max record in teh Fact is 1st Jan
-- we copy the row from the 1st Jan and replicate it for all the days till the 10th.
-- In addition to replicating the cost from the max row, this tx merges the cost
-- from snapshot fact if there are any changes to the cost recorded in the fact.

-- read the max DateKey from DailyFact. If it is null, then read the min dateKey from the Snapshot Fact.
SELECT @task = 'Reading MAX DateKey into DWTemp.MaxDayKeyTable'
SELECT ISNULL(MAX(vmDailyView.DateKey), MIN(vmSnapShotView.DateKey)) as MaxDayKey,
vmSnapShotView.CloudServicesVirtualMachineDimKey
INTO DWTemp.MaxDayKeyTable
FROM dbo.CloudServicesVirtualMachineSnapshotFactvw vmSnapShotView
LEFT JOIN dbo.CloudServicesVirtualMachineDailyFactvw vmDailyView
ON vmSnapShotView.CloudServicesVirtualMachineDimKey = vmDailyView.CloudServicesVirtualMachineDimKey
GROUP BY vmSnapShotView.CloudServicesVirtualMachineDimKey

--Get the maxDayKey for SnapShot View too.
--We need to start replicating Source2 from the eariler of these two dates.
SELECT
CASE WHEN MAX(vmSnapShotView.DateKey) > maxDayKeyTable.MaxDayKey
THEN maxDayKeyTable.MaxDayKey
ELSE MAX(vmSnapShotView.DateKey)
END AS MaxDayKey,
vmSnapShotView.CloudServicesVirtualMachineDimKey
INTO DWTemp.MaxDayKeyTable2
FROM dbo.CloudServicesVirtualMachineSnapshotFactvw vmSnapShotView
INNER JOIN DWTemp.MaxDayKeyTable maxDayKeyTable
ON vmSnapShotView.CloudServicesVirtualMachineDimKey = maxDayKeyTable.CloudServicesVirtualMachineDimKey
GROUP BY vmSnapShotView.CloudServicesVirtualMachineDimKey, maxDayKeyTable.MaxDayKey

-- Once we populated MaxDayKeyTable, we need to check if there is atleast one row in the DailyFAct
-- Because if not, the replication code below would not work.
--therefore we should insert a dummy row
--Check if date falls between creation and expiry date.
SELECT dateKeyTable.MaxDayKey AS DateKey,
dateKeyTable.CloudServicesVirtualMachineDimKey,
CAST(vmDailyView.CPUCountMeasure AS FLOAT) AS CPUCountMeasure,
CAST(vmDailyView.MemoryGBMeasure AS FLOAT) AS MemoryGBMeasure,
CAST(vmDailyView.StorageGBMeasure AS FLOAT) AS StorageGBMeasure,
CASE
WHEN vmDim.[IsDeleted] = 1 THEN 0 --overriding the IsActive from VmDim, becauase in some scenarios, IsActive was still found to be 1 though VM was deleted.
ELSE vmDailyView.[IsActive]
END AS [IsActive]
INTO DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0
FROM DWTemp.MaxDayKeyTable dateKeyTable
LEFT JOIN dbo.CloudServicesVirtualMachineDailyFactvw vmDailyView
ON dateKeyTable.CloudServicesVirtualMachineDimKey = vmDailyView.CloudServicesVirtualMachineDimKey
AND dateKeyTable.MaxDayKey = vmDailyView.DateKey
INNER JOIN dbo.CloudServicesVirtualMachineDimvw vmDim
ON vmDim.CloudServicesVirtualMachineDimKey = dateKeyTable.CloudServicesVirtualMachineDimKey
WHERE dateKeyTable.MaxDayKey <= CONVERT(NVARCHAR(8), ISNULL(vmDim.ExpirationDate, @utc), 112) AND
vmDim.State <> 'CreationFailed'

-- Read all the cost changes from the snapshot fact
-- only retain the latest change for any given day
-- IE, suppose that there a 3 changes to cost on 3rd of Jan
-- What we want is only the last change for any given day.
-- In addition to only reading the latest change, we also want to create 1 row per day
-- in the temp table source2
-- NOTE, for all the days for which there are no changes, we will end up inserted NULL values for the cost
-- Got it
-- We will also multiply it with perunitcost, so that it is compatible with DailyFact data
SELECT @task = 'Read the latest cost changes from the snapshot fact'

;WITH T AS
(
SELECT vmSnapShotView.DateKey,
vmSnapShotView.CloudServicesVirtualMachineDimKey,
vmSnapShotView.CPUCountMeasure,
vmSnapShotView.StorageGBMeasure,
vmSnapShotView.MemoryGBMeasure,
vmSnapShotView.[IsActive],
ROW_NUMBER() OVER(PARTITION BY DateKey,CloudServicesVirtualMachineDimKey ORDER BY UpdatedDate DESC) AS DayIdx
FROM dbo.CloudServicesVirtualMachineSnapshotFactvw vmSnapShotView
WHERE vmSnapShotView.[State] <> 'CreationFailed'
)
SELECT
dimDT.DateKey,
maxDayKey2.CloudServicesVirtualMachineDimKey,
T.CPUCountMeasure,
T.[IsActive],
CAST(T.MemoryGBMeasure AS FLOAT) AS MemoryGBMeasure,
CAST(T.StorageGBMeasure AS FLOAT) AS StorageGBMeasure
INTO DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2
FROM dbo.DateDim dimDT
CROSS JOIN DWTemp.MaxDayKeyTable2 maxDayKey2
LEFT JOIN T
ON T.DateKey = dimDT.DateKey AND
T.CloudServicesVirtualMachineDimKey = maxDayKey2.CloudServicesVirtualMachineDimKey AND
T.DayIdx = 1
INNER JOIN dbo.CloudServicesVirtualMachineDimvw vmDim
ON vmDim.CloudServicesVirtualMachineDimKey = maxDayKey2.CloudServicesVirtualMachineDimKey
WHERE dimDT.DateKey >= maxDayKey2.MaxDayKey AND
dimDT.DateKey <= @todayKey AND
dimDT.DateKey <= CONVERT(NVARCHAR(8), ISNULL(vmDim.ExpirationDate, @utc), 112)

-- Now we need to update TransformCloudServicesVirtualMachineDailyFactProc_Source2
-- from TransformCloudServicesVirtualMachineDailyFactProc_Source0, because some rows here may have null,
-- though the data exists in source0 table already
UPDATE source2 SET
source2.CPUCountMeasure = ISNULL(source2.CPUCountMeasure, source0.CPUCountMeasure),
source2.StorageGBMeasure = ISNULL(source2.StorageGBMeasure, source0.StorageGBMeasure),
source2.MemoryGBMeasure = ISNULL(source2.MemoryGBMeasure, source0.MemoryGBMeasure),
source2.[IsActive] = ISNULL(source2.[IsActive], source0.[IsActive])
FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2 source2
INNER JOIN DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0 source0
ON source2.DateKey = source0.DateKey AND
source2.CloudServicesVirtualMachineDimKey = source0.CloudServicesVirtualMachineDimKey AND
source0.CPUCountMeasure IS NOT NULL AND
source0.MemoryGBMeasure IS NOT NULL AND
source0.StorageGBMeasure IS NOT NULL

-- Clustered index is needed for the next update statement to work
CREATE CLUSTERED INDEX Idx1_VMSource2 ON
DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2(CloudServicesVirtualMachineDimKey, DateKey)

-- This update statement works a little mysteriously
-- This is how it works: SQL starts updating the rows one after another
-- in the proces of iterating row after row, I end up saving the cost values in the local variables
-- This will help in copying forward values for any missing (NULL) cost values.
-- For ex: suppose we have cost for 1st jan, 3rd jan, 7th jan
-- this query, will end up updating setting the cost as it was on the 1st jan to both 1st and 2nd (because 2nd was null)
-- then the cost from the 3rd day gets copied to 3rd, 4th, 5th and 6th days
-- and then the cost from the 7th day gets copied to 7th, 8th, 9th and 10th days.
UPDATE T SET
CPUCountMeasure = ISNULL(CPUCountMeasure, @CPUCountMeasure),
MemoryGBMeasure = ISNULL(MemoryGBMeasure, @MemoryGBMeasure),
StorageGBMeasure = ISNULL(StorageGBMeasure, @StorageGBMeasure),
[IsActive] = ISNULL([IsActive], @IsActive),
@CPUCountMeasure = ISNULL(CPUCountMeasure, @CPUCountMeasure),
@MemoryGBMeasure = ISNULL(MemoryGBMeasure, @MemoryGBMeasure),
@StorageGBMeasure = ISNULL(StorageGBMeasure, @StorageGBMeasure),
@IsActive = ISNULL([IsActive], @IsActive)
FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2 AS T
OPTION (MAXDOP 1)

-- We have left Sourceo unattended so far, lets replicate it to Source1
-- Read the latest cost values from the daily fact
-- and replicate it to all the missing days
-- store the replicated rows in temp table source1
SELECT @task = 'Replicate cost from the MAX DateKey to all the missing days'


SELECT
dimDT.DateKey,
source0.CloudServicesVirtualMachineDimKey,
source0.CPUCountMeasure,
source0.MemoryGBMeasure,
source0.StorageGBMeasure,
source0.[IsActive]
INTO DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1
FROM dbo.DateDim dimDT, DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0 source0
INNER JOIN dbo.CloudServicesVirtualMachineDimvw vmDim
ON vmDim.CloudServicesVirtualMachineDimKey = source0.CloudServicesVirtualMachineDimKey
WHERE dimDT.DateKey >= source0.DateKey AND
dimDT.DateKey <= @todayKey AND
dimDT.DateKey <= CONVERT(NVARCHAR(8), ISNULL(vmDim.ExpirationDate, @utc), 112)
--AND
--source0.[State] <> 'CreationFailed'


-- finally it is time to merge cost from the snapshot fact and the daily fact
-- if there are any updated cost values for any given day, it will be used to update the cost from the daily fact
UPDATE source1 SET
source1.CPUCountMeasure = ISNULL(source2.CPUCountMeasure, source1.CPUCountMeasure),
source1.StorageGBMeasure = ISNULL(source2.StorageGBMeasure, source1.StorageGBMeasure),
source1.MemoryGBMeasure = ISNULL(source2.MemoryGBMeasure, source1.MemoryGBMeasure),
source1.[IsActive] = ISNULL(source2.[IsActive], source1.[IsActive])
FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1 source1
LEFT JOIN DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2 source2
ON source1.DateKey = source2.DateKey AND
source1.CloudServicesVirtualMachineDimKey = source2.CloudServicesVirtualMachineDimKey

BEGIN TRANSACTION -- Start

-- update the information back to the DailyFactVw for the existing Data
SELECT @task = 'Updating CloudServicesVirtualMachineDailyFactvw'
UPDATE vwDaily SET
vwDaily.CPUCountMeasure = T.CPUCountMeasure,
vwDaily.StorageGBMeasure = T.StorageGBMeasure,
vwDaily.MemoryGBMeasure = T.MemoryGBMeasure,
vwDaily.[IsActive] = T.[IsActive],
UpdatedBatchId = @BatchId
FROM dbo.CloudServicesVirtualMachineDailyFactvw AS vwDaily
INNER JOIN DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1 AS T
ON vwDaily.DateKey = T.DateKey AND
vwDaily.CloudServicesVirtualMachineDimKey = T.CloudServicesVirtualMachineDimKey AND
(vwDaily.CPUCountMeasure <> T.CPUCountMeasure OR
vwDaily.StorageGBMeasure <> T.StorageGBMeasure OR
vwDaily.MemoryGBMeasure <> T.MemoryGBMeasure OR
vwDaily.[IsActive] <> T.[IsActive])
WHERE T.CPUCountMeasure IS NOT NULL AND
T.StorageGBMeasure IS NOT NULL AND
T.MemoryGBMeasure IS NOT NULL

SELECT @Updated = @Updated + @@RowCount, @err = @@error

-- DELETE THE ROWS FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1, which are already inserted
DELETE source1
FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1 source1
INNER JOIN dbo.CloudServicesVirtualMachineDailyFactvw AS vwDaily
ON vwDaily.DateKey = source1.DateKey AND
vwDaily.CloudServicesVirtualMachineDimKey = source1.CloudServicesVirtualMachineDimKey

-- Finally, insert into the daily fact
SELECT @task = 'Inserting into CloudServicesVirtualMachineDailyFactvw'

INSERT INTO dbo.CloudServicesVirtualMachineDailyFactvw (
CloudServicesVirtualMachineDimKey,
DateKey,
[IsActive],
CPUCountMeasure,
StorageGBMeasure,
MemoryGBMeasure,
InsertedBatchId,
UpdatedBatchId
)
SELECT DISTINCT
source1.CloudServicesVirtualMachineDimKey,
source1.DateKey,
source1.[IsActive],
source1.CPUCountMeasure,
source1.StorageGBMeasure,
source1.MemoryGBMeasure,
@batchId AS InsertedBatchId,
0 AS UpdatedBatchId
FROM DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1 source1

SELECT @Inserted = @Inserted + @@ROWCOUNT, @err = @@error

COMMIT TRANSACTION --End
--PRINT N'End of Body'


-- update watermark
BEGIN
SELECT @task = 'Updating WaterMark'
UPDATE #tempTable SET
WaterMark = @BatchId
WHERE WarehouseEntityName = N'CloudServicesVirtualMachineSnapshotFact'
AND WaterMarkType= N'BatchId'
END

SELECT @task = 'Preparing WaterMark for returning'
SELECT
@WaterMark = (
SELECT ModuleName AS "@ModuleName", ProcessName AS "@ProcessName", @BatchId AS "@BatchId",
(SELECT DISTINCT WarehouseEntityName AS "@WarehouseEntityName", WarehouseEntityTypeName AS "@WarehouseEntityTypeName", EntityGuid AS "@EntityGuid",
CASE WarehouseEntityTypeName WHEN 'Inbound' THEN 'DateTime' WHEN 'Enumeration' THEN 'DateTime' ELSE 'BatchId' END AS "@WaterMarkType",
CASE WarehouseEntityTypeName WHEN 'Inbound' THEN CONVERT(NVARCHAR(64), WaterMark, 109) WHEN 'Enumeration' THEN CONVERT(NVARCHAR(64), WaterMark, 109) ELSE CAST(@BatchId AS NVARCHAR(64)) END AS "@WaterMark"
FROM #tempTable
FOR XML path('Entity'), type)
FROM (SELECT DISTINCT ModuleName, ProcessName from #tempTable) a
FOR XML path('Module'), type)

SELECT @WaterMark AS WaterMark, @BatchId AS BatchId, @Updated AS UpdatedRowCount, @Inserted AS InsertedRowCount

IF OBJECT_ID('tempdb..#tempTable') IS NOT NULL DROP TABLE #tempTable

IF OBJECT_ID('DWTemp.MaxDayKeyTable') IS NOT NULL
DROP TABLE DWTemp.MaxDayKeyTable

IF OBJECT_ID('DWTemp.MaxDayKeyTable2') IS NOT NULL
DROP TABLE DWTemp.MaxDayKeyTable2

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source0

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source1

IF OBJECT_ID('DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2') IS NOT NULL
DROP TABLE DWTemp.TransformCloudServicesVirtualMachineDailyFactProc_Source2

END TRY
BEGIN CATCH
DECLARE @errorFmt VARCHAR(256)

SELECT @errorFmt = N'ErrorNumber="%d" Message="%s" Severity="%d" State="%d" ProcedureName="%s" LineNumber="%d" Task="%s"',
@errorNumber = ERROR_NUMBER(), @errorMessage = ERROR_MESSAGE(), @errorSeverity = ERROR_SEVERITY(), @errorState = ERROR_STATE(), @errorLine = ERROR_LINE(), @errorProcedure = ERROR_PROCEDURE()

IF(@@TRANCOUNT > @startTranCount) ROLLBACK TRANSACTION

RAISERROR (@errorFmt, 18, @errorState, @errorNumber, @errorMessage, @errorSeverity, @errorState, @errorProcedure, @errorLine, @task)
RETURN -1
END CATCH

SET XACT_ABORT OFF
SET NOCOUNT OFF
END


GO


--Update the Data Retention policy to 3 months:

DECLARE @dataRetentionTimeInMinutes INT = 129600 -- 3 * 30 * 24 * 60 = 129600 (3 months)

UPDATE gi SET gi.RetentionPeriodInMinutes = @dataRetentionTimeInMinutes
FROM etl.WarehouseEntity we
INNER JOIN etl.WarehouseEntityGroomingInfo gi
ON we.WarehouseEntityId = gi.WarehouseEntityId
WHERE we.WarehouseEntityName = 'CloudServicesVirtualMachineDailyFact'
AND gi.RetentionPeriodInMinutes != @dataRetentionTimeInMinutes

GO