TransformEntityManagedTypeFactResource (Resource)

Element properties:

TypeResource
File NameTransformEntityManagedTypeFact.sql
AccessibilityPublic

Source Code:

<Resource ID="TransformEntityManagedTypeFactResource" Accessibility="Public" FileName="TransformEntityManagedTypeFact.sql"/>

File Content: TransformEntityManagedTypeFact.sql

IF NOT EXISTS (SELECT * FROM sysobjects WHERE type = 'P' AND name = N'TransformEntityManagedTypeFactProc')

BEGIN
EXECUTE ('CREATE PROCEDURE dbo.[TransformEntityManagedTypeFactProc] @WaterMark xml AS RETURN 1')
END
GO

DECLARE @Statement NVARCHAR(MAX)

SET @Statement=
N'ALTER PROCEDURE [dbo].[TransformEntityManagedTypeFactProc](@WaterMark XML)
AS BEGIN
SET NOCOUNT ON
SET XACT_ABORT ON

DECLARE @utc DATETIME = GETUTCDATE()
DECLARE @errorNumber INT,
@errorSeverity INT,
@errorState INT,
@errorLine INT,
@errorProcedure NVARCHAR(256),
@errorMessage NVARCHAR(MAX),
@task NVARCHAR(512)

DECLARE @BatchId INT,
@Updated INT = 0,
@Inserted INT = 0,
@retval INT = -1,
@err INT = 0,
@startTranCount INT = @@TRANCOUNT,
@source1WM DATETIME,
@source1MaxWM DATETIME,
@startTime DATETIME,
@batchSize INT = 50000,
@processedRowCount INT = 0,
@rejectedRowCount INT = 0

DECLARE @loopCount INT = 0,
@canContinue BIT = 1

SELECT @source1MaxWM = @utc,
@startTime = @utc

BEGIN TRY

EXEC dbo.InitializeTransform
@transformName = ''TransformEntityManagedTypeFactProc'',
@transformTemplateType = ''Manual'',
@waterMark = @WaterMark,
@warehouseEntityName = ''BaseManagedEntity'',
@waterMarkType = ''DateTime'',
@sourceTableName = ''inbound.BaseManagedEntity'',
@utc = @utc,
@source1WM = @source1WM OUTPUT,
@source1MaxWM = @source1MaxWM OUTPUT,
@batchId = @batchId OUTPUT

WHILE(@canContinue = 1)
BEGIN
SELECT @task = ''Preparing #transformTemp0''
SELECT TOP(@batchSize) WITH TIES *
INTO #transformTemp2
FROM inbound.BaseManagedEntity inbBME
WHERE inbBME.DWTimestamp >= @source1WM
AND inbBME.DWTimeStamp < @utc
ORDER BY DWTimestamp

SELECT @processedRowCount = @@ROWCOUNT
IF( @processedRowCount = 0) BREAK;


--deleting all duplicate rows from inbBME--
SELECT *, ROW_NUMBER() OVER (PARTITION BY BaseManagedEntityId, DatasourceId order by DWTimeStamp desc) as rownum
INTO #transformTemp0
FROM #transformTemp2

DELETE #transformTemp0
WHERE rownum > 1

SELECT @task = ''Evaluating @source1MaxWM''
SELECT @source1MaxWM = (SELECT MAX(DWTimeStamp) FROM #transformTemp0)

SELECT @source1MaxWM = ( SELECT MIN(DWTimestamp)
FROM inbound.BaseManagedEntity
WHERE DWTimeStamp > @source1MaxWM)

SELECT @source1MaxWM = ISNULL(@source1MaxWM, @utc)

/*
************************************************************************************************
* Step 1: Prepare a temp table with following bits of information:
* - Dim keys
* - DateKey (Relationship Added Date)
* - CreatedDate
* - DeletedDate (Deleted Date)
* Step 3: Delete unnecessary events (events that are either already present in the fact or
* that have been accounted for in the previous step) from the temp table
* Step 4: Update the fact
* Step 5: Insert new records
************************************************************************************************
*/
SELECT @task = ''Step 1: Preparing #TransformTemp1''
EXEC @err = dbo.InferDatasourceDimProc
@sourceTableName = ''inbound.BaseManagedEntity'',
@columnName = ''DataSourceId'',
@filterColumnName = ''DWTimeStamp'',
@minTimeStamp = @source1WM,
@maxTimeStamp = @source1MaxWM,
@batchId = @BatchId

SELECT dimDS.DataSourceDimKey,
dimEnt.EntityDimKey AS EntityDimKey,
dimMgdTyp.ManagedTypeDimKey AS ManagedTypeDimKey,
CONVERT(nvarchar(8), inbBME.TimeAdded, 112) AS DateKey,
inbBME.TimeAdded AS CreatedDate,
inbBME.BaseManagedEntityId,
inbBME.DatasourceId,
CASE WHEN inbBME.IsDeleted = 0 THEN NULL ELSE inbBME.LastModified END AS DeletedDate,
ROW_NUMBER() OVER( PARTITION BY dimDS.DataSourceDimKey,
dimEnt.EntityDimKey,
dimMgdTyp.ManagedTypeDimKey
ORDER BY inbBME.LastModified
) AS RowNum
INTO #transformTemp1
FROM #transformTemp0 inbBME
-- REDMOND\mando. BUG: 163243
-- SQL was not honoring this inner join with inbBME and inbTME.
-- The execution plan showed that inbBME was joining with dimDS and then to dmMGdTyp and then to inbTME
-- This plan ended up being a very inferior plan. With just over a million rows in inbBME and inbTME,
-- this plan ended up taking upwards of 1Hr 40Mins, eventually resulting in a timeout.
-- Following options were tried to get a different/better execution plan:
-- (Note: Adding any indexes on Inbound tables was/is not an option. This is done intentionally
-- in order to keep the Extracts as fast as possible)
-- 1. Adding index on dimMgdTyp (ManagedTypeId). Result: No change in the query plan
-- 2. Re-ordered the joins. Result: No change in the query plan
-- 3. Re-organized the query into multiple sub-queries. Result: No change in the query plan.
-- 4. Finally, in order to force the plan to start with a join between inbBME and inbTME,
-- I''ve decided to add a join hint of HASH. The reason for HASH is that with a simiplified query,
-- sql was using a hash join with inbBME and inbTME and producing the resultset in less than 1 min.
-- With the updated query plan, and the same dataset, the Transform now takes just over 1min to complete.
INNER HASH JOIN inbound.TypedManagedEntity inbTME ON
inbTME.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND inbTME.DatasourceId = inbBME.DatasourceId
INNER JOIN dbo.EntityDimvw dimEnt ON
dimEnt.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND dimEnt.SourceId = inbBME.DatasourceId
JOIN dbo.ManagedTypeDimvw dimMgdTyp ON
dimMgdTyp.BaseManagedEntityId = inbTME.ManagedTypeId
AND dimMgdTyp.SourceId = inbTME.DatasourceId
JOIN dbo.DataSourceDimvw dimDS ON
inbBME.DatasourceId = dimDS.SourceId

SELECT @task = ''Step 2: Keep the last record within the Batch''
DELETE tmp
FROM #transformTemp1 tmp
WHERE RowNum > 1

-- Due to bug 235558 (inbound.TypedManagedEntity is not extracted in update case)
-- store the records in current batch which already exist in the fact table to a temp table
-- filter out those rows when we do the early arriving data handling
-- side effect, we don''t have update to the table, which is fine due to Entity and managedType actually don''t have any update
select temp.*
into #alreadyExistInFactTable
from #transformTemp0 temp
join EntityDimvw dimEnt on dimEnt.BaseManagedEntityId = temp.BaseManagedEntityId and dimEnt.SourceId = temp.DatasourceId
join EntityManagedTypeFactvw fact on fact.EntityDimKey = dimEnt.EntityDimKey

SELECT @task = ''Step 3: Handle early arriving data ''
-- If there are any early inbound rows that didn''t get inserted into the temp because the
-- corresponding row wasn''t in EntityDim yet, change DWTimestamp to hit them next time
UPDATE inbBME SET
DWTimestamp = @source1MaxWM,
RejectedCount = inbBME.RejectedCount + 1,
BatchId = @BatchId
FROM inbound.BaseManagedEntity inbBME
LEFT JOIN #transformTemp1 txTemp ON
txTemp.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND txTemp.DatasourceId = inbBME.DatasourceId
LEFT JOIN Staging.EntityTypeExceptionList exList ON
inbBME.DatasourceId = ISNULL(exList.DatasourceId, inbBME.DatasourceId) -- if sourceid is null, then exclude from all sources
AND inbBME.BaseManagedTypeId = exList.BaseManagedTypeId
LEFT JOIN #alreadyExistInFactTable toRemove on toRemove.BaseManagedEntityId = inbBME.BaseManagedEntityId
WHERE inbBME.DWTimestamp >= @source1WM
AND inbBME.DWTimestamp < @source1MaxWM
AND txTemp.BaseManagedEntityId IS NULL
AND exList.BaseManagedTypeId IS NULL
AND toRemove.BaseManagedEntityId is null

SELECT @rejectedRowCount = @@ROWCOUNT

-- If there are any early inbound rows that didn''t get inserted into the temp because the
-- corresponding row wasn''t in EntityDim/ManagedTypeDim yet, change DWTimestamp to hit them next time
UPDATE inbTME SET
DWTimestamp = @source1MaxWM,
RejectedCount = inbTME.RejectedCount + 1,
BatchId = @BatchId
From inbound.TypedManagedEntity inbTME
join ( select tmp0.*
from #transformTemp0 tmp0
left join #transformTemp1 tmp1 ON
tmp0.BaseManagedEntityId = tmp1.BaseManagedEntityId
and tmp0.DatasourceId = tmp1.DatasourceId
left join #alreadyExistInFactTable toRemove on toRemove.BaseManagedEntityId = tmp0.BaseManagedEntityId
where tmp1.BaseManagedEntityId IS NULL AND toRemove.BaseManagedEntityId IS NULL
) as Source ON
Source.BaseManagedEntityId = inbTME.BaseManagedEntityId
and Source.DatasourceId = inbTME.DatasourceId
left join Staging.EntityTypeExceptionList exList on
inbTME.DatasourceId = ISNULL(exList.DatasourceId, inbTME.DatasourceId) -- if sourceid is null, then exclude from all sources
and inbTME.ManagedTypeId = exList.BaseManagedTypeId
where exList.BaseManagedTypeId is null

-- this should never happen. and if it does happen, then it is better to bail out now
-- it appears we don''t have anything to do for now

IF(@rejectedRowCount = @processedRowCount) BREAK;

-- Delete early inbound rows from our temp table
-- DELETE FROM #transformTemp1 WHERE EntityDimKey IS NULL


BEGIN TRANSACTION
SELECT @task = ''Step 4: Updating Facts''
UPDATE fact SET
DeletedDate = source.DeletedDate,
UpdatedBatchId = @BatchId
FROM dbo.EntityManagedTypeFactvw fact
JOIN #transformTemp1 source ON
source.EntityDimKey = fact.EntityDimKey
AND source.ManagedTypeDimKey = fact.ManagedTypeDimKey
AND source.DataSourceDimKey = fact.DataSourceDimKey
WHERE fact.DeletedDate IS NULL and source.DeletedDate is not null


SELECT @Updated = @Updated + @@RowCount,
@err = @@error

-- Delete any rows that would cause Dup Key errors on Insert
SELECT @task = ''Step 4.5: Deleting any events from #TransformTemp1 that already have a fact row''
DELETE temp
FROM #transformTemp1 temp
JOIN dbo.EntityManagedTypeFactvw fact ON
temp.DateKey = fact.DateKey
AND temp.EntityDimKey = fact.EntityDimKey
AND temp.ManagedTypeDimKey = fact.ManagedTypeDimKey
AND temp.DataSourceDimKey = fact.DataSourceDimKey



SELECT @task = ''Step 5: Inserting Facts''
INSERT INTO EntityManagedTypeFactvw (
DateKey,
DataSourceDimKey,
EntityDimKey,
ManagedTypeDimKey,
CreatedDate,
DeletedDate,
InsertedBatchId,
UpdatedBatchId
)
SELECT DISTINCT
source.DateKey,
source.DataSourceDimKey,
source.EntityDimKey,
source.ManagedTypeDimKey,
source.CreatedDate,
source.DeletedDate,
@BatchId,
0
FROM #transformTemp1 source


SELECT @Inserted = @Inserted + @@RowCount,
@err = @@error

COMMIT TRANSACTION

IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#transformTemp2'') IS NOT NULL DROP TABLE #transformTemp2
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable

SET @source1WM = @source1MaxWM

SELECT @loopCount = @loopCount + 1

EXEC dbo.UninitializeTransformLoop
@transformName = ''TransformEntityManagedTypeFactProc'',
@transformTemplateType = ''Manual'',
@utc = @utc,
@inserted = @Inserted,
@updated = @Updated,
@loopCount = @loopCount,
@canContinue = @canContinue OUTPUT
END -- END WHILE

EXEC dbo.UninitializeTransform
@transformName = ''TransformEntityManagedTypeFactProc'',
@transformTemplateType = ''Manual'',
@waterMark = @WaterMark,
@warehouseEntityName = ''BaseManagedEntity'',
@waterMarkType = ''DateTime'',
@source1MaxWM = @source1MaxWM,
@batchId = @BatchId,
@inserted = @Inserted,
@updated = @Updated

IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable
END TRY
BEGIN CATCH
DECLARE @errorFmt VARCHAR(256)

SELECT @errorFmt = N''ErrorNumber="%d" Message="%s" Severity="%d" State="%d" ProcedureName="%s" LineNumber="%d" Task="%s"'',
@errorNumber = ERROR_NUMBER(),
@errorMessage = ERROR_MESSAGE(),
@errorSeverity = ERROR_SEVERITY(),
@errorState = ERROR_STATE(),
@errorLine = ERROR_LINE(),
@errorProcedure = ERROR_PROCEDURE()

IF(@@TRANCOUNT > @startTranCount) ROLLBACK TRANSACTION

IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable

RAISERROR (
@errorFmt,
18,
@errorState,
@errorNumber,
@errorMessage,
@errorSeverity,
@errorState,
@errorProcedure,
@errorLine,
@task
)

RETURN -1
END CATCH

SET XACT_ABORT OFF
SET NOCOUNT OFF
END'

IF EXISTS(SELECT * FROM sys.objects WHERE type = 'U' AND name = 'TablePartition' )
BEGIN
EXEC sp_executesql @Statement
END
GO