IF NOT EXISTS (SELECT * FROM sysobjects WHERE type = 'P' AND name = N'TransformEntityManagedTypeFactProc')
BEGIN
EXECUTE ('CREATE PROCEDURE dbo.[TransformEntityManagedTypeFactProc] @WaterMark xml AS RETURN 1')
END
GO
DECLARE @Statement NVARCHAR(MAX)
SET @Statement=
N'ALTER PROCEDURE [dbo].[TransformEntityManagedTypeFactProc](@WaterMark XML)
AS BEGIN
SET NOCOUNT ON
SET XACT_ABORT ON
WHILE(@canContinue = 1)
BEGIN
SELECT @task = ''Preparing #transformTemp0''
SELECT TOP(@batchSize) WITH TIES *
INTO #transformTemp2
FROM inbound.BaseManagedEntity inbBME
WHERE inbBME.DWTimestamp >= @source1WM
AND inbBME.DWTimeStamp < @utc
ORDER BY DWTimestamp
--deleting all duplicate rows from inbBME--
SELECT *, ROW_NUMBER() OVER (PARTITION BY BaseManagedEntityId, DatasourceId order by DWTimeStamp desc) as rownum
INTO #transformTemp0
FROM #transformTemp2
/*
************************************************************************************************
* Step 1: Prepare a temp table with following bits of information:
* - Dim keys
* - DateKey (Relationship Added Date)
* - CreatedDate
* - DeletedDate (Deleted Date)
* Step 3: Delete unnecessary events (events that are either already present in the fact or
* that have been accounted for in the previous step) from the temp table
* Step 4: Update the fact
* Step 5: Insert new records
************************************************************************************************
*/
SELECT @task = ''Step 1: Preparing #TransformTemp1''
EXEC @err = dbo.InferDatasourceDimProc
@sourceTableName = ''inbound.BaseManagedEntity'',
@columnName = ''DataSourceId'',
@filterColumnName = ''DWTimeStamp'',
@minTimeStamp = @source1WM,
@maxTimeStamp = @source1MaxWM,
@batchId = @BatchId
SELECT dimDS.DataSourceDimKey,
dimEnt.EntityDimKey AS EntityDimKey,
dimMgdTyp.ManagedTypeDimKey AS ManagedTypeDimKey,
CONVERT(nvarchar(8), inbBME.TimeAdded, 112) AS DateKey,
inbBME.TimeAdded AS CreatedDate,
inbBME.BaseManagedEntityId,
inbBME.DatasourceId,
CASE WHEN inbBME.IsDeleted = 0 THEN NULL ELSE inbBME.LastModified END AS DeletedDate,
ROW_NUMBER() OVER( PARTITION BY dimDS.DataSourceDimKey,
dimEnt.EntityDimKey,
dimMgdTyp.ManagedTypeDimKey
ORDER BY inbBME.LastModified
) AS RowNum
INTO #transformTemp1
FROM #transformTemp0 inbBME
-- REDMOND\mando. BUG: 163243
-- SQL was not honoring this inner join with inbBME and inbTME.
-- The execution plan showed that inbBME was joining with dimDS and then to dmMGdTyp and then to inbTME
-- This plan ended up being a very inferior plan. With just over a million rows in inbBME and inbTME,
-- this plan ended up taking upwards of 1Hr 40Mins, eventually resulting in a timeout.
-- Following options were tried to get a different/better execution plan:
-- (Note: Adding any indexes on Inbound tables was/is not an option. This is done intentionally
-- in order to keep the Extracts as fast as possible)
-- 1. Adding index on dimMgdTyp (ManagedTypeId). Result: No change in the query plan
-- 2. Re-ordered the joins. Result: No change in the query plan
-- 3. Re-organized the query into multiple sub-queries. Result: No change in the query plan.
-- 4. Finally, in order to force the plan to start with a join between inbBME and inbTME,
-- I''ve decided to add a join hint of HASH. The reason for HASH is that with a simiplified query,
-- sql was using a hash join with inbBME and inbTME and producing the resultset in less than 1 min.
-- With the updated query plan, and the same dataset, the Transform now takes just over 1min to complete.
INNER HASH JOIN inbound.TypedManagedEntity inbTME ON
inbTME.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND inbTME.DatasourceId = inbBME.DatasourceId
INNER JOIN dbo.EntityDimvw dimEnt ON
dimEnt.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND dimEnt.SourceId = inbBME.DatasourceId
JOIN dbo.ManagedTypeDimvw dimMgdTyp ON
dimMgdTyp.BaseManagedEntityId = inbTME.ManagedTypeId
AND dimMgdTyp.SourceId = inbTME.DatasourceId
JOIN dbo.DataSourceDimvw dimDS ON
inbBME.DatasourceId = dimDS.SourceId
SELECT @task = ''Step 2: Keep the last record within the Batch''
DELETE tmp
FROM #transformTemp1 tmp
WHERE RowNum > 1
-- Due to bug 235558 (inbound.TypedManagedEntity is not extracted in update case)
-- store the records in current batch which already exist in the fact table to a temp table
-- filter out those rows when we do the early arriving data handling
-- side effect, we don''t have update to the table, which is fine due to Entity and managedType actually don''t have any update
select temp.*
into #alreadyExistInFactTable
from #transformTemp0 temp
join EntityDimvw dimEnt on dimEnt.BaseManagedEntityId = temp.BaseManagedEntityId and dimEnt.SourceId = temp.DatasourceId
join EntityManagedTypeFactvw fact on fact.EntityDimKey = dimEnt.EntityDimKey
SELECT @task = ''Step 3: Handle early arriving data ''
-- If there are any early inbound rows that didn''t get inserted into the temp because the
-- corresponding row wasn''t in EntityDim yet, change DWTimestamp to hit them next time
UPDATE inbBME SET
DWTimestamp = @source1MaxWM,
RejectedCount = inbBME.RejectedCount + 1,
BatchId = @BatchId
FROM inbound.BaseManagedEntity inbBME
LEFT JOIN #transformTemp1 txTemp ON
txTemp.BaseManagedEntityId = inbBME.BaseManagedEntityId
AND txTemp.DatasourceId = inbBME.DatasourceId
LEFT JOIN Staging.EntityTypeExceptionList exList ON
inbBME.DatasourceId = ISNULL(exList.DatasourceId, inbBME.DatasourceId) -- if sourceid is null, then exclude from all sources
AND inbBME.BaseManagedTypeId = exList.BaseManagedTypeId
LEFT JOIN #alreadyExistInFactTable toRemove on toRemove.BaseManagedEntityId = inbBME.BaseManagedEntityId
WHERE inbBME.DWTimestamp >= @source1WM
AND inbBME.DWTimestamp < @source1MaxWM
AND txTemp.BaseManagedEntityId IS NULL
AND exList.BaseManagedTypeId IS NULL
AND toRemove.BaseManagedEntityId is null
SELECT @rejectedRowCount = @@ROWCOUNT
-- If there are any early inbound rows that didn''t get inserted into the temp because the
-- corresponding row wasn''t in EntityDim/ManagedTypeDim yet, change DWTimestamp to hit them next time
UPDATE inbTME SET
DWTimestamp = @source1MaxWM,
RejectedCount = inbTME.RejectedCount + 1,
BatchId = @BatchId
From inbound.TypedManagedEntity inbTME
join ( select tmp0.*
from #transformTemp0 tmp0
left join #transformTemp1 tmp1 ON
tmp0.BaseManagedEntityId = tmp1.BaseManagedEntityId
and tmp0.DatasourceId = tmp1.DatasourceId
left join #alreadyExistInFactTable toRemove on toRemove.BaseManagedEntityId = tmp0.BaseManagedEntityId
where tmp1.BaseManagedEntityId IS NULL AND toRemove.BaseManagedEntityId IS NULL
) as Source ON
Source.BaseManagedEntityId = inbTME.BaseManagedEntityId
and Source.DatasourceId = inbTME.DatasourceId
left join Staging.EntityTypeExceptionList exList on
inbTME.DatasourceId = ISNULL(exList.DatasourceId, inbTME.DatasourceId) -- if sourceid is null, then exclude from all sources
and inbTME.ManagedTypeId = exList.BaseManagedTypeId
where exList.BaseManagedTypeId is null
-- this should never happen. and if it does happen, then it is better to bail out now
-- it appears we don''t have anything to do for now
IF(@rejectedRowCount = @processedRowCount) BREAK;
-- Delete early inbound rows from our temp table
-- DELETE FROM #transformTemp1 WHERE EntityDimKey IS NULL
BEGIN TRANSACTION
SELECT @task = ''Step 4: Updating Facts''
UPDATE fact SET
DeletedDate = source.DeletedDate,
UpdatedBatchId = @BatchId
FROM dbo.EntityManagedTypeFactvw fact
JOIN #transformTemp1 source ON
source.EntityDimKey = fact.EntityDimKey
AND source.ManagedTypeDimKey = fact.ManagedTypeDimKey
AND source.DataSourceDimKey = fact.DataSourceDimKey
WHERE fact.DeletedDate IS NULL and source.DeletedDate is not null
-- Delete any rows that would cause Dup Key errors on Insert
SELECT @task = ''Step 4.5: Deleting any events from #TransformTemp1 that already have a fact row''
DELETE temp
FROM #transformTemp1 temp
JOIN dbo.EntityManagedTypeFactvw fact ON
temp.DateKey = fact.DateKey
AND temp.EntityDimKey = fact.EntityDimKey
AND temp.ManagedTypeDimKey = fact.ManagedTypeDimKey
AND temp.DataSourceDimKey = fact.DataSourceDimKey
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#transformTemp2'') IS NOT NULL DROP TABLE #transformTemp2
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable
SET @source1WM = @source1MaxWM
SELECT @loopCount = @loopCount + 1
EXEC dbo.UninitializeTransformLoop
@transformName = ''TransformEntityManagedTypeFactProc'',
@transformTemplateType = ''Manual'',
@utc = @utc,
@inserted = @Inserted,
@updated = @Updated,
@loopCount = @loopCount,
@canContinue = @canContinue OUTPUT
END -- END WHILE
IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable
END TRY
BEGIN CATCH
DECLARE @errorFmt VARCHAR(256)
IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL DROP TABLE #transformTemp1
IF OBJECT_ID(''tempdb..#alreadyExistInFactTable'') IS NOT NULL DROP TABLE #alreadyExistInFactTable