IF NOT EXISTS (SELECT * FROM sysobjects WHERE type = 'P' AND name = N'TransformEntityRelatesToEntityFactProc')
BEGIN
EXECUTE ('CREATE PROCEDURE dbo.[TransformEntityRelatesToEntityFactProc] @WaterMark xml AS RETURN 1')
END
GO
DECLARE @Statement NVARCHAR(MAX)
SET @Statement=
N'ALTER PROCEDURE [dbo].[TransformEntityRelatesToEntityFactProc](@WaterMark XML)
AS BEGIN
SET NOCOUNT ON
SET XACT_ABORT ON
DECLARE @BatchId INT,
@Updated INT = 0,
@Inserted INT = 0,
@retval INT = -1,
@err INT = 0,
@startTranCount INT = @@TRANCOUNT,
@source1WM DATETIME,
@source1MaxWM DATETIME,
@startTime DATETIME,
@batchSize INT,
@processedRowCount INT = 0,
@rejectedRowCount INT = 0
DECLARE @loopCount INT = 0,
@canContinue BIT = 1,
@rc INT = 1,
@level INT = 1
DECLARE @deletedE2E AS TABLE ( EntityDimKey INT NULL,
RelationshipTypeDimKey INT NULL,
TargetEntityDimKey INT NULL,
NewDeletedDate DATETIME NULL,
OldDeletedDate DATETIME NULL,
TobeUpdated BIT NULL,
Level INT NULL
)
--SELECT @task = ''Evaluating @source1MaxWM''
-- SET @source1MaxWM = GETUTCDATE()
WHILE(@canContinue = 1)
BEGIN
SELECT @task = ''Preparing #transformTemp0''
SELECT TOP(@batchSize) WITH TIES *
INTO #transformTemp0
FROM inbound.Relationship inbRel
WHERE inbRel.DWTimestamp >= @source1WM
AND inbRel.DWTimeStamp < @utc
ORDER BY DWTimestamp
SELECT @processedRowCount = @@ROWCOUNT
IF( @processedRowCount <> 0)
BEGIN --begin@processedRowCount
SELECT @task = ''Evaluating @source1MaxWM''
SELECT @source1MaxWM = (SELECT MAX(DWTimeStamp) FROM #transformTemp0)
SELECT @source1MaxWM = ( SELECT MIN(DWTimestamp)
FROM inbound.Relationship
WHERE DWTimeStamp > @source1MaxWM)
SELECT @source1MaxWM = ISNULL(@source1MaxWM, @utc)
/*
************************************************************************************************
* Step 1: Prepare a temp table with following bits of information:
* Step 2: ''Close'' all the ''Open'' events within the Batch
* Step 3: Delete unnecessary events (events that are either already present in the fact or
* that have been accounted for in the previous step) from the temp table
* Step 4: Update the fact to ''Close'' all the ''Open'' relationships
* Step 5: Insert new relationships
************************************************************************************************
*/
SELECT @task = ''Step 1: Preparing #TransformTemp1''
EXEC @err = dbo.InferDatasourceDimProc
@sourceTableName = ''inbound.Relationship'',
@columnName = ''DataSourceId'',
@filterColumnName = ''DWTimeStamp'',
@minTimeStamp = @source1WM,
@maxTimeStamp = @source1MaxWM,
@batchId = @BatchId
SELECT CONVERT(NVARCHAR(8), inbRel.TimeAdded, 112) AS DateKey,
dimDS.DataSourceDimKey,
dimRelType.RelationshipTypeDimKey,
dimSrcEnt.EntityDimKey AS SourceEntityDimKey,
dimTgtEnt.EntityDimKey AS TargetEntityDimKey,
inbRel.TimeAdded AS CreatedDate,
CASE WHEN inbRel.IsDeleted = 0 THEN ''9999/01/01'' ELSE inbRel.LastModified END AS DeletedDate,
IDENTITY(INT, 1, 1) AS IdentKey,
CASE WHEN dimRelType.TargetMaxCardinality = 1 THEN 1 ELSE 0 END AS IsSingleCardinalityRelationship,
inbRel.InsertedTimeStamp
INTO #transformTemp1
FROM #transformTemp0 inbRel
JOIN dbo.DataSourceDimvw dimDS ON (inbRel.DatasourceId = dimDS.SourceId)
JOIN dbo.RelationshipTypeDimvw dimRelType ON (dimRelType.RelationshipTypeId = inbRel.RelationshipTypeId AND dimRelType.SourceId = inbRel.DatasourceId)
JOIN dbo.EntityDimvw dimSrcEnt ON (dimSrcEnt.BaseManagedEntityId = inbRel.SourceEntityId AND dimSrcEnt.SourceId = inbRel.DatasourceId)
JOIN dbo.EntityDimvw dimTgtEnt ON (dimTgtEnt.BaseManagedEntityId = inbRel.TargetEntityId AND dimTgtEnt.SourceId = inbRel.DatasourceId)
OPTION(FORCE ORDER) -- Execution plan generated by SQL was really really bad
-- It was doing a join order that looked like this: ((((DatasourceDim + EntityDim) + EntityDim) + inbRel) + dimRelType)
-- even though, we know that inbRel could have a max of ~50K records even when EntityDim could have millions of rows in it
-- In the perf env, we had close to 2 mill rows in EntityDim and this join order
-- was not only timing out but was resulting in tempdb to run out of disk space on a 1-TB disk, phew!
-- So, I am changing the join order and forcing SQL to use this order
-- forcing the order should be safe becuase we know that inbRel can only have a max of ~50K rows.
-- With the new join order, tempDb disk space continues to hover around 30-MB for the entire batch of transform.
-- This is being done in resolution to bug# 167251
CREATE CLUSTERED INDEX CI_E2ETransformTemp1 ON #transformTemp1 (IdentKey) --CTS optimizations- Moved index creation from bottom to here.
CREATE NONCLUSTERED INDEX NCI0_E2ETransformTemp1 ON #transformTemp1 (CreatedDate, DeletedDate, InsertedTimeStamp) --CTS optimizations- Moved index creation from bottom to here.
CREATE NONCLUSTERED INDEX NCI0_E2ETransformTemp21 ON #transformTemp1 (SourceEntityDimKey,RelationshipTypeDimKey,DataSourceDimKey) include (IsSingleCardinalityRelationship, TargetEntityDimKey) --CTS optimizations
UPDATE STATISTICS #transformTemp1 WITH FULLSCAN --CTS optimizations
SELECT @task = ''Step 3: Including fact records into #TransformTemp1''
INSERT INTO #transformTemp1 (
DateKey,
DataSourceDimKey,
RelationshipTypeDimKey,
SourceEntityDimKey,
TargetEntityDimKey,
CreatedDate,
DeletedDate,
IsSingleCardinalityRelationship,
InsertedTimeStamp
)
SELECT fact.DateKey,
fact.DataSourceDimKey,
fact.RelationshipTypeDimKey,
fact.EntityDimKey,
fact.TargetEntityDimKey,
MAX(fact.CreatedDate) AS CreatedDate,
MAX(ISNULL(fact.DeletedDate, ''9999/01/01'')) AS DeletedDate,
MAX(txTemp1.IsSingleCardinalityRelationship) AS IsSingleCardinalityRelationship,
''1900/01/01'' AS InsertedTimeStamp
FROM dbo.EntityRelatesToEntityFactvw fact
INNER JOIN #transformTemp1 txTemp1 ON
fact.EntityDimKey = txTemp1.SourceEntityDimKey
AND fact.RelationshipTypeDimKey = txTemp1.RelationshipTypeDimKey
AND fact.DataSourceDimKey = txTemp1.DataSourceDimKey
AND (txTemp1.IsSingleCardinalityRelationship = 1 OR fact.TargetEntityDimKey = txTemp1.TargetEntityDimKey)
WHERE fact.CreatedDate <= txTemp1.CreatedDate
AND (fact.DeletedDate IS NULL OR fact.DeletedDate >= txTemp1.CreatedDate)
GROUP BY fact.DateKey,
fact.DataSourceDimKey,
fact.RelationshipTypeDimKey,
fact.EntityDimKey,
fact.TargetEntityDimKey
UNION
SELECT fact.DateKey,
fact.DataSourceDimKey,
fact.RelationshipTypeDimKey,
fact.EntityDimKey,
fact.TargetEntityDimKey,
MIN(fact.CreatedDate) AS CreatedDate,
MAX(ISNULL(fact.DeletedDate, ''9999/01/01'')) AS DeletedDate,
MAX(txTemp1.IsSingleCardinalityRelationship) AS IsSingleCardinalityRelationship,
''1900/01/01'' AS InsertedTimeStamp
FROM dbo.EntityRelatesToEntityFactvw fact
INNER JOIN #transformTemp1 txTemp1 ON
fact.EntityDimKey = txTemp1.SourceEntityDimKey
AND fact.RelationshipTypeDimKey = txTemp1.RelationshipTypeDimKey
AND fact.DataSourceDimKey = txTemp1.DataSourceDimKey
AND (txTemp1.IsSingleCardinalityRelationship = 1 OR fact.TargetEntityDimKey = txTemp1.TargetEntityDimKey)
WHERE fact.CreatedDate >= txTemp1.CreatedDate
AND (txTemp1.DeletedDate IS NULL OR txTemp1.DeletedDate >= fact.CreatedDate)
GROUP BY fact.DateKey,
fact.DataSourceDimKey,
fact.RelationshipTypeDimKey,
fact.EntityDimKey,
fact.TargetEntityDimKey
UPDATE STATISTICS #transformTemp1 WITH FULLSCAN --CTS optimizations
SELECT @task = ''Preparing #transformTemponeHalf''
SELECT tCurrent.IdentKey, MIN(CASE WHEN tCurrent.CreatedDate = tNext.CreatedDate THEN tNext.DeletedDate ELSE tNext.CreatedDate END) AS CreatedDate
INTO #transformTemponeHalf
FROM #transformTemp1 tCurrent
JOIN #transformTemp1 tNext ON (tCurrent.SourceEntityDimKey = tNext.SourceEntityDimKey AND tCurrent.RelationshipTypeDimKey = tNext.RelationshipTypeDimKey AND (tCurrent.IsSingleCardinalityRelationship = 1 OR tCurrent.TargetEntityDimKey = tNext.TargetEntityDimKey))
WHERE tCurrent.CreatedDate <= tNext.CreatedDate
AND tCurrent.DeletedDate > tNext.CreatedDate
AND tCurrent.InsertedTimeStamp < tNext.InsertedTimeStamp
GROUP BY tCurrent.IdentKey
SELECT @task = ''Closing relationships from #transformTemponeHalf''
UPDATE tCurrent SET
DeletedDate = tNext.CreatedDate
FROM #transformTEmp1 tCurrent
JOIN #transformTempOneHalf tNext ON (tCurrent.IdentKey = tNext.IdentKey)
SELECT DateKey,
DataSourceDimKey,
RelationshipTypeDimKey,
SourceEntityDimKey,
TargetEntityDimKey,
MIN(CreatedDate) AS CreatedDate,
MAX(DeletedDate) AS DeletedDate
INTO #transformTemp2
FROM #transformTemp1
GROUP BY DateKey, DataSourceDimKey, RelationshipTypeDimKey, SourceEntityDimKey, TargetEntityDimKey
SELECT @task = ''Step 4.1: Updating Inbount DWTimeStamp for early arriving records''
UPDATE inbRel SET
DWTimeStamp = GETUTCDATE(),
RejectedCount = RejectedCount + 1,
BatchId = @BatchId
FROM inbound.Relationship inbRel
LEFT JOIN (
SELECT ds.SourceId,
rt.RelationshipTypeId,
se.BaseManagedEntityId as SourceEntityId,
te.BaseManagedEntityId as TargetEntityId
FROM #transformTemp1 temp1
INNER JOIN DataSourceDimvw ds ON temp1.DataSourceDimKey = ds.DataSourceDimKey
INNER JOIN RelationshipTypeDimvw rt ON temp1.RelationshipTypeDimKey = rt.RelationshipTypeDimKey
INNER JOIN EntityDimvw se ON temp1.SourceEntityDimKey = se.EntityDimKey
INNER JOIN EntityDimvw te ON temp1.TargetEntityDimKey = te.EntityDimKey
) AS temp1
ON inbRel.DatasourceId = temp1.SourceId
AND inbRel.RelationshipTypeId = temp1.RelationshipTypeId
AND inbRel.SourceEntityId = temp1.SourceEntityId
AND inbRel.TargetEntityId = temp1.TargetEntityId
-- gkapila: The following three joins are required to filter out relationships whose source or target type is excluded
LEFT JOIN RelationshipTypeDimvw relDim ON inbRel.RelationshipTypeId = relDim.RelationshipTypeId
LEFT JOIN Staging.EntityTypeExceptionList exListSource ON relDim.SourceManagedTypeId = exListSource.BaseManagedTypeId
LEFT JOIN Staging.EntityTypeExceptionList exListTarget ON relDim.TargetManagedTypeId = exListTarget.BaseManagedTypeId
WHERE temp1.SourceEntityId IS NULL
AND exListSource.BaseManagedTypeId IS NULL
AND exListTarget.BaseManagedTypeId IS NULL
AND inbRel.DWTimestamp >= @source1WM
AND inbRel.DWTimestamp < @source1MaxWM
SELECT @rejectedRowCount = @@ROWCOUNT
SELECT @task = ''Step 4.2: Dropping #TransformTemp1''
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL BEGIN DROP TABLE #transformTemp1 END
IF OBJECT_ID(''tempdb..#transformTemponeHalf'') IS NOT NULL BEGIN DROP TABLE #transformTemponeHalf END
IF OBJECT_ID(''tempdb..#transformTempOneandHalf'') IS NOT NULL BEGIN DROP TABLE #transformTempOneandHalf END
IF(@rejectedRowCount <> @processedRowCount)
BEGIN --Begin@rejectedRowCount
BEGIN TRANSACTION
SELECT @task = ''Step 6: Updating destination fact''
UPDATE fact SET
CreatedDate = txTemp.CreatedDate,
DeletedDate = NULLIF(txTemp.DeletedDate, ''9999/01/01''),
UpdatedBatchId = @BatchId
FROM dbo.EntityRelatesToEntityFactvw fact
INNER JOIN #transformTemp2 txTemp ON
fact.DateKey = txTemp.DateKey
AND fact.DataSourceDimKey = txTemp.DataSourceDimKey
AND fact.RelationshipTypeDimKey = txTemp.RelationshipTypeDimKey
AND fact.EntityDimKey = txTemp.SourceEntityDimKey
AND fact.TargetEntityDimKey = txTemp.TargetEntityDimKey
WHERE fact.CreatedDate <> txTemp.CreatedDate
OR ISNULL(fact.DeletedDate, ''9999/01/01'') <> ISNULL(txTemp.DeletedDate, ''9999/01/01'')
SELECT @task = ''Step 6.1: Removing all the "non-delete" events from @deletedE2E''
DELETE @deletedE2E
WHERE ISNULL(NewDeletedDate, ''9999/01/01'') <> ISNULL(OldDeletedDate, ''9999/01/01'')
SELECT @task = ''Step 7: Getting rid of unnecessary records''
DELETE txTemp
FROM #transformTemp2 txTemp
INNER JOIN dbo.EntityRelatesToEntityFactvw fact ON
fact.DateKey = txTemp.DateKey
AND fact.DataSourceDimKey = txTemp.DataSourceDimKey
AND fact.RelationshipTypeDimKey = txTemp.RelationshipTypeDimKey
AND fact.EntityDimKey = txTemp.SourceEntityDimKey
AND fact.TargetEntityDimKey = txTemp.TargetEntityDimKey
SELECT @task = ''Step 8: Inserting new records into destination fact''
INSERT INTO dbo.EntityRelatesToEntityFactvw (
DateKey,
DataSourceDimKey,
RelationshipTypeDimKey,
EntityDimKey,
TargetEntityDimKey,
CreatedDate,
DeletedDate,
InsertedBatchId,
UpdatedBatchId
)
SELECT DateKey,
DataSourceDimKey,
RelationshipTypeDimKey,
SourceEntityDimKey,
TargetEntityDimKey,
CreatedDate,
NULLIF(DeletedDate, ''9999/01/01''),
@BatchId AS InsertedBatchId,
0 AS UpdatedBatchId
FROM #transformTemp2 txTemp
SELECT @task = ''Step 9: Removing all the "non-delete" events from @deletedE2E''
INSERT INTO @deletedE2E(EntityDimKey, RelationshipTypeDimKey, TargetEntityDimKey, NewDeletedDate, Level)
SELECT ins.EntityDimKey,
ins.RelationshipTypeDimKey,
ins.TargetEntityDimKey,
ins.DeletedDate AS NewDeletedDate,
1 AS Level
FROM dbo.EntityRelatesToEntityFactvw AS ins
WHERE InsertedBatchId = @BatchId
INSERT INTO @deletedE2E(EntityDimKey, RelationshipTypeDimKey, TargetEntityDimKey, NewDeletedDate, Level)
SELECT upd.EntityDimKey,
upd.RelationshipTypeDimKey,
upd.TargetEntityDimKey,
upd.DeletedDate AS NewDeletedDate,
1 AS Level
FROM dbo.EntityRelatesToEntityFactvw AS upd
WHERE UpdatedBatchId = @BatchId
INSERT INTO @deletedE2E(EntityDimKey, RelationshipTypeDimKey, TargetEntityDimKey, NewDeletedDate, Level)
SELECT factE2E.EntityDimKey,
factE2E.RelationshipTypeDimKey,
factE2E.TargetEntityDimKey,
del.LastModified AS NewDeletedDate,
2 AS Level
FROM dbo.EntityRelatesToEntityFactvw factE2E
INNER JOIN dbo.EntityDimvw del ON factE2E.EntityDimKey = del.EntityDimKey AND del.IsDeleted = 1
INNER JOIN dbo.fn_GetFlattenedRelationshipTypeHierarchy(''System.Hosting'') relType on relType.RelationshipTypeDimKey = factE2E.RelationshipTypeDimKey
WHERE del.InsertedBatchId = @BatchId
INSERT INTO @deletedE2E(EntityDimKey, RelationshipTypeDimKey, TargetEntityDimKey, NewDeletedDate, Level)
SELECT factE2E.EntityDimKey,
factE2E.RelationshipTypeDimKey,
factE2E.TargetEntityDimKey,
del.LastModified AS NewDeletedDate,
2 AS Level
FROM dbo.EntityRelatesToEntityFactvw factE2E
INNER JOIN dbo.EntityDimvw del ON factE2E.EntityDimKey = del.EntityDimKey AND del.IsDeleted = 1
INNER JOIN dbo.fn_GetFlattenedRelationshipTypeHierarchy(''System.Hosting'') relType on relType.RelationshipTypeDimKey = factE2E.RelationshipTypeDimKey
WHERE del.UpdatedBatchId = @BatchId
IF EXISTS(SELECT ''x'' FROM @deletedE2E)
BEGIN
SELECT @task = ''Step 9.1: Populating @deletedE2E iteratively from E2E fact''
WHILE(@rc > 0)
BEGIN
INSERT INTO @deletedE2E(EntityDimKey, RelationshipTypeDimKey, TargetEntityDimKey, NewDeletedDate, OldDeletedDate, Level)
SELECT factE2E.EntityDimKey, factE2E.RelationshipTypeDimKey, factE2E.TargetEntityDimKey, NewDeletedDate, OldDeletedDate, @level + 1
FROM dbo.EntityRelatesToEntityFactvw factE2E
INNER JOIN dbo.fn_GetFlattenedRelationshipTypeHierarchy(''System.Hosting'') relType on relType.RelationshipTypeDimKey = factE2E.RelationshipTypeDimKey
INNER JOIN @deletedE2E del
INNER JOIN dbo.fn_GetFlattenedRelationshipTypeHierarchy(''System.Hosting'') delrelType on delrelType.RelationshipTypeDimKey = del.RelationshipTypeDimKey
ON factE2E.EntityDimKey = del.TargetEntityDimKey and del.Level = @level
OPTION(FORCE ORDER)
SELECT @rc = @@ROWCOUNT, @level = @level + 1
END
SELECT @task = ''Step 9.2: Closing relationships based on @deletedE2E''
UPDATE e2e SET
DeletedDate = NewDeletedDate,
UpdatedBatchId = @BatchId
FROM dbo.EntityRelatesToEntityFactvw e2e
INNER JOIN @deletedE2E del ON e2e.EntityDimKey = del.EntityDimKey AND e2e.RelationshipTypeDimKey = del.RelationshipTypeDimKey AND e2e.TargetEntityDimKey = del.TargetEntityDimKey
WHERE del.Level > 1
AND del.NewDeletedDate IS NOT NULL
SET @Updated = @Updated + @@ROWCOUNT
END
IF(@processedRowCount = 0) BREAK;
IF OBJECT_ID(''tempdb..#transformTemp2'') IS NOT NULL BEGIN DROP TABLE #transformTemp2 END
IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
END TRY
BEGIN CATCH
DECLARE @errorFmt VARCHAR(256)
IF OBJECT_ID(''tempdb..#tempTable'') IS NOT NULL DROP TABLE #tempTable
IF OBJECT_ID(''tempdb..#transformTemp0'') IS NOT NULL DROP TABLE #transformTemp0
IF OBJECT_ID(''tempdb..#transformTemp1'') IS NOT NULL BEGIN DROP TABLE #transformTemp1 END
IF OBJECT_ID(''tempdb..#transformTemponeHalf'') IS NOT NULL BEGIN DROP TABLE #transformTemponeHalf END
IF OBJECT_ID(''tempdb..#transformTempOneandHalf'') IS NOT NULL BEGIN DROP TABLE #transformTempOneandHalf END
IF OBJECT_ID(''tempdb..#transformTemp2'') IS NOT NULL
BEGIN
DROP TABLE #transformTemp2
END