SHA1 Script Component Driven by Table Metadata
Three Biml files that work together to build SHA1-row-hash extract packages without needing a custom component or T-SQL HASHBYTES. 'Connections.biml' declares an OLE DB connection and a 'Database'/'Schema' for AdventureWorksDW2012. 'Import Tables.biml' calls 'sourceConnection.ImportDB' (excluding foreign keys, column defaults, and views) and emits matching 'Table' nodes for a hard-coded list of dimensions (DimAccount, DimCurrency, DimCustomer, DimEmployee, DimOrganization, DimProduct).
The third file generates one 'ScriptComponentProject' and one extract 'Package' per table. The script's 'ScriptMain.cs' walks 'ComponentMetaData.InputCollection[0].InputColumnCollection', captures column types in a 'ColumnInfo' array during 'PreExecute', and in 'Input0_ProcessInputRow' uses a per-DataType switch on 'inputBuffer.GetColumnInfo' to serialize each cell into a growing 'inputByteBuffer' through a 'Utility' helper (which handles bool, decimal, DateTime/DateTimeOffset with optional millisecond format, GUID, TimeSpan, numeric primitives, and strings via UTF-8). A 'nullHandling' suffix captures null markers and the length of variable-width values so logically distinct rows do not collide. The final SHA1 is computed by 'SHA1CryptoServiceProvider' and written to a Binary(20) 'SHA1' output column.
Each generated package reads its table via 'SELECT GetColumnList() FROM SchemaQualifiedName', passes the rows through the SHA1 script transformation, and stops there: wire your own destination to the script output. The 'InputBuffer' is built from columns where 'IsUsedInPrimaryKey == false', 'IsIdentityColumn == false', and 'Length != -1' (so it skips MAX-typed columns); adjust those filters if you want different inputs. Test thoroughly against your own data before relying on the resulting hash for change detection.
<!-- Connections.biml-->
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Connections>
<OleDbConnection Name="AW" ConnectionString="Server=localhost;Initial Catalog=AdventureWorksDW2012;Provider=SQLNCLI11;Integrated Security=SSPI;" />
</Connections>
<Databases>
<Database Name="AW" ConnectionName="AW" />
</Databases>
<Schemas>
<Schema Name="dbo" DatabaseName="AW" />
</Schemas>
</Biml>
<!-- Import Tables.biml-->
<#@ import namespace="Varigence.Hadron.CoreLowerer.SchemaManagement" #>
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<# var sourceConnection = (AstDbConnectionNode)RootNode.Connections["AW"];
var importedSchema = sourceConnection.ImportDB("", "", ImportOptions.ExcludeForeignKey | ImportOptions.ExcludeColumnDefault | ImportOptions.ExcludeViews);
// Filtering a couple of tables.
var tableNamesToImport = new List<string>() { "DimAccount", "DimCurrency", "DimCustomer", "DimEmployee", "DimOrganization", "DimProduct" };#>
<Tables>
<# foreach (var tableNode in importedSchema.TableNodes.Where(item => tableNamesToImport.Contains(item.Name)).OrderBy(item => item.Name)) { #>
<#=tableNode.GetBiml()#>
<# } #>
</Tables>
</Biml>
<!-- Use the following code to create the Packages to Extract Data and add SHA1 columns-->
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<ScriptProjects>
<# foreach(var tableNode in RootNode.Tables) {#>
<ScriptComponentProject Name="SCP_SHA1_<#=tableNode.Name#>" ProjectCoreName="SCP_SHA1_<#=tableNode.Name#>">
<AssemblyReferences>
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSPipelineWrap.dll" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSRuntimeWrap.dll" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.PipelineHost.dll" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.TxScript.dll" />
<AssemblyReference AssemblyPath="System.dll" />
<AssemblyReference AssemblyPath="System.AddIn.dll" />
<AssemblyReference AssemblyPath="System.Data.dll" />
<AssemblyReference AssemblyPath="System.Xml.dll" />
</AssemblyReferences>
<Files>
<File Path="ScriptMain.cs">using System;
using System.Collections.Generic;
using System.Data;
using System.Reflection;
using Microsoft.SqlServer.Dts.Pipeline;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Security.Cryptography;
using System.Text;
using System.IO;
using SCP_SHA1_<#=tableNode.Name#>.csproj;
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
private int ColumnCount;
private ColumnInfo[] inputColumnInfos;
private PipelineBuffer inputBuffer;
private SHA1CryptoServiceProvider SHA1Hash;
private struct ColumnInfo
{
public int bufferColumnIndex;
public string columnName;
public string dataType;
}
public override void PreExecute()
{
base.PreExecute();
SHA1Hash = new SHA1CryptoServiceProvider();
ColumnCount = ComponentMetaData.InputCollection[0].InputColumnCollection.Count;
inputColumnInfos = new ColumnInfo[ColumnCount];
int[] columnIndexes = GetColumnIndexes(ComponentMetaData.InputCollection[0].ID);
int columnIndex = 0;
foreach (IDTSInputColumn100 item in ComponentMetaData.InputCollection[0].InputColumnCollection)
{
inputColumnInfos[columnIndex] = new ColumnInfo
{
columnName = Convert.ToString(item.Name),
bufferColumnIndex =
columnIndexes[ComponentMetaData.InputCollection[0].InputColumnCollection.GetObjectIndexByID(item.ID)
],
dataType = item.DataType.ToString()
};
columnIndex++;
}
}
public override void ProcessInput(int InputID, Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer Buffer)
{
inputBuffer = Buffer;
base.ProcessInput(InputID, Buffer);
}
public override void PostExecute()
{
base.PostExecute();
}
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
byte[] inputByteBuffer = new byte[1000];
Int32 bufferUsed = 0;
string nullHandling = String.Empty;
uint blobLength = 0;
Int32 columnToProcessID = 0;
for (int i = 0; i < ColumnCount; i++)
{
var info = inputColumnInfos[i];
columnToProcessID = info.bufferColumnIndex;
if (!inputBuffer.IsNull(columnToProcessID))
{
nullHandling += "N";
switch (inputBuffer.GetColumnInfo(columnToProcessID).DataType)
{
case DataType.DT_BOOL:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetBoolean(columnToProcessID));
break;
case DataType.DT_IMAGE:
blobLength = inputBuffer.GetBlobLength(columnToProcessID);
Utility.Append(ref inputByteBuffer, ref bufferUsed,
inputBuffer.GetBlobData(columnToProcessID, 0, (int)blobLength));
nullHandling += blobLength.ToString();
break;
case DataType.DT_BYTES:
byte[] bytesFromBuffer = inputBuffer.GetBytes(columnToProcessID);
Utility.Append(ref inputByteBuffer, ref bufferUsed, bytesFromBuffer);
nullHandling += bytesFromBuffer.GetLength(0).ToString();
break;
case DataType.DT_CY:
case DataType.DT_DECIMAL:
case DataType.DT_NUMERIC:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDecimal(columnToProcessID));
break;
case DataType.DT_DBDATE:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDate(columnToProcessID), true);
break;
case DataType.DT_DATE:
case DataType.DT_DBTIMESTAMP:
case DataType.DT_DBTIMESTAMP2:
case DataType.DT_FILETIME:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDateTime(columnToProcessID), true);
break;
case DataType.DT_DBTIME:
case DataType.DT_DBTIME2:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetTime(columnToProcessID));
break;
case DataType.DT_GUID:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetGuid(columnToProcessID));
break;
case DataType.DT_I1:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetSByte(columnToProcessID));
break;
case DataType.DT_I2:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt16(columnToProcessID));
break;
case DataType.DT_I4:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt32(columnToProcessID));
break;
case DataType.DT_I8:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt64(columnToProcessID));
break;
case DataType.DT_NTEXT:
case DataType.DT_STR:
case DataType.DT_TEXT:
case DataType.DT_WSTR:
String stringFromBuffer = inputBuffer.GetString(columnToProcessID);
Utility.Append(ref inputByteBuffer, ref bufferUsed, stringFromBuffer, Encoding.UTF8);
nullHandling += stringFromBuffer.Length.ToString();
break;
case DataType.DT_R4:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetSingle(columnToProcessID));
break;
case DataType.DT_R8:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDouble(columnToProcessID));
break;
case DataType.DT_UI1:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetByte(columnToProcessID));
break;
case DataType.DT_UI2:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt16(columnToProcessID));
break;
case DataType.DT_UI4:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt32(columnToProcessID));
break;
case DataType.DT_UI8:
Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt64(columnToProcessID));
break;
case DataType.DT_EMPTY:
case DataType.DT_NULL:
default:
break;
}
}
else
{
nullHandling += "Y";
}
}
Utility.Append(ref inputByteBuffer, ref bufferUsed, nullHandling, Encoding.UTF8);
var sha1Hasher = new SHA1CryptoServiceProvider();
Row.SHA1 = sha1Hasher.ComputeHash(inputByteBuffer);
}
}
</File>
<File Path="Utility.cs">using System;
using System.IO;
using System.Text;
namespace SCP_SHA1_<#=tableNode.Name#>.csproj
{
public static class Utility
{
#region Types to Byte Arrays
public static byte[] ToArray(bool value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(decimal value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
private static byte[] ToArray(DateTimeOffset value, Boolean millisecondHandling)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(millisecondHandling
? value.ToString("yyyy-MM-dd HH:mm:ss.fffffff zzz")
: value.ToString("u"));
return stream.ToArray();
}
}
}
public static byte[] ToArray(DateTime value, Boolean millisecondHandling)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(millisecondHandling
? value.ToString("yyyy-MM-dd HH:mm:ss.fffffff")
: value.ToString("u"));
return stream.ToArray();
}
}
}
public static byte[] ToArray(TimeSpan value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value.ToString());
return stream.ToArray();
}
}
}
public static byte[] ToArray(byte value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(Guid value)
{
return value.ToByteArray();
}
public static byte[] ToArray(short value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(int value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(long value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(float value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(double value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(ushort value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(uint value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(ulong value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
public static byte[] ToArray(sbyte value)
{
using (var stream = new MemoryStream())
{
using (var writer = new BinaryWriter(stream))
{
writer.Write(value);
return stream.ToArray();
}
}
}
#endregion
#region Byte Array Appending
public static void Append(ref byte[] array, ref Int32 bufferUsed, bool value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
private static void Append(ref byte[] array, ref Int32 bufferUsed, System.DateTimeOffset value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value, true));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, DateTime value, Boolean millisecondHandling)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value, millisecondHandling));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, TimeSpan value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, Guid value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, ulong value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, float value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, byte value)
{
if (bufferUsed + 1 >= array.Length)
{
System.Array.Resize<byte>(ref array, array.Length + 1000);
}
array[bufferUsed++] = value;
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, byte[] value)
{
var valueLength = value.Length;
var arrayLength = array.Length;
if (bufferUsed + valueLength >= arrayLength)
{
if (valueLength > 1000)
{
System.Array.Resize<byte>(ref array, arrayLength + valueLength + 1000);
}
else
{
System.Array.Resize<byte>(ref array, arrayLength + 1000);
}
}
System.Array.Copy(value, 0, array, bufferUsed, valueLength);
bufferUsed += valueLength;
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, sbyte value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, short value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, ushort value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, int value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, long value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, uint value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, double value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, decimal value)
{
Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, char value, Encoding encoding)
{
Utility.Append(ref array, ref bufferUsed, encoding.GetBytes(new char[] { value }));
}
public static void Append(ref byte[] array, ref Int32 bufferUsed, string value, System.Text.Encoding encoding)
{
Utility.Append(ref array, ref bufferUsed, encoding.GetBytes(value));
}
#endregion
}
}
</File>
</Files>
<OutputBuffers>
<OutputBuffer Name="Output0">
<Columns>
<Column Name="SHA1" Length="20" DataType="Binary" />
</Columns>
</OutputBuffer>
</OutputBuffers>
<InputBuffer Name="Input0">
<Columns>
<# foreach(var columnNode in tableNode.Columns.Where(c => c.IsUsedInPrimaryKey == false && c.IsIdentityColumn == false && c.Length != -1)) { #>
<Column Name="<#=columnNode#>" <#=GetBimlDatatype(columnNode.DataType.ToString(), columnNode.Length.ToString(), columnNode.Precision.ToString(), columnNode.Scale.ToString())#> UsageType="ReadOnly" />
<# } #>
</Columns>
</InputBuffer>
</ScriptComponentProject>
<# } #>
</ScriptProjects>
<Packages>
<# foreach (var tableNode in RootNode.Tables) { #>
<Package Name="Extract <#=tableNode.SsisSafeScopedName#>" ConstraintMode="Linear" PackageSubpath="AW_EXTRACT">
<Tasks>
<Dataflow Name="Copy Table">
<Transformations>
<OleDbSource Name="OLE_SRC <#=tableNode.SsisSafeScopedName#>" ConnectionName="AW">
<DirectInput>SELECT <#=tableNode.GetColumnList() #> FROM <#=tableNode.SchemaQualifiedName#></DirectInput>
</OleDbSource>
<ScriptComponentTransformation Name="SC Add SHA1">
<InputPath OutputPathName="OLE_SRC <#=tableNode.SsisSafeScopedName#>.Output"></InputPath>
<ScriptComponentProjectReference ScriptComponentProjectName="SCP_SHA1_<#=tableNode.Name#>" />
</ScriptComponentTransformation>
</Transformations>
</Dataflow>
</Tasks>
</Package>
<# } #>
</Packages>
<#+
private string GetBimlDatatype(string dataType, string length, string precision, string scale)
{
var bimlDatatype = "DataType=\"Byte\"";
switch (dataType)
{
case "Boolean":
case "SByte":
case "Single":
case "Int16":
case "Int32":
case "Int64":
case "Double":
case "Currency":
case "Date":
case "DateTime":
case "DateTime2":
case "DateTimeOffset":
case "Xml":
bimlDatatype = "DataType=\"" + dataType + "\"";
break;
case "Decimal":
bimlDatatype = "DataType=\"" + dataType + "\" Precision=\"" + precision + "\" Scale=\"" + scale + "\"";
break;
case "VarNumeric":
case "String":
case "StringFixedLength":
bimlDatatype = "DataType=\"" + dataType + "\" Length=\"" + length + "\"";
break;
case "AnsiString":
case "AnsiStringFixedLength":
bimlDatatype = "DataType=\"Byte\"";
break;
default:
break;
}
return bimlDatatype;
}
#>
</Biml>