Skip to main content

Generate CSV file format from Database

One of the patterns that I use extensively when loading flat files is to do the heavy lifting on the data type validation using the connection manager. This allows me to redirect individual rows that don't pass data type validation inside the data flow without any effort. However, it means you spend a lot of time editing each connection manager to the format of the destination table you are writing to. This snippet uses the column/data type definitions already in place to generate the connection managers with the appropriate data types. You'll get a separate package for each table with a dummy data flow task just as a placeholder. The package, connection manager, flat file format, and database table are all matched together based on the name.

It's shown as a single script here, but you can split the Connections node and File Formats nodes into separate files. Since these are generic routines, you can reuse them across a variety of packages when you need to do this kind of generation.

<#@ template language="C#" hostspecific="true"#>
<#@ import namespace="System.Data"#>
<#@ import namespace="System.Data.SqlClient"#>
<#@ import namespace="System.IO"#>
<#@ import namespace="Varigence.Hadron.CoreLowerer.SchemaManagement" #>
<# var connection = SchemaManager.CreateConnectionNode("SchemaProvider", "Data Source=(local);Initial Catalog=AdventureWorks;Provider=SQLNCLI11.1;Integrated Security=SSPI;"); #>
<# var tables = connection.GenerateTableNodes("Orders"); #>

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Annotations>
<Annotation>
Creates one package per table within the specified schema with a flat file connection manager configured based on database metadata
You can split the FileFormats and Connections nodes into separate files as I've done for modularity.
Relies on SQL Server 2008+ CTE functionality
</Annotation>
</Annotations>
<#
string CnSrcDB = "Server=(local);Initial Catalog=AdventureWorks;Integrated Security=SSPI;Provider=SQLNCLI11.1";
string TableName ="";
string ColumnName ="";
string DataType ="";
string ColumnDelimeter =";";
string SchemaName="Orders";
string ExcludeCols="'LoadID','StageLoadID','SourceDB'";
string MaxLength="";
string Precision="";
string Scale="";
string RowNumber="";
string MaxColumns="";

DataTable Cols;
#>
<FileFormats>
<# foreach (var table in tables) { #>
<FlatFileFormat FlatFileType="Delimited" ColumnNamesInFirstDataRow="true" RowDelimiter="CRLF" Name="<#=table.Name#>">
<Columns>
<# Cols = ExternalDataAccess.GetDataTable(CnSrcDB,"WITH Schema_CTE (TableName, ColumnName, BimlType, MaxLength, Precision, Scale, " +
"RowNumber) AS (SELECT OBJECT_NAME(object_id), a.name, c.BimlType, a.max_length, a.precision, a.scale, " +
"ROW_NUMBER () OVER(PARTITION BY OBJECT_NAME(object_id) ORDER BY a.column_id) RowNum FROM sys.columns a " +
"INNER JOIN sys.types b ON a.system_type_id = b.system_type_id LEFT OUTER JOIN (VALUES ('nvarchar','String'), " +
"('nchar','String'), ('varchar','AnsiString'), ('char','AnsiString'), ('int','Int32'),('smallint','Int16'), " +
"('tinyint','Int16'),('bigint','Int64'),('datetime','DateTime'),('money','Currency'), ('float','Double'), " +
"('decimal','Decimal'), ('uniqueidentifier','GUID'), ('date','Date'), ('time','Time'), ('binary','Binary'), " +
"('datetime2','DateTime2'), ('datetimeoffset','DateTimeOffset'), ('smalldatetime','DateTime'), ('real','Double'), " +
"('bit','Boolean'), ('numeric','Decimal'), ('smallmoney','Currency'), ('sql_variant','String'), ('hierarchyid','String'), " +
"('geometry','String'), ('geography','String'), ('varbinary','Binary'), ('timestamp','String'), ('xml','XML')) " +
"AS c(SQLType,BimlType) ON b.name = c.SQLType WHERE OBJECT_SCHEMA_NAME(object_id) = '"+SchemaName+"' AND b.name != 'sysname' " +
"AND a.Name NOT IN ("+ExcludeCols+") AND OBJECT_NAME(object_id) = '"+table.Name+"') SELECT ColumnName, BimlType, " +
"CASE WHEN BimlType != 'String' THEN 0 ELSE MaxLength END MaxLength, CASE WHEN BimlType != 'Decimal' THEN 0 ELSE Precision END " +
"Precision, Scale, RowNumber, MAX(RowNumber) OVER(PARTITION BY TableName) MaxColumn FROM Schema_CTE");
foreach (DataRow Row in Cols.Rows){
ColumnName=Row[0].ToString();
DataType=Row[1].ToString();
MaxLength=Row[2].ToString();
Precision=Row[3].ToString();
Scale=Row[4].ToString();
RowNumber=Row[5].ToString();
MaxColumns=Row[6].ToString();
ColumnDelimeter = ";";
if (RowNumber == MaxColumns){ ColumnDelimeter = "CRLF"; } ;
#>
<Column Name="<#=ColumnName#>" ColumnType="Delimited" DataType= "<#=DataType#>" Delimiter="<#=ColumnDelimeter#>" Length="<#=MaxLength#>" Precision="<#=Precision#>" Scale="<#=Scale#>"></Column>
<# } #>
</Columns>
</FlatFileFormat>
<#} #>
</FileFormats>
<Connections>
<OleDbConnection
Name="CnOleDBAdventureWorks2012"
ConnectionString="Data Source=(local);Initial Catalog=AdventureWorks;Provider=SQLNCLI10.1;Integrated Security=SSPI;Connect Timeout=30;"
RetainSameConnection="true">
</OleDbConnection>
<# foreach (var table in tables) { #>
<FlatFileConnection Name="<#=table.Name#>"
FileFormat="<#=table.Name#>"
FilePath="C:\\<#=table.Name#>.csv"></FlatFileConnection>
<# }#>
</Connections>
<Packages>
<# foreach (var table in tables) { #>
<Package Name="<#=table.Name#>" ConstraintMode="Linear" FailParentOnFailure="false">
<Tasks>
<Dataflow Name="<#=table.Name#>">
<Transformations>
<FlatFileSource Name="<#=table.Name#>"
ConnectionName="<#=table.Name#>"></FlatFileSource>
</Transformations>
</Dataflow>
</Tasks>
</Package>

<# } #>

</Packages>
</Biml>

Submitted by Michael Hotek.