Reading an RSS Feed with a Script Component
Defines a 'ScriptComponentProject' named 'SC_RSS_READER' that uses 'System.Xml.XmlTextReader' to fetch an RSS feed at the URL stored in the 'User::RSSURI' package variable, walks the rss/channel/item nodes, and emits one output row per item with both channel-level and item-level fields. The accompanying package wires the script source into a dataflow with a partial-cache 'Lookup' against 'dbo.RSSFeedReader' on a SQL Server source connection, and the 'NoMatch' output is sent to an 'OleDbDestination' so only previously unseen items (matched on 'ItemGuid', 'ItemPubDate', and 'ItemTitle') are inserted.
The annotation block at the top of the Biml contains the 'CREATE TABLE' script for 'dbo.RSSFeedReader' that the package writes into. Adjust the 'OleDbConnection' string to point at your own database, change the default 'RSSURI' value if you want a different feed, and avoid polling more often than once per hour to prevent the upstream feed provider from rate-limiting your IP.
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Annotations>
<Annotation>
RSSFeedReader Table Create script:
CREATE TABLE [dbo].[RSSFeedReader](
[ChannelTitle] [nvarchar](255) NULL,
[ChannelDescription] [nvarchar](2048) NULL,
[ChannelLink] [nvarchar](255) NULL,
[ChannelLanguage] [nvarchar](255) NULL,
[ItemDescription] [nvarchar](2048) NULL,
[ItemGuid] [nvarchar](128) NOT NULL,
[ItemTitle] [nvarchar](255) NOT NULL,
[ItemPubDate] [nvarchar](50) NOT NULL,
[ItemLink] [nvarchar](255) NULL,
[RSSURI] [nvarchar](255) NULL,
CONSTRAINT [PK_RSSFeedReader] PRIMARY KEY CLUSTERED
(
[ItemGuid] ASC,
[ItemTitle] ASC,
[ItemPubDate] ASC
)
)
</Annotation>
</Annotations>
<Connections>
<OleDbConnection
Name="CnOleDBAdventureWorks2012"
ConnectionString="Data Source=.\MSSQL2012;Initial Catalog=AdventureWorks2012;Provider=SQLNCLI10.1;Integrated Security=SSPI;Connect Timeout=30;"
RetainSameConnection="true">
</OleDbConnection>
</Connections>
<ScriptProjects>
<ScriptComponentProject Name="SC_RSS_READER">
<AssemblyReferences>
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSPipelineWrap" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSRuntimeWrap" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.PipelineHost" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.TxScript" />
<AssemblyReference AssemblyPath="System.dll" />
<AssemblyReference AssemblyPath="System.AddIn.dll" />
<AssemblyReference AssemblyPath="System.Data.dll" />
<AssemblyReference AssemblyPath="System.Xml.dll" />
</AssemblyReferences>
<OutputBuffers>
<OutputBuffer Name="Output0" IsSynchronous ="false">
<Annotations>
<Annotation>
IsSynchronous="false" ==>
When there are no non-synchronus buffers,
the compiler does not emit the CreateNewOutputRows
virtual base method (to match the BIDS/SSDT behavior)
</Annotation>
</Annotations>
<Columns>
<Column Name="ChannelTitle" DataType="String" Length="255"></Column>
<Column Name="ChannelDescription" DataType="String" Length="2048"></Column>
<Column Name="ChannelLink" DataType="String" Length="255"></Column>
<Column Name="ChannelLanguage" DataType="String" Length="255"></Column>
<Column Name="ItemDescription" DataType="String" Length="2048"></Column>
<Column Name="ItemGuid" DataType="String" Length="128"></Column>
<Column Name="ItemTitle" DataType="String" Length="255"></Column>
<Column Name="ItemPubDate" DataType="String" Length="50"></Column>
<Column Name="ItemLink" DataType="String" Length="255"></Column>
<Column Name="RSSURI" DataType="String" Length="255"></Column>
</Columns>
</OutputBuffer>
</OutputBuffers>
<ReadOnlyVariables>
<Variable Namespace="User" VariableName="RSSURI" DataType="String"></Variable>
</ReadOnlyVariables>
<Files>
<File Path="AssemblyInfo.cs">
using System.Reflection;
using System.Runtime.CompilerServices;
[assembly: AssemblyTitle("SC_RSS_READER")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("SC_RSS_READER")]
[assembly: AssemblyCopyright("Copyright @ 2012")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
[assembly: AssemblyVersion("1.0.*")]
</File>
<File Path="main.cs">
<![CDATA[
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Xml;
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
public override void CreateNewOutputRows()
{
XmlTextReader rssReader;
XmlDocument rssDoc;
XmlNode nodeRss;
XmlNode nodeChannel;
XmlNode nodeItem;
string RSSURI = this.Variables.RSSURI.ToString();
rssReader = new XmlTextReader(RSSURI);
rssDoc = new XmlDocument();
rssDoc.Load(rssReader);
for (int i = 0; i < rssDoc.ChildNodes.Count; i++)
{
if (rssDoc.ChildNodes[i].Name == "rss")
{
nodeRss = rssDoc.ChildNodes[i];
for (int j = 0; j < nodeRss.ChildNodes.Count; j++)
{
if (nodeRss.ChildNodes[j].Name == "channel")
{
nodeChannel = nodeRss.ChildNodes[j];
for (int k = 0; k < nodeChannel.ChildNodes.Count; k++)
{
if (nodeChannel.ChildNodes[k].Name == "item")
{
nodeItem = nodeChannel.ChildNodes[k];
Output0Buffer.AddRow();
Output0Buffer.ChannelTitle = nodeChannel["title"].InnerText;
Output0Buffer.ChannelDescription = nodeChannel["description"].InnerText;
Output0Buffer.ChannelLink = nodeChannel["link"].InnerText;
Output0Buffer.ChannelLanguage = nodeChannel["language"].InnerText;
Output0Buffer.ItemDescription = nodeItem["description"].InnerText;
Output0Buffer.ItemGuid = nodeItem["guid"].InnerText;
Output0Buffer.ItemTitle = nodeItem["title"].InnerText;
Output0Buffer.ItemPubDate = nodeItem["pubDate"].InnerText;
Output0Buffer.ItemLink = nodeItem["link"].InnerText;
Output0Buffer.RSSURI = RSSURI;
}
}
}
}
}
}
}
}
]]>
</File>
</Files>
</ScriptComponentProject>
</ScriptProjects>
<Packages>
<Package Name ="RSS.Feed.Reader" ConstraintMode="Linear">
<Variables>
<Variable Name="RSSURI" DataType="String">http://rss.cnn.com/rss/edition.rss</Variable>
</Variables>
<Tasks>
<Dataflow Name ="DFT Read RSS Feed" >
<Transformations>
<ScriptComponentSource Name="SC RSS Feed Reader">
<ScriptComponentProjectReference ScriptComponentProjectName="SC_RSS_READER"></ScriptComponentProjectReference>
</ScriptComponentSource>
<Lookup Name="LKP Existing ItemGuids"
NoMatchBehavior="RedirectRowsToNoMatchOutput"
OleDbConnectionName="CnOleDBAdventureWorks2012"
CacheMode="Partial">
<ExternalTableInput Table="[dbo].[RSSFeedReader]"></ExternalTableInput>
<Inputs>
<Column SourceColumn="ItemGuid" TargetColumn="ItemGuid"></Column>
<Column SourceColumn="ItemPubDate" TargetColumn="ItemPubDate"></Column>
<Column SourceColumn="ItemTitle" TargetColumn="ItemTitle"></Column>
</Inputs>
</Lookup>
<OleDbDestination Name="ODD dbo RSSFeedReader" ConnectionName="CnOleDBAdventureWorks2012">
<InputPath OutputPathName="LKP Existing ItemGuids.NoMatch"></InputPath>
<ExternalTableOutput Table="[dbo].[RSSFeedReader]"></ExternalTableOutput>
</OleDbDestination>
</Transformations>
</Dataflow>
</Tasks>
</Package>
</Packages>
</Biml>
Submitted by John Minkjan.