Skip to main content

Reading a Twitter Feed with a Script Component

Defines a 'ScriptComponentProject' named 'SCS_Twitter_Feed' whose 'main.cs' calls 'search.twitter.com/search.atom' for the hashtag in the 'User::SearchTerm' variable, parses the returned Atom entries with 'System.Xml.XmlDocument', and emits up to 100 rows per run (the API page size). An Execute SQL task reads the highest 'TweetNumber' already in 'dbo.TwitterLog' for the given search term and passes it as 'since_id' in the URL, so each run only fetches tweets newer than the last load. The dataflow tags every row with the current search term via a 'DerivedColumns' transformation and inserts the result into 'dbo.TwitterLog' via an 'OleDbDestination'.

The 'CREATE TABLE' script for 'dbo.TwitterLog' is embedded in the 'Annotations' block at the top of the Biml. Note that the public Twitter Search Atom API used by this snippet was retired by Twitter and is no longer reachable; the script is preserved as a worked example of a non-synchronous 'ScriptComponentSource' that fetches data from an HTTP endpoint and combines it with an Execute SQL high-water mark. Adjust the 'OleDbConnection' to your own database before running.

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Annotations>
<Annotation>
Twitter Log Table Script:

CREATE TABLE [dbo].[TwitterLog](
[TweetNumber] [bigint] NULL,
[TweetID] [nvarchar](128) NULL,
[PublishedDateTime] [nvarchar](50) NULL,
[UpdatedDateTime] [nvarchar](50) NULL,
[TweetContent] [nvarchar](2048) NULL,
[TweetTitle] [nvarchar](2048) NULL,
[TweetURI] [nvarchar](2048) NULL,
[TweeterName] [nvarchar](2048) NULL,
[TweetLanguage] [nvarchar](128) NULL,
[TweetSource] [nvarchar](128) NULL,
[TweetResultType] [nvarchar](128) NULL,
[TweetGeoInfo] [nvarchar](128) NULL,
[TweeterImageLink] [nvarchar](128) NULL,
[TweetLink] [nvarchar](128) NULL,
[SearchTerm] [nvarchar](128) NULL
)
</Annotation>
</Annotations>
<Connections>
<OleDbConnection
Name="CnOleDBAdventureWorks2012"
ConnectionString="Data Source=.\MSSQL2012;Initial Catalog=AdventureWorks2012;Provider=SQLNCLI10.1;Integrated Security=SSPI;Connect Timeout=30;"/>
</Connections>
<ScriptProjects>
<ScriptComponentProject Name ="SCS_Twitter_Feed">
<AssemblyReferences>
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSPipelineWrap" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSRuntimeWrap" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.PipelineHost" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.TxScript" />
<AssemblyReference AssemblyPath="System.dll" />
<AssemblyReference AssemblyPath="System.AddIn.dll" />
<AssemblyReference AssemblyPath="System.Data.dll" />
<AssemblyReference AssemblyPath="System.Xml.dll" />
</AssemblyReferences>
<OutputBuffers>
<OutputBuffer Name="Output0" IsSynchronous="false">
<Annotations>
<Annotation>
IsSynchronous="false" ==>
When there are no non-synchronus buffers,
the compiler does not emit the CreateNewOutputRows
virtual base method (to match the BIDS/SSDT behavior)
</Annotation>
</Annotations>
<Columns>
<Column Name="TweetNumber" DataType ="Int64"></Column>
<Column Name="TweetID" DataType="String" Length="128"></Column>
<Column Name="PublishedDateTime" DataType="String" Length="50"></Column>
<Column Name="UpdatedDateTime" DataType="String" Length="50"></Column>
<Column Name="TweetContent" DataType="String" Length="2048"></Column>
<Column Name="TweetTitle" DataType="String" Length="2048"></Column>
<Column Name="TweetURI" DataType="String" Length="2048"></Column>
<Column Name="TweeterName" DataType="String" Length="2048"></Column>
<Column Name="TweetLanguage" DataType="String" Length="128"></Column>
<Column Name="TweetSource" DataType="String" Length="128"></Column>
<Column Name="TweetResultType" DataType="String" Length="128"></Column>
<Column Name="TweetGeoInfo" DataType="String" Length="128"></Column>
<Column Name="TweeterImageLink" DataType="String" Length="128"></Column>
<Column Name="TweetLink" DataType="String" Length="128"></Column>
</Columns>
</OutputBuffer>
</OutputBuffers>
<ReadOnlyVariables>
<Variable Namespace="User" VariableName="LastTweetNumber" DataType="Int64"></Variable>
<Variable Namespace="User" VariableName="SearchTerm" DataType="String"></Variable>
</ReadOnlyVariables>
<Files>
<File Path="AssemblyInfo.cs">
using System.Reflection;
using System.Runtime.CompilerServices;

[assembly: AssemblyTitle("SCS_Twitter_Feed")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("SCS_Twitter_Feed")]
[assembly: AssemblyCopyright("Copyright @ 2012")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

[assembly: AssemblyVersion("1.0.*")]

</File>
<File Path="main.cs">


using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Xml;
using System.Web;
using System.Net;



[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{

public override void CreateNewOutputRows()
{
System.Xml.XmlDocument xml_doc = new System.Xml.XmlDocument();
string TwitterUri = "http://search.twitter.com/search.atom?q=%23"
+ this.Variables.SearchTerm
+ <![CDATA["&rpp=100&result_type=recent&since_id="]]>
+ this.Variables.LastTweetNumber ;
xml_doc = GetResponse(TwitterUri);

XmlNodeList child_nodes = xml_doc.GetElementsByTagName("entry");
string[] temp;

foreach (XmlNode child in child_nodes)
{

Output0Buffer.AddRow();
Output0Buffer.TweetID = child.ChildNodes.Item(0).InnerText;
temp = child.ChildNodes.Item(0).InnerText.Split(':');
Output0Buffer.TweetNumber = Convert.ToInt64(temp[2]);
Output0Buffer.PublishedDateTime= child.ChildNodes.Item(1).InnerText;
Output0Buffer.TweetLink = child.ChildNodes.Item(2).Attributes["href"].Value.ToString();
Output0Buffer.TweetTitle = child.ChildNodes.Item(3).InnerText;
Output0Buffer.TweetContent =child.ChildNodes.Item(4).InnerText;
Output0Buffer.UpdatedDateTime = child.ChildNodes.Item(5).InnerText;
Output0Buffer.TweeterImageLink = child.ChildNodes.Item(6).Attributes["href"].Value.ToString();
Output0Buffer.TweetGeoInfo = child.ChildNodes.Item(7).InnerText;
Output0Buffer.TweetResultType = child.ChildNodes.Item(8).ChildNodes.Item(0).InnerText;
Output0Buffer.TweetSource = child.ChildNodes.Item(9).InnerText;
Output0Buffer.TweetLanguage= child.ChildNodes.Item(10).InnerText;
Output0Buffer.TweeterName = child.ChildNodes.Item(11).ChildNodes.Item(0).InnerText;
Output0Buffer.TweetURI = child.ChildNodes.Item(11).ChildNodes.Item(1).InnerText;
}


}

public XmlDocument GetResponse(string uri)
{

XmlDocument doc = new XmlDocument();

WebRequest myRequest = WebRequest.Create(new Uri(uri));
IWebProxy proxy = myRequest.Proxy;
if (proxy != null)
{
proxy.GetProxy(myRequest.RequestUri);
}


doc.Load(myRequest.GetResponse().GetResponseStream());

return doc;


}
}

</File>
</Files>

</ScriptComponentProject>
</ScriptProjects>


<Packages>
<Package Name ="Twitter.Reader" ConstraintMode="Linear">
<Variables>
<Variable Name ="LastTweetNumber" DataType="Int64" >0</Variable>
<Variable Name="SearchTerm" DataType="String">SSIS</Variable>
</Variables>
<Tasks>
<ExecuteSQL Name ="EST Get Last TweetNumber" ConnectionName="CnOleDBAdventureWorks2012" ResultSet="SingleRow">
<DirectInput> SELECT isnull(max([TweetNumber]),0) FROM [dbo].[TwitterLog] where [SearchTerm] =? </DirectInput>
<Parameters>
<Parameter Name="0" DataType="String" Length="128" Direction ="Input" VariableName ="User.SearchTerm"></Parameter>
</Parameters>
<Results>
<Result Name="0" VariableName="User.LastTweetNumber"></Result>
</Results>
</ExecuteSQL>
<Dataflow Name ="DFT Get Tweets">
<Transformations>
<ScriptComponentSource Name="SCS Twitter Feed">
<ScriptComponentProjectReference ScriptComponentProjectName="SCS_Twitter_Feed"></ScriptComponentProjectReference>
</ScriptComponentSource>
<DerivedColumns Name="DC SearchTerm">
<Columns>
<Column Name="SearchTerm" DataType ="String" Length ="128">@[User::SearchTerm]</Column>
</Columns>

</DerivedColumns>
<OleDbDestination Name="ODD dbo TwitterLog" ConnectionName="CnOleDBAdventureWorks2012">
<ExternalTableOutput Table="[dbo].[TwitterLog]"></ExternalTableOutput>
</OleDbDestination>
</Transformations>

</Dataflow>
</Tasks>
</Package>
</Packages>
</Biml>

Submitted by John Minkjan.