Thursday, 18 December 2008

Archiving another Twitter user's timeline with Tweet-SQL

Here's a modified version of the script I created to archive your twitter timeline with Tweet-SQL. This script will allow you to archive another users timeline provided they haven't protected their updates. This script will be good for archiving 2000 statuses before you hit your api limit. In a future script I'll show how you can archive even larger timeline using the since_id and count in the @optional parameter.

First create a table to hold the users timeline...

CREATE TABLE TwitterArchive

(

      created_at DATETIME,

      id INT PRIMARY KEY,

      [text] NVARCHAR(140),

      source NVARCHAR(100),

      truncated NVARCHAR(5),

      in_reply_to_status_id INT,

      in_reply_to_user_id INT,

      favorited NVARCHAR(5),

      status_Id INT,

      statuses_Id INT                                                              

);



Just modify the value for @user_id_or_nick and you're good to go...


DECLARE @xml XML,

            @handle INT,

            @page INT,

            @optional NVARCHAR(10),

            @rowcount TINYINT,

            @user_id_or_nick NVARCHAR(30);


-- Set the users timeline to archive

SET @user_id_or_nick = 'lost_in_bangkok';

-- Starting page

SET @page = 1;

-- Turn off resultsets from Tweet-SQL

EXEC dbo.tweet_cfg_resultset_send 0;

-- Set the optional parameter to request page 1

SET @optional = '?page=' + CAST(@page AS NVARCHAR(4));

-- Get page 1 of your timeline

EXEC dbo.tweet_sts_user_timeline @user_id_or_nick, @optional, @xml OUTPUT;

SET @rowcount = 20;

WHILE (@rowcount = 20) -- While we still have results to deal with

BEGIN

      -- Prepare an xml document

      EXEC sp_xml_preparedocument @handle OUTPUT, @xml;

      -- Insert the page into a table

      INSERT INTO dbo.TwitterArchive

      (

            created_at,

            id,

            [text],

            source,

            truncated,

            in_reply_to_status_id,

            in_reply_to_user_id,

            favorited,

            status_Id,

            statuses_Id

      )

      SELECT  dbo.tweet_fnc_dateconvert(created_at),

                  id,

                  [text],

                  source,

                  truncated,

                  in_reply_to_status_id,

                  in_reply_to_user_id,

                  favorited,

                  status_Id,

                  statuses_Id

      FROM OPENXML(@handle, '/statuses/status', 2)

      WITH

      (

            created_at NVARCHAR(30),

            id INT,

            [text] NVARCHAR(140),

            source NVARCHAR(100),

            truncated NVARCHAR(5),

            in_reply_to_status_id INT,

            in_reply_to_user_id INT,

            favorited NVARCHAR(5),

            status_Id INT,

            statuses_Id INT

      );

      -- Get the rowcount, when < 20 then we have the last page

      SET @rowcount = @@ROWCOUNT;

      -- Which page have we done?

      PRINT 'Archived page ' + CAST(@page AS NVARCHAR(4));

      -- destroy the xml document

      EXEC sp_xml_removedocument @handle;

      -- Increment the page count

      SET @page = @page + 1;

      -- Setup the optional parameter

      SET @optional = '?page=' + CAST(@page AS NVARCHAR(4));

      -- Wait for a bit...

      WAITFOR DELAY '00:00:05';

      -- Get the next page

      EXEC dbo.tweet_sts_user_timeline @user_id_or_nick, @optional, @xml OUTPUT;

END

-- Turn resultsets from Tweet-SQL back on as appropriate

EXEC dbo.tweet_cfg_resultset_send 1;

No comments: