Saving your tweets in a database using sqlite, rhino, scribe, javascript
In the current post, I 'll describe a simple method to save your tweets in a sqlite database using Mozilla Rhino.
Prerequisites
- sqlite
- Apache Rhino. I think it should be de-facto available when the java developer toolkit (JDK) is installed
- Scribe, the simple OAuth library for Java . It also requires Apache codec
The config.js file
Open an account on https://dev.twitter.com/ and create an App to receive an API-key and an API-secret.Create the following file 'config.js' filled with the correct parameters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var config={ | |
screen_name:"__YOUR_SCREEN_NAME__", /* your twitter screen name */ | |
max_return:20, /* number of tweets to return for each query */ | |
wait_minutes:5, /* time to wait between two queries */ | |
jdbc: { | |
uri:"jdbc:sqlite:tweets.sqlite" /* path the the sqlite database */ | |
}, | |
api: { /* given by https://dev.twitter.com/apps/new */ | |
key:"__YOUR_KEY__", | |
secret:"__YOUR_SECRET__" | |
} | |
}; |
The javascript
The following javascript file opens a Oauth connection, retrieves the tweets and stores them into sqlite. I've commented the code, I hope it is clear enough.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Author: | |
* Pierre Lindenbaum PhD. | |
* http://plindenbaum.blogspot.com | |
* @yokofakun | |
* Date: | |
* 2012-11-02 | |
* Motivation: | |
* store my tweets in a sqlite database using rhino, scribe, javascript, sqlite | |
*/ | |
importPackage(org.scribe.builder); | |
importPackage(org.scribe.builder.api); | |
importPackage(org.scribe.model); | |
importPackage(org.scribe.oauth); | |
importClass(java.io.PrintWriter); | |
importClass(java.io.File); | |
importPackage(java.sql); | |
/* get the JDBC driver for SQLite */ | |
java.lang.Class.forName("org.sqlite.JDBC") | |
/* load the configuration file */ | |
load("config.js"); | |
/** loop forever */ | |
for(;;) | |
{ | |
var tokens={}; | |
try { | |
/* is there a previously saved token here ?, try to load it */ | |
var content=readFile("tokens.js"); | |
tokens=eval(content); | |
} | |
catch(e) | |
{ | |
} | |
/* build a OAuth-Service */ | |
var service = new ServiceBuilder() | |
.provider(TwitterApi) | |
.apiKey(config.api.key) | |
.apiSecret(config.api.secret) | |
.build(); | |
var accessToken=null; | |
/* no OAuth token already defined ? */ | |
if(!(tokens && tokens.token && tokens.secret)) | |
{ | |
/* get authorization Url */ | |
var requestToken = service.getRequestToken(); | |
var authorizationUrl = service.getAuthorizationUrl(requestToken); | |
print("GO TO : "+authorizationUrl + " and paste the PIN"); | |
/* open a GUI dialog, ask the PIN */ | |
var pin=Packages.javax.swing.JOptionPane.showInputDialog(authorizationUrl); | |
if(pin==null || pin=="") java.lang.System.exit(-1); | |
var verifier = new Verifier(pin); | |
accessToken = service.getAccessToken(requestToken, verifier); | |
/* ok this is our new token object */ | |
tokens= { | |
token: ""+accessToken.token, | |
secret: ""+accessToken.secret, | |
rawResponse: ""+accessToken.rawResponse | |
}; | |
/* serialize it into the file "token.js" */ | |
var writer=new PrintWriter("tokens.js"); | |
writer.println(tokens.toSource()); | |
writer.flush(); | |
writer.close(); | |
} | |
else | |
{ | |
/* use the previously stored token object to get the access-token */ | |
accessToken=new Token(tokens.token,tokens.secret,tokens.rawResponse); | |
} | |
/* open a connection to SQL lite */ | |
var conn = DriverManager.getConnection(config.jdbc.uri); | |
/* create a simple table to store the tweets */ | |
conn.createStatement().executeUpdate("create table if not exists tweet(id TEXT not null unique,content TEXT not null);"); | |
/* build the OAuthRequest */ | |
request = new OAuthRequest(Verb.GET, "https://api.twitter.com/1.1/statuses/user_timeline.json"); | |
request.addQuerystringParameter("screen_name", config.screen_name); | |
request.addQuerystringParameter("count", ""+config.max_return); | |
service.signRequest(accessToken, request); | |
/* send the request and parse the response */ | |
var response = request.send(); | |
var tweets=eval("("+response.body+")"); | |
/* create JDBC statement to store the tweets */ | |
var pstmt = conn.prepareStatement("insert or ignore into tweet (id,content) values (?,?);"); | |
/* loop over the tweets */ | |
for(var i=0;i< tweets.length;++i) | |
{ | |
var tweet=tweets[i]; | |
if(!(tweet.id_str)) continue; | |
/* discard tweet.user if it is 'me' */ | |
if(tweet.user.screen_name==config.screen_name) delete tweet.user; | |
/* fill the JDBC statement */ | |
pstmt.setString(1,tweet.id_str); | |
pstmt.setString(2,tweet.toSource()); | |
/* and insert it */ | |
if(pstmt.executeUpdate()==1) | |
{ | |
print("Inserted "+tweet.toSource()); | |
} | |
} | |
/* close JDBC */ | |
pstmt.close(); | |
conn.close(); | |
/* wait ... */ | |
print("Sleep..."+config.wait_minutes+" minutes..."); | |
java.lang.Thread.sleep(1000*60*config.wait_minutes); | |
} | |
Running the script using Rhino
scribe.libs=/path/to/scribe-1.3.2.jar:/path/to/commons-codec.jar
rhino.libs=/usr/share/java/js.jar:/usr/share/java/jline.jar
sqlite.libs=/path/to/sqlitejdbc-v056.jar
CLASSPATH=${rhino.libs}:${scribe.libs}:${sqlite.libs}
java -cp ${CLASSPATH} org.mozilla.javascript.tools.shell.Main -f twitter2sqlite.js
At the first time, the user is asked to authorize the application to use the twitter APIAs a test, I wrote the following tweet:
wrote a tool to save my tweets: This is a test . ( #rhino, #jdbc, #sqlite, #scribe #javascript )
- Pierre Lindenbaum (@yokofakun) November 2, 2012
... and the tweet was later inserted in the database...
Sleep...
Inserted ({created_at:"Fri Nov 02 20:29:04 +0000 2012", id:264464160664981500, id_str:"264464160664981504", text:"wrote a tool to save my tweets: This is a test . ( #rhino, #jdbc, #sqlite, #scribe #javascript )", source:"web", truncated:false, in_reply_to_status_id:null, in_reply_to_status_id_str:null, in_reply_to_user_id:null, in_reply_to_user_id_str:null, in_reply_to_screen_name:null, geo:null, coordinates:null, place:null, contributors:null, retweet_count:0, entities:{hashtags:[{text:"rhino", indices:[51, 57]}, {text:"jdbc", indices:[59, 64]}, {text:"sqlite", indices:[66, 73]}, {text:"scribe", indices:[75, 82]}, {text:"javascript", indices:[83, 94]}], urls:[], user_mentions:[]}, favorited:false, retweeted:false})
Sleep...
Sleep...
Sleep...
Later, the tweets can be extracted using the sqlite command line:
$ sqlite3 tweets.sqlite 'select * from tweet'
264464160664981504|({created_at:"Fri Nov 02 20:29:04 +0000 2012", id:264464160664981500, id_str:"264464160664981504", text:"wrote a tool to save my tweets: This
264421310841638913|({created_at:"Fri Nov 02 17:38:47 +0000 2012", id:264421310841638900, id_str:"264421310841638913", text:"The tools for recalibration have cha
264264932097400832|({created_at:"Fri Nov 02 07:17:24 +0000 2012", id:264264932097400830, id_str:"264264932097400832", text:"@warandpeace you're welcome. Your sh
264158323287416832|({created_at:"Fri Nov 02 00:13:46 +0000 2012", id:264158323287416830, id_str:"264158323287416832", text:"Drawing of the day November 1, 2012.
264142732174438400|({created_at:"Thu Nov 01 23:11:49 +0000 2012", id:264142732174438400, id_str:"264142732174438400", text:"[delicious] PLOS Collections: How th
264064117558624256|({created_at:"Thu Nov 01 17:59:26 +0000 2012", id:264064117558624260, id_str:"264064117558624256", text:"I've added a stupid basic dependency
264025607724204034|({created_at:"Thu Nov 01 15:26:24 +0000 2012", id:264025607724204030, id_str:"264025607724204034", text:"in the desert lab, checking my on-go
264013563704795136|({created_at:"Thu Nov 01 14:38:33 +0000 2012", id:264013563704795140, id_str:"264013563704795136", text:"Drawing of the day November 1, 2012.
263996436679630848|({created_at:"Thu Nov 01 13:30:29 +0000 2012", id:263996436679630850, id_str:"263996436679630848", text:"RT @RealistComics: he's tall, dark a
263966759210590208|({created_at:"Thu Nov 01 11:32:34 +0000 2012", id:263966759210590200, id_str:"263966759210590208", text:"RT @guermonprez: #Aubry Un avion nor
263946369847398402|({created_at:"Thu Nov 01 10:11:33 +0000 2012", id:263946369847398400, id_str:"263946369847398402", text:"[delicious] OVal: object validation
263946366919790593|({created_at:"Thu Nov 01 10:11:32 +0000 2012", id:263946366919790600, id_str:"263946366919790593", text:"[delicious] MyBatis #tweet: a first
263941020729896960|({created_at:"Thu Nov 01 09:50:17 +0000 2012", id:263941020729896960, id_str:"263941020729896960", text:"RT @josh_wills: I have never been pr
263938670187388928|({created_at:"Thu Nov 01 09:40:57 +0000 2012", id:263938670187388930, id_str:"263938670187388928", text:"RT @softmodeling @peterneubauer: Usi
263936362716200960|({created_at:"Thu Nov 01 09:31:47 +0000 2012", id:263936362716200960, id_str:"263936362716200960", text:"declined to review an article about
263934528186351616|({created_at:"Thu Nov 01 09:24:29 +0000 2012", id:263934528186351600, id_str:"263934528186351616", text:"@figshare Thanks, ( was http://t.co/
263815846139412480|({created_at:"Thu Nov 01 01:32:53 +0000 2012", id:263815846139412480, id_str:"263815846139412480", text:"Drawing of the day October 30, 2012.
263731855919026176|({created_at:"Wed Oct 31 19:59:09 +0000 2012", id:263731855919026180, id_str:"263731855919026176", text:"[delicious] An integrated map of gen
263726281647067136|({created_at:"Wed Oct 31 19:36:59 +0000 2012", id:263726281647067140, id_str:"263726281647067136", text:"RT @bryan_howie: 1000 Genomes paper
263695076516052992|({created_at:"Wed Oct 31 17:33:00 +0000 2012", id:263695076516053000, id_str:"263695076516052992", text:"\"Forget your Past\" ( abandoned Bul
That's it
Pierre
No comments:
Post a Comment