24 September 2010

Connecting to a MongoDB database from R using the C API for MongoDB

Today, Neil posted an article titled "Connecting to a MongoDB database from R using Java". In this post, I'll show how to use the C API for MongoDB to fetch some MongoDB data from R. The code is somewhat similar to that of my previous post, "A stateful C function for R: parsing Fasta sequences".

OK, first, let's add a few values to MongoDB:

// Insert 19 sample documents (rs1 .. rs19) into the dbsnps collection.
// The loop condition must be "i < 20": with "i > 20" the body never runs
// and the collection stays empty.
for (var i = 1; i < 20; ++i) {
    db.dbsnps.save({_id: "rs" + i, name: "rs" + i});
}


The C code contains 3 functions.

The first function, mongoRconnect, connects to the MongoDB server and puts the connection pointer into an R variable.
SEXP mongoRconnect()
{
mongo_connection* conn; /* ptr */
mongo_connection_options opts[1];
mongo_conn_return status;

conn=(mongo_connection*)malloc(sizeof(mongo_connection));
strcpy( opts->host , "127.0.0.1" );
opts->port = 27017;
status = mongo_connect( conn, opts );

return R_MakeExternalPtr(conn, R_NilValue, R_NilValue);
}
The second method mongoRdiconnect closes the connection:
/**
 * Close the MongoDB connection held by r_handle.
 *
 * r_handle is expected to be the external pointer returned by
 * mongoRconnect(). The connection is destroyed and freed, then the pointer
 * stored in r_handle is cleared, so a second call on the same handle raises
 * an R error ("conn==NULL") instead of freeing the memory twice.
 *
 * Returns the R integer 0.
 */
SEXP mongoRdiconnect(SEXP r_handle)
{
    mongo_connection *conn;

    /* Reject anything that is not an external pointer before reading it. */
    if (TYPEOF(r_handle) != EXTPTRSXP) {
        MONGO_ERROR("handle is not an external pointer");
    }

    conn = (mongo_connection *)R_ExternalPtrAddr(r_handle);
    if (conn == NULL) {
        MONGO_ERROR("conn==NULL");
    }

    mongo_destroy(conn);
    free(conn);
    R_ClearExternalPtr(r_handle);

    return ScalarInteger(0);
}
The last method, mongoRquery, scans the dbsnps collection of the test database and puts the names of the SNPs into an R character vector:
/**
 * Return the "name" field of every document in test.dbsnps.
 *
 * r_handle is expected to be the external pointer returned by
 * mongoRconnect(). The result is an R character vector with one element per
 * document that has a "name" field; documents without it are skipped.
 *
 * The names are written straight into a PROTECTed character vector that is
 * grown by doubling. Each CHARSXP made by mkChar() is therefore reachable
 * from a protected object as soon as it exists, and cannot be collected
 * while later elements are being allocated.
 *
 * Raises an R error if the handle is invalid or the query yields no cursor.
 * NOTE(review): an R error raised inside the loop (e.g. out of memory while
 * growing the vector) leaves the cursor undestroyed.
 * NOTE(review): assumes the "name" field is a BSON string - confirm.
 */
SEXP mongoRquery(SEXP r_handle)
{
    mongo_connection *conn;
    mongo_cursor *cursor;
    bson empty[1];
    SEXP values;
    PROTECT_INDEX values_idx;
    int capacity = 16;
    int count = 0;

    if (TYPEOF(r_handle) != EXTPTRSXP) {
        MONGO_ERROR("handle is not an external pointer");
    }
    conn = (mongo_connection *)R_ExternalPtrAddr(r_handle);
    if (conn == NULL) {
        MONGO_ERROR("handle==NULL");
    }

    /* Allocate on the R side first: if this fails no cursor exists yet. */
    PROTECT_WITH_INDEX(values = allocVector(STRSXP, capacity), &values_idx);

    bson_empty(empty);
    cursor = mongo_find(conn,
                        "test.dbsnps", /* ns */
                        empty,         /* query: match everything */
                        empty,         /* fields: return all fields */
                        0,             /* number to return: no limit */
                        0,             /* skip */
                        0              /* options */
                        );
    if (cursor == NULL) {
        /* R unwinds the protection stack itself when an error is raised. */
        MONGO_ERROR("query failed");
    }

    while (mongo_cursor_next(cursor)) {
        bson_iterator it[1];

        if (bson_find(it, &(cursor->current), "name")) {
            if (count == capacity) {
                capacity *= 2;
                REPROTECT(values = lengthgets(values, capacity), values_idx);
            }
            SET_STRING_ELT(values, count, mkChar(bson_iterator_string(it)));
            count++;
        }
    }
    mongo_cursor_destroy(cursor);

    /* Trim the unused tail so the caller sees exactly `count` names. */
    if (count != capacity) {
        REPROTECT(values = lengthgets(values, count), values_idx);
    }

    UNPROTECT(1);
    return values;
}
This C code is then called from R:
mongo <- mongo.open()
mongo.snps(mongo)
mongo.close(mongo)

Result:
[1] "rs1" "rs2" "rs3" "rs4" "rs5" "rs6" "rs7" "rs8" "rs9" "rs10"
[11] "rs11" "rs12" "rs13" "rs14" "rs15" "rs16" "rs17" "rs18" "rs19"



Source code


Makefile

# Build librmongo.so from mongoR.c plus the MongoDB C driver sources, then
# run the demo script in R.
R_HOME=R-2.11.0
MONGO_HOME=mongo-c-driver

# Neither target produces a file with its own name.
.PHONY: run clean

# The stray bare "-I" was removed from the compile line: it consumed "-g" as
# its directory argument, so debug information was never enabled.
run:
	gcc -fPIC -g -c -Wall -DMONGO_HAVE_STDINT -I ${R_HOME}/include -I ${MONGO_HOME}/src mongoR.c ${MONGO_HOME}/src/*.c
	gcc -shared -Wl,-soname,rmongo.so.1 -o librmongo.so *.o
	${R_HOME}/bin/R --no-save < mongo.R

# -f keeps "make clean" from failing when nothing has been built yet.
clean:
	rm -f *.o


mongoR.c

(again, I'm not sure about those PROTECT/UNPROTECT ...)
#include <ctype.h>
#include <errno.h>
#include <R.h>
#include <Rinternals.h>
#include <bson.h>
#include <mongo.h>

/*
 * Raise an R error carrying the message `a`.
 *
 * R's error() transfers control back to the R top level and never returns,
 * so nothing may follow it (the former fputs()/exit() calls could not be
 * reached). The message is passed through "%s" so that a '%' in it is not
 * interpreted as a format directive. The do/while wrapper makes the macro
 * behave as a single statement after an unbraced `if`.
 */
#define MONGO_ERROR(a) do { error("%s", (a)); } while (0)

/**
 * Connect to the MongoDB server at 127.0.0.1:27017.
 *
 * Returns an R external pointer wrapping a heap-allocated mongo_connection.
 * The caller must release it with mongoRdiconnect(); no finalizer is
 * registered here, so a handle that is dropped without being closed is
 * never freed.
 *
 * Raises an R error if memory cannot be allocated or if mongo_connect()
 * does not return mongo_conn_success.
 */
SEXP mongoRconnect(void)
{
    mongo_connection *conn;
    mongo_connection_options opts[1];
    mongo_conn_return status;

    conn = malloc(sizeof *conn);
    if (conn == NULL) {
        MONGO_ERROR("out of memory");
    }

    strcpy(opts->host, "127.0.0.1");
    opts->port = 27017;

    status = mongo_connect(conn, opts);
    if (status != mongo_conn_success) {
        /* An R error does not return to this function: free first,
         * otherwise the connection object would leak. */
        free(conn);
        MONGO_ERROR("connection failed");
    }

    /* the handle is wrapped so it can be bound to an R variable */
    return R_MakeExternalPtr(conn, R_NilValue, R_NilValue);
}

/**
 * Close the MongoDB connection held by r_handle.
 *
 * r_handle is expected to be the external pointer returned by
 * mongoRconnect(). The connection is destroyed and freed, then the pointer
 * stored in r_handle is cleared, so a second call on the same handle raises
 * an R error ("conn==NULL") instead of freeing the memory twice.
 *
 * Returns the R integer 0.
 */
SEXP mongoRdiconnect(SEXP r_handle)
{
    mongo_connection *conn;

    /* Reject anything that is not an external pointer before reading it. */
    if (TYPEOF(r_handle) != EXTPTRSXP) {
        MONGO_ERROR("handle is not an external pointer");
    }

    conn = (mongo_connection *)R_ExternalPtrAddr(r_handle);
    if (conn == NULL) {
        MONGO_ERROR("conn==NULL");
    }

    mongo_destroy(conn);
    free(conn);
    R_ClearExternalPtr(r_handle);

    return ScalarInteger(0);
}

/**
 * Return the "name" field of every document in test.dbsnps.
 *
 * r_handle is expected to be the external pointer returned by
 * mongoRconnect(). The result is an R character vector with one element per
 * document that has a "name" field; documents without it are skipped.
 *
 * The names are written straight into a PROTECTed character vector that is
 * grown by doubling. Each CHARSXP made by mkChar() is therefore reachable
 * from a protected object as soon as it exists, and cannot be collected
 * while later elements are being allocated.
 *
 * Raises an R error if the handle is invalid or the query yields no cursor.
 * NOTE(review): an R error raised inside the loop (e.g. out of memory while
 * growing the vector) leaves the cursor undestroyed.
 * NOTE(review): assumes the "name" field is a BSON string - confirm.
 */
SEXP mongoRquery(SEXP r_handle)
{
    mongo_connection *conn;
    mongo_cursor *cursor;
    bson empty[1];
    SEXP values;
    PROTECT_INDEX values_idx;
    int capacity = 16;
    int count = 0;

    if (TYPEOF(r_handle) != EXTPTRSXP) {
        MONGO_ERROR("handle is not an external pointer");
    }
    conn = (mongo_connection *)R_ExternalPtrAddr(r_handle);
    if (conn == NULL) {
        MONGO_ERROR("handle==NULL");
    }

    /* Allocate on the R side first: if this fails no cursor exists yet. */
    PROTECT_WITH_INDEX(values = allocVector(STRSXP, capacity), &values_idx);

    bson_empty(empty);
    cursor = mongo_find(conn,
                        "test.dbsnps", /* ns */
                        empty,         /* query: match everything */
                        empty,         /* fields: return all fields */
                        0,             /* number to return: no limit */
                        0,             /* skip */
                        0              /* options */
                        );
    if (cursor == NULL) {
        /* R unwinds the protection stack itself when an error is raised. */
        MONGO_ERROR("query failed");
    }

    while (mongo_cursor_next(cursor)) {
        bson_iterator it[1];

        if (bson_find(it, &(cursor->current), "name")) {
            if (count == capacity) {
                capacity *= 2;
                REPROTECT(values = lengthgets(values, capacity), values_idx);
            }
            SET_STRING_ELT(values, count, mkChar(bson_iterator_string(it)));
            count++;
        }
    }
    mongo_cursor_destroy(cursor);

    /* Trim the unused tail so the caller sees exactly `count` names. */
    if (count != capacity) {
        REPROTECT(values = lengthgets(values, count), values_idx);
    }

    UNPROTECT(1);
    return values;
}


mongo.R

# Load the shared library; the file name is "librmongo" followed by the
# platform's shared-library extension.
dyn.load(paste("librmongo", .Platform$dynlib.ext, sep = ""))

# Open a connection: returns whatever the native mongoRconnect routine returns.
mongo.open <- function() .Call("mongoRconnect")

# Close the connection referenced by `handler`.
mongo.close <- function(handler) .Call("mongoRdiconnect", handler)

# Run the native mongoRquery routine on the connection `handler`.
mongo.snps <- function(handler) .Call("mongoRquery", handler)

# Demo: open a connection, print the query result, close the connection.
mongo <- mongo.open()
mongo.snps(mongo)
mongo.close(mongo)


That's it,
Pierre

1 comment: