Showing posts with label rest. Show all posts
Showing posts with label rest. Show all posts

20 May 2014

A nodejs-based REST server for the UCSC @GenomeBrowser



Node.js provides a simple mechanism to write a REST server. As an exercise, I wrote a REST server for the MySQL server of the UCSC genome browser. The code is available on GitHub at:


Starting the server


$ cd bionode
$ node ucsc/ucsc.js
Server running at http://localhost:8080/

METHOD: /schema/databases



Lists the available databases :e.g: http://localhost:8080/schemas/databases


[
"information_schema",
"ailMel1",
"allMis1",
"anoCar1",

(...)
"visiGene",
"xenTro1",
"xenTro2",
"xenTro3"
]


This method accepts a parameter callback for JSON-P : e.g: http://localhost:8080/schemas/databases?callback=handle


handle([
"information_schema",
"ailMel1",
"allMis1",
"anoCar1",
(...)
"visiGene",
"xenTro1",
"xenTro2",
"xenTro3"
]);


METHOD: /schema/:database/tables


Lists the available tables for a given database :e.g: http://localhost:8080/schemas/anoCar1/tables


[
"all_mrna",
"author",
"blastHg18KG",
"cds",
(...)
"xenoRefFlat",
"xenoRefGene",
"xenoRefSeqAli"
]

This method accepts a parameter callback for JSON-P : e.g: http://localhost:8080/schemas/anoCar1/tables?callback=handle


handle([
"all_mrna",
"author",
"blastHg18KG",
"cds",
"cell",
(...)
"xenoRefFlat",
"xenoRefGene",
"xenoRefSeqAli"
]);

METHOD: /schema/:database/:table


Returns a schema for the given database.table. E.g: http://localhost:8080/schemas/anoCar1/xenoMrna



{"database":"anoCar1","table":"xenoMrna","fields":[{"name":"bin","type":"smallint(5) unsigned","key":""},{"name":"matches","type":"int(10) unsigned","key":""},{"name":"misMatches","type":"int(10) unsigned","key":""},{"name":"repMatches","type":"int(10) unsigned","key":""},{"name":"nCount","type":"int(10) unsigned","key":""},{"name":"qNumInsert","type":"int(10) unsigned","key":""},{"name":"qBaseInsert","type":"int(10) unsigned","key":""},{"name":"tNumInsert","type":"int(10) unsigned","key":""},{"name":"tBaseInsert","type":"int(10) unsigned","key":""},{"name":"strand","type":"char(2)","key":""},{"name":"qName","type":"varchar(255)","key":"MUL"},{"name":"qSize","type":"int(10) unsigned","key":""},{"name":"qStart","type":"int(10) unsigned","key":""},{"name":"qEnd","type":"int(10) unsigned","key":""},{"name":"tName","type":"varchar(255)","key":"MUL"},{"name":"tSize","type":"int(10) unsigned","key":""},{"name":"tStart","type":"int(10) unsigned","key":""},{"name":"tEnd","type":"int(10) unsigned","key":""},{"name":"blockCount","type":"int(10) unsigned","key":""},{"name":"blockSizes","type":"longblob","key":""},{"name":"qStarts","type":"longblob","key":""},{"name":"tStarts","type":"longblob","key":""}]}


This method accepts a parameter callback for JSON-P : e.g: http://localhost:8080/schemas/anoCar1/xenoMrna?callback=handler


handler({"database":"anoCar1","table":"xenoMrna","fields":[{"name":"bin","type":"smallint(5) unsigned","key":""},{"name":"matches","type":"int(10) unsigned","key":""},{"name":"misMatches","type":"int(10) unsigned","key":""},{"name":"repMatches","type":"int(10) unsigned","key":""},{"name":"nCount","type":"int(10) unsigned","key":""},{"name":"qNumInsert","type":"int(10) unsigned","key":""},{"name":"qBaseInsert","type":"int(10) unsigned","key":""},{"name":"tNumInsert","type":"int(10) unsigned","key":""},{"name":"tBaseInsert","type":"int(10) unsigned","key":""},{"name":"strand","type":"char(2)","key":""},{"name":"qName","type":"varchar(255)","key":"MUL"},{"name":"qSize","type":"int(10) unsigned","key":""},{"name":"qStart","type":"int(10) unsigned","key":""},{"name":"qEnd","type":"int(10) unsigned","key":""},{"name":"tName","type":"varchar(255)","key":"MUL"},{"name":"tSize","type":"int(10) unsigned","key":""},{"name":"tStart","type":"int(10) unsigned","key":""},{"name":"tEnd","type":"int(10) unsigned","key":""},{"name":"blockCount","type":"int(10) unsigned","key":""},{"name":"blockSizes","type":"longblob","key":""},{"name":"qStarts","type":"longblob","key":""},{"name":"tStarts","type":"longblob","key":""}]});


METHOD: /ucsc/:database/:table/:column/:key


Fetch the rows for a given database.table having :column==:key. The :column must be indexed. E.g: http://localhost:8080/ucsc/anoCar1/ensGene/name/ENSACAT00000004346


[
{"bin":592,"name":"ENSACAT00000004346","chrom":"scaffold_111","strand":"-","txStart":991522,"txEnd":996396,"cdsStart":991522,"cdsEnd":996396,"exonCount":3,"exonStarts":"991522,995669,995976,","exonEnds":"991954,995972,996396,","score":0,"name2":"PELO","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,0,0,"}
]

This method accepts a parameter callback for JSON-P : e.g: http://localhost:8080/ucsc/anoCar1/ensGene/name/ENSACAT00000004346?callback=handler


handler([
{"bin":592,"name":"ENSACAT00000004346","chrom":"scaffold_111","strand":"-","txStart":991522,"txEnd":996396,"cdsStart":991522,"cdsEnd":996396,"exonCount":3,"exonStarts":"991522,995669,995976,","exonEnds":"991954,995972,996396,","score":0,"name2":"PELO","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,0,0,"}
]);

METHOD: /ucsc/:database/:table?chrom=?&start=?&end=?




Fetch the rows for a given genomic database.table overlapping the given range. This method uses the UCSC-bin index if it is available. E.g: http://localhost:8080/ucsc/anoCar1/ensGene?chrom=scaffold_111&start=600000&end=900000


[
{"bin":589,"name":"ENSACAT00000003906","chrom":"scaffold_111","strand":"-","txStart":594783,"txEnd":614216,"cdsStart":595000,"cdsEnd":614201,"exonCount":9,"exonStarts":"594783,601291,601744,603640,604745,604865,609139,611740,614097,","exonEnds":"595105,601406,601813,603736,604771,604942,609173,611840,614216,","score":0,"name2":"DPM1","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,2,2,2,0,1,0,2,0,"},
{"bin":589,"name":"ENSACAT00000003908","chrom":"scaffold_111","strand":"+","txStart":614382,"txEnd":615600,"cdsStart":614382,"cdsEnd":615600,"exonCount":1,"exonStarts":"614382,","exonEnds":"615600,","score":0,"name2":"MOCS3","cdsStartStat":"incmpl","cdsEndStat":"cmpl","exonFrames":"0,"},
{"bin":589,"name":"ENSACAT00000003918","chrom":"scaffold_111","strand":"-","txStart":638920,"txEnd":642127,"cdsStart":638920,"cdsEnd":642127,"exonCount":2,"exonStarts":"638920,641368,","exonEnds":"639691,642127,","score":0,"name2":"KCNG1","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,0,"},
{"bin":591,"name":"ENSACAT00000003920","chrom":"scaffold_111","strand":"+","txStart":814576,"txEnd":826972,"cdsStart":814576,"cdsEnd":826972,"exonCount":3,"exonStarts":"814576,825125,826845,","exonEnds":"814594,825247,826972,","score":0,"name2":"ENSACAG00000003945","cdsStartStat":"incmpl","cdsEndStat":"cmpl","exonFrames":"0,0,2,"},
{"bin":591,"name":"ENSACAT00000004042","chrom":"scaffold_111","strand":"-","txStart":849731,"txEnd":881887,"cdsStart":849731,"cdsEnd":881887,"exonCount":24,"exonStarts":"849731,851343,855421,856165,857842,858090,861054,861943,862949,863773,865029,865639,867414,868216,872220,873601,874396,876850,877105,877711,878919,879681,881320,881738,","exonEnds":"849809,851460,855511,856279,857947,858201,861157,862027,863026,863866,865171,865722,867525,868368,872360,873738,874600,876994,877263,877850,878993,879847,881471,881887,","score":0,"name2":"ITGA2","cdsStartStat":"incmpl","cdsEndStat":"incmpl","exonFrames":"0,0,0,0,0,0,2,2,0,0,2,0,0,1,2,0,0,0,1,0,1,0,2,0,"},
{"bin":591,"name":"ENSACAT00000004050","chrom":"scaffold_111","strand":"-","txStart":883724,"txEnd":897808,"cdsStart":883724,"cdsEnd":897808,"exonCount":5,"exonStarts":"883724,885433,889264,889742,897701,","exonEnds":"883858,885548,889356,889852,897808,","score":0,"name2":"ENSACAG00000004086","cdsStartStat":"incmpl","cdsEndStat":"incmpl","exonFrames":"1,0,1,2,0,"}
]

This method accepts a parameter callback for JSON-P : e.g: http://localhost:8080/ucsc/anoCar1/ensGene?chrom=scaffold_111&start=600000&end=900000&callback=handler


handler([
{"bin":589,"name":"ENSACAT00000003906","chrom":"scaffold_111","strand":"-","txStart":594783,"txEnd":614216,"cdsStart":595000,"cdsEnd":614201,"exonCount":9,"exonStarts":"594783,601291,601744,603640,604745,604865,609139,611740,614097,","exonEnds":"595105,601406,601813,603736,604771,604942,609173,611840,614216,","score":0,"name2":"DPM1","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,2,2,2,0,1,0,2,0,"},
{"bin":589,"name":"ENSACAT00000003908","chrom":"scaffold_111","strand":"+","txStart":614382,"txEnd":615600,"cdsStart":614382,"cdsEnd":615600,"exonCount":1,"exonStarts":"614382,","exonEnds":"615600,","score":0,"name2":"MOCS3","cdsStartStat":"incmpl","cdsEndStat":"cmpl","exonFrames":"0,"},
{"bin":589,"name":"ENSACAT00000003918","chrom":"scaffold_111","strand":"-","txStart":638920,"txEnd":642127,"cdsStart":638920,"cdsEnd":642127,"exonCount":2,"exonStarts":"638920,641368,","exonEnds":"639691,642127,","score":0,"name2":"KCNG1","cdsStartStat":"cmpl","cdsEndStat":"cmpl","exonFrames":"0,0,"},
{"bin":591,"name":"ENSACAT00000003920","chrom":"scaffold_111","strand":"+","txStart":814576,"txEnd":826972,"cdsStart":814576,"cdsEnd":826972,"exonCount":3,"exonStarts":"814576,825125,826845,","exonEnds":"814594,825247,826972,","score":0,"name2":"ENSACAG00000003945","cdsStartStat":"incmpl","cdsEndStat":"cmpl","exonFrames":"0,0,2,"},
{"bin":591,"name":"ENSACAT00000004042","chrom":"scaffold_111","strand":"-","txStart":849731,"txEnd":881887,"cdsStart":849731,"cdsEnd":881887,"exonCount":24,"exonStarts":"849731,851343,855421,856165,857842,858090,861054,861943,862949,863773,865029,865639,867414,868216,872220,873601,874396,876850,877105,877711,878919,879681,881320,881738,","exonEnds":"849809,851460,855511,856279,857947,858201,861157,862027,863026,863866,865171,865722,867525,868368,872360,873738,874600,876994,877263,877850,878993,879847,881471,881887,","score":0,"name2":"ITGA2","cdsStartStat":"incmpl","cdsEndStat":"incmpl","exonFrames":"0,0,0,0,0,0,2,2,0,0,2,0,0,1,2,0,0,0,1,0,1,0,2,0,"},
{"bin":591,"name":"ENSACAT00000004050","chrom":"scaffold_111","strand":"-","txStart":883724,"txEnd":897808,"cdsStart":883724,"cdsEnd":897808,"exonCount":5,"exonStarts":"883724,885433,889264,889742,897701,","exonEnds":"883858,885548,889356,889852,897808,","score":0,"name2":"ENSACAG00000004086","cdsStartStat":"incmpl","cdsEndStat":"incmpl","exonFrames":"1,0,1,2,0,"}
]);


That's it,

Pierre

24 May 2013

A Tribble/FeatureCodec handling JSON-based annotations files.

I wrote a Java FeatureCodec for JSON with the Tribble library.
Citing the GATK team: "The Tribble project was started as an effort to overhaul our reference-ordered data system; we had many different formats that were shoehorned into a common framework that didn't really work as intended. What we wanted was a common framework that allowed for searching of reference ordered data, regardless of the underlying type. Jim Robinson had developed indexing schemes for text-based files, which was incorporated into the Tribble library."

The library is available at:https://github.com/lindenb/jsontribble.


The library contains the tools to sort, index and query the json file.

As a proof of concept, I also created a REST-based service to query those files.

REST/JSON

For example http://localhost:8080/jsontribble/rest/tribble/resources/dbsnp/annotations.json?chrom=chr1&start=881826&end=981826 returns:
{"header":{"description":"UCSC  snp137: select count(*) from snp137 where FIND_IN_SET(func,\"missense\")>0 and avHet>0.1"}
,"features":[
{"chrom":"chr1","start":881826,"end":881827,"name":"rs112341375","score":0,"strand":"+","refNCBI":"G","refUCSC":"G","observed":"C/G","class":"single","valid":["by-frequency"],"avHet":0.5,"func":["missense"],"submitters":["BUSHMAN"]}
{"chrom":"chr1","start":897119,"end":897120,"name":"rs28530579","score":0,"strand":"+","refNCBI":"G","refUCSC":"G","observed":"C/G","class":"single","valid":["unknown"],"avHet":0.375,"func":["missense"],"submitters":["ABI","ENSEMBL","SSAHASNP"]}
{"chrom":"chr1","start":907739,"end":907740,"name":"rs112235940","score":0,"strand":"+","refNCBI":"G","refUCSC":"G","observed":"A/G","class":"single","valid":["unknown"],"avHet":0.5,"func":["missense"],"submitters":["COMPLETE_GENOMICS"]}
{"chrom":"chr1","start":949607,"end":949608,"name":"rs1921","score":0,"strand":"+","refNCBI":"G","refUCSC":"G","observed":"A/C/G","class":"single","valid":["by-cluster","by-frequency","by-1000genomes"],"avHet":0.464348,"func":["missense"],"submitters":["1000GENOMES","AFFY","BGI","BUSHMAN","CGAP-GAI","CLINSEQ_SNP","COMPLETE_GENOMICS","CORNELL","DEBNICK","EXOME_CHIP","GMI","HGSV","ILLUMINA","ILLUMINA-UK","KRIBB_YJKIM","LEE","MGC_GENOME_DIFF","NHLBI-ESP","SC_JCM","SC_SNP","SEATTLESEQ","SEQUENOM","UWGC","WIAF","YUSUKE"],"bitfields":["maf-5-some-pop","maf-5-all-pops"]}
]}

REST/XML

Example http://localhost:8080/jsontribble/rest/tribble/resources/dbsnp/annotations.xml?chrom=chr1&start=897119&end=981826
<?xml version="1.0" encoding="UTF-8"?>
<annotations chrom="chr1" start="897119" end="981826">
  <header>
    <description>UCSC  snp137: select count(*) from snp137 where FIND_IN_SET(func,"missense")&gt;0 and avHet&gt;0.1</description>
  </header>
  <features>
    <feature>
      <chrom>chr1</chrom>
      <start type="integer">897119</start>
      <end type="integer">897120</end>
      <name>rs28530579</name>
      <score type="integer">0</score>
      <strand>+</strand>
      <refNCBI>G</refNCBI>
      <refUCSC>G</refUCSC>
      <observed>C/G</observed>
      <class>single</class>
      <valid>

BED/text

Example: http://localhost:8080/jsontribble/rest/tribble/resources/merge/annotations.bed?chrom=chr1&start=897119&end=981826.
chr1    895966  901099  {"chrom":"chr1","start":895966,"end":901099,"strand":"+","name":"uc001aca.2","cds...
chr1    896828  897858  {"chrom":"chr1","start":896828,"end":897858,"strand":"+","name":"uc001acb.1","cds...
chr1    897008  897858  {"chrom":"chr1","start":897008,"end":897858,"strand":"+","name":"uc010nya.1","cds...
chr1    897119  897120  {"chrom":"chr1","start":897119,"end":897120,"name":"rs28530579","score":0,"strand...
chr1    897734  899229  {"chrom":"chr1","start":897734,"end":899229,"strand":"+","name":"uc010nyb.1","cds...
chr1    901876  910484  {"chrom":"chr1","start":901876,"end":910484,"strand":"+","name":"uc001acd.3","cds...
chr1    901876  910484  {"chrom":"chr1","start":901876,"end":910484,"strand":"+","name":"uc001ace.3","cds...
chr1    901876  910484  {"chrom":"chr1","start":901876,"end":910484,"strand":"+","name":"uc001acf.3","cds...
chr1    907739  907740  {"chrom":"chr1","start":907739,"end":907740,"name":"rs112235940","score":0,"stran...
chr1    910578  917473  {"chrom":"chr1","start":910578,"end":917473,"strand":"-","name":"uc001ach.2","cds...
chr1    934341  935552  {"chrom":"chr1","start":934341,"end":935552,"strand":"-","name":"uc001aci.2","cds...
chr1    934341  935552  {"chrom":"chr1","start":934341,"end":935552,"strand":"-","name":"uc010nyc.1","cds...
chr1    948846  949919  {"chrom":"chr1","start":948846,"end":949919,"strand":"+","name":"uc001acj.4","cds...
chr1    949607  949608  {"chrom":"chr1","start":949607,"end":949608,"name":"rs1921","score":0,"strand":"+...
chr1    955502  991499  {"chrom":"chr1","start":955502,"end":991499,"strand":"+","name":"uc001ack.2","cds...

27 September 2012

Playing with the #Ensembl REST API: filtering a VCF with javascript

The new Ensembl REST API was announced today: "We are pleased to announce the beta release of our programming language agnostic REST API, for Release 68 data, at http://beta.rest.ensembl.org. Our initial release provides access to:

  • Sequences (genomic, cDNA, CDS and protein)
  • VEP (Variant Effect Predictor)
  • Homologies
  • Gene Trees
  • Assembly and coordinate mapping."

In the current post I will filter a VCF file with this API and javascript.

The VCF

Our initial file is the following VCF:
##fileformat=VCFv4.0
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
#CHROM POS ID REF ALT QUAL FILTER INFO
1 10327 rs112750067 T C . PASS DP=65
1 69427 rs148502021 T A . PASS DP=1557
1 69452 rs142004627 A G . PASS DP=155
1 69475 rs148502021 C T . PASS DP=231
1 865583 rs148711625 A G . PASS DP=231
1 866460 rs148884928 A G . PASS DP=23
1 866461 . G A . PASS DP=24

The javascript

The VCF will be read on the standard input using the following script and the Rhino JS engine. The script reads the VCF and, for each substitution, calls the Ensembl REST API, parses the JSON response and returns the transcript identifier if the mutation is a missense_variant or has polyphen_prediction="probably damaging":
importPackage(java.io);
importPackage(java.lang);



/**
 * Pauses the script for at least `milliseconds` ms. Used to throttle
 * successive calls to the remote REST service.
 *
 * The previous implementation busy-waited inside a loop capped at 1e7
 * iterations, so a long delay could return early once the iteration budget
 * was exhausted. This version always honours the requested delay.
 *
 * @param milliseconds delay in milliseconds; zero, negative or
 *                     non-numeric values return immediately.
 */
function sleep(milliseconds)
  {
  /* nothing to wait for (this test is also false for NaN/undefined) */
  if (!(milliseconds > 0)) return;

  /* Under Rhino the JVM is reachable: park the thread instead of spinning. */
  if (typeof java !== "undefined" && java.lang && java.lang.Thread) {
    java.lang.Thread.sleep(Math.ceil(milliseconds));
    return;
    }

  /* Fallback for engines without Java access: spin until the deadline,
     with no cap on the number of iterations. */
  var deadline = new Date().getTime() + milliseconds;
  while (new Date().getTime() < deadline) {
    /* busy-wait */
    }
  }


/**
 * Scans a parsed VEP "consequences" response and returns the identifier of
 * the first transcript carrying an allele that is either predicted
 * "probably damaging" by PolyPhen or annotated as a "missense_variant".
 *
 * Expected shape (every level is optional and is skipped when absent):
 *   json.data[i].transcripts[j].alleles[alleleString] =
 *       { polyphen_prediction: String, consequence_terms: [String, ...] }
 *
 * @param json the decoded JSON response
 * @return the matching transcript_id, or null when nothing matches
 */
function damagingTranscript(json)
 {
 if(!json || !json.data) return null;
 var hasOwn=Object.prototype.hasOwnProperty;

 for(var d in json.data)
  {
  if(!hasOwn.call(json.data,d)) continue;
  var entry=json.data[d];
  var transcripts=entry ? entry.transcripts : null;

  /* An entry without transcripts must not hide the following entries:
     skip it instead of giving up on the whole response. */
  if(!transcripts) continue;

  for(var t in transcripts)
   {
   if(!hasOwn.call(transcripts,t)) continue;
   var transcript=transcripts[t];
   if(!transcript || !transcript.alleles) continue;

   /* 'alleles' is an object keyed by allele string (e.g. "C/G") */
   for(var a in transcript.alleles)
    {
    if(!hasOwn.call(transcript.alleles,a)) continue;
    var allele=transcript.alleles[a];
    if(!allele) continue;

    /* consequence_terms may be missing for some alleles */
    var terms=allele.consequence_terms;
    var isMissense=!!terms && terms.indexOf("missense_variant")!==-1;

    if(allele.polyphen_prediction==="probably damaging" || isMissense)
        {
        return transcript.transcript_id;
        }
    }
   }
  }
 return null;
 }

/* Matches an allele made of exactly one nucleotide: only simple
   substitutions are sent to the REST service. */
var baseRegex=/^[ATGCatgc]$/;

/*
 * Main loop (Rhino shell): reads a VCF on standard input, echoes the header
 * lines unchanged, and prints only the single-base substitutions for which
 * damagingTranscript() reports a transcript. Those records get a
 * TRANSCRIPT=<id> entry appended to their INFO column; all other records
 * are dropped.
 */
var stdin = new java.io.BufferedReader( new java.io.InputStreamReader(java.lang.System['in']) );
var line;
while((line=stdin.readLine())!=null)
 {
 /* readLine() yields a java.lang.String: convert it so that the code below
    works on a plain JavaScript string and a plain JavaScript array. */
 line=String(line);

 if(line.charAt(0)==="#")
  {
  print(line); continue;
  }
 var tokens=line.split("\t");
 /* need at least CHROM POS ID REF ALT */
 if(tokens.length<5) continue;

 var chrom=tokens[0];
 var pos= parseInt(tokens[1],10);
 var ref= tokens[3];
 var alt= tokens[4];
 if(isNaN(pos)) continue;
 if(!baseRegex.test(ref)) continue;
 if(!baseRegex.test(alt)) continue;

 /* be gentle with the remote server */
 sleep(200);
 var url="http://beta.rest.ensembl.org/vep/human/"+
  chrom+":"+pos+"-"+pos+"/"+alt+
  "/consequences?content-type=application/json";
 var jsonStr=readUrl(url,"UTF-8");

 /* SECURITY: the response comes from the network. It used to be passed to
    eval(), which would execute any script the server (or anyone tampering
    with the plain-http connection) sent back. JSON.parse only decodes data. */
 var json=JSON.parse(jsonStr);

 var transcript=damagingTranscript(json);
 if(transcript==null) continue;

 /* Append to INFO (8th column). Pad short records and replace an
    empty / "." INFO rather than emitting "undefined;..." or ".;...". */
 while(tokens.length<8) tokens.push(".");
 var annotation="TRANSCRIPT="+transcript;
 var info=tokens[7];
 tokens[7]=(info==="" || info===".") ? annotation : info+";"+annotation;

 print(tokens.join('\t'));
 }

As an example, here is the response from the ENSEMBL server for 1:866460 A/G:

http://beta.rest.ensembl.org/vep/human/1:866460-866460/G/consequences?content-type=application/json
{
    "data": [
        {
            "location": {
                "coord_system": "chromosome",
                "name": "1",
                "strand": 1,
                "end": "866460",
                "start": "866460"
            },
            "hgvs": {
                "G": "1:g.866460C>G"
            },
            "transcripts": [
                {
                    "translation_stable_id": "ENSP00000411579",
                    "intron_number": null,
                    "cdna_end": 385,
                    "translation_end": 99,
                    "exon_number": "4/7",
                    "is_canonical": 0,
                    "transcript_id": "ENST00000420190",
                    "cdna_start": 385,
                    "gene_id": "ENSG00000187634",
                    "cds_start": 296,
                    "translation_start": 99,
                    "alleles": {
                        "C/G": {
                            "sift_prediction": "deleterious",
                            "polyphen_prediction": "benign",
                            "polyphen_score": 0.001,
                            "display_codon_allele_string": "gCg/gGg",
                            "hgvs_protein": "ENSP00000411579.1:p.Ala99Gly",
                            "sift_score": 0.04,
                            "consequence_terms": [
                                "missense_variant"
                            ],
                            "pep_allele_string": "A/G",
                            "codon_allele_string": "GCG/GGG",
                            "hgvs_transcript": "ENST00000420190.1:c.296C>G"
                        }
                    },
                    "name": "SAMD11",
                    "biotype": "protein_coding",
                    "cds_end": 296,
                    "cdna_allele_string": "C/G",
                    "codon_position": 2
                },
                {
                    "translation_stable_id": "ENSP00000393181",
                    "intron_number": null,
                    "cdna_end": 356,
                    "translation_end": 99,
                    "exon_number": "4/5",
                    "is_canonical": 0,
                    "transcript_id": "ENST00000437963",
                    "cdna_start": 356,
                    "gene_id": "ENSG00000187634",
                    "cds_start": 296,
                    "translation_start": 99,
                    "alleles": {
                        "C/G": {
                            "sift_prediction": "deleterious",
                            "polyphen_prediction": "benign",
                            "polyphen_score": 0.001,
                            "display_codon_allele_string": "gCg/gGg",
                            "hgvs_protein": "ENSP00000393181.1:p.Ala99Gly",
                            "sift_score": 0.03,
                            "consequence_terms": [
                                "missense_variant"
                            ],
                            "pep_allele_string": "A/G",
                            "codon_allele_string": "GCG/GGG",
                            "hgvs_transcript": "ENST00000437963.1:c.296C>G"
                        }
                    },
                    "name": "SAMD11",
                    "biotype": "protein_coding",
                    "cds_end": 296,
                    "cdna_allele_string": "C/G",
                    "codon_position": 2
                },
                {
                    "translation_stable_id": "ENSP00000342313",
                    "intron_number": null,
                    "cdna_end": 379,
                    "translation_end": 99,
                    "exon_number": "4/14",
                    "is_canonical": 1,
                    "transcript_id": "ENST00000342066",
                    "cdna_start": 379,
                    "gene_id": "ENSG00000187634",
                    "cds_start": 296,
                    "alleles": {
                        "C/G": {
                            "sift_prediction": "deleterious",
                            "polyphen_prediction": "benign",
                            "polyphen_score": 0.001,
                            "display_codon_allele_string": "gCg/gGg",
                            "hgvs_protein": "ENSP00000342313.3:p.Ala99Gly",
                            "sift_score": 0.01,
                            "consequence_terms": [
                                "missense_variant"
                            ],
                            "pep_allele_string": "A/G",
                            "codon_allele_string": "GCG/GGG",
                            "hgvs_transcript": "ENST00000342066.3:c.296C>G"
                        }
                    },
                    "translation_start": 99,
                    "name": "SAMD11",
                    "biotype": "protein_coding",
                    "cds_end": 296,
                    "cdna_allele_string": "C/G",
                    "ccds": "CCDS2.2",
                    "codon_position": 2
                },
                {
                    "translation_stable_id": "ENSP00000349216",
                    "intron_number": null,
                    "cdna_end": 67,
                    "translation_end": 23,
                    "exon_number": "2/12",
                    "is_canonical": 0,
                    "transcript_id": "ENST00000341065",
                    "cdna_start": 67,
                    "gene_id": "ENSG00000187634",
                    "cds_start": 68,
                    "translation_start": 23,
                    "alleles": {
                        "C/G": {
                            "sift_prediction": "deleterious",
                            "polyphen_prediction": "benign",
                            "polyphen_score": 0.008,
                            "display_codon_allele_string": "gCg/gGg",
                            "hgvs_protein": "ENSP00000349216.4:p.Ala23Gly",
                            "sift_score": 0.01,
                            "consequence_terms": [
                                "missense_variant"
                            ],
                            "pep_allele_string": "A/G",
                            "codon_allele_string": "GCG/GGG",
                            "hgvs_transcript": "ENST00000341065.4:c.67C>G"
                        }
                    },
                    "name": "SAMD11",
                    "biotype": "protein_coding",
                    "cds_end": 68,
                    "cdna_allele_string": "C/G",
                    "codon_position": 2
                }
            ]
        }
    ]
}

Invoking RHINO, filtering the VCF

$ cat input.vcf | rhino -f restensembl.js
  
##fileformat=VCFv4.0
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
#CHROM POS ID REF ALT QUAL FILTER INFO
1 69427 rs148502021 T A . PASS DP=1557;TRANSCRIPT=ENST00000335137
1 69452 rs142004627 A G . PASS DP=155;TRANSCRIPT=ENST00000335137
1 69475 rs148502021 C T . PASS DP=231;TRANSCRIPT=ENST00000335137
1 865583 rs148711625 A G . PASS DP=231;TRANSCRIPT=ENST00000420190
1 866460 rs148884928 A G . PASS DP=23;TRANSCRIPT=ENST00000420190

Limitations for the Ensembl REST API:




That's it,

Pierre

04 July 2011

The Neo4j REST API. My notebook

Neo4j is an open-source graph engine implemented in Java. This post is my notebook for the Neo4j server, which combines a REST API and a webadmin application into a single stand-alone server.

Download & extract Neo4J


Download Neo4J from here
$ tar xfz neo4j-community-1.4.M04-unix.tar.gz
$ rm neo4j-community-1.4.M04-unix.tar.gz
$ cd neo4j-community-1.4.M04/

Start the Neo4J Server

#edit the config file if needed
$ nano conf/neo4j-server.properties
#start the server
$ ./bin/neo4j start
Starting Neo4j Server...
Waiting for Neo4j Server.....
7/4/11 7:07:13 PM org.neo4j.server.database.Database INFO: Using database at NEO4J/neo4j-community-1.4.M04/data/graph.db
7/4/11 7:07:13 PM org.neo4j.server.modules.DiscoveryModule INFO: Mounted discovery module at [/]
Adding JAXRS packages [org.neo4j.server.rest.discovery] at [/]
Adding JAXRS packages [org.neo4j.server.rest.web] at [/db/data]
Adding JAXRS packages [org.neo4j.server.webadmin.rest] at [/db/manage]
7/4/11 7:07:13 PM org.neo4j.server.modules.RESTApiModule INFO: Mounted REST API at [/db/data/]
7/4/11 7:07:13 PM org.neo4j.server.modules.ManagementApiModule INFO: Mounted management API at [/db/manage/]
7/4/11 7:07:13 PM org.neo4j.server.modules.WebAdminModule INFO: Mounted webadmin at [/webadmin]
7/4/11 7:07:13 PM org.neo4j.server.NeoServerWithEmbeddedWebServer INFO: Starting Neo Server on port [7474]
7/4/11 7:07:13 PM org.neo4j.server.web.Jetty6WebServer INFO: Mounting static content at [/webadmin] from [webadmin-html]
7/4/11 7:07:15 PM org.neo4j.server.NeoServerWithEmbeddedWebServer INFO: Server started on [http://okazaki:7474/]
running: PID:2816

Get the server root


$ curl -D - -H Accept:application/json "http://localhost:7474/db/data/"
HTTP/1.1 200 OK
Content-Length: 410
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

{
"relationship_index" : "http://localhost:7474/db/data/index/relationship",
"node" : "http://localhost:7474/db/data/node",
"relationship_types" : "http://localhost:7474/db/data/relationship/types",
"extensions_info" : "http://localhost:7474/db/data/ext",
"node_index" : "http://localhost:7474/db/data/index/node",
"reference_node" : "http://localhost:7474/db/data/node/0",
"extensions" : {
}
}

Create an empty node

$ curl -D - -H Accept:application/json -X POST http://localhost:7474/db/data/node
HTTP/1.1 201 Created
Content-Length: 968
Location: http://localhost:7474/db/data/node/2
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

{
"outgoing_relationships" : "http://localhost:7474/db/data/node/2/relationships/out",
(...)

Set properties for this node

$ curl -D - -H Content-Type:application/json -X PUT \
-d '{"name":"Charles Darwin","birthDate":"1809-02-12","deathDate":"1882-04-19","knownFor":["Voyage of the Beagle","On the Origin of Species evolution by natural selection"]}' \
http://localhost:7474/db/data/node/2/properties
HTTP/1.1 204 No Content
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

View this node

$ curl -D - -H Accept:application/json  http://localhost:7474/db/data/node/2
HTTP/1.1 200 OK
Content-Length: 1166
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

{
"outgoing_relationships" : "http://localhost:7474/db/data/node/2/relationships/out",
"data" : {
"knownFor" : [ "Voyage of the Beagle", "On the Origin of Species evolution by natural selection" ],
"name" : "Charles Darwin",
"birthDate" : "1809-02-12",
"deathDate" : "1882-04-19"
},
"traverse" : "http://localhost:7474/db/data/node/2/traverse/{returnType}",
"all_typed_relationships" : "http://localhost:7474/db/data/node/2/relationships/all/{-list|&|types}",
"property" : "http://localhost:7474/db/data/node/2/properties/{key}",
"self" : "http://localhost:7474/db/data/node/2",
"properties" : "http://localhost:7474/db/data/node/2/properties",
"outgoing_typed_relationships" : "http://localhost:7474/db/data/node/2/relationships/out/{-list|&|types}",
"incoming_relationships" : "http://localhost:7474/db/data/node/2/relationships/in",
"extensions" : {
},
"create_relationship" : "http://localhost:7474/db/data/node/2/relationships",
"all_relationships" : "http://localhost:7474/db/data/node/2/relationships/all",
"incoming_typed_relationships" : "http://localhost:7474/db/data/node/2/relationships/in/{-list|&|types}"
}

Only show the properties for that node:
$ curl -H Accept:application/json  http://localhost:7474/db/data/node/2/properties
{
"knownFor" : [ "Voyage of the Beagle", "On the Origin of Species evolution by natural selection" ],
"name" : "Charles Darwin",
"birthDate" : "1809-02-12",
"deathDate" : "1882-04-19"
}

Create a new node with some properties

$ curl -D - -H Accept:application/json -H Content-Type:application/json -X POST -d '{"name":"Alfred Russel Wallace","birthDate":"1823-01-08","deathDate":"1913-11-07","knownFor":["natural selection","biogeography"]}' "http://localhost:7474/db/data/node"
HTTP/1.1 201 Created
Content-Length: 1127
Location: http://localhost:7474/db/data/node/3
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

{
"outgoing_relationships" : "http://localhost:7474/db/data/node/3/relationships/out",
"data" : {
"knownFor" : [ "natural selection", "biogeography" ],
"name" : "Alfred Russel Wallace",
"birthDate" : "1823-01-08",
"deathDate" : "1913-11-07"
},
"traverse" : "http://localhost:7474/db/data/node/3/traverse/{returnType}",
(...)
}

Set one property

$ curl -D - -H Accept:application/json -H Content-Type:application/json -X PUT \
-d '"United Kingdom"' \
"http://localhost:7474/db/data/node/3/properties/citizenship"
HTTP/1.1 204 No Content
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

$ curl -H Accept:application/json http://localhost:7474/db/data/node/3/properties
{
"knownFor" : [ "natural selection", "biogeography" ],
"name" : "Alfred Russel Wallace",
"citizenship" : "United Kingdom",
"birthDate" : "1823-01-08",
"deathDate" : "1913-11-07"
}

Remove a node

$ curl -D - -H Accept:application/json -X POST http://localhost:7474/db/data/node
HTTP/1.1 201 Created
Content-Length: 968
Location: http://localhost:7474/db/data/node/4
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

$ curl -D - -X DELETE http://localhost:7474/db/data/node/4
HTTP/1.1 204 No Content
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

Create a relationship from Darwin to Wallace

$ curl -D - -H Accept:application/json -H Content-Type:application/json -X POST -d '{"type":"KNOWS","to":"http://localhost:7474/db/data/node/3","data":{"ref":"http://en.wikipedia.org/wiki/Charles_Darwin"}}' "http://localhost:7474/db/data/node/2/relationships"
HTTP/1.1 201 Created
Content-Length: 439
Location: http://localhost:7474/db/data/relationship/0
Content-Encoding: UTF-8
Content-Type: application/json
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

{
"start" : "http://localhost:7474/db/data/node/2",
"data" : {
"ref" : "http://en.wikipedia.org/wiki/Charles_Darwin"
},
"self" : "http://localhost:7474/db/data/relationship/0",
"property" : "http://localhost:7474/db/data/relationship/0/properties/{key}",
"properties" : "http://localhost:7474/db/data/relationship/0/properties",
"type" : "KNOWS",
"extensions" : {
},
"end" : "http://localhost:7474/db/data/node/3"
}

#view properties for this relationship
$ curl -H Content-Type:application/json "http://localhost:7474/db/data/relationship/0/properties"
{
"ref" : "http://en.wikipedia.org/wiki/Charles_Darwin"
}

Add another property to the relationship

$ curl -D - -H Content-Type:application/json  -X PUT -d '"Darwin received a letter from Wallace asking if the book would examine human origins"' "http://localhost:7474/db/data/relationship/0/properties/comment"
HTTP/1.1 204 No Content
Access-Control-Allow-Origin: *
Server: Jetty(6.1.25)

$ curl -H Content-Type:application/json "http://localhost:7474/db/data/relationship/0/properties"
{
"ref" : "http://en.wikipedia.org/wiki/Charles_Darwin",
"comment" : "Darwin received a letter from Wallace asking if the book would examine human origins"
}

List Types of Relationship

$ curl -H Content-Type:application/json "http://localhost:7474/db/data/relationship/types"
["KNOWS"]

List Relationships

#from Darwin
$ curl -H Content-Type:application/json "http://localhost:7474/db/data/node/2/relationships/out/KNOWS"
[ {
"start" : "http://localhost:7474/db/data/node/2",
"data" : {
"ref" : "http://en.wikipedia.org/wiki/Charles_Darwin",
"comment" : "Darwin received a letter from Wallace asking if the book would examine human origins"
},
"self" : "http://localhost:7474/db/data/relationship/0",
"property" : "http://localhost:7474/db/data/relationship/0/properties/{key}",
"properties" : "http://localhost:7474/db/data/relationship/0/properties",
"type" : "KNOWS",
"extensions" : {
},
"end" : "http://localhost:7474/db/data/node/3"
} ]
#in to Darwin
$ curl -H Content-Type:application/json "http://localhost:7474/db/data/node/2/relationships/in/KNOWS"
[ ]
#out from wallace
$ curl -H Content-Type:application/json "http://localhost:7474/db/data/node/3/relationships/out/KNOWS"
[ ]
#all from/to wallace
$ curl -H Content-Type:application/json "http://localhost:7474/db/data/node/3/relationships/all/KNOWS"
[ {
"start" : "http://localhost:7474/db/data/node/2",
"data" : {
"ref" : "http://en.wikipedia.org/wiki/Charles_Darwin",
"comment" : "Darwin received a letter from Wallace asking if the book would examine human origins"
},
"self" : "http://localhost:7474/db/data/relationship/0",
"property" : "http://localhost:7474/db/data/relationship/0/properties/{key}",
"properties" : "http://localhost:7474/db/data/relationship/0/properties",
"type" : "KNOWS",
"extensions" : {
},
"end" : "http://localhost:7474/db/data/node/3"
} ]


Stop the Neo4J Server

$ ./bin/neo4j stop
Stopping Neo4j Server...
7/4/11 7:09:30 PM org.neo4j.server.NeoServerBootstrapper INFO: Neo4j Server shutdown initiated by kill signal
7/4/11 7:09:30 PM org.neo4j.server.NeoServerWithEmbeddedWebServer INFO: Successfully shutdown Neo Server on port [7474]
Waiting for Neo4j Server to exit...
Stopped Neo4j Server.


That's all for today, next time I'll dive into the indexes.

See also

The path from EgonWillighagen to Jandot : Neo4j , a graph API for java: my notebook.

That's it,

Pierre