In the current post, I'll show how I've used this web service to annotate a VCF file with this information.
The web service provided by the ESP is based on the SOAP protocol.
Here is an example of the XML response:
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> | |
<ns3:local xmlns:ns2="http://webservice.evs.gs.washington.edu/" xmlns:ns3="uri"> | |
<chromosome>1</chromosome> | |
<start>120457968</start> | |
<stop>120457969</stop> | |
<strand>+</strand> | |
<snpList> | |
<positionString>1:120457968</positionString> | |
<chrPosition>120457968</chrPosition> | |
<alleles>T/C</alleles> | |
<uaAlleleCounts>1/2701</uaAlleleCounts> | |
<aaAlleleCounts>0/2176</aaAlleleCounts> | |
<totalAlleleCounts>1/4877</totalAlleleCounts> | |
<uaAlleleAndCount>T=1/C=2701</uaAlleleAndCount> | |
<aaAlleleAndCount>T=0/C=2176</aaAlleleAndCount> | |
<totalAlleleAndCount>T=1/C=4877</totalAlleleAndCount> | |
<uaMAF>0.037</uaMAF> | |
<aaMAF>0.0</aaMAF> | |
<totalMAF>0.0205</totalMAF> | |
<avgSampleReadDepth>198</avgSampleReadDepth> | |
<geneList>NOTCH2</geneList> | |
<snpFunction> | |
<chromosome>1</chromosome> | |
<position>120457968</position> | |
<conservationScore>1.0</conservationScore> | |
<conservationScoreGERP>5.5</conservationScoreGERP> | |
<snpFxnList> | |
<mrnaAccession>NM_024408</mrnaAccession> | |
<fxnClassGVS>missense</fxnClassGVS> | |
<aminoAcids>MET,ILE</aminoAcids> | |
<proteinPos>2459/2472</proteinPos> | |
<cdnaPos>7377</cdnaPos> | |
<pphPrediction>unknown</pphPrediction> | |
<granthamScore>10</granthamScore> | |
</snpFxnList> | |
<refAllele>C</refAllele> | |
<ancestralAllele>C</ancestralAllele> | |
<firstRsId>0</firstRsId> | |
<secondRsId>0</secondRsId> | |
<filters>PASS</filters> | |
<clinicalLink>unknown</clinicalLink> | |
</snpFunction> | |
<conservationScore>1.0</conservationScore> | |
<conservationScoreGERP>5.5</conservationScoreGERP> | |
<refAllele>C</refAllele> | |
<altAlleles>T</altAlleles> | |
<ancestralAllele>C</ancestralAllele> | |
<chromosome>1</chromosome> | |
<hasAtLeastOneAccession>true</hasAtLeastOneAccession> | |
<rsIds>none</rsIds> | |
<filters>PASS</filters> | |
<clinicalLink>unknown</clinicalLink> | |
</snpList> | |
<setOfSiteCoverageInfo> | |
<chromosome>1</chromosome> | |
<position>120457968</position> | |
<totalSamplesCovered>2439</totalSamplesCovered> | |
<avgSampleReadDepth>198.0</avgSampleReadDepth> | |
<eaSamplesCovered>1351</eaSamplesCovered> | |
<avgEaSampleReadDepth>202.0</avgEaSampleReadDepth> | |
<aaSamplesCovered>1088</aaSamplesCovered> | |
<avgAaSampleReadDepth>194.0</avgAaSampleReadDepth> | |
</setOfSiteCoverageInfo> | |
<setOfSiteCoverageInfo> | |
<chromosome>1</chromosome> | |
<position>120457969</position> | |
<totalSamplesCovered>2439</totalSamplesCovered> | |
<avgSampleReadDepth>197.0</avgSampleReadDepth> | |
<eaSamplesCovered>1351</eaSamplesCovered> | |
<avgEaSampleReadDepth>201.0</avgEaSampleReadDepth> | |
<aaSamplesCovered>1088</aaSamplesCovered> | |
<avgAaSampleReadDepth>193.0</avgAaSampleReadDepth> | |
</setOfSiteCoverageInfo> | |
</ns3:local> |
$ wsimport -keep "http://evs.gs.washington.edu/wsEVS/EVSDataQueryService?wsdl" parsing WSDL... generating code... compiling code...
Here is the java code running this client. It scans the VCF, calls the webservice for each variation and insert the annotation as JSON in a new column .
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Author: | |
* Pierre Lindenbaum PhD | |
* WWW: | |
* http://plindenbaum.blogspot.com | |
* Date: | |
* 2011-11-16 | |
* Motivation: | |
* annotate VCF with data from http://evs.gs.washington.edu/EVS/ | |
*/ | |
import java.io.BufferedReader; | |
import java.io.InputStreamReader; | |
import java.lang.reflect.Method; | |
import java.util.List; | |
import java.util.regex.Pattern; | |
import edu.washington.gs.evs.webservice.*; | |
/* first, generate the classes with wsimport -keep "http://evs.gs.washington.edu/wsEVS/EVSDataQueryService?wsdl" */ | |
public class EVSClient | |
{ | |
public static void main(String[] args) | |
{ | |
try | |
{ | |
Pattern tab=Pattern.compile("[\t]"); | |
DataQueryService service=new DataQueryService(); | |
DataQuery port=service.getDataQueryPort(); | |
BufferedReader in=new BufferedReader(new InputStreamReader(System.in)); | |
String line; | |
while((line=in.readLine())!=null) | |
{ | |
if(line.startsWith("#")) | |
{ | |
System.out.print(line); | |
if(!line.startsWith("##")) | |
{ | |
System.out.print("\tEVS"); | |
} | |
System.out.println(); | |
continue; | |
} | |
String tokens[]=tab.split(line); | |
int position=Integer.parseInt(tokens[1]); | |
//calls the service chr:start-end | |
EvsData data=port.getEvsData(tokens[0]+":"+position+"-"+(position)); | |
System.out.print(line); | |
System.out.print("\t"); | |
if(data==null || data.getStart()!=position) | |
{ | |
System.out.print("."); | |
} | |
else | |
{ | |
printjson(data); | |
} | |
System.out.println(); | |
} | |
} | |
catch(Throwable err) | |
{ | |
err.printStackTrace(); | |
} | |
} | |
/** transforms a java objet to json using reflection */ | |
private static void printjson(Object o)throws Exception | |
{ | |
if(o==null) | |
{ | |
System.out.print("null"); | |
} | |
else if(o instanceof Number || o.getClass()==Boolean.class) | |
{ | |
System.out.print(o.toString()); | |
} | |
else if(o.getClass()==String.class) | |
{ | |
String s=o.toString(); | |
System.out.print("\""); | |
for(int i=0;i< s.length();++i) | |
{ | |
switch(s.charAt(i)) | |
{ | |
case '\"': System.out.print("\\\"");break; | |
case '\'': System.out.print("\\\'");break; | |
case '\\': System.out.print("\\\\");break; | |
case '\n': System.out.print("\\n");break; | |
case '\t': System.out.print("\\t");break; | |
default:System.out.print(s.charAt(i));break; | |
} | |
} | |
System.out.print("\""); | |
} | |
else if(o instanceof List) | |
{ | |
@SuppressWarnings("rawtypes") | |
List L=(List)o; | |
System.out.print("["); | |
for(int i=0;i< L.size();++i) | |
{ | |
if(i>0) System.out.print(","); | |
printjson(L.get(i)); | |
} | |
System.out.print("]"); | |
} | |
else | |
{ | |
boolean first=true; | |
System.out.print("{"); | |
for(Method method:o.getClass().getMethods()) | |
{ | |
String name=method.getName(); | |
if(name.equals("getClass")) continue; | |
if(!name.startsWith("get")) continue; | |
if(method.getParameterTypes().length != 0) continue; | |
if(Void.class.equals(method.getReturnType())) continue; | |
if(!first) System.out.print(","); | |
first=false; | |
name=name.substring(3); | |
printjson(name.substring(0, 1).toLowerCase()+name.substring(1)); | |
System.out.print(":"); | |
printjson(method.invoke(o)); | |
} | |
System.out.print("}"); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
evsclient.jar :EVSClient.java edu | |
javac $< | |
echo "Main-Class: EVSClient" > manifest.txt | |
jar cvfm $@ manifest.txt EVSClient.class edu | |
edu: | |
wsimport -keep "http://evs.gs.washington.edu/wsEVS/EVSDataQueryService?wsdl" | |
test: evsclient.jar | |
curl -s "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20100804/supporting/EUR.2of4intersection_allele_freq.20100804.sites.vcf.gz" |\ | |
gunzip -c |\ | |
java -jar evsclient.jar | |
clean: | |
rm -rf *.class evsclient.jar edu |
Result (some columns have been cut)
curl -s "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20100804/supporting/EUR.2of4intersection_allele_freq.20100804.sites.vcf.gz" |\ gunzip -c |\ java -jar evsclient.jar ##fileformat=VCFv4.0 ##filedat=20101112 ##datarelease=20100804 ##samples=629 ##description="Where BI calls are present, genotypes and alleles are from BI. In there absence, UM genotypes are used. If neither are available, no genotype information is present and the alleles are from the NCBI calls." (...) #CHROM POS ID EVS 1 10469 rs117577454 {"start":10469,"chromosome":"1","stop":10470,"strand":"+","snpList":[],"setOfSiteCoverageInfo":[]} 1 10583 rs58108140 {"start":10583,"chromosome":"1","stop":10584,"strand":"+","snpList":[],"setOfSiteCoverageInfo":[]} 1 11508 . {"start":11508,"chromosome":"1","stop":11509,"strand":" (...) 1 69511 . {"start":69511,"chromosome":"1","stop":69512,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"1.0","conservationScoreGERP":"0.5","refAllele":"A","ancestralAllele":"G","filters":"PASS","clinicalLink":"unknown","positionString":"1:69511","chrPosition":69511,"alleles":"G/A","uaAlleleCounts":"1373/47","aaAlleleCounts":"880/600","totalAlleleCounts":"2253/647","uaAlleleAndCount":"G=1373/A=47","aaAlleleAndCount":"G=880/A=600","totalAlleleAndCount":"G=2253/A=647","uaMAF":3.3099,"aaMAF":40.5405,"totalMAF":22.3103,"avgSampleReadDepth":185,"geneList":"OR4F5","snpFunction":{"chromosome":"1","position":69511,"conservationScore":"1.0","conservationScoreGERP":"0.5","snpFxnList":[{"mrnaAccession":"NM_001005484","fxnClassGVS":"missense","aminoAcids":"THR,ALA","proteinPos":"141/306","cdnaPos":421,"pphPrediction":"benign","granthamScore":"58"}],"refAllele":"A","ancestralAllele":"G","firstRsId":75062661,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"G","hasAtLeastOneAccession":"true","rsIds":"rs75062661"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":69511,"avgSampleReadDepth":185.0,"totalSamplesCovered":1452,"eaSamplesCovered":712,"avgEaSampleReadDepth":157.0,"aaSamplesCovered":740,"avgAaSampleReadDepth":211.0},{"chromosome":"1","position":69512,"avgSampleReadDepth":180.0,"totalSamplesCovered":1501,"eaSamplesCovered":739,"avgEaSampleReadDepth":153.0,"aaSamplesCovered":762,"avgAaSampleReadDepth":207.0}]} (...) 1 901923 . {"start":901923,"chromosome":"1","stop":901924,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"1.0","conservationScoreGERP":"5.0","refAllele":"C","ancestralAllele":"C","filters":"PASS","clinicalLink":"unknown","positionString":"1:901923","chrPosition":901923,"alleles":"A/C","uaAlleleCounts":"2/2542","aaAlleleCounts":"52/1934","totalAlleleCounts":"54/4476","uaAlleleAndCount":"A=2/C=2542","aaAlleleAndCount":"A=52/C=1934","totalAlleleAndCount":"A=54/C=4476","uaMAF":0.0786,"aaMAF":2.6183,"totalMAF":1.1921,"avgSampleReadDepth":35,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":901923,"conservationScore":"1.0","conservationScoreGERP":"5.0","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"missense","aminoAcids":"SER,ARG","proteinPos":"4/612","cdnaPos":12,"pphPrediction":"probably-damaging","granthamScore":"110"}],"refAllele":"C","ancestralAllele":"C","firstRsId":0,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"A","hasAtLeastOneAccession":"true","rsIds":"none"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":901923,"avgSampleReadDepth":35.0,"totalSamplesCovered":2280,"eaSamplesCovered":1272,"avgEaSampleReadDepth":32.0,"aaSamplesCovered":1008,"avgAaSampleReadDepth":38.0},{"chromosome":"1","position":901924,"avgSampleReadDepth":35.0,"totalSamplesCovered":2283,"eaSamplesCovered":1273,"avgEaSampleReadDepth":32.0,"aaSamplesCovered":1010,"avgAaSampleReadDepth":38.0}]} 1 902069 rs116147894 {"start":902069,"chromosome":"1","stop":902070,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"0.0","conservationScoreGERP":"1.0","refAllele":"T","ancestralAllele":"T","filters":"PASS","clinicalLink":"unknown","positionString":"1:902069","chrPosition":902069,"alleles":"C/T","uaAlleleCounts":"2/320","aaAlleleCounts":"18/212","totalAlleleCounts":"20/532","uaAlleleAndCount":"C=2/T=320","aaAlleleAndCount":"C=18/T=212","totalAlleleAndCount":"C=20/T=532","uaMAF":0.6211,"aaMAF":7.8261,"totalMAF":3.6232,"avgSampleReadDepth":13,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":902069,"conservationScore":"0.0","conservationScoreGERP":"1.0","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"intron","aminoAcids":"none","proteinPos":"NA","cdnaPos":-1,"pphPrediction":"unknown","granthamScore":"NA"}],"refAllele":"T","ancestralAllele":"T","firstRsId":0,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"C","hasAtLeastOneAccession":"true","rsIds":"none"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":902069,"avgSampleReadDepth":13.0,"totalSamplesCovered":304,"eaSamplesCovered":169,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":135,"avgAaSampleReadDepth":12.0},{"chromosome":"1","position":902070,"avgSampleReadDepth":12.0,"totalSamplesCovered":338,"eaSamplesCovered":190,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":148,"avgAaSampleReadDepth":12.0}]} 1 902108 rs62639981 {"start":902108,"chromosome":"1","stop":902109,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"0.0","conservationScoreGERP":"-8.7","refAllele":"C","ancestralAllele":"unknown","filters":"PASS","clinicalLink":"unknown","positionString":"1:902108","chrPosition":902108,"alleles":"T/C","uaAlleleCounts":"5/333","aaAlleleCounts":"0/248","totalAlleleCounts":"5/581","uaAlleleAndCount":"T=5/C=333","aaAlleleAndCount":"T=0/C=248","totalAlleleAndCount":"T=5/C=581","uaMAF":1.4793,"aaMAF":0.0,"totalMAF":0.8532,"avgSampleReadDepth":13,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":902108,"conservationScore":"0.0","conservationScoreGERP":"-8.7","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"coding-synonymous","aminoAcids":"none","proteinPos":"36/612","cdnaPos":108,"pphPrediction":"unknown","granthamScore":"NA"}],"refAllele":"C","ancestralAllele":"unknown","firstRsId":62639981,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"T","hasAtLeastOneAccession":"true","rsIds":"rs62639981"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":902108,"avgSampleReadDepth":13.0,"totalSamplesCovered":294,"eaSamplesCovered":170,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":124,"avgAaSampleReadDepth":13.0},{"chromosome":"1","position":902109,"avgSampleReadDepth":13.0,"totalSamplesCovered":309,"eaSamplesCovered":177,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":132,"avgAaSampleReadDepth":13.0}]} (...)That's it
Pierre
Hello Pierre, can you please give me more information about how to annotate my vcf using this gist ?
ReplyDeleteI downloaded the files from webservice and your two files (Makefile and EVSClient.java), so now I have this structure:
/Makefile
/EVSClient.java
/edu
When I run Makefile I get this error:
./Makefile: 1: evsclient.jar: not found
./Makefile: 2: Syntax error: newline unexpected
I never used a Makefile so I don't know what is going wrong. Supossing that I have a file exome.vcf how I can annotate using this code ?
Thank you for your help.
A Makefile is not an executable.
ReplyDeleteJust type "make". Furthermore, the webservice has changed since I wrote that post.
This comment has been removed by the author.
ReplyDelete