I've written a tool named "apache velocity" which parse json data and processes it with "Apache velocity" (a template engine ). The (javacc) source code is available here:
The NHLBI Exome Sequencing Project (ESP) has released a web service to query their data. "The goal of the NHLBI GO Exome Sequencing Project (ESP) is to discover novel genes and mechanisms contributing to heart, lung and blood disorders by pioneering the application of next-generation sequencing of the protein coding regions of the human genome across diverse, richly-phenotyped populations and to share these datasets and findings with the scientific community to extend and enrich the diagnosis, management and treatment of heart, lung and blood disorders.". In the current post, I'll show how I've used this web service to annotate a VCF file with this information.
The web service provided by the ESP is based on the SOAP protocol. Here is an example of the XML response:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Here is the java code running this client. It scans the VCF, calls the webservice for each variation and insert the annotation as JSON in a new column .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -s "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20100804/supporting/EUR.2of4intersection_allele_freq.20100804.sites.vcf.gz" |\
gunzip -c |\
java -jar evsclient.jar
##fileformat=VCFv4.0
##filedat=20101112
##datarelease=20100804
##samples=629
##description="Where BI calls are present, genotypes and alleles are from BI. In there absence, UM genotypes are used. If neither are available, no genotype information is present and the alleles are from the NCBI calls."
(...)
#CHROM POS ID EVS
1 10469 rs117577454 {"start":10469,"chromosome":"1","stop":10470,"strand":"+","snpList":[],"setOfSiteCoverageInfo":[]}
1 10583 rs58108140 {"start":10583,"chromosome":"1","stop":10584,"strand":"+","snpList":[],"setOfSiteCoverageInfo":[]}
1 11508 . {"start":11508,"chromosome":"1","stop":11509,"strand":"
(...)
1 69511 . {"start":69511,"chromosome":"1","stop":69512,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"1.0","conservationScoreGERP":"0.5","refAllele":"A","ancestralAllele":"G","filters":"PASS","clinicalLink":"unknown","positionString":"1:69511","chrPosition":69511,"alleles":"G/A","uaAlleleCounts":"1373/47","aaAlleleCounts":"880/600","totalAlleleCounts":"2253/647","uaAlleleAndCount":"G=1373/A=47","aaAlleleAndCount":"G=880/A=600","totalAlleleAndCount":"G=2253/A=647","uaMAF":3.3099,"aaMAF":40.5405,"totalMAF":22.3103,"avgSampleReadDepth":185,"geneList":"OR4F5","snpFunction":{"chromosome":"1","position":69511,"conservationScore":"1.0","conservationScoreGERP":"0.5","snpFxnList":[{"mrnaAccession":"NM_001005484","fxnClassGVS":"missense","aminoAcids":"THR,ALA","proteinPos":"141/306","cdnaPos":421,"pphPrediction":"benign","granthamScore":"58"}],"refAllele":"A","ancestralAllele":"G","firstRsId":75062661,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"G","hasAtLeastOneAccession":"true","rsIds":"rs75062661"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":69511,"avgSampleReadDepth":185.0,"totalSamplesCovered":1452,"eaSamplesCovered":712,"avgEaSampleReadDepth":157.0,"aaSamplesCovered":740,"avgAaSampleReadDepth":211.0},{"chromosome":"1","position":69512,"avgSampleReadDepth":180.0,"totalSamplesCovered":1501,"eaSamplesCovered":739,"avgEaSampleReadDepth":153.0,"aaSamplesCovered":762,"avgAaSampleReadDepth":207.0}]}
(...)
1 901923 . {"start":901923,"chromosome":"1","stop":901924,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"1.0","conservationScoreGERP":"5.0","refAllele":"C","ancestralAllele":"C","filters":"PASS","clinicalLink":"unknown","positionString":"1:901923","chrPosition":901923,"alleles":"A/C","uaAlleleCounts":"2/2542","aaAlleleCounts":"52/1934","totalAlleleCounts":"54/4476","uaAlleleAndCount":"A=2/C=2542","aaAlleleAndCount":"A=52/C=1934","totalAlleleAndCount":"A=54/C=4476","uaMAF":0.0786,"aaMAF":2.6183,"totalMAF":1.1921,"avgSampleReadDepth":35,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":901923,"conservationScore":"1.0","conservationScoreGERP":"5.0","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"missense","aminoAcids":"SER,ARG","proteinPos":"4/612","cdnaPos":12,"pphPrediction":"probably-damaging","granthamScore":"110"}],"refAllele":"C","ancestralAllele":"C","firstRsId":0,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"A","hasAtLeastOneAccession":"true","rsIds":"none"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":901923,"avgSampleReadDepth":35.0,"totalSamplesCovered":2280,"eaSamplesCovered":1272,"avgEaSampleReadDepth":32.0,"aaSamplesCovered":1008,"avgAaSampleReadDepth":38.0},{"chromosome":"1","position":901924,"avgSampleReadDepth":35.0,"totalSamplesCovered":2283,"eaSamplesCovered":1273,"avgEaSampleReadDepth":32.0,"aaSamplesCovered":1010,"avgAaSampleReadDepth":38.0}]}
1 902069 rs116147894 {"start":902069,"chromosome":"1","stop":902070,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"0.0","conservationScoreGERP":"1.0","refAllele":"T","ancestralAllele":"T","filters":"PASS","clinicalLink":"unknown","positionString":"1:902069","chrPosition":902069,"alleles":"C/T","uaAlleleCounts":"2/320","aaAlleleCounts":"18/212","totalAlleleCounts":"20/532","uaAlleleAndCount":"C=2/T=320","aaAlleleAndCount":"C=18/T=212","totalAlleleAndCount":"C=20/T=532","uaMAF":0.6211,"aaMAF":7.8261,"totalMAF":3.6232,"avgSampleReadDepth":13,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":902069,"conservationScore":"0.0","conservationScoreGERP":"1.0","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"intron","aminoAcids":"none","proteinPos":"NA","cdnaPos":-1,"pphPrediction":"unknown","granthamScore":"NA"}],"refAllele":"T","ancestralAllele":"T","firstRsId":0,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"C","hasAtLeastOneAccession":"true","rsIds":"none"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":902069,"avgSampleReadDepth":13.0,"totalSamplesCovered":304,"eaSamplesCovered":169,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":135,"avgAaSampleReadDepth":12.0},{"chromosome":"1","position":902070,"avgSampleReadDepth":12.0,"totalSamplesCovered":338,"eaSamplesCovered":190,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":148,"avgAaSampleReadDepth":12.0}]}
1 902108 rs62639981 {"start":902108,"chromosome":"1","stop":902109,"strand":"+","snpList":[{"chromosome":"1","conservationScore":"0.0","conservationScoreGERP":"-8.7","refAllele":"C","ancestralAllele":"unknown","filters":"PASS","clinicalLink":"unknown","positionString":"1:902108","chrPosition":902108,"alleles":"T/C","uaAlleleCounts":"5/333","aaAlleleCounts":"0/248","totalAlleleCounts":"5/581","uaAlleleAndCount":"T=5/C=333","aaAlleleAndCount":"T=0/C=248","totalAlleleAndCount":"T=5/C=581","uaMAF":1.4793,"aaMAF":0.0,"totalMAF":0.8532,"avgSampleReadDepth":13,"geneList":"PLEKHN1","snpFunction":{"chromosome":"1","position":902108,"conservationScore":"0.0","conservationScoreGERP":"-8.7","snpFxnList":[{"mrnaAccession":"NM_032129","fxnClassGVS":"coding-synonymous","aminoAcids":"none","proteinPos":"36/612","cdnaPos":108,"pphPrediction":"unknown","granthamScore":"NA"}],"refAllele":"C","ancestralAllele":"unknown","firstRsId":62639981,"secondRsId":0,"filters":"PASS","clinicalLink":"unknown"},"altAlleles":"T","hasAtLeastOneAccession":"true","rsIds":"rs62639981"}],"setOfSiteCoverageInfo":[{"chromosome":"1","position":902108,"avgSampleReadDepth":13.0,"totalSamplesCovered":294,"eaSamplesCovered":170,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":124,"avgAaSampleReadDepth":13.0},{"chromosome":"1","position":902109,"avgSampleReadDepth":13.0,"totalSamplesCovered":309,"eaSamplesCovered":177,"avgEaSampleReadDepth":13.0,"aaSamplesCovered":132,"avgAaSampleReadDepth":13.0}]}
(...)