Reading/Writing a VCF file with the GATK-API.
This is a simple post to save my notes about reading a VCF file and writing it to another file using the java libraries of the GATK. The only way I found requires a SAMSequenceDictionary and always writes an index.:
The code
import java.io.*;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.*;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.picard.reference.*;
import java.util.Iterator;
import java.util.Map;
/**
* motivation:
* copy a VCF
* usage:
* javac -cp ${GATK} ReadVCF.java
* java -cp ${GATK}:. ReadVCF ref.fa my.vcf
*/
public class ReadVCF
{
public static void main(String args[]) throws Exception
{
/** latest VCF specification */
final VCFCodec vcfCodec = new VCFCodec();
/** we don't need some indexed VCFs */
boolean requireIndex=false;
/* load a SAM sequence dictionary */
SAMSequenceDictionary dict=new IndexedFastaSequenceFile(
new File(args[0])).getSequenceDictionary();
/* loop over each vcf */
for(int i=1;i< args.length;++i)
{
/* input VCF */
String filename=args[i];
/* output VCF */
File fileout=new File("tmp"+i+".vcf");
VariantContextWriter writer=VariantContextWriterFactory.create(fileout,dict);
/* get A VCF Reader */
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(
filename, vcfCodec, requireIndex);
/* read the header */
VCFHeader header = (VCFHeader)reader.getHeader();
/* write the header */
writer.writeHeader(header);
/** loop over each Variation */
Iterator<VariantContext> it = reader.iterator();
while ( it.hasNext() )
{
/* get next variation and save it */
VariantContext vc = it.next();
writer.add(vc);
}
/* we're done */
reader.close();
writer.close();
}
}
}Makefile
GATK=GenomeAnalysisTKLite-2.2-15-g4828906/GenomeAnalysisTKLite.jar
VCF=gatk-master/public/testdata/exampleDBSNP.vcf
REF=./gatk-master/public/testdata/exampleFASTA.fasta
all:
javac -cp ${GATK} -nowarn ReadVCF.java
java -cp ${GATK}:. ReadVCF $(REF) ${VCF}
That's it,
Pierre
No comments:
Post a Comment