The java library for BigBed and BigWig: my notebook
Jim Robinson and his team, from the Broad Institute/IGV, have recently released a Java library for parsing the BigBed and BigWig formats. Here is my notebook for this API.
Download and compile the library
The sources are hosted at: http://bigwig.googlecode.com/.
$ svn checkout http://bigwig.googlecode.com/svn/trunk/ bigwig-read-only
$ cd bigwig-read-only
$ ant
Buildfile: build.xml
compile:
[mkdir] Created dir: /path/to/bigwig-read-only/build
[javac] Compiling 38 source files to /path/to/bigwig-read-only/build
[javac] Note: /path/to/bigwig-read-only/src/org/broad/igv/bbfile/BPTree.java uses unchecked or unsafe operations.
[javac] Note: Recompile with -Xlint:unchecked for details.
dist:
[mkdir] Created dir: /path/to/bigwig-read-only/dist
[jar] Building jar: /path/to/bigwig-read-only/dist/BigWig.jar
BUILD SUCCESSFUL
Total time: 3 seconds
Code
The following Java code prints all the Bed or the Wig data in a given genomic region.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Author:
 *   Pierre Lindenbaum PhD
 * Date:
 *   June-2011
 * Contact:
 *   plindenbaum@yahoo.fr
 * Reference:
 *   http://code.google.com/p/bigwig/
 * WWW:
 *   http://plindenbaum.blogspot.com
 * Motivation:
 *   testing the java API for bigwig/bigbed
 * Compilation:
 *   javac -cp /path/to/bigwig-read-only/dist/BigWig.jar:. BigCat.java
 * Usage:
 *   java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat -h
 */
import java.io.IOException; | |
import java.util.HashSet; | |
import java.util.Set; | |
import org.broad.igv.bbfile.BBFileHeader; | |
import org.broad.igv.bbfile.BBFileReader; | |
import org.broad.igv.bbfile.BedFeature; | |
import org.broad.igv.bbfile.BigBedIterator; | |
import org.broad.igv.bbfile.BigWigIterator; | |
import org.broad.igv.bbfile.WigItem; | |
public class BigCat | |
{ | |
public static void main(String[] args) throws Exception | |
{ | |
//allow overlaps flag | |
boolean contained=false; | |
//selected chromosome | |
String chrom=null; | |
//selected start region | |
Integer start=null; | |
//selected end region | |
Integer end=null; | |
int optind = 0; | |
while (optind < args.length) | |
{ | |
if ( args[optind].equals("-h") || | |
args[optind].equals("-help") || | |
args[optind].equals("--help")) | |
{ | |
System.err.println("Pierre Lindenbaum PhD. 2011"); | |
System.err.println("Options:"); | |
System.err.println(" -h help; This screen."); | |
System.err.println(" -p <chrom:start-end>"); | |
System.err.println(" -c wigitem must be Contained in the specified base region (default:allow overlaps)"); | |
System.err.println(" (bigbed|bigwig) file"); | |
return; | |
} | |
else if (args[optind].equals("-c")) | |
{ | |
contained=true; | |
} | |
else if (args[optind].equals("-p")) | |
{ | |
//parse 'chrom:start-end' | |
String s=args[++optind]; | |
int i=s.indexOf(':'); | |
if(i==-1) | |
{ | |
System.err.println("Bad chrom:start-end in :"+s); | |
System.exit(-1); | |
} | |
chrom=s.substring(0,i); | |
int j=s.indexOf('-',i+1); | |
if(j==-1) | |
{ | |
System.err.println("Bad chrom:start-end in :"+s); | |
System.exit(-1); | |
} | |
start=Integer.parseInt(s.substring(i+1,j)); | |
end=Integer.parseInt(s.substring(j+1)); | |
if(end<start) | |
{ | |
System.err.println("end< start in :"+s); | |
System.exit(-1); | |
} | |
} | |
else if (args[optind].equals("--")) | |
{ | |
optind++; | |
break; | |
} | |
else if (args[optind].startsWith("-")) | |
{ | |
System.err.println("Unknown option " + args[optind]); | |
System.exit(-1); | |
} | |
else | |
{ | |
break; | |
} | |
++optind; | |
} | |
if(optind+1!=args.length) | |
{ | |
System.err.println("Illegal number of arguments."); | |
System.exit(-1); | |
} | |
String uri=args[optind++]; | |
//open big file | |
BBFileReader reader=new BBFileReader(uri); | |
//get the big header | |
BBFileHeader bbFileHdr = reader.getBBFileHeader(); | |
if(!bbFileHdr.isHeaderOK()) | |
{ | |
throw new IOException("bad header for "+uri); | |
} | |
//is it wig or bed ? | |
if(!(bbFileHdr.isBigBed() || bbFileHdr.isBigWig())) | |
{ | |
throw new IOException("undefined big type for "+uri); | |
} | |
//chromosome was specified, test if it exists in this file | |
if(chrom!=null) | |
{ | |
Set<String> set=new HashSet<String>(reader.getChromosomeNames()); | |
if(!set.contains(chrom)) | |
{ | |
System.err.println(uri+" doesn't contain chromosome:"+chrom); | |
return; | |
} | |
} | |
//BED index | |
if(bbFileHdr.isBigBed()) | |
{ | |
BigBedIterator iter; | |
//get an iterator for BigBed features which occupy a chromosome selection region. | |
if(chrom!=null) //region was specified | |
{ | |
iter=reader.getBigBedIterator(chrom,start,chrom,end,contained); | |
} | |
//get all | |
else | |
{ | |
iter=reader.getBigBedIterator(); | |
} | |
//loop over iterator | |
while(iter.hasNext()) | |
{ | |
BedFeature f=iter.next(); | |
System.out.print(f.getChromosome()); | |
System.out.print("\t"); | |
System.out.print(f.getStartBase()); | |
System.out.print("\t"); | |
System.out.print(f.getEndBase()); | |
System.out.print("\t"); | |
System.out.print(f.getRestOfFields()); | |
System.out.println(); | |
} | |
} | |
//WIG index | |
else if(bbFileHdr.isBigWig()) | |
{ | |
BigWigIterator iter=null; | |
if(chrom!=null)//region was specified | |
{ | |
// iterator for BigWig values which occupy the specified startChromosome region. | |
iter=reader.getBigWigIterator(chrom,start,chrom,end,contained); | |
} | |
else | |
{ | |
// iterator for BigWig values | |
iter=reader.getBigWigIterator(); | |
} | |
//loop over iterator | |
while(iter.hasNext()) | |
{ | |
WigItem f=iter.next(); | |
System.out.print(f.getChromosome()); | |
System.out.print("\t"); | |
System.out.print(f.getStartBase()); | |
System.out.print("\t"); | |
System.out.print(f.getEndBase()); | |
System.out.print("\t"); | |
System.out.print(f.getWigValue()); | |
System.out.println(); | |
} | |
} | |
} | |
} |
Compile
$ javac -cp /path/to/bigwig-read-only/dist/BigWig.jar:. BigCat.java
Test
List the data in a BigBed file:
$ java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat /path/to/bigwig-read-only/test/data/chr21.bb | head
chr21 9434178 9434609
chr21 9434178 9434609
chr21 9508110 9508214
chr21 9516607 9516987
chr21 9903013 9903230
chr21 9903013 9903230
chr21 9905661 9906613
chr21 9907217 9907519
chr21 9907241 9907415
chr21 9907597 9908258
List the data in a BigBed file for the region: 'chr21:9906000-9908000', allow the overlaps.
$ java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat -p chr21:9906000-9908000 /path/to/bigwig-read-only/test/data/chr21.bb | head
chr21 9905661 9906613
chr21 9907217 9907519
chr21 9907241 9907415
chr21 9907597 9908258
List the data in a BigBed file for the region: 'chr21:9906000-9908000', do not allow the overlaps.
$ java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat -p chr21:9906000-9908000 -c /path/to/bigwig-read-only/test/data/chr21.bb | head
chr21 9907217 9907519
chr21 9907241 9907415
List the data in a BigWig file:
$ java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat /path/to/bigwig-read-only/test/data/wigVarStepExample.bw | head
chr21 9411190 9411195 50.0
chr21 9411195 9411200 40.0
chr21 9411200 9411205 60.0
chr21 9411205 9411210 20.0
chr21 9411210 9411215 20.0
chr21 9411215 9411220 20.0
chr21 9411220 9411225 40.0
chr21 9411225 9411230 60.0
chr21 9411230 9411235 40.0
chr21 9411235 9411240 40.0
List the data in a BigWig file for the region: 'chr21:9906000-9908000'
$ java -cp /path/to/bigwig-read-only/dist/BigWig.jar:/path/to/bigwig-read-only/lib/log4j-1.2.15.jar:. BigCat -p chr21:9906000-9908000 /path/to/bigwig-read-only/test/data/wigVarStepExample.bw | head
chr21 9906000 9906005 20.0
chr21 9906005 9906010 60.0
chr21 9906010 9906015 60.0
chr21 9906015 9906020 60.0
chr21 9906020 9906025 80.0
chr21 9906025 9906030 60.0
chr21 9906030 9906035 40.0
chr21 9906035 9906040 80.0
chr21 9906040 9906045 80.0
chr21 9906045 9906050 80.0
See also
That's it,
Pierre