RDF/Jena: a simple extension for XSLT/XALAN. Testing with NCBI-Gene
In a previous post, I showed that the XALAN XSLT engine can be extended with a custom function returning a DOM Document that is then used by the XSLT stylesheet. Here, I'll create an extension for XALAN that retrieves RDF statements from a Jena RDF model. The RDF model is loaded in memory, but one could also use a persistent model (TDB or SDB). I'll download a record from NCBI Gene, transform it to HTML and use the Disease Ontology, available as RDF, to annotate it.
A Gene record is downloaded as XML from NCBI gene:
curl "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=4853&retmode=xml" > notch2.xml
The disease ontology is downloaded as RDF/XML:
curl -odoid.owl "http://www.berkeleybop.org/ontologies/doid.owl"
The XSLT Stylesheet
The stylesheet declares the extension jena, loads the RDF model ("$model"), searches for the OMIM identifiers in the Gene record and loads the RDF statements related to each OMIM-ID. For example, the following XPath expression:
jena:query( $model, $doiid, 'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym', '' )
returns an RDF/XML fragment containing the RDF statements having the subject $doiid, the property "http://www.geneontology.org/formats/oboInOwl#hasExactSynonym" and any object:
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Statement>
    <rdf:subject rdf:resource="http://purl.obolibrary.org/obo/DOID_0050721"/>
    <rdf:predicate rdf:resource="http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"/>
    <rdf:object>Phosphoserine phosphatase deficiency</rdf:object>
  </rdf:Statement>
</rdf:RDF>
The stylesheet:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renders an NCBI Entrez Gene record (root element Entrezgene-Set) as HTML and
  annotates every OMIM identifier found in the record with the diseases of an
  RDF model. The model is accessed through the Jena4Xalan extension, bound
  here to the prefix "jena".
-->
<xsl:stylesheet
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:jena="xalan://jena4xalan.Jena4Xalan"
  version="1.0"
  extension-element-prefixes="jena"
  exclude-result-prefixes="rdf">

  <xsl:output method="html"/>

  <!-- the path to the RDF file -->
  <xsl:param name="doid.path">doid.owl</xsl:param>

  <!-- create a new instance of Jena4Xalan and load the RDF dataset -->
  <xsl:variable name="model" select="jena:new($doid.path)"/>

  <!-- main template: HTML skeleton around the gene records -->
  <xsl:template match="/">
    <html>
      <head>
        <style type="text/css">
          dt { font-weight: bold; color: blue;}
        </style>
      </head>
      <body>
        <xsl:apply-templates select="Entrezgene-Set"/>
      </body>
    </html>
  </xsl:template>

  <!-- template matching Entrezgene-Set: one block for the whole set -->
  <xsl:template match="Entrezgene-Set">
    <div>
      <xsl:apply-templates select="Entrezgene"/>
    </div>
  </xsl:template>

  <!-- template matching Entrezgene: gene name followed by its OMIM entries -->
  <xsl:template match="Entrezgene">
    <div>
      <!-- name of this entry -->
      <h1>
        <xsl:value-of select="Entrezgene_gene/Gene-ref/Gene-ref_locus"/>
      </h1>
      <!-- only the commentaries whose heading is 'OMIM' are processed -->
      <xsl:apply-templates select=".//Gene-commentary[Gene-commentary_heading='OMIM']"/>
    </div>
  </xsl:template>

  <!-- template matching Gene-commentary for OMIM: one section per OMIM id -->
  <xsl:template match="Gene-commentary">
    <xsl:for-each select="Gene-commentary_source/Other-source/Other-source_src/Dbtag/Dbtag_tag/Object-id/Object-id_id">
      <div>
        <!-- print URL to omim -->
        <h3>Omim ID <a href="http://omim.org/entry/{.}"><xsl:value-of select="."/></a></h3>
        <p>
          <!-- loop over the rdf:Statement having the literal 'OMIM:<id>' as
               object of the property hasDbXref; the '.' inside the select is
               still the current Object-id_id -->
          <xsl:for-each select="jena:query($model,'','http://www.geneontology.org/formats/oboInOwl#hasDbXref',concat('OMIM:',.))/rdf:Statement">
            <!-- the subject of this rdf:Statement is expected to be a DOID resource -->
            <xsl:variable name="doiid" select="rdf:subject/@rdf:resource"/>
            <xsl:if test="starts-with($doiid,'http://purl.obolibrary.org/obo/DOID_')">
              <!-- describe this disease and, recursively, its super-classes -->
              <xsl:call-template name="disease">
                <xsl:with-param name="doiid" select="$doiid"/>
              </xsl:call-template>
            </xsl:if>
          </xsl:for-each>
        </p>
      </div>
    </xsl:for-each>
  </xsl:template>

  <!--
    named template 'disease': prints the label, the first exact synonym and,
    recursively, the super-classes of a disease.
    param doiid: the URI of the resource to describe
  -->
  <xsl:template name="disease">
    <xsl:param name="doiid"/>
    <!-- get the rdfs:label -->
    <xsl:variable name="label" select="jena:query($model,$doiid,'http://www.w3.org/2000/01/rdf-schema#label','')/rdf:Statement[1]/rdf:object"/>
    <!-- get the hasExactSynonym -->
    <xsl:variable name="synonym" select="jena:query($model,$doiid,'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym','')/rdf:Statement[1]/rdf:object"/>
    <!-- get the objects of rdfs:subClassOf, i.e. the super-classes. Objects
         without a resource URI are skipped: an empty subject means 'any' for
         jena:query, so recursing on them would describe unrelated resources -->
    <xsl:variable name="superclasses" select="jena:query($model,$doiid,'http://www.w3.org/2000/01/rdf-schema#subClassOf','')/rdf:Statement/rdf:object[string(@rdf:resource) != '']"/>
    <div style="margin:5px;padding:5px;border: 1px solid black;">
      <dl>
        <!-- print the label -->
        <xsl:if test="$label">
          <dt>Label</dt>
          <dd>
            <xsl:value-of select="$label"/>
          </dd>
        </xsl:if>
        <!-- print the synonym -->
        <xsl:if test="$synonym">
          <dt>Synonym</dt>
          <dd>
            <xsl:value-of select="$synonym"/>
          </dd>
        </xsl:if>
        <!-- print the super-classes, recursive calls -->
        <xsl:if test="$superclasses">
          <dt>Sub-Class Of</dt>
          <dd>
            <xsl:for-each select="$superclasses">
              <xsl:call-template name="disease">
                <xsl:with-param name="doiid" select="@rdf:resource"/>
              </xsl:call-template>
            </xsl:for-each>
          </dd>
        </xsl:if>
      </dl>
    </div>
  </xsl:template>

</xsl:stylesheet>
The Java code
This is the Java extension: the constructor loads the RDF model in memory. The function query(..) returns an RDF/XML element (rdf:RDF) containing the statements that match the query.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jena4xalan; | |
import com.hp.hpl.jena.rdf.model.*; | |
import com.hp.hpl.jena.util.*; | |
import org.apache.xml.dtm.*; | |
import org.apache.xml.dtm.ref.*; | |
import org.apache.xalan.extensions.*; | |
import javax.xml.parsers.*; | |
import org.w3c.dom.*; | |
import com.hp.hpl.jena.vocabulary.*; | |
/** | |
* RDF extension for XALAN | |
*/ | |
public class Jena4Xalan | |
{ | |
/** Jena RDF model */ | |
private Model model; | |
/** constructor, param: path to RDF */ | |
public Jena4Xalan(String rdfFile) throws Exception | |
{ | |
this.model=FileManager.get().loadModel(rdfFile); | |
} | |
/* returns a RDF Document containing | |
the rdf:Statements for the subject/property/object | |
empty strings are 'any' | |
object startings with 'http://' are Resources | |
*/ | |
public Element query(ExpressionContext exprContext, String subject,String property,String object) | |
{ | |
/* create a DOM builder */ | |
Document myDoc=null; | |
try | |
{ | |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); | |
DocumentBuilder db = dbf.newDocumentBuilder(); | |
myDoc = db.newDocument(); | |
} | |
catch(ParserConfigurationException pce) | |
{ | |
throw new org.apache.xml.utils.WrappedRuntimeException(pce); | |
} | |
Element root= myDoc.createElementNS(RDF.getURI(),"rdf:RDF"); | |
root.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:rdf",RDF.getURI()); | |
/* get an iterator of the statements */ | |
StmtIterator iter=model.listStatements( | |
isEmpty(subject)?null:ResourceFactory.createResource(subject), | |
isEmpty(property)?null:ResourceFactory.createProperty(property), | |
isEmpty(object)?null: (isURI(object)?ResourceFactory.createResource(object):model.createLiteral(object)) | |
); | |
/* loop over the statements */ | |
while(iter.hasNext()) | |
{ | |
Statement stmt= iter.nextStatement(); | |
/* create a new rdf:Statement and append to the element rdf:RDF */ | |
Element S=myDoc.createElementNS(RDF.getURI(),"rdf:Statement"); | |
root.appendChild(S); | |
/* create subject */ | |
Element f=myDoc.createElementNS(RDF.getURI(),"rdf:subject"); | |
S.appendChild(f); | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getSubject().getURI()); | |
/* create predicate */ | |
f=myDoc.createElementNS(RDF.getURI(),"rdf:predicate"); | |
S.appendChild(f); | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getPredicate().getURI()); | |
/* create object */ | |
f=myDoc.createElementNS(RDF.getURI(),"rdf:object"); | |
S.appendChild(f); | |
if(stmt.getObject().isLiteral()) | |
{ | |
f.appendChild(myDoc.createTextNode(""+stmt.getLiteral().getValue())); | |
} | |
else | |
{ | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getResource().getURI()); | |
} | |
} | |
iter.close(); | |
/* return the document element */ | |
return root; | |
} | |
private static boolean isEmpty(String s) | |
{ | |
return s==null || s.isEmpty(); | |
} | |
private static boolean isURI(String s) | |
{ | |
return s.startsWith("http://"); | |
} | |
} |
Makefile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the Jena4Xalan extension, downloads the input files and runs the
# transformation. The included config.mk is expected to define the
# colon-separated classpaths xalan.libs and jena.libs.
include config.mk

# Delete a target whose recipe failed, so that a half-written file is not
# considered up to date on the next run.
.DELETE_ON_ERROR:

.PHONY: transform

transform: result.html

# Apply the stylesheet to the gene record with the XALAN command line processor.
result.html : notch2.xml doid.owl gene2html.xsl lib/jena4xalan.jar
	java -cp ${xalan.libs}:${jena.libs}:lib/jena4xalan.jar org.apache.xalan.xslt.Process \
		-IN notch2.xml \
		-XSL gene2html.xsl -EDUMP -OUT $@

# Compile the extension and package it as a jar.
lib/jena4xalan.jar: src/jena4xalan/Jena4Xalan.java
	mkdir -p tmp
	mkdir -p $(dir $@)
	javac -d tmp -cp ${xalan.libs}:${jena.libs} -sourcepath src $<
	jar cvf $@ -C tmp .

# Download the gene record, then remove its DOCTYPE declaration. The download
# is a separate step (not piped into sed) so that a failing curl stops the
# recipe instead of producing an empty target; --fail makes curl return an
# error on HTTP errors.
notch2.xml:
	curl --fail -o $@.tmp "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=4853&retmode=xml"
	sed 's/<!DOCTYPE [^>]*>//' $@.tmp > $@
	rm -f $@.tmp

# Download the disease ontology as RDF/XML.
doid.owl :
	curl --fail -o $@ "http://www.berkeleybop.org/ontologies/doid.owl"
config.mk:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Machine-specific locations of the libraries; adapt the two directories below.
# Nothing may follow a value on its line: make keeps trailing blanks in values.

# directory containing the xalan, serializer and xerces jars
xalan.dir=/home/lindenb/package/exist/lib/endorsed
xalan.jar=$(xalan.dir)/xalan-2.7.1.jar
serializer.jar=$(xalan.dir)/serializer-2.9.1.jar
xerces.jar=$(xalan.dir)/xercesImpl-2.9.1.jar
# classpath for XALAN
xalan.libs=$(xalan.jar):$(serializer.jar):$(xerces.jar)

# ivy cache containing jena and its logging dependencies
ivy.cache=/home/lindenb/.ivy2/cache
jena.core.jar=$(ivy.cache)/org.apache.jena/jena-core/jars/jena-core-2.7.3.jar
jena.iri.jar=$(ivy.cache)/org.apache.jena/jena-iri/jars/jena-iri-0.9.3.jar
slf4j.log4j.jar=$(ivy.cache)/org.slf4j/slf4j-log4j12/jars/slf4j-log4j12-1.6.4.jar
slf4j.api.jar=$(ivy.cache)/org.slf4j/slf4j-api/jars/slf4j-api-1.6.4.jar
log4j.jar=$(ivy.cache)/log4j/log4j/bundles/log4j-1.2.16.jar
# classpath for Jena
jena.libs=$(jena.core.jar):$(jena.iri.jar):$(slf4j.log4j.jar):$(slf4j.api.jar):$(log4j.jar)
Result
java -cp ${class.path} org.apache.xalan.xslt.Process \
  -IN notch2.xml \
  -XSL gene2html.xsl -EDUMP -OUT result.html
NOTCH2
Omim ID 610205
- Label
- Alagille syndrome
- Synonym
- Arteriohepatic dysplasia (disorder)
- Sub-Class Of
-
- Label
- gastrointestinal system disease
- Synonym
- gastrointestinal disease
- Sub-Class Of
-
- Label
- disease of anatomical entity
- Sub-Class Of
-
- Label
- disease
Omim ID 102500
- Label
- Hajdu-Cheney syndrome
- Synonym
- Hajdu-Cheney syndrome (disorder)
- Sub-Class Of
-
- Label
- autosomal dominant disease
- Sub-Class Of
-
- Label
- autosomal genetic disease
- Sub-Class Of
-
- Label
- monogenic disease
- Sub-Class Of
-
- Label
- genetic disease
- Sub-Class Of
-
- Label
- disease
That's it,
Pierre