RDF/Jena: a simple extension for XSLT/XALAN. Testing with NCBI-Gene
In a previous post, I showed that the XALAN XSLT engine can be extended with custom functions returning a DOM Document that can then be used by the XSLT stylesheet. Here, I'll create an extension for XALAN that fetches RDF statements from a Jena/RDF model. The RDF model will be loaded in memory, but one could imagine using a persistent model (TDB or SDB) instead. I'll download a record from NCBI Gene, transform it to HTML and use the Disease Ontology database, as RDF, to annotate it.
A Gene record is downloaded as XML from NCBI gene:
curl "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=4853&retmode=xml" > notch2.xml
The disease ontology is downloaded as RDF/XML:
curl -odoid.owl "http://www.berkeleybop.org/ontologies/doid.owl"
The XSLT Stylesheet
The stylesheet declares the extension jena, loads the RDF model ("$model"), searches for the OMIM identifiers in the Gene record and loads the RDF statements related to each OMIM-ID. For example, the following XPath expression:
jena:query( $model, $doiid, 'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym', '' )
returns an RDF/XML document containing the RDF statements having subject=$doiid, the property "http://www.geneontology.org/formats/oboInOwl#hasExactSynonym" and any object:
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Statement>
    <rdf:subject rdf:resource="http://purl.obolibrary.org/obo/DOID_0050721"/>
    <rdf:predicate rdf:resource="http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"/>
    <rdf:object>Phosphoserine phosphatase deficiency</rdf:object>
  </rdf:Statement>
</rdf:RDF>
The stylesheet:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Transforms an NCBI Entrezgene-Set XML record to HTML and annotates every
  OMIM identifier found in it with the matching Disease Ontology (DOID) terms.

  The RDF statements are obtained through the XALAN extension class
  jena4xalan.Jena4Xalan:
    jena:new(path)                   loads the RDF file into a model
    jena:query($model, s, p, o)      returns an rdf:RDF element whose
                                     rdf:Statement children match the pattern;
                                     an empty string means 'any'.
-->
<xsl:stylesheet
	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:jena="xalan://jena4xalan.Jena4Xalan"
	version="1.0"
	extension-element-prefixes="jena"
	exclude-result-prefixes="rdf"
	>
<xsl:output method="html"/>

<!-- the path to the RDF file -->
<xsl:param name="doid.path">doid.owl</xsl:param>

<!-- create a new instance of Jena4Xalan and load the RDF dataset -->
<xsl:variable name="model" select="jena:new($doid.path)"/>

<!-- main template: HTML skeleton around the gene records -->
<xsl:template match="/">
	<html>
	<head>
	<style type="text/css">
	dt { font-weight: bold; color: blue;}
	</style>
	</head>
	<body>
	<xsl:apply-templates select="Entrezgene-Set"/>
	</body>
	</html>
</xsl:template>

<!-- template matching Entrezgene-Set: one block per Entrezgene -->
<xsl:template match="Entrezgene-Set">
	<div>
	<xsl:apply-templates select="Entrezgene"/>
	</div>
</xsl:template>

<!-- template matching Entrezgene: gene name followed by its OMIM entries -->
<xsl:template match="Entrezgene">
	<div>
	<!-- name of this entry -->
	<h1>
	<xsl:value-of select="Entrezgene_gene/Gene-ref/Gene-ref_locus"/>
	</h1>
	<!-- OMIM-ID: only the commentaries whose heading is 'OMIM' are processed -->
	<xsl:apply-templates select=".//Gene-commentary[Gene-commentary_heading='OMIM']"/>
	</div>
</xsl:template>

<!-- template matching Gene-commentary for OMIM -->
<xsl:template match="Gene-commentary">
	<xsl:for-each select="Gene-commentary_source/Other-source/Other-source_src/Dbtag/Dbtag_tag/Object-id/Object-id_id">
		<!-- the OMIM identifier of the current Object-id_id -->
		<xsl:variable name="omim" select="string(.)"/>
		<div>
		<!-- print URL to omim -->
		<h3>Omim ID <a href="http://omim.org/entry/{$omim}"><xsl:value-of select="$omim"/></a></h3>
		<!-- a div (not a p) is used as container because the 'disease'
		     template emits block-level div elements -->
		<div>
		<!-- loop over the rdf:Statement having a hasDbXref = 'OMIM:' + this OMIM-ID -->
		<xsl:for-each select="jena:query($model,'','http://www.geneontology.org/formats/oboInOwl#hasDbXref',concat('OMIM:',$omim))/rdf:Statement">
			<!-- the Subject of this rdf:statement is a DOID resource -->
			<xsl:variable name="doiid" select="rdf:subject/@rdf:resource"/>
			<xsl:if test="starts-with($doiid,'http://purl.obolibrary.org/obo/DOID_')">
				<!-- initial call; 'disease' then walks up the subClassOf hierarchy -->
				<xsl:call-template name="disease">
					<xsl:with-param name="doiid" select="$doiid"/>
				</xsl:call-template>
			</xsl:if>
		</xsl:for-each>
		</div>
		</div>
	</xsl:for-each>
</xsl:template>

<!-- method 'disease': prints label, synonym and, recursively, the super-classes of a DOID term -->
<xsl:template name="disease">
	<!-- param doiid: a DOID URI -->
	<xsl:param name="doiid"/>
	<!-- get the rdfs:label (first statement only) -->
	<xsl:variable name="label" select="jena:query($model,$doiid,'http://www.w3.org/2000/01/rdf-schema#label','')/rdf:Statement[1]/rdf:object"/>
	<!-- get the hasExactSynonym (first statement only) -->
	<xsl:variable name="synonym" select="jena:query($model,$doiid,'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym','')/rdf:Statement[1]/rdf:object"/>
	<!-- get the objects of rdfs:subClassOf, i.e. the parents of this term.
	     Only objects carrying a non-empty rdf:resource are kept: an empty
	     subject means 'any' for jena:query, so recursing with an empty URI
	     would match the whole model instead of one term. -->
	<xsl:variable name="superclasses" select="jena:query($model,$doiid,'http://www.w3.org/2000/01/rdf-schema#subClassOf','')/rdf:Statement/rdf:object[@rdf:resource != '']"/>
	<div style="margin:5px;padding:5px;border: 1px solid black;">
	<dl>
	<!-- print the label-->
	<xsl:if test="$label">
		<dt>Label</dt>
		<dd>
		<xsl:value-of select="$label"/>
		</dd>
	</xsl:if>
	<!-- print the synonym -->
	<xsl:if test="$synonym">
		<dt>Synonym</dt>
		<dd>
		<xsl:value-of select="$synonym"/>
		</dd>
	</xsl:if>
	<!-- print the parents, recursive calls -->
	<xsl:if test="$superclasses">
		<dt>Sub-Class Of</dt>
		<dd>
		<xsl:for-each select="$superclasses">
			<xsl:call-template name="disease">
				<xsl:with-param name="doiid" select="@rdf:resource"/>
			</xsl:call-template>
		</xsl:for-each>
		</dd>
	</xsl:if>
	</dl>
	</div>
</xsl:template>

</xsl:stylesheet>
The Java code
This is the Java extension: the constructor loads the RDF model in memory. The function query(..) returns an RDF/XML element containing the statements matching the query.
package jena4xalan;
import com.hp.hpl.jena.rdf.model.*; | |
import com.hp.hpl.jena.util.*; | |
import org.apache.xml.dtm.*; | |
import org.apache.xml.dtm.ref.*; | |
import org.apache.xalan.extensions.*; | |
import javax.xml.parsers.*; | |
import org.w3c.dom.*; | |
import com.hp.hpl.jena.vocabulary.*; | |
/** | |
* RDF extension for XALAN | |
*/ | |
public class Jena4Xalan | |
{ | |
/** Jena RDF model */ | |
private Model model; | |
/** constructor, param: path to RDF */ | |
public Jena4Xalan(String rdfFile) throws Exception | |
{ | |
this.model=FileManager.get().loadModel(rdfFile); | |
} | |
/* returns a RDF Document containing | |
the rdf:Statements for the subject/property/object | |
empty strings are 'any' | |
object startings with 'http://' are Resources | |
*/ | |
public Element query(ExpressionContext exprContext, String subject,String property,String object) | |
{ | |
/* create a DOM builder */ | |
Document myDoc=null; | |
try | |
{ | |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); | |
DocumentBuilder db = dbf.newDocumentBuilder(); | |
myDoc = db.newDocument(); | |
} | |
catch(ParserConfigurationException pce) | |
{ | |
throw new org.apache.xml.utils.WrappedRuntimeException(pce); | |
} | |
Element root= myDoc.createElementNS(RDF.getURI(),"rdf:RDF"); | |
root.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:rdf",RDF.getURI()); | |
/* get an iterator of the statements */ | |
StmtIterator iter=model.listStatements( | |
isEmpty(subject)?null:ResourceFactory.createResource(subject), | |
isEmpty(property)?null:ResourceFactory.createProperty(property), | |
isEmpty(object)?null: (isURI(object)?ResourceFactory.createResource(object):model.createLiteral(object)) | |
); | |
/* loop over the statements */ | |
while(iter.hasNext()) | |
{ | |
Statement stmt= iter.nextStatement(); | |
/* create a new rdf:Statement and append to the element rdf:RDF */ | |
Element S=myDoc.createElementNS(RDF.getURI(),"rdf:Statement"); | |
root.appendChild(S); | |
/* create subject */ | |
Element f=myDoc.createElementNS(RDF.getURI(),"rdf:subject"); | |
S.appendChild(f); | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getSubject().getURI()); | |
/* create predicate */ | |
f=myDoc.createElementNS(RDF.getURI(),"rdf:predicate"); | |
S.appendChild(f); | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getPredicate().getURI()); | |
/* create object */ | |
f=myDoc.createElementNS(RDF.getURI(),"rdf:object"); | |
S.appendChild(f); | |
if(stmt.getObject().isLiteral()) | |
{ | |
f.appendChild(myDoc.createTextNode(""+stmt.getLiteral().getValue())); | |
} | |
else | |
{ | |
f.setAttributeNS(RDF.getURI(),"rdf:resource",stmt.getResource().getURI()); | |
} | |
} | |
iter.close(); | |
/* return the document element */ | |
return root; | |
} | |
private static boolean isEmpty(String s) | |
{ | |
return s==null || s.isEmpty(); | |
} | |
private static boolean isURI(String s) | |
{ | |
return s.startsWith("http://"); | |
} | |
} |
Makefile
# Compiles the Jena4Xalan extension, downloads the input files and runs
# the XSLT transformation. Library locations come from config.mk.
include config.mk

# Delete a target whose recipe failed, so that a broken download or a
# half-written result is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

.PHONY: transform

transform: result.html

# run XALAN with the extension on the classpath
result.html : notch2.xml doid.owl gene2html.xsl lib/jena4xalan.jar
	java -cp ${xalan.libs}:${jena.libs}:lib/jena4xalan.jar org.apache.xalan.xslt.Process \
		-IN notch2.xml \
		-XSL gene2html.xsl -EDUMP -OUT $@

# compile and package the XALAN extension
lib/jena4xalan.jar: src/jena4xalan/Jena4Xalan.java
	mkdir -p tmp
	mkdir -p $(dir $@)
	javac -d tmp -cp ${xalan.libs}:${jena.libs} -sourcepath src $<
	jar cvf $@ -C tmp .

# download the gene record and strip its DOCTYPE declaration.
# The download goes to a temporary file instead of a pipe: in a pipeline
# only the exit status of sed would be seen and a failed curl would go unnoticed.
notch2.xml:
	curl --fail -o $@.tmp "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=4853&retmode=xml"
	sed 's/<!DOCTYPE [^>]*>//' $@.tmp > $@
	rm -f $@.tmp

# download the disease ontology; fail on an HTTP error instead of saving the error page
doid.owl :
	curl --fail -o $@ "http://www.berkeleybop.org/ontologies/doid.owl"
config.mk:
# Machine-specific locations of the third-party jars; adapt the two
# directories below to the local installation.

# XALAN and its companion jars
xalan.dir=/home/lindenb/package/exist/lib/endorsed
xalan.jar=$(xalan.dir)/xalan-2.7.1.jar
serializer.jar=$(xalan.dir)/serializer-2.9.1.jar
xerces.jar=$(xalan.dir)/xercesImpl-2.9.1.jar
xalan.libs=$(xalan.jar):$(serializer.jar):$(xerces.jar)

# Jena and its logging dependencies, taken from the ivy cache
ivy.cache=/home/lindenb/.ivy2/cache
jena.core.jar=$(ivy.cache)/org.apache.jena/jena-core/jars/jena-core-2.7.3.jar
jena.iri.jar=$(ivy.cache)/org.apache.jena/jena-iri/jars/jena-iri-0.9.3.jar
slf4j.log4j.jar=$(ivy.cache)/org.slf4j/slf4j-log4j12/jars/slf4j-log4j12-1.6.4.jar
slf4j.api.jar=$(ivy.cache)/org.slf4j/slf4j-api/jars/slf4j-api-1.6.4.jar
log4j.jar=$(ivy.cache)/log4j/log4j/bundles/log4j-1.2.16.jar
jena.libs=$(jena.core.jar):$(jena.iri.jar):$(slf4j.log4j.jar):$(slf4j.api.jar):$(log4j.jar)
Result
java -cp ${class.path} org.apache.xalan.xslt.Process -IN notch2.xml -XSL gene2html.xsl -EDUMP -OUT result.html
NOTCH2
Omim ID 610205
- Label
- Alagille syndrome
- Synonym
- Arteriohepatic dysplasia (disorder)
- Sub-Class Of
-
- Label
- gastrointestinal system disease
- Synonym
- gastrointestinal disease
- Sub-Class Of
-
- Label
- disease of anatomical entity
- Sub-Class Of
-
- Label
- disease
Omim ID 102500
- Label
- Hajdu-Cheney syndrome
- Synonym
- Hajdu-Cheney syndrome (disorder)
- Sub-Class Of
-
- Label
- autosomal dominant disease
- Sub-Class Of
-
- Label
- autosomal genetic disease
- Sub-Class Of
-
- Label
- monogenic disease
- Sub-Class Of
-
- Label
- genetic disease
- Sub-Class Of
-
- Label
- disease
That's it,
Pierre