Drawing an SVG timeline with the data from http://data.bnf.fr
The French National Library (Bibliothèque nationale de France, BnF) has recently started to release
its data as RDF/XML. Here, I've played with the biographies of famous French writers to create a simple timeline.
Unfortunately, this timeline is too large to be displayed here. :-)
However, here is the Java code I used to generate this timeline as an SVG document:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Author:
 *     Pierre Lindenbaum PhD
 * Date:
 *     July-2011
 * Contact:
 *     plindenbaum@yahoo.fr
 * Reference:
 *
 * WWW:
 *     http://plindenbaum.blogspot.com
 * Motivation:
 *     timeline from http://data.bnf.fr
 *
 */
import java.awt.Dimension; | |
import java.awt.image.BufferedImage; | |
import java.io.File; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.StringReader; | |
import java.net.URL; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Comparator; | |
import java.util.HashMap; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.logging.Logger; | |
import javax.imageio.ImageIO; | |
import javax.imageio.ImageReader; | |
import javax.imageio.stream.ImageInputStream; | |
import javax.xml.XMLConstants; | |
import javax.xml.namespace.NamespaceContext; | |
import javax.xml.parsers.DocumentBuilder; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import javax.xml.stream.XMLOutputFactory; | |
import javax.xml.stream.XMLStreamException; | |
import javax.xml.stream.XMLStreamWriter; | |
import javax.xml.xpath.XPath; | |
import javax.xml.xpath.XPathConstants; | |
import javax.xml.xpath.XPathExpression; | |
import javax.xml.xpath.XPathFactory; | |
import org.w3c.dom.Attr; | |
import org.w3c.dom.Document; | |
import org.w3c.dom.Element; | |
import org.w3c.dom.NodeList; | |
import org.w3c.tidy.Tidy; | |
import org.xml.sax.EntityResolver; | |
import org.xml.sax.InputSource; | |
import org.xml.sax.SAXException; | |
public class BNFTimeline | |
{ | |
static final int ICON_SIZE=64; | |
static final int AUTHOR_HEIGHT=ICON_SIZE+12; | |
static final int MARGIN=5; | |
private static final String SVG="http://www.w3.org/2000/svg"; | |
private static final String CubicWeb="http://www.logilab.org/2008/cubicweb"; | |
private static final String HTML="http://www.w3.org/1999/xhtml"; | |
private static Logger LOG=Logger.getLogger(BNFTimeline.class.getName()); | |
private DocumentBuilder docBuilder=null; | |
private XPath xpath=null; | |
private Map<String, String> prefix2uri=new HashMap<String, String>(); | |
private Double minDays=null; | |
private Double maxDays=null; | |
private static class Date implements Comparable<Date> | |
{ | |
String literal; | |
int year; | |
Integer month; | |
Integer day; | |
@Override | |
public int compareTo(Date o) | |
{ | |
double d= days()-o.days(); | |
if(d!=0.0) return d<0?-1:1; | |
return 0; | |
} | |
public double days() | |
{ | |
double v= year*365.25; | |
if(month!=null) | |
{ | |
v+= (365.25/12.0)*month; | |
if(day!=null) | |
{ | |
v+=day; | |
} | |
} | |
return v; | |
} | |
} | |
private class Author | |
{ | |
String url; | |
String name; | |
String birthPlace; | |
Date birthDate; | |
String deathPlace; | |
Date deathDate; | |
String gender; | |
String shortBio; | |
String depiction; | |
Dimension iconSize; | |
int y; | |
public double x1() | |
{ | |
return convertDate2Pixel(birthDate); | |
} | |
public double x2() | |
{ | |
return convertDate2Pixel(deathDate); | |
} | |
void writeXML(XMLStreamWriter w) throws XMLStreamException | |
{ | |
w.writeStartElement("a"); | |
w.writeAttribute("xlink:href", this.url); | |
w.writeAttribute("xlink:target","_blank"); | |
w.writeStartElement("g"); | |
w.writeAttribute("title",String.valueOf(name)); | |
w.writeAttribute("transform", "translate("+x1()+","+(MARGIN+y*(AUTHOR_HEIGHT+MARGIN))+")"); | |
w.writeStartElement("rect"); | |
w.writeAttribute("style", "fill:black;stroke:white;"); | |
w.writeAttribute("height", String.valueOf(AUTHOR_HEIGHT)); | |
w.writeAttribute("width", String.valueOf(x2()-x1())); | |
w.writeEndElement();//rect | |
double textLength=(x2()-x1())-(MARGIN/2); | |
int shift=MARGIN; | |
if(this.iconSize!=null) | |
{ | |
w.writeEmptyElement("image"); | |
w.writeAttribute("x", String.valueOf(MARGIN+(ICON_SIZE-this.iconSize.width)/2)); | |
w.writeAttribute("y", String.valueOf(MARGIN+(ICON_SIZE-this.iconSize.height)/2)); | |
w.writeAttribute("width", String.valueOf(this.iconSize.width)); | |
w.writeAttribute("height", String.valueOf(this.iconSize.height)); | |
w.writeAttribute("xlink:href",this.depiction); | |
shift+=(ICON_SIZE+MARGIN); | |
textLength-=(ICON_SIZE+MARGIN); | |
} | |
w.writeStartElement("g"); | |
w.writeAttribute("transform", "translate("+shift+",0)"); | |
w.writeAttribute("style", "stroke:white;fill:white;font-size:14pt;font-weight:normal;"); | |
w.writeStartElement("text"); | |
w.writeAttribute("x", "0"); | |
w.writeAttribute("y", "18"); | |
w.writeCharacters(this.name+" ("+birthDate.year+" / "+this.deathDate.year+")"); | |
w.writeEndElement(); | |
if(this.shortBio==null) this.shortBio=""; | |
//note: 123 chars/600px | |
// 0.25char/px | |
String biography=shortBio; | |
int posY=40; | |
int maxCharParLine=(int)(textLength*0.2); | |
while(biography.length()>0 && posY+10 < AUTHOR_HEIGHT) | |
{ | |
String s=biography; | |
if(s.length()>maxCharParLine) s=biography.substring(0,maxCharParLine); | |
w.writeStartElement("text"); | |
w.writeAttribute("style", "font-size:50%;"); | |
w.writeAttribute("x", "0"); | |
w.writeAttribute("y", String.valueOf(posY)); | |
w.writeCharacters(s); | |
w.writeEndElement(); | |
posY+=11; | |
biography=biography.substring(s.length()); | |
} | |
w.writeEndElement();//g | |
w.writeEndElement();//g | |
w.writeEndElement();//a | |
} | |
} | |
/**
 * Creates the DOM parser and the XPath engine used to read the pages and
 * the RDF records.
 *
 * The parser is namespace aware and non-validating; every external
 * entity it is asked to resolve is replaced by an empty document. The
 * XPath engine knows the prefixes registered in {@code prefix2uri}.
 *
 * @throws Exception if the XML parser cannot be configured
 */
private BNFTimeline() throws Exception
{
    DocumentBuilderFactory f=DocumentBuilderFactory.newInstance();
    f.setCoalescing(true);
    f.setNamespaceAware(true);
    f.setValidating(false);
    f.setExpandEntityReferences(true);
    f.setIgnoringComments(false);
    f.setIgnoringElementContentWhitespace(true);
    this.docBuilder=f.newDocumentBuilder();
    this.docBuilder.setEntityResolver(new EntityResolver()
    {
        @Override
        public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException
        {
            // answer with an empty document instead of letting the parser fetch the entity
            LOG.info("resolve "+publicId+" "+systemId);
            return new InputSource(new StringReader(""));
        }
    });

    this.prefix2uri.put("h", HTML);
    this.prefix2uri.put("cubicweb", CubicWeb);
    this.prefix2uri.put(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
    this.prefix2uri.put(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
    this.prefix2uri.put("dc","http://purl.org/dc/terms/");
    this.prefix2uri.put("owl","http://www.w3.org/2002/07/owl#");
    this.prefix2uri.put("foaf","http://xmlns.com/foaf/0.1/");
    this.prefix2uri.put("rdagroup2elements","http://RDVocab.info/ElementsGr2/");
    this.prefix2uri.put("rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    this.prefix2uri.put("skos","http://www.w3.org/2004/02/skos/core#");
    this.prefix2uri.put("xfoaf","http://www.foafrealm.org/xfoaf/0.1/");

    XPathFactory xpathFactory=XPathFactory.newInstance();
    this.xpath=xpathFactory.newXPath();
    this.xpath.setNamespaceContext(new NamespaceContext()
    {
        /** Returns only the prefixes bound to the given namespace URI. */
        @SuppressWarnings({ "rawtypes", "unchecked" })
        @Override
        public Iterator getPrefixes(String namespaceURI)
        {
            List<String> bound=new ArrayList<String>();
            for(Map.Entry<String, String> entry:prefix2uri.entrySet())
            {
                if(entry.getValue().equals(namespaceURI)) bound.add(entry.getKey());
            }
            return bound.iterator();
        }

        /** Returns one prefix bound to the given namespace URI, or null when there is none. */
        @Override
        public String getPrefix(String ns)
        {
            for(Map.Entry<String, String> entry:prefix2uri.entrySet())
            {
                if(entry.getValue().equals(ns)) return entry.getKey();
            }
            return null;
        }

        /** Returns the URI bound to the prefix, or the null namespace for an unknown prefix. */
        @Override
        public String getNamespaceURI(String prefix)
        {
            String u=prefix2uri.get(prefix);
            return (u!=null?u:XMLConstants.NULL_NS_URI);
        }
    });
}
private int getScreenWidthInPixel() | |
{ | |
return 15000; | |
} | |
private double convertDate2Pixel(Date d) | |
{ | |
return getScreenWidthInPixel()*((d.days()-minDays)/((double)this.maxDays-(double)this.minDays)); | |
} | |
private void parse() throws Exception | |
{ | |
Tidy tidy = new Tidy(); | |
tidy.setXHTML(true); | |
File xmlFile=File.createTempFile("_tmp", ".xml"); | |
xmlFile.deleteOnExit(); | |
final String prefix="http://data.bnf.fr/"; | |
int pageIndex=1; | |
XPathExpression expr=this.xpath.compile(".//h:li/h:a[@href]"); | |
List<Author> authors=new ArrayList<Author>(); | |
//scan each index | |
for(;;) | |
{ | |
boolean found=false; | |
URL url=new URL("http://data.bnf.fr/liste-auteurs/page"+pageIndex); | |
LOG.info(url.toString()); | |
FileOutputStream fout=new FileOutputStream(xmlFile); | |
InputStream in=url.openStream(); | |
tidy.parse(in,fout); | |
in.close(); | |
fout.flush(); | |
fout.close(); | |
Document dom=this.docBuilder.parse(xmlFile); | |
NodeList L=(NodeList)expr.evaluate(dom, XPathConstants.NODESET); | |
for(int i=0;i< L.getLength();++i) | |
{ | |
String href=Element.class.cast(L.item(i)).getAttribute("href"); | |
if(!href.startsWith(prefix)) continue; | |
if(!href.substring(prefix.length()).matches("[0-9]+/[a-z\\-A-Z_0-9]+/")) | |
{ | |
LOG.info("ignoring "+href); | |
continue; | |
} | |
Author author=new Author(); | |
author.url=href; | |
authors.add(author); | |
found=true; | |
} | |
in.close(); | |
if(!found) break; | |
++pageIndex; | |
} | |
xmlFile.delete(); | |
int index=0; | |
while(index< authors.size()) | |
{ | |
Author author=authors.get(index); | |
LOG.info(author.url+"rdf.xml"); | |
Document dom=this.docBuilder.parse(author.url+"rdf.xml"); | |
Element root=(Element)xpath.evaluate("rdf:RDF/rdf:Description[rdf:type/@rdf:resource='http://xmlns.com/foaf/0.1/Person']",dom,XPathConstants.NODE); | |
if(root==null) | |
{ | |
authors.remove(index); | |
continue; //e.g. "Academie Fr" | |
} | |
author.name=(String)xpath.evaluate("dc:title[1]", root,XPathConstants.STRING); | |
author.birthDate= parseDate((String)xpath.evaluate("rdagroup2elements:dateOfBirth", root,XPathConstants.STRING)); | |
author.birthPlace = (String)xpath.evaluate("rdagroup2elements:placeOfBirth", root,XPathConstants.STRING); | |
author.deathDate = parseDate((String)xpath.evaluate("rdagroup2elements:dateOfDeath", root,XPathConstants.STRING)); | |
author.deathPlace = (String)xpath.evaluate("rdagroup2elements:placeOfDeath", root,XPathConstants.STRING); | |
author.gender = (String)xpath.evaluate("foaf:gender", root,XPathConstants.STRING); | |
author.shortBio = (String)xpath.evaluate("rdagroup2elements:biographicalInformation", root,XPathConstants.STRING); | |
author.depiction=(String)xpath.evaluate("foaf:depiction/@rdf:resource",root,XPathConstants.STRING); | |
if(author.birthDate==null || author.deathDate==null | |
|| author.deathDate.year<1400 || author.birthDate.year<1400)//TODO | |
{ | |
authors.remove(index); | |
continue; | |
} | |
if(author.depiction!=null && !author.depiction.trim().isEmpty()) | |
{ | |
author.iconSize=getDepictionSize(author.depiction); | |
} | |
if(this.minDays==null || author.birthDate.days()<this.minDays) | |
{ | |
this.minDays= author.birthDate.days(); | |
} | |
if(this.maxDays==null || author.deathDate.days()>this.maxDays) | |
{ | |
this.maxDays= author.deathDate.days(); | |
} | |
++index; | |
} | |
this.minDays-=360; | |
this.maxDays+=360; | |
//sort persons on birth-date/death-date | |
Collections.sort(authors, new Comparator<Author>() | |
{ | |
@Override | |
public int compare(Author o1, Author o2) | |
{ | |
int i=o1.birthDate.compareTo(o2.birthDate); | |
if(i!=0) return i; | |
return o1.deathDate.compareTo(o2.deathDate); | |
} | |
}); | |
List<Author> remains=new ArrayList<Author>(authors); | |
int nLine=-1; | |
while(!remains.isEmpty()) | |
{ | |
++nLine; | |
Author first=remains.remove(0); | |
first.y=nLine; | |
while(true) | |
{ | |
Author best=null; | |
int bestIndex=-1; | |
for(int i=0;i< remains.size();++i) | |
{ | |
Author next=remains.get(i); | |
if(next.x1()< first.x2()+5) continue; | |
if(best==null || | |
(next.x1()-first.x2() < best.x1()-first.x2())) | |
{ | |
best=next; | |
bestIndex=i; | |
} | |
} | |
if(best==null) break; | |
first=best; | |
first.y=nLine; | |
remains.remove(bestIndex); | |
} | |
} | |
FileOutputStream fout=new FileOutputStream("output.svg"); | |
XMLOutputFactory xmlfactory= XMLOutputFactory.newInstance(); | |
XMLStreamWriter w= xmlfactory.createXMLStreamWriter(fout,"UTF-8"); | |
w.writeStartDocument("UTF-8","1.0"); | |
w.writeStartElement("svg"); | |
w.writeAttribute("xmlns", SVG); | |
w.writeAttribute("xmlns:xlink","http://www.w3.org/1999/xlink"); | |
w.writeAttribute("version","1.1"); | |
w.writeAttribute("width",String.valueOf(getScreenWidthInPixel())); | |
w.writeAttribute("height",String.valueOf(MARGIN+((nLine+1)*(AUTHOR_HEIGHT+MARGIN)))); | |
w.writeAttribute("style", "fill:none;stroke:black;stroke-width:1px;"); | |
w.writeEmptyElement("rect"); | |
w.writeAttribute("x","0"); | |
w.writeAttribute("y","0"); | |
w.writeAttribute("width",String.valueOf(getScreenWidthInPixel()-1)); | |
w.writeAttribute("height",String.valueOf(MARGIN+((nLine+1)*(AUTHOR_HEIGHT+MARGIN))-1)); | |
w.writeAttribute("style", "fill:lightgray;stroke:black;"); | |
for(Author author:authors) | |
{ | |
author.writeXML(w); | |
} | |
w.writeEndDocument();//svg | |
w.close(); | |
fout.flush(); | |
fout.close(); | |
} | |
private Dimension getDepictionSize(String resourceFile) throws Exception | |
{ | |
BufferedImage img=ImageIO.read(new URL(resourceFile)); | |
Dimension d= new Dimension(img.getWidth(),img.getHeight()); | |
if(d.getWidth()< d.getHeight()) | |
{ | |
double ratio= d.getWidth()/(double)d.getHeight();//<0 | |
int len= (int)(ICON_SIZE*ratio); | |
d.width=len; | |
d.height=ICON_SIZE; | |
} | |
else | |
{ | |
double ratio= d.getHeight()/(double)d.getWidth();//<0 | |
int len= (int)(ICON_SIZE*ratio); | |
d.height=len; | |
d.width=ICON_SIZE; | |
} | |
return d; | |
} | |
private Date parseDate(String s) | |
{ | |
if(s==null || s.isEmpty()) return null; | |
Date d=new Date(); | |
d.literal=s; | |
s=s.trim(); | |
if(s.startsWith("-")) | |
{ | |
return null; | |
} | |
if(s.matches("[0-3][0-9]\\-[0-1][0-9]\\-[0-9][0-9][0-9][0-9]")) | |
{ | |
String tokens[]=s.split("[\\-]"); | |
d.day=Integer.parseInt(tokens[0]); | |
d.month=Integer.parseInt(tokens[1]); | |
d.year=Integer.parseInt(tokens[2]); | |
} | |
else if(s.matches("[0-1][0-9]\\-[0-9][0-9][0-9][0-9]")) | |
{ | |
String tokens[]=s.split("[\\-]"); | |
d.month=Integer.parseInt(tokens[0]); | |
d.year=Integer.parseInt(tokens[1]); | |
} | |
else if(s.matches("[0-9]{1,4}")) | |
{ | |
d.year=Integer.parseInt(s); | |
} | |
else | |
{ | |
return null; | |
} | |
return d; | |
} | |
public static void main(String[] args) { | |
try | |
{ | |
BNFTimeline app=new BNFTimeline(); | |
int optind=0; | |
while(optind< args.length) | |
{ | |
if(args[optind].equals("-h") || | |
args[optind].equals("-help") || | |
args[optind].equals("--help")) | |
{ | |
System.err.println("Options:"); | |
System.err.println(" -h help; This screen."); | |
return; | |
} | |
else if(args[optind].equals("-L")) | |
{ | |
} | |
else if(args[optind].equals("--")) | |
{ | |
optind++; | |
break; | |
} | |
else if(args[optind].startsWith("-")) | |
{ | |
System.err.println("Unknown option "+args[optind]); | |
return; | |
} | |
else | |
{ | |
break; | |
} | |
++optind; | |
} | |
if(optind!=args.length) | |
{ | |
System.err.println("Illegal number of arguments."); | |
return; | |
} | |
app.parse(); | |
} | |
catch(Throwable err) | |
{ | |
err.printStackTrace(); | |
} | |
} | |
} |
See also: Freebase and the History of Sciences
That's it,
Pierre
No comments:
Post a Comment