?? strawmanparser.java
字號:
package edu.stanford.db.rdf.syntax.strawman;import org.w3c.rdf.model.*;import org.w3c.rdf.syntax.*;import org.w3c.rdf.vocabulary.rdf_syntax_19990222.RDF;import org.w3c.rdf.vocabulary.rdf_schema_19990303.RDFS;import org.w3c.rdf.util.*;import edu.stanford.db.xml.util.*;import edu.stanford.db.rdf.syntax.generic.*;import org.w3c.rdf.digest.*;import org.xml.sax.*;import org.xml.sax.helpers.*;import java.util.*;import java.io.*;import java.net.*;/** * A parser for a simplified syntax for RDF. Supports arbitrary XML files. * Uses three control attributes: * <ol> * <li><code>rdf:instance</code>: specifes that a tag denotes an instance of a class</li> * <li><code>rdf:for</code>: specifes the subject URI of the property</li> * <li><code>rdf:resource</code>: specifes the object URI of the property</li> * </ol> * * @author Sergey Melnik <melnik@db.stanford.edu> */public class StrawmanParser extends GenericXML2RDF { public static final String REVISION = "Strawman RDF parser v0.3 2000-10-31"; // namespace of extension attributes public static final String _Namespace = "http://interdataworking.com/vocabulary/strawman-20000408#"; public static final QName RDF_INSTANCE = createQName(_Namespace, "instance"); public static final QName RDF_RESOURCE = createQName(_Namespace, "resource"); public static final QName RDF_FOR = createQName(_Namespace, "for"); int shift = 0; // used to track where strawman syntax started withing XML tree int strawDepth = 0; // we either use digests or generate "genid"s boolean useDigests = false; // if we are not using digests, we need a genid counter static int genidCounter = 0; public StrawmanParser () { super(); } public StrawmanParser (boolean useDigests, boolean warn) { super(warn); this.useDigests = useDigests; } boolean isProperty(Element el) { // StrawElement se = (StrawElement)el; return el.getAttribute(RDF_FOR) != null || (el.getParent() != null /*((StrawElement)el).depth > 0*/ && el.getAttribute(RDF_INSTANCE) == null); } StrawElement getParent(Element e) { return (StrawElement)e.getParent(); } Resource getArcFromParent(Element e) { return ((StrawElement)e).arcFromParent; } int depth(Element e) { return ((StrawElement)e).depth; } void collectCDATA(StrawElement p) throws ModelException { // check for deferred CDATA if(p.getValue().length() > 0 || (p.getType() == p.EMPTY && p.arcs.size() == 0 && p.getAttribute(RDF_RESOURCE) == null)) { p.addArc(nodeFactory.createOrdinal(p.next()), nodeFactory.createLiteral(p.getValue())); p.setValue(p.EMPTY_STR); } } /* Resource uri2resource(String uri) throws ModelException { if(uri.startsWith("#")) uri = getSourceURI() + uri; return nodeFactory.createResource(uri); } */ byte[] getDigest(Object obj) throws DigestException { if(obj instanceof Digestable) return ((Digestable)obj).getDigest().getDigestBytes(); if(obj instanceof StrawElement) { StrawElement se = (StrawElement)obj; if(se.anonymous) return se.digestPath; else return getDigest(se.subject); } throw new RuntimeException("No digest for " + obj); } public void startElement (String uri, String name, String qname, Attributes al) throws SAXException { _startElement(uri, name, al); try { StrawElement current = (StrawElement)this.current; // set subject if possible. If anonymous do nothing String inst = current.getAttribute(RDF_INSTANCE); String res = current.getAttribute(RDF_RESOURCE); String fr = current.getAttribute(RDF_FOR); // System.out.println("" + inst + " " + res + " " + fr); if(strawDepth == 0 && inst == null && res == null && fr == null) return; else strawDepth++; StrawElement parent = getParent(current); if(inst != null && res != null) throw new SAXException("Invalid XML element: attributes 'instance' and 'resource' are mutually exclusive!"); if(inst != null && inst.length() > 0) current.subject = createResource(inst); else if(res != null && res.length() > 0) current.subject = createResource(res); // else if(fr != null && fr.length() > 0) // FIXME // current.subject = createResource(fr); else current.anonymous = true; // collect non-RDF attributes Enumeration attrs = current.getAttributes(); while(attrs.hasMoreElements()) { QName aName = (QName)attrs.nextElement(); // System.err.println("=== attr: " + aName); if(!(aName.equals(RDF_RESOURCE) || aName.equals(RDF_INSTANCE) || aName.equals(RDF_FOR))) current.addArc(createResource(aName), nodeFactory.createLiteral(current.getAttribute(aName))); } if(fr != null) { // node with rdf:for. Loose context. Set depth of this node to 0. // create an artificial node StrawElement el = new StrawElement(); if(parent != null) parent.setChild(el); el.setChild(current); el.setAttribute(RDF_FOR, fr); current.setParent(el); el.setParent(parent); if(fr.length() > 0) el.subject = createResource(fr); else el.anonymous = true; // current.setChild(el); // el.setParent(current); // current = el; current.arcFromParent = createResource(current.getName()); el.addArc(current.arcFromParent, current); current.depth = 1; if(useDigests) current.digestPath = getDigest(current.arcFromParent); // el.arcFromParent = parent.next(); // parent.addArc(el.arcFromParent, el); // if(res != null && res.length() > 0) // current.subject = createResource(res); } else { // no rdf:for if(parent != null) { collectCDATA(parent); // set path from parent to current: // determine whether current is property or instance element if(isProperty(current)) { current.arcFromParent = createResource(current.getName()); } else { // arc from parent is next ordinal current.arcFromParent = nodeFactory.createOrdinal(parent.next()); } parent.addArc(current.arcFromParent, current); current.depth = parent.depth + 1; if(useDigests) { // set path digest byte[] d = getDigest(getArcFromParent(current)); if(depth(current) == 1) { current.digestPath = d; } else { // System.err.println("--Current: " + current.getName() + ", depth=" + current.depth); current.digestPath = new byte[parent.digestPath.length]; System.arraycopy(parent.digestPath, 0, current.digestPath, 0, parent.digestPath.length); DigestUtil.xor(current.digestPath, d, depth(current)-1); } } } } } catch (Exception exc) { // ModelException or DigestException throw new SAXException(exc); } // System.out.println("Start element: " + current.getName()); } public void endElement (String uri, String name, String qName) throws SAXException { finishElement(); if(current.getAttribute(RDF_FOR) != null) { // remove the artificial node this.current = current.getParent(); finishElement(); } if(strawDepth > 0) strawDepth--; _endElement(uri, name); } void finishElement() throws SAXException { // System.out.println("End element: " + current.getName()); try { StrawElement current = (StrawElement)this.current; boolean done = false; collectCDATA(current); // normalize arcs Vector arcs = current.arcs; // if only one arc _1 to a literal (no _2) replace it with rdf:value StrawArc firstArc = findArc(arcs, nodeFactory.createOrdinal(1)); if(firstArc != null && firstArc.object() instanceof Literal && findArc(arcs, nodeFactory.createOrdinal(2)) == null) { firstArc.predicate = RDF.value; } if(arcs.size() == 1) { StrawArc singleArc = (StrawArc)arcs.elementAt(0); if(current.anonymous && singleArc.predicate().equals(RDF.value) && isProperty(current)) { // replace this node in parent's arc list by the literal // System.err.println("===REPLACING in " + current.getName()); Vector parcs = getParent(current).arcs; StrawArc parc = findArc(parcs, current); if(parc != null) { // System.err.println("===FOUND at " + i); // FIXME: singleArc.object must be a literal parc.object = singleArc.object(); done = true; } } } if(!done) { if(!isProperty(current)) current.addArc(RDF.type, createResource(current.getName())); // createStatement(current.subject, RDF.type, createResource(current.getName())); if(current.anonymous) { if(useDigests) { // compute subject from digest for(int i=0; i < arcs.size(); i++) { StrawArc arc = (StrawArc)arcs.elementAt(i); updateDigest(current, arc); // System.out.println(" node " + current.getName() + " at depth " + current.depth + " adding " + arc.predicate() + "=" + arc.object()); } current.subject = createResource(getSourceURI() + "#", RDFDigestUtil.getDigestAlgorithm() + "-" + DigestUtil.toHexString(current.digestPath)); } else // use genid current.subject = createResource(getSourceURI() + "#", "genid-" + (++genidCounter)); } // create triple for rdf:instance // String inst = current.getAttribute(RDF_INSTANCE); // generate triples for the arcs for(int i = 0; i < arcs.size(); i++) { StrawArc arc = (StrawArc)arcs.elementAt(i); Object obj = arc.object(); RDFNode object = obj instanceof RDFNode ? (RDFNode)obj : ((StrawElement)obj).subject; if(current.subject != null) // == null for the top non-RDF node containing an RDF node createStatement(current.subject, arc.predicate(), object); } // destroy arcs current.arcs = null; } } catch (Exception exc) { // ModelException or DigestException throw new SAXException(exc); } } protected void updateDigest(StrawElement current, StrawArc arc) throws DigestException { if(current.digestPath == null) // i.e. depth == 0 current.digestPath = getDigest(arc.predicate()); else DigestUtil.xor(current.digestPath, getDigest(arc.predicate()), current.depth); DigestUtil.xor(current.digestPath, getDigest(arc.object()), current.depth + 1); } protected Statement createStatement(Resource subject, Resource predicate, RDFNode object) throws ModelException { if(!(predicate.equals(RDF.type) && object.equals(RDFS.Resource))) { return super.createStatement(subject, predicate, object); // consumer.addStatement(nodeFactory.createStatement(subject, predicate, object)); } return null; } StrawArc findArc(Vector arcs, Resource predicate) { for(int i = 0; i < arcs.size(); i++) { StrawArc arc = (StrawArc)arcs.elementAt(i); if(arc.predicate().equals(predicate)) return arc; } return null; } StrawArc findArc(Vector arcs, StrawElement element) { for(int i = 0; i < arcs.size(); i++) { StrawArc arc = (StrawArc)arcs.elementAt(i); if(arc.object() == element) return arc; } return null; } protected Element createElement() { return new StrawElement(); } static void bailOut() { System.err.println("Usage: java -Dorg.xml.sax.parser=<classname> org.w3c.rdf.syntax.strawman.StrawmanParser " + "[-d] <URI | filename>"); System.err.println ("This is revision " + REVISION); System.exit(1); } public static void main (String args[]) throws Exception { boolean useDigests = false; String fileOrURI = null; for(int i = 0; i < args.length; i++) { String a = args[i]; if(a.startsWith("-d")) useDigests = true; else { fileOrURI = a; if(i+1 < args.length) bailOut(); } } if(fileOrURI == null) bailOut(); _main(fileOrURI, new StrawmanParser(useDigests, true)); }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -