?? dtdparser.java
字號:
package com.wutka.dtd;import java.util.*;import java.io.*;import java.net.*;/** Parses a DTD file and returns a DTD object * * @author Mark Wutka * @version $Revision: 1.19 $ $Date: 2002/10/01 12:48:47 $ by $Author: wutka $ */public class DTDParser implements EntityExpansion{ protected Scanner scanner; protected DTD dtd; protected Object defaultLocation;/** Creates a parser that will read from the specified Reader object */ public DTDParser(Reader in) { scanner = new Scanner(in, false, this); dtd = new DTD(); }/** Creates a parser that will read from the specified Reader object * @param in The input stream to read * @param trace True if the parser should print out tokens as it reads them * (used for debugging the parser) */ public DTDParser(Reader in, boolean trace) { scanner = new Scanner(in, trace, this); dtd = new DTD(); }/** Creates a parser that will read from the specified File object */ public DTDParser(File in) throws IOException { defaultLocation = in.getParentFile(); scanner = new Scanner(new BufferedReader(new FileReader(in)), false, this); dtd = new DTD(); }/** Creates a parser that will read from the specified File object * @param in The file to read * @param trace True if the parser should print out tokens as it reads them * (used for debugging the parser) */ public DTDParser(File in, boolean trace) throws IOException { defaultLocation = in.getParentFile(); scanner = new Scanner(new BufferedReader(new FileReader(in)), trace, this); dtd = new DTD(); }/** Creates a parser that will read from the specified URL object */ public DTDParser(URL in) throws IOException { //LAM: we need to set the defaultLocation to the directory where //the dtd is found so that we don't run into problems parsing any //relative external files referenced by the dtd. String file = in.getFile(); defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), file.substring(0, file.lastIndexOf('/') + 1)); scanner = new Scanner(new BufferedReader( new InputStreamReader(in.openStream())), false, this); dtd = new DTD(); }/** Creates a parser that will read from the specified URL object * @param in The URL to read * @param trace True if the parser should print out tokens as it reads them * (used for debugging the parser) */ public DTDParser(URL in, boolean trace) throws IOException { //LAM: we need to set the defaultLocation to the directory where //the dtd is found so that we don't run into problems parsing any //relative external files referenced by the dtd. String file = in.getFile(); defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), file.substring(0, file.lastIndexOf('/') + 1)); scanner = new Scanner(new BufferedReader( new InputStreamReader(in.openStream())), trace, this); dtd = new DTD(); }/** Parses the DTD file and returns a DTD object describing the DTD. This invocation of parse does not try to guess the root element (for efficiency reasons) */ public DTD parse() throws IOException { return parse(false); }/** Parses the DTD file and returns a DTD object describing the DTD. * @param guessRootElement If true, tells the parser to try to guess the root element of the document by process of elimination */ public DTD parse(boolean guessRootElement) throws IOException { Token token; for (;;) { token = scanner.peek(); if (token.type == Scanner.EOF) break; parseTopLevelElement(); } if (guessRootElement) { Hashtable roots = new Hashtable(); Enumeration e = dtd.elements.elements(); while (e.hasMoreElements()) { DTDElement element = (DTDElement) e.nextElement(); roots.put(element.name, element); } e = dtd.elements.elements(); while (e.hasMoreElements()) { DTDElement element = (DTDElement) e.nextElement(); if (!(element.content instanceof DTDContainer)) continue; Enumeration items = ((DTDContainer) element.content). getItemsVec(). elements(); while (items.hasMoreElements()) { removeElements(roots, dtd, (DTDItem) items.nextElement()); } } if (roots.size() == 1) { e = roots.elements(); dtd.rootElement = (DTDElement) e.nextElement(); } else { dtd.rootElement = null; } } else { dtd.rootElement = null; } return dtd; } protected void removeElements(Hashtable h, DTD dtd, DTDItem item) { if (item instanceof DTDName) { h.remove(((DTDName) item).value); } else if (item instanceof DTDContainer) { Enumeration e = ((DTDContainer) item).getItemsVec().elements(); while (e.hasMoreElements()) { removeElements(h, dtd, (DTDItem) e.nextElement()); } } } protected void parseTopLevelElement() throws IOException { Token token = scanner.get();// Is <? xxx ?> even valid in a DTD? I'll ignore it just in case it's there if (token.type == Scanner.LTQUES) { StringBuffer textBuffer = new StringBuffer(); for (;;) { String text = scanner.getUntil('?'); textBuffer.append(text); token = scanner.peek(); if (token.type == Scanner.GT) { scanner.get(); break; } textBuffer.append('?'); } DTDProcessingInstruction instruct = new DTDProcessingInstruction(textBuffer.toString()); dtd.items.addElement(instruct); return; } else if (token.type == Scanner.CONDITIONAL) { token = expect(Scanner.IDENTIFIER); if (token.value.equals("IGNORE")) { scanner.skipConditional(); } else { if (token.value.equals("INCLUDE")) { scanner.skipUntil('['); } else { throw new DTDParseException(scanner.getUriId(), "Invalid token in conditional: "+token.value, scanner.getLineNumber(), scanner.getColumn()); } } } else if (token.type == Scanner.ENDCONDITIONAL) { // Don't need to do anything for this token } else if (token.type == Scanner.COMMENT) { dtd.items.addElement( new DTDComment(token.value)); } else if (token.type == Scanner.LTBANG) { token = expect(Scanner.IDENTIFIER); if (token.value.equals("ELEMENT")) { parseElement(); } else if (token.value.equals("ATTLIST")) { parseAttlist(); } else if (token.value.equals("ENTITY")) { parseEntity(); } else if (token.value.equals("NOTATION")) { parseNotation(); } else { skipUntil(Scanner.GT); } } else {// MAW Version 1.17// Previously, the parser would skip over unexpected tokens at the// upper level. Some invalid DTDs would still show up as valid. throw new DTDParseException(scanner.getUriId(), "Unexpected token: "+ token.type.name+"("+token.value+")", scanner.getLineNumber(), scanner.getColumn()); } } protected void skipUntil(TokenType stopToken) throws IOException { Token token = scanner.get(); while (token.type != stopToken) { token = scanner.get(); } } protected Token expect(TokenType expected) throws IOException { Token token = scanner.get(); if (token.type != expected) { if (token.value == null) { throw new DTDParseException(scanner.getUriId(), "Expected "+expected.name+" instead of "+token.type.name, scanner.getLineNumber(), scanner.getColumn()); } else { throw new DTDParseException(scanner.getUriId(), "Expected "+expected.name+ " instead of "+ token.type.name+"("+token.value+")", scanner.getLineNumber(), scanner.getColumn()); } } return token; } protected void parseElement() throws IOException { Token name = expect(Scanner.IDENTIFIER); DTDElement element = (DTDElement) dtd.elements.get(name.value); if (element == null) { element = new DTDElement(name.value); dtd.elements.put(element.name, element); } else if (element.content != null) {// 070501 MAW: Since the ATTLIST tag can also cause an element to be created,// only throw this exception if the element has content defined, which// won't happen when you just create an ATTLIST. Thanks to// Jags Krishnamurthy of Object Edge for pointing out this problem - // originally the parser would let you define an element more than once. throw new DTDParseException(scanner.getUriId(), "Found second definition of element: "+name.value, scanner.getLineNumber(), scanner.getColumn()); } dtd.items.addElement(element); parseContentSpec(scanner, element); expect(Scanner.GT); } protected void parseContentSpec(Scanner scanner, DTDElement element) throws IOException { Token token = scanner.get(); if (token.type == Scanner.IDENTIFIER) { if (token.value.equals("EMPTY")) { element.content = new DTDEmpty(); } else if (token.value.equals("ANY")) { element.content = new DTDAny(); } else { throw new DTDParseException(scanner.getUriId(), "Invalid token in entity content spec "+ token.value, scanner.getLineNumber(), scanner.getColumn()); } } else if (token.type == Scanner.LPAREN) { token = scanner.peek(); if (token.type == Scanner.IDENTIFIER) { if (token.value.equals("#PCDATA")) { parseMixed(element); } else { parseChildren(element); } } else if (token.type == Scanner.LPAREN) { parseChildren(element); } } } protected void parseMixed(DTDElement element) throws IOException { // MAW Version 1.19 // Keep track of whether the mixed is #PCDATA only // Don't allow * after (#PCDATA), but allow after // (#PCDATA|foo|bar|baz)* boolean isPcdataOnly = true; DTDMixed mixed = new DTDMixed(); mixed.add(new DTDPCData()); scanner.get(); element.content = mixed; for (;;) { Token token = scanner.get(); if (token.type == Scanner.RPAREN) { token = scanner.peek(); if (token.type == Scanner.ASTERISK) { scanner.get(); mixed.cardinal = DTDCardinal.ZEROMANY; } else { if (!isPcdataOnly) { throw new DTDParseException(scanner.getUriId(), "Invalid token in Mixed content type, '*' required after (#PCDATA|xx ...): "+ token.type.name, scanner.getLineNumber(), scanner.getColumn()); } mixed.cardinal = DTDCardinal.NONE; } return; } else if (token.type == Scanner.PIPE) { token = scanner.get(); mixed.add(new DTDName(token.value)); // MAW Ver. 1.19 isPcdataOnly = false; } else { throw new DTDParseException(scanner.getUriId(), "Invalid token in Mixed content type: "+ token.type.name, scanner.getLineNumber(), scanner.getColumn()); } } } protected void parseChildren(DTDElement element) throws IOException { DTDContainer choiceSeq = parseChoiceSequence(); Token token = scanner.peek(); choiceSeq.cardinal = parseCardinality(); if (token.type == Scanner.QUES) { choiceSeq.cardinal = DTDCardinal.OPTIONAL; } else if (token.type == Scanner.ASTERISK) { choiceSeq.cardinal = DTDCardinal.ZEROMANY; } else if (token.type == Scanner.PLUS) { choiceSeq.cardinal = DTDCardinal.ONEMANY; } else { choiceSeq.cardinal = DTDCardinal.NONE; } element.content = choiceSeq;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -