?? kittest.java
字號:
// HTMLParser Library $Name: v1_6_20051112 $ - A java-based parser for HTML// Copyright (C) August 26, 2003 Derrick Oswald//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java,v $// $Author: derrickoswald $// $Date: 2005/05/15 11:49:05 $// $Revision: 1.10 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.tests.lexerTests;import java.io.IOException;import java.net.URL;import java.util.Vector;import javax.swing.text.BadLocationException;import javax.swing.text.MutableAttributeSet;import javax.swing.text.html.HTML;import javax.swing.text.html.HTMLEditorKit;import javax.swing.text.html.HTMLEditorKit.Parser;import javax.swing.text.html.HTMLEditorKit.ParserCallback;import org.htmlparser.Attribute;import org.htmlparser.Node;import org.htmlparser.Tag;import org.htmlparser.nodes.AbstractNode;import org.htmlparser.lexer.Cursor;import org.htmlparser.lexer.Lexer;import org.htmlparser.util.ParserException;import org.htmlparser.util.Translate;/** * Compare output from javax.swing.text.html.HTMLEditorKit with Lexer. * This test provides a means of comparing the lexemes from * javax.swing.text.html.HTMLEditorKit.Parser class with the lexemes * produced by the org.htmlparser.lexer.Lexer class. 
* <blockquote> * The differences have eluded automation since the HTMLEditorKit parser * adds spurious nodes where it thinks elements need closing or it gets * confused. The intent is to eventually incorporate this into the * 'fit test' and run it against lots of HTML pages, but so far you must * analyse the differences by hand. * </blockquote> */public class KitTest extends ParserCallback{ Vector mNodes; int mIndex; /** * Creates a new instance of KitTest * @param nodes The list of lexemes from Lexer to compare with the kit lexemes. */ public KitTest (Vector nodes) { mNodes = nodes; mIndex = 0; } /** * Remove whitespace from a string. * @param s The string to crunch. * @return The string with whitespace characters removed. */ String snowhite (String s) { int length; char ch; StringBuffer ret; length = s.length (); ret = new StringBuffer (length); for (int i = 0; i < length; i++) { ch = s.charAt (i); if (!Character.isWhitespace (ch) && !(160 == ch)) ret.append (ch); } return (ret.toString ()); } /** * Check if two strings match. * @param s1 One string. * @param s2 The other string. * @return <code>true</code> if the strings are equivalent ignoring whitespace. */ boolean match (String s1, String s2) { s1 = snowhite (Translate.decode (s1)); s2 = snowhite (Translate.decode (s2)); return (s1.equalsIgnoreCase (s2)); } /** * Callback for a text lexeme. * @param data The text extracted from the page. * @param pos The position in the page. * <em>Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. 
This position is the character * position if the text from the page were displayed in a browser.</em> */ public void handleText (char[] data, int pos) { StringBuffer sb; String theirs; Node node; int match; String ours; sb = new StringBuffer (data.length); for (int i = 0; i < data.length; i++) { if (160 == data[i]) sb.append (" "); else sb.append (data[i]); } theirs = sb.toString (); match = -1; for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) { node = (Node)mNodes.elementAt (i); ours = node.getText (); if (match (theirs, ours)) { match = i; break; } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); }// System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for a remark lexeme. * @param data The text extracted from the page. * @param pos The position in the page. * <em>Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. 
This position is the character * position if the text from the page were displayed in a browser.</em> */ public void handleComment (char[] data, int pos) { StringBuffer sb; String theirs; Node node; int match; String ours; sb = new StringBuffer (data.length); sb.append (data); theirs = sb.toString (); match = -1; for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) { node = (Node)mNodes.elementAt (i); ours = node.getText (); if (match (theirs, ours)) { match = i; break; } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); }// System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for a start tag lexeme. * @param t The tag extracted from the page. * @param a The attributes parsed out of the tag. * @param pos The position in the page. * <em>Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. 
This position is the character
     * position if the text from the page were displayed in a browser.</em>
     */
    public void handleStartTag (HTML.Tag t, MutableAttributeSet a, int pos)
    {
        // Looks at most 25 nodes ahead of mIndex for a lexer Tag whose first
        // attribute name agrees with the kit's tag name according to match().
        // On success any non-blank nodes that had to be skipped are printed
        // and mIndex moves past the matched node; on failure both sides are
        // printed and mIndex is left alone.
        String theirs;
        Node node;
        int limit;
        int match;
        String ours;

        theirs = t.toString ();

        match = -1;
        limit = Math.min (mIndex + 25, mNodes.size ());
        for (int i = mIndex; i < limit; i++)
        {
            node = (Node)mNodes.elementAt (i);
            if (node instanceof Tag)
            {
                // the name is read from the first entry of the tag's attribute list
                ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName ();
                if (match (theirs, ours))
                {
                    match = i;
                    break;
                }
            }
        }

        if (-1 == match)
        {
            System.out.println ("theirs: " + theirs);
            // the kit may call back after every lexer node has been consumed
            // (or the list may be empty); only index mNodes while in range
            if (mIndex < mNodes.size ())
            {
                node = (Node)mNodes.elementAt (mIndex);
                ours = node.getText ();
                Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
                System.out.println ("ours " + cursor + ": " + ours);
            }
            else
                System.out.println ("ours: (no more nodes)");
        }
        else
        {
            boolean skipped = false;
            for (int i = mIndex; i < match; i++)
            {
                ours = ((Node)mNodes.elementAt (i)).toHtml ();
                // whitespace-only nodes are skipped silently
                if (0 != ours.trim ().length ())
                {
                    if (!skipped)
                        System.out.println ("skipping:");
                    System.out.println (ours);
                    skipped = true;
                }
            }
            if (skipped)
            {
                System.out.println ("to match:");
                node = (Node)mNodes.elementAt (match);
                Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
                System.out.println ("@" + cursor + ": " + node.toHtml ());
            }
            // continue the comparison after the matched node
            mIndex = match + 1;
        }
    }

    /**
     * Callback for an end tag lexeme.
     * @param t The tag extracted from the page.
     * @param pos The position in the page.
     * <em>Note: This differs from the Lexer concept of position which is an
     * absolute location in the HTML input stream. This position is the character
     * position if the text from the page were displayed in a browser.</em>
     */
    public void handleEndTag (HTML.Tag t, int pos)
    {
        String theirs;
        Node node;
        int match;
        String ours;

        theirs = t.toString ();
        match = -1;
        for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++)
        {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -