// html2txt.java
// (Residue from the web code viewer this file was copied from: "Font size:" control.)
/*
* Copyright (c) 2001 Shiraz Kanga. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
// Recurse through a directory of html files and convert them to text.
/**
 * Command-line utility that walks a directory tree and, for every file whose
 * name ends in "htm" or "html" (case-insensitive), writes a plain-text copy
 * next to it under the name <code>fileName + ".txt"</code>. Everything between
 * a '&lt;' and the following '&gt;' is dropped and a handful of common
 * character entities are decoded.
 */
public class html2txt
{
    /**
     * Scanner state shared by successive {@link #filterLine} calls:
     * <code>true</code> while outside a tag, <code>false</code> between a
     * '&lt;' and its closing '&gt;'. It is static so that a tag spanning
     * several calls is still removed; {@link #doFile} resets it per file.
     */
    static boolean inText = true;

    /** Platform path separator, used to build the paths of directory entries. */
    static String fileSeparator = System.getProperty ("file.separator");

    /** Number of files successfully converted by {@link #doFile} so far. */
    static int numFiles = 0;

    /**
     * Replaces every non-overlapping occurrence of <code>oldString</code> in
     * <code>currLine</code> with <code>newString</code>, scanning left to
     * right. Replacement text is never re-scanned.
     *
     * @param currLine  the text to search; returned unchanged when null
     * @param oldString the text to look for; when null or empty nothing is
     *                  replaced (an empty pattern matches everywhere and
     *                  previously made this method loop forever)
     * @param newString the replacement; null is treated as the empty string
     * @return the text with all replacements applied
     */
    static String strReplace (String currLine, String oldString, String newString)
    {
        if (currLine == null || oldString == null || oldString.length () == 0)
            return currLine;
        if (newString == null)
            newString = "";

        int match = currLine.indexOf (oldString);
        if (match < 0)
            return currLine; // common case: nothing to replace, no copy needed

        // Build the result in one pass instead of re-copying the whole
        // string for every occurrence.
        StringBuffer result = new StringBuffer (currLine.length ());
        int copiedUpTo = 0;
        while (match >= 0)
        {
            result.append (currLine.substring (copiedUpTo, match));
            result.append (newString);
            copiedUpTo = match + oldString.length ();
            match = currLine.indexOf (oldString, copiedUpTo);
        }
        result.append (currLine.substring (copiedUpTo));
        return result.toString ();
    }

    /**
     * The actual filter: removes everything between a '&lt;' and a '&gt;'
     * (both inclusive) and then decodes the entities {@code &nbsp;},
     * {@code &lt;}, {@code &gt;}, {@code &quot;} and {@code &amp;}.
     * <p>
     * The in-tag/out-of-tag state lives in {@link #inText} and carries over
     * from one call to the next. A '&gt;' that appears outside a tag is
     * dropped as well.
     *
     * @param currLine the HTML text to filter (a line or a whole document)
     * @return the filtered text, or null when <code>currLine</code> is null
     * @throws IOException never thrown by this implementation; kept in the
     *                     signature for existing callers
     */
    public static final String filterLine (String currLine) throws IOException
    {
        if (currLine == null)
            return null;

        StringBuffer currBuf = new StringBuffer (currLine.length ());
        for (int i = 0; i < currLine.length (); i++)
        {
            char c = currLine.charAt (i);
            if (c == '<')
                inText = false;
            else if (c == '>')
                inText = true;
            else if (inText)
                currBuf.append (c);
        }

        // Decode the most common entities. "&amp;" must come last so that an
        // escaped entity such as "&amp;lt;" yields the literal text "&lt;"
        // instead of being decoded twice.
        String text = currBuf.toString ();
        text = strReplace (text, "&nbsp;", " ");
        text = strReplace (text, "&lt;", "<");
        text = strReplace (text, "&gt;", ">");
        text = strReplace (text, "&quot;", "\"");
        text = strReplace (text, "&amp;", "&");
        return text;
    }

    /**
     * Converts one HTML file: reads it, filters it and writes the result to
     * <code>fileName + ".txt"</code>. I/O errors are reported on standard
     * output (with a stack trace on standard error) and do not propagate.
     *
     * @param fileName path of the HTML file to convert
     */
    public static void doFile (String fileName)
    {
        try
        {
            File inFile = new File (fileName);
            String inputText = readFile (inFile);

            // Each file starts outside of any tag. Without this reset, a
            // previous file that ended inside an unterminated '<' would make
            // the beginning of this file disappear.
            inText = true;
            String outputText = filterLine (inputText);

            File outFile = new File (fileName + ".txt");
            writeFile (outFile, outputText);

            // Count and report only once the output has actually been written.
            numFiles ++;
            System.out.println ("Processed file \"" + fileName + "\"");
        }
        catch (IOException e)
        {
            System.out.println ("ERROR: I/O Exception while processing file \"" +
                                fileName + "\"");
            e.printStackTrace ();
        }
    }

    /**
     * Writes a String to the file with the given name, replacing any
     * existing content.
     *
     * @param fileName path of the file to write
     * @param text     the content to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile (String fileName, String text)
        throws IOException, FileNotFoundException
    {
        stringToWriter (new BufferedWriter (new FileWriter (fileName)), text);
    }

    /**
     * Writes a String to the given file, replacing any existing content.
     *
     * @param fileHandle the file to write
     * @param text       the content to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile (File fileHandle, String text)
        throws IOException, FileNotFoundException
    {
        stringToWriter (new BufferedWriter (new FileWriter (fileHandle)), text);
    }

    /**
     * Writes the entire contents of a String into a BufferedWriter and
     * closes the writer, whether or not the write succeeded.
     *
     * @param theWriter the writer to write to; always closed on return
     * @param text      the content to write
     * @throws IOException if writing or closing fails
     */
    public static void stringToWriter (BufferedWriter theWriter, String text)
        throws IOException
    {
        try
        {
            theWriter.write (text, 0, text.length ());
        }
        finally
        {
            // close() also flushes the buffer, so a failure here is a real
            // write failure and is allowed to propagate.
            theWriter.close ();
        }
    }

    /**
     * Reads the file with the given name into a String.
     *
     * @param fileName path of the file to read
     * @return the file content, every line terminated by "\n"
     * @throws IOException if the file cannot be opened or read
     */
    public static String readFile (String fileName)
        throws IOException, FileNotFoundException
    {
        return readerToString (new BufferedReader (new FileReader (fileName)));
    }

    /**
     * Reads the given file into a String.
     *
     * @param fileHandle the file to read
     * @return the file content, every line terminated by "\n"
     * @throws IOException if the file cannot be opened or read
     */
    public static String readFile (File fileHandle)
        throws IOException, FileNotFoundException
    {
        return readerToString (new BufferedReader (new FileReader (fileHandle)));
    }

    /**
     * Reads the entire contents of the BufferedReader into a String and
     * closes the reader, whether or not reading succeeded.
     *
     * @param theReader the reader to drain; may be null
     * @return all lines read, each followed by "\n"; the empty string when
     *         <code>theReader</code> is null
     * @throws IOException if reading or closing fails
     */
    public static String readerToString (BufferedReader theReader)
        throws IOException
    {
        if (theReader == null)
            return "";

        StringBuffer retVal = new StringBuffer (100000);
        try
        {
            String currLine;
            while ((currLine = theReader.readLine ()) != null)
            {
                retVal.append (currLine);
                // Line ends are normalised to UNIX style. Note that
                // fileSeparator is the path separator, not a line
                // terminator, so it must not be appended here.
                retVal.append ("\n");
            }
        }
        finally
        {
            theReader.close ();
        }
        return retVal.toString ();
    }

    /**
     * Main recursive routine: descends into every sub-directory of
     * <code>rootDir</code> and calls {@link #doFile} for each file whose
     * upper-cased name ends in "HTML" or "HTM". The test is on the bare
     * suffix, so names such as "page.shtml" match too. Does nothing when
     * <code>rootDir</code> cannot be listed.
     *
     * @param rootDir the directory to scan
     */
    private static void fileLister (String rootDir)
    {
        String [] fileList = new File (rootDir).list ();
        if (fileList == null)
            return;

        for (int i = fileList.length - 1; i >= 0; i--)
        {
            String entryName = fileList [i];
            String entryPath = rootDir + fileSeparator + entryName;
            if (new File (rootDir, entryName).isDirectory ())
            {
                fileLister (entryPath);
            }
            else
            {
                String upperName = entryName.toUpperCase ();
                if (upperName.endsWith ("HTML") || upperName.endsWith ("HTM"))
                    doFile (entryPath);
            }
        }
    }

    /**
     * Converts every matching file in the tree rooted at <code>rootDir</code>.
     *
     * @param rootDir the directory to start from
     */
    public static void getFiles (String rootDir)
    {
        fileLister (rootDir);
    }

    /**
     * Program entry point.
     *
     * @param args optional; <code>args[0]</code> is the directory to convert,
     *             the current directory is used when no argument is given
     */
    public static void main (String [] args)
    {
        // No directory argument supplied: use the current directory.
        String rootDir = (args.length == 0) ? "." : args [0];

        System.out.println ("Converting *.htm and *.html in directory \"" +
                            rootDir + "\" to text.");
        getFiles (rootDir);

        // All Done.
        System.out.println ("Done.");
    }
}
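// Residue from the web code viewer this file was copied from (not part of the program).
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -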