?? heritrix.java
字號:
// Ok, we should now have everything to launch the program. String status = null; if (selfTest) { // If more than just '--selftest' and '--port' passed, then // there is confusion on what is being asked of us. Print usage // rather than proceed. for (int i = 0; i < options.length; i++) { if (options[i].getId() != 'p' && options[i].getId() != 's') { clp.usage(1); } } if (arguments.size() > 0) { // No arguments accepted by selftest. clp.usage(1); } status = selftest(selfTestName, Heritrix.guiPort); } else { if (!Heritrix.gui) { if (options.length > 1) { // If more than just '--nowui' passed, then there is // confusion on what is being asked of us. Print usage // rather than proceed. clp.usage(1); } Heritrix h = new Heritrix(true); status = h.doOneCrawl(crawlOrderFile); } else { if (!isValidLoginPasswordString(adminLoginPassword)) { clp.usage("Invalid admin login:password value, or none " + "specified. ", 1); } status = startEmbeddedWebserver( Heritrix.guiHosts, Heritrix.guiPort, adminLoginPassword); Heritrix h = new Heritrix(true); String tmp = h.launch(crawlOrderFile, runMode); if (tmp != null) { status += ('\n' + tmp); } } } return status; } /** * @return The file we dump stdout and stderr into. */ public static String getHeritrixOut() { String tmp = System.getProperty("heritrix.out"); if (tmp == null || tmp.length() == 0) { tmp = Heritrix.DEFAULT_HERITRIX_OUT; } return tmp; } /** * Exploit <code>-Dheritrix.home</code> if available to us. * Is current working dir if no heritrix.home property supplied. * @return Heritrix home directory. 
* @throws IOException */ protected static File getHeritrixHome() throws IOException { File heritrixHome = null; String home = System.getProperty("heritrix.home"); if (home != null && home.length() > 0) { heritrixHome = new File(home); if (!heritrixHome.exists()) { throw new IOException("HERITRIX_HOME <" + home + "> does not exist."); } } else { heritrixHome = new File(new File("").getAbsolutePath()); } return heritrixHome; } /** * @return The directory into which we put jobs. If the system property * 'heritrix.jobsdir' is set, we will use its value in place of the default * 'jobs' directory in the current working directory. * @throws IOException */ public static File getJobsdir() throws IOException { Heritrix.loadProperties(); // if called in constructor String jobsdirStr = System.getProperty("heritrix.jobsdir", "jobs"); File jobsdir = new File(jobsdirStr); return (jobsdir.isAbsolute())? jobsdir: new File(getHeritrixHome(), jobsdirStr); } /** * Get and check for existence of expected subdir. * * If development flag set, then look for dir under src dir. * * @param subdirName Dir to look for. * @return The extant subdir. Otherwise null if we're running * in a webapp context where there is no conf directory available. * @throws IOException if unable to find expected subdir. */ protected static File getSubDir(String subdirName) throws IOException { return getSubDir(subdirName, true); } /** * Get and optionally check for existence of subdir. * * If development flag set, then look for dir under src dir. * * @param subdirName Dir to look for. * @param fail True if we are to fail if directory does not * exist; false if we are to return false if the directory does not exist. * @return The extant subdir. Otherwise null if we're running * in a webapp context where there is no subdir directory available. * @throws IOException if unable to find expected subdir. 
*/ protected static File getSubDir(String subdirName, boolean fail) throws IOException { String path = isDevelopment()? "src" + File.separator + subdirName: subdirName; File dir = new File(getHeritrixHome(), path); if (!dir.exists()) { if (fail) { throw new IOException("Cannot find subdir: " + subdirName); } dir = null; } return dir; } /** * Test string is valid login/password string. * * A valid login/password string has the login and password compounded * w/ a ':' delimiter. * * @param str String to test. * @return True if valid password/login string. */ protected static boolean isValidLoginPasswordString(String str) { boolean isValid = false; StringTokenizer tokenizer = new StringTokenizer(str, ":"); if (tokenizer.countTokens() == 2) { String login = ((String)tokenizer.nextElement()).trim(); String password = ((String)tokenizer.nextElement()).trim(); if (login.length() > 0 && password.length() > 0) { isValid = true; } } return isValid; } protected static boolean isDevelopment() { return System.getProperty("heritrix.development") != null; } /** * Load the heritrix.properties file. * * Adds any property that starts with * <code>HERITRIX_PROPERTIES_PREFIX</code> * or <code>ARCHIVE_PACKAGE</code> * into system properties (except logging '.level' directives). * @return Loaded properties. * @throws IOException */ protected static Properties loadProperties() throws IOException { if (Heritrix.propertiesLoaded) { return System.getProperties(); } Heritrix.propertiesLoaded = true; Properties properties = new Properties(); properties.load(getPropertiesInputStream()); // Any property that begins with ARCHIVE_PACKAGE, make it // into a system property. While iterating, check to see if anything // defined on command-line, and if so, it overrules whats in // heritrix.properties. 
for (Enumeration e = properties.keys(); e.hasMoreElements();) { String key = ((String)e.nextElement()).trim(); if (key.startsWith(ARCHIVE_PACKAGE) || key.startsWith(HERITRIX_PROPERTIES_PREFIX)) { // Don't add the heritrix.properties entries that are // changing the logging level of particular classes. String value = properties.getProperty(key).trim(); if (key.indexOf(".level") < 0) { copyToSystemProperty(key, value); } } else if (key.startsWith(SYSTEM_PREFIX)) { String value = properties.getProperty(key).trim(); copyToSystemProperty(key.substring(SYSTEM_PREFIX.length()), value); } } return properties; } /** * Copy the given key-value into System properties, as long as there * is no existing value. * @param key property key * @param value property value */ protected static void copyToSystemProperty(String key, String value) { if (System.getProperty(key) == null || System.getProperty(key).length() == 0) { System.setProperty(key, value); } } protected static InputStream getPropertiesInputStream() throws IOException { File file = null; // Look to see if properties have been passed on the cmd-line. String alternateProperties = System.getProperty(PROPERTIES_KEY); if (alternateProperties != null && alternateProperties.length() > 0) { file = new File(alternateProperties); } // Get properties from conf directory if one available. if ((file == null || !file.exists()) && getConfdir(false) != null) { file = new File(getConfdir(), PROPERTIES); if (!file.exists()) { // If no properties file in the conf dir, set file back to // null so we go looking for heritrix.properties on classpath. file = null; } } // If not on the command-line, there is no conf dir. Then get the // properties from the CLASSPATH (Classpath file separator is always // '/', whatever the platform. InputStream is = (file != null)? 
new FileInputStream(file): Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY); if (is == null) { throw new IOException("Failed to load properties file from" + " filesystem or from classpath."); } return is; } /** * If the user hasn't altered the default logging parameters, tighten them * up somewhat: some of our libraries are way too verbose at the INFO or * WARNING levels. * * This might be a problem running inside in someone else's * container. Container's seem to prefer commons logging so we * ain't messing them doing the below. * * @throws IOException * @throws SecurityException */ protected static void patchLogging() throws SecurityException, IOException { if (System.getProperty("java.util.logging.config.class") != null) { return; } if (System.getProperty("java.util.logging.config.file") != null) { return; } // No user-set logging properties established; use defaults // from distribution-packaged 'heritrix.properties'. LogManager.getLogManager(). readConfiguration(getPropertiesInputStream()); } /** * Configure our trust store. * * If system property is defined, then use it for our truststore. Otherwise * use the heritrix truststore under conf directory if it exists. * * <p>If we're not launched from the command-line, we will not be able * to find our truststore. The truststore is nor normally used so rare * should this be a problem (In case where we don't use find our trust * store, we'll use the 'default' -- either the JVMs or the containers). */ protected static void configureTrustStore() { // Below must be defined in jsse somewhere but can' find it. final String TRUSTSTORE_KEY = "javax.net.ssl.trustStore"; String value = System.getProperty(TRUSTSTORE_KEY); File confdir = null; try { confdir = getConfdir(false); } catch (IOException e) { logger.log(Level.WARNING, "Failed to get confdir.", e); } if ((value == null || value.length() <= 0) && confdir != null) { // Use the heritrix store if it exists on disk. 
File heritrixStore = new File(confdir, "heritrix.cacerts"); if(heritrixStore.exists()) { value = heritrixStore.getAbsolutePath(); } } if (value != null && value.length() > 0) { System.setProperty(TRUSTSTORE_KEY, value); } } /** * Run the selftest * * @param oneSelfTestName Name of a test if we are to run one only rather * than the default running all tests. * @param port Port number to use for web UI. * * @exception Exception * @return Status of how selftest startup went. */ protected static String selftest(final String oneSelfTestName, final int port) throws Exception { // Put up the webserver w/ the root and selftest webapps only. final String SELFTEST = "selftest"; Heritrix.httpServer = new SimpleHttpServer(SELFTEST, Heritrix.adminContext, LOCALHOST_ONLY, port, true); // Set up digest auth for a section of the server so selftest can run // auth tests. Looks like can only set one login realm going by the // web.xml dtd. Otherwise, would be nice to selftest basic and digest. // Have login, password and role all be SELFTEST. Must match what is // in the selftest order.xml file. Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext, SELFTEST, SELFTEST, SELFTEST); Heritrix.httpServer.startServer(); // Get the order file from the CLASSPATH unless we're running in dev // environment. File selftestDir = (isDevelopment())? new File(getConfdir(), SELFTEST):
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -