?? heritrix.java
字號:
} private final static String START_OPER = "start"; private final static String STOP_OPER = "stop"; private final static String DESTROY_OPER = "destroy"; private final static String INTERRUPT_OPER = "interrupt"; private final static String START_CRAWLING_OPER = "startCrawling"; private final static String STOP_CRAWLING_OPER = "stopCrawling"; private final static String ADD_CRAWL_JOB_OPER = "addJob"; private final static String TERMINATE_CRAWL_JOB_OPER = "terminateCurrentJob"; private final static String DELETE_CRAWL_JOB_OPER = "deleteJob"; private final static String ALERT_OPER = "alert"; private final static String ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon"; private final static String PENDING_JOBS_OPER = "pendingJobs"; private final static String COMPLETED_JOBS_OPER = "completedJobs"; private final static String CRAWLEND_REPORT_OPER = "crawlendReport"; private final static String SHUTDOWN_OPER = "shutdown"; private final static String LOG_OPER = "log"; private final static String REBIND_JNDI_OPER = "rebindJNDI"; private final static List OPERATION_LIST; static { OPERATION_LIST = Arrays.asList(new String [] {START_OPER, STOP_OPER, INTERRUPT_OPER, START_CRAWLING_OPER, STOP_CRAWLING_OPER, ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER, DELETE_CRAWL_JOB_OPER, ALERT_OPER, PENDING_JOBS_OPER, COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER, LOG_OPER, DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER, REBIND_JNDI_OPER}); } private CompositeType jobCompositeType = null; private TabularType jobsTabularType = null; private static final String [] JOB_KEYS = new String [] {"uid", "name", "status"}; private static String adminUsername; private static String adminPassword; /** * Constructor. * Does not register the created instance with JMX. Assumed this * constructor is used by such as JMX agent creating an instance of * Heritrix at the commmand of a remote client (In this case Heritrix will * be registered by the invoking agent). * @throws IOException */ public Heritrix() throws IOException { this(null, false); } public Heritrix(final boolean jmxregister) throws IOException { this(null, jmxregister); } /** * Constructor. * @param name If null, we bring up the default Heritrix instance. * @param jmxregister True if we are to register this instance with JMX * agent. * @throws IOException */ public Heritrix(final String name, final boolean jmxregister) throws IOException { this(name, jmxregister, new CrawlJobHandler(getJobsdir())); } /** * Constructor. * @param name If null, we bring up the default Heritrix instance. * @param jmxregister True if we are to register this instance with JMX * agent. * @param cjh CrawlJobHandler to use. * @throws IOException */ public Heritrix(final String name, final boolean jmxregister, final CrawlJobHandler cjh) throws IOException { super(); containerInitialization(); this.jobHandler = cjh; this.openMBeanInfo = buildMBeanInfo(); // Set up the alerting system. SinkHandler is also a global so will // catch alerts for all running Heritrix instances. Will need to // address (Add name of instance that threw the alert to SinkRecord?). final SinkHandler sinkHandler = SinkHandler.getInstance(); if (sinkHandler == null) { throw new NullPointerException("SinkHandler not found."); } // Adapt the alerting system to use SinkHandler. this.alertManager = new AlertManager() { public void add(SinkHandlerLogRecord record) { sinkHandler.publish(record); } public Vector getAll() { return sinkHandler.getAll(); } public Vector getNewAll() { return sinkHandler.getAllUnread(); } public SinkHandlerLogRecord get(String alertID) { return sinkHandler.get(Long.parseLong(alertID)); } public int getCount() { return sinkHandler.getCount(); } public int getNewCount() { return sinkHandler.getUnreadCount(); } public void remove(String alertID) { sinkHandler.remove(Long.parseLong(alertID)); } public void read(String alertID) { sinkHandler.read(Long.parseLong(alertID)); } }; try { Heritrix.registerHeritrix(this, name, jmxregister); } catch (InstanceAlreadyExistsException e) { throw new RuntimeException(e); } catch (MBeanRegistrationException e) { throw new RuntimeException(e); } catch (NotCompliantMBeanException e) { throw new RuntimeException(e); } catch (MalformedObjectNameException e) { throw new RuntimeException(e); } } /** * Run setup tasks for this 'container'. Idempotent. * * @throws IOException */ protected static void containerInitialization() throws IOException { if (Heritrix.containerInitialized) { return; } Heritrix.containerInitialized = true; // Load up the properties. This invocation adds heritrix properties // to system properties so all available via System.getProperty. // Note, loadProperties and patchLogging have global effects. May be an // issue if we're running inside a container such as tomcat or jboss. Heritrix.loadProperties(); Heritrix.patchLogging(); Heritrix.configureTrustStore(); // Will run on SIGTERM but not on SIGKILL, unfortunately. // Otherwise, ensures we cleanup after ourselves (Deregister from // JMX and JNDI). Runtime.getRuntime().addShutdownHook( Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook")); // Register this heritrix 'container' though we may be inside another // tomcat or jboss container. try { registerContainerJndi(); } catch (Exception e) { logger.log(Level.WARNING, "Failed jndi container registration.", e); } } /** * Do inverse of construction. Used by anyone who does a 'new Heritrix' when * they want to cleanup the instance. * Of note, there may be Heritrix threads still hanging around after the * call to destroy completes. They'll eventually go down after they've * finished their cleanup routines. In particular, if you are watching * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister * ahead of the CrawlJob JMX bean that its hosting. */ public void destroy() { stop(); try { Heritrix.unregisterHeritrix(this); } catch (InstanceNotFoundException e) { e.printStackTrace(); } catch (MBeanRegistrationException e) { e.printStackTrace(); } catch (NullPointerException e) { e.printStackTrace(); } this.jobHandler = null; this.openMBeanInfo = null; } /** * Launch program. * Optionally will launch a web server to host UI. Will also register * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM * Agent). * * @param args Command line arguments. * @throws Exception */ public static void main(String[] args) throws Exception { Heritrix.commandLine = true; // Set timezone here. Would be problematic doing it if we're running // inside in a container. TimeZone.setDefault(TimeZone.getTimeZone("GMT")); File startLog = new File(getHeritrixHome(), STARTLOG); Heritrix.out = new PrintWriter(isDevelopment()? System.out: new PrintStream(new FileOutputStream(startLog))); try { containerInitialization(); String status = doCmdLineArgs(args); if (status != null) { Heritrix.out.println(status); } } catch(Exception e) { // Show any exceptions in STARTLOG. e.printStackTrace(Heritrix.out); throw e; } finally { // If not development, close the file that signals the wrapper // script that we've started. Otherwise, just flush it; if in // development, the output is probably a console. if (!isDevelopment()) { if (Heritrix.out != null) { Heritrix.out.close(); } System.out.println("Heritrix version: " + Heritrix.getVersion()); } else { if (Heritrix.out != null) { Heritrix.out.flush(); } } } } protected static String doCmdLineArgs(final String [] args) throws Exception { // Get defaults for commandline arguments from the properties file. String tmpStr = PropertyUtils. getPropertyOrNull("heritrix.context"); if (tmpStr != null) { Heritrix.adminContext = tmpStr; } tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port"); if (tmpStr != null) { Heritrix.guiPort = Integer.parseInt(tmpStr); } tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin"); String adminLoginPassword = (tmpStr == null)? "": tmpStr; String crawlOrderFile = PropertyUtils.getPropertyOrNull("heritrix.cmdline.order"); tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.run"); boolean runMode = PropertyUtils.getBooleanProperty("heritrix.cmdline.run"); boolean selfTest = false; String selfTestName = null; CommandLineParser clp = new CommandLineParser(args, Heritrix.out, Heritrix.getVersion()); List arguments = clp.getCommandLineArguments(); Option [] options = clp.getCommandLineOptions(); // Check passed argument. Only one argument, the ORDER_FILE is allowed. // If one argument, make sure exists and xml suffix. if (arguments.size() > 1) { clp.usage(1); } else if (arguments.size() == 1) { crawlOrderFile = (String)arguments.get(0); if (!(new File(crawlOrderFile).exists())) { clp.usage("ORDER.XML <" + crawlOrderFile + "> specified does not exist.", 1); } // Must end with '.xml' if (crawlOrderFile.length() > 4 && !crawlOrderFile.substring(crawlOrderFile.length() - 4). equalsIgnoreCase(".xml")) { clp.usage("ORDER.XML <" + crawlOrderFile + "> does not have required '.xml' suffix.", 1); } } // Now look at options passed. for (int i = 0; i < options.length; i++) { switch(options[i].getId()) { case 'h': clp.usage(); break; case 'a': adminLoginPassword = options[i].getValue(); break; case 'n': if (crawlOrderFile == null) { clp.usage("You must specify an ORDER_FILE with" + " '--nowui' option.", 1); } Heritrix.gui = false; break; case 'b': Heritrix.guiHosts = parseHosts(options[i].getValue()); break; case 'p': try { Heritrix.guiPort = Integer.parseInt(options[i].getValue()); } catch (NumberFormatException e) { clp.usage("Failed parse of port number: " + options[i].getValue(), 1); } if (Heritrix.guiPort <= 0) { clp.usage("Nonsensical port number: " + options[i].getValue(), 1); } break; case 'r': runMode = true; break; case 's': selfTestName = options[i].getValue(); selfTest = true; break; default: assert false: options[i].getId(); } }
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -