?? preconditionenforcer.java
字號:
logger.fine("Deferring processing of CrawlURI " + curi.toString() + " for dns lookup."); String preq = "dns:" + ch.getHostName(); try { curi.markPrerequisite(preq, getController().getPostprocessorChain()); } catch (URIException e) { throw new RuntimeException(e); // shouldn't ever happen } return true; } // DNS preconditions OK return false; } /** * Get the maximum time a dns-record is valid. * * @param curi the uri this time is valid for. * @return the maximum time a dns-record is valid -- in seconds -- or * negative if record's ttl should be used. */ public long getIPValidityDuration(CrawlURI curi) { Integer d; try { d = (Integer)getAttribute(ATTR_IP_VALIDITY_DURATION, curi); } catch (AttributeNotFoundException e) { d = DEFAULT_IP_VALIDITY_DURATION; } return d.longValue(); } /** Return true if ip should be looked up. * * @param curi the URI to check. * @return true if ip should be looked up. */ public boolean isIpExpired(CrawlURI curi) { CrawlHost host = getController().getServerCache().getHostFor(curi); if (!host.hasBeenLookedUp()) { // IP has not been looked up yet. return true; } if (host.getIpTTL() == CrawlHost.IP_NEVER_EXPIRES) { // IP never expires (numeric IP) return false; } long duration = getIPValidityDuration(curi); if (duration == 0) { // Never expire ip if duration is null (set by user or more likely, // set to zero in case where we tried in FetchDNS but failed). return false; } // catch old "default" -1 settings that are now problematic, // convert to new minimum if (duration <= 0) { duration = DEFAULT_IP_VALIDITY_DURATION.intValue(); } long ttl = host.getIpTTL(); if (ttl > duration) { // Use the larger of the operator-set minimum duration // or the DNS record TTL duration = ttl; } // Duration and ttl are in seconds. Convert to millis. if (duration > 0) { duration *= 1000; } return (duration + host.getIpFetched()) < System.currentTimeMillis(); } /** Get the maximum time a robots.txt is valid. 
* * @param curi * @return the time a robots.txt is valid in milliseconds. */ public long getRobotsValidityDuration(CrawlURI curi) { Integer d; try { d = (Integer) getAttribute(ATTR_ROBOTS_VALIDITY_DURATION, curi); } catch (AttributeNotFoundException e) { // This should never happen, but if it does, return default logger.severe(e.getLocalizedMessage()); d = DEFAULT_ROBOTS_VALIDITY_DURATION; } // convert from seconds to milliseconds return d.longValue() * 1000; } /** * Is the robots policy expired. * * This method will also return true if we haven't tried to get the * robots.txt for this server. * * @param curi * @return true if the robots policy is expired. */ public boolean isRobotsExpired(CrawlURI curi) { CrawlServer server = getController().getServerCache().getServerFor(curi); long robotsFetched = server.getRobotsFetchedTime(); if (robotsFetched == CrawlServer.ROBOTS_NOT_FETCHED) { // Have not attempted to fetch robots return true; } long duration = getRobotsValidityDuration(curi); if (duration == 0) { // When zero, robots should be valid forever return false; } if (robotsFetched + duration < System.currentTimeMillis()) { // Robots is still valid return true; } return false; } /** * Consider credential preconditions. * * Looks to see if any credential preconditions (e.g. html form login * credentials) for this <code>CrawlServer</code>. If there are, have they * been run already? If not, make the running of these logins a precondition * of accessing any other url on this <code>CrawlServer</code>. * * <p> * One day, do optimization and avoid running the bulk of the code below. * Argument for running the code everytime is that overrides and refinements * may change what comes back from credential store. * * @param curi CrawlURI we're checking for any required preconditions. * @return True, if this <code>curi</code> has a precondition that needs to * be met before we can proceed. False if we can precede to process * this url. 
*/ private boolean credentialPrecondition(final CrawlURI curi) { boolean result = false; CredentialStore cs = CredentialStore.getCredentialStore(getSettingsHandler()); if (cs == null) { logger.severe("No credential store for " + curi); return result; } Iterator i = cs.iterator(curi); if (i == null) { return result; } while (i.hasNext()) { Credential c = (Credential)i.next(); if (c.isPrerequisite(curi)) { // This credential has a prereq. and this curi is it. Let it // through. Add its avatar to the curi as a mark. Also, does // this curi need to be posted? Note, we do this test for // is it a prereq BEFORE we do the check that curi is of the // credential domain because such as yahoo have you go to // another domain altogether to login. c.attach(curi); curi.setPost(c.isPost(curi)); break; } if (!c.rootUriMatch(getController(), curi)) { continue; } if (!c.hasPrerequisite(curi)) { continue; } if (!authenticated(c, curi)) { // Han't been authenticated. Queue it and move on (Assumption // is that we can do one authentication at a time -- usually one // html form). String prereq = c.getPrerequisite(curi); if (prereq == null || prereq.length() <= 0) { CrawlServer server = getController().getServerCache().getServerFor(curi); logger.severe(server.getName() + " has " + " credential(s) of type " + c + " but prereq" + " is null."); } else { try { curi.markPrerequisite(prereq, getController().getPostprocessorChain()); } catch (URIException e) { logger.severe("unable to set credentials prerequisite "+prereq); getController().logUriError(e,curi.getUURI(),prereq); return false; } result = true; if (logger.isLoggable(Level.FINE)) { logger.fine("Queueing prereq " + prereq + " of type " + c + " for " + curi); } break; } } } return result; } /** * Has passed credential already been authenticated. * * @param credential Credential to test. * @param curi CrawlURI. * @return True if already run. 
*/
    private boolean authenticated(final Credential credential,
            final CrawlURI curi) {
        CrawlServer server =
            getController().getServerCache().getServerFor(curi);
        if (!server.hasCredentialAvatars()) {
            return false;
        }
        Set avatars = server.getCredentialAvatars();

        // The key is computed from the credential and the curi only, neither
        // of which changes while iterating, so look it up once rather than
        // once per avatar (assumes getKey is a plain settings lookup).
        // Without a key no avatar can be matched, so report and give up.
        String key;
        try {
            key = credential.getKey(curi);
        } catch (AttributeNotFoundException e) {
            logger.severe("Failed getting key for " + credential +
                " for " + curi);
            return false;
        }

        for (Iterator i = avatars.iterator(); i.hasNext();) {
            CredentialAvatar ca = (CredentialAvatar)i.next();
            if (ca.match(credential.getClass(), key)) {
                // One matching avatar is enough; no need to scan the rest.
                return true;
            }
        }
        return false;
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -