?? crawljobhandler.html

?? 網絡爬蟲開源代碼
?? HTML
?? 第 1 頁 / 共 5 頁
字號:
上一頁 1 2 3 4 5
<a name="588" href="#588">588</a>     <em>/**<em>*</em></em><a name="589" href="#589">589</a> <em>     * The specified job will be removed from the pending queue or aborted if</em><a name="590" href="#590">590</a> <em>     * currently running.  It will be placed in the list of completed jobs with</em><a name="591" href="#591">591</a> <em>     * appropriate status info. If the job is already in the completed list or</em><a name="592" href="#592">592</a> <em>     * no job with the given UID is found, no action will be taken.</em><a name="593" href="#593">593</a> <em>     *</em><a name="594" href="#594">594</a> <em>     * @param jobUID The UID (unique ID) of the job that is to be deleted.</em><a name="595" href="#595">595</a> <em>     *</em><a name="596" href="#596">596</a> <em>     */</em><a name="597" href="#597">597</a>     <strong>public</strong> <strong>void</strong> deleteJob(String jobUID) {<a name="598" href="#598">598</a>         <em class="comment">// First check to see if we are deleting the current job.</em><a name="599" href="#599">599</a>         <strong>if</strong> (currentJob != <strong>null</strong> &amp;&amp; jobUID.equals(currentJob.getUID())) {<a name="600" href="#600">600</a>             terminateCurrentJob();<a name="601" href="#601">601</a>             <strong>return</strong>; <em class="comment">// We're not going to find another job with the same UID</em><a name="602" href="#602">602</a>         }<a name="603" href="#603">603</a>         <a name="604" href="#604">604</a>         <em class="comment">// Ok, it isn't the current job, let's check the pending jobs.</em><a name="605" href="#605">605</a>         <strong>for</strong>(Iterator it = pendingCrawlJobs.iterator(); it.hasNext();) {<a name="606" href="#606">606</a>             <a href="../../../../org/archive/crawler/admin/CrawlJob.html">CrawlJob</a> cj = (CrawlJob) it.next();<a name="607" href="#607">607</a>             <strong>if</strong> (cj.getUID().equals(jobUID)) {<a name="608" 
href="#608">608</a>                 <em class="comment">// Found the one to delete.</em><a name="609" href="#609">609</a>                 cj.setStatus(CrawlJob.STATUS_DELETED);<a name="610" href="#610">610</a>                 it.remove();<a name="611" href="#611">611</a>                 <strong>return</strong>; <em class="comment">// We're not going to find another job with the same UID</em><a name="612" href="#612">612</a>             }<a name="613" href="#613">613</a>         }<a name="614" href="#614">614</a>         <a name="615" href="#615">615</a>         <em class="comment">// And finally the completed jobs.</em><a name="616" href="#616">616</a>         <strong>for</strong> (Iterator it = completedCrawlJobs.iterator(); it.hasNext();) {<a name="617" href="#617">617</a>             <a href="../../../../org/archive/crawler/admin/CrawlJob.html">CrawlJob</a> cj = (CrawlJob) it.next();<a name="618" href="#618">618</a>             <strong>if</strong> (cj.getUID().equals(jobUID)) {<a name="619" href="#619">619</a>                 <em class="comment">// Found the one to delete.</em><a name="620" href="#620">620</a>                 cj.setStatus(CrawlJob.STATUS_DELETED);<a name="621" href="#621">621</a>                 it.remove();<a name="622" href="#622">622</a>                 <strong>return</strong>; <em class="comment">// No other job will have the same UID</em><a name="623" href="#623">623</a>             }<a name="624" href="#624">624</a>         }<a name="625" href="#625">625</a>     }<a name="626" href="#626">626</a> <a name="627" href="#627">627</a>     <em>/**<em>*</em></em><a name="628" href="#628">628</a> <em>     * Cause the current job to pause. If no current job is crawling this</em><a name="629" href="#629">629</a> <em>     * method will have no effect. 
</em><a name="630" href="#630">630</a> <em>     */</em><a name="631" href="#631">631</a>     <strong>public</strong> <strong>void</strong> pauseJob() {<a name="632" href="#632">632</a>         <strong>if</strong> (<strong>this</strong>.currentJob != <strong>null</strong>) {<a name="633" href="#633">633</a>             <strong>this</strong>.currentJob.pause();<a name="634" href="#634">634</a>         }<a name="635" href="#635">635</a>     }<a name="636" href="#636">636</a> <a name="637" href="#637">637</a>     <em>/**<em>*</em></em><a name="638" href="#638">638</a> <em>     * Cause the current job to resume crawling if it was paused. Will have no</em><a name="639" href="#639">639</a> <em>     * effect if the current job was not paused or if there is no current job.</em><a name="640" href="#640">640</a> <em>     * If the current job is still waiting to pause, this will not take effect</em><a name="641" href="#641">641</a> <em>     * until the job has actually paused. At which time it will immediately</em><a name="642" href="#642">642</a> <em>     * resume crawling.</em><a name="643" href="#643">643</a> <em>     */</em><a name="644" href="#644">644</a>     <strong>public</strong> <strong>void</strong> resumeJob() {<a name="645" href="#645">645</a>         <strong>if</strong> (<strong>this</strong>.currentJob != <strong>null</strong>) {<a name="646" href="#646">646</a>             <strong>this</strong>.currentJob.resume();<a name="647" href="#647">647</a>         }<a name="648" href="#648">648</a>     }<a name="649" href="#649">649</a> <a name="650" href="#650">650</a>     <em>/**<em>*</em></em><a name="651" href="#651">651</a> <em>     * Cause the current job to write a checkpoint to disk. 
Currently</em><a name="652" href="#652">652</a> <em>     * requires job to already be paused.</em><a name="653" href="#653">653</a> <em>     * @throws IllegalStateException Thrown if crawl is not paused.</em><a name="654" href="#654">654</a> <em>     */</em><a name="655" href="#655">655</a>     <strong>public</strong> <strong>void</strong> checkpointJob() throws IllegalStateException {<a name="656" href="#656">656</a>         <strong>if</strong> (<strong>this</strong>.currentJob != <strong>null</strong>) {<a name="657" href="#657">657</a>             <strong>this</strong>.currentJob.checkpoint();<a name="658" href="#658">658</a>         }<a name="659" href="#659">659</a>     }<a name="660" href="#660">660</a> <a name="661" href="#661">661</a>     <em>/**<em>*</em></em><a name="662" href="#662">662</a> <em>     * Returns a unique job ID.</em><a name="663" href="#663">663</a> <em>     * &lt;p></em><a name="664" href="#664">664</a> <em>     * No two calls to this method (on the same instance of this class) can ever</em><a name="665" href="#665">665</a> <em>     * return the same value. &lt;br></em><a name="666" href="#666">666</a> <em>     * Currently implemented to return a time stamp. That is subject to change</em><a name="667" href="#667">667</a> <em>     * though.</em><a name="668" href="#668">668</a> <em>     *</em><a name="669" href="#669">669</a> <em>     * @return A unique job ID.</em><a name="670" href="#670">670</a> <em>     *</em><a name="671" href="#671">671</a> <em>     * @see ArchiveUtils#TIMESTAMP17</em><a name="672" href="#672">672</a> <em>     */</em><a name="673" href="#673">673</a>     <strong>public</strong> String getNextJobUID() {<a name="674" href="#674">674</a>         <strong>return</strong> ArchiveUtils.get17DigitDate();<a name="675" href="#675">675</a>     }<a name="676" href="#676">676</a> <a name="677" href="#677">677</a>     <em>/**<em>*</em></em><a name="678" href="#678">678</a> <em>     * Creates a new job. 
The new job will be returned and also registered as</em><a name="679" href="#679">679</a> <em>     * the handler's 'new job'. The new job will be based on the settings</em><a name="680" href="#680">680</a> <em>     * provided but created in a new location on disk.</em><a name="681" href="#681">681</a> <em>     *</em><a name="682" href="#682">682</a> <em>     * @param baseOn</em><a name="683" href="#683">683</a> <em>     *            A CrawlJob (with a valid settingshandler) to use as the</em><a name="684" href="#684">684</a> <em>     *            template for the new job.</em><a name="685" href="#685">685</a> <em>     * @param recovery Whether to preinitialize new job as recovery of</em><a name="686" href="#686">686</a> <em>     * &lt;code>baseOn&lt;/code> job.  String holds RECOVER_LOG if we are to</em><a name="687" href="#687">687</a> <em>     * do the recovery based off the recover.gz log -- See RecoveryJournal in</em><a name="688" href="#688">688</a> <em>     * the frontier package -- or it holds the name of</em><a name="689" href="#689">689</a> <em>     * the checkpoint we're to use recoverying.</em><a name="690" href="#690">690</a> <em>     * @param name</em><a name="691" href="#691">691</a> <em>     *            The name of the new job.</em><a name="692" href="#692">692</a> <em>     * @param description</em><a name="693" href="#693">693</a> <em>     *            Descriptions of the job.</em><a name="694" href="#694">694</a> <em>     * @param seeds</em><a name="695" href="#695">695</a> <em>     *            The contents of the new settings' seed file.</em><a name="696" href="#696">696</a> <em>     * @param priority</em><a name="697" href="#697">697</a> <em>     *            The priority of the new job.</em><a name="698" href="#698">698</a> <em>     *</em><a name="699" href="#699">699</a> <em>     * @return The new crawl job.</em><a name="700" href="#700">700</a> <em>     * @throws FatalConfigurationException If a problem occurs creating the</em><a name="701" 
href="#701">701</a> <em>     *             settings.</em><a name="702" href="#702">702</a> <em>     */</em><a name="703" href="#703">703</a>     <strong>public</strong> <a href="../../../../org/archive/crawler/admin/CrawlJob.html">CrawlJob</a> newJob(<a href="../../../../org/archive/crawler/admin/CrawlJob.html">CrawlJob</a> baseOn, String recovery, String name,<a name="704" href="#704">704</a>             String description, String seeds, <strong>int</strong> priority)<a name="705" href="#705">705</a>     throws <a href="../../../../org/archive/crawler/framework/exceptions/FatalConfigurationException.html">FatalConfigurationException</a> {<a name="706" href="#706">706</a>         <em class="comment">// See what the recover story is.</em><a name="707" href="#707">707</a>         File recover = <strong>null</strong>;<a name="708" href="#708">708</a>         <strong>try</strong> {<a name="709" href="#709">709</a>             <strong>if</strong> (recovery != <strong>null</strong> &amp;&amp; recovery.length() > 0<a name="710" href="#710">710</a>                     &amp;&amp; recovery.equals(RECOVER_LOG)) {<a name="711" href="#711">711</a>                 <em class="comment">// Then we're to do a recovery based off the RecoveryJournal</em><a name="712" href="#712">712</a>                 <em class="comment">// recover.gz log.</em><a name="713" href="#713">713</a>                 File dir = baseOn.getSettingsHandler().getOrder()<a name="714" href="#714">714</a>                     .getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);<a name="715" href="#715">715</a>                 <em class="comment">// Add name of recover file.  
We're hardcoding it as</em><a name="716" href="#716">716</a>                 <em class="comment">// 'recover.gz'.</em><a name="717" href="#717">717</a>                 recover = <strong>new</strong> File(dir, FrontierJournal.LOGNAME_RECOVER);<a name="718" href="#718">718</a>             } <strong>else</strong> <strong>if</strong> (recovery != <strong>null</strong> &amp;&amp; recovery.length() > 0) {<a name="719" href="#719">719</a>                 <em class="comment">// Must be name of a checkpoint to use.</em><a name="720" href="#720">720</a>                 recover = <strong>new</strong> File(baseOn.getSettingsHandler().<a name="721" href="#721">721</a>                     getOrder().getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH),<a name="722" href="#722">722</a>                         recovery);<a name="723" href="#723">723</a>             }<a name="724" href="#724">724</a>         } <strong>catch</strong> (AttributeNotFoundException e1) {<a name="725" href="#725">725</a>             <strong>throw</strong> <strong>new</strong> <a href="../../../../org/archive/crawler/framework/exceptions/FatalConfigurationException.html">FatalConfigurationException</a>(<a name="726" href="#726">726</a>                 <span class="string">"AttributeNotFoundException occured while setting up"</span> +<a name="727" href="#727">727</a>                     <span class="string">"new job/profile "</span> + name + <span class="string">" \n"</span> + e1.getMessage());<a name="728" href="#728">728</a>         }<a name="729" href="#729">729</a> <a name="730" href="#730">730</a>         <a href="../../../../org/archive/crawler/admin/CrawlJob.html">CrawlJob</a> cj = createNewJob(baseOn.getSettingsHandler().getOrderFile(),<a name="731" href="#731">731</a>             name, description, seeds, priority);<a name="732" href="#732">732</a>     <a name="733" href="#733">733</a>         updateRecoveryPaths(recover, cj.getSettingsHandler(), name);<a name="734" href="#734">734</a>         <a 
name="735" href="#735">7
上一頁 1 2 3 4 5
?? 文件大小 20230 K
?? 上傳用戶 singwolf
?? 所屬分類 Java編程
??? 相關標簽

#網絡爬蟲 #開源 #代碼
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? crawljobhandler.html

?? 快捷鍵說明