?? statisticssummary.html
字號:
<a name="628" href="#628">628</a> <strong>return</strong> false;<a name="629" href="#629">629</a> }<a name="630" href="#630">630</a> BufferedReader br = <strong>null</strong>;<a name="631" href="#631">631</a> <strong>try</strong> {<a name="632" href="#632">632</a> FileReader reader = <strong>new</strong> FileReader(f);<a name="633" href="#633">633</a> br = <strong>new</strong> BufferedReader(reader);<a name="634" href="#634">634</a> String line = br.readLine(); <a name="635" href="#635">635</a> <strong>while</strong> (line != <strong>null</strong>) {<a name="636" href="#636">636</a> <strong>if</strong> (line.startsWith(<span class="string">"Duration Time"</span>)) {<a name="637" href="#637">637</a> durationTime = line.substring(line.indexOf(':')+1);<a name="638" href="#638">638</a> }<a name="639" href="#639">639</a> <strong>else</strong> <strong>if</strong> (line.startsWith(<span class="string">"Processed docs/sec"</span>)) {<a name="640" href="#640">640</a> processedDocsPerSec = line.substring(line.indexOf(':')+1);<a name="641" href="#641">641</a> }<a name="642" href="#642">642</a> <strong>else</strong> <strong>if</strong> (line.startsWith(<span class="string">"Bandwidth in Kbytes/sec"</span>)) {<a name="643" href="#643">643</a> bandwidthKbytesPerSec = line.substring(line.indexOf(':')+1);<a name="644" href="#644">644</a> }<a name="645" href="#645">645</a> <strong>else</strong> <strong>if</strong> (line.startsWith(<span class="string">"Total Raw Data Size in Bytes"</span>)) {<a name="646" href="#646">646</a> totalDataWritten = line.substring(line.indexOf(':')+1);<a name="647" href="#647">647</a> }<a name="648" href="#648">648</a> <a name="649" href="#649">649</a> line = br.readLine();<a name="650" href="#650">650</a> }<a name="651" href="#651">651</a> }<a name="652" href="#652">652</a> <strong>catch</strong> (IOException e) {<a name="653" href="#653">653</a> logger.log(Level.SEVERE, <span class="string">"Reading "</span> + f.getAbsolutePath(), e); <a name="654" 
href="#654">654</a> } <strong>finally</strong> {<a name="655" href="#655">655</a> <strong>if</strong> (br != <strong>null</strong>) {<a name="656" href="#656">656</a> <strong>try</strong> {<a name="657" href="#657">657</a> br.close();<a name="658" href="#658">658</a> } <strong>catch</strong> (IOException e) {<a name="659" href="#659">659</a> logger.log(Level.SEVERE,<a name="660" href="#660">660</a> <span class="string">"Failed close of "</span> + f.getAbsolutePath(), e);<a name="661" href="#661">661</a> }<a name="662" href="#662">662</a> }<a name="663" href="#663">663</a> }<a name="664" href="#664">664</a> <strong>return</strong> <strong>true</strong>;<a name="665" href="#665">665</a> }<a name="666" href="#666">666</a> <a name="667" href="#667">667</a> <em>/**<em>*</em></em><a name="668" href="#668">668</a> <em> * Returns sorted Iterator of seeds records based on status code.</em><a name="669" href="#669">669</a> <em> * @return sorted Iterator of seeds records</em><a name="670" href="#670">670</a> <em> */</em><a name="671" href="#671">671</a> <strong>public</strong> Iterator<SeedRecord> getSeedRecordsSortedByStatusCode() {<a name="672" href="#672">672</a> TreeSet<SeedRecord> sortedSet = <strong>new</strong> TreeSet<SeedRecord>(<a name="673" href="#673">673</a> <strong>new</strong> Comparator<SeedRecord>() {<a name="674" href="#674">674</a> <strong>public</strong> <strong>int</strong> compare(<a href="../../../../org/archive/crawler/admin/SeedRecord.html">SeedRecord</a> sr1, <a href="../../../../org/archive/crawler/admin/SeedRecord.html">SeedRecord</a> sr2) {<a name="675" href="#675">675</a> <strong>int</strong> code1 = sr1.getStatusCode();<a name="676" href="#676">676</a> <strong>int</strong> code2 = sr2.getStatusCode();<a name="677" href="#677">677</a> <strong>if</strong> (code1 == code2) {<a name="678" href="#678">678</a> <em class="comment">// If the values are equal, sort by URIs.</em><a name="679" href="#679">679</a> <strong>return</strong> 
sr1.getUri().compareTo(sr2.getUri());<a name="680" href="#680">680</a> }<a name="681" href="#681">681</a> <em class="comment">// mirror and shift the nubmer line so as to</em><a name="682" href="#682">682</a> <em class="comment">// place zero at the beginning, then all negatives </em><a name="683" href="#683">683</a> <em class="comment">// in order of ascending absolute value, then all </em><a name="684" href="#684">684</a> <em class="comment">// positives descending</em><a name="685" href="#685">685</a> code1 = -code1 - Integer.MAX_VALUE;<a name="686" href="#686">686</a> code2 = -code2 - Integer.MAX_VALUE;<a name="687" href="#687">687</a> <a name="688" href="#688">688</a> <strong>return</strong> <strong>new</strong> Integer(code1).compareTo(<strong>new</strong> Integer(code2));<a name="689" href="#689">689</a> }<a name="690" href="#690">690</a> });<a name="691" href="#691">691</a> <strong>for</strong> (SeedRecord sr: processedSeedsRecords.values()) {<a name="692" href="#692">692</a> sortedSet.add(sr);<a name="693" href="#693">693</a> }<a name="694" href="#694">694</a> <a name="695" href="#695">695</a> <strong>return</strong> sortedSet.iterator();<a name="696" href="#696">696</a> }<a name="697" href="#697">697</a> <a name="698" href="#698">698</a> <em>/**<em>*</em></em><a name="699" href="#699">699</a> <em> * Reads seed data from seeds-report.txt.</em><a name="700" href="#700">700</a> <em> * @return True if stats found.</em><a name="701" href="#701">701</a> <em> */</em><a name="702" href="#702">702</a> <strong>private</strong> <strong>boolean</strong> readSeedReport() {<a name="703" href="#703">703</a> File f = <strong>new</strong> File(cjob.getDirectory(), <span class="string">"seeds-report.txt"</span>);<a name="704" href="#704">704</a> <strong>if</strong> (!f.exists()) {<a name="705" href="#705">705</a> <strong>return</strong> false;<a name="706" href="#706">706</a> }<a name="707" href="#707">707</a> BufferedReader br = <strong>null</strong>;<a name="708" 
href="#708">708</a> <strong>try</strong> {<a name="709" href="#709">709</a> FileReader reader = <strong>new</strong> FileReader(f);<a name="710" href="#710">710</a> br = <strong>new</strong> BufferedReader(reader);<a name="711" href="#711">711</a> <a name="712" href="#712">712</a> <em class="comment">// Ignore heading: [code] [status] [seed] [redirect]</em><a name="713" href="#713">713</a> String line = br.readLine(); <a name="714" href="#714">714</a> line = br.readLine();<a name="715" href="#715">715</a> <strong>while</strong> (line != <strong>null</strong>) {<a name="716" href="#716">716</a> <em class="comment">// Example lines:</em><a name="717" href="#717">717</a> <em class="comment">// 302 CRAWLED http://www.ashlandcitytimes.com/ http://www.ashlandcitytimes.com/apps/pbcs.dll/section?Category=MTCN01</em><a name="718" href="#718">718</a> <em class="comment">// 200 CRAWLED http://noleeo.com/</em><a name="719" href="#719">719</a> <a name="720" href="#720">720</a> String[] items = line.split(<span class="string">" "</span>);<a name="721" href="#721">721</a> <a name="722" href="#722">722</a> <strong>if</strong> (items.length < 3) {<a name="723" href="#723">723</a> logger.log(Level.WARNING,<a name="724" href="#724">724</a> <span class="string">"Unexpected formatting on line ["</span> + line + <span class="string">"]"</span>);<a name="725" href="#725">725</a> }<a name="726" href="#726">726</a> <strong>else</strong> {<a name="727" href="#727">727</a> String statusCode = items[0];<a name="728" href="#728">728</a> String crawlStatus = items[1];<a name="729" href="#729">729</a> String seed = items[2];<a name="730" href="#730">730</a> String redirect = items.length > 3 ? 
items[3] : <strong>null</strong>;<a name="731" href="#731">731</a> <a name="732" href="#732">732</a> <em class="comment">// All values should be CRAWLED or NOTCRAWLED</em><a name="733" href="#733">733</a> <strong>if</strong> (crawlStatus.equals(<span class="string">"CRAWLED"</span>)) {<a name="734" href="#734">734</a> crawlStatus =org.archive.crawler.framework.StatisticsTracking.SEED_DISPOSITION_SUCCESS; <a name="735" href="#735">735</a> }<a name="736" href="#736">736</a> <strong>else</strong> {<a name="737" href="#737">737</a> crawlStatus = org.archive.crawler.framework.StatisticsTracking.SEED_DISPOSITION_FAILURE;<a name="738" href="#738">738</a> }<a name="739" href="#739">739</a> SeedRecord sr = <strong>new</strong> SeedRecord(seed, crawlStatus, <a name="740" href="#740">740</a> Integer.parseInt(statusCode), redirect);<a name="741" href="#741">741</a> processedSeedsRecords.put(seed, sr);<a name="742" href="#742">742</a> }<a name="743" href="#743">743</a> <a name="744" href="#744">744</a> line = br.readLine();<a name="745" href="#745">745</a> }<a name="746" href="#746">746</a> } <strong>catch</strong> (IOException e) {<a name="747" href="#747">747</a> logger.log(Level.SEVERE, <span class="string">"Reading "</span> + f.getAbsolutePath(), e); <a name="748" href="#748">748</a> } <strong>finally</strong> {<a name="749" href="#749">749</a> <strong>if</strong> (br != <strong>null</strong>) {<a name="750" href="#750">750</a> <strong>try</strong> {<a name="751" href="#751">751</a> br.close();<a name="752" href="#752">752</a> } <strong>catch</strong> (IOException e) {<a name="753" href="#753">753</a> logger.log(Level.SEVERE,<a name="754" href="#754">754</a> <span class="string">"Closing "</span> + f.getAbsolutePath(), e);<a name="755" href="#755">755</a> }<a name="756" href="#756">756</a> }<a name="757" href="#757">757</a> }<a name="758" href="#758">758</a> <strong>return</strong> <strong>true</strong>;<a name="759" href="#759">759</a> }<a name="760" href="#760">760</a> <a 
name="761" href="#761">761</a> <em>/**<em>*</em></em><a name="762" href="#762">762</a> <em> * Return a copy of the hosts distribution in reverse-sorted</em><a name="763" href="#763">763</a> <em> * (largest first) order.</em><a name="764" href="#764">764</a> <em> * </em><a name="765" href="#765">765</a> <em> * @return SortedMap of hosts distribution</em><a name="766" href="#766">766</a> <em> */</em><a name="767" href="#767">767</a> <strong>public</strong> SortedMap getReverseSortedHostsDistribution() {<a name="768" href="#768">768</a> <strong>return</strong> getReverseSortedCopy(hostsDistribution); <a name="769" href="#769">769</a> } <a name="770" href="#770">770</a> <a name="771" href="#771">771</a> <em>/**<em>*</em></em><a name="772" href="#772">772</a> <em> * @return True if we compiled stats, false if none to compile (e.g.</em><a name="773" href="#773">773</a> <em> * there are no reports files on disk).</em><a name="774" href="#774">774</a> <em> */</em><a name="775" href="#775">775</a> <strong>public</strong> <strong>boolean</strong> isStats() {<a name="776" href="#776">776</a> <strong>return</strong> <strong>this</strong>.stats;<a name="777" href="#777">777</a> }<a name="778" href="#778">778</a> }</pre><hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body></html>
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -