锘??xml version="1.0" encoding="utf-8" standalone="yes"?>亚洲精品网址在线观看,欧美日韩亚洲一区三区,欧美va天堂http://www.shnenglu.com/zzfmars/category/14823.htmlOpen Source Spirit zh-cnSun, 19 Jun 2011 06:34:54 GMTSun, 19 Jun 2011 06:34:54 GMT60 Lucene鍏ラ棬綰х瑪璁頒簲 -- 鍒嗚瘝鍣紝浣跨敤涓枃鍒嗚瘝鍣紝鎵╁睍璇嶅簱錛屽仠鐢ㄨ瘝http://www.shnenglu.com/zzfmars/archive/2011/04/17/144401.htmlKevin_ZhangKevin_ZhangSun, 17 Apr 2011 11:25:00 GMThttp://www.shnenglu.com/zzfmars/archive/2011/04/17/144401.htmlhttp://www.shnenglu.com/zzfmars/comments/144401.htmlhttp://www.shnenglu.com/zzfmars/archive/2011/04/17/144401.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/144401.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/144401.html1. 甯歌鐨勪腑鏂囧垎璇嶅櫒鏈夛細鏋佹槗鍒嗚瘝鐨?MMAnalyzer) 銆?/span>"搴栦竵鍒嗚瘝"鍒嗚瘝鍣?PaodingAnalzyer)銆両KAnalyzer 絳夌瓑銆傚叾涓?nbsp;MMAnalyzer 鍜?nbsp;PaodingAnalzyer 涓嶆敮鎸?nbsp;lucene3.0鍙婁互鍚庣増鏈?br>
   浣跨敤鏂瑰紡閮界被浼鹼紝鍦ㄦ瀯寤哄垎璇嶅櫒鏃?br>
     Analyzer analyzer 
= new [My]Analyzer(); 

     

2. 榪欓噷鍙ず渚?nbsp;IKAnalyzer錛岀洰鍓嶅彧鏈夊畠鏀寔Lucene3.0 浠ュ悗鐨勭増鏈?nbsp;

   棣栧厛闇瑕佸鍏?nbsp;IKAnalyzer3.
2.0Stable.jar 鍖?br>
 

3. 紺轟緥浠g爜

     view plaincopy to clipboardprint
?
public class AnalyzerTest {   
       @Test  
       
public void test() throws Exception {   
              String text 
= "An IndexWriter creates and maintains an index.";   
              
/* 鏍囧噯鍒嗚瘝鍣細鍗曞瓙鍒嗚瘝 */  
              Analyzer analyzer 
= new StandardAnalyzer(Version.LUCENE_30);   
              testAnalyzer(analyzer, text);   
    
              String text2 
= "嫻嬭瘯涓枃鐜涓嬬殑淇℃伅媯绱?/span>";   
              testAnalyzer(
new IKAnalyzer(), text2); // 浣跨敤IKAnalyzer錛岃瘝搴撳垎璇?nbsp;  
       }
   
    
       
/**  
        * 浣跨敤鎸囧畾鐨勫垎璇嶅櫒瀵規(guī)寚瀹氱殑鏂囨湰榪涜鍒嗚瘝錛屽茍鎵撳嵃緇撴灉  
        *  
        * 
@param analyzer  
        * 
@param text  
        * 
@throws Exception  
        
*/
  
       
private void testAnalyzer(Analyzer analyzer, String text) throws Exception {   
              System.out.println(
"褰撳墠浣跨敤鐨勫垎璇嶅櫒錛?/span>" + analyzer.getClass());   
    
              TokenStream tokenStream 
= analyzer.tokenStream("content"new StringReader(text));   
              tokenStream.addAttribute(TermAttribute.
class);   
    
              
while (tokenStream.incrementToken()) {   
                     TermAttribute termAttribute 
= tokenStream.getAttribute(TermAttribute.class);   
                     System.out.println(termAttribute.term());   
              }
   
       }
   
}
   
   
public class AnalyzerTest {
       @Test
       
public void test() throws Exception {
              String text 
= "An IndexWriter creates and maintains an index.";
              
/* 鏍囧噯鍒嗚瘝鍣細鍗曞瓙鍒嗚瘝 */
              Analyzer analyzer 
= new StandardAnalyzer(Version.LUCENE_30);
              testAnalyzer(analyzer, text);
 
              String text2 
= "嫻嬭瘯涓枃鐜涓嬬殑淇℃伅媯绱?/span>";
              testAnalyzer(
new IKAnalyzer(), text2); // 浣跨敤IKAnalyzer錛岃瘝搴撳垎璇?/span>
       }

 
       
/**
        * 浣跨敤鎸囧畾鐨勫垎璇嶅櫒瀵規(guī)寚瀹氱殑鏂囨湰榪涜鍒嗚瘝錛屽茍鎵撳嵃緇撴灉
        *
        * 
@param analyzer
        * 
@param text
        * 
@throws Exception
        
*/

       
private void testAnalyzer(Analyzer analyzer, String text) throws Exception {
              System.out.println(
"褰撳墠浣跨敤鐨勫垎璇嶅櫒錛?/span>" + analyzer.getClass());
 
              TokenStream tokenStream 
= analyzer.tokenStream("content"new StringReader(text));
              tokenStream.addAttribute(TermAttribute.
class);
 
              
while (tokenStream.incrementToken()) {
                     TermAttribute termAttribute 
= tokenStream.getAttribute(TermAttribute.class);
                     System.out.println(termAttribute.term());
              }

       }

}

  

3. 濡備綍鎵╁睍璇嶅簱錛氬緢澶氭儏鍐典笅錛屾垜浠彲鑳介渶瑕佸畾鍒惰嚜宸辯殑璇嶅簱錛屼緥濡?nbsp;XXX 鍏徃錛屾垜浠笇鏈涜繖鑳借鍒嗚瘝鍣ㄨ瘑鍒紝騫舵媶鍒嗘垚涓涓瘝銆?br>
   IKAnalyzer 鍙互寰堟柟渚跨殑瀹炵幇鎴戜滑鐨勮繖縐嶉渶姹傘?br>
   鏂板緩 IKAnalyzer.cfg.xml

     view plaincopy to clipboardprint
?
<?xml version="1.0" encoding="UTF-8"?>  
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">    
<properties>    
       
<!-- 1錛屾枃浠惰鏄?nbsp;UTF-8 緙栫爜銆?/span>2錛屼竴琛屽啓涓涓瘝 -->  
       
<!--鐢ㄦ埛鍙互鍦ㄨ繖閲岄厤緗嚜宸辯殑鎵╁睍瀛楀吀-->  
       
<entry key="ext_dict">/mydict.dic</entry>  
</properties>  
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> 
<properties> 
       
<!-- 1錛屾枃浠惰鏄?nbsp;UTF-8 緙栫爜銆?/span>2錛屼竴琛屽啓涓涓瘝 -->
       
<!--鐢ㄦ埛鍙互鍦ㄨ繖閲岄厤緗嚜宸辯殑鎵╁睍瀛楀吀-->
       
<entry key="ext_dict">/mydict.dic</entry>
</properties> 

       

       瑙f瀽錛?br>
               
<entry key="ext_dict">/mydict.dic</entry> 鎵╁睍浜嗕竴涓嚜宸辯殑璇嶅吀錛屽悕瀛楀彨 mydict.dic

               鍥犳鎴戜滑瑕佸緩涓涓枃鏈枃浠訛紝鍚嶄負錛歮ydict.dic  錛堟澶勪嬌鐢ㄧ殑 .dic 騫墮潪蹇呴』錛?br>
               鍦ㄨ繖涓枃鏈枃浠墮噷鍐欏叆錛?br>
                    鍖椾含XXXX縐戞妧鏈夐檺鍏徃

               榪欐牱灝辨坊鍔犱簡涓涓瘝姹囥?br>
               濡傛灉瑕佹坊鍔犲涓紝鍒欐柊璧蜂竴琛岋細

                    璇嶆眹涓

                    璇嶆眹浜?br>
                    璇嶆眹涓?br>
                    

               闇瑕佹敞鎰忕殑鏄紝榪欎釜鏂囦歡涓瀹氳浣跨敤 UTF
-8緙栫爜

 

4. 鍋滅敤璇嶏細

    鏈変簺璇嶅湪鏂囨湰涓嚭鐜扮殑棰戠巼闈炲父楂橈紝浣嗘槸瀵規(guī)枃鏈墍鎼哄甫鐨勪俊鎭熀鏈笉浜х敓褰卞搷錛屼緥濡傝嫳鏂囩殑
"a銆乤n銆乼he銆乷f"錛屾垨涓枃鐨?/span>"鐨勩佷簡銆佺潃"錛屼互鍙婂悇縐嶆爣鐐圭鍙風瓑錛岃繖鏍風殑璇嶇О涓哄仠鐢ㄨ瘝錛坰top word錛夈?br>
    鏂囨湰緇忚繃鍒嗚瘝涔嬪悗錛屽仠鐢ㄨ瘝閫氬父琚繃婊ゆ帀錛屼笉浼氳榪涜绱㈠紩銆傚湪媯绱㈢殑鏃跺欙紝鐢ㄦ埛鐨勬煡璇腑濡傛灉鍚湁鍋滅敤璇嶏紝媯绱㈢郴緇熶篃浼氬皢鍏惰繃婊ゆ帀錛堝洜涓虹敤鎴瘋緭鍏ョ殑鏌ヨ瀛楃涓蹭篃瑕佽繘琛屽垎璇嶅鐞嗭級銆?br>
    鎺掗櫎鍋滅敤璇嶅彲浠ュ姞蹇緩绔嬬儲寮曠殑閫熷害錛屽噺灝忕儲寮曞簱鏂囦歡鐨勫ぇ灝忋?br>
    IKAnalyzer 涓嚜瀹氫箟鍋滅敤璇嶄篃闈炲父鏂逛究錛屽拰閰嶇疆 
"鎵╁睍璇嶅簱" 鎿嶄綔綾誨瀷錛屽彧闇瑕佸湪 IKAnalyzer.cfg.xml 鍔犲叆濡備笅閰嶇疆錛?br>
       
<entry key="ext_stopwords">/ext_stopword.dic</entry> 

       鍚屾牱榪欎釜閰嶇疆涔熸寚鍚戜簡涓涓枃鏈枃浠?nbsp;
/ext_stopword.dic 錛堝悗緙鍚嶄換鎰忥級錛屾牸寮忓涓嬶細

           涔?br>
          浜?br>
          浠?br>
          浠?br>
          



鏈枃鏉ヨ嚜CSDN鍗氬錛岃漿杞借鏍囨槑鍑哄錛歨ttp:
//blog.csdn.net/wenlin56/archive/2010/12/13/6074124.aspx

]]>
緗戦〉瑙f瀽寮婧愰」鐩?/title><link>http://www.shnenglu.com/zzfmars/archive/2011/04/17/144369.html</link><dc:creator>Kevin_Zhang</dc:creator><author>Kevin_Zhang</author><pubDate>Sun, 17 Apr 2011 00:36:00 GMT</pubDate><guid>http://www.shnenglu.com/zzfmars/archive/2011/04/17/144369.html</guid><wfw:comment>http://www.shnenglu.com/zzfmars/comments/144369.html</wfw:comment><comments>http://www.shnenglu.com/zzfmars/archive/2011/04/17/144369.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.shnenglu.com/zzfmars/comments/commentRss/144369.html</wfw:commentRss><trackback:ping>http://www.shnenglu.com/zzfmars/services/trackbacks/144369.html</trackback:ping><description><![CDATA[<a >http://htmlparser.sourceforge.net/</a><br> <img src ="http://www.shnenglu.com/zzfmars/aggbug/144369.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.shnenglu.com/zzfmars/" target="_blank">Kevin_Zhang</a> 2011-04-17 08:36 <a href="http://www.shnenglu.com/zzfmars/archive/2011/04/17/144369.html#Feedback" target="_blank" style="text-decoration:none;">鍙戣〃璇勮</a></div>]]></description></item><item><title>涓涓?Java 鎼滅儲寮曟搸鐨勫疄鐜幫紝絎?2 閮ㄥ垎: 緗戦〉棰勫鐞?/title><link>http://www.shnenglu.com/zzfmars/archive/2011/04/16/144357.html</link><dc:creator>Kevin_Zhang</dc:creator><author>Kevin_Zhang</author><pubDate>Sat, 16 Apr 2011 12:36:00 GMT</pubDate><guid>http://www.shnenglu.com/zzfmars/archive/2011/04/16/144357.html</guid><wfw:comment>http://www.shnenglu.com/zzfmars/comments/144357.html</wfw:comment><comments>http://www.shnenglu.com/zzfmars/archive/2011/04/16/144357.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.shnenglu.com/zzfmars/comments/commentRss/144357.html</wfw:commentRss><trackback:ping>http://www.shnenglu.com/zzfmars/services/trackbacks/144357.html</trackback:ping><description><![CDATA[<div style="BORDER-BOTTOM: #cccccc 1px solid; BORDER-LEFT: #cccccc 1px solid; PADDING-BOTTOM: 4px; 
BACKGROUND-COLOR: #eeeeee; PADDING-LEFT: 4px; WIDTH: 98%; PADDING-RIGHT: 5px; FONT-SIZE: 13px; WORD-BREAK: break-all; BORDER-TOP: #cccccc 1px solid; BORDER-RIGHT: #cccccc 1px solid; PADDING-TOP: 4px"><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><span style="COLOR: #000000">鍦?nbsp;涓婁竴閮ㄥ垎 涓紝鎮(zhèn)ㄤ簡瑙e埌濡備綍緙栧啓涓涓?nbsp;spider 紼嬪簭鏉ヨ繘琛岀綉欏電殑鐖彇錛屼綔涓?nbsp;spider 鐨勭埇鍙栫粨鏋滐紝鎴戜滑鑾峰緱浜嗕竴涓寜鐓т竴瀹氭牸寮忓瓨鍌ㄧ殑鍘熷緗戦〉搴擄紝鍘熷緗戦〉搴撲篃鏄垜浠浜岄儴鍒嗙綉欏甸澶勭悊鐨勬暟鎹熀紜銆傜綉欏甸澶勭悊鐨勪富瑕佺洰鏍囨槸灝嗗師濮嬬綉欏甸氳繃涓姝ユ鐨勬暟鎹鐞嗗彉鎴愬彲鏂逛究鎼滅儲鐨勬暟鎹艦寮忋備笅闈㈠氨璁╂垜浠愭浠嬬粛緗戦〉棰勫鐞嗙殑璁捐鍜屽疄鐜般?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">棰勫鐞嗘ā鍧楃殑鏁翠綋緇撴瀯<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">棰勫鐞嗘ā鍧楃殑鏁翠綋緇撴瀯濡備笅錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥?nbsp;</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">. 
棰勫鐞嗘ā鍧楃殑鏁翠綋緇撴瀯<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">閫氳繃 spider 鐨勬敹闆嗭紝淇濆瓨涓嬫潵鐨勭綉欏典俊鎭叿鏈夎緝濂界殑淇℃伅瀛樺偍鏍煎紡錛屼絾鏄繕鏄湁涓涓己鐐癸紝灝辨槸涓嶈兘鎸夌収緗戦〉 URL 鐩存帴瀹氫綅鍒版墍鎸囧悜鐨勭綉欏點傛墍浠ワ紝鍦ㄧ涓涓祦紼嬩腑錛岄渶瑕佸厛寤虹珛緗戦〉鐨勭儲寮曪紝濡傛閫氳繃绱㈠紩錛屾垜浠彲浠ュ緢鏂逛究鐨勪粠鍘熷緗戦〉搴撲腑鑾峰緱鏌愪釜 URL 瀵瑰簲鐨勯〉闈俊鎭備箣鍚庯紝鎴戜滑澶勭悊緗戦〉鏁版嵁錛屽浜庝竴涓綉欏碉紝棣栧厛闇瑕佹彁鍙栧叾緗戦〉姝f枃淇℃伅錛屽叾嬈″姝f枃淇℃伅榪涜鍒嗚瘝錛屼箣鍚庡啀鏍規(guī)嵁鍒嗚瘝鐨勬儏鍐靛緩绔嬬儲寮曞拰鍊掓帓绱㈠紩錛岃繖鏍鳳紝緗戦〉鐨勯澶勭悊涔熷叏閮ㄥ畬鎴愩傚彲鑳借鑰呭浜庡叾涓殑鏌愪簺涓撲笟鏈浼氭湁涓浜涗笉鏄庣櫧涔嬪錛屽湪鍚庣畫璇﹁堪鍚勪釜嫻佺▼鐨勬椂鍊欎細緇欏嚭鐩稿簲鐨勫浘鎴栬呬緥瀛愭潵甯姪澶у鐞嗚В銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">寤虹珛绱㈠紩緗戦〉搴?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍘熷緗戦〉搴撴槸鎸夌収鏍煎紡瀛樺偍鐨勶紝榪欏浜庣綉欏電殑绱㈠紩寤虹珛鎻愪緵浜嗘柟渚匡紝涓嬪浘緇欏嚭浜嗕竴鏉$綉欏典俊鎭褰曪細<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">娓呭崟 </span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">. 
鍘熷緗戦〉搴撲腑鐨勪竴鏉$綉欏佃褰?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">                <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx     </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 涔嬪墠鐨勮褰?/span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span><span style="COLOR: #000000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> version:</span><span style="COLOR: #000000">1.0</span><span style="COLOR: #000000">                           </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 璁板綍澶撮儴</span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span><span style="COLOR: #000000"> url:http:</span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">ast.nlsde.buaa.edu.cn/ </span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span><span style="COLOR: #000000"> date:Mon Apr </span><span style="COLOR: #000000">05</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">14</span><span style="COLOR: #000000">:</span><span style="COLOR: #000000">22</span><span style="COLOR: #000000">:</span><span style="COLOR: #000000">53</span><span style="COLOR: #000000"> CST </span><span style="COLOR: #000000">2010</span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> IP:</span><span style="COLOR: #000000">218.241</span><span style="COLOR: #000000">.</span><span style="COLOR: #000000">236.72</span><span style="COLOR: #000000"> <br><img align=top 
src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> length:</span><span style="COLOR: #000000">3981</span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> </span><span style="COLOR: #000000"><!</span><span style="COLOR: #000000">DOCTYPE ……                     </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 璁板綍鏁版嵁閮ㄥ垎</span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span><span style="COLOR: #000000"> </span><span style="COLOR: #000000"><</span><span style="COLOR: #000000">html</span><span style="COLOR: #000000">></span><span style="COLOR: #000000"> …… </span><span style="COLOR: #000000"></</span><span style="COLOR: #000000">html</span><span style="COLOR: #000000">></span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx     </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 涔嬪悗鐨勮褰?/span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span><span style="COLOR: #000000"> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx     <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎴戜滑閲囩敤“緗戦〉搴撳悕鈥斿亸縐?#8221;鐨勪俊鎭鏉ュ畾浣嶅簱涓殑鏌愭潯緗戦〉璁板綍銆傜敱浜庢暟鎹噺姣旇緝澶э紝榪欎簺绱㈠紩緗戦〉淇℃伅闇瑕佷竴縐嶄繚瀛樼殑鏂規(guī)硶錛宒ySE 浣跨敤鏁版嵁搴撴潵淇濆瓨榪欎簺淇℃伅銆傛暟鎹簱浠噰鐢?nbsp;mysql錛岄厤鍚?nbsp;SQL</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">Front 杞歡鍙互杞繪澗榪涜鍥懼艦鐣岄潰鐨勬搷浣溿傛垜浠敤涓涓〃鏉ヨ褰曡繖浜涗俊鎭紝琛ㄧ殑鍐呭濡備笅錛歶rl銆乧ontent銆乷ffset銆乺aws銆俇RL 鏄煇鏉¤褰曞搴旂殑 URL錛屽洜涓虹儲寮曟暟鎹簱寤虹珛涔嬪悗錛屾垜浠槸閫氳繃 URL 
鏉ョ‘瀹氶渶瑕佺殑緗戦〉鐨勶紱raws 鍜?nbsp;offset 鍒嗗埆琛ㄧず緗戦〉搴撳悕鍜屽亸縐誨鹼紝榪欎袱涓睘鎬у敮涓紜畾浜嗘煇鏉¤褰曪紝content 鏄綉欏靛唴瀹圭殑鎽樿錛岀綉欏電殑鏁版嵁閲忎竴鑸緝澶э紝鎶婄綉欏電殑鍏ㄩ儴鍐呭鏀懼叆鏁版嵁搴撲腑鏄懼緱涓嶆槸寰堝疄闄咃紝鎵浠ユ垜浠皢緗戦〉鍐呭鐨?nbsp;MD5 鎽樿鏀懼叆鍒?nbsp;content 灞炴т腑錛岃灞炴х浉褰撲簬涓涓牎楠岀爜錛屽湪瀹為檯榪愮敤涓紝褰撴垜浠牴鎹?nbsp;URL 鑾峰緱鏌愪釜緗戦〉淇℃伅鏄紝鍙互灝嗚幏寰楃殑緗戦〉鍋?nbsp;MD5 鎽樿鐒跺悗涓?nbsp;content 涓殑鍊煎仛涓涓尮閰嶏紝濡傛灉涓鏍峰垯緗戦〉鑾峰彇鎴愬姛錛屽鏋滀笉涓鏍鳳紝鍒欒鏄庣綉欏佃幏鍙栧嚭鐜伴棶棰樸?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">榪欓噷綆鍗曚粙緇嶄竴涓?nbsp;mySql 鐨勫畨瑁呬互鍙婁笌 Java 鐨勮繛鎺ワ細<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀹夎 mySql錛屾渶濂介渶瑕佷笁涓粍浠訛紝mySql錛宮ySql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">front錛宮ysql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">connector</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">java</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">5.1</span><span style="COLOR: #000000">.</span><span style="COLOR: #000000">7</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">bin.jar錛屽垎鍒彲浠ュ湪緗戠粶涓笅杞姐傛敞鎰忥細瀹夎 mySql 涓?nbsp;mySql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">front 鐨勬椂鍊欒鐗堟湰瀵瑰簲錛孧ySql5.</span><span style="COLOR: #000000">0</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">+</span><span style="COLOR: #000000"> MySql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">Front3.</span><span style="COLOR: #000000">2</span><span style="COLOR: #000000"> 鍜?nbsp;MySql5.</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">+</span><span style="COLOR: #000000"> MySql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">Front4.</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛岃繖涓粍鍚堟槸涓嶈兘涔辯殑錛屽彲浠ユ牴鎹浉搴旂殑鐗堟湰鍙鋒潵涓嬭澆錛屽惁鍒欎細鐖?#8220;‘ </span><span style="COLOR: #000000">10.000000</span><span style="COLOR: #000000"> ’ ist kein 
gUltiger Integerwert ”鐨勯敊璇?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵煎叆 mysql</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">connector</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">java</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">5.1</span><span style="COLOR: #000000">.</span><span style="COLOR: #000000">7</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">bin.jar 鍒?nbsp;eclipse 鐨勯」鐩腑錛屾墦寮 eclipse錛屽彸閿偣闇瑕佸鍏?nbsp;jar 鍖呯殑欏?nbsp;鐩悕錛岄夊睘鎬э紙properties)錛屽啀閫?nbsp;java 鏋勫緩璺緞錛坖ava Build Path)錛屽悗鍦ㄥ彸渚х偣 (libraries)錛岄?nbsp;add external JARs錛屼箣鍚庨夋嫨浣犺瀵煎叆鐨?nbsp;jar 鍖呯‘瀹氥?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎺ョ潃灝卞彲浠ョ敤浠g爜鏉ユ祴璇曚笌 mySql 鐨勮繛鎺ヤ簡錛屼唬鐮佽鏈枃闄勫甫鐨?nbsp;testMySql.java 紼嬪簭錛岃繖閲岄檺浜庣瘒騫呭氨涓嶅湪璧樿堪銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵逛簬鏁版嵁搴撶殑鎿嶄綔錛屾垜浠渶濂借繘琛屼竴瀹氱殑灝佽錛屼互鎻愪緵緇熶竴鐨勬暟鎹簱鎿嶄綔鏀寔錛岃屼笉闇瑕佸湪鍏朵粬鐨勭被涓樉紺虹殑榪涜鏁版嵁搴撹繛鎺ユ搷浣滐紝鑰屼笖榪欐牱涔熷氨涓嶉渶瑕佸緩绔嬪ぇ閲忕殑鏁版嵁搴撹繛鎺ヤ粠鑰岄犳垚璧勬簮鐨勬氮璐癸紝浠g爜璇﹁ DBConnection.java銆備富瑕佹彁渚涚殑鎿嶄綔鏄細寤虹珛榪炴帴銆佹墽琛?nbsp;SQL 璇彞銆佽繑鍥炴搷浣滅粨鏋溿?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">浠嬬粛浜嗘暟鎹簱鐨勭浉鍏蟲搷浣滄椂鍊欙紝鐜板湪鎴戜滑鍙互鏉ュ畬鎴愮綉欏電儲寮曞簱鐨勫緩绔嬭繃紼嬨傝繖閲岃璇存槑鐨勬槸錛岀涓鏉¤褰曠殑鍋忕Щ鏄?nbsp;</span><span style="COLOR: #000000">0</span><span style="COLOR: #000000">錛屾墍浠ュ湪褰撳墠璁板綍 record 澶勭悊涔嬪墠錛岃璁板綍鐨勫亸縐繪槸宸茬粡璁$畻鍑烘潵鐨勶紝澶勭悊 record 鐨勬剰涔夊湪浜庤幏寰椾笅涓涓褰曞湪緗戦〉搴撲腑鐨勫亸縐匯傚亣璁懼綋鍓?nbsp;record 鐨勫亸縐諱負 offset錛屽畾浣嶄簬澶撮儴鐨勭涓鏉″睘鎬т箣鍓嶏紝鎴戜滑閫氳繃璇誨彇璁板綍鐨勫ご閮ㄥ拰璁板綍鐨勬暟鎹儴鍒嗘潵寰楀埌璇ヨ褰曠殑闀垮害 length錛屼粠鑰岋紝offset</span><span style="COLOR: #000000">+</span><span style="COLOR: #000000">length 鍗充負涓嬩竴鏉¤褰曠殑鍋忕Щ鍊箋傝鍙栧ご閮ㄥ拰璇誨彇璁板綍閮芥槸閫氳繃鏁版嵁闂寸殑絀鴻鏉ユ爣璇嗙殑錛屽叾浼唬鐮佸涓嬶細<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">娓呭崟 </span><span style="COLOR: #000000">2</span><span style="COLOR: 
#000000">. 绱㈠紩緗戦〉搴撳緩绔?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">                <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">For each record in Raws </span><span style="COLOR: #0000ff">do</span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">begin <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">    璇誨彇 record 鐨勫ご閮ㄥ拰鏁版嵁錛屼粠澶撮儴涓娊鍙?nbsp;URL錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">    璁$畻澶撮儴鍜屾暟鎹殑闀垮害錛屽姞鍒板綋鍓嶅亸縐誨間笂寰楀埌鏂扮殑鍋忕Щ錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">    浠?nbsp;record 涓暟鎹腑璁$畻鍏?nbsp;MD5 鎽樿鍊鹼紱<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">    灝嗘暟鎹彃鍏ユ暟鎹簱涓紝鍖呮嫭錛歎RL銆佸亸縐匯佹暟鎹?nbsp;MD5 鎽樿銆丷aws錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">end錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎮(zhèn)ㄥ彲鑳戒細瀵?nbsp;MD5 鎽樿綆楁硶鏈変簺鐤戞儜錛岃繖鏄粈涔堬紵榪欐湁浠涔堢敤錛?nbsp;Message Digest Algorithm MD5錛堜腑鏂囧悕涓烘秷鎭憳瑕佺畻娉曠浜旂増錛変負璁$畻鏈哄畨鍏ㄩ鍩熷箍娉涗嬌鐢ㄧ殑涓縐嶆暎鍒楀嚱鏁幫紝鐢ㄤ互鎻愪緵娑堟伅鐨勫畬鏁存т繚鎶ゃ侻D5 鐨勫吀鍨嬪簲鐢ㄦ槸瀵逛竴孌典俊鎭?nbsp;(Message) 浜х敓涓涓?nbsp;</span><span style="COLOR: #000000">128</span><span style="COLOR: #000000"> 浣嶇殑浜岃繘鍒朵俊鎭憳瑕?nbsp;(Message</span><span style="COLOR: #000000">-</span><span style="COLOR: #000000">Digest)錛屽嵆涓?nbsp;</span><span style="COLOR: #000000">32</span><span style="COLOR: #000000"> 浣?nbsp;</span><span style="COLOR: #000000">16</span><span style="COLOR: #000000"> 榪涘埗鏁板瓧涓詫紝浠ラ槻姝㈣綃℃敼銆傚浜庢垜浠潵璇達紝姣斿閫氳繃 MD5 璁$畻錛屾煇涓綉欏墊暟鎹殑鎽樿鏄?nbsp;00902914CFE6CD1A959C31C076F49EA8錛屽鏋滄垜浠換鎰忕殑鏀瑰彉榪欎釜緗戦〉涓殑鏁版嵁錛岄氳繃璁$畻涔嬪悗錛岃鎽樿灝變細鏀瑰彉錛屾垜浠彲浠ュ皢淇℃伅鐨?nbsp;MD5 鎽樿瑙嗕綔涓鴻淇℃伅鐨勬寚綰逛俊鎭傛墍浠ワ紝瀛樺偍璇ユ憳瑕佸彲浠ラ獙璇佷箣鍚庤幏鍙栫殑緗戦〉淇℃伅鏄惁涓庡師濮嬬綉欏典竴鑷淬?br><img align=top 
src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵?nbsp;MD5 綆楁硶綆瑕佺殑鍙欒堪鍙互涓猴細MD5 浠?nbsp;</span><span style="COLOR: #000000">512</span><span style="COLOR: #000000"> 浣嶅垎緇勬潵澶勭悊杈撳叆鐨勪俊鎭紝涓旀瘡涓鍒嗙粍鍙堣鍒掑垎涓?nbsp;</span><span style="COLOR: #000000">16</span><span style="COLOR: #000000"> 涓?nbsp;</span><span style="COLOR: #000000">32</span><span style="COLOR: #000000"> 浣嶅瓙鍒嗙粍錛岀粡榪囦簡涓緋誨垪鐨勫鐞嗗悗錛岀畻娉曠殑杈撳嚭鐢卞洓涓?nbsp;</span><span style="COLOR: #000000">32</span><span style="COLOR: #000000"> 浣嶅垎緇勭粍鎴愶紝灝嗚繖鍥涗釜 </span><span style="COLOR: #000000">32</span><span style="COLOR: #000000"> 浣嶅垎緇勭駭鑱斿悗灝嗙敓鎴愪竴涓?nbsp;</span><span style="COLOR: #000000">128</span><span style="COLOR: #000000"> 浣嶆暎鍒楀箋傚叾涓?#8220;涓緋誨垪鐨勫鐞?#8221;鍗充負璁$畻嫻佺▼錛孧D5 鐨勮綆楁祦紼嬫瘮杈冨錛屼絾鏄笉闅撅紝鍚屾椂涔熶笉闅懼疄鐜幫紝鎮(zhèn)ㄥ彲浠ョ洿鎺ヤ嬌鐢ㄧ綉涓婄幇鏈夌殑 java 鐗堟湰瀹炵幇鎴栬呬嬌鐢ㄦ湰鏁欑▼鎻愪緵鐨勬簮鐮佷笅杞戒腑鐨?nbsp;MD5 綾匯傚浜?nbsp;MD5錛屾垜浠煡閬撳叾鍔熻兘錛岃兘浣跨敤灝卞彲浠ワ紝鍏蜂綋鐨勬瘡涓楠ょ殑鎰忎箟涓嶉渶瑕佹繁鍏ョ悊瑙c?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">姝f枃淇℃伅鎶藉彇<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">PageGetter<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍦ㄦ鏂囦俊鎭娊鍙栦箣鍓嶏紝鎴戜滑棣栧厛闇瑕佷竴涓畝鍗曠殑宸ュ叿綾伙紝璇ュ伐鍏風被鍙互鍙栧嚭鏁版嵁搴撲腑鐨勫唴瀹瑰茍涓斿幓鍘熷緗戦〉闆嗕腑鑾峰緱緗戦〉淇℃伅錛宒ySE 瀵逛簬璇ュ姛鑳界殑瀹炵幇鍦?nbsp;originalPageGetter.java 涓紝璇ョ被閫氳繃 URL 浠庢暟鎹簱涓幏寰楄 URL 瀵瑰簲鐨勭綉欏墊暟鎹殑鎵鍦ㄧ綉欏靛簱鍚嶄互鍙婂亸縐伙紝鐒跺悗灝卞彲浠ユ牴鎹亸縐繪潵璇誨彇璇ョ綉欏電殑鏁版嵁鍐呭錛屽悓鏍蜂互鍘熷緗戦〉闆嗕腑鍚勮褰曢棿鐨勭┖琛屼綔涓烘暟鎹唴瀹圭殑緇撴潫鏍囪錛岃鍙栧唴瀹逛箣鍚庯紝閫氳繃 MD5 璁$畻褰撳墠璇誨彇鐨勫唴瀹圭殑鎽樿錛屾牎楠屾槸鍚︿笌涔嬪墠鐨勬憳瑕佷竴鑷淬傚浜庡亸縐葷殑浣跨敤錛孊ufferedReader 綾繪彁渚涗竴涓?nbsp;skip(</span><span style="COLOR: #0000ff">int</span><span style="COLOR: #000000"> offset) 鐨勫嚱鏁幫紝鍏朵綔鐢ㄦ槸璺寵繃鏂囨。涓紝浠庡綋鍓嶅紑濮嬭綆楃殑 offset 涓瓧絎︼紝鐢ㄨ繖涓嚱鏁版垜浠氨鍙互瀹氫綅鍒版垜浠渶瑕佺殑璁板綍銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">娓呭崟 </span><span style="COLOR: 
#000000">3</span><span style="COLOR: #000000">. 鑾峰彇鍘熷緗戦〉搴撲腑鍐呭<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">                <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> String getContent(String fileName, </span><span style="COLOR: #0000ff">int</span><span style="COLOR: #000000"> offset) <br><img id=Codehighlighter1_3538_3906_Open_Image onclick="this.style.display='none'; Codehighlighter1_3538_3906_Open_Text.style.display='none'; Codehighlighter1_3538_3906_Closed_Image.style.display='inline'; Codehighlighter1_3538_3906_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_3538_3906_Closed_Image onclick="this.style.display='none'; Codehighlighter1_3538_3906_Closed_Text.style.display='none'; Codehighlighter1_3538_3906_Open_Image.style.display='inline'; Codehighlighter1_3538_3906_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedBlock.gif"> </span><span style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_3538_3906_Closed_Text><img src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_3538_3906_Open_Text><span style="COLOR: #000000">{ <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">     String content </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">""</span><span style="COLOR: #000000">; <br><img id=Codehighlighter1_3577_3833_Open_Image onclick="this.style.display='none'; Codehighlighter1_3577_3833_Open_Text.style.display='none'; 
Codehighlighter1_3577_3833_Closed_Image.style.display='inline'; Codehighlighter1_3577_3833_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_3577_3833_Closed_Image onclick="this.style.display='none'; Codehighlighter1_3577_3833_Closed_Text.style.display='none'; Codehighlighter1_3577_3833_Open_Image.style.display='inline'; Codehighlighter1_3577_3833_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedSubBlock.gif">     </span><span style="COLOR: #0000ff">try</span><span style="COLOR: #000000"> </span><span style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_3577_3833_Closed_Text><img src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_3577_3833_Open_Text><span style="COLOR: #000000">{ <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">         FileReader fileReader </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> FileReader(fileName); <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">         BufferedReader bfReader </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> BufferedReader(fileReader); <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">         bfReader.skip(offset); <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">         readRawHead(bfReader); <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">         content </span><span 
style="COLOR: #000000">=</span><span style="COLOR: #000000"> readRawContent(bfReader);         <br><img id=Codehighlighter1_3855_3876_Open_Image onclick="this.style.display='none'; Codehighlighter1_3855_3876_Open_Text.style.display='none'; Codehighlighter1_3855_3876_Closed_Image.style.display='inline'; Codehighlighter1_3855_3876_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_3855_3876_Closed_Image onclick="this.style.display='none'; Codehighlighter1_3855_3876_Closed_Text.style.display='none'; Codehighlighter1_3855_3876_Open_Image.style.display='inline'; Codehighlighter1_3855_3876_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedSubBlock.gif">     }</span></span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">catch</span><span style="COLOR: #000000"> (Exception e) </span><span style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_3855_3876_Closed_Text><img src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_3855_3876_Open_Text><span style="COLOR: #000000">{e.printStackTrace();}</span></span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">     </span><span style="COLOR: #0000ff">return</span><span style="COLOR: #000000"> content;     <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedBlockEnd.gif"> }</span></span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">涓婅堪浠g爜涓紝鐪佺暐浜?nbsp;readRawHead 鍜?nbsp;readRawContent 
鐨勫疄鐜幫紝榪欎簺閮芥槸鍩烘湰鐨?nbsp;I</span><span style="COLOR: #000000">/</span><span style="COLOR: #000000">O 鎿嶄綔錛岃瑙佹墍闄勬簮鐮併?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">姝f枃鎶藉彇<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵逛簬鑾峰緱鐨勫崟涓綉欏墊暟鎹紝鎴戜滑灝卞彲浠ヨ繘琛屼笅涓姝ョ殑澶勭悊錛岄鍏堣鍋氱殑灝辨槸姝f枃鍐呭鐨勬娊鍙栵紝浠庤屽墧闄ょ綉欏典腑鐨勬爣絳懼唴瀹癸紝榪欎竴姝ョ殑鎿嶄綔涓昏閲囩敤姝e垯琛ㄨ揪寮忔潵瀹屾垚銆傛垜浠敤姝e垯琛ㄨ揪寮忔潵鍖歸厤 html 鐨勬爣絳撅紝騫朵笖鎶婂尮閰嶅埌鐨勬爣絳懼垹闄わ紝鏈鍚庯紝鍓╀笅鐨勫唴瀹瑰氨鏄綉欏墊鏂囥傞檺浜庣瘒騫咃紝鎴戜滑浠ヨ繃婊?nbsp;script 鏍囩涓虹ず渚嬶紝鍏朵唬鐮佸涓?nbsp;:<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">娓呭崟 </span><span style="COLOR: #000000">4</span><span style="COLOR: #000000">. 鏍囩榪囨護<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">                <br><img id=Codehighlighter1_4196_4688_Open_Image onclick="this.style.display='none'; Codehighlighter1_4196_4688_Open_Text.style.display='none'; Codehighlighter1_4196_4688_Closed_Image.style.display='inline'; Codehighlighter1_4196_4688_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_4196_4688_Closed_Image onclick="this.style.display='none'; Codehighlighter1_4196_4688_Closed_Text.style.display='none'; Codehighlighter1_4196_4688_Open_Image.style.display='inline'; Codehighlighter1_4196_4688_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedBlock.gif"> </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> String html2Text(String inputString) </span><span style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_4196_4688_Closed_Text><img 
src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_4196_4688_Open_Text><span style="COLOR: #000000">{        <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">     String htmlStr </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> inputString; </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 鍚?nbsp;html 鏍囩鐨勫瓧絎︿覆    </span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif"></span><span style="COLOR: #000000">     Pattern p_script;    Matcher m_script;      <br><img id=Codehighlighter1_4321_4608_Open_Image onclick="this.style.display='none'; Codehighlighter1_4321_4608_Open_Text.style.display='none'; Codehighlighter1_4321_4608_Closed_Image.style.display='inline'; Codehighlighter1_4321_4608_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_4321_4608_Closed_Image onclick="this.style.display='none'; Codehighlighter1_4321_4608_Closed_Text.style.display='none'; Codehighlighter1_4321_4608_Open_Image.style.display='inline'; Codehighlighter1_4321_4608_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedSubBlock.gif">     </span><span style="COLOR: #0000ff">try</span><span style="COLOR: #000000"> </span><span style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_4321_4608_Closed_Text><img src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_4321_4608_Open_Text><span style="COLOR: #000000">{ <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">            String regEx_script </span><span style="COLOR: 
#000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">&lt;script[^&gt;]*?&gt;[\\s\\S]*?&lt;/script&gt;</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">;<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">            p_script </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> Pattern.compile(regEx_script,Pattern.CASE_INSENSITIVE);    <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">            m_script </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> p_script.matcher(htmlStr);    <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">            htmlStr </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> m_script.replaceAll(</span><span style="COLOR: #000000">""</span><span style="COLOR: #000000">); </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 榪囨護 script 鏍囩    </span><span style="COLOR: #008000"><br><img id=Codehighlighter1_4628_4649_Open_Image onclick="this.style.display='none'; Codehighlighter1_4628_4649_Open_Text.style.display='none'; Codehighlighter1_4628_4649_Closed_Image.style.display='inline'; Codehighlighter1_4628_4649_Closed_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif"><img style="DISPLAY: none" id=Codehighlighter1_4628_4649_Closed_Image onclick="this.style.display='none'; Codehighlighter1_4628_4649_Closed_Text.style.display='none'; Codehighlighter1_4628_4649_Open_Image.style.display='inline'; Codehighlighter1_4628_4649_Open_Text.style.display='inline';" align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ContractedSubBlock.gif"></span><span style="COLOR: #000000">     }</span></span><span style="COLOR: #0000ff">catch</span><span style="COLOR: #000000">(Exception e) </span><span 
style="BORDER-BOTTOM: #808080 1px solid; BORDER-LEFT: #808080 1px solid; BACKGROUND-COLOR: #ffffff; DISPLAY: none; BORDER-TOP: #808080 1px solid; BORDER-RIGHT: #808080 1px solid" id=Codehighlighter1_4628_4649_Closed_Text><img src="http://www.shnenglu.com/Images/dot.gif"></span><span id=Codehighlighter1_4628_4649_Open_Text><span style="COLOR: #000000">{e.printStackTrace();}</span></span><span style="COLOR: #000000"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/InBlock.gif">     </span><span style="COLOR: #0000ff">return</span><span style="COLOR: #000000"> htmlStr;</span><span style="COLOR: #008000">//</span><span style="COLOR: #008000"> 榪斿洖鏂囨湰瀛楃涓?nbsp;   </span><span style="COLOR: #008000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/ExpandedBlockEnd.gif"></span><span style="COLOR: #000000"> }</span></span><span style="COLOR: #000000"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">閫氳繃涓緋誨垪鐨勬爣絳捐繃婊わ紝鎴戜滑鍙互寰楀埌緗戦〉鐨勬鏂囧唴瀹癸紝灝卞彲浠ョ敤浜庝笅涓姝ョ殑鍒嗚瘝浜嗐?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍒嗚瘝<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">涓枃鍒嗚瘝鏄寚灝嗕竴涓眽瀛楀簭鍒楀垏鍒嗘垚涓涓竴涓崟鐙殑璇嶏紝浠庤岃揪鍒拌綆楁満鍙互鑷姩璇嗗埆鐨勬晥鏋溿備腑鏂囧垎璇嶄富瑕佹湁涓夌鏂規(guī)硶錛氱涓縐嶅熀浜庡瓧絎︿覆鍖歸厤錛岀浜岀鍩轟簬璇箟鐞嗚В錛岀涓夌鍩轟簬緇熻銆傜敱浜庣浜屽拰絎笁縐嶇殑瀹炵幇闇瑕佸ぇ閲忕殑鏁版嵁鏉ユ敮鎸侊紝鎵浠ユ垜浠噰鐢ㄧ殑鏄熀浜庡瓧絎︿覆鍖歸厤鐨勬柟娉曘?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍩轟簬瀛楃涓插尮閰嶇殑鏂規(guī)硶鍙堝彨鍋氭満姊板垎璇嶆柟娉曪紝瀹冩槸鎸夌収涓瀹氱殑絳栫暐灝嗗緟鍒嗘瀽鐨勬眽瀛椾覆涓庝竴涓?#8220;鍏呭垎澶х殑”鏈哄櫒璇嶅吀涓殑璇嶆潯榪涜閰嶏紝鑻ュ湪璇嶅吀涓壘鍒版煇涓瓧絎︿覆錛屽垯鍖歸厤鎴愬姛錛堣瘑鍒嚭涓涓瘝錛夈傛寜鐓ф壂鎻忔柟鍚戠殑涓嶅悓錛屼覆鍖歸厤鍒嗚瘝鏂規(guī)硶鍙互鍒嗕負姝e悜鍖歸厤鍜岄嗗悜鍖歸厤錛涙寜鐓т笉鍚岄暱搴︿紭鍏堝尮閰嶇殑鎯呭喌錛屽彲浠ュ垎涓烘渶澶э紙鏈闀匡級鍖歸厤鍜屾渶灝忥紙鏈鐭級鍖歸厤銆傚父鐢ㄧ殑鍑犵鏈烘鍒嗚瘝鏂規(guī)硶濡備笅錛?br><img align=top 
src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">姝e悜鍑忓瓧鏈澶у尮閰嶆硶錛堢敱宸﹀埌鍙崇殑鏂瑰悜錛夛紱<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">閫嗗悜鍑忓瓧鏈澶у尮閰嶆硶錛堢敱鍙沖埌宸︾殑鏂瑰悜錛夛紱<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鏈灝戝垏鍒嗭紙浣挎瘡涓鍙ヤ腑鍒囧嚭鐨勮瘝鏁版渶灝忥級錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍙屽悜鏈澶у噺瀛楀尮閰嶆硶錛堣繘琛岀敱宸﹀埌鍙熾佺敱鍙沖埌宸︿袱嬈℃壂鎻忥級錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎴戜滑閲囩敤鍏朵腑鐨勬鍚戞渶澶у尮閰嶆硶銆傜畻娉曟弿榪板涓嬶細杈撳叆鍊間負涓涓腑鏂囪鍙?nbsp;S錛屼互鍙婃渶澶у尮閰嶈瘝 n<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍙?nbsp;S 涓墠 n 涓瓧錛屾牴鎹瘝鍏稿鍏惰繘琛屽尮閰嶏紝鑻ュ尮閰嶆垚鍔燂紝杞?nbsp;</span><span style="COLOR: #000000">3</span><span style="COLOR: #000000">錛屽惁鍒欒漿 </span><span style="COLOR: #000000">2</span><span style="COLOR: #000000">錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">n </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> n – </span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛氬鏋?nbsp;n 涓?nbsp;</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛岃漿 </span><span style="COLOR: #000000">3</span><span style="COLOR: #000000">錛涘惁鍒欒漿 </span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">灝?nbsp;S 涓殑鍓?nbsp;n 涓瓧浣滀負鍒嗚瘝緇撴灉鐨勪竴閮ㄥ垎錛孲 闄ゅ幓鍓?nbsp;n 涓瓧錛岃嫢 S 涓虹┖錛岃漿 </span><span style="COLOR: #000000">4</span><span style="COLOR: #000000">錛涘惁鍒欙紝杞?nbsp;</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">綆楁硶緇撴潫銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">闇瑕佽鏄庣殑鏄紝鍦ㄧ涓夋鐨勮搗濮嬶紝n 濡傛灉涓嶄負 </span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛屽垯鎰忓懗鐫鏈夊尮閰嶅埌鐨勮瘝錛涜屽鏋?nbsp;n 
涓?nbsp;</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛屾垜浠粯璁?nbsp;</span><span style="COLOR: #000000">1</span><span style="COLOR: #000000"> 涓瓧鏄簲璇ヨ繘鍏ュ垎璇嶇粨鏋滅殑錛屾墍浠ョ涓夋鍙互灝嗗墠 n 涓瓧浣滀負涓涓瘝鑰屽垎鍓插紑鏉ャ傝繕鏈夐渶瑕佹敞鎰忕殑鏄浜庡仠鐢ㄨ瘝鐨勮繃婊わ紝鍋滅敤璇嶅嵆姹夎涓?#8220;鐨勶紝浜嗭紝鍜岋紝涔?#8221;絳夊瓧璇嶏紝鍦ㄦ悳绱㈠紩鎿庝腑鏄拷鐣ョ殑錛屾墍浠ュ浜庡垎璇嶅悗鐨勭粨鏋滐紝鎴戜滑闇瑕佸湪鐢ㄥ仠鐢ㄨ瘝鍒楄〃榪涜涓涓嬪仠鐢ㄨ瘝榪囨護銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎮(zhèn)ㄤ篃璁告湁鐤戦棶錛屽浣曡幏寰楀垎璇嶅瓧鍏告垨鑰呮槸鍋滅敤璇嶅瓧鍏搞傚仠鐢ㄨ瘝瀛楀吀姣旇緝濂藉姙錛岀敱浜庝腑鏂囧仠鐢ㄨ瘝鏁伴噺鏈夐檺錛屽彲浠ヤ粠緗戜笂鑾峰緱鍋滅敤璇嶅垪琛紝浠庤岃嚜宸卞緩涓涓仠鐢ㄨ瘝瀛楀吀錛涚劧鑰屽浜庡垎璇嶅瓧鍏革紝铏界劧緗戜笂鏈夎澶氱煡鍚嶇殑姹夊瓧鍒嗚瘝杞歡錛屼絾鏄緢灝戞湁鍒嗚瘝鐨勫瓧鍏告彁渚涳紝榪欓噷鎴戜滑鎻愪緵涓浜涘湪 dySE 涓嬌鐢ㄧ殑鍒嗚瘝瀛楀吀緇欐?zhèn)ㄣ傚湪紼嬪簭浣跨敤榪囩▼涓紝鍒嗚瘝瀛楀吀鍙互鏀懼叆涓涓泦鍚堜腑錛岃繖鏍峰氨鍙互姣旇緝鏂逛究鐨勮繘琛屾瘮瀵瑰伐浣溿?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍒嗚瘝鐨勭粨鏋滃浜庢悳绱㈢殑綺懼噯鎬ф湁鐫鑷沖叧閲嶈鐨勫獎鍝嶏紝濂界殑鍒嗚瘝絳栫暐緇忓父鏄敱鑻ュ共涓畝鍗曠畻娉曟嫾鎺ヨ屾垚鐨勶紝鎵浠ユ?zhèn)ㄤ篃鍙互璇曠潃瀹炵幇鍙屽悜鏈澶у噺瀛楀尮閰嶆硶鏉ユ彁楂樺垎璇嶇殑鍑嗙‘鐜囥傝屽鏋滈亣鍒版涔夎瘝緇勶紝鍙互閫氳繃瀛楀吀涓檮甯︾殑璇嶉鏉ュ喅瀹氬摢縐嶅垎璇嶇殑緇撴灉鏇村ソ銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍊掓帓绱㈠紩<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">榪欎釜绔犺妭鎴戜滑涓烘?zhèn)ㄨ瑙i澶勭悊妯″潡鐨勬渶鍚庝袱涓楠わ紝绱㈠紩鐨勫緩绔嬪拰鍊掓帓绱㈠紩鐨勫緩绔嬨傛湁浜嗗垎璇嶇殑緇撴灉錛屾垜浠氨鍙互鑾峰緱涓涓鍚戠殑绱㈠紩錛屽嵆鏌愪釜緗戦〉浠ュ強鍏跺搴旂殑鍒嗚瘝緇撴灉銆傚涓嬪浘鎵紺猴細<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥?nbsp;</span><span style="COLOR: #000000">2</span><span style="COLOR: #000000">. 姝e悜绱㈠紩<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"><br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥?nbsp;</span><span style="COLOR: #000000">3</span><span style="COLOR: #000000">. 
鍊掓帓绱㈠紩<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"> <br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍦ㄦ湰鏂囩殑寮澶達紝鎴戜滑寤虹珛浜嗙儲寮曠綉欏靛簱錛岀敤浜庨氳繃 URL 鍙互鐩存帴瀹氫綅鍒板師濮嬬綉欏靛簱涓 URL 瀵瑰簲鐨勬暟鎹殑浣嶇疆錛涜岀幇鍦ㄧ殑姝e悜绱㈠紩錛屾垜浠彲浠ラ氳繃鏌愪釜緗戦〉鐨?nbsp;URL 寰楀埌璇ョ綉欏電殑鍒嗚瘝淇℃伅銆傝幏寰楁鍚戠儲寮曠湅浼煎浜庢垜浠殑鍗沖皢榪涜鐨勬煡璇㈡搷浣滄病鏈変粈涔堝疄闄呯殑甯姪錛屽洜涓烘煡璇㈡湇鍔℃槸閫氳繃鍏抽敭璇嶆潵鑾峰緱緗戦〉淇℃伅錛岃屾鍚戠儲寮曞茍涓嶈兘閫氳繃鍒嗚瘝緇撴灉鍙嶆煡緗戦〉淇℃伅銆傚叾瀹烇紝鎴戜滑寤虹珛姝e悜绱㈠紩鐨勭洰鐨勫氨鏄氳繃緲昏漿鐨勬搷浣滃緩绔嬪掓帓绱㈠紩銆傛墍璋撳掓帓灝辨槸鐩稿浜庢鍚戠儲寮曚腑緗戦〉鈥斺斿垎璇嶇粨鏋滅殑鏄犲皠鏂瑰紡錛岄噰鐢ㄥ垎璇嶁斺斿搴旂殑緗戦〉榪欑鏄犲皠鏂瑰紡銆備笌鍥?nbsp;</span><span style="COLOR: #000000">2</span><span style="COLOR: #000000"> 鐩稿搴旂殑鍊掓帓绱㈠紩濡備笂鍥?nbsp;</span><span style="COLOR: #000000">3</span><span style="COLOR: #000000"> 鎵紺恒?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鎺ヤ笅鏉ユ垜浠垎鏋愬浣曚粠姝e悜绱㈠紩鏉ュ緱鍒板掓帓绱㈠紩銆傜畻娉曡繃紼嬪涓嬶細<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵逛簬緗戦〉 i錛岃幏鍙栧叾鍒嗚瘝鍒楄〃 List錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">瀵逛簬 List 涓殑姣忎釜璇嶇粍錛屾煡鐪嬪掓帓绱㈠紩涓槸鍚﹀惈鏈夎繖涓瘝緇勶紝濡傛灉娌℃湁錛屽皢榪欎釜璇嶇粍鎻掑叆鍊掓帓绱㈠紩鐨勭儲寮曢」錛屽茍灝嗙綉欏?nbsp;i 鍔犲埌鍏剁儲寮曞間腑錛涘鏋滃掓帓绱㈠紩涓凡緇忓惈鏈夎繖涓瘝緇勶紝鐩存帴灝嗙綉欏?nbsp;i 鍔犲埌鍏剁儲寮曞間腑錛?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">濡傛灉榪樻湁緗戦〉灝氭湭鍒嗘瀽錛岃漿 </span><span style="COLOR: #000000">1</span><span style="COLOR: #000000">錛涘惁鍒欙紝緇撴潫<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">寤虹珛鍊掓帓绱㈠紩鐨勭畻娉曚笉闅懼疄鐜幫紝涓昏鏄叾涓暟鎹粨鏋勭殑閫夌敤錛屽湪 dySE 涓紝姝e悜绱㈠紩鍜屽掓帓绱㈠紩閮芥槸閲囩敤 HashMap 鏉ュ瓨鍌紝鏄犲皠涓鍚戠儲寮曠殑閿槸閲囩敤緗戦〉 URL 瀵瑰簲鐨勫瓧絎︿覆錛岃屽掓帓绱㈠紩鏄噰鐢ㄥ垎璇嶈瘝緇勶紝鏄犲皠涓殑鍊鹼紝鍓嶈呮槸涓涓垎璇嶅垪琛紝鍚庤呮槸涓涓?nbsp;URL 鐨勫瓧絎︿覆鍒楄〃銆傝繖閲屽彲浠ラ噰鐢ㄤ竴涓紭鍖栵紝鍒嗗埆寤虹珛涓や釜琛紝鎸夌収鏍囧彿瀛樺偍鍒嗚瘝鍒楄〃鍜?nbsp;URL 鍒楄〃錛岃繖鏍鳳紝绱㈠紩涓殑鍊煎氨鍙互浣跨敤鏁村瀷鍙橀噺鍒楄〃鏉ヨ妭鐪佺┖闂淬?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍒濇瀹為獙<br><img align=top 
src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍒扮洰鍓嶄負姝紝铏界劧鎴戜滑榪樻病鏈夋寮忕殑鏌ヨ杈撳叆鐣岄潰浠ュ強緇撴灉榪斿洖欏甸潰錛屼絾榪欎笣姣笉褰卞搷鎴戜滑鏉ュ鎴戜滑鐨勬悳绱㈠紩鎿庤繘琛屽垵姝ョ殑瀹為獙銆傚湪鍊掓帓绱㈠紩寤虹珛浠ュ悗錛屾垜浠湪紼嬪簭涓幏寰椾竴涓掓帓绱㈠紩鐨勫疄渚嬶紝鐒跺悗瀹氫箟涓涓悳绱㈢殑瀛楃涓詫紝鐩存帴鍦ㄥ掓帓绱㈠紩涓亶鍘嗚繖涓瓧絎︿覆錛岀劧鍚庤繑鍥炶璇嶇粍鎵鎸囧悜鐨勫掓帓绱㈠紩涓殑 URL 鍒楄〃鍗沖彲銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">灝忕粨<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">緗戦〉鐨勯澶勭悊鏄悳绱㈠紩鎿庣殑鏍稿績閮ㄥ垎錛屽緩绔嬬儲寮曠綉欏靛簱鏄負浜嗙綉欏墊暟鎹洿鏂逛究鐨勪粠鍘熷緗戦〉搴撲腑鑾峰彇錛岃屾娊鍙栨鏂囦俊鎭槸鍚庣畫鎿嶄綔鐨勫熀紜銆備粠鍒嗚瘝寮濮嬪氨姝e紡娑夊強鍒版悳绱㈠紩鎿庝腑鏂囨湰鏁版嵁鐨勫鐞嗭紝鍒嗚瘝鐨勫ソ鍧忎互鍙婃晥鐜囧緢澶х▼搴︿笂鍐沖畾鐫鎼滅儲寮曟搸鐨勭簿紜э紝鏄潪甯擱渶瑕佸叧娉ㄧ殑涓鐐癸紝鑰屽掓帓绱㈠紩鏃舵牴鎹垎璇嶇殑緇撴灉寤虹珛鐨勪竴涓?#8220;璇嶇粍鈥斺斿搴旂綉欏靛垪琛?#8221;鏄犲皠錛屽掓帓绱㈠紩鏄綉欏墊悳绱㈢殑鏈鍏抽敭鏁版嵁緇撴瀯錛屾悳绱㈠紩鎿庢墽琛岀殑閫熷害涓庡掓帓绱㈠紩鐨勫緩绔嬩互鍙婂掓帓绱㈠紩鐨勬悳绱㈡柟寮忔伅鎭浉鍏熾?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍥為〉棣?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍚庣畫鍐呭<br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif">鍦ㄦ湰緋誨垪鐨勭涓夐儴鍒嗕腑錛屾?zhèn)畣浜嗚В鍒板浣曚粠鍒涘缓缃憴宓锛屼粠缃憴宓涓緭鍏ユ煡璇俊鎭氳繃鍊掓帓绱㈠紩鐨勬悳绱㈠畬鎴愮粨鏋滅殑榪斿洖錛屽茍涓斿畬鎴愮綉欏墊帓鍚嶇殑鍔熻兘銆?br><img align=top src="http://www.shnenglu.com/Images/OutliningIndicators/None.gif"></span></div> <img src ="http://www.shnenglu.com/zzfmars/aggbug/144357.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.shnenglu.com/zzfmars/" target="_blank">Kevin_Zhang</a> 2011-04-16 20:36 <a href="http://www.shnenglu.com/zzfmars/archive/2011/04/16/144357.html#Feedback" target="_blank" style="text-decoration:none;">鍙戣〃璇勮</a></div>]]></description></item><item><title>涓涓?Java 鎼滅儲寮曟搸鐨勫疄鐜幫紝絎?1 閮ㄥ垎: 緗戠粶鐖櫕http://www.shnenglu.com/zzfmars/archive/2011/04/16/144356.htmlKevin_ZhangKevin_ZhangSat, 16 Apr 2011 12:35:00 
GMThttp://www.shnenglu.com/zzfmars/archive/2011/04/16/144356.htmlhttp://www.shnenglu.com/zzfmars/comments/144356.htmlhttp://www.shnenglu.com/zzfmars/archive/2011/04/16/144356.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/144356.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/144356.html鑷繁鍔ㄦ墜鍐欎竴涓悳绱㈠紩鎿庯紝鎯蟲兂榪欐湁澶?nbsp;cool錛氬湪鐣岄潰涓婅緭鍏ュ叧閿瘝錛岀偣鍑繪悳绱紝寰楀埌鑷繁鎯寵鐨勭粨鏋滐紱閭d箞瀹冭繕鍙互鍋氫粈涔堝憿錛熶篃璁告槸鑷繁鐨勭綉绔欓渶瑕佷竴涓珯鍐呮悳绱㈠姛鑳斤紝鎶戞垨鏄浜庣‖鐩樹腑鏂囨。鐨勬悳绱?nbsp;鈥斺?nbsp;鏈閲嶈鐨勬槸錛屾槸涓嶆槸瑙夊緱浼楀 IT 鍏徃閮藉湪鍚戜綘鎷涙墜鍛紵濡傛灉浣犲績鍔ㄤ簡錛岄偅涔堬紝Let's Go錛?/span>
榪欓噷棣栧厛瑕佽鏄庝嬌鐢?nbsp;Java 璇█鑰屼笉鏄?nbsp;C/C++ 絳夊叾瀹冭璦鐨勫師鍥狅紝鍥犱負 Java 涓彁渚涗簡瀵逛簬緗戠粶緙栫▼浼楀鐨勫熀紜鍖呭拰綾伙紝姣斿 URL 綾匯両netAddress 綾匯佹鍒欒〃杈懼紡錛岃繖涓烘垜浠殑鎼滅儲寮曟搸瀹炵幇鎻愪緵浜嗚壇濂界殑鍩虹錛屼嬌鎴戜滑鍙互涓撴敞浜庢悳绱㈠紩鎿庢湰韜殑瀹炵幇錛岃屼笉闇瑕佸洜涓鴻繖浜涘熀紜綾葷殑瀹炵幇鑰屽垎蹇冦?br>榪欎釜鍒嗕笁閮ㄥ垎鐨勭郴鍒楀皢閫愭璇存槑濡備綍璁捐鍜屽疄鐜頒竴涓悳绱㈠紩鎿庛傚湪絎竴閮ㄥ垎涓紝鎮(zhèn)ㄥ皢棣栧厛瀛︿範鎼滅儲寮曟搸鐨勫伐浣滃師鐞嗭紝鍚屾椂浜嗚В鍏朵綋緋葷粨鏋勶紝涔嬪悗灝嗚瑙e浣曞疄鐜版悳绱㈠紩鎿庣殑絎竴閮ㄥ垎錛岀綉緇滅埇铏ā鍧楋紝鍗沖畬鎴愮綉欏墊悳闆嗗姛鑳姐傚湪緋誨垪鐨勭浜岄儴鍒嗕腑錛屽皢浠嬬粛棰勫鐞嗘ā鍧楋紝鍗沖浣曞鐞嗘敹闆嗘潵鐨勭綉欏碉紝鏁寸悊銆佸垎璇嶄互鍙婄儲寮曠殑寤虹珛閮藉湪榪欓儴鍒嗕箣涓傚湪緋誨垪鐨勭涓夐儴鍒嗕腑錛屽皢浠嬬粛淇℃伅鏌ヨ鏈嶅姟鐨勫疄鐜幫紝涓昏鏄煡璇㈢晫闈㈢殑寤虹珛銆佹煡璇㈢粨鏋滅殑榪斿洖浠ュ強蹇収鐨勫疄鐜般?br>dySE 鐨勬暣浣撶粨鏋?br>鍦ㄥ紑濮嬪涔犳悳绱㈠紩鎿庣殑妯″潡瀹炵幇涔嬪墠錛屾?zhèn)ㄩ渶瑕佷簡瑙?nbsp;dySE 鐨勬暣浣撶粨鏋勪互鍙婃暟鎹紶杈撶殑嫻佺▼銆備簨瀹炰笂錛屾悳绱㈠紩鎿庣殑涓変釜閮ㄥ垎鏄浉浜掔嫭绔嬬殑錛屼笁涓儴鍒嗗垎鍒伐浣滐紝涓昏鐨勫叧緋諱綋鐜板湪鍓嶄竴閮ㄥ垎寰楀埌鐨勬暟鎹粨鏋滀負鍚庝竴閮ㄥ垎鎻愪緵鍘熷鏁版嵁銆備笁鑰呯殑鍏崇郴濡備笅鍥炬墍紺猴細

鍥?nbsp;
1. 鎼滅儲寮曟搸涓夋寮忓伐浣滄祦紼?br> 
鍦ㄤ粙緇嶆悳绱㈠紩鎿庣殑鏁翠綋緇撴瀯涔嬪墠錛屾垜浠熼壌銆婅綆楁満緗戠粶鈥斺旇嚜欏跺悜涓嬬殑鏂規(guī)硶鎻忚堪鍥犵壒緗戠壒鑹層嬩竴涔︾殑鍙欎簨鏂規(guī)硶錛屼粠鏅氱敤鎴蜂嬌鐢ㄦ悳绱㈠紩鎿庣殑瑙掑害鏉ヤ粙緇嶆悳绱㈠紩鎿庣殑鍏蜂綋宸ヤ綔嫻佺▼銆?br>鑷《鍚戜笅鐨勬柟娉曟弿榪版悳绱㈠紩鎿庢墽琛岃繃紼嬶細
鐢ㄦ埛閫氳繃嫻忚鍣ㄦ彁浜ゆ煡璇㈢殑璇嶆垨鑰呯煭璇?nbsp;P錛屾悳绱㈠紩鎿庢牴鎹敤鎴風殑鏌ヨ榪斿洖鍖歸厤鐨勭綉欏典俊鎭垪琛?nbsp;L錛?br>涓婅堪榪囩▼娑夊強鍒頒袱涓棶棰橈紝濡備綍鍖歸厤鐢ㄦ埛鐨勬煡璇互鍙婄綉欏典俊鎭垪琛ㄤ粠浣曡屾潵錛屾牴鎹粈涔堣屾帓搴忥紵鐢ㄦ埛鐨勬煡璇?nbsp;P 緇忚繃鍒嗚瘝鍣ㄨ鍒囧壊鎴愬皬璇嶇粍 
<p1,p2 … pn> 騫惰鍓旈櫎鍋滅敤璇?nbsp;( 鐨勩佷簡銆佸晩絳夊瓧 )錛屾牴鎹郴緇熺淮鎶ょ殑涓涓掓帓绱㈠紩鍙互鏌ヨ鏌愪釜璇?nbsp;pi 鍦ㄥ摢浜涚綉欏典腑鍑虹幇榪囷紝鍖歸厤閭d簺 <p1,p2 … pn> 閮藉嚭鐜扮殑緗戦〉闆嗗嵆鍙綔涓哄垵濮嬬粨鏋滐紝鏇磋繘涓姝ワ紝榪斿洖鐨勫垵濮嬬綉欏甸泦閫氳繃璁$畻涓庢煡璇㈣瘝鐨勭浉鍏沖害浠庤屽緱鍒扮綉欏墊帓鍚嶏紝鍗?nbsp;Page Rank錛屾寜鐓х綉欏電殑鎺掑悕欏哄簭鍗沖彲寰楀埌鏈緇堢殑緗戦〉鍒楄〃錛?br>鍋囪鍒嗚瘝鍣ㄥ拰緗戦〉鎺掑悕鐨勮綆楀叕寮忛兘鏄棦瀹氱殑錛岄偅涔堝掓帓绱㈠紩浠ュ強鍘熷緗戦〉闆嗕粠浣曡屾潵錛熷師濮嬬綉欏甸泦鍦ㄤ箣鍓嶇殑鏁版嵁嫻佺▼鐨勪粙緇嶄腑錛屽彲浠ュ緱鐭ユ槸鐢辯埇铏?nbsp;spider 鐖彇緗戦〉騫朵笖淇濆瓨鍦ㄦ湰鍦扮殑錛岃屽掓帓绱㈠紩錛屽嵆璇嶇粍鍒扮綉欏電殑鏄犲皠琛ㄦ槸寤虹珛鍦ㄦ鎺掔儲寮曠殑鍩虹涓婄殑錛屽悗鑰呮槸鍒嗘瀽浜嗙綉欏電殑鍐呭騫跺鍏跺唴瀹硅繘琛屽垎璇嶅悗錛屽緱鍒扮殑緗戦〉鍒拌瘝緇勭殑鏄犲皠琛紝灝嗘鎺掔儲寮曞掔疆鍗沖彲寰楀埌鍊掓帓绱㈠紩錛?br>緗戦〉鐨勫垎鏋愬叿浣撳仛浠涔堝憿錛熺敱浜庣埇铏敹闆嗘潵鐨勫師濮嬬綉欏典腑鍖呭惈寰堝淇℃伅錛屾瘮濡?nbsp;html 琛ㄥ崟浠ュ強涓浜涘瀮鍦句俊鎭瘮濡傚箍鍛婏紝緗戦〉鍒嗘瀽鍘婚櫎榪欎簺淇℃伅錛屽茍鎶藉彇鍏朵腑鐨勬鏂囦俊鎭綔涓哄悗緇殑鍩虹鏁版嵁銆?br>鍦ㄦ湁浜嗕笂榪扮殑鍒嗘瀽涔嬪悗錛屾垜浠彲浠ュ緱鍒版悳绱㈠紩鎿庣殑鏁翠綋緇撴瀯濡備笅鍥撅細

鍥?nbsp;
2. 鎼滅儲寮曟搸鏁翠綋緇撴瀯
 
鐖櫕浠?nbsp;Internet 涓埇鍙栦紬澶氱殑緗戦〉浣滀負鍘熷緗戦〉搴撳瓨鍌ㄤ簬鏈湴錛岀劧鍚庣綉欏靛垎鏋愬櫒鎶藉彇緗戦〉涓殑涓婚鍐呭浜ょ粰鍒嗚瘝鍣ㄨ繘琛屽垎璇嶏紝寰楀埌鐨勭粨鏋滅敤绱㈠紩鍣ㄥ緩绔嬫鎺掑拰鍊掓帓绱㈠紩錛岃繖鏍峰氨寰楀埌浜嗙儲寮曟暟鎹簱錛岀敤鎴鋒煡璇㈡椂錛屽湪閫氳繃鍒嗚瘝鍣ㄥ垏鍓茶緭鍏ョ殑鏌ヨ璇嶇粍騫墮氳繃媯绱㈠櫒鍦ㄧ儲寮曟暟鎹簱涓繘琛屾煡璇紝寰楀埌鐨勭粨鏋滆繑鍥炵粰鐢ㄦ埛銆?br>鏃犺鎼滅儲寮曟搸鐨勮妯″ぇ灝忥紝鍏朵富瑕佺粨鏋勯兘鏄敱榪欏嚑閮ㄥ垎鏋勬垚鐨勶紝騫舵病鏈夊ぇ鐨勫樊鍒紝鎼滅儲寮曟搸鐨勫ソ鍧忎富瑕佹槸鍐沖畾浜庡悇閮ㄥ垎鐨勫唴閮ㄥ疄鐜般?br>鏈変簡涓婅堪鐨勫涓庢悳绱㈠紩鎿庣殑鏁翠綋浜嗚В錛屾垜浠潵瀛︿範 dySE 涓埇铏ā鍧楃殑鍏蜂綋璁捐鍜屽疄鐜般?br>鍥為〉棣?br>Spider 鐨勮璁?br>緗戦〉鏀墮泦鐨勮繃紼嬪鍚屽浘鐨勯亶鍘嗭紝鍏朵腑緗戦〉灝變綔涓哄浘涓殑鑺傜偣錛岃岀綉欏典腑鐨勮秴閾炬帴鍒欎綔涓哄浘涓殑杈癸紝閫氳繃鏌愮綉欏電殑瓚呴摼鎺?nbsp;寰楀埌鍏朵粬緗戦〉鐨勫湴鍧錛屼粠鑰屽彲浠ヨ繘涓姝ョ殑榪涜緗戦〉鏀墮泦錛涘浘鐨勯亶鍘嗗垎涓哄箍搴︿紭鍏堝拰娣卞害浼樺厛涓ょ鏂規(guī)硶錛岀綉欏電殑鏀墮泦榪囩▼涔熸槸濡傛銆傜患涓婏紝Spider 鏀墮泦緗戦〉鐨勮繃紼嬪涓嬶細浠庡垵濮?nbsp;URL 闆嗗悎鑾峰緱鐩爣緗戦〉鍦板潃錛岄氳繃緗戠粶榪炴帴鎺ユ敹緗戦〉鏁版嵁錛屽皢鑾峰緱鐨勭綉欏墊暟鎹坊鍔犲埌緗戦〉搴撲腑騫朵笖鍒嗘瀽璇ョ綉欏典腑鐨勫叾浠?nbsp;URL 閾炬帴錛屾斁鍏ユ湭璁塊棶 URL 闆嗗悎鐢ㄤ簬緗戦〉鏀墮泦銆備笅鍥捐〃紺轟簡榪欎釜榪囩▼錛?br>
鍥?nbsp;
3. Spider 宸ヤ綔嫻佺▼
 
鍥為〉棣?br>Spider 鐨勫叿浣撳疄鐜?br>緗戦〉鏀墮泦鍣?nbsp;Gather
緗戦〉鏀墮泦鍣ㄩ氳繃涓涓?nbsp;URL 鏉ヨ幏鍙栬 URL 瀵瑰簲鐨勭綉欏墊暟鎹紝鍏跺疄鐜頒富瑕佹槸鍒╃敤 Java 涓殑 URLConnection 綾繪潵鎵撳紑 URL 瀵瑰簲欏甸潰鐨勭綉緇滆繛鎺ワ紝鐒跺悗閫氳繃 I
/O 嫻佽鍙栧叾涓殑鏁版嵁錛孊ufferedReader 鎻愪緵璇誨彇鏁版嵁鐨勭紦鍐插尯鎻愰珮鏁版嵁璇誨彇鐨勬晥鐜囦互鍙婂叾涓嬪畾涔夌殑 readLine() 琛岃鍙栧嚱鏁般備唬鐮佸涓?nbsp;( 鐪佺暐浜嗗紓甯稿鐞嗛儴鍒?nbsp;)錛?br>
娓呭崟 
1. 緗戦〉鏁版嵁鎶撳彇
                
URL url 
= new URL(“http://www.xxx.com”); 
URLConnection conn = url.openConnection(); 
BufferedReader reader 
= new BufferedReader(new InputStreamReader(conn.getInputStream())); 
String line 
= null
while((line = reader.readLine()) != null
    document.append(line 
+ "\n"); 

浣跨敤 Java 璇█鐨勫ソ澶勬槸涓嶉渶瑕佽嚜宸卞鐞嗗簳灞傜殑榪炴帴鎿嶄綔錛屽枩嬈㈡垨鑰呯簿閫?nbsp;Java 緗戠粶緙栫▼鐨勮鑰呬篃鍙互涓嶇敤涓婅堪鐨勬柟娉曪紝鑷繁瀹炵幇 URL 綾誨強鐩稿叧鎿嶄綔錛岃繖涔熸槸涓縐嶅緢濂界殑閿葷偧銆?br>緗戦〉澶勭悊
鏀墮泦鍒扮殑鍗曚釜緗戦〉錛岄渶瑕佽繘琛屼袱縐嶄笉鍚岀殑澶勭悊錛屼竴縐嶆槸鏀懼叆緗戦〉搴擄紝浣滀負鍚庣畫澶勭悊鐨勫師濮嬫暟鎹紱鍙︿竴縐嶆槸琚垎鏋愪箣鍚庯紝鎶藉彇鍏朵腑鐨?nbsp;URL 榪炴帴錛屾斁鍏?nbsp;URL 姹犵瓑寰呭搴旂綉欏電殑鏀墮泦銆?br>緗戦〉鐨勪繚瀛橀渶瑕佹寜鐓т竴瀹氱殑鏍煎紡錛屼互渚夸互鍚庢暟鎹殑鎵歸噺澶勭悊銆傝繖閲屼粙緇嶄竴縐嶅瓨鍌ㄦ暟鎹牸寮忥紝璇ユ牸寮忎粠鍖楀ぇ澶╃綉鐨勫瓨鍌ㄦ牸寮忕畝鍖栬屾潵錛?br>緗戦〉搴撶敱鑻ュ共璁板綍緇勬垚錛屾瘡涓褰曞寘鍚竴鏉$綉欏墊暟鎹俊鎭紝璁板綍鐨勫瓨鏀句負欏哄簭娣誨姞錛?br>涓鏉¤褰曠敱鏁版嵁澶淬佹暟鎹佺┖琛岀粍鎴愶紝欏哄簭涓猴細澶撮儴 
+ 絀鴻 + 鏁版嵁 + 絀鴻錛?br>澶撮儴鐢辮嫢騫插睘鎬х粍鎴愶紝鏈夛細鐗堟湰鍙鳳紝鏃ユ湡錛孖P 鍦板潃錛屾暟鎹暱搴︼紝鎸夌収灞炴у悕鍜屽睘鎬у肩殑鏂瑰紡鎺掑垪錛屼腑闂村姞鍐掑彿錛屾瘡涓睘鎬у崰鐢ㄤ竴琛岋紱
鏁版嵁鍗充負緗戦〉鏁版嵁銆?br>闇瑕佽鏄庣殑鏄紝娣誨姞鏁版嵁鏀墮泦鏃ユ湡鐨勫師鍥狅紝鐢變簬璁稿緗戠珯鐨勫唴瀹歸兘鏄姩鎬佸彉鍖栫殑錛屾瘮濡備竴浜涘ぇ鍨嬮棬鎴風綉绔欑殑棣栭〉鍐呭錛岃繖灝辨剰鍛崇潃濡傛灉涓嶆槸褰撳ぉ鐖彇鐨勭綉欏墊暟鎹紝寰堝彲鑳藉彂鐢熸暟鎹繃鏈熺殑闂錛屾墍浠ラ渶瑕佹坊鍔犳棩鏈熶俊鎭姞浠ヨ瘑鍒?br>URL 鐨勬彁鍙栧垎涓轟袱姝ワ紝絎竴姝ユ槸 URL 璇嗗埆錛岀浜屾鍐嶈繘琛?nbsp;URL 鐨勬暣鐞嗭紝鍒嗕袱姝ヨ蛋涓昏鏄洜涓烘湁浜涚綉绔欑殑閾炬帴鏄噰鐢ㄧ浉瀵硅礬寰勶紝濡傛灉涓嶆暣鐞嗕細浜х敓閿欒銆俇RL 鐨勮瘑鍒富瑕佹槸閫氳繃姝e垯琛ㄨ揪寮忔潵鍖歸厤錛岃繃紼嬮鍏堣瀹氫竴涓瓧絎︿覆浣滀負鍖歸厤鐨勫瓧絎︿覆妯″紡錛岀劧鍚庡湪 Pattern 涓紪璇戝悗鍗沖彲浣跨敤 Matcher 綾繪潵榪涜鐩稿簲瀛楃涓茬殑鍖歸厤銆傚疄鐜頒唬鐮佸涓嬶細

娓呭崟 
2. URL 璇嗗埆
                
public ArrayList<URL> urlDetector(String htmlDoc){
    
final String patternString = "<[a|A]\\s+href=([^>]*\\s*>)";           
    Pattern pattern 
= Pattern.compile(patternString,Pattern.CASE_INSENSITIVE);   
    ArrayList
<URL> allURLs = new ArrayList<URL>();
    Matcher matcher 
= pattern.matcher(htmlDoc);
    String tempURL;
    
//鍒濇鍖歸厤鍒扮殑url鏄艦濡傦細<a href="http://bbs.life.xxx.com.cn/" target="_blank">
    
//涓烘錛岄渶瑕佽繘琛屼笅涓姝ョ殑澶勭悊錛屾妸鐪熸鐨剈rl鎶藉彇鍑烘潵錛?br>    //鍙互瀵逛簬鍓嶄袱涓?涔嬮棿鐨勯儴鍒嗚繘琛岃褰曞緱鍒皍rl
    while(matcher.find()){
        
try {
            tempURL 
= matcher.group();            
            tempURL 
= tempURL.substring(tempURL.indexOf("\"")+1);        
            if(!tempURL.contains("\""))
                continue;
            tempURL 
= tempURL.substring(0, tempURL.indexOf("\""));        
        }
 catch (MalformedURLException e) {
            e.printStackTrace();
        }

    }

    
return allURLs;    
}


鎸夌収“
<[a|A]\\s+href=([^>]*\\s*>)”榪欎釜姝e垯琛ㄨ揪寮忓彲浠ュ尮閰嶅嚭 URL 鎵鍦ㄧ殑鏁翠釜鏍囩錛屽艦濡?#8220;<a href="http://bbs.life.xxx.com.cn/" target="_blank">”錛屾墍浠ュ湪寰幆鑾峰緱鏁翠釜鏍囩涔嬪悗錛岄渶瑕佽繘涓姝ユ彁鍙栧嚭鐪熸鐨?nbsp;URL錛屾垜浠彲浠ラ氳繃鎴彇鏍囩涓墠涓や釜寮曞彿涓棿鐨勫唴瀹規(guī)潵鑾峰緱榪欐鍐呭銆傚姝や箣鍚庯紝鎴戜滑鍙互寰楀埌涓涓垵姝ョ殑灞炰簬璇ョ綉欏電殑 URL 闆嗗悎銆?br>鎺ヤ笅鏉ユ垜浠繘琛岀浜屾鎿嶄綔錛孶RL 鐨勬暣鐞嗭紝鍗沖涔嬪墠鑾峰緱鐨勬暣涓〉闈腑 URL 闆嗗悎榪涜絳涢夊拰鏁村悎銆傛暣鍚堜富瑕佹槸閽堝緗戦〉鍦板潃鏄浉瀵歸摼鎺ョ殑閮ㄥ垎錛岀敱浜庢垜浠彲浠ュ緢瀹規(guī)槗鐨勮幏寰楀綋鍓嶇綉欏電殑 URL錛屾墍浠ワ紝鐩稿閾炬帴鍙渶瑕佸湪褰撳墠緗戦〉鐨?nbsp;URL 涓婃坊鍔犵浉瀵歸摼鎺ョ殑瀛楁鍗沖彲緇勬垚瀹屾暣鐨?nbsp;URL錛屼粠鑰屽畬鎴愭暣鍚堛傚彟涓鏂歸潰錛屽湪欏甸潰涓寘鍚殑鍏ㄩ潰 URL 涓紝鏈変竴浜涚綉欏墊瘮濡傚箍鍛婄綉欏墊槸鎴戜滑涓嶆兂鐖彇鐨勶紝鎴栬呬笉閲嶈鐨勶紝榪欓噷鎴戜滑涓昏閽堝浜庨〉闈腑鐨勫箍鍛婅繘琛屼竴涓畝鍗曞鐞嗐備竴鑸綉绔欑殑騫垮憡榪炴帴閮芥湁鐩稿簲鐨勬樉紺鴻〃杈撅紝姣斿榪炴帴涓惈鏈?#8220;ad”絳夎〃杈炬椂錛屽彲浠ュ皢璇ラ摼鎺ョ殑浼樺厛綰ч檷浣庯紝榪欐牱灝卞彲浠ヤ竴瀹氱▼搴︾殑閬垮厤騫垮憡閾炬帴鐨勭埇鍙栥?br>緇忚繃榪欎袱姝ユ搷浣滄椂鍊欙紝鍙互鎶婅緗戦〉鐨勬敹闆嗗埌鐨?nbsp;URL 鏀懼叆 URL 姹犱腑錛屾帴涓嬫潵鎴戜滑澶勭悊鐖櫕鐨?nbsp;URL 鐨勬淳鍒嗛棶棰樸?br>Dispatcher 鍒嗛厤鍣?br>鍒嗛厤鍣ㄧ鐞?nbsp;URL錛岃礋璐d繚瀛樼潃 URL 姹犲茍涓斿湪 Gather 鍙栧緱鏌愪竴涓綉欏典箣鍚庢淳鍒嗘柊鐨?nbsp;URL錛岃繕瑕侀伩鍏嶇綉欏電殑閲嶅鏀墮泦銆傚垎閰嶅櫒閲囩敤璁捐妯″紡涓殑鍗曚緥妯″紡緙栫爜錛岃礋璐f彁渚涚粰 Gather 鏂扮殑 URL錛屽洜涓烘秹鍙婂埌涔嬪悗鐨勫綰跨▼鏀瑰啓錛屾墍浠ュ崟渚嬫ā寮忔樉寰楀挨涓洪噸瑕併?br>閲嶅鏀墮泦鏄寚鐗╃悊涓婂瓨鍦ㄧ殑涓涓綉欏碉紝鍦ㄦ病鏈夋洿鏂扮殑鍓嶆彁涓嬶紝琚?nbsp;Gather 閲嶅璁塊棶錛岄犳垚璧勬簮鐨勬氮璐癸紝涓昏鍘熷洜鏄病鏈夋竻妤氱殑璁板綍宸茬粡璁塊棶鐨?nbsp;URL 鑰屾棤娉曡鯨鍒傛墍浠ワ紝Dispatcher 緇存姢涓や釜鍒楄〃 ,“宸茶闂〃”錛屽拰“鏈闂〃”銆傛瘡涓?nbsp;URL 瀵瑰簲鐨勯〉闈㈣鎶撳彇涔嬪悗錛岃 URL 鏀懼叆宸茶闂〃涓紝鑰屼粠璇ラ〉闈㈡彁鍙栧嚭鏉ョ殑 URL 鍒欐斁鍏ユ湭璁塊棶琛ㄤ腑錛涘綋 Gather 鍚?nbsp;Dispatcher 璇鋒眰 URL 鐨勬椂鍊欙紝鍏堥獙璇佽 URL 鏄惁鍦ㄥ凡璁塊棶琛ㄤ腑錛岀劧鍚庡啀緇?nbsp;Gather 榪涜浣滀笟銆?br>Spider 鍚姩澶氫釜 Gather 綰跨▼
鐜板湪 Internet 涓殑緗戦〉鏁伴噺鏁頒互浜胯錛岃屽崟鐙殑涓涓?nbsp;Gather 鏉ヨ繘琛岀綉欏墊敹闆嗘樉鐒舵晥鐜囦笉瓚籌紝鎵浠ユ垜浠渶瑕佸埄鐢ㄥ綰跨▼鐨勬柟娉曟潵鎻愰珮鏁堢巼銆侴ather 鐨勫姛鑳芥槸鏀墮泦緗戦〉錛屾垜浠彲浠ラ氳繃 Spider 綾繪潵寮鍚涓?nbsp;Gather 綰跨▼錛屼粠鑰岃揪鍒板綰跨▼鐨勭洰鐨勩備唬鐮佸涓嬶細
/** 
* 鍚姩綰跨▼ gather錛岀劧鍚庡紑濮嬫敹闆嗙綉欏佃祫鏂?br>
*/
 
public void start() 
    Dispatcher disp 
= Dispatcher.getInstance(); 
    
for(int i = 0; i < gatherNum; i++)
        Thread gather 
= new Thread(new Gather(disp)); 
        gather.start(); 
    }

}


鍦ㄥ紑鍚嚎紼嬩箣鍚庯紝緗戦〉鏀墮泦鍣ㄥ紑濮嬩綔涓氱殑榪愪綔錛屽茍鍦ㄤ竴涓綔涓氬畬鎴愪箣鍚庯紝鍚?nbsp;Dispatcher 鐢寵涓嬩竴涓綔涓氾紝鍥犱負鏈変簡澶氱嚎紼嬬殑 Gather錛屼負浜嗛伩鍏嶇嚎紼嬩笉瀹夊叏錛岄渶瑕佸 Dispatcher 榪涜浜掓枼璁塊棶錛屽湪鍏跺嚱鏁頒箣涓坊鍔?nbsp;
synchronized 鍏抽敭璇嶏紝浠庤岃揪鍒扮嚎紼嬬殑瀹夊叏璁塊棶銆?br>鍥為〉棣?br>灝忕粨
Spider 鏄暣涓悳绱㈠紩鎿庣殑鍩虹錛屼負鍚庣畫鐨勬搷浣滄彁渚涘師濮嬬綉欏佃祫鏂欙紝鎵浠ヤ簡瑙?nbsp;Spider 鐨勭紪鍐欎互鍙婄綉欏靛簱鐨勭粍鎴愮粨鏋勪負鍚庣畫棰勫鐞嗘ā鍧楁墦涓嬪熀紜銆傚悓鏃?nbsp;Spider 紼嶅姞淇敼涔嬪悗涔熷彲浠ュ崟鐙敤浜庢煇綾誨叿浣撲俊鎭殑鎼滈泦錛屾瘮濡傛煇涓綉绔欑殑鍥劇墖鐖彇絳夈?br>鍥為〉棣?br>鍚庣畫鍐呭
鍦ㄦ湰緋誨垪鐨勭 
2 閮ㄥ垎涓紝鎮(zhèn)ㄥ皢浜嗚В鍒扮埇铏幏鍙栫殑緗戦〉搴撳浣曡棰勫鐞嗘ā鍧楅愭鎻愬彇鍐呭淇℃伅錛岄氳繃鍒嗚瘝騫跺緩鎴愬掓帓绱㈠紩錛涜屽湪絎?nbsp;3 閮ㄥ垎涓紝鎮(zhèn)ㄥ皢浜嗚В鍒幫紝濡備綍緙栧啓緗戦〉鏉ユ彁渚涙煡璇㈡湇鍔★紝騫朵笖濡備綍鏄劇ず鐨勮繑鍥炵殑緇撴灉鍜屽畬鎴愬揩鐓х殑鍔熻兘銆?/span>

]]>
java 涓嬭澆緗戦〉http://www.shnenglu.com/zzfmars/archive/2011/04/13/144148.htmlKevin_ZhangKevin_ZhangWed, 13 Apr 2011 12:42:00 GMThttp://www.shnenglu.com/zzfmars/archive/2011/04/13/144148.htmlhttp://www.shnenglu.com/zzfmars/comments/144148.htmlhttp://www.shnenglu.com/zzfmars/archive/2011/04/13/144148.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/144148.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/144148.html 

 1//鑾峰彇鎸囧畾緗戦〉婧愪唬鐮?/span>
 2package kevin;
 3
 4import java.io.*;//java鐨勮緭鍏ヨ緭鍑?/span>
 5import java.net.*;//java鐨刵et鍖?/span>
 6public class fei{
 7 public static void main(String[] args) throws IOException{
 8  URL url=new URL("http://www.baidu.com");//瀹氫箟涓涓猽rl綾葷殑瀹炰緥
 9  InputStreamReader isr=new InputStreamReader(url.openStream());//杈撳叆嫻?/span>
10  BufferedReader br=new BufferedReader(isr);
11  String s;
12  while((s=br.readLine())!=null)
13  System.out.print(s);
14  URLConnection connection=url.openConnection();
15
16 }

17}

18
19

 



Kevin_Zhang 2011-04-13 20:42 鍙戣〃璇勮
]]>
Apache+php+mysql鍦╔P涓嬫惌閰嶈瑙?/title><link>http://www.shnenglu.com/zzfmars/archive/2011/04/10/143865.html</link><dc:creator>Kevin_Zhang</dc:creator><author>Kevin_Zhang</author><pubDate>Sun, 10 Apr 2011 04:14:00 GMT</pubDate><guid>http://www.shnenglu.com/zzfmars/archive/2011/04/10/143865.html</guid><wfw:comment>http://www.shnenglu.com/zzfmars/comments/143865.html</wfw:comment><comments>http://www.shnenglu.com/zzfmars/archive/2011/04/10/143865.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.shnenglu.com/zzfmars/comments/commentRss/143865.html</wfw:commentRss><trackback:ping>http://www.shnenglu.com/zzfmars/services/trackbacks/143865.html</trackback:ping><description><![CDATA[<a >http://www.php100.com/html/webkaifa/apache/2009/0418/1188.html</a> <img src ="http://www.shnenglu.com/zzfmars/aggbug/143865.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.shnenglu.com/zzfmars/" target="_blank">Kevin_Zhang</a> 2011-04-10 12:14 <a href="http://www.shnenglu.com/zzfmars/archive/2011/04/10/143865.html#Feedback" target="_blank" style="text-decoration:none;">鍙戣〃璇勮</a></div>]]></description></item><item><title>MonoDevelophttp://www.shnenglu.com/zzfmars/archive/2010/10/22/130844.htmlKevin_ZhangKevin_ZhangThu, 21 Oct 2010 23:29:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/10/22/130844.htmlhttp://www.shnenglu.com/zzfmars/comments/130844.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/10/22/130844.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/130844.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/130844.htmlMonoDevelop鏀寔浣跨敤C#鍜屽叾浠?NET璇█榪涜寮鍙戯紝瀹冧嬌寰楀紑鍙戣呭彲浠ュ湪Linux鍜孧ac OS X涓婇潪甯歌繀閫熺殑寮鍙戝嚭妗岄潰杞歡鍜孉SP.NET Web搴旂敤銆傞櫎姝や箣澶栵紝MonoDevelop榪樺厑璁稿紑鍙戣呴潪甯哥畝鍗曠殑灝哣isual Studio寮鍙戠殑.NET搴旂敤紼嬪簭縐繪鍒癓inux鍜孧ac OS X涓嬶紝榪欐牱寮鍙戣呭彧闇瑕佺淮鎶や竴濂椾唬鐮佸嵆鍙攢鈹鍥犱負GTK#鏄法騫沖彴鐨勩?
銆銆鎴栬鏈変漢瀵逛簬Microsoft鐨?NET鐜鏈変簺鎶佃Е錛岃屽紑鏀劇殑妗岄潰鐜錛欸NOME鏃╁凡灝嗗紑婧愬疄鐜扮殑.NET榪愯鐜Mono綰沖叆浜嗛粯璁ゆ敮鎸佸綋涓?
銆銆GNOME緋葷粺鐨?#8220;Tomboy渚跨”鍗蟲槸鐢–#緙栧啓錛孨ovell鍑哄搧鐨勭収鐗囩鐞嗗伐鍏鳳細F-spot涔熸槸濡傛錛屽悓鏍瘋繕鏈夎憲鍚嶇殑绱㈠紩鎼滅儲宸ュ叿Beagle銆?
銆銆閫氳繃Mono錛岃兘鍚稿紩鏇村鐨勫紑鍙戣咃紝榪欎綍灝濅笉鏄竴浠跺ソ浜嬶紵
銆銆鍐嶈皥鏈鏂扮殑MonoDevelop 1.0錛屽畠鏄竴嬈鵑潪甯稿己澶х殑闆嗘垚寮鍙戠幆澧冿紝鏈夊涓嬬壒鎬э細
銆銆浠g爜琛ュ叏銆?
銆銆鍙傛暟淇℃伅銆?
銆銆淇℃伅鎻愮ず銆?
銆銆鍗蟲椂閿欒媯鏌ャ?
銆銆浠g爜瀵艱埅銆?
銆銆鏅鴻兘绱㈠紩銆?
銆銆鑷姩鐢熸垚XML鏍囩銆?
銆銆浠g爜妯℃澘銆?
銆銆綾誨拰鎴愬憳閫夋嫨鍣ㄣ?
銆銆鍗曞厓嫻嬭瘯銆?
銆銆鎵撳寘鍜岄儴緗層?
銆銆鐗堟湰鎺у埗銆?
銆銆Visual Studio鏀寔銆?
銆銆鍥介檯鍖栨敮鎸併?
銆銆鏈媯掔殑鏄紝濡傛灉浣犱嬌鐢–#鐨勮瘽錛岃繕鑳戒嬌鐢ㄩ泦鎴怗TK#鐨勫彲瑙嗗寲璁捐銆傝繖鏄洰鍓嶄負姝NOME鐜涓嬪敮涓鐨勯泦鎴愬彲瑙嗗寲璁捐鍣ㄧ殑IDE錛孉njuta涔熶笉鏀寔銆?br>
瀹樻柟緗戠珯錛?a >http://monodevelop.com/


Kevin_Zhang 2010-10-22 07:29 鍙戣〃璇勮
]]>
heritrix1.14.4http://www.shnenglu.com/zzfmars/archive/2010/10/18/130323.htmlKevin_ZhangKevin_ZhangMon, 18 Oct 2010 12:31:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/10/18/130323.htmlhttp://www.shnenglu.com/zzfmars/comments/130323.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/10/18/130323.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/130323.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/130323.html----------------------------------------------

Kevin_Zhang 2010-10-18 20:31 鍙戣〃璇勮
]]>
tomcatPlugin涓嬭澆鍦板潃http://www.shnenglu.com/zzfmars/archive/2010/10/17/130188.htmlKevin_ZhangKevin_ZhangSun, 17 Oct 2010 02:04:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/10/17/130188.htmlhttp://www.shnenglu.com/zzfmars/comments/130188.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/10/17/130188.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/130188.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/130188.html鑷繁鍦ㄨ嚜瀛ava錛岃嚜瀛2EE錛岄渶瑕佺敤鍒癳clipse涓婄殑tomcatPlugin鎻掍歡錛屾妸eclipse鍜宼omcat榪炴帴璧鋒潵銆?/p>

寰堝璧勬枡涓婃彁渚涚殑鐨勪笅杞藉湴鍧鏄細http://www.sysdeo.com/eclipse/tomcatPlugn 鎭肩伀鐨勬槸錛岃繖涓綉鍧宸茬粡鎸囧悜www.sqli.com錛屽洜涓哄璇笉濂斤紝涔熸壘涓嶅埌涓嬭澆鐨勫湴鏂廣?/p>

鍦ㄦ悳绱omcatPluginV32 涓嬭澆錛屾壘鍒扮殑鏄疌SDN涓婄殑錛屾渶璁ㄥ帉CSDN涓婁笅杞藉紑婧愮殑涓滆タ榪樿鐧婚檰錛岃繕瑕佹秷鑰楃Н鍒嗭紝鍏朵粬鐨勫ぇ澶氫篃涓婇潰鐨勪笉鑳界敤鐨勮繛鎺ャ?/p>

鍚庢潵娌″姙娉曪紝鍙悳绱omcatPlugin鎵懼埌浜嗗畼緗戯細http://www.eclipsetotale.com/tomcatPlugin.html

涔熸壘鍒頒簡瀹樻柟鐨勪笅杞藉湴鍧錛?a >http://www.eclipsetotale.com/tomcatPlugin/tomcatPluginV321.zip

 



Kevin_Zhang 2010-10-17 10:04 鍙戣〃璇勮
]]>
Heritrix-1.14.1鎬庝箞閰嶇疆?http://www.shnenglu.com/zzfmars/archive/2010/10/07/128956.htmlKevin_ZhangKevin_ZhangThu, 07 Oct 2010 14:24:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/10/07/128956.htmlhttp://www.shnenglu.com/zzfmars/comments/128956.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/10/07/128956.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/128956.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/128956.html
1.涓嬭澆heritrix-1.14.1.zip鍜宧eritrix-1.14.1.src 騫惰В鍘嬶紝瑙e帇heritrix-1.14.1.jar.
2.鍦╡clipse涓嬪垱寤簀ava project,鍛藉悕涓烘瘮濡俬eritrix錛岃繘鍏ュ叾宸ョ▼鐨勭洰褰曪紝鎴戠殑鏄疐:\workspace\myeclipse\heritrix錛屽垹闄rc鏂囦歡澶廣?
3.copy瑙e帇鍚庣殑heritrix-1.14.1.zip鏂囦歡澶逛笅鐨刲ib錛寃ebapps錛宧eritrix-1.14.1鍒癋:\workspace\myeclipse\heritrix鐩綍涓嬶紝騫跺垹闄:\workspace\myeclipse\heritrix\heritrix-1.14.1鐩綍涓嬬殑org鍜宻t涓や釜鏂囦歡澶廣?
copy瑙e帇鍚庣殑heritrix-1.14.1.src 鏂囦歡澶逛笅鐨刪eritrix-1.14.1\src\java涓嬬殑org鍜宻t涓や釜鏂囦歡澶瑰埌F:\workspace\myeclipse\heritrix\heritrix-1.14.1\鐩綍涓?
4.淇敼heritrix-1.14.1鏂囦歡澶瑰悕縐頒負src
5.淇敼src\heritrix.properties鏂囦歡涓殑heritrix.cmdline.admin = 涓?heritrix.cmdline.admin = admin:sun,榪欎釜灝辨槸瑕佽緗綘鐨勭敤鎴峰悕鍜屽瘑鐮侊紝鍙互闅忎究錛屼腑闂存槸鍐掑彿銆?
6.鍒鋒柊宸ョ▼錛屾妸lib涓嬬殑jar鍖呭叏閮ㄦ坊鍔犲埌宸ョ▼涓紝鍗崇偣鍑籬eritrix宸ョ▼錛屽彸閿睘鎬?--java build path---libraries--- add jars閫夋嫨heritrix宸ョ▼涓媗ib鎵鏈塲ar銆?
7.榪愯org.archive.crawler.Heritrix綾伙紝鍦ㄥ湴鍧鏍忚緭鍏?a style="COLOR: rgb(38,28,220)" href="http://localhost:8080/" target=_blank>http://localhost:8080/
OK!灝辨槸榪欎箞綆鍗曪紒 
杞?/span>
鑷細http://zhidao.baidu.com/question/72080439.html

Kevin_Zhang 2010-10-07 22:24 鍙戣〃璇勮
]]>
璇存槑涓涓嬩笅http://www.shnenglu.com/zzfmars/archive/2010/10/07/128928.htmlKevin_ZhangKevin_ZhangThu, 07 Oct 2010 08:03:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/10/07/128928.htmlhttp://www.shnenglu.com/zzfmars/comments/128928.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/10/07/128928.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/128928.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/128928.html




---------------------------------------

Kevin_Zhang 2010-10-07 16:03 鍙戣〃璇勮
]]>
Spider姒傝堪 http://www.shnenglu.com/zzfmars/archive/2010/09/16/126793.htmlKevin_ZhangKevin_ZhangThu, 16 Sep 2010 11:29:00 GMThttp://www.shnenglu.com/zzfmars/archive/2010/09/16/126793.htmlhttp://www.shnenglu.com/zzfmars/comments/126793.htmlhttp://www.shnenglu.com/zzfmars/archive/2010/09/16/126793.html#Feedback0http://www.shnenglu.com/zzfmars/comments/commentRss/126793.htmlhttp://www.shnenglu.com/zzfmars/services/trackbacks/126793.htmlSpider姒傝堪

Spider鍗崇綉緇滅埇铏?,鍏跺畾涔夋湁騫夸箟鍜岀嫮涔変箣鍒嗐傜嫮涔変笂鎸囬伒寰爣鍑嗙殑 http鍗忚鍒╃敤瓚呴摼鎺ュ拰 Web鏂囨。媯绱㈢殑鏂規(guī)硶閬嶅巻涓囩淮緗戜俊鎭┖闂寸殑杞歡紼嬪簭 ;鑰屽箍涔夌殑瀹氫箟鍒欐槸鎵鏈夎兘閬靛驚 http鍗忚媯绱?Web鏂囨。鐨勮蔣浠墮兘縐頒箣涓虹綉緇滅埇铏?

Spider鏄竴涓姛鑳藉緢寮虹殑鑷姩鎻愬彇緗戦〉鐨勭▼搴?,瀹冧負鎼滅儲寮曟搸浠庝竾緇寸綉涓婁笅杞界綉欏?,鏄悳绱㈠紩鎿庣殑閲嶈緇勬垚 .瀹冮氳繃璇鋒眰绔欑偣涓婄殑 HTML鏂囨。璁塊棶鏌愪竴绔欑偣銆傚畠閬嶅巻 Web絀洪棿 ,涓嶆柇浠庝竴涓珯鐐圭Щ鍔ㄥ埌鍙︿竴涓珯鐐?,鑷姩寤虹珛绱㈠紩 ,騫跺姞鍏ュ埌緗戦〉鏁版嵁搴撲腑銆傜綉緇滅埇铏繘鍏ユ煇涓秴綰ф枃鏈椂 ,瀹冨埄鐢?HTML璇█鐨勬爣璁扮粨鏋勬潵鎼滅儲淇℃伅鍙婅幏鍙栨寚鍚戝叾浠栬秴綰ф枃鏈殑 URL鍦板潃 ,鍙互瀹屽叏涓嶄緷璧栫敤鎴峰共棰勫疄鐜扮綉緇滀笂鐨勮嚜鍔ㄧ埇琛屽拰鎼滅儲銆?

Spider鐨勯槦鍒?

錛?錛夌瓑寰呴槦鍒?:鏂板彂鐜扮殑 URL琚姞鍏ュ埌榪欎釜闃熷垪 ,絳夊緟琚?Spider紼嬪簭澶勭悊 ;

錛?錛夊鐞嗛槦鍒?:瑕佽澶勭悊鐨?URL琚紶閫佸埌榪欎釜闃熷垪銆備負浜嗛伩鍏嶅悓涓涓?URL琚嬈″鐞?,褰撲竴涓?URL琚鐞嗚繃鍚?,瀹冨皢琚漿縐誨埌瀹屾垚闃熷垪鎴栬呴敊璇槦鍒?(濡傛灉鍙戠敓閿欒 )銆?

錛?錛夐敊璇槦鍒?:濡傛灉鍦ㄤ笅杞界綉欏墊槸鍙戠敓閿欒 ,璇?URL灝嗚鍔犲叆 鍒伴敊璇槦鍒椼?/p>

錛?錛夊畬鎴愰槦鍒?:濡傛灉鍦ㄥ鐞嗙綉欏墊病鏈夊彂鐢熼敊璇?,璇?URL灝嗚鍔犲叆鍒板畬鎴愰槦鍒椼?

緗戠粶鐖櫕鎼滅儲絳栫暐

鍦ㄦ姄鍙栫綉欏電殑鏃跺?,鐩墠緗戠粶鐖櫕涓鑸湁涓ょ絳栫暐 :鏃犱富棰樻悳绱笌鍩轟簬鏌愮壒瀹氫富浣撶殑涓撲笟鏅鴻兘鎼滅儲銆傚叾涓墠鑰呬富瑕佸寘鎷?:騫垮害浼樺厛鍜屾繁搴︿紭鍏堛傚箍搴︿紭鍏堟槸鎸囩綉緇滅埇铏細鍏堟姄鍙栬搗濮嬬綉欏典腑閾炬帴鐨勬墍鏈夌綉欏?,鐒跺悗鍐嶉夋嫨鍏朵腑鐨勪竴涓摼鎺ョ綉欏?,緇х畫鎶撳彇鍦ㄦ緗戦〉涓摼鎺ョ殑鎵鏈夌綉欏點傝繖鏄渶甯哥敤鐨勬柟寮?鍥犱負榪欎釜鏂規(guī)硶鍙互璁╃綉緇滅埇铏茍琛屽鐞?,鎻愰珮鍏舵姄鍙栭熷害銆傛繁搴︿紭鍏堟槸鎸囩綉緇滅埇铏細浠庤搗濮嬮〉寮濮?,涓涓摼鎺ヤ竴涓摼鎺ヨ窡韙笅鍘?,澶勭悊瀹岃繖鏉$嚎璺箣鍚庡啀杞叆涓嬩竴涓搗濮嬮〉 ,緇х畫璺熻釜閾炬帴銆傝繖涓柟娉曟湁涓紭鐐規(guī)槸緗戠粶鐖櫕鍦ㄨ璁$殑鏃跺欐瘮杈冨鏄撱傚ぇ澶氭暟緗戦〉鐖鍣ㄩ噰鐢ㄥ搴︿紭鍏堟悳绱㈢瓥鐣ユ垨鑰呮槸瀵硅繖縐嶇瓥鐣ョ殑鏌愪簺鏀硅繘銆?/p>

鍦ㄤ笓涓氭悳绱㈠紩鎿庝腑 ,緗戠粶鐖櫕鐨勪換鍔℃槸鑾峰彇 Web欏甸潰鍜屽喅瀹氶摼鎺ョ殑璁塊棶欏哄簭 ,瀹冮氬父浠庝竴涓?“縐嶅瓙闆?”(濡傜敤鎴鋒煡璇€佺瀛愰摼鎺ユ垨縐嶅瓙欏甸潰 )鍙?浠ヨ凱浠g殑鏂瑰紡璁塊棶欏甸潰鍜屾彁鍙栭摼鎺ャ傛悳绱㈣繃紼嬩腑 ,鏈闂殑閾炬帴琚殏瀛樺湪涓涓О涓?“鎼滅儲鍓嶆部 ”(Spider Frontier)鐨勯槦鍒椾腑 ,緗戠粶鐖櫕鏍規(guī)嵁鎼滅儲鍓嶆部涓摼鎺ョ殑 “閲嶈紼嬪害 ”鍐沖畾涓嬩竴涓璁塊棶鐨勯摼鎺ャ傚浣曡瘎浠峰拰棰勬祴閾炬帴鐨?“閲嶈紼嬪害 ”(鎴栫О浠峰?)鏄喅瀹氱綉緇滅埇铏悳绱㈢瓥鐣ョ殑鍏抽敭銆?/p>

浼楀鐨勭綉緇滅埇铏璁″悇涓嶇浉鍚?,浣嗗綊鏍圭粨搴曟槸閲囩敤涓嶅悓鐨勯摼鎺ヤ環(huán)鍊艱瘎浠鋒爣鍑嗐?/p>

甯哥敤寮婧愮綉緇滅埇铏粙緇嶅強鍏舵瘮杈?/h2>

Nutch

寮鍙戣璦錛欽ava

http://lucene.apache.org/nutch/

綆浠嬶細

Apache鐨勫瓙欏圭洰涔嬩竴錛屽睘浜嶭ucene欏圭洰涓嬬殑瀛愰」鐩?/p>

Nutch鏄竴涓熀浜嶭ucene錛岀被浼糋oogle鐨勫畬鏁寸綉緇滄悳绱㈠紩鎿庤В鍐蟲柟妗堬紝鍩轟簬Hadoop鐨勫垎甯冨紡澶勭悊妯″瀷淇濊瘉浜嗙郴緇熺殑鎬ц兘錛岀被浼糆clipse鐨勬彃浠舵満鍒朵繚璇佷簡緋葷粺鐨勫彲瀹㈡埛鍖栵紝鑰屼笖寰堝鏄撻泦鎴愬埌鑷繁鐨勫簲鐢ㄤ箣涓?

Larbin

寮鍙戣璦錛欳++

http://larbin.sourceforge.net/index-eng.html

綆浠?/p>

銆銆larbin鏄竴縐嶅紑婧愮殑緗戠粶鐖櫕/緗戠粶铚樿洓錛岀敱娉曞浗鐨勫勾杞諱漢 Sébastien Ailleret鐙珛寮鍙戙俵arbin鐩殑鏄兘澶熻窡韙〉闈㈢殑url榪涜鎵╁睍鐨勬姄鍙栵紝鏈鍚庝負鎼滅儲寮曟搸鎻愪緵騫挎硾鐨勬暟鎹潵婧愩?/p>

銆銆Larbin鍙槸涓涓埇铏紝涔熷氨鏄larbin鍙姄鍙栫綉欏碉紝鑷充簬濡備綍parse鐨勪簨鎯呭垯鐢辯敤鎴瘋嚜宸卞畬鎴愩傚彟澶栵紝濡備綍瀛樺偍鍒版暟鎹簱浠ュ強寤虹珛绱㈠紩鐨勪簨鎯?larbin涔熶笉鎻愪緵銆?/p>

銆銆larbin鏈鍒濈殑璁捐涔熸槸渚濇嵁璁捐綆鍗曚絾鏄珮搴﹀彲閰嶇疆鎬х殑鍘熷垯錛屽洜姝ゆ垜浠彲浠ョ湅鍒幫紝涓涓畝鍗曠殑larbin鐨勭埇铏彲浠ユ瘡澶╄幏鍙栵紩錛愶紣涓囩殑緗戦〉錛岄潪甯擱珮鏁堛?/p>

Heritrix

寮鍙戣璦錛欽ava

http://crawler.archive.org/

綆浠?/p>

涓嶯utch姣旇緝

鍜?Nutch銆備簩鑰呭潎涓篔ava寮婧愭鏋訛紝Heritrix 鏄?SourceForge涓婄殑寮婧愪駭鍝侊紝Nutch涓篈pache鐨勪竴涓瓙欏圭洰錛屽畠浠兘縐頒綔緗戠粶鐖櫕/铚樿洓錛?Web Crawler錛夛紝瀹冧滑瀹炵幇鐨勫師鐞嗗熀鏈竴鑷達細娣卞害閬嶅巻緗戠珯鐨勮祫婧愶紝灝嗚繖浜涜祫婧愭姄鍙栧埌鏈湴錛屼嬌鐢ㄧ殑鏂規(guī)硶閮芥槸鍒嗘瀽緗戠珯姣忎竴涓湁鏁堢殑URI錛屽茍鎻愪氦Http璇鋒眰錛屼粠鑰岃幏寰楃浉搴旂粨鏋滐紝鐢熸垚鏈湴鏂囦歡鍙婄浉搴旂殑鏃ュ織淇℃伅絳夈?/p>

Heritrix 鏄釜 "archival crawler" -- 鐢ㄦ潵鑾峰彇瀹屾暣鐨勩佺簿紜殑銆佺珯鐐瑰唴瀹圭殑娣卞害澶嶅埗銆傚寘鎷幏鍙栧浘鍍忎互鍙婂叾浠栭潪鏂囨湰鍐呭銆傛姄鍙栧茍瀛樺偍鐩稿叧鐨勫唴瀹廣傚鍐呭鏉ヨ呬笉鎷掞紝涓嶅欏甸潰榪涜鍐呭涓婄殑淇敼銆傞噸鏂扮埇琛屽鐩稿悓鐨刄RL涓嶉拡瀵瑰厛鍓嶇殑榪涜鏇挎崲銆傜埇铏氳繃Web鐢ㄦ埛鐣岄潰鍚姩銆佺洃鎺с佽皟鏁達紝鍏佽寮規(guī)х殑瀹氫箟瑕佽幏鍙栫殑URL銆?/p>

浜岃呯殑宸紓錛?/p>

Nutch 鍙幏鍙栧茍淇濆瓨鍙儲寮曠殑鍐呭銆侶eritrix鍒欐槸鐓у崟鍏ㄦ敹銆傚姏姹備繚瀛橀〉闈㈠師璨?

Nutch 鍙互淇壀鍐呭錛屾垨鑰呭鍐呭鏍煎紡榪涜杞崲銆?

Nutch 淇濆瓨鍐呭涓烘暟鎹簱浼樺寲鏍煎紡渚夸簬浠ュ悗绱㈠紩錛涘埛鏂版浛鎹㈡棫鐨勫唴瀹廣傝孒eritrix 鏄坊鍔?榪藉姞)鏂扮殑鍐呭銆?

Nutch 浠庡懡浠よ榪愯銆佹帶鍒躲侶eritrix 鏈?Web 鎺у埗綆$悊鐣岄潰銆?

Nutch 鐨勫畾鍒惰兘鍔涗笉澶熷己錛屼笉榪囩幇鍦ㄥ凡緇忔湁浜嗕竴瀹氭敼榪涖侶eritrix 鍙帶鍒剁殑鍙傛暟鏇村銆?/p>

Heritrix鎻愪緵鐨勫姛鑳芥病鏈塶utch澶氾紝鏈夌偣鏁寸珯涓嬭澆鐨勫懗閬撱傛棦娌℃湁绱㈠紩鍙堟病鏈夎В鏋愶紝鐢氳嚦瀵逛簬閲嶅鐖彇URL閮藉鐞嗕笉鏄緢濂姐?/p>

Heritrix鐨勫姛鑳藉己澶?浣嗘槸閰嶇疆璧鋒潵鍗存湁鐐歸夯鐑︺?/p>

涓夎呯殑姣旇緝

涓銆佷粠鍔熻兘鏂歸潰鏉ヨ錛孒eritrix涓嶭arbin鐨勫姛鑳界被浼箋傞兘鏄竴涓函綺圭殑緗戠粶鐖櫕錛屾彁渚涚綉绔欑殑闀滃儚涓嬭澆銆傝孨utch鏄竴涓綉緇滄悳绱㈠紩鎿庢鏋訛紝鐖彇緗戦〉鍙槸鍏跺姛鑳界殑涓閮ㄥ垎銆?/p>

浜屻佷粠鍒嗗竷寮忓鐞嗘潵璇達紝Nutch鏀寔鍒嗗竷寮忓鐞嗭紝鑰屽彟澶栦袱涓ソ鍍忓皻涓旇繕娌℃湁鏀寔銆?/p>

涓夈佷粠鐖彇鐨勭綉欏靛瓨鍌ㄦ柟寮忔潵璇達紝Heritrix鍜?Larbin閮芥槸灝嗙埇鍙栦笅鏉ョ殑鍐呭淇濆瓨涓哄師濮嬬被鍨嬬殑鍐呭銆傝孨utch鏄皢鍐呭淇濆瓨鍒板叾鐗瑰畾鏍煎紡鐨剆egment涓幓銆?/p>

鍥涳紝瀵逛簬鐖彇涓嬫潵鐨勫唴瀹圭殑澶勭悊鏉ヨ錛孒eritrix鍜?Larbin閮芥槸灝嗙埇鍙栦笅鏉ョ殑鍐呭涓嶇粡澶勭悊鐩存帴淇濆瓨涓哄師濮嬪唴瀹廣傝孨utch瀵規(guī)枃鏈繘琛屼簡鍖呮嫭閾炬帴鍒嗘瀽銆佹鏂囨彁鍙栥佸緩绔嬬儲寮曪紙Lucene绱㈠紩錛夌瓑澶勭悊銆?/p>

浜旓紝浠庣埇鍙栫殑鏁堢巼鏉ヨ錛孡arbin鏁堢巼杈冮珮錛屽洜涓哄叾鏄嬌鐢╟++瀹炵幇鐨勫茍涓斿姛鑳藉崟涓銆?/p>

琛?3縐嶇埇铏殑姣旇緝

crawler

寮鍙戣璦

鍔熻兘鍗曚竴

鏀寔鍒嗗竷寮忕埇鍙?/p>

鏁堢巼

闀滃儚淇濆瓨

Nutch

Java

×

浣?/p>

×

Larbin

C++

×

楂?/p>

Heritrix

Java

×

涓?/p>

鍏朵粬緗戠粶鐖櫕浠嬬粛錛?/h3>

Heritrix
Heritrix鏄竴涓紑婧愶紝鍙墿灞曠殑web鐖櫕欏圭洰銆侶eritrix璁捐鎴愪弗鏍兼寜鐓obots.txt鏂囦歡鐨勬帓闄ゆ寚紺哄拰META robots鏍囩銆?br>http://crawler.archive.org/

WebSPHINX
WebSPHINX鏄竴涓狫ava綾誨寘鍜學eb鐖櫕鐨勪氦浜掑紡寮鍙戠幆澧冦俉eb鐖櫕(涔熷彨浣滄満鍣ㄤ漢鎴栬湗铔?鏄彲浠ヨ嚜鍔ㄦ祻瑙堜笌澶勭悊Web欏甸潰鐨勭▼搴忋俉ebSPHINX鐢變袱閮ㄥ垎緇勬垚錛氱埇铏伐浣滃鉤鍙板拰WebSPHINX綾誨寘銆?br>http://www.cs.cmu.edu/~rcm/websphinx/

WebLech
WebLech鏄竴涓姛鑳藉己澶х殑Web绔欑偣涓嬭澆涓庨暅鍍忓伐鍏楓傚畠鏀寔鎸夊姛鑳介渶姹傛潵涓嬭澆web绔欑偣騫惰兘澶熷敖鍙兘妯′豢鏍囧噯Web嫻忚鍣ㄧ殑琛屼負銆俉ebLech鏈変竴涓姛鑳芥帶鍒跺彴騫墮噰鐢ㄥ綰跨▼鎿嶄綔銆?br>http://weblech.sourceforge.net/
Arale
Arale涓昏涓轟釜浜轟嬌鐢ㄨ岃璁★紝鑰屾病鏈夊儚鍏跺畠鐖櫕涓鏍鋒槸鍏蟲敞浜庨〉闈㈢儲寮曘侫rale鑳藉涓嬭澆鏁翠釜web绔欑偣鎴栨潵鑷獁eb绔欑偣鐨勬煇浜涜祫婧愩侫rale榪樿兘澶熸妸鍔ㄦ侀〉闈㈡槧灝勬垚闈欐侀〉闈€?br>http://web.tiscali.it/_flat/arale.jsp.html

J-Spider
J-Spider:鏄竴涓畬鍏ㄥ彲閰嶇疆鍜屽畾鍒剁殑Web Spider寮曟搸.浣犲彲浠ュ埄鐢ㄥ畠鏉ユ鏌ョ綉绔欑殑閿欒(鍐呭湪鐨勬湇鍔″櫒閿欒絳?,緗戠珯鍐呭閮ㄩ摼鎺ユ鏌ワ紝鍒嗘瀽緗戠珯鐨勭粨鏋?鍙垱寤轟竴涓綉绔欏湴鍥?,涓嬭澆鏁翠釜Web绔欑偣錛屼綘榪樺彲浠ュ啓涓涓狫Spider鎻掍歡鏉ユ墿灞曚綘鎵闇瑕佺殑鍔熻兘銆?br>http://j-spider.sourceforge.net/

spindle
spindle 鏄竴涓瀯寤哄湪Lucene宸ュ叿鍖呬箣涓婄殑Web绱㈠紩/鎼滅儲宸ュ叿.瀹冨寘鎷竴涓敤浜庡垱寤虹儲寮曠殑HTTP spider鍜屼竴涓敤浜庢悳绱㈣繖浜涚儲寮曠殑鎼滅儲綾匯俿pindle欏圭洰鎻愪緵浜嗕竴緇凧SP鏍囩搴撲嬌寰楅偅浜涘熀浜嶫SP鐨勭珯鐐逛笉闇瑕佸紑鍙戜換浣旿ava綾誨氨鑳藉澧炲姞鎼滅儲鍔熻兘銆?br>http://www.bitmechanic.com/projects/spindle/

Arachnid
Arachnid: 鏄竴涓熀浜嶫ava鐨剋eb spider妗嗘灦.瀹冨寘鍚竴涓畝鍗曠殑HTML鍓栨瀽鍣ㄨ兘澶熷垎鏋愬寘鍚獺TML鍐呭鐨勮緭鍏ユ祦.閫氳繃瀹炵幇Arachnid鐨勫瓙綾誨氨鑳藉寮鍙戜竴涓畝鍗曠殑Web spiders騫惰兘澶熷湪Web绔欎笂鐨勬瘡涓〉闈㈣瑙f瀽涔嬪悗澧炲姞鍑犺浠g爜璋冪敤銆?Arachnid鐨勪笅杞藉寘涓寘鍚袱涓猻pider搴旂敤紼嬪簭渚嬪瓙鐢ㄤ簬婕旂ず濡備綍浣跨敤璇ユ鏋躲?br>http://arachnid.sourceforge.net/

LARM
LARM鑳藉涓篔akarta Lucene鎼滅儲寮曟搸妗嗘灦鐨勭敤鎴鋒彁渚涗竴涓函Java鐨勬悳绱㈣В鍐蟲柟妗堛傚畠鍖呭惈鑳藉涓烘枃浠訛紝鏁版嵁搴撹〃鏍煎緩绔嬬儲寮曠殑鏂規(guī)硶鍜屼負Web绔欑偣寤虹儲寮曠殑鐖櫕銆?br>http://larm.sourceforge.net/

JoBo
JoBo 鏄竴涓敤浜庝笅杞芥暣涓猈eb绔欑偣鐨勭畝鍗曞伐鍏楓傚畠鏈川鏄竴涓猈eb Spider銆備笌鍏跺畠涓嬭澆宸ュ叿鐩告瘮杈冨畠鐨勪富瑕佷紭鍔挎槸鑳藉鑷姩濉厖f(xié)orm(濡傦細鑷姩鐧誨綍)鍜屼嬌鐢╟ookies鏉ュ鐞唖ession銆侸oBo榪樻湁鐏墊椿鐨勪笅杞借鍒?濡傦細閫氳繃緗戦〉鐨刄RL錛屽ぇ灝忥紝MIME綾誨瀷絳?鏉ラ檺鍒朵笅杞姐?br>http://www.matuschek.net/software/jobo/index.html

snoics-reptile
snoics -reptile鏄敤綰疛ava寮鍙戠殑錛岀敤鏉ヨ繘琛岀綉绔欓暅鍍忔姄鍙栫殑宸ュ叿錛屽彲浠ヤ嬌鐢ㄩ厤鍒舵枃浠朵腑鎻愪緵鐨刄RL鍏ュ彛錛屾妸榪欎釜緗戠珯鎵鏈夌殑鑳界敤嫻忚鍣ㄩ氳繃GET鐨勬柟寮忚幏鍙栧埌鐨勮祫婧愬叏閮ㄦ姄鍙栧埌鏈湴錛屽寘鎷綉欏靛拰鍚勭綾誨瀷鐨勬枃浠訛紝濡傦細鍥劇墖銆乫lash銆乵p3銆亃ip銆乺ar銆乪xe絳夋枃浠躲傚彲浠ュ皢鏁翠釜緗戠珯瀹屾暣鍦頒笅浼犺嚦紜洏鍐咃紝騫惰兘淇濇寔鍘熸湁鐨勭綉绔欑粨鏋勭簿紜笉鍙樸傚彧闇瑕佹妸鎶撳彇涓嬫潵鐨勭綉绔欐斁鍒皐eb鏈嶅姟鍣?濡傦細Apache)涓紝灝卞彲浠ュ疄鐜板畬鏁寸殑緗戠珯闀滃儚銆?br>http://www.blogjava.net/snoics


Web-Harvest
Web-Harvest鏄竴涓狫ava寮婧怶eb鏁版嵁鎶藉彇宸ュ叿銆傚畠鑳藉鏀墮泦鎸囧畾鐨刉eb欏甸潰騫朵粠榪欎簺欏甸潰涓彁鍙栨湁鐢ㄧ殑鏁版嵁銆俉eb-Harvest涓昏鏄繍鐢ㄤ簡鍍廥SLT,XQuery,姝e垯琛ㄨ揪寮忕瓑榪欎簺鎶鏈潵瀹炵幇瀵箃ext/xml鐨勬搷浣溿?br>http://web-harvest.sourceforge.net

spiderpy
spiderpy鏄竴涓熀浜嶱ython緙栫爜鐨勪竴涓紑婧恮eb鐖櫕宸ュ叿錛屽厑璁哥敤鎴鋒敹闆嗘枃浠跺拰鎼滅儲緗戠珯錛屽茍鏈変竴涓彲閰嶇疆鐨勭晫闈€?br>http://pyspider.sourceforge.net/

The Spider Web Network Xoops Mod Team
pider Web Network Xoops Mod鏄竴涓猉oops涓嬬殑妯″潡錛屽畬鍏ㄧ敱PHP璇█瀹炵幇銆?br>http://www.tswn.com/

larbin
larbin鏄釜鍩轟簬C++鐨剋eb鐖櫕宸ュ叿錛屾嫢鏈夋槗浜庢搷浣滅殑鐣岄潰錛屼笉榪囧彧鑳借窇鍦↙INUX涓嬶紝鍦ㄤ竴鍙版櫘閫歅C涓媗arbin姣忓ぉ鍙互鐖?鐧句竾涓〉闈?褰撶劧鍟︼紝闇瑕佹嫢鏈夎壇濂界殑緗戠粶)
http://larbin.sourceforge.net/index-eng.html

鐖櫕瀛樺湪鐨勯棶棰?/h3>

1. robots.txt

robots.txt鏄竴涓函鏂囨湰鏂囦歡錛屽湪榪欎釜鏂囦歡涓綉绔欑鐞嗚呭彲浠ュ0鏄庤緗戠珯涓笉鎯寵robots璁塊棶鐨勯儴鍒嗭紝鎴栬呮寚瀹氭悳绱㈠紩鎿庡彧鏀跺綍鎸囧畾鐨勫唴瀹廣?/p>

褰撲竴涓悳绱㈡満鍣ㄤ漢錛堟湁鐨勫彨鎼滅儲铚樿洓錛夎闂竴涓珯鐐規(guī)椂錛屽畠浼氶鍏堟鏌ヨ绔欑偣鏍圭洰褰曚笅鏄惁瀛樺湪robots.txt錛屽鏋滃瓨鍦紝鎼滅儲鏈哄櫒浜哄氨浼氭寜鐓ц鏂囦歡涓殑鍐呭鏉ョ‘瀹氳闂殑鑼冨洿錛涘鏋滆鏂囦歡涓嶅瓨鍦紝閭d箞鎼滅儲鏈哄櫒浜哄氨娌跨潃閾炬帴鎶撳彇銆?/p>

鍙﹀錛宺obots.txt蹇呴』鏀劇疆鍦ㄤ竴涓珯鐐圭殑鏍圭洰褰曚笅錛岃屼笖鏂囦歡鍚嶅繀欏誨叏閮ㄥ皬鍐欍?/p>

2. 鏈変簺綾誨瀷鐨勭綉欏甸毦浠ョ埇鍙栥備緥濡傦紝浣跨敤javascript璋冪敤鐨勯〉闈€侀渶瑕佹敞鍐屾墠鑳借闂殑欏甸潰絳夈?/p>

緗戠粶鐖櫕鐨勭浉鍏崇爺絀跺伐浣?/h3>

鏈変簺綾誨瀷鐨勭綉欏甸毦浠ョ埇鍙栥備緥濡傦紝浣跨敤javascript璋冪敤鐨勯〉闈€侀渶瑕佹敞鍐屾墠鑳借闂殑欏甸潰絳夛紝瀵逛簬榪欎簺緗戠粶鐨勭埇鍙栬褰掔粨涓烘繁灞傜綉緇滅殑鎸栨帢銆傝繖浜涚綉欏靛彲褰掔粨涓哄涓嬪嚑綾伙細錛?錛夐氳繃

濉啓琛ㄥ崟褰㈡垚瀵瑰悗鍙板啀鐜版暟鎹簱鏌ヨ寰楀埌鐨勫姩鎬侀〉闈€傦紙2錛夌敱浜庣己涔忚鎸囧悜鐨勮秴閾炬帴鑰屾病鏈夎绱㈠紩鍒扮殑欏甸潰銆傦紙3錛夐渶瑕佹敞鍐屾垨鍏朵粬闄愬埗璁塊棶鐨勯〉闈€傦紙4錛夊彲璁塊棶鐨勯潪緗戦〉鏂囦歡銆傚湪鏇句紵杈夌瓑浜虹殑鏂囩珷涓紝瀵硅繖綾婚棶棰樿繘琛屼簡緇艱堪銆傚湪鐜嬫槧絳変漢鐨勬枃绔犱腑錛屾彁鍑轟簡浣跨敤涓涓祵鍏ュ紡鐨凧avaScript寮曟搸鏉ヨ繘琛屽姩鎬佺綉欏甸噰闆嗙殑鏂規(guī)硶銆?/p>

1. 鏈変簺闈為潤鎬佺殑Web2.0緗戠珯鐨勫唴瀹瑰姩鎬佺敓鎴愶紝鏁版嵁閲忓法澶э紝闅句互鎶撳彇錛屼緥濡傝鍧涚瓑緗戠珯銆傚湪2008騫碨IGIR涓紝Yida Wang絳夋彁鍑轟簡涓縐嶇埇鍙栬鍧涚殑鐖彇鏂規(guī)硶銆?/p>

2. 鏈変簺緗戠珯浼氶檺鍒剁綉緇滅埇铏殑鐖彇錛孉nalia G. Lourenco, Orlando O. Belo 鍦?006騫存彁鍑烘潵浣跨敤鏌ヨ鏃ュ織鐨勬柟娉曢檺鍒剁綉緇滅埇铏殑媧誨姩浠ュ噺杞繪湇鍔″櫒鍘嬪姏銆?/p>

3. 緗戠粶涓婄殑緗戦〉鏁伴噺澶ぇ錛屽湪鐖彇鏃墮渶瑕佽冭檻鐖彇鐨勬椂闂村強鏁堢巼絳夐棶棰橈紝UCLA鐨凧unghoo Cho絳夋彁鍑轟簡浣跨敤騫惰鐨刢rawler鐨勬柟娉曘?/p>

4.



]]> 青青草原综合久久大伊人导航_色综合久久天天综合_日日噜噜夜夜狠狠久久丁香五月_热久久这里只有精品
  • <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
      <noscript id="pjuwb"></noscript>
            <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
              <dd id="pjuwb"></dd>
              <abbr id="pjuwb"></abbr>
              校园春色国产精品| 欧美中文字幕在线观看| 你懂的国产精品| 久久精品九九| 亚洲国产综合视频在线观看| 久久久久久97三级| 另类av导航| 亚洲午夜高清视频| 亚洲自拍偷拍一区| 亚洲国产99| 日韩午夜中文字幕| 国产一区91精品张津瑜| 美女爽到呻吟久久久久| 欧美激情久久久| 欧美亚洲一区在线| 免费成人高清在线视频| 亚洲一二三四久久| 久久午夜电影| 亚洲午夜一区二区三区| 欧美自拍偷拍午夜视频| 亚洲精品在线观| 亚洲欧美另类在线| 99精品热视频| 久久国产欧美| 亚洲午夜激情在线| 另类图片综合电影| 午夜精品国产精品大乳美女| 久久久噜噜噜久久中文字幕色伊伊| 在线电影国产精品| 亚洲欧洲在线看| 国产欧美丝祙| 亚洲免费观看高清完整版在线观看| 麻豆免费精品视频| 欧美四级在线| 欧美国产高潮xxxx1819| 国产精品成人一区二区艾草| 欧美成在线观看| 国产乱码精品一区二区三| 亚洲国产一区二区三区在线播| 亚洲午夜久久久久久久久电影院 | 亚洲欧美在线免费| 久久超碰97中文字幕| 亚洲性av在线| 欧美日韩国产va另类| 欧美ab在线视频| 国内一区二区三区在线视频| 一区二区三区日韩精品| 99re热精品| 久久久精品国产免大香伊| 欧美亚洲免费高清在线观看| 欧美日韩视频免费播放| 欧美寡妇偷汉性猛交| 欧美一区三区三区高中清蜜桃| 国产日韩欧美高清免费| 99亚洲伊人久久精品影院红桃| 欧美国产先锋| 你懂的视频欧美| 在线免费观看日本一区| 亚洲欧美国内爽妇网| 亚洲欧美卡通另类91av| 欧美日韩网站| 99国产一区二区三精品乱码| 一本到高清视频免费精品| 欧美激情一区二区三区成人| 亚洲高清影视| 91久久精品视频| 欧美激情第六页| 夜夜嗨av一区二区三区网页 | 免费在线播放第一区高清av| 久久久久久久欧美精品| 国产综合亚洲精品一区二| 久久国产精品第一页| 久久综合久久久久88| 一区久久精品| 欧美福利在线| 一区二区欧美日韩视频| 亚洲视频一区在线| 国产精品日韩欧美一区二区| 先锋影院在线亚洲| 榴莲视频成人在线观看| 亚洲日韩成人| 欧美天天在线| 欧美一进一出视频| 女仆av观看一区| 9色porny自拍视频一区二区| 欧美午夜电影在线观看| 亚洲欧美一区二区三区久久| 久久―日本道色综合久久| 怡红院av一区二区三区| 欧美日韩成人精品| 午夜精彩视频在线观看不卡| 欧美成人激情视频| 亚洲午夜性刺激影院| 国产亚洲精品bv在线观看| 久久精品亚洲乱码伦伦中文| 亚洲国产成人av好男人在线观看| 国产精品另类一区| 久久精品卡一| 亚洲毛片av| 久久综合九色99| 一区二区日韩免费看| 国产一区二区三区直播精品电影 | 午夜视频在线观看一区| 免播放器亚洲一区| 亚洲精品乱码视频 | 亚洲日本在线观看| 性欧美超级视频| 最近中文字幕mv在线一区二区三区四区| 亚洲视频欧美在线| 欧美高清视频在线播放| 欧美亚洲视频一区二区| 亚洲久色影视| 在线观看成人小视频| 国产精品久久久久久久电影 | 亚洲精品1区2区| 国产真实乱子伦精品视频| 欧美精品久久久久久| 久久免费视频网| 在线视频亚洲一区| 亚洲国产精品久久久久秋霞蜜臀| 在线观看一区| 国产日产欧美a一级在线| 欧美激情在线免费观看| 久久久青草青青国产亚洲免观| 久久精品一本| 亚洲午夜三级在线| 亚洲最新在线视频| 亚洲精品欧洲| 亚洲人精品午夜在线观看| 国内外成人在线| 国产精品老女人精品视频| 欧美日韩妖精视频| 欧美精品国产| 欧美精品自拍| 欧美人成在线视频| 欧美精品成人在线| 欧美久久久久久久| 欧美精品日韩综合在线| 欧美精品亚洲精品| 欧美国产日韩xxxxx| 欧美大色视频| 欧美激情乱人伦| 欧美精品一区二区三区很污很色的| 这里只有精品电影| 亚洲三级国产| 99精品欧美一区二区蜜桃免费| 久久av在线| 久久久久一本一区二区青青蜜月| 亚洲国产婷婷香蕉久久久久久| 欧美高清成人| 欧美日韩国产色视频| 欧美午夜www高清视频| 国产精品国产三级国产普通话蜜臀 | 亚洲欧美国产不卡| 亚洲校园激情| 久久精品午夜| 欧美电影免费| 99视频在线观看一区三区| 一本色道久久99精品综合| 亚洲手机在线| 久久精品视频免费| 欧美成人一区二区三区在线观看| 亚洲一品av免费观看| 香蕉亚洲视频| 欧美国产日本高清在线| 国产精品s色| 国产综合亚洲精品一区二| 最新日韩在线视频| 亚洲无亚洲人成网站77777| 欧美中文字幕在线视频| 免费不卡中文字幕视频| 亚洲第一在线视频| 亚洲一区二区三区久久| 久久精品国产欧美激情| 欧美精品在线免费| 
国产一区二区精品| 99在线热播精品免费99热| 久久av一区二区| 亚洲精品乱码久久久久久日本蜜臀| 看片网站欧美日韩| 亚洲精选视频在线| 欧美在线观看一区二区三区| 鲁大师影院一区二区三区| 欧美日韩亚洲国产精品| 国产中文一区| 午夜精品网站| 亚洲日本黄色| 老司机一区二区三区| 国产人成精品一区二区三| 日韩亚洲欧美一区二区三区| 久久美女性网| 亚洲小视频在线观看| 欧美黄色精品| 久久性色av| 国产视频精品va久久久久久| 久久久久久伊人| 亚洲欧洲在线一区| 久久米奇亚洲| 国产精品推荐精品| 在线亚洲高清视频|