浏览 1817 次
该帖已经被评为隐藏帖
|
|
---|---|
作者 | 正文 |
发表时间:2008-12-05
最后修改:2008-12-05
package test;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Downloads a search-result page and extracts the link, title and summary of
 * each result table using plain string searching (no HTML parser).
 *
 * <p>The extraction helpers ({@link #getURL}, {@link #getTitle},
 * {@link #getShow}) expect a fragment from which every space character has
 * already been removed, which is why they look for markers such as
 * {@code <fontsize=} rather than {@code <font size=}.
 */
public class readBaidu {

    /** Opening markup that identifies one search-result table. */
    private static final String RESULT_START =
            "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\"><tr><td class=f>";

    /** Closing markup of a result table. */
    private static final String RESULT_END = "</table>";

    /**
     * Splits the page into result tables and converts each one into a
     * {@code baiduBean}.
     *
     * <p>The page is scanned left to right by index. Every table that starts
     * with the result marker and has a closing {@code </table>} yields one
     * bean; scanning stops at the first result table that is never closed.
     *
     * @param s full HTML of the result page; {@code null} yields an empty list
     * @return list of {@code baiduBean}, in page order; never {@code null}
     * @throws Exception declared for backward compatibility with callers
     */
    public List readHTML(String s) throws Exception {
        List<baiduBean> results = new ArrayList<baiduBean>();
        if (s == null) {
            return results;
        }
        int from = 0;
        while (true) {
            // Cut at the same marker that is tested for, so that an unrelated
            // table sharing only the "<table ...>" prefix is never parsed.
            int start = s.indexOf(RESULT_START, from);
            if (start < 0) {
                break;
            }
            int close = s.indexOf(RESULT_END, start);
            if (close < 0) {
                break; // truncated page: no complete table left
            }
            int end = close + RESULT_END.length();

            // The helpers work on the space-free form; compute it once.
            String compact = s.substring(start, end).replace(" ", "");

            baiduBean bb = new baiduBean();
            bb.setContents(this.getShow(compact));
            bb.setUrl(this.getURL(compact));
            bb.setTitle(this.getTitle(compact));
            results.add(bb);

            from = end;
        }
        return results;
    }

    /**
     * Downloads the resource at {@code path} and returns its text with all
     * line terminators removed (lines are concatenated without a separator).
     *
     * <p>The text is decoded with the charset named in the response's
     * Content-Type header when one is present and supported, otherwise with
     * the platform default charset. The stream is always closed.
     *
     * @param path URL to fetch
     * @return the response body as a single line of text
     * @throws Exception if the URL is malformed or the download fails
     */
    public String getContents(String path) throws Exception {
        URLConnection uc = new URL(path).openConnection();
        InputStream in = uc.getInputStream();
        try {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(in, charsetOf(uc)));
            StringBuilder contents = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line);
            }
            return contents.toString();
        } finally {
            in.close();
        }
    }

    /** Picks the charset announced in the Content-Type header, else the platform default. */
    private static Charset charsetOf(URLConnection uc) {
        String contentType = uc.getContentType();
        if (contentType != null) {
            // Charset names are case-insensitive, so the lower-cased copy can
            // be used both for searching and for the lookup itself.
            String lower = contentType.toLowerCase(Locale.ENGLISH);
            int at = lower.indexOf("charset=");
            if (at >= 0) {
                String name = lower.substring(at + "charset=".length());
                int semi = name.indexOf(';');
                if (semi >= 0) {
                    name = name.substring(0, semi);
                }
                name = name.replace("\"", "").trim();
                try {
                    return Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: use the default below.
                }
            }
        }
        return Charset.defaultCharset();
    }

    /**
     * Extracts the text between the first {@code href="} and the character
     * just before the following {@code target=}.
     *
     * @param con space-free HTML of one result table
     * @return the extracted link, or an empty string if the markers are absent
     */
    public String getURL(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("href=\"");
        if (marker < 0) {
            return "";
        }
        int startUrl = marker + 6; // skip href="
        int target = con.indexOf("target=", startUrl);
        // One character before target= is dropped; presumably the closing
        // quote of the href attribute -- TODO confirm against a live page.
        int endUrl = target - 1;
        if (target < 0 || endUrl < startUrl) {
            return "";
        }
        return con.substring(startUrl, endUrl);
    }

    /**
     * Extracts the title: the text that starts 14 characters after the first
     * {@code <fontsize=} and ends before the next {@code </a>}. Any
     * {@code fontcolor} in the result is turned back into {@code font color}.
     *
     * @param con space-free HTML of one result table
     * @return the title markup, or an empty string if the markers are absent
     */
    public String getTitle(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<fontsize=");
        if (marker < 0) {
            return "";
        }
        // 14 presumably equals the length of an opening tag such as
        // <fontsize="3"> -- TODO confirm against a live page.
        int startCon = marker + 14;
        int endCon = con.indexOf("</a>", startCon);
        if (endCon < 0) {
            return "";
        }
        return con.substring(startCon, endCon).replace("fontcolor", "font color");
    }

    /**
     * Extracts the summary: the text that starts 17 characters after the
     * first {@code <br>} and ends before the next {@code <br>}. Any
     * {@code fontcolor} in the result is turned back into {@code font color}.
     *
     * @param con space-free HTML of one result table
     * @return the summary markup, or an empty string if the markers are absent
     */
    public String getShow(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<br>");
        if (marker < 0) {
            return "";
        }
        // 17 presumably covers <br> plus a following opening font tag
        // -- TODO confirm against a live page.
        int startBR = marker + 17;
        int endBR = con.indexOf("<br>", startBR);
        if (endBR < 0) {
            return "";
        }
        return con.substring(startBR, endBR).replace("fontcolor", "font color");
    }

    /**
     * Fetches one fixed result page and prints the url, title and contents of
     * every extracted result.
     *
     * @param args ignored
     * @throws Exception if the download or parsing fails
     */
    public static void main(String[] args) throws Exception {
        String path = "http://www.baidu.com/s?wd=csdn&pn=10";
        readBaidu r = new readBaidu();
        String rest = r.getContents(path);
        List list = r.readHTML(rest);
        for (int i = 0; i < list.size(); i++) {
            baiduBean b = (baiduBean) list.get(i);
            System.out.println(b.url);
            System.out.println(b.title);
            System.out.println(b.contents);
        }
    }
}
// Notice from the hosting page: copyright of ITeye articles belongs to the
// author and is protected by law; reproduction without the author's written
// permission is not allowed.
推荐链接
|
|
返回顶楼 | |