`
pavel
  • 浏览: 941116 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

抓取的另一种思路

 
阅读更多

public String getContent(String weburl) throws Exception {
  // TODO Auto-generated method stub
  NodeList nodelist = null;
  String temp = "";
  List list = new ArrayList();
  Parser parser = new Parser(weburl);
  //System.out.println("getUrl" + weburl);
  // System.out.println(parser.getEncoding());
  parser.setEncoding(parser.getEncoding());
  NodeFilter filterTable = new TagNameFilter("div");
  NodeFilter filterHeight = new HasAttributeFilter("class", "newsCon");
  NodeFilter filterClass = new HasAttributeFilter("id", "IDNewsDtail");
  NodeFilter filter = new AndFilter(new NodeFilter[] { filterTable,
  filterHeight, filterClass });
  nodelist = parser.extractAllNodesThatMatch(filter);
  System.out.println(nodelist.toHtml());
  
     // System.out.println("*******"+temp);
     return temp; 
 }

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics