论坛首页 入门技术论坛

java中根据url抓取html页面内容的方法

浏览 4409 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (3)
作者 正文
   发表时间:2009-02-01   最后修改:2009-02-01
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class URLUtil {

  public static String getHtml(String urlString) {
    try {
      StringBuffer html = new StringBuffer();
      URL url = new URL(urlString);
      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
      InputStreamReader isr = new InputStreamReader(conn.getInputStream());
      BufferedReader br = new BufferedReader(isr);
      String temp;
      while ((temp = br.readLine()) != null) {
        html.append(temp).append("\n");
      }
      br.close();
      isr.close();
      return html.toString();
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    }
  }

  public static void main(String[] args) {
    System.out.println(URLUtil.getHtml("http://www.163.com"));
  }
}
论坛首页 入门技术版

跳转论坛:
Global site tag (gtag.js) - Google Analytics