抓取网上信息，抓取人人网院校 -

c2045875

浏览: 52987 次

最近访客更多访客>>

yanghui_123

sanniangmiao

最王座

wsnbmw

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

抓取网上信息，抓取人人网院校

博客分类：

java爬虫，抓取页面信息

httpclient更好地封装爬虫抓取网页信息抓取人人高校信息

这是我自己对 HttpClient 做的一个封装改进，
代码如下：

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.HTTP;

/**
 * Thin convenience wrapper around one Apache {@link HttpClient} instance for issuing
 * GET and POST requests, optionally through an HTTP proxy.
 *
 * <p>Methods whose name ends in {@code Ht} return the response body as a string;
 * methods whose name ends in {@code Re} return the {@link HttpResponse} itself.
 * Every request method is best-effort: I/O and protocol failures are logged and
 * reported through the return value (empty string or {@code null}), never thrown.
 *
 * <p>Call {@link #close()} when finished to shut down the client's connection manager.
 *
 * <p>NOTE(review): all calls share a single client; whether that is safe across
 * threads depends on the connection manager behind {@code DefaultHttpClient} —
 * confirm before sharing an instance between threads.
 */
public class HttpUtil {
    private static final Logger LOGGER = Logger.getLogger(HttpUtil.class.getName());

    private final HttpClient httpClient;

    /** Creates a utility that connects directly (no proxy). */
    public HttpUtil() {
        httpClient = new DefaultHttpClient();
    }

    /**
     * Creates a utility that routes every request through the given proxy.
     *
     * @param ip   proxy host name or IP address
     * @param port proxy port
     */
    public HttpUtil(String ip, int port) {
        httpClient = new DefaultHttpClient();
        this.setProxy(ip, port);
    }

    /**
     * Issues a GET request and returns the response body.
     *
     * @param url absolute URL to fetch
     * @return the body as produced by {@link BasicResponseHandler}, or {@code ""} when
     *         the request fails
     */
    public String getMethodHt(String url) {
        return executeForString(new HttpGet(url));
    }

    /**
     * Issues a GET request and returns the full response.
     *
     * @param url absolute URL to fetch
     * @return the response with its body buffered in memory, or {@code null} when the
     *         request fails
     */
    public HttpResponse getMethodRe(String url) {
        return executeForResponse(new HttpGet(url));
    }

    /**
     * Issues a GET request configured with the given parameters and returns the body.
     *
     * <p>The entries are applied to the request as HttpClient {@link HttpParams}
     * (via {@code setParams}); this method does not add them to the URL.
     * NOTE(review): if URL query parameters were intended, the caller currently has
     * to encode them into {@code url} — confirm the intended meaning of {@code params}.
     *
     * @param url    absolute URL to fetch
     * @param params request-level HttpClient parameters; {@code null} is treated as empty
     * @return the body as produced by {@link BasicResponseHandler}, or {@code ""} when
     *         the request fails
     */
    public String getMethodHt(String url, Map<String, String> params) {
        HttpGet get = new HttpGet(url);
        get.setParams(toHttpParams(params));
        return executeForString(get);
    }

    /**
     * Issues a GET request configured with the given parameters and returns the response.
     *
     * <p>The entries are applied to the request as HttpClient {@link HttpParams}
     * (via {@code setParams}); this method does not add them to the URL.
     * NOTE(review): if URL query parameters were intended, the caller currently has
     * to encode them into {@code url} — confirm the intended meaning of {@code params}.
     *
     * @param url    absolute URL to fetch
     * @param params request-level HttpClient parameters; {@code null} is treated as empty
     * @return the response with its body buffered in memory, or {@code null} when the
     *         request fails
     */
    public HttpResponse getMethodRe(String url, Map<String, String> params) {
        HttpGet get = new HttpGet(url);
        get.setParams(toHttpParams(params));
        return executeForResponse(get);
    }

    /**
     * Issues a POST request without a body and returns the response body.
     *
     * @param url absolute URL to post to
     * @return the body as produced by {@link BasicResponseHandler}, or {@code ""} when
     *         the request fails
     */
    public String postMethodHt(String url) {
        return executeForString(new HttpPost(url));
    }

    /**
     * Issues a POST request without a body and returns the full response.
     *
     * @param url absolute URL to post to
     * @return the response with its body buffered in memory, or {@code null} when the
     *         request fails
     */
    public HttpResponse postMethodRe(String url) {
        return executeForResponse(new HttpPost(url));
    }

    /**
     * Issues a POST request whose body is the given parameters, URL-encoded as a
     * UTF-8 form, and returns the response body.
     *
     * @param url    absolute URL to post to
     * @param params form fields; {@code null} is treated as empty
     * @return the body as produced by {@link BasicResponseHandler}, or {@code ""} when
     *         the request fails
     */
    public String postMethodHt(String url, Map<String, String> params) {
        HttpPost post = newFormPost(url, params);
        if (post == null) {
            return "";
        }
        return executeForString(post);
    }

    /**
     * Issues a POST request whose body is the given parameters, URL-encoded as a
     * UTF-8 form, and returns the full response.
     *
     * @param url    absolute URL to post to
     * @param params form fields; {@code null} is treated as empty
     * @return the response with its body buffered in memory, or {@code null} when the
     *         request fails
     */
    public HttpResponse postMethodRe(String url, Map<String, String> params) {
        HttpPost post = newFormPost(url, params);
        if (post == null) {
            return null;
        }
        return executeForResponse(post);
    }

    /**
     * Routes all subsequent requests of this instance through the given proxy.
     *
     * @param ip   proxy host name or IP address
     * @param port proxy port
     */
    public void setProxy(String ip, int port) {
        HttpHost proxy = new HttpHost(ip, port);
        httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
    }

    /** Shuts down the client's connection manager; the instance must not be used afterwards. */
    public void close() {
        httpClient.getConnectionManager().shutdown();
    }

    /** Copies the map entries into a fresh {@link HttpParams}; a null map yields empty params. */
    private static HttpParams toHttpParams(Map<String, String> params) {
        HttpParams httpParams = new BasicHttpParams();
        if (params != null) {
            for (Map.Entry<String, String> entry : params.entrySet()) {
                httpParams.setParameter(entry.getKey(), entry.getValue());
            }
        }
        return httpParams;
    }

    /** Builds a POST carrying the map as a UTF-8 URL-encoded form; returns null if encoding fails. */
    private static HttpPost newFormPost(String url, Map<String, String> params) {
        HttpPost post = new HttpPost(url);
        List<NameValuePair> fields = new ArrayList<NameValuePair>();
        if (params != null) {
            for (Map.Entry<String, String> entry : params.entrySet()) {
                fields.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
            }
        }
        try {
            post.setEntity(new UrlEncodedFormEntity(fields, HTTP.UTF_8));
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Could not encode form body for POST " + url, e);
            return null;
        }
        return post;
    }

    /** Executes the request and returns the body string, or "" after logging a failure. */
    private String executeForString(HttpUriRequest request) {
        ResponseHandler<String> handler = new BasicResponseHandler();
        boolean completed = false;
        try {
            String body = httpClient.execute(request, handler);
            completed = true;
            return body;
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so this covers both failure kinds.
            logFailure(request, e);
            return "";
        } finally {
            if (!completed) {
                // Only a failed exchange needs its connection torn down explicitly.
                request.abort();
            }
        }
    }

    /**
     * Executes the request and returns the response with its entity buffered in memory,
     * or null after logging a failure.
     */
    private HttpResponse executeForResponse(HttpUriRequest request) {
        boolean completed = false;
        try {
            HttpResponse result = httpClient.execute(request);
            HttpEntity entity = result.getEntity();
            if (entity != null) {
                // Read the body now: the caller gets a response it can still consume after
                // this method returns, and the connection is not left checked out.
                // The whole body is held in memory, so this is unsuitable for huge payloads.
                result.setEntity(new BufferedHttpEntity(entity));
            }
            completed = true;
            return result;
        } catch (IOException e) {
            logFailure(request, e);
            return null;
        } finally {
            if (!completed) {
                request.abort();
            }
        }
    }

    /** Logs a failed request together with its method and target URI. */
    private static void logFailure(HttpUriRequest request, IOException cause) {
        LOGGER.log(Level.WARNING,
                "HTTP " + request.getMethod() + " failed: " + request.getURI(), cause);
    }
}
利用这个类可以更方便地进行 HTTP 操作，但它还不完善。
其实抓取信息需要掌握三点：1. 了解 HTTP 协议
             2. 抓取网页内容
             3. 正则表达式

大学院校分布.zip (139 KB)
下载次数: 30

HttpUtil.zip (1.1 KB)
下载次数: 26

分享到：

java分布式开发TCP/IP+BIO(基于消息方式实 ... | 试验过得代理ip

2011-09-28 19:38
浏览 1432
评论(0)
分类:互联网
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

抓取网上信息，抓取人人网院校

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

抓取网上信息，抓取人人网院校

评论

发表评论

相关推荐

最近访客更多访客>>