`
futrueboy
  • 浏览: 84731 次
  • 性别: Icon_minigender_1
  • 来自: 杭州
社区版块
存档分类
最新评论

在网上看到一段代码,觉得不错就摘了下来,主要是在中文(字符编码)处理上比较有用。

阅读更多

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.SSLHandshakeException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.NoHttpResponseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import com.anal.UtilComm;

/**
 * Static helpers for building a pre-configured {@link DefaultHttpClient} and
 * fetching pages with GET/POST, decoding the body with the charset declared by
 * the response and falling back to GBK when none is declared.
 */
public class HttpClientFactory {
	
	/** Charset used to decode a response body that declares no charset. */
	private static final String CHARSET_GBK = "GBK";
	
	/**
	 * Retry policy installed on every client created by
	 * {@link #getDefaultHttpClient(String)}.
	 */
	private static final HttpRequestRetryHandler requestRetryHandler = new HttpRequestRetryHandler() {
		/**
		 * Decides whether a request that failed with an I/O error is retried.
		 *
		 * @param exception      the error that aborted the attempt
		 * @param executionCount how many times the request has been executed so far
		 * @param context        execution context holding the failed request
		 * @return {@code true} to retry, {@code false} to give up
		 */
		public synchronized boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
			if (executionCount > 3) {
				// Retry budget exhausted.
				return false;
			}
			if (exception instanceof NoHttpResponseException) {
				// The server dropped the connection without answering: try again.
				return true;
			}
			if (exception instanceof SSLHandshakeException) {
				// A failed SSL handshake is not retried.
				return false;
			}
			HttpRequest request = (HttpRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
			// Requests that carry no entity body are treated as safe to repeat.
			boolean idempotent = !(request instanceof HttpEntityEnclosingRequest);
			return idempotent;
		}
	};
	
	/**
	 * Response handler that decodes the whole body into a String, using the
	 * charset declared by the entity or GBK when none is declared.
	 * It is not referenced by the other members of this class.
	 */
	private static final ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
		/**
		 * @param response the response to decode
		 * @return the decoded body, or {@code null} when the response has no entity
		 */
		public synchronized String handleResponse(HttpResponse response)	throws ClientProtocolException, IOException {
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				return null;
			}
			return new String(EntityUtils.toByteArray(entity), resolveCharset(entity));
		}
	};
	
	/**
	 * Creates a {@link DefaultHttpClient} with browser-like default headers,
	 * HTTP/1.1, a 30 s connect timeout, a 60 s socket timeout and the retry
	 * policy defined in this class.
	 *
	 * @param charset value for the HTTP content charset parameter; UTF-8 is
	 *                used when {@code null}
	 * @return the configured client
	 */
	public static DefaultHttpClient getDefaultHttpClient(final String charset){
		DefaultHttpClient httpclient = new DefaultHttpClient();
		
		// Browser-like headers, for servers that only answer to browsers.
		List<BasicHeader> headers = new ArrayList<BasicHeader>();
		headers.add(new BasicHeader("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*"));
		headers.add(new BasicHeader("Accept-Language", "zh-cn,en-us,zh-tw,en-gb,en;"));
		headers.add(new BasicHeader("Accept-Charset","gbk,gb2312,utf-8,BIG5,ISO-8859-1;"));
		headers.add(new BasicHeader("Connection","Close"));
		headers.add(new BasicHeader("Cache-Control","no-cache"));
		headers.add(new BasicHeader("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; CIBA)"));
		httpclient.getParams().setParameter("http.default-headers", headers);
		
		// Protocol version and content charset.
		httpclient.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
		httpclient.getParams().setParameter(CoreProtocolPNames.HTTP_CONTENT_CHARSET, charset == null ? HTTP.UTF_8 : charset);
		
		// Timeouts in milliseconds.
		httpclient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,30000);
		httpclient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT,60000);
		
		httpclient.setHttpRequestRetryHandler(requestRetryHandler);
		return httpclient;
	}
	
	/**
	 * Fetches a page with GET.
	 *
	 * @param httpClient client used to execute the request
	 * @param url        address to fetch
	 * @param charset    not used for decoding: the body is decoded with the
	 *                   charset declared by the response, or GBK when none
	 * @return the decoded body, or {@code null} when the response has no
	 *         entity or its status is not 200
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException             on a connection or read error
	 */
	public static String httpGet(HttpClient httpClient, String url,
			String charset) throws ClientProtocolException, IOException {
		HttpGet httpget = new HttpGet(url);
		try {
			HttpResponse response = httpClient.execute(httpget);
			return readBodyIfOk(response);
		} finally {
			// Runs on every path (success, non-200, exception) so the
			// connection is never left checked out.
			abortRequest(httpget);
		}
	}

	/**
	 * Fetches a page with POST. The form fields come from
	 * {@code UtilComm.getNameValuePairs(url)} (presumably parsed from the URL's
	 * query string; verify against that helper).
	 *
	 * @param httpClient client used to execute the request
	 * @param url        address to post to
	 * @param charset    charset used to encode the form fields; the response
	 *                   body is decoded with the charset declared by the
	 *                   response, or GBK when none
	 * @return the decoded body, or {@code null} when the response has no
	 *         entity or its status is not 200
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException             on a connection or read error
	 */
	public static String httpPost(HttpClient httpClient, String url,
			String charset) throws ClientProtocolException, IOException {
		HttpPost httppost = new HttpPost(url);
		try {
			List<NameValuePair> nvpsList = UtilComm.getNameValuePairs(url);
			httppost.setEntity(new UrlEncodedFormEntity(nvpsList, charset));
			HttpResponse response = httpClient.execute(httppost);
			return readBodyIfOk(response);
		} finally {
			// Runs on every path (success, non-200, exception) so the
			// connection is never left checked out.
			abortRequest(httppost);
		}
	}
	
	/**
	 * Aborts the given request, releasing the connection it holds.
	 * Does nothing when the request is {@code null} or already aborted.
	 *
	 * @param hrb request to abort, may be {@code null}
	 */
	public static void abortRequest(final HttpRequestBase hrb){
		// The original tested isAborted() without negation, so abort() was
		// only ever invoked on requests that had already been aborted.
		if (hrb != null && !hrb.isAborted()) {
			hrb.abort();
		}
	}
	
	/**
	 * Shuts down the connection manager of the given client.
	 *
	 * @param httpclient client to shut down, may be {@code null}
	 */
	public static void shutdown(final HttpClient httpclient) {
		if (httpclient != null) {
			httpclient.getConnectionManager().shutdown();
		}
	}
	
	/** Returns the charset declared by the entity, or GBK when it declares none. */
	private static String resolveCharset(HttpEntity entity) {
		String declared = EntityUtils.getContentCharSet(entity);
		return declared == null ? CHARSET_GBK : declared;
	}
	
	/** Decodes the body of a 200 response; returns null for any other status or a missing entity. */
	private static String readBodyIfOk(HttpResponse response) throws IOException {
		HttpEntity entity = response.getEntity();
		if (entity == null || HttpStatus.SC_OK != response.getStatusLine().getStatusCode()) {
			return null;
		}
		return UtilComm.getString(entity.getContent(), resolveCharset(entity));
	}
}
 
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics