httpclient4.1网页抓取的问题？

0 0

httpclient4.1网页抓取的问题？5

最近写了一个网页抓取程序，其中用到了 HttpClient 4.1，程序运行本身没有什么问题。
但是查看流量监控时，发现请求流量总是比较有规律地出现下降，如图所示：

自己分析了半天也没有发现问题的原因，所以请教一下各位，帮忙看看，
代码如下：

/**
 * Factory that owns one shared {@link HttpClient} instance.
 * <p>
 * The client and its {@link ThreadSafeClientConnManager} connection pool are
 * built once, when this class is initialized, and handed out unchanged by
 * {@link #getHttpClient()}.
 *
 * @author david.wang
 */
public class HttpClientFactory 
{
	/**
	 * Maximum number of connections the pool may hold in total.
	 * NOTE(review): a commented-out lookup of the "httpclient.max_total" key
	 * (via ResourceUtil.getSystem) used to sit next to this value; it is
	 * hard-coded for now.
	 */
	private static final int MAX_TOTAL_CONNECTIONS = 200;

	/**
	 * Default maximum number of connections per route.
	 * NOTE(review): formerly accompanied by a commented-out lookup of the
	 * "httpclient.default_max_connection" key; hard-coded for now.
	 */
	private static final int DEFAULT_MAX_PER_ROUTE = 50;

	/** Value set for {@code CoreConnectionPNames.CONNECTION_TIMEOUT}, in milliseconds. */
	private static final int CONNECTION_TIMEOUT_MS = 3000;

	/** Value set for {@code CoreConnectionPNames.SO_TIMEOUT}, in milliseconds. */
	private static final int SO_TIMEOUT_MS = 60000;

	/** Connection pool backing {@link #httpclient}. */
	private static final ThreadSafeClientConnManager cm;

	/** The single client instance returned by {@link #getHttpClient()}. */
	private static final HttpClient httpclient;

	/*
	 * Builds the connection pool and the client exactly once.
	 */
	static 
	{
		// Register the two supported schemes with their default ports.
		SchemeRegistry schemeRegistry = new SchemeRegistry();
		schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory
				.getSocketFactory()));
		schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory
				.getSocketFactory()));

		// Pool limits: overall cap first, then the per-route default.
		cm = new ThreadSafeClientConnManager(schemeRegistry);
		cm.setMaxTotal(MAX_TOTAL_CONNECTIONS);
		cm.setDefaultMaxPerRoute(DEFAULT_MAX_PER_ROUTE);

		// Protocol version and timeouts applied to every request of this client.
		HttpParams params = new BasicHttpParams();
		params.setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
		params.setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, CONNECTION_TIMEOUT_MS);
		params.setParameter(CoreConnectionPNames.SO_TIMEOUT, SO_TIMEOUT_MS);

		httpclient = new DefaultHttpClient(cm, params);
	}

	/**
	 * Returns the shared client instance.
	 *
	 * @return the {@link HttpClient} created during class initialization
	 */
	public static HttpClient getHttpClient() 
	{
		return httpclient;
	}

	/**
	 * Shuts down the whole connection pool.
	 */
	public static void shutdown() 
	{
		cm.shutdown();
	}
}

	/**
	 * Requests {@code url} with an HTTP GET and returns the response body
	 * decoded as UTF-8.
	 *
	 * @param url     address to request
	 * @param cookies value placed in the {@code Cookie} request header
	 * @return the response body, or an empty string when the response has no entity
	 * @throws ClientProtocolException propagated from {@code HttpClient.execute}
	 * @throws IOException             propagated from executing the request or reading the body
	 */
	public String httpClient(String url, String cookies) 
			throws ClientProtocolException, IOException
	{
		// The client (and its connection pool) is shared via the factory.
		HttpClient client = HttpClientFactory.getHttpClient();
		HttpGet get = new HttpGet(url);
		get.setHeader("Cookie", cookies);
		try 
		{
			HttpResponse response = client.execute(get);
			HttpEntity entity = response.getEntity();
			if (entity == null) 
			{
				// No body to decode; EntityUtils.toString would reject a null entity.
				return "";
			}
			return EntityUtils.toString(entity, "UTF-8");
		} 
		finally 
		{
			// Runs on every path, so the request is also aborted when execute()
			// or the body read throws instead of only after a successful read.
			get.abort();
		}
	}

多线程调用方法如下：