浏览 7902 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2016-08-18
package com.goubanjia.test;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Demo that exercises a rotating-proxy API.
 *
 * <p>One {@link GetIP} thread periodically downloads a proxy list and publishes it in
 * {@link #ipList}; several {@link Ip138Tester} threads repeatedly request a target page
 * through a randomly chosen proxy from that list and print the text the page returns.
 * The run ends when {@link #gameOver} becomes {@code true}.
 */
public class TestDynamicIp {

    /** Page requested through each proxy. */
    private static final String TARGET_URL = "http://1212.ip138.com/ic.asp";

    /** Upper bound on consecutive attempts made by a single {@code webParseHtml} call. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Most recently fetched proxies. The list is replaced wholesale by {@link GetIP}
     * and never mutated after publication; {@code volatile} makes each replacement
     * visible to the tester threads.
     */
    public static volatile List<String> ipList = new ArrayList<>();

    /** Stop flag polled by every loop in this class; {@code volatile} for cross-thread visibility. */
    public static volatile boolean gameOver = false;

    public static void main(String[] args) {
        long fetchIpSeconds = 5;
        int threadNum = 10;
        int testTime = 3;
        String order = "这里换成你的订单号,百度全网代理IP获取";

        System.out.println(">>>>>>>>>>>>>>全网代理动态IP测试开始<<<<<<<<<<<<<<");
        System.out.println("***************");
        System.out.println("接口返回IP为国内各地区,每次最多返回10个");
        System.out.println("提取IP间隔 " + fetchIpSeconds + " 秒 ");
        System.out.println("开启爬虫线程 " + threadNum);
        System.out.println("爬虫目标网址 " + TARGET_URL);
        System.out.println("测试次数 " + testTime + " ");
        System.out.println("***************\n");

        TestDynamicIp tester = new TestDynamicIp();
        new Thread(tester.new GetIP(fetchIpSeconds * 1000, testTime, order)).start();
        for (int i = 0; i < threadNum; i++) {
            tester.new Ip138Tester(100).start();
        }

        // Wait until the fetcher thread signals the end of the run.
        while (!gameOver) {
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                // Restore the flag and stop waiting; looping on would spin because
                // sleep() throws immediately while the thread is interrupted.
                Thread.currentThread().interrupt();
                break;
            }
        }
        System.out.println(">>>>>>>>>>>>>>全网代理动态IP测试结束<<<<<<<<<<<<<<");
        // Tester threads are non-daemon and may still be inside a request.
        System.exit(0);
    }

    /**
     * Splits a proxy-list response into its usable entries.
     *
     * <p>Each line is read as {@code <proxy>,<number>}; the proxy is kept only when the
     * number parses as an integer greater than zero. Lines are trimmed first so a
     * CRLF-terminated response is handled as well.
     * NOTE(review): the number is presumably the remaining time-to-live requested by the
     * {@code ?ttl} query parameter - confirm against the provider's API description.
     *
     * @param body raw response text
     * @return the accepted proxy strings, possibly empty, never {@code null}
     */
    private static List<String> parseProxyList(String body) {
        List<String> proxies = new ArrayList<>();
        for (String line : body.split("\n")) {
            String[] parts = line.trim().split(",");
            if (parts.length < 2) {
                continue;
            }
            try {
                if (Integer.parseInt(parts[1].trim()) > 0) {
                    proxies.add(parts[0].trim());
                }
            } catch (NumberFormatException ignored) {
                // Second field is not a number: not a proxy record, skip the line.
            }
        }
        return proxies;
    }

    /** Requests the target page through a random proxy in a loop and prints what it returns. */
    public class Ip138Tester extends Thread {

        long sleepMs = 200;

        /**
         * @param sleepMs pause between two requests, in milliseconds
         */
        public Ip138Tester(long sleepMs) {
            this.sleepMs = sleepMs;
        }

        @Override
        public void run() {
            while (!gameOver) {
                webParseHtml(TARGET_URL);
                try {
                    Thread.sleep(sleepMs);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /**
         * Loads {@code parentUrl} through a randomly chosen proxy and returns the text of
         * the first {@code <center>} element of the response.
         *
         * <p>A failed attempt (exception, or a response without a {@code <center>}
         * element) is retried with a fresh client and a freshly chosen proxy, at most
         * {@code MAX_ATTEMPTS} times in total. The retry is a bounded loop rather than
         * recursion so that a run of dead proxies cannot overflow the stack, and each
         * client is closed before the next attempt starts.
         *
         * @param parentUrl URL to request
         * @return the extracted text; {@code ""} when no proxy is available yet or every
         *         attempt failed
         */
        public String webParseHtml(String parentUrl) {
            String lastError = null;
            for (int attempt = 1; attempt <= MAX_ATTEMPTS && !gameOver; attempt++) {
                String ipport = getAProxy();
                if (ipport == null) {
                    // No proxy list has been published yet.
                    System.out.print(".");
                    return "";
                }

                WebClient client = new WebClient();
                try {
                    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
                    client.getOptions().setJavaScriptEnabled(false);
                    client.getOptions().setCssEnabled(false);
                    client.getOptions().setThrowExceptionOnScriptError(false);
                    client.getOptions().setTimeout(10000); // 10 s timeout
                    client.getOptions().setAppletEnabled(true);
                    client.getOptions().setGeolocationEnabled(true);
                    client.getOptions().setRedirectEnabled(true);

                    String[] hostPort = ipport.split(":");
                    ProxyConfig proxyConfig = new ProxyConfig(hostPort[0], Integer.parseInt(hostPort[1]));
                    client.getOptions().setProxyConfig(proxyConfig);

                    HtmlPage page = client.getPage(parentUrl);
                    String html = page.asXml();
                    if (html.length() > 0) {
                        Element center = Jsoup.parse(html).select("center").first();
                        if (center == null) {
                            lastError = "response has no <center> element (proxy " + ipport + ")";
                            continue; // the finally block still closes the client
                        }
                        html = center.text();
                    }
                    System.out.println(getName() + " 使用代理 " + ipport + "请求IP138返回:" + html);
                    return html;
                } catch (Exception e) {
                    // Boundary catch: a malformed proxy string or any request failure
                    // simply counts as a failed attempt.
                    lastError = e + " (proxy " + ipport + ")";
                } finally {
                    client.close();
                }
            }
            if (lastError != null) {
                System.err.println(getName() + " 请求失败:" + lastError);
            }
            return "";
        }

        /**
         * Picks a random entry from the currently published proxy list.
         *
         * @return a proxy string, or {@code null} when the list is empty
         */
        private String getAProxy() {
            // Read the shared reference once so size and index refer to the same list
            // even if the fetcher swaps it in the meantime.
            List<String> snapshot = ipList;
            if (snapshot == null || snapshot.isEmpty()) {
                return null;
            }
            return snapshot.get(ThreadLocalRandom.current().nextInt(snapshot.size()));
        }
    }

    /** Periodically downloads the proxy list and publishes it in {@link TestDynamicIp#ipList}. */
    public class GetIP implements Runnable {

        long sleepMs = 1000;
        int maxTime = 3;
        String order = "";

        /**
         * @param sleepMs pause between two downloads, in milliseconds
         * @param maxTime number of successful downloads after which the run is ended
         * @param order   order number inserted into the download URL
         */
        public GetIP(long sleepMs, int maxTime, String order) {
            this.sleepMs = sleepMs;
            this.maxTime = maxTime;
            this.order = order;
        }

        @Override
        public void run() {
            int successfulFetches = 0;
            while (!gameOver) {
                // Checked at the top of the loop so the last batch stays in use for one
                // more sleep interval before the run is ended.
                if (successfulFetches >= maxTime) {
                    gameOver = true;
                    break;
                }
                try {
                    List<String> fresh = parseProxyList(download());
                    if (!fresh.isEmpty()) {
                        TestDynamicIp.ipList = fresh;
                        successfulFetches++;
                        System.out.println("第" + successfulFetches + "次获取动态IP " + fresh.size() + " 个");
                    }
                } catch (Exception e) {
                    // Boundary catch: if this thread died nothing would ever set gameOver.
                    e.printStackTrace();
                    System.err.println(">>>>>>>>>>>>>>获取IP出错");
                }
                try {
                    Thread.sleep(sleepMs);
                } catch (InterruptedException e) {
                    // End the whole run: no other thread sets the stop flag.
                    Thread.currentThread().interrupt();
                    gameOver = true;
                    return;
                }
            }
        }

        /**
         * Downloads the proxy list for {@code order} and returns the response decoded as
         * UTF-8.
         *
         * @throws IOException if the URL is malformed, or connecting or reading fails or times out
         */
        private String download() throws IOException {
            java.net.URL url = new java.net.URL("http://dynamic.goubanjia.com/dynamic/get/" + order + ".html?ttl");
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(3000);
            // Without a read timeout a stalled server would block this thread forever.
            connection.setReadTimeout(10000);
            try (InputStream in = new BufferedInputStream(connection.getInputStream())) {
                // Read until end of stream; available() is only an estimate and must
                // not be used to size the result.
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                byte[] chunk = new byte[4096];
                int count;
                while ((count = in.read(chunk)) != -1) {
                    collected.write(chunk, 0, count);
                }
                return new String(collected.toByteArray(), StandardCharsets.UTF_8);
            } finally {
                connection.disconnect();
            }
        }
    }
}

完整项目包下载地址:http://www.goubanjia.com/download/test-dynamic-ip.zip 使用动态代理IP,完全避免了被封IP的风险,爬虫效率直接提升了3倍以上。 请填写全网代理IP订单号,填写之后才可以提取到IP哦 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |