Watij 本来是用于 Web 测试的,但是我发现利用它来做垂直爬虫,效果也很好。
以下的代码抓取了三个网站:
package com.example.tests;
import watij.runtime.ie.IE;
import watij.finders.AttributeFinder;
import watij.finders.Finder;
import watij.finders.NameFinder;
import watij.finders.XPathFinder;
import watij.finders.FinderFactory.*;
import watij.elements.*;
/**
 * Demo that uses Watij to drive Internet Explorer as a small vertical crawler.
 *
 * <p>Flow, as implemented in {@link #main(String[])}:
 * <ol>
 *   <li>Open the Qunar hotel search page, tick one checkbox and submit the search.</li>
 *   <li>Click the first result link that targets a new window and attach to that window.</li>
 *   <li>Read the booking-site list on that page; for the Ctrip and eLong entries, click
 *       through to the booking site and remember the browser window that opens.</li>
 *   <li>Print the text scraped from each booking-site window.</li>
 * </ol>
 *
 * <p>All element ids, class names and link labels are tied to the markup of the target
 * sites at the time the code was written.
 */
public class WatijHotel {

    /** Label fragment that identifies the Ctrip entry in the booking-site list. */
    private static final String CTRIP_LABEL = "携程旅行网";

    /** Label fragment that identifies the eLong entry in the booking-site list. */
    private static final String ELONG_LABEL = "艺龙旅行网";

    /**
     * Runs the crawl once and prints everything it scrapes to standard output.
     *
     * <p>Any failure is reported with a stack trace rather than propagated. Every browser
     * window that was opened is closed before returning, each one independently, so that a
     * failure to close one window does not leave the others open.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        IE ie = new IE();
        IE detailPage = null;
        IE ctripPage = null;
        IE elongPage = null;
        try {
            ie.start("http://hotel.qunar.com");
            ie.checkbox(new AttributeFinder("id", "hchkParaSeachElong")).click();
            ie.button(new AttributeFinder("id", "hbtnSearch")).click();

            // The first link that opens in a new window is taken as the hotel to inspect.
            Links resultLinks = ie.links(new AttributeFinder("target", "_blank"));
            resultLinks.link(0).click();
            detailPage = ie.childBrowser();
            detailPage.waitUntilReady(1000);

            String siteList = detailPage.div(new AttributeFinder("class", "detailInfoLinks")).text();
            System.out.println(siteList);
            System.out.println(detailPage.childBrowserCount());

            // Entries in the list each end with ')'; splitting drops it, so it is re-appended
            // to rebuild the exact link text before clicking.
            for (String entry : siteList.split("\\)")) {
                if (entry.contains(CTRIP_LABEL)) {
                    try {
                        ctripPage = openBookingSite(detailPage, entry + ")");
                        System.out.println(detailPage.childBrowserCount());
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                } else if (entry.contains(ELONG_LABEL)) {
                    try {
                        elongPage = openBookingSite(detailPage, entry + ")");
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }

            if (elongPage != null) {
                elongPage.waitUntilReady(10000);
                // NOTE(review): this second, shorter wait is kept from the original; it looks
                // redundant after the wait above -- confirm before removing.
                elongPage.waitUntilReady(20);
                elongPage.div(new AttributeFinder("class", "taL left10_dbk2")).link(0).click();
                elongPage.waitUntilReady(10000);
                System.out.println(elongPage.text());
            }

            if (ctripPage != null) {
                ctripPage.waitUntilReady(10000);
                String ctripPrice =
                        ctripPage.table(new AttributeFinder("class", "pubGlobal_romList01")).text();
                System.out.println(ctripPrice);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close each window on its own so one failing close() cannot leak the rest.
            closeQuietly(ie);
            closeQuietly(detailPage);
            closeQuietly(elongPage);
            closeQuietly(ctripPage);
        }
    }

    /**
     * From the hotel detail page, selects one booking site, prints the booking table shown
     * for it, clicks the table's first link and returns the browser window opened by it.
     *
     * @param detailPage browser showing the hotel detail page
     * @param siteLinkText exact text of the booking-site link to click
     * @return the child browser with the highest index after the click
     * @throws Exception if any browser interaction fails
     */
    private static IE openBookingSite(IE detailPage, String siteLinkText) throws Exception {
        detailPage.link("预订网站").click();
        detailPage.link(siteLinkText).click();

        String bookingTableText =
                detailPage.table(new AttributeFinder("class", "bookingTable")).text();
        System.out.println(bookingTableText);

        detailPage.table(new AttributeFinder("class", "bookingTable")).links().get(0).click();

        // The window just opened is assumed to be the last child browser.
        int childCount = detailPage.childBrowserCount();
        return detailPage.childBrowser(childCount - 1);
    }

    /**
     * Closes a browser window if it exists, reporting (not propagating) any failure.
     *
     * @param browser the window to close; may be {@code null}
     */
    private static void closeQuietly(IE browser) {
        if (browser == null) {
            return;
        }
        try {
            browser.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
分享到:
评论
QQ:2024486
最近由于工作需要,我也做过类似的 spider。
kqy929@126.com