主要是运用 HttpClient + HTMLParser。
程序写得比较粗糙,抽空会写得更加完善。
/**
 * Plain data holder for the details of one book as scraped from a bookstore page.
 *
 * <p>Every property is stored as a {@code String} exactly as it was set; the class
 * performs no parsing or validation. A property that was never set is {@code null}.
 *
 * @author Tony Shen
 */
public class CompareBook {

    // ---- title --------------------------------------------------------------
    private String bookName;

    /** @return the book title, or {@code null} if not set */
    public String getBookName() {
        return this.bookName;
    }

    /** @param bookName the book title */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    // ---- author -------------------------------------------------------------
    private String author;

    /** @return the author text, or {@code null} if not set */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author text */
    public void setAuthor(String author) {
        this.author = author;
    }

    // ---- publisher ----------------------------------------------------------
    private String publish;

    /** @return the publisher text, or {@code null} if not set */
    public String getPublish() {
        return this.publish;
    }

    /** @param publish the publisher text */
    public void setPublish(String publish) {
        this.publish = publish;
    }

    // ---- list price ---------------------------------------------------------
    private String originalPrice;

    /** @return the original (list) price text, or {@code null} if not set */
    public String getOriginalPrice() {
        return this.originalPrice;
    }

    /** @param originalPrice the original (list) price text */
    public void setOriginalPrice(String originalPrice) {
        this.originalPrice = originalPrice;
    }

    // ---- selling price ------------------------------------------------------
    private String price;

    /** @return the current price text, or {@code null} if not set */
    public String getPrice() {
        return this.price;
    }

    /** @param price the current price text */
    public void setPrice(String price) {
        this.price = price;
    }

    // ---- description --------------------------------------------------------
    private String desc;

    /** @return the description text, or {@code null} if not set */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description text */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    // ---- publication date ---------------------------------------------------
    private String publishDate;

    /** @return the publication date text, or {@code null} if not set */
    public String getPublishDate() {
        return this.publishDate;
    }

    /** @param publishDate the publication date text */
    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the Dangdang search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result HTML,
 * then pass it to {@link #fetchData(String)} to extract the fields.
 *
 * @author Tony Shen
 */
public class FecthDangDang {
    private String bookName;

    /**
     * @param bookName the title to search for
     */
    public FecthDangDang(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Runs the Dangdang search for the configured book name and returns the response body.
     *
     * @return the response body produced by {@code BasicResponseHandler}
     * @throws Exception if the keyword cannot be encoded or the HTTP request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is percent-encoded as GBK.
            // NOTE(review): presumably the encoding the site expects -- confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://search.dangdang.com/search.php?catalog=&key="
                    + paramStr + "&SearchFromTop=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection manager even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the book fields from a Dangdang search result page.
     *
     * <p>Only nodes under elements with {@code class="list_r_list"} are inspected.
     * The first match of each price/date class is used, and the description, author
     * and publisher are taken from the links at positions 1, 4 and 5 of that subtree.
     * A field whose node is not present in the page is left {@code null} instead of
     * failing the whole extraction.
     *
     * @param responseBody the HTML returned by {@link #getResponse()}
     * @return a book carrying the configured name plus whatever fields were found
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList items = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "list_r_list"));

        book.setPublishDate(plainTextAt(P(items, "class", "list_r_list_h4_info3"), 0));
        book.setOriginalPrice(plainTextAt(P(items, "class", "gray del"), 0));
        book.setPrice(plainTextAt(P(items, "class", "red"), 0));

        // Positions 1, 4 and 5 depend on the page layout at the time of writing.
        NodeList links = items.extractAllNodesThatMatch(
                new NodeClassFilter(LinkTag.class), true);
        book.setDesc(linkTextAt(links, 1));
        book.setAuthor(linkTextAt(links, 4));
        book.setPublish(linkTextAt(links, 5));
        return book;
    }

    /**
     * Returns all descendants of {@code nodelist} whose attribute {@code a} equals {@code b}.
     *
     * @param nodelist the nodes to search recursively
     * @param a the attribute name
     * @param b the required attribute value
     * @return the matching nodes
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** Returns the plain text of the node at {@code index}, or {@code null} if there is none. */
    private static String plainTextAt(NodeList nodes, int index) {
        if (nodes == null || index < 0 || index >= nodes.size()) {
            return null;
        }
        Node node = nodes.elementAt(index);
        return node == null ? null : node.toPlainTextString();
    }

    /** Returns the link text of the node at {@code index}, or {@code null} if it is absent or not a link. */
    private static String linkTextAt(NodeList nodes, int index) {
        if (nodes == null || index < 0 || index >= nodes.size()) {
            return null;
        }
        Node node = nodes.elementAt(index);
        return (node instanceof LinkTag) ? ((LinkTag) node).getLinkText() : null;
    }

    /** @return the title this fetcher searches for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from Zhuoyue (Joyo), queried through the amazon.cn search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result HTML,
 * then pass it to {@link #fetchData(String)} to extract the price.
 *
 * @author Tony Shen
 */
public class FetchZhuoYue {
    private String bookName;

    /**
     * @param bookName the title to search for
     */
    public FetchZhuoYue(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Runs the amazon.cn book search for the configured book name and returns the response body.
     *
     * @return the response body produced by {@code BasicResponseHandler}
     * @throws Exception if the keyword cannot be encoded or the HTTP request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is percent-encoded as GBK.
            // NOTE(review): presumably the encoding the site expects -- confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.amazon.cn/s/ref=nb_ss?url=search-alias%3Dbooks&keywords="
                    + paramStr + "&Go.x=15&Go.y=13&searchKind=name";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection manager even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the current price from a search result page.
     *
     * <p>The price is the plain text of the first node with {@code class="saleprice"}
     * found under elements with {@code class="n2"}. If no such node exists the price
     * is left {@code null} instead of failing.
     *
     * @param responseBody the HTML returned by {@link #getResponse()}
     * @return a book carrying the configured name and, when found, the price
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList items = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "n2"));
        NodeList prices = P(items, "class", "saleprice");

        // Guard against a page without any price node.
        if (prices != null && prices.size() > 0) {
            Node priceNode = prices.elementAt(0);
            if (priceNode != null) {
                book.setPrice(priceNode.toPlainTextString());
            }
        }
        return book;
    }

    /**
     * Returns all descendants of {@code nodelist} whose attribute {@code a} equals {@code b}.
     *
     * @param nodelist the nodes to search recursively
     * @param a the attribute name
     * @param b the required attribute value
     * @return the matching nodes
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** @return the title this fetcher searches for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the China-Pub search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result HTML,
 * then pass it to {@link #fetchData(String)} to extract the price.
 *
 * @author Tony Shen
 */
public class FecthChinaPub {
    private String bookName;

    /**
     * @param bookName the title to search for
     */
    public FecthChinaPub(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Runs the China-Pub search for the configured book name and returns the response body.
     *
     * @return the response body produced by {@code BasicResponseHandler}
     * @throws Exception if the keyword cannot be encoded or the HTTP request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is percent-encoded as GBK.
            // NOTE(review): presumably the encoding the site expects -- confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.china-pub.com/s/?key1="
                    + paramStr + "&type=&pz=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the connection manager even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the current price from a China-Pub search result page.
     *
     * <p>Takes the first {@code ul} found under elements with {@code class="listview"}
     * and uses the first whitespace-delimited token of its plain text as the price.
     * If no such {@code ul} exists the price is left {@code null} instead of failing.
     *
     * @param responseBody the HTML returned by {@link #getResponse()}
     * @return a book carrying the configured name and, when found, the price
     * @throws Exception if the HTML cannot be parsed
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList listViews = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "listview"));
        NodeList lists = listViews.extractAllNodesThatMatch(new TagNameFilter("ul"), true);

        // Guard against a page without any result list.
        if (lists != null && lists.size() > 0) {
            Node descNode = lists.elementAt(0);
            if (descNode != null) {
                String[] tokens = descNode.toPlainTextString().trim().split("\\s+");
                book.setPrice(tokens[0]);
            }
        }
        return book;
    }

    /** @return the title this fetcher searches for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
/**
 * Command-line entry point that looks up one book title on Dangdang, Zhuoyue and
 * China-Pub and prints what each fetcher extracted.
 *
 * @author Tony Shen
 */
public class ComparePrice {
    /** The title searched for on every site. */
    private static final String BOOK_NAME = "我的奋斗";

    /**
     * Queries the three sites one after another and prints the results to standard output.
     *
     * <p>Each site is handled in its own {@code try} block so that a failure on one
     * site does not suppress the results of the others.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // --- Dangdang ---
        try {
            FecthDangDang dd = new FecthDangDang(BOOK_NAME);
            String responseDD = dd.getResponse();
            CompareBook book1 = dd.fetchData(responseDD);
            System.out.println("++++++当当抓取结果+++++");
            System.out.println("书名:"+book1.getBookName());
            System.out.println("作者:"+book1.getAuthor());
            System.out.println("出版社:"+book1.getPublish());
            System.out.println("原价:"+book1.getOriginalPrice());
            System.out.println("现价:"+book1.getPrice());
            System.out.println("描述:"+book1.getDesc());
            // Printed without a label, as in the original output.
            System.out.println(book1.getPublishDate());
        } catch (Exception e) {
            e.printStackTrace();
        }

        // --- Zhuoyue ---
        try {
            FetchZhuoYue zy = new FetchZhuoYue(BOOK_NAME);
            String responseZY = zy.getResponse();
            CompareBook book2 = zy.fetchData(responseZY);
            System.out.println("++++++卓越抓取结果+++++");
            System.out.println("书名:"+book2.getBookName());
            System.out.println("现价:"+book2.getPrice());
        } catch (Exception e) {
            e.printStackTrace();
        }

        // --- China-Pub ---
        try {
            FecthChinaPub cp = new FecthChinaPub(BOOK_NAME);
            String responseCP = cp.getResponse();
            CompareBook book3 = cp.fetchData(responseCP);
            System.out.println("++++++China-Pub抓取结果+++++");
            System.out.println("书名:"+book3.getBookName());
            System.out.println("现价:"+book3.getPrice());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
程序的运行结果
- 描述: 结构
- 大小: 28.2 KB
分享到:
相关推荐
总之,Sharding-JDBC以其简单高效的设计理念、高度兼容性以及出色的性能表现,在数据库分库分表领域内占据了一席之地,对于需要解决大规模数据处理问题的企业来说,是一个值得关注和尝试的选择。
在本项目中,我们主要探讨的是一个名为"当当网----练习小项目"的实践案例。这个项目旨在模仿知名电商平台当当网的功能,为初学者提供一个锻炼编程技能的平台。作者强调这是其个人编写的代码,可能存在不足,期待得到...
标题中的“当当图书榜”指的是当当网的图书排行榜,这是一个反映当下畅销书籍的榜单,通常包括各类图书的销售排名。在这个项目中,“当当图书榜html”表示使用HTML(超文本标记语言)来创建了一个展示这个排行榜的...
【标题】"山寨版--当当网源程序"揭示了一个基于Java Web技术的网上购物平台的实现,旨在模仿知名电商网站当当网的功能与用户体验。这个项目采用了经典的Java技术栈,包括JavaServer Pages (JSP)、Struts框架以及...
当当-林嘉琦-APM在当当的实践
本书由China-pub首发,目前在当当和淘宝上都有卖。价格低至7.5折,比较实惠。大家先下载试读版本看看,也可以去书店逛逛,看看书写的如何。相关网址如下: China-Pub: http://www.china-pub.com/197674 当当:...
每次调用时都会生成一个基于当前时间戳的新URL,从而确保每次请求都返回一个新的图片。 - **窗口操作**: - `function showyou()` 和 `function MeToMe()`:这些函数用于显示提示信息,并关闭当前窗口。这在表单...
综上所述,这个基于HTML的当当图书网项目是一个全面而实用的学习资源,它不仅涵盖了基本的HTML、CSS和JavaScript知识,还涉及到了实际网页设计和开发过程中需要注意的细节问题。通过完成这样一个项目,学生不仅可以...
这个项目旨在提供一个便捷的、易于操作的网上购物平台,模仿或复现了当当网的部分功能。下面我们将详细探讨JSP技术和MySQL数据库在构建此类网站中的应用。 **JSP技术** JSP是Java平台上的服务器端脚本语言,用于...
京东 当当 卓越价格变化监控器(三合一)2.3
Struts2是一个强大的Java web应用程序框架,用于构建和维护可扩展、结构清晰的MVC(模型-视图-控制器)应用程序。在这个名为“当当网实现”的项目中,开发者使用Struts2框架来构建了一个类似于当当网的电子商务平台...
当当网web项目,包含技术及相应框架:html、css、jsp、bootstrap、javaScript、JQuery、maven、tomcat、JDK1.8、SpringMVC、Spring、mybatis、Mysql等...(解压出来sql文件在主目录下)
【标题】"当当网购书系统--JAVA开发"是一个基于JAVA技术栈的电子商务项目,旨在模拟实际中的在线购书流程,提供用户浏览、搜索、购买书籍等服务。这个项目是作者在培训期间完成的,虽然可能在代码质量或设计上存在...
这是一个当当网主页的HTML+CSS代码实现,希望对大家有用
标题中的“学习SSH写的一个类似当当网系统”指的是基于SSH(Struts、Hibernate、Spring)框架开发的一个模仿当当网功能的项目。这个系统旨在为开发者提供一个学习和实践的平台,帮助他们理解如何在实际开发中运用...
Sharding-JDBC是由当当网开源的一个分布式数据库解决方案,它在保持传统关系型数据库的基础上,通过分库分表、读写分离等技术手段,以保证数据库的可扩展性、高性能和高可用性。Sharding-JDBC的一个重要特性是它不...
"html作业 当当图书榜" 这个标题表明了这是一个与HTML相关的学习项目,具体是模拟制作当当网上的图书排行榜页面。当当网是中国知名的在线图书销售平台,其图书榜通常会展示各类畅销书籍,包括但不限于小说、教辅、...
在本练习中,我们将探索"当当图书榜"的数据,这是一个关于畅销图书的排行榜,可能包含各类书籍的信息,如书名、作者、出版社、销量等。这个数据集可以帮助我们了解当前图书市场的趋势,分析读者的阅读偏好,以及为...
3、在页面的右侧有一个随滚动条上下移动的广告图片,并且图片上方有一个“关闭”按钮,单击“关闭”按钮,图片和“关闭”按钮均隐藏 4、页面中间的特效是带数字按钮的循环显示的图片广告,六张图片按规定的时间间隔...