- 浏览: 376736 次
- 性别:
- 来自: 四川
文章分类
- 全部博客 (247)
- 数据库以及sql (11)
- java (48)
- 爬虫学习 (20)
- java编程 (28)
- python编程以及安装和配置 (27)
- oracle数据库 (32)
- mongodb基本操作 (4)
- linux学习 (7)
- easyUI (2)
- nodeJs (8)
- python学习 (16)
- 其他 (13)
- hadoop (1)
- svn (1)
- 汉字 (1)
- windows (2)
- jsp (5)
- fiddler (1)
- ETL (1)
- teamviewer (1)
- maven (4)
- GIT (2)
- datagrip (1)
- ocr (1)
- redis (3)
- mysql (3)
- linux (1)
- 数据服务 (0)
最新评论
package com.teamdev.jxbrowser.chromium.demo_lingshui.baidunuomi.shop;
import java.awt.BorderLayout;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import javax.swing.JFrame;
import javax.swing.WindowConstants;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.hyjx.common.CommonUtil;
import com.hyjx.orclJdbcUtil.JDBCUtils;
import com.teamdev.jxbrowser.chromium.Browser;
import com.teamdev.jxbrowser.chromium.BrowserPreferences;
import com.teamdev.jxbrowser.chromium.JSValue;
import com.teamdev.jxbrowser.chromium.LoggerProvider;
import com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent;
import com.teamdev.jxbrowser.chromium.events.LoadAdapter;
import com.teamdev.jxbrowser.chromium.swing.BrowserView;
/**
 * Baidu Nuomi crawler that resolves the shop behind each pending product row.
 *
 * <p>{@code main} reads product rows from {@code ls_nm_shop_good}, calls {@code search} for each
 * one and writes the returned shop URL back to the row. {@code search} downloads the product page,
 * extracts the deal id, queries the {@code shopchain} endpoint and inserts the first shop of the
 * response into {@code ls_nm_shop}.
 *
 * <p>NOTE(review): the original header also mentioned comment totals (COMM_TOTAL) and negative
 * review counts (NEGATIVE_NUM); nothing in this class reads or writes them.
 */
public class ls_bdnm_shop{

    /**
     * Processes every Baidu Nuomi product row whose {@code state} is still null.
     *
     * <p>Rows for which {@code search} throws are reported and skipped, so they keep a null state.
     * All JDBC resources are closed when the method exits, normally or not.
     *
     * @param args unused
     * @throws Exception if the connection, the query or an update fails
     */
    public static void main(String[] args) throws Exception {
        String sql1 = " select * from ls_nm_shop_good t where t.plat_name ='百度糯米' and state is null ";
        String sql2 = "update ls_nm_shop_good set state = 'shop' , shop_url =? where good_url = ? ";
        // A failure here now surfaces with its real cause instead of a later NullPointerException.
        try (java.sql.Connection conOrcale = JDBCUtils.getConnection();
                Statement st2 = conOrcale.createStatement();
                ResultSet rs2 = st2.executeQuery(sql1);
                PreparedStatement ps = conOrcale.prepareStatement(sql2)) {
            int i = 1;
            while (rs2.next()) {
                // Wait one second between products.
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Keep the interrupt flag and stop instead of silently carrying on.
                    Thread.currentThread().interrupt();
                    break;
                }
                System.out.println(i);
                i++;
                final String good_url = rs2.getString("good_url");
                String good_type = rs2.getString("good_type");
                String gurl;
                try {
                    gurl = search(good_url, good_type);
                } catch (Exception e) {
                    System.out.println("该商品已经下线或者该店铺没有url");
                    // Record the actual cause; the message above is only a guess at it.
                    System.err.println("search failed for " + good_url + ": " + e);
                    continue;
                }
                // Mark the row as processed and store the resolved shop URL.
                ps.setString(1, gurl);
                ps.setString(2, good_url);
                ps.executeUpdate();
            }
        }
    }

    /**
     * Looks up the shop that sells the given product and stores it in {@code ls_nm_shop}.
     *
     * @param good_url  URL of the product page
     * @param good_type value written to the {@code type} column of the inserted shop row
     * @return the shop URL ({@code "https:"} followed by the {@code link} field of the first shop);
     *         {@code null} only if that string were empty
     * @throws Exception if the page cannot be fetched, the deal id cannot be cut out of the page,
     *         the response is not the expected JSON, or the insert statement cannot be prepared
     */
    static String search(final String good_url ,String good_type)throws Exception{
        System.out.println("url========="+good_url);

        String ahtml = fetchPage(good_url);
        String deal_id = extractDealId(ahtml);
        System.out.println("deal_id="+deal_id);

        String shopchainUrl = "https://www.nuomi.com/pcindex/main/shopchain?dealId="+deal_id;
        System.out.println(shopchainUrl);

        // Shop details for the deal.
        Document doc = Jsoup.connect(shopchainUrl).userAgent("Mozilla")
                .header("method", "GET")
                .header("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                // NOTE(review): this header name carries a trailing colon — confirm whether
                // "Accept-Encoding" was intended before changing it.
                .header("Accept-Encoding:","gzip, deflate, sdch")
                .header("Accept-Language","zh-CN,zh;q=0.8")
                .header("Cache-Control","max-age=0")
                .header("Connection","keep-alive")
                // NOTE(review): Host differs from the requested host www.nuomi.com — confirm.
                .header("Host","lingshui.nuomi.com")
                .header("Upgrade-Insecure-Requests","1")
                .header("User-Agent","Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36")
                .ignoreContentType(true).timeout(200000).data()
                .get();

        // The response body is JSON; take it from the parsed document's text.
        String text = doc.text();
        System.out.println(text);
        JSONObject ojson = new JSONObject(text);
        JSONObject oojson = (JSONObject) ojson.get("data");
        String shop_name = (String) oojson.get("name");
        // Only the first entry of the "shop" array is used.
        JSONObject firstShop = (JSONObject) ((JSONArray) oojson.get("shop")).get(0);
        String shop_url = "https:" + (String) firstShop.get("link");
        String shop_address = (String) firstShop.get("address");
        String tellphone = (String) firstShop.get("phone"); // phone number
        System.out.println("店铺名称="+shop_name);
        System.out.println("shop_url="+shop_url);
        System.out.println("店铺地址 ="+shop_address);
        System.out.println("电话="+tellphone);

        saveShop(shop_name, shop_address, shop_url, tellphone, good_type);
        System.out.println("---------------------------------------------------------------------------------------------");
        if(shop_url.length()>0){
            return shop_url;
        }
        return null;
    }

    /** Downloads {@code url} and returns the body decoded as UTF-8, closing the HTTP client afterwards. */
    private static String fetchPage(String url) throws Exception {
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            HttpGet httpget = new HttpGet(url);
            try {
                HttpResponse response = httpclient.execute(httpget);
                HttpEntity entity = response.getEntity();
                return EntityUtils.toString(entity, "UTF-8");
            } finally {
                httpget.releaseConnection();
            }
        }
    }

    /**
     * Cuts the deal id out of the product page; returns an empty string when neither marker is present.
     *
     * <p>The original tested {@code deal_id == null} before the {@code goods_id} fallback, which
     * could never be true because the variable starts as {@code ""}; the fallback now runs
     * whenever the {@code article} branch produced nothing.
     */
    private static String extractDealId(String ahtml) {
        String deal_id = "";
        if (ahtml.contains("article")) {
            deal_id = ahtml.substring(ahtml.indexOf("article")+8, ahtml.indexOf("section"));
            // Keep what follows the last '=' and drop the trailing five characters.
            deal_id = deal_id.substring(deal_id.lastIndexOf("=")+1, deal_id.length()-5);
        }
        if (deal_id.isEmpty() && ahtml.contains("goods_id")) {
            // Example snippet noted by the original author: F.context("goods_id", "31217598");
            deal_id = ahtml.substring(ahtml.indexOf("goods_id")+8, ahtml.indexOf("merchant_ids"));
            deal_id = deal_id.substring(deal_id.lastIndexOf(",")+3);
            deal_id = deal_id.substring(0, deal_id.lastIndexOf(")")-1);
        }
        return deal_id;
    }

    /**
     * Inserts one shop row on its own short-lived connection.
     *
     * <p>A failure while binding or executing the insert is logged and tolerated, as the original
     * tolerated it; a failure to obtain the connection or prepare the statement propagates.
     */
    private static void saveShop(String shop_name, String shop_address, String shop_url,
            String tellphone, String good_type) throws Exception {
        String sql = "insert into ls_nm_shop "+
                " (shop_id, site_ent_id, platform_code,shop_name,shop_address,shop_url,tellphone,type) "+
                " values " +
                "( ? , ? , ? , ?, ? , ?, ?,?) ";
        try (java.sql.Connection conOrcale2 = JDBCUtils.getConnection();
                PreparedStatement ps = conOrcale2.prepareStatement(sql)) {
            try {
                // (shop_id, site_ent_id, platform_code, shop_name, shop_address, shop_url, tellphone, type)
                ps.setString(1,CommonUtil.getUUID32());
                ps.setString(2,CommonUtil.getUUID32());
                ps.setString(3,"010");
                ps.setString(4,shop_name);
                ps.setString(5,shop_address);
                ps.setString(6,shop_url);
                ps.setString(7,tellphone);
                ps.setString(8,good_type);
                ps.executeUpdate();
            } catch (Exception e1) {
                // Best-effort insert: report the failure instead of hiding it, but do not abort.
                e1.printStackTrace();
            }
        }
    }
}
import java.awt.BorderLayout;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import javax.swing.JFrame;
import javax.swing.WindowConstants;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.hyjx.common.CommonUtil;
import com.hyjx.orclJdbcUtil.JDBCUtils;
import com.teamdev.jxbrowser.chromium.Browser;
import com.teamdev.jxbrowser.chromium.BrowserPreferences;
import com.teamdev.jxbrowser.chromium.JSValue;
import com.teamdev.jxbrowser.chromium.LoggerProvider;
import com.teamdev.jxbrowser.chromium.events.FinishLoadingEvent;
import com.teamdev.jxbrowser.chromium.events.LoadAdapter;
import com.teamdev.jxbrowser.chromium.swing.BrowserView;
/**
 * Baidu Nuomi crawler that resolves the shop behind each pending product row.
 *
 * <p>{@code main} reads product rows from {@code ls_nm_shop_good}, calls {@code search} for each
 * one and writes the returned shop URL back to the row. {@code search} downloads the product page,
 * extracts the deal id, queries the {@code shopchain} endpoint and inserts the first shop of the
 * response into {@code ls_nm_shop}.
 *
 * <p>NOTE(review): the original header also mentioned comment totals (COMM_TOTAL) and negative
 * review counts (NEGATIVE_NUM); nothing in this class reads or writes them.
 */
public class ls_bdnm_shop{

    /**
     * Processes every Baidu Nuomi product row whose {@code state} is still null.
     *
     * <p>Rows for which {@code search} throws are reported and skipped, so they keep a null state.
     * All JDBC resources are closed when the method exits, normally or not.
     *
     * @param args unused
     * @throws Exception if the connection, the query or an update fails
     */
    public static void main(String[] args) throws Exception {
        String sql1 = " select * from ls_nm_shop_good t where t.plat_name ='百度糯米' and state is null ";
        String sql2 = "update ls_nm_shop_good set state = 'shop' , shop_url =? where good_url = ? ";
        // A failure here now surfaces with its real cause instead of a later NullPointerException.
        try (java.sql.Connection conOrcale = JDBCUtils.getConnection();
                Statement st2 = conOrcale.createStatement();
                ResultSet rs2 = st2.executeQuery(sql1);
                PreparedStatement ps = conOrcale.prepareStatement(sql2)) {
            int i = 1;
            while (rs2.next()) {
                // Wait one second between products.
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Keep the interrupt flag and stop instead of silently carrying on.
                    Thread.currentThread().interrupt();
                    break;
                }
                System.out.println(i);
                i++;
                final String good_url = rs2.getString("good_url");
                String good_type = rs2.getString("good_type");
                String gurl;
                try {
                    gurl = search(good_url, good_type);
                } catch (Exception e) {
                    System.out.println("该商品已经下线或者该店铺没有url");
                    // Record the actual cause; the message above is only a guess at it.
                    System.err.println("search failed for " + good_url + ": " + e);
                    continue;
                }
                // Mark the row as processed and store the resolved shop URL.
                ps.setString(1, gurl);
                ps.setString(2, good_url);
                ps.executeUpdate();
            }
        }
    }

    /**
     * Looks up the shop that sells the given product and stores it in {@code ls_nm_shop}.
     *
     * @param good_url  URL of the product page
     * @param good_type value written to the {@code type} column of the inserted shop row
     * @return the shop URL ({@code "https:"} followed by the {@code link} field of the first shop);
     *         {@code null} only if that string were empty
     * @throws Exception if the page cannot be fetched, the deal id cannot be cut out of the page,
     *         the response is not the expected JSON, or the insert statement cannot be prepared
     */
    static String search(final String good_url ,String good_type)throws Exception{
        System.out.println("url========="+good_url);

        String ahtml = fetchPage(good_url);
        String deal_id = extractDealId(ahtml);
        System.out.println("deal_id="+deal_id);

        String shopchainUrl = "https://www.nuomi.com/pcindex/main/shopchain?dealId="+deal_id;
        System.out.println(shopchainUrl);

        // Shop details for the deal.
        Document doc = Jsoup.connect(shopchainUrl).userAgent("Mozilla")
                .header("method", "GET")
                .header("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                // NOTE(review): this header name carries a trailing colon — confirm whether
                // "Accept-Encoding" was intended before changing it.
                .header("Accept-Encoding:","gzip, deflate, sdch")
                .header("Accept-Language","zh-CN,zh;q=0.8")
                .header("Cache-Control","max-age=0")
                .header("Connection","keep-alive")
                // NOTE(review): Host differs from the requested host www.nuomi.com — confirm.
                .header("Host","lingshui.nuomi.com")
                .header("Upgrade-Insecure-Requests","1")
                .header("User-Agent","Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36")
                .ignoreContentType(true).timeout(200000).data()
                .get();

        // The response body is JSON; take it from the parsed document's text.
        String text = doc.text();
        System.out.println(text);
        JSONObject ojson = new JSONObject(text);
        JSONObject oojson = (JSONObject) ojson.get("data");
        String shop_name = (String) oojson.get("name");
        // Only the first entry of the "shop" array is used.
        JSONObject firstShop = (JSONObject) ((JSONArray) oojson.get("shop")).get(0);
        String shop_url = "https:" + (String) firstShop.get("link");
        String shop_address = (String) firstShop.get("address");
        String tellphone = (String) firstShop.get("phone"); // phone number
        System.out.println("店铺名称="+shop_name);
        System.out.println("shop_url="+shop_url);
        System.out.println("店铺地址 ="+shop_address);
        System.out.println("电话="+tellphone);

        saveShop(shop_name, shop_address, shop_url, tellphone, good_type);
        System.out.println("---------------------------------------------------------------------------------------------");
        if(shop_url.length()>0){
            return shop_url;
        }
        return null;
    }

    /** Downloads {@code url} and returns the body decoded as UTF-8, closing the HTTP client afterwards. */
    private static String fetchPage(String url) throws Exception {
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            HttpGet httpget = new HttpGet(url);
            try {
                HttpResponse response = httpclient.execute(httpget);
                HttpEntity entity = response.getEntity();
                return EntityUtils.toString(entity, "UTF-8");
            } finally {
                httpget.releaseConnection();
            }
        }
    }

    /**
     * Cuts the deal id out of the product page; returns an empty string when neither marker is present.
     *
     * <p>The original tested {@code deal_id == null} before the {@code goods_id} fallback, which
     * could never be true because the variable starts as {@code ""}; the fallback now runs
     * whenever the {@code article} branch produced nothing.
     */
    private static String extractDealId(String ahtml) {
        String deal_id = "";
        if (ahtml.contains("article")) {
            deal_id = ahtml.substring(ahtml.indexOf("article")+8, ahtml.indexOf("section"));
            // Keep what follows the last '=' and drop the trailing five characters.
            deal_id = deal_id.substring(deal_id.lastIndexOf("=")+1, deal_id.length()-5);
        }
        if (deal_id.isEmpty() && ahtml.contains("goods_id")) {
            // Example snippet noted by the original author: F.context("goods_id", "31217598");
            deal_id = ahtml.substring(ahtml.indexOf("goods_id")+8, ahtml.indexOf("merchant_ids"));
            deal_id = deal_id.substring(deal_id.lastIndexOf(",")+3);
            deal_id = deal_id.substring(0, deal_id.lastIndexOf(")")-1);
        }
        return deal_id;
    }

    /**
     * Inserts one shop row on its own short-lived connection.
     *
     * <p>A failure while binding or executing the insert is logged and tolerated, as the original
     * tolerated it; a failure to obtain the connection or prepare the statement propagates.
     */
    private static void saveShop(String shop_name, String shop_address, String shop_url,
            String tellphone, String good_type) throws Exception {
        String sql = "insert into ls_nm_shop "+
                " (shop_id, site_ent_id, platform_code,shop_name,shop_address,shop_url,tellphone,type) "+
                " values " +
                "( ? , ? , ? , ?, ? , ?, ?,?) ";
        try (java.sql.Connection conOrcale2 = JDBCUtils.getConnection();
                PreparedStatement ps = conOrcale2.prepareStatement(sql)) {
            try {
                // (shop_id, site_ent_id, platform_code, shop_name, shop_address, shop_url, tellphone, type)
                ps.setString(1,CommonUtil.getUUID32());
                ps.setString(2,CommonUtil.getUUID32());
                ps.setString(3,"010");
                ps.setString(4,shop_name);
                ps.setString(5,shop_address);
                ps.setString(6,shop_url);
                ps.setString(7,tellphone);
                ps.setString(8,good_type);
                ps.executeUpdate();
            } catch (Exception e1) {
                // Best-effort insert: report the failure instead of hiding it, but do not abort.
                e1.printStackTrace();
            }
        }
    }
}
- httpclient_jar.zip (1.7 MB)
- 下载次数: 1
发表评论
-
采集ymx商品信息
2018-08-29 15:28 530public static void main(String[ ... -
根据百度加密url, 获取真实url
2018-08-06 10:46 0public static void main(String[ ... -
fiddler 抓取 手机https 数据包 完美解决
2018-08-03 12:43 3057转:https://jingyan.baidu.com ... -
无忧代理IP
2018-07-30 16:19 0http://api.ip.data5u.com/dynami ... -
无所不能的四种请求方式(天下武功唯快不破)
2018-07-26 11:50 865package com.teamdev.jxbrowser.c ... -
post请求加json参数方式二
2018-07-24 14:02 2110package com.teamdev.jxbrowser.c ... -
post请求加json参数或xml参数
2018-07-20 18:21 1274import java.io.BufferedReader; ... -
jd编号
2017-11-02 17:59 557京东店铺:https://mall.jd.com/index- ... -
清除google缓存
2017-10-26 11:20 7081.进入 chrome://net-internals/#dn ... -
adb server is out of date. killing... 本地连接夜神模拟器失败、超时
2017-06-19 10:43 1522原因:1.模拟器器设置了代理;2.主要是模拟器和sdk 的ad ... -
fiddler抓取的https请求 数据乱码问题解决方案
2017-06-09 09:44 7298参考地址:http://blog.csdn.net/SomeO ... -
chromium.Browser 禁止加载图片,提升加载速度
2017-05-25 11:29 1213package com.teamdev.jxbrowser.c ... -
chrome 禁止加载网页图片 解决办法
2017-05-25 11:18 23571.在谷歌浏览器中输入:chrome://settings/c ... -
开源爬虫框架的优缺点?
2017-03-22 14:46 832开源爬虫框架各有什么优缺点? 作者:老夏 开发网络爬 ... -
反爬虫
2017-03-17 10:34 7001. 伪装user agent User agen ... -
爬虫被封禁原因
2017-03-17 09:37 1562爬虫被封禁常见原因 1. ... -
App数据抓取
2017-03-17 09:32 1119思路: 通过fiddler抓包 ... -
八爪鱼规则学习
2017-03-08 13:44 8601.八爪鱼采集器是任何一个需要从网页获取信息的人都必备的采集工 ... -
java模拟jquery请求动作(模拟点击、选择下拉)
2016-12-29 14:40 4092package com.teamdev.jxbrowser.c ... -
jsoup + json 解析网页
2016-11-01 16:25 1310package com.teamdev.jxbrowser.c ...
相关推荐
《深入解析httpclient.jar及其与code.jar的关联》 在Java开发中,HTTP通信是不可或缺的一部分,而Apache HttpClient库正是Java实现HTTP客户端操作的重要工具。本文将深入探讨httpclient.jar包,以及它与code.jar包...
### Httpclient官网教程中文版知识点总结 #### 一、引言 HTTP协议作为互联网的核心通信标准之一,在现代网络服务及物联网设备中扮演着至关重要的角色。随着技术的发展,越来越多的应用和服务依赖于HTTP协议来实现...
HttpClient 4.2.1版本引入了一些重要的改进和修复,以提高性能和稳定性。以下是一些关键特性: 1. **连接管理**:HttpClient 4.2.1引入了更完善的连接管理机制,允许开发者控制连接的创建、复用和关闭。`...
例如,在HttpClient 3.x中,代码可能会使用`org.apache.commons.httpclient.HttpClient`类和`org.apache.commons.httpclient.methods.GetMethod`等,而在4.x版本中,这些都被新的API所替代。程序员需要熟悉`org.apache....
赠送jar包:httpclient-4.2.5.jar; 赠送原API文档:httpclient-4.2.5-javadoc.jar; 赠送源代码:httpclient-4.2.5-sources.jar; 赠送Maven依赖信息文件:httpclient-4.2.5.pom; 包含翻译后的API文档:httpclient...
本文将深入探讨HttpClient 4.2.1的核心特性和使用方法,帮助开发者更好地理解和应用这个强大的工具。 一、HttpClient简介 HttpClient是一个开放源码的Java库,由Apache软件基金会维护。它为Java程序员提供了一个...
HttpClient httpClient = new HttpClient(); // 设置 Http 连接超时为5秒 httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000); /* 2 生成 GetMethod 对象并设置参数 */ GetMethod ...
赠送jar包:httpclient-4.5.6.jar; 赠送原API文档:httpclient-4.5.6-javadoc.jar; 赠送源代码:httpclient-4.5.6-sources.jar; 赠送Maven依赖信息文件:httpclient-4.5.6.pom; 包含翻译后的API文档:httpclient...
本篇文章将深入探讨如何使用HttpClient方式调用URL,以及相关的知识点。 首先,HttpClient允许我们构建复杂的HTTP请求,包括GET、POST以及其他HTTP方法。使用HttpClient调用URL的基本步骤包括创建HttpClient实例、...
HttpClientHelper 对这个类进行了封装,使得开发者无需直接与HttpClient接口打交道,而是通过更简洁、易用的方法调用来实现网络通信。这提高了代码的可读性和可维护性。 单例模式是软件设计模式的一种,确保一个类...
HttpClient 4.13版本是这个库的一个较新版本,包含了一系列的改进和修复。 在Java开发中,HttpClient是一个常用的工具,尤其在处理Web服务或者API调用时。它支持同步和异步操作,可以处理复杂的HTTP协议细节,使...
这个实例主要涉及如何配置HttpClient来忽略SSL(Secure Socket Layer)验证,这对于在开发和测试环境中处理自签名证书或未认证的服务器非常有用。以下将详细介绍HttpClient的使用以及如何进行SSL验证的忽略。 首先...
《HttpClient 4.5详解与应用实践》 HttpClient是一个开源的Java库,由Apache软件基金会维护,主要用于在HTTP协议上实现客户端的通信。版本4.5是HttpClient的一个稳定版本,提供了许多增强的功能和优化,使其成为...
本压缩包文件"httpClient"很可能包含了HttpClient库所需的必备JAR文件,这些文件通常包括HttpClient的核心库、依赖的第三方库以及可能的扩展模块。为了正确使用HttpClient,你需要确保将这些JAR文件添加到你的项目类...
《深入理解HTTPClient 4.5及其依赖》 在Java编程世界中,HTTPClient是一个非常重要的库,它允许开发者执行HTTP请求并处理响应。本文将深入探讨`httpclient-4.5.jar`这个包,以及它所依赖的相关jar包,帮助你更好地...
在本文中,我们将深入探讨如何使用HttpClient调用WebService。 首先,调用WebService通常涉及SOAP(Simple Object Access Protocol)或RESTful API。HttpClient可以处理这两种类型的Web服务。在本示例中,我们假设...
赠送jar包:httpclient-4.5.13.jar; 赠送原API文档:httpclient-4.5.13-javadoc.jar; 赠送源代码:httpclient-4.5.13-sources.jar; 赠送Maven依赖信息文件:httpclient-4.5.13.pom; 包含翻译后的API文档:...
赠送jar包:httpclient-4.5.5.jar; 赠送原API文档:httpclient-4.5.5-javadoc.jar; 赠送源代码:httpclient-4.5.5-sources.jar; 包含翻译后的API文档:httpclient-4.5.5-javadoc-API文档-中文(简体)版.zip ...
### HTTPClient知识点详解 #### 1. HttpClient4 – 获取状态码 **1.1 概览** 本节将详细介绍如何使用HttpClient 4.x版本来获取HTTP响应的状态码,并对其进行验证。这对于开发人员来说是一个非常实用的功能,可以...