package com.sxit; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Scanner; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.HttpClient; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; /** * @功能:单线程 抓取新浪微博 相册图片 * @作者: smile * @时间:2013-2-3 下午10:56:33 * @版本:1.0 */ public class MoPic { private final static HttpClient client = new DefaultHttpClient(); // 存放图片地址 private static List<String> picList = new ArrayList<String>(); public static void main(String[] args) { try { Scanner scan = new Scanner(System.in); System.out.println("请输入你的用户名:"); String username = scan.nextLine(); System.out.println("请输入你的密码:"); String password = scan.nextLine(); System.out.println("请输入目标用户的用户名:"); String targetname = scan.nextLine(); System.out.println("请输入需要下载的相片数量:"); int count = Integer.parseInt(scan.nextLine()); //登入 login(username, password, targetname, count); //下载 upload(targetname); } catch (IOException e) { e.printStackTrace(); } catch (JSONException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } /** * @功能:下载图片 * @时间:2013-2-4 上午11:00:37 */ public static void upload(String targetname) throws Exception { InputStream is = null; OutputStream os = null; URL url = null; HttpURLConnection con = null; // 判断保存路径是否存在 不存在则新建文件夹 File f = new File("E:\\tmp\\"+targetname); if (!f.exists()) { f.mkdir(); } if (picList != null) { for (int i = picList.size() - 1; i >= 0; i--) { try { String p_url = picList.get(i); if (p_url != null && !"".equals(p_url)) { url = new URL(p_url); //截取后缀 int index = p_url.lastIndexOf("."); System.out.println("索引位:"+index); String pos = p_url.substring(index); System.out.println("后缀为:"+pos); con = (HttpURLConnection) url.openConnection(); // 设置连接超时 con.setConnectTimeout(100 * 1000); // 设置读取超时 con.setReadTimeout(100 * 1000); is = new BufferedInputStream(con.getInputStream()); os = new BufferedOutputStream(new FileOutputStream(new File("E:/tmp/"+targetname+"/" + i + pos))); byte[] b = new byte[1024]; int length = 0; while ((length = is.read(b)) != -1) { os.write(b, 0, length); } os.flush(); System.out.println("下载完第" + i + "张图"); } }catch (Exception e) { continue; } } }else{ System.out.println("无相片信息!"); } } // 登入新浪微博 public static void login(String username, String password, String targetName, int pCount) throws IOException, JSONException { HttpPost post = new HttpPost("http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)"); post.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0"); post.setHeader("Referer", "http://weibo.com/"); post.setHeader("Content-Type", "application/x-www-form-urlencoded"); String data = getServerTime(); String nonce = makeNonce(6); // 登录表单的信息 List<NameValuePair> qparams = new ArrayList<NameValuePair>(); qparams.add(new BasicNameValuePair("entry", "weibo")); qparams.add(new BasicNameValuePair("gateway", "1")); qparams.add(new BasicNameValuePair("from", "")); qparams.add(new BasicNameValuePair("savestate", "0")); qparams.add(new BasicNameValuePair("useticket", "1")); qparams.add(new BasicNameValuePair("pagerefer", "")); qparams.add(new BasicNameValuePair("service", "miniblog")); qparams.add(new BasicNameValuePair("servertime", data)); qparams.add(new BasicNameValuePair("nonce", nonce)); qparams.add(new BasicNameValuePair("pwencode", "wsse")); qparams.add(new BasicNameValuePair("encoding", "UTF-8")); qparams.add(new BasicNameValuePair("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack")); qparams.add(new BasicNameValuePair("returntype", "META")); // 用户名处理 qparams.add(new BasicNameValuePair("su", encodeAccount(username))); qparams.add(new BasicNameValuePair("sp", new SinaSSOEncoder().encode(password, data, nonce))); UrlEncodedFormEntity params = new UrlEncodedFormEntity(qparams, "utf-8"); post.setEntity(params); HttpResponse response = client.execute(post); String entity = EntityUtils.toString(response.getEntity()); System.out.println("entity为:" + entity); String url = entity.substring(entity.indexOf("http%3A%2F%2Fweibo.com%2Fajaxlogin.php"), entity.indexOf("code=0") + 6); url = URLDecoder.decode(url); System.out.println("真实地址为:" + url); // 获取到实际url进行连接 HttpGet getMethod = new HttpGet(url); response = client.execute(getMethod); entity = EntityUtils.toString(response.getEntity()); System.out.println("----->>>" + entity); entity = entity.substring(entity.indexOf("userdomain") + 13, entity.lastIndexOf("\"")); System.out.println("......." + entity); getMethod = new HttpGet("http://weibo.com/" + entity); response = client.execute(getMethod); String uid = EntityUtils.toString(response.getEntity()); uid = uid.substring(uid.indexOf("oid") + 9, uid.lastIndexOf("$CONFIG['onick']") - 3); // 这里获取的是登入用户的uid System.out.println(uid); // 这里去访问别的用户的微博 输入用户名 比如:bearsun getMethod = new HttpGet("http://weibo.com/" + targetName); response = client.execute(getMethod); String pid = EntityUtils.toString(response.getEntity()); pid = pid.substring(pid.indexOf("oid") + 9, pid.lastIndexOf("$CONFIG['onick']") - 3); // 访问目标用户的pid System.out.println(pid); // 这里只取微博配图中的图片http://photo.weibo.com/1511804135/talbum/index?from=profile_wb getMethod = new HttpGet("http://photo.weibo.com/" + pid + "/talbum/index?from=profile_wb"); response = client.execute(getMethod); String albumId = EntityUtils.toString(response.getEntity()); albumId = albumId.substring(albumId.indexOf("album_id") + 9, albumId.indexOf("album_info") - 36); // 相册id System.out.println(albumId); // http://ww3.sinaimg.cn/mw690/6fb242fdjw1dzke8vygnwj.jpg // http://photo.weibo.com/photos/get_all?uid=1511804135&album_id=14503807&count=32&page=1&type=3 getMethod = new HttpGet("http://photo.weibo.com/photos/get_all?uid=" + pid + "&album_id=" + albumId + "&count=" + pCount + "&page=1&type=3"); response = client.execute(getMethod); // 返回的是一个json数组 entity = EntityUtils.toString(response.getEntity()); JSONObject a = new JSONObject(entity); // 获取图片信息json数组 System.out.println(a.get("data").toString()); JSONArray list = new JSONObject(a.get("data").toString()).getJSONArray("photo_list"); for (int i = 0; i < list.length(); i++) { JSONObject temp = (JSONObject) list.get(i); String pic_name = "http://ww3.sinaimg.cn/mw690/" + temp.getString("pic_name"); System.out.println(pic_name); picList.add(pic_name); } } // 登入账号处理 private static String encodeAccount(String account) { String userName = ""; try { userName = Base64.encodeBase64String(URLEncoder.encode(account, "UTF-8").getBytes()); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return userName; } private static String makeNonce(int len) { String x = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; String str = ""; for (int i = 0; i < len; i++) { str += x.charAt((int) (Math.ceil(Math.random() * 1000000) % x.length())); } return str; } private static String getServerTime() { long servertime = new Date().getTime() / 1000; return String.valueOf(servertime); } }
package com.sxit; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Scanner; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.HttpClient; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; /** * @功能:多线程抓取新浪微博 相册图片 * @作者: smile * @时间:2013-2-3 下午10:56:33 * @版本:1.0 */ public class MoPicThread { private final static HttpClient client = new DefaultHttpClient(); // 存放图片地址 private static List<String> picList = new ArrayList<String>(); public static void main(String[] args) { try { Scanner scan = new Scanner(System.in); System.out.println("请输入你的用户名:"); String username = scan.nextLine(); System.out.println("请输入你的密码:"); String password = scan.nextLine(); System.out.println("请输入目标用户的用户名:"); String targetname = scan.nextLine(); System.out.println("请输入需要下载的相片数量:"); int count = Integer.parseInt(scan.nextLine()); // 登入 login(username, password, targetname, count); // 下载 upload(targetname); } catch (IOException e) { e.printStackTrace(); } catch (JSONException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } /** * @功能:下载图片 * @时间:2013-2-4 上午11:00:37 */ public static void upload(String targetname) throws Exception { // 建立线程池 ExecutorService executor = Executors.newFixedThreadPool(10); // 判断保存路径是否存在 不存在则新建文件夹 File f = new File("E:\\tmp\\" + targetname); if (!f.exists()) { f.mkdir(); } if (picList != null) { // 将图片分段下载 for (int i = 1,count = picList.size() / 20; i <= count; i++) { int start = (i - 1) * 20; int end = 0; if (i != 20) { end = i*20-1; } else { end = picList.size()-1; } ImagThread thread = new ImagThread(start, end, targetname); executor.submit(thread); } executor.shutdown(); } else { System.out.println("无相片信息!"); } } /** * @功能:多线程下载图片到本地 * @时间:2013-2-4 下午1:59:02 */ static class ImagThread implements Runnable { // 起始 private int start; // 终止 private int end; // 目标用户名 private String targetname; public ImagThread(int start, int end, String targetname) { this.start = start; this.end = end; this.targetname = targetname; } public void run() { for (int i = start; i <= end; i++) { try { uploadImag(i, targetname, picList.get(i)); } catch (Exception e) { System.out.println("第" + i + "张图片下载失败,地址为:" + picList.get(i)); continue; } System.out.println("线程"+Thread.currentThread().getName()+"下载完第"+i+"张图片"); } } } /** * @功能:下载单个图片到本地 */ public static void uploadImag(int i, String targetname, String p_url) throws Exception { InputStream is = null; OutputStream os = null; URL url = null; HttpURLConnection con = null; try { url = new URL(p_url); // 截取后缀 int index = p_url.lastIndexOf("."); String pos = p_url.substring(index); con = (HttpURLConnection) url.openConnection(); // 设置连接超时 con.setConnectTimeout(100 * 1000); // 设置读取超时 con.setReadTimeout(100 * 1000); is = new BufferedInputStream(con.getInputStream()); os = new BufferedOutputStream(new FileOutputStream(new File("E:/tmp/" + targetname + "/" + i + pos))); byte[] b = new byte[1024]; int length = 0; while ((length = is.read(b)) != -1) { os.write(b, 0, length); } os.flush(); } finally { is.close(); os.close(); } } // 登入新浪微博 public static void login(String username, String password, String targetName, int pCount) throws IOException, JSONException { HttpPost post = new HttpPost("http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)"); post.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0"); post.setHeader("Referer", "http://weibo.com/"); post.setHeader("Content-Type", "application/x-www-form-urlencoded"); String data = getServerTime(); String nonce = makeNonce(6); // 登录表单的信息 List<NameValuePair> qparams = new ArrayList<NameValuePair>(); qparams.add(new BasicNameValuePair("entry", "weibo")); qparams.add(new BasicNameValuePair("gateway", "1")); qparams.add(new BasicNameValuePair("from", "")); qparams.add(new BasicNameValuePair("savestate", "0")); qparams.add(new BasicNameValuePair("useticket", "1")); qparams.add(new BasicNameValuePair("pagerefer", "")); qparams.add(new BasicNameValuePair("service", "miniblog")); qparams.add(new BasicNameValuePair("servertime", data)); qparams.add(new BasicNameValuePair("nonce", nonce)); qparams.add(new BasicNameValuePair("pwencode", "wsse")); qparams.add(new BasicNameValuePair("encoding", "UTF-8")); qparams.add(new BasicNameValuePair("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack")); qparams.add(new BasicNameValuePair("returntype", "META")); // 用户名处理 qparams.add(new BasicNameValuePair("su", encodeAccount(username))); qparams.add(new BasicNameValuePair("sp", new SinaSSOEncoder().encode(password, data, nonce))); UrlEncodedFormEntity params = new UrlEncodedFormEntity(qparams, "utf-8"); post.setEntity(params); HttpResponse response = client.execute(post); String entity = EntityUtils.toString(response.getEntity()); System.out.println("entity为:" + entity); String url = entity.substring(entity.indexOf("http%3A%2F%2Fweibo.com%2Fajaxlogin.php"), entity.indexOf("code=0") + 6); url = URLDecoder.decode(url); System.out.println("真实地址为:" + url); // 获取到实际url进行连接 HttpGet getMethod = new HttpGet(url); response = client.execute(getMethod); entity = EntityUtils.toString(response.getEntity()); System.out.println("----->>>" + entity); entity = entity.substring(entity.indexOf("userdomain") + 13, entity.lastIndexOf("\"")); System.out.println("......." + entity); getMethod = new HttpGet("http://weibo.com/" + entity); response = client.execute(getMethod); String uid = EntityUtils.toString(response.getEntity()); uid = uid.substring(uid.indexOf("oid") + 9, uid.lastIndexOf("$CONFIG['onick']") - 3); // 这里获取的是登入用户的uid System.out.println(uid); // 这里去访问别的用户的微博 输入用户名 比如:bearsun getMethod = new HttpGet("http://weibo.com/" + targetName); response = client.execute(getMethod); String pid = EntityUtils.toString(response.getEntity()); pid = pid.substring(pid.indexOf("oid") + 9, pid.lastIndexOf("$CONFIG['onick']") - 3); // 访问目标用户的pid System.out.println(pid); // 这里只取微博配图中的图片http://photo.weibo.com/1511804135/talbum/index?from=profile_wb getMethod = new HttpGet("http://photo.weibo.com/" + pid + "/talbum/index?from=profile_wb"); response = client.execute(getMethod); String albumId = EntityUtils.toString(response.getEntity()); albumId = albumId.substring(albumId.indexOf("album_id") + 9, albumId.indexOf("album_info") - 36); // 相册id System.out.println(albumId); // http://ww3.sinaimg.cn/mw690/6fb242fdjw1dzke8vygnwj.jpg // http://photo.weibo.com/photos/get_all?uid=1511804135&album_id=14503807&count=32&page=1&type=3 getMethod = new HttpGet("http://photo.weibo.com/photos/get_all?uid=" + pid + "&album_id=" + albumId + "&count=" + pCount + "&page=1&type=3"); response = client.execute(getMethod); // 返回的是一个json数组 entity = EntityUtils.toString(response.getEntity()); JSONObject a = new JSONObject(entity); // 获取图片信息json数组 System.out.println(a.get("data").toString()); JSONArray list = new JSONObject(a.get("data").toString()).getJSONArray("photo_list"); for (int i = 0; i < list.length(); i++) { JSONObject temp = (JSONObject) list.get(i); String pic_name = "http://ww3.sinaimg.cn/mw690/" + temp.getString("pic_name"); System.out.println(pic_name); picList.add(pic_name); } } // 登入账号处理 private static String encodeAccount(String account) { String userName = ""; try { userName = Base64.encodeBase64String(URLEncoder.encode(account, "UTF-8").getBytes()); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return userName; } private static String makeNonce(int len) { String x = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; String str = ""; for (int i = 0; i < len; i++) { str += x.charAt((int) (Math.ceil(Math.random() * 1000000) % x.length())); } return str; } private static String getServerTime() { long servertime = new Date().getTime() / 1000; return String.valueOf(servertime); } }
相关推荐
【Python-新浪微博相册大图多线程爬虫】是一个基于Python编程语言的项目,用于高效地抓取新浪微博用户相册中的大尺寸图片。在Web爬虫领域,它利用了Python的强大功能,特别是对于网络数据抓取和多线程处理的优势。 ...
在本资源中,我们主要探讨的是如何利用Python编程语言实现一个针对新浪微博的网络爬虫,目的是抓取微博数据,包括微博的文字内容、图片以及视频。爬虫是数据挖掘的重要工具,它能自动化地从互联网上收集信息,对于...
《新浪微博图片批量下载工具——荔枝新浪微博图片批量下载助手 v1.0详解》 在数字化信息时代,社交媒体成为了人们分享生活、获取资讯的重要平台,而图片作为直观表达信息的方式,其重要性不言而喻。新浪微博作为...
微商用户在浏览新闻、微博、QQ空间说说、档口相册或者淘宝商品的时候看到好的图片素材,想要抓取到朋友圈,可以直接使用果子抓图神器。 添加任意网页打开,点击抓取即可一键抓取该网页的所有图文,用户可以手动选择...
本程序采用ThinkSNS内核,...3.针对中小站长、淘宝客、网店设计,提供商品信息抓取、图片抓取等必备功能 4.URL伪静态,增强搜索引擎收录,有效提高SEO效果 5.淘宝商品图片远程调用,节省本地空间 6.强大的采集工具
功能简介 ... ... 目前主要功能: 1.淘宝客,一键淘点金 ...3.针对中小站长、淘宝客、网店设计,提供商品信息抓取、图片抓取等必备功能 4.URL伪静态,增强搜索引擎收录,有效提高SEO效果 5.强大的采集工具
这是记事狗微博系统最新官方版,记事狗微博系统,是一套创新的PHP开源微博程序,兼有BBS和轻博系统特性,支持短信、手机客户端,可与新浪微博平台内容互通,既可用来独立建站也可通过Ucenter与已有网站整合,通过...
支持QQ、新浪微博、帐号一键登录 B2Bbuilder已有模块列表: 企业模块 VIP等级体系、认证体系、自动生成二级域名、可为商铺绑定顶级域名,实现自主建站 保证金模块 管理员可以对会员收取保证金,并和运营机制相...
KesionCMS打破CMS系统瓶颈,系统自带功能强大的BBS,不再为了整合第三方论坛平台而烦恼,一站式用户登录,系统还集成腾讯QQ,新浪微博及支付宝快捷登录,只需绑定下帐户,以后可以直接用QQ号或支付宝帐户登录。...
KesionCMS打破CMS系统瓶颈,系统自带功能强大的BBS,不再为了整合第三方论坛平台而烦恼,一站式用户登录,系统还集成腾讯QQ,新浪微博及支付宝快捷登录,只需绑定下帐户,以后可以直接用QQ号或支付宝帐户登录。...