jsoup从巨鲸批量下载迈克杰克逊的MP3歌曲

you_java

浏览: 97169 次
性别:
来自: 广州

最近访客更多访客>>

geeksun

LinSiraaa

wssc198765

Joeygirl

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

jsoup

jsoup 下载地址：http://www.jsoup.org

package jsoup;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line scraper that reads a song-list page on top100.cn, resolves
 * the download link of every listed song and saves each one as an
 * {@code .mp3} file in a local directory.
 *
 * <p>All page structure knowledge (element ids, CSS classes, the position of
 * the download link) is hard-coded to the markup the site served when this
 * class was written; when the markup does not match, the affected song is
 * skipped instead of aborting the whole run.
 */
public class Top100Mp3Downloader {
	
	private static final String MJ_INDEX = "http://www.top100.cn/artist/songs.php?singerid=1669";
	// http://www.top100.cn/artist/info-agr5dcqe.shtml
	// http://www.top100.cn/artist/songs.php?singerid=1669
	
	private static final String DOWNLOAD = "http://www.top100.cn/download/download.php?Productid=";

	/** Site root; the download pages contain hrefs that are appended to it. */
	private static final String SITE_ROOT = "http://www.top100.cn";

	/** Directory used by {@link #main(String[])} when no argument is given. */
	private static final String DEFAULT_DIR = "E:\\music\\files\\Michael Jackson1";

	/**
	 * Parses a song-list page and returns the song title / encrypted id pairs
	 * found in it.
	 * 
	 * @param url
	 *            song-list page address, e.g.
	 *            http://www.top100.cn/artist/info-agr5dcqe.shtml
	 * @return map from song title to encrypted id; empty (never {@code null})
	 *         when the page cannot be fetched or contains no recognizable
	 *         entries. Songs sharing the same title overwrite each other.
	 */
	private Map<String, String> findIds(String url) {
		Map<String, String> map = new HashMap<String, String>();
		try {
			Document doc = Jsoup.parse(new URL(url), 1000 * 10);
			Element listDiv = doc.getElementById("songsListDiv");
			if (listDiv == null) {
				System.out.println("No element with id songsListDiv in " + url);
				return map;
			}
			Elements uls = listDiv.getElementsByTag("ul");
			for (int i = 0; i < uls.size(); i++) {
				Element ul = uls.get(i);
				Element hidden = ul.getElementById("hidValue");
				Element li = ul.getElementsByAttributeValue("class", "No2")
						.first();
				if (hidden == null || li == null) {
					// Not a song row (header, nested list, ...): skip it.
					continue;
				}
				Element href = li.getElementsByTag("a").first();
				if (href == null) {
					continue;
				}
				map.put(href.attr("title"), hidden.val());
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return map;
	}

	/**
	 * Loads the download page that belongs to an encrypted song id and
	 * extracts the address of the actual file from it.
	 * 
	 * @param id
	 *            encrypted id as found in the song list
	 * @return absolute download address, or {@code null} when the page could
	 *         not be loaded or does not contain the expected link
	 */
	private String findDownPathById(String id) {
		if (id == null || id.length() == 0) {
			return null;
		}
		if (id.startsWith("m")) {// every id starts with "m"; the download page wants it stripped
			id = id.substring(1);
		}
		URL url = null;
		try {
			url = new URL(DOWNLOAD + id);
			Document doc = Jsoup.parse(url, 1000 * 2);
			Elements lis = doc.select(".Listen_downloadtopcon ul li");
			// The download link sits in the sixth list item of the page.
			if (lis.size() <= 5) {
				System.out.println("Unexpected download page layout: " + url);
				return null;
			}
			Element link = lis.get(5).getElementsByTag("a").first();
			if (link == null) {
				System.out.println("No download link found in: " + url);
				return null;
			}
			return SITE_ROOT + link.attr("href");
		} catch (MalformedURLException e) {
			System.out.println("访问url【" + url + "】出错！");
			e.printStackTrace();
		} catch (IOException e) {
			System.out.println("文件下载错误");
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Downloads the content behind a download address into
	 * {@code dir/name.mp3}.
	 * 
	 * @param dir
	 *            target directory; created when missing
	 * @param name
	 *            song title; characters that are illegal in file names are
	 *            replaced before it is used
	 * @param path
	 *            download address; {@code null} makes the call a no-op
	 * @return {@code true} only when the file was written completely
	 */
	private boolean downByPath(String dir, String name, String path) {
		System.out.println("path=" + path);
		if (path == null) {
			return false;
		}
		File parent = new File(dir);
		if (!parent.exists() && !parent.mkdirs()) {
			System.out.println("Cannot create directory: " + parent);
			return false;
		}
		File mp3 = new File(parent, toSafeFileName(name) + ".mp3");
		HttpURLConnection con = null;
		InputStream is = null;
		OutputStream os = null;
		boolean done = false;
		try {
			URL url = new URL(path);
			con = (HttpURLConnection) url.openConnection();
			// The referer must be forged, otherwise the server answers with
			// the home page. Cookies turned out to be irrelevant.
			con.setRequestProperty("User-Agent",
							"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon;)");
			con.setRequestProperty("Accept-Encoding", "gzip");
			con.setRequestProperty("referer", "http://www.top100.cn");
			con.setDoInput(true);
			con.connect();
			int code = con.getResponseCode();
			if (code == HttpURLConnection.HTTP_OK) {
				is = con.getInputStream();
				// gzip was requested above and HttpURLConnection does not
				// decode it on its own, so unwrap it here when it is used.
				if ("gzip".equalsIgnoreCase(con.getContentEncoding())) {
					is = new java.util.zip.GZIPInputStream(is);
				}
				os = new FileOutputStream(mp3);
				byte[] b = new byte[1024 * 5];
				int length;
				while ((length = is.read(b)) != -1) {
					os.write(b, 0, length);
				}
				os.flush();
				done = true;
			} else {
				System.out.println("服务器返回:" + code);
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			boolean started = os != null;
			closeQuietly(os);
			closeQuietly(is);
			if (con != null) {
				con.disconnect();
			}
			// Do not leave a truncated file behind that looks like a song.
			if (started && !done) {
				mp3.delete();
			}
		}
		return done;
	}

	/**
	 * Turns a song title taken from the web page into a name that is safe to
	 * use as a single file-name component on common file systems.
	 */
	private static String toSafeFileName(String name) {
		String safe = name == null ? "" : name.replaceAll(
				"[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
		return safe.length() == 0 ? "untitled" : safe;
	}

	/** Closes a stream, ignoring a {@code null} argument and close failures. */
	private static void closeQuietly(java.io.Closeable c) {
		if (c == null) {
			return;
		}
		try {
			c.close();
		} catch (IOException ignored) {
			// Nothing sensible can be done about a failing close here.
		}
	}

	/**
	 * Downloads every song listed on {@link #MJ_INDEX}.
	 * 
	 * @param args
	 *            optional: {@code args[0]} overrides the target directory
	 */
	public static void main(String[] args) {
		String dir = args != null && args.length > 0 ? args[0] : DEFAULT_DIR;
		Top100Mp3Downloader m = new Top100Mp3Downloader();
		for (Map.Entry<String, String> e : m.findIds(MJ_INDEX).entrySet()) {
			String name = e.getKey();
			String path = m.findDownPathById(e.getValue());
			if (path == null) {
				System.out.println(name + " skipped: no download address found");
				continue;
			}
			if (m.downByPath(dir, name, path)) {
				System.out.println(name + " from " + path + " has down!");
			} else {
				System.out.println(name + " from " + path + " failed");
			}
		}
	}
}

分享到：

java mail(简单发送邮件) | jsoup httpclient 爬取网页并下载google图标

2012-03-21 14:13
浏览 1984
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

jsoup从巨鲸批量下载迈克杰克逊的MP3歌曲

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

jsoup从巨鲸批量下载迈克杰克逊的MP3歌曲

评论

发表评论

相关推荐

jsoup httpclient 爬取网页并下载google图标

jsoup抓取其他网站的页面代码

HttpClient根据jsoup解析网页

java排序3（插入排序）

java排序2（选择排序）

java排序1（冒泡排序）

java面试题集锦

java面向对象笔记

最近访客更多访客>>