浏览 3167 次
锁定老帖子 主题:抓出soso音乐信息
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2007-11-07
import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; public class SongList { //歌手列表 private List<Song> songList; //当前页面最大分页数 private int maxPage = 2; //是否取得页面最大分页数 private boolean bool = false; //已添加的歌曲信息 private HashSet<String> sondListed = new HashSet<String>(); public SongList(String singer){ this.songList = spiderBySinger(singer); } public List<Song> getSongList(){ return songList; } /** * 根据歌手名称去爬页面,取得歌曲信息 */ private List<Song> spiderBySinger(String singer){ ArrayList<Song> sosoList = new ArrayList<Song>(); Spider spider = new Spider(); for (int i = 1; i < maxPage ;i++){ String content = spider.getURLContentByUrl("http://music.soso.com/music.cgi?w=" + singer + "&pl=&co=&ch=s.m.res&sc=mus&sz=&clz=wma&pg="+i); setMaxPageByContent(content); sosoList.addAll(this.getSongListByContent(content)); } sondListed.clear(); return sosoList; } /** * 根据内容设置当前页面最大分页数 */ private void setMaxPageByContent(String content){ if (!bool){ String s = ContentPattern.getStringByContentPattern(content, "<script language=\"javascript\">splitResNum(.*?);</script>"); if (s != null && !s.equals("")){ s = Replace.YYReplace(s, "(", ""); s = Replace.YYReplace(s, ")", ""); int i = Integer.parseInt(s)/20 + 1; if (i > 50){ maxPage = 50; }else{ maxPage = i; } } bool = true; } } /** * 获得歌曲列表 */ private ArrayList<Song> getSongListByContent(String content){ ArrayList<Song> list = new ArrayList<Song>(); Iterator<String> iterator = ContentPattern.getListByContentPattern(content, "<form id=(.*?)<tr>").iterator(); while(iterator.hasNext()){ Song song = getSongByContent(iterator.next()); if (song.getName() == null){ }else{ list.add(song); } } return list; } /** * 根据页面内容提取歌曲信息 */ private Song getSongByContent(String content){ Song song = new Song(); //获得歌曲名称 String name = ContentPattern.getStringByContentPattern(content, "name=\"song\"\\s+value=\"(.*?)\">"); //判断是否已添加此歌曲 if(sondListed.contains(name.trim())){ return song; }else{ sondListed.add(name.trim()); //获得歌曲文件地址 String address = ContentPattern.getStringByContentPattern(content, "name=\"url\"\\s+value=\"(.*?)\">"); //获得歌手 String singer = ContentPattern.getStringByContentPattern(content, "name=\"singer\"\\s+value=\"(.*?)\">"); //获得专辑 String special = ContentPattern.getStringByContentPattern(content, "name=\"album\"\\s+value=\"(.*?)\">"); song.setAddress(address); song.setName(name); song.setSpecial(special); song.setSinger(singer); return song; } } } import java.util.HashSet; import java.util.regex.Matcher; import java.util.regex.Pattern; public class ContentPattern { /** * 获得列表根据原内容和正则表达式 */ public static HashSet<String> getListByContentPattern(String content, String pattern){ HashSet<String> list = new HashSet<String>(); // 用正则表达式编译链接的匹配模式。 Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(content); while(m.find()){ String s = m.group(1).trim(); if(!list.contains(s)) list.add(s); } return list; } /** * 内容,根据内容和正则表达式 */ public static String getStringByContentPattern(String content, String pattern){ String s = ""; // 用正则表达式编译链接的匹配模式。 Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(content); if(m.find()){ s = m.group(1).trim(); } return s; } } public class Song { //歌曲名称 private String name; //歌手名 private String singer; //专辑 private String special; //地址 private String address; public String getName() { return name; } public void setName(String name) { this.name = name; } public String getSinger() { return singer; } public void setSinger(String singer) { this.singer = singer; } public String getSpecial() { return special; } public void setSpecial(String special) { this.special = special; } public String getAddress() { return address; } public void setAddress(String address) { this.address = address; } } 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |
发表时间:2007-11-07
希望有什么修改意见提出来 谢谢
|
|
返回顶楼 | |