# Download every video linked from the hidden "list2" course table of the
# page at pageUrl and save each one as <basePath><prefix><title>.mp4.
#
# This is a Python 2 script (it depends on urllib2).  The print calls use the
# single-argument parenthesised form, which produces the same output as the
# Python 2 print statement.
import contextlib
import os
import re
import shutil
import urllib2

# Destination directory.  A raw string cannot end in a single backslash, so
# the separators are doubled and kept literally in the value.
basePath = r'e:\\open163\\'
fileFormat = r'.mp4'
pageUrl = r"http://v.163.com/special/Khan/european.html"

# limitPat isolates the one table that holds the download links; needPat is
# then applied to that table only.  needPat captures, per row:
#   group 1: row class suffix ("even" / "odd"), unused
#   group 2: text placed before the title link
#   group 3: text of the title link
#   group 4: href of the "refbtn" link, used as the video URL
limitPat = r'<table class="m-clist" id="list2" style="display:none">.*?</table>'
needPat = r'<tr class="u-(even|odd)">\s*<td class="u-ctitle">\s*(.*?)\s*<a.*?>(.*?)</a>.*?<a class="refbtn" href="(.*?)".*?>.*?</tr>'


def _download(url, filename):
    """Stream the resource at *url* into the file *filename*.

    The request is opened before the output file is created, so a failed
    request does not leave an empty file behind.  The body is copied in
    chunks instead of being read into memory in one piece, and both the
    response and the file are closed even when an error occurs.
    """
    with contextlib.closing(urllib2.urlopen(url)) as response:
        with open(filename, 'wb') as curMp4:
            shutil.copyfileobj(response, curMp4)


print('get page: %s' % pageUrl)
with contextlib.closing(urllib2.urlopen(pageUrl)) as page:
    content = page.read()

mat = re.search(limitPat, content, re.S)
if mat is None:
    # Without this check a changed page layout surfaces as an opaque
    # AttributeError on mat.group().
    raise SystemExit('course table not found in page: %s' % pageUrl)
limitContent = mat.group()

# open() does not create missing directories.
if not os.path.isdir(basePath):
    os.makedirs(basePath)

for _rowClass, prefix, title, videoUrl in re.findall(needPat, limitContent, re.S):
    filename = basePath + prefix + title + fileFormat
    print('creatint file: %s' % filename)
    print('fetching url: %s ......' % videoUrl)
    _download(videoUrl, filename)
    print('file done!')
用法并不复杂:把 Python 源码里的 pageUrl 改成你想抓取的那一页的 URL,脚本就会下载该页上的所有视频,默认保存在 E:\open163 目录下。
评论