使用 urllib、urllib2 和 BeautifulSoup 库。
执行下面代码,输入要查找的美剧名即可:
查询到所有结果并遍历每一个结果中不同视频格式的所有下载链接
# coding: utf-8
import urllib
import urllib2
from bs4 import BeautifulSoup
import sys

# Python 2 only: force the process-wide default encoding to UTF-8 so that
# implicit str/unicode conversions of the Chinese text on the site do not
# raise UnicodeDecodeError.  reload() is required because site.py removes
# sys.setdefaultencoding at startup.
reload(sys)
sys.setdefaultencoding('utf8')

# Base URL of the US-TV-series site being scraped.
host = "http://www.meijutt.com"
def getUrlRespHtml(url, data=None):
    """Fetch *url* and return the raw response body as a str.

    If *data* (a dict of form fields) is given, it is urlencoded and the
    request is sent as a POST; otherwise a plain GET is issued.
    Browser-like headers are attached so the site serves normal pages.
    """
    heads = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
             'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
             'Cache-Control': 'max-age=0',
             # The Host header must be the bare hostname, not the full URL
             # with scheme; strip the "http://" prefix from the constant.
             'Host': host.split('//')[-1],
             'Connection': 'keep-alive',
             'Referer': url,
             'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.14) Gecko/20110221 Ubuntu/10.10 (maverick) Firefox/3.6.14'}
    req = urllib2.Request(url)
    # Cookie-aware opener; headers are set on the opener so they are also
    # sent on any redirected requests.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    opener.addheaders = heads.items()
    if data:
        # POST: form-encode the payload dict.
        respHtml = opener.open(req, urllib.urlencode(data)).read()
    else:
        respHtml = opener.open(req).read()
    return respHtml
def get_bt(url):
html = getUrlRespHtml(url)
bs_html = BeautifulSoup(html)
download_list = bs_html.find_all(\’div\’,\’down_list\’)
for index,down_list in enumerate(download_list):
if len(download_list)>1:
print \'<<<<<视频格式%s>>>>>\’%(index+1)
for li in down_list.find_all(\’li\’):
f_attrs = li.find(\’input\’,\’down_url\’).attrs
print f_attrs.get(\’value\’)
def get_bts(searchword):
data ={\’searchword\’:searchword}
url = host+\’/search.asp\’
html=getUrlRespHtml(url,data)
bs = BeautifulSoup(html)
cn_box2 = bs.find_all(\’div\’,\’cn_box2\’)
print \’\\n搜索结果:%s\\n\’%len(cn_box2)
for index,div in enumerate(cn_box2):
print \’——搜索结果%s——\’%(index+1)
attrs = div.a.attrs
title = attrs.get(\’title\’)
href = attrs.get(\’href\’)
print title,(host+href)
get_bt(host+href)
print \’——end——–\\n\’
# Entry point: prompt for a show name and run the search.  Guarded so that
# importing this module does not trigger the interactive prompt.
# The site expects GB2312-encoded form data, hence the re-encode before POST.
if __name__ == '__main__':
    searchword = raw_input("请输入要搜索的美剧:")
    get_bts(searchword.encode('gb2312'))
上一篇:Anaconda安装及使用教程
下一篇:Django学习笔记