本文是上一篇的续篇,后期将进行封装,以提高可用性。
虎扑在展示图片的时候偷懒了,直接给出了原图链接,直接获取这个链接,下载到本地即可。
这里使用 PhantomJS 作为无界面浏览器来打开页面。
def downloadPicture(pic_url, pic_path):
    """Download one picture with urllib2 and save it to a local file.

    The whole response body is read into memory before the destination
    file is opened, so a download that fails part-way does not leave an
    empty or truncated file behind.

    Args:
        pic_url: URL of the picture to fetch.
        pic_path: Path of the local file to write (opened in "wb" mode, so
            an existing file is overwritten).
    """
    # Local import: keeps this function usable without touching the
    # module-level import block.
    from contextlib import closing

    # closing() guarantees response.close() is called even if read() raises;
    # under Python 2 the urllib2 response cannot be used in a `with`
    # statement directly.
    with closing(urllib2.urlopen(pic_url)) as response:
        data = response.read()

    with open(pic_path, "wb") as img_file:
        img_file.write(data)
def _albumPrefix(elem_url):
    """Return elem_url with its trailing "-<page>.html" part removed.

    Raises:
        ValueError: if elem_url does not contain ".html".
    """
    stem, suffix, _ = elem_url.rpartition(".html")
    if not suffix:
        raise ValueError("album URL does not contain '.html': %r" % (elem_url,))
    prefix, dash, page = stem.rpartition("-")
    # Strip the page index whatever its number of digits; if the URL carries
    # no numeric index, keep the whole stem as the prefix.
    if dash and page.isdigit():
        return prefix
    return stem


def getPictures(elem_url, nums, path, proxy="http://yout_proxy:8080/"):
    """Download the picture shown on each of the first `nums` album pages.

    Page URLs are built as "<prefix>-<page>.html" for page = 1..nums, where
    <prefix> is `elem_url` without its own "-<page>.html" tail. Each page is
    loaded through the module-level `driver` (defined elsewhere in the file;
    presumably a Selenium WebDriver -- TODO confirm), the "src" of the image
    under the "pic_bg" div is read, and the picture is saved with
    downloadPicture().

    Any exception stops the loop and is printed rather than raised; the
    number of pictures saved so far is always printed at the end.

    Args:
        elem_url: URL of any page of the album, e.g. ".../p29556-1.html".
        nums: Number of pages (pictures) to fetch, starting from page 1.
        path: Directory in which the pictures are stored.
        proxy: HTTP proxy URL installed as the global urllib2 opener (it
            therefore applies to urllib2.urlopen, i.e. the picture
            downloads). Pass None or "" to leave the opener untouched.
    """
    downloaded = 0
    try:
        prefix = _albumPrefix(elem_url)

        if proxy:
            # The opener is process-wide and identical for every page, so it
            # is installed once instead of on every iteration.
            proxy_support = urllib2.ProxyHandler({"http": proxy})
            urllib2.install_opener(urllib2.build_opener(proxy_support))

        for page in range(1, nums + 1):
            html_url = prefix + "-" + str(page) + ".html"
            driver.get(html_url)
            elem = driver.find_element_by_xpath("//div[@class='pic_bg']/div/img")
            url = elem.get_attribute("src")
            # Drop '*' from the name taken from the URL before using it as a
            # local file name.
            file_name = os.path.basename(url).replace("*", "")
            # os.path.join picks the right separator for the current OS.
            downloadPicture(url, os.path.join(path, file_name))
            downloaded += 1
    except Exception as e:
        # Two spaces on purpose: same output as the Python 2 statement
        # `print "Error: ", e`.
        print("Error:  %s" % (e,))
    finally:
        print("Download " + str(downloaded) + " pictures\n")
if __name__ == "__main__":
    # Pictures go into a "hupupic" directory under the current working
    # directory; create it on the first run.
    sub_folder = os.path.join(os.getcwd(), "hupupic")
    pic_path = sub_folder
    if not os.path.exists(sub_folder):
        os.mkdir(sub_folder)

    # Entry page of the album and the number of pages to walk through.
    elem_url = "http://photo.hupu.com/nba/p29556-1.html"
    nums = 15

    getPictures(elem_url, nums, pic_path)
下一篇:Python3 色情图片识别