本文实例讲述了用 Python 实现下载指定网址所有图片的方法,分享给大家供大家参考。具体实现方法如下:
# coding=utf-8
# Download every picture referenced by an <img> tag on a web page.
# Usage: python downpicture.py www.baidu.com [target_dir]
import os
import sys
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen

# Directory used when no target directory is given on the command line.
DEFAULT_SAVE_DIR = r'D:\python\song'


def getpicname(path):
    """Return the file name of the picture a URL points to.

    Only the path component of the URL is inspected, so a query string,
    fragment or host name can never be mistaken for a file extension.

    :param path: URL of the picture.
    :return: the last path segment, or None when that segment has no
        file extension.
    """
    name = urlparse(path).path.rsplit('/', 1)[-1]
    if os.path.splitext(name)[1] == '':
        return None
    return name


def saveimgto(path, urls):
    """Download each URL in *urls* into the local directory *path*.

    URLs for which getpicname() yields no file name are skipped, and a
    URL that cannot be fetched is reported and skipped, so one bad image
    does not abort the whole run. Pictures sharing a file name overwrite
    each other.

    :param path: existing directory that receives the files. If it is not
        a directory, 'path is invalid' is printed and the program exits.
    :param urls: iterable of absolute picture URLs.
    :return: number of pictures written to disk.
    """
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()

    saved = 0
    for url in urls:
        name = getpicname(url)
        if name is None:
            # os.path.join(path, None) would raise TypeError.
            continue
        try:
            # Fetch first so a failed download leaves no empty file behind.
            with urlopen(url) as response:
                data = response.read()
        except (OSError, ValueError) as err:
            # URLError/HTTPError/timeouts are OSError; urlopen raises
            # ValueError for a URL whose type it does not recognise.
            print('skip {}: {}'.format(url, err))
            continue
        with open(os.path.join(path, name), 'wb') as out:
            out.write(data)
        saved += 1
    return saved


class myhtmlparser(HTMLParser):
    """Collect the src attribute of every <img> tag fed to the parser.

    After feed(), ``urls`` holds the non-empty src values in document
    order and ``num`` holds the number of <img> tags encountered.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.urls = []
        self.num = 0

    def handle_starttag(self, tag, attr):
        """Record the src of an <img> start tag; ignore every other tag."""
        if tag.lower() != 'img':
            return
        self.num += 1
        # A valueless attribute (<img src>) is reported with value None;
        # drop it together with empty strings.
        self.urls.extend(
            value for name, value in attr
            if name.lower() == 'src' and value
        )


def absoluteurls(base, srcs):
    """Resolve every entry of *srcs* against the page URL *base*.

    Handles protocol-relative ('//host/x.png'), root-relative ('/x.png')
    and document-relative ('x.png') references; absolute URLs are
    returned unchanged.

    :param base: URL of the page the references were found on.
    :param srcs: iterable of src attribute values.
    :return: new list of absolute URLs, in the same order.
    """
    return [urljoin(base, src) for src in srcs]


def main(argv):
    """Download all pictures of the page named in *argv*.

    :param argv: command-line vector: program name, page URL and an
        optional target directory (defaults to DEFAULT_SAVE_DIR).
    """
    if len(argv) < 2:
        print('usage: python downpicture.py <url> [target_dir]')
        sys.exit(1)

    url = argv[1]
    # Only add a scheme when none was given; https URLs are left alone.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    target = argv[2] if len(argv) > 2 else DEFAULT_SAVE_DIR

    with urlopen(url) as page:
        # Use the final URL so references resolve correctly after a redirect.
        base = page.geturl()
        content = page.read().decode('utf-8', 'ignore')

    parser = myhtmlparser()
    parser.feed(content)
    parser.urls = absoluteurls(base, parser.urls)

    saved = saveimgto(target, parser.urls)
    print('num of download pictures is {}'.format(saved))


if __name__ == '__main__':
    main(sys.argv)
运行结果如下:
num of download pictures is 19
希望本文所述对大家的Python程序设计有所帮助。