Python实现批量下载文件
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Download every URL listed in a text file, up to four at a time.

Usage: python <this script> urls.txt

Each URL is saved in the current directory under the last path component
of the URL.  A URL's line is removed from the list file only after its
download has finished successfully, so re-running the script retries
whatever is left in the list.
"""
# Patch the standard library before ``requests`` is imported; gevent
# recommends patching as early as possible.
from gevent import monkey
monkey.patch_all()

from gevent.pool import Pool
import requests
import sys
import os

USER_AGENT = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')


def download(url):
    """Fetch *url* and save it in the current directory.

    The local file name is the text after the last ``/`` of the URL.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            server answers with an HTTP error status.  Previously an error
            page was silently saved and reported as a success.
    """
    filename = url.split('/')[-1].strip()
    r = requests.get(url.strip(), headers={'User-Agent': USER_AGENT},
                     stream=True)
    try:
        r.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    finally:
        # Close the response even when the status check or a write fails.
        r.close()
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print("%s is ok" % filename)


def removeLine(key, filename):
    """Delete every line of the text file *filename* that contains *key*.

    *key* is matched as a literal substring.  The file is rewritten through
    a temporary sibling file and a rename instead of shelling out to
    ``sed``, so characters in *key* that are special to the shell or to
    regular expressions cannot alter the command.  An empty *key* is
    ignored because it would match every line.
    """
    if not key:
        return
    with open(filename, "r") as src:
        lines = src.readlines()
    kept = [line for line in lines if key not in line]
    if len(kept) == len(lines):
        return  # nothing matched; leave the file untouched
    tmp_name = filename + ".tmp"
    with open(tmp_name, "w") as dst:
        dst.writelines(kept)
    # NOTE(review): os.rename replaces an existing file on POSIX only; the
    # original relied on ``sed -i`` and was POSIX-only as well.
    os.rename(tmp_name, filename)


def _download_and_remove(url, list_file):
    """Download *url*, then drop its entry from *list_file* on success."""
    download(url)
    # Only reached when download() did not raise, so failed URLs stay listed.
    # NOTE(review): several greenlets may call removeLine on the same file;
    # this assumes plain file I/O does not yield to other greenlets --
    # confirm for the gevent version in use.
    removeLine(url.split('/')[-1].strip(), list_file)


def main(argv):
    """Run the batch download for the list file named in ``argv[1]``."""
    if len(argv) != 2:
        print('Usage: python %s urls.txt' % argv[0])
        return
    list_file = argv[1]
    # Read the whole list first: the file is rewritten while jobs complete.
    with open(list_file, "r") as f:
        urls = [line.strip() for line in f]
    pool = Pool(4)
    for url in urls:
        if url:  # skip blank lines
            pool.spawn(_download_and_remove, url, list_file)
    pool.join()


if __name__ == "__main__":
    main(sys.argv)
其他网友的方法:
# Download a single URL, picking the local file name from the response.
import re
import shutil
from contextlib import closing
from os.path import basename

try:  # Python 3
    from urllib.parse import urlsplit
    from urllib.request import Request, urlopen
except ImportError:  # Python 2, which the original snippet targeted
    from urlparse import urlsplit
    from urllib2 import Request, urlopen

# Matches the ``filename`` parameter of a Content-Disposition value, either
# quoted or bare.  The extended ``filename*=`` form is deliberately not
# matched.
_FILENAME_RE = re.compile(
    r'(?:^|;)\s*filename\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^;]*))',
    re.IGNORECASE)


def url2name(url):
    """Return the last path component of *url* (query and fragment dropped)."""
    return basename(urlsplit(url)[2])


def _name_from_disposition(header):
    """Return the file name given in a Content-Disposition value, or None.

    Directory components are stripped so that a server-supplied name cannot
    point outside the current directory.
    """
    match = _FILENAME_RE.search(header)
    if match is None:
        return None
    raw = next(group for group in match.groups() if group is not None)
    return basename(raw.strip().replace('\\', '/')) or None


def download(url, localFileName=None):
    """Download *url* to a local file.

    The local name is chosen in this order:

    1. *localFileName*, when given;
    2. the ``filename`` parameter of the Content-Disposition header;
    3. the last path component of the final URL, if the request was
       redirected;
    4. the last path component of *url*.

    Args:
        url: address to fetch.
        localFileName: optional name that overrides every other choice.
    """
    localName = url2name(url)
    with closing(urlopen(Request(url))) as r:
        disposition = r.info().get('Content-Disposition')
        suggested = _name_from_disposition(disposition) if disposition else None
        if suggested:
            # The response names the file itself.
            localName = suggested
        elif r.url != url:
            # We were redirected: take the name from the final URL.
            localName = url2name(r.url)
        if localFileName:
            # The caller can force the file to be saved under a given name.
            localName = localFileName
        with open(localName, 'wb') as f:
            # Stream to disk instead of reading the whole body into memory.
            shutil.copyfileobj(r, f)


if __name__ == "__main__":
    # Example invocation; guarded so importing this code does not try to
    # fetch the placeholder URL.
    download(r'你要下载的python文件的url地址')
以上便是本文给大家分享的全部内容了,小伙伴们可以测试下哪种方法效率更高呢。