下载 8000 首儿歌的 Python 代码:
复制代码 代码如下:
#-*- coding: UTF-8 -*-
from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging
def format(filename):
    """Return *filename* with awkward characters replaced by underscores.

    Every space and every apostrophe in the name becomes ``"_"``; all other
    characters are returned unchanged.

    The function name shadows the builtin ``format()``; it is kept so that
    existing callers keep working.

    NOTE(review): the published listing carried one more entry between the
    space and the apostrophe, but its quoting was mangled beyond recovery.
    Add it to ``unsafe_chars`` once the intended character is confirmed.
    """
    unsafe_chars = (' ', '\'')
    for char in unsafe_chars:
        # str.replace is a no-op when the character is absent, so no
        # separate find() check is needed.
        filename = filename.replace(char, "_")
    return filename
def download_mp3(mp3_url, filename, dir):
    """Download *mp3_url* into directory *dir* under the name *filename*.

    Nothing is fetched when the target file already exists. Download or
    write failures are logged and swallowed (best effort); the function
    always returns None.
    """
    # Same named logger that the script's entry point configures; looking it
    # up here means the function does not depend on a module-level global.
    log = logging.getLogger("simple")
    target = os.path.join(dir, filename)
    if os.path.exists(target):
        log.debug(target + " is existed.")
        return
    try:
        response = urllib.urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        # Open the file only after the fetch succeeded: creating it first
        # would leave an empty file behind on a failed download, and the
        # exists-check above would then skip the song on every later run.
        with open(target, 'wb') as out:
            out.write(data)
    except Exception:
        # Exception (not a bare except) so Ctrl-C still stops the run.
        if os.path.exists(target):
            try:
                os.remove(target)  # drop a partially written file
            except OSError:
                log.debug(target + " could not be removed.")
        log.debug(filename + ' is not downloaded.')
    else:
        log.debug(filename + ' is downloaded.')
# Walk the page numbers in range(start, end), build the youban.com page URL
# for each one, load it with PyQuery and select the ".mp3downloadbox" element.
# Parameters: start/end bound the page-number range, dir is accepted but not
# used in the visible part, logger receives the debug messages.
# NOTE(review): this listing is truncated. The regular expression on the last
# line is unterminated, the "try:" has no visible handler, and the rest of the
# function is missing, so it cannot run as published.
def download_all_mp3(start,end,dir,logger):
for x in range(start,end):
try:
url = \”http://www.youban.com/mp3-d\” + str(x) + \”.html\”
logger.debug(str(x) + \”: \”+url)
doc = py(url=url)
e = doc(\’.mp3downloadbox\’)
# NOTE(review): the "return" below ends the whole run at the first page with
# no download box; "continue" may have been intended. Confirm before changing.
if e is None or e == \’\’:
logger.debug(url+\” is not existed.\”)
return
# Convert the selection to a unicode string (Python 2 builtin).
e = unicode(e)
#logger.debug( e)
# The pattern and everything after it are missing from the listing.
regex = re.compile(ur\”.*
if __name__ == "__main__":
    # Usage: script.py [start end [dir_root]]
    # All arguments are optional; missing ones fall back to the defaults
    # below instead of raising IndexError.
    cli_args = sys.argv[1:]

    dir_root = "e:\\song"
    if len(cli_args) >= 3 and cli_args[2] != '':
        dir_root = cli_args[2]

    start, end = 1, 8000
    if len(cli_args) >= 2:
        # Convert before comparing: the raw arguments are strings, so
        # testing them against 0 directly is meaningless.
        try:
            first, last = int(cli_args[0]), int(cli_args[1])
        except ValueError:
            sys.exit("start and end must be integers.")
        if first >= 0 and last >= 0:
            start, end = first, last
    # A single parenthesised argument prints the same under the print
    # statement and the print function.
    print("Download from %s to %s.\n" % (start, end))

    # One sub-directory per requested range, e.g. <dir_root>/1-8000.
    dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.isdir(dir):
        # makedirs also creates dir_root when it does not exist yet.
        os.makedirs(dir)
    print("Download to " + dir + ".\n")

    # Log every message both to the console and to download.log inside the
    # target directory. `logger` stays a module-level name because other
    # functions in this file may read it as a global.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(dir, "download.log"))
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, dir, logger)
有需要的读者可以参考上述代码并继续修改。