./pycheck404url.py 0 100 1
0:起始偏移量,100:查询总数,1:查询主表
./pycheck404url.py 0 100 2
0:起始偏移量,100:查询总数,2:查询图片表
#!/usr/bin/env python
#coding:utf-8
from mysql import connector
import urllib2
import sys
import socket
# MySQL connection settings handed to pyconnect() by queryData().
HOST = 'localhost'
USER = 'root'
PASSWORD = ''
DATABASE = 'pytest'
# Base URL that check200() prepends to each image path read from the database.
ImagePrefix = 'http://image.59store.com'
def pyconnect(**kwargs):
    """Open a MySQL connection from keyword settings.

    Recognised keys are ``host``, ``user``, ``password`` and ``database``;
    a key that is not supplied is passed to the driver as ``None``.

    Returns:
        The connection object on success. If the driver reports an error,
        the error is printed and ``False`` is returned, so callers must
        check the result before using it.
    """
    try:
        return connector.connect(
            host=kwargs.get('host'),
            user=kwargs.get('user'),
            passwd=kwargs.get('password'),
            db=kwargs.get('database')
        )
    except connector.Error as e:
        # Best effort: report the failure and let the caller decide what to do.
        print(e)
        return False
def queryData(start=0, limit=100, type=1):
    """Fetch one page of ``(rid, image path)`` rows.

    Args:
        start: Row offset for the LIMIT clause; converted with ``int()``.
        limit: Maximum number of rows to return; converted with ``int()``.
        type: ``1`` selects ``rid, default_image`` from ``prefix_repo``; any
            other value selects ``rid, url`` from ``prefix_repoimage``.

    Returns:
        The rows returned by ``fetchall()``, or ``False`` when the database
        connection could not be opened.

    Raises:
        ValueError: if ``start`` or ``limit`` cannot be converted to an int.
    """
    # Values may arrive as command-line strings. Validate them as integers and
    # bind them as parameters instead of interpolating them into the SQL text.
    offset = int(start)
    count = int(limit)

    if type == 1:
        sql = 'SELECT rid,default_image FROM prefix_repo limit %s,%s'
    else:
        sql = 'SELECT rid,url FROM prefix_repoimage limit %s,%s'

    connect = pyconnect(host=HOST, user=USER, password=PASSWORD, database=DATABASE)
    if connect is False:
        return False

    # Always release the cursor and the connection, even if the query fails.
    try:
        cursor = connect.cursor()
        try:
            cursor.execute(sql, (offset, count))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connect.close()
def check200(start, limit, type=1):
    """Request every image URL in one page of rows and log the broken ones.

    Rows come from ``queryData(start, limit, type)``. Each row's second column
    is appended to ``ImagePrefix`` and fetched with a 3-second timeout. Any URL
    that does not answer with HTTP 200 (including HTTP errors, network errors
    and timeouts) is appended to ``<type>.txt`` as ``rid<TAB>url``.

    Args:
        start: Row offset forwarded to ``queryData``.
        limit: Row count forwarded to ``queryData``.
        type: Table selector forwarded to ``queryData``; also used as the
            report file's base name.
    """
    # The context manager closes the report file even if a request blows up.
    with open('%s.txt' % (type), 'a') as report:
        data = queryData(start, limit, type)
        if data is False:
            return
        for row in data:
            url = ImagePrefix + row[1]
            print(url)
            try:
                response = urllib2.urlopen(url, timeout=3)
                try:
                    http_code = response.getcode()
                finally:
                    response.close()
            except (urllib2.URLError, socket.timeout):
                # URLError is the base class of HTTPError and also covers DNS
                # failures, refused connections and connect-phase timeouts, so
                # one bad URL no longer aborts the whole run.
                http_code = None
            if http_code == 200:
                print('%s ok' % row[0])
            else:
                report.write('%s\t%s \n' % (row[0], url))
if __name__ == "__main__":
    # Usage: ./pycheck404url.py START LIMIT TYPE
    #   START - row offset, LIMIT - number of rows to check,
    #   TYPE  - 1 checks the main table, any other value the image table.
    args = sys.argv
    print(args)
    if len(args) < 4:
        sys.stderr.write('usage: %s START LIMIT TYPE\n' % args[0])
        sys.exit(1)
    try:
        start, limit, table_type = int(args[1]), int(args[2]), int(args[3])
    except ValueError:
        sys.stderr.write('START, LIMIT and TYPE must be integers\n')
        sys.exit(1)
    check200(start, limit, table_type)
1.商品主表
-- Main product table. The checker script selects `rid` and `default_image`
-- from it when run with table type 1.
CREATE TABLE `prefix_repo` (
`rid` int(10) unsigned NOT NULL AUTO_INCREMENT,
`status` tinyint(3) NOT NULL DEFAULT \'0\',
-- Image path; the script prepends its ImagePrefix to this value.
`default_image` varchar(100) NOT NULL DEFAULT \'\',
PRIMARY KEY (`rid`),
-- Composite secondary index on (rid, status).
KEY `rid` (`rid`,`status`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2.图片表
-- Image table. The checker script selects `rid` and `url` from it when run
-- with a table type other than 1.
CREATE TABLE `prefix_repoimage` (
`item_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
-- Presumably refers to prefix_repo.rid; no foreign key is declared here.
`rid` int(10) unsigned NOT NULL,
-- Image path; the script prepends its ImagePrefix to this value.
`url` varchar(120) NOT NULL DEFAULT \'\',
PRIMARY KEY (`item_id`),
KEY `rid` (`rid`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;
上一篇:编程之外,8种拓展方式
下一篇:中文维基百科文本数据获取与预处理