采集内容时，常常需要获取网页返回的状态码，以便做进一步处理。
下面是用 Python 编写的、用来获取网页 HTTP 状态码的脚本：
#!/usr/bin/python
# -*- coding: utf-8 -*-
#encoding=utf-8
#Filename:states_code.py
import urllib2
# Fetch a URL and report its HTTP status code, or the failure cause if the
# request cannot be completed.
url = 'http://www.jb51.net/'
response = None
try:
    # Give up after 5 seconds so an unresponsive host cannot hang the script.
    response = urllib2.urlopen(url, timeout=5)
except urllib2.URLError as e:
    # The error is inspected by attribute: a 'code' attribute holds the HTTP
    # status sent by the server, otherwise 'reason' describes the failure.
    # 'code' is checked first so an HTTP status is reported when available.
    if hasattr(e, 'code'):
        print('Error code: %s' % e.code)
    elif hasattr(e, 'reason'):
        # Wrap in a 1-tuple: 'reason' may itself be a tuple-like object,
        # which would otherwise be unpacked by the % operator.
        print('Reason: %s' % (e.reason,))
else:
    # Success path: report the status code, which is the script's purpose
    # (the original printed nothing when the request succeeded).
    print('Status code: %s' % response.getcode())
finally:
    # Release the connection if one was opened; compare against None rather
    # than relying on the truthiness of the response object.
    if response is not None:
        response.close()