
URL介绍.png
如何通过urllib2实现请求,参看下图:

通过urllib2完成请求.png
import urllib2
# Send ``data`` to ``uri`` with an HTTP verb other than GET/POST.
# NOTE(review): ``uri`` and ``data`` are not defined in this snippet; the
# surrounding code has to provide them.
request = urllib2.Request(uri, data=data)
# urllib2's Request.get_method() only ever answers 'GET' or 'POST', so the
# verb is forced by replacing the method on this one request object.
request.get_method = lambda: 'PUT'  # or 'DELETE'
response = urllib2.urlopen(request)
from urllib2 import Request, urlopen, URLError, HTTPError
# Request the page and, if the fetch fails, report why.
req = Request('http://www.jianshu.com/users/92a1227beb27/latest_articles')
try:
    response = urlopen(req)
except URLError as e:
    # HTTPError is a subclass of URLError, so this single handler sees both;
    # the attribute present on the exception tells the two cases apart.
    if hasattr(e, 'code'):
        # The server replied, but with an HTTP error status.
        print("The server couldn't fulfill the request.")
        # Python 2's ``print a, b`` puts a space between items; the format
        # string reproduces that exact output and also parses on Python 3.
        print('%s %s' % ('Error code: ', e.code))
    elif hasattr(e, 'reason'):
        # No HTTP status is available; only a failure reason.
        print('We failed to reach a server.')
        print('%s %s' % ('Reason: ', e.reason))

数据解析.png
# coding:utf-8
import urllib2
from lxml import etree
import sys
# Show the interpreter's default string encoding before it is overridden.
print(sys.getdefaultencoding())
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# startup by the site module; reloading sys makes it available again.
# NOTE(review): this switches the default encoding for the whole process and
# is generally discouraged -- prefer explicit encode()/decode() calls.
reload(sys)
sys.setdefaultencoding('utf-8')
# The site's data is complex; there is no way to handle it yet.
def oper(url):
    """Download ``url`` and print selected ``<input>`` values from the page.

    The page is requested with a browser-like User-Agent header and parsed
    as UTF-8 HTML. The ``value`` attribute of every ``<input>`` element named
    ``stainfo``, ``staname`` and ``stainfodby`` is then printed, one per
    line, field by field in that order.

    Args:
        url: Address of the page to download.

    Returns:
        None. If the download fails with ``urllib2.URLError`` the error's
        reason is printed and the function returns without parsing.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) '
            'Gecko/20091201 Firefox/3.5.6'
        ),
    }
    req = urllib2.Request(url=url, headers=headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError as e:
        print(e.reason)
        # There is no response to parse; continuing would fail on the
        # unbound ``response`` name.
        return

    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

    myparser = etree.HTMLParser(encoding="utf-8")
    selector = etree.HTML(html, parser=myparser)

    # The same query is run per field name; the tuple order is the order in
    # which the groups of values are printed.
    for field in ('stainfo', 'staname', 'stainfodby'):
        for value in selector.xpath('//input[@name="%s"]/@value' % field):
            print(value)
def start():
    """Run ``oper`` on each URL in the hard-coded list, in order."""
    urls = ['http://58.68.130.147/#']
    for url in urls:
        oper(url)
# Start the scrape only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    start()