本文实例讲述了Python实现自动登录人人网并采集信息的方法。分享给大家供大家参考。具体实现方法如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import re
import urllib2
import urllib
import cookielib
class Renren(object):
    """Log in to renren.com through a cookie-aware opener and print news items.

    The class targets Python 2 (it relies on the file's ``urllib2`` and
    ``cookielib`` imports).  Every ``print`` call passes one parenthesised
    argument, which prints the same text as the print statement would.

    Attributes set by the methods below:
        name, pwd, domain, origURL: login form values, see :meth:`setinfo`.
        cj: the ``LWPCookieJar`` holding the session cookies.
        opener: the opener built around ``cj`` (also installed globally).
        operate: the response object returned by the login request.
        file: the body of the login response.
    """

    # File the cookie jar is loaded from and saved to (original spelling kept
    # so that a cookie file written by an earlier run is still found).
    _COOKIE_FILE = './renren.coockie'
    _LOGIN_URL = 'http://www.renren.com/PLogin.do'
    # The login response body is dumped here on every login.
    _NEWS_FILE = 'news.html'

    def __init__(self):
        """Create the cookie jar, reload saved cookies and install the opener."""
        self.name = self.pwd = self.content = self.domain = self.origURL = ''
        self.operate = ''  # response object of the login request
        self.file = ''  # body of the login response
        self.cj = cookielib.LWPCookieJar()
        try:
            self.cj.revert(self._COOKIE_FILE)
        except IOError as e:
            # cookielib reports a missing or unreadable cookie file as
            # IOError (LoadError derives from it); continue with an empty jar.
            print(e)
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)

    def setinfo(self, username, password, domain, origURL):
        """Set the user's login information.

        Args:
            username: value sent as the ``email`` form field.
            password: value sent as the ``password`` form field.
            domain: value sent as the ``domain`` form field.
            origURL: value sent as the ``origURL`` form field.
        """
        self.name = username
        self.pwd = password
        self.domain = domain
        self.origURL = origURL

    def login(self):
        """Log in to Renren, save the cookies and print the collected news.

        The credentials are POSTed to the login URL.  The response object is
        kept in ``self.operate`` and its body in ``self.file``; the body is
        also written to ``news.html``.
        """
        params = {
            'domain': self.domain,
            'origURL': self.origURL,
            'email': self.name,
            'password': self.pwd,
        }
        print('login.......')
        req = urllib2.Request(self._LOGIN_URL, urllib.urlencode(params))
        # Send the login POST exactly once and reuse its body everywhere,
        # instead of issuing a second identical request just to re-read it.
        self.operate = self.opener.open(req)
        self.file = self.operate.read()
        self._save_newsfeed(self.file)
        print(type(self.operate))
        print(self.operate.geturl())
        # NOTE(review): geturl() returns the URL of the response, so this
        # test presumably never fails; a real check would have to inspect
        # the URL or the body -- confirm what a rejected login looks like.
        if self.operate.geturl():
            print('Logged on successfully!')
            self.cj.save(self._COOKIE_FILE)
            self.__viewnewinfo()
        else:
            print('Logged on error')

    def _save_newsfeed(self, body):
        """Write *body* to ``news.html``; a failed write is reported, not raised."""
        # The with-block closes the file on success as well as on failure.
        with open(self._NEWS_FILE, 'w') as newsfeed:
            try:
                newsfeed.write(body)
            except Exception as e:
                # The dump is best-effort, but say why it did not happen.
                print('could not write %s: %s' % (self._NEWS_FILE, e))

    def __viewnewinfo(self):
        """View friends' status updates (delegates to the collector)."""
        self.__caiinfo()

    def __caiinfo(self):
        """Collect information: print "<author> 说: <content>" for each hit.

        Reads ``self.file`` (the login response body), prints the number of
        range matches in the whole body, then scans it line by line.
        """
        # NOTE(review): these patterns look as if HTML tags were stripped
        # from them when the code was published -- verify against the markup.
        h3patten = re.compile('(.*?) ')  # range to search in
        apatten = re.compile('(.+):')  # author
        cpatten = re.compile(r'(.+)\s')  # content
        print(len(h3patten.findall(self.file)))
        # The response was already consumed by login(), so the lines are
        # taken from the stored body (line endings kept).
        infocontent = self.file.splitlines(True)
        print(type(infocontent))
        print('friend newinfo:')
        for line in infocontent:
            for m in h3patten.findall(line):
                authors = apatten.findall(m)
                bodies = cpatten.findall(m)
                # Skip fragments that lack an author or a content part;
                # indexing an empty result list would raise IndexError.
                if not (authors and bodies):
                    continue
                print(' '.join([authors[0], '说:', bodies[0]]))
                print('----------------------------------------------')
# Example run: replace the placeholder credentials before executing.
ren = Renren()
username = 'username'  # your Renren account
password = 'password'  # your Renren password
domain = 'www.renren.com'  # address of the Renren site
origURL = 'http://www.renren.com/home'  # address shown after logging in
ren.setinfo(username, password, domain, origURL)
ren.login()
希望本文所述对大家的Python程序设计有所帮助。