本文实例讲述了 Python 中 Django 框架通过正则表达式搜索页面上 Email 地址的方法,分享给大家供大家参考。具体实现方法如下:
import re
from django.shortcuts import render
from pattern.web import URL, DOM, abs, find_urls
def index(request):
    """Find email addresses on the requested URL and its contact/about pages.

    Reads the ``url`` query parameter. A value starting with ``http`` is used
    as an absolute URL; anything else is treated as a domain name and fetched
    over ``http``. The page is downloaded, every link whose markup mentions
    "contact" or "about" is followed, and all email addresses found in the
    ``<body>`` of those pages are collected.

    Renders ``index.html`` with the context keys ``url`` (the raw query
    value), ``emails`` (a set of matched addresses) and ``error`` (the
    exception raised while downloading the requested page, or ``''``).
    """
    error = ''
    emails = set()
    url_string = request.GET.get('url', '')
    EMAIL_REGEX = re.compile(
        r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,6}', re.IGNORECASE)

    # NOTE(review): url_string is user-controlled and is fetched server-side;
    # consider restricting the allowed hosts before exposing this view.
    if url_string:
        # use absolute url or domain name
        if url_string.startswith('http'):
            url = URL(url_string)
        else:
            url = URL(domain=url_string, protocol='http')
        try:
            dom = DOM(url.download(cached=True))
        except Exception as e:
            # Any failure on the requested page is reported to the template.
            error = e
        else:
            contact_urls = {url.string}
            # search links of contact page
            for link in dom('a'):
                href = link.attributes.get('href', '')
                # An empty href would only resolve back to the base page,
                # which is already in the set.
                if href and re.search(
                        r'contact|about', link.source, re.IGNORECASE):
                    contact_urls.add(
                        abs(href, base=url.redirect or url.string))
            for contact_url in contact_urls:
                # download contact page
                try:
                    page = DOM(URL(contact_url).download(cached=True))
                except Exception:
                    # Best effort: a dead or non-HTTP secondary link (e.g.
                    # mailto:, javascript:) must not fail the whole request.
                    continue
                bodies = page('body')
                if not bodies:
                    # Nothing to scan on a page without a <body> element.
                    continue
                # search emails in the body of the page; findall keeps every
                # address, not just the first one on each line. The pattern
                # cannot match a newline, so scanning the whole body at once
                # finds the same matches as scanning line by line.
                emails.update(EMAIL_REGEX.findall(bodies[0].content))
    data = {
        'url': url_string,
        'emails': emails,
        'error': error,
    }
    return render(request, 'index.html', data)
希望本文所述对大家的Python程序设计有所帮助。