python实现的一个火车票转让信息采集器
admin
2023-07-31 02:06:05
0

好吧,我承认我是对晚上看到一张合适的票转让但打过电话去说已经被搞走了这件事情感到蛋疼。直接上文件吧。

#coding: utf-8
'''
Look up second-hand train ticket transfer listings during the Spring Festival travel rush.
Author: piglei2007@gmail.com
Date: 2011.01.25
'''
import re
import os
import time
import urlparse
import datetime
import traceback
import urllib2
import socket
# Give every socket created from here on a 20-second default timeout so that
# network calls made later in this script do not block indefinitely.
socket.setdefaulttimeout(20)

# Matches any run of whitespace; used to squeeze whitespace out of listing text.
BLANK_RE = re.compile(r"\s+")

# Install a process-wide urllib2 opener that keeps cookies between requests
# and sends a browser-like User-agent together with a permissive Accept header.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
  ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) Gecko/20090704 Firefox/3.5"),
  ("Accept", "*/*"),
]
urllib2.install_opener(opener)

from BeautifulSoup import BeautifulSoup

# Listing-page URL templates keyed by source site; each template is filled
# with a "train" number and a "date" value via %-formatting.
SOURCE = {
  "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
  "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}
# File that remembers the listing URLs already seen, one per line.
RECORD_FILE = "/tmp/ticket_records.txt"

def parse_record():
  """
  Load the listing URLs remembered from earlier runs.

  Reads RECORD_FILE as UTF-8 text, one URL per line, and returns the
  stripped, non-empty lines as a set. If the file cannot be opened, an
  empty record file is created and an empty set is returned.
  """
  import io

  try:
    with io.open(RECORD_FILE, "r", encoding="utf-8") as record_file:
      stripped = (line.strip() for line in record_file)
      # Blank lines carry no URL, so keep them out of the set.
      return set(entry for entry in stripped if entry)
  except IOError:
    # First run (or unreadable file): create it empty, closing the handle at once.
    io.open(RECORD_FILE, "w", encoding="utf-8").close()
    return set()

def flush_record(records):
  """
  Overwrite RECORD_FILE with the given URLs, one per line.

  records: iterable of URL strings to remember for the next run.
  The file is written as UTF-8 and closed before returning.
  """
  import io

  with io.open(RECORD_FILE, "w", encoding="utf-8") as record_file:
    # A unicode separator makes the joined text unicode, which io.open requires.
    record_file.write(u"\n".join(records))

def main(config):
  """
  Run one scraping pass.

  config is a dict with three keys:
    "trains": train numbers to look for,
    "dates":  date strings substituted into the SOURCE URL templates,
    "people": e-mail recipients.

  Every SOURCE page for each train/date pair is fetched and parsed.
  Listings whose URL is not yet in the record file and whose text contains
  the sleeper marker are mailed to config["people"]. Afterwards every
  listing URL seen (old and new) is written back to the record file.
  """
  existed = parse_record()
  to_email = []

  for train in config["trains"]:
    for date in config["dates"]:
      for source, url_template in SOURCE.items():
        page_url = url_template % dict(train=train, date=date)
        response = urllib2.urlopen(page_url)
        try:
          content = response.read()
        finally:
          # Release the connection even if reading fails.
          response.close()
        soup = BeautifulSoup(content)
        for href, text in parse_content(source, soup, train):
          # Resolve against the page actually fetched, not the raw template,
          # so relative links end up under the right path.
          link = urlparse.urljoin(page_url, href)
          # Sleeper berths only!
          if link not in existed and u"卧" in text:
            to_email.append([text, link])
          existed.add(link)
  if to_email:
    # The mail body is sent as HTML, so put each listing on its own line.
    content = u"<br/>\n".join(
      u" | ".join(entry) for entry in to_email
    ).encode("utf-8")
    simple_mail(config["people"], content)
  flush_record(existed)

def parse_content(type, soup, train):
  """
  Extract ticket listings from a parsed result page.

  type:  source key, "58" or "ganji"; any other value yields no listings.
  soup:  the parsed page (a BeautifulSoup object).
  train: train number; only used to filter matches on the "58" page.

  Returns a list of [href, text] pairs.
  """
  result = []
  if type == "58":
    info_table = soup.find("table", id="infolist")
    if info_table is not None:
      # Match the train number case-insensitively, but not when it is directly
      # followed by the word for "timetable". The number is escaped so it is
      # always treated as literal text.
      pattern = re.compile(u"%s(?!时刻表)" % re.escape(train), re.I)
      for x in info_table.findAll("tr", text=pattern):
        # The parent of each match is treated as the link element.
        # NOTE(review): this relies on BeautifulSoup 3 returning the matching
        # strings when text= is given -- confirm against the installed version.
        a = x.parent
        href = a.get("href") if a is not None else None
        if href is None:
          # Not a link after all; skip it instead of aborting the whole run.
          continue
        _text = BLANK_RE.sub("", a.text)
        result.append([href, _text])
  elif type == "ganji":
    for x in soup.findAll("dl", {"class": "list_piao"}):
      dt = x.dt
      a = dt.a if dt is not None else None
      href = a.get("href") if a is not None else None
      if href is None:
        # Entry without a <dt><a href=...> link; nothing to report.
        continue
      result.append([href, a.text])
  return result

# SMTP account used to send the notification mail.
# The user name and password below are placeholders to be replaced.
EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
EMAIL_PORT = 25

def simple_mail(to, content):
  """
  Send an HTML notification mail through the configured SMTP account.

  to:      sequence of recipient addresses.
  content: mail body, sent as UTF-8 HTML.

  Errors raised by smtplib while connecting, logging in or sending
  propagate to the caller; the connection is closed in every case.
  """
  import smtplib
  from email.header import Header
  from email.mime.text import MIMEText

  msgRoot = MIMEText(content, 'html', 'UTF-8')
  subject = "[%s]有票来啦!!!!" % datetime.datetime.today().isoformat(" ")
  # The subject contains non-ASCII text, so encode it as a proper mail header
  # instead of emitting raw bytes.
  msgRoot['Subject'] = Header(subject, 'utf-8')
  msgRoot['From'] = EMAIL_HOST_USER
  msgRoot['To'] = ", ".join(to)

  s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
  try:
    s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
    s.sendmail(EMAIL_HOST_USER, to, msgRoot.as_string())
  finally:
    # Always release the connection, even when login or sending fails.
    s.close()

def switch_time_zone():
  """
  Switch the process time zone to Asia/Shanghai.

  Sets the TZ environment variable and, where the platform provides
  time.tzset(), asks the C library to re-read it.
  """
  os.environ["TZ"] = "Asia/Shanghai"
  # time.tzset() is only available on Unix; skip it elsewhere instead of crashing.
  if hasattr(time, "tzset"):
    time.tzset()

# Applied at import time so every timestamp produced below uses this zone.
switch_time_zone()

# Script entry point: run one scraping pass with a hard-coded configuration
# and report the outcome on stdout.
if __name__ == '__main__':
  config = {
    "trains": ("k471",),
    "dates": ("20110129",),
    "people": (
      "youremail@sohu.com",
    )
  }
  try:
    main(config)
    print("%s: ok" % datetime.datetime.today())
  except Exception:
    # Top-level boundary: print the full traceback rather than dying silently.
    print(traceback.format_exc())

然后放入cron,你懂的。

相关内容

热门资讯

500 行 Python 代码... 语法分析器描述了一个句子的语法结构,用来帮助其他的应用进行推理。自然语言引入了很多意外的歧义,以我们...
定时清理删除C:\Progra... C:\Program Files (x86)下面很多scoped_dir开头的文件夹 写个批处理 定...
65536是2的几次方 计算2... 65536是2的16次方:65536=2¹⁶ 65536是256的2次方:65536=256² 6553...
Mobi、epub格式电子书如... 在wps里全局设置里有一个文件关联,打开,勾选电子书文件选项就可以了。
scoped_dir32_70... 一台虚拟机C盘总是莫名其妙的空间用完,导致很多软件没法再运行。经过仔细检查发现是C:\Program...
pycparser 是一个用... `pycparser` 是一个用 Python 编写的 C 语言解析器。它可以用来解析 C 代码并构...
小程序支付时提示:appid和... [Q]小程序支付时提示:appid和mch_id不匹配 [A]小程序和微信支付没有进行关联,访问“小...
微信小程序使用slider实现... 众所周知哈,微信小程序里面的音频播放是没有进度条的,但最近有个项目呢,客户要求音频要有进度条控制,所...
python绘图库Matplo... 本文简单介绍了Python绘图库Matplotlib的安装,简介如下: matplotlib是pyt...
Prometheus+Graf... 一,Prometheus概述 1,什么是Prometheus?Prometheus是最初在Sound...