"""Print the title and detail-page link of every movie on a ygdy8 listing page.

Running this file as a script downloads the listing page at ``url``, finds
the ``div.co_content8`` container, and for each ``table.tbspan`` inside it
prints the text and ``href`` of the first ``a.ulink`` anchor, followed by a
final ``ok`` line.
"""
import time

import requests  # HTTP client used to download the listing page
from bs4 import BeautifulSoup

# Request headers: send a browser-like User-Agent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'
}

# URL of the listing page.
url = 'https://www.ygdy8.net/html/gndy/dyzz/index.html'

# Seconds to wait for the server; requests waits indefinitely without this.
REQUEST_TIMEOUT = 10

# The original script decoded the page as gb2312. gb18030 is a superset of
# it, so the same bytes decode identically while characters outside the
# base gb2312 set no longer turn into replacement characters.
PAGE_ENCODING = 'gb18030'


def _fetch_soup(page_url, request_headers, timeout=REQUEST_TIMEOUT):
    """Download ``page_url`` and return it parsed as a BeautifulSoup tree.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return BeautifulSoup(response.text, 'lxml')


def _iter_movie_links(soup):
    """Yield ``(title, href)`` for each movie table found in ``soup``.

    Tables that contain no ``a.ulink`` anchor, or whose anchor has no
    ``href`` attribute, are skipped.

    Raises:
        RuntimeError: if the ``div.co_content8`` container is missing.
    """
    # The div with class="co_content8" wraps the whole movie list.
    container = soup.find('div', class_="co_content8")
    if container is None:
        raise RuntimeError(
            'listing container div.co_content8 not found; '
            'the page layout may have changed'
        )

    # Every table with class="tbspan" inside the container is one movie entry.
    for table in container.find_all('table', class_="tbspan"):
        # First a.ulink anchor in the table: its text is the movie title and
        # its href is the detail-page link.
        link = table.find('a', class_="ulink")
        if link is None:
            continue
        href = link.get('href')
        if href is None:
            continue
        yield link.text, href


def main():
    """Fetch the listing page and print one ``title href`` line per movie."""
    soup = _fetch_soup(url, headers)
    for title, href in _iter_movie_links(soup):
        print(title, href)
    print("ok")


if __name__ == "__main__":
    main()
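# Blog navigation text left over from the page this script was copied from (not part of the program):
# 下一篇:不推荐大家购买vultr的vps  (Next article: "Why I don't recommend buying a Vultr VPS")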