import json
import re
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
# Titles collected across all fetched result pages, in the order found.
titles = []

# Browser-like headers sent with every search request.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:55.0) Gecko/20100101 Firefox/55.0",
}

BAIDU_SEARCH_URL = 'https://www.baidu.com/s'
# Baidu's `pn` query parameter is a result offset, not a page index; a
# result page holds 10 entries, so page k starts at offset k * 10.
RESULTS_PER_PAGE = 10
PAGE_COUNT = 10
# Seconds to wait for a response before giving up on a page.
REQUEST_TIMEOUT = 10
# Result <div>s carry their metadata as JSON in a `data-tools` attribute;
# only those whose attribute text mentions "title" are of interest.
TITLE_ATTR_PATTERN = re.compile('title')


def build_search_url(key, page):
    """Return the Baidu search URL for *key* on zero-based result *page*.

    The keyword is percent-encoded so that characters such as ``&``, ``#``
    or spaces cannot corrupt the query string.
    """
    query = urlencode({'wd': key, 'pn': page * RESULTS_PER_PAGE})
    return BAIDU_SEARCH_URL + '?' + query


def parse_title(data):
    """Extract the ``title`` string from a ``data-tools`` JSON payload.

    Returns ``None`` when *data* is not valid JSON, is not a JSON object,
    or has no string ``title`` entry, so one malformed result does not
    abort the whole run.
    """
    try:
        payload = json.loads(data)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(payload, dict):
        return None
    title = payload.get('title')
    return title if isinstance(title, str) else None


def main():
    """Prompt for a keyword, scrape result titles, and save them to t.txt.

    Each title is written on its own line (UTF-8) and echoed to stdout.
    Pages whose request fails are reported and skipped.
    """
    key = input('请输入提供关键词:')
    for page in range(PAGE_COUNT):
        url = build_search_url(key, page)
        print(url)
        try:
            res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Keep the titles gathered so far instead of crashing or hanging.
            print('request failed, skipping page:', exc)
            continue
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'html.parser')
        for div in soup.find_all('div', {'data-tools': TITLE_ATTR_PATTERN}):
            data = div.attrs['data-tools']
            print(data)
            title = parse_title(data)
            if title is not None:
                titles.append(title)

    with open("t.txt", "w", encoding="utf-8") as fl:
        for title in titles:
            fl.write(title + '\n')
            print(title)


if __name__ == "__main__":
    main()