# 本帖最后由 xuehang00126 于 2019-7-29 22:49 编辑
# 刚好前几天我也写了个一样的...
#代码名称:爬取百度实时热点
#代码编号:0005
#代码时间:2019年7月21日17:24:53
#-----------------------------------------------------------
import requests,re
# Page that is fetched and scanned for titles and search indices.
url = "http://top.baidu.com/buzz?b=1&c=513&fr=topbuzz_b341_c513"
# Always pass a timeout: without one, requests.get() may wait forever on a
# stalled connection and the script would never finish.
html = requests.get(url, timeout=10)
# Decode the body using the encoding detected from its content.
html.encoding = html.apparent_encoding
def rs_title(text):
    """Extract the entry titles from the ranking page HTML.

    Every stretch of text between ``list-title`` and the next ``</a>`` is
    treated as one entry. The title is the text that follows the first
    ``>`` in that stretch (up to the next ``>``, if there is one).

    Args:
        text: HTML source to scan.

    Returns:
        A list of title strings in document order. Matches that contain
        no ``>`` are skipped rather than raising ``IndexError``.
    """
    titles = []
    for fragment in re.findall(r"list-title(.*?)</a>", text):
        pieces = fragment.split(">")
        # pieces[0] is the remainder of the opening tag; pieces[1] is the
        # link text. A fragment without ">" has no text to extract.
        if len(pieces) > 1:
            titles.append(pieces[1])
    return titles
def rs_index(text):
    """Extract the search-index values from the ranking page HTML.

    Every stretch of text between ``<span class="icon-`` and the next
    ``</span>`` is treated as one entry. The value is the text that follows
    the first ``>`` in that stretch (up to the next ``>``, if there is one).

    Args:
        text: HTML source to scan.

    Returns:
        A list of value strings in document order. Values are returned as
        text, not converted to numbers. Matches that contain no ``>`` are
        skipped rather than raising ``IndexError``.
    """
    values = []
    for fragment in re.findall(r'<span class="icon-(.*?)</span>', text):
        pieces = fragment.split(">")
        # pieces[0] is the rest of the class attribute; pieces[1] is the
        # span's text content.
        if len(pieces) > 1:
            values.append(pieces[1])
    return values
title = rs_title(html.text)
index = rs_index(html.text)
# Pair titles with indices via zip(): it stops at the shorter list, so a page
# where the two counts differ no longer raises IndexError part-way through.
for rank, (keyword, heat) in enumerate(zip(title, index), start=1):
    print("热搜排名-{}-关键词:{}----搜索指数--{}".format(rank, keyword, heat))