"""Download the chapters of one novel from www.abcxs.com into text files.

The script fetches the book's index page, reads the book title from the
``og:title`` tag, collects the chapter links found inside the
``div.listmain`` element, and writes the ``div.showtxt`` text of every
chapter to ``<OUTPUT_ROOT><book title>/<chapter title>.txt``.

NOTE(review): this file was reconstructed from a copy whose indentation and
underscores had been lost (HTML/markdown residue). It is assumed that the
original intent was to download *every* listed chapter inside the loop --
confirm against the original script.
"""
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Index page of the book to download.
BOOK_URL = "https://www.abcxs.com/book/13417/#main"
# Directory under which one sub-directory per book is created.
OUTPUT_ROOT = "J:/python/Python/我的Python学习/爬虫及文件写入/"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30
# Characters that are not allowed in Windows file names.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _fetch_soup(page_url):
    """Return the parsed HTML of *page_url*.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    return BeautifulSoup(response.text, "html.parser")


def _safe_filename(name):
    """Return *name* with characters illegal in file names replaced by ``_``."""
    return _ILLEGAL_FILENAME_CHARS.sub("_", name).strip()


def _book_title(soup):
    """Return the book title from the ``og:title`` tag, or ``None``."""
    tags = soup.find_all(property="og:title")
    print(tags)
    # Read the attribute directly instead of regex-matching the tag's
    # string form: it does not depend on attribute order or quoting.
    for tag in tags:
        content = tag.get("content")
        if content and content.strip():
            return content.strip()
    return None


def _chapter_links(soup):
    """Return ``(chapter title, absolute URL)`` pairs from ``div.listmain``."""
    listing = soup.find("div", class_="listmain")
    if listing is None:
        return []
    links = []
    for anchor in listing.find_all("a"):
        href = anchor.get("href")
        name = anchor.get_text(strip=True)
        # Skip anchors without a target or a usable title.
        if href and name:
            links.append((name, urljoin(BOOK_URL, href)))
    return links


def _download_chapter(chapter_url, target_path):
    """Save the ``div.showtxt`` text of *chapter_url* to *target_path*.

    Returns:
        ``True`` if the chapter was written, ``False`` if the page had no
        ``div.showtxt`` element.

    Raises:
        requests.RequestException: if the page cannot be fetched.
    """
    body = _fetch_soup(chapter_url).find("div", class_="showtxt")
    if body is None:
        print("Chapter text not found:", chapter_url)
        return False
    text = body.text
    # Console preview: runs of eight non-breaking spaces become line breaks.
    print(text.replace("\xa0" * 8, "\n"))
    # Explicit UTF-8 so the result does not depend on the platform's
    # default encoding.
    with open(target_path, "w", encoding="utf-8") as f:
        f.write(text.replace("\xa0", " "))
    return True


def main():
    """Download every chapter of the book at ``BOOK_URL``."""
    index = _fetch_soup(BOOK_URL)

    title = _book_title(index)
    if not title:
        # Without a title there is no directory name to write into.
        print("Nothing found!!")
        return
    # Console message text kept unchanged from the original script.
    print("searchObj.group(1) : ", title)

    # Create the book directory once, including missing parents.
    path = OUTPUT_ROOT + _safe_filename(title)
    os.makedirs(path, exist_ok=True)

    for name, chapter_url in _chapter_links(index):
        print(name, chapter_url)
        target_path = os.path.join(path, _safe_filename(name) + ".txt")
        try:
            _download_chapter(chapter_url, target_path)
        except requests.RequestException as err:
            # One unreachable chapter should not abort the whole download.
            print("Failed to download", chapter_url, ":", err)


if __name__ == "__main__":
    main()