|
我试了论坛里的所有爬虫都不能用,自己改了貌似也跑不起来(水平有限),现在自己写了个 Python 的:
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Chapter index URL of the book to scrape.
BASE_URL = "https://www.biqiugexx.com/book_94736840/"
# Destination text file for the assembled book.
OUTPUT_PATH = "D:/1.txt"


def build_chapter_url(base_url: str, href: str) -> str:
    """Resolve a chapter ``href`` against the index URL.

    The site's hrefs repeat the book path (``/book_94736840/...``), so naive
    string concatenation duplicated that segment and had to be patched with a
    ``str.replace``. ``urljoin`` resolves absolute, root-relative and
    page-relative hrefs correctly in one step.
    """
    return urljoin(base_url, href)


def _fetch_soup(url: str) -> "BeautifulSoup | None":
    """GET ``url`` and return parsed HTML, or ``None`` on a non-200 status."""
    response = requests.get(url)
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.content, "html.parser")


def main() -> None:
    """Download every chapter listed on the index page into OUTPUT_PATH."""
    index = _fetch_soup(BASE_URL)
    if index is None:
        print("Failed to retrieve the content from the URL.")
        return

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for entry in index.find_all("dd"):
            link = entry.find("a")
            if link is None or not link.get("href"):
                # Some index pages pad the list with empty <dd> rows;
                # the original would raise TypeError here.
                continue
            chapter_title = link.text
            chapter_url = build_chapter_url(BASE_URL, link["href"])

            chapter_soup = _fetch_soup(chapter_url)
            if chapter_soup is None:
                print(f"Failed to retrieve content for Chapter {chapter_title}")
                continue
            body = chapter_soup.find("div", class_="showtxt")
            if body is None:
                print(f"Failed to retrieve content for Chapter {chapter_title}")
                continue

            # get_text("\n") keeps one line per <br>-separated paragraph.
            # The original replaced the literal "<br /><br />" on .text,
            # which never matches because tags are stripped during parsing,
            # so whole chapters came out as a single line. "\xa0" is what
            # &nbsp; indentation decodes to after parsing.
            chapter_content = body.get_text("\n").replace("\xa0", " ")
            f.write(chapter_title + "\n")
            f.write(chapter_content + "\n\n")
            print(f"Chapter {chapter_title} done.")


if __name__ == "__main__":
    main()
这本书还蛮好看的,论坛里的书源都没有这本。
|
|