|
我试了论坛里的所有爬虫都不能用,自己改了貌似也跑不起来(水平有限),现在自己写了个 Python 的:
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Chapter index URL of the book to scrape.
BASE_URL = "https://www.biqiugexx.com/book_94736840/"
# Destination text file for the assembled book.
OUTPUT_PATH = "D:/1.txt"


def build_chapter_url(base_url: str, href: str) -> str:
    """Resolve a chapter ``href`` against the index URL.

    The site's hrefs repeat the book path (``/book_94736840/...``), so naive
    string concatenation duplicated that segment and had to be patched with a
    ``str.replace``. ``urljoin`` resolves absolute, root-relative and
    page-relative hrefs correctly in one step.
    """
    return urljoin(base_url, href)


def _fetch_soup(url: str) -> "BeautifulSoup | None":
    """GET ``url`` and return parsed HTML, or ``None`` on a non-200 status."""
    response = requests.get(url)
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.content, "html.parser")


def main() -> None:
    """Download every chapter listed on the index page into OUTPUT_PATH."""
    index = _fetch_soup(BASE_URL)
    if index is None:
        print("Failed to retrieve the content from the URL.")
        return

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for entry in index.find_all("dd"):
            link = entry.find("a")
            if link is None or not link.get("href"):
                # Some index pages pad the list with empty <dd> rows;
                # the original would raise TypeError here.
                continue
            chapter_title = link.text
            chapter_url = build_chapter_url(BASE_URL, link["href"])

            chapter_soup = _fetch_soup(chapter_url)
            if chapter_soup is None:
                print(f"Failed to retrieve content for Chapter {chapter_title}")
                continue
            body = chapter_soup.find("div", class_="showtxt")
            if body is None:
                print(f"Failed to retrieve content for Chapter {chapter_title}")
                continue

            # get_text("\n") keeps one line per <br>-separated paragraph.
            # The original replaced the literal "<br /><br />" on .text,
            # which never matches because tags are stripped during parsing,
            # so whole chapters came out as a single line. "\xa0" is what
            # &nbsp; indentation decodes to after parsing.
            chapter_content = body.get_text("\n").replace("\xa0", " ")
            f.write(chapter_title + "\n")
            f.write(chapter_content + "\n\n")
            print(f"Chapter {chapter_title} done.")


if __name__ == "__main__":
    main()
这本书还蛮好看的,论坛里的书源都没有这本。
|
|