using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Policy;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using MyLib.HttpHelper;
using MyLib.TextHelper;
namespace 爬虫豆瓣Top250
{
internal class Program
{
/// <summary>
/// Program entry point: collects the detail-page links from the Top 250 list,
/// then processes each link in turn, pausing one second between pages.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("程序开始运行...");

    List<string> detailPages = Get全部影片详情页链接("https://movie.douban.com/top250");

    for (int index = 0; index < detailPages.Count; index++)
    {
        Get影片信息And写到文件(detailPages[index]);

        // Fixed one-second pause after every detail page.
        Thread.Sleep(1000);
    }

    Console.WriteLine("任务已全部完成!");

    // Wait for a key press before the process exits.
    Console.ReadKey();
}
/// <summary>
/// Requests the list at <paramref name="url"/> page by page (start offsets 0, 25, ... 225)
/// and gathers the links found inside each page's <c>grid_view</c> list element.
/// </summary>
/// <param name="url">Base list address; <c>?start=N</c> is appended for every request.</param>
/// <returns>
/// Every link collected, in request order. Repeats are removed within a single page
/// (by <c>ExtractUniqueUrls</c>) but not across pages.
/// </returns>
static List<string> Get全部影片详情页链接(string url)
{
    var requestHeaders = new Dictionary<string, string>
    {
        { "user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0" },
    };
    var collected = new List<string>();

    for (int offset = 0; offset < 250; offset += 25)
    {
        // NOTE(review): HttpGetData lives in MyLib; .Result presumably blocks until the
        // page source is available - confirm against the helper.
        var pageHtml = HttpHelper.HttpGetData(url + "?start=" + offset, requestHeaders).Result;

        // Narrow the page down to the text between the list's opening and closing markers.
        var listMarkup = TextHelper.GetMidText(pageHtml, @"<ol class=""grid_view"">", @"</ol>");

        collected.AddRange(ExtractUniqueUrls(listMarkup));

        // Half-second pause after each list page.
        Thread.Sleep(500);
    }

    return collected;
}
/// <summary>
/// Collects the <c>href</c> value of every anchor-style tag matched in
/// <paramref name="html"/>, keeping only the first occurrence of each value.
/// </summary>
/// <param name="html">Markup to scan. May be <c>null</c> or empty.</param>
/// <returns>
/// Distinct href values in order of first appearance; an empty list when
/// <paramref name="html"/> is <c>null</c>, empty, or contains no match.
/// </returns>
static List<string> ExtractUniqueUrls(string html)
{
    List<string> urls = new List<string>();

    // Regex.Matches throws ArgumentNullException for null input; treat
    // "nothing to scan" as "no links" so a failed extraction upstream does not crash.
    if (string.IsNullOrEmpty(html))
    {
        return urls;
    }

    // Remembers values already emitted: constant-time membership test instead of
    // rescanning the result list, while the list itself keeps first-seen order.
    HashSet<string> seen = new HashSet<string>();

    foreach (Match match in Regex.Matches(html, @"<a.*?href=""(.*?)"".*?>"))
    {
        // Group 1 is the lazily captured text between the quotes after href=.
        string url = match.Groups[1].Value;
        if (seen.Add(url))
        {
            urls.Add(url);
        }
    }

    return urls;
}
/// <summary>
/// Fetches one film detail page, extracts a fixed set of fields from its HTML by
/// left/right text markers, prints them to the console and appends the same lines
/// to <c>download\豆瓣.txt</c> under the application base directory.
/// </summary>
/// <param name="url">Address of the detail page to fetch.</param>
static void Get影片信息And写到文件(string url)
{
    var headers = new Dictionary<string, string>() {
        {"user-agent" ,"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"},
    };

    // NOTE(review): HttpGetData lives in MyLib; .Result presumably blocks until the
    // page source is available - confirm against the helper.
    var ret = HttpHelper.HttpGetData(url, headers);
    var html = ret.Result;

    // Single-valued fields: text between a left marker and a right marker.
    var 电影名 = TextHelper.GetMidText(html, @"<span property=""v:itemreviewed"">", @"</span>");
    var 年份 = TextHelper.GetMidText(html, @"<span class=""year"">(", @")</span>");
    var 导演 = TextHelper.GetMidText(html, @"rel=""v:directedBy"">", @"</a>");

    // Cast: append individual entries (not the collection object itself) and stop
    // after three. Iterating instead of indexing means a page with fewer than
    // three entries simply yields fewer names rather than failing.
    var zy_list = TextHelper.GetMidTextBatch(html, @"rel=""v:starring"">", @"</a>");
    var 主演 = "";
    var zy_count = 0;
    foreach (var zy in zy_list)
    {
        if (zy_count >= 3)
        {
            break;
        }
        主演 += zy + "/";
        zy_count++;
    }

    // Genres: every entry, each followed by "/".
    var lx_list = TextHelper.GetMidTextBatch(html, @"property=""v:genre"">", @"</span>");
    var 类型 = "";
    foreach (var lx in lx_list)
    {
        类型 += lx + "/";
    }

    var 地区 = TextHelper.GetMidText(html, @"<span class=""pl"">制片国家/地区:</span>", @"<br/>");
    var 语言 = TextHelper.GetMidText(html, @"语言:</span>", @"<br/>");
    var 评分 = TextHelper.GetMidText(html, @"<strong class=""ll rating_num"" property=""v:average"">", @"</strong>");

    // Build the report once; the console and the file receive identical lines.
    var report_lines = new string[]
    {
        "***************************影片信息***************************",
        "电影名:" + 电影名,
        "年份:" + 年份,
        "导演:" + 导演,
        "主演:" + 主演,
        "类型:" + 类型,
        "地区:" + 地区,
        "语言:" + 语言,
        "评分:" + 评分,
    };

    foreach (var line in report_lines)
    {
        Console.WriteLine(line);
    }

    // Resolve the output path once instead of rebuilding it for every appended line.
    var out_path = System.AppDomain.CurrentDomain.BaseDirectory + "download\\豆瓣.txt";
    foreach (var line in report_lines)
    {
        TextHelper.AppendToTxtFile(out_path, line);
    }

    Console.WriteLine("写入完成");
}
}
}
// Attachment: 爬虫豆瓣Top250.rar (71.03 KB, downloads: 4)