python源码
pip install playwright -i https://mirrors.bfsu.edu.cn/pypi/web/simple/
python -m playwright install
# -*- coding: utf-8 -*-
"""
@Time : 2022/7/28 22:58
@Author : superhero
@Email : 838210720@qq.com
@File : dy.py
@IDE: PyCharm
"""
import asyncio
from playwright.async_api import async_playwright
import re
async def cancel_request(route, request):
    """Abort an intercepted network request.

    Used as a ``page.route`` handler so that matching resources are never
    downloaded.

    Args:
        route: The intercepted route; its ``abort()`` coroutine is awaited.
        request: The request being routed. Unused, but kept so the handler
            keeps its two-argument ``(route, request)`` signature.
    """
    await route.abort()
# Common XPath prefix of the profile header; every selector below hangs off it.
_PROFILE_XPATH = 'xpath=//*[@id="root"]/div/div[2]/div/div/div[2]/div[1]'


async def _text_or_default(page, selector, default):
    """Return the text of *selector*, or *default* if it is missing or empty."""
    try:
        text = await page.locator(selector).text_content(timeout=1000)
    except Exception:
        # Deliberate best effort: any failure to read the element within the
        # one-second timeout is treated as "element not present".
        return default
    return text or default


def _parse_id_and_region(text):
    """Split a ``label:id<label>:region`` profile line into result fields."""
    parts = str(text).split(':')
    info = {}
    if len(parts) > 1:
        account_id = parts[1]
        if len(parts) > 2:
            # The second field is followed by the 4-character label of the
            # third field (presumably "IP属地" -- TODO confirm), so strip it.
            account_id = account_id[:-4]
        info['抖音号'] = account_id
        if len(parts) > 2:
            info['IP属地'] = parts[2]
    return info


async def get_userinfo(p, _url: str) -> dict:
    """Scrape a user's profile page and print/return the collected fields.

    Collects the verification text, the live-button text, the nickname, the
    account id, the IP location, the counter texts and the signature.

    Args:
        p: A started Playwright object; ``p.chromium`` is used to launch a
            headless browser.
        _url: URL of the profile page to open.

    Returns:
        The collected fields as a dict. The same dict is also printed,
        wrapped as ``{'data': ...}``.

    Raises:
        Whatever Playwright raises when navigation fails or the nickname
        element cannot be read; only the live and verification lookups fall
        back to defaults.
    """
    browser = await p.chromium.launch(headless=True)
    try:
        context = await browser.new_context()
        page = await context.new_page()
        # Abort requests whose URL matches an image/stylesheet/script
        # extension so they are never downloaded.
        await page.route(re.compile(r"(\.png)|(\.jpg)|(\.css)|(\.js)"), cancel_request)
        await page.goto(_url)

        id_lines = await page.query_selector_all(_PROFILE_XPATH + '/p[1]')
        user_name = await page.locator(
            _PROFILE_XPATH + '/div[2]/h1/span/span/span/span/span/span'
        ).text_content()
        is_live = await _text_or_default(
            page, _PROFILE_XPATH + '/div[3]/div[2]/button', '未开播')
        is_re = await _text_or_default(
            page, _PROFILE_XPATH + '/div[2]/div/span', '未认证')
        counters = await page.query_selector_all(_PROFILE_XPATH + '/div[1]/div[2]/div')
        signature_spans = await page.query_selector_all(_PROFILE_XPATH + '/p[2]/span')

        data = {
            '认证信息': is_re,
            '是否开播': is_live,
            '昵称': user_name,
        }
        for line in id_lines:
            data.update(_parse_id_and_region(await line.text_content()))
        data['num'] = [str(await item.text_content()) for item in counters]
        # If several spans match, the last one wins.
        for span in signature_spans:
            data['签名'] = await span.text_content()
    finally:
        # Always release the browser process, even when scraping fails.
        await browser.close()

    print({'data': data})
    return data
# Profile that is scraped when no URL is given.
DEFAULT_PROFILE_URL = 'https://www.douyin.com/user/MS4wLjABAAAAzzmS2TgIEvxGftMpWD13Ty8k5HmsjlGsLJ1yBUEm2Ew'


async def main(url=DEFAULT_PROFILE_URL):
    """Start Playwright and scrape one profile page.

    Args:
        url: Profile page to scrape; defaults to ``DEFAULT_PROFILE_URL``.
    """
    # The context manager stops Playwright again once scraping is done.
    async with async_playwright() as playwright:
        await get_userinfo(playwright, url)
# Script entry point: run the main() coroutine to completion.
asyncio.run(main())
- 运行结果如下
|