更新bs4-script.py,支持使用playwright和requests两种方式获取网页内容,并添加命令行参数解析

This commit is contained in:
严浩
2025-01-16 16:12:49 +08:00
parent b6809e90ed
commit 0a676c0ea9

View File

@ -9,6 +9,7 @@ import json
import re import re
import os import os
import requests import requests
from playwright.sync_api import sync_playwright
def clean_text(text): def clean_text(text):
@ -81,8 +82,14 @@ def extract_station_info(html_content):
return stations return stations
def get_content(use_local=True, url='https://network.satnogs.org/stations/?page=3'): def get_content(use_local=True, url='https://network.satnogs.org/stations/?page=3', method='requests'):
"""获取页面内容,可以从本地文件或网络获取""" """
获取页面内容
:param use_local: 是否使用本地文件
:param url: 要抓取的URL
:param method: 获取方式,可选值:'requests' 或 'playwright'
:return: 页面内容
"""
if use_local: if use_local:
try: try:
with open('source/satnogs.html', 'r', encoding='utf-8') as file: with open('source/satnogs.html', 'r', encoding='utf-8') as file:
@ -92,22 +99,49 @@ def get_content(use_local=True, url='https://network.satnogs.org/stations/?page=
use_local = False use_local = False
if not use_local: if not use_local:
if method == 'requests':
try: try:
response = requests.get(url) response = requests.get(url)
response.raise_for_status() response.raise_for_status()
print("成功从网络获取数据") print("成功使用requests从网络获取数据")
return response.text return response.text
except requests.RequestException as e: except requests.RequestException as e:
print(f"获取网页数据时出错: {e}") print(f"使用requests获取网页数据时出错: {e}")
return None
elif method == 'playwright':
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto(url)
# 等待页面加载完成
page.wait_for_selector('tr.station-row')
content = page.content()
browser.close()
print("成功使用playwright从网络获取数据")
return content
except Exception as e:
print(f"使用playwright获取网页数据时出错: {e}")
return None
else:
print(f"不支持的获取方式: {method}")
return None return None
def main(): def main():
# 设置数据源 (True为本地文件,False为网络请求) import argparse
use_local = False
# 创建命令行参数解析器
parser = argparse.ArgumentParser(description='抓取SatNOGS网站站点信息')
parser.add_argument('--local', action='store_true', help='使用本地文件')
parser.add_argument('--method', choices=['requests', 'playwright'], default='requests',
help='选择获取内容的方式: requests或playwright')
args = parser.parse_args()
# 获取内容 # 获取内容
content = get_content(use_local) content = get_content(use_local=args.local, method=args.method)
if not content: if not content:
print("无法获取数据,程序退出") print("无法获取数据,程序退出")
return return
@ -170,6 +204,19 @@ def main():
print(f"最低海拔: {min(altitudes)}m") print(f"最低海拔: {min(altitudes)}m")
print(f"平均海拔: {sum(altitudes) / len(altitudes):.1f}m") print(f"平均海拔: {sum(altitudes) / len(altitudes):.1f}m")
print("\n等待10秒后退出")
import time
time.sleep(10)
"""
# 使用playwright方式抓取
python bs4-script.py --method playwright
# 使用requests方式抓取
python bs4-script.py --method requests
# 使用本地文件
python bs4-script.py --local
"""
if __name__ == "__main__": if __name__ == "__main__":
main() main()