""" 提示词: 在文件result-script.py创建一个Python脚本来抓取页面上的所有站点以及站点信息 with open('source/satnogs.html', 'r', encoding='utf-8') as file: content = file.read() """ from bs4 import BeautifulSoup import json import re import os import requests def clean_text(text): """清理文本,移除多余的空白字符""" return ' '.join(text.split()) def extract_station_info(html_content): soup = BeautifulSoup(html_content, 'html.parser') stations = [] # 查找所有站点行 for row in soup.find_all('tr', class_='station-row'): station = {} # 提取站点ID station['id'] = row.find('span', class_='station').text.strip() # 提取站点名称 station['name'] = row.find_all('td')[1].text.strip() # 提取位置信息 location_td = row.find_all('td')[2] location_span = location_td.find('span', {'data-toggle': 'tooltip'}) if location_span: # 提取坐标 coordinates = location_span['title'].strip() # 提取网格代码 - 只保留网格代码部分 grid = location_span.text.strip().split('@')[0].strip() # 提取海拔 altitude_text = location_td.text altitude_match = re.search(r'@(\d+)m', altitude_text) altitude = f"{altitude_match.group(1)}m" if altitude_match else "N/A" station['location'] = { 'coordinates': coordinates, # 例如: "39.236°, -86.305°" 'grid': grid, # 例如: "EM69uf" 'altitude': altitude # 例如: "280m" } # 提取总观测数 total_obs = row.find('a', class_='badge-success') station['total_observations'] = total_obs.text.strip() if total_obs else '0' # 提取未来观测数 future_obs = row.find('a', class_='badge-info') station['future_observations'] = future_obs.text.strip() if future_obs else '0' # 提取天线信息 antennas = [] for antenna_span in row.find_all('span', class_='antenna-pill'): freq_range = antenna_span['title'].strip() antenna_type = antenna_span.text.strip() antennas.append({ 'type': antenna_type, 'frequency_range': freq_range }) station['antennas'] = antennas # 提取所有者信息 owner_link = row.find_all('td')[-1].find('a') if owner_link: station['owner'] = { 'name': owner_link.text.strip(), 'profile_url': owner_link['href'] } stations.append(station) return stations def get_content(use_local=True, url='https://network.satnogs.org/stations/?page=3'): """获取页面内容,可以从本地文件或网络获取""" if use_local: try: with open('source/satnogs.html', 'r', encoding='utf-8') as file: return file.read() except FileNotFoundError: print("本地文件不存在,将尝试从网络获取数据") use_local = False if not use_local: try: response = requests.get(url) response.raise_for_status() print("成功从网络获取数据") return response.text except requests.RequestException as e: print(f"获取网页数据时出错: {e}") return None def main(): # 设置数据源 (True为本地文件,False为网络请求) use_local = False # 获取内容 content = get_content(use_local) if not content: print("无法获取数据,程序退出") return # 提取站点信息 stations = extract_station_info(content) # 确保_tmp目录存在 os.makedirs('_tmp', exist_ok=True) # 将结果保存为JSON文件 with open('_tmp/stations.json', 'w', encoding='utf-8') as f: json.dump(stations, f, ensure_ascii=False, indent=2) # 打印统计信息 print(f"已成功提取 {len(stations)} 个站点的信息") print("详细信息已保存到 _tmp/stations.json 文件中") # 打印一些数据统计 total_observations = sum(int(station['total_observations']) for station in stations) print(f"所有站点总观测数: {total_observations}") # 统计天线类型 antenna_types = {} for station in stations: for antenna in station['antennas']: antenna_type = antenna['type'] antenna_types[antenna_type] = antenna_types.get(antenna_type, 0) + 1 print("\n天线类型统计:") for antenna_type, count in sorted(antenna_types.items()): print(f"{antenna_type}: {count}个") # 统计频段分布 print("\n频段分布:") vhf_count = uhf_count = other_count = 0 for station in stations: for antenna in station['antennas']: if 'VHF' in antenna['type']: vhf_count += 1 if 'UHF' in antenna['type']: uhf_count += 1 if not ('VHF' in antenna['type'] or 'UHF' in antenna['type']): other_count += 1 print(f"VHF频段天线: {vhf_count}个") print(f"UHF频段天线: {uhf_count}个") print(f"其他频段天线: {other_count}个") # 打印海拔分布 altitudes = [] for station in stations: alt = station['location']['altitude'] if alt != 'N/A': altitudes.append(int(alt[:-1])) # 移除'm'并转换为整数 if altitudes: print(f"\n海拔统计:") print(f"最高海拔: {max(altitudes)}m") print(f"最低海拔: {min(altitudes)}m") print(f"平均海拔: {sum(altitudes) / len(altitudes):.1f}m") if __name__ == "__main__": main()