# Viewer metadata from the original listing: 43 lines, 1.1 KiB, Python.
"""Scrape the SatNOGS stations page with ScrapeGraphAI and an Ollama model.

The script builds a ``SmartScraperGraph`` from ``graph_config``, runs it
against https://network.satnogs.org/stations/ and prints whatever ``run()``
returns.
"""

from scrapegraphai.graphs import SmartScraperGraph

# Configuration handed to SmartScraperGraph. Exactly one "model" /
# "model_tokens" pair is active inside "llm"; the commented-out pairs are
# alternative models kept for quick switching.
graph_config = {
    "llm": {
        "model": "ollama/mistral-nemo:12b",
        # NOTE(review): presumably taken from `ollama show`, like the
        # commented values below -- confirm it matches the context window
        # the served model really supports.
        "model_tokens": 1024000,

        # "model": "ollama/mistral:7b",
        # "model_tokens": 32768,

        # "model": "ollama/llama3.2:3b",
        # "model_tokens": 131072,  # ollama show llama3.2:latest

        # "model": "ollama/qwen2:7b",
        # "model_tokens": 32768,  # ollama show qwen2:7b

        "temperature": 0,  # carry out the task more accurately
        "format": "json",  # Ollama needs the format specified explicitly
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
    },
    "verbose": True,
    # "headless": False,
}

# Earlier experiment, kept for reference (the prompt was originally also
# tried in Chinese with the same meaning):
# smart_scraper_graph = SmartScraperGraph(
#     prompt="List all article titles on the page",
#     source="https://www.aivi.fyi/",
#     config=graph_config
# )

# The prompt points the model at the table body, where the page keeps the
# station rows.
smart_scraper_graph = SmartScraperGraph(
    prompt="List all Stations on the page. the data is in tbody",
    source="https://network.satnogs.org/stations/",
    config=graph_config,
)

# Run the pipeline
result = smart_scraper_graph.run()
print(result)