demo

2025-01-13 17:21:40 +08:00
parent 456f56e40d
commit 320df7e2a5
5 changed files with 186 additions and 0 deletions
--- a/demo.py
+++ b/demo.py
@ -0,0 +1,44 @@
+from scrapegraphai.graphs import SmartScraperGraph
+
+graph_config = {  # configuration dict passed to SmartScraperGraph below
+    "llm": {
+        "model": "ollama/mistral-nemo:latest",
+        "temperature": 0,  # for more accurate task execution
+        "format": "json",  # Ollama needs the format specified explicitly
+        # "base_url": "http://localhost:11434",  # set the Ollama URL
+        # The LLM is responsible for understanding the prompt "List me all the projects with their descriptions" and extracting the relevant information from the page content
+        # - LLM model (ollama/mistral-nemo:latest):
+        #   - used to understand and process natural language
+        #   - responsible for understanding the user's prompt
+        #   - analyzes the page content and generates a response that meets the requirements
+        #   - organizes the information into a structured format (JSON here)
+        #   - mainly handles understanding and generation tasks
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        # "base_url": "http://localhost:11434",  # set the Ollama URL
+        # The embeddings model helps the system understand the semantic relationships between different parts of the page, so that extracted projects and descriptions correspond to each other
+        # - Embeddings model (ollama/nomic-embed-text):
+        #   - used for text vectorization
+        #   - converts text into numeric vectors so that similarity can be computed
+        #   - helps the system understand semantic relationships in text
+        #   - used for matching related content and for similarity computation
+        #   - mainly handles semantic search and matching tasks
+    },
+    # - Benefits of this two-model setup:
+    #   - clear division of labor: each model focuses on the task it is best at
+    #   - improves the accuracy and efficiency of the system
+    #   - gives a better understanding of the semantics and structure of the page content
+    "verbose": True,
+    # "headless": False,
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their descriptions",
+    # A string of already-downloaded HTML is also accepted as the source
+    source="https://perinim.github.io/projects",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()  # run the graph and keep its return value
+print(result)  # dump the result to stdout