Files
chat-bot/extension/agent/learner.py
lychang 64ce30fdfd init
2025-08-26 09:35:29 +08:00

71 lines
2.2 KiB
Python

# -*- coding: UTF-8 -*-
from urllib.parse import quote
from core.model import get_chat_model
from function.web_tool import WebScraper
from extension.rag import RAGPipeline
# Initialize the agent components (module-level singletons shared by the functions below)
# NOTE(review): coder_instance is never used anywhere in this file — confirm before removing
coder_instance = get_chat_model("qwen2.5-coder:14b")
# General-purpose LLM used for per-task prompting in get_relevant_content
llm_instance = get_chat_model("deepseek-r1:14b")
# Retrieval pipeline backing learn_from_web / query_agent; persists into the
# "learner_knowledge" table
rag_pipeline = RAGPipeline(table_name="learner_knowledge")
# Scraper used to fetch and parse web pages
web_scraper = WebScraper()
def learn_from_web(urls):
    """Scrape the given URL(s) and store the scraped text into the RAG knowledge base."""
    scraped = web_scraper.search(urls)
    rag_pipeline.insert_text_content(scraped['data'])
def query_agent(question):
    """Answer *question* by querying the RAG pipeline over previously learned content."""
    return rag_pipeline.query(question)
def get_relevant_content(url: str, tasks: list[str]) -> list:
    """Scrape *url*, then run each task prompt against the scraped text.

    Returns a list whose first element is the raw scraped text, followed by
    one LLM answer per entry in *tasks* (in order).
    """
    page = web_scraper.search(url)
    answers = [page['data']]
    # Prompt = scraped page content followed by the task instruction
    answers.extend(
        llm_instance.llm(f"{page['data']}\n{task}") for task in tasks
    )
    return answers
# Example usage
if __name__ == "__main__":
    topic = "fastgpt"
    # First-level index: a Bing search results page for the topic
    bing_url = f"https://cn.bing.com/search?q={quote(topic)}"
    task_1 = f"找到{topic}相关的知识的网址,只输出网址地址列表"
    # task_2 = f"找到{topic}相关的知识的,输出整理好的内容,尽可能多的保留原文"
    first_result, first_urls = get_relevant_content(bing_url, [task_1])
    print(first_urls)
    # Crawl the first-level URLs to collect second-level URLs and ingest the content
    web_urls = web_scraper.split_urls(first_urls)
    print(len(web_urls))
    second_urls = ""
    for url in web_urls:
        temp_result, temp_urls = get_relevant_content(url, [task_1])
        # BUG FIX: previously printed the stale first_urls on every iteration;
        # log this iteration's extracted URLs instead
        print(temp_urls)
        rag_pipeline.insert_text_content(temp_result, url)
        second_urls += temp_urls + "\n"
    web_urls = web_scraper.split_urls(second_urls)
    print(len(web_urls))
    for url in web_urls:
        # No tasks: take only the raw scraped text (element 0)
        temp_result = get_relevant_content(url, [])[0]
        # BUG FIX: previously printed the stale first_urls; log the URL being processed
        print(url)
        rag_pipeline.insert_text_content(temp_result, url)
    # Summarize what was learned from the second-level index pages
    question = f"{topic}的技术架构是什么?"
    answer = query_agent(question)
    print(answer)
    print("Agent believes it has learned enough.")