71 lines
2.2 KiB
Python
71 lines
2.2 KiB
Python
# -*- coding: UTF-8 -*-
|
|
from urllib.parse import quote
|
|
|
|
from core.model import get_chat_model
|
|
from function.web_tool import WebScraper
|
|
from extension.rag import RAGPipeline
|
|
|
|
# Shared, module-level agent components. They are created once at import
# time and used by the functions below.

# Chat-model handles, looked up by model name via get_chat_model.
# NOTE(review): coder_instance is not referenced anywhere in the visible
# part of this file; confirm whether it is still needed.
coder_instance = get_chat_model("qwen2.5-coder:14b")
llm_instance = get_chat_model("deepseek-r1:14b")

# Knowledge store that scraped content is inserted into and queried from.
rag_pipeline = RAGPipeline(table_name="learner_knowledge")

# Scraper used to fetch pages and to split URL lists.
web_scraper = WebScraper()
|
|
|
|
|
|
def learn_from_web(urls):
    """Scrape ``urls`` and store the scraped content in the RAG pipeline.

    ``urls`` is handed unchanged to ``web_scraper.search``; the ``'data'``
    entry of its result is inserted via
    ``rag_pipeline.insert_text_content``. Nothing is returned.
    """
    rag_pipeline.insert_text_content(web_scraper.search(urls)['data'])
|
|
|
|
|
|
def query_agent(question):
    """Return whatever ``rag_pipeline.query`` produces for ``question``."""
    return rag_pipeline.query(question)
|
|
|
|
|
|
def get_relevant_content(url: str, tasks: list[str]) -> list:
    """Scrape ``url`` and run each task prompt against the scraped content.

    Args:
        url: Passed to ``web_scraper.search``.
        tasks: Prompts; each one is appended (after a newline) to the
            scraped content and sent to ``llm_instance.llm``.

    Returns:
        A list whose first element is the ``'data'`` entry of the search
        result, followed by one LLM result per task, in task order. With
        an empty ``tasks`` the list holds only the scraped content.
    """
    scraped = web_scraper.search(url)
    return [
        scraped['data'],
        *(llm_instance.llm(f"{scraped['data']}\n{prompt}") for prompt in tasks),
    ]
|
|
|
|
|
|
# Example usage
|
|
if __name__ == "__main__":
    # Demo crawl: start at a search-results page for ``topic``, follow the
    # URLs the LLM extracts from it (level 1), then the URLs extracted from
    # those pages (level 2), storing page content in the RAG pipeline, and
    # finally ask the pipeline a question about the topic.
    topic = "fastgpt"

    # Level-1 index: the Bing results page for the topic.
    bing_url = f"https://cn.bing.com/search?q={quote(topic)}"
    # Prompt asking the model to output only the list of URLs related to
    # the topic (the prompt text itself is sent to the model as-is).
    task_1 = f"找到{topic}相关的知识的网址,只输出网址地址列表"
    # Disabled alternative prompt: ask for the organised content itself,
    # keeping as much of the original text as possible.
    # task_2 = f"找到{topic}相关的知识的,输出整理好的内容,尽可能多的保留原文"
    first_result, first_urls = get_relevant_content(bing_url, [task_1])
    print(first_urls)

    # Crawl every level-1 URL: store its content and collect the level-2
    # URLs the model extracts from it.
    web_urls = web_scraper.split_urls(first_urls)
    print(len(web_urls))
    second_url_chunks = []
    for url in web_urls:
        temp_result, temp_urls = get_relevant_content(url, [task_1])
        # Print the URLs found on *this* page; the loop used to re-print
        # the unchanged level-1 list on every iteration.
        print(temp_urls)
        rag_pipeline.insert_text_content(temp_result, url)
        # Each chunk stays newline-terminated, so the joined string is
        # identical to what repeated ``+=`` produced.
        second_url_chunks.append(temp_urls + "\n")
    second_urls = "".join(second_url_chunks)

    # Crawl every level-2 URL and store its content. No prompts are run
    # here, so the returned list holds only the scraped content.
    web_urls = web_scraper.split_urls(second_urls)
    print(len(web_urls))
    for url in web_urls:
        temp_result = get_relevant_content(url, [])[0]
        # Progress output: the page being stored (previously the stale
        # level-1 list was printed here as well).
        print(url)
        rag_pipeline.insert_text_content(temp_result, url)

    # Query the knowledge gathered from the crawled pages.
    question = f"{topic}的技术架构是什么?"
    answer = query_agent(question)
    print(answer)
    print("Agent believes it has learned enough.")
|