From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期一, 07 七月 2025 16:20:25 +0800 Subject: [PATCH] 生成数据库 --- knowledgebase/doc/entity_recognition.py | 41 +++++++++++++++++++++++++---------------- 1 files changed, 25 insertions(+), 16 deletions(-) diff --git a/knowledgebase/doc/entity_recognition.py b/knowledgebase/doc/entity_recognition.py index 6512bfe..8183a3d 100644 --- a/knowledgebase/doc/entity_recognition.py +++ b/knowledgebase/doc/entity_recognition.py @@ -5,13 +5,14 @@ # @version: 0.0.1 # @description: 瀹炰綋鎶藉彇锛屽皢鏂囨湰涓殑瀹炰綋杩涜璇嗗埆鍜屾彁鍙栥�� -from langchain_openai.chat_models import ChatOpenAI from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate from langchain_core.output_parsers import JsonOutputParser import json from knowledgebase import utils - +from knowledgebase.db.doc_db_helper import doc_dbh +from knowledgebase.log import Log +from knowledgebase.llm import llm class EntityRecognition: """ @@ -19,28 +20,35 @@ 浣跨敤langchain鏋勫缓瀹炰綋鎶藉彇娴佺▼銆� """ + use_cache = False cache_file = "entity_recognition.cache" - def __init__(self): - llm = ChatOpenAI(temperature=0, - model="qwen2.5-72b-instruct", - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", - api_key="sk-15ecf7e273ad4b729c7f7f42b542749e") - msg = HumanMessagePromptTemplate.from_template(template=""" + def __init__(self, doc_type: str): + # 瀹炰綋璇嶅垪琛� + entities = doc_dbh.get_entities_by_doc_type(doc_type) + entity_list = '锛�'.join([entity.name for entity in entities]) + "銆�" + entity_rules = "锛沑n".join([f"- {entity.name}锛歿entity.prompts}" for entity in entities]) + "銆�" + tpl = """ # 鎸囦护 -璇蜂粠缁欏畾鐨勬枃鏈腑鎻愬彇瀹炰綋璇嶅垪琛ㄣ�� +璇锋牴鎹疄浣撹瘝鍒ゆ柇瑙勫垯浠庣粰瀹氱殑鏂囨湰涓垽鏂槸鍚︽湁涓嬪垪瀹炰綋璇嶇浉鍏冲唴瀹癸紝濡傛灉鏈夊垯杈撳嚭鐩稿叧鐨勫疄浣撹瘝锛屾病鏈夊垯涓嶈緭鍑猴紝瀹炰綋璇嶅垪琛ㄥ畾涔夊涓嬶細 +""" + entity_list + """ +## 瀹炰綋璇嶅垽鏂鍒欙細 +""" + entity_rules + """ # 绾︽潫 - 杈撳嚭鏍煎紡涓篔SON鏍煎紡锛� +- 鎻愬彇鐨勫疄浣撹瘝蹇呴』鏄細""" + entity_list + """锛� +- 濡傛灉娌℃湁绗﹀悎涓婅堪瑙勫垯鐨勫疄浣撹瘝鍒欎笉瑕佽緭鍑轰换浣曞疄浣撹瘝锛� - 杈撳嚭鏁版嵁缁撴瀯涓哄瓧绗︿覆鏁扮粍銆� # 绀轰緥 ```json -["瀹炰綋1","瀹炰綋2"] +[\"""" + entities[0].name + """\"] ``` # 鏂囨湰濡備笅锛� {text} """ - ) + Log.info(tpl) + msg = HumanMessagePromptTemplate.from_template(template=tpl) prompt = ChatPromptTemplate.from_messages([msg]) parser = JsonOutputParser(pydantic_object=list[str]) self.chain = prompt | llm | parser @@ -60,17 +68,18 @@ 淇濆瓨缂撳瓨銆� """ text = json.dumps(self.cache) - utils.save_to_file(text, self.cache_file) + utils.save_text_to_file(text, self.cache_file) - def run(self, in_text: str) -> list[str]: + async def run(self, in_text: str) -> list[str]: """ 杩愯瀹炰綋璇嗗埆鎶藉彇銆� + :param in_text: str - 杈撳叆鏂囨湰 """ # 缂撳瓨鍛戒腑 - text_md5 = utils.generate_md5(in_text) - if text_md5 in self.cache: + text_md5 = utils.generate_text_md5(in_text) + if self.use_cache and text_md5 in self.cache: return self.cache[text_md5] - result = self.chain.invoke({"text": in_text}) + result = await self.chain.ainvoke({"text": in_text}) self.cache[text_md5] = result self.save_cache() return result -- Gitblit v1.9.1