| | |
| | | import json |
| | | |
| | | from knowledgebase import utils |
| | | from knowledgebase.doc.entity_helper import entity_helper |
| | | |
# Module-level chat model client, constructed once when this module is imported.
# It is configured with temperature=0, the "qwen2.5-72b-instruct" model, and the
# DashScope OpenAI-compatible endpoint given in base_url.
# NOTE(review): ChatOpenAI is not imported in the visible part of this file;
# presumably the import sits outside this view - confirm.
# NOTE(review): api_key is a hard-coded literal committed to source. Anyone with
# read access to the repository can use it. Load it from an environment variable
# or a secret store instead, and rotate this key since it has already been exposed.
| | | llm = ChatOpenAI(temperature=0, |
| | | model="qwen2.5-72b-instruct", |
| | | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", |
| | | api_key="sk-15ecf7e273ad4b729c7f7f42b542749e") |
| | | |
| | | |
| | | class EntityRecognition: |
| | |
| | | """ |
| | | cache_file = "entity_recognition.cache" |
| | | |
| | | def __init__(self): |
| | | llm = ChatOpenAI(temperature=0, |
| | | model="qwen2.5-72b-instruct", |
| | | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", |
| | | api_key="sk-15ecf7e273ad4b729c7f7f42b542749e") |
| | | def __init__(self, doc_type: str): |
| | | # 实体词列表 |
| | | entities = filter(lambda x: x.doc_type == doc_type, entity_helper.entities) |
| | | entity_list = ';\n'.join([f'- {entity.name}:{entity.prompts}' for entity in entities]) + "。" |
| | | msg = HumanMessagePromptTemplate.from_template(template=""" |
| | | # 指令 |
| | | 请从给定的文本中提取实体词列表。 |
| | | 请从给定的文本中提取实体词列表,实体词列表定义如下: |
| | | ## 实体词列表及识别规则 |
| | | """ + entity_list + """ |
| | | # 约束 |
| | | - 输出格式为JSON格式; |
| | | - 提取的实体词必须是上面列举的实体词; |
| | | - 输出数据结构为字符串数组。 |
| | | # 示例 |
| | | ```json |
| | | ["实体1","实体2"] |
| | | ["遥控帧格式","遥控包格式"] |
| | | ``` |
| | | |
| | | # 文本如下: |
| | |
| | | def run(self, in_text: str) -> list[str]: |
| | | """ |
| | | 运行实体识别抽取。 |
| | | :param in_text: str - 输入文本 |
| | | """ |
| | | # 缓存命中 |
| | | text_md5 = utils.generate_md5(in_text) |
| | | text_md5 = utils.generate_text_md5(in_text) |
| | | if text_md5 in self.cache: |
| | | return self.cache[text_md5] |
| | | result = self.chain.invoke({"text": in_text}) |