# -*- coding: utf-8 -*-
|
#
|
# @author: lyg
|
# @date: 2025-5-12
|
# @version: 1
|
# @description: 文档数据库助手,mysql数据库
|
|
import json
|
|
from knowledgebase.db.doc_db_models import init_doc_db, TDoc, TEntity, TParagraph, TParagraphLink, TParagraphRefLink, \
|
TParagraphEntityLink
|
|
from knowledgebase.doc.models import ParagraphInfo, DocInfo
|
|
|
class DocDbHelper:
    """
    Helper for the document database.

    Wraps the session returned by ``init_doc_db()`` and exposes small methods
    for inserting documents, paragraphs, entities and the link rows between
    them, plus a few read queries.

    Every write goes through a commit that rolls the session back on failure,
    so one failed write does not leave the shared session unusable.

    NOTE(review): the session calls used here (``add`` / ``flush`` /
    ``commit`` / ``rollback`` / ``query(...).all()``) assume a
    SQLAlchemy-style session - confirm against ``init_doc_db``.
    """

    def __init__(self):
        # One session is created per helper and reused by every method.
        self.session = init_doc_db()

    def _commit(self):
        """Commit the current transaction; roll back and re-raise on failure."""
        try:
            self.session.commit()
        except Exception:
            # Reset the session so later calls can keep using it, then let
            # the caller see the original error.
            self.session.rollback()
            raise

    def _save(self, record):
        """Add ``record`` to the session, commit, and return the same object."""
        self.session.add(record)
        self._commit()
        return record

    def add_doc(self, doc_info: DocInfo) -> int:
        """
        Insert a document row.

        :param doc_info: source of the ``file`` and ``file_name`` values
        :return: ``id`` of the newly committed ``TDoc`` row
        """
        _doc = TDoc(
            file=doc_info.file,
            file_name=doc_info.file_name,
            is_del=0,
        )
        return self._save(_doc).id

    def add_paragraph(self, doc_id: int, parent_id: int, paragraph_info: ParagraphInfo) -> TParagraph:
        """
        Insert a paragraph together with its links and all nested children.

        The whole subtree is staged first and committed once, so a failure
        anywhere in the subtree rolls back everything staged by this call
        instead of leaving a partially stored tree.

        :param doc_id: id of the owning document
        :param parent_id: id of the parent paragraph, or ``None`` for a
            top-level paragraph (no parent/child link row is created then)
        :param paragraph_info: paragraph data; its ``entities`` and
            ``children`` attributes are processed when non-empty
        :return: the ``TParagraph`` row created for ``paragraph_info``
        """
        try:
            paragraph = self._stage_paragraph_tree(doc_id, parent_id, paragraph_info)
            self.session.commit()
        except Exception:
            self.session.rollback()
            raise
        return paragraph

    def _stage_paragraph_tree(self, doc_id, parent_id, paragraph_info):
        """Add a paragraph, its link rows and its children to the session without committing."""
        paragraph = TParagraph(
            doc_id=doc_id,
            text=paragraph_info.text,
            title_level=paragraph_info.title_level,
            title_num=paragraph_info.title_num,
            num=paragraph_info.num,
            num_level=paragraph_info.num_level,
            parent_id=parent_id,
            is_del=0,
        )
        self.session.add(paragraph)
        # Flush so the paragraph's id is available for the link rows and
        # for the children created below.
        self.session.flush()

        if parent_id is not None:
            self.session.add(TParagraphLink(parent_id=parent_id, child_id=paragraph.id))

        if paragraph_info.entities:
            for entity in paragraph_info.entities:
                self.session.add(
                    TParagraphEntityLink(paragraph_id=paragraph.id, entity_id=entity.id)
                )

        if paragraph_info.children:
            for child in paragraph_info.children:
                self._stage_paragraph_tree(doc_id, paragraph.id, child)

        return paragraph

    def add_paragraph_link(self, paragraph_link):
        """
        Insert a parent/child paragraph link row.

        :param paragraph_link: link row to store (``add_paragraph`` builds
            these as ``TParagraphLink``)
        :return: ``id`` of the stored row
        """
        return self._save(paragraph_link).id

    def add_paragraph_entity_link(self, paragraph_entity_link):
        """
        Insert a paragraph/entity link row.

        :param paragraph_entity_link: link row to store (``add_paragraph``
            builds these as ``TParagraphEntityLink``)
        :return: ``id`` of the stored row
        """
        return self._save(paragraph_entity_link).id

    def add_entity(self, entity):
        """
        Insert an entity row.

        :param entity: row to store (presumably a ``TEntity`` - confirm
            against callers)
        :return: ``id`` of the stored row
        """
        return self._save(entity).id

    def add_paragraph_ref_link(self, paragraph_ref_link):
        """
        Insert a paragraph reference link row.

        :param paragraph_ref_link: link row to store (presumably a
            ``TParagraphRefLink`` - confirm against callers)
        :return: the stored row object itself; unlike the other ``add_*``
            methods this does not return the row's ``id``
        """
        return self._save(paragraph_ref_link)

    def get_all_entities(self) -> list[TEntity]:
        """Return every ``TEntity`` row."""
        return self.session.query(TEntity).all()

    def get_docs(self) -> list[TDoc]:
        """Return every ``TDoc`` row."""
        return self.session.query(TDoc).all()

    def get_text_with_entities(self, entity_names: list[str]) -> str:
        """
        Return the names of stored entities that appear in ``entity_names``.

        All entities are loaded and filtered in Python; the matching names
        are joined with newlines in the order the query returned them.

        NOTE(review): the original description said this returns text
        content for the entity words, but the code only returns the matching
        entity names - confirm which is intended.

        :param entity_names: entity names to look for
        :return: newline-joined matching names, or ``""`` when
            ``entity_names`` is empty
        """
        if not entity_names:
            return ""

        # Build the lookup set once instead of scanning the list per entity.
        wanted = set(entity_names)
        return '\n'.join(
            entity.name for entity in self.get_all_entities() if entity.name in wanted
        )

    def commit(self):
        """Commit whatever is pending on the session, rolling back if the commit fails."""
        self._commit()
|
|
|
# Shared module-level helper instance. Creating it runs DocDbHelper.__init__,
# which calls init_doc_db(), so this happens when the module is imported.
doc_dbh = DocDbHelper()
|
|
# if __name__ == '__main__':
#     doc_db = DocDbHelper()
#     # doc_db.insert_entities()
#     # add_doc returns the new document id (an int), not the TDoc row.
#     doc_id = doc_db.add_doc(DocInfo(file='aaa', file_name='test'))
#     p1 = doc_db.add_paragraph(doc_id, None, ParagraphInfo(text='test1', title_level=1, num=1, num_level=1))
#     p2 = doc_db.add_paragraph(doc_id, p1.id, ParagraphInfo(text='test2', title_level=2, num=1, num_level=2))
#     p3 = doc_db.add_paragraph(doc_id, p2.id, ParagraphInfo(text='test3', title_level=3, num=1, num_level=3))
#     doc_db.add_paragraph_ref_link(TParagraphRefLink(parent_id=p1.id, child_id=p3.id))
|