KnowledgeBase.git

			@@ -27,12 +27,14 @@

			"""

			def __init__(self, docx_file: str):
			def __init__(self, docx_file: str, docx_type: str):
			"""
			docx文档拆分
			:param docx_file: 要拆分的docx文件路径
			:param docx_type: 文档类型
			"""
			self.docx_file = docx_file
			self.docx_type = docx_type
			self.image_to_text = ImageToText()
			self.paragraphs: list[ParagraphInfo] = []
			self.paragraph_tree: list[ParagraphInfo] = []
			@@ -291,21 +293,3 @@
			# 替换原始列表内容，避免多次 remove 操作
			self.paragraphs[:] = _paragraphs
			self.paragraph_tree = result


			if __name__ == '__main__':
				# Manual smoke run: split one sample document and dump the text of
				# every paragraph to stdout.
				docx_file = r'D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机1553B总线传输通信帧分配（公开）.docx'
				# Alternative sample input (table-focused document):
				# docx_file = r'D:\workspace\PythonProjects\KnowledgeBase\doc\table_test.docx'

				# NOTE(review): this passes a single argument; if the constructor in
				# use also requires `docx_type`, this call must be updated — confirm.
				doc_split = DocSplit(docx_file)
				doc_split.split()

				# A disabled experiment used to live here: it looped over
				# `doc_split.trunks`, ran `EntityRecognition().run(trunk)` on each
				# trunk, printed the trunk text next to its recognised entities, and
				# had a `Neo4jHelper().create_page_node()` call stubbed in.

				paragraph_texts = (
					paragraph.full_text_with_children
					for paragraph in doc_split.paragraphs
				)
				print("\n".join(paragraph_texts))
				print()