From e60d75228fb161e464ca59fa2526bf0765f4d902 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期四, 22 五月 2025 12:35:55 +0800
Subject: [PATCH] 修改指令json生成,加入fastapi

---
 knowledgebase/doc/docx_split.py |   22 +++-------------------
 1 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/knowledgebase/doc/docx_split.py b/knowledgebase/doc/docx_split.py
index 22d189a..4270b05 100644
--- a/knowledgebase/doc/docx_split.py
+++ b/knowledgebase/doc/docx_split.py
@@ -27,12 +27,14 @@
 
     """
 
-    def __init__(self, docx_file: str):
+    def __init__(self, docx_file: str, docx_type: str):
         """
         docx文档拆分
         :param docx_file: 要拆分的docx文件路径
+        :param docx_type: 文档类型
         """
         self.docx_file = docx_file
+        self.docx_type = docx_type
         self.image_to_text = ImageToText()
         self.paragraphs: list[ParagraphInfo] = []
         self.paragraph_tree: list[ParagraphInfo] = []
@@ -291,21 +293,3 @@
         # 替换原始列表内容，避免多次 remove 操作
         self.paragraphs[:] = _paragraphs
         self.paragraph_tree = result
-
-
-if __name__ == '__main__':
-    docx_file = r'D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机1553B总线传输通信帧分配（公开）.docx'
-    # docx_file = r'D:\workspace\PythonProjects\KnowledgeBase\doc\table_test.docx'
-    doc_split = DocSplit(docx_file)
-    doc_split.split()
-    # er = EntityRecognition()
-    # db = Neo4jHelper()
-    # for trunk in doc_split.trunks:
-    #     print('段落文本：')
-    #     print(trunk)
-    #     print('实体词：')
-    #     print(er.run(trunk))
-    # entities = er.run(trunk)
-    # db.create_page_node()
-    print("\n".join([x.full_text_with_children for x in doc_split.paragraphs]))
-    print()

--
Gitblit v1.9.1