From acde3bd32f07bf02839a21e8fe5b4e69bfca2251 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 14 五月 2025 10:37:00 +0800
Subject: [PATCH] docx文档拆分,文档段落实体词提取,存入mysql数据库。

---
 knowledgebase/doc/doc_convert.py |   14 --------------
 1 files changed, 0 insertions(+), 14 deletions(-)

diff --git a/knowledgebase/doc/doc_convert.py b/knowledgebase/doc/doc_convert.py
index dd9d7c5..db2bc32 100644
--- a/knowledgebase/doc/doc_convert.py
+++ b/knowledgebase/doc/doc_convert.py
@@ -69,17 +69,3 @@
         print(f"鏂囦欢 {docx_file} 宸叉垚鍔熻浆鎹负 {pdf_file}锛�")
     except  Exception as e:
         print(f"鍑虹幇閿欒: {e}")
-
-
-def test():
-    # doc_to_docx("D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.doc",
-    #             "D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�111.docx")
-    # docx_to_pdf("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).docx",
-    #             "D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf")
-    import pymupdf4llm
-    md_text = pymupdf4llm.to_markdown("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf")
-    print(md_text)
-
-
-if __name__ == '__main__':
-    test()

--
Gitblit v1.9.1