From acde3bd32f07bf02839a21e8fe5b4e69bfca2251 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期三, 14 五月 2025 10:37:00 +0800 Subject: [PATCH] docx文档拆分,文档段落实体词提取,存入mysql数据库。 --- knowledgebase/doc/doc_convert.py | 14 -------------- 1 files changed, 0 insertions(+), 14 deletions(-) diff --git a/knowledgebase/doc/doc_convert.py b/knowledgebase/doc/doc_convert.py index dd9d7c5..db2bc32 100644 --- a/knowledgebase/doc/doc_convert.py +++ b/knowledgebase/doc/doc_convert.py @@ -69,17 +69,3 @@ print(f"鏂囦欢 {docx_file} 宸叉垚鍔熻浆鎹负 {pdf_file}锛�") except Exception as e: print(f"鍑虹幇閿欒: {e}") - - -def test(): - # doc_to_docx("D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.doc", - # "D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�111.docx") - # docx_to_pdf("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).docx", - # "D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf") - import pymupdf4llm - md_text = pymupdf4llm.to_markdown("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf") - print(md_text) - - -if __name__ == '__main__': - test() -- Gitblit v1.9.1