From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期一, 07 七月 2025 16:20:25 +0800 Subject: [PATCH] 生成数据库 --- testcases/test_doc_processor.py | 33 +++++++++++++++++++++++---------- 1 files changed, 23 insertions(+), 10 deletions(-) diff --git a/testcases/test_doc_processor.py b/testcases/test_doc_processor.py index ecad70a..203ab7a 100644 --- a/testcases/test_doc_processor.py +++ b/testcases/test_doc_processor.py @@ -10,20 +10,33 @@ def test_process(): files = [ - r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈�1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.docx", - r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈哄垎绯荤粺閬ユ祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.docx", - r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈鸿蒋浠剁敤鎴烽渶姹傦紙鍏紑锛�.docx", - r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬪ぇ绾诧紙鍏紑锛�.docx", - r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬩俊鍙峰垎閰嶈〃锛堝叕寮�锛�.docx", - # r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈烘寚浠ゆ牸寮忎笌缂栫爜瀹氫箟锛堝叕寮�锛�.docx", - r"D:\workspace\PythonProjects\KnowledgeBase\doc\鎸囦护鏍煎紡(鍏紑).docx" + # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈�1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.docx", + # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈哄垎绯荤粺閬ユ祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.docx", + # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈鸿蒋浠剁敤鎴烽渶姹傦紙鍏紑锛�.docx", + # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬪ぇ绾诧紙鍏紑锛�.docx", + # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬩俊鍙峰垎閰嶈〃锛堝叕寮�锛�.docx", + # r"D:\KnowledgeBase\doc\鎸囦护鏍煎紡(鍏紑).docx", + + # r"D:\KnowledgeBase\doc\HY-4A鏁扮鍒嗙郴缁熼仴娴嬫簮鍖呰璁℃姤鍛� Z 240824 鏇存敼3(鍐呴儴) .docx", + # r"D:\KnowledgeBase\doc\HY-4A鏁扮鍒嗙郴缁熷簲鐢ㄨ蒋浠剁敤鎴烽渶姹傦紙鏄熷姟绠$悊鍒嗗唽锛� Z 240831 鏇存敼4锛堝唴閮級.docx", + # r"D:\KnowledgeBase\doc\HY-4A鍗槦1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰� Z 240824 鏇存敼3锛堝唴閮級.docx", + # r"D:\KnowledgeBase\doc\HY-4A鍗槦閬ユ祴澶х翰 Z 240824 鏇存敼3锛堝唴閮級.docx", + # r"D:\KnowledgeBase\doc\閫氱敤-鏁扮鍒嗙郴缁熸寚浠ゆ牸寮忎笌缂栫爜瀹氫箟鍙婁娇鐢ㄥ噯鍒欙紝缂栧啓涓璙4锛�20240119(鍐呴儴).docx" + + r"D:\projects\KnowledgeBase\doc_xx25\鍗槦閬ユ祴澶х翰Z250226锛堢瀵嗏槄10骞达級 - 鍓湰.docx" + + # r"D:\KnowledgeBase\doc\鎸囦护鏍煎紡.docx" ] for file in files: doc_processor = DocProcessor(file) doc_processor.process() + + def test_get_text_by_entity(): - text = doc_dbh.get_text_with_entities(['鍒嗙郴缁熸簮鍖�']) + text = doc_dbh.get_text_with_entities(['閬ユ祴婧愬寘涓嬩紶鏃舵満']) print(text) + + if __name__ == '__main__': - # test_process() - test_get_text_by_entity() \ No newline at end of file + test_process() + # test_get_text_by_entity() -- Gitblit v1.9.1