From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: Mon, 07 Jul 2025 16:20:25 +0800
Subject: [PATCH] 生成数据库

---
 testcases/test_doc_processor.py |   33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/testcases/test_doc_processor.py b/testcases/test_doc_processor.py
index ecad70a..203ab7a 100644
--- a/testcases/test_doc_processor.py
+++ b/testcases/test_doc_processor.py
@@ -10,20 +10,33 @@
 
 def test_process():
     files = [
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈�1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈哄垎绯荤粺閬ユ祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈鸿蒋浠剁敤鎴烽渶姹傦紙鍏紑锛�.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬪ぇ绾诧紙鍏紑锛�.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬩俊鍙峰垎閰嶈〃锛堝叕寮�锛�.docx",
-        # r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D鏃犱汉鏈烘寚浠ゆ牸寮忎笌缂栫爜瀹氫箟锛堝叕寮�锛�.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\鎸囦护鏍煎紡(鍏紑).docx"
+        # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈�1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈哄垎绯荤粺閬ユ祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈鸿蒋浠剁敤鎴烽渶姹傦紙鍏紑锛�.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬪ぇ绾诧紙鍏紑锛�.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D鏃犱汉鏈洪仴娴嬩俊鍙峰垎閰嶈〃锛堝叕寮�锛�.docx",
+        # r"D:\KnowledgeBase\doc\鎸囦护鏍煎紡(鍏紑).docx",
+
+        # r"D:\KnowledgeBase\doc\HY-4A鏁扮鍒嗙郴缁熼仴娴嬫簮鍖呰璁℃姤鍛� Z 240824 鏇存敼3(鍐呴儴) .docx",
+        # r"D:\KnowledgeBase\doc\HY-4A鏁扮鍒嗙郴缁熷簲鐢ㄨ蒋浠剁敤鎴烽渶姹傦紙鏄熷姟绠$悊鍒嗗唽锛� Z 240831 鏇存敼4锛堝唴閮級.docx",
+        # r"D:\KnowledgeBase\doc\HY-4A鍗槦1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰� Z 240824 鏇存敼3锛堝唴閮級.docx",
+        # r"D:\KnowledgeBase\doc\HY-4A鍗槦閬ユ祴澶х翰 Z 240824 鏇存敼3锛堝唴閮級.docx",
+        # r"D:\KnowledgeBase\doc\閫氱敤-鏁扮鍒嗙郴缁熸寚浠ゆ牸寮忎笌缂栫爜瀹氫箟鍙婁娇鐢ㄥ噯鍒欙紝缂栧啓涓璙4锛�20240119(鍐呴儴).docx"
+
+        r"D:\projects\KnowledgeBase\doc_xx25\鍗槦閬ユ祴澶х翰Z250226锛堢瀵嗏槄10骞达級 - 鍓湰.docx"
+
+        # r"D:\KnowledgeBase\doc\鎸囦护鏍煎紡.docx"
     ]
     for file in files:
         doc_processor = DocProcessor(file)
         doc_processor.process()
+
+
 def test_get_text_by_entity():
-    text = doc_dbh.get_text_with_entities(['鍒嗙郴缁熸簮鍖�'])
+    text = doc_dbh.get_text_with_entities(['閬ユ祴婧愬寘涓嬩紶鏃舵満'])
     print(text)
+
+
 if __name__ == '__main__':
-    # test_process()
-    test_get_text_by_entity()
\ No newline at end of file
+    test_process()
+    # test_get_text_by_entity()

--
Gitblit v1.9.1