From aef16113f5ffc1f9cb841ad56129e9029b5768d6 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 07 五月 2025 16:32:15 +0800
Subject: [PATCH] 生成指令单元并插入db,docx转pdf,pdf按页切分,按页提取实体词并保存到neo4j数据库。

---
 knowledgebase/utils.py |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py
index c785dfe..ab7d2d1 100644
--- a/knowledgebase/utils.py
+++ b/knowledgebase/utils.py
@@ -1,4 +1,8 @@
 import math
+import hashlib
+import os
+import json
+import re
 
 
 def get_bit_mask(start, end):
@@ -9,3 +13,49 @@
     for i in range(start, end + 1):
         mask |= 1 << (bits - i - 1)
     return mask
+
+
+def generate_md5(input_string):
+    """Return the hexadecimal MD5 digest of input_string encoded as UTF-8."""
+    md5_hash = hashlib.md5()  # create an md5 hash object
+
+    # Update the hash object's content (the string must be encoded to bytes first).
+    md5_hash.update(input_string.encode('utf-8'))
+
+    # Get the hexadecimal representation of the hash value.
+    md5_digest = md5_hash.hexdigest()
+
+    return md5_digest
+
+
+def file_exists(cache_file: str):  # report whether the path cache_file exists (thin wrapper over os.path.exists)
+    return os.path.exists(cache_file)
+
+
+def read_from_file(cache_file: str) -> str:  # return the entire content of cache_file as text
+    with open(cache_file, 'r', encoding='utf-8') as f:  # the file is decoded as UTF-8
+        text = f.read()
+    return text
+
+
+def save_to_file(text, cache_file):  # write text to the path cache_file, encoded as UTF-8
+    with open(cache_file, 'w', encoding='utf-8') as f:  # mode 'w' creates the file or truncates an existing one
+        f.write(text)
+
+
+def replace_tpl_paras(tpl_text: str, data: dict):  # fill each {{key}} placeholder in tpl_text from data and return the result
+    for key, val in data.items():
+        if not isinstance(val, str):  # non-str values: JSON-encode twice, then strip the outer quotes, leaving escaped JSON text
+            val = json.dumps(json.dumps(val, ensure_ascii=False), ensure_ascii=False)[1:-1]  # NOTE(review): presumably for embedding inside a quoted string in the template - confirm
+        tpl_text = tpl_text.replace('{{' + key + '}}', val)  # str values are inserted verbatim; every occurrence is replaced
+    return tpl_text
+
+
+def to_file_name(text: str):
+    """
+    Convert text into a valid file name.
+    Each special character is replaced with _
+    :param text: the text to convert
+    :return: text with every backslash, slash, colon, asterisk, question mark, double quote, angle bracket and pipe replaced by _
+    """
+    return re.sub(r'[\\/:*?"<>|]', '_', text)

--
Gitblit v1.9.1