# Reconstructed from a unified diff of knowledgebase/utils.py whose lines had
# been collapsed onto a single line (Gitblit v1.9.1 patch export).
# Commit:  aef16113f5ffc1f9cb841ad56129e9029b5768d6
# Author:  lyg <1543117173@qq.com>
# Date:    Wed, 07 May 2025 16:32:15 +0800
# Subject: generate instruction units and insert them into the db, convert
#          docx to pdf, split the pdf by page, extract entity words page by
#          page and save them to the neo4j database.
import math
import hashlib
import os
import json
import re


# NOTE(review): get_bit_mask(start, end) is defined at this point in the real
# module. The patch only shows its signature and the last three lines of its
# body as hunk context:
#         for i in range(start, end + 1):
#             mask |= 1 << (bits - i - 1)
#         return mask
# The lines in between are not visible, so the function is not reproduced here.


def generate_md5(input_string):
    """
    Return the MD5 digest of a string as lowercase hexadecimal text.

    :param input_string: text to hash; it is encoded as UTF-8 before hashing.
    :return: hexadecimal digest string produced by ``hashlib.md5``.
    """
    return hashlib.md5(input_string.encode('utf-8')).hexdigest()


def file_exists(cache_file: str):
    """
    Tell whether a path exists on disk.

    :param cache_file: path to check.
    :return: result of ``os.path.exists`` for the path.
    """
    return os.path.exists(cache_file)


def read_from_file(cache_file: str) -> str:
    """
    Read a whole file as UTF-8 text.

    :param cache_file: path of the file to read.
    :return: the complete file content.
    """
    with open(cache_file, 'r', encoding='utf-8') as f:
        return f.read()


def save_to_file(text, cache_file):
    """
    Write text to a file as UTF-8, replacing any existing content.

    :param text: string to write.
    :param cache_file: path of the file to (over)write.
    :return: None
    """
    with open(cache_file, 'w', encoding='utf-8') as f:
        f.write(text)


def replace_tpl_paras(tpl_text: str, data: dict):
    """
    Substitute ``{{key}}`` placeholders in a template with values from ``data``.

    String values are inserted verbatim. Any other value is serialized to JSON
    and that JSON text is then escaped the way the content of a JSON string
    literal would be (quotes and backslashes escaped, outer quotes removed).
    Presumably this is so the value can sit inside a quoted string in the
    template -- confirm against the templates in use.

    Replacements are applied one key after another, in the iteration order of
    ``data``, so text inserted for an earlier key is still scanned for the
    placeholders of later keys.

    :param tpl_text: template text containing ``{{key}}`` placeholders.
    :param data: mapping of placeholder name to replacement value.
    :return: the template text with all known placeholders replaced.
    """
    for key, val in data.items():
        if not isinstance(val, str):
            # Encode twice: first to JSON, then as a JSON string literal whose
            # surrounding quotes are stripped by the [1:-1] slice.
            val = json.dumps(json.dumps(val, ensure_ascii=False), ensure_ascii=False)[1:-1]
        # str(key) keeps non-string keys (e.g. ints) from raising TypeError.
        tpl_text = tpl_text.replace('{{' + str(key) + '}}', val)
    return tpl_text


def to_file_name(text: str):
    """
    Convert text into a legal file name.

    Each of the special characters ``\\ / : * ? " < > |`` is replaced with ``_``.

    :param text: text to convert.
    :return: the text with every special character replaced by ``_``.
    """
    return re.sub(r'[\\/:*?"<>|]', '_', text)