From acde3bd32f07bf02839a21e8fe5b4e69bfca2251 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期三, 14 五月 2025 10:37:00 +0800 Subject: [PATCH] docx文档拆分,文档段落实体词提取,存入mysql数据库。 --- knowledgebase/utils.py | 13 ++++++++++++- 1 files changed, 12 insertions(+), 1 deletions(-) diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py index ab7d2d1..0314db2 100644 --- a/knowledgebase/utils.py +++ b/knowledgebase/utils.py @@ -15,7 +15,7 @@ return mask -def generate_md5(input_string): +def generate_text_md5(input_string): # 创建一个 md5 哈希对象 md5_hash = hashlib.md5() @@ -27,6 +27,17 @@ return md5_digest +def generate_bytes_md5(input_bytes): + # 创建一个 md5 哈希对象 + md5_hash = hashlib.md5() + + # 更新哈希对象内容 + md5_hash.update(input_bytes) + + # 获取哈希值的十六进制表示 + md5_digest = md5_hash.hexdigest() + + return md5_digest def file_exists(cache_file: str): return os.path.exists(cache_file) -- Gitblit v1.9.1