From acde3bd32f07bf02839a21e8fe5b4e69bfca2251 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: 星期三, 14 五月 2025 10:37:00 +0800
Subject: [PATCH] docx文档拆分,文档段落实体词提取,存入mysql数据库。

---
 knowledgebase/utils.py |   13 ++++++++++++-
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py
index ab7d2d1..0314db2 100644
--- a/knowledgebase/utils.py
+++ b/knowledgebase/utils.py
@@ -15,7 +15,7 @@
     return mask
 
 
-def generate_md5(input_string):
+def generate_text_md5(input_string):
     # 鍒涘缓涓�涓� md5 鍝堝笇瀵硅薄
     md5_hash = hashlib.md5()
 
@@ -27,6 +27,17 @@
 
     return md5_digest
 
+def generate_bytes_md5(input_bytes):
+    # Create an MD5 hash object
+    md5_hash = hashlib.md5()
+
+    # Feed the input bytes into the hash object
+    md5_hash.update(input_bytes)
+
+    # Get the hexadecimal representation of the hash value
+    md5_digest = md5_hash.hexdigest()
+
+    return md5_digest
 
 def file_exists(cache_file: str):
     return os.path.exists(cache_file)

--
Gitblit v1.9.1