From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期一, 07 七月 2025 16:20:25 +0800 Subject: [PATCH] 生成数据库 --- knowledgebase/utils.py | 40 ++++++++++++++++++++++++++++++++++------ 1 files changed, 34 insertions(+), 6 deletions(-) diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py index ab7d2d1..ae9fe1f 100644 --- a/knowledgebase/utils.py +++ b/knowledgebase/utils.py @@ -3,6 +3,7 @@ import os import json import re +import asyncio def get_bit_mask(start, end): @@ -15,7 +16,7 @@ return mask -def generate_md5(input_string): +def generate_text_md5(input_string): # 创建一个 md5 哈希对象 md5_hash = hashlib.md5() @@ -28,19 +29,46 @@ return md5_digest +def generate_bytes_md5(input_bytes): + # 创建一个 md5 哈希对象 + md5_hash = hashlib.md5() + + # 更新哈希对象内容 + md5_hash.update(input_bytes) + + # 获取哈希值的十六进制表示 + md5_digest = md5_hash.hexdigest() + + return md5_digest + + +async def get_md5_async(file: bytes): + md5 = await asyncio.to_thread(generate_bytes_md5, file) + return md5 + + +async def save_to_file_async(data: bytes, file_path: str): + await asyncio.to_thread(save_bytes_to_file, data, file_path) + + def file_exists(cache_file: str): return os.path.exists(cache_file) -def read_from_file(cache_file: str) -> str: - with open(cache_file, 'r', encoding='utf-8') as f: +def read_from_file(file: str) -> str: + with open(file, 'r', encoding='utf-8') as f: text = f.read() return text -def save_to_file(text, cache_file): - with open(cache_file, 'w', encoding='utf-8') as f: +def save_text_to_file(text: str, file: str): + with open(file, 'w', encoding='utf-8') as f: f.write(text) + + +def save_bytes_to_file(bytes_data: bytes, file: str): + with open(file, 'wb') as f: + f.write(bytes_data) def replace_tpl_paras(tpl_text: str, data: dict): @@ -58,4 +86,4 @@ :param text: :return: """ - return re.sub(r'[\\/:*?"<>|]', '_', text) + return re.sub(r'[\t\\/:*?"<>|]', '_', text) -- Gitblit v1.9.1