import math
import os
from typing import Optional

from knowledgebase.markitdown import MarkItDown
from doc_to_docx import doc_to_docx


def process_docs(directory):
    """Convert every ``.doc`` file directly inside *directory* to ``.docx``.

    Each matching file is passed to ``doc_to_docx`` together with a target
    path that sits next to the source and differs only by the extension.

    Args:
        directory: Directory to scan (not recursive). A trailing path
            separator is optional.
    """
    for filename in os.listdir(directory):
        # Only legacy Word documents are converted.
        if not filename.endswith(".doc"):
            continue
        source_path = os.path.join(directory, filename)
        # The name is known to end in ".doc", so appending "x" changes only
        # the extension; str.replace would also rewrite any ".doc" that
        # happens to appear earlier in the file name.
        doc_to_docx(source_path, source_path + "x")


# Shared converter instance used by to_markdown().
md = MarkItDown()


def to_markdown(dst_dir: str) -> Optional[str]:
    """Convert every ``.docx`` file directly inside *dst_dir* to Markdown.

    For each ``<name>.docx`` the converted text is written, UTF-8 encoded,
    to ``<name>.docx.md`` in the same directory.

    Args:
        dst_dir: Directory to scan (not recursive). A trailing path
            separator is optional.

    Returns:
        The path of the last Markdown file written, or ``None`` when the
        directory contains no ``.docx`` file.
    """
    out_file = None
    for file in os.listdir(dst_dir):
        if not file.endswith(".docx"):
            continue
        result = md.convert(os.path.join(dst_dir, file))
        out_file = os.path.join(dst_dir, file + '.md')
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(result.text_content)
    return out_file


# Intended workflow:
# 1. Parse the documents
# 2. Feed the documents in
# 3. Start LangFlow
def main():
    """Run the document pipeline for the local ``doc`` directory.

    Only the Markdown conversion step is currently active.
    """
    doc_dir = ".\\doc\\"
    # Pre-processing step (disabled): convert legacy .doc files to .docx.
    # process_docs(doc_dir)
    # Convert the documents to Markdown.
    md_file = to_markdown(doc_dir)
    # TODO: start the LLM processing flow on md_file and save its result:
    # ret_text = LangFlow([md_file]).run()


def get_bit_mask(start, end):
    """Return a mask with bits *start* through *end* (inclusive) set.

    Bit positions are counted from the most significant bit of a field
    whose width is ``end + 1`` rounded up to a whole number of bytes
    (at least 8 bits), so position 0 is the top bit of that field.

    Args:
        start: First bit position to set.
        end: Last bit position to set.

    Returns:
        The mask as an int; ``0`` when ``start > end``.
    """
    bits = math.ceil((end + 1) / 8) * 8
    if bits == 0:
        bits = 8
    if end < start:
        # Empty range: nothing to set.
        return 0
    # A run of (end - start + 1) ones whose lowest bit sits at the position
    # that corresponds to ``end`` in MSB-first numbering.
    run_of_ones = (1 << (end - start + 1)) - 1
    return run_of_ones << (bits - end - 1)


if __name__ == '__main__':
    main()