"""Command-line entry point that runs the DB structure flow for a project."""
import asyncio
import os
import sys

from db_struct_flow import DbStructFlow, tc_data_generate
from knowledgebase.db.doc_db_helper import doc_dbh
from knowledgebase.doc.doc_processor import DocProcessor
from knowledgebase.doc.entity_helper import init_entity_helper


def doc_split(project_path: str) -> None:
    """Run ``DocProcessor`` on every ``.docx`` file in the project's docs folder.

    Args:
        project_path: Project root directory; documents are read from its
            ``docs`` subdirectory (non-recursively).

    Raises:
        OSError: Propagated from ``os.listdir`` if ``<project_path>/docs``
            cannot be listed (e.g. it does not exist).
    """
    docs_path = f'{project_path}/docs'
    docx_files = [
        f'{docs_path}/{name}'
        for name in os.listdir(docs_path)
        if name.endswith('.docx')
    ]
    for file in docx_files:
        DocProcessor(file).process()


def main() -> None:
    """Run the processing pipeline for the project path given on the command line.

    Reads the project path from ``sys.argv[1]``. If it is missing or empty,
    prints a usage hint and returns without doing any work. Otherwise it
    sets the project path on ``doc_dbh``, initializes the entity helper,
    runs ``DbStructFlow`` to completion, and finally calls
    ``tc_data_generate``.
    """
    # Guard the index: with no argument, ``sys.argv[1]`` would raise
    # IndexError before the usage message below could ever be printed.
    project_path = sys.argv[1] if len(sys.argv) > 1 else ""
    if not project_path:
        print("missing project path. eg: python main.py ")
        return

    # Split documents.
    doc_dbh.set_project_path(project_path)
    init_entity_helper()
    # NOTE(review): the splitting step itself is currently disabled;
    # presumably it is re-enabled by hand when documents change — confirm.
    # doc_split(project_path)

    # Start the large-model processing flow.
    asyncio.run(DbStructFlow(project_path).run())

    # Generate the instruction data table.
    tc_data_generate()


if __name__ == "__main__":
    main()