import asyncio
|
import os
|
import sys
|
|
from db_struct_flow import DbStructFlow, tc_data_generate
|
from knowledgebase.db.doc_db_helper import doc_dbh
|
from knowledgebase.doc.doc_processor import DocProcessor
|
from knowledgebase.doc.entity_helper import init_entity_helper
|
|
|
def doc_split(project_path):
    """Run ``DocProcessor`` on each ``.docx`` file in the project's docs folder.

    Only the direct entries of ``<project_path>/docs`` are considered (no
    recursion), and only names ending in ``.docx`` are processed. Files are
    visited in the order ``os.listdir`` returns them.

    Args:
        project_path: Root directory of the project; it must contain a
            ``docs`` sub-directory, otherwise ``os.listdir`` raises.
    """
    docs_path = f'{project_path}/docs'
    for entry in os.listdir(docs_path):
        # Skip anything that is not a Word document.
        if not entry.endswith('.docx'):
            continue
        DocProcessor(f'{docs_path}/{entry}').process()
|
|
|
|
def main():
    """Command-line entry point: run the processing pipeline for one project.

    The project path is taken from the first command-line argument. When the
    argument is missing or empty, a usage hint is printed and the function
    returns without doing any work.

    Otherwise the steps are, in order:
      1. configure ``doc_dbh`` with the project path and call
         ``init_entity_helper()``;
      2. run ``DbStructFlow(project_path).run()`` to completion via
         ``asyncio.run``;
      3. call ``tc_data_generate()``.
    """
    # Guard the lookup: indexing sys.argv[1] directly raises IndexError when
    # no argument is supplied, which would bypass the usage message below.
    project_path = sys.argv[1] if len(sys.argv) > 1 else ""
    if not project_path:
        print("missing project path. eg: python main.py <path/to/project>")
        return

    # Split documents (the doc_split call itself is currently disabled).
    doc_dbh.set_project_path(project_path)
    init_entity_helper()
    # doc_split(project_path)

    # Start the large-model processing flow.
    asyncio.run(DbStructFlow(project_path).run())

    # Generate the instruction data table.
    tc_data_generate()
|
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|