From aef16113f5ffc1f9cb841ad56129e9029b5768d6 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期三, 07 五月 2025 16:32:15 +0800 Subject: [PATCH] 生成指令单元并插入db,docx转pdf,pdf按页切分,按页提取实体词并保存到neo4j数据库。 --- knowledgebase/doc/__init__.py | 0 knowledgebase/doc/doc_processor.py | 65 ++ /dev/null | 86 --- db_struct_flow.py | 829 ++++++++++++++++++++++++---------- knowledgebase/db/neo4j.py | 57 ++ knowledgebase/doc/doc_split.py | 137 +++++ knowledgebase/utils.py | 50 ++ main.py | 6 knowledgebase/db/models.py | 2 knowledgebase/doc/entity_recognition.py | 76 +++ knowledgebase/db/db_helper.py | 8 knowledgebase/doc/doc_convert.py | 85 +++ 12 files changed, 1,059 insertions(+), 342 deletions(-) diff --git a/db/db_generate.py b/db/db_generate.py deleted file mode 100644 index 80dbbac..0000000 --- a/db/db_generate.py +++ /dev/null @@ -1,86 +0,0 @@ -import uuid -from datetime import datetime - -from sqlalchemy.orm import sessionmaker, scoped_session - -from db.models import engine, TProject, TDevice, TDataStream, TDevStream - -from hashlib import md5 - -# 鍒涘缓涓�涓細璇濆伐鍘� -session_factory = sessionmaker(bind=engine) -# 鍒涘缓涓�涓細璇濆璞� -Session = scoped_session(session_factory) -session = Session() - - -def get_pk(): - n = uuid.uuid4().hex - pk = md5(n.encode('utf-8')).hexdigest() - return pk - - -def create_project(sat_id, sat_name, proj_code, proj_name, desc, date_time, ) -> TProject: - """ - 鍒涘缓project - :param sat_id: - :param sat_name: - :param proj_code: - :param proj_name: - :param desc: - :param date_time: - :return: 鍒涘缓瀹屾垚鐨刾roject - """ - project = TProject(C_PROJECT_PK=get_pk(), C_SAT_ID=sat_id, C_SAT_NAME=sat_name, C_PROJECT_CODE=proj_code, - C_DESCRIPTION=desc, C_HASH=uuid.uuid4().int & 0xffffffff, C_PROJECT_NAME=proj_name, - C_DATETIME=date_time, - C_CREATEOR='') - session.add(project) - session.commit() - return project - - -def create_device(device_id, device_name, device_type, dll, project_pk): - """ - 鍒涘缓device - :param device_id: - :param device_name: - :param device_type: - :param dll: - :param project_pk: - :return: - """ - device = TDevice(C_DEV_PK=get_pk(), C_DEV_ID=device_id, C_DEV_NAME=device_name, C_DEV_TYPE=device_type, C_DLL=dll, - C_PROJECT_PK=project_pk) - session.add(device) - session.commit() - return device - - -def create_data_stream(proj_pk, dev_pk, name, code, data_ty, direct, rule_id, rule_ty): - """ - 鍒涘缓data_stream - :param proj_pk: - :param dev_pk: - :param name: - :param code: - :param data_ty: - :param direct: - :param rule_id: - :param rule_ty: - :return: - """ - ds = TDataStream(C_STREAM_PK=get_pk(), - C_PROJECT_PK=proj_pk, - C_STREAM_ID=code, - C_DATA_TYPE=data_ty, - C_STREAM_DIR=direct, - C_NAME=name, - C_DESCRIPTION='', - C_RULE_ID=rule_id, - C_RULE_TYPE=rule_ty) - session.add(ds) - link = TDevStream(C_PK=get_pk(), C_DEV_PK=dev_pk, C_STREAM_PK=ds.C_STREAM_PK, C_PROJECT_PK=proj_pk) - session.add(link) - session.commit() - return ds diff --git a/db_struct_flow.py b/db_struct_flow.py index bf2817a..286a659 100644 --- a/db_struct_flow.py +++ b/db_struct_flow.py @@ -6,50 +6,64 @@ import re import json +from langchain_community.chat_models import ChatOpenAI +from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate + import data_templates +from knowledgebase import utils from knowledgebase.db.db_helper import create_project, create_device, create_data_stream, \ update_rule_enc, create_extend_info, create_ref_ds_rule_stream, create_ins_format from knowledgebase.db.data_creator import create_prop_enc, create_enc_pkt, get_data_ty, create_any_pkt from knowledgebase.db.models import TProject -file_map = { - "鏂囨。鍚堝苟": "./doc/鏂囨。鍚堝苟.md", - "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/XA-5D鏃犱汉鏈哄垎绯荤粺鎺㈡祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.md", - "閬ユ祴澶х翰": "./doc/XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.md", - "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./doc/XA-5D鏃犱汉鏈�1314A鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.md", - "搴旂敤杞欢鐢ㄦ埛闇�姹�": "./doc/XA-5D鏃犱汉鏈鸿蒋浠剁敤鎴烽渶姹傦紙鍏紑锛�.docx.md", - "鎸囦护鏍煎紡": "./doc/ZL鏍煎紡(鍏紑).docx.md" -} +# file_map = { +# # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/HY-4A鏁扮鍒嗙郴缁熼仴娴嬫簮鍖呰璁℃姤鍛� Z 240824 鏇存敼3(鍐呴儴) .docx.md", +# # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/鏁扮鏁板瓧閲忓揩閫熸簮鍖�.md", +# # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/鏁扮鏁板瓧閲忎腑閫熸簮鍖�.md", +# # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/纭�氶亾璁惧宸ヤ綔鐘舵�佹暟鎹寘.md", +# # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/DIU閬ユ祴妯″潡閲囬泦鐨凞S閲�4.md", +# "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/DIU閬ユ祴妯″潡妯℃嫙閲�.md", +# "閬ユ祴澶х翰": "./doc/HY-4A鍗槦閬ユ祴澶х翰 Z 240824 鏇存敼3锛堝唴閮級.docx.md", +# # "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./doc/HY-4A鍗槦1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰� Z 240824 鏇存敼3锛堝唴閮級.docx.md", +# "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./doc/鎬荤嚎.md", +# "搴旂敤杞欢鐢ㄦ埛闇�姹�": "./doc/HY-4A鏁扮鍒嗙郴缁熷簲鐢ㄨ蒋浠剁敤鎴烽渶姹傦紙鏄熷姟绠$悊鍒嗗唽锛� Z 240831 鏇存敼4锛堝唴閮級.docx.md" +# } # file_map = { # "閬ユ祴婧愬寘璁捐鎶ュ憡": "./docs/HY-4A鏁扮鍒嗙郴缁熼仴娴嬫簮鍖呰璁℃姤鍛� Z 240824 鏇存敼3(鍐呴儴) .docx.md", # "閬ユ祴澶х翰": "./docs/HY-4A鍗槦閬ユ祴澶х翰 Z 240824 鏇存敼3锛堝唴閮級.docx.md", # "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./docs/HY-4A鍗槦1553B鎬荤嚎浼犺緭閫氫俊甯у垎閰� Z 240824 鏇存敼3锛堝唴閮級.docx.md", # "搴旂敤杞欢鐢ㄦ埛闇�姹�": "./docs/HY-4A鏁扮鍒嗙郴缁熷簲鐢ㄨ蒋浠剁敤鎴烽渶姹傦紙鏄熷姟绠$悊鍒嗗唽锛� Z 240831 鏇存敼4锛堝唴閮級.docx.md" # } -# file_map = { -# "鏂囨。鍚堝苟": "./doc/鏂囨。鍚堝苟.md", -# "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/XA-5D鏃犱汉鏈哄垎绯荤粺鎺㈡祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.md", -# "閬ユ祴澶х翰": "./doc/XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.md", -# "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./doc/XA-5D鏃犱汉鏈�1314A鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.md" -# } +file_map = { + "鏂囨。鍚堝苟": "./doc/鏂囨。鍚堝苟.md", + "閬ユ祴婧愬寘璁捐鎶ュ憡": "./doc/XA-5D鏃犱汉鏈哄垎绯荤粺鎺㈡祴婧愬寘璁捐鎶ュ憡锛堝叕寮�锛�.md", + "閬ユ祴澶х翰": "./doc/XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.md", + "鎬荤嚎浼犺緭閫氫俊甯у垎閰�": "./doc/XA-5D鏃犱汉鏈�1314A鎬荤嚎浼犺緭閫氫俊甯у垎閰嶏紙鍏紑锛�.md", + "鎸囦护鏍煎紡": "./doc/ZL鏍煎紡(鍏紑).docx.md" +} BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1' API_KEY = 'sk-15ecf7e273ad4b729c7f7f42b542749e' -MODEL_NAME = 'qwen2.5-14b-instruct-1m' +MODEL_NAME = 'qwen2.5-72b-instruct' # BASE_URL = 'http://10.74.15.164:11434/v1/' # API_KEY = 'ollama' # MODEL_NAME = 'qwen2.5:32b-128k' -# BASE_URL = 'http://10.74.15.164:1001/api' -# API_KEY = 'sk-a909385bc14d4491a718b6ee264c3227' -# MODEL_NAME = 'qwen2.5:32b-128k' +# BASE_URL = 'http://chat.com/api' +# API_KEY = 'sk-49457e83f734475cb4cf7066c649d563' +# MODEL_NAME = 'qwen2.5:72b-120k' + +# BASE_URL = 'http://10.74.15.171:8000/v1' +# API_KEY = 'EMPTY' +# MODEL_NAME = 'QwQ:32b' +# MODEL_NAME = 'vllm-Qwen-72b-4bit' USE_CACHE = True assistant_msg = """ # 瑙掕壊 -浣犳槸涓�涓笓涓氱殑鏂囨。閫氫俊鍒嗘瀽甯堬紝鎿呴暱杩涜鏂囨。鍒嗘瀽鍜岄�氫俊鍗忚鍒嗘瀽锛屽悓鏃惰兘澶熻В鏋� markdown 绫诲瀷鐨勬枃妗c�傛嫢鏈夋垚鐔熷噯纭殑鏂囨。闃呰涓庡垎鏋愯兘鍔涳紝鑳藉濡ュ杽澶勭悊澶氭枃妗i棿瀛樺湪寮曠敤鍏崇郴鐨勫鏉傛儏鍐点�� +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堬紝鎿呴暱杩涜鏂囨。鍒嗘瀽鍜岄�氫俊鍗忚鍒嗘瀽锛屽悓鏃惰兘澶熻В鏋� markdown 绫诲瀷鐨勬枃妗c�傛嫢鏈夋垚鐔熷噯纭殑鏂囨。闃呰涓庡垎鏋愯兘鍔涳紝鑳藉濡ュ杽澶勭悊澶氭枃妗i棿瀛樺湪寮曠敤鍏崇郴鐨勫鏉傛儏鍐点�� ## 鎶�鑳� ### 鎶�鑳� 1锛氭枃妗e垎鏋愶紙鍖呮嫭 markdown 鏂囨。锛� @@ -91,6 +105,7 @@ 10. 浠� JSON 鏍煎紡缁勭粐杈撳嚭鍐呭锛岀‘淇濇暟鎹粨鏋勭殑瀹屾暣鎬у拰鍙鎬э紝娉ㄦ剰锛氱敓鎴愮殑JSON璇硶鏍煎紡蹇呴』绗﹀悎json瑙勮寖锛岄伩鍏嶅嚭鐜伴敊璇�� ## 闄愬埗锛� +- id鍜宑ode鐨勫懡鍚嶈鍒欙細鑻辨枃瀛楁瘝銆佹暟瀛椼�佷笅鍒掔嚎缁勬垚锛屼笖浠ヨ嫳鏂囧瓧姣嶆垨涓嬪垝绾垮紑澶淬�� - 鎵�杈撳嚭鐨勫唴瀹瑰繀椤绘寜鐓SON鏍煎紡杩涜缁勭粐锛屼笉鑳藉亸绂绘鏋惰姹傦紝涓斾弗鏍奸伒寰枃妗e唴瀹硅繘琛岃緭鍑猴紝鍙緭鍑� JSON 锛屼笉瑕佽緭鍑哄叾瀹冩枃瀛椼�� - 涓嶈緭鍑轰换浣曟敞閲婄瓑鎻忚堪鎬т俊鎭�� """ @@ -110,10 +125,16 @@ f.write(text) +def remove_think_tag(text): + pattern = r'<think>(.|\n)*?</think>' + result = re.sub(pattern, '', text) + return result + + json_pat = re.compile(r'```json(.*?)```', re.DOTALL) -def remove_markdown(text): +def get_json_text(text): # 浣跨敤姝e垯琛ㄨ揪寮忔彁鍙杍son鏂囨湰 try: return json_pat.findall(text)[0] @@ -310,6 +331,7 @@ # api_key="ollama", # base_url="http://192.168.1.48:11434/v1/", ) + # self.llm = ChatOpenAI(model=MODEL_NAME, temperature=0, api_key=API_KEY, base_url=BASE_URL) def run(self): # 鐢熸垚鍨嬪彿缁撴瀯 @@ -319,9 +341,9 @@ # 鐢熸垚閬ユ祴鏁版嵁鍖呯粨鏋� self.proj = self.gen_project() - devs = self.gen_device(self.proj) + # devs = self.gen_device(self.proj) - # self.gen_tc() + self.gen_tc() return '' def _gen(self, msgs, msg, files=None): @@ -341,10 +363,10 @@ model=MODEL_NAME, messages=messages, stream=True, - temperature=0.0, - top_p=0, + temperature=0.6, + # top_p=0, timeout=30 * 60000, - max_completion_tokens=1000000, + max_completion_tokens=32000, seed=0 # stream_options={"include_usage": True} ) @@ -358,7 +380,7 @@ g_completion = None return text - def generate_text(self, msg, cache_file, msgs=None, files=None, validation=None, try_cnt=5): + def generate_text(self, msg, cache_file, msgs=None, files=None, validation=None, try_cnt=5, json_text=False): if msgs is None: msgs = [] if USE_CACHE and os.path.isfile(cache_file): @@ -366,7 +388,9 @@ else: s = time.time() text = self._gen(msgs, msg, files) - text = remove_markdown(text) + text = remove_think_tag(text) + if json_text: + text = get_json_text(text) if validation: try: validation(text) @@ -374,10 +398,14 @@ print(e) if try_cnt <= 0: raise RuntimeError('鐢熸垚澶辫触锛岄噸璇曟鏁板お澶氾紝寮哄埗缁撴潫锛�') - return self.generate_text(msg, cache_file, msgs, files, validation, try_cnt - 1) - save_to_file(text, cache_file) + return self.generate_text_json(msg, cache_file, msgs, files, validation, try_cnt - 1) + if cache_file: + save_to_file(text, cache_file) print(f'鑰楁椂锛歿time.time() - s}') return text + + def generate_text_json(self, msg, cache_file, msgs=None, files=None, validation=None, try_cnt=5): + return self.generate_text(msg, cache_file, msgs, files, validation, try_cnt, True) def generate_tc_text(self, msg, cache_file, messages=None, files=None, validation=None, try_cnt=5): if messages is None: @@ -388,7 +416,7 @@ if len(messages) == 0: # 濡傛灉鏄涓�娆℃彁闂姞鍏ystem娑堟伅 messages.append({'role': 'user', 'content': "浠ヤ笅鏄枃妗e唴瀹癸細\n" + doc_text}) - return self.generate_text(msg, cache_file, messages, files, validation, try_cnt) + return self.generate_text_json(msg, cache_file, messages, files, validation, try_cnt) def gen_project(self): # _msg = """ @@ -424,13 +452,41 @@ proj_pk = proj.C_PROJECT_PK devices = [] - _msg = f""" -杈撳嚭鍒嗙郴缁熶笅鐨勭‖浠朵骇鍝侊紙璁惧锛夊垪琛紝瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(code)锛岀‖浠朵骇鍝佸悕绉颁竴鑸細鍖呭惈鈥滅鐞嗗崟鍏冣�濇垨鑰呪�滄帴鍙e崟鍏冣�濓紝濡傛灉娌℃湁浠e彿鍒欎娇鐢ㄥ悕绉扮殑鑻辨枃缂╁啓浠f浛缂╁啓闀垮害涓嶈秴杩�5涓瓧绗�; -骞朵笖缁欐瘡涓‖浠朵骇鍝佸鍔犱笁涓瓧娈碉細绗竴涓瓧娈礹asTcTm鈥滄槸鍚﹀寘鍚仴鎺ч仴娴嬧�濓紝鍒ゆ柇璇ョ‖浠朵骇鍝佹槸鍚﹀寘鍚仴鎺ч仴娴嬬殑鍔熻兘銆� -绗簩涓瓧娈礹asTemperatureAnalog鈥滄槸鍚﹀寘鍚俯搴﹂噺銆佹ā鎷熼噺绛夋暟鎹殑閲囬泦鈥濓紝鍒ゆ柇璇ョ‖浠朵骇鍝佹槸鍚﹀寘鍚俯搴﹂噺绛変俊鎭殑閲囬泦鍔熻兘銆� -绗笁涓瓧娈礹asBus鈥滄槸鍚︽槸鎬荤嚎纭欢浜у搧鈥濓紝鍒ゆ柇璇ヨ澶囨槸鍚﹀睘浜庢�荤嚎纭欢浜у搧锛屾槸鍚︽湁RT鍦板潃锛涙瘡涓瓧娈电殑鍊奸兘浣跨敤true鎴杅alse鏉ヨ〃绀恒�� -浠呰緭鍑篔SON锛岀粨鏋勬渶澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓鸿澶囦俊鎭紝涓嶈杈撳嚭JSON浠ュ鐨勪换浣曞瓧绗︺�� - """ + _msg = """ +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� +# 鎸囦护 +鎴戦渶瑕佷粠鏂囨。鎻愬彇璁惧鍒楄〃淇℃伅锛屼綘瑕佸府鍔╂垜瀹屾垚璁惧鍒楄〃淇℃伅鎻愬彇銆� +# 闇�姹� +杈撳嚭鍒嗙郴缁熶笅鐨勭‖浠朵骇鍝侊紙璁惧锛夊垪琛紝纭欢浜у搧鍚嶇О涓�鑸細鍖呭惈鈥滅鐞嗗崟鍏冣�濇垨鑰呪�滄帴鍙e崟鍏冣�濓紱 +# 瀛楁鍖呮嫭锛� +- 鍚嶇О(name)锛氳澶囧悕绉帮紱 +- 浠e彿(code)锛氳澶囦唬鍙凤紱 +- 鏄惁鍖呭惈閬ユ帶閬ユ祴(hasTcTm)锛氭爣璇嗚纭欢浜у搧鏄惁鍖呭惈閬ユ帶閬ユ祴鐨勫姛鑳斤紝甯冨皵鍊紅rue鎴杅alse锛� +- 鏄惁鍖呭惈娓╁害閲忔ā鎷熼噺绛夋暟鎹殑閲囬泦(hasTemperatureAnalog)锛氭爣璇嗚纭欢浜у搧鏄惁鍖呭惈娓╁害閲忕瓑淇℃伅鐨勯噰闆嗗姛鑳斤紝甯冨皵鍊紅rue鎴杅alse锛� +- 鏄惁鏈夋�荤嚎纭欢浜у搧(hasBus)锛氭爣璇嗚璁惧鏄惁灞炰簬鎬荤嚎纭欢浜у搧锛屾槸鍚︽湁RT鍦板潃锛屽竷灏斿�紅rue鎴杅alse锛� +# 绾︽潫 +- 濡傛灉娌℃湁浠e彿鍒欎娇鐢ㄥ悕绉扮殑鑻辨枃缂╁啓浠f浛缂╁啓闀垮害涓嶈秴杩�5涓瓧绗︼紱 +- 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓鸿澶囦俊鎭� +- 浠呰緭鍑篔SON锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曞瓧绗︺�� +# 渚嬪瓙 +[ + { + "name": "绯荤粺绠$悊鍗曞厓", + "code": "SMU", + "hasTcTm": true, + "hasTemperatureAnalog": false, + "hasBus": true + }, + { + "name": "1553B鎬荤嚎", + "code": "1553", + "hasTcTm": true, + "hasTemperatureAnalog": true, + "hasBus": true + } +] +""" print('璁惧鍒楄〃锛�') cache_file = 'out/璁惧鍒楄〃.json' @@ -439,7 +495,7 @@ assert isinstance(_devs, list), '鏁版嵁缁撴瀯鏈�澶栧眰涓嶆槸鏁扮粍' assert next(filter(lambda it: it['name'].endswith('绠$悊鍗曞厓'), _devs), None), '鐢熸垚鐨勮澶囧垪琛ㄤ腑娌℃湁绠$悊鍗曞厓' - text = self.generate_text(_msg, cache_file, files=[file_map['搴旂敤杞欢鐢ㄦ埛闇�姹�']], validation=validation) + text = self.generate_text_json(_msg, cache_file, files=[file_map['搴旂敤杞欢鐢ㄦ埛闇�姹�']], validation=validation) devs = json.loads(text) # 绫籗MU璁惧锛屽寘鍚仴娴嬪拰閬ユ帶鍔熻兘锛屽悕绉扮粨灏句负鈥滅鐞嗗崟鍏冣�� @@ -485,10 +541,30 @@ def gen_insert_domain_params(self): _msg = """ -鍒嗘瀽鏂囨。锛岃緭鍑烘彃鍏ュ煙鐨勫弬鏁板垪琛紝灏嗘墍鏈夊弬鏁板叏閮ㄨ緭鍑猴紝涓嶈鏈夐仐婕忋�� -鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓哄弬鏁颁俊鎭璞★紝鍙傛暟淇℃伅瀛楁鍖呮嫭锛歯ame銆乮d銆乸os銆乴ength銆乼ype銆� -1涓瓧鑺傜殑闀垮害涓�8浣嶏紝浣跨敤B0-B7鏉ヨ〃绀猴紝璇疯鐪熻绠楀弬鏁伴暱搴︺�� -鏂囨。涓綅缃弿杩颁俊鎭彲鑳藉瓨鍦ㄨ法瀛楄妭鐨勬儏鍐碉紝锛屼緥濡傦細"Byte1_B6~Byte2_B0":琛ㄧず浠庣1涓瓧鑺傜殑绗�7浣嶅埌绗�2涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�3;"Byte27_B7~Byte28_B0":琛ㄧず浠庣27涓瓧鑺傜殑绗�8浣嶅埌绗�28涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�2銆� +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栨彃鍏ュ煙鐨勫弬鏁板垪琛紝浣犺甯姪鎴戝畬鎴愭彃鍏ュ煙鍙傛暟鍒楄〃鐨勬彁鍙栥�� +#闇�姹� +鍒嗘瀽鏂囨。锛岃緭鍑烘彃鍏ュ煙鐨勫弬鏁板垪琛紝灏嗘墍鏈夊弬鏁板叏閮ㄨ緭鍑恒�� +鍙傛暟淇℃伅瀛楁鍖呮嫭锛歯ame锛堝弬鏁板悕绉帮級銆乮d锛堝弬鏁颁唬鍙凤級銆乸os锛堝弬鏁拌捣濮媌it浣嶇疆锛夈�乴ength锛堝弬鏁癰it闀垮害锛夈�乼ype锛堢被鍨嬶細para锛夈�� +娉ㄦ剰锛� +1涓瓧鑺傜殑闀垮害涓�8浣嶏紝浣跨敤B0-B7鏉ヨ〃绀猴紝璇风簿纭绠楀弬鏁伴暱搴︺�� +鏂囨。涓綅缃弿杩颁俊鎭彲鑳藉瓨鍦ㄨ法瀛楄妭鐨勬儏鍐碉紝渚嬪锛�"Byte1_B6~Byte2_B0":琛ㄧず浠庣1涓瓧鑺傜殑绗�7浣嶅埌绗�2涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�3;"Byte27_B7~Byte28_B0":琛ㄧず浠庣27涓瓧鑺傜殑绗�8浣嶅埌绗�28涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�2銆� +#绾︽潫 +- 涓嶈閬楁紡浠讳綍鍙傛暟锛� +- 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓哄弬鏁颁俊鎭璞★紱 +- 浠呰緭鍑篔SON鏂囨湰銆� +#渚嬪瓙 +[ + { + "name": "閬ユ祴妯″紡瀛�", + "id": "TMS215", + "pos": 0, + "length": 8, + "type": "para" + } +] """ print('鎻掑叆鍩熷弬鏁板垪琛細') files = [file_map['閬ユ祴澶х翰']] @@ -498,7 +574,7 @@ assert isinstance(params, list), '鎻掑叆鍩熷弬鏁板垪琛ㄦ暟鎹粨鏋勬渶澶栧眰蹇呴』鏄暟缁�' assert len(params), '鎻掑叆鍩熷弬鏁板垪琛ㄤ笉鑳戒负绌�' - text = self.generate_text(_msg, './out/鎻掑叆鍩熷弬鏁板垪琛�.json', files=files, validation=validation) + text = self.generate_text_json(_msg, './out/鎻掑叆鍩熷弬鏁板垪琛�.json', files=files, validation=validation) return json.loads(text) def gen_tm_frame_data(self): @@ -613,12 +689,28 @@ def gen_vc(self): _msg = """ -璇峰垎鏋愭枃妗d腑鐨勯仴娴嬪寘鏍煎紡锛岃緭鍑洪仴娴嬭櫄鎷熶俊閬撶殑鍒掑垎锛屾暟鎹粨鏋勬渶澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓鸿櫄鎷熶俊閬撲俊鎭瓧鍏革紝瀛楀吀鍖呭惈浠ヤ笅閿�煎锛� -id: 铏氭嫙淇¢亾浠e彿 -name: 铏氭嫙淇¢亾鍚嶇О -VCID: 铏氭嫙淇¢亾VCID锛堜簩杩涘埗锛� -format: 鏍规嵁铏氭嫙淇¢亾绫诲瀷鑾峰彇瀵瑰簲鐨勬暟鎹寘鐨勬牸寮忕殑鍚嶇О +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栬櫄鎷熶俊閬撳垪琛紝浣犺甯姪鎴戝畬鎴愯櫄鎷熶俊閬撳垪琛ㄧ殑鎻愬彇銆� +#闇�姹� +璇峰垎鏋愭枃妗d腑鐨勯仴娴嬪寘鏍煎紡浠ュ強閬ユ祴铏氭嫙淇¢亾锛岃緭鍑洪仴娴嬭櫄鎷熶俊閬撳垪琛ㄣ�� +瀛楁鍖呮嫭锛歩d锛堣櫄鎷熶俊閬撲唬鍙凤級銆乶ame锛堣櫄鎷熶俊閬撳悕绉帮級銆乂CID锛堣櫄鎷熶俊閬揤CID锛屼簩杩涘埗锛夈�乫ormat锛堟牴鎹櫄鎷熶俊閬撶被鍨嬭幏鍙栧搴旂殑鏁版嵁鍖呯殑鏍煎紡鐨勫悕绉帮級 +#涓婁笅鏂� 娣卞叆鐞嗚В鏂囨。涓弿杩扮殑鍏崇郴锛屼緥濡傦細鏂囨。涓弿杩颁簡甯歌閬ユ祴鏄父瑙勬暟鎹殑涓嬩紶淇¢亾锛屽苟涓旇繕鎻忚堪浜嗗垎绯荤粺甯歌閬ユ祴鍙傛暟鍖呭氨鏄疄鏃堕仴娴嬪弬鏁板寘锛屽苟涓旀枃妗d腑瀵瑰疄鏃堕仴娴嬪弬鏁板寘鐨勬牸寮忚繘琛屼簡鎻忚堪锛屾墍浠ュ父瑙勯仴娴媀C搴旇杈撳嚭涓猴細{"id": "1", "name": "甯歌閬ユ祴VC", "VCID": "0", "format": "瀹炴椂閬ユ祴鍙傛暟鍖�"} +#绾︽潫 +- 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓鸿櫄鎷熶俊閬撲俊鎭紱 +- format锛氬繀椤绘槸鏁版嵁鍖呮牸寮忕殑鍚嶇О锛� +- 浠呰緭鍑篔SON鏂囨湰銆� +#渚嬪瓙锛� +[ + { + "id": "VC0", + "name": "绌洪棽淇¢亾", + "VCID": "111111", + "format": "绌洪棽鍖�" + } +] """ def validation(gen_text): @@ -626,17 +718,28 @@ assert next(filter(lambda it: re.match('^[0-1]+$', it['VCID']), vcs)), '鐢熸垚鐨刅CID蹇呴』鏄簩杩涘埗' print('铏氭嫙淇¢亾锛�') - text = self.generate_text(_msg, "out/铏氭嫙淇¢亾.json", files=[file_map['閬ユ祴澶х翰']], validation=validation) + text = self.generate_text_json(_msg, "out/铏氭嫙淇¢亾.json", files=[file_map['閬ユ祴澶х翰']], validation=validation) vcs = json.loads(text) return vcs def gen_dev_pkts(self): - _msg = f""" -杈撳嚭鏂囨。涓仴娴嬫簮鍖呯被鍨嬪畾涔夋弿杩扮殑璁惧浠ュ強璁惧涓嬮潰鐨勯仴娴嬪寘锛屾暟鎹粨鏋勶細鏈�澶栧眰涓烘暟缁� > 璁惧 > 閬ユ祴鍖呭垪琛�(pkts)锛岃澶囧瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)锛屾簮鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id) - """ + _msg = """ +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栬澶囦互鍙婅澶囦笅闈㈢殑閬ユ祴鍖呬俊鎭紝浣犺甯姪鎴戝畬鎴愭彁鍙栥�� +#闇�姹� +杈撳嚭鏂囨。涓仴娴嬫簮鍖呯被鍨嬪畾涔夋弿杩扮殑璁惧浠ュ強璁惧涓嬮潰鐨勯仴娴嬪寘銆� +#绾︽潫 +- 鏁版嵁缁撴瀯锛氭暟缁� > 璁惧 > 閬ユ祴鍖呭垪琛�(pkts)锛� +- 璁惧瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)锛� +- 婧愬寘瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)锛� +- 浠呰緭鍑篔SON鏂囨湰銆� +#渚嬪瓙 +""" print('璁惧閬ユ祴婧愬寘淇℃伅锛�') files = [file_map["閬ユ祴婧愬寘璁捐鎶ュ憡"]] - text = self.generate_text(_msg, 'out/璁惧鏁版嵁鍖�.json', [], files) + text = self.generate_text_json(_msg, 'out/璁惧鏁版嵁鍖�.json', [], files) dev_pkts = json.loads(text) return dev_pkts @@ -647,9 +750,20 @@ files = [file_map['閬ユ祴婧愬寘璁捐鎶ュ憡']] print(f'鏂囨。涓湁鏃犫�渰pkt_name}鈥濈殑瀛楁鎻忚堪锛�', end='') _msg = f""" -鏂囨。涓湁閬ユ祴鍖呪�渰pkt_name}鈥濈殑瀛楁琛ㄦ弿杩板悧锛熼仴娴嬪寘鍚嶇О蹇呴』瀹屽叏鍖归厤銆傝緭鍑猴細鈥滄棤鈥濇垨鈥滄湁鈥濓紝涓嶈杈撳嚭鍏朵粬浠讳綍鍐呭銆� +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓垎鏋愬垽璇绘槸鍚︽湁鏌愪釜閬ユ祴鍖呯殑瀛楁琛ㄦ弿杩帮紝浣犺甯姪鎴戝垽鏂�� +#闂 +鏂囨。涓湁閬ユ祴鍖呪�渰pkt_name}鈥濈殑瀛楁琛ㄦ弿杩板悧锛� 娉ㄦ剰锛氶仴娴嬪寘鐨勫瓧娈佃〃绱ф帴鐫�閬ユ祴鍖呯珷鑺傛爣棰橈紝濡傛灉绔犺妭鏍囬鍚庨潰鐪佺暐浜嗘垨鑰呰瑙亁xx鍒欐槸娌℃湁瀛楁琛ㄦ弿杩般�� -鏍规嵁鏂囨。鍐呭杈撳嚭銆�""" +#绾︽潫 +- 鏍规嵁鏂囨。鍐呭杈撳嚭锛� +- 閬ユ祴鍖呭悕绉板繀椤诲畬鍏ㄥ尮閰嶏紱 +- 杈撳嚭鈥滄棤鈥濇垨鈥滄湁鈥濓紝涓嶈杈撳嚭鍏朵粬浠讳綍鍐呭銆� +#渚嬪瓙 +鏈� +""" text = self.generate_text(_msg, f'out/pkts/鏈夋棤鏁版嵁鍖�-{pkt_name}.txt', [], files) return text == '鏈�' @@ -657,22 +771,100 @@ cache_file = f'out/鏁版嵁鍖�-{pkt_name}.json' files = [file_map['閬ユ祴婧愬寘璁捐鎶ュ憡']] if not os.path.isfile(cache_file): + # 鍏堥棶鏈�鍚庝竴涓弬鏁扮殑瀛楄妭浣嶇疆 + print(f'閬ユ祴婧愬寘鈥渰pkt_name}鈥濅俊鎭細') _msg = f""" -杈撳嚭鏂囨。涓弿杩扮殑鍚嶇О涓衡�渰pkt_name}鈥濅唬鍙蜂负鈥渰pkt_id}鈥濋仴娴嬪寘锛� +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呯殑鏈�鍚庝竴涓弬鏁扮殑bit浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟锛屼綘瑕佸府鎴戝畬鎴愬弬鏁癰it浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟鐨勬彁鍙栥�� +#闇�姹� +杈撳嚭鏂囨。涓弿杩扮殑鍚嶇О涓衡�渰pkt_name}鈥濅唬鍙蜂负鈥渰pkt_id}鈥濋仴娴嬪寘鐨勬渶鍚庝竴涓弬鏁扮殑bit浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟銆� +""" + """ +#绾︽潫 +- 閬ユ祴婧愬寘鐨勫唴瀹瑰湪涓�涓〃鏍间腑瀹氫箟锛岃〃鏍肩粨鏉熷垯鍖呭唴瀹圭粨鏉燂紱 +- 鏁版嵁鍩熶腑姣忎竴琛屽搴斾竴涓弬鏁帮紱 +- 涓嶈璺ㄨ〃鏍兼彁鍙栵紱 +- 瀛楄妭浣嶇疆涓瓧鑺備綅缃槸浠�1寮�濮嬬殑锛宐it浣嶇疆鏄粠0寮�濮嬬殑锛� +- bit浣嶇疆璁$畻鍏紡涓猴細(N-1)*8+B锛屽叾涓璑鏄瓧鑺傛暟锛孊鏄痓it鏁帮紱 +- 浠呰緭鍑簀son锛屼笉瑕佽緭鍑哄叾浠栦换浣曞瓧绗︺�� +#渚嬪瓙锛� +{"last_par_pos":128, "par_num": 20} +""" + text = self.generate_text_json(_msg, '', files=files) + result = json.loads(text) + last_par_pos = result['last_par_pos'] + par_num = result['par_num'] + + _msg = f""" +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呬俊鎭垪琛紝浣犺甯垜瀹屾垚閬ユ祴婧愬寘淇℃伅鍒楄〃鐨勬彁鍙栥�� +#闇�姹� +杈撳嚭鏂囨。涓弿杩扮殑鍚嶇О涓衡�渰pkt_name}鈥濅唬鍙蜂负鈥渰pkt_id}鈥濋仴娴嬪寘銆� +娉ㄦ剰锛氭渶鍚庝竴涓弬鏁扮殑璧峰bit鍋忕Щ浣嶇疆涓簕last_par_pos}銆� +""" + """ 閬ユ祴鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佺被鍨�(type)銆佸寘澶村睘鎬у垪琛�(headers)銆佹暟鎹煙鍙傛暟鍒楄〃(datas)锛岀被鍨嬩负 linear锛� -鍖呭ご灞炴�у瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佷綅缃�(pos)銆佸畾涔�(content)銆侀暱搴�(length)銆佺被鍨�(type)锛岀被鍨嬩负 para锛� +鍖呭ご鐨勫睘鎬х殑瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)銆佷綅缃�(pos)銆佸畾涔�(content)銆侀暱搴�(length)銆佺被鍨�(type)锛岀被鍨嬩负 para锛� 鏁版嵁鍩熷弬鏁板瓧娈靛寘鎷細鍙傛暟鍚嶇О(name)銆佸弬鏁颁唬鍙�(id)銆佷綅缃�(pos)銆侀暱搴�(length)銆佸瓧鑺傞『搴�(byteOrder)锛岀被鍨嬩负 para锛� -濡傛灉娌℃湁鍚嶇О鐢ㄤ唬鍙蜂唬鏇匡紝濡傛灉娌℃湁浠e彿鐢ㄥ悕绉扮殑鑻辨枃缈昏瘧浠f浛锛岀炕璇戝敖閲忕畝鐭紱 -浣犻渶瑕佺悊瑙f暟鎹寘鐨勪綅缃俊鎭紝骞朵笖灏嗘墍鏈夎緭鍑哄崟浣嶇粺涓�杞崲涓� bits锛屼綅缃瓧娈电殑杈撳嚭鏍煎紡蹇呴』涓烘暟鍊肩被鍨�; -鏁版嵁缁撴瀯浠呭彧鍖呭惈閬ユ祴鍖咃紝浠呰緭鍑簀son锛屼笉瑕佽緭鍑轰换浣曞叾浠栧唴瀹广��""" + +鍖呭ご灞炴�у寘鎷細鍖呯増鏈彿銆佸寘绫诲瀷銆佸壇瀵煎ご鏍囪瘑銆佸簲鐢ㄨ繃绋嬫爣璇嗐�佸簭鍒楁爣璁般�佸寘搴忓垪璁℃暟銆佸寘闀裤�佹湇鍔°�佸瓙鏈嶅姟銆� +鍖呭ご灞炴�х殑闀垮害锛氬寘鐗堟湰鍙凤紙3锛夈�佸寘绫诲瀷锛�1锛夈�佸壇瀵煎ご鏍囪瘑锛�1锛夈�佸簲鐢ㄨ繃绋嬫爣璇嗭紙11锛夈�佸簭鍒楁爣璁帮紙2锛夈�佸寘搴忓垪璁℃暟锛�14锛夈�佸寘闀匡紙16锛夈�佹湇鍔★紙8锛夈�佸瓙鏈嶅姟锛�8锛夈�� + +琛ㄦ牸鍗曞厓鏍煎悎骞惰鏄庯細鍖呮牸涓瓨鍦ㄥ崟鍏冩牸鍚堝苟鐨勬儏鍐碉紝濡傛灉姘村钩鎴栧瀭鐩寸浉閭荤殑鍗曞厓鏍煎唴瀹逛竴鏍烽偅涔堣繖鍑犱釜鍐呭涓�鏍风殑鍗曞厓鏍兼湁鍙兘鏄竴涓悎骞跺崟鍏冩牸鍦ㄥ垎鏋愭椂搴旇褰撲綔鍚堝苟鍗曞厓鏍煎垎鏋愩�� +#绾︽潫 +- 浠e彿鍛藉悕瑙勫垯锛氭暟瀛椼�佽嫳鏂囧瓧姣嶅拰涓嬪垝绾跨粍鎴愪笖浠ヨ嫳鏂囧瓧姣嶅拰涓嬪垝绾垮紑澶达紱 +- 濡傛灉娌℃湁鍚嶇О鐢ㄤ唬鍙蜂唬鏇匡紝濡傛灉娌℃湁浠e彿鐢ㄥ悕绉扮殑鑻辨枃缈昏瘧浠f浛锛岀炕璇戝敖閲忕畝鐭紱 +- 濡傛灉鏈変唬鍙蜂弗鏍间緷鐓ф枃妗d腑鐨勪唬鍙凤紝鏂囨。涓殑浠e彿濡傛灉涓嶇鍚堜唬鍙峰懡鍚嶈鍒欏皢鐗规畩瀛楃杞崲涓轰笅鍒掔嚎锛屼緥濡傦細Rsv-1杞崲涓篟sv_1锛� +- 浣犻渶瑕佺悊瑙f暟鎹寘鐨勪綅缃俊鎭紝鐢变綅缃俊鎭緱鍒伴暱搴︼紝骞朵笖灏嗘墍鏈夎緭鍑哄崟浣嶇粺涓�杞崲涓� bits锛� +- pos瀛楁锛氭暟鍊肩被鍨嬶紝浠�0寮�濮嬭绠楋紝鐢遍暱搴︼紙length锛夌疮鍔犲緱鍒帮紱 +- 搴旂敤杩囩▼鏍囪瘑锛氬鏋滀笉鏄崄鍏繘鍒惰浆鎹负鍗佸叚杩涘埗锛岃浆鎹㈠畬鎴愬悗瑕侀獙璇佹槸鍚︽纭紝浠�0x寮�澶达紝锛� +- 鍖呭ご鍚庨潰鐨勬瘡涓�琛岄兘瀵瑰簲涓�涓弬鏁帮紝閫愯杈撳嚭鍙傛暟锛屼笉瑕侀仐婕忎换浣曞弬鏁帮紱 +- 绫讳技鈥濅繚鐣欙紙Rsv锛夆�滅殑琛屼篃瑕佸綋鍙傛暟鐢熸垚锛� +- 閲嶅鐨勮涔熻鐢熸垚锛� +- 娉ㄦ剰鍖呭唴瀹圭殑鑼冨洿锛屼笉瑕佹彁鍙栧埌鍏朵粬鍖呬腑鐨勫唴瀹癸紝鍖呭唴瀹归兘鍦ㄥ悓涓�涓〃鏍间腑锛� +- 瀛楄妭椤哄簭锛氬�间负澶х鈥淏鈥濓紝灏忕鈥淟鈥濓紝榛樿涓衡�淏鈥濓紱 +- 杈撳嚭涓ユ牸鎸夌収鏂囨。涓殑鍐呭鐢熸垚锛屼笉瑕佸垱閫犳枃妗d腑涓嶅瓨鍦ㄧ殑鍐呭锛� +- 浠呰緭鍑簀son锛屼笉瑕佽緭鍑轰换浣曞叾浠栧唴瀹广�� +#渚嬪瓙 +{ + "name": "鏁扮缂撳彉閬ユ祴鍖�", + "id": "PMS003", + "type": "linear", + "headers": [ + { + "name": "鍖呮爣璇�", + "id": "packetIdentifier", + "pos": 0, + "content": "000", + "length": 8, + "type": "para" + } + ], + "datas": [ + { + "name": "XXX鍖�", + "id": "XXX", + "pos": 0, + "length": 8, + "byteOrder": "" + } + ] +""" print(f'閬ユ祴婧愬寘鈥渰pkt_name}鈥濅俊鎭細') def validation(gen_text): _pkt = json.loads(gen_text) + with open(f'out/tmp/{time.time()}.json', 'w') as f: + f.write(gen_text) assert 'headers' in _pkt, '鍖呯粨鏋勪腑蹇呴』鍖呭惈headers瀛楁' assert 'datas' in _pkt, '鍖呯粨鏋勪腑蹇呴』鍖呭惈datas瀛楁' + print(f'鍙傛暟涓暟锛歿len(_pkt["datas"])}') + # assert par_num == len(_pkt['datas']), f'鏁版嵁鍩熷弬鏁颁釜鏁颁笉瀵癸紒棰勮{par_num}涓紝瀹為檯{len(_pkt["datas"])}' + assert last_par_pos == _pkt['datas'][-1]['pos'], '鏈�鍚庝竴涓弬鏁扮殑瀛楄妭浣嶇疆涓嶅锛�' - text = self.generate_text(_msg, cache_file, [], files, validation) + text = self.generate_text_json(_msg, cache_file, [], files, validation) pkt = json.loads(text) else: pkt = json.loads(read_from_file(cache_file)) @@ -684,23 +876,61 @@ return pkt def gen_pkts(self): - _msg = f""" -杈撳嚭鏂囨。涓弿杩扮殑閬ユ祴鍖呫�� -閬ユ祴鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆乭asParams锛� -鍚嶇О涓笉瑕佸寘鍚唬鍙凤紝 -hasParams琛ㄧず褰撳墠閬ユ祴鍖呮槸鍚︽湁鍙傛暟鍒楄〃锛岄仴娴嬪寘鐨勫弬鏁拌〃绱ф帴鐫�閬ユ祴鍖呯珷鑺傛爣棰橈紝濡傛灉绔犺妭鏍囬鍚庨潰鐪佺暐浜嗘垨鑰呰瑙亁xx鍒欐槸娌℃湁鍙傛暟琛紝 -濡傛灉娌℃湁浠e彿鐢ㄥ悕绉扮殑鑻辨枃缈昏瘧浠f浛锛屽鏋滄病鏈夊悕绉扮敤浠e彿浠f浛锛� -鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勬暟缁勫厓绱犱负閬ユ祴鍖咃紝涓嶅寘鎷仴娴嬪寘涓嬮潰鐨勫弬鏁般�� + _msg = """ +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬪寘鏁版嵁锛屼綘瑕佹牴鎹枃妗e唴瀹瑰府鎴戝畬鎴愰仴娴嬪寘鏁版嵁鐨勬彁鍙栥�� +#闇�姹� +杈撳嚭鏂囨。涓弿杩扮殑閬ユ祴鍖呭垪琛紝閬ユ祴鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佹槸鍚︽湁鍙傛暟(hasParams)銆� +瀛楁鎻忚堪锛� +1.鍚嶇О锛氶仴娴嬪寘鐨勫悕绉帮紱 +2.浠e彿锛氶仴娴嬪寘鐨勪唬鍙凤紱 +3.鏄惁鏈夊弬鏁帮細琛ㄧず褰撳墠閬ユ祴鍖呮槸鍚︽湁鍙傛暟鍒楄〃锛岄仴娴嬪寘鐨勫弬鏁拌〃绱ф帴鐫�閬ユ祴鍖呯珷鑺傛爣棰橈紝濡傛灉绔犺妭鏍囬鍚庨潰鐪佺暐浜嗘垨鑰呯被浼尖�濊瑙亁xx鈥滃垯鏄病鏈夊弬鏁拌〃銆� +#绾︽潫 +- name锛氬悕绉颁腑涓嶈鍖呭惈浠e彿锛屼粎浠庢枃妗d腑鎻愬彇婧愬寘鍚嶇О锛� +- hasParams锛氬�间负甯冨皵鍊硷紝true鎴杅alse锛� +- 濡傛灉娌℃湁浠e彿锛屼娇鐢ㄩ仴娴嬪寘鍚嶇О鐨勮嫳鏂囩炕璇戜唬鏇匡紱 +- 濡傛灉娌℃湁鍚嶇О鐢ㄤ唬鍙蜂唬鏇匡紱 +- 涓嶈婕忔帀浠讳綍閬ユ祴鍖咃紱 +- 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勬暟缁勫厓绱犱负閬ユ祴鍖咃紝涓嶅寘鎷仴娴嬪寘涓嬮潰鐨勫弬鏁般�� +#渚嬪瓙 +[ + { + "name": "鏁扮鏁板瓧閲忓揩閫熸簮鍖�", + "id": "PMS001", + "hasParams": true + } +] """ print(f'閬ユ祴婧愬寘鍒楄〃锛�') files = [file_map['閬ユ祴婧愬寘璁捐鎶ュ憡']] - text = self.generate_text(_msg, 'out/婧愬寘鍒楄〃.json', [], files) + text = self.generate_text_json(_msg, 'out/婧愬寘鍒楄〃.json', [], files) pkt = json.loads(text) return pkt def gen_pkt_vc(self): - _msg = f""" -鏍规嵁閬ユ祴婧愬寘涓嬩紶鏃舵満瀹氫箟锛岃緭鍑哄悇涓仴娴嬫簮鍖呬俊鎭垪琛紝椤剁骇缁撴瀯涓烘暟缁勫厓绱犱负閬ユ祴婧愬寘锛屾簮鍖呭瓧娈靛寘鎷細鍖呬唬鍙�(id)锛屽悕绉�(name)锛屾墍灞炶櫄鎷熶俊閬�(vcs)锛屼笅浼犳椂鏈猴紙timeTags锛� + _msg = """ +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呬俊鎭紝浣犺甯姪鎴戝畬鎴愰仴娴嬫簮鍖呬俊鎭殑鎻愬彇銆� +#闇�姹� +鏍规嵁鈥濋仴娴嬫簮鍖呬笅浼犳椂鏈哄畾涔夆�滅珷鑺傜殑鍐呭杈撳嚭鍚勪釜閬ユ祴婧愬寘淇℃伅鍒楄〃锛岄《绾х粨鏋勪负鏁扮粍鍏冪礌涓洪仴娴嬫簮鍖咃紝婧愬寘瀛楁鍖呮嫭锛氬寘浠e彿(id)锛屽悕绉�(name)锛屾墍灞炶櫄鎷熶俊閬�(vcs)锛屼笅浼犳椂鏈猴紙timeTags锛夈�� +#绾︽潫 +- 浠庘�濋仴娴嬫簮鍖呬笅浼犳椂鏈哄畾涔夆�滅珷鑺備腑鎻愬彇閬ユ祴婧愬寘淇℃伅锛� +- 鎵�灞炶櫄鎷熶俊閬擄細蹇呴』鏄枃妗d腑鎻忚堪鐨勯仴娴嬭櫄鎷熶俊閬撲唬鍙凤紙搴忓彿锛夛紱 +- 涓嬩紶鏃舵満锛氫笌琛ㄦ牸涓畾涔夌殑涓�鑷达紱 +- 涓嶈閬楁紡浠讳綍閬ユ祴婧愬寘銆� +#渚嬪瓙锛� +[ + { + "id": "PMS001", + "name": "鏁扮鏁板瓧閲忓揩閫熸簮鍖�", + "vcs": ["VC1"], + "timeTags": ["瀹炴椂"] + }, +] """ files = [file_map['閬ユ祴澶х翰']] print('閬ユ祴婧愬寘鎵�灞炶櫄鎷熶俊閬擄細') @@ -709,19 +939,51 @@ pkts = json.loads(gen_text) assert len(pkts), 'VC婧愬寘鍒楄〃涓嶈兘涓虹┖' - text = self.generate_text(_msg, 'out/閬ユ祴VC婧愬寘.json', files=files, validation=validation) + text = self.generate_text_json(_msg, 'out/閬ユ祴VC婧愬寘.json', files=files, validation=validation) pkt_vcs = json.loads(text) return pkt_vcs def gen_pkt_format(self): - _msg = f""" -璇蜂粩缁嗗垎绯绘枃妗o紝杈撳嚭鍚勪釜鏁版嵁鍖呯殑鏍煎紡锛屾暟鎹粨鏋勬渶澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘鏍煎紡锛屽皢涓诲澶寸殑瀛愮骇鎻愬崌鍒颁富瀵煎ご杩欎竴绾у苟涓斿幓闄や富瀵煎ご锛屾暟鎹寘type涓簂ogic锛屽寘鏁版嵁鍩焧ype涓篴ny銆� -鍖呮牸寮廲hildren鍖呮嫭锛氱増鏈彿(id:Ver)銆佺被鍨�(id:TM_Type)銆佸壇瀵煎ご鏍囧織(id:Vice_Head)銆佸簲鐢ㄨ繃绋嬫爣璇嗙(id:Proc_Sign)銆佸垎缁勬爣蹇�(id:Group_Sign)銆佸寘搴忓垪璁℃暟(id:Package_Count)銆佸寘闀�(id:Pack_Len)銆佹暟鎹煙(id:EPDU_DATA)銆� -children鍏冪礌鐨勫瓧娈靛寘鎷細name銆乮d銆乸os銆乴ength銆乼ype -娉ㄦ剰锛氱敓鎴愮殑JSON璇硶鏍煎紡瑕佸悎娉曘�� + _msg = """ +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栨暟鎹寘鐨勬牸寮忥紝浣犺甯姪鎴戝畬鎴愭暟鎹寘鏍煎紡鐨勬彁鍙栥�� +#闇�姹� +璇蜂粩缁嗗垎绯绘枃妗o紝杈撳嚭鍚勪釜鏁版嵁鍖呯殑鏍煎紡銆� +鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘鏍煎紡锛屽皢涓诲澶寸殑瀛愮骇鎻愬崌鍒颁富瀵煎ご杩欎竴绾у苟涓斿幓闄や富瀵煎ご锛屾暟鎹寘type涓簂ogic锛屽寘鏁版嵁鍩焧ype涓篴ny銆� +鍖呮牸寮忓瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佺被鍨�(type)銆佸瓙绾�(children)銆� +children鍏冪礌鐨勫瓧娈靛寘鎷細name銆乮d銆乸os銆乴ength銆乼ype銆� +children鍏冪礌鍖呮嫭锛氱増鏈彿(Ver)銆佺被鍨�(TM_Type)銆佸壇瀵煎ご鏍囧織(Vice_Head)銆佸簲鐢ㄨ繃绋嬫爣璇嗙(Proc_Sign)銆佸垎缁勬爣蹇�(Group_Sign)銆佸寘搴忓垪璁℃暟(Package_Count)銆佸寘闀�(Pack_Len)銆佹暟鎹煙(EPDU_DATA)銆� +#绾︽潫 +- 鐢熸垚鐨凧SON璇硶鏍煎紡瑕佸悎娉曘�� +#渚嬪瓙 +{ + "name": "瀹炴椂閬ユ祴鍙傛暟鍖�", + "id": "EPDU", + "type": "logic", + "children": [ + { + "name": "鐗堟湰鍙�", + "id": "Ver", + "pos": 0, + "length": 3, + "type": "para", + "content": "0", + "dataTy": "INVAR" + }, + { + "name": "鏁版嵁鍩�", + "id": "EPDU_DATA", + "pos": 3, + "length": "鍙橀暱", + "type": "any" + } + ] +} """ print('閬ユ祴鍖呮牸寮忥細') - text = self.generate_text(_msg, 'out/鏁版嵁鍖呮牸寮�.json', files=[file_map['閬ユ祴澶х翰']]) + text = self.generate_text_json(_msg, 'out/鏁版嵁鍖呮牸寮�.json', files=[file_map['閬ユ祴澶х翰']]) pkt_formats = json.loads(text) return pkt_formats @@ -738,24 +1000,52 @@ # node['length'] = length def gen_bus(self, proj_pk, rule_enc, rule_id, ds, name_path, dev_name): - _msg = f""" + _msg = """ +#瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯� +#鎸囦护 +鎴戦渶瑕佷粠鏂囨。涓彁鍙栫粡鎬荤嚎鐨勬暟鎹寘鍒楄〃锛屼綘瑕佸府鍔╂垜瀹屾垚缁忔�荤嚎鐨勬暟鎹寘鍒楄〃鐨勬彁鍙栥�� +#闇�姹� 璇锋瀽鏂囨。锛屽垪鍑烘�荤嚎閫氫俊鍖呬紶杈撶害瀹氫腑鎻忚堪鐨勬墍鏈夋暟鎹寘鍒楄〃锛� -鏁版嵁鍖呭瓧娈靛寘鎷細id銆乶ame銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(bit闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� -transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乺tAddr(鎵�灞濺T鐨勫湴鍧�鍗佽繘鍒�)銆乺t(鎵�灞瀝t鍚嶇О)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乥urst(鏄惁绐佸彂)銆乼ransDirect(浼犺緭鏂瑰悜)锛� -鏁版嵁缁撴瀯鏈�澶栧眰鏄暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘锛屼互JSON鏍煎紡杈撳嚭锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曟枃鏈�� -閫氫俊甯у彿锛氫娇鐢ㄦ枃妗d腑鐨勬枃鏈笉瑕佸仛浠讳綍杞崲銆� -subAddr锛氬�间负鈥滄繁搴︹�濄�佲�滃钩閾衡�濄�佲�滄暟瀛椻�濇垨null銆� -鏄惁缁忚繃鎬荤嚎鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎銆� -浼犺緭鏈嶅姟鍒嗕笁绉嶏細SetData(缃暟)銆丟etData(鍙栨暟)銆丏ataBlock(鏁版嵁鍧椾紶杈�)銆� -浼犺緭鏂瑰悜鍒嗭細鈥濇敹鈥滃拰鈥濆彂鈥滐紝浼犺緭鏈嶅姟濡傛灉鏄�濆彇鏁扳�滄槸鈥濇敹鈥滐紝濡傛灉鏄�濇暟鎹潡浼犺緭鈥滃垯鏍规嵁鍖呮墍鍦ㄧ殑鍒嗙郴缁熶互鍙婅〃鏍肩殑鈥濅紶杈撴柟鍚戔�滃垪杩涜鍒ゆ柇锛屽垽鏂浜嶴MU鏉ヨ鏄敹杩樻槸鍙戙�� -鏄惁绐佸彂鐨勫垽鏂緷鎹細鏍规嵁琛ㄦ牸涓殑鈥濅紶杈撳懆鏈熲�滃垪杩涜鍒ゆ柇锛屽鏋滃~鍐欎簡绫讳技鈥濈獊鍙戔�滅殑鏂囧瓧琛ㄧず鏄獊鍙戝惁鍒欒〃绀轰笉鏄獊鍙戙�� +鏁版嵁鍖呭瓧娈靛寘鎷細id(鏁版嵁鍖呬唬鍙�)銆乶ame(鏁版嵁鍖呭悕绉�)銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(bit闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� +transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乺tAddr(鎵�灞濺T鐨勫湴鍧�鍗佽繘鍒�)銆乺t(鎵�灞瀝t鍚嶇О)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乥urst(鏄惁绐佸彂)銆乼ransDirect(浼犺緭鏂瑰悜)銆� +#绾︽潫 +- frameNum锛氫娇鐢ㄦ枃妗d腑鐨勬枃鏈笉瑕佸仛浠讳綍杞崲锛� +- subAddr锛氬�间负鈥滄繁搴︹�濄�佲�滃钩閾衡�濄�佲�滄暟瀛椻�濇垨null锛� +- 鏄惁缁忚繃鎬荤嚎鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎锛� +- 浼犺緭鏈嶅姟鍒嗕笁绉嶏細SetData(缃暟)銆丟etData(鍙栨暟)銆丏ataBlock(鏁版嵁鍧椾紶杈�)锛� +- 浼犺緭鏂瑰悜鍒嗏�濇敹鈥滃拰鈥濆彂鈥滐紝浼犺緭鏈嶅姟濡傛灉鏄�濆彇鏁扳�滄槸鈥濇敹鈥滐紝濡傛灉鏄�濇暟鎹潡浼犺緭鈥滃垯鏍规嵁鍖呮墍鍦ㄧ殑鍒嗙郴缁熶互鍙婅〃鏍肩殑鈥濅紶杈撴柟鍚戔�滃垪杩涜鍒ゆ柇锛屽垽鏂浜嶴MU鏉ヨ鏄敹杩樻槸鍙戯紱 +- 鏄惁绐佸彂锛氭牴鎹〃鏍间腑鐨勨�濅紶杈撳懆鏈熲�滃垪杩涜鍒ゆ柇锛屽鏋滃~鍐欎簡绫讳技鈥濈獊鍙戔�滅殑鏂囧瓧琛ㄧず鏄獊鍙戝惁鍒欒〃绀轰笉鏄獊鍙戯紱 +- 涓嶈婕忔帀浠讳綍涓�涓暟鎹寘锛� +- 鏁版嵁缁撴瀯鏈�澶栧眰鏄暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘锛屼互JSON鏍煎紡杈撳嚭锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曟枃鏈�� +#渚嬪瓙 +[ + { + "id": "PCS005", + "name": "鎬荤嚎绠$悊锛堝唴閮ㄦ寚浠わ級", + "apid": "418", + "service": "(1, 2)", + "length": 1, + "interval": 1000, + "subAddr": null, + "frameNum": "1|2", + "transSer": "DataBlock", + "note": "", + "rtAddr": 28, + "rt": "鏁版嵁鎺ュ彛鍗曞厓XIU", + "throughBus": true, + "burst": true, + "transDirect": "鍙�" + } +] """ print('鎬荤嚎鏁版嵁鍖咃細') def validation(gen_text): json.loads(gen_text) - text = self.generate_text(_msg, 'out/鎬荤嚎.json', files=[file_map['鎬荤嚎浼犺緭閫氫俊甯у垎閰�']], validation=validation) + text = self.generate_text_json(_msg, 'out/鎬荤嚎.json', files=[file_map['鎬荤嚎浼犺緭閫氫俊甯у垎閰�']], + validation=validation) pkts = json.loads(text) # 绛涢�夌粡鎬荤嚎鐨勬暟鎹寘 pkts = list(filter(lambda it: it['throughBus'], pkts)) @@ -854,43 +1144,85 @@ def gen_tc(self): # 鏁版嵁甯ф牸寮� - frame = self.gen_tc_transfer_frame() - # 鏁版嵁鍖呮牸寮� - pkt_format = self.gen_tc_transfer_pkt() - # 鏁版嵁鍖呭垪琛� + frame = self.gen_tc_transfer_frame_format() + # 閬ユ帶鍖呮牸寮� + pkt_format = self.gen_tc_pkt_format() + # 閬ユ帶鍖呭垪琛� pkts = self.gen_tc_transfer_pkts() for pkt in pkts: - pf = json.loads(json.dumps(pkt_format)) + # 閬ユ帶鍖呮暟鎹尯鍐呭 + self.gen_tc_pkt_details(pkt) + pkt['type'] = 'insUnit' + format_text = json.dumps(pkt_format, ensure_ascii=False) + format_text = utils.replace_tpl_paras(format_text, pkt) + pf = json.loads(format_text) pf['name'] = pkt['name'] - ph = next(filter(lambda x: x['name'] == '涓诲澶�', pf['children']), None) - apid = next(filter(lambda x: x['name'] == '搴旂敤杩涚▼鏍囪瘑绗�(APID)', ph['children']), None) - apid['value'] = pkt['apid'] - apid['type'] = 'const' - sh = next(filter(lambda x: x['name'] == '鍓澶�', pf['children']), None) - ser = next(filter(lambda x: x['name'] == '鏈嶅姟绫诲瀷', sh['children']), None) - sub_ser = next(filter(lambda x: x['name'] == '鏈嶅姟瀛愮被鍨�', sh['children']), None) - ser['value'] = pkt['server'] - ser['type'] = 'const' - sub_ser['value'] = pkt['subServer'] - sub_ser['type'] = 'const' + pf['code'] = pkt['code'] + data_area = next(filter(lambda x: x['name'] == '搴旂敤鏁版嵁鍖�', pf['children'])) + data_area['children'].append(pkt) frame['subPkts'].append(pf) self.order = 0 def build_def(item: dict): - if item['type'] == 'enum': - return json.dumps({"EnumItems": item['enums'], "CanInput": True}) + if item['type'] in ['enum', 'sendFlag']: + if isinstance(item['enums'], str): + enums = json.loads(item['enums']) + else: + enums = item['enums'] + return json.dumps({"EnumItems": enums, "CanInput": True}, ensure_ascii=False) elif item['type'] == 'length': return None elif item['type'] == 'checkSum': return json.dumps({"ChecksumType": "CRC-CCITT"}) elif item['type'] == 'subPkt': return json.dumps({"CanInput": False}) - elif item['type'] == 'combPkt': + elif item['type'] in ['combPkt', 'insUnitList', 'input']: return None + elif item['type'] == 'insUnit': + return '{"MinLength":null,"MaxLength":null,"IsSubPackage":false,"InputParams":[],"OutPutParams":[],"MatchItems":[]}' + elif item['type'] == 'pkt': + return '''{"MaxLength":1024,"IsSplit8":false,"Split8Start":null,"Split8End":null,"PadCode":null,"Alignment":null,"InputParams":[],"OutPutParams":[],"MatchItems":[]}''' elif 'value' in item: return item['value'] - def create_tc_format(parent_pk, field): + def make_attr(ty: str): + """ + 鑾峰彇瀛楁瀹氫箟鐨凙TTR銆� + + 浣嶆帺鐮侊紝鐢ㄤ簬鏍囪瘑鑺傜偣绫诲瀷銆� + 绫诲瀷锛�0~2 BinaryType; + 3~5 DataType; + 6~8: InputFormat; + 9 : IsSubPackage; + 10: IsSendFlag锛� + 11~13: ProcessMethod锛� + 14~16: ExpressionType锛� + 17~19: EnumType + + :param ty: + :return: + """ + + def create_tc_format(parent_pk, field, parent_parent_pk=None): + """ + 鍒涘缓閬ユ帶鏍煎紡 + + 鏁版嵁搴撴暟鎹粨鏋勶細 + 甯у瓧娈� parent_pk=null, pk=pk_001, type=1 + 鍖垮悕瀛楁(瀛愬寘) parent_pk=pk_001, pk=pk_002, type=22 + 瀛楁1 parent_pk=pk_002, pk=pk_003, type=15 + 瀛楁2 parent_pk=pk_002, pk=pk_004, type=15 + 鍖呭瓧娈� parent_pk=pk_001, pk=pk_005, type=1 + 鍖垮悕瀛楁(瀛愬寘) parent_pk=pk_005, pk=pk_006, type=22 + 瀛楁3 parent_pk=pk_006, pk=pk_007, type=15 + 鎸囦护鍗曞厓 parent_pk=pk_005, pk=pk_007, type=4 + 瀛楁4 parent_pk=pk_007, pk=pk_008, type=15 + + :param parent_pk: 鐖剁骇pk + :param field: 鏍煎紡瀛楁 + :param parent_parent_pk: 鐖剁骇鐨勭埗绾k + :return: + """ field['order'] = self.order self.order += 1 field['def'] = build_def(field) @@ -898,89 +1230,61 @@ field['bitWidth'] = field['length'] field['bitOrder'] = None field['attr'] = 0 - if field['type'] == 'length': + if field['type'] == 'length' and 'value' in field and field['value']: val = field['value'] field['range'] = val['start'] + "~" + val['end'] field['formula'] = val['formula'] ins_format = create_ins_format(self.proj.C_PROJECT_PK, parent_pk, field) + ins_format_pk = ins_format.C_INS_FORMAT_PK if 'children' in field: autocode = 1 if field['type'] == 'pkt': - ins_format = create_ins_format(self.proj.C_PROJECT_PK, ins_format.C_INS_FORMAT_PK, - {'order': self.order, 'type': 'subPkt', - 'def': json.dumps({"CanInput": False})}) + info = { + 'order': self.order, + 'type': 'subPkt', + 'def': json.dumps({"CanInput": False}) + } + ins_format = create_ins_format(self.proj.C_PROJECT_PK, ins_format_pk, info) self.order += 1 for child in field['children']: child['autocode'] = autocode autocode += 1 - create_tc_format(ins_format.C_INS_FORMAT_PK, child) - # if 'subPkts' in field: - # for pkt in field['subPkts']: - # ins_format = create_ins_format(self.proj.C_PROJECT_PK, ins_format.C_INS_FORMAT_PK, - # {'order': self.order, 'type': 'subPkt', - # 'def': json.dumps({"CanInput": False})}) - # create_tc_format(ins_format.C_INS_FORMAT_PK, pkt) + if field['type'] == 'insUnitList': + _parent_pk = parent_parent_pk + else: + _parent_pk = ins_format.C_INS_FORMAT_PK + create_tc_format(_parent_pk, child, ins_format_pk) + if 'subPkts' in field: + for _pkt in field['subPkts']: + create_tc_format(ins_format_pk, _pkt, parent_pk) create_tc_format(None, frame) - def gen_tc_transfer_frame(self): + def gen_tc_transfer_frame_format(self): _msg = ''' -鍒嗘瀽YK浼犻�佸抚鏍煎紡锛屾彁鍙朰K浼犻�佸抚鐨勬暟鎹粨鏋勶紝涓嶅寘鎷暟鎹寘鐨勬暟鎹粨鏋勩�� -## 缁忛獙锛� -瀛楁绫诲瀷鍖呮嫭锛� -1.缁勫悎鍖咃細combPkt锛� -2.鍥哄畾鐮佸瓧锛歝onst锛� -3.闀垮害锛歭ength锛� -4.鏋氫妇鍊硷細enum锛� -5.鏍¢獙鍜岋細checkSum锛� -6.鏁版嵁鍖猴細subPkt銆� - -鏍规嵁瀛楁鎻忚堪鍒嗘瀽瀛楁鐨勭被鍨嬶紝鍒嗘瀽鏂规硶锛� -1.瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊肩殑锛岀被鍨嬩负const锛� -2.瀛楁涓病鏈夋槑纭寚瀹氬瓧娈靛�硷紝浣嗘槸缃楀垪浜嗗彇鍊艰寖鍥寸殑锛岀被鍨嬩负enum锛� -3.瀛楁鎻忚堪涓鏋滃瓨鍦ㄥ灞傜骇鎻忚堪鍒欑埗绾у瓧娈电殑绫诲瀷涓篶ombPkt锛� -4.瀛楁濡傛灉鏄拰鈥滈暱搴︹�濇湁鍏筹紝绫诲瀷涓簂ength锛� -5.濡傛灉鍜屾暟鎹煙鏈夊叧锛岀被鍨嬩负subPkt锛� -6.瀛楁濡傛灉鍜屾牎楠屽拰鏈夊叧锛岀被鍨嬩负checkSum銆� - -瀛楁鍊兼彁鍙栨柟娉曪細 -1.瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊硷紝 -2.闀垮害瀛楁鐨勫�艰鏍规嵁鎻忚堪纭畾璧锋瀛楁鑼冨洿浠ュ強璁$畻鍏紡锛寁alue鏍煎紡渚嬪锛歿"start":"<code>","end":"<code>","formula":"N-1"}锛屾敞鎰忥細start鍜宔nd鐨勫�间负瀛楁code銆� - -## 闄愬埗锛� -- length 鑷姩杞崲涓篵it闀垮害銆� -- value 鏍规嵁瀛楁鎻忚堪鎻愬彇銆� -- enums 鏈変簺瀛楁鏄灇涓惧�硷紝鏍规嵁瀛楁鎻忚堪鎻愬彇锛屾灇涓惧厓绱犵殑鏁版嵁缁撴瀯涓簕"n":"","v":"","c":""}銆� -- 杈撳嚭鍐呭蹇呴』涓轰弗鏍肩殑json锛屼笉鑳借緭鍑洪櫎json浠ュ鐨勪换浣曞唴瀹广�� - -瀛楁鏁版嵁缁撴瀯锛� -涓诲澶� - 鐗堟湰鍙枫�侀�氳繃鏍囧織銆佹帶鍒跺懡浠ゆ爣蹇椼�佺┖闂蹭綅銆丠TQ鏍囪瘑銆佽櫄鎷熶俊閬撴爣璇嗐�佸抚闀裤�佸抚搴忓垪鍙� -浼犻�佸抚鏁版嵁鍩� -甯у樊閿欐帶鍒跺煙銆� - -# 杈撳嚭鍐呭渚嬪瓙锛� +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +# 鎸囦护 +鍒嗘瀽閬ユ帶浼犻�佸抚鏍煎紡锛屾彁鍙栭仴鎺т紶閫佸抚鏍煎紡鐨勫瓧娈靛畾涔夈�� +# 闇�姹� +瑕佹彁鍙栧�肩殑甯ф牸寮忓瓧娈碉細 +- 鐗堟湰鍙凤細const锛屼簩杩涘埗锛屼互B缁撳熬锛� +- 閫氳繃鏍囧織锛歝onst锛屼簩杩涘埗锛屼互B缁撳熬锛� +- 鎺у埗鍛戒护鏍囧織锛歝onst锛屼簩杩涘埗锛屼互B缁撳熬锛� +- 绌洪棽浣嶏細const锛屼簩杩涘埗锛屼互B缁撳熬锛� +- 鑸ぉ鍣ㄦ爣璇嗭細const锛屽崄鍏繘鍒讹紝浠�0x寮�澶达紱 +- 铏氭嫙淇¢亾鏍囪瘑锛歴endFlag锛屽彂閫佹爣璁帮紝榛樿涓衡�滀换鍔℃敞鍏ュ抚鈥濓紝鎵�鏈夌殑鍊奸兘瑕佸垪涓惧嚭鏉ワ紱 +# 鏁版嵁绫诲瀷 +- const锛氬浐瀹氱爜瀛楋紝鏁板�硷紝浜岃繘鍒朵互B缁撳熬锛屽崄杩涘埗锛屽崄鍏繘鍒朵互0x寮�澶达紱 +- sendFlag锛氬彂閫佹爣璁帮紝绫讳技鏋氫妇锛屽畾涔夋牱渚嬶細[{"n":"name","v":"value","c":"code","default":true}]锛宯琛ㄧず鍚嶇О锛寁琛ㄧず鍊硷紝c琛ㄧずcode锛堟病鏈夌┖鐫�锛夛紝default琛ㄧず鏄粯璁ゅ�硷紱 +# 绾︽潫 +- 浠SON鏍煎紡杈撳嚭锛� +- 浠呰緭鍑篔SON鏂囨湰锛屼笉瑕佽緭鍑轰换浣曞叾浠栨枃鏈�� +# 杈撳嚭渚嬪瓙锛� { - "name": "YK甯�", - "type": "pkt" - "children":[ - { - "name": "涓诲澶�", - "code": "primaryHeader", - "length": 2, - "value": "00", - "type": "combPkt", - "children": [ - { - "name": "鐗堟湰鍙�", - "code": "verNum" - "length": 1, - "value": "00" - } - ] - } - ], - "subPkts":[] + "鐗堟湰鍙�": "00B", + "閫氳繃鏍囧織": "0", + ... } ''' @@ -989,80 +1293,39 @@ text = self.generate_tc_text(_msg, 'out/tc_transfer_frame.json', files=[file_map['鎸囦护鏍煎紡']], validation=validation) - frame = json.loads(text) + result: dict = json.loads(text) + format_text = utils.read_from_file('tpl/tc_transfer_frame.json') + format_text = utils.replace_tpl_paras(format_text, result) + frame = json.loads(format_text) return frame - def gen_tc_transfer_pkt(self): + def gen_tc_pkt_format(self): _msg = ''' -浠呭垎鏋怸K鍖呮牸寮忥紝鎻愬彇YK鍖呮暟鎹粨鏋勩�� -## 缁忛獙锛� - -瀛楁绫诲瀷鍖呮嫭锛� -1.缁勫悎鍖咃細combPkt锛� -2.鍥哄畾鐮佸瓧锛歝onst锛� -3.闀垮害锛歭ength锛� -4.鏋氫妇鍊硷細enum锛� -5.鏍¢獙鍜岋細checkSum锛� -6.鏁版嵁鍖猴細subPkt銆� - -鏍规嵁瀛楁鎻忚堪鍒嗘瀽瀛楁鐨勭被鍨嬶紝鍒嗘瀽鏂规硶锛� -1.瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊肩殑锛岀被鍨嬩负const锛� -2.瀛楁涓病鏈夋槑纭寚瀹氬瓧娈靛�硷紝浣嗘槸缃楀垪浜嗗彇鍊艰寖鍥寸殑锛岀被鍨嬩负enum锛� -3.瀛楁鎻忚堪涓鏋滃瓨鍦ㄥ灞傜骇鎻忚堪鍒欑埗绾у瓧娈电殑绫诲瀷涓篶ombPkt锛� -4.瀛楁濡傛灉鏄拰鈥滈暱搴︹�濇湁鍏筹紝绫诲瀷涓簂ength锛� -5.濡傛灉鍜屾暟鎹煙鏈夊叧锛岀被鍨嬩负subPkt锛� -6.瀛楁濡傛灉鍜屾牎楠屽拰鏈夊叧锛岀被鍨嬩负checkSum銆� - -瀛楁鍊兼彁鍙栨柟娉曪細 -1.瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊硷紝 -2.闀垮害瀛楁鐨勫�艰鏍规嵁鎻忚堪纭畾璧锋瀛楁鑼冨洿浠ュ強璁$畻鍏紡锛寁alue鏍煎紡渚嬪锛歿"start":"<code>","end":"<code>","formula":"N-1"}锛屾敞鎰忥細start鍜宔nd鐨勫�间负瀛楁code銆� - -## 闄愬埗锛� -- length 鑷姩杞崲涓篵it闀垮害銆� -- value 鏍规嵁瀛楁鎻忚堪鎻愬彇銆� -- enums 鏈変簺瀛楁鏄灇涓惧�硷紝鏍规嵁瀛楁鎻忚堪鎻愬彇锛屾灇涓惧厓绱犵殑鏁版嵁缁撴瀯涓簕"n":"","v":"","c":""}銆� -- 杈撳嚭鍐呭蹇呴』涓轰弗鏍肩殑json锛屼笉鑳借緭鍑洪櫎json浠ュ鐨勪换浣曞唴瀹广�� - -瀛楁鏁版嵁缁撴瀯锛� -涓诲澶� - 鍖呰瘑鍒� - 鍖呯増鏈彿銆佸寘绫诲瀷銆佹暟鎹尯澶存爣蹇椼�佸簲鐢ㄨ繘绋嬫爣璇嗙(APID) - 鍖呭簭鍒楁帶鍒� - 搴忓垪鏍囧織 - 鍖呭簭鍒楄鏁� - 鍖呴暱 -鍓澶� - CCSDS鍓澶存爣蹇� - YK鍖呯増鏈彿 - 鍛戒护姝g‘搴旂瓟锛圓ck锛� - 鏈嶅姟绫诲瀷 - 鏈嶅姟瀛愮被鍨� - 婧愬湴鍧� -搴旂敤鏁版嵁鍖� -甯у樊閿欐帶鍒跺煙銆� - -# 杈撳嚭鍐呭渚嬪瓙锛� +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +# 鎸囦护 +鍒嗘瀽閬ユ帶鍖呮牸寮忥紝鎻愬彇閬ユ帶鍖呮牸寮忕殑瀛楁瀹氫箟銆� +# 闇�姹� +瑕佹彁鍙栧�肩殑鍖呮牸寮忓瓧娈碉細 +- 鍖呯増鏈彿: const锛屼簩杩涘埗锛� +- 鍖呯被鍨�: const锛屼簩杩涘埗锛� +- 鏁版嵁鍖哄ご鏍囧織: const锛屼簩杩涘埗锛� +- 搴忓垪鏍囧織: const锛屼簩杩涘埗锛� +- 鍖呴暱锛歭ength锛� +- 鍓澶存爣蹇�: const锛屼簩杩涘埗锛� +- 閬ユ帶鍖呯増鏈彿: const锛屼簩杩涘埗锛� +- 鍛戒护姝g‘搴旂瓟: const锛屼簩杩涘埗锛� +- 婧愬湴鍧�: const锛屽崄鍏繘鍒躲�� +# 鏁版嵁绫诲瀷 +- const锛氬浐瀹氱爜瀛楋紝鏁板�硷紝浜岃繘鍒朵互B缁撳熬锛屽崄杩涘埗锛屽崄鍏繘鍒朵互0x寮�澶达紱 +# 绾︽潫 +- 浠SON鏍煎紡杈撳嚭锛� +- 浠呰緭鍑篔SON鏂囨湰锛屼笉瑕佽緭鍑轰换浣曞叾浠栨枃鏈�� +# 杈撳嚭渚嬪瓙锛� { - "name": "YK鍖�", - "type": "pkt" - "children":[ - { - "name": "涓诲澶�", - "code": "primaryHeader", - "length": 2, - "value": "00", - "type": "combPkt", - "children": [ - { - "name": "鐗堟湰鍙�", - "code": "verNum" - "length": 1, - "value": "00" - } - ] - } - ], - "subPkts":[] + "鍖呯増鏈彿": "00B", + "鍖呯被鍨�": "1B", + ... } ''' @@ -1071,19 +1334,26 @@ text = self.generate_tc_text(_msg, 'out/tc_transfer_pkt.json', files=[file_map['鎸囦护鏍煎紡']], validation=validation) - pkt_format = json.loads(text) + result = json.loads(text) + + format_text = utils.read_from_file('tpl/tc_pkt_format.json') + format_text = utils.replace_tpl_paras(format_text, result) + pkt_format = json.loads(format_text) return pkt_format def gen_tc_transfer_pkts(self): _msg = ''' +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +# 鎸囦护 鍒嗘瀽鏂囨。鍒楀嚭鎵�鏈夌殑閬ユ帶婧愬寘銆� -## 鏁版嵁缁撴瀯濡備笅锛� +# 杈撳嚭渚嬪瓙锛� [{ "name": "xxx", "code":"pkt", -"apid":"0xAA", -"server":"0x1", -"subServer":"0x2" +"搴旂敤杩囩▼鏍囪瘑绗�":"0xAA", +"鏈嶅姟绫诲瀷":"0x1", +"鏈嶅姟瀛愮被鍨�":"0x2" }] ''' @@ -1095,10 +1365,67 @@ pkts = json.loads(text) return pkts + def gen_tc_pkt_details(self, pkt): + result = [] + tc_name = pkt['name'] + tc_code = pkt['code'] + pkt['name'] = f'{tc_code} {tc_name}' + _msg = f""" +# 瑙掕壊 +浣犳槸涓�涓祫娣辫蒋浠跺伐绋嬪笀銆� + +# 鎸囦护 +鍒嗘瀽鏂囨。锛屼粠鏂囨。涓彁鍙栭仴鎺ф寚浠ゅ悕绉颁负鈥渰tc_name}鈥濅唬鍙蜂负鈥渰tc_code}鈥濈殑鎸囦护搴旂敤鏁版嵁鍖哄畾涔夈�� +""" + """ +# 绾︽潫 +- code 濡傛灉娌℃湁鏄庣‘瀹氫箟鍒欎娇鐢ㄥ悕绉扮殑鑻辨枃缈昏瘧锛屽敖閲忕畝鐭紱 +- length 鑷姩杞崲涓篵it闀垮害锛屽繀椤绘槸鏁板�兼垨null锛屼笉鑳戒负0锛� +- value 鏍规嵁瀛楁鎻忚堪鎻愬彇锛� +- enums 鏈変簺瀛楁鏄灇涓惧�硷紝鏍规嵁瀛楁鎻忚堪鎻愬彇锛屾灇涓惧厓绱犵殑鏁版嵁缁撴瀯涓簕"n":"","v":"","c":""}锛� +- 杈撳嚭鍐呭蹇呴』涓轰弗鏍肩殑json锛屼笉鑳借緭鍑洪櫎json浠ュ鐨勪换浣曞唴瀹广�� + +# 瀛楁绫诲瀷 +- 鍥哄畾鐮佸瓧锛歝onst锛� +- 闀垮害锛歭ength锛� +- 鏋氫妇鍊硷細enum锛� +- 鏍¢獙鍜岋細checkSum锛� +- 鍗虫椂杈撳叆锛歩nput銆� + +# 瀛楁绫诲瀷鍒嗘瀽鏂规硶 +- 鏍规嵁瀛楁鎻忚堪鍒嗘瀽瀛楁鐨勭被鍨嬶紱 +- 瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊肩殑锛岀被鍨嬩负const锛� +- 瀛楁涓病鏈夋槑纭寚瀹氬瓧娈靛�硷紝浣嗘槸缃楀垪浜嗗彇鍊艰寖鍥寸殑锛岀被鍨嬩负enum锛� +- 瀛楁濡傛灉鏄拰鈥滈暱搴︹�濇湁鍏筹紝绫诲瀷涓簂ength锛� +- 濡傛灉鍜屾暟鎹煙鏈夊叧锛岀被鍨嬩负const锛� +- 瀛楁濡傛灉鍜屾牎楠屽拰鏈夊叧锛岀被鍨嬩负checkSum銆� + +# 杈撳嚭渚嬪瓙锛� +[ + { + "name": "para1", + "code": "para1", + "length": 8, + "type": "const", + "value": "0xAA" + } + ... +] +""" + + def validation(gen_text): + json.loads(gen_text) + + text = self.generate_tc_text(_msg, f'out/閬ユ帶鎸囦护鏁版嵁鍩�-{tc_code}-{utils.to_file_name(tc_name)}.json', + files=[file_map['鎸囦护鏍煎紡']], + validation=validation) + result = json.loads(text) + pkt['children'] = result + if __name__ == '__main__': try: os.makedirs("./out/pkts", exist_ok=True) + os.makedirs("./out/tmp", exist_ok=True) # 鍚姩澶фā鍨嬪鐞嗘祦绋� ret_text = DbStructFlow().run() except KeyboardInterrupt: diff --git a/knowledgebase/db/db_helper.py b/knowledgebase/db/db_helper.py index cf94ba0..c394644 100644 --- a/knowledgebase/db/db_helper.py +++ b/knowledgebase/db/db_helper.py @@ -379,8 +379,12 @@ "combPkt": 12, "const": 15, "length": 17, - "enum": 26, + "enum": 18, + "sendFlag": 26, "checkSum": 20, + "insUnit": 4, + "insUnitList": 11, + "input": 19 } @@ -395,7 +399,7 @@ C_CODE=info['code'] if 'code' in info else '', C_TYPE=ins_ty[info['type']] if 'type' in info else 0, C_DEF=info['def'] if 'def' in info else None, - C_BIT_WIDTH=info['bitWidth'] if 'bitWidth' in info else 0, + C_BIT_WIDTH=info['bitWidth'] if 'bitWidth' in info else None, C_BIT_ORDER=info['bitOrder'] if 'bitOrder' in info else 0, C_ATTR=info['attr'] if 'attr' in info else 0, C_RANGE=info['range'] if 'range' in info else None, diff --git a/knowledgebase/db/models.py b/knowledgebase/db/models.py index 9577b28..f171b46 100644 --- a/knowledgebase/db/models.py +++ b/knowledgebase/db/models.py @@ -476,5 +476,5 @@ if os.path.isfile("db.db"): os.remove("db.db") -engine = create_engine('sqlite:///db.db', echo=True) +engine = create_engine('sqlite:///db.db', echo=False) metadata.create_all(engine) diff --git a/knowledgebase/db/neo4j.py b/knowledgebase/db/neo4j.py new file mode 100644 index 0000000..5b9f887 --- /dev/null +++ b/knowledgebase/db/neo4j.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# neo4j.py +# @author: lyg +# @date: 2025-04-24 +# @version: 0.0.1 +# @description: neo4j鏁版嵁搴撴搷浣� + +from py2neo import Graph, Node, Relationship +import json + + +class Neo4jHelper: + def __init__(self): + self.graph = Graph('http://192.168.1.14:7474', user='neo4j', password='123456') + + def create_page_node(self, page_info): + """ + 鍒涘缓椤甸潰鑺傜偣 + """ + # 鍒涘缓鑺傜偣 + node = Node("Page", page_num=page_info.page_num, + chapter_info=json.dumps(page_info.chapter_info, ensure_ascii=False, indent=2), + text=page_info.text, + entities=json.dumps(page_info.entities, ensure_ascii=False, indent=2)) + self.graph.create(node) + return node + + def create_entity_node(self, entity: str): + """ + 鍒涘缓瀹炰綋鑺傜偣 + """ + node = self.graph.nodes.match("Entity", text=entity).first() + if node: + return node + node = Node("Entity", text=entity) + self.graph.create(node) + return node + + def create_page_entity_relationship(self, page_node, entity_node): + """ + 鍒涘缓椤甸潰鍜屽疄浣撹妭鐐圭殑鍏崇郴 + """ + relationship = Relationship(page_node, "page_entity", entity_node) + self.graph.create(relationship) + return relationship + + def create_entity_relationship(self, entity_node1, entity_node2): + """ + 鍒涘缓瀹炰綋鑺傜偣鐨勫叧绯� + """ + if entity_node1 == entity_node2: + return + relationship = Relationship(entity_node1, "entity_entity", entity_node2) + if self.graph.exists(relationship): + return + self.graph.create(relationship) + return relationship diff --git a/knowledgebase/doc/__init__.py b/knowledgebase/doc/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/knowledgebase/doc/__init__.py diff --git a/knowledgebase/doc/doc_convert.py b/knowledgebase/doc/doc_convert.py new file mode 100644 index 0000000..dd9d7c5 --- /dev/null +++ b/knowledgebase/doc/doc_convert.py @@ -0,0 +1,85 @@ +from markitdown import MarkItDown +from win32com import client + +office_id = 'kwps.Application' + + +def docx_to_markdown(doc_file: str) -> str: + """ + 灏�.docx鏂囦欢杞崲涓篗arkdown鏍煎紡鏂囨湰銆� + + 璇ュ嚱鏁颁娇鐢∕arkItDown绫荤殑瀹炰緥鏉ュ鐞嗕紶鍏ョ殑.docx鏂囦欢锛屽苟灏嗗叾鍐呭杞崲涓篗arkdown鏍煎紡鐨勬枃鏈�� + 涓昏姝ラ鍖呮嫭鍒涘缓MarkItDown绫荤殑瀹炰緥銆佽皟鐢ㄥ疄渚嬬殑convert鏂规硶澶勭悊鏂囦欢锛屾渶鍚庤繑鍥炶浆鎹㈠悗鐨勬枃鏈唴瀹广�� + + 鍙傛暟: + doc_file: str - .docx鏂囦欢鐨勮矾寰勶紝搴斿寘鍚枃浠跺悕鍜屾墿灞曞悕銆� + + 杩斿洖: + str - 杞崲鍚庣殑Markdown鏍煎紡鏂囨湰銆� + """ + # 鍒涘缓MarkItDown绫荤殑瀹炰緥 + md = MarkItDown() + + # 浣跨敤MarkItDown瀹炰緥杞崲.docx鏂囦欢涓篗arkdown鏍煎紡鏂囨湰 + result = md.convert(doc_file) + + # 杩斿洖杞崲鍚庣殑鏂囨湰鍐呭 + return result.text_content + + +def doc_to_docx(doc_file: str, docx_file: str) -> None: + """ + 灏�.doc鏂囦欢杞崲涓�.docx鏂囦欢銆� + + 鍙傛暟: + doc_file (str): 杈撳叆鐨�.doc鏂囦欢璺緞銆� + docx_file (str): 杈撳嚭鐨�.docx鏂囦欢璺緞銆� + + 杩斿洖: + None + """ + try: + word = client.Dispatch(office_id) + doc = word.Documents.Open(doc_file) + doc.SaveAs(docx_file, 12) # 鍙傛暟12琛ㄧず淇濆瓨涓�.docx鏍煎紡 + doc.Close() + word.Quit() + print(f"鏂囦欢 {doc_file} 宸叉垚鍔熻浆鎹负 {docx_file}锛�") + except Exception as e: + print(f"鍑虹幇閿欒: {e}") + + +def docx_to_pdf(docx_file: str, pdf_file: str) -> None: + """ + 灏�.docx鏂囦欢杞崲涓�.pdf鏂囦欢銆� + + 鍙傛暟: + docx_file (str): 杈撳叆鐨�.docx鏂囦欢璺緞銆� + pdf_file (str): 杈撳嚭鐨�.pdf鏂囦欢璺緞銆� + + 杩斿洖: + None + """ + try: + word = client.Dispatch(office_id) + doc = word.Documents.Open(docx_file) + doc.SaveAs(pdf_file, 17) # 17 琛ㄧず淇濆瓨涓�.pdf鏍煎紡 + doc.Close() + word.Quit() + print(f"鏂囦欢 {docx_file} 宸叉垚鍔熻浆鎹负 {pdf_file}锛�") + except Exception as e: + print(f"鍑虹幇閿欒: {e}") + + +def test(): + # doc_to_docx("D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�.doc", + # "D:\\projects\\KnowledgeBase\\doc\\XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�111.docx") + # docx_to_pdf("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).docx", + # "D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf") + import pymupdf4llm + md_text = pymupdf4llm.to_markdown("D:/workspace/PythonProjects/KnowledgeBase/doc/ZL鏍煎紡(鍏紑).pdf") + print(md_text) + + +if __name__ == '__main__': + test() diff --git a/knowledgebase/doc/doc_processor.py b/knowledgebase/doc/doc_processor.py new file mode 100644 index 0000000..7dccb8b --- /dev/null +++ b/knowledgebase/doc/doc_processor.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +# @file: doc_processor.py +# @author: lyg +# @date: 20250427 +# @version: +# @description: 澶勭悊鏂囨。锛屾彁鍙栫珷鑺備俊鎭紝鎻愬彇椤电爜淇℃伅锛屾彁鍙栧疄浣撹瘝锛屽啓鍏ュ浘鏁版嵁搴擄紙neo4j锛夈�� +from knowledgebase.db.neo4j import Neo4jHelper +from knowledgebase.doc.doc_split import DocSplit +from knowledgebase.doc.entity_recognition import EntityRecognition +import asyncio + + +class DocProcessor: + def __init__(self, pdf_file): + self.doc_split = DocSplit(pdf_file) + self.entity_recognition = EntityRecognition() + self.neo4j = Neo4jHelper() + + async def gen_page_entities(self, page_info): + # 鑾峰彇椤甸潰瀹炰綋璇� + page_entities = await asyncio.to_thread(lambda: self.entity_recognition.run(page_info.text)) + page_info.entities = page_entities + + def process(self): + # 鍒嗘壒骞跺彂澶勭悊锛屾瘡鎵�10椤� + batch_size = 10 + for i in range(0, len(self.doc_split.page_infos), batch_size): + batch_page_infos = self.doc_split.page_infos[i:i + batch_size] + tasks = [] + for page_info in batch_page_infos: + tasks.append(self.gen_page_entities(page_info)) + asyncio.run(asyncio.gather(*tasks)) + self.save_to_neo4j() + + def save_to_neo4j(self): + """ + 淇濆瓨椤靛拰椤靛疄浣撹瘝鍒皀eo4j鏁版嵁搴撱�� + + 1.姣忎竴椤典负涓�涓狽ode锛� + 2.姣忎竴涓疄浣撹瘝涓轰竴涓狽ode锛� + 3.椤靛拰瀹炰綋璇嶇洿鎺ュ缓绔嬪叧绯� 椤�->瀹炰綋璇� + :return: + """ + for page_info in self.doc_split.page_infos: + # 鍒涘缓椤佃妭鐐� + page_node = self.neo4j.create_page_node(page_info) + entity_nodes = [] + for entity in page_info.entities: + # 鍒涘缓瀹炰綋璇嶈妭鐐� + entity_node = self.neo4j.create_entity_node(entity) + # 寤虹珛鍏崇郴 椤�->瀹炰綋璇� + self.neo4j.create_page_entity_relationship(page_node, entity_node) + entity_nodes.append(entity_node) + if len(entity_nodes) > 0: + for i in range(len(entity_nodes)): + prev_entity_node = entity_nodes[i] + for entity_node in entity_nodes[i + 1:]: + # 寤虹珛鍏崇郴 涓�椤典腑鐨� 瀹炰綋璇�1->瀹炰綋璇�2 + self.neo4j.create_entity_relationship(prev_entity_node, entity_node) + + +if __name__ == '__main__': + pdf_file = "D:/workspace/PythonProjects/KnowledgeBase/doc/XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�111.pdf" + doc_processor = DocProcessor(pdf_file) + doc_processor.process() diff --git a/knowledgebase/doc/doc_split.py b/knowledgebase/doc/doc_split.py new file mode 100644 index 0000000..a98f1e3 --- /dev/null +++ b/knowledgebase/doc/doc_split.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# doc_split.py +# @author: lyg +# @date: 2025-04-24 +# @version: 0.0.1 +# @description: 鏂囨。澶勭悊锛屾寜椤靛垏鍓叉枃妗e苟鎻愬彇姣忎竴椤电殑鏂囨湰锛屾彁鏂囨。鐨勭珷鑺備俊鎭紝灏嗙珷鑺備俊鎭拰椤电爜淇℃伅鍏宠仈璧锋潵锛屾彁鍙栨瘡涓�椤电殑瀹炰綋璇嶃�� + +from dataclasses import dataclass, field +import docx +import pymupdf + +import pymupdf4llm + +from knowledgebase.doc.entity_recognition import EntityRecognition + + +@dataclass +class ChapterInfo: + """ + 琛ㄧず绔犺妭淇℃伅鐨勭被锛屽寘鍚珷鑺傛爣棰樸�佽捣濮嬮〉鐮併�佺粨鏉熼〉鐮佸拰灞傜骇銆� + """ + title: str # 绔犺妭鏍囬 + start_page: int # 璧峰椤电爜 + end_page: int # 缁撴潫椤电爜 + level: int # 绔犺妭灞傜骇 + + def __post_init__(self): + """ + 鍦ㄥ垵濮嬪寲鍚庢墽琛岀殑閽╁瓙鍑芥暟锛岀敤浜庨獙璇佽緭鍏ュ弬鏁扮殑鏈夋晥鎬с�� + """ + # 楠岃瘉鏍囬鏄惁涓虹┖ + if not self.title or not isinstance(self.title, str): + raise ValueError("鏍囬蹇呴』鏄潪绌哄瓧绗︿覆") + + # 楠岃瘉椤电爜鏄惁涓烘鏁存暟 + if not isinstance(self.start_page, int) or self.start_page < 1: + raise ValueError("璧峰椤电爜蹇呴』鏄ぇ浜�0鐨勬暣鏁�") + if not isinstance(self.end_page, int) or self.end_page < 1: + raise ValueError("缁撴潫椤电爜蹇呴』鏄ぇ浜�0鐨勬暣鏁�") + + # 楠岃瘉璧峰椤电爜鏄惁灏忎簬绛変簬缁撴潫椤电爜 + if self.start_page > self.end_page: + raise ValueError("璧峰椤电爜涓嶈兘澶т簬缁撴潫椤电爜") + + # 楠岃瘉灞傜骇鏄惁涓烘鏁存暟 + if not isinstance(self.level, int) or self.level < 1: + raise ValueError("灞傜骇蹇呴』鏄ぇ浜�0鐨勬暣鏁�") + + +@dataclass +class PageInfo: + """ + 椤甸潰淇℃伅绫伙紝鐢ㄤ簬瀛樺偍椤甸潰鐨勫熀鏈俊鎭�� + + 鍙傛暟锛� + - page_num (int): 椤甸潰缂栧彿锛屽繀椤讳负姝f暣鏁般�� + - chapter_info (str): 绔犺妭淇℃伅锛屾弿杩板綋鍓嶉〉闈㈡墍灞炵珷鑺傘�� + - text (str, optional): 椤甸潰鏂囨湰鍐呭锛岄粯璁や负绌哄瓧绗︿覆銆� + - entities (list[str]): 椤甸潰瀹炰綋璇嶅垪琛ㄣ�� + + 寮傚父锛� + - ValueError: 濡傛灉 `page_num` 涓嶆槸姝f暣鏁帮紝鎴栬�� `chapter_info` 鍜� `text` 涓嶆槸瀛楃涓层�� + """ + page_num: int + chapter_info: str + text: str = "" + entities: list[str] = field(default_factory=list) + + def __post_init__(self): + """ + 鍒濆鍖栧悗楠岃瘉鍙傛暟绫诲瀷鍜屽�笺�� + """ + if not isinstance(self.page_num, int) or self.page_num < 0: + raise ValueError("page_num 蹇呴』鏄鏁存暟") + if not isinstance(self.chapter_info, str): + raise ValueError("chapter_info 蹇呴』鏄瓧绗︿覆") + if not isinstance(self.text, str): + raise ValueError("text 蹇呴』鏄瓧绗︿覆") + if self.entities is None or not isinstance(self.entities, list): + raise ValueError("entities 蹇呴』鏄垪琛�") + + +class DocSplit: + """ + 鏂囨。澶勭悊 + 鎸夐〉鍒囧壊鏂囨。骞舵彁鍙栨瘡涓�椤甸兘鏂囨湰锛屼娇鐢� + """ + pdf_file: str + page_infos: list[PageInfo] + chapter_infos: list[ChapterInfo] + + def __init__(self, pdf_file): + self.pdf_file = pdf_file + self.page_infos = [] + self.chapter_infos = [] + self.doc = pymupdf.open(self.pdf_file) + self.extract_chapter_info() + self.extract_page_info() + + def extract_chapter_info(self): + """ + 鎻愬彇绔犺妭淇℃伅 + """ + toc = self.doc.get_toc() + for item in toc: + idx = toc.index(item) + end = len(self.doc) + for i in range(idx + 1, len(toc)): + if toc[i][0] >= item[0]: + end = toc[i][2] + break + self.chapter_infos.append(ChapterInfo(title=item[1], start_page=item[2], end_page=end, level=item[0])) + + def extract_page_info(self): + """ + 鎻愬彇椤甸潰淇℃伅 + """ + for page in self.doc: + # 閫氳繃pymupdf4llm鑾峰彇椤甸潰鏂囨湰锛宮arkdown鏍煎紡 + page_text = pymupdf4llm.to_markdown(self.doc, pages=[page.number]) + # 鍒涘缓椤甸潰淇℃伅瀵硅薄 + page_info = PageInfo(page_num=page.number, chapter_info="", text=page_text) + self.page_infos.append(page_info) + + def get_page_info(self, page_num): + """ + 鑾峰彇鎸囧畾椤电爜鐨勯〉闈俊鎭� + """ + for page in self.page_infos: + if page.page_num == page_num: + return page + return None + + +# if __name__ == '__main__': +# ds = DocSplit("D:/workspace/PythonProjects/KnowledgeBase/doc/XA-5D鏃犱汉鏈烘帰娴嬪ぇ绾诧紙鍏紑锛�111.pdf") +# print() diff --git a/knowledgebase/doc/entity_recognition.py b/knowledgebase/doc/entity_recognition.py new file mode 100644 index 0000000..6512bfe --- /dev/null +++ b/knowledgebase/doc/entity_recognition.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# entity_recognition.py +# @author: lyg +# @date: 2025-04-24 +# @version: 0.0.1 +# @description: 瀹炰綋鎶藉彇锛屽皢鏂囨湰涓殑瀹炰綋杩涜璇嗗埆鍜屾彁鍙栥�� + +from langchain_openai.chat_models import ChatOpenAI +from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate +from langchain_core.output_parsers import JsonOutputParser +import json + +from knowledgebase import utils + + +class EntityRecognition: + """ + 瀹炰綋璇嗗埆鎶藉彇銆� + + 浣跨敤langchain鏋勫缓瀹炰綋鎶藉彇娴佺▼銆� + """ + cache_file = "entity_recognition.cache" + + def __init__(self): + llm = ChatOpenAI(temperature=0, + model="qwen2.5-72b-instruct", + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + api_key="sk-15ecf7e273ad4b729c7f7f42b542749e") + msg = HumanMessagePromptTemplate.from_template(template=""" +# 鎸囦护 +璇蜂粠缁欏畾鐨勬枃鏈腑鎻愬彇瀹炰綋璇嶅垪琛ㄣ�� +# 绾︽潫 +- 杈撳嚭鏍煎紡涓篔SON鏍煎紡锛� +- 杈撳嚭鏁版嵁缁撴瀯涓哄瓧绗︿覆鏁扮粍銆� +# 绀轰緥 +```json +["瀹炰綋1","瀹炰綋2"] +``` + +# 鏂囨湰濡備笅锛� +{text} +""" + ) + prompt = ChatPromptTemplate.from_messages([msg]) + parser = JsonOutputParser(pydantic_object=list[str]) + self.chain = prompt | llm | parser + self.cache = {} + self.load_cache() + + def load_cache(self): + """ + 鍔犺浇缂撳瓨銆� + """ + if utils.file_exists(self.cache_file): + text = utils.read_from_file(self.cache_file) + self.cache = json.loads(text) + + def save_cache(self): + """ + 淇濆瓨缂撳瓨銆� + """ + text = json.dumps(self.cache) + utils.save_to_file(text, self.cache_file) + + def run(self, in_text: str) -> list[str]: + """ + 杩愯瀹炰綋璇嗗埆鎶藉彇銆� + """ + # 缂撳瓨鍛戒腑 + text_md5 = utils.generate_md5(in_text) + if text_md5 in self.cache: + return self.cache[text_md5] + result = self.chain.invoke({"text": in_text}) + self.cache[text_md5] = result + self.save_cache() + return result diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py index c785dfe..ab7d2d1 100644 --- a/knowledgebase/utils.py +++ b/knowledgebase/utils.py @@ -1,4 +1,8 @@ import math +import hashlib +import os +import json +import re def get_bit_mask(start, end): @@ -9,3 +13,49 @@ for i in range(start, end + 1): mask |= 1 << (bits - i - 1) return mask + + +def generate_md5(input_string): + # 鍒涘缓涓�涓� md5 鍝堝笇瀵硅薄 + md5_hash = hashlib.md5() + + # 鏇存柊鍝堝笇瀵硅薄鐨勫唴瀹癸紙闇�瑕佸皢瀛楃涓茬紪鐮佷负瀛楄妭锛� + md5_hash.update(input_string.encode('utf-8')) + + # 鑾峰彇鍝堝笇鍊肩殑鍗佸叚杩涘埗琛ㄧず + md5_digest = md5_hash.hexdigest() + + return md5_digest + + +def file_exists(cache_file: str): + return os.path.exists(cache_file) + + +def read_from_file(cache_file: str) -> str: + with open(cache_file, 'r', encoding='utf-8') as f: + text = f.read() + return text + + +def save_to_file(text, cache_file): + with open(cache_file, 'w', encoding='utf-8') as f: + f.write(text) + + +def replace_tpl_paras(tpl_text: str, data: dict): + for key, val in data.items(): + if not isinstance(val, str): + val = json.dumps(json.dumps(val, ensure_ascii=False), ensure_ascii=False)[1:-1] + tpl_text = tpl_text.replace('{{' + key + '}}', val) + return tpl_text + + +def to_file_name(text: str): + """ + 灏嗘枃鏈浆涓哄悎娉曠殑鏂囦欢鍚嶇О銆� + 灏嗙壒娈婂瓧绗︽浛鎹负_ + :param text: + :return: + """ + return re.sub(r'[\\/:*?"<>|]', '_', text) diff --git a/main.py b/main.py index 7508d18..50eec97 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,7 @@ import math import os +import random +import time from knowledgebase.markitdown import MarkItDown @@ -61,5 +63,5 @@ return mask -if __name__ == '__main__': - main() \ No newline at end of file +# if __name__ == '__main__': +# main() -- Gitblit v1.9.1