From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001 From: lyg <1543117173@qq.com> Date: 星期一, 07 七月 2025 16:20:25 +0800 Subject: [PATCH] 生成数据库 --- knowledgebase/gen_base_db/json_generate.py | 206 ++++++++++++++++++++++++++------------------------- 1 files changed, 105 insertions(+), 101 deletions(-) diff --git a/knowledgebase/gen_base_db/json_generate.py b/knowledgebase/gen_base_db/json_generate.py index ab7076a..062ed24 100644 --- a/knowledgebase/gen_base_db/json_generate.py +++ b/knowledgebase/gen_base_db/json_generate.py @@ -134,6 +134,15 @@ """ return doc_dbh.get_text_with_entities(entity_names) + @staticmethod + def get_texts_with_entity(entity_names: list[str]) -> list[str]: + """ + 鏍规嵁瀹炰綋璇嶈幏鍙栨枃妗f枃鏈� + :param entity_names: str - 瀹炰綋璇嶅悕绉� + :return: str - 鏂囨湰鍐呭 + """ + return doc_dbh.get_texts_with_entities(entity_names) + def run(self): # 鏍规嵁鏂囨。锛岀敓鎴愮粨鏋勫寲鏁版嵁 self.handle_tm_structured_data() @@ -349,6 +358,7 @@ def validation(gen_text): vcs = json.loads(gen_text) assert next(filter(lambda it: re.match('^[0-1]+$', it['VCID']), vcs)), '鐢熸垚鐨刅CID蹇呴』鏄簩杩涘埗' + doc_text = self.get_text_with_entity(['铏氭嫙淇¢亾瀹氫箟']) result = self.call_model(_msg, 'out/' + dev.code + '_铏氭嫙淇¢亾.json', doc_text, validation) Log.info('铏氭嫙淇¢亾锛�' + result) @@ -380,7 +390,8 @@ pkts = json.loads(gen_text) assert len(pkts), 'VC婧愬寘鍒楄〃涓嶈兘涓虹┖' - text = self.call_model(_msg, 'out/' + dev.code + '_閬ユ祴婧愬寘涓嬩紶鏃舵満.json', ['閬ユ祴婧愬寘涓嬩紶鏃舵満'], validation) + doc_text = self.get_text_with_entity(['閬ユ祴婧愬寘涓嬩紶鏃舵満']) + text = self.call_model(_msg, 'out/' + dev.code + '_閬ユ祴婧愬寘涓嬩紶鏃舵満.json', doc_text, validation) Log.info('閬ユ祴婧愬寘鎵�灞炶櫄鎷熶俊閬擄細' + text) return json.loads(text) @@ -410,7 +421,8 @@ } ] """ - result = self.call_model(_msg, 'out/' + dev.code + '_婧愬寘鍒楄〃.json', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�']) + doc_text = self.get_text_with_entity(['婧愬寘鍒楄〃']) + result = self.call_model(_msg, 'out/' + dev.code + '_婧愬寘鍒楄〃.json', doc_text) Log.info('閬ユ祴婧愬寘鍒楄〃锛�' + result) return json.loads(result) @@ -434,7 +446,8 @@ # 渚嬪瓙锛� {"last_par_pos":128, "par_num": 20} """ - text = self.call_model(_msg, '', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�']) + doc_text = self.get_text_with_entity([pkt_id]) + text = self.call_model(_msg, '', doc_text) result = json.loads(text) last_par_pos = result['last_par_pos'] par_num = result['par_num'] @@ -494,7 +507,7 @@ ] """ - def validation(gen_text): + def _validation(gen_text): _pkt = json.loads(gen_text) with open(f'out/tmp/{time.time()}.json', 'w') as f: f.write(gen_text) @@ -504,7 +517,7 @@ # assert par_num == len(_pkt['datas']), f'鏁版嵁鍩熷弬鏁颁釜鏁颁笉瀵癸紒棰勮{par_num}涓紝瀹為檯{len(_pkt["datas"])}' assert last_par_pos == _pkt['datas'][-1]['pos'], '鏈�鍚庝竴涓弬鏁扮殑瀛楄妭浣嶇疆涓嶅锛�' - result = self.call_model(_msg, f'out/鏁版嵁鍖�-{pkt_name}.json', [], ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�'], validation) + result = self.call_model(_msg, f'out/鏁版嵁鍖�-{pkt_name}.json', doc_text, _validation) Log.info(f'鏁版嵁鍖呪�渰pkt_name}鈥濅俊鎭細' + result) pkt = json.loads(result) else: @@ -517,87 +530,72 @@ return pkt def gen_bus(self): - _msg = """ - # 鎸囦护 - 鎴戦渶瑕佷粠鏂囨。涓彁鍙栫粡鎬荤嚎鐨勬暟鎹寘鍒楄〃锛屼綘瑕佸府鍔╂垜瀹屾垚缁忔�荤嚎鐨勬暟鎹寘鍒楄〃鐨勬彁鍙栥�� - # 闇�姹� - 璇锋瀽鏂囨。锛屽垪鍑烘�荤嚎閫氫俊鍖呬紶杈撶害瀹氫腑鎻忚堪鐨勬墍鏈夋暟鎹寘鍒楄〃锛� - 鏁版嵁鍖呭瓧娈靛寘鎷細id(鏁版嵁鍖呬唬鍙�)銆乶ame(鏁版嵁鍖呭悕绉�)銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(bit闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� - transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乺tAddr(鎵�灞濺T鐨勫湴鍧�鍗佽繘鍒�)銆乺t(鎵�灞瀝t鍚嶇О)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乥urst(鏄惁绐佸彂)銆乼ransDirect(浼犺緭鏂瑰悜)銆� - # 绾︽潫 - - frameNum锛氫娇鐢ㄦ枃妗d腑鐨勬枃鏈笉瑕佸仛浠讳綍杞崲锛� - - subAddr锛氬�间负鈥滄繁搴︹�濄�佲�滃钩閾衡�濄�佲�滄暟瀛椻�濇垨null锛� - - 鏄惁缁忚繃鎬荤嚎鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎锛� - - 浼犺緭鏈嶅姟鍒嗕笁绉嶏細SetData(缃暟)銆丟etData(鍙栨暟)銆丏ataBlock(鏁版嵁鍧椾紶杈�)锛� - - 浼犺緭鏂瑰悜鍒嗏�濇敹鈥滃拰鈥濆彂鈥滐紝浼犺緭鏈嶅姟濡傛灉鏄�濆彇鏁扳�滄槸鈥濇敹鈥滐紝濡傛灉鏄�濇暟鎹潡浼犺緭鈥滃垯鏍规嵁鍖呮墍鍦ㄧ殑鍒嗙郴缁熶互鍙婅〃鏍肩殑鈥濅紶杈撴柟鍚戔�滃垪杩涜鍒ゆ柇锛屽垽鏂浜嶴MU鏉ヨ鏄敹杩樻槸鍙戯紱 - - 鏄惁绐佸彂锛氭牴鎹〃鏍间腑鐨勨�濅紶杈撳懆鏈熲�滃垪杩涜鍒ゆ柇锛屽鏋滃~鍐欎簡绫讳技鈥濈獊鍙戔�滅殑鏂囧瓧琛ㄧず鏄獊鍙戝惁鍒欒〃绀轰笉鏄獊鍙戯紱 - - 涓嶈婕忔帀浠讳綍涓�涓暟鎹寘锛� - - 鏁版嵁缁撴瀯鏈�澶栧眰鏄暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘锛屼互JSON鏍煎紡杈撳嚭锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曟枃鏈�� - # 渚嬪瓙 - [ - { - "id": "PCS005", - "name": "鎬荤嚎绠$悊锛堝唴閮ㄦ寚浠わ級", - "apid": "418", - "service": "(1, 2)", - "length": 1, - "interval": 1000, - "subAddr": null, - "frameNum": "1|2", - "transSer": "DataBlock", - "note": "", - "rtAddr": 28, - "rt": "鏁版嵁鎺ュ彛鍗曞厓XIU", - "throughBus": true, - "burst": true, - "transDirect": "鍙�" - } - ] - """ + self.bus_pkts = [] + doc_text_list = self.get_texts_with_entity(['鍒嗙郴缁熸簮鍖�']) + for doc_text in doc_text_list: + _msg = """ + # 鎸囦护 + 鎴戦渶瑕佷粠鏂囨。涓彁鍙栫粡鎬荤嚎鐨勬暟鎹寘鍒楄〃锛屼綘瑕佸府鍔╂垜瀹屾垚缁忔�荤嚎鐨勬暟鎹寘鍒楄〃鐨勬彁鍙栥�� + # 闇�姹� + 璇锋瀽鏂囨。锛屽垪鍑烘�荤嚎閫氫俊鍖呬紶杈撶害瀹氫腑鎻忚堪鐨勬墍鏈夋暟鎹寘鍒楄〃锛� + 鏁版嵁鍖呭瓧娈靛寘鎷細id(鏁版嵁鍖呬唬鍙�)銆乶ame(鏁版嵁鍖呭悕绉�)銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(bit闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� + transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乺tAddr(鎵�灞濺T鐨勫湴鍧�鍗佽繘鍒�)銆乺t(鎵�灞瀝t鍚嶇О)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乥urst(鏄惁绐佸彂)銆乼ransDirect(浼犺緭鏂瑰悜)銆� + # 绾︽潫 + - frameNum锛氫娇鐢ㄦ枃妗d腑鐨勬枃鏈笉瑕佸仛浠讳綍杞崲锛� + - subAddr锛氬�间负鈥滄繁搴︹�濄�佲�滃钩閾衡�濄�佲�滄暟瀛椻�濇垨null锛� + - 鏄惁缁忚繃鎬荤嚎鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎锛� + - 浼犺緭鏈嶅姟鍒嗕笁绉嶏細SetData(缃暟)銆丟etData(鍙栨暟)銆丏ataBlock(鏁版嵁鍧椾紶杈�)锛� + - 浼犺緭鏂瑰悜鍒嗏�濇敹鈥滃拰鈥濆彂鈥滐紝浼犺緭鏈嶅姟濡傛灉鏄�濆彇鏁扳�滄槸鈥濇敹鈥滐紝濡傛灉鏄�濇暟鎹潡浼犺緭鈥滃垯鏍规嵁鍖呮墍鍦ㄧ殑鍒嗙郴缁熶互鍙婅〃鏍肩殑鈥濅紶杈撴柟鍚戔�滃垪杩涜鍒ゆ柇锛屽垽鏂浜嶴MU鏉ヨ鏄敹杩樻槸鍙戯紱 + - 鏄惁绐佸彂锛氭牴鎹〃鏍间腑鐨勨�濅紶杈撳懆鏈熲�滃垪杩涜鍒ゆ柇锛屽鏋滃~鍐欎簡绫讳技鈥濈獊鍙戔�滅殑鏂囧瓧琛ㄧず鏄獊鍙戝惁鍒欒〃绀轰笉鏄獊鍙戯紱 + - 涓嶈婕忔帀浠讳綍涓�涓暟鎹寘锛� + - 鏁版嵁缁撴瀯鏈�澶栧眰鏄暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘锛屼互JSON鏍煎紡杈撳嚭锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曟枃鏈�� + # 渚嬪瓙 + [ + { + "id": "PCS005", + "name": "鎬荤嚎绠$悊锛堝唴閮ㄦ寚浠わ級", + "apid": "418", + "service": "(1, 2)", + "length": 1, + "interval": 1000, + "subAddr": null, + "frameNum": "1|2", + "transSer": "DataBlock", + "note": "", + "rtAddr": 28, + "rt": "鏁版嵁鎺ュ彛鍗曞厓XIU", + "throughBus": true, + "burst": true, + "transDirect": "鍙�" + } + ] + """ - def validation(gen_text): - json.loads(gen_text) + def validation(gen_text): + json.loads(gen_text) - result = self.call_model(_msg, 'out/鎬荤嚎.json', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�'], validation) - Log.info('鎬荤嚎鏁版嵁鍖咃細' + result) + result = self.call_model(_msg, 'out/鎬荤嚎.json', doc_text, validation) + Log.info('鎬荤嚎鏁版嵁鍖咃細' + result) - pkts = json.loads(result) - # 绛涢�夌粡鎬荤嚎鐨勬暟鎹寘 - pkts = list(filter(lambda it: it['throughBus'], pkts)) - # 绛涢�夋湁apid鐨勬暟鎹寘 - pkts = list(filter(lambda it: it['apid'], pkts)) + pkts = json.loads(result) + # 绛涢�夌粡鎬荤嚎鐨勬暟鎹寘 + pkts = list(filter(lambda it: it['throughBus'], pkts)) + # 绛涢�夋湁apid鐨勬暟鎹寘 + pkts = list(filter(lambda it: it['apid'], pkts)) - pkts2 = [] - # todo 杩欎竴姝ュ簲璇ラ�氳繃鏁版嵁搴撶瓫閫夛紝鏁版嵁搴撲腑宸茬粡鏈夋墍鏈夐仴娴嬪寘浠ュ強閬ユ祴鍖呭搴旂殑瀹氫箟娈佃惤鏂囨湰 - for pkt in pkts: - if self.pkt_in_tm_pkts(pkt["name"]): - pkts2.append(pkt) - for pkt in pkts2: - self.gen_pkt_details(pkt['name'], pkt['id']) - _pkt = self.gen_pkt_details(pkt['name'], pkt['id']) - if _pkt: - pkt['children'] = [] - pkt['children'].extend(_pkt['datas']) - pkt['length'] = _pkt['length'] - self.bus_pkts = pkts - - def pkt_in_tm_pkts(self, pkt_name): - _msg = f""" - # 鎸囦护 - 鎴戦渶瑕佷粠鏂囨。涓垎鏋愬垽璇绘槸鍚︽湁鏌愪釜閬ユ祴鍖呯殑瀛楁琛ㄦ弿杩帮紝浣犺甯姪鎴戝垽鏂�� - # 闂 - 鏂囨。涓湁閬ユ祴鍖呪�渰pkt_name}鈥濈殑瀛楁琛ㄦ弿杩板悧锛� - 娉ㄦ剰锛氶仴娴嬪寘鐨勫瓧娈佃〃绱ф帴鐫�閬ユ祴鍖呯珷鑺傛爣棰橈紝濡傛灉绔犺妭鏍囬鍚庨潰鐪佺暐浜嗘垨鑰呰瑙亁xx鍒欐槸娌℃湁瀛楁琛ㄦ弿杩般�� - # 绾︽潫 - - 鏍规嵁鏂囨。鍐呭杈撳嚭锛� - - 閬ユ祴鍖呭悕绉板繀椤诲畬鍏ㄥ尮閰嶏紱 - - 杈撳嚭鈥滄棤鈥濇垨鈥滄湁鈥濓紝涓嶈杈撳嚭鍏朵粬浠讳綍鍐呭銆� - # 渚嬪瓙 - 鏈� - """ - text = self.call_model(_msg, f'out/pkts/鏈夋棤鏁版嵁鍖�-{pkt_name}.txt', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�']) - Log.info(f'鏂囨。涓湁鏃犫�渰pkt_name}鈥濈殑瀛楁鎻忚堪锛�' + text) - return text == '鏈�' + # pkts2 = [] + # todo 杩欎竴姝ュ簲璇ラ�氳繃鏁版嵁搴撶瓫閫夛紝鏁版嵁搴撲腑瀛樺偍浜嗘瘡涓暟鎹寘鐨勪唬鍙峰疄浣� + # for pkt in pkts: + # if self.pkt_in_tm_pkts(pkt["name"]): + # pkts2.append(pkt) + for pkt in pkts: + self.gen_pkt_details(pkt['name'], pkt['id']) + _pkt = self.gen_pkt_details(pkt['name'], pkt['id']) + if _pkt: + pkt['children'] = [] + pkt['children'].extend(_pkt['datas']) + pkt['length'] = _pkt['length'] + self.bus_pkts.extend(pkts) # endregion 閬ユ祴-end @@ -642,7 +640,8 @@ def validation(gen_text): json.loads(gen_text) - text = self.call_model(_msg, 'out/tc_transfer_frame.json', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�'], validation) + doc_text = self.get_text_with_entity(['閬ユ帶甯ф牸寮�']) + text = self.call_model(_msg, 'out/tc_transfer_frame.json', doc_text, validation) result: dict = json.loads(text) format_text = utils.read_from_file('tpl/tc_transfer_frame.json') format_text = utils.replace_tpl_paras(format_text, result) @@ -681,7 +680,8 @@ def validation(gen_text): json.loads(gen_text) - text = self.call_model(_msg, 'out/tc_transfer_pkt.json', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�'], validation) + doc_text = self.get_text_with_entity(['閬ユ帶鍖呮牸寮�']) + text = self.call_model(_msg, 'out/tc_transfer_pkt.json', doc_text, validation) result = json.loads(text) format_text = utils.read_from_file('tpl/tc_pkt_format.json') @@ -691,25 +691,29 @@ return pkt_format def gen_tc_transfer_pkts(self): - _msg = ''' - # 鎸囦护 - 鍒嗘瀽鏂囨。鍒楀嚭鎵�鏈夌殑閬ユ帶婧愬寘銆� - # 杈撳嚭渚嬪瓙锛� - [{ - "name": "xxx", - "code":"pkt", - "搴旂敤杩囩▼鏍囪瘑绗�":"0xAA", - "鏈嶅姟绫诲瀷":"0x1", - "鏈嶅姟瀛愮被鍨�":"0x2" - }] - ''' + doc_text_list = self.get_texts_with_entity(['APID鍒嗛厤']) + pkts = [] + for doc_text in doc_text_list: + _msg = ''' + # 鎸囦护 + 鍒嗘瀽鏂囨。鍒楀嚭鎵�鏈夌殑閬ユ帶婧愬寘銆� + # 杈撳嚭渚嬪瓙锛� + [{ + "name": "xxx", + "code":"pkt", + "搴旂敤杩囩▼鏍囪瘑绗�":"0xAA", + "鏈嶅姟绫诲瀷":"0x1", + "鏈嶅姟瀛愮被鍨�":"0x2" + }] + ''' - def validation(gen_text): - json.loads(gen_text) + def validation(gen_text): + json.loads(gen_text) - text = self.call_model(_msg, 'out/tc_transfer_pkts.json', ['杩欓噷鏄枃妗d腑鎶藉彇鐨勫唴瀹�'], validation) - Log.info('閬ユ帶鍖呭垪琛細' + text) - return json.loads(text) + text = self.call_model(_msg, 'out/tc_transfer_pkts.json', doc_text, validation) + Log.info('閬ユ帶鍖呭垪琛細' + text) + pkts.extend(json.loads(text)) + return pkts def gen_tc_pkt_details(self, pkt): tc_name = pkt['name'] -- Gitblit v1.9.1