From 22f370322412074174cde20ecfd14ec03657ab63 Mon Sep 17 00:00:00 2001
From: lyg <1543117173@qq.com>
Date: Mon, 07 Jul 2025 16:20:25 +0800
Subject: [PATCH] Generate the database

---
 knowledgebase/doc/models.py             |   20
 knowledgebase/db/doc_db_helper.py       |   84 +
 knowledgebase/db/models.py              |   14
 knowledgebase/doc/entity_recognition.py |   14
 knowledgebase/llm.py                    |    6
 testcases/test_doc_processor.py         |   33
 knowledgebase/doc/doc_processor.py      |  112 ++
 knowledgebase/doc/docx_split.py         |   24
 knowledgebase/db/doc_db_models.py       |   10
 db_struct_flow.py                       | 1113 +++++++++++++++++++++--------------
 tpl/entities.json                       |   12
 knowledgebase/db/data_creator.py        |    8
 knowledgebase/doc/entity_helper.py      |   10
 knowledgebase/utils.py                  |    2
 main.py                                 |   34 +
 knowledgebase/db/db_helper.py           |  283 +++++---
 tpl/tc_pkt_format.json                  |   22
 17 files changed, 1148 insertions(+), 653 deletions(-)

diff --git a/db_struct_flow.py b/db_struct_flow.py
index 0cb5312..34f7e77 100644
--- a/db_struct_flow.py
+++ b/db_struct_flow.py
@@ -1,4 +1,7 @@
+import asyncio
+import math
 import os
+import subprocess
 import time
 from datetime import datetime

@@ -8,115 +11,56 @@
 from langchain_community.chat_models import ChatOpenAI
 from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate

-
+import textwrap
 import data_templates
 from knowledgebase import utils
 from knowledgebase.db.db_helper import create_project, create_device, create_data_stream, \
-    update_rule_enc, create_extend_info, create_ref_ds_rule_stream, create_ins_format, make_attr
+    update_rule_enc, create_extend_info, create_ref_ds_rule_stream, create_ins_format, make_attr, init_db_helper
 from knowledgebase.db.data_creator import create_prop_enc, create_enc_pkt, get_data_ty, create_any_pkt
-from knowledgebase.db.models import TProject
+from knowledgebase.db.models import TProject, init_base_db
 from knowledgebase.db.doc_db_helper import doc_dbh
+from knowledgebase.llm import llm

-# file_map = {
-#     # "遥测源包设计报告": "./doc/HY-4A数管分系统遥测源包设计报告 Z 240824 更改3(内部) .docx.md",
-#     # "遥测源包设计报告": "./doc/数管数字量快速源包.md",
-#     # "遥测源包设计报告": "./doc/数管数字量中速源包.md",
-#     # "遥测源包设计报告": "./doc/硬通道设备工作状态数据包.md",
-#     # "遥测源包设计报告": "./doc/DIU遥测模块采集的DS量4.md",
-#     "遥测源包设计报告": "./doc/DIU遥测模块模拟量.md",
-#     "遥测大纲": "./doc/HY-4A卫星遥测大纲 Z 240824 更改3（内部）.docx.md",
-#     # "总线传输通信帧分配": "./doc/HY-4A卫星1553B总线传输通信帧分配 Z 240824 更改3（内部）.docx.md",
-#     "总线传输通信帧分配": "./doc/总线.md",
-#     "应用软件用户需求": "./doc/HY-4A数管分系统应用软件用户需求（星务管理分册） Z 240831 更改4（内部）.docx.md"
-# }
-# file_map = {
-#     "遥测源包设计报告": "./docs/HY-4A数管分系统遥测源包设计报告 Z 240824 更改3(内部) .docx.md",
-#     "遥测大纲": "./docs/HY-4A卫星遥测大纲 Z 240824 更改3（内部）.docx.md",
-#     "总线传输通信帧分配": "./docs/HY-4A卫星1553B总线传输通信帧分配 Z 240824 更改3（内部）.docx.md",
-#     "应用软件用户需求": "./docs/HY-4A数管分系统应用软件用户需求（星务管理分册） Z 240831 更改4（内部）.docx.md"
-# }
-file_map = {
-    "文档合并": "./doc/文档合并.md",
-    "遥测源包设计报告": "./doc/XA-5D无人机分系统探测源包设计报告（公开）.md",
-    "遥测大纲": "./doc/XA-5D无人机探测大纲（公开）.md",
-    "总线传输通信帧分配": "./doc/XA-5D无人机1314A总线传输通信帧分配（公开）.md",
-    "指令格式": "./doc/ZL格式(公开).docx.md"
-}
+# BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
+# API_KEY = 'sk-15ecf7e273ad4b729c7f7f42b542749e'
+# MODEL_NAME = 'qwen2.5-72b-instruct'

-BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
-API_KEY = 'sk-15ecf7e273ad4b729c7f7f42b542749e'
-MODEL_NAME = 'qwen2.5-72b-instruct'
-
-# BASE_URL = 'http://10.74.15.164:11434/v1/'
+# BASE_URL = 'http://10.74.15.171:11434/v1/'
 # API_KEY = 'ollama'
-# 
MODEL_NAME = 'qwen2.5:32b-128k' +# MODEL_NAME = 'qwen2.5:72b-instruct' # BASE_URL = 'http://chat.com/api' # API_KEY = 'sk-49457e83f734475cb4cf7066c649d563' # MODEL_NAME = 'qwen2.5:72b-120k' -# BASE_URL = 'http://10.74.15.171:8000/v1' -# API_KEY = 'EMPTY' +BASE_URL = 'http://10.74.15.171:8000/v1' +API_KEY = 'EMPTY' # MODEL_NAME = 'QwQ:32b' -# MODEL_NAME = 'vllm-Qwen-72b-4bit' +MODEL_NAME = 'Qwen2.5-72B-Instruct-AWQ' +# MODEL_NAME = 'qwen2.5:72b-instruct' USE_CACHE = True assistant_msg = """ -# 瑙掕壊 -浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堬紝鎿呴暱杩涜鏂囨。鍒嗘瀽鍜岄�氫俊鍗忚鍒嗘瀽锛屽悓鏃惰兘澶熻В鏋� markdown 绫诲瀷鐨勬枃妗c�傛嫢鏈夋垚鐔熷噯纭殑鏂囨。闃呰涓庡垎鏋愯兘鍔涳紝鑳藉濡ュ杽澶勭悊澶氭枃妗i棿瀛樺湪寮曠敤鍏崇郴鐨勫鏉傛儏鍐点�� - -## 鎶�鑳� -### 鎶�鑳� 1锛氭枃妗e垎鏋愶紙鍖呮嫭 markdown 鏂囨。锛� -1. 褰撶敤鎴锋彁渚涙枃妗f椂锛屼粩缁嗛槄璇绘枃妗e唴瀹癸紝涓ユ牸鎸夌収鏂囨。涓殑鎻忚堪鎻愬彇鍏抽敭淇℃伅锛屼笉寰楀姞鍏ヨ嚜宸辩殑鍥炵瓟鎴栧缓璁�� -2. 鍒嗘瀽鏂囨。鐨勭粨鏋勩�佷富棰樺拰閲嶇偣鍐呭锛屽悓鏍峰彧渚濇嵁鏂囨。杩涜琛ㄨ堪銆� -3. 濡傛灉鏂囨。闂村瓨鍦ㄥ紩鐢ㄥ叧绯伙紝姊崇悊寮曠敤鑴夌粶锛屾槑纭悇鏂囨。涔嬮棿鐨勫叧鑱旓紝涓斾粎鍛堢幇鏂囨。涓綋鐜扮殑鍐呭銆� - -### 鎶�鑳� 2锛氶�氫俊鍗忚鍒嗘瀽 -1. 鎺ユ敹閫氫俊鍗忚鐩稿叧淇℃伅锛岀悊瑙e崗璁殑瑙勫垯鍜屾祦绋嬶紝浠呬緷鎹墍缁欎俊鎭繘琛屽垎鏋愩�� - -## 鑳屾櫙鐭ヨ瘑 -###杞欢涓昏鍔熻兘涓庤繍琛屾満鍒舵�荤粨濡備笅锛� -1. 鏁版嵁閲囬泦鍜屽鐞嗭細 - DIU璐熻矗鏍规嵁鍗槦鐨勫伐浣滅姸鎬佹垨妯″紡鎻愪緵閬ユ祴鏁版嵁锛屽寘鎷ā鎷熼噺锛圓N锛夈�佹�荤嚎淇″彿锛圔L锛変互鍙婃俯搴︼紙TH锛夊拰鏁板瓧閲忥紙DS锛夛紝骞跺皢杩欎簺淇℃伅鎵撳寘锛岄�氳繃鎬荤嚎鍙戦�佺粰SMU銆� - SMU鍒欐敹闆嗙‖閫氶亾涓婄殑閬ユ祴鍙傛暟锛屽苟閫氳繃鎬荤嚎鎺ユ敹DIU閲囬泦鐨勪俊鎭�� -2. 澶氳矾澶嶇敤涓庢暟鎹紶杈擄細 - 閬ユ祴婧愬寘琚粍缁囨垚E-PDU锛岃繘涓�姝ュ鐢ㄤ负M-PDU锛屽苟濉厖鍒癡CDU涓瀯鎴愰仴娴嬪抚銆� - 鍒╃敤CCSDS AOS CADU鏍煎紡杩涜閬ユ祴鏁版嵁鐨勫璺鐢ㄥ拰浼犺緭銆� -3. 铏氭嫙淇¢亾锛圴C锛夎皟搴︽満鍒讹細 - 閫氳繃甯歌閬ユ祴VC銆佺獊鍙戞暟鎹甐C銆佸欢鏃堕仴娴媀C銆佽褰曟暟鎹甐C浠ュ強鍥炴斁VC瀹炵幇涓嶅悓绫诲瀷鐨勬暟鎹笅浼犮�� -4. 閬ユ帶鎸囦护澶勭悊锛� - 涓婅閬ユ帶鍖呮嫭鐩存帴鎸囦护鍜岄棿鎺ユ寚浠わ紝闇�缁忚繃鏍煎紡楠岃瘉鍚庤浆鍙戠粰鐩稿簲鍗曟満鎵ц銆� - 閬ユ帶甯ч�氳繃鐗瑰畾鐨勮櫄鎷熶俊閬擄紙VC锛夎繘琛屼紶杈撱�� -杩欎簺鐭ヨ瘑闇�瑕佷綘璁颁綇锛屽啀鍚庣画鐨勫鐞嗕腑鍙互甯姪浣犵悊瑙h澶勭悊鐨勬暟鎹�� - -## 鐩爣瀵煎悜 -1. 閫氳繃瀵规枃妗e拰閫氫俊鍗忚鐨勫垎鏋愶紝涓虹敤鎴锋彁渚涙竻鏅般�佸噯纭殑鏁版嵁缁撴瀯锛屽府鍔╃敤鎴锋洿濂藉湴鐞嗚В鍜屼娇鐢ㄧ浉鍏充俊鎭�� - -## 瑙勫垯 -1. 姣忎竴涓瀷鍙烽兘浼氭湁涓�濂楁枃妗o紝闇�鍑嗙‘鍒ゆ柇鏄惁涓哄悓涓�涓瀷鍙风殑鏂囨。鍚庡啀杩涜鏁翠綋鍒嗘瀽锛屾瘡娆″彧鍒嗘瀽鍚屼竴涓瀷鍙风殑鏂囨。銆� -2. 澶у鏁版枃妗g粨鏋勪负锛氬瀷鍙蜂笅鍖呭惈璁惧锛岃澶囦笅鍖呭惈鏁版嵁娴侊紝鏁版嵁娴佷笅鍖呭惈鏁版嵁甯э紝鏁版嵁甯т腑鏈変竴鍧楁槸鍖呭煙锛屽寘鍩熶腑浼氭寕杞藉悇绉嶇被鍨嬬殑鏁版嵁鍖呫�� -3. 鏂囨。閮芥槸瀵逛簬鏁版嵁浼犺緭鍗忚鐨勬弿杩帮紝鍦ㄦ暟鎹祦銆佹暟鎹抚銆佹暟鎹寘绛変紶杈撳疄浣撲腑閮芥弿杩颁簡鍚勪釜瀛楁鐨勫垎甯冦�佸悇涓瓧娈电殑澶у皬鍜屼綅缃瓑淇℃伅锛屼笖澶у皬鍗曚綅涓嶇粺涓�锛岄渶鐞嗚В杩欎簺鍗曚綅锛屽苟灏嗘墍鏈夎緭鍑哄崟浣嶇粺涓�涓� bits锛岄暱搴﹀瓧娈典娇鐢� length 琛ㄧず锛屼綅缃瓧娈典娇鐢� pos 琛ㄧず锛屽鏋滀负鍙橀暱浣跨敤鈥�"鍙橀暱"鈥濊〃绀恒�� -4. 濡傛灉鏈夊眰绾э紝浣跨敤鏍戝舰 JSON 杈撳嚭锛屽鏋滄湁瀛愯妭鐐癸紝瀛愯妭鐐� key 浣跨敤children锛涢渶淇濊瘉涓�娆¤緭鍑虹殑鏁版嵁缁撴瀯缁熶竴锛屽苟涓斿垽鏂瘡涓眰绾ф槸浠�涔堢被鍨嬶紝杈撳嚭绫诲瀷瀛楁锛坱ype锛夛紝绫诲瀷瀛楁鐨� key 浣跨敤 type锛岀被鍨嬪寘鎷細鍨嬪彿锛坧roject锛夈�佽澶囷紙dev锛夈�佸皝瑁呭寘锛坋nc锛夈�佺嚎鎬у寘锛坙inear锛夈�佸弬鏁帮紙para锛夛紝灏佽鍖呭瓙绾ф湁鏁版嵁鍖咃紝鎵�浠ype涓篹nc锛岀嚎鎬у寘瀛愮骇鍙湁鍙傛暟锛屾墍浠ype涓簂inear锛涙瘡涓眰绾ч兘鍖呭惈鍋忕Щ浣嶇疆锛坧os锛夛紝姣忎釜灞傜骇鐨勫亸绉讳綅缃粠0寮�濮嬨�� -5. 鍚嶇О鐩稿叧鐨勫瓧娈电殑 key 浣跨敤name锛涗唬鍙枫�佺紪鍙锋垨鑰呭敮涓�鏍囪瘑鐩稿叧鐨勫瓧娈电殑key浣跨敤id锛宨d鐢辨暟瀛椼�佽嫳鏂囧瓧姣嶃�佷笅鍒掔嚎缁勬垚涓斾互鑻辨枃瀛楁瘝寮�澶达紝闀垮害灏介噺绠�鐭紱搴忓彿鐩稿叧鐨勫瓧娈电殑key浣跨敤number锛涘亸绉讳綅缃浉鍏冲瓧娈电殑key浣跨敤pos锛涘叾浠栨病鏈変妇渚嬬殑瀛楁浣跨敤绮剧畝鐨勭炕璇戜綔涓哄瓧娈电殑key锛涙瘡涓粨鏋勫繀椤诲寘鍚玭ame鍜宨d銆� -6. 閬ユ祴甯т负CADU锛屽叾涓寘鍚悓姝ュご鍜孷CDU锛屾寜鐓т範鎯渶瑕佷娇鐢╒CDU灞傜骇宓屽浼犺緭甯т富瀵煎ご銆佷紶杈撳抚鎻掑叆鍩熴�佷紶杈撳抚鏁版嵁鍩熴�佷紶杈撳抚灏剧殑缁撴瀯銆� -7. 鏁版嵁鍖呭瓧娈靛寘鎷細name銆乮d銆乼ype銆乸os銆乴ength銆乧hildren锛涘弬鏁板瓧娈靛寘鎷細name銆乮d銆乸os銆乼ype銆乴ength锛涘繀椤诲寘鍚玴os鍜宭ength瀛楁銆� -8. 甯哥敤id鍙傝�冿細閬ユ祴锛圱M锛夈�侀仴鎺э紙TC锛夈�佹�荤嚎锛圔US锛夈�佺増鏈彿锛圴er锛夈�佸簲鐢ㄨ繃绋嬫爣璇嗭紙APID锛夈�� -9. 娉ㄦ剰锛氫竴瀹氳璁板緱morkdown鏂囨。涓細灏嗕竴浜涚壒娈婂瓧绗﹁繘琛岃浆涔夛紝浠ユ鏉ヤ繚璇佹枃妗g殑姝g‘鎬э紝杩欎簺杞箟绗﹀彿锛堜篃灏辨槸鍙嶆枩鏉犫�榎鈥欙級涓嶉渶瑕佸湪缁撴灉涓緭鍑恒�� -10. 浠� JSON 鏍煎紡缁勭粐杈撳嚭鍐呭锛岀‘淇濇暟鎹粨鏋勭殑瀹屾暣鎬у拰鍙鎬э紝娉ㄦ剰锛氱敓鎴愮殑JSON璇硶鏍煎紡蹇呴』绗﹀悎json瑙勮寖锛岄伩鍏嶅嚭鐜伴敊璇�� - -## 闄愬埗锛� -- 鎵�杈撳嚭鐨勫唴瀹瑰繀椤绘寜鐓SON鏍煎紡杩涜缁勭粐锛屼笉鑳藉亸绂绘鏋惰姹傦紝涓斾弗鏍奸伒寰枃妗e唴瀹硅繘琛岃緭鍑猴紝鍙緭鍑� JSON 锛屼笉瑕佽緭鍑哄叾瀹冩枃瀛椼�� -- 涓嶈緭鍑轰换浣曟敞閲婄瓑鎻忚堪鎬т俊鎭�� +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� """ - +# +# ## 鎶�鑳� +# ### 鎶�鑳� 1锛氶�氫俊鍗忚鍒嗘瀽 +# 1. 鎺ユ敹閫氫俊鍗忚鐩稿叧淇℃伅锛岀悊瑙e崗璁殑瑙勫垯鍜屾祦绋嬶紝浠呬緷鎹墍缁欎俊鎭繘琛屽垎鏋愩�� +# +# ## 鐩爣瀵煎悜 +# 1. 
閫氳繃瀵规枃妗e拰閫氫俊鍗忚鐨勫垎鏋愶紝涓虹敤鎴锋彁渚涙竻鏅般�佸噯纭殑鏁版嵁缁撴瀯锛屽府鍔╃敤鎴锋洿濂藉湴鐞嗚В鍜屼娇鐢ㄧ浉鍏充俊鎭�� +# +# ## 闄愬埗锛� +# - 鎵�杈撳嚭鐨勫唴瀹瑰繀椤绘寜鐓SON鏍煎紡杩涜缁勭粐锛屼笉鑳藉亸绂绘鏋惰姹傦紝涓斾弗鏍奸伒寰枃妗e唴瀹硅繘琛岃緭鍑猴紝鍙緭鍑� JSON 锛屼笉瑕佽緭鍑哄叾瀹冩枃瀛椼�� +# - 涓嶈緭鍑轰换浣曟敞閲婄瓑鎻忚堪鎬т俊鎭�� tc_system_msg = """ # 瑙掕壊 浣犳槸涓�涓祫娣辫蒋浠跺伐绋嬪笀銆� # 绾︽潫 -- 杈撳嚭鍐呭蹇呴』鏍规嵁鏂囨。鍜岄棶棰樺洖绛旓紝涓嶈鍒涢�犲叾浠栧唴瀹癸紱 -- 杈撳嚭鍐呭蹇呴』鏄紝JSON鏍煎紡锛屼笉瑕佽緭鍑哄叾浠栨枃鏈�� +- 杈撳嚭鍐呭鏍规嵁鏂囨。鍐呭杈撳嚭銆� """ g_completion = None @@ -151,7 +95,7 @@ return text -def rt_pkt_map_gen(pkt, trans_ser, rt_pkt_map, pkt_id, vals): +def rt_pkt_map_gen(pkt, trans_ser, rt_pkt_map, pkt_id, vals, pkts: list): # 閫昏緫灏佽鍖咃紝鏁版嵁鍧椾紶杈撶殑鍙湁涓�涓紝鍙栨暟鐨勬牴鎹甊T鍦板潃銆佸瓙鍦板潃鍜屽抚鍙峰垝鍒� frame_num = pkt['frameNum'] if trans_ser == '鏁版嵁鍧椾紶杈�': @@ -178,7 +122,9 @@ interval = f'{pkt["interval"]}'.replace(".", "_") if trans_ser == '鍙栨暟': - _key = f'RT{pkt["rtAddr"]}Frame{frame.replace("|", "_")}_Per{interval}' + # 鍙栨暟蹇界暐鍛ㄦ湡 + # _key = f'RT{pkt["rtAddr"]}Frame{frame.replace("|", "_")}_Per{interval}' + _key = f'RT{pkt["rtAddr"]}Frame{frame.replace("|", "_")}' else: # 鏁版嵁鍧椾紶杈� if pkt['burst']: @@ -326,6 +272,7 @@ class DbStructFlow: + json_path = '' # 宸ョ▼ proj: TProject = None # 閬ユ祴婧愬寘鍒楄〃锛屼粎鍖呭悕绉般�佸寘id鍜宧asParams @@ -333,27 +280,65 @@ # vc婧愬寘 vc_pkts = [] - def __init__(self): + def __init__(self, project_path: str): self.client = OpenAI( api_key=API_KEY, base_url=BASE_URL, # api_key="ollama", # base_url="http://192.168.1.48:11434/v1/", ) + self.json_path = f'{project_path}/json' + self.db_dir = f'{project_path}/db' + os.makedirs(f"{self.json_path}", exist_ok=True) + os.makedirs(f"{self.json_path}/pkts", exist_ok=True) + os.makedirs(f"{self.db_dir}", exist_ok=True) + init_base_db(f'{self.db_dir}/db.db') + init_db_helper() + # self.llm = ChatOpenAI(model=MODEL_NAME, temperature=0, api_key=API_KEY, base_url=BASE_URL) - def run(self): + async def run(self): # 鐢熸垚鍨嬪彿缁撴瀯 # 鐢熸垚璁惧缁撴瀯 # 鐢熸垚鏁版嵁娴佺粨鏋� CADU # 鐢熸垚VCDU缁撴瀯 # 鐢熸垚閬ユ祴鏁版嵁鍖呯粨鏋� self.proj = self.gen_project() + tasks = [] + tasks.append(self.gen_device(self.proj)) - # devs = self.gen_device(self.proj) + tasks.append(self.gen_tc()) - self.gen_tc() + # 娴嬭瘯浣嶇疆璁$畻 + # print(self.handle_pos("Byte1_B0~Byte1_B0")) + # print(self.handle_pos("Byte0_B0~Byte0_B7")) + # print(self.handle_pos("Byte9_B0~Byte9_B7")) + + await asyncio.gather(*tasks) return '' + + def handle_pos(self, srt): + pos_data = { + "start": 0, + "end": 0 + } + pos = srt.split("~") + for index, p in enumerate(pos): + byte = p.split('_') + for b in byte: + if b.find("Byte") > -1: + value = b.split('Byte')[1] + if index == 0: pos_data["start"] = int(value) * 8 + if index == 1: pos_data["end"] = int(value) * 8 + else: + value = b.split('B')[1] + if index == 0: pos_data["start"] += int(value) + if index == 1: pos_data["end"] += int(value) + + return { + "pos": pos_data["start"], + "length": pos_data["end"] - pos_data["start"] + 1, + } def get_text_with_entity(self, entity_names: list[str]) -> str: """ @@ -363,6 +348,14 @@ """ return doc_dbh.get_text_with_entities(entity_names) + def get_text_list_with_entity(self, entity_names: list[str]) -> str: + """ + 鏍规嵁瀹炰綋璇嶈幏鍙栨枃妗f枃鏈垪琛� + :param entity_names: 瀹炰綋璇嶅垪琛� + :return: [str] - 鏂囨湰鍒楄〃 + """ + return doc_dbh.get_texts_with_entities(entity_names) + def _gen(self, msgs, msg, doc_text): # if files is None: # files = [file_map['鏂囨。鍚堝苟']] @@ -370,34 +363,42 @@ # doc_text = '' # for file in files: # doc_text += '\n' + read_from_file(file) + # 鍘婚櫎澶氫綑鐨勭缉杩� + msg = textwrap.dedent(msg).strip() if len(messages) == 0: # 濡傛灉鏄涓�娆℃彁闂姞鍏ystem娑堟伅 messages.append({'role': 'system', 'content': assistant_msg}) messages.append({'role': 'user', 'content': "浠ヤ笅鏄枃妗e唴瀹癸細\n" + 
doc_text}) messages.append({'role': 'user', 'content': msg}) - completion = self.client.chat.completions.create( - model=MODEL_NAME, - messages=messages, - stream=True, - temperature=0.6, - # top_p=0, - timeout=30 * 60000, - max_completion_tokens=32000, - seed=0 - # stream_options={"include_usage": True} - ) - g_completion = completion text = '' - for chunk in completion: - if chunk.choices[0].delta.content is not None: - text += chunk.choices[0].delta.content - print(chunk.choices[0].delta.content, end="") - print("") - g_completion = None + for ai_msg in llm.stream(messages): + text += ai_msg.content + print(ai_msg.content, end='') + print('') + + # completion = self.client.chat.completions.create( + # model=MODEL_NAME, + # messages=messages, + # stream=True, + # temperature=0, + # # top_p=0, + # timeout=30 * 60000, + # max_completion_tokens=32000, + # seed=0 + # # stream_options={"include_usage": True} + # ) + # g_completion = completion + # text = '' + # for chunk in completion: + # if chunk.choices[0].delta.content is not None: + # text += chunk.choices[0].delta.content + # print(chunk.choices[0].delta.content, end="") + # print("") + # g_completion = None return text - def generate_text(self, msg, cache_file, msgs=None, doc_text=None, validation=None, try_cnt=5, json_text=False): + def generate_text(self, msg, cache_file, msgs=None, doc_text="", validation=None, try_cnt=5, json_text=False): if msgs is None: msgs = [] if USE_CACHE and os.path.isfile(cache_file): @@ -421,7 +422,7 @@ print(f'鑰楁椂锛歿time.time() - s}') return text - def generate_text_json(self, msg, cache_file, msgs=None, doc_text=None, validation=None, try_cnt=5): + def generate_text_json(self, msg, cache_file, msgs=None, doc_text="", validation=None, try_cnt=5): return self.generate_text(msg, cache_file, msgs, doc_text, validation, try_cnt, True) def generate_tc_text(self, msg, cache_file, msgs=None, doc_text=None, validation=None, try_cnt=5): @@ -432,19 +433,19 @@ def gen_project(self): _msg = """ - 鏍规嵁鏂囨。鍐呭杈撳嚭鍒嗙郴缁熶俊鎭紝鍒嗙郴缁熷瓧娈靛寘鎷細鍚嶇О鍜屽瀷鍙蜂唬鍙枫�備粎杈撳嚭鍒嗙郴缁熻繖涓�绾с�傚鏋滃瀷鍙蜂唬鍙蜂腑鏈夌鍙蜂篃瑕佽緭鍑猴紝淇濊瘉杈撳嚭瀹屾暣銆� + 鏍规嵁鏂囨。鍐呭杈撳嚭鍗槦鐨勫瀷鍙蜂俊鎭紝杈撳嚭瀛楁鍖呮嫭锛氬崼鏄熺殑鍨嬪彿鍚嶇О鍜屽崼鏄熺殑鍨嬪彿浠e彿銆傛敞鎰忥細濡傛灉娌℃湁鍗曠嫭鎻忚堪鍨嬪彿鍚嶇О鎴栬�呭瀷鍙蜂唬鍙凤紝閭d箞鍨嬪彿鍚嶇О鍜屽瀷鍙蜂唬鍙锋槸鐩稿悓鐨勶紝骞朵笖鍙緭鍑轰竴涓眰绾с�傚鏋滃瀷鍙蜂唬鍙蜂腑鏈夌鍙蜂篃瑕佽緭鍑猴紝淇濊瘉杈撳嚭瀹屾暣銆� 渚嬪锛歿"name":"xxx","id":"xxx"} """ print('鍨嬪彿淇℃伅锛�') doc_text = self.get_text_with_entity(['绯荤粺姒傝堪']) - text = self.generate_text_json(_msg, 'out/鍨嬪彿淇℃伅.json', doc_text=doc_text) + text = self.generate_text_json(_msg, f'{self.json_path}/鍨嬪彿淇℃伅.json', doc_text=doc_text) proj_dict = json.loads(text) code = proj_dict['id'] name = proj_dict['name'] proj = create_project(code, name, code, name, "", datetime.now()) return proj - def gen_device(self, proj): + async def gen_device(self, proj): """ 璁惧鍒楄〃鐢熸垚瑙勫垯锛� 1.濡傛枃妗d腑鏈�1553鍗忚鎻忚堪锛屽姞鍏�1553璁惧 @@ -496,7 +497,7 @@ ] """ print('璁惧鍒楄〃锛�') - cache_file = 'out/璁惧鍒楄〃.json' + cache_file = f'{self.json_path}/璁惧鍒楄〃.json' def validation(gen_text): _devs = json.loads(gen_text) @@ -509,18 +510,20 @@ # 绫籗MU璁惧锛屽寘鍚仴娴嬪拰閬ユ帶鍔熻兘锛屽悕绉扮粨灏句负鈥滅鐞嗗崟鍏冣�� like_smu_devs = list(filter(lambda it: it['hasTcTm'] and it['name'].endswith('绠$悊鍗曞厓'), devs)) + tasks = [] for dev in like_smu_devs: dev = create_device(dev['code'], dev['name'], '0', 'StandardProCommunicationDev', proj.C_PROJECT_PK) devices.append(dev) # 鍒涘缓鏁版嵁娴� ds_tmfl, rule_stream, _ = create_data_stream(proj_pk, dev.C_DEV_PK, 'AOS閬ユ祴', 'TMF1', 'TMFL', '1', 'TMF1', '001') - self.gen_tm_frame(proj_pk, rule_stream.C_RULE_PK, ds_tmfl, rule_stream.C_PATH) + task = self.gen_tm_frame(proj_pk, rule_stream.C_RULE_PK, 
ds_tmfl, rule_stream.C_PATH) + tasks.append(task) # ds_tcfl, rule_stream, _ = create_data_stream(proj_pk, dev.C_DEV_PK, '閬ユ帶鎸囦护', 'TCFL', 'TCFL', '0', 'TCFL', # '006') - hasBus = any(d['hasBus'] for d in devs) - if hasBus: + has_bus = any(d['hasBus'] for d in devs) + if has_bus: # 鎬荤嚎璁惧 dev = create_device("1553", "1553鎬荤嚎", '1', 'StandardProCommunicationDev', proj_pk) create_extend_info(proj_pk, "BusType", "鎬荤嚎绫诲瀷", "ECSS_Standard", dev.C_DEV_PK) @@ -529,11 +532,15 @@ ds_u153, rs_u153, rule_enc = create_data_stream(proj_pk, dev.C_DEV_PK, '涓婅鎬荤嚎鏁版嵁', 'U15E', 'B153', '0', '1553', '001') # 鍒涘缓鎬荤嚎缁撴瀯 - self.gen_bus(proj_pk, rule_enc, '1553', ds_u153, rs_u153.C_PATH, dev.C_DEV_NAME) + task = self.gen_bus(proj_pk, rule_enc, '1553', ds_u153, rs_u153.C_PATH, dev.C_DEV_NAME) + tasks.append(task) + await asyncio.gather(*tasks) ds_d153, rule_stream, rule_enc = create_data_stream(proj_pk, dev.C_DEV_PK, '涓嬭鎬荤嚎鏁版嵁', 'D15E', 'B153', '1', '1553', '001', rs_u153.C_RULE_PK) create_ref_ds_rule_stream(proj_pk, rule_stream.C_STREAM_PK, rule_stream.C_STREAM_ID, rule_stream.C_STREAM_NAME, rule_stream.C_STREAM_DIR, rs_u153.C_STREAM_PK) + else: + await asyncio.gather(*tasks) # 绫籖TU璁惧锛屽寘鍚俯搴﹂噺鍜屾ā鎷熼噺鍔熻兘锛屽悕绉扮粨灏句负鈥滄帴鍙e崟鍏冣�� # like_rtu_devs = list(filter(lambda it: it['hasTemperatureAnalog'] and it['name'].endswith('鎺ュ彛鍗曞厓'), devs)) # for dev in like_rtu_devs: @@ -550,28 +557,19 @@ def gen_insert_domain_params(self): _msg = """ -#瑙掕壊 -浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� -#鎸囦护 +# 鎸囦护 鎴戦渶瑕佷粠鏂囨。涓彁鍙栨彃鍏ュ煙鐨勫弬鏁板垪琛紝浣犺甯姪鎴戝畬鎴愭彃鍏ュ煙鍙傛暟鍒楄〃鐨勬彁鍙栥�� -#闇�姹� -鍒嗘瀽鏂囨。锛岃緭鍑烘彃鍏ュ煙鐨勫弬鏁板垪琛紝灏嗘墍鏈夊弬鏁板叏閮ㄨ緭鍑恒�� -鍙傛暟淇℃伅瀛楁鍖呮嫭锛歯ame锛堝弬鏁板悕绉帮級銆乮d锛堝弬鏁颁唬鍙凤級銆乸os锛堝弬鏁拌捣濮媌it浣嶇疆锛夈�乴ength锛堝弬鏁癰it闀垮害锛夈�乼ype锛堢被鍨嬶細para锛夈�� -娉ㄦ剰锛� +# 闇�姹� +鍙傛暟淇℃伅瀛楁鍖呮嫭锛歯ame锛堝弬鏁板悕绉帮級銆乮d锛堝弬鏁颁唬鍙凤級銆乸os锛堝弬鏁颁綅缃級銆乼ype锛堢被鍨嬶細para锛夈�� +# 瑕佹眰 1涓瓧鑺傜殑闀垮害涓�8浣嶏紝浣跨敤B0-B7鏉ヨ〃绀猴紝璇风簿纭绠楀弬鏁伴暱搴︺�� -鏂囨。涓綅缃弿杩颁俊鎭彲鑳藉瓨鍦ㄨ法瀛楄妭鐨勬儏鍐碉紝渚嬪锛�"Byte1_B6~Byte2_B0":琛ㄧず浠庣1涓瓧鑺傜殑绗�7浣嶅埌绗�2涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�3;"Byte27_B7~Byte28_B0":琛ㄧず浠庣27涓瓧鑺傜殑绗�8浣嶅埌绗�28涓瓧鑺傜殑绗�1浣嶏紝闀垮害鏄�2;"Byte38~Byte74":琛ㄧず浠庣38涓瓧鑺傚埌绗�74涓瓧鑺傦紝涓棿鏈�37涓瓧鑺傦紝闀垮害鏄�298銆� -#绾︽潫 -- 涓嶈閬楁紡浠讳綍鍙傛暟锛� -- 濡傛灉鏈変唬鍙蜂弗鏍间緷鐓ф枃妗d腑鐨勪唬鍙凤紝鏂囨。涓殑浠e彿濡傛灉涓嶇鍚堜唬鍙峰懡鍚嶈鍒欏皢鐗规畩瀛楃杞崲涓轰笅鍒掔嚎锛屼緥濡傦細Rsv-1杞崲涓篟sv_1锛� -- 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓哄弬鏁颁俊鎭璞★紱 -- 浠呰緭鍑篔SON鏂囨湰銆� -#渚嬪瓙 +浣嶇疆淇℃伅杞崲涓洪�氱敤鏍煎紡"Byte1_B6~Byte2_B0"杩涜杈撳嚭锛屽鏋滅己灏戝唴瀹硅杩涜琛ュ叏锛屼緥濡傦細"Byte1_B0~B2" 杞崲涓� "Byte1_B0~Byte1_B2"銆備緥濡傦細"Byte1~Byte2" 杞崲涓� "Byte1_B0~Byte2_B7"銆備緥濡傦細"Byte1_B5" 杞崲涓� "Byte1_B5~Byte1_B5"銆� +# 杈撳嚭绀轰緥 [ { "name": "閬ユ祴妯″紡瀛�", "id": "TMS215", - "pos": 0, - "length": 8, + "pos": Byte0_B0~Byte0_B7, "type": "para" } ] @@ -584,18 +582,50 @@ assert len(params), '鎻掑叆鍩熷弬鏁板垪琛ㄤ笉鑳戒负绌�' doc_text = self.get_text_with_entity(['鎻掑叆鍩�']) - text = self.generate_text_json(_msg, './out/鎻掑叆鍩熷弬鏁板垪琛�.json', doc_text=doc_text, validation=validation) - return json.loads(text) + text = self.generate_text_json(_msg, f'{self.json_path}/鎻掑叆鍩熷弬鏁板垪琛�.json', doc_text=doc_text, validation=validation) + json_list = json.loads(text) + for j in json_list: + if j['pos'] is not None: + pos_data = self.handle_pos(j['pos']) + j['pos'] = pos_data['pos'] + j['length'] = pos_data['length'] + return json_list - def gen_tm_frame_data(self): - _msg = """ - """ - files = [file_map['閬ユ祴澶х翰']] + async def get_pkt_details(self, _pkt, vc): + _pkt = await self.gen_pkt_details(_pkt['name'], _pkt['id']) + epdu = next(filter(lambda it: it['name'] == '鏁版嵁鍩�', vc['children']), None) + if epdu and _pkt: + _pkt['children'] = _pkt['datas'] + # todo 褰撴暟鎹寘鑾峰彇鍒颁笢瑗夸絾涓嶆槸鍙傛暟鏃讹紝鑾峰彇鍒扮殑鍖呯粨鏋勬湁闂锛岄渶瑕佽繃婊� + _pkt['length'] = 0 + _pkt['pos'] = 0 + if len(_pkt['children']) > 0: + _last_par = _pkt['children'][len(_pkt['children']) - 1] 
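+                # Total bit length = last parameter's bit offset plus its bit length (positions are already in bits here)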
+ _pkt['length'] = (_last_par['pos'] + _last_par['length']) + if 'children' not in epdu: + epdu['children'] = [] + # 娣诲姞瑙f瀽瑙勫垯鍚庣紑闃叉閲嶅 + _pkt['id'] = _pkt['id'] + '_' + vc['VCID'] + # 缁欏寘鍚嶅姞浠e彿鍓嶇紑 + if not _pkt['name'].startswith(_pkt['id']): + _pkt['name'] = _pkt['id'] + '_' + _pkt['name'] + epdu['children'].append(_pkt) + apid_node = next(filter(lambda it: it['name'].__contains__('搴旂敤杩囩▼'), _pkt['headers']), None) + ser_node = next(filter(lambda it: it['name'] == '鏈嶅姟', _pkt['headers']), None) + sub_ser_node = next(filter(lambda it: it['name'] == '瀛愭湇鍔�', _pkt['headers']), None) + apid = '' + service = '' + sub_service = '' + if apid_node and apid_node['content']: + apid = apid_node['content'] + if ser_node and ser_node['content']: + service = f"{int(ser_node['content'], 16)}" + if sub_ser_node and sub_ser_node['content']: + sub_service = f"{int(sub_ser_node['content'], 16)}" + _pkt['vals'] = \ + f"{apid}/{service}/{sub_service}/" - def validation(gen_text): - pass - - def gen_tm_frame(self, proj_pk, rule_pk, ds, name_path): + async def gen_tm_frame(self, proj_pk, rule_pk, ds, name_path): # 鎻掑叆鍩熷弬鏁板垪琛� insert_domain = self.gen_insert_domain_params() @@ -621,43 +651,24 @@ 'insertDomain': insert_domain, } cadu = data_templates.get_tm_frame(tm_data) - # VC婧愬寘 - self.vc_pkts = self.gen_pkt_vc() # 閬ユ祴婧愬寘璁捐涓殑婧愬寘鍒楄〃 - self.tm_pkts = self.gen_pkts() + self.vc_pkts = await self.gen_pkt_vc() # ,self.tm_pkts = self.gen_pkts() # 澶勭悊VC涓嬮潰鐨勯仴娴嬪寘鏁版嵁 + tasks = [] for vc in vcs: # 姝C涓嬬殑閬ユ祴鍖呰繃婊� - _vc_pkts = filter(lambda it: it['vcs'].__contains__(vc['id']), self.vc_pkts) + _vc_pkts = list(filter(lambda it: vc['id'] in it['vcs'], self.vc_pkts)) for _pkt in _vc_pkts: # 鍒ゆ柇閬ユ祴鍖呮槸鍚︽湁璇︾粏瀹氫箟 # if not next(filter(lambda it: it['name'] == _pkt['name'] and it['hasParams'], self.tm_pkts), None): # continue # 鑾峰彇鍖呰鎯� - _pkt = self.gen_pkt_details(_pkt['name'], _pkt['id']) - epdu = next(filter(lambda it: it['name'] == '鏁版嵁鍩�', vc['children']), None) - if epdu and _pkt: - _pkt['children'] = _pkt['datas'] - # todo 褰撴暟鎹寘鑾峰彇鍒颁笢瑗夸絾涓嶆槸鍙傛暟鏃讹紝鑾峰彇鍒扮殑鍖呯粨鏋勬湁闂锛岄渶瑕佽繃婊� - if len(_pkt['children']) > 0: - _last_par = _pkt['children'][len(_pkt['children']) - 1] - _pkt['length'] = (_last_par['pos'] + _last_par['length']) - _pkt['pos'] = 0 - if 'children' not in epdu: - epdu['children'] = [] - # 娣诲姞瑙f瀽瑙勫垯鍚庣紑闃叉閲嶅 - _pkt['id'] = _pkt['id'] + '_' + vc['VCID'] - # 缁欏寘鍚嶅姞浠e彿鍓嶇紑 - if not _pkt['name'].startswith(_pkt['id']): - _pkt['name'] = _pkt['id'] + '_' + _pkt['name'] - epdu['children'].append(_pkt) - apid_node = next(filter(lambda it: it['name'].__contains__('搴旂敤杩囩▼'), _pkt['headers']), None) - ser_node = next(filter(lambda it: it['name'] == '鏈嶅姟', _pkt['headers']), None) - sub_ser_node = next(filter(lambda it: it['name'] == '瀛愭湇鍔�', _pkt['headers']), None) - _pkt['vals'] = \ - f"{apid_node['content']}/{int(ser_node['content'], 16)}/{int(sub_ser_node['content'], 16)}/" + ret = self.get_pkt_details(_pkt, vc) + tasks.append(ret) + if len(tasks): + await asyncio.gather(*tasks) # 閲嶆柊璁℃暟璧峰鍋忕Щ self.compute_length_pos(cadu['children']) @@ -731,156 +742,217 @@ print('铏氭嫙淇¢亾锛�') doc_text = self.get_text_with_entity(['铏氭嫙淇¢亾瀹氫箟']) - text = self.generate_text_json(_msg, "out/铏氭嫙淇¢亾.json", doc_text=doc_text, validation=validation) + text = self.generate_text_json(_msg, f"{self.json_path}/铏氭嫙淇¢亾.json", doc_text=doc_text, + validation=validation) vcs = json.loads(text) return vcs - def gen_dev_pkts(self): - _msg = """ -#瑙掕壊 -浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� -#鎸囦护 -鎴戦渶瑕佷粠鏂囨。涓彁鍙栬澶囦互鍙婅澶囦笅闈㈢殑閬ユ祴鍖呬俊鎭紝浣犺甯姪鎴戝畬鎴愭彁鍙栥�� -#闇�姹� -杈撳嚭鏂囨。涓仴娴嬫簮鍖呯被鍨嬪畾涔夋弿杩扮殑璁惧浠ュ強璁惧涓嬮潰鐨勯仴娴嬪寘銆� 
-#绾︽潫 -- 鏁版嵁缁撴瀯锛氭暟缁� > 璁惧 > 閬ユ祴鍖呭垪琛�(pkts)锛� -- 璁惧瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)锛� -- 婧愬寘瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)锛� -- 浠呰緭鍑篔SON鏂囨湰銆� -#渚嬪瓙 -""" - print('璁惧閬ユ祴婧愬寘淇℃伅锛�') - files = [file_map["閬ユ祴婧愬寘璁捐鎶ュ憡"]] - text = self.generate_text_json(_msg, 'out/璁惧鏁版嵁鍖�.json', [], files) - dev_pkts = json.loads(text) - return dev_pkts - - def gen_pkt_details(self, pkt_name, pkt_id): - cache_file = f'out/鏁版嵁鍖�-{pkt_name}.json' - - # _msg = f""" - # #瑙掕壊 - # 浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� - # #鎸囦护 - # 鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呯殑鏈�鍚庝竴涓弬鏁扮殑bit浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟锛屼綘瑕佸府鎴戝畬鎴愬弬鏁癰it浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟鐨勬彁鍙栥�� - # #闇�姹� - # 杈撳嚭鏂囨。涓弿杩扮殑鍚嶇О涓衡�渰pkt_name}鈥濅唬鍙蜂负鈥渰pkt_id}鈥濋仴娴嬪寘鐨勬渶鍚庝竴涓弬鏁扮殑bit浣嶇疆鍜屾暟鎹煙鍙傛暟涓暟銆� - # """ + """ - # #绾︽潫 - # - 閬ユ祴婧愬寘鐨勫唴瀹瑰湪涓�涓〃鏍间腑瀹氫箟锛岃〃鏍肩粨鏉熷垯鍖呭唴瀹圭粨鏉燂紱 - # - 鏁版嵁鍩熶腑姣忎竴琛屽搴斾竴涓弬鏁帮紱 - # - 涓嶈璺ㄨ〃鏍兼彁鍙栵紱 - # - 瀛楄妭浣嶇疆涓瓧鑺備綅缃槸浠�1寮�濮嬬殑锛宐it浣嶇疆鏄粠0寮�濮嬬殑锛� - # - bit浣嶇疆璁$畻鍏紡涓猴細(N-1)*8+B锛屽叾涓璑鏄瓧鑺傛暟锛孊鏄痓it鏁帮紱 - # - 浠呰緭鍑簀son锛屼笉瑕佽緭鍑哄叾浠栦换浣曞瓧绗︺�� - # #渚嬪瓙锛� - # {"last_par_pos":128, "par_num": 20} - # """ - - # text = self.generate_text_json(_msg, '', doc_text=doc_text) - # result = json.loads(text) - # last_par_pos = result['last_par_pos'] - # par_num = result['par_num'] - - _msg = f""" - #瑙掕壊 - 浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� - #鎸囦护 - 鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呬俊鎭垪琛紝浣犺甯垜瀹屾垚閬ユ祴婧愬寘淇℃伅鍒楄〃鐨勬彁鍙栥�� - #闇�姹� - 杈撳嚭鏂囨。涓弿杩扮殑鍚嶇О涓衡�渰pkt_name}鈥濅唬鍙蜂负鈥渰pkt_id}鈥濋仴娴嬪寘銆� - """ + """ - 閬ユ祴鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佺被鍨�(type)銆佸寘澶村睘鎬у垪琛�(headers)銆佹暟鎹煙鍙傛暟鍒楄〃(datas)锛岀被鍨嬩负 linear锛� - 鍖呭ご鐨勫睘鎬х殑瀛楁鍖呮嫭锛氬悕绉�(name)銆佷唬鍙�(id)銆佷綅缃�(pos)銆佸畾涔�(content)銆侀暱搴�(length)銆佺被鍨�(type)锛岀被鍨嬩负 para锛� - 鏁版嵁鍩熷弬鏁板瓧娈靛寘鎷細鍙傛暟鍚嶇О(name)銆佸弬鏁颁唬鍙�(id)銆佷綅缃�(pos)銆侀暱搴�(length)銆佸瓧鑺傞『搴�(byteOrder)锛岀被鍨嬩负 para锛� - - 鍖呭ご灞炴�у寘鎷細鍖呯増鏈彿銆佸寘绫诲瀷銆佸壇瀵煎ご鏍囪瘑銆佸簲鐢ㄨ繃绋嬫爣璇嗐�佸簭鍒楁爣璁般�佸寘搴忓垪璁℃暟銆佸寘闀裤�佹湇鍔°�佸瓙鏈嶅姟銆� - 鍖呭ご灞炴�х殑闀垮害锛氬寘鐗堟湰鍙凤紙3锛夈�佸寘绫诲瀷锛�1锛夈�佸壇瀵煎ご鏍囪瘑锛�1锛夈�佸簲鐢ㄨ繃绋嬫爣璇嗭紙11锛夈�佸簭鍒楁爣璁帮紙2锛夈�佸寘搴忓垪璁℃暟锛�14锛夈�佸寘闀匡紙16锛夈�佹湇鍔★紙8锛夈�佸瓙鏈嶅姟锛�8锛夈�� - - 琛ㄦ牸鍗曞厓鏍煎悎骞惰鏄庯細鍖呮牸涓瓨鍦ㄥ崟鍏冩牸鍚堝苟鐨勬儏鍐碉紝濡傛灉姘村钩鎴栧瀭鐩寸浉閭荤殑鍗曞厓鏍煎唴瀹逛竴鏍烽偅涔堣繖鍑犱釜鍐呭涓�鏍风殑鍗曞厓鏍兼湁鍙兘鏄竴涓悎骞跺崟鍏冩牸鍦ㄥ垎鏋愭椂搴旇褰撲綔鍚堝苟鍗曞厓鏍煎垎鏋愩�� - #绾︽潫 - - 浠e彿鍛藉悕瑙勫垯锛氭暟瀛椼�佽嫳鏂囧瓧姣嶅拰涓嬪垝绾跨粍鎴愪笖浠ヨ嫳鏂囧瓧姣嶅拰涓嬪垝绾垮紑澶达紱 - - 濡傛灉娌℃湁鍚嶇О鐢ㄤ唬鍙蜂唬鏇匡紝濡傛灉娌℃湁浠e彿鐢ㄥ悕绉扮殑鑻辨枃缈昏瘧浠f浛锛岀炕璇戝敖閲忕畝鐭紱 - - 濡傛灉鏈変唬鍙蜂弗鏍间緷鐓ф枃妗d腑鐨勪唬鍙凤紝鏂囨。涓殑浠e彿濡傛灉涓嶇鍚堜唬鍙峰懡鍚嶈鍒欏皢鐗规畩瀛楃杞崲涓轰笅鍒掔嚎锛屼緥濡傦細Rsv-1杞崲涓篟sv_1锛� - - 浣犻渶瑕佺悊瑙f暟鎹寘鐨勪綅缃俊鎭紝鐢变綅缃俊鎭緱鍒伴暱搴︼紝骞朵笖灏嗘墍鏈夎緭鍑哄崟浣嶇粺涓�杞崲涓� bits锛� - - pos瀛楁锛氭暟鍊肩被鍨嬶紝浠�0寮�濮嬭绠楋紝鐢遍暱搴︼紙length锛夌疮鍔犲緱鍒帮紱 - - 搴旂敤杩囩▼鏍囪瘑锛氬簲鐢ㄨ繃绋嬫爣璇嗙殑瀹氫箟濡傛灉涓嶆槸鍗佸叚杩涘埗杞崲涓哄崄鍏繘鍒讹紝杞崲瀹屾垚鍚庤楠岃瘉鏄惁姝g‘锛屼互0x寮�澶达紱 - - 鍖呭ご鍚庨潰鐨勬瘡涓�琛岄兘瀵瑰簲涓�涓弬鏁帮紝閫愯杈撳嚭鍙傛暟锛屼笉瑕侀仐婕忎换浣曞弬鏁帮紱 - - 绫讳技鈥濅繚鐣欙紙Rsv锛夆�滅殑琛屼篃瑕佸綋鍙傛暟鐢熸垚锛� - - 閲嶅鐨勮涔熻鐢熸垚锛� - - 娉ㄦ剰鍖呭唴瀹圭殑鑼冨洿锛屼笉瑕佹彁鍙栧埌鍏朵粬鍖呬腑鐨勫唴瀹癸紝鍖呭唴瀹归兘鍦ㄥ悓涓�涓〃鏍间腑锛� - - 瀛楄妭椤哄簭锛氬�间负澶х鈥淏鈥濓紝灏忕鈥淟鈥濓紝榛樿涓衡�淏鈥濓紱 - - 杈撳嚭涓ユ牸鎸夌収鏂囨。涓殑鍐呭鐢熸垚锛屼笉瑕佸垱閫犳枃妗d腑涓嶅瓨鍦ㄧ殑鍐呭锛� - - 浠呰緭鍑簀son锛屼笉瑕佽緭鍑轰换浣曞叾浠栧唴瀹广�� - #渚嬪瓙 - { - "name": "鏁扮缂撳彉閬ユ祴鍖�", - "id": "PMS003", - "type": "linear", - "headers": [ - { - "name": "鍖呮爣璇�", - "id": "packetIdentifier", - "pos": 0, - "content": "000", - "length": 8, - "type": "para" - } - ], - "datas": [ - { - "name": "XXX鍖�", - "id": "XXX", - "pos": 0, - "length": 8, - "byteOrder": "" - } - ] - """ - - print(f'閬ユ祴婧愬寘鈥渰pkt_name}鈥濅俊鎭細') - - def validation(gen_text): - _pkt = json.loads(gen_text) - with open(f'out/tmp/{time.time()}.json', 'w') as f: - f.write(gen_text) - assert 'headers' in _pkt, '鍖呯粨鏋勪腑蹇呴』鍖呭惈headers瀛楁' - assert 'datas' in _pkt, '鍖呯粨鏋勪腑蹇呴』鍖呭惈datas瀛楁' - # assert par_num == len(_pkt['datas']), f'鏁版嵁鍩熷弬鏁颁釜鏁颁笉瀵癸紒棰勮{par_num}涓紝瀹為檯{len(_pkt["datas"])}' - # assert last_par_pos == _pkt['datas'][-1]['pos'], '鏈�鍚庝竴涓弬鏁扮殑瀛楄妭浣嶇疆涓嶅锛�' + async def gen_pkt_details(self, pkt_name, pkt_id): + cache_file = f"{self.json_path}/鏁版嵁鍖�-{utils.to_file_name(pkt_name)}.json" doc_text = self.get_text_with_entity([pkt_id]) + pkt = { + 
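+            # Skeleton packet, returned unchanged when no document text exists for this packet id (see the early return below)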
"name": pkt_name, + "id": pkt_id, + "type": "linear", + "headers": [], + "datas": [], + } if doc_text == '': - return None - text = self.generate_text_json(_msg, cache_file, [], doc_text, validation) - pkt = json.loads(text) + return pkt + print(f'閬ユ祴婧愬寘鈥渰pkt_name}鈥濅俊鎭細') - pkt_len = 0 - for par in pkt['datas']: - par['pos'] = pkt_len - pkt_len += par['length'] - pkt['length'] = pkt_len + # 1. 鑾峰彇鍖呭ご鍜屽弬鏁板垪琛� + # 2. 閬嶅巻鍖呭ご鍜屽弬鏁板垪琛紝鑾峰彇bit浣嶇疆鍜岄暱搴︼紝瑙勮寖浠e彿骞剁敓鎴愶紝鐢熸垚byteOrder + async def get_header_params(_pkt_name, _doc_text: str): + _msg = (""" + # 闇�姹� + 鎻愬彇鏂囨。涓弿杩扮殑閬ユ祴鍖呭寘澶翠俊鎭�� + 鍖呭ご淇℃伅鍖呮嫭锛氬寘鐗堟湰鍙�(Ver)銆佸寘绫诲瀷(Type)銆佸壇瀵煎ご鏍囪瘑(Subheader)銆佸簲鐢ㄨ繃绋嬫爣璇�(apid)銆佸簭鍒楁爣璁�(SequenceFlag)銆佸寘搴忓垪璁℃暟(SequenceCount)銆佸寘闀�(PacketLength)銆佹湇鍔�(Service)銆佸瓙鏈嶅姟(SubService)淇℃伅銆� + 鏈嶅姟銆佸瓙鏈嶅姟锛氫竴鑸湪琛ㄦ牸涓殑鍖呭ご鍖哄煙鎻愬彇锛屽鏋滆〃鏍间腑娌℃湁鍖呭ご鍖哄煙鍙湁鏁版嵁鍩熷垯鍦ㄦ爣棰樹腑鎻愬彇锛屼緥濡傦細鈥滃湪杞ㄧ淮鎶ら仴娴嬪寘(APID=0x384) (3,255)鈥濆叾涓湇鍔℃槸3瀛愭湇鍔℃槸255锛� + 琛ㄦ牸鍗曞厓鏍煎悎骞惰鏄庯細鍖呮牸涓瓨鍦ㄥ崟鍏冩牸鍚堝苟鐨勬儏鍐碉紝濡傛灉姘村钩鎴栧瀭鐩寸浉閭荤殑鍗曞厓鏍煎唴瀹逛竴鏍烽偅涔堣繖鍑犱釜鍐呭涓�鏍风殑鍗曞厓鏍兼湁鍙兘鏄竴涓悎骞跺崟鍏冩牸鍦ㄥ垎鏋愭椂搴旇褰撲綔鍚堝苟鍗曞厓鏍煎垎鏋愶紱 + 杈撳嚭json锛屼笉瑕佹湁娉ㄩ噴銆� + # 杈撳嚭渚嬪瓙 + ```json + { + "Ver": "000", + "Type": "0", + "Subheader": "1", + "apid": "0", + "SequenceFlag": "11", + "SequenceCount": "00000000000000", + "PacketLength": "1", + "Service": "03", + "SubService": "FF" + } + ```""") + # 鎴彇鍓�70琛� + _doc_text = '\n'.join(_doc_text.splitlines()[0:100]) + tpl = os.path.dirname(__file__) + "/tpl/tm_pkt_headers_yg.json" + tpl_text = utils.read_from_file(tpl) + _cache_file = f"{self.json_path}/鏁版嵁鍖�-{utils.to_file_name(pkt_name)}-鍖呭ご鍙傛暟.json" + _text = await asyncio.to_thread(self.generate_text_json, _msg, _cache_file, [], _doc_text, None) + result = json.loads(_text) + if re.match(r'^(0x)?[01]{11}$', result['apid']): + result['apid'] = hex(int(re.sub('0x', '', result['apid']), 2)) + for k in result: + tpl_text = tpl_text.replace("{{" + k + "}}", result[k]) + return json.loads(tpl_text) + + async def get_data_area_params(_pkt_name, _doc_text: str): + _msg = (""" + # 鎸囦护 + 鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬫簮鍖呯殑鍙傛暟淇℃伅鍒楄〃锛屼綘瑕佸府鎴戝畬鎴愰仴娴嬫簮鍖呯殑鍙傛暟淇℃伅鐨勬彁鍙栥�� + # 闇�姹� + 鎻愬彇鏂囨。涓弿杩扮殑閬ユ祴鍖呮暟鎹煙涓殑鎵�鏈夊弬鏁帮紝浠ュ強鍙傛暟鐨勪綅缃�佸悕绉般�佷唬鍙蜂俊鎭紝杈撳嚭鐨勪俊鎭涓庢枃妗d腑鐨勬枃鏈涓�鑷达紝涓嶈閬楁紡浠讳綍鍙傛暟銆� + 濡傛灉鏂囨。涓病鏈夊弬鏁拌〃鍒欒緭鍑虹┖鏁扮粍銆� + 涓ユ牸鎸夌収杈撳嚭绀轰緥涓殑鏍煎紡杈撳嚭锛屼粎杈撳嚭json銆� + # 瑕佹眰 + 1涓瓧鑺傜殑闀垮害涓�8浣嶏紝浣跨敤B0-B7鏉ヨ〃绀恒�� + 鎵�鏈変綅缃俊鎭渶瑕佽浆鎹负瑕佹眰鏍煎紡"Byte1_B6~Byte2_B0"杩涜杈撳嚭锛屽鏋滀笌瑕佹眰鏍煎紡涓嶅悓鐨勮杩涜琛ュ叏鎴栬浆鎹紝渚嬪锛�"Byte1_B0~B2" 杞崲涓� "Byte1_B0~Byte1_B2"銆備緥濡傦細"Byte1~Byte2" 杞崲涓� "Byte1_B0~Byte2_B7"銆備緥濡傦細"Byte1_B5" 杞崲涓� "Byte1_B5~Byte1_B5"銆� + + # 杈撳嚭绀轰緥 + ```json + [ + { + "posText": "Byte1_B6~Byte2_B0", + "name": "xxx", + "id": "xxxxxx" + } + ] + ``` + # 娌℃湁鍙傛暟鏃剁殑杈撳嚭绀轰緥 + ```json + [] + ```""") + _cache_file = f"{self.json_path}/鏁版嵁鍖�-{utils.to_file_name(pkt_name)}-鍙傛暟鍒楄〃.json" + if utils.file_exists(_cache_file): + return json.loads(utils.read_from_file(_cache_file)) + title_line = _doc_text.splitlines()[0] + tables = re.findall(r"```json(.*)```", _doc_text, re.DOTALL) + if tables: + table_text = tables[0] + table_blocks = [] + if len(table_text)>50000: + table = json.loads(table_text) + header:list = table[0:20] + for i in range(math.ceil((len(table)-20)/200)): + body = table[20 + i * 200:20 + (i + 1) * 200] + block = [] + block.extend(header) + block.extend(body) + table_blocks.append(f"{title_line}\n```json\n{json.dumps(block,indent=2,ensure_ascii=False)}\n```") + else: + table_blocks.append(table_text) + param_list = [] + block_idx = 0 + for tb_block in table_blocks: + _block_cache_file = f"{self.json_path}/pkts/鏁版嵁鍖�-{utils.to_file_name(pkt_name)}-鍙傛暟鍒楄〃-{block_idx}.json" + block_idx += 1 + text = await asyncio.to_thread(self.generate_text_json, _msg, 
_block_cache_file, [], tb_block, None) + json_list = json.loads(text) + for par in json_list: + if not re.match('^Byte\d+_B[0-7]~Byte\d+_B[0-7]$', par['posText']): + par['posText'] = get_single_pos(par['posText']) + if not any(filter(lambda p: p['posText']==par['posText'], param_list)): + param_list.append(par) + for par in param_list: + if par['posText'] is not None: + par['id'] = re.sub('[^_a-zA-Z0-9]', '_', par['id']) + pos_data = self.handle_pos(par['posText']) + par['pos'] = pos_data['pos'] + par['length'] = pos_data['length'] + save_to_file(json.dumps(param_list, ensure_ascii=False, indent=2), _cache_file) + return param_list + else: + return [] + # 鍗曠嫭澶勭悊鏈纭幏鍙栫殑浣嶇疆淇℃伅 + def get_single_pos(txt): + _msg = f""" + 1涓瓧鑺傜殑闀垮害涓�8浣嶏紝浣跨敤B0-B7鏉ヨ〃绀恒�� + 灏嗏�渰txt}鈥濊浆鎹负瑕佹眰鏍煎紡"Byte1_B6~Byte2_B0"杩涜杈撳嚭锛屽鏋滀笌瑕佹眰鏍煎紡涓嶅悓鐨勮杩涜琛ュ叏鎴栬浆鎹紝渚嬪锛�"Byte1_B0~B2" 杞崲涓� "Byte1_B0~Byte1_B2"銆備緥濡傦細"Byte1~Byte2" 杞崲涓� "Byte1_B0~Byte2_B7"銆備緥濡傦細"Byte1_B5" 杞崲涓� "Byte1_B5~Byte1_B5"銆� + 杈撳嚭绀轰緥锛欱yte1_B6~Byte2_B0 + 浠呰緭鍑虹粨鏋滐紝涓嶈緭鍑哄叾浠栨枃瀛� + """ + + def validation(return_txt): + assert re.match('^Byte\d+_B[0-7]~Byte\d+_B[0-7]$', return_txt), '鏍煎紡涓嶆纭�' + + text = self.generate_text_json(_msg, "", doc_text="", validation=validation) + return text + + params = [] + header_params, data_area_params = ( + await asyncio.gather(get_header_params(pkt_name, doc_text), + get_data_area_params(pkt_name, doc_text))) + + params.extend(data_area_params) + + pkt['headers'] = header_params + pkt['datas'] = data_area_params + + # async def get_param_info(para): + # _msg2 = """ + # # 闇�姹� + # 浠庢枃鏈腑鎻愬彇鍖洪棿璧峰鍋忕Щ浣嶇疆鍜屽尯闂撮暱搴︼紝鍗曚綅涓烘瘮鐗广�傛枃鏈腑鐨勫唴瀹逛负鍖洪棿鎻忚堪锛屽叾涓細Byte<N> 琛ㄧず绗� N 涓瓧鑺傦紝N 浠� 1 寮�濮嬶紝B<X> 琛ㄧず绗� X 涓瘮鐗癸紝X 浠� 0 - 7 锛屽尯闂翠负闂尯闂淬�� + # 鎵�鏈夋暟瀛﹁绠楁槸闇�瑕佺畻鏁拌〃杈惧紡锛屼笉闇�瑕佽绠楃粨鏋溿�傝绠楀叕寮忓涓嬶細 + # - ByteN_BX璧峰鍜岀粨鏉熶綅缃細(N - 1)*8 + X + # - ByteN 璧峰浣嶇疆锛�(N - 1)*8 + # - ByteN 缁撴潫浣嶇疆锛�(N - 1) * 8 + 7 + # - 闀垮害锛氱粨鏉熷亸绉讳綅缃� + 1 - 璧峰鍋忕Щ浣嶇疆锛岄棴鍖洪棿鐨勯暱搴﹂渶瑕佺粨鏉熶綅缃姞1鍐嶅噺鍘昏捣濮嬩綅缃� + # # 鐢熸垚妯℃澘 + # 鎺ㄧ悊杩囩▼锛氱畝瑕佽鏄庢彁鍙栦俊鎭強璋冪敤 tool 璁$畻鐨勮繃绋嬨�傝緭鍑虹粨鏋滐細鎸� JSON 鏍煎紡杈撳嚭锛屾牸寮忓涓嬶細 + # { + # "offset": "璧峰鍋忕Щ浣嶇疆琛ㄨ揪寮�", + # "length": "闀垮害璁$畻琛ㄨ揪寮�" + # } + # 鏂囨湰锛� + # """+f""" + # {para['posText']} + # """ + # text2 = await asyncio.to_thread(self.generate_text_json, _msg2, '', [], '') + # try: + # out = json.loads(text2) + # para['pos'] = eval(out['offset']) + # para['posRet'] = text2 + # para['length'] = eval(out['length']) + # para['id'] = re.sub(r"[^0-9a-zA-Z_]", "_", para['code']) + # para['type'] = 'para' + # except Exception as e: + # print(e) + # tasks = [] + # for param in params: + # tasks.append(get_param_info(param)) + # + # s = time.time() + # await asyncio.gather(*tasks) + # e = time.time() + if params: + offset = params[0]['pos'] + for para in params: + para['pos'] -= offset + # print(f'======鍙傛暟鏁伴噺锛歿len(params)}锛岃�楁椂锛歿e - s}') + utils.save_text_to_file(json.dumps(pkt, ensure_ascii=False, indent=4), cache_file) return pkt - def gen_pkts(self): + async def gen_pkts(self): _msg = """ -#瑙掕壊 +# 瑙掕壊 浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� -#鎸囦护 +# 鎸囦护 鎴戦渶瑕佷粠鏂囨。涓彁鍙栭仴娴嬪寘鏁版嵁锛屼綘瑕佹牴鎹枃妗e唴瀹瑰府鎴戝畬鎴愰仴娴嬪寘鏁版嵁鐨勬彁鍙栥�� -#闇�姹� +# 闇�姹� 杈撳嚭鏂囨。涓弿杩扮殑閬ユ祴鍖呭垪琛紝閬ユ祴鍖呭瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆� 瀛楁鎻忚堪锛� 1.鍚嶇О锛氶仴娴嬪寘鐨勫悕绉帮紱 2.浠e彿锛氶仴娴嬪寘鐨勪唬鍙凤紱 -#绾︽潫 +# 绾︽潫 - name锛氬悕绉颁腑涓嶈鍖呭惈浠e彿锛屼粎浠庢枃妗d腑鎻愬彇婧愬寘鍚嶇О锛� - 濡傛灉娌℃湁浠e彿锛屼娇鐢ㄩ仴娴嬪寘鍚嶇О鐨勮嫳鏂囩炕璇戜唬鏇匡紱 - 濡傛灉娌℃湁鍚嶇О鐢ㄤ唬鍙蜂唬鏇匡紱 -- 涓嶈婕忔帀浠讳綍閬ユ祴鍖咃紱 +- 娉ㄦ剰锛屼竴瀹氳杈撳嚭鎵�鏈夌殑閬ユ祴鍖咃紝涓嶈婕忔帀浠讳綍涓�涓仴娴嬪寘锛� - 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勬暟缁勫厓绱犱负閬ユ祴鍖咃紝涓嶅寘鎷仴娴嬪寘涓嬮潰鐨勫弬鏁般�� -#渚嬪瓙 +# 渚嬪瓙 [ { "name": "鏁扮鏁板瓧閲忓揩閫熸簮鍖�", @@ -890,29 +962,24 @@ """ print(f'閬ユ祴婧愬寘鍒楄〃锛�') doc_text = self.get_text_with_entity(['婧愬寘鍒楄〃']) - text = self.generate_text_json(_msg, 'out/婧愬寘鍒楄〃.json', 
doc_text=doc_text) + text = await asyncio.to_thread(self.generate_text_json, _msg, f'{self.json_path}/婧愬寘鍒楄〃.json', doc_text=doc_text) pkt = json.loads(text) return pkt - def gen_pkt_vc(self): + async def gen_pkt_vc(self): _msg = """ -#瑙掕壊 -浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� -#鎸囦护 -鎴戦渶瑕佷粠鏂囨。涓彁鍙栨墍鏈夐仴娴嬫簮鍖呬俊鎭紝浣犺甯姪鎴戝畬鎴愰仴娴嬫簮鍖呬俊鎭殑鎻愬彇銆� -#闇�姹� -鏍规嵁鏂囨。鍐呭杈撳嚭閬ユ祴婧愬寘淇℃伅锛岄《绾х粨鏋勪负鏁扮粍锛屽厓绱犱负閬ユ祴婧愬寘锛屾簮鍖呭瓧娈靛寘鎷細鍖呬唬鍙�(id)锛屽悕绉�(name)锛屾墍灞炶櫄鎷熶俊閬�(vcs)锛屼笅浼犳椂鏈猴紙timeTags锛夈�� -#绾︽潫 -- 鎵�灞炶櫄鎷熶俊閬擄細蹇呴』鏄枃妗d腑鎻忚堪鐨勯仴娴嬭櫄鎷熶俊閬撲唬鍙凤紙搴忓彿锛夛紱 -- 涓嬩紶鏃舵満锛氫笌琛ㄦ牸涓畾涔夌殑涓�鑷达紱 -- 涓嶈閬楁紡浠讳綍閬ユ祴婧愬寘銆� -#渚嬪瓙锛� +# 闇�姹� +鏍规嵁鏂囨。鍐呭杈撳嚭閬ユ祴婧愬寘淇℃伅锛屾簮鍖呭瓧娈靛寘鎷細鍖呬唬鍙�(id)锛屽悕绉�(name)锛屾墍灞炶櫄鎷熶俊閬�(vcs)銆� +鎵�鏈夊瓧娈典粎浣跨敤鏂囨。鍐呭杈撳嚭銆� +琛ㄦ牸涓仴娴嬫簮鍖呬笉鏄寜鍚嶇О鏉ユ帓搴忕殑锛屾寜鐓ф枃妗d腑鐨勮〃鏍间腑鐨勯仴娴嬫簮鍖呴『搴忚繘琛岃緭鍑恒�� +姣忎釜鍖呴兘瑕佽緭鍑恒�� +鎵�灞炶櫄鎷熶俊閬擄細閫氳繃琛ㄦ牸涓弿杩扮殑涓嬩紶鏃舵満鍜岃櫄鎷熶俊閬撶殑鍒掑垎锛岃幏鍙栦笅浼犳椂鏈哄搴旂殑铏氭嫙淇¢亾浠e彿锛堝簭鍙凤級锛屽苟缁勭粐涓轰竴涓暟鎹繘琛岃緭鍑猴紝渚嬪锛氫笅浼犳椂鏈轰负瀹炴椂鍜屽欢鏃讹紝閭d箞灏辫〃绀鸿鍖呯殑鎵�灞炶櫄鎷熶俊閬撲负VC1鍜孷C3銆傚鏋滄病鏈夊尮閰嶄笅浼犳椂鏈猴紝灏卞~鍏ョ┖鏁扮粍銆� +# 杈撳嚭绀轰緥锛� [ { "id": "PMS001", "name": "鏁扮鏁板瓧閲忓揩閫熸簮鍖�", - "vcs": ["VC1"], - "timeTags": ["瀹炴椂"] + "vcs": ["VC1",'VC2'] }, ] """ @@ -923,25 +990,26 @@ assert len(pkts), 'VC婧愬寘鍒楄〃涓嶈兘涓虹┖' doc_text = self.get_text_with_entity(['铏氭嫙淇¢亾瀹氫箟', '閬ユ祴婧愬寘涓嬩紶鏃舵満']) - text = self.generate_text_json(_msg, 'out/閬ユ祴VC婧愬寘.json', doc_text=doc_text, validation=validation) + text = await asyncio.to_thread( + lambda: self.generate_text_json(_msg, f'{self.json_path}/閬ユ祴VC婧愬寘.json', doc_text=doc_text, validation=validation)) pkt_vcs = json.loads(text) return pkt_vcs def gen_pkt_format(self): _msg = """ -#瑙掕壊 +# 瑙掕壊 浣犳槸涓�鍚嶈祫娣辫蒋浠跺伐绋嬪笀銆� -#鎸囦护 +# 鎸囦护 鎴戦渶瑕佷粠鏂囨。涓彁鍙栨暟鎹寘鐨勬牸寮忥紝浣犺甯姪鎴戝畬鎴愭暟鎹寘鏍煎紡鐨勬彁鍙栥�� -#闇�姹� +# 闇�姹� 璇蜂粩缁嗗垎绯绘枃妗o紝杈撳嚭鍚勪釜鏁版嵁鍖呯殑鏍煎紡銆� 鏁版嵁缁撴瀯鏈�澶栧眰涓烘暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘鏍煎紡锛屽皢涓诲澶寸殑瀛愮骇鎻愬崌鍒颁富瀵煎ご杩欎竴绾у苟涓斿幓闄や富瀵煎ご锛屾暟鎹寘type涓簂ogic锛屽寘鏁版嵁鍩焧ype涓篴ny銆� 鍖呮牸寮忓瓧娈靛寘鎷細鍚嶇О(name)銆佷唬鍙�(id)銆佺被鍨�(type)銆佸瓙绾�(children)銆� children鍏冪礌鐨勫瓧娈靛寘鎷細name銆乮d銆乸os銆乴ength銆乼ype銆� children鍏冪礌鍖呮嫭锛氱増鏈彿(Ver)銆佺被鍨�(TM_Type)銆佸壇瀵煎ご鏍囧織(Vice_Head)銆佸簲鐢ㄨ繃绋嬫爣璇嗙(Proc_Sign)銆佸垎缁勬爣蹇�(Group_Sign)銆佸寘搴忓垪璁℃暟(Package_Count)銆佸寘闀�(Pack_Len)銆佹暟鎹煙(EPDU_DATA)銆� -#绾︽潫 +# 绾︽潫 - 鐢熸垚鐨凧SON璇硶鏍煎紡瑕佸悎娉曘�� -#渚嬪瓙 +# 渚嬪瓙 { "name": "瀹炴椂閬ユ祴鍙傛暟鍖�", "id": "EPDU", @@ -967,46 +1035,45 @@ } """ print('閬ユ祴鍖呮牸寮忥細') - text = self.generate_text_json(_msg, 'out/鏁版嵁鍖呮牸寮�.json', files=[file_map['閬ユ祴澶х翰']]) + text = self.generate_text_json(_msg, f'{self.json_path}/鏁版嵁鍖呮牸寮�.json', files=[file_map['閬ユ祴澶х翰']]) pkt_formats = json.loads(text) return pkt_formats def compute_length_pos(self, items: list): - length = 0 - pos = 0 - for child in items: - if 'children' in child: - self.compute_length_pos(child['children']) - child['pos'] = pos - if 'length' in child and isinstance(child['length'], int): - length = length + child['length'] - pos = pos + child['length'] + items.sort(key=lambda x: x['pos']) + # for child in items: + # if 'children' in child: + # self.compute_length_pos(child['children']) + # if 'length' in child and isinstance(child['length'], int): + # length = length + child['length'] + # pos = pos + child['length'] # node['length'] = length - def gen_bus(self, proj_pk, rule_enc, rule_id, ds, name_path, dev_name): + async def gen_bus(self, proj_pk, rule_enc, rule_id, ds, name_path, dev_name): _msg = """ -#瑙掕壊 -浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯� -#鎸囦护 -鎴戦渶瑕佷粠鏂囨。涓彁鍙栫粡鎬荤嚎鐨勬暟鎹寘鍒楄〃锛屼綘瑕佸府鍔╂垜瀹屾垚缁忔�荤嚎鐨勬暟鎹寘鍒楄〃鐨勬彁鍙栥�� -#闇�姹� -璇锋瀽鏂囨。锛屽垪鍑烘�荤嚎閫氫俊鍖呬紶杈撶害瀹氫腑鎻忚堪鐨勬墍鏈夋暟鎹寘鍒楄〃锛� -鏁版嵁鍖呭瓧娈靛寘鎷細id(鏁版嵁鍖呬唬鍙�)銆乶ame(鏁版嵁鍖呭悕绉�)銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(bit闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� -transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乺tAddr(鎵�灞濺T鐨勫湴鍧�鍗佽繘鍒�)銆乺t(鎵�灞瀝t鍚嶇О)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乥urst(鏄惁绐佸彂)銆乼ransDirect(浼犺緭鏂瑰悜)銆� -#绾︽潫 -- 
frameNum锛氫娇鐢ㄦ枃妗d腑鐨勬枃鏈笉瑕佸仛浠讳綍杞崲锛� -- subAddr锛氬�间负鈥滄繁搴︹�濄�佲�滃钩閾衡�濄�佲�滄暟瀛椻�濇垨null锛� -- 鏄惁缁忚繃鎬荤嚎鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎锛� -- 浼犺緭鏈嶅姟鍒嗕笁绉嶏細SetData(缃暟)銆丟etData(鍙栨暟)銆丏ataBlock(鏁版嵁鍧椾紶杈�)锛� -- 浼犺緭鏂瑰悜鍒嗏�濇敹鈥滃拰鈥濆彂鈥滐紝浼犺緭鏈嶅姟濡傛灉鏄�濆彇鏁扳�滄槸鈥濇敹鈥滐紝濡傛灉鏄�濇暟鎹潡浼犺緭鈥滃垯鏍规嵁鍖呮墍鍦ㄧ殑鍒嗙郴缁熶互鍙婅〃鏍肩殑鈥濅紶杈撴柟鍚戔�滃垪杩涜鍒ゆ柇锛屽垽鏂浜嶴MU鏉ヨ鏄敹杩樻槸鍙戯紱 -- 鏄惁绐佸彂锛氭牴鎹〃鏍间腑鐨勨�濅紶杈撳懆鏈熲�滃垪杩涜鍒ゆ柇锛屽鏋滃~鍐欎簡绫讳技鈥濈獊鍙戔�滅殑鏂囧瓧琛ㄧず鏄獊鍙戝惁鍒欒〃绀轰笉鏄獊鍙戯紱 -- 涓嶈婕忔帀浠讳綍涓�涓暟鎹寘锛� -- 鏁版嵁缁撴瀯鏈�澶栧眰鏄暟缁勶紝鏁扮粍鍏冪礌涓烘暟鎹寘锛屼互JSON鏍煎紡杈撳嚭锛屼笉瑕佽緭鍑篔SON浠ュ鐨勪换浣曟枃鏈�� -#渚嬪瓙 +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +# 闇�姹� +璇峰垎鏋愭枃妗d腑鐨勮〃鏍硷紝鎸夎〃鏍奸『搴忚緭鍑鸿〃鏍间腑鐨勬墍鏈夋簮鍖呬俊鎭紱 +鏁版嵁鍖呭瓧娈靛寘鎷細id(鏁版嵁鍖呬唬鍙�)銆乶ame(鏁版嵁鍖呭悕绉�)銆乤pid(16杩涘埗瀛楃涓�)銆乻ervice(鏈嶅姟瀛愭湇鍔�)銆乴ength(闀垮害)銆乮nterval(浼犺緭鍛ㄦ湡)銆乻ubAddr(瀛愬湴鍧�/妯″紡)銆乫rameNum(閫氫俊甯у彿)銆� +transSer(浼犺緭鏈嶅姟)銆乶ote(澶囨敞)銆乼hroughBus(鏄惁缁忚繃鎬荤嚎)銆乼ransDirect(浼犺緭鏂瑰悜)銆� +鏂囨。涓鏋滄病鏈夋暟鎹寘琛ㄥ垯杈撳嚭锛歔]銆� + +# 鏁版嵁鍖呭瓧娈佃鏄� +- frameNum(閫氫俊甯у彿)锛氭枃妗d腑閫氫俊甯у彿鍒楃殑鍐呭锛� +- subAddr(瀛愬湴鍧�/妯″紡)锛氬�煎彧鑳芥槸锛氣�滄繁搴︹�濄�佲�滃钩閾衡�濄�佹暟瀛楁垨null锛屽鏋滄槸鈥�/鈥濆垯鏄痭ull锛� +- throughBus(鏄惁缁忚繃鎬荤嚎)鐨勫垽鏂緷鎹細鈥滃娉ㄢ�濆垪濉啓浜嗗唴瀹圭被浼尖�滀笉缁忚繃鎬荤嚎鈥濈殑鏂囧瓧琛ㄧず涓嶇粡杩囨�荤嚎鍚﹀垯缁忚繃鎬荤嚎锛� +- transSer(浼犺緭鏈嶅姟鍒嗕笁绉�)锛氱疆鏁�(SetData)銆佸彇鏁�(GetData)銆佹暟鎹潡浼犺緭(DataBlock)锛屾牴鎹〃鏍间腑鐨勨�滀紶杈撴湇鍔♀�濆垪杩涜鍒ゆ柇锛� + +# 绾︽潫 +- 浠呰緭鍑簀son銆� +- 鎸夌収琛ㄦ牸涓殑椤哄簭杩涜杈撳嚭銆� +- 涓嶈婕忓寘銆� +# 渚嬪瓙 [ { - "id": "PCS005", - "name": "鎬荤嚎绠$悊锛堝唴閮ㄦ寚浠わ級", + "id": "P001", + "name": "xxx", "apid": "418", "service": "(1, 2)", "length": 1, @@ -1015,53 +1082,142 @@ "frameNum": "1|2", "transSer": "DataBlock", "note": "", - "rtAddr": 28, - "rt": "鏁版嵁鎺ュ彛鍗曞厓XIU", "throughBus": true, "burst": true, - "transDirect": "鍙�" + "transDirect": "" } ] """ print('鎬荤嚎鏁版嵁鍖咃細') def validation(gen_text): - json.loads(gen_text) + pkts2 = json.loads(gen_text) + assert not next(filter(lambda pkt2: 'transSer' not in pkt2, pkts2), None), '鎬荤嚎鍖呭睘鎬х敓鎴愪笉瀹屾暣锛岀己灏憈ransSer銆�' - doc_text = self.get_text_with_entity(['RT鍦板潃鍒嗛厤', '鍒嗙郴缁熸簮鍖�']) - text = self.generate_text_json(_msg, 'out/鎬荤嚎.json', doc_text=doc_text, - validation=validation) - pkts = json.loads(text) + rt_doc_text = self.get_text_with_entity(['RT鍦板潃鍒嗛厤']) + subsys_pkt_texts = self.get_text_list_with_entity(['鍒嗙郴缁熸簮鍖�']) + tasks = [] + rt_adds = [] + for subsys_pkt_text in subsys_pkt_texts: + doc_text = f'{rt_doc_text}\n{subsys_pkt_text}' + subsys = subsys_pkt_text[:subsys_pkt_text.index("\n")] + # 鍗曠嫭鑾峰彇RT鍦板潃锛屽苟搴旂敤鍒扮珷鑺備笅鎵�鏈夊寘 + get_rt_msg = f"""杩斿洖{subsys}鐨凴T鍦板潃锛屼粎杈撳嚭鍗佽繘鍒剁殑缁撴灉锛屼笉瑕佽緭鍑哄叾浠栧唴瀹癸紝濡傛灉鏄郴缁熺鐞嗗崟鍏冿紙SMU锛夊垯杩斿洖0銆�""" + rt_info = self.generate_text_json(get_rt_msg, "", doc_text=rt_doc_text) + if rt_info == '0': + continue + rt_adds.append({ + "rt": subsys, + "rt_addr": rt_info + }) + # md5 = utils.generate_text_md5(subsys_pkt_text) + task = asyncio.to_thread(self.generate_text_json, _msg, + f"{self.json_path}/鎬荤嚎-{utils.to_file_name(subsys)}.json", doc_text=doc_text, + validation=validation) + tasks.append(task) + results = await asyncio.gather(*tasks) + pkts = [] + # 鍒ゆ柇鏄惁瀛樺湪鎬荤嚎鏁版嵁鍖�.json + if os.path.isfile(f"{self.json_path}/鎬荤嚎鏁版嵁鍖呭垪琛�.json"): + pkts = read_from_file(f"{self.json_path}/鎬荤嚎鏁版嵁鍖呭垪琛�.json") + pkts = json.loads(pkts) + else: + pktid_apid_map = {} + for index, result in enumerate(results): + pkts_diretions = [] + # 鍏ㄨ绌烘牸鍘婚櫎 + result = re.sub(r'銆�', '', result) + _pkts = json.loads(result) + rt_name = rt_adds[index]["rt"] + for _pkt in _pkts: + # 搴旂敤RT鍦板潃 + _pkt['rt'] = rt_name + _pkt['rtAddr'] = rt_adds[index]["rt_addr"] + _pkt['burst'] = "绐佸彂" in f"{_pkt['interval']}" + if _pkt['apid'] is None or not re.match(r'[0-9A-Fa-f]+', _pkt['apid']): + _pkt['apid'] = '' + if _pkt['id'] in pktid_apid_map: + if _pkt['apid']: + pktid_apid_map[_pkt['id']] = _pkt['apid'] + else: + 
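+                        # This row has no apid; reuse the one recorded earlier for the same packet id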
+                        _pkt['apid'] = pktid_apid_map[_pkt['id']]
+                    else:
+                        pktid_apid_map[_pkt['id']] = _pkt['apid']
+                    # Convert the length to bits
+                    if _pkt['length']:
+                        if isinstance(_pkt['length'], str) and re.match(r'^\d+$', _pkt['length']):
+                            _pkt['length'] = int(_pkt['length']) * 8
+                        elif isinstance(_pkt['length'], int):
+                            _pkt['length'] = _pkt['length'] * 8
+                    pkts.append(_pkt)
+                    # Collect the transfer-direction rows that still need resolving
+                    pkts_diretions.append({
+                        'id': _pkt['id'],
+                        'rt': _pkt['rt'],
+                        'transDirect': _pkt['transDirect'],
+                    })
+                # Resolve the transfer direction for this RT's packets
+                _msg = """
+                处理传入的json数组，每个数组对象中包含字段：rt（自身设备）、transDirect(传输方向)。
+                需要你给数组对象中多加一个字段，输出数组中单个对象的传输类型（transType），传输类型有两种值“收”和“发”，判断依据是根据传输方向的内容进行判断，由rt发送给SMU的传输类型是“收”，由SMU发送给rt的传输类型是“发”。
+                rt字段为空的数据不用处理。
+                在transDirect字段中rt可能为缩写，缩写对应的rt名称可以从文档中进行读取。
+                输出结果将增加了字段的json数组直接输出，不用输出其他内容。
+                输出示例：[{"id": "PMK013", "rt": "中心控制单元CCU", "transDirect": "CCU→SMU→地面", "transType": "收"},{"id": "PMK055", "rt": "中心控制单元CCU", "transDirect": "SMU→CUU", "transType": "发"}]
+                """ + f"""
+                JSON：{pkts_diretions}
+                """
+                result_json = self.generate_text_json(_msg, f"{self.json_path}/总线-rt-{utils.to_file_name(rt_name)}.json", doc_text=rt_doc_text)
+                # Merge the resolved transType values back into the packets
+                for pkt in _pkts:
+                    for data in json.loads(result_json):
+                        if "transType" in data:
+                            if data['id'] == pkt['id']:
+                                pkt['transType'] = data['transType']
+                                break
+        print(f"总线源包个数：{len(pkts)}")
         # Keep only packets that go over the bus
         pkts = list(filter(lambda it: it['throughBus'], pkts))
         no_apid_pkts = list(filter(lambda it: not it['apid'], pkts))
         # Keep only packets that have an apid
         pkts = list(filter(lambda it: it['apid'], pkts))
+        # Drop packets whose rtAddr is 0 (0 is the SMU itself)
+        pkts = list(filter(lambda it: it['rtAddr'] != '0', pkts))
+
+        # Persist the full list of bus packets
+        save_to_file(json.dumps(pkts, ensure_ascii=False, indent=2), f"{self.json_path}/总线数据包列表.json")
+
+        tasks = []
+
+        def _run(gen_pkt_details, pkt2):
+            _pkt2 = asyncio.run(gen_pkt_details(pkt2['name'], pkt2['id']))
+            if _pkt2 is not None:
+                pkt2['children'] = []
+                pkt2['children'].extend(_pkt2['datas'])
         for pkt in pkts:
-            _pkt = self.gen_pkt_details(pkt['name'], pkt['id'])
-            if _pkt:
-                pkt['children'] = []
-                pkt['children'].extend(_pkt['datas'])
-                pkt['length'] = _pkt['length']
+            pkt_task = asyncio.to_thread(_run, self.gen_pkt_details, pkt)
+            tasks.append(pkt_task)
+
+        await asyncio.gather(*tasks)
+
         rt_pkt_map = {}
         for pkt in pkts:
             # Group by data-block transfer vs. fetch
             # Parse-rule ID of a logical wrapper packet: RT[rt addr]SUB[sub addr]S (S means fetch, direction AA = send; R means set, direction BB = receive)
             # Fetch: one logical wrapper packet is created per (sub address, frame number) combination
             # Data block: only one logical wrapper packet
+            if pkt['subAddr'] is not None and not isinstance(pkt['subAddr'], int) and pkt['subAddr'].find("/") > -1:
+                pkt['subAddr'] = pkt['subAddr'].split("/")[0]
             # Normalize the sub address
-            if pkt['burst']:
-                # Burst packets use sub addresses 18~26
-                pkt['subAddr'] = 26
-            elif pkt['subAddr'] == '平铺' or pkt['subAddr'] is None:
+            if pkt['subAddr'] == '平铺' or not pkt['subAddr']:
                 # Flat (平铺): 11~26; missing values default to flat
-                pkt['subAddr'] = 26
+                pkt['subAddr'] = '11~26'
             elif pkt['subAddr'] == '深度':
                 # Depth (深度): 11
-                pkt['subAddr'] = 11
+                pkt['subAddr'] = '11'
+            pkt['burst'] = "突发" in f"{pkt['interval']}"
             # Normalize the frame number
             if pkt['burst']:
                 # Burst: ALL
@@ -1082,12 +1238,15 @@
             # Fetch
             pkt_id = f"RT{rt_addr}SUB{sub_addr}"
             vals = f"{rt_addr}/{sub_addr}/0xAA/{frame_no}/"
-            rt_pkt_map_gen(pkt, '取数', rt_pkt_map, pkt_id, vals)
+            rt_pkt_map_gen(pkt, '取数', rt_pkt_map, pkt_id, vals, pkts)
         elif trans_ser == 'DataBlock':
             # Data block
             direct = '0xAA'
-            rt_pkt_map_gen(pkt, '数据块传输', rt_pkt_map, f"RT{rt_addr}SUB{sub_addr}{direct}",
-                           f"{rt_addr}/{sub_addr}/{direct}/ALL/")
+            if pkt['transDirect'] == '发':
+                direct = '0xBB'
+            pkt_id = f"RT{rt_addr}SUB{sub_addr}{direct}"
+            vals = f"{rt_addr}/{sub_addr}/{direct}/ALL/"
+            rt_pkt_map_gen(pkt, '数据块传输', rt_pkt_map, pkt_id, vals, pkts)
         _pkts = []
         for k in rt_pkt_map:
             _pkts.append(rt_pkt_map[k])
@@ -1123,16 +1282,26 @@
         rule_enc.C_KEY = sub_key
         update_rule_enc(rule_enc)

-    def gen_tc(self):
+    async def gen_tc(self):
         # Frame format
-        frame = self.gen_tc_transfer_frame_format()
+        frame_task = self.gen_tc_transfer_frame_format()
         # Telecommand packet format
-        pkt_format = self.gen_tc_pkt_format()
+        pkt_format_task = self.gen_tc_pkt_format()
         # Telecommand packet list
-        instructions = self.gen_tc_transfer_pkts()
+        instructions_task = self.gen_tc_transfer_pkts()
+        result = await asyncio.gather(frame_task, pkt_format_task, instructions_task)
+        frame = result[0]
+        pkt_format = result[1]
+        instructions = result[2]
+
+        tasks = []
         for inst in instructions:
             # Telecommand data-area contents
-            self.gen_tc_pkt_details(inst)
+            tasks.append(self.gen_tc_details(inst))
+
+        await asyncio.gather(*tasks)
+
+        for inst in instructions:
             inst['type'] = 'insUnit'
             format_text = json.dumps(pkt_format, ensure_ascii=False)
             format_text = utils.replace_tpl_paras(format_text, inst)
@@ -1164,7 +1333,9 @@
         elif item['type'] == 'pkt':
             return '''{"MaxLength":1024,"IsSplit8":false,"Split8Start":null,"Split8End":null,"PadCode":null,"Alignment":null,"InputParams":[],"OutPutParams":[],"MatchItems":[]}'''
         elif item['type'] == 'pktSeqCnt':
-            return json.dumps({"FirstPackValue":"PackCount","MiddlePackValue":"PackIndex","LastPackValue":"PackIndex","IndependPackValue":"InsUnitCount"})
+            return json.dumps(
+                {"FirstPackValue": "PackCount", "MiddlePackValue": "PackIndex", "LastPackValue": "PackIndex",
+                 "IndependPackValue": "InsUnitCount"})
         elif 'value' in item:
             return item['value']

@@ -1202,8 +1373,11 @@
         # An immediate-input field whose length is null is variable-length; switch its type to variableLength
         if field['type'] == 'input' and field['length'] is None:
             field['type'] = 'variableLength'
+            if isinstance(field['value'], dict):
+                field['range'] = f'{field["value"]["minLength"]}~{field["value"]["maxLength"]}'
         # Default value for enum fields
-        if field['type'] == 'enum' and len(field['enums']) and not next(filter(lambda x: 'default' in x and x['default'], field['enums']), None):
+        if field['type'] == 'enum' and len(field['enums']) and not next(
+                filter(lambda x: 'default' in x and x['default'], field['enums']), None):
             field['enums'][0]['default'] = True
         # Checksum
         if field['type'] == 'checkSum':
@@ -1234,7 +1408,7 @@

         create_tc_format(None, frame)

-    def gen_tc_transfer_frame_format(self):
+    async def gen_tc_transfer_frame_format(self):
         _msg = '''
 # 角色
 你是一名资深的软件工程师。
@@ -1251,7 +1425,7 @@
 # 数据类型
 - const：固定码字，数值，二进制以B结尾，十进制，十六进制以0x开头；
 - sendFlag：发送标记，类似枚举，定义样例：[{"n":"name","v":"value","c":"code","default":true}]，n表示名称，v表示值，c表示code（没有空着），default表示是默认值；
-- checkSum：校验和，如果是校验和类型还需要分析校验和的算法，并保存在value中，校验和算法包括：字节异或（ByteXOR）、累加和取反（SumNot）、累加和（AddSum）、应用循环冗余（CRC-CCITT）、CRC8（CRC8）、ISO和校验（ISOSum）、奇校验（Odd）、偶校验（Even）、其他（Other）
+- checkSum：校验和，如果是校验和类型还需要分析校验和的算法，并保存在value的type中，校验和算法包括：字节异或（ByteXOR）、累加和取反（SumNot）、累加和（AddSum）、应用循环冗余（CRC-CCITT）、CRC8（CRC8）、ISO和校验（ISOSum）、奇校验（Odd）、偶校验（Even）、其他（Other）
 # 约束
 - 以JSON格式输出；
 - 仅输出JSON文本，不要输出任何其他文本。
@@ -1267,15 +1441,16 @@
             json.loads(gen_text)

         doc_text = self.get_text_with_entity(['遥控帧格式'])
-        text = self.generate_tc_text(_msg, 'out/tc_transfer_frame.json', doc_text=doc_text,
-                                     validation=validation)
+        text = await 
asyncio.to_thread( + lambda: self.generate_tc_text(_msg, f'{self.json_path}/tc_transfer_frame.json', doc_text=doc_text, + validation=validation)) result: dict = json.loads(text) format_text = utils.read_from_file('tpl/tc_transfer_frame.json') format_text = utils.replace_tpl_paras(format_text, result) frame = json.loads(format_text) return frame - def gen_tc_pkt_format(self): + async def gen_tc_pkt_format(self): _msg = ''' # 瑙掕壊 浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� @@ -1283,26 +1458,30 @@ 鍒嗘瀽閬ユ帶鍖呮牸寮忥紝鎻愬彇閬ユ帶鍖呮牸寮忕殑瀛楁瀹氫箟銆� # 闇�姹� 瑕佹彁鍙栧�肩殑鍖呮牸寮忓瓧娈碉細 -- 鍖呯増鏈彿: const锛屼簩杩涘埗锛� -- 鍖呯被鍨�: const锛屼簩杩涘埗锛� -- 鏁版嵁鍖哄ご鏍囧織: const锛屼簩杩涘埗锛� -- 搴忓垪鏍囧織: const锛屼簩杩涘埗锛� -- 鍖呴暱锛歭ength锛� -- 鍓澶存爣蹇�: const锛屼簩杩涘埗锛� -- 閬ユ帶鍖呯増鏈彿: const锛屼簩杩涘埗锛� -- 鍛戒护姝g‘搴旂瓟: const锛屼簩杩涘埗锛� -- 婧愬湴鍧�: const锛屽崄鍏繘鍒躲�� +- packetVersionNumber锛氬寘鐗堟湰鍙凤紝const锛屼簩杩涘埗锛� +- packetType锛氬寘绫诲瀷锛宑onst锛屼簩杩涘埗锛� +- dataFieldHeaderFlag锛氭暟鎹尯澶存爣蹇楋紝const锛屼簩杩涘埗锛� +- sequenceFlags锛氬簭鍒楁爣蹇楋紝const锛屼簩杩涘埗锛� +- ccsdsSecondaryHeaderFlag锛氬壇瀵煎ご鏍囧織锛宑onst锛屼簩杩涘埗锛� +- tcPktVersionNumber锛氶仴鎺у寘鐗堟湰鍙凤紝const锛屼簩杩涘埗锛� +- acknowledgmentFlag锛氬懡浠ゆ纭簲绛旓紝const锛屼簩杩涘埗锛� +- sourceAddr锛氭簮鍦板潃锛宑onst锛屽崄鍏繘鍒躲�� # 鏁版嵁绫诲瀷 - 鍥哄畾鐮佸瓧锛歝onst锛屾暟鍊硷紝浜岃繘鍒朵互B缁撳熬锛屽崄杩涘埗锛屽崄鍏繘鍒朵互0x寮�澶达紱 - 闀垮害锛歭ength锛屽鏋滃瓧娈垫弿杩板唴瀹逛负鏁版嵁鍖哄煙鐨勯暱搴﹀垯琛ㄧず鏄暱搴︼紝闀垮害鐨剉alue涓烘暟鍊笺�乶ull鎴栬寖鍥村畾涔夛紝 - 鏋氫妇鍊硷細enum锛� -- 鏍¢獙鍜岋細checkSum锛屽鏋滄槸鏍¢獙鍜岀被鍨嬭繕闇�瑕佸垎鏋愭牎楠屽拰鐨勭畻娉曪紝骞朵繚瀛樺湪value涓紝鏍¢獙鍜岀畻娉曞寘鎷細瀛楄妭寮傛垨锛圔yteXOR锛夈�佺疮鍔犲拰鍙栧弽锛圫umNot锛夈�佺疮鍔犲拰锛圓ddSum锛夈�佸簲鐢ㄥ惊鐜啑浣欙紙CRC-CCITT锛夈�丆RC8锛圕RC8锛夈�両SO鍜屾牎楠岋紙ISOSum锛夈�佸鏍¢獙锛圤dd锛夈�佸伓鏍¢獙锛圗ven锛夈�佸叾浠栵紙Other锛� -- 鍗虫椂杈撳叆锛歩nput銆� -# 闀垮害绫诲瀷鐨勮寖鍥村畾涔夋弿杩� +- 鏍¢獙鍜岋細checkSum锛屽鏋滄槸鏍¢獙鍜岀被鍨嬭繕闇�瑕佸垎鏋愭牎楠屽拰鐨勭畻娉曪紝骞朵繚瀛樺湪value鐨則ype涓紝鏍¢獙鍜岀畻娉曞寘鎷細瀛楄妭寮傛垨锛圔yteXOR锛夈�佺疮鍔犲拰鍙栧弽锛圫umNot锛夈�佺疮鍔犲拰锛圓ddSum锛夈�佸簲鐢ㄥ惊鐜啑浣欙紙CRC-CCITT锛夈�丆RC8锛圕RC8锛夈�両SO鍜屾牎楠岋紙ISOSum锛夈�佸鏍¢獙锛圤dd锛夈�佸伓鏍¢獙锛圗ven锛夈�佸叾浠栵紙Other锛� +- 鍗虫椂杈撳叆锛歩nput锛屽鏋滄槸鍗虫椂杈撳叆value鐨勫�间负鍙橀暱瀹氫箟銆� +## 闀垮害绫诲瀷鐨勮寖鍥村畾涔夋弿杩� {"start": "璧峰瀛楁code", "end": "缁撴潫瀛楁code", "formula": "璁$畻鍏紡"} - start锛氳捣濮嬪瓧娈礳ode锛岄暱搴﹀寘鎷捣濮嬪瓧娈碉紝瀛楁鎻忚堪涓鏄庝簡璧峰瀛楁锛� - end锛氱粨鏉熷瓧娈礳ode锛岄暱搴﹀寘鎷粨鏉熷瓧娈碉紝瀛楁鎻忚堪涓鏄庝簡缁撴潫瀛楁锛� - formula锛氳绠楀叕寮忥紝濡傛灉娌℃湁璁$畻鐩稿叧鎻忚堪鍒欒〃绀轰笉闇�瑕佽绠楀叕寮忋�� +## 鍗充娇杈撳叆绫诲瀷鐨勫彉闀垮畾涔夋弿杩� +{"minLength": "鏈�灏忛暱搴�", "maxLength": "鏈�澶ч暱搴�", "variableLength": true} +- minLength锛氭渶灏忛暱搴︼紝 +- maxLength锛氭渶澶ч暱搴︼紝 +- variableLength锛氭槸鍚︽槸鍙橀暱銆� 璁$畻鍏紡瀹氫箟锛� - BYTES锛氭寜瀛楄妭璁$畻锛� - N-x锛氭�诲瓧鑺傛暟鍑弜锛屼緥濡傛�诲瓧鑺傛暟鍑�1鐨勫叕寮忎负N-1銆� @@ -1311,8 +1490,8 @@ - 浠呰緭鍑篔SON鏂囨湰锛屼笉瑕佽緭鍑轰换浣曞叾浠栨枃鏈�� # 杈撳嚭渚嬪瓙锛� { - "鍖呯増鏈彿": "00B", - "鍖呯被鍨�": "1B", + "packetVersionNumber": "00B", + "packetType": "1B", ... 
} ''' @@ -1321,8 +1500,9 @@ json.loads(gen_text) doc_text = self.get_text_with_entity(['閬ユ帶鍖呮牸寮�']) - text = self.generate_tc_text(_msg, 'out/tc_transfer_pkt.json', doc_text=doc_text, - validation=validation) + text = await asyncio.to_thread( + lambda: self.generate_tc_text(_msg, f'{self.json_path}/tc_transfer_pkt.json', doc_text=doc_text, + validation=validation)) result = json.loads(text) format_text = utils.read_from_file('tpl/tc_pkt_format.json') @@ -1330,25 +1510,30 @@ pkt_format = json.loads(format_text) return pkt_format - def gen_tc_transfer_pkts(self): + async def gen_tc_transfer_pkts(self): _msg = ''' # 瑙掕壊 浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� # 鎸囦护 鍒嗘瀽鏂囨。鍒楀嚭鎵�鏈夌殑閬ユ帶鎸囦护銆� -# 绾︽潫 +# 绾︽潫 - 搴旂敤杩囩▼鏍囪瘑锛氬簲鐢ㄨ繃绋嬫爣璇嗗氨鏄疉PID锛屼竴鑸細鍦ㄥ悕绉板悗鐨勬嫭鍙蜂腑鍒楀嚭鏉ワ紱 -- code锛氭寚浠や唬鍙凤紝娌℃湁灏辩┖鐫�锛� -- name锛氭寚浠ゅ悕绉帮紝鏍规嵁琛ㄦ牸鍐呭鎻愬彇锛屾敞鎰忓悕绉伴渶瑕佹彁鍙栧畬鏁达紝濡傛灉鏈夊鍒楀垯鍚堝苟鐢�-鍒嗗壊锛� -- 搴旂敤鏁版嵁鍖猴細鎻愬彇琛ㄦ牸涓殑搴旂敤鏁版嵁鍖哄唴瀹广�� +- code锛氭寚浠や唬鍙凤紝濡傛灉娌℃湁濉啓鎴栬�呮槸鈥�/鈥濆垯浣跨敤绌哄瓧绗︿覆浠f浛锛� +- name锛氭寚浠ゅ悕绉帮紝鏍规嵁琛ㄦ牸琛屽唴瀹规彁鍙栵紝娉ㄦ剰鏄鍐呭锛屾敞鎰忓悕绉伴渶瑕佹彁鍙栧畬鏁达紝濡傛灉鏈夊鍒楀垯鍚堝苟鐢�-鍒嗗壊锛� +- shortName锛氭寚浠ゅ悕绉帮紝鏍规嵁琛ㄦ牸鍐呭鎻愬彇锛� +- apid: 搴旂敤杩囩▼鏍囪瘑绗︼紱 +- serviceType锛氭湇鍔$被鍨嬶紱 +- serviceSubtype锛氭湇鍔″瓙绫诲瀷锛� +- dataArea锛氬簲鐢ㄦ暟鎹尯锛屾彁鍙栬〃鏍间腑鐨勫簲鐢ㄦ暟鎹尯鍐呭銆� # 杈撳嚭渚嬪瓙锛� [{ -"name": "xxx", +"name": "aaa-xxx", +"shortName": "xxx" "code":"pkt", -"搴旂敤杩囩▼鏍囪瘑绗�":"0xAA", -"鏈嶅姟绫诲瀷":"0x1", -"鏈嶅姟瀛愮被鍨�":"0x2", -"搴旂敤鏁版嵁鍖�": "" +"apid":"0xAA", +"serviceType":"0x1", +"serviceSubtype":"0x2", +"dataArea": "" }] ''' @@ -1356,14 +1541,15 @@ json.loads(gen_text) doc_text = self.get_text_with_entity(['APID鍒嗛厤']) - text = self.generate_tc_text(_msg, 'out/tc_transfer_pkts.json', doc_text=doc_text, - validation=validation) + text = await asyncio.to_thread( + lambda: self.generate_tc_text(_msg, f'{self.json_path}/tc_transfer_pkts.json', doc_text=doc_text, + validation=validation)) pkts = json.loads(text) return pkts - def gen_tc_pkt_details(self, pkt): + async def gen_tc_details(self, pkt): result = [] - tc_name = pkt['name'] + tc_name = pkt['shortName'] tc_code = pkt['code'] pkt['name'] = f'{tc_code} {tc_name}' _msg = f""" @@ -1380,10 +1566,10 @@ - 鍥哄畾鐮佸瓧锛歝onst锛屾暟鍊硷紝浜岃繘鍒朵互B缁撳熬锛屽崄杩涘埗锛屽崄鍏繘鍒朵互0x寮�澶达紱 - 闀垮害锛歭ength锛屽鏋滃瓧娈垫弿杩板唴瀹逛负鏁版嵁鍖哄煙鐨勯暱搴﹀垯琛ㄧず鏄暱搴︼紝闀垮害鐨剉alue涓烘暟鍊笺�乶ull鎴栬寖鍥村畾涔夛紝 - 鏋氫妇鍊硷細enum锛� -- 鏍¢獙鍜岋細checkSum锛屽鏋滄槸鏍¢獙鍜岀被鍨嬭繕闇�瑕佸垎鏋愭牎楠屽拰鐨勭畻娉曟槸浠�涔堬紝骞朵繚瀛樺湪value涓紝 -- 鍗虫椂杈撳叆锛歩nput锛屽鏋滄槸鍗虫椂杈撳叆value鐨勫�间负绌哄瓧绗︿覆銆� +- 鏍¢獙鍜岋細checkSum锛屽鏋滄槸鏍¢獙鍜岀被鍨嬭繕闇�瑕佸垎鏋愭牎楠屽拰鐨勭畻娉曟槸浠�涔堜互鍙婃牎楠屾暟鎹煙鑼冨洿锛屽苟淇濆瓨鍦╲alue涓紝渚嬪锛歿 "type":"CRC-CCITT", "start": "START", "end":"END" }锛� +- 鍗虫椂杈撳叆锛歩nput锛屽鏋滄槸鍗虫椂杈撳叆value鐨勫�间负鍙橀暱瀹氫箟銆� -# 闀垮害绫诲瀷鐨勮寖鍥村畾涔夋弿杩� +## 闀垮害绫诲瀷鐨勮寖鍥村畾涔夋弿杩� {"start": "璧峰瀛楁code", "end": "缁撴潫瀛楁code", "formula": "璁$畻鍏紡"} - start锛氳捣濮嬪瓧娈礳ode锛岄暱搴﹀寘鎷捣濮嬪瓧娈碉紝瀛楁鎻忚堪涓鏄庝簡璧峰瀛楁锛� - end锛氱粨鏉熷瓧娈礳ode锛岄暱搴﹀寘鎷粨鏉熷瓧娈碉紝瀛楁鎻忚堪涓鏄庝簡缁撴潫瀛楁锛� @@ -1391,22 +1577,31 @@ 璁$畻鍏紡瀹氫箟锛� - BYTES锛氭寜瀛楄妭璁$畻锛屽瓧鑺傛暟锛� - N-x锛氭�诲瓧鑺傛暟鍑弜锛屼緥濡傛�诲瓧鑺傛暟鍑�1鐨勫叕寮忎负N-1銆� +## 鍗充娇杈撳叆绫诲瀷鐨勫彉闀垮畾涔夋弿杩� +{"minLength": "鏈�灏忛暱搴�", "maxLength": "鏈�澶ч暱搴�", "variableLength": true} +- minLength锛氭渶灏忛暱搴︼紝 +- maxLength锛氭渶澶ч暱搴︼紝 +- variableLength锛氭槸鍚︽槸鍙橀暱銆� # 瀛楁绫诲瀷鍒嗘瀽鏂规硶 - 鏍规嵁瀛楁鎻忚堪鍒嗘瀽瀛楁鐨勭被鍨嬶紱 - 瀛楁鎻忚堪涓槑纭寚瀹氫簡瀛楁鍊肩殑锛岀被鍨嬩负const锛� - 瀛楁鎻忚堪涓病鏈夋槑纭寚瀹氬瓧娈靛�硷紝浣嗘槸缃楀垪浜嗗彇鍊艰寖鍥寸殑锛岀被鍨嬩负enum锛� - 瀛楁鎻忚堪涓鏋滄病鏈夋槑纭寚瀹氬瓧娈靛�间篃娌℃湁缃楀垪鍙栧�艰寖鍥寸殑锛岀被鍨嬩负input锛� -- 瀛楁濡傛灉鏄拰鈥滈暱搴︹�濇湁鍏筹紝绫诲瀷涓簂ength锛� +- 瀛楁濡傛灉鎻忚堪浜嗗綋鍓嶆寚浠や腑鐨勬暟鎹煙闀垮害浠ュ強闀垮害鑼冨洿鍒欐槸闀垮害绫诲瀷length锛屽惁鍒欎笉鏄暱搴︾被鍨嬶紱 - 濡傛灉鍜屾暟鎹煙鏈夊叧锛岀被鍨嬩负const锛� -- 瀛楁濡傛灉鍜屾牎楠屽拰鏈夊叧锛岀被鍨嬩负checkSum锛屽垎鏋愭牎楠屽拰鐨勭畻娉曪紝骞朵繚瀛樺湪value涓紝鏍¢獙鍜岀畻娉曞寘鎷細瀛楄妭寮傛垨锛圔yteXOR锛夈�佺疮鍔犲拰鍙栧弽锛圫umNot锛夈�佺疮鍔犲拰锛圓ddSum锛夈�佸簲鐢ㄥ惊鐜啑浣欙紙CRC-CCITT锛夈�丆RC8锛圕RC8锛夈�両SO鍜屾牎楠岋紙ISOSum锛夈�佸鏍¢獙锛圤dd锛夈�佸伓鏍¢獙锛圗ven锛夈�佸叾浠栵紙Other锛夈�� +- 
校验和类型：字段如果与当前指令数据区的校验和有关则为校验和类型，否则不是校验和类型；分析校验和的算法并保存在value中，校验和算法包括：字节异或（ByteXOR）、累加和取反（SumNot）、累加和（AddSum）、应用循环冗余（CRC-CCITT）、CRC8（CRC8）、ISO和校验（ISOSum）、奇校验（Odd）、偶校验（Even）、其他（Other）。

 # 约束
-- code 如果没有明确定义则使用名称的英文翻译，尽量简短；
+## 字段属性
+- code 如果没有明确定义则使用名称的英文翻译，尽量简短；如果没有填写或者为斜线，表示没有明确定义；
 - length 自动转换为bit长度，必须是数值、null或范围定义，不能为0；
 - value 根据字段描述提取字段值，字段值一般为数值类型，需要根据字段类型来分析，如果是length类型，value的值为范围定义；
 - enums 枚举类型的字段必须要有enums，根据字段描述提取，枚举元素的数据结构为{"n":"","v":"","c":""}；
-- length类型字段的范围定义中的start和end必须是生成结果中的字段code，长度包括start和end，必须使用长度描述中的字段；
+## 字段类型
+- 长度类型字段的范围定义中的start和end必须是生成结果中的字段code，长度范围包括start和end，必须使用长度描述中的字段；
+- 如果没有长度范围描述则不是长度类型；
+- 校验和类型字段必须描述的是当前指令数据域的校验和，如果描述的不是当前指令的数据域校验和则不是校验和类型；
 - 输出数据结构为数组，数组元素为字段信息；
 - 输出内容必须为严格的json，不能输出除json以外的任何内容。

@@ -1427,11 +1622,26 @@
         "value": {"start": "data", "end": "data", "formula": "BYTES"}
     },
     {
+        "name": "para3",
+        "code": "para3",
+        "length": 8,
+        "type": "enum",
+        "value": "",
+        "enums": [{"n":"参数1","v":"0x0A","c":"Para1"}]
+    },
+    {
         "name": "数据",
         "code": "data",
         "length": null,
         "type": "input",
         "value": ""
+    },
+    {
+        "name": "校验和",
+        "code": "checksum",
+        "length": 2,
+        "type": "checkSum",
+        "value": { "type": "CRC-CCITT", "start":"para1", "end":"data" }
     }
 ]
 """
@@ -1461,21 +1671,56 @@

         doc_text = self.get_text_with_entity([tc_name])
         if doc_text == '':
-            doc_text = pkt['应用数据区']
-        text = self.generate_tc_text(_msg,
-                                     f'out/遥控指令数据域-{tc_code}-{utils.to_file_name(tc_name)}.json',
-                                     doc_text=doc_text,
-                                     validation=validation)
+            doc_text = self.get_text_with_tc_name(tc_name)
+            if doc_text == '':
+                doc_text = pkt['dataArea']
+        text = await asyncio.to_thread(self.generate_tc_text, _msg,
+                                       f"{self.json_path}/遥控指令数据域-{tc_code}-{utils.to_file_name(tc_name)}.json",
+                                       doc_text=doc_text, validation=validation)
         result = json.loads(text)
         pkt['children'] = result

+    def get_text_with_tc_name(self, tc_name: str):
+        entities = doc_dbh.get_entities_by_type('指令格式配置')
+        entity_names = '\n'.join([f'- {e.name}' for e in entities])
+        msg = f"""
+# 需求
+请从下列指令名称中匹配一个与“{tc_name}”相似度最高的指令名称。
+指令名称列表：
+{entity_names}
+"""
+        # No cache file for this ad-hoc lookup; pass '' so caching is skipped (passing None would crash os.path.isfile)
+        name = self.generate_text(msg, '')
+        entity = next(filter(lambda e: e.name == name, entities), None)
+        if entity:
+            return self.get_text_with_entity([entity.name])
+        else:
+            return ''

-if __name__ == '__main__':
+
+def tc_data_generate():
+    exe_path = os.path.dirname(__file__) + "/db_tc_generator/InstructionGenerator.exe"
+    db_path = os.path.dirname(__file__) + "/db.db"
     try:
-        os.makedirs("./out/pkts", exist_ok=True)
-        os.makedirs("./out/tmp", exist_ok=True)
+        # 240 s timeout; capture the generator's output so result.stdout is not None
+        result = subprocess.run([exe_path, db_path], timeout=240, capture_output=True, text=True)
+        print(result.stdout)
+        print(result.returncode)
+    except subprocess.TimeoutExpired:
+        print("警告：指令数据生成失败！")
+
+
+def main():
+    try:
+        project_path = r'D:\projects\KnowledgeBase'
+        doc_dbh.set_project_path(project_path)
         # Launch the LLM processing flow
-        ret_text = DbStructFlow().run()
+        asyncio.run(DbStructFlow(project_path).run())
+        # Generate the telecommand data tables
+        tc_data_generate()
     except KeyboardInterrupt:
         if g_completion:
             g_completion.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/knowledgebase/db/data_creator.py b/knowledgebase/db/data_creator.py
index 9158e8f..25feaf4 100644
--- a/knowledgebase/db/data_creator.py
+++ b/knowledgebase/db/data_creator.py
@@ -81,7 +81,8 @@ def 
create_prop_linear(proj_pk, linear_pk, node, seq): bit_length = node['length'] if isinstance(bit_length, int): - byte_length = math.ceil(bit_length / 8) + pos = node['pos'] + byte_length = math.ceil((pos % 8 + bit_length) / 8) start = node['pos'] % 8 end = start + bit_length - 1 @@ -163,8 +164,9 @@ values = [] vals = child['vals'] if vals.endswith("/"): - vals = vals[:-1] - values.extend(vals.split("/")) + values.extend(vals[:-1].split("/")) + else: + values.extend(vals.split("/")) for i in range(0, len(key_items)): key_items[i]['val'] = values[i] node_name = '銆�' diff --git a/knowledgebase/db/db_helper.py b/knowledgebase/db/db_helper.py index 4f193d1..0a14bdc 100644 --- a/knowledgebase/db/db_helper.py +++ b/knowledgebase/db/db_helper.py @@ -1,21 +1,40 @@ import uuid from enum import Enum +from threading import RLock from sqlalchemy.orm import sessionmaker, scoped_session -from knowledgebase.db.models import engine, TProject, TDevice, TDataStream, TDevStream, TRule, TRuleEnc, TPropertyEnc, \ +from knowledgebase.db.models import get_engine, TProject, TDevice, TDataStream, TDevStream, TRule, TRuleEnc, TPropertyEnc, \ TPropertyLinear, TRuleStream, TEncLinear, TRuleLinear, TParameter, TParameterType, TExtendInfo, TRulekeyInfo, \ TInsFormat from hashlib import md5 -# 鍒涘缓涓�涓細璇濆伐鍘� -session_factory = sessionmaker(bind=engine) -# 鍒涘缓涓�涓細璇濆璞� -Session = scoped_session(session_factory) -session = Session() +_session = None _para_id_map = {} + +db_lock = RLock() + +def get_session(): + global _session + return _session + +def init_db_helper(): + # 鍒涘缓涓�涓細璇濆伐鍘� + session_factory = sessionmaker(bind=get_engine()) + # 鍒涘缓涓�涓細璇濆璞� + Session = scoped_session(session_factory) + global _session + _session = Session() + +def th_safe(func): + def wrapper(*args, **kwargs): + with db_lock: + result = func(*args, **kwargs) + return result + + return wrapper def get_pk(): @@ -24,12 +43,13 @@ return pk +@th_safe def create_project(sat_id, sat_name, proj_code, proj_name, desc, date_time, ) -> TProject: """ 鍒涘缓project :param sat_id: - :param sat_name: - :param proj_code: + :param sat_name: + :param proj_code: :param proj_name: :param desc: :param date_time: @@ -39,11 +59,12 @@ C_DESCRIPTION=desc, C_HASH=uuid.uuid4().int & 0xffffffff, C_PROJECT_NAME=proj_name, C_DATETIME=date_time, C_CREATEOR='') - session.add(project) - session.commit() + _session.add(project) + _session.commit() return project +@th_safe def create_device(device_id, device_name, device_type, dll, project_pk): """ 鍒涘缓device @@ -56,11 +77,12 @@ """ device = TDevice(C_DEV_PK=get_pk(), C_DEV_ID=device_id, C_DEV_NAME=device_name, C_DEV_TYPE=device_type, C_DLL=dll, C_PROJECT_PK=project_pk) - session.add(device) - session.commit() + _session.add(device) + _session.commit() return device +@th_safe def create_extend_info(proj_pk, prop_id, prop_name, val, fk): ext_info = TExtendInfo( C_PK=get_pk(), @@ -70,10 +92,11 @@ C_VAL=val, C_FOREIGN_PK=fk ) - session.add(ext_info) - session.commit() + _session.add(ext_info) + _session.commit() +@th_safe def create_data_stream(proj_pk, dev_pk, name, code, data_ty, direct, rule_id, rule_ty, rule_pk=None): """ 鍒涘缓data_stream @@ -96,33 +119,38 @@ C_DESCRIPTION='', C_RULE_ID=rule_id, C_RULE_TYPE=rule_ty) - session.add(ds) + _session.add(ds) link = TDevStream(C_PK=get_pk(), C_DEV_PK=dev_pk, C_STREAM_PK=ds.C_STREAM_PK, C_PROJECT_PK=proj_pk) - session.add(link) + _session.add(link) rule_enc = None # 鍒涘缓瑙f瀽瑙勫垯 if rule_pk is None: rule_pk = get_pk() if rule_ty == '001': # 灏佽鍖� - rule_enc = create_rule_enc(proj_pk, rule_pk, 
rule_id, rule_id) + rule_enc = _create_rule_enc(proj_pk, rule_pk, rule_id, rule_id) - rule = create_rule(proj_pk, ds.C_STREAM_PK, rule_id, name, None, None, '0') - rule = create_rule(proj_pk, rule_pk, rule_id, rule_id, None, ds.C_STREAM_PK, '1') + rule = _create_rule(proj_pk, ds.C_STREAM_PK, rule_id, name, None, None, '0') + rule = _create_rule(proj_pk, rule_pk, rule_id, rule_id, None, ds.C_STREAM_PK, '1') # rule stream - rule_stream = create_rule_stream(proj_pk, - rule_pk, - ds.C_STREAM_PK, - ds.C_STREAM_ID, - ds.C_NAME, - ds.C_STREAM_DIR, - f"{ds.C_NAME}/{rule_id}/") - session.add(rule_stream) - session.commit() + rule_stream = _create_rule_stream(proj_pk, + rule_pk, + ds.C_STREAM_PK, + ds.C_STREAM_ID, + ds.C_NAME, + ds.C_STREAM_DIR, + f"{ds.C_NAME}/{rule_id}/") + _session.add(rule_stream) + _session.commit() return ds, rule_stream, rule_enc +@th_safe def create_rule(proj_pk, rule_pk, rule_id, rule_name, rule_len, parent_pk, flag, actual_parent_pk=None): + return _create_rule(proj_pk, rule_pk, rule_id, rule_name, rule_len, parent_pk, flag, actual_parent_pk) + + +def _create_rule(proj_pk, rule_pk, rule_id, rule_name, rule_len, parent_pk, flag, actual_parent_pk=None): rule = TRule( C_PK=get_pk(), C_PROJECT_PK=proj_pk, @@ -134,16 +162,22 @@ C_FLAG=flag, C_ACTUAL_PARENT_PK=actual_parent_pk ) - session.add(rule) - session.commit() + _session.add(rule) + _session.commit() return rule +@th_safe def find_rule_by_rule_id(rule_id): - return session.query(TRule).filter(TRule.C_RULE_ID == rule_id).first() + return _session.query(TRule).filter(TRule.C_RULE_ID == rule_id).first() +@th_safe def create_rule_stream(proj_pk, rule_pk, stream_pk, stream_id, stream_name, stream_dir, _path): + return _create_rule_stream(proj_pk, rule_pk, stream_pk, stream_id, stream_name, stream_dir, _path) + + +def _create_rule_stream(proj_pk, rule_pk, stream_pk, stream_id, stream_name, stream_dir, _path): rule_stream = TRuleStream( C_PK=get_pk(), C_PROJECT_PK=proj_pk, @@ -154,22 +188,28 @@ C_STREAM_DIR=stream_dir, C_PATH=_path ) - session.add(rule_stream) - session.commit() + _session.add(rule_stream) + _session.commit() return rule_stream +@th_safe def create_ref_ds_rule_stream(proj_pk, stream_pk, stream_id, stream_name, stream_dir, target_stream_pk): - items: list = session.query(TRuleStream).filter(TRuleStream.C_STREAM_PK == target_stream_pk).all() + items: list = _session.query(TRuleStream).filter(TRuleStream.C_STREAM_PK == target_stream_pk).all() for it in items: _path = it.C_PATH if len(_path.split('/')) == 3: continue _path = f'{stream_name}/{stream_id}/'.join(_path.split('/')[2:]) + '/' - create_rule_stream(proj_pk, it.C_RULE_PK, stream_pk, stream_id, stream_name, stream_dir, _path) + _create_rule_stream(proj_pk, it.C_RULE_PK, stream_pk, stream_id, stream_name, stream_dir, _path) +@th_safe def create_rule_enc(proj_pk, enc_pk, enc_id, name, content=None): + return _create_rule_enc(proj_pk, enc_pk, enc_id, name, content) + + +def _create_rule_enc(proj_pk, enc_pk, enc_id, name, content=None): rule_enc = TRuleEnc( C_ENC_PK=enc_pk, C_PROJECT_PK=proj_pk, @@ -177,11 +217,12 @@ C_NAME=name, C_CONTENT=content, ) - session.add(rule_enc) - session.commit() + _session.add(rule_enc) + _session.commit() return rule_enc +@th_safe def create_rule_linear(proj_pk, linear_pk, linear_id, name, length, content): rule_linear = TRuleLinear( C_LINEAR_PK=linear_pk, @@ -193,11 +234,12 @@ C_REL_LINEAR_PK=None, C_CONTENT=content ) - session.add(rule_linear) - session.commit() + _session.add(rule_linear) + _session.commit() return rule_linear 
+@th_safe def create_property_enc(proj_pk, enc_pk, segment_id, name, ty, content, offset, length, msb_first, mask, cond, seq, rel_enc_item_pk, para_id): property_enc = TPropertyEnc( @@ -218,7 +260,7 @@ C_REL_ENCITEM_PK=rel_enc_item_pk, C_PAR_ID=para_id ) - session.add(property_enc) + _session.add(property_enc) para = TParameter( C_PAR_PK=get_pk(), C_PROJECT_PK=proj_pk, @@ -235,7 +277,7 @@ C_REG_PK=None, C_METHOD_PK=None ) - session.add(para) + _session.add(para) if ty == 'ENUM' and content: items: list = content.split(' ') for item in items: @@ -250,8 +292,8 @@ C_PAR_PK=para.C_PAR_PK, C_PROJECT_PK=proj_pk ) - session.add(pt) - session.commit() + _session.add(pt) + _session.commit() return property_enc @@ -264,8 +306,16 @@ return _para_id +para_id_pk_map = {} + + +@th_safe def create_property_linear(proj_pk, linear_pk, para_id, name, ty, content, offset, length, msb_first, mask, cond, calc_expr, simuval, reg_par, params, seq): + + par_pk = get_pk() + if para_id in para_id_pk_map: + par_pk = para_id_pk_map[para_id] property_linear = TPropertyLinear( C_PK=get_pk(), C_LINEAR_PK=linear_pk, @@ -278,7 +328,7 @@ C_MASK=mask, C_CONDITION=cond, C_CALC_EXPR=calc_expr, - C_PAR_PK=get_pk(), + C_PAR_PK=par_pk, C_SIMUVAL=simuval, C_REG_PAR=reg_par, C_PARAMS=params, @@ -286,45 +336,47 @@ C_SEQ=seq, C_REL_PK=None ) - session.add(property_linear) - if para_id in _para_id_map: - get_para_id(para_id) - para = TParameter( - C_PAR_PK=property_linear.C_PAR_PK, - C_PROJECT_PK=proj_pk, - C_PAR_CODE=para_id, - C_PAR_NAME=name, - C_SUBSYS=None, - C_TYPE=None, - C_UNIT=None, - C_VALUE_RANGE=None, - C_DIS_REQUIRE=None, - C_MODULUS=None, - C_PARAMS=None, - C_PRECISION='0', - C_REG_PK=None, - C_METHOD_PK=None - ) - session.add(para) - if ty == 'ENUM' and content: - items: list = content.split(' ') - for item in items: - idx = items.index(item) - name, val = item.split(',') - pt = TParameterType( - C_PK=get_pk(), - C_TYPE_ID=f'{idx}', - C_TYPE_NAME=name, - C_VALUE=val, - C_DATA_TYPE=None, - C_PAR_PK=para.C_PAR_PK, - C_PROJECT_PK=proj_pk - ) - session.add(pt) - session.commit() + _session.add(property_linear) + if para_id not in para_id_pk_map: + if para_id in _para_id_map: + get_para_id(para_id) + para = TParameter( + C_PAR_PK=property_linear.C_PAR_PK, + C_PROJECT_PK=proj_pk, + C_PAR_CODE=para_id, + C_PAR_NAME=name, + C_SUBSYS=None, + C_TYPE=None, + C_UNIT=None, + C_VALUE_RANGE=None, + C_DIS_REQUIRE=None, + C_MODULUS=None, + C_PARAMS=None, + C_PRECISION='0', + C_REG_PK=None, + C_METHOD_PK=None + ) + _session.add(para) + if ty == 'ENUM' and content: + items: list = content.split(' ') + for item in items: + idx = items.index(item) + name, val = item.split(',') + pt = TParameterType( + C_PK=get_pk(), + C_TYPE_ID=f'{idx}', + C_TYPE_NAME=name, + C_VALUE=val, + C_DATA_TYPE=None, + C_PAR_PK=para.C_PAR_PK, + C_PROJECT_PK=proj_pk + ) + _session.add(pt) + _session.commit() return property_linear +@th_safe def create_enc_linear(proj_pk, enc_item_pk, ty, vals=None, linear_pk=None): """ 鍒涘缓 t_enc_linear @@ -345,19 +397,21 @@ C_TYPE=ty, C_FOLDER_PK=None ) - session.add(enc_linear) - session.commit() + _session.add(enc_linear) + _session.commit() return enc_linear +@th_safe def update_rule_enc(rule_enc): # 鏇存柊 - session.query(TRuleEnc).filter(TRuleEnc.C_ENC_PK == rule_enc.C_ENC_PK).update({ + _session.query(TRuleEnc).filter(TRuleEnc.C_ENC_PK == rule_enc.C_ENC_PK).update({ TRuleEnc.C_KEY: rule_enc.C_KEY }) - session.commit() + _session.commit() +@th_safe def create_rulekey_info(proj_pk, rule_pk, rule_id, rule_name, key_pk, key_id, 
key_name, key_val): info = TRulekeyInfo( C_PK=get_pk(), @@ -370,8 +424,8 @@ C_KEY_NAME=key_name, C_KEY_VAL=key_val ) - session.add(info) - session.commit() + _session.add(info) + _session.commit() ins_ty = { @@ -392,43 +446,43 @@ class BinaryType(Enum): - Integer = 0 # 鏁村瀷 - Float = 1 # 娴偣鍨� - Ascii = 2 # ASCII + Integer = 0 # 鏁村瀷 + Float = 1 # 娴偣鍨� + Ascii = 2 # ASCII class NumberDataType(Enum): - Unsigned = 0 # 鏃犵鍙锋暣鍨� - SignInteger = 1 # 鏈夌鍙锋暣鍨� - Phy = 2 # 鐗╃悊閲� - Bytes = 3 # 澶氬瓧鑺傜爜 + Unsigned = 0 # 鏃犵鍙锋暣鍨� + SignInteger = 1 # 鏈夌鍙锋暣鍨� + Phy = 2 # 鐗╃悊閲� + Bytes = 3 # 澶氬瓧鑺傜爜 class InputFormat(Enum): - Binary = 0 # 浜岃繘鍒� - Decimal = 1 # 鍗佽繘鍒� - Hex = 2 # 鍗佸叚杩涘埗 + Binary = 0 # 浜岃繘鍒� + Decimal = 1 # 鍗佽繘鍒� + Hex = 2 # 鍗佸叚杩涘埗 class ProcessMethod(Enum): - Dirct = 0 # 鐩磋 - Equivalent = 1 # 褰撻噺 - Formula = 2 # 鍏紡 - Script = 3 # 鑴氭湰 + Dirct = 0 # 鐩磋 + Equivalent = 1 # 褰撻噺 + Formula = 2 # 鍏紡 + Script = 3 # 鑴氭湰 class ExpressionType(Enum): - Count = 0 # 涓暟璁$畻 - Formula = 1 # 鏁板�艰绠� + Count = 0 # 涓暟璁$畻 + Formula = 1 # 鏁板�艰绠� class EnumType(Enum): - NameValue = 0 # 鍚嶅�煎鏋氫妇 - Range = 1 # 鑼冨洿鏋氫妇 - Classify = 2 # 澶氱骇鍒嗙被 - InsCategory = 3 # 鎸囦护绫诲埆鏋氫妇 - InsUnit = 4 # 鎸囦护鍗曞厓鏋氫妇 - InsFormat = 5 # 鎸囦护鏍煎紡鏋氫妇 + NameValue = 0 # 鍚嶅�煎鏋氫妇 + Range = 1 # 鑼冨洿鏋氫妇 + Classify = 2 # 澶氱骇鍒嗙被 + InsCategory = 3 # 鎸囦护绫诲埆鏋氫妇 + InsUnit = 4 # 鎸囦护鍗曞厓鏋氫妇 + InsFormat = 5 # 鎸囦护鏍煎紡鏋氫妇 def make_attr(field: dict): @@ -451,16 +505,19 @@ attr = 0 # 鍗虫椂杈撳叆锛屾棤绗﹀彿鏁存暟锛屽崄杩涘埗锛岀洿璇� if field['type'] == ins_ty['input']: - attr |= (NumberDataType.Unsigned.value << 3) | (InputFormat.Decimal.value << 6) | (ProcessMethod.Dirct.value << 11) + attr |= (NumberDataType.Unsigned.value << 3) | (InputFormat.Decimal.value << 6) | ( + ProcessMethod.Dirct.value << 11) # 鏄惁鏄瓙鍖� - attr |= (1 << 9) if field['type']==ins_ty['subPkt'] else 0 + attr |= (1 << 9) if field['type'] == ins_ty['subPkt'] else 0 # 鏄惁鏄彂閫佹爣璁� - attr |= (1 << 10) if field['type']==ins_ty['sendFlag'] else 0 + attr |= (1 << 10) if field['type'] == ins_ty['sendFlag'] else 0 # 璁$畻绫诲瀷 # 鏋氫妇绫诲瀷 return attr + +@th_safe def create_ins_format(proj_pk: str, parent_pk: str, info: dict) -> TInsFormat: ins_format = TInsFormat( C_INS_FORMAT_PK=get_pk(), @@ -480,6 +537,6 @@ C_FORMULA=info['formula'] if 'formula' in info else '', C_NUMBER='', ) - session.add(ins_format) - session.commit() + _session.add(ins_format) + _session.commit() return ins_format diff --git a/knowledgebase/db/doc_db_helper.py b/knowledgebase/db/doc_db_helper.py index 6b8d6cb..a0b82b1 100644 --- a/knowledgebase/db/doc_db_helper.py +++ b/knowledgebase/db/doc_db_helper.py @@ -6,6 +6,7 @@ # @description: 鏂囨。鏁版嵁搴撳姪鎵嬶紝mysql鏁版嵁搴� import json +from threading import RLock from knowledgebase.db.doc_db_models import init_doc_db, TDoc, TEntity, TParagraph, TParagraphLink, TParagraphRefLink, \ TParagraphEntityLink @@ -17,9 +18,13 @@ """ 鏂囨。鏁版嵁搴撳姪鎵� """ + lock = RLock() def __init__(self): - self.session = init_doc_db() + self.session = None + + def set_project_path(self, project_path): + self.session = init_doc_db(project_path) def add_doc(self, doc_info: DocInfo) -> int: """ @@ -32,6 +37,7 @@ ) self.session.add(_doc) self.session.commit() + doc_info.id = _doc.id return _doc.id def add_paragraph(self, doc_id: int, parent_id: int, paragraph_info: ParagraphInfo) -> TParagraph: @@ -62,6 +68,7 @@ if paragraph_info.children: for child in paragraph_info.children: self.add_paragraph(doc_id, _paragraph.id, child) + paragraph_info.id = _paragraph.id return _paragraph def add_paragraph_link(self, paragraph_link): @@ -73,6 +80,18 @@ self.session.commit() return 
paragraph_link.id
 
+    def add_paragraph_ref_link(self, parent_id: int, child_id: int) -> int:
+        """
+        Add a paragraph reference link.
+        :param parent_id: the referencing paragraph
+        :param child_id: the referenced paragraph
+        :return: id of the new link
+        """
+        link = TParagraphRefLink(parent_id=parent_id, child_id=child_id, is_del=0)
+        self.session.add(link)
+        self.session.commit()
+        return link.id
+
     def add_paragraph_entity_link(self, paragraph_entity_link):
         """
         Add a paragraph-entity link.
@@ -83,10 +102,11 @@
         return paragraph_entity_link.id
 
     def get_entity(self, entity):
-        ret = self.session.query(TEntity).where(
-            TEntity.name == entity.name and TEntity.type == entity.type and TEntity.doc_type == entity.doc_type).first()
-        if ret:
-            return ret
+        with self.lock:
+            ret = self.session.query(TEntity).where(
+                TEntity.name == entity.name, TEntity.type == entity.type,
+                TEntity.doc_type == entity.doc_type).first()
+            if ret:
+                return ret
 
     def add_entity(self, entity):
         """
@@ -97,20 +117,14 @@
         self.session.commit()
         return entity.id
 
-    def add_paragraph_ref_link(self, paragraph_ref_link):
-        """
-        Add a paragraph reference link.
-        :param paragraph_ref_link: the paragraph reference link
-        """
-        self.session.add(paragraph_ref_link)
-        self.session.commit()
-        return paragraph_ref_link
-
     def get_all_entities(self) -> list[TEntity]:
-        return self.session.query(TEntity).all()
+        with self.lock:
+            return self.session.query(TEntity).all()
 
     def get_docs(self) -> list[TDoc]:
-        return self.session.query(TDoc).all()
+        with self.lock:
+            return self.session.query(TDoc).all()
 
     def get_texts_with_entities(self, entity_names: list[str]):
         """
@@ -118,13 +132,29 @@
        Get the text content for the given entity terms.
        :param entity_names: list[str] - entity terms
        :return: list[str] - list of texts
         """
-        if not entity_names:
-            return ""
-        _entities = self.session.query(TEntity).where(TEntity.name.in_(entity_names)).all()
-        _entity_ids = [entity.id for entity in _entities]
-        links = self.session.query(TParagraphEntityLink).where(TParagraphEntityLink.entity_id.in_(_entity_ids)).all()
-        _paragraphs = [link.paragraph for link in links]
-        return [self.get_paragraph_full_text(p) for p in _paragraphs]
+        with self.lock:
+            if not entity_names:
+                return []
+            _entities = self.session.query(TEntity).where(TEntity.name.in_(entity_names)).all()
+            _entity_ids = [entity.id for entity in _entities]
+            links = self.session.query(TParagraphEntityLink).where(
+                TParagraphEntityLink.entity_id.in_(_entity_ids)).all()
+            _paragraphs: list[TParagraph] = [link.paragraph for link in links]
+            # pull in paragraphs referenced by the matched ones, then de-duplicate by id
+            ref_paragraphs = []
+            for p in _paragraphs:
+                ref_paragraphs.extend([x.child for x in p.ref_links])
+            _paragraphs.extend(ref_paragraphs)
+            id_map = {}
+            result = []
+            for p in _paragraphs:
+                if p.id in id_map:
+                    continue
+                id_map[p.id] = p
+                result.append(p)
+            return [p.text for p in result]
 
     def get_text_with_entities(self, entity_names: list[str]) -> str:
         """
         Get the text content for the given entity terms.
         :param entity_names: list[str] - entity terms
         :return: str - text content
         """
@@ -143,7 +173,13 @@
         return result + '\n' + '\n'.join([self.get_paragraph_full_text(p) for p in p.children])
 
     def get_entities_by_doc_type(self, doc_type):
-        _entities = self.session.query(TEntity).where(TEntity.doc_type == doc_type).all()
+        with self.lock:
+            _entities = self.session.query(TEntity).where(TEntity.doc_type == doc_type).all()
+        return _entities
+
+    def get_entities_by_type(self, ty: str) -> list[TEntity]:
+        with self.lock:
+            _entities = self.session.query(TEntity).where(TEntity.type == ty).all()
         return _entities
 
     def commit(self):
diff --git a/knowledgebase/db/doc_db_models.py b/knowledgebase/db/doc_db_models.py
index 01ab1d8..80ea1a0 100644
--- a/knowledgebase/db/doc_db_models.py
+++ b/knowledgebase/db/doc_db_models.py
@@ -6,6 +6,7 @@
 # @description: 
鏂囨。鏁版嵁搴撴ā鍨� from sqlalchemy import create_engine, Column, DateTime, Integer, Text, ForeignKey +from sqlalchemy.dialects.mysql import LONGTEXT from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship from sqlalchemy.orm import sessionmaker, scoped_session @@ -22,6 +23,7 @@ """ __tablename__ = 't_paragraphs' id = Column(Integer, primary_key=True) + # text = Column(LONGTEXT) text = Column(Text) title_level = Column(Integer) title_num = Column(Text) @@ -139,17 +141,19 @@ # is_del = Column(Integer) -def init_doc_db(): +def init_doc_db(project_path): """ 鍒濆鍖栨枃妗f暟鎹簱 :return: 鏁版嵁搴搒ession瀹炰緥 """ # mysql Log.info("杩炴帴骞跺垵濮嬪寲鏂囨。鏁版嵁搴�...") - engine = create_engine('mysql+pymysql://root:123456@192.168.3.145:3306/knowledgebase', echo=False) - # engine = create_engine('sqlite:///doc_db.db', echo=False) + # engine = create_engine('mysql+pymysql://root:123456@10.74.15.171:3306/knowledgebase_xx25', echo=False) + engine = create_engine(f'sqlite:///{project_path}/docs/doc_db.db', echo=False) Base.metadata.create_all(engine) SessionFactory = sessionmaker(bind=engine) Session = scoped_session(SessionFactory) session = Session() return session + +# _xx25 \ No newline at end of file diff --git a/knowledgebase/db/models.py b/knowledgebase/db/models.py index f171b46..db509dd 100644 --- a/knowledgebase/db/models.py +++ b/knowledgebase/db/models.py @@ -473,8 +473,14 @@ C_ACCESS = Column(Integer) C_EDIT = Column(Integer) +engine = None -if os.path.isfile("db.db"): - os.remove("db.db") -engine = create_engine('sqlite:///db.db', echo=False) -metadata.create_all(engine) +def get_engine(): + return engine + +def init_base_db(db_path: str): + global engine, metadata + if os.path.isfile(db_path): + os.remove(db_path) + engine = create_engine(f'sqlite:///{db_path}', echo=False) + metadata.create_all(engine) diff --git a/knowledgebase/doc/doc_processor.py b/knowledgebase/doc/doc_processor.py index 28092ed..b0f3b9c 100644 --- a/knowledgebase/doc/doc_processor.py +++ b/knowledgebase/doc/doc_processor.py @@ -12,7 +12,7 @@ from knowledgebase.doc.docx_split import DocSplit import asyncio from knowledgebase.db.doc_db_helper import doc_dbh -from knowledgebase.doc.entity_helper import entity_helper +from knowledgebase.doc.entity_helper import get_entity_helper from knowledgebase.doc.entity_recognition import EntityRecognition import os.path @@ -36,6 +36,7 @@ self.doc_id = 0 def get_doc_type(self): + entity_helper = get_entity_helper() Log.info(f'璇嗗埆鏂囨。绫诲瀷锛歿self.docx_file}') rules = '锛沑n'.join([f'- {it}锛歿entity_helper.doc_prompt_map[it]}' for it in entity_helper.doc_prompt_map.keys()]) msg = HumanMessage(f''' @@ -50,7 +51,8 @@ resp = llm.invoke([msg]) Log.info(f'璇嗗埆缁撴灉锛歿resp.content}') return resp.content - def get_tc_info(self, paragraph: ParagraphInfo): + + async def get_tc_info(self, paragraph: ParagraphInfo): if self.doc_type not in [DocType.tc_format]: return '' prompt = HumanMessagePromptTemplate.from_template(''' @@ -69,39 +71,49 @@ {{ "name": "xxx" }} -# 绀轰緥 - 鏈瘑鍒埌鏁版嵁鍖� +# 绀轰緥 - 鏈瘑鍒埌鎸囦护 "" # 鏂囨湰鍐呭锛� {text} ''') chain = prompt.prompt | llm | JsonOutputParser() - resp = chain.invoke({"text": paragraph.full_text}) + resp = await chain.ainvoke({"text": paragraph.full_text}) + import json + # Log.info(f'>>>>>>鎸囦护璇嗗埆锛歕n{paragraph.full_text}') + # Log.info(f'<<<<<<鎸囦护锛歿json.dumps(resp, ensure_ascii=False)}') return resp - def get_tm_pkt_info(self, paragraph: ParagraphInfo): + + async def get_tm_pkt_info(self, paragraph: ParagraphInfo): if self.doc_type not in [DocType.tm_outline, 
DocType.tm_pkt_design]: return '' prompt = HumanMessagePromptTemplate.from_template(''' # 鎸囦护 璇嗗埆閬ユ祴鍖呬俊鎭紝璇蜂粠涓嬮潰鐨勬枃鏈腑璇嗗埆閬ユ祴鍖呬俊鎭紝濡傛灉璇嗗埆澶辫触涓嶈杈撳嚭浠讳綍瀛楃銆� 璇嗗埆瑙勫垯锛氱珷鑺傛爣棰樹腑鍖呭惈鍖呭悕绉板拰浠e彿锛岀珷鑺傚唴瀹逛负琛ㄦ牸锛岃〃鏍间腑鍖呮嫭鍖呭ご瀹氫箟鍜屽寘鍙傛暟瀹氫箟銆� -鎻愬彇鐨勯仴娴嬪寘淇℃伅鍖呮嫭锛氬寘鍚嶇О锛屽寘浠e彿锛孉PID銆� +鎻愬彇鐨勯仴娴嬪寘淇℃伅鍖呮嫭锛氬寘鍚嶇О锛屽寘浠e彿銆� # 绾︽潫 - 濡傛灉鏂囨湰鍐呭鏄洰褰曞垯涓嶈杈撳嚭浠讳綍瀛楃锛� - 鏂囨湰鎻忚堪鐨勫唴瀹规槸鍗曚釜閬ユ祴鍖咃紝濡傛灉鏈夊涓仴娴嬪寘鍒欎笉瑕佽緭鍑轰换浣曞瓧绗︼紱 -- 鏂囨湰缁撴瀯閫氬父鏄細鍖呭悕绉般�佷唬鍙峰拰APID鍦ㄥ紑澶达紝鍚庨潰绱ф帴鐫�鏄寘澶村拰鍙傛暟瀹氫箟琛紱 +- 鏂囨湰缁撴瀯閫氬父鏄細鍖呭悕绉般�佷唬鍙峰拰APID(搴旂敤杩囩▼鏍囪瘑)鍦ㄥ紑澶达紙搴旂敤杩囩▼鏍囪瘑涔熸湁鍙兘鍦ㄨ〃鏍间腑锛夛紝鍚庨潰绱ф帴鐫�鏄寘澶村拰鍙傛暟瀹氫箟琛紱 - 濡傛灉娌℃湁璇嗗埆鍒伴仴娴嬪寘淇℃伅涓嶈杈撳嚭浠讳綍瀛楃锛� - 璇嗗埆澶辫触锛屼笉瑕佽緭鍑轰换浣曞唴瀹癸紝鍖呮嫭瑙i噴鎬ф枃鏈紱 - 杈撳嚭json鏍煎紡銆� -# 澶嶅悎瑕佹眰鐨勬枃鏈粨鏋� +# 绗﹀悎瑕佹眰鐨勬枃鏈粨鏋�1 1.1.1 code xxx鍖�(APID=0x123) ```json 琛ㄦ牸鍐呭 ``` +# 绗﹀悎瑕佹眰鐨勬枃鏈粨鏋�2 +1.1.1 code xxx鍖� +```json +琛ㄦ牸鍐呭 +搴旂敤杩囩▼鏍囪瘑 +... +``` # 绀轰緥 - 璇嗗埆鍒版暟鎹寘 {{ "name": "xxx鍖�", - "code": "xxx", - "apid": 123 + "code": "TMS001" }} # 绀轰緥 - 鏈瘑鍒埌鏁版嵁鍖� "" @@ -109,17 +121,54 @@ {text} ''') chain = prompt.prompt | llm | JsonOutputParser() - resp = chain.invoke({"text": paragraph.full_text}) + resp = await chain.ainvoke({"text": paragraph.full_text}) return resp - async def gen_chapter_entities(self, paragraph: ParagraphInfo): + async def get_chapter_refs(self, paragraph: ParagraphInfo, toc: [str]) -> [str]: + if self.doc_type not in [DocType.tc_format]: + return '' + toc_text = '\n'.join(toc) + prompt = HumanMessagePromptTemplate.from_template(f''' +# 瑙掕壊 +浣犳槸涓�鍚嶈祫娣辩殑杞欢宸ョ▼甯堛�� +# 鎸囦护 +甯姪鎴戝畬鎴愬鏂囨湰涓紩鐢ㄥ叧绯荤殑鎶藉彇锛屽垽鏂綋鍓嶆枃鏈腑鏄惁鍖呭惈浜嗗紩鐢ㄤ俊鎭紝渚嬪鍖呭惈浠ヤ笅鍏抽敭瀛楋細鈥滆瑙�1.1鈥濄�佲�滆1.1鈥濄�佲�滃叿浣撹1.1鈥濄�佲�滆闄勫綍鈥濈瓑銆� +濡傛灉鍖呭惈寮曠敤锛屽皢寮曠敤涓庘�滅洰褰曞唴瀹光�濅腑鐨勭洰褰曟潯鐩繘琛屽尮閰嶃�� +灏嗗尮閰嶅埌鐨勭洰褰曟潯鐩緭鍑猴紝杈撳嚭鏍煎紡涓簀son鏍煎紡銆� +# 绾︽潫 +- 鏄惁鍖呭惈寮曠敤鐨勫垽鏂潯浠朵腑蹇呴』鍖呭惈寮曠敤鐩稿叧鐨勬弿杩帮紝渚嬪锛氣�滆瑙�1.1鈥濄�佲�滆1.1鈥濄�佲�滃叿浣撹1.1鈥濄�佲�滆闄勫綍鈥濈瓑锛� +- 娉ㄦ剰涓嶈鑷繁寮曠敤鑷繁锛� +- 浠呮彁鍙栫洰褰曞唴瀹逛腑鍖呭惈鐨勬潯鐩紝濡傛灉鐩綍鍐呭涓嶅寘鍚垯涓嶆彁鍙栵紱 +- 濡傛灉浠呴潬鏍囬鍙风爜鏃犳硶纭畾鐩綍鏉$洰鐨勶紝鏍规嵁鏂囨湰鍐呭鍖归厤瀵瑰簲鐨勭洰褰曟潯鐩紱 +- 杈撳嚭鐨勫唴瀹瑰繀椤绘槸鐩綍涓殑鏉$洰锛� +- 杈撳嚭json鏍煎紡锛屼笉瑕佽緭鍑轰换浣昷son浠ュ鐨勫瓧绗︺�� +# 杈撳嚭妗堜緥 +["1.1 xxx"] +# 鐩綍鍐呭锛� +{toc_text} +# 鏂囨湰鍐呭锛� +{{text}} +''') + chain = prompt.prompt | llm | JsonOutputParser() + resp = await chain.ainvoke({"text": paragraph.full_text}) + return resp + + async def gen_chapter_entities(self, paragraph: ParagraphInfo, paragraphs: [ParagraphInfo], toc: [str]): # 鑾峰彇绔犺妭瀹炰綋璇� - entity_names = await asyncio.to_thread(lambda: self.entity_recognition.run(paragraph.full_text)) + entity_names_task = self.entity_recognition.run(paragraph.full_text) + # 鑾峰彇鎸囦护淇℃伅 + cmd_task = self.get_tc_info(paragraph) + # 鑾峰彇閬ユ祴鍖呬俊鎭� + pkt_task = self.get_tm_pkt_info(paragraph) + # 鑾峰彇鏂囨。寮曠敤 + refs_task = self.get_chapter_refs(paragraph, toc) + entity_names, cmd, pkt, chapter_refs = await asyncio.gather(entity_names_task, cmd_task, pkt_task, refs_task) + Log.info(f'绔犺妭{paragraph.title_num}瀹炰綋璇嶏細{entity_names}') + Log.info(f'绔犺妭{paragraph.title_num}寮曠敤锛歿chapter_refs}') if entity_names: paragraph.entities = doc_dbh.get_entities_by_names(entity_names) - # 鑾峰彇閬ユ祴鍖呬俊鎭� - pkt = self.get_tm_pkt_info(paragraph) + if pkt: entity = TEntity(name=pkt['code'], type='閬ユ祴鍖呴厤缃�', prompts='', doc_type='') e = doc_dbh.get_entity(entity) @@ -129,8 +178,7 @@ doc_dbh.add_entity(entity) Log.info(f"鏂板Entity锛歿entity.name}锛宨d锛歿entity.id}") paragraph.entities.append(entity) - # 鑾峰彇鎸囦护淇℃伅 - cmd = self.get_tc_info(paragraph) + if cmd: entity = TEntity(name=cmd['name'], type='鎸囦护鏍煎紡閰嶇疆', prompts='', doc_type='') e = doc_dbh.get_entity(entity) @@ -140,21 +188,29 @@ doc_dbh.add_entity(entity) Log.info(f"鏂板Entity锛歿entity.name}锛宨d锛歿entity.id}") paragraph.entities.append(entity) + # 鑾峰彇寮曠敤淇℃伅 + if chapter_refs: + for ref in chapter_refs: + _p = next(filter(lambda p: ref == p.title, 
self.doc_split.paragraphs), None) + if _p: + if paragraph != _p: + paragraph.refs.append(_p) def process(self): self.doc_split.split() # 鍒嗘壒骞跺彂澶勭悊锛屾瘡鎵�10涓� - batch_size = 10 - for i in range(0, len(self.doc_split.paragraphs), batch_size): - batch_paragraphs = self.doc_split.paragraphs[i:i + batch_size] - tasks = [] - for paragraph in batch_paragraphs: - tasks.append(self.gen_chapter_entities(paragraph)) + tasks = [] + toc = [] + for p in self.doc_split.paragraphs: + if p.title_level: + toc.append(p.title) + for paragraph in self.doc_split.paragraphs: + tasks.append(self.gen_chapter_entities(paragraph, self.doc_split.paragraphs, toc)) - async def run(): - await asyncio.gather(*tasks) + async def run(): + await asyncio.gather(*tasks) - asyncio.run(run()) + asyncio.run(run()) # 淇濆瓨鍒版暟鎹簱 self.save_to_db() @@ -170,4 +226,8 @@ self.doc_id = doc_dbh.add_doc(doc) for paragraph in doc.paragraphs: doc_dbh.add_paragraph(self.doc_id, None, paragraph) + for paragraph in self.doc_split.paragraphs: + for ref_paragraph in paragraph.refs: + doc_dbh.add_paragraph_ref_link(paragraph.id, ref_paragraph.id) + Log.info(f"{paragraph.title} 寮曠敤浜�-> {ref_paragraph.title}") Log.info('淇濆瓨娈佃惤鍜屾钀藉疄浣撹瘝鍏崇郴鍒版暟鎹簱瀹屾垚') diff --git a/knowledgebase/doc/docx_split.py b/knowledgebase/doc/docx_split.py index 4270b05..4a97292 100644 --- a/knowledgebase/doc/docx_split.py +++ b/knowledgebase/doc/docx_split.py @@ -15,7 +15,7 @@ from knowledgebase.doc.image_to_text import ImageToText from knowledgebase.doc.models import ParagraphInfo from knowledgebase.log import Log - +from bs4 import BeautifulSoup class DocSplit: """ @@ -71,6 +71,9 @@ else: # 鍗曞厓鏍兼枃鏈幏鍙� text = cell.text + if cell._element.xml.find("w:ins")!=-1: + soup = BeautifulSoup(cell._element.xml, "xml") + text = ''.join([x.get_text() for x in soup.find_all("w:t")]) # row_data[headers[row_idx]] = text row_data.append(text) row_idx += 1 @@ -94,6 +97,7 @@ # 鑾峰彇鏍囬澶氱骇缂栧彿 paragraph = document.paragraphs[paragraph_cnt] p_text = paragraph.text + is_toc = paragraph.style.name.startswith('TOC') and '鐩�' in p_text and '褰�' in p_text try: num = element.pPr.numPr.numId.val level = element.pPr.numPr.ilvl.val @@ -102,7 +106,7 @@ level = 0 if p_text: title_level = self.get_title_level(paragraph) - self.paragraphs.append(ParagraphInfo(p_text, title_level, num, level)) + self.paragraphs.append(ParagraphInfo(p_text, title_level, num, level, is_toc)) # 妫�鏌ユ槸鍚︽槸鍥剧墖锛屽鏋滄槸鍥剧墖鍒欒浆鎹负鏂囨湰 img_data = self.get_image_text(paragraph) if img_data: @@ -118,12 +122,27 @@ ParagraphInfo("```json\n" + json.dumps(table_data, indent=4, ensure_ascii=False) + "\n```", 0)) else: continue + # 鍘婚櫎鐩綍 + self.remove_toc(self.paragraphs) # 鐢熸垚鏍囬缂栧彿 Log.info(f"寮�濮嬬敓鎴愭爣棰樼紪鍙峰拰鍒楄〃缂栧彿") self.gen_title_num(self.paragraphs) # 鐢熸垚鏍戝舰缁撴瀯 Log.info(f"寮�濮嬬敓鎴愭爲褰㈢粨鏋�") self.gen_paragraph_tree(self.paragraphs) + + @staticmethod + def remove_toc(paragraphs: [ParagraphInfo]): + rm_list = [] + for p in paragraphs: + if p.is_toc: + rm_list.append(p) + elif rm_list and p.title_level == 1: + break + elif rm_list: + rm_list.append(p) + for p in rm_list: + paragraphs.remove(p) @staticmethod def get_image_text(paragraph): @@ -248,6 +267,7 @@ :param img_data: bytes - 鍥剧墖鏁版嵁 :return: str - 鏂囨湰 """ + return '' return self.image_to_text.gen_text_from_img(img_data) def gen_paragraph_tree(self, paragraphs: typing.List[ParagraphInfo]): diff --git a/knowledgebase/doc/entity_helper.py b/knowledgebase/doc/entity_helper.py index 219eed2..afe0cc4 100644 --- a/knowledgebase/doc/entity_helper.py +++ b/knowledgebase/doc/entity_helper.py @@ -36,4 +36,12 @@ 
doc_dbh.add_entity(_entity) Log.info(f"鏂板Entity锛歿entity}锛宨d锛歿_entity.id}") -entity_helper = EntityHelper() +_entity_helper:EntityHelper = None + +def get_entity_helper(): + global _entity_helper + return _entity_helper + +def init_entity_helper(): + global _entity_helper + _entity_helper = EntityHelper() diff --git a/knowledgebase/doc/entity_recognition.py b/knowledgebase/doc/entity_recognition.py index 6366f8f..8183a3d 100644 --- a/knowledgebase/doc/entity_recognition.py +++ b/knowledgebase/doc/entity_recognition.py @@ -5,7 +5,6 @@ # @version: 0.0.1 # @description: 瀹炰綋鎶藉彇锛屽皢鏂囨湰涓殑瀹炰綋杩涜璇嗗埆鍜屾彁鍙栥�� -from langchain_openai.chat_models import ChatOpenAI from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate from langchain_core.output_parsers import JsonOutputParser import json @@ -13,12 +12,7 @@ from knowledgebase import utils from knowledgebase.db.doc_db_helper import doc_dbh from knowledgebase.log import Log - -llm = ChatOpenAI(temperature=0, - model="qwen2.5-72b-instruct", - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", - api_key="sk-15ecf7e273ad4b729c7f7f42b542749e") - +from knowledgebase.llm import llm class EntityRecognition: """ @@ -43,7 +37,7 @@ # 绾︽潫 - 杈撳嚭鏍煎紡涓篔SON鏍煎紡锛� - 鎻愬彇鐨勫疄浣撹瘝蹇呴』鏄細""" + entity_list + """锛� -- 濡傛灉娌℃湁澶嶅悎涓婅堪瑙勫垯鐨勫疄浣撹瘝鍒欎笉瑕佽緭鍑轰换浣曞疄浣撹瘝锛� +- 濡傛灉娌℃湁绗﹀悎涓婅堪瑙勫垯鐨勫疄浣撹瘝鍒欎笉瑕佽緭鍑轰换浣曞疄浣撹瘝锛� - 杈撳嚭鏁版嵁缁撴瀯涓哄瓧绗︿覆鏁扮粍銆� # 绀轰緥 ```json @@ -76,7 +70,7 @@ text = json.dumps(self.cache) utils.save_text_to_file(text, self.cache_file) - def run(self, in_text: str) -> list[str]: + async def run(self, in_text: str) -> list[str]: """ 杩愯瀹炰綋璇嗗埆鎶藉彇銆� :param in_text: str - 杈撳叆鏂囨湰 @@ -85,7 +79,7 @@ text_md5 = utils.generate_text_md5(in_text) if self.use_cache and text_md5 in self.cache: return self.cache[text_md5] - result = self.chain.invoke({"text": in_text}) + result = await self.chain.ainvoke({"text": in_text}) self.cache[text_md5] = result self.save_cache() return result diff --git a/knowledgebase/doc/models.py b/knowledgebase/doc/models.py index 7eacdc8..c4daaf5 100644 --- a/knowledgebase/doc/models.py +++ b/knowledgebase/doc/models.py @@ -23,8 +23,11 @@ num_level: int - 鍒楄〃搴忓彿绾у埆锛�0琛ㄧず姝f枃 num: int - 鍒楄〃搴忓彿锛屽鏋滄槸鍒楄〃 children: typing.List[ParagraphInfo] - 瀛愭钀藉垪琛� - refs: 寮曠敤鏂囨。 + refs: 寮曠敤鍒楄〃 + entities: 瀹炰綋璇嶅垪琛� + is_toc: 鏄惁鏄洰褰曟爣蹇� """ + id: int text: str title_level: int title_num: str @@ -33,6 +36,15 @@ children: typing.List refs: typing.List entities: typing.List[TEntity] + is_toc: bool + + @property + def title(self): + if self.title_level: + text = self.full_text + idx = text.index('\n') + return text[0:idx] + return '' @property def full_text(self): @@ -61,7 +73,7 @@ full_text = full_text + "\n" + child.full_text_with_children return full_text - def __init__(self, text: str, title_level: int, num=0, num_level=0): + def __init__(self, text: str, title_level: int, num=0, num_level=0, is_toc=False): """ 娈佃惤淇℃伅 @@ -70,6 +82,7 @@ title_level: int - 娈佃惤绾у埆锛�1-9绾ф爣棰橈紝0琛ㄧず姝f枃 num: int - 鍒楄〃搴忓彿 num_level: int - 鍒楄〃搴忓彿绾у埆 + is_toc: bool - 鏄惁鏄洰褰� """ self.text = text self.title_level = title_level @@ -78,6 +91,9 @@ self.num_level = num_level self.children: typing.List[ParagraphInfo] = [] self.entities: typing.List[TEntity] = [] + self.refs: typing.List[ParagraphInfo] = [] + self.is_toc = is_toc + self.id = 0 def __str__(self): return f"{self.full_text}" diff --git a/knowledgebase/llm.py b/knowledgebase/llm.py index ac6e035..1024fbc 100644 --- a/knowledgebase/llm.py +++ b/knowledgebase/llm.py @@ -7,9 +7,9 @@ from langchain_openai.chat_models import ChatOpenAI llm = 
ChatOpenAI(temperature=0,
-                 model="qwen2.5-72b-instruct",
-                 base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
-                 api_key="sk-15ecf7e273ad4b729c7f7f42b542749e")
+                 model="Qwen2.5-72B-Instruct-AWQ",
+                 base_url="http://10.74.15.171:8000/v1",
+                 api_key="EMPTY")
 
 vision_llm = ChatOpenAI(temperature=0,
                         model="qwen2.5-vl-32b-instruct",
diff --git a/knowledgebase/utils.py b/knowledgebase/utils.py
index 7ba2e2c..ae9fe1f 100644
--- a/knowledgebase/utils.py
+++ b/knowledgebase/utils.py
@@ -86,4 +86,4 @@
     :param text:
     :return:
     """
-    return re.sub(r'[\\/:*?"<>|]', '_', text)
+    return re.sub(r'[\t\\/:*?"<>|]', '_', text)
diff --git a/main.py b/main.py
index e69de29..6cc6b78 100644
--- a/main.py
+++ b/main.py
@@ -0,0 +1,35 @@
+import asyncio
+import os
+import sys
+
+from db_struct_flow import DbStructFlow, tc_data_generate
+from knowledgebase.db.doc_db_helper import doc_dbh
+from knowledgebase.doc.doc_processor import DocProcessor
+from knowledgebase.doc.entity_helper import init_entity_helper
+
+
+def doc_split(project_path):
+    docs_path = f'{project_path}/docs'
+    files = os.listdir(docs_path)
+    files = [f'{docs_path}/{x}' for x in filter(lambda x: x.endswith('.docx'), files)]
+    for file in files:
+        DocProcessor(file).process()
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("missing project path. eg: python main.py <path/to/project>")
+        return
+    project_path = sys.argv[1]
+    # split the documents
+    doc_dbh.set_project_path(project_path)
+    init_entity_helper()
+    # doc_split(project_path)
+    # start the LLM processing flow
+    asyncio.run(DbStructFlow(project_path).run())
+    # generate the instruction data tables
+    tc_data_generate()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/testcases/test_doc_processor.py b/testcases/test_doc_processor.py
index 23eb4f2..203ab7a 100644
--- a/testcases/test_doc_processor.py
+++ b/testcases/test_doc_processor.py
@@ -10,20 +10,33 @@
 
 def test_process():
     files = [
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机1553B总线传输通信帧分配（公开）.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机分系统遥测源包设计报告（公开）.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机软件用户需求（公开）.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机遥测大纲（公开）.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机遥测信号分配表（公开）.docx",
-        # r"D:\workspace\PythonProjects\KnowledgeBase\doc\XA-5D无人机指令格式与编码定义（公开）.docx",
-        r"D:\workspace\PythonProjects\KnowledgeBase\doc\指令格式(公开).docx"
+        # r"D:\KnowledgeBase\doc\XA-5D无人机1553B总线传输通信帧分配（公开）.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D无人机分系统遥测源包设计报告（公开）.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D无人机软件用户需求（公开）.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D无人机遥测大纲（公开）.docx",
+        # r"D:\KnowledgeBase\doc\XA-5D无人机遥测信号分配表（公开）.docx",
+        # r"D:\KnowledgeBase\doc\指令格式(公开).docx",
+
+        # r"D:\KnowledgeBase\doc\HY-4A数管分系统遥测源包设计报告 Z 240824 更改3(内部) .docx",
+        # r"D:\KnowledgeBase\doc\HY-4A数管分系统应用软件用户需求（星务管理分册） Z 240831 更改4（内部）.docx",
+        # r"D:\KnowledgeBase\doc\HY-4A卫星1553B总线传输通信帧分配 Z 240824 更改3（内部）.docx",
+        # r"D:\KnowledgeBase\doc\HY-4A卫星遥测大纲 Z 240824 更改3（内部）.docx",
+        # r"D:\KnowledgeBase\doc\通用-数管分系统指令格式与编码定义及使用准则，编写中V4，20240119(内部).docx"
+
+        r"D:\projects\KnowledgeBase\doc_xx25\卫星遥测大纲Z250226（秘密★10年） - 副本.docx"
+
+        # r"D:\KnowledgeBase\doc\指令格式.docx"
     ]
     for file in files:
         doc_processor = DocProcessor(file)
         doc_processor.process()
+
+
 def test_get_text_by_entity():
-    text = doc_dbh.get_text_with_entities(['指令组内容下传'])
+    text = 
doc_dbh.get_text_with_entities(['閬ユ祴婧愬寘涓嬩紶鏃舵満']) print(text) + + if __name__ == '__main__': - # test_process() - test_get_text_by_entity() \ No newline at end of file + test_process() + # test_get_text_by_entity() diff --git a/tpl/entities.json b/tpl/entities.json index 276f12f..4cb5a6d 100644 --- a/tpl/entities.json +++ b/tpl/entities.json @@ -12,11 +12,11 @@ "閬ユ祴澶х翰": { "prompts": "鏂囦欢鍚嶉�氬父鍖呭惈鈥滈仴娴嬧�� 鈥滃ぇ绾测�濈瓑鍏抽敭瀛楋紝鍐呭鍖呭惈瀵归仴娴嬪抚鍙婇仴娴嬪寘鏍煎紡鐨勫畾涔�", "entities": { - "閬ユ祴鏍煎紡瀹氫箟": "涓�鑸湪鈥滈仴娴嬫牸寮忊�濈珷鑺傦紝鍐呭鍖呭惈鈥濋仴娴嬪抚鈥� 鈥濋仴娴嬪寘鈥滃叿浣撴牸寮忕殑瀹氫箟", - "铏氭嫙淇¢亾瀹氫箟": "绔犺妭鍚嶅寘鍚�滆櫄鎷熶俊閬撯�濓紝鍐呭鍖呭惈铏氭嫙淇¢亾鐨勫垝鍒嗭紝鍚勬簮鍖呭湪鍚勮櫄鎷熶俊閬撲笅浼犲垎閰�", - "鎻掑叆鍩�": "绔犺妭鍚嶅寘鍚�滄彃鍏ュ煙鈥濓紝鍐呭涓轰竴寮犺〃鏍硷紝瀹氫箟浜嗘彃鍏ュ煙涓殑閬ユ祴鍙傛暟", - "婧愬寘鍙傛暟琛�": "绔犺妭鍚嶅寘鍚�滄簮鍖呰璁♀�濓紝鍐呭涓哄涓簮鍖呭叿浣撳弬鏁扮殑琛ㄦ牸锛屾瘡涓簮鍖呭崟鐙竴寮犺〃鏍�", - "閬ユ祴婧愬寘涓嬩紶鏃舵満": "绔犺妭鍚嶅寘鍚被浼尖�滈仴娴嬫簮鍖呬笅浼犳椂鏈衡�濈殑鏂囨湰锛屽唴瀹逛负涓�涓〃鏍兼弿杩伴仴娴嬫簮鍖呬笅浼犳椂鏈�" + "閬ユ祴鏍煎紡瀹氫箟": "鏂囨湰鐨勭涓�琛屾槸绔犺妭鏍囬锛岀珷鑺傚悕绉板寘鍚�滈仴娴嬪抚缁撴瀯鈥濓紝鍐呭鍖呭惈鈥濋仴娴嬪抚鈥滃叿浣撴牸寮忕殑瀹氫箟", + "铏氭嫙淇¢亾瀹氫箟": "鏂囨湰鐨勭涓�琛屾槸绔犺妭鏍囬锛岀珷鑺傛爣棰樺寘鍚�滆櫄鎷熶俊閬撯�濆苟涓旂珷鑺傚唴瀹瑰寘鍚�滈仴娴嬭櫄鎷熶俊閬撳垝鍒嗏�濊〃鏍硷紝琛ㄦ牸涓殑鍒楀寘鎷細VCID銆佸垎绫诲悕绉扮瓑", + "鎻掑叆鍩�": "鏂囨湰鐨勭涓�琛屾槸绔犺妭鏍囬锛岀珷鑺傚悕鍖呭惈鈥滄彃鍏ュ煙鈥濓紝鍐呭鍖呭惈鈥滄彃鍏ュ煙鏍煎紡鈥濊〃鏍硷紝瀹氫箟浜嗘彃鍏ュ煙涓殑閬ユ祴鍙傛暟", + "婧愬寘鍙傛暟琛�": "鏂囨湰鐨勭涓�琛屾槸绔犺妭鏍囬锛岀珷鑺傚悕閫氬父涓衡�渪xx鍖呪�濓紝鍐呭涓烘簮鍖呭弬鏁拌〃鏍硷紝瀹氫箟浜嗗寘澶淬�佹暟鎹煙鍏蜂綋鍐呭", + "閬ユ祴婧愬寘涓嬩紶鏃舵満": "鏂囨湰鐨勭涓�琛屾槸绔犺妭鏍囬锛岀珷鑺傚悕绉板寘鍚�滈仴娴嬫簮鍖呬笅浼犳椂鏈衡�濆苟涓旂珷鑺傚唴瀹逛负涓�涓弿杩伴仴娴嬫簮鍖呬笅浼犳椂鏈虹殑琛ㄦ牸" } }, "婧愬寘璁捐": { @@ -32,7 +32,7 @@ "prompts": "鏂囦欢鍚嶄腑鍖呭惈鈥滄�荤嚎鈥濆叧閿瓧锛屽唴瀹逛负鍚勫垎绯荤粺婧愬寘鍦ㄦ�荤嚎涓婁紶杈撶殑瀹氫箟", "entities": { "RT鍦板潃鍒嗛厤": "绔犺妭鍚嶅寘鍚�淩T鍦板潃鈥濓紝鍐呭涓哄悇鍒嗙郴缁熷拰RT鍦板潃鍒嗛厤鍏崇郴鐨勮〃鏍�", - "鍒嗙郴缁熸簮鍖�": "閫氬父鍦ㄥ彾瀛愮珷鑺備腑锛屽唴瀹逛负璇ュ垎绯荤粺鍚勬簮鍖呭湪鎬荤嚎涓婁紶杈撴椂鎵�浣跨敤鐨勨�滀紶杈撴湇鍔♀�濄�佲�滃瓙鍦板潃鈥濄�佲�滈�氫俊甯у彿鈥濈瓑锛屽苟鎻忚堪浜嗘簮鍖呭悕绉般�丄PID銆佸寘闀跨瓑淇℃伅", + "鍒嗙郴缁熸簮鍖�": "绔犺妭鍐呭鎻忚堪浜嗗悇涓垎绯荤粺鐨勬簮鍖呭垪琛紝閫氬父鏄彾瀛愮珷鑺備笖鏍囬涓嶅寘鍚被浼尖�滈�氫俊甯у垎閰嶁�濈殑鏂囨湰锛涘垪琛ㄥ寘鎷細鈥滃寘搴忓彿鈥濄�佲�滃寘鍚嶇О鈥濄�佲�淎PID鈥濄�佲�滄湇鍔★紝瀛愭湇鍔♀�濄�佲�滄簮鍖呮�婚暱鈥濄�佲�滀紶杈撳懆鏈熲�濄�佲�滀紶杈撴柟鍚戔�濄�佲�滀紶杈撴湇鍔♀�濄�佲�滃瓙鍦板潃/妯″紡鈥濄�佲�滈�氫俊甯у彿鈥濈瓑鍒楋紝骞舵弿杩颁簡婧愬寘鍚嶇О銆丄PID銆佸寘闀跨瓑淇℃伅銆�", "婧愬寘鍙傛暟琛�": "绔犺妭鍚嶅寘鍚�滄簮鍖呰璁♀�濓紝鍐呭涓哄涓簮鍖呭叿浣撳弬鏁扮殑琛ㄦ牸锛屾瘡涓簮鍖呭崟鐙竴寮犺〃鏍�" } } diff --git a/tpl/tc_pkt_format.json b/tpl/tc_pkt_format.json index 73c812f..71c4ee7 100644 --- a/tpl/tc_pkt_format.json +++ b/tpl/tc_pkt_format.json @@ -18,28 +18,28 @@ "name": "鍖呯増鏈彿", "code": "packetVersionNumber", "length": 3, - "value": "{{鍖呯増鏈彿}}", + "value": "{{packetVersionNumber}}", "type": "const" }, { "name": "鍖呯被鍨�", "code": "packetType", "length": 1, - "value": "{{鍖呯被鍨媫}", + "value": "{{packetType}}", "type": "const" }, { "name": "鏁版嵁鍖哄ご鏍囧織", "code": "dataFieldHeaderFlag", "length": 1, - "value": "{{鏁版嵁鍖哄ご鏍囧織}}", + "value": "{{dataFieldHeaderFlag}}", "type": "const" }, { "name": "搴旂敤杩囩▼鏍囪瘑绗�", "code": "apid", "length": 11, - "value": "{{搴旂敤杩囩▼鏍囪瘑绗}", + "value": "{{apid}}", "type": "const" } ] @@ -54,7 +54,7 @@ "name": "搴忓垪鏍囧織", "code": "sequenceFlags", "length": 2, - "value": "{{搴忓垪鏍囧織}}", + "value": "{{sequenceFlags}}", "type": "const" }, { @@ -89,14 +89,14 @@ "name": "鍓澶存爣蹇�", "code": "ccsdsSecondaryHeaderFlag", "length": 1, - "value": "{{鍓澶存爣蹇梷}", + "value": "{{ccsdsSecondaryHeaderFlag}}", "type": "const" }, { "name": "閬ユ帶鍖呯増鏈彿", "code": "tcPktVersionNumber", "length": 3, - "value": "{{閬ユ帶鍖呯増鏈彿}}", + "value": "{{tcPktVersionNumber}}", "type": "const" }, { @@ -104,27 +104,27 @@ "code": "acknowledgmentFlag", "length": 4, "type": "const", - "value": "{{鍛戒护姝g‘搴旂瓟}}" + "value": "{{acknowledgmentFlag}}" }, { "name": "鏈嶅姟绫诲瀷", "code": "serviceType", "length": 8, "type": "const", - "value": "{{鏈嶅姟绫诲瀷}}" + "value": "{{serviceType}}" }, { "name": "鏈嶅姟瀛愮被鍨�", "code": "serviceSubtype", "length": 8, "type": "const", - "value": "{{鏈嶅姟瀛愮被鍨媫}" + "value": "{{serviceSubtype}}" }, { "name": "婧愬湴鍧�", "code": "sourceAddr", "length": 8, - "value": "{{婧愬湴鍧�}}", + "value": 
"{{sourceAddr}}", "type": "const" } ] -- Gitblit v1.9.1