修改简单模式验证

This commit is contained in:
2025-12-03 17:13:47 +08:00
parent c4f851d387
commit 43a0636913
5 changed files with 3074 additions and 41 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,3 +1,4 @@
起飞
起飞后移动到学生宿舍上方降落
起飞后移动到学生宿舍上方查找蓝色的车
起飞后移动到学生宿舍上方寻找蓝色的车

View File

@@ -1,12 +1,13 @@
instruction_index,instruction,run_number,success,attempts,response_time,plan_id,error,timestamp
1,起飞后移动到学生宿舍上方降落,1,False,1,2.048215866088867,,,2025-12-02 20:44:56
2,起飞后移动到学生宿舍上方查找蓝色的车,1,True,1,14.806509971618652,8a6f282e-c306-4249-962c-d47d48c31bad,,2025-12-02 20:45:12
3,起飞后移动到学生宿舍上方找蓝色的车,1,True,1,15.240672826766968,f298e2f4-9295-4ffd-8fff-0d0eb9a0ee6c,,2025-12-02 20:45:28
4,起飞后移动到学生宿舍上方检测蓝色的车,1,True,1,13.8105788230896,31733491-2030-43b1-a5e4-eb1300b8d23f,,2025-12-02 20:45:43
5,到学生宿舍上方查找蓝色的车,1,True,1,12.74257755279541,4c855ef4-c251-48cd-b464-4816bc62fbb5,,2025-12-02 20:45:57
6,飞到学生宿舍上方查找蓝色车辆并进行打击,1,True,1,16.117226600646973,63d0e7c3-dcbb-40f0-b76b-6f0191c6512f,,2025-12-02 20:46:14
7,起飞后移动到学生宿舍上方搜索蓝色车辆并进行打击,1,True,1,16.25989079475403,1b4a537e-c1be-4abf-897e-c21b677b83b7,,2025-12-02 20:46:31
8,起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,True,1,16.014280796051025,f88ea46f-5e0b-48fb-b1da-326d287af3d6,,2025-12-02 20:46:48
9,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,True,1,15.530286073684692,f56c811a-8304-4c68-8260-01643928bf3e,,2025-12-02 20:47:05
10,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,True,1,16.660754919052124,07a13346-3026-4dce-a976-4e0faa132248,,2025-12-02 20:47:23
11,飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,True,1,14.128317832946777,16426d41-4f02-4e27-a05e-f4eb84d6c935,,2025-12-02 20:47:38
1,起飞,1,True,1,2.4630444049835205,42903026-b02b-4089-859d-aec5cfa2435e,,2025-12-03 17:09:32
2,起飞后移动到学生宿舍上方降落,1,True,1,10.017558574676514,86238ad2-e275-4d50-905c-175bd2f26fd0,,2025-12-03 17:09:43
3,起飞后移动到学生宿舍上方找蓝色的车,1,True,1,12.420023202896118,d8345bc3-b70f-41d7-b9fc-3e4898d7409e,,2025-12-03 17:09:56
4,起飞后移动到学生宿舍上方寻找蓝色的车,1,True,1,12.864884614944458,29b5ee20-c809-4511-af08-80a85240c729,,2025-12-03 17:10:10
5,起飞后移动到学生宿舍上方检测蓝色的车,1,True,1,10.438142538070679,5e7eb8c7-287a-469a-b6c0-a4102c1b0dac,,2025-12-03 17:10:21
6,飞到学生宿舍上方查找蓝色,1,True,1,11.751057386398315,ef3d1981-1d51-433d-b2f4-2e92838075fd,,2025-12-03 17:10:34
7,到学生宿舍上方查找蓝色车辆并进行打击,1,True,1,32.890604972839355,d8fc4658-08af-4910-89c4-b029c9a5daa0,,2025-12-03 17:11:08
8,起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击,1,False,1,33.2862343788147,,,2025-12-03 17:11:42
9,飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,True,1,12.312166213989258,7fbf0091-f7d3-4c3a-a6b7-4c0bfd4df66e,,2025-12-03 17:11:56
10,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,True,1,12.204660892486572,3ae0b258-b7e4-460c-9cfe-4b224266edc4,,2025-12-03 17:12:09
11,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,True,1,12.808414936065674,2acb84cf-c89e-460d-a4d9-8d1edb4ee69a,,2025-12-03 17:12:23
12,起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,True,1,11.071707487106323,c05d46c9-1b1b-4c8d-b64b-86b76d0c4099,,2025-12-03 17:12:35
1 instruction_index instruction run_number success attempts response_time plan_id error timestamp
2 1 起飞后移动到学生宿舍上方降落 起飞 1 False True 1 2.048215866088867 2.4630444049835205 42903026-b02b-4089-859d-aec5cfa2435e 2025-12-02 20:44:56 2025-12-03 17:09:32
3 2 起飞后移动到学生宿舍上方查找蓝色的车 起飞后移动到学生宿舍上方降落 1 True 1 14.806509971618652 10.017558574676514 8a6f282e-c306-4249-962c-d47d48c31bad 86238ad2-e275-4d50-905c-175bd2f26fd0 2025-12-02 20:45:12 2025-12-03 17:09:43
4 3 起飞后移动到学生宿舍上方寻找蓝色的车 起飞后移动到学生宿舍上方查找蓝色的车 1 True 1 15.240672826766968 12.420023202896118 f298e2f4-9295-4ffd-8fff-0d0eb9a0ee6c d8345bc3-b70f-41d7-b9fc-3e4898d7409e 2025-12-02 20:45:28 2025-12-03 17:09:56
5 4 起飞后移动到学生宿舍上方检测蓝色的车 起飞后移动到学生宿舍上方寻找蓝色的车 1 True 1 13.8105788230896 12.864884614944458 31733491-2030-43b1-a5e4-eb1300b8d23f 29b5ee20-c809-4511-af08-80a85240c729 2025-12-02 20:45:43 2025-12-03 17:10:10
6 5 飞到学生宿舍上方查找蓝色的车 起飞后移动到学生宿舍上方检测蓝色的车 1 True 1 12.74257755279541 10.438142538070679 4c855ef4-c251-48cd-b464-4816bc62fbb5 5e7eb8c7-287a-469a-b6c0-a4102c1b0dac 2025-12-02 20:45:57 2025-12-03 17:10:21
7 6 飞到学生宿舍上方查找蓝色车辆并进行打击 飞到学生宿舍上方查找蓝色的车 1 True 1 16.117226600646973 11.751057386398315 63d0e7c3-dcbb-40f0-b76b-6f0191c6512f ef3d1981-1d51-433d-b2f4-2e92838075fd 2025-12-02 20:46:14 2025-12-03 17:10:34
8 7 起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击 飞到学生宿舍上方查找蓝色车辆并进行打击 1 True 1 16.25989079475403 32.890604972839355 1b4a537e-c1be-4abf-897e-c21b677b83b7 d8fc4658-08af-4910-89c4-b029c9a5daa0 2025-12-02 20:46:31 2025-12-03 17:11:08
9 8 起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资 起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击 1 True False 1 16.014280796051025 33.2862343788147 f88ea46f-5e0b-48fb-b1da-326d287af3d6 2025-12-02 20:46:48 2025-12-03 17:11:42
10 9 飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆 起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资 1 True 1 15.530286073684692 12.312166213989258 f56c811a-8304-4c68-8260-01643928bf3e 7fbf0091-f7d3-4c3a-a6b7-4c0bfd4df66e 2025-12-02 20:47:05 2025-12-03 17:11:56
11 10 飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆 飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆 1 True 1 16.660754919052124 12.204660892486572 07a13346-3026-4dce-a976-4e0faa132248 3ae0b258-b7e4-460c-9cfe-4b224266edc4 2025-12-02 20:47:23 2025-12-03 17:12:09
12 11 起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资 飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆 1 True 1 14.128317832946777 12.808414936065674 16426d41-4f02-4e27-a05e-f4eb84d6c935 2acb84cf-c89e-460d-a4d9-8d1edb4ee69a 2025-12-02 20:47:38 2025-12-03 17:12:23
13 12 起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资 1 True 1 11.071707487106323 c05d46c9-1b1b-4c8d-b64b-86b76d0c4099 2025-12-03 17:12:35

View File

@@ -1,12 +1,13 @@
instruction_index,instruction,total_runs,successful_runs,success_rate,avg_response_time,min_response_time,max_response_time,total_response_time
1,起飞后移动到学生宿舍上方降落,1,0,0.00%,N/A,N/A,N/A,0.00s
2,起飞后移动到学生宿舍上方查找蓝色的车,1,1,100.00%,14.81s,14.81s,14.81s,14.81s
3,起飞后移动到学生宿舍上方找蓝色的车,1,1,100.00%,15.24s,15.24s,15.24s,15.24s
4,起飞后移动到学生宿舍上方检测蓝色的车,1,1,100.00%,13.81s,13.81s,13.81s,13.81s
5,到学生宿舍上方查找蓝色的车,1,1,100.00%,12.74s,12.74s,12.74s,12.74s
6,飞到学生宿舍上方查找蓝色车辆并进行打击,1,1,100.00%,16.12s,16.12s,16.12s,16.12s
7,起飞后移动到学生宿舍上方搜索蓝色车辆并进行打击,1,1,100.00%,16.26s,16.26s,16.26s,16.26s
8,起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,1,100.00%,16.01s,16.01s,16.01s,16.01s
9,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,1,100.00%,15.53s,15.53s,15.53s,15.53s
10,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,1,100.00%,16.66s,16.66s,16.66s,16.66s
11,飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,1,100.00%,14.13s,14.13s,14.13s,14.13s
1,起飞,1,1,100.00%,2.46s,2.46s,2.46s,2.46s
2,起飞后移动到学生宿舍上方降落,1,1,100.00%,10.02s,10.02s,10.02s,10.02s
3,起飞后移动到学生宿舍上方找蓝色的车,1,1,100.00%,12.42s,12.42s,12.42s,12.42s
4,起飞后移动到学生宿舍上方寻找蓝色的车,1,1,100.00%,12.86s,12.86s,12.86s,12.86s
5,起飞后移动到学生宿舍上方检测蓝色的车,1,1,100.00%,10.44s,10.44s,10.44s,10.44s
6,飞到学生宿舍上方查找蓝色,1,1,100.00%,11.75s,11.75s,11.75s,11.75s
7,到学生宿舍上方查找蓝色车辆并进行打击,1,1,100.00%,32.89s,32.89s,32.89s,32.89s
8,起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击,1,0,0.00%,N/A,N/A,N/A,0.00s
9,飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,1,100.00%,12.31s,12.31s,12.31s,12.31s
10,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,1,100.00%,12.20s,12.20s,12.20s,12.20s
11,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,1,100.00%,12.81s,12.81s,12.81s,12.81s
12,起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,1,100.00%,11.07s,11.07s,11.07s,11.07s
1 instruction_index instruction total_runs successful_runs success_rate avg_response_time min_response_time max_response_time total_response_time
2 1 起飞后移动到学生宿舍上方降落 起飞 1 0 1 0.00% 100.00% N/A 2.46s N/A 2.46s N/A 2.46s 0.00s 2.46s
3 2 起飞后移动到学生宿舍上方查找蓝色的车 起飞后移动到学生宿舍上方降落 1 1 100.00% 14.81s 10.02s 14.81s 10.02s 14.81s 10.02s 14.81s 10.02s
4 3 起飞后移动到学生宿舍上方寻找蓝色的车 起飞后移动到学生宿舍上方查找蓝色的车 1 1 100.00% 15.24s 12.42s 15.24s 12.42s 15.24s 12.42s 15.24s 12.42s
5 4 起飞后移动到学生宿舍上方检测蓝色的车 起飞后移动到学生宿舍上方寻找蓝色的车 1 1 100.00% 13.81s 12.86s 13.81s 12.86s 13.81s 12.86s 13.81s 12.86s
6 5 飞到学生宿舍上方查找蓝色的车 起飞后移动到学生宿舍上方检测蓝色的车 1 1 100.00% 12.74s 10.44s 12.74s 10.44s 12.74s 10.44s 12.74s 10.44s
7 6 飞到学生宿舍上方查找蓝色车辆并进行打击 飞到学生宿舍上方查找蓝色的车 1 1 100.00% 16.12s 11.75s 16.12s 11.75s 16.12s 11.75s 16.12s 11.75s
8 7 起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击 飞到学生宿舍上方查找蓝色车辆并进行打击 1 1 100.00% 16.26s 32.89s 16.26s 32.89s 16.26s 32.89s 16.26s 32.89s
9 8 起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资 起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击 1 1 0 100.00% 0.00% 16.01s N/A 16.01s N/A 16.01s N/A 16.01s 0.00s
10 9 飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆 起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资 1 1 100.00% 15.53s 12.31s 15.53s 12.31s 15.53s 12.31s 15.53s 12.31s
11 10 飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆 飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆 1 1 100.00% 16.66s 12.20s 16.66s 12.20s 16.66s 12.20s 16.66s 12.20s
12 11 起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资 飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆 1 1 100.00% 14.13s 12.81s 14.13s 12.81s 14.13s 12.81s 14.13s 12.81s
13 12 起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资 1 1 100.00% 11.07s 11.07s 11.07s 11.07s

View File

@@ -84,10 +84,15 @@ def send_api_request(prompt, instruction_idx, run_number):
for attempt in range(MAX_RETRIES):
try:
debug_print(f"指令 {instruction_idx}-{run_number} 尝试 {attempt + 1}")
debug_print(f"请求URL: {url}")
debug_print(f"请求Payload: {json.dumps(payload, ensure_ascii=False)}")
start_time = time.time()
response = requests.post(url, data=json.dumps(payload), headers=headers, timeout=60) # 增加超时
response_time = time.time() - start_time
debug_print(f"HTTP状态码: {response.status_code}")
debug_print(f"响应时间: {response_time:.2f}")
# 首先检查HTTP状态
response.raise_for_status()
@@ -98,27 +103,51 @@ def send_api_request(prompt, instruction_idx, run_number):
debug_print(f"JSON解析失败: {e}, 响应文本: {response.text[:200]}")
raise
# 判断是简单模式还是复杂模式
# 简单模式和复杂模式都使用root字段区别是
# - 简单模式root是单个action节点没有children
# - 复杂模式root是控制流节点或有children的节点
root_node = data.get('root', {})
root_type = root_node.get('type', '')
root_has_children = bool(root_node.get('children'))
# 基本验证 - 放宽要求
# 简单模式root是action类型且没有children
is_simple_mode = (root_type == 'action' and not root_has_children)
# 复杂模式有root字段且不是简单模式
is_complex_mode = ("root" in data and not is_simple_mode)
# 基本验证 - 支持简单模式和复杂模式
validation_checks = {
"is_dict": isinstance(data, dict),
"has_root": "root" in data,
"root_has_children": bool(root_node.get('children')),
"has_plan_id": "plan_id" in data,
"has_visualization_url": "visualization_url" in data,
}
# 模式特定的验证
if is_simple_mode:
# 简单模式root必须是action类型且没有children
validation_checks.update({
"root_is_action": root_type == 'action',
"root_no_children": not root_has_children,
"root_has_name": bool(root_node.get('name')),
})
# 简单模式和复杂模式都不应该有mode字段
validation_checks["no_mode_field"] = "mode" not in data
elif is_complex_mode:
# 复杂模式root应该有children控制流节点
validation_checks.update({
"root_has_children": root_has_children,
})
# 复杂模式不应该有mode字段
validation_checks["no_mode_field"] = "mode" not in data
else:
# 既不是简单模式也不是复杂模式,这是错误
validation_checks["valid_mode"] = False
debug_print(f"⚠️ 响应既不是简单模式也不是复杂模式: root_type={root_type}, has_children={root_has_children}")
# 可选的高级验证
advanced_checks = {
"leaf_nodes_valid": check_leaf_nodes(root_node),
"has_safety": check_safety_monitoring(root_node)
}
# 合并验证结果
validation_checks.update(advanced_checks)
# 统计无效节点但不作为失败条件
advanced_checks = {}
invalid_actions = []
invalid_conditions = []
@@ -139,13 +168,41 @@ def send_api_request(prompt, instruction_idx, run_number):
for child in current_node.get('children', []):
collect_nodes(child)
collect_nodes(root_node)
if is_complex_mode:
# 复杂模式的高级验证
advanced_checks = {
"leaf_nodes_valid": check_leaf_nodes(root_node),
"has_safety": check_safety_monitoring(root_node)
}
collect_nodes(root_node)
elif is_simple_mode:
# 简单模式检查action名称是否有效
action_name = root_node.get('name', '')
if action_name and action_name not in ['deliver_payload', 'emergency_return', 'fly_to_waypoint',
'land', 'loiter', 'object_detect', 'preflight_checks',
'search_pattern', 'strike_target', 'battle_damage_assessment', 'takeoff']:
invalid_actions.append(action_name)
# 主要检查基本验证,高级验证作为警告
success = all(validation_checks[k] for k in ["is_dict", "has_root", "root_has_children",
"has_plan_id", "has_visualization_url"])
# 合并验证结果
validation_checks.update(advanced_checks)
debug_print(f"验证结果: 成功={success}, 基本验证通过={all(validation_checks.values())}")
# 根据模式确定成功条件
if is_simple_mode:
# 简单模式:必须有的字段
required_checks = ["is_dict", "has_root", "has_plan_id", "has_visualization_url",
"root_is_action", "root_no_children", "root_has_name", "no_mode_field"]
success = all(validation_checks.get(k, False) for k in required_checks)
elif is_complex_mode:
# 复杂模式:必须有的字段
required_checks = ["is_dict", "has_root", "has_plan_id", "has_visualization_url",
"root_has_children", "no_mode_field"]
success = all(validation_checks.get(k, False) for k in required_checks)
else:
# 无效模式
success = False
mode_type = "简单模式" if is_simple_mode else ("复杂模式" if is_complex_mode else "未知模式")
debug_print(f"验证结果: 模式={mode_type}, 成功={success}, 基本验证通过={all(validation_checks.values())}")
return {
"success": success,
@@ -156,13 +213,22 @@ def send_api_request(prompt, instruction_idx, run_number):
"invalid_conditions": invalid_conditions,
"error": None,
"attempts": attempt + 1,
"http_status": response.status_code
"http_status": response.status_code,
"mode_type": mode_type
}
except requests.exceptions.RequestException as e:
error_msg = f"请求失败: {e}"
http_status = getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
debug_print(f"请求异常: {error_msg}")
debug_print(f"HTTP状态码: {http_status}")
if hasattr(e, 'response') and e.response is not None:
try:
debug_print(f"响应内容: {e.response.text[:500]}")
except:
pass
if attempt < MAX_RETRIES - 1:
debug_print(f"等待 {RETRY_DELAY} 秒后重试...")
time.sleep(RETRY_DELAY)
continue
return {
@@ -174,12 +240,15 @@ def send_api_request(prompt, instruction_idx, run_number):
"invalid_conditions": [],
"error": error_msg,
"attempts": attempt + 1,
"http_status": getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
"http_status": http_status,
"mode_type": "未知"
}
except Exception as e:
error_msg = f"未知错误: {e}"
debug_print(f"未知错误: {error_msg}")
import traceback
debug_print(f"错误堆栈: {traceback.format_exc()}")
return {
"success": False,
"data": None,
@@ -189,7 +258,8 @@ def send_api_request(prompt, instruction_idx, run_number):
"invalid_conditions": [],
"error": error_msg,
"attempts": attempt + 1,
"http_status": None
"http_status": None,
"mode_type": "未知"
}
def read_instructions(filename):
@@ -215,6 +285,7 @@ def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
f.write(f"指令 #{instruction_idx} - 运行 #{run_number} - {timestamp}\n")
f.write(f"HTTP状态: {result.get('http_status', 'N/A')}\n")
f.write(f"原始指令: {prompt}\n")
f.write(f"模式类型: {result.get('mode_type', '未知')}\n")
f.write(f"尝试次数: {result['attempts']}\n")
f.write(f"响应时间: {result['response_time']:.2f}\n")
f.write(f"结果: {'✅ 成功' if result['success'] else '❌ 失败'}\n")