修改简单模式验证

2025-12-03 17:13:47 +08:00
parent c4f851d387
commit 43a0636913
5 changed files with 3074 additions and 41 deletions
--- a/tools/test_validate/test_validity.py
+++ b/tools/test_validate/test_validity.py
@@ -84,10 +84,15 @@ def send_api_request(prompt, instruction_idx, run_number):
    for attempt in range(MAX_RETRIES):
        try:
            debug_print(f"指令 {instruction_idx}-{run_number} 尝试 {attempt + 1}")
+            debug_print(f"请求URL: {url}")
+            debug_print(f"请求Payload: {json.dumps(payload, ensure_ascii=False)}")
            start_time = time.time()
            response = requests.post(url, data=json.dumps(payload), headers=headers, timeout=60)  # 增加超时
            response_time = time.time() - start_time
            
+            debug_print(f"HTTP状态码: {response.status_code}")
+            debug_print(f"响应时间: {response_time:.2f}秒")
+            
            # 首先检查HTTP状态
            response.raise_for_status()
            
@@ -98,27 +103,51 @@ def send_api_request(prompt, instruction_idx, run_number):
                debug_print(f"JSON解析失败: {e}, 响应文本: {response.text[:200]}")
                raise
            
+            # 判断是简单模式还是复杂模式
+            # 简单模式和复杂模式都使用root字段，区别是：
+            # - 简单模式：root是单个action节点，没有children
+            # - 复杂模式：root是控制流节点或有children的节点
            root_node = data.get('root', {})
+            root_type = root_node.get('type', '')
+            root_has_children = bool(root_node.get('children'))
            
-            # 基本验证 - 放宽要求
+            # 简单模式：root是action类型且没有children
+            is_simple_mode = (root_type == 'action' and not root_has_children)
+            # 复杂模式：有root字段且不是简单模式
+            is_complex_mode = ("root" in data and not is_simple_mode)
+            
+            # 基本验证 - 支持简单模式和复杂模式
            validation_checks = {
                "is_dict": isinstance(data, dict),
                "has_root": "root" in data,
-                "root_has_children": bool(root_node.get('children')),
                "has_plan_id": "plan_id" in data,
                "has_visualization_url": "visualization_url" in data,
            }
            
+            # 模式特定的验证
+            if is_simple_mode:
+                # 简单模式：root必须是action类型，且没有children
+                validation_checks.update({
+                    "root_is_action": root_type == 'action',
+                    "root_no_children": not root_has_children,
+                    "root_has_name": bool(root_node.get('name')),
+                })
+                # 简单模式和复杂模式都不应该有mode字段
+                validation_checks["no_mode_field"] = "mode" not in data
+            elif is_complex_mode:
+                # 复杂模式：root应该有children（控制流节点）
+                validation_checks.update({
+                    "root_has_children": root_has_children,
+                })
+                # 复杂模式不应该有mode字段
+                validation_checks["no_mode_field"] = "mode" not in data
+            else:
+                # 既不是简单模式也不是复杂模式，这是错误
+                validation_checks["valid_mode"] = False
+                debug_print(f"⚠️  响应既不是简单模式也不是复杂模式: root_type={root_type}, has_children={root_has_children}")
+            
            # 可选的高级验证
-            advanced_checks = {
-                "leaf_nodes_valid": check_leaf_nodes(root_node),
-                "has_safety": check_safety_monitoring(root_node)
-            }
-            
-            # 合并验证结果
-            validation_checks.update(advanced_checks)
-            
-            # 统计无效节点但不作为失败条件
+            advanced_checks = {}
            invalid_actions = []
            invalid_conditions = []
            
@@ -139,13 +168,41 @@ def send_api_request(prompt, instruction_idx, run_number):
                for child in current_node.get('children', []):
                    collect_nodes(child)
            
-            collect_nodes(root_node)
+            if is_complex_mode:
+                # 复杂模式的高级验证
+                advanced_checks = {
+                    "leaf_nodes_valid": check_leaf_nodes(root_node),
+                    "has_safety": check_safety_monitoring(root_node)
+                }
+                collect_nodes(root_node)
+            elif is_simple_mode:
+                # 简单模式：检查action名称是否有效
+                action_name = root_node.get('name', '')
+                if action_name and action_name not in ['deliver_payload', 'emergency_return', 'fly_to_waypoint', 
+                                                       'land', 'loiter', 'object_detect', 'preflight_checks', 
+                                                       'search_pattern', 'strike_target', 'battle_damage_assessment', 'takeoff']:
+                    invalid_actions.append(action_name)
            
-            # 主要检查基本验证，高级验证作为警告
-            success = all(validation_checks[k] for k in ["is_dict", "has_root", "root_has_children", 
-                                                       "has_plan_id", "has_visualization_url"])
+            # 合并验证结果
+            validation_checks.update(advanced_checks)
            
-            debug_print(f"验证结果: 成功={success}, 基本验证通过={all(validation_checks.values())}")
+            # 根据模式确定成功条件
+            if is_simple_mode:
+                # 简单模式：必须有的字段
+                required_checks = ["is_dict", "has_root", "has_plan_id", "has_visualization_url", 
+                                 "root_is_action", "root_no_children", "root_has_name", "no_mode_field"]
+                success = all(validation_checks.get(k, False) for k in required_checks)
+            elif is_complex_mode:
+                # 复杂模式：必须有的字段
+                required_checks = ["is_dict", "has_root", "has_plan_id", "has_visualization_url", 
+                                 "root_has_children", "no_mode_field"]
+                success = all(validation_checks.get(k, False) for k in required_checks)
+            else:
+                # 无效模式
+                success = False
+            
+            mode_type = "简单模式" if is_simple_mode else ("复杂模式" if is_complex_mode else "未知模式")
+            debug_print(f"验证结果: 模式={mode_type}, 成功={success}, 基本验证通过={all(validation_checks.values())}")
            
            return {
                "success": success,
@@ -156,13 +213,22 @@ def send_api_request(prompt, instruction_idx, run_number):
                "invalid_conditions": invalid_conditions,
                "error": None,
                "attempts": attempt + 1,
-                "http_status": response.status_code
+                "http_status": response.status_code,
+                "mode_type": mode_type
            }
            
        except requests.exceptions.RequestException as e:
            error_msg = f"请求失败: {e}"
+            http_status = getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
            debug_print(f"请求异常: {error_msg}")
+            debug_print(f"HTTP状态码: {http_status}")
+            if hasattr(e, 'response') and e.response is not None:
+                try:
+                    debug_print(f"响应内容: {e.response.text[:500]}")
+                except:
+                    pass
            if attempt < MAX_RETRIES - 1:
+                debug_print(f"等待 {RETRY_DELAY} 秒后重试...")
                time.sleep(RETRY_DELAY)
                continue
            return {
@@ -174,12 +240,15 @@ def send_api_request(prompt, instruction_idx, run_number):
                "invalid_conditions": [],
                "error": error_msg,
                "attempts": attempt + 1,
-                "http_status": getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
+                "http_status": http_status,
+                "mode_type": "未知"
            }
            
        except Exception as e:
            error_msg = f"未知错误: {e}"
            debug_print(f"未知错误: {error_msg}")
+            import traceback
+            debug_print(f"错误堆栈: {traceback.format_exc()}")
            return {
                "success": False,
                "data": None,
@@ -189,7 +258,8 @@ def send_api_request(prompt, instruction_idx, run_number):
                "invalid_conditions": [],
                "error": error_msg,
                "attempts": attempt + 1,
-                "http_status": None
+                "http_status": None,
+                "mode_type": "未知"
            }

 def read_instructions(filename):
@@ -215,6 +285,7 @@ def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
        f.write(f"指令 #{instruction_idx} - 运行 #{run_number} - {timestamp}\n")
        f.write(f"HTTP状态: {result.get('http_status', 'N/A')}\n")
        f.write(f"原始指令: {prompt}\n")
+        f.write(f"模式类型: {result.get('mode_type', '未知')}\n")
        f.write(f"尝试次数: {result['attempts']}\n")
        f.write(f"响应时间: {result['response_time']:.2f}秒\n")
        f.write(f"结果: {'✅ 成功' if result['success'] else '❌ 失败'}\n")