去除ROS2相关内容,新增一键启动脚本
This commit is contained in:
286
tools/api_test.log
Normal file
286
tools/api_test.log
Normal file
File diff suppressed because one or more lines are too long
@@ -1,149 +0,0 @@
|
|||||||
{
|
|
||||||
"plan_id": "9f743b03-8ba7-4a06-9260-337463887fc2",
|
|
||||||
"root": {
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"name": "preflight_checks",
|
|
||||||
"params": {
|
|
||||||
"check_level": "comprehensive"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "takeoff",
|
|
||||||
"params": {
|
|
||||||
"altitude": 2
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "fly_to_waypoint",
|
|
||||||
"params": {
|
|
||||||
"acceptance_radius": 2,
|
|
||||||
"x": 5,
|
|
||||||
"y": 3,
|
|
||||||
"z": 2
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"name": "loiter",
|
|
||||||
"params": {
|
|
||||||
"duration": 10
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "object_detect",
|
|
||||||
"params": {
|
|
||||||
"count": 1,
|
|
||||||
"description": "学生",
|
|
||||||
"target_class": "person"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "DirectDetectionSequence",
|
|
||||||
"type": "Sequence"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "search_pattern",
|
|
||||||
"params": {
|
|
||||||
"center_x": 5,
|
|
||||||
"center_y": 3,
|
|
||||||
"center_z": 2,
|
|
||||||
"count": 1,
|
|
||||||
"description": "学生",
|
|
||||||
"pattern_type": "spiral",
|
|
||||||
"radius": 50,
|
|
||||||
"target_class": "person"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "TargetAcquisitionSelector",
|
|
||||||
"params": {
|
|
||||||
"memory": true
|
|
||||||
},
|
|
||||||
"type": "Selector"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "track_object",
|
|
||||||
"params": {
|
|
||||||
"description": "学生",
|
|
||||||
"min_confidence": 0.7,
|
|
||||||
"safe_distance": 10,
|
|
||||||
"target_class": "person",
|
|
||||||
"track_time": 20
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "land",
|
|
||||||
"params": {
|
|
||||||
"mode": "home"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "MainTrackingTask",
|
|
||||||
"type": "Sequence"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"name": "battery_above",
|
|
||||||
"params": {
|
|
||||||
"threshold": 0.35
|
|
||||||
},
|
|
||||||
"type": "condition"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "gps_status",
|
|
||||||
"params": {
|
|
||||||
"min_satellites": 8
|
|
||||||
},
|
|
||||||
"type": "condition"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"children": [
|
|
||||||
{
|
|
||||||
"name": "emergency_return",
|
|
||||||
"params": {
|
|
||||||
"reason": "safety_breach"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "land",
|
|
||||||
"params": {
|
|
||||||
"mode": "home"
|
|
||||||
},
|
|
||||||
"type": "action"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "EmergencyHandler",
|
|
||||||
"type": "Sequence"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "SafetyMonitor",
|
|
||||||
"params": {
|
|
||||||
"memory": true
|
|
||||||
},
|
|
||||||
"type": "Selector"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"name": "TrackingMission",
|
|
||||||
"params": {
|
|
||||||
"policy": "all_success"
|
|
||||||
},
|
|
||||||
"type": "Parallel"
|
|
||||||
},
|
|
||||||
"visualization_url": "/static/py_tree.png"
|
|
||||||
}
|
|
||||||
@@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
# The base URL of your running FastAPI service
|
# The base URL of your running FastAPI service
|
||||||
@@ -14,17 +16,49 @@ ENDPOINT = "/generate_plan"
|
|||||||
# The user prompt we will send for the test
|
# The user prompt we will send for the test
|
||||||
TEST_PROMPT = "已知目标检测红色气球危险性高于蓝色气球高于绿色气球,飞往搜索区搜索并锁定危险性最高的气球,对其跟踪30秒后进行打击操作"
|
TEST_PROMPT = "已知目标检测红色气球危险性高于蓝色气球高于绿色气球,飞往搜索区搜索并锁定危险性最高的气球,对其跟踪30秒后进行打击操作"
|
||||||
|
|
||||||
|
# Log file path (will be created in the same directory as this script)
|
||||||
|
LOG_FILE = os.path.join(os.path.dirname(__file__), "api_test.log")
|
||||||
|
|
||||||
|
def write_log(message, print_to_console=True):
    """
    Append a message to the log file and optionally echo it to the console.

    The message is split on newlines. The first line is prefixed with a
    "[YYYY-MM-DD HH:MM:SS] " timestamp taken from datetime.now(); every
    following line is padded with spaces to the same width so that
    continuation lines align under the first one.

    A failure while writing the file is reported with print() and is not
    re-raised, so logging problems never abort the caller.

    Args:
        message: The text to record (may contain several lines).
        print_to_console: When true (the default), the unmodified message
            is also printed to stdout.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Only the first line carries the timestamp; the rest are indented by the
    # width of "[<stamp>] " (the stamp plus two brackets and one space).
    head, *rest = message.split('\n')
    padding = ' ' * (len(stamp) + 3)
    entries = [f"[{stamp}] {head}\n"]
    entries.extend(f"{padding}{extra}\n" for extra in rest)

    try:
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.writelines(entries)
    except Exception as exc:
        # Best-effort logging: report the problem and carry on.
        print(f"⚠️ Warning: Failed to write to log file: {exc}")

    if not print_to_console:
        return
    print(message)
|
||||||
|
|
||||||
def test_generate_plan():
|
def test_generate_plan():
|
||||||
"""
|
"""
|
||||||
Sends a request to the /generate_plan endpoint and validates the response.
|
Sends a request to the /generate_plan endpoint and validates the response.
|
||||||
|
All results are logged to the log file for continuous tracking.
|
||||||
"""
|
"""
|
||||||
url = BASE_URL + ENDPOINT
|
url = BASE_URL + ENDPOINT
|
||||||
payload = {"user_prompt": TEST_PROMPT}
|
payload = {"user_prompt": TEST_PROMPT}
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
|
|
||||||
print("--- API Test: Generate Plan ---")
|
# Write separator and test start info to log
|
||||||
print(f"✅ URL: {url}")
|
write_log("=" * 80, print_to_console=False)
|
||||||
print(f"✅ Sending Prompt: \"{TEST_PROMPT}\"")
|
write_log("--- API Test: Generate Plan ---")
|
||||||
|
write_log(f"URL: {url}")
|
||||||
|
write_log(f"Sending Prompt: \"{TEST_PROMPT}\"")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Send the POST request
|
# Send the POST request
|
||||||
@@ -36,40 +70,85 @@ def test_generate_plan():
|
|||||||
# Parse the JSON response
|
# Parse the JSON response
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
print("✅ Received Response:")
|
# Extract and log organized prompt if available in response
|
||||||
print(json.dumps(data, indent=2, ensure_ascii=False))
|
organized_prompt = None
|
||||||
|
if isinstance(data, dict):
|
||||||
|
# Check for various possible field names for organized prompt
|
||||||
|
organized_prompt = data.get("organized_prompt") or \
|
||||||
|
data.get("processed_prompt") or \
|
||||||
|
data.get("final_prompt") or \
|
||||||
|
data.get("enhanced_prompt") or \
|
||||||
|
data.get("user_prompt_enhanced")
|
||||||
|
|
||||||
|
write_log("✅ Received Response:")
|
||||||
|
|
||||||
|
# Log organized prompt if found
|
||||||
|
if organized_prompt:
|
||||||
|
write_log("\n📝 组织后的Prompt:")
|
||||||
|
write_log(organized_prompt)
|
||||||
|
else:
|
||||||
|
# If not in response, log the original prompt for reference
|
||||||
|
write_log("\n📝 原始Prompt:")
|
||||||
|
write_log(f" {TEST_PROMPT}")
|
||||||
|
write_log(" (注: 组织后的prompt未在API响应中返回,如需查看请检查后端日志)")
|
||||||
|
|
||||||
|
response_json = json.dumps(data, indent=2, ensure_ascii=False)
|
||||||
|
write_log("\n完整响应内容:")
|
||||||
|
write_log(response_json)
|
||||||
|
|
||||||
# --- Validation ---
|
# --- Validation ---
|
||||||
print("\n--- Validation Checks ---")
|
write_log("\n--- Validation Checks ---")
|
||||||
|
|
||||||
|
validation_results = []
|
||||||
|
|
||||||
# 1. Check if the response is a dictionary
|
# 1. Check if the response is a dictionary
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
print("PASS: Response is a valid JSON object.")
|
validation_results.append("PASS: Response is a valid JSON object.")
|
||||||
else:
|
else:
|
||||||
print("FAIL: Response is not a valid JSON object.")
|
validation_results.append("FAIL: Response is not a valid JSON object.")
|
||||||
|
# Write all validation results to log before returning
|
||||||
|
for result in validation_results:
|
||||||
|
write_log(result)
|
||||||
|
write_log("=" * 80, print_to_console=False)
|
||||||
|
write_log("", print_to_console=False) # Empty line for readability
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. Check for the existence of the 'root' key
|
# 2. Check for the existence of the 'root' key
|
||||||
if "root" in data and isinstance(data['root'], dict):
|
if "root" in data and isinstance(data['root'], dict):
|
||||||
print("PASS: Response contains a valid 'root' key.")
|
validation_results.append("PASS: Response contains a valid 'root' key.")
|
||||||
else:
|
else:
|
||||||
print("FAIL: Response does not contain a valid 'root' key.")
|
validation_results.append("FAIL: Response does not contain a valid 'root' key.")
|
||||||
|
|
||||||
# 3. Check for the existence and format of the 'visualization_url' key
|
# 3. Check for the existence and format of the 'visualization_url' key
|
||||||
if "visualization_url" in data and data["visualization_url"].endswith(".png"):
|
if "visualization_url" in data and data["visualization_url"].endswith(".png"):
|
||||||
print(f"PASS: Response contains a valid 'visualization_url': {data['visualization_url']}")
|
validation_results.append(f"PASS: Response contains a valid 'visualization_url': {data['visualization_url']}")
|
||||||
else:
|
else:
|
||||||
print("FAIL: Response does not contain a valid 'visualization_url'.")
|
validation_results.append("FAIL: Response does not contain a valid 'visualization_url'.")
|
||||||
|
|
||||||
|
# Write all validation results to log
|
||||||
|
for result in validation_results:
|
||||||
|
write_log(result)
|
||||||
|
|
||||||
|
# Write test completion marker
|
||||||
|
write_log("✅ Test completed successfully")
|
||||||
|
write_log("=" * 80, print_to_console=False)
|
||||||
|
write_log("", print_to_console=False) # Empty line for readability
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
print(f"\n❌ TEST FAILED: Could not connect to the server.")
|
error_msg = f"❌ TEST FAILED: Could not connect to the server.\n Please make sure the backend service is running.\n Error details: {e}"
|
||||||
print(" Please make sure the backend service is running.")
|
write_log(error_msg)
|
||||||
print(f" Error details: {e}")
|
write_log("=" * 80, print_to_console=False)
|
||||||
|
write_log("", print_to_console=False) # Empty line for readability
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print(f"\n❌ TEST FAILED: The server response was not valid JSON.")
|
error_msg = f"❌ TEST FAILED: The server response was not valid JSON.\n Response text: {response.text}"
|
||||||
print(f" Response text: {response.text}")
|
write_log(error_msg)
|
||||||
|
write_log("=" * 80, print_to_console=False)
|
||||||
|
write_log("", print_to_console=False) # Empty line for readability
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n❌ TEST FAILED: An unexpected error occurred: {e}")
|
error_msg = f"❌ TEST FAILED: An unexpected error occurred: {e}"
|
||||||
|
write_log(error_msg)
|
||||||
|
write_log("=" * 80, print_to_console=False)
|
||||||
|
write_log("", print_to_console=False) # Empty line for readability
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_generate_plan()
|
test_generate_plan()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1 +1,12 @@
|
|||||||
instruction_index,instruction,run_number,success,attempts,response_time,plan_id,error,timestamp
|
instruction_index,instruction,run_number,success,attempts,response_time,plan_id,error,timestamp
|
||||||
|
1,起飞后移动到学生宿舍上方降落,1,False,1,2.048215866088867,,,2025-12-02 20:44:56
|
||||||
|
2,起飞后移动到学生宿舍上方查找蓝色的车,1,True,1,14.806509971618652,8a6f282e-c306-4249-962c-d47d48c31bad,,2025-12-02 20:45:12
|
||||||
|
3,起飞后移动到学生宿舍上方寻找蓝色的车,1,True,1,15.240672826766968,f298e2f4-9295-4ffd-8fff-0d0eb9a0ee6c,,2025-12-02 20:45:28
|
||||||
|
4,起飞后移动到学生宿舍上方检测蓝色的车,1,True,1,13.8105788230896,31733491-2030-43b1-a5e4-eb1300b8d23f,,2025-12-02 20:45:43
|
||||||
|
5,飞到学生宿舍上方查找蓝色的车,1,True,1,12.74257755279541,4c855ef4-c251-48cd-b464-4816bc62fbb5,,2025-12-02 20:45:57
|
||||||
|
6,飞到学生宿舍上方查找蓝色车辆并进行打击,1,True,1,16.117226600646973,63d0e7c3-dcbb-40f0-b76b-6f0191c6512f,,2025-12-02 20:46:14
|
||||||
|
7,起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击,1,True,1,16.25989079475403,1b4a537e-c1be-4abf-897e-c21b677b83b7,,2025-12-02 20:46:31
|
||||||
|
8,起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,True,1,16.014280796051025,f88ea46f-5e0b-48fb-b1da-326d287af3d6,,2025-12-02 20:46:48
|
||||||
|
9,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,True,1,15.530286073684692,f56c811a-8304-4c68-8260-01643928bf3e,,2025-12-02 20:47:05
|
||||||
|
10,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,True,1,16.660754919052124,07a13346-3026-4dce-a976-4e0faa132248,,2025-12-02 20:47:23
|
||||||
|
11,起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,True,1,14.128317832946777,16426d41-4f02-4e27-a05e-f4eb84d6c935,,2025-12-02 20:47:38
|
||||||
|
|||||||
|
@@ -1,12 +1,12 @@
|
|||||||
instruction_index,instruction,total_runs,successful_runs,success_rate,avg_response_time,min_response_time,max_response_time,total_response_time
|
instruction_index,instruction,total_runs,successful_runs,success_rate,avg_response_time,min_response_time,max_response_time,total_response_time
|
||||||
1,起飞后移动到学生宿舍上方降落,10,10,100.00%,7.91s,7.73s,8.96s,79.07s
|
1,起飞后移动到学生宿舍上方降落,1,0,0.00%,N/A,N/A,N/A,0.00s
|
||||||
2,起飞后移动到学生宿舍上方查找蓝色的车,10,9,90.00%,14.18s,9.40s,25.44s,127.59s
|
2,起飞后移动到学生宿舍上方查找蓝色的车,1,1,100.00%,14.81s,14.81s,14.81s,14.81s
|
||||||
3,起飞后移动到学生宿舍上方寻找蓝色的车,10,10,100.00%,11.95s,7.00s,13.08s,119.49s
|
3,起飞后移动到学生宿舍上方寻找蓝色的车,1,1,100.00%,15.24s,15.24s,15.24s,15.24s
|
||||||
4,起飞后移动到学生宿舍上方检测蓝色的车,10,10,100.00%,12.21s,9.09s,13.09s,122.08s
|
4,起飞后移动到学生宿舍上方检测蓝色的车,1,1,100.00%,13.81s,13.81s,13.81s,13.81s
|
||||||
5,飞到学生宿舍上方查找蓝色的车,10,10,100.00%,12.99s,12.79s,13.07s,129.90s
|
5,飞到学生宿舍上方查找蓝色的车,1,1,100.00%,12.74s,12.74s,12.74s,12.74s
|
||||||
6,飞到学生宿舍上方查找蓝色车辆并进行打击,10,10,100.00%,19.17s,19.08s,19.38s,191.70s
|
6,飞到学生宿舍上方查找蓝色车辆并进行打击,1,1,100.00%,16.12s,16.12s,16.12s,16.12s
|
||||||
7,起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击,10,10,100.00%,19.18s,19.07s,19.25s,191.85s
|
7,起飞后移动到学生宿舍上方搜索蓝色车辆,并进行打击,1,1,100.00%,16.26s,16.26s,16.26s,16.26s
|
||||||
8,起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,10,10,100.00%,15.65s,14.89s,15.91s,156.47s
|
8,起飞到学生宿舍上方搜索被困人员,并为被困人员投递救援物资,1,1,100.00%,16.01s,16.01s,16.01s,16.01s
|
||||||
9,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,10,10,100.00%,14.04s,13.97s,14.15s,140.40s
|
9,飞到学生宿舍上方搜索方圆10米范围内的蓝色车辆,1,1,100.00%,15.53s,15.53s,15.53s,15.53s
|
||||||
10,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,10,10,100.00%,11.64s,11.29s,14.16s,116.39s
|
10,飞到学生宿舍上方搜索半径为10米区域范围内的蓝色车辆,1,1,100.00%,16.66s,16.66s,16.66s,16.66s
|
||||||
11,起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,10,10,100.00%,16.02s,14.96s,24.95s,160.19s
|
11,起飞到学生宿舍搜索有没有被困人员,然后抛洒救援物资,1,1,100.00%,14.13s,14.13s,14.13s,14.13s
|
||||||
|
|||||||
|
@@ -17,7 +17,7 @@ SUMMARY_CSV = "test_summary.csv"
|
|||||||
LOG_FILE = "api_test_log.txt"
|
LOG_FILE = "api_test_log.txt"
|
||||||
|
|
||||||
# 测试参数
|
# 测试参数
|
||||||
TESTS_PER_INSTRUCTION = 10
|
TESTS_PER_INSTRUCTION = 1
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
RETRY_DELAY = 2
|
RETRY_DELAY = 2
|
||||||
|
|
||||||
@@ -207,20 +207,38 @@ def read_instructions(filename):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
|
def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
|
||||||
"""写入详细日志"""
|
"""写入详细日志,包含完整的API响应"""
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
with open(log_file, 'a', encoding='utf-8') as f:
|
with open(log_file, 'a', encoding='utf-8') as f:
|
||||||
f.write(f"\n{'='*80}\n")
|
f.write(f"\n{'='*80}\n")
|
||||||
f.write(f"指令 #{instruction_idx} - 运行 #{run_number} - {timestamp}\n")
|
f.write(f"指令 #{instruction_idx} - 运行 #{run_number} - {timestamp}\n")
|
||||||
f.write(f"HTTP状态: {result.get('http_status', 'N/A')}\n")
|
f.write(f"HTTP状态: {result.get('http_status', 'N/A')}\n")
|
||||||
f.write(f"指令: {prompt}\n")
|
f.write(f"原始指令: {prompt}\n")
|
||||||
f.write(f"尝试次数: {result['attempts']}\n")
|
f.write(f"尝试次数: {result['attempts']}\n")
|
||||||
f.write(f"响应时间: {result['response_time']:.2f}秒\n")
|
f.write(f"响应时间: {result['response_time']:.2f}秒\n")
|
||||||
f.write(f"结果: {'✅ 成功' if result['success'] else '❌ 失败'}\n")
|
f.write(f"结果: {'✅ 成功' if result['success'] else '❌ 失败'}\n")
|
||||||
|
|
||||||
if result['success']:
|
if result['success'] and result.get('data'):
|
||||||
f.write("验证结果:\n")
|
data = result['data']
|
||||||
|
|
||||||
|
# 提取并记录组织后的prompt(如果存在)
|
||||||
|
organized_prompt = None
|
||||||
|
if isinstance(data, dict):
|
||||||
|
organized_prompt = data.get("organized_prompt") or \
|
||||||
|
data.get("processed_prompt") or \
|
||||||
|
data.get("final_prompt") or \
|
||||||
|
data.get("enhanced_prompt") or \
|
||||||
|
data.get("user_prompt_enhanced")
|
||||||
|
|
||||||
|
if organized_prompt:
|
||||||
|
f.write(f"\n📝 组织后的Prompt:\n")
|
||||||
|
f.write(f"{organized_prompt}\n")
|
||||||
|
else:
|
||||||
|
f.write(f"\n📝 组织后的Prompt: (未在响应中返回)\n")
|
||||||
|
|
||||||
|
# 记录验证结果
|
||||||
|
f.write("\n验证结果:\n")
|
||||||
for check_name, check_result in result['validation_checks'].items():
|
for check_name, check_result in result['validation_checks'].items():
|
||||||
f.write(f" {check_name}: {'✅' if check_result else '❌'}\n")
|
f.write(f" {check_name}: {'✅' if check_result else '❌'}\n")
|
||||||
|
|
||||||
@@ -229,8 +247,27 @@ def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
|
|||||||
|
|
||||||
if result['invalid_conditions']:
|
if result['invalid_conditions']:
|
||||||
f.write(f"⚠️ 无效条件节点: {result['invalid_conditions']}\n")
|
f.write(f"⚠️ 无效条件节点: {result['invalid_conditions']}\n")
|
||||||
|
|
||||||
|
# 记录完整的API响应
|
||||||
|
f.write(f"\n完整API响应:\n")
|
||||||
|
try:
|
||||||
|
response_json = json.dumps(data, indent=2, ensure_ascii=False)
|
||||||
|
f.write(response_json)
|
||||||
|
f.write("\n")
|
||||||
|
except Exception as e:
|
||||||
|
f.write(f"⚠️ 无法序列化响应数据: {e}\n")
|
||||||
|
f.write(f"原始数据: {str(data)}\n")
|
||||||
else:
|
else:
|
||||||
f.write(f"错误信息: {result['error']}\n")
|
f.write(f"错误信息: {result['error']}\n")
|
||||||
|
# 即使失败也尝试记录响应数据(如果有)
|
||||||
|
if result.get('data'):
|
||||||
|
f.write(f"\n部分响应数据:\n")
|
||||||
|
try:
|
||||||
|
response_json = json.dumps(result['data'], indent=2, ensure_ascii=False)
|
||||||
|
f.write(response_json)
|
||||||
|
f.write("\n")
|
||||||
|
except Exception:
|
||||||
|
f.write(f"原始数据: {str(result['data'])}\n")
|
||||||
|
|
||||||
def generate_summary_report(instructions, results_summary):
|
def generate_summary_report(instructions, results_summary):
|
||||||
"""
|
"""
|
||||||
@@ -311,6 +348,10 @@ def main():
|
|||||||
write_log_entry(LOG_FILE, instruction_idx, run_number, prompt, result)
|
write_log_entry(LOG_FILE, instruction_idx, run_number, prompt, result)
|
||||||
|
|
||||||
# 记录结果
|
# 记录结果
|
||||||
|
plan_id = ""
|
||||||
|
if result.get("success") and result.get("data") and isinstance(result["data"], dict):
|
||||||
|
plan_id = result["data"].get("plan_id", "")
|
||||||
|
|
||||||
detailed_result = {
|
detailed_result = {
|
||||||
"instruction_index": instruction_idx,
|
"instruction_index": instruction_idx,
|
||||||
"instruction": prompt,
|
"instruction": prompt,
|
||||||
@@ -318,7 +359,7 @@ def main():
|
|||||||
"success": result["success"],
|
"success": result["success"],
|
||||||
"attempts": result["attempts"],
|
"attempts": result["attempts"],
|
||||||
"response_time": result["response_time"],
|
"response_time": result["response_time"],
|
||||||
"http_status": result.get("http_status"),
|
"plan_id": plan_id,
|
||||||
"error": result["error"] or "",
|
"error": result["error"] or "",
|
||||||
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
}
|
}
|
||||||
@@ -363,6 +404,10 @@ def main():
|
|||||||
# 生成统计摘要
|
# 生成统计摘要
|
||||||
generate_summary_report(instructions, results_summary)
|
generate_summary_report(instructions, results_summary)
|
||||||
|
|
||||||
|
# 计算总统计
|
||||||
|
total_tests = len(instructions) * TESTS_PER_INSTRUCTION
|
||||||
|
total_successful = sum(summary['success_count'] for summary in results_summary)
|
||||||
|
|
||||||
# 打印最终统计
|
# 打印最终统计
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'='*60}")
|
||||||
print("📈 最终测试统计")
|
print("📈 最终测试统计")
|
||||||
@@ -370,7 +415,10 @@ def main():
|
|||||||
print(f"总测试次数: {total_tests}")
|
print(f"总测试次数: {total_tests}")
|
||||||
print(f"成功次数: {total_successful}")
|
print(f"成功次数: {total_successful}")
|
||||||
print(f"失败次数: {total_tests - total_successful}")
|
print(f"失败次数: {total_tests - total_successful}")
|
||||||
print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
|
if total_tests > 0:
|
||||||
|
print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
|
||||||
|
else:
|
||||||
|
print(f"总成功率: N/A")
|
||||||
|
|
||||||
# 打印每个指令的统计
|
# 打印每个指令的统计
|
||||||
print(f"\n📋 每个指令的统计:")
|
print(f"\n📋 每个指令的统计:")
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user