更新 vLLM 后端的使用方法

2025-10-13 11:54:02 +08:00
parent 00cde0d2dc
commit b91291b734
109 changed files with 7953 additions and 737 deletions
--- a/tools/test_validate/test_validity.py
+++ b/tools/test_validate/test_validity.py
@@ -232,12 +232,12 @@ def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
        else:
            f.write(f"错误信息: {result['error']}\n")

-def generate_summary_report(instructions, results_summary):
+def generate_summary_report(instructions, results_summary, summary_csv_path):
    """
-    生成统计摘要报告（修复除零错误）
+    生成统计摘要报告
    """
    try:
-        with open(SUMMARY_CSV, 'w', newline='', encoding='utf-8') as csvfile:
+        with open(summary_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['instruction_index', 'instruction', 'total_runs', 'successful_runs', 
                         'success_rate', 'avg_response_time', 'min_response_time', 
                         'max_response_time', 'total_response_time']
@@ -249,7 +249,6 @@ def generate_summary_report(instructions, results_summary):
                summary = results_summary[i]
                success_count = summary['success_count']
                
-                # 防止除零错误
                avg_time = "N/A"
                min_time = "N/A"
                max_time = "N/A"
@@ -271,14 +270,25 @@ def generate_summary_report(instructions, results_summary):
                    'total_response_time': f"{summary['total_response_time']:.2f}s"
                })
        
-        print(f"📊 统计摘要已保存至: {SUMMARY_CSV}")
+        print(f"📊 统计摘要已保存至: {summary_csv_path}")
    except Exception as e:
        print(f"❌ 保存统计摘要时出错: {e}")

 def main():
    """主测试函数"""
+    # 创建带时间戳的结果目录
+    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+    RESULTS_DIR = f"test_run_{timestamp_str}"
+    os.makedirs(RESULTS_DIR, exist_ok=True)
+    
+    # 定义顶层结果文件的完整路径
+    log_file_path = os.path.join(RESULTS_DIR, "api_test_log.txt")
+    results_csv_path = os.path.join(RESULTS_DIR, "test_results.csv")
+    summary_csv_path = os.path.join(RESULTS_DIR, "test_summary.csv")
+
    print("🚀 开始批量API测试")
    print(f"每个指令测试 {TESTS_PER_INSTRUCTION} 次")
+    print(f"所有结果将保存在: {RESULTS_DIR}")
    
    instructions = read_instructions(INSTRUCTIONS_FILE)
    if not instructions:
@@ -304,11 +314,17 @@ def main():
        print(f"指令: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")
        print(f"{'='*60}")
        
+        # 为当前指令创建子目录
+        instruction_dir = os.path.join(RESULTS_DIR, f"instruction_{instruction_idx}")
+        os.makedirs(instruction_dir, exist_ok=True)
+
        for run_number in range(1, TESTS_PER_INSTRUCTION + 1):
            print(f"  运行 {run_number}/{TESTS_PER_INSTRUCTION}...", end=" ", flush=True)
            
            result = send_api_request(prompt, instruction_idx, run_number)
-            write_log_entry(LOG_FILE, instruction_idx, run_number, prompt, result)
+            write_log_entry(log_file_path, instruction_idx, run_number, prompt, result)
+            
+            plan_id = result.get('data', {}).get('plan_id') if result.get('data') else None
            
            # 记录结果
            detailed_result = {
@@ -318,6 +334,7 @@ def main():
                "success": result["success"],
                "attempts": result["attempts"],
                "response_time": result["response_time"],
+                "plan_id": plan_id,
                "http_status": result.get("http_status"),
                "error": result["error"] or "",
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -335,44 +352,73 @@ def main():
                results_summary[idx]['max_response_time'] = max(
                    results_summary[idx]['max_response_time'], result['response_time']
                )
+                
+                # 保存JSON和可视化图片
+                plan_id_str = plan_id or 'unknown_plan'
+                
+                # 1. 保存JSON
+                json_filename = os.path.join(instruction_dir, f"run_{run_number}_{plan_id_str}.json")
+                try:
+                    with open(json_filename, 'w', encoding='utf-8') as f:
+                        json.dump(result['data'], f, indent=4, ensure_ascii=False)
+                except Exception as e:
+                    print(f"  ⚠️  保存JSON失败: {e}")
+
+                # 2. 下载并保存图片
+                viz_url = result['data'].get('visualization_url')
+                if viz_url:
+                    img_filename = os.path.join(instruction_dir, f"run_{run_number}_{plan_id_str}.png")
+                    try:
+                        if viz_url.startswith('/'):
+                            viz_url = BASE_URL + viz_url
+                        
+                        img_response = requests.get(viz_url)
+                        img_response.raise_for_status()
+                        with open(img_filename, 'wb') as f:
+                            f.write(img_response.content)
+                    except requests.exceptions.RequestException as e:
+                        print(f"  ⚠️  下载图片失败: {e}")
+
                print(f"✅ 成功 ({result['response_time']:.1f}s)")
            else:
                print(f"❌ 失败 (HTTP: {result.get('http_status', 'N/A')})")
            
-            # 记录HTTP状态
            if 'http_status' in result:
                results_summary[idx]['http_statuses'].append(result['http_status'])
            
-            time.sleep(1)  # 避免服务器过载
+            time.sleep(1)
    
    # 生成详细结果CSV
    try:
-        with open(RESULTS_CSV, 'w', newline='', encoding='utf-8') as csvfile:
+        with open(results_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['instruction_index', 'instruction', 'run_number', 'success', 
                         'attempts', 'response_time', 'plan_id', 'error', 'timestamp']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            
            writer.writeheader()
-            for result in detailed_results:
-                writer.writerow(result)
+            for res in detailed_results:
+                writer.writerow(res)
        
-        print(f"\n📊 详细结果已保存至: {RESULTS_CSV}")
+        print(f"\n📊 详细结果已保存至: {results_csv_path}")
    except Exception as e:
        print(f"❌ 保存详细结果时出错: {e}")
    
    # 生成统计摘要
-    generate_summary_report(instructions, results_summary)
+    generate_summary_report(instructions, results_summary, summary_csv_path)
+    
+    # 计算并打印最终统计
+    total_tests = len(instructions) * TESTS_PER_INSTRUCTION
+    total_successful = sum(s['success_count'] for s in results_summary)
    
-    # 打印最终统计
    print(f"\n{'='*60}")
    print("📈 最终测试统计")
    print(f"{'='*60}")
-    print(f"总测试次数: {total_tests}")
-    print(f"成功次数: {total_successful}")
-    print(f"失败次数: {total_tests - total_successful}")
-    print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
+    if total_tests > 0:
+        print(f"总测试次数: {total_tests}")
+        print(f"成功次数: {total_successful}")
+        print(f"失败次数: {total_tests - total_successful}")
+        print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
    
-    # 打印每个指令的统计
    print(f"\n📋 每个指令的统计:")
    for i, (instruction, summary) in enumerate(zip(instructions, results_summary), 1):
        success_rate = (summary['success_count'] / TESTS_PER_INSTRUCTION * 100)
@@ -380,10 +426,7 @@ def main():
        print(f"  指令 {i}: {success_rate:.1f}% 成功 ({summary['success_count']}/{TESTS_PER_INSTRUCTION}), "
              f"平均时间: {avg_time:.2f}s")
    
-    print(f"\n📁 输出文件:")
-    print(f"详细日志: {LOG_FILE}")
-    print(f"详细结果: {RESULTS_CSV}")
-    print(f"统计摘要: {SUMMARY_CSV}")
+    print(f"\n📁 输出文件见: {RESULTS_DIR}")

 if __name__ == "__main__":
    main()