更新vllm后端的使用方法
This commit is contained in:
@@ -232,12 +232,12 @@ def write_log_entry(log_file, instruction_idx, run_number, prompt, result):
|
||||
else:
|
||||
f.write(f"错误信息: {result['error']}\n")
|
||||
|
||||
def generate_summary_report(instructions, results_summary):
|
||||
def generate_summary_report(instructions, results_summary, summary_csv_path):
|
||||
"""
|
||||
生成统计摘要报告(修复除零错误)
|
||||
生成统计摘要报告
|
||||
"""
|
||||
try:
|
||||
with open(SUMMARY_CSV, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
with open(summary_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['instruction_index', 'instruction', 'total_runs', 'successful_runs',
|
||||
'success_rate', 'avg_response_time', 'min_response_time',
|
||||
'max_response_time', 'total_response_time']
|
||||
@@ -249,7 +249,6 @@ def generate_summary_report(instructions, results_summary):
|
||||
summary = results_summary[i]
|
||||
success_count = summary['success_count']
|
||||
|
||||
# 防止除零错误
|
||||
avg_time = "N/A"
|
||||
min_time = "N/A"
|
||||
max_time = "N/A"
|
||||
@@ -271,14 +270,25 @@ def generate_summary_report(instructions, results_summary):
|
||||
'total_response_time': f"{summary['total_response_time']:.2f}s"
|
||||
})
|
||||
|
||||
print(f"📊 统计摘要已保存至: {SUMMARY_CSV}")
|
||||
print(f"📊 统计摘要已保存至: {summary_csv_path}")
|
||||
except Exception as e:
|
||||
print(f"❌ 保存统计摘要时出错: {e}")
|
||||
|
||||
def main():
|
||||
"""主测试函数"""
|
||||
# 创建带时间戳的结果目录
|
||||
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
RESULTS_DIR = f"test_run_{timestamp_str}"
|
||||
os.makedirs(RESULTS_DIR, exist_ok=True)
|
||||
|
||||
# 定义顶层结果文件的完整路径
|
||||
log_file_path = os.path.join(RESULTS_DIR, "api_test_log.txt")
|
||||
results_csv_path = os.path.join(RESULTS_DIR, "test_results.csv")
|
||||
summary_csv_path = os.path.join(RESULTS_DIR, "test_summary.csv")
|
||||
|
||||
print("🚀 开始批量API测试")
|
||||
print(f"每个指令测试 {TESTS_PER_INSTRUCTION} 次")
|
||||
print(f"所有结果将保存在: {RESULTS_DIR}")
|
||||
|
||||
instructions = read_instructions(INSTRUCTIONS_FILE)
|
||||
if not instructions:
|
||||
@@ -304,11 +314,17 @@ def main():
|
||||
print(f"指令: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
# 为当前指令创建子目录
|
||||
instruction_dir = os.path.join(RESULTS_DIR, f"instruction_{instruction_idx}")
|
||||
os.makedirs(instruction_dir, exist_ok=True)
|
||||
|
||||
for run_number in range(1, TESTS_PER_INSTRUCTION + 1):
|
||||
print(f" 运行 {run_number}/{TESTS_PER_INSTRUCTION}...", end=" ", flush=True)
|
||||
|
||||
result = send_api_request(prompt, instruction_idx, run_number)
|
||||
write_log_entry(LOG_FILE, instruction_idx, run_number, prompt, result)
|
||||
write_log_entry(log_file_path, instruction_idx, run_number, prompt, result)
|
||||
|
||||
plan_id = result.get('data', {}).get('plan_id') if result.get('data') else None
|
||||
|
||||
# 记录结果
|
||||
detailed_result = {
|
||||
@@ -318,6 +334,7 @@ def main():
|
||||
"success": result["success"],
|
||||
"attempts": result["attempts"],
|
||||
"response_time": result["response_time"],
|
||||
"plan_id": plan_id,
|
||||
"http_status": result.get("http_status"),
|
||||
"error": result["error"] or "",
|
||||
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
@@ -335,44 +352,73 @@ def main():
|
||||
results_summary[idx]['max_response_time'] = max(
|
||||
results_summary[idx]['max_response_time'], result['response_time']
|
||||
)
|
||||
|
||||
# 保存JSON和可视化图片
|
||||
plan_id_str = plan_id or 'unknown_plan'
|
||||
|
||||
# 1. 保存JSON
|
||||
json_filename = os.path.join(instruction_dir, f"run_{run_number}_{plan_id_str}.json")
|
||||
try:
|
||||
with open(json_filename, 'w', encoding='utf-8') as f:
|
||||
json.dump(result['data'], f, indent=4, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
print(f" ⚠️ 保存JSON失败: {e}")
|
||||
|
||||
# 2. 下载并保存图片
|
||||
viz_url = result['data'].get('visualization_url')
|
||||
if viz_url:
|
||||
img_filename = os.path.join(instruction_dir, f"run_{run_number}_{plan_id_str}.png")
|
||||
try:
|
||||
if viz_url.startswith('/'):
|
||||
viz_url = BASE_URL + viz_url
|
||||
|
||||
img_response = requests.get(viz_url)
|
||||
img_response.raise_for_status()
|
||||
with open(img_filename, 'wb') as f:
|
||||
f.write(img_response.content)
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f" ⚠️ 下载图片失败: {e}")
|
||||
|
||||
print(f"✅ 成功 ({result['response_time']:.1f}s)")
|
||||
else:
|
||||
print(f"❌ 失败 (HTTP: {result.get('http_status', 'N/A')})")
|
||||
|
||||
# 记录HTTP状态
|
||||
if 'http_status' in result:
|
||||
results_summary[idx]['http_statuses'].append(result['http_status'])
|
||||
|
||||
time.sleep(1) # 避免服务器过载
|
||||
time.sleep(1)
|
||||
|
||||
# 生成详细结果CSV
|
||||
try:
|
||||
with open(RESULTS_CSV, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
with open(results_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['instruction_index', 'instruction', 'run_number', 'success',
|
||||
'attempts', 'response_time', 'plan_id', 'error', 'timestamp']
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
|
||||
writer.writeheader()
|
||||
for result in detailed_results:
|
||||
writer.writerow(result)
|
||||
for res in detailed_results:
|
||||
writer.writerow(res)
|
||||
|
||||
print(f"\n📊 详细结果已保存至: {RESULTS_CSV}")
|
||||
print(f"\n📊 详细结果已保存至: {results_csv_path}")
|
||||
except Exception as e:
|
||||
print(f"❌ 保存详细结果时出错: {e}")
|
||||
|
||||
# 生成统计摘要
|
||||
generate_summary_report(instructions, results_summary)
|
||||
generate_summary_report(instructions, results_summary, summary_csv_path)
|
||||
|
||||
# 计算并打印最终统计
|
||||
total_tests = len(instructions) * TESTS_PER_INSTRUCTION
|
||||
total_successful = sum(s['success_count'] for s in results_summary)
|
||||
|
||||
# 打印最终统计
|
||||
print(f"\n{'='*60}")
|
||||
print("📈 最终测试统计")
|
||||
print(f"{'='*60}")
|
||||
print(f"总测试次数: {total_tests}")
|
||||
print(f"成功次数: {total_successful}")
|
||||
print(f"失败次数: {total_tests - total_successful}")
|
||||
print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
|
||||
if total_tests > 0:
|
||||
print(f"总测试次数: {total_tests}")
|
||||
print(f"成功次数: {total_successful}")
|
||||
print(f"失败次数: {total_tests - total_successful}")
|
||||
print(f"总成功率: {(total_successful / total_tests * 100):.2f}%")
|
||||
|
||||
# 打印每个指令的统计
|
||||
print(f"\n📋 每个指令的统计:")
|
||||
for i, (instruction, summary) in enumerate(zip(instructions, results_summary), 1):
|
||||
success_rate = (summary['success_count'] / TESTS_PER_INSTRUCTION * 100)
|
||||
@@ -380,10 +426,7 @@ def main():
|
||||
print(f" 指令 {i}: {success_rate:.1f}% 成功 ({summary['success_count']}/{TESTS_PER_INSTRUCTION}), "
|
||||
f"平均时间: {avg_time:.2f}s")
|
||||
|
||||
print(f"\n📁 输出文件:")
|
||||
print(f"详细日志: {LOG_FILE}")
|
||||
print(f"详细结果: {RESULTS_CSV}")
|
||||
print(f"统计摘要: {SUMMARY_CSV}")
|
||||
print(f"\n📁 输出文件见: {RESULTS_DIR}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user