/
BIRD-critiq1c6ba4c
# utils.py
import sys
import json
import re
def load_jsonl(file_path):
    """
    Load a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed with ``json.loads``. Blank or
    whitespace-only lines (for example a trailing empty line left by an
    editor) are skipped instead of being treated as malformed JSON.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Exits:
        On any failure (file cannot be opened, undecodable bytes, malformed
        JSON) an error message is printed and the process terminates through
        ``sys.exit(1)``.
    """
    try:
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale default.
        with open(file_path, "r", encoding="utf-8") as file:
            return [json.loads(line) for line in file if line.strip()]
    except Exception as e:
        print(f"Failed to load JSONL file: {e}")
        sys.exit(1)
def split_field(data, field_name):
    """
    Return the statements stored under ``field_name`` in ``data``.

    A string value is cut at every ``[split]`` marker; each piece is stripped
    of surrounding whitespace and empty pieces are dropped. A list value is
    handed back unchanged (the very same list object). A missing key, a falsy
    value, or a value of any other type yields an empty list.
    """
    value = data.get(field_name, "")

    # Missing key, empty string, empty list, None, 0, ... -> nothing to split.
    if not value:
        return []

    # Lists are assumed to be pre-split already and are returned untouched.
    if not isinstance(value, str):
        return value if isinstance(value, list) else []

    # The pattern swallows whitespace after the marker; strip() below removes
    # whatever whitespace is left around each piece.
    pieces = re.split(r'\[split\]\s*', value)
    trimmed = (piece.strip() for piece in pieces)
    return [statement for statement in trimmed if statement]
def save_report_and_status(report_file_path,
                           question_test_case_results,
                           data_list,
                           number_of_execution_errors,
                           number_of_timeouts,
                           number_of_assertion_errors,
                           overall_accuracy,
                           timestamp,
                           big_logger):
    """
    Write a text report and record a per-question status in ``data_list``.

    The report starts with a summary header (instance count, error counters,
    accuracy, timestamp) followed by one line per entry of
    ``question_test_case_results``. While writing each line, the entry of
    ``data_list`` at the same position is updated in place with a ``'status'``
    (``"success"`` or ``"failed"``) and an ``'error_message'``.

    Args:
        report_file_path: Path of the report file; it is overwritten.
        question_test_case_results: Iterable of dicts providing
            ``instance_id``, ``total_test_cases``, ``passed_test_cases``,
            ``failed_test_cases`` (read only when at least one test failed)
            and optionally the ``evaluation_phase_*_error`` flags.
        data_list: List of dicts, paired with the results by position.
        number_of_execution_errors: Counter written to the header.
        number_of_timeouts: Counter written to the header.
        number_of_assertion_errors: Counter written to the header.
        overall_accuracy: Number formatted with two decimals in the header.
        timestamp: Value written verbatim to the header.
        big_logger: Object whose ``error(message)`` method is called when
            anything goes wrong.

    Returns:
        None. Failures are not raised: any exception is reported through
        ``big_logger.error`` and processing stops at that point, so entries
        after the failure keep whatever status they had before.
    """
    total_instances = len(data_list)
    # Result flag -> text appended to the report line, in reporting order.
    eval_phase_labels = (
        ("evaluation_phase_execution_error", " | Eval Phase: Execution Error"),
        ("evaluation_phase_timeout_error", " | Eval Phase: Timeout Error"),
        ("evaluation_phase_assertion_error", " | Eval Phase: Assertion Error"),
    )
    try:
        # Explicit UTF-8 so the report does not depend on the locale default.
        with open(report_file_path, "w", encoding="utf-8") as report_file:
            total_errors = (number_of_execution_errors
                            + number_of_timeouts
                            + number_of_assertion_errors)
            report_file.write(
                "--------------------------------------------------\n"
                "BIRD CRITIC Stack Overflow Result Statistics (Postgres, Multi-Thread):\n"
                f"Number of Instances: {total_instances}\n"
                f"Number of Execution Errors: {number_of_execution_errors}\n"
                f"Number of Timeouts: {number_of_timeouts}\n"
                f"Number of Assertion Errors: {number_of_assertion_errors}\n"
                f"Total Errors: {total_errors}\n"
                f"Overall Accuracy: {overall_accuracy:.2f}%\n"
                f"Timestamp: {timestamp}\n\n"
            )

            # Go through each question result
            for i, q_res in enumerate(question_test_case_results):
                q_idx = q_res["instance_id"]
                t_total = q_res["total_test_cases"]
                t_pass = q_res["passed_test_cases"]
                t_fail = t_total - t_pass

                if t_fail > 0:
                    # str() each id: test cases may be identified by ints,
                    # which str.join would reject with a TypeError.
                    failed_list_str = ", ".join(
                        str(case) for case in q_res["failed_test_cases"]
                    )
                else:
                    failed_list_str = "None"

                eval_phase_note = "".join(
                    label for key, label in eval_phase_labels if q_res.get(key)
                )

                report_file.write(
                    f"Question_{q_idx}: ({t_pass}/{t_total}) test cases passed, "
                    f"failed test cases: {failed_list_str}{eval_phase_note}\n"
                )

                # Update data_list with statuses
                entry = data_list[i]
                if t_fail == 0:
                    entry['status'] = "success"
                    entry['error_message'] = None
                    continue

                entry['status'] = "failed"
                # Name the failed test cases when there are any. If none are
                # named but an evaluation-phase error was flagged, that note
                # is the only useful explanation, so report it instead of a
                # bare " failed".
                if t_fail > 0 and (failed_list_str or not eval_phase_note):
                    entry['error_message'] = f"{failed_list_str} failed"
                else:
                    entry['error_message'] = eval_phase_note
    except Exception as e:
        big_logger.error(f"Failed to write report: {e}")