from benchflow import load_benchmark, BaseAgent
# Load the benchmark identified by the name "cmu/webarena".
bench = load_benchmark(
    benchmark_name="cmu/webarena",
)
class YourAgent(BaseAgent):
    """Placeholder agent that inherits everything from ``BaseAgent``.

    No behaviour is added here yet; replace this body with your own
    agent logic.
    """

    # NOTE(review): BaseAgent may declare methods that subclasses are
    # required to override before the class can be instantiated --
    # confirm against the benchflow documentation.
    pass
# Create the agent instance that is handed to the benchmark.
your_agents = YourAgent()

# Start a run for task ids 1, 2 and 3 with that agent and keep the
# value returned by bench.run so the result can be looked up afterwards.
run_id = bench.run(task_id=[1, 2, 3], agents=your_agents)

# Fetch the result associated with the run started above.
result = bench.get_result(run_id)