mirrored 18 minutes ago
0
chigkimUpdated the regex pattern in extract_final to use [A-J] between word boundaries as an answer. 9dac3c3
import glob
import sys
import json
import re
import random

assert len(sys.argv) > 1, 'You need to pass the directory'
path = sys.argv[1]


def extract_answer(text, level):
    if level == 'l1':
        pattern = r"answer is \(?([A-J])\)?"
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return None
    elif level == 'l2':
        pattern = r"answer is \(?([A-J])\)?"
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return extract_again(text)


def extract_again(text):
    match = re.search(r'.*[aA]nswer:\s*([A-J])', text)
    if match:
        return match.group(1)
    else:
        return extract_final(text)
    

def extract_final(text):
    pattern = r"\b[A-J]\b(?!.*\b[A-J]\b)"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(0)
    else:
        return None


for name in glob.glob(path + '/*'):
    print('Level 1 regex' + '==' * 20)
    succ, fail = 0, 0
    with open(name, 'r') as f:
        entries = json.load(f)
        for e in entries:
            pred = extract_answer(e['model_outputs'], 'l1')
            if pred is None:
                random.seed(12345)
                pred = random.choice(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"])
            # Remove the None cases
            if pred == e['answer']:
                succ += 1
            else:
                fail += 1
    print(name, succ / (succ + fail))

    print('Level 2 regex' + '==' * 20)
    succ, fail = 0, 0
    with open(name, 'r') as f:
        entries = json.load(f)
        for e in entries:
            pred = extract_answer(e['model_outputs'], 'l2')
            if pred is None:
                random.seed(12345)
                pred = random.choice(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"])
            # Remove the None cases
            if pred == e['answer']:
                succ += 1
            else:
                fail += 1
    print(name, succ / (succ + fail))
    
    print()