narrativeqa
Evaluation script for the NarrativeQA dataset.
rouge_l_evaluator
rouge_l_evaluator = rouge.Rouge(
    metrics=["rouge-l"],
    max_n=4,
    limit_length=True,
    length_limit=100,
    ...
)
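The trailing ellipsis stands for constructor arguments not shown here. As a usage sketch only, assuming the py-rouge package (whose Rouge constructor and get_scores method match the keyword arguments above) and made-up example strings:

import rouge  # py-rouge package, assumed from the keyword arguments above

evaluator = rouge.Rouge(
    metrics=["rouge-l"],
    max_n=4,
    limit_length=True,
    length_limit=100,
)

# With py-rouge's default averaging, get_scores returns nested dicts of
# precision/recall/F1 per requested metric.
scores = evaluator.get_scores("the fox jumped over the dog",
                              "the quick fox jumped over the lazy dog")
print(scores["rouge-l"]["f"])  # ROUGE-L F1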
bleu_1

def bleu_1(p, g)
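The parameters are undocumented; read p as the prediction and g as the ground-truth references. A minimal sketch using NLTK's sentence_bleu, assuming both arguments arrive pre-tokenized (the script's actual tokenization contract is not shown here):

from nltk.translate.bleu_score import sentence_bleu

def bleu_1(p, g):
    # BLEU with all weight on unigrams.
    # Assumes p is a token list and g is a list of token lists.
    return sentence_bleu(g, p, weights=(1, 0, 0, 0))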
bleu_4

def bleu_4(p, g)
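Likewise for BLEU-4, a sketch under the same tokenization assumption, with uniform weight over 1- to 4-grams:

from nltk.translate.bleu_score import sentence_bleu

def bleu_4(p, g):
    # Standard BLEU-4: equal weight on 1- through 4-grams.
    return sentence_bleu(g, p, weights=(0.25, 0.25, 0.25, 0.25))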
meteor

def meteor(p, g)
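A sketch using NLTK's meteor_score; recent NLTK versions require both the references and the hypothesis to be pre-tokenized, which again assumes p and g arrive as token lists:

from nltk.translate.meteor_score import meteor_score

def meteor(p, g):
    # g: list of token lists (references); p: token list (hypothesis).
    return meteor_score(g, p)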
rouge_l

def rouge_l(p, g)
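Presumably this wraps the module-level rouge_l_evaluator defined above; a sketch under that assumption:

def rouge_l(p, g):
    # Delegates to the py-rouge evaluator configured at module level.
    # p: hypothesis string(s); g: reference string(s).
    return rouge_l_evaluator.get_scores(p, g)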
metric_max_over_ground_truths

def metric_max_over_ground_truths(
    metric_fn,
    prediction,
    ground_truths,
    tokenize=False
)
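Going by the name and signature, this helper scores the prediction against every ground truth and keeps the best result; a sketch under that reading, with NLTK's word_tokenize standing in for whatever tokenizer the script actually uses:

from nltk.tokenize import word_tokenize

def metric_max_over_ground_truths(metric_fn, prediction, ground_truths, tokenize=False):
    scores = []
    for ground_truth in ground_truths:
        if tokenize:
            # Token-level metrics (BLEU, METEOR) get tokenized inputs.
            score = metric_fn(word_tokenize(prediction), [word_tokenize(ground_truth)])
        else:
            score = metric_fn(prediction, [ground_truth])
        scores.append(score)
    # Works for scalar metrics; dict-valued metrics such as rouge_l
    # would need a specific key (e.g. ROUGE-L F1) to compare on.
    return max(scores)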
get_metric_score

def get_metric_score(prediction, ground_truths)
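A plausible composition of the functions above, assuming get_metric_score reports the best score over references for each metric; the tuple return shape is an assumption:

def get_metric_score(prediction, ground_truths):
    # Token-level metrics: tokenize and take the max over references.
    bleu_1_score = metric_max_over_ground_truths(bleu_1, prediction, ground_truths, tokenize=True)
    bleu_4_score = metric_max_over_ground_truths(bleu_4, prediction, ground_truths, tokenize=True)
    meteor_value = metric_max_over_ground_truths(meteor, prediction, ground_truths, tokenize=True)
    # ROUGE-L returns nested dicts; compare on F1 explicitly.
    rouge_l_f1 = max(rouge_l(prediction, [gt])["rouge-l"]["f"] for gt in ground_truths)
    return bleu_1_score, bleu_4_score, meteor_value, rouge_l_f1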