conll_coref_scores

ConllCorefScores#

class ConllCorefScores(Metric):
 | def __init__(self) -> None

call#

class ConllCorefScores(Metric):
 | ...
 | @overrides
 | def __call__(
 |     self,
 |     top_spans: torch.Tensor,
 |     antecedent_indices: torch.Tensor,
 |     predicted_antecedents: torch.Tensor,
 |     metadata_list: List[Dict[str, Any]]
 | )

Parameters

top_spans : torch.Tensor
(start, end) indices for all spans kept after span pruning in the model. Expected shape: (batch_size, num_spans, 2)
antecedent_indices : torch.Tensor
For each span, the indices of all allowed antecedents for that span. Expected shape: (batch_size, num_spans, num_antecedents)
predicted_antecedents : torch.Tensor
For each span, this contains the index (into antecedent_indices) of the most likely antecedent for that span. Expected shape: (batch_size, num_spans)
metadata_list : List[Dict[str, Any]]
A metadata dictionary for each instance in the batch. We use the "clusters" key from this dictionary, which has the annotated gold coreference clusters for that instance.

get_metric#

class ConllCorefScores(Metric):
 | ...
 | @overrides
 | def get_metric(
 |     self,
 |     reset: bool = False
 | ) -> Tuple[float, float, float]

reset#

class ConllCorefScores(Metric):
 | ...
 | @overrides
 | def reset(self)

get_gold_clusters#

class ConllCorefScores(Metric):
 | ...
 | @staticmethod
 | def get_gold_clusters(gold_clusters)

get_predicted_clusters#

class ConllCorefScores(Metric):
 | ...
 | @staticmethod
 | def get_predicted_clusters(
 |     top_spans: torch.Tensor,
 |     antecedent_indices: torch.Tensor,
 |     predicted_antecedents: torch.Tensor
 | ) -> Tuple[
 |         List[Tuple[Tuple[int, int], ...]], Dict[Tuple[int, int], Tuple[Tuple[int, int], ...]]
 |     ]

Scorer#

class Scorer:
 | def __init__(self, metric)

Mostly borrowed from https://github.com/clarkkev/deep-coref/blob/master/evaluation.py

update#

class Scorer:
 | ...
 | def update(
 |     self,
 |     predicted,
 |     gold,
 |     mention_to_predicted,
 |     mention_to_gold
 | )

get_f1#

class Scorer:
 | ...
 | def get_f1(self)

get_recall#

class Scorer:
 | ...
 | def get_recall(self)

get_precision#

class Scorer:
 | ...
 | def get_precision(self)

get_prf#

class Scorer:
 | ...
 | def get_prf(self)

b_cubed#

class Scorer:
 | ...
 | @staticmethod
 | def b_cubed(clusters, mention_to_gold)

Computes the B-cubed metric: precision and recall averaged per mention. See https://pdfs.semanticscholar.org/cfe3/c24695f1c14b78a5b8e95bcbd1c666140fd1.pdf

muc#

class Scorer:
 | ...
 | @staticmethod
 | def muc(clusters, mention_to_gold)

Counts the mentions in each predicted cluster that need to be re-allocated so that each predicted cluster is contained in its respective gold cluster. See https://aclweb.org/anthology/M/M95/M95-1005.pdf

phi4#

class Scorer:
 | ...
 | @staticmethod
 | def phi4(gold_clustering, predicted_clustering)

Subroutine for `ceafe`. Computes the mention F-measure between the gold and predicted mentions in a cluster.

ceafe#

class Scorer:
 | ...
 | @staticmethod
 | def ceafe(clusters, gold_clusters)

Computes the Constrained Entity-Alignment F-Measure (CEAF) for evaluating coreference. Gold and predicted mentions are aligned into clusterings that maximise a metric — in this case, the F-measure between gold and predicted clusters.

https://www.semanticscholar.org/paper/On-Coreference-Resolution-Performance-Metrics-Luo/de133c1f22d0dfe12539e25dda70f28672459b99

conll_coref_scores