allennlp_models.pair_classification.dataset_readers.snli

maybe_collapse_label#

def maybe_collapse_label(label: str, collapse: bool)

Helper function that optionally collapses the "contradiction" and "neutral" labels into "non-entailment".
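
A minimal sketch consistent with that description (the exact source may differ slightly):

def maybe_collapse_label(label: str, collapse: bool) -> str:
    # Collapse "contradiction" and "neutral" into "non-entailment";
    # leave "entailment" (and any other label) unchanged.
    if collapse and label in ("contradiction", "neutral"):
        return "non-entailment"
    return label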

SnliReader#

@DatasetReader.register("snli")
class SnliReader(DatasetReader):
 | def __init__(
 |     self,
 |     tokenizer: Optional[Tokenizer] = None,
 |     token_indexers: Dict[str, TokenIndexer] = None,
 |     combine_input_fields: Optional[bool] = None,
 |     collapse_labels: Optional[bool] = False,
 |     **kwargs
 | ) -> None

Reads a file from the Stanford Natural Language Inference (SNLI) dataset. This data is formatted as jsonl, one json-formatted instance per line. The keys in the data are "gold_label", "sentence1", and "sentence2". We convert these keys into fields named "label", "premise" and "hypothesis", along with a metadata field containing the tokenized strings of the premise and hypothesis.

Registered as a DatasetReader with name "snli".
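
A minimal usage sketch, assuming allennlp and allennlp-models are installed; the path "snli_sample.jsonl" is illustrative and should point to a local SNLI-formatted file:

from allennlp_models.pair_classification.dataset_readers.snli import SnliReader

reader = SnliReader()
for instance in reader.read("snli_sample.jsonl"):
    # With the defaults, each instance has "premise", "hypothesis",
    # "label", and "metadata" fields.
    print(instance)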

Parameters

  • tokenizer : Tokenizer, optional (default = SpacyTokenizer())
    We use this Tokenizer for both the premise and the hypothesis. See Tokenizer.
  • token_indexers : Dict[str, TokenIndexer], optional (default = {"tokens": SingleIdTokenIndexer()})
    We similarly use this for both the premise and the hypothesis. See TokenIndexer.
  • combine_input_fields : bool, optional (default = isinstance(tokenizer, PretrainedTransformerTokenizer))
    If False, represent the premise and the hypothesis as separate fields in the instance. If True, tokenize them together using tokenizer.tokenize_sentence_pair() and provide a single tokens field in the instance (see the configuration sketch after this list).
  • collapse_labels : bool, optional (default = False)
    If True, the "neutral" and "contradiction" labels will be collapsed into "non-entailment"; "entailment" will be left unchanged.
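
A hedged configuration sketch showing both options; the model name "bert-base-uncased" is an illustrative choice:

from allennlp.data.token_indexers import PretrainedTransformerIndexer
from allennlp.data.tokenizers import PretrainedTransformerTokenizer
from allennlp_models.pair_classification.dataset_readers.snli import SnliReader

model_name = "bert-base-uncased"  # illustrative
reader = SnliReader(
    tokenizer=PretrainedTransformerTokenizer(model_name),
    token_indexers={"tokens": PretrainedTransformerIndexer(model_name)},
    # combine_input_fields is left at its default, which is True here because the
    # tokenizer is a PretrainedTransformerTokenizer, so each instance gets a single
    # "tokens" field instead of separate "premise"/"hypothesis" fields.
    collapse_labels=True,  # "neutral"/"contradiction" become "non-entailment"
)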

text_to_instance#

class SnliReader(DatasetReader):
 | ...
 | def text_to_instance(
 |     self,
 |     premise: str,
 |     hypothesis: str,
 |     label: str = None
 | ) -> Instance
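
A small usage sketch, reusing the reader constructed earlier; the sentence pair and label are illustrative:

instance = reader.text_to_instance(
    premise="A soccer game with multiple males playing.",
    hypothesis="Some men are playing a sport.",
    label="entailment",
)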

apply_token_indexers#

class SnliReader(DatasetReader):
 | ...
 | def apply_token_indexers(self, instance: Instance) -> Instance
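
AllenNLP's data loaders normally call this hook themselves; the sketch below only illustrates manual use, assuming instance comes from text_to_instance above and vocab is an allennlp.data.Vocabulary built elsewhere:

reader.apply_token_indexers(instance)  # attach the reader's token indexers to the text fields
instance.index_fields(vocab)           # the instance can now be indexed against the vocabulary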