snli
allennlp_models.pair_classification.dataset_readers.snli
maybe_collapse_label#
def maybe_collapse_label(label: str, collapse: bool)
Helper function that optionally collapses the "contradiction" and "neutral" labels into "non-entailment".
SnliReader#
@DatasetReader.register("snli")
class SnliReader(DatasetReader):
| def __init__(
| self,
| tokenizer: Optional[Tokenizer] = None,
| token_indexers: Dict[str, TokenIndexer] = None,
| combine_input_fields: Optional[bool] = None,
| collapse_labels: Optional[bool] = False,
| **kwargs
| ) -> None
Reads a file from the Stanford Natural Language Inference (SNLI) dataset. This data is formatted as jsonl, one json-formatted instance per line. The keys in the data are "gold_label", "sentence1", and "sentence2". We convert these keys into fields named "label", "premise" and "hypothesis", along with a metadata field containing the tokenized strings of the premise and hypothesis.
Registered as a DatasetReader
with name "snli".
Parameters¶
- tokenizer : Tokenizer, optional (default = SpacyTokenizer())
We use this Tokenizer for both the premise and the hypothesis. See Tokenizer.
- token_indexers : Dict[str, TokenIndexer], optional (default = {"tokens": SingleIdTokenIndexer()})
We similarly use this for both the premise and the hypothesis. See TokenIndexer.
- combine_input_fields : bool, optional (default = isinstance(tokenizer, PretrainedTransformerTokenizer))
If False, represent the premise and the hypothesis as separate fields in the instance. If True, tokenize them together using tokenizer.tokenize_sentence_pair() and provide a single tokens field in the instance.
- collapse_labels : bool, optional (default = False)
If True, the "neutral" and "contradiction" labels will be collapsed into "non-entailment"; "entailment" will be left unchanged.
text_to_instance#
class SnliReader(DatasetReader):
| ...
| @overrides
| def text_to_instance(
| self,
| premise: str,
| hypothesis: str,
| label: str = None
| ) -> Instance
apply_token_indexers#
class SnliReader(DatasetReader):
| ...
| @overrides
| def apply_token_indexers(self, instance: Instance) -> Instance