qangaroo

allennlp_models.rc.dataset_readers.qangaroo

QangarooReader#

@DatasetReader.register("qangaroo")
class QangarooReader(DatasetReader):
 | def __init__(
 |     self,
 |     tokenizer: Tokenizer = None,
 |     token_indexers: Dict[str, TokenIndexer] = None,
 |     **kwargs
 | ) -> None

Reads a JSON-formatted Qangaroo file and returns a Dataset where the Instances have five fields: candidates, a ListField[TextField]; query, a TextField; supports, a ListField[TextField]; answer, a TextField; and answer_index, an IndexField. We also add a MetadataField that stores the instance's ID and annotations if they are present.

Parameters¶

tokenizer : Tokenizer, optional (default = SpacyTokenizer())
We use this Tokenizer for both the question and the passage. See Tokenizer. Default is `SpacyTokenizer()`.
token_indexers : Dict[str, TokenIndexer], optional
We similarly use this for both the question and the passage. See TokenIndexer. Default is {"tokens": SingleIdTokenIndexer()}.

text_to_instance#

class QangarooReader(DatasetReader):
 | ...
 | @overrides
 | def text_to_instance(
 |     self,
 |     candidates: List[str],
 |     query: str,
 |     supports: List[str],
 |     _id: str = None,
 |     answer: str = None,
 |     annotations: List[List[str]] = None
 | ) -> Instance