Skip to content





class QangarooReader(DatasetReader):
 | def __init__(
 |     self,
 |     tokenizer: Tokenizer = None,
 |     token_indexers: Dict[str, TokenIndexer] = None,
 |     **kwargs
 | ) -> None

Reads a JSON-formatted Qangaroo file and returns a Dataset where the Instances have six fields: candidates, a ListField[TextField]; query, a TextField; supports, a ListField[TextField]; answer, a TextField; and answer_index, an IndexField. We also add a MetadataField that stores the instance's ID and its annotations if they are present.


  • tokenizer : Tokenizer, optional (default = SpacyTokenizer())
    We use this Tokenizer for both the question and the passage. See Tokenizer. Default is `SpacyTokenizer()`.

  • token_indexers : Dict[str, TokenIndexer], optional
    We similarly use this for both the question and the passage. See TokenIndexer. Default is {"tokens": SingleIdTokenIndexer()}.


class QangarooReader(DatasetReader):
 | ...
 | def text_to_instance(
 |     self,
 |     candidates: List[str],
 |     query: str,
 |     supports: List[str],
 |     _id: str = None,
 |     answer: str = None,
 |     annotations: List[List[str]] = None
 | ) -> Instance