allennlp_models.vision.dataset_readers.vgqa

VGQAReader#

@DatasetReader.register("vgqa")
class VGQAReader(VisionReader):
 | def __init__(
 |     self,
 |     image_dir: Optional[Union[str, PathLike]] = None,
 |     *,
 |     image_loader: Optional[ImageLoader] = None,
 |     image_featurizer: Optional[Lazy[GridEmbedder]] = None,
 |     region_detector: Optional[Lazy[RegionDetector]] = None,
 |     answer_vocab: Optional[Union[Vocabulary, str]] = None,
 |     feature_cache_dir: Optional[Union[str, PathLike]] = None,
 |     tokenizer: Optional[Tokenizer] = None,
 |     token_indexers: Optional[Dict[str, TokenIndexer]] = None,
 |     cuda_device: Optional[Union[int, torch.device]] = None,
 |     max_instances: Optional[int] = None,
 |     image_processing_batch_size: int = 8,
 |     write_to_cache: bool = True
 | ) -> None

Parameters

image_dir: `str`
    Path to directory containing `png` image files.

image_loader: `ImageLoader`
    The image loader component used to load the images.

image_featurizer: `Lazy[GridEmbedder]`
    The backbone image processor (like a ResNet), whose output will be passed to the region detector for finding object boxes in the image.

region_detector: `Lazy[RegionDetector]`
    For pulling out regions of the image (both coordinates and features) that will be used by downstream models.

answer_vocab: `Union[Vocabulary, str]`, optional
    The vocabulary to use for answers. The reader will look into the "answers" namespace in the vocabulary to find possible answers. If this is given, the reader only outputs instances with answers contained in this vocab. If this is not given, the reader outputs all instances with all answers. If this is a URL or filename, we will download a previously saved vocabulary from there.

feature_cache_dir: `Union[str, PathLike]`, optional
    An optional directory to cache the featurized images in. Featurizing images takes a long time, and many images are duplicated, so we highly recommend using this cache.

tokenizer: `Tokenizer`, optional
    The `Tokenizer` to use to tokenize the text. By default, this uses the tokenizer for "bert-base-uncased".

token_indexers: `Dict[str, TokenIndexer]`, optional
    The `TokenIndexer` to use. By default, this uses the indexer for "bert-base-uncased".

cuda_device: `Union[int, torch.device]`, optional
    Either a torch device or a GPU number. This is the GPU we'll use to featurize the images.

max_instances: `int`, optional
    For debugging, you can use this parameter to limit the number of instances the reader returns.

image_processing_batch_size: `int`
    The number of images to process at one time while featurizing. Default is 8.
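
In practice the reader is usually built from configuration rather than constructed by hand. Below is a minimal sketch of building it with `DatasetReader.from_params`; the registered component names (`"torch"`, `"resnet_backbone"`, `"faster_rcnn"`, `"pretrained_transformer"`) and all file paths are assumptions about a typical `allennlp_models` setup, not values taken from this page, so adjust them to your environment and version.

```python
from allennlp.common import Params
from allennlp.data import DatasetReader

import allennlp_models.vision  # noqa: F401 -- registers "vgqa" and the vision components

# A sketch only: registered names and paths below are assumptions.
reader = DatasetReader.from_params(
    Params(
        {
            "type": "vgqa",
            "image_dir": "/data/visual_genome/images",  # assumed local path
            "image_loader": {"type": "torch"},
            "image_featurizer": {"type": "resnet_backbone"},
            "region_detector": {"type": "faster_rcnn"},
            "feature_cache_dir": "/data/visual_genome/feature_cache",  # assumed local path
            "tokenizer": {"type": "pretrained_transformer", "model_name": "bert-base-uncased"},
            "token_indexers": {
                "tokens": {"type": "pretrained_transformer", "model_name": "bert-base-uncased"}
            },
        }
    )
)

# Path to the Visual Genome QA json (assumed local path).
for instance in reader.read("/data/visual_genome/question_answers.json"):
    ...
```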

text_to_instance#

class VGQAReader(VisionReader):
 | ...
 | def text_to_instance(
 |     self,
 |     qa_id: int,
 |     question: str,
 |     answer: Optional[str],
 |     image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
 |     use_cache: bool = True,
 |     keep_impossible_questions: bool = True
 | ) -> Optional[Instance]
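
A minimal sketch of calling `text_to_instance` directly, reusing the `reader` built in the earlier example; the id, question/answer strings, and image path are made-up illustrative values.

```python
instance = reader.text_to_instance(
    qa_id=986768,
    question="What color is the hydrant?",
    answer="yellow",
    image="/data/visual_genome/images/2317.jpg",  # a path, or precomputed feature tensors
)
if instance is not None:  # may be None if the question was filtered (e.g. by answer_vocab)
    print(instance.fields.keys())
```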

apply_token_indexers#

class VGQAReader(VisionReader):
 | ...
 | def apply_token_indexers(self, instance: Instance) -> None
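
Instances coming out of `text_to_instance` do not yet carry token indexers; `apply_token_indexers` attaches this reader's indexers to the question field. AllenNLP's data loaders call it for you, but a short sketch of doing it by hand (reusing `reader` and `instance` from above, with an assumed `vocab`) looks like this:

```python
# Sketch only: normally the data loader calls apply_token_indexers automatically.
reader.apply_token_indexers(instance)  # attaches the reader's token indexers to the question field
instance.index_fields(vocab)           # `vocab` is whatever Vocabulary your model uses (assumed)
```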