vgqa

allennlp_models.vision.dataset_readers.vgqa

VGQAReader
@DatasetReader.register("vgqa")
class VGQAReader(VisionReader):
| def __init__(
|     self,
|     image_dir: Optional[Union[str, PathLike]] = None,
|     *,
|     image_loader: Optional[ImageLoader] = None,
|     image_featurizer: Optional[Lazy[GridEmbedder]] = None,
|     region_detector: Optional[Lazy[RegionDetector]] = None,
|     answer_vocab: Optional[Union[Vocabulary, str]] = None,
|     feature_cache_dir: Optional[Union[str, PathLike]] = None,
|     tokenizer: Optional[Tokenizer] = None,
|     token_indexers: Optional[Dict[str, TokenIndexer]] = None,
|     cuda_device: Optional[Union[int, torch.device]] = None,
|     max_instances: Optional[int] = None,
|     image_processing_batch_size: int = 8,
|     write_to_cache: bool = True
| ) -> None
Parameters

image_dir: `str`
Path to directory containing `png` image files.
image_loader: ImageLoader
The image loader component used to load the images.
image_featurizer: Lazy[GridEmbedder]
The backbone image processor (like a ResNet), whose output will be passed to the region
detector for finding object boxes in the image.
region_detector: Lazy[RegionDetector]
For pulling out regions of the image (both coordinates and features) that will be used by
downstream models.
answer_vocab: `Union[Vocabulary, str]`, optional
The vocabulary to use for answers. The reader will look into the "answers" namespace
in the vocabulary to find possible answers.
If this is given, the reader only outputs instances with answers contained in this vocab.
If this is not given, the reader outputs all instances with all answers.
If this is a URL or filename, we will download a previously saved vocabulary from there.
feature_cache_dir: `Union[str, PathLike]`, optional
An optional directory to cache the featurized images in. Featurizing images takes a long
time, and many images are duplicated, so we highly recommend to use this cache.
tokenizer: `Tokenizer`, optional
The `Tokenizer` to use to tokenize the text. By default, this uses the tokenizer
for "bert-base-uncased".
token_indexers: `Dict[str, TokenIndexer]`, optional
The `TokenIndexer` to use. By default, this uses the indexer for "bert-base-uncased".
cuda_device: `Union[int, torch.device]`, optional
Either a torch device or a GPU number. This is the GPU we'll use to featurize the images.
max_instances: `int`, optional
For debugging, you can use this parameter to limit the number of instances the reader
returns.
image_processing_batch_size: int
The number of images to process at one time while featurizing. Default is 8.
text_to_instance
class VGQAReader(VisionReader):
| ...
| def text_to_instance(
| self,
| qa_id: int,
| question: str,
| answer: Optional[str],
| image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
| use_cache: bool = True,
| keep_impossible_questions: bool = True
| ) -> Optional[Instance]
apply_token_indexers
class VGQAReader(VisionReader):
| ...
| def apply_token_indexers(self, instance: Instance) -> None