vqav2
allennlp_models.vision.dataset_readers.vqav2
contractions#
contractions = {
"aint": "ain't",
"arent": "aren't",
"cant": "can't",
"couldve": "could've",
"c ...
manual_map#
manual_map = {
"none": "0",
"zero": "0",
"one": "1",
"two": "2",
"three": "3",
"four": "4 ...
articles#
articles = ["a", "an", "the"]
period_strip#
period_strip = re.compile(r"(?<!\d)(\.)(?!\d)")
comma_strip#
comma_strip = re.compile(r"(\d)(\,)(\d)")
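To make the intent concrete (a sketch, not part of the module): period_strip removes periods that are not decimal points, and comma_strip matches a comma used as a thousands separator inside a number.

>>> period_strip.sub("", "u.s.a.")
'usa'
>>> period_strip.sub("", "a 1.5 liter bottle")
'a 1.5 liter bottle'
>>> bool(comma_strip.search("1,000"))
True
>>> bool(comma_strip.search("yes, maybe"))
False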
punct#
punct = [
";",
r"/",
"[",
"]",
'"',
"{",
"}",
"(",
")",
"=",
"+" ...
process_punctuation#
def process_punctuation(inText: str) -> str
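The body is omitted in this summary. It follows the punctuation handling of the original VQA evaluation script; a sketch under that assumption, using the module-level punct, comma_strip, and period_strip defined above:

def process_punctuation(inText: str) -> str:
    outText = inText
    for p in punct:
        # If the mark already abuts a space (or the text contains a
        # number like "1,000"), delete it outright; otherwise replace
        # it with a space so glued tokens split apart.
        if (p + " " in inText or " " + p in inText) or (
            comma_strip.search(inText) is not None
        ):
            outText = outText.replace(p, "")
        else:
            outText = outText.replace(p, " ")
    # Finally drop periods that are not decimal points.
    return period_strip.sub("", outText)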
process_digit_article#
def process_digit_article(input: str) -> str
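Also shown without a body here. A sketch of the likely behavior, mirroring the original VQA evaluation script and using manual_map, articles, and contractions from above: number words become digits, articles are dropped, and bare contractions gain their apostrophes.

def process_digit_article(input: str) -> str:
    out_text = []
    for word in input.lower().split():
        word = manual_map.get(word, word)  # e.g. "two" -> "2"
        if word not in articles:           # drop "a", "an", "the"
            out_text.append(word)
    for i, word in enumerate(out_text):
        if word in contractions:
            out_text[i] = contractions[word]  # e.g. "aint" -> "ain't"
    return " ".join(out_text)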
preprocess_answer#
@lru_cache(maxsize=None)
def preprocess_answer(answer: str) -> str
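This chains the two helpers above and is memoized with lru_cache, since the same answer strings recur many times across the dataset. A sketch of the likely pipeline:

from functools import lru_cache

@lru_cache(maxsize=None)
def preprocess_answer(answer: str) -> str:
    answer = process_digit_article(process_punctuation(answer))
    return answer.replace(",", "")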
get_score#
def get_score(count: int) -> float
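This implements the standard VQA accuracy rule: an answer counts as fully correct if at least 3 of the 10 annotators gave it, and partially correct otherwise. A sketch:

def get_score(count: int) -> float:
    # 3+ matching annotators -> 1.0; 2 -> ~0.67; 1 -> ~0.33; 0 -> 0.0
    return min(1.0, count / 3)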
VQAv2Reader#
@DatasetReader.register("vqav2")
class VQAv2Reader(VisionReader):
| def __init__(
| self,
| image_dir: Optional[Union[str, PathLike]] = None,
| *,
| image_loader: Optional[ImageLoader] = None,
| image_featurizer: Optional[Lazy[GridEmbedder]] = None,
| region_detector: Optional[Lazy[RegionDetector]] = None,
| answer_vocab: Optional[Union[Vocabulary, str]] = None,
| feature_cache_dir: Optional[Union[str, PathLike]] = None,
| tokenizer: Optional[Tokenizer] = None,
| token_indexers: Optional[Dict[str, TokenIndexer]] = None,
| cuda_device: Optional[Union[int, torch.device]] = None,
| max_instances: Optional[int] = None,
| image_processing_batch_size: int = 8,
| multiple_answers_per_question: bool = True,
| write_to_cache: bool = True
| ) -> None
Parameters#

image_dir: `str`
Path to directory containing `png` image files.
image_loader: ImageLoader
The image loader component used to load the images.
image_featurizer: Lazy[GridEmbedder]
The backbone image processor (like a ResNet), whose output will be passed to the region
detector for finding object boxes in the image.
region_detector: Lazy[RegionDetector]
For pulling out regions of the image (both coordinates and features) that will be used by
downstream models.
answer_vocab: Union[Vocabulary, str], optional
The vocabulary to use for answers. The reader will look into the "answers"
namespace in the vocabulary to find possible answers.
If this is given, the reader only outputs instances with answers contained in this vocab.
If this is not given, the reader outputs all instances with all answers.
If this is a URL or filename, we will download a previously saved vocabulary from there.
feature_cache_dir: Union[str, PathLike], optional
An optional directory to cache the featurized images in. Featurizing images takes a long
time, and many images are duplicated, so we highly recommend using this cache.
tokenizer: Tokenizer, optional
The Tokenizer to use to tokenize the text. By default, this uses the tokenizer for
"bert-base-uncased".
token_indexers: Dict[str, TokenIndexer], optional
The TokenIndexer to use. By default, this uses the indexer for "bert-base-uncased".
cuda_device: Union[int, torch.device], optional
Either a torch device or a GPU number. This is the GPU we'll use to featurize the images.
max_instances: int, optional
For debugging, you can use this parameter to limit the number of instances the reader
returns.
image_processing_batch_size: int
The number of images to process at one time while featurizing. Default is 8.
multiple_answers_per_question: bool
VQA questions have multiple answers. By default, we use all of them, and give more
points to the more common answer. But VQA also has a special answer, the so-called
"multiple choice answer". If this is set to False, we only use that answer.
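For reference, a minimal sketch of constructing the reader directly in Python. Every path below is a placeholder, and in practice the image_loader, image_featurizer, and region_detector would usually come from a training config:

from allennlp.data.token_indexers import PretrainedTransformerIndexer
from allennlp.data.tokenizers import PretrainedTransformerTokenizer
from allennlp_models.vision.dataset_readers.vqav2 import VQAv2Reader

reader = VQAv2Reader(
    image_dir="/path/to/coco/images",            # placeholder
    feature_cache_dir="/path/to/feature_cache",  # placeholder
    tokenizer=PretrainedTransformerTokenizer("bert-base-uncased"),
    token_indexers={"tokens": PretrainedTransformerIndexer("bert-base-uncased")},
    max_instances=100,  # small limit, handy for debugging
)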
text_to_instance#
class VQAv2Reader(VisionReader):
| ...
| @overrides
| def text_to_instance(
| self,
| question: str,
| image: Union[str, Tuple[Tensor, Tensor]],
| answer_counts: Optional[MutableMapping[str, int]] = None,
| *,
| use_cache: bool = True
| ) -> Optional[Instance]
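A sketch of calling it directly; the question, path, and counts below are made up. answer_counts maps each raw answer string to the number of annotators who gave it, and the Optional return type reflects that the reader yields None when answer_vocab is set and none of the answers appear in it.

instance = reader.text_to_instance(
    question="What color is the bus?",
    image="/path/to/coco/images/000000123456.jpg",  # placeholder
    answer_counts={"blue": 7, "green": 3},
)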
apply_token_indexers#
class VQAv2Reader(VisionReader):
| ...
| @overrides
| def apply_token_indexers(self, instance: Instance) -> None
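The body is not reproduced here. In AllenNLP vision readers this hook typically just attaches the configured indexers to the question's TextField; a sketch of the likely implementation:

def apply_token_indexers(self, instance: Instance) -> None:
    instance["question"].token_indexers = self._token_indexers  # type: ignore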