nlvr2
allennlp_models.vision.dataset_readers.nlvr2
Nlvr2Reader
@DatasetReader.register("nlvr2")
class Nlvr2Reader(VisionReader):
| def __init__(
|     self,
|     image_dir: Optional[Union[str, PathLike]] = None,
|     *,
|     image_loader: Optional[ImageLoader] = None,
|     image_featurizer: Optional[Lazy[GridEmbedder]] = None,
|     region_detector: Optional[Lazy[RegionDetector]] = None,
|     feature_cache_dir: Optional[Union[str, PathLike]] = None,
|     tokenizer: Optional[Tokenizer] = None,
|     token_indexers: Optional[Dict[str, TokenIndexer]] = None,
|     cuda_device: Optional[Union[int, torch.device]] = None,
|     max_instances: Optional[int] = None,
|     image_processing_batch_size: int = 8,
|     write_to_cache: bool = True
| ) -> None
Reads the NLVR2 dataset from http://lil.nlp.cornell.edu/nlvr/.
In this task, the model is presented with two images and a hypothesis referring to those images.
The task for the model is to identify whether the hypothesis is true or false.
Accordingly, the instances produced by this reader contain two images, featurized into the
fields "box_features" and "box_coordinates". In addition to that, it produces a TextField
called "hypothesis", and a MetadataField
called "identifier". The latter contains the question id from the question set.
Parameters

image_dir: `str`, optional
Path to directory containing `png` image files.
image_loader: ImageLoader
An image loader to read the images with
image_featurizer: GridEmbedder
The backbone image processor (like a ResNet), whose output will be passed to the region
detector for finding object boxes in the image.
region_detector: RegionDetector
For pulling out regions of the image (both coordinates and features) that will be used by
downstream models.
feature_cache_dir: str, optional
If given, the reader will attempt to use the featurized image cache in this directory.
Caching the featurized images can result in big performance improvements, so it is
recommended to set this.
tokenizer: Tokenizer, optional (defaults to PretrainedTransformerTokenizer("bert-base-uncased"))
token_indexers: Dict[str, TokenIndexer], optional (defaults to {"tokens": PretrainedTransformerIndexer("bert-base-uncased")})
cuda_device: int, optional
Set this to run image featurization on the given GPU. By default, image featurization runs on CPU.
max_instances: int, optional
If set, the reader only returns the first `max_instances` instances, and then stops.
This is useful for testing.
image_processing_batch_size: int
The number of images to process at one time while featurizing. Default is 8.
extract_image_features
class Nlvr2Reader(VisionReader):
| ...
| def extract_image_features(
| self,
| image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
| use_cache: bool
| )
text_to_instance
class Nlvr2Reader(VisionReader):
| ...
| def text_to_instance(
| self,
| identifier: Optional[str],
| hypothesis: str,
| image1: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
| image2: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
| label: Optional[bool] = None,
| use_cache: bool = True
| ) -> Instance
apply_token_indexers
class Nlvr2Reader(VisionReader):
| ...
| def apply_token_indexers(self, instance: Instance) -> None