Skip to content

nlvr2

allennlp_models.vision.dataset_readers.nlvr2

[SOURCE]


Nlvr2Reader#

@DatasetReader.register("nlvr2")
class Nlvr2Reader(VisionReader):
 | def __init__(
 |     self,
 |     image_dir: Optional[Union[str, PathLike]] = None,
 |     *,
 |     image_loader: Optional[ImageLoader] = None,
 |     image_featurizer: Optional[Lazy[GridEmbedder]] = None,
 |     region_detector: Optional[Lazy[RegionDetector]] = None,
 |     feature_cache_dir: Optional[Union[str, PathLike]] = None,
 |     tokenizer: Optional[Tokenizer] = None,
 |     token_indexers: Optional[Dict[str, TokenIndexer]] = None,
 |     cuda_device: Optional[Union[int, torch.device]] = None,
 |     max_instances: Optional[int] = None,
 |     image_processing_batch_size: int = 8,
 |     write_to_cache: bool = True
 | ) -> None

Reads the NLVR2 dataset from http://lil.nlp.cornell.edu/nlvr/. In this task, the model is presented with two images and a hypothesis referring to those images. The task for the model is to identify whether the hypothesis is true or false. Accordingly, the instances produced by this reader contain two images, featurized into the fields "box_features" and "box_coordinates". In addition to that, it produces a TextField called "hypothesis", and a MetadataField called "identifier". The latter contains the question id from the question set.

Parameters

image_dir: `str`

Path to directory containing `png` image files.

image_loader: `ImageLoader`

An image loader to read the images with.

image_featurizer: `GridEmbedder`

The backbone image processor (like a ResNet), whose output will be passed to the region detector for finding object boxes in the image.

region_detector: `RegionDetector`

For pulling out regions of the image (both coordinates and features) that will be used by downstream models.

feature_cache_dir: `str`, optional

If given, the reader will attempt to use the featurized image cache in this directory. Caching the featurized images can result in big performance improvements, so it is recommended to set this.

tokenizer: `Tokenizer`, optional, defaults to `PretrainedTransformerTokenizer("bert-base-uncased")`

token_indexers: `Dict[str, TokenIndexer]`, optional, defaults to `{"tokens": PretrainedTransformerIndexer("bert-base-uncased")}`

cuda_device: `int`, optional

Set this to run image featurization on the given GPU. By default, image featurization runs on CPU.

max_instances: `int`, optional

If set, the reader only returns the first `max_instances` instances, and then stops. This is useful for testing.

image_processing_batch_size: `int`

The number of images to process at one time while featurizing. Default is 8.

extract_image_features#

class Nlvr2Reader(VisionReader):
 | ...
 | def extract_image_features(
 |     self,
 |     image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
 |     use_cache: bool
 | )

text_to_instance#

class Nlvr2Reader(VisionReader):
 | ...
 | def text_to_instance(
 |     self,
 |     identifier: Optional[str],
 |     hypothesis: str,
 |     image1: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
 |     image2: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
 |     label: Optional[bool] = None,
 |     use_cache: bool = True
 | ) -> Instance

apply_token_indexers#

class Nlvr2Reader(VisionReader):
 | ...
 | def apply_token_indexers(self, instance: Instance) -> None