flickr30k

allennlp_models.vision.dataset_readers.flickr30k

get_caption_data#

def get_caption_data(filename: str)

Flickr30kReader#

@DatasetReader.register("flickr30k")
class Flickr30kReader(VisionReader):
 | def __init__(
 |     self,
 |     image_dir: Union[str, PathLike],
 |     *,
 |     image_loader: Optional[ImageLoader] = None,
 |     image_featurizer: Optional[Lazy[GridEmbedder]] = None,
 |     region_detector: Optional[Lazy[RegionDetector]] = None,
 |     feature_cache_dir: Optional[Union[str, PathLike]] = None,
 |     data_dir: Optional[Union[str, PathLike]] = None,
 |     tokenizer: Tokenizer = None,
 |     token_indexers: Dict[str, TokenIndexer] = None,
 |     cuda_device: Optional[Union[int, torch.device]] = None,
 |     max_instances: Optional[int] = None,
 |     image_processing_batch_size: int = 8,
 |     write_to_cache: bool = True,
 |     featurize_captions: bool = True,
 |     is_evaluation: bool = False,
 |     num_potential_hard_negatives: int = 100
 | ) -> None

Parameters

image_dir : `str`

Path to directory containing `png` image files.

image_loader : `ImageLoader`

image_featurizer : `Lazy[GridEmbedder]`

The backbone image processor (like a ResNet), whose output will be passed to the region detector for finding object boxes in the image.

region_detector : `Lazy[RegionDetector]`

For pulling out regions of the image (both coordinates and features) that will be used by downstream models.

data_dir : `str`

Path to directory containing text files for each dataset split. These files contain the captions and metadata for each task instance.

tokenizer : `Tokenizer`, optional

token_indexers : `Dict[str, TokenIndexer]`

featurize_captions : `bool`, optional

Whether to featurize captions while calculating hard negatives, or to use placeholder features instead.

is_evaluation : `bool`, optional

Whether the reader should return instances for evaluation or for training.

num_potential_hard_negatives : `int`, optional

The number of potential hard negatives to consider.

text_to_instance#

class Flickr30kReader(VisionReader):
 | ...
 | def text_to_instance(
 |     self,
 |     caption_dicts: List[Dict[str, Any]],
 |     image_index: int,
 |     caption_index: int,
 |     features_list: List[TensorField] = [],
 |     coordinates_list: List[TensorField] = [],
 |     masks_list: List[TensorField] = [],
 |     hard_negative_features: Optional[Tensor] = None,
 |     hard_negative_coordinates: Optional[Tensor] = None,
 |     label: int = 0
 | )

get_hard_negatives#

class Flickr30kReader(VisionReader):
 | ...
 | def get_hard_negatives(
 |     self,
 |     image_index: int,
 |     caption_index: int,
 |     caption_dicts: List[Dict[str, Any]],
 |     averaged_features: Tensor,
 |     features_list: List[TensorField],
 |     coordinates_list: List[TensorField],
 |     caption_tensor: Tensor
 | ) -> Tuple[Tensor, Tensor]

get_caption_features#

class Flickr30kReader(VisionReader):
 | ...
 | def get_caption_features(self, captions)

apply_token_indexers#

class Flickr30kReader(VisionReader):
 | ...
 | def apply_token_indexers(self, instance: Instance) -> None