flickr30k
allennlp_models.vision.dataset_readers.flickr30k
get_caption_data#
def get_caption_data(filename: str)
Flickr30kReader#
@DatasetReader.register("flickr30k")
class Flickr30kReader(VisionReader):
| def __init__(
| self,
| image_dir: Union[str, PathLike],
| *,
| image_loader: Optional[ImageLoader] = None,
| image_featurizer: Optional[Lazy[GridEmbedder]] = None,
| region_detector: Optional[Lazy[RegionDetector]] = None,
| feature_cache_dir: Optional[Union[str, PathLike]] = None,
| data_dir: Optional[Union[str, PathLike]] = None,
| tokenizer: Tokenizer = None,
| token_indexers: Dict[str, TokenIndexer] = None,
| cuda_device: Optional[Union[int, torch.device]] = None,
| max_instances: Optional[int] = None,
| image_processing_batch_size: int = 8,
| write_to_cache: bool = True,
| featurize_captions: bool = True,
| is_evaluation: bool = False,
| num_potential_hard_negatives: int = 100
| ) -> None
Parameters
image_dir: `str`
Path to directory containing `png` image files.
image_loader : ImageLoader
image_featurizer: Lazy[GridEmbedder]
The backbone image processor (like a ResNet), whose output will be passed to the region
detector for finding object boxes in the image.
region_detector: Lazy[RegionDetector]
For pulling out regions of the image (both coordinates and features) that will be used by
downstream models.
data_dir: str
Path to directory containing text files for each dataset split. These files contain
the captions and metadata for each task instance.
tokenizer: Tokenizer, optional
token_indexers: Dict[str, TokenIndexer]
featurize_captions: bool, optional
If we should featurize captions while calculating hard negatives, or use placeholder features.
is_evaluation: bool, optional
If the reader should return instances for evaluation or training.
num_potential_hard_negatives: int, optional
The number of potential hard negatives to consider.
text_to_instance#
class Flickr30kReader(VisionReader):
| ...
| def text_to_instance(
| self,
| caption_dicts: List[Dict[str, Any]],
| image_index: int,
| caption_index: int,
| features_list: List[TensorField] = [],
| coordinates_list: List[TensorField] = [],
| masks_list: List[TensorField] = [],
| hard_negative_features: Optional[Tensor] = None,
| hard_negative_coordinates: Optional[Tensor] = None,
| label: int = 0
| )
get_hard_negatives#
class Flickr30kReader(VisionReader):
| ...
| def get_hard_negatives(
| self,
| image_index: int,
| caption_index: int,
| caption_dicts: List[Dict[str, Any]],
| averaged_features: Tensor,
| features_list: List[TensorField],
| coordinates_list: List[TensorField],
| caption_tensor: Tensor
| ) -> Tuple[Tensor, Tensor]
get_caption_features#
class Flickr30kReader(VisionReader):
| ...
| def get_caption_features(self, captions)
apply_token_indexers#
class Flickr30kReader(VisionReader):
| ...
| def apply_token_indexers(self, instance: Instance) -> None