Skip to content

vilbert_backbone

allennlp.modules.backbones.vilbert_backbone

[SOURCE]


VilbertBackbone

@Backbone.register("vilbert")
@Backbone.register("vilbert_from_huggingface", constructor="from_huggingface_model_name")
class VilbertBackbone(Backbone):
 | def __init__(
 |     self,
 |     vocab: Vocabulary,
 |     text_embeddings: TransformerEmbeddings,
 |     image_embeddings: ImageFeatureEmbeddings,
 |     encoder: BiModalEncoder,
 |     pooled_output_dim: int,
 |     fusion_method: str = "sum",
 |     dropout: float = 0.1,
 |     vocab_namespace: str = "tokens"
 | ) -> None

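Uses a ViLBERT model as a `Backbone`. Registered as a `Backbone` with name "vilbert"; the alternate constructor `from_huggingface_model_name` is registered under the name "vilbert_from_huggingface".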

from_huggingface_model_name

class VilbertBackbone(Backbone):
 | ...
 | @classmethod
 | def from_huggingface_model_name(
 |     cls,
 |     vocab: Vocabulary,
 |     model_name: str,
 |     image_feature_dim: int,
 |     image_num_hidden_layers: int,
 |     image_hidden_size: int,
 |     image_num_attention_heads: int,
 |     combined_hidden_size: int,
 |     combined_num_attention_heads: int,
 |     pooled_output_dim: int,
 |     image_intermediate_size: int,
 |     image_attention_dropout: float,
 |     image_hidden_dropout: float,
 |     image_biattention_id: List[int],
 |     text_biattention_id: List[int],
 |     text_fixed_layer: int,
 |     image_fixed_layer: int,
 |     fusion_method: str = "sum"
 | )

forward

class VilbertBackbone(Backbone):
 | ...
 | def forward(
 |     self,
 |     box_features: torch.Tensor,
 |     box_coordinates: torch.Tensor,
 |     box_mask: torch.Tensor,
 |     text: TextFieldTensors
 | ) -> Dict[str, torch.Tensor]