allennlp_models.rc.modules.seq2seq_encoders.qanet_encoder

QaNetEncoder#

@Seq2SeqEncoder.register("qanet_encoder", exist_ok=True)
class QaNetEncoder(Seq2SeqEncoder):
 | def __init__(
 |     self,
 |     input_dim: int,
 |     hidden_dim: int,
 |     attention_projection_dim: int,
 |     feedforward_hidden_dim: int,
 |     num_blocks: int,
 |     num_convs_per_block: int,
 |     conv_kernel_size: int,
 |     num_attention_heads: int,
 |     use_positional_encoding: bool = True,
 |     dropout_prob: float = 0.1,
 |     layer_dropout_undecayed_prob: float = 0.1,
 |     attention_dropout_prob: float = 0
 | ) -> None

Stacks multiple QaNetEncoderBlock layers into one sequence encoder.

Parameters

  • input_dim : int
    The input dimension of the encoder.
  • hidden_dim : int
    The hidden dimension used for the convolution output channels, the multi-head attention output, and the final output of the feedforward layer.
  • attention_projection_dim : int
    The dimension of the linear projections for the self-attention layers.
  • feedforward_hidden_dim : int
    The middle dimension of the FeedForward network. The input and output dimensions are fixed to ensure sizes match up for the self-attention layers.
  • num_blocks : int
    The number of stacked encoder blocks.
  • num_convs_per_block : int
    The number of convolutions in each block.
  • conv_kernel_size : int
    The kernel size for convolution.
  • num_attention_heads : int
    The number of attention heads to use per layer.
  • use_positional_encoding : bool, optional (default = True)
    Whether to add sinusoidal frequencies to the input tensor. This is strongly recommended: without it, the self-attention layers have no notion of absolute or relative position (they only compute pairwise similarities between elements), which is an important signal for many tasks.
  • dropout_prob : float, optional (default = 0.1)
    The dropout probability for the feedforward network.
  • layer_dropout_undecayed_prob : float, optional (default = 0.1)
    The undecayed dropout probability for layer dropout; the effective probability for each sublayer depends on its depth in the stack. For each mini-batch, the convolution/attention/feedforward sublayers are stochastically dropped according to their layer dropout probabilities.
  • attention_dropout_prob : float, optional (default = 0.0)
    The dropout probability for the attention distributions in the attention layer.
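
A minimal construction sketch. The dimension values below are illustrative, not defaults, and attention_projection_dim is assumed to be divisible by num_attention_heads:

from allennlp_models.rc.modules.seq2seq_encoders.qanet_encoder import QaNetEncoder

# Illustrative dimensions; attention_projection_dim should be divisible
# by num_attention_heads.
encoder = QaNetEncoder(
    input_dim=128,
    hidden_dim=128,
    attention_projection_dim=128,
    feedforward_hidden_dim=128,
    num_blocks=2,
    num_convs_per_block=4,
    conv_kernel_size=7,
    num_attention_heads=8,
)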

get_input_dim#

class QaNetEncoder(Seq2SeqEncoder):
 | ...
 | @overrides
 | def get_input_dim(self) -> int

get_output_dim#

class QaNetEncoder(Seq2SeqEncoder):
 | ...
 | @overrides
 | def get_output_dim(self) -> int

is_bidirectional#

class QaNetEncoder(Seq2SeqEncoder):
 | ...
 | @overrides
 | def is_bidirectional(self) -> bool

forward#

class QaNetEncoder(Seq2SeqEncoder):
 | ...
 | @overrides
 | def forward(
 |     self,
 |     inputs: torch.Tensor,
 |     mask: torch.BoolTensor = None
 | ) -> torch.Tensor
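
A sketch of a forward pass, assuming encoder was constructed as in the example above; the boolean mask marks real (non-padded) positions with True:

import torch

batch_size, seq_len = 4, 20
inputs = torch.randn(batch_size, seq_len, encoder.get_input_dim())
mask = torch.ones(batch_size, seq_len, dtype=torch.bool)  # True = real token

outputs = encoder(inputs, mask)
# The encoder keeps the sequence length; the feature size is get_output_dim() (hidden_dim).
assert outputs.shape == (batch_size, seq_len, encoder.get_output_dim())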

QaNetEncoderBlock#

@Seq2SeqEncoder.register("qanet_encoder_block", exist_ok=True)
class QaNetEncoderBlock(Seq2SeqEncoder):
 | def __init__(
 |     self,
 |     input_dim: int,
 |     hidden_dim: int,
 |     attention_projection_dim: int,
 |     feedforward_hidden_dim: int,
 |     num_convs: int,
 |     conv_kernel_size: int,
 |     num_attention_heads: int,
 |     use_positional_encoding: bool = True,
 |     dropout_prob: float = 0.1,
 |     layer_dropout_undecayed_prob: float = 0.1,
 |     attention_dropout_prob: float = 0
 | ) -> None

Implements the encoder block described in QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension.

One encoder block consists of four main parts:

1. Adding a positional embedding.
2. Several depthwise separable convolutions.
3. Multi-headed self-attention, which uses two learned linear projections
   to compute a scaled dot-product similarity between every pair of elements.
4. A two-layer FeedForward network.

Parameters

  • input_dim : int
    The input dimension of the encoder.
  • hidden_dim : int
    The hidden dimension used for the convolution output channels, the multi-head attention output, and the final output of the feedforward layer.
  • attention_projection_dim : int
    The dimension of the linear projections for the self-attention layers.
  • feedforward_hidden_dim : int
    The middle dimension of the FeedForward network. The input and output dimensions are fixed to ensure sizes match up for the self-attention layers.
  • num_convs : int
    The number of convolutions in each block.
  • conv_kernel_size : int
    The kernel size for convolution.
  • num_attention_heads : int
    The number of attention heads to use per layer.
  • use_positional_encoding : bool, optional (default = True)
    Whether to add sinusoidal frequencies to the input tensor. This is strongly recommended: without it, the self-attention layers have no notion of absolute or relative position (they only compute pairwise similarities between elements), which is an important signal for many tasks.
  • dropout_prob : float, optional (default = 0.1)
    The dropout probability for the feedforward network.
  • layer_dropout_undecayed_prob : float, optional (default = 0.1)
    The undecayed dropout probability for layer dropout; the effective probability for each sublayer depends on its depth in the stack. For each mini-batch, the convolution/attention/feedforward sublayers are stochastically dropped according to their layer dropout probabilities.
  • attention_dropout_prob : float, optional (default = 0.0)
    The dropout probability for the attention distributions in the attention layer.
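
Because the block is registered as "qanet_encoder_block", it can also be built from configuration through Seq2SeqEncoder.from_params. A hedged sketch (the values are illustrative; importing the module is what triggers the registration):

from allennlp.common import Params
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder

# Importing the class registers the "qanet_encoder_block" type.
from allennlp_models.rc.modules.seq2seq_encoders.qanet_encoder import QaNetEncoderBlock  # noqa: F401

block = Seq2SeqEncoder.from_params(Params({
    "type": "qanet_encoder_block",
    "input_dim": 128,
    "hidden_dim": 128,
    "attention_projection_dim": 128,
    "feedforward_hidden_dim": 128,
    "num_convs": 4,
    "conv_kernel_size": 7,
    "num_attention_heads": 8,
}))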

get_input_dim#

class QaNetEncoderBlock(Seq2SeqEncoder):
 | ...
 | @overrides
 | def get_input_dim(self) -> int

get_output_dim#

class QaNetEncoderBlock(Seq2SeqEncoder):
 | ...
 | @overrides
 | def get_output_dim(self) -> int

is_bidirectional#

class QaNetEncoderBlock(Seq2SeqEncoder):
 | ...
 | @overrides
 | def is_bidirectional(self)

forward#

class QaNetEncoderBlock(Seq2SeqEncoder):
 | ...
 | @overrides
 | def forward(
 |     self,
 |     inputs: torch.Tensor,
 |     mask: torch.BoolTensor = None
 | ) -> torch.Tensor
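
A forward-pass sketch for a single block, assuming block was built as in the previous example:

import torch

inputs = torch.randn(2, 15, block.get_input_dim())
mask = torch.ones(2, 15, dtype=torch.bool)

outputs = block(inputs, mask)
# The block preserves the sequence length and emits hidden_dim features per position.
assert outputs.shape == (2, 15, block.get_output_dim())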