cached_transformers
allennlp.common.cached_transformers
TransformerSpec¶
class TransformerSpec(NamedTuple)
model_name¶
class TransformerSpec(NamedTuple):
| ...
| model_name: str
override_weights_file¶
class TransformerSpec(NamedTuple):
| ...
| override_weights_file: Optional[str] = None
override_weights_strip_prefix¶
class TransformerSpec(NamedTuple):
| ...
| override_weights_strip_prefix: Optional[str] = None
reinit_modules¶
class TransformerSpec(NamedTuple):
| ...
| reinit_modules: Optional[Union[int, Tuple[int, ...], Tuple[str, ...]]] = None
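A brief illustrative sketch (not taken from this page): TransformerSpec bundles the arguments that identify a loaded model and presumably serves as the key under which models are cached, so two specs with identical fields refer to the same cached instance.

from allennlp.common.cached_transformers import TransformerSpec

# A spec for a plain "bert-base-cased" model with no weight overrides and no
# re-initialized modules. NamedTuple equality is field-wise, which is what
# allows specs to act as dictionary keys in a cache.
spec = TransformerSpec(
    model_name="bert-base-cased",
    override_weights_file=None,
    override_weights_strip_prefix=None,
    reinit_modules=None,
)
assert spec == TransformerSpec("bert-base-cased")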
get¶
def get(
model_name: str,
make_copy: bool,
override_weights_file: Optional[str] = None,
override_weights_strip_prefix: Optional[str] = None,
reinit_modules: Optional[Union[int, Tuple[int, ...], Tuple[str, ...]]] = None,
load_weights: bool = True,
**kwargs
) -> transformers.PreTrainedModel
Returns a transformer model from the cache.
Parameters¶
- model_name : str
  The name of the transformer, for example "bert-base-cased".
- make_copy : bool
  If this is True, return a copy of the model instead of the cached model itself. If you want to modify the parameters of the model, set this to True. If you want only part of the model, set this to False, but make sure to copy.deepcopy() the bits you are keeping.
- override_weights_file : str, optional (default = None)
  If set, this specifies a file from which to load alternate weights that override the weights from huggingface. The file is expected to contain a PyTorch state_dict, created with torch.save().
- override_weights_strip_prefix : str, optional (default = None)
  If set, strip the given prefix from the state dict when loading it.
- reinit_modules : Optional[Union[int, Tuple[int, ...], Tuple[str, ...]]], optional (default = None)
  If this is an integer, the last reinit_modules layers of the transformer will be re-initialized. If this is a tuple of integers, the layers indexed by reinit_modules will be re-initialized. Note that because the module structure of the transformer model_name can differ, we cannot guarantee that providing an integer or tuple of integers will work. If this fails, you can instead provide a tuple of strings, which will be treated as regexes; any module whose name matches one of the regexes will be re-initialized. Re-initializing the last few layers of a pretrained transformer can reduce the instability of fine-tuning on small datasets and may improve performance (https://arxiv.org/abs/2006.05987v3). Has no effect if load_weights is False or override_weights_file is not None. See the usage sketch after this list.
- load_weights : bool, optional (default = True)
  If set to False, no weights will be loaded. This is helpful when you only want to initialize the architecture, like when you've already fine-tuned a model and are going to load the weights from a state dict elsewhere.
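A hedged usage sketch of get (the model name and layer count are illustrative): load a cached BERT model with a private copy suitable for fine-tuning, and separately build the bare architecture without pretrained weights.

from allennlp.common import cached_transformers

# A copy of the cached model with its last two transformer layers re-initialized,
# per the reinit_modules note above (integer indexing may fail for some
# architectures). make_copy=True leaves the shared cached instance untouched.
model = cached_transformers.get(
    "bert-base-cased",
    make_copy=True,
    reinit_modules=2,
)

# Architecture only, no pretrained weights; useful right before loading your own
# fine-tuned weights with model.load_state_dict(...).
bare = cached_transformers.get("bert-base-cased", make_copy=False, load_weights=False)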
get_tokenizer¶
def get_tokenizer(
model_name: str,
**kwargs
) -> transformers.PreTrainedTokenizer
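A minimal sketch of get_tokenizer (the input sentence is illustrative); repeated calls with the same model name and keyword arguments presumably reuse one cached tokenizer instance.

from allennlp.common import cached_transformers

# Load (or fetch from the cache) the tokenizer matching the model name.
tokenizer = cached_transformers.get_tokenizer("bert-base-cased")
print(tokenizer.tokenize("AllenNLP caches transformers."))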