util

[ allennlp.training.util ]


Helper functions for Trainers

HasBeenWarned Objects#

class HasBeenWarned()

tqdm_ignores_underscores#

tqdm_ignores_underscores = False

move_optimizer_to_cuda#

def move_optimizer_to_cuda(optimizer)

Move the optimizer state to the GPU, if necessary. After calling this, any parameter-specific state in the optimizer will be located on the same device as the parameter.
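
A minimal usage sketch, assuming a plain PyTorch module and optimizer as stand-ins for a real Model (shapes and hyperparameters are illustrative):

import torch
from allennlp.training.util import move_optimizer_to_cuda

model = torch.nn.Linear(10, 2)   # stand-in for a real Model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Take one step on the CPU so the optimizer accumulates per-parameter state
# (momentum buffers in this case).
model(torch.randn(4, 10)).sum().backward()
optimizer.step()

if torch.cuda.is_available():
    model.cuda()                       # parameters move to the GPU
    move_optimizer_to_cuda(optimizer)  # per-parameter state follows them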

get_batch_size#

def get_batch_size(batch: Union[Dict, torch.Tensor]) -> int

Returns the size of the batch dimension. Assumes a well-formed batch; returns 0 otherwise.
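
A quick sketch of the expected return values (the nested dict mimics the shape of a typical text batch; the keys and sizes are illustrative, and the last case is my reading of "returns 0 otherwise"):

import torch
from allennlp.training.util import get_batch_size

get_batch_size(torch.zeros(32, 50))                          # -> 32
get_batch_size({"tokens": {"tokens": torch.zeros(16, 20)}})  # -> 16, taken from the nested tensor
get_batch_size("not a batch")                                # -> 0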

time_to_str#

def time_to_str(timestamp: int) -> str

Convert seconds past the epoch to a human-readable string.

str_to_time#

def str_to_time(time_str: str) -> datetime.datetime

Convert a human-readable string to a datetime.datetime.
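
These two look like inverses of each other; a small round-trip sketch (the exact string format is an implementation detail, so the value in the comment is only indicative):

import time
from allennlp.training.util import str_to_time, time_to_str

stamp = int(time.time())
readable = time_to_str(stamp)   # a sortable date-time string, e.g. "2020-06-15-13-45-02"
parsed = str_to_time(readable)  # parses that string back into a datetime.datetime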

read_all_datasets#

def read_all_datasets(
    train_data_path: str,
    dataset_reader: DatasetReader,
    validation_dataset_reader: DatasetReader = None,
    validation_data_path: str = None,
    test_data_path: str = None
) -> Dict[str, Dataset]

Reads all datasets (perhaps lazily, if the corresponding dataset readers are lazy) and returns a dictionary mapping dataset name ("train", "validation" or "test") to the iterable resulting from reader.read(filename).
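
A hedged sketch of calling it with one of the built-in readers (the reader choice and the file paths are hypothetical; any DatasetReader plus data files in the matching format would do):

from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.training.util import read_all_datasets

reader = SequenceTaggingDatasetReader()
datasets = read_all_datasets(
    train_data_path="/path/to/train.tsv",            # hypothetical paths
    dataset_reader=reader,
    validation_data_path="/path/to/validation.tsv",
)
train_data = datasets["train"]            # whatever reader.read(...) returned
validation_data = datasets["validation"]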

datasets_from_params#

def datasets_from_params(
    params: Params,
    train: bool = True,
    validation: bool = True,
    test: bool = True
) -> Dict[str, Dataset]

Load datasets specified by the config.
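
A sketch of the kind of config this expects, assuming the standard experiment-config keys (the reader type and paths are hypothetical):

from allennlp.common import Params
from allennlp.training.util import datasets_from_params

params = Params({
    "dataset_reader": {"type": "sequence_tagging"},  # hypothetical reader and paths
    "train_data_path": "/path/to/train.tsv",
    "validation_data_path": "/path/to/validation.tsv",
})
datasets = datasets_from_params(params, test=False)  # {"train": ..., "validation": ...}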

create_serialization_dir#

def create_serialization_dir(
    params: Params,
    serialization_dir: str,
    recover: bool,
    force: bool
) -> None

This function creates the serialization directory if it doesn't exist. If it already exists and is non-empty, then it verifies that we're recovering from a training run with an identical configuration.

Parameters

  • params : Params
    A parameter object specifying an AllenNLP Experiment.
  • serialization_dir : str
    The directory in which to save results and logs.
  • recover : bool
    If True, we will try to recover from an existing serialization directory, crashing if the directory doesn't exist or its configuration doesn't match the one we're given.
  • force : bool
    If True, we will overwrite the serialization directory if it already exists.
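
A minimal sketch of how a training command might call it (the directory path is illustrative, and the empty Params stands in for a full experiment config):

from allennlp.common import Params
from allennlp.training.util import create_serialization_dir

params = Params({})  # normally the full experiment config
create_serialization_dir(
    params,
    serialization_dir="/tmp/my_experiment",  # illustrative path
    recover=False,  # start a fresh run rather than resuming one
    force=False,    # and refuse to overwrite an existing, non-empty directory
)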

enable_gradient_clipping#

def enable_gradient_clipping(
    model: Model,
    grad_clipping: Optional[float]
) -> None
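
There is no docstring here, so the following is only a hedged usage sketch: my reading is that a non-None grad_clipping registers hooks that clamp each parameter's gradient to [-grad_clipping, grad_clipping] during the backward pass, and that None makes the call a no-op. The tiny model and threshold are illustrative.

import torch
from allennlp.data import Vocabulary
from allennlp.models import Model
from allennlp.training.util import enable_gradient_clipping

class TinyModel(Model):
    """Stand-in model; any Model with parameters would do."""
    def __init__(self, vocab: Vocabulary):
        super().__init__(vocab)
        self.linear = torch.nn.Linear(10, 2)

    def forward(self, features: torch.Tensor):
        return {"logits": self.linear(features)}

model = TinyModel(Vocabulary())
enable_gradient_clipping(model, grad_clipping=5.0)   # illustrative threshold
enable_gradient_clipping(model, grad_clipping=None)  # presumably leaves gradients untouched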

rescale_gradients#

def rescale_gradients(
    model: Model,
    grad_norm: Optional[float] = None
) -> Optional[float]

Performs gradient rescaling; this is a no-op if gradient rescaling is not enabled.
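
A hedged sketch (the signature asks for a Model, but a bare torch.nn.Module is used as a stand-in here on the assumption that only its parameters and their gradients matter; the threshold is illustrative):

import torch
from allennlp.training.util import rescale_gradients

model = torch.nn.Linear(10, 2)  # stand-in for a real Model
model(torch.randn(4, 10)).sum().backward()

# Per my reading: rescales the gradients so their total norm is at most 1.0;
# with grad_norm=None nothing happens and None is returned.
total_norm = rescale_gradients(model, grad_norm=1.0)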

get_metrics#

def get_metrics(
    model: Model,
    total_loss: float,
    total_reg_loss: float,
    num_batches: int,
    reset: bool = False,
    world_size: int = 1,
    cuda_device: Union[int, torch.device] = torch.device("cpu")
) -> Dict[str, float]

Gets the metrics from the model but sets "loss" to total_loss divided by num_batches, so that the "loss" metric is the average loss per batch.
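
A sketch of the bookkeeping a training loop does around this call (the stand-in model below reports no metrics of its own, so the result mainly reflects the accumulated loss; the totals are made up):

import torch
from allennlp.data import Vocabulary
from allennlp.models import Model
from allennlp.training.util import get_metrics

class TinyModel(Model):
    """Stand-in; a real model would report accuracy etc. from get_metrics()."""
    def forward(self, features: torch.Tensor):
        return {"logits": features}

model = TinyModel(Vocabulary())

# Suppose an epoch has processed 10 batches with these accumulated losses.
metrics = get_metrics(model, total_loss=12.5, total_reg_loss=0.0, num_batches=10)
print(metrics["loss"])  # 1.25 -- the average loss per batch, as described above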

evaluate#

def evaluate(
    model: Model,
    data_loader: DataLoader,
    cuda_device: int = -1,
    batch_weight_key: str = None
) -> Dict[str, Any]

Parameters

  • model : Model
    The model to evaluate.
  • data_loader : DataLoader
    The DataLoader that will iterate over the evaluation data (data loaders already contain their data).
  • cuda_device : int, optional (default = -1)
    The cuda device to use for this evaluation. The model is assumed to already be using this device; this parameter is only used for moving the input data to the correct device.
  • batch_weight_key : str, optional (default = None)
    If given, this is a key in the output dictionary for each batch that specifies how to weight the loss for that batch. If this is not given, we use a weight of 1 for every batch.
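
A hedged plumbing sketch only: `trained_model` and `loader` are placeholders for a trained Model and a DataLoader over the evaluation data (how those are built depends on the AllenNLP version in use), so this is not runnable as-is.

from allennlp.training.util import evaluate

# trained_model: an allennlp.models.Model that has finished training (placeholder)
# loader: a DataLoader over the evaluation data (placeholder)
metrics = evaluate(
    trained_model,
    loader,
    cuda_device=-1,          # evaluate on the CPU; the model must already live there
    batch_weight_key=None,   # weight every batch's loss equally
)
print(metrics)  # whatever the model reports, e.g. a "loss" entry plus its own metrics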

description_from_metrics#

def description_from_metrics(metrics: Dict[str, float]) -> str
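
No docstring here; judging by the signature, it turns a metrics dict into a one-line summary suitable for a progress-bar description. A sketch (the exact formatting of the returned string is not documented above):

from allennlp.training.util import description_from_metrics

print(description_from_metrics({"loss": 0.4523, "accuracy": 0.8712}))
# A single line listing each metric with its value, e.g. roughly
# "loss: 0.4523, accuracy: 0.8712 ||" (exact formatting may differ).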

make_vocab_from_params#

def make_vocab_from_params(
    params: Params,
    serialization_dir: str,
    print_statistics: bool = False
) -> Vocabulary
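
A hedged sketch, assuming the same experiment-config keys as datasets_from_params above (the reader type and paths are hypothetical; my reading is that the resulting vocabulary is also saved under the serialization directory):

from allennlp.common import Params
from allennlp.training.util import make_vocab_from_params

params = Params({
    "dataset_reader": {"type": "sequence_tagging"},  # hypothetical reader and paths
    "train_data_path": "/path/to/train.tsv",
})
vocab = make_vocab_from_params(params, serialization_dir="/tmp/my_experiment")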

ngrams#

def ngrams(
    tensor: torch.LongTensor,
    ngram_size: int,
    exclude_indices: Set[int]
) -> Dict[Tuple[int, ...], int]
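
No docstring, but the signature suggests counting n-grams over a tensor of token ids while skipping any n-gram that touches an excluded id (padding, for instance). A hedged sketch assuming a 1-D tensor; the expected counts in the comment are my reading, not documented output.

import torch
from allennlp.training.util import ngrams

sequence = torch.LongTensor([1, 2, 3, 2, 3, 0, 0])  # 0 plays the role of padding here
counts = ngrams(sequence, ngram_size=2, exclude_indices={0})
# Expected, per my reading: {(1, 2): 1, (2, 3): 2, (3, 2): 1} -- bigrams that
# contain the excluded id 0 are not counted.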

get_valid_tokens_mask#

def get_valid_tokens_mask(
    tensor: torch.LongTensor,
    exclude_indices: Set[int]
) -> torch.ByteTensor
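
Again no docstring; the name and signature suggest a mask that is truthy wherever the token id is not one of exclude_indices. A hedged sketch (the expected output in the comment is my reading):

import torch
from allennlp.training.util import get_valid_tokens_mask

sequence = torch.LongTensor([1, 2, 0, 3, 0])
mask = get_valid_tokens_mask(sequence, exclude_indices={0})
# Expected, per my reading: a mask like [1, 1, 0, 1, 0], i.e. zero at the
# positions holding the excluded id 0.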