ddp_accelerator
allennlp.nn.parallel.ddp_accelerator
StateDictType¶
StateDictType = Union[Dict[str, torch.Tensor], OrderedDict[str, torch.Tensor]]
LoadStateDictReturnType¶
class LoadStateDictReturnType(NamedTuple)
missing_keys¶
class LoadStateDictReturnType(NamedTuple):
| ...
| missing_keys: List[str] = None
unexpected_keys¶
class LoadStateDictReturnType(NamedTuple):
| ...
| unexpected_keys: List[str] = None
DdpWrappedModel¶
class DdpWrappedModel:
| def __init__(
| self,
| model: torch.nn.Module,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None
| ) -> None
The type of the wrapped model returned from DdpAccelerator.wrap_model.
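For illustration only, a minimal single-process sketch of constructing the wrapper directly (in practice the trainer obtains it from DdpAccelerator.wrap_model; the local_rank and world_size values here are assumptions):

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

# Stand-in module; in a real run this is the module produced by the DDP
# implementation (e.g. torch's DistributedDataParallel).
module = torch.nn.Linear(4, 4)

# Passing local_rank and world_size explicitly keeps this toy example free of
# any distributed process group setup.
wrapped = DdpWrappedModel(model=module, local_rank=0, world_size=1)

# The wrapper exposes the hooks the trainer needs: state_dict(),
# load_state_dict(), clip_grad_norm_(), init_grad_scaler(), and is_sharded.
```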
is_sharded¶
class DdpWrappedModel:
| ...
| @property
| def is_sharded(self) -> bool
consolidate_sharded_state¶
class DdpWrappedModel:
| ...
| @staticmethod
| def consolidate_sharded_state(
| sharded_state_files: Sequence[Union[str, os.PathLike]]
| ) -> StateDictType
load_state_dict¶
class DdpWrappedModel:
| ...
| def load_state_dict(
| self,
| state_dict: StateDictType,
| strict: bool = True
| ) -> LoadStateDictReturnType
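A hedged sketch of round-tripping a checkpoint through the wrapper; the file name is hypothetical and the single-process values are assumptions:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

wrapped = DdpWrappedModel(model=torch.nn.Linear(4, 4), local_rank=0, world_size=1)

# Save the weights, then load them back. The return value carries the
# missing/unexpected key lists described by LoadStateDictReturnType above.
torch.save(wrapped.state_dict(), "weights.th")  # hypothetical path
result = wrapped.load_state_dict(torch.load("weights.th"), strict=True)
print(result.missing_keys, result.unexpected_keys)  # both empty here
```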
state_dict¶
class DdpWrappedModel:
| ...
| def state_dict(self, *args, **kwargs) -> StateDictType
clip_grad_norm_¶
class DdpWrappedModel:
| ...
| def clip_grad_norm_(self, max_norm: Union[float, int]) -> torch.Tensor
init_grad_scaler¶
class DdpWrappedModel:
| ...
| def init_grad_scaler(self) -> amp.GradScaler
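A sketch of how the gradient utilities fit into a training step; the optimizer, data, and single-process values are illustrative, and the GradScaler is only created here rather than used for loss scaling:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

module = torch.nn.Linear(4, 1)
wrapped = DdpWrappedModel(model=module, local_rank=0, world_size=1)
optimizer = torch.optim.SGD(module.parameters(), lr=0.1)
scaler = wrapped.init_grad_scaler()  # an amp.GradScaler, per the signature above

x, y = torch.randn(8, 4), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(module(x), y)
loss.backward()

wrapped.clip_grad_norm_(1.0)  # clip gradients of the wrapped model's parameters
optimizer.step()
optimizer.zero_grad()
```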
DdpAccelerator¶
class DdpAccelerator(Registrable):
| def __init__(
| self,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None,
| cuda_device: Union[torch.device, int] = -1
| ) -> None
A DdpAccelerator is a generalization of PyTorch's DistributedDataParallel class.
This is primarily used within the GradientDescentTrainer to allow for different DDP implementations, such as FairScale's FullyShardedDataParallel.
In a typical AllenNLP configuration file, local_rank, world_size, and cuda_device should not be specified.
Warning
This API is experimental and may change in the future.
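Although these constructor arguments are normally filled in by the trainer during distributed setup, an implementation can be looked up by its registered name and built directly. A minimal sketch (the single-process values are assumptions):

```python
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

# Look up the implementation registered under "torch" (the default) and
# construct it. In a real distributed run the trainer supplies local_rank,
# world_size, and cuda_device for the current worker.
accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)
```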
default_implementation¶
class DdpAccelerator(Registrable):
| ...
| default_implementation = "torch"
wrap_model¶
class DdpAccelerator(Registrable):
| ...
| def wrap_model(
| self,
| model: "Model"
| ) -> Tuple["Model", DdpWrappedModel]
Wrap the AllenNLP Model, returning the original model (possibly on a different device) and the wrapped model.
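A self-contained, single-process sketch of the call; the gloo process group setup and TinyModel are illustrative assumptions, needed only because the default "torch" implementation wraps the model with DistributedDataParallel, which requires an initialized process group:

```python
import os

import torch
import torch.distributed as dist

from allennlp.data.vocabulary import Vocabulary
from allennlp.models.model import Model
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

# Minimal single-process "distributed" setup.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)


class TinyModel(Model):
    """A toy Model used only to illustrate the call."""

    def __init__(self, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.linear = torch.nn.Linear(4, 1)

    def forward(self, x: torch.Tensor):  # type: ignore[override]
        return {"out": self.linear(x)}


accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)

# The first element is the original Model (for saving/loading and evaluation);
# the second is the DdpWrappedModel the trainer runs forward/backward through.
model, ddp_wrapped_model = accelerator.wrap_model(TinyModel(Vocabulary()))

dist.destroy_process_group()
```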
wrap_module¶
class DdpAccelerator(Registrable):
| ...
| def wrap_module(self, module: torch.nn.Module) -> torch.nn.Module
Wrap an individual module. By default this just returns the module, but some subclass implementations such as FairScaleFsdpAccelerator do more.
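Because the base behavior is a pass-through, a sketch is short; sharded implementations such as the FairScale FSDP accelerator return a wrapped (sharded) module here instead. The single-process values below are assumptions:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)

# For the default implementation the module comes back unchanged.
layer = torch.nn.Linear(8, 8)
assert accelerator.wrap_module(layer) is layer
```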
TorchDdpAccelerator¶
@DdpAccelerator.register("torch")
class TorchDdpAccelerator(DdpAccelerator):
| def __init__(
| self,
| *, find_unused_parameters: bool = False,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None,
| cuda_device: Union[torch.device, int] = -1
| ) -> None
The default implementation of DdpAccelerator, which is just a thin wrapper around PyTorch's DistributedDataParallel.
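Constructing it directly is equivalent to using the registered name "torch"; the keyword-only arguments mirror what the trainer would normally supply (the values below are illustrative):

```python
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator, TorchDdpAccelerator

accelerator = TorchDdpAccelerator(
    find_unused_parameters=True,  # forwarded to DistributedDataParallel
    local_rank=0,
    world_size=1,
    cuda_device=-1,  # -1 means CPU in AllenNLP's device convention
)
assert DdpAccelerator.by_name("torch") is TorchDdpAccelerator
```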
wrap_model¶
class TorchDdpAccelerator(DdpAccelerator):
| ...
| def wrap_model(
| self,
| model: "Model"
| ) -> Tuple["Model", DdpWrappedModel]