ddp_accelerator
allennlp.nn.parallel.ddp_accelerator
StateDictType¶
StateDictType = Union[Dict[str, torch.Tensor], OrderedDict[str, torch.Tensor]]
LoadStateDictReturnType¶
class LoadStateDictReturnType(NamedTuple)
missing_keys¶
class LoadStateDictReturnType(NamedTuple):
| ...
| missing_keys: List[str] = None
unexpected_keys¶
class LoadStateDictReturnType(NamedTuple):
| ...
| unexpected_keys: List[str] = None
DdpWrappedModel¶
class DdpWrappedModel:
| def __init__(
| self,
| model: torch.nn.Module,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None
| ) -> None
The type of the wrapped model returned from DdpAccelerator.wrap_model.
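For illustration only, a minimal single-process sketch of constructing the wrapper directly (in practice the trainer obtains it from DdpAccelerator.wrap_model; the local_rank and world_size values here are assumptions):

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

# Stand-in module; in a real run this is the module produced by the DDP
# implementation (e.g. torch's DistributedDataParallel).
module = torch.nn.Linear(4, 4)

# Passing local_rank and world_size explicitly keeps this toy example free of
# any distributed process group setup.
wrapped = DdpWrappedModel(model=module, local_rank=0, world_size=1)

# The wrapper exposes the hooks the trainer needs: state_dict(),
# load_state_dict(), clip_grad_norm_(), init_grad_scaler(), and is_sharded.
```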
is_sharded¶
class DdpWrappedModel:
| ...
| @property
| def is_sharded(self) -> bool
consolidate_sharded_state¶
class DdpWrappedModel:
| ...
| @staticmethod
| def consolidate_sharded_state(
| sharded_state_files: Sequence[Union[str, os.PathLike]]
| ) -> StateDictType
load_state_dict¶
class DdpWrappedModel:
| ...
| def load_state_dict(
| self,
| state_dict: StateDictType,
| strict: bool = True
| ) -> LoadStateDictReturnType
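A hedged sketch of round-tripping a checkpoint through the wrapper; the file name is hypothetical and the single-process values are assumptions:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

wrapped = DdpWrappedModel(model=torch.nn.Linear(4, 4), local_rank=0, world_size=1)

# Save the weights, then load them back. The return value carries the
# missing/unexpected key lists described by LoadStateDictReturnType above.
torch.save(wrapped.state_dict(), "weights.th")  # hypothetical path
result = wrapped.load_state_dict(torch.load("weights.th"), strict=True)
print(result.missing_keys, result.unexpected_keys)  # both empty here
```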
state_dict¶
class DdpWrappedModel:
| ...
| def state_dict(self, *args, **kwargs) -> StateDictType
clip_grad_norm_¶
class DdpWrappedModel:
| ...
| def clip_grad_norm_(self, max_norm: Union[float, int]) -> torch.Tensor
init_grad_scaler¶
class DdpWrappedModel:
| ...
| def init_grad_scaler(self) -> amp.GradScaler
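A sketch of how the gradient utilities fit into a training step; the optimizer, data, and single-process values are illustrative, and the GradScaler is only created here rather than used for loss scaling:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpWrappedModel

module = torch.nn.Linear(4, 1)
wrapped = DdpWrappedModel(model=module, local_rank=0, world_size=1)
optimizer = torch.optim.SGD(module.parameters(), lr=0.1)
scaler = wrapped.init_grad_scaler()  # an amp.GradScaler, per the signature above

x, y = torch.randn(8, 4), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(module(x), y)
loss.backward()

wrapped.clip_grad_norm_(1.0)  # clip gradients of the wrapped model's parameters
optimizer.step()
optimizer.zero_grad()
```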
DdpAccelerator¶
class DdpAccelerator(Registrable):
| def __init__(
| self,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None,
| cuda_device: Union[torch.device, int] = -1
| ) -> None
A DdpAccelerator is a generalization of PyTorch's DistributedDataParallel class.
This is primarily used within the GradientDescentTrainer to allow for different DDP implementations, such as FairScale's FullyShardedDataParallel.
In a typical AllenNLP configuration file, local_rank, world_size, and cuda_device should not be specified.
Warning
This API is experimental and may change in the future.
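Although these constructor arguments are normally filled in by the trainer during distributed setup, an implementation can be looked up by its registered name and built directly. A minimal sketch (the single-process values are assumptions):

```python
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

# Look up the implementation registered under "torch" (the default) and
# construct it. In a real distributed run the trainer supplies local_rank,
# world_size, and cuda_device for the current worker.
accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)
```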
default_implementation¶
class DdpAccelerator(Registrable):
| ...
| default_implementation = "torch"
wrap_model¶
class DdpAccelerator(Registrable):
| ...
| def wrap_model(
| self,
| model: "Model"
| ) -> Tuple["Model", DdpWrappedModel]
Wrap the AllenNLP Model, returning the original model (possibly on a different device) and the wrapped model.
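A self-contained, single-process sketch of the call; the gloo process group setup and TinyModel are illustrative assumptions, needed only because the default "torch" implementation wraps the model with DistributedDataParallel, which requires an initialized process group:

```python
import os

import torch
import torch.distributed as dist

from allennlp.data.vocabulary import Vocabulary
from allennlp.models.model import Model
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

# Minimal single-process "distributed" setup.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)


class TinyModel(Model):
    """A toy Model used only to illustrate the call."""

    def __init__(self, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.linear = torch.nn.Linear(4, 1)

    def forward(self, x: torch.Tensor):  # type: ignore[override]
        return {"out": self.linear(x)}


accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)

# The first element is the original Model (for saving/loading and evaluation);
# the second is the DdpWrappedModel the trainer runs forward/backward through.
model, ddp_wrapped_model = accelerator.wrap_model(TinyModel(Vocabulary()))

dist.destroy_process_group()
```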
wrap_module¶
class DdpAccelerator(Registrable):
| ...
| def wrap_module(self, module: torch.nn.Module) -> torch.nn.Module
Wrap an individual module. By default this just returns the module, but some subclass implementations such as FairScaleFsdpAccelerator do more.
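Because the base behavior is a pass-through, a sketch is short; sharded implementations such as the FairScale FSDP accelerator return a wrapped (sharded) module here instead. The single-process values below are assumptions:

```python
import torch

from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator

accelerator = DdpAccelerator.by_name("torch")(local_rank=0, world_size=1, cuda_device=-1)

# For the default implementation the module comes back unchanged.
layer = torch.nn.Linear(8, 8)
assert accelerator.wrap_module(layer) is layer
```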
TorchDdpAccelerator¶
@DdpAccelerator.register("torch")
class TorchDdpAccelerator(DdpAccelerator):
| def __init__(
| self,
| *, find_unused_parameters: bool = False,
| local_rank: Optional[int] = None,
| world_size: Optional[int] = None,
| cuda_device: Union[torch.device, int] = -1
| ) -> None
The default implementation of DdpAccelerator, which is just a thin wrapper around PyTorch's DistributedDataParallel.
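Constructing it directly is equivalent to using the registered name "torch"; the keyword-only arguments mirror what the trainer would normally supply (the values below are illustrative):

```python
from allennlp.nn.parallel.ddp_accelerator import DdpAccelerator, TorchDdpAccelerator

accelerator = TorchDdpAccelerator(
    find_unused_parameters=True,  # forwarded to DistributedDataParallel
    local_rank=0,
    world_size=1,
    cuda_device=-1,  # -1 means CPU in AllenNLP's device convention
)
assert DdpAccelerator.by_name("torch") is TorchDdpAccelerator
```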
wrap_model¶
class TorchDdpAccelerator(DdpAccelerator):
| ...
| def wrap_model(
| self,
| model: "Model"
| ) -> Tuple["Model", DdpWrappedModel]