dataloader
allennlp.tango.dataloader
AllenNLP Tango is an experimental API and parts of it might change or disappear every time we release a new version.
TangoDataLoader¶
class TangoDataLoader(Registrable)
A Tango data loader in AllenNLP is anything that produces an iterator of batches.
You would usually initialize a data loader with a Sequence[Instance]
to do this, but
some Tango data loaders can be initialized in other ways and still produce batches.
default_implementation¶
class TangoDataLoader(Registrable):
| ...
| default_implementation = "batch_size"
num_batches_per_epoch¶
class TangoDataLoader(Registrable):
| ...
| def num_batches_per_epoch(self) -> Optional[int]
If the dataloader produces epochs of equal length, this is how you get the length.
__iter__¶
class TangoDataLoader(Registrable):
| ...
| def __iter__(self) -> Iterator[TensorDict]
Override this function in your own data loader to make batches.
DataLoaderAdapter¶
class DataLoaderAdapter(DataLoader):
| def __init__(self, *, tango_data_loader: TangoDataLoader)
Adapts a TangoDataLoader to an old-school AllenNLP DataLoader.
__iter__¶
class DataLoaderAdapter(DataLoader):
| ...
| def __iter__(self) -> Iterator[TensorDict]
iter_instances¶
class DataLoaderAdapter(DataLoader):
| ...
| def iter_instances(self) -> Iterator[Instance]
index_with¶
class DataLoaderAdapter(DataLoader):
| ...
| def index_with(self, vocab: Vocabulary) -> None
set_target_device¶
class DataLoaderAdapter(DataLoader):
| ...
| def set_target_device(self, device: torch.device) -> None
BatchSizeDataLoader¶
@TangoDataLoader.register("batch_size")
class BatchSizeDataLoader(TangoDataLoader):
| def __init__(
| self,
| instances: Sequence[Instance],
| *,
| batch_size: int,
| drop_last: bool = False,
| shuffle: bool = True
| )
A data loader that turns instances into batches with a constant number of instances per batch.
instances
contains the instances we want to make batches out of.
batch_size
is the number of instances per batch.
drop_last
specifies whether to keep the last batch in case it is smaller than `batch_size`.
shuffle
specifies whether to shuffle the instances before making batches.
num_batches_per_epoch¶
class BatchSizeDataLoader(TangoDataLoader):
| ...
| def num_batches_per_epoch(self) -> Optional[int]
__iter__¶
class BatchSizeDataLoader(TangoDataLoader):
| ...
| def __iter__(self) -> Iterator[TensorDict]
SamplerDataLoader¶
@TangoDataLoader.register("sampler")
class SamplerDataLoader(TangoDataLoader):
| def __init__(
| self,
| instances: Sequence[Instance],
| *,
| batch_sampler: BatchSampler
| )
This dataloader uses a BatchSampler
to make batches out of the instances given in `instances`.
num_batches_per_epoch¶
class SamplerDataLoader(TangoDataLoader):
| ...
| def num_batches_per_epoch(self) -> Optional[int]
__iter__¶
class SamplerDataLoader(TangoDataLoader):
| ...
| def __iter__(self) -> Iterator[TensorDict]
BatchesPerEpochDataLoader¶
@TangoDataLoader.register("batches_per_epoch")
class BatchesPerEpochDataLoader(TangoDataLoader):
| def __init__(
| self,
| *,
| inner: TangoDataLoader,
| batches_per_epoch: int
| )
This dataloader wraps another data loader, but changes the length of the epoch. It ends
one epoch and starts another every batches_per_epoch
batches.
num_batches_per_epoch¶
class BatchesPerEpochDataLoader(TangoDataLoader):
| ...
| def num_batches_per_epoch(self) -> Optional[int]
__iter__¶
class BatchesPerEpochDataLoader(TangoDataLoader):
| ...
| def __iter__(self) -> Iterator[TensorDict]
MaxBatchesDataLoader¶
@TangoDataLoader.register("max_batches")
class MaxBatchesDataLoader(TangoDataLoader):
| def __init__(
| self,
| inner: TangoDataLoader,
| max_batches_per_epoch: int
| )
This dataloader wraps another data loader, but only returns the first
max_batches_per_epoch
batches for every epoch. This is useful for debugging.
num_batches_per_epoch¶
class MaxBatchesDataLoader(TangoDataLoader):
| ...
| def num_batches_per_epoch(self) -> Optional[int]
__iter__¶
class MaxBatchesDataLoader(TangoDataLoader):
| ...
| def __iter__(self) -> Iterator[TensorDict]