Skip to content

dataloader

allennlp.tango.dataloader

[SOURCE]


AllenNLP Tango is an experimental API and parts of it might change or disappear every time we release a new version.

TangoDataLoader

class TangoDataLoader(Registrable)

A Tango data loader in AllenNLP is anything that produces an iterator of batches. You would usually initialize a data loader with a Sequence[Instance] to do this, but some Tango data loaders can be initialized in other ways and still produce batches.

default_implementation

class TangoDataLoader(Registrable):
 | ...
 | default_implementation = "batch_size"

num_batches_per_epoch

class TangoDataLoader(Registrable):
 | ...
 | def num_batches_per_epoch(self) -> Optional[int]

If the dataloader produces epochs of equal length, this is how you get the length.

__iter__

class TangoDataLoader(Registrable):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]

Override this function in your own data loader to make batches.

DataLoaderAdapter

class DataLoaderAdapter(DataLoader):
 | def __init__(self, *, tango_data_loader: TangoDataLoader)

Adapts a TangoDataLoader to an old-school AllenNLP DataLoader.

__iter__

class DataLoaderAdapter(DataLoader):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]

iter_instances

class DataLoaderAdapter(DataLoader):
 | ...
 | def iter_instances(self) -> Iterator[Instance]

index_with

class DataLoaderAdapter(DataLoader):
 | ...
 | def index_with(self, vocab: Vocabulary) -> None

set_target_device

class DataLoaderAdapter(DataLoader):
 | ...
 | def set_target_device(self, device: torch.device) -> None

BatchSizeDataLoader

@TangoDataLoader.register("batch_size")
class BatchSizeDataLoader(TangoDataLoader):
 | def __init__(
 |     self,
 |     instances: Sequence[Instance],
 |     *,
 |     batch_size: int,
 |     drop_last: bool = False,
 |     shuffle: bool = True
 | )

A data loader that turns instances into batches with a constant number of instances per batch.

  • instances contains the instances we want to make batches out of.
  • batch_size is the number of instances per batch
  • drop_last specifies whether to drop the last batch in case it is smaller than batch_size
  • shuffle specifies whether to shuffle the instances before making batches

num_batches_per_epoch

class BatchSizeDataLoader(TangoDataLoader):
 | ...
 | def num_batches_per_epoch(self) -> Optional[int]

__iter__

class BatchSizeDataLoader(TangoDataLoader):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]

SamplerDataLoader

@TangoDataLoader.register("sampler")
class SamplerDataLoader(TangoDataLoader):
 | def __init__(
 |     self,
 |     instances: Sequence[Instance],
 |     *,
 |     batch_sampler: BatchSampler
 | )

This dataloader uses a BatchSampler to make batches out of the instances passed as the instances argument.

num_batches_per_epoch

class SamplerDataLoader(TangoDataLoader):
 | ...
 | def num_batches_per_epoch(self) -> Optional[int]

__iter__

class SamplerDataLoader(TangoDataLoader):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]

BatchesPerEpochDataLoader

@TangoDataLoader.register("batches_per_epoch")
class BatchesPerEpochDataLoader(TangoDataLoader):
 | def __init__(
 |     self,
 |     *,
 |     inner: TangoDataLoader,
 |     batches_per_epoch: int
 | )

This dataloader wraps another data loader, but changes the length of the epoch. It ends one epoch and starts another every batches_per_epoch batches.

num_batches_per_epoch

class BatchesPerEpochDataLoader(TangoDataLoader):
 | ...
 | def num_batches_per_epoch(self) -> Optional[int]

__iter__

class BatchesPerEpochDataLoader(TangoDataLoader):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]

MaxBatchesDataLoader

@TangoDataLoader.register("max_batches")
class MaxBatchesDataLoader(TangoDataLoader):
 | def __init__(
 |     self,
 |     inner: TangoDataLoader,
 |     max_batches_per_epoch: int
 | )

This dataloader wraps another data loader, but only returns the first max_batches_per_epoch batches for every epoch. This is useful for debugging.

num_batches_per_epoch

class MaxBatchesDataLoader(TangoDataLoader):
 | ...
 | def num_batches_per_epoch(self) -> Optional[int]

__iter__

class MaxBatchesDataLoader(TangoDataLoader):
 | ...
 | def __iter__(self) -> Iterator[TensorDict]