data_collator
allennlp.data.data_loaders.data_collator
allennlp_collate¶
def allennlp_collate(instances: List[Instance]) -> TensorDict
This is the default function used to turn a list of `Instance`s into a `TensorDict` batch.
DataCollator¶
class DataCollator(Registrable)
This class is similar to `DataCollator` in Transformers.
It allows dynamic operations to be applied to the tensors of different batches,
because it runs before each epoch to convert a `List[Instance]` into a `TensorDict`.
default_implementation¶
class DataCollator(Registrable):
| ...
| default_implementation = "allennlp"
__call__¶
class DataCollator(Registrable):
| ...
| def __call__(self, instances: List[Instance]) -> TensorDict
DefaultDataCollator¶
@DataCollator.register("allennlp")
class DefaultDataCollator(DataCollator)
__call__¶
class DefaultDataCollator(DataCollator):
| ...
| def __call__(self, instances: List[Instance]) -> TensorDict
LanguageModelingDataCollator¶
@DataCollator.register("language_model")
class LanguageModelingDataCollator(DataCollator):
| def __init__(
| self,
| model_name: str,
| mlm: bool = True,
| mlm_probability: float = 0.15,
| filed_name: str = "source",
| namespace: str = "tokens"
| )
Registered as a `DataCollator` with name `language_model`.
Used for language modeling.
__call__¶
class LanguageModelingDataCollator(DataCollator):
| ...
| def __call__(self, instances: List[Instance]) -> TensorDict
process_tokens¶
class LanguageModelingDataCollator(DataCollator):
| ...
| def process_tokens(self, tensor_dicts: TensorDict) -> TensorDict