semantic_dependencies
allennlp_models.structured_prediction.dataset_readers.semantic_dependencies
FIELDS#
FIELDS = ["id", "form", "lemma", "pos", "head", "deprel", "top", "pred", "frame"]
parse_sentence#
def parse_sentence(
sentence_blob: str
) -> Tuple[List[Dict[str, str]], List[Tuple[int, int]], List[str]]
Parses a chunk of text in the SemEval SDP format.
Each word in the sentence is returned as a dictionary with the following format:
'id': '1',
'form': 'Pierre',
'lemma': 'Pierre',
'pos': 'NNP',
'head': '2', # Note that this is the `syntactic` head.
'deprel': 'nn',
'top': '-',
'pred': '+',
'frame': 'named:x-c'
In addition to these per-word dictionaries, the function returns a list of arcs and a list of their corresponding tags. Note that in semantic dependency parsing a word can have more than one head (the structure is not a tree), so the lengths of the arc and tag lists are not tied to the length of the sentence.
lazy_parse#
def lazy_parse(text: str)
SemanticDependenciesDatasetReader#
@DatasetReader.register("semantic_dependencies")
class SemanticDependenciesDatasetReader(DatasetReader):
| def __init__(
| self,
| token_indexers: Dict[str, TokenIndexer] = None,
| skip_when_no_arcs: bool = True,
| **kwargs
| ) -> None
Reads a file in the SemEval 2015 Task 18 (Broad-coverage Semantic Dependency Parsing) format.
Registered as a `DatasetReader` with name "semantic_dependencies".
Parameters
- token_indexers : `Dict[str, TokenIndexer]`, optional (default = `{"tokens": SingleIdTokenIndexer()}`)
  The token indexers to be applied to the words TextField.
- skip_when_no_arcs : `bool`, optional (default = `True`)
  If this is true, skip examples containing no semantic arcs.
text_to_instance#
class SemanticDependenciesDatasetReader(DatasetReader):
| ...
| def text_to_instance(
| self,
| tokens: List[str],
| pos_tags: List[str] = None,
| arc_indices: List[Tuple[int, int]] = None,
| arc_tags: List[str] = None
| ) -> Instance