Skip to content





FIELDS = ["id", "form", "lemma", "pos", "head", "deprel", "top", "pred", "frame"]


def parse_sentence(
    sentence_blob: str
) -> Tuple[List[Dict[str, str]], List[Tuple[int, int]], List[str]]

Parses a chunk of text in the SemEval SDP format.

Each word in the sentence is returned as a dictionary with the following format:

{
    'id': '1',
    'form': 'Pierre',
    'lemma': 'Pierre',
    'pos': 'NNP',
    'head': '2',   # Note that this is the `syntactic` head.
    'deprel': 'nn',
    'top': '-',
    'pred': '+',
    'frame': 'named:x-c'
}

The function also returns a list of arcs and a list of their corresponding tags. Note that in semantic dependency parsing a word can have more than one head (the structure is not a tree), so the lengths of the arc and tag lists are not tied to the length of the sentence.


def lazy_parse(text: str)


class SemanticDependenciesDatasetReader(DatasetReader):
 | def __init__(
 |     self,
 |     token_indexers: Dict[str, TokenIndexer] = None,
 |     skip_when_no_arcs: bool = True,
 |     **kwargs
 | ) -> None

Reads a file in the SemEval 2015 Task 18 (Broad-coverage Semantic Dependency Parsing) format.

Registered as a DatasetReader with name "semantic_dependencies".


  • token_indexers : Dict[str, TokenIndexer], optional (default = {"tokens": SingleIdTokenIndexer()})
    The token indexers to be applied to the words TextField.
  • skip_when_no_arcs : bool, optional (default = True)
    If this is true, skip examples containing no semantic arcs.


class SemanticDependenciesDatasetReader(DatasetReader):
 | ...
 | def text_to_instance(
 |     self,
 |     tokens: List[str],
 |     pos_tags: List[str] = None,
 |     arc_indices: List[Tuple[int, int]] = None,
 |     arc_tags: List[str] = None
 | ) -> Instance