Sample usage for dependency

Dependency Grammars

>>> from nltk.grammar import DependencyGrammar
>>> from nltk.parse import (
...     DependencyGraph,
...     ProjectiveDependencyParser,
...     NonprojectiveDependencyParser,
... )

CoNLL Data

>>> treebank_data = """Pierre  NNP     2       NMOD
... Vinken  NNP     8       SUB
... ,       ,       2       P
... 61      CD      5       NMOD
... years   NNS     6       AMOD
... old     JJ      2       NMOD
... ,       ,       2       P
... will    MD      0       ROOT
... join    VB      8       VC
... the     DT      11      NMOD
... board   NN      9       OBJ
... as      IN      9       VMOD
... a       DT      15      NMOD
... nonexecutive    JJ      15      NMOD
... director        NN      12      PMOD
... Nov.    NNP     9       VMOD
... 29      CD      16      NMOD
... .       .       9       VMOD
... """
>>> dg = DependencyGraph(treebank_data)
>>> dg.tree().pprint()
(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
>>> for head, rel, dep in dg.triples():
...     print(
...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
...         .format(h=head, r=rel, d=dep)
...     )
(will, MD), SUB, (Vinken, NNP)
(Vinken, NNP), NMOD, (Pierre, NNP)
(Vinken, NNP), P, (,, ,)
(Vinken, NNP), NMOD, (old, JJ)
(old, JJ), AMOD, (years, NNS)
(years, NNS), NMOD, (61, CD)
(Vinken, NNP), P, (,, ,)
(will, MD), VC, (join, VB)
(join, VB), OBJ, (board, NN)
(board, NN), NMOD, (the, DT)
(join, VB), VMOD, (as, IN)
(as, IN), PMOD, (director, NN)
(director, NN), NMOD, (a, DT)
(director, NN), NMOD, (nonexecutive, JJ)
(join, VB), VMOD, (Nov., NNP)
(Nov., NNP), NMOD, (29, CD)
(join, VB), VMOD, (., .)

Using a custom cell extractor.

>>> def custom_extractor(cells):
...     _, tag, head, rel = cells
...     return 'spam', 'spam', tag, tag, '', head, rel
>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
>>> dg.tree().pprint()
(spam
  (spam spam spam (spam (spam spam)) spam)
  (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))

Custom cell extractors can take in and return an index.

>>> def custom_extractor(cells, index):
...     word, tag, head, rel = cells
...     return (index, '{}-{}'.format(word, index), word,
...             tag, tag, '', head, rel)
>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
>>> dg.tree().pprint()
(will-8
  (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
  (join-9
    (board-11 the-10)
    (as-12 (director-15 a-13 nonexecutive-14))
    (Nov.-16 29-17)
    .-18))

Using the dependency-parsed version of the Penn Treebank corpus sample.

>>> from nltk.corpus import dependency_treebank
>>> t = dependency_treebank.parsed_sents()[0]
>>> print(t.to_conll(3))
Pierre      NNP     2
Vinken      NNP     8
,   ,       2
61  CD      5
years       NNS     6
old JJ      2
,   ,       2
will        MD      0
join        VB      8
the DT      11
board       NN      9
as  IN      9
a   DT      15
nonexecutive        JJ      15
director    NN      12
Nov.        NNP     9
29  CD      16
.   .       8

Using the output of zpar (like Malt-TAB but with zero-based indexing)

>>> zpar_data = """
... Pierre  NNP     1       NMOD
... Vinken  NNP     7       SUB
... ,       ,       1       P
... 61      CD      4       NMOD
... years   NNS     5       AMOD
... old     JJ      1       NMOD
... ,       ,       1       P
... will    MD      -1      ROOT
... join    VB      7       VC
... the     DT      10      NMOD
... board   NN      8       OBJ
... as      IN      8       VMOD
... a       DT      14      NMOD
... nonexecutive    JJ      14      NMOD
... director        NN      11      PMOD
... Nov.    NNP     8       VMOD
... 29      CD      15      NMOD
... .       .       7       P
... """
>>> zdg = DependencyGraph(zpar_data, zero_based=True)
>>> print(zdg.tree())
(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29))
  .)

Projective Dependency Parsing

>>> grammar = DependencyGrammar.fromstring("""
... 'fell' -> 'price' | 'stock'
... 'price' -> 'of' 'the'
... 'of' -> 'stock'
... 'stock' -> 'the'
... """)
>>> print(grammar)
Dependency grammar with 5 productions
  'fell' -> 'price'
  'fell' -> 'stock'
  'price' -> 'of' 'the'
  'of' -> 'stock'
  'stock' -> 'the'
>>> dp = ProjectiveDependencyParser(grammar)
>>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
...     print(t)
(fell (price the (of (stock the))))
(fell (price the of) (stock the))
(fell (price the of the) stock)

Non-Projective Dependency Parsing

>>> grammar = DependencyGrammar.fromstring("""
... 'taught' -> 'play' | 'man'
... 'man' -> 'the'
... 'play' -> 'golf' | 'dog' | 'to'
... 'dog' -> 'his'
... """)
>>> print(grammar)
Dependency grammar with 7 productions
  'taught' -> 'play'
  'taught' -> 'man'
  'man' -> 'the'
  'play' -> 'golf'
  'play' -> 'dog'
  'play' -> 'to'
  'dog' -> 'his'
>>> dp = NonprojectiveDependencyParser(grammar)
>>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
>>> print(g.root['word'])
taught
>>> for _, node in sorted(g.nodes.items()):
...     if node['word'] is not None:
...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
1 the: []
2 man: [1]
3 taught: [2, 7]
4 his: []
5 dog: [4]
6 to: []
7 play: [5, 6, 8]
8 golf: []
>>> print(g.tree())
(taught (man the) (play (dog his) to golf))

Integration with MALT parser

If the top relation label differs from the default, it can be set explicitly. For the MALT parser, it is 'null'.

>>> dg_str = """1       I       _       NN      NN      _       2       nn      _       _
... 2   shot    _       NN      NN      _       0       null    _       _
... 3   an      _       AT      AT      _       2       dep     _       _
... 4   elephant        _       NN      NN      _       7       nn      _       _
... 5   in      _       NN      NN      _       7       nn      _       _
... 6   my      _       NN      NN      _       7       nn      _       _
... 7   pajamas _       NNS     NNS     _       3       dobj    _       _
... """
>>> dg = DependencyGraph(dg_str, top_relation_label='null')
>>> len(dg.nodes)
8
>>> dg.root['word'], dg.root['address']
('shot', 2)
>>> print(dg.to_conll(10))
1   I       _       NN      NN      _       2       nn      _       _
2   shot    _       NN      NN      _       0       null    _       _
3   an      _       AT      AT      _       2       dep     _       _
4   elephant        _       NN      NN      _       7       nn      _       _
5   in      _       NN      NN      _       7       nn      _       _
6   my      _       NN      NN      _       7       nn      _       _
7   pajamas _       NNS     NNS     _       3       dobj    _       _