nltk.parse.stanford module

class nltk.parse.stanford.GenericStanfordParser[source]

Bases: nltk.parse.api.ParserI

Interface to the Stanford Parser

__init__(path_to_jar=None, path_to_models_jar=None, model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz', encoding='utf8', verbose=False, java_options='-mx4g', corenlp_options='')[source]
parse_sents(sentences, verbose=False)[source]

Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list where each sentence is a list of words. Each sentence will be automatically tagged with this StanfordParser instance’s tagger. If whitespace exists inside a token, then the token will be treated as separate tokens.

Parameters

sentences (list(list(str))) – Input sentences to parse

Return type

iter(iter(Tree))

raw_parse(sentence, verbose=False)[source]

Use StanfordParser to parse a sentence. Takes a sentence as a string; before parsing, it will be automatically tokenized and tagged by the Stanford Parser.

Parameters

sentence (str) – Input sentence to parse

Return type

iter(Tree)

raw_parse_sents(sentences, verbose=False)[source]

Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list of strings. Each sentence will be automatically tokenized and tagged by the Stanford Parser.

Parameters

sentences (list(str)) – Input sentences to parse

Return type

iter(iter(Tree))

tagged_parse(sentence, verbose=False)[source]

Use StanfordParser to parse a sentence. Takes a sentence as a list of (word, tag) tuples; the sentence must have already been tokenized and tagged.

Parameters

sentence (list(tuple(str, str))) – Input sentence to parse

Return type

iter(Tree)

tagged_parse_sents(sentences, verbose=False)[source]

Use StanfordParser to parse multiple sentences. Takes multiple sentences where each sentence is a list of (word, tag) tuples. The sentences must have already been tokenized and tagged.

Parameters

sentences (list(list(tuple(str, str)))) – Input sentences to parse

Return type

iter(iter(Tree))

class nltk.parse.stanford.StanfordParser[source]

Bases: nltk.parse.stanford.GenericStanfordParser

>>> parser=StanfordParser(
...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... )
>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) 
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
...     "the quick brown fox jumps over the lazy dog",
...     "the quick grey wolf jumps over the lazy fox"
... ))], []) 
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
[Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
>>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) 
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]
>>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
...     (
...         ("The", "DT"),
...         ("quick", "JJ"),
...         ("brown", "JJ"),
...         ("fox", "NN"),
...         ("jumped", "VBD"),
...         ("over", "IN"),
...         ("the", "DT"),
...         ("lazy", "JJ"),
...         ("dog", "NN"),
...         (".", "."),
...     ),
... ))],[]) 
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
__init__(*args, **kwargs)[source]
class nltk.parse.stanford.StanfordDependencyParser[source]

Bases: nltk.parse.stanford.GenericStanfordParser

>>> dep_parser=StanfordDependencyParser(
...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... )
>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] 
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]
>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] 
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
...     "The quick brown fox jumps over the lazy dog.",
...     "The quick grey wolf jumps over the lazy fox."
... ))], []) 
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) 
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]
>>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
...     (
...         ("The", "DT"),
...         ("quick", "JJ"),
...         ("brown", "JJ"),
...         ("fox", "NN"),
...         ("jumped", "VBD"),
...         ("over", "IN"),
...         ("the", "DT"),
...         ("lazy", "JJ"),
...         ("dog", "NN"),
...         (".", "."),
...     ),
... ))],[]) 
[[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]
__init__(*args, **kwargs)[source]
class nltk.parse.stanford.StanfordNeuralDependencyParser[source]

Bases: nltk.parse.stanford.GenericStanfordParser

>>> from nltk.parse.stanford import StanfordNeuralDependencyParser
>>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')
>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] 
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]
>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] 
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
(u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
(u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
u'punct', (u'.', u'.'))]]
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
...     "The quick brown fox jumps over the lazy dog.",
...     "The quick grey wolf jumps over the lazy fox."
... ))], []) 
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
Tree('fox', ['over', 'the', 'lazy']), '.'])]
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) 
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
__init__(*args, **kwargs)[source]
tagged_parse_sents(sentences, verbose=False)[source]

Currently unimplemented because the neural dependency parser (and the StanfordCoreNLP pipeline class) doesn’t support passing in pre-tagged tokens.