# Source code for nltk.test.unit.test_corenlp

# -*- coding: utf-8 -*-

"""
Mock test for Stanford CoreNLP wrappers.
"""

import sys
from itertools import chain
from unittest import TestCase, SkipTest

try:
    from unittest.mock import MagicMock
except ImportError:
    # The tests below rely on MagicMock; when unittest.mock cannot be
    # imported, skip the whole module instead of failing at import time.
    raise SkipTest('unittest.mock not supported in Python2')
from nltk.tree import Tree
from nltk.parse import corenlp


class TestTokenizerAPI(TestCase):
    """Check ``CoreNLPParser.tokenize`` against a canned API response.

    The parser's ``api_call`` attribute is replaced with a ``MagicMock`` so
    the test fully controls what the API appears to return.
    """

    @staticmethod
    def _token(index, word, begin, end, before, after):
        """Return one token entry for the mocked ``api_call`` response.

        :param index: value stored under ``'index'``.
        :param word: surface form, stored under both ``'word'`` and
            ``'originalText'``.
        :param begin: value stored under ``'characterOffsetBegin'``.
        :param end: value stored under ``'characterOffsetEnd'``.
        :param before: text stored under ``'before'``.
        :param after: text stored under ``'after'``.
        :return: a new dict holding exactly those seven keys.
        """
        return {
            'after': after,
            'before': before,
            'characterOffsetBegin': begin,
            'characterOffsetEnd': end,
            'index': index,
            'originalText': word,
            'word': word,
        }

    def test_tokenize(self):
        """Tokenizing a three-sentence string yields its words in order.

        Also asserts that ``api_call`` was invoked exactly once, with the raw
        input string and ``{'annotators': 'tokenize,ssplit'}`` as properties.
        """
        token = self._token
        corenlp_tokenizer = corenlp.CoreNLPParser()

        # NOTE(review): the offsets of '.' (35-36) and 'Please' (38-44) imply
        # two characters between them, while input_string below has a single
        # space after 'York.'. The mocked response makes the test insensitive
        # to this; confirm against the upstream fixture.
        api_return_value = {
            'sentences': [
                {
                    'index': 0,
                    'tokens': [
                        token(1, 'Good', 0, 4, before='', after=' '),
                        token(2, 'muffins', 5, 12, before=' ', after=' '),
                        token(3, 'cost', 13, 17, before=' ', after=' '),
                        token(4, '$', 18, 19, before=' ', after=''),
                        token(5, '3.88', 19, 23, before='', after='\n'),
                        token(6, 'in', 24, 26, before='\n', after=' '),
                        token(7, 'New', 27, 30, before=' ', after=' '),
                        token(8, 'York', 31, 35, before=' ', after=''),
                        token(9, '.', 35, 36, before='', after=' '),
                    ],
                },
                {
                    'index': 1,
                    'tokens': [
                        token(1, 'Please', 38, 44, before=' ', after=' '),
                        token(2, 'buy', 45, 48, before=' ', after=' '),
                        token(3, 'me', 49, 51, before=' ', after='\n'),
                        token(4, 'two', 52, 55, before='\n', after=' '),
                        token(5, 'of', 56, 58, before=' ', after=' '),
                        token(6, 'them', 59, 63, before=' ', after=''),
                        token(7, '.', 63, 64, before='', after='\n'),
                    ],
                },
                {
                    'index': 2,
                    'tokens': [
                        token(1, 'Thanks', 65, 71, before='\n', after=''),
                        token(2, '.', 71, 72, before='', after=''),
                    ],
                },
            ]
        }
        corenlp_tokenizer.api_call = MagicMock(return_value=api_return_value)

        input_string = (
            "Good muffins cost $3.88\nin New York. Please buy me\n"
            "two of them.\nThanks."
        )
        expected_output = [
            'Good',
            'muffins',
            'cost',
            '$',
            '3.88',
            'in',
            'New',
            'York',
            '.',
            'Please',
            'buy',
            'me',
            'two',
            'of',
            'them',
            '.',
            'Thanks',
            '.',
        ]

        tokenized_output = list(corenlp_tokenizer.tokenize(input_string))

        # The text must reach the API unchanged, in a single call.
        corenlp_tokenizer.api_call.assert_called_once_with(
            input_string,
            properties={'annotators': 'tokenize,ssplit'},
        )
        self.assertEqual(expected_output, tokenized_output)
class TestTaggerAPI(TestCase):
    """Check ``CoreNLPParser.tag`` for the ``pos`` and ``ner`` tag types.

    Each test replaces the parser's ``api_call`` attribute with a
    ``MagicMock`` returning a canned response.
    """

    @staticmethod
    def _token(index, word, begin, end, before=' ', after=' ', **annotations):
        """Return one token entry for the mocked ``api_call`` response.

        :param index: value stored under ``'index'``.
        :param word: surface form, stored under both ``'word'`` and
            ``'originalText'``.
        :param begin: value stored under ``'characterOffsetBegin'``.
        :param end: value stored under ``'characterOffsetEnd'``.
        :param before: text stored under ``'before'`` (one space by default).
        :param after: text stored under ``'after'`` (one space by default).
        :param annotations: extra key/value pairs copied into the entry,
            e.g. ``lemma``, ``pos`` or ``ner``.
        :return: a new dict with the keys above.
        """
        entry = {
            'after': after,
            'before': before,
            'characterOffsetBegin': begin,
            'characterOffsetEnd': end,
            'index': index,
            'originalText': word,
            'word': word,
        }
        entry.update(annotations)
        return entry

    @staticmethod
    def _dependency(dep, dependent, dependent_gloss, governor, governor_gloss):
        """Return one dependency entry for the mocked response.

        :param dep: relation label, stored under ``'dep'``.
        :param dependent: value stored under ``'dependent'``.
        :param dependent_gloss: value stored under ``'dependentGloss'``.
        :param governor: value stored under ``'governor'``.
        :param governor_gloss: value stored under ``'governorGloss'``.
        :return: a new dict with those five keys.
        """
        return {
            'dep': dep,
            'dependent': dependent,
            'dependentGloss': dependent_gloss,
            'governor': governor,
            'governorGloss': governor_gloss,
        }

    def test_pos_tagger(self):
        """``tag`` pairs each input token with the ``pos`` value returned.

        Also asserts that ``api_call`` was invoked exactly once with the
        space-joined tokens and the ``tokenize,ssplit,pos`` annotators.
        """
        token = self._token
        dep = self._dependency

        def dependencies(nmod_label):
            # The three dependency lists of the canned response differ only
            # in the label of the swallow -> airspeed relation.
            return [
                dep('ROOT', 1, 'What', 0, 'ROOT'),
                dep('cop', 2, 'is', 1, 'What'),
                dep('det', 3, 'the', 4, 'airspeed'),
                dep('nsubj', 4, 'airspeed', 1, 'What'),
                dep('case', 5, 'of', 8, 'swallow'),
                dep('det', 6, 'an', 8, 'swallow'),
                dep('compound', 7, 'unladen', 8, 'swallow'),
                dep(nmod_label, 8, 'swallow', 4, 'airspeed'),
                dep('punct', 9, '?', 1, 'What'),
            ]

        corenlp_tagger = corenlp.CoreNLPParser(tagtype='pos')
        api_return_value = {
            'sentences': [
                {
                    'basicDependencies': dependencies('nmod'),
                    'enhancedDependencies': dependencies('nmod:of'),
                    'enhancedPlusPlusDependencies': dependencies('nmod:of'),
                    'index': 0,
                    'parse': (
                        '(ROOT\n (SBARQ\n (WHNP (WP What))\n (SQ (VBZ is)\n (NP\n'
                        ' (NP (DT the) (NN airspeed))\n (PP (IN of)\n'
                        ' (NP (DT an) (NN unladen) (NN swallow)))))\n (. ?)))'
                    ),
                    'tokens': [
                        token(1, 'What', 0, 4, before='', lemma='what', pos='WP'),
                        token(2, 'is', 5, 7, lemma='be', pos='VBZ'),
                        token(3, 'the', 8, 11, lemma='the', pos='DT'),
                        token(4, 'airspeed', 12, 20, lemma='airspeed', pos='NN'),
                        token(5, 'of', 21, 23, lemma='of', pos='IN'),
                        token(6, 'an', 24, 26, lemma='a', pos='DT'),
                        token(7, 'unladen', 27, 34, lemma='unladen', pos='JJ'),
                        token(8, 'swallow', 35, 42, lemma='swallow', pos='VB'),
                        token(9, '?', 43, 44, after='', lemma='?', pos='.'),
                    ],
                }
            ]
        }
        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)

        input_tokens = 'What is the airspeed of an unladen swallow ?'.split()
        expected_output = [
            ('What', 'WP'),
            ('is', 'VBZ'),
            ('the', 'DT'),
            ('airspeed', 'NN'),
            ('of', 'IN'),
            ('an', 'DT'),
            ('unladen', 'JJ'),
            ('swallow', 'VB'),
            ('?', '.'),
        ]

        tagged_output = corenlp_tagger.tag(input_tokens)

        corenlp_tagger.api_call.assert_called_once_with(
            'What is the airspeed of an unladen swallow ?',
            properties={
                'ssplit.isOneSentence': 'true',
                'annotators': 'tokenize,ssplit,pos',
            },
        )
        self.assertEqual(expected_output, tagged_output)

    def test_ner_tagger(self):
        """``tag`` pairs each input token with the ``ner`` value returned.

        Also asserts that ``api_call`` was invoked exactly once with the
        space-joined tokens and the ``tokenize,ssplit,ner`` annotators.
        """
        token = self._token
        corenlp_tagger = corenlp.CoreNLPParser(tagtype='ner')
        tokens = [
            token(1, 'Rami', 0, 4, before='', lemma='Rami', ner='PERSON', pos='NNP'),
            token(2, 'Eid', 5, 8, lemma='Eid', ner='PERSON', pos='NNP'),
            token(3, 'is', 9, 11, lemma='be', ner='O', pos='VBZ'),
            token(4, 'studying', 12, 20, lemma='study', ner='O', pos='VBG'),
            token(5, 'at', 21, 23, lemma='at', ner='O', pos='IN'),
            token(6, 'Stony', 24, 29, lemma='Stony', ner='ORGANIZATION', pos='NNP'),
            token(7, 'Brook', 30, 35, lemma='Brook', ner='ORGANIZATION', pos='NNP'),
            token(
                8, 'University', 36, 46,
                lemma='University', ner='ORGANIZATION', pos='NNP',
            ),
            token(9, 'in', 47, 49, lemma='in', ner='O', pos='IN'),
            token(10, 'NY', 50, 52, after='', lemma='NY', ner='O', pos='NNP'),
        ]
        api_return_value = {'sentences': [{'index': 0, 'tokens': tokens}]}
        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)

        input_tokens = 'Rami Eid is studying at Stony Brook University in NY'.split()
        expected_output = [
            ('Rami', 'PERSON'),
            ('Eid', 'PERSON'),
            ('is', 'O'),
            ('studying', 'O'),
            ('at', 'O'),
            ('Stony', 'ORGANIZATION'),
            ('Brook', 'ORGANIZATION'),
            ('University', 'ORGANIZATION'),
            ('in', 'O'),
            ('NY', 'O'),
        ]

        tagged_output = corenlp_tagger.tag(input_tokens)

        corenlp_tagger.api_call.assert_called_once_with(
            'Rami Eid is studying at Stony Brook University in NY',
            properties={
                'ssplit.isOneSentence': 'true',
                'annotators': 'tokenize,ssplit,ner',
            },
        )
        self.assertEqual(expected_output, tagged_output)

    def test_unexpected_tagtype(self):
        """Constructing a parser with ``tagtype='test'`` raises ``ValueError``."""
        with self.assertRaises(ValueError):
            # Only the constructor call matters; the instance is not needed.
            corenlp.CoreNLPParser(tagtype='test')
class TestParserAPI(TestCase):
    """Check ``parse`` on ``CoreNLPParser`` and ``CoreNLPDependencyParser``.

    Each test replaces the parser's ``api_call`` attribute with a
    ``MagicMock`` returning a canned response for the sentence
    "The quick brown fox jumps over the lazy dog".
    """

    @staticmethod
    def _token(index, word, begin, end, before=' ', after=' ', **annotations):
        """Return one token entry for the mocked ``api_call`` response.

        :param index: value stored under ``'index'``.
        :param word: surface form, stored under both ``'word'`` and
            ``'originalText'``.
        :param begin: value stored under ``'characterOffsetBegin'``.
        :param end: value stored under ``'characterOffsetEnd'``.
        :param before: text stored under ``'before'`` (one space by default).
        :param after: text stored under ``'after'`` (one space by default).
        :param annotations: extra key/value pairs copied into the entry,
            e.g. ``lemma`` or ``pos``.
        :return: a new dict with the keys above.
        """
        entry = {
            'after': after,
            'before': before,
            'characterOffsetBegin': begin,
            'characterOffsetEnd': end,
            'index': index,
            'originalText': word,
            'word': word,
        }
        entry.update(annotations)
        return entry

    @staticmethod
    def _dependency(dep, dependent, dependent_gloss, governor, governor_gloss):
        """Return one dependency entry for the mocked response.

        :param dep: relation label, stored under ``'dep'``.
        :param dependent: value stored under ``'dependent'``.
        :param dependent_gloss: value stored under ``'dependentGloss'``.
        :param governor: value stored under ``'governor'``.
        :param governor_gloss: value stored under ``'governorGloss'``.
        :return: a new dict with those five keys.
        """
        return {
            'dep': dep,
            'dependent': dependent,
            'dependentGloss': dependent_gloss,
            'governor': governor,
            'governorGloss': governor_gloss,
        }

    @classmethod
    def _fox_tokens(cls):
        """Return a fresh list of the nine token entries shared by both tests."""
        token = cls._token
        return [
            token(1, 'The', 0, 3, before='', lemma='the', pos='DT'),
            token(2, 'quick', 4, 9, lemma='quick', pos='JJ'),
            token(3, 'brown', 10, 15, lemma='brown', pos='JJ'),
            token(4, 'fox', 16, 19, lemma='fox', pos='NN'),
            token(5, 'jumps', 20, 25, lemma='jump', pos='VBZ'),
            token(6, 'over', 26, 30, lemma='over', pos='IN'),
            token(7, 'the', 31, 34, lemma='the', pos='DT'),
            token(8, 'lazy', 35, 39, lemma='lazy', pos='JJ'),
            token(9, 'dog', 40, 43, after='', lemma='dog', pos='NN'),
        ]

    def test_parse(self):
        """The first item yielded by ``parse`` equals the expected ``Tree``.

        Also asserts that ``api_call`` was invoked exactly once with the
        space-joined tokens and the sentence-splitting property below.
        """
        dep = self._dependency

        def dependencies(nmod_label):
            # The three dependency lists of the canned response differ only
            # in the label of the dog -> jumps relation.
            return [
                dep('ROOT', 4, 'fox', 0, 'ROOT'),
                dep('det', 1, 'The', 4, 'fox'),
                dep('amod', 2, 'quick', 4, 'fox'),
                dep('amod', 3, 'brown', 4, 'fox'),
                dep('dep', 5, 'jumps', 4, 'fox'),
                dep('case', 6, 'over', 9, 'dog'),
                dep('det', 7, 'the', 9, 'dog'),
                dep('amod', 8, 'lazy', 9, 'dog'),
                dep(nmod_label, 9, 'dog', 5, 'jumps'),
            ]

        corenlp_parser = corenlp.CoreNLPParser()
        api_return_value = {
            'sentences': [
                {
                    'basicDependencies': dependencies('nmod'),
                    'enhancedDependencies': dependencies('nmod:over'),
                    'enhancedPlusPlusDependencies': dependencies('nmod:over'),
                    'index': 0,
                    'parse': (
                        '(ROOT\n (NP\n (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n'
                        ' (NP\n (NP (NNS jumps))\n (PP (IN over)\n'
                        ' (NP (DT the) (JJ lazy) (NN dog))))))'
                    ),
                    'tokens': self._fox_tokens(),
                }
            ]
        }
        corenlp_parser.api_call = MagicMock(return_value=api_return_value)

        input_tokens = "The quick brown fox jumps over the lazy dog".split()
        subject = Tree(
            'NP',
            [
                Tree('DT', ['The']),
                Tree('JJ', ['quick']),
                Tree('JJ', ['brown']),
                Tree('NN', ['fox']),
            ],
        )
        prepositional_phrase = Tree(
            'PP',
            [
                Tree('IN', ['over']),
                Tree(
                    'NP',
                    [
                        Tree('DT', ['the']),
                        Tree('JJ', ['lazy']),
                        Tree('NN', ['dog']),
                    ],
                ),
            ],
        )
        expected_output = Tree(
            'ROOT',
            [
                Tree(
                    'NP',
                    [
                        subject,
                        Tree(
                            'NP',
                            [
                                Tree('NP', [Tree('NNS', ['jumps'])]),
                                prepositional_phrase,
                            ],
                        ),
                    ],
                )
            ],
        )

        parsed_data = next(corenlp_parser.parse(input_tokens))

        # NOTE(review): the property key repeats the 'ssplit.' prefix. It is
        # asserted verbatim, so it has to match what the parser sends;
        # confirm the doubled prefix is intended.
        corenlp_parser.api_call.assert_called_once_with(
            "The quick brown fox jumps over the lazy dog",
            properties={'ssplit.ssplit.eolonly': 'true'},
        )
        self.assertEqual(expected_output, parsed_data)

    def test_dependency_parser(self):
        """``tree()`` of the first parse equals the expected dependency tree.

        Also asserts that ``api_call`` was invoked exactly once with the
        space-joined tokens and the sentence-splitting property below.
        """
        dep = self._dependency

        def dependencies(nmod_label):
            # The three dependency lists of the canned response differ only
            # in the label of the dog -> jumps relation.
            return [
                dep('ROOT', 5, 'jumps', 0, 'ROOT'),
                dep('det', 1, 'The', 4, 'fox'),
                dep('amod', 2, 'quick', 4, 'fox'),
                dep('amod', 3, 'brown', 4, 'fox'),
                dep('nsubj', 4, 'fox', 5, 'jumps'),
                dep('case', 6, 'over', 9, 'dog'),
                dep('det', 7, 'the', 9, 'dog'),
                dep('amod', 8, 'lazy', 9, 'dog'),
                dep(nmod_label, 9, 'dog', 5, 'jumps'),
            ]

        corenlp_parser = corenlp.CoreNLPDependencyParser()
        api_return_value = {
            'sentences': [
                {
                    'basicDependencies': dependencies('nmod'),
                    'enhancedDependencies': dependencies('nmod:over'),
                    'enhancedPlusPlusDependencies': dependencies('nmod:over'),
                    'index': 0,
                    'tokens': self._fox_tokens(),
                }
            ]
        }
        corenlp_parser.api_call = MagicMock(return_value=api_return_value)

        input_tokens = "The quick brown fox jumps over the lazy dog".split()
        expected_output = Tree(
            'jumps',
            [
                Tree('fox', ['The', 'quick', 'brown']),
                Tree('dog', ['over', 'the', 'lazy']),
            ],
        )

        parsed_data = next(corenlp_parser.parse(input_tokens))

        # NOTE(review): the property key repeats the 'ssplit.' prefix. It is
        # asserted verbatim, so it has to match what the parser sends;
        # confirm the doubled prefix is intended.
        corenlp_parser.api_call.assert_called_once_with(
            "The quick brown fox jumps over the lazy dog",
            properties={'ssplit.ssplit.eolonly': 'true'},
        )
        self.assertEqual(expected_output, parsed_data.tree())