# Source code for nltk.test.unit.test_corenlp

"""
Mock test for Stanford CoreNLP wrappers.
"""

from unittest import TestCase
from unittest.mock import MagicMock

import pytest

from nltk.parse import corenlp
from nltk.tree import Tree


def setup_module(module):
    """Start a CoreNLP server on port 9000, or skip the whole test module."""
    global server

    # Instantiation raises LookupError when the CoreNLP jars cannot be found.
    try:
        server = corenlp.CoreNLPServer(port=9000)
    except LookupError:
        pytest.skip("Could not instantiate CoreNLPServer.")

    # Starting may still fail (e.g. the port is already in use); skip then too.
    try:
        server.start()
    except corenlp.CoreNLPServerError as err:
        pytest.skip(
            "Skipping CoreNLP tests because the server could not be started. "
            "Make sure that the 9000 port is free. "
            "{}".format(err.strerror)
        )
def teardown_module(module):
    """Shut down the CoreNLP server started in ``setup_module``."""
    server.stop()
class TestTokenizerAPI(TestCase):
    """Tokenizer behaviour of ``CoreNLPParser`` with the HTTP layer mocked out."""

    def test_tokenize(self):
        tokenizer = corenlp.CoreNLPParser()

        def tok(index, word, begin, end, before, after):
            # One token entry in the shape the CoreNLP JSON response uses.
            return {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": index,
                "originalText": word,
                "word": word,
            }

        # Canned server response: three sentences' worth of tokens.
        mock_response = {
            "sentences": [
                {
                    "index": 0,
                    "tokens": [
                        tok(1, "Good", 0, 4, "", " "),
                        tok(2, "muffins", 5, 12, " ", " "),
                        tok(3, "cost", 13, 17, " ", " "),
                        tok(4, "$", 18, 19, " ", ""),
                        tok(5, "3.88", 19, 23, "", "\n"),
                        tok(6, "in", 24, 26, "\n", " "),
                        tok(7, "New", 27, 30, " ", " "),
                        tok(8, "York", 31, 35, " ", ""),
                        tok(9, ".", 35, 36, "", " "),
                    ],
                },
                {
                    "index": 1,
                    "tokens": [
                        tok(1, "Please", 38, 44, " ", " "),
                        tok(2, "buy", 45, 48, " ", " "),
                        tok(3, "me", 49, 51, " ", "\n"),
                        tok(4, "two", 52, 55, "\n", " "),
                        tok(5, "of", 56, 58, " ", " "),
                        tok(6, "them", 59, 63, " ", ""),
                        tok(7, ".", 63, 64, "", "\n"),
                    ],
                },
                {
                    "index": 2,
                    "tokens": [
                        tok(1, "Thanks", 65, 71, "\n", ""),
                        tok(2, ".", 71, 72, "", ""),
                    ],
                },
            ]
        }
        tokenizer.api_call = MagicMock(return_value=mock_response)

        text = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks."
        expected_tokens = [
            "Good", "muffins", "cost", "$", "3.88", "in", "New", "York", ".",
            "Please", "buy", "me", "two", "of", "them", ".",
            "Thanks", ".",
        ]

        actual_tokens = list(tokenizer.tokenize(text))

        # The raw string must be passed through to the API untouched, with
        # only tokenization/sentence-splitting requested.
        tokenizer.api_call.assert_called_once_with(
            text,
            properties={"annotators": "tokenize,ssplit"},
        )
        self.assertEqual(expected_tokens, actual_tokens)
class TestTaggerAPI(TestCase):
    """POS and NER tagging behaviour of ``CoreNLPParser`` with a mocked API."""

    def test_pos_tagger(self):
        tagger = corenlp.CoreNLPParser(tagtype="pos")

        def dep(rel, dependent, dep_gloss, governor, gov_gloss):
            # One dependency-arc entry as emitted by CoreNLP.
            return {
                "dep": rel,
                "dependent": dependent,
                "dependentGloss": dep_gloss,
                "governor": governor,
                "governorGloss": gov_gloss,
            }

        def dep_list(nmod):
            # basic/enhanced/enhanced++ lists differ only in the nmod label.
            return [
                dep("ROOT", 1, "What", 0, "ROOT"),
                dep("cop", 2, "is", 1, "What"),
                dep("det", 3, "the", 4, "airspeed"),
                dep("nsubj", 4, "airspeed", 1, "What"),
                dep("case", 5, "of", 8, "swallow"),
                dep("det", 6, "an", 8, "swallow"),
                dep("compound", 7, "unladen", 8, "swallow"),
                dep(nmod, 8, "swallow", 4, "airspeed"),
                dep("punct", 9, "?", 1, "What"),
            ]

        def tok(index, word, lemma, pos, begin, end, before, after):
            # One token entry with lemma and POS, as CoreNLP returns it.
            return {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": index,
                "lemma": lemma,
                "originalText": word,
                "pos": pos,
                "word": word,
            }

        mock_response = {
            "sentences": [
                {
                    "basicDependencies": dep_list("nmod"),
                    "enhancedDependencies": dep_list("nmod:of"),
                    "enhancedPlusPlusDependencies": dep_list("nmod:of"),
                    "index": 0,
                    # Constituency parse; present in the response but not
                    # consumed by tag(), which reads only the token "pos" keys.
                    "parse": "(ROOT\n  (SBARQ\n    (WHNP (WP What))\n    (SQ (VBZ is)\n      (NP\n        (NP (DT the) (NN airspeed))\n        (PP (IN of)\n          (NP (DT an) (NN unladen) (NN swallow)))))\n    (. ?)))",
                    "tokens": [
                        tok(1, "What", "what", "WP", 0, 4, "", " "),
                        tok(2, "is", "be", "VBZ", 5, 7, " ", " "),
                        tok(3, "the", "the", "DT", 8, 11, " ", " "),
                        tok(4, "airspeed", "airspeed", "NN", 12, 20, " ", " "),
                        tok(5, "of", "of", "IN", 21, 23, " ", " "),
                        tok(6, "an", "a", "DT", 24, 26, " ", " "),
                        tok(7, "unladen", "unladen", "JJ", 27, 34, " ", " "),
                        tok(8, "swallow", "swallow", "VB", 35, 42, " ", " "),
                        tok(9, "?", "?", ".", 43, 44, " ", ""),
                    ],
                }
            ]
        }
        tagger.api_call = MagicMock(return_value=mock_response)

        input_tokens = "What is the airspeed of an unladen swallow ?".split()
        expected = [
            ("What", "WP"),
            ("is", "VBZ"),
            ("the", "DT"),
            ("airspeed", "NN"),
            ("of", "IN"),
            ("an", "DT"),
            ("unladen", "JJ"),
            ("swallow", "VB"),
            ("?", "."),
        ]

        tagged = tagger.tag(input_tokens)

        # Tokens are re-joined into one sentence and tagged as a single unit.
        tagger.api_call.assert_called_once_with(
            "What is the airspeed of an unladen swallow ?",
            properties={
                "ssplit.isOneSentence": "true",
                "annotators": "tokenize,ssplit,pos",
            },
        )
        self.assertEqual(expected, tagged)

    def test_ner_tagger(self):
        tagger = corenlp.CoreNLPParser(tagtype="ner")

        def tok(index, word, lemma, pos, ner, begin, end, before, after):
            # One token entry carrying an NER label.
            return {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": index,
                "lemma": lemma,
                "ner": ner,
                "originalText": word,
                "pos": pos,
                "word": word,
            }

        mock_response = {
            "sentences": [
                {
                    "index": 0,
                    "tokens": [
                        tok(1, "Rami", "Rami", "NNP", "PERSON", 0, 4, "", " "),
                        tok(2, "Eid", "Eid", "NNP", "PERSON", 5, 8, " ", " "),
                        tok(3, "is", "be", "VBZ", "O", 9, 11, " ", " "),
                        tok(4, "studying", "study", "VBG", "O", 12, 20, " ", " "),
                        tok(5, "at", "at", "IN", "O", 21, 23, " ", " "),
                        tok(6, "Stony", "Stony", "NNP", "ORGANIZATION", 24, 29, " ", " "),
                        tok(7, "Brook", "Brook", "NNP", "ORGANIZATION", 30, 35, " ", " "),
                        tok(8, "University", "University", "NNP", "ORGANIZATION", 36, 46, " ", " "),
                        tok(9, "in", "in", "IN", "O", 47, 49, " ", " "),
                        tok(10, "NY", "NY", "NNP", "O", 50, 52, " ", ""),
                    ],
                }
            ]
        }
        tagger.api_call = MagicMock(return_value=mock_response)

        input_tokens = "Rami Eid is studying at Stony Brook University in NY".split()
        expected = [
            ("Rami", "PERSON"),
            ("Eid", "PERSON"),
            ("is", "O"),
            ("studying", "O"),
            ("at", "O"),
            ("Stony", "ORGANIZATION"),
            ("Brook", "ORGANIZATION"),
            ("University", "ORGANIZATION"),
            ("in", "O"),
            ("NY", "O"),
        ]

        tagged = tagger.tag(input_tokens)

        tagger.api_call.assert_called_once_with(
            "Rami Eid is studying at Stony Brook University in NY",
            properties={
                "ssplit.isOneSentence": "true",
                "annotators": "tokenize,ssplit,ner",
            },
        )
        self.assertEqual(expected, tagged)

    def test_unexpected_tagtype(self):
        # An unknown tagtype must be rejected at construction time.
        # (Fixed: the original bound the result to an unused local variable.)
        with self.assertRaises(ValueError):
            corenlp.CoreNLPParser(tagtype="test")
class TestParserAPI(TestCase):
    """Constituency and dependency parsing with the CoreNLP API mocked out."""

    def test_parse(self):
        parser = corenlp.CoreNLPParser()

        def dep(rel, dependent, dep_gloss, governor, gov_gloss):
            # One dependency-arc entry as emitted by CoreNLP.
            return {
                "dep": rel,
                "dependent": dependent,
                "dependentGloss": dep_gloss,
                "governor": governor,
                "governorGloss": gov_gloss,
            }

        def dep_list(nmod):
            # basic/enhanced/enhanced++ lists differ only in the nmod label.
            return [
                dep("ROOT", 4, "fox", 0, "ROOT"),
                dep("det", 1, "The", 4, "fox"),
                dep("amod", 2, "quick", 4, "fox"),
                dep("amod", 3, "brown", 4, "fox"),
                dep("dep", 5, "jumps", 4, "fox"),
                dep("case", 6, "over", 9, "dog"),
                dep("det", 7, "the", 9, "dog"),
                dep("amod", 8, "lazy", 9, "dog"),
                dep(nmod, 9, "dog", 5, "jumps"),
            ]

        def tok(index, word, lemma, pos, begin, end, before, after):
            # One token entry with lemma and POS.
            return {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": index,
                "lemma": lemma,
                "originalText": word,
                "pos": pos,
                "word": word,
            }

        mock_response = {
            "sentences": [
                {
                    "basicDependencies": dep_list("nmod"),
                    "enhancedDependencies": dep_list("nmod:over"),
                    "enhancedPlusPlusDependencies": dep_list("nmod:over"),
                    "index": 0,
                    "parse": "(ROOT\n  (NP\n    (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n    (NP\n      (NP (NNS jumps))\n      (PP (IN over)\n        (NP (DT the) (JJ lazy) (NN dog))))))",
                    "tokens": [
                        tok(1, "The", "the", "DT", 0, 3, "", " "),
                        tok(2, "quick", "quick", "JJ", 4, 9, " ", " "),
                        tok(3, "brown", "brown", "JJ", 10, 15, " ", " "),
                        tok(4, "fox", "fox", "NN", 16, 19, " ", " "),
                        tok(5, "jumps", "jump", "VBZ", 20, 25, " ", " "),
                        tok(6, "over", "over", "IN", 26, 30, " ", " "),
                        tok(7, "the", "the", "DT", 31, 34, " ", " "),
                        tok(8, "lazy", "lazy", "JJ", 35, 39, " ", " "),
                        tok(9, "dog", "dog", "NN", 40, 43, " ", ""),
                    ],
                }
            ]
        }
        parser.api_call = MagicMock(return_value=mock_response)

        words = "The quick brown fox jumps over the lazy dog".split()

        # Same tree the nested-constructor form would build, expressed as a
        # bracketed s-expression.
        expected_tree = Tree.fromstring(
            "(ROOT (NP (NP (DT The) (JJ quick) (JJ brown) (NN fox)) "
            "(NP (NP (NNS jumps)) (PP (IN over) "
            "(NP (DT the) (JJ lazy) (NN dog))))))"
        )

        parsed = next(parser.parse(words))

        parser.api_call.assert_called_once_with(
            "The quick brown fox jumps over the lazy dog",
            properties={"ssplit.eolonly": "true"},
        )
        self.assertEqual(expected_tree, parsed)

    def test_dependency_parser(self):
        parser = corenlp.CoreNLPDependencyParser()

        def dep(rel, dependent, dep_gloss, governor, gov_gloss):
            # One dependency-arc entry as emitted by CoreNLP.
            return {
                "dep": rel,
                "dependent": dependent,
                "dependentGloss": dep_gloss,
                "governor": governor,
                "governorGloss": gov_gloss,
            }

        def dep_list(nmod):
            # basic/enhanced/enhanced++ lists differ only in the nmod label.
            return [
                dep("ROOT", 5, "jumps", 0, "ROOT"),
                dep("det", 1, "The", 4, "fox"),
                dep("amod", 2, "quick", 4, "fox"),
                dep("amod", 3, "brown", 4, "fox"),
                dep("nsubj", 4, "fox", 5, "jumps"),
                dep("case", 6, "over", 9, "dog"),
                dep("det", 7, "the", 9, "dog"),
                dep("amod", 8, "lazy", 9, "dog"),
                dep(nmod, 9, "dog", 5, "jumps"),
            ]

        def tok(index, word, lemma, pos, begin, end, before, after):
            # One token entry with lemma and POS.
            return {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": index,
                "lemma": lemma,
                "originalText": word,
                "pos": pos,
                "word": word,
            }

        mock_response = {
            "sentences": [
                {
                    "basicDependencies": dep_list("nmod"),
                    "enhancedDependencies": dep_list("nmod:over"),
                    "enhancedPlusPlusDependencies": dep_list("nmod:over"),
                    "index": 0,
                    "tokens": [
                        tok(1, "The", "the", "DT", 0, 3, "", " "),
                        tok(2, "quick", "quick", "JJ", 4, 9, " ", " "),
                        tok(3, "brown", "brown", "JJ", 10, 15, " ", " "),
                        tok(4, "fox", "fox", "NN", 16, 19, " ", " "),
                        tok(5, "jumps", "jump", "VBZ", 20, 25, " ", " "),
                        tok(6, "over", "over", "IN", 26, 30, " ", " "),
                        tok(7, "the", "the", "DT", 31, 34, " ", " "),
                        tok(8, "lazy", "lazy", "JJ", 35, 39, " ", " "),
                        tok(9, "dog", "dog", "NN", 40, 43, " ", ""),
                    ],
                }
            ]
        }
        parser.api_call = MagicMock(return_value=mock_response)

        words = "The quick brown fox jumps over the lazy dog".split()
        expected_tree = Tree(
            "jumps",
            [
                Tree("fox", ["The", "quick", "brown"]),
                Tree("dog", ["over", "the", "lazy"]),
            ],
        )

        parsed = next(parser.parse(words))

        parser.api_call.assert_called_once_with(
            "The quick brown fox jumps over the lazy dog",
            properties={"ssplit.eolonly": "true"},
        )
        self.assertEqual(expected_tree, parsed.tree())