Source code for nltk.test.unit.test_corenlp
"""
Mock test for Stanford CoreNLP wrappers.
"""
from unittest import TestCase
from unittest.mock import MagicMock
import pytest
from nltk.parse import corenlp
from nltk.tree import Tree
[docs]
def setup_module(module):
    """Create and start a CoreNLP server on port 9000 for this test module.

    The server object is stored in the module-level global ``server``.  The
    whole module is skipped when the server object cannot be created
    (``LookupError``) or when starting it raises
    ``corenlp.CoreNLPServerError``.

    :param module: the test module object passed in by the test runner
        (unused).
    """
    global server

    try:
        server = corenlp.CoreNLPServer(port=9000)
    except LookupError:
        pytest.skip("Could not instantiate CoreNLPServer.")

    try:
        server.start()
    except corenlp.CoreNLPServerError as e:
        # ``strerror`` may be missing or ``None`` (for instance when the
        # error was raised with a message only); fall back to the exception
        # itself so the skip reason never ends in the literal text "None".
        reason = getattr(e, "strerror", None) or e
        pytest.skip(
            "Skipping CoreNLP tests because the server could not be started. "
            "Make sure that the 9000 port is free. "
            "{}".format(reason)
        )


def teardown_module(module):
    """Stop the server started by ``setup_module`` so it does not outlive the tests.

    :param module: the test module object passed in by the test runner
        (unused).
    """
    # ``server`` is only bound when ``setup_module`` managed to create it, so
    # look it up defensively instead of assuming the global exists.
    running_server = globals().get("server")
    if running_server is not None:
        running_server.stop()
[docs]
class TestTokenizerAPI(TestCase):
    """Exercise ``CoreNLPParser.tokenize`` against a canned API response."""

    def test_tokenize(self):
        """A three-sentence text is flattened into one stream of tokens.

        ``api_call`` is replaced by a ``MagicMock``, so the test checks both
        the request handed to ``api_call`` and the tokens produced from the
        canned response.
        """
        text = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\nThanks."

        # One inner list per sentence of the canned response; each row is
        # (word, characterOffsetBegin, characterOffsetEnd, before, after).
        sentence_rows = [
            [
                ("Good", 0, 4, "", " "),
                ("muffins", 5, 12, " ", " "),
                ("cost", 13, 17, " ", " "),
                ("$", 18, 19, " ", ""),
                ("3.88", 19, 23, "", "\n"),
                ("in", 24, 26, "\n", " "),
                ("New", 27, 30, " ", " "),
                ("York", 31, 35, " ", ""),
                (".", 35, 36, "", " "),
            ],
            [
                ("Please", 38, 44, " ", " "),
                ("buy", 45, 48, " ", " "),
                ("me", 49, 51, " ", "\n"),
                ("two", 52, 55, "\n", " "),
                ("of", 56, 58, " ", " "),
                ("them", 59, 63, " ", ""),
                (".", 63, 64, "", "\n"),
            ],
            [
                ("Thanks", 65, 71, "\n", ""),
                (".", 71, 72, "", ""),
            ],
        ]

        # Sentences are numbered from 0, tokens within a sentence from 1.
        canned_response = {
            "sentences": [
                {
                    "index": sentence_number,
                    "tokens": [
                        {
                            "after": after,
                            "before": before,
                            "characterOffsetBegin": begin,
                            "characterOffsetEnd": end,
                            "index": token_number,
                            "originalText": word,
                            "word": word,
                        }
                        for token_number, (word, begin, end, before, after) in enumerate(
                            rows, start=1
                        )
                    ],
                }
                for sentence_number, rows in enumerate(sentence_rows)
            ]
        }

        tokenizer = corenlp.CoreNLPParser()
        tokenizer.api_call = MagicMock(return_value=canned_response)

        wanted_tokens = (
            "Good muffins cost $ 3.88 in New York . "
            "Please buy me two of them . Thanks ."
        ).split()

        produced_tokens = list(tokenizer.tokenize(text))

        tokenizer.api_call.assert_called_once_with(
            text,
            properties={"annotators": "tokenize,ssplit"},
        )
        self.assertEqual(wanted_tokens, produced_tokens)
[docs]
class TestTaggerAPI(TestCase):
    """Exercise ``CoreNLPParser.tag`` for the POS and NER tag types with a mocked ``api_call``."""

    @staticmethod
    def _token_dicts(annotation_names, rows):
        """Expand compact rows into token dictionaries for a canned response.

        Each row is ``(word, begin, end, before, after, *annotations)``; the
        trailing annotation values are paired, in order, with
        *annotation_names*.  Tokens are numbered from 1 in row order and
        ``originalText`` is set to the same value as ``word``.
        """
        tokens = []
        for number, (word, begin, end, before, after, *annotations) in enumerate(
            rows, start=1
        ):
            token = {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": number,
                "originalText": word,
                "word": word,
            }
            token.update(zip(annotation_names, annotations))
            tokens.append(token)
        return tokens

    @staticmethod
    def _dependency_dicts(words, edges):
        """Expand ``(relation, dependent, governor)`` triples into dependency dictionaries.

        *dependent* and *governor* are 1-based positions in *words*;
        position 0 is glossed as ``"ROOT"``.
        """
        glosses = ["ROOT"] + list(words)
        return [
            {
                "dep": relation,
                "dependent": dependent,
                "dependentGloss": glosses[dependent],
                "governor": governor,
                "governorGloss": glosses[governor],
            }
            for relation, dependent, governor in edges
        ]

    def test_pos_tagger(self):
        """POS tagging pairs every input token with the ``pos`` value of the response."""
        question = "What is the airspeed of an unladen swallow ?"
        words = question.split()

        basic_edges = [
            ("ROOT", 1, 0),
            ("cop", 2, 1),
            ("det", 3, 4),
            ("nsubj", 4, 1),
            ("case", 5, 8),
            ("det", 6, 8),
            ("compound", 7, 8),
            ("nmod", 8, 4),
            ("punct", 9, 1),
        ]
        # The enhanced variants differ from the basic list only in the
        # relation label of the eighth edge.
        enhanced_edges = basic_edges[:7] + [("nmod:of", 8, 4)] + basic_edges[8:]

        # (word, begin, end, before, after, lemma, pos)
        token_rows = [
            ("What", 0, 4, "", " ", "what", "WP"),
            ("is", 5, 7, " ", " ", "be", "VBZ"),
            ("the", 8, 11, " ", " ", "the", "DT"),
            ("airspeed", 12, 20, " ", " ", "airspeed", "NN"),
            ("of", 21, 23, " ", " ", "of", "IN"),
            ("an", 24, 26, " ", " ", "a", "DT"),
            ("unladen", 27, 34, " ", " ", "unladen", "JJ"),
            ("swallow", 35, 42, " ", " ", "swallow", "VB"),
            ("?", 43, 44, " ", "", "?", "."),
        ]

        canned_response = {
            "sentences": [
                {
                    "basicDependencies": self._dependency_dicts(words, basic_edges),
                    "enhancedDependencies": self._dependency_dicts(
                        words, enhanced_edges
                    ),
                    "enhancedPlusPlusDependencies": self._dependency_dicts(
                        words, enhanced_edges
                    ),
                    "index": 0,
                    "parse": "(ROOT\n (SBARQ\n (WHNP (WP What))\n (SQ (VBZ is)\n (NP\n (NP (DT the) (NN airspeed))\n (PP (IN of)\n (NP (DT an) (NN unladen) (NN swallow)))))\n (. ?)))",
                    "tokens": self._token_dicts(("lemma", "pos"), token_rows),
                }
            ]
        }

        tagger = corenlp.CoreNLPParser(tagtype="pos")
        tagger.api_call = MagicMock(return_value=canned_response)

        wanted_tags = [
            ("What", "WP"),
            ("is", "VBZ"),
            ("the", "DT"),
            ("airspeed", "NN"),
            ("of", "IN"),
            ("an", "DT"),
            ("unladen", "JJ"),
            ("swallow", "VB"),
            ("?", "."),
        ]

        produced_tags = tagger.tag(words)

        tagger.api_call.assert_called_once_with(
            question,
            properties={
                "ssplit.isOneSentence": "true",
                "annotators": "tokenize,ssplit,pos",
            },
        )
        self.assertEqual(wanted_tags, produced_tags)

    def test_ner_tagger(self):
        """NER tagging pairs every input token with the ``ner`` value of the response."""
        statement = "Rami Eid is studying at Stony Brook University in NY"

        # (word, begin, end, before, after, lemma, ner, pos)
        token_rows = [
            ("Rami", 0, 4, "", " ", "Rami", "PERSON", "NNP"),
            ("Eid", 5, 8, " ", " ", "Eid", "PERSON", "NNP"),
            ("is", 9, 11, " ", " ", "be", "O", "VBZ"),
            ("studying", 12, 20, " ", " ", "study", "O", "VBG"),
            ("at", 21, 23, " ", " ", "at", "O", "IN"),
            ("Stony", 24, 29, " ", " ", "Stony", "ORGANIZATION", "NNP"),
            ("Brook", 30, 35, " ", " ", "Brook", "ORGANIZATION", "NNP"),
            ("University", 36, 46, " ", " ", "University", "ORGANIZATION", "NNP"),
            ("in", 47, 49, " ", " ", "in", "O", "IN"),
            ("NY", 50, 52, " ", "", "NY", "O", "NNP"),
        ]

        canned_response = {
            "sentences": [
                {
                    "index": 0,
                    "tokens": self._token_dicts(("lemma", "ner", "pos"), token_rows),
                }
            ]
        }

        tagger = corenlp.CoreNLPParser(tagtype="ner")
        tagger.api_call = MagicMock(return_value=canned_response)

        wanted_tags = [
            ("Rami", "PERSON"),
            ("Eid", "PERSON"),
            ("is", "O"),
            ("studying", "O"),
            ("at", "O"),
            ("Stony", "ORGANIZATION"),
            ("Brook", "ORGANIZATION"),
            ("University", "ORGANIZATION"),
            ("in", "O"),
            ("NY", "O"),
        ]

        produced_tags = tagger.tag(statement.split())

        tagger.api_call.assert_called_once_with(
            statement,
            properties={
                "ssplit.isOneSentence": "true",
                "annotators": "tokenize,ssplit,ner",
            },
        )
        self.assertEqual(wanted_tags, produced_tags)

    def test_unexpected_tagtype(self):
        """Constructing a parser with an unknown ``tagtype`` raises ``ValueError``."""
        with self.assertRaises(ValueError):
            corenlp.CoreNLPParser(tagtype="test")
[docs]
class TestParserAPI(TestCase):
    """Exercise the constituency and dependency parsers with a mocked ``api_call``."""

    # Both tests parse the same sentence.
    _SENTENCE = "The quick brown fox jumps over the lazy dog"

    # Token table shared by both canned responses:
    # (word, begin, end, before, after, lemma, pos)
    _TOKEN_ROWS = (
        ("The", 0, 3, "", " ", "the", "DT"),
        ("quick", 4, 9, " ", " ", "quick", "JJ"),
        ("brown", 10, 15, " ", " ", "brown", "JJ"),
        ("fox", 16, 19, " ", " ", "fox", "NN"),
        ("jumps", 20, 25, " ", " ", "jump", "VBZ"),
        ("over", 26, 30, " ", " ", "over", "IN"),
        ("the", 31, 34, " ", " ", "the", "DT"),
        ("lazy", 35, 39, " ", " ", "lazy", "JJ"),
        ("dog", 40, 43, " ", "", "dog", "NN"),
    )

    @classmethod
    def _dependency_dicts(cls, edges):
        """Expand ``(relation, dependent, governor)`` triples into dependency dictionaries.

        Positions are 1-based indices into the shared token table; position
        0 is glossed as ``"ROOT"``.
        """
        glosses = ["ROOT"] + [row[0] for row in cls._TOKEN_ROWS]
        return [
            {
                "dep": relation,
                "dependent": dependent,
                "dependentGloss": glosses[dependent],
                "governor": governor,
                "governorGloss": glosses[governor],
            }
            for relation, dependent, governor in edges
        ]

    @classmethod
    def _canned_response(cls, basic_edges, parse=None):
        """Build a one-sentence canned response from *basic_edges*.

        The two enhanced dependency lists repeat *basic_edges* with the final
        edge relabelled ``"nmod:over"``.  A ``"parse"`` entry is included
        only when *parse* is given.
        """
        enhanced_edges = list(basic_edges[:-1]) + [("nmod:over", 9, 5)]
        sentence = {
            "basicDependencies": cls._dependency_dicts(basic_edges),
            "enhancedDependencies": cls._dependency_dicts(enhanced_edges),
            "enhancedPlusPlusDependencies": cls._dependency_dicts(enhanced_edges),
            "index": 0,
        }
        if parse is not None:
            sentence["parse"] = parse
        sentence["tokens"] = [
            {
                "after": after,
                "before": before,
                "characterOffsetBegin": begin,
                "characterOffsetEnd": end,
                "index": number,
                "lemma": lemma,
                "originalText": word,
                "pos": pos,
                "word": word,
            }
            for number, (word, begin, end, before, after, lemma, pos) in enumerate(
                cls._TOKEN_ROWS, start=1
            )
        ]
        return {"sentences": [sentence]}

    def test_parse(self):
        """The bracketed ``parse`` string of the response becomes an equal ``Tree``."""
        canned_response = self._canned_response(
            [
                ("ROOT", 4, 0),
                ("det", 1, 4),
                ("amod", 2, 4),
                ("amod", 3, 4),
                ("dep", 5, 4),
                ("case", 6, 9),
                ("det", 7, 9),
                ("amod", 8, 9),
                ("nmod", 9, 5),
            ],
            parse="(ROOT\n (NP\n (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n (NP\n (NP (NNS jumps))\n (PP (IN over)\n (NP (DT the) (JJ lazy) (NN dog))))))",
        )

        parser = corenlp.CoreNLPParser()
        parser.api_call = MagicMock(return_value=canned_response)

        # Assemble the expected tree bottom-up from named constituents.
        fox_phrase = Tree(
            "NP",
            [
                Tree("DT", ["The"]),
                Tree("JJ", ["quick"]),
                Tree("JJ", ["brown"]),
                Tree("NN", ["fox"]),
            ],
        )
        dog_phrase = Tree(
            "NP",
            [
                Tree("DT", ["the"]),
                Tree("JJ", ["lazy"]),
                Tree("NN", ["dog"]),
            ],
        )
        over_phrase = Tree("PP", [Tree("IN", ["over"]), dog_phrase])
        jumps_phrase = Tree(
            "NP",
            [
                Tree("NP", [Tree("NNS", ["jumps"])]),
                over_phrase,
            ],
        )
        wanted_tree = Tree("ROOT", [Tree("NP", [fox_phrase, jumps_phrase])])

        produced_tree = next(parser.parse(self._SENTENCE.split()))

        parser.api_call.assert_called_once_with(
            self._SENTENCE,
            properties={"ssplit.eolonly": "true"},
        )
        self.assertEqual(wanted_tree, produced_tree)

    def test_dependency_parser(self):
        """The dependency parse of the response converts to the expected head-word ``Tree``."""
        canned_response = self._canned_response(
            [
                ("ROOT", 5, 0),
                ("det", 1, 4),
                ("amod", 2, 4),
                ("amod", 3, 4),
                ("nsubj", 4, 5),
                ("case", 6, 9),
                ("det", 7, 9),
                ("amod", 8, 9),
                ("nmod", 9, 5),
            ]
        )

        parser = corenlp.CoreNLPDependencyParser()
        parser.api_call = MagicMock(return_value=canned_response)

        wanted_tree = Tree(
            "jumps",
            [
                Tree("fox", ["The", "quick", "brown"]),
                Tree("dog", ["over", "the", "lazy"]),
            ],
        )

        produced_graph = next(parser.parse(self._SENTENCE.split()))

        parser.api_call.assert_called_once_with(
            self._SENTENCE,
            properties={"ssplit.eolonly": "true"},
        )
        self.assertEqual(wanted_tree, produced_graph.tree())