import pytest
from nltk.util import everygrams
def test_everygrams_without_padding(everygram_input):
    """Check ``everygrams`` when called with its default arguments.

    ``everygram_input`` is presumably a pytest fixture defined outside this
    view; the expected values below imply it supplies the tokens
    "a", "b", "c" -- TODO confirm against the fixture definition.
    """
    # Expected order: grouped by start token, shortest n-gram first.
    expected_output = [
        ("a",),
        ("a", "b"),
        ("a", "b", "c"),
        ("b",),
        ("b", "c"),
        ("c",),
    ]
    output = list(everygrams(everygram_input))
    assert output == expected_output
def test_everygrams_max_len(everygram_input):
    """Check that ``max_len=2`` limits ``everygrams`` to unigrams and bigrams.

    ``everygram_input`` is presumably a pytest fixture defined outside this
    view; the expected values below imply it supplies the tokens
    "a", "b", "c" -- TODO confirm against the fixture definition.
    """
    # No 3-token tuple may appear: ("a", "b", "c") is absent on purpose.
    expected_output = [
        ("a",),
        ("a", "b"),
        ("b",),
        ("b", "c"),
        ("c",),
    ]
    output = list(everygrams(everygram_input, max_len=2))
    assert output == expected_output
def test_everygrams_min_len(everygram_input):
    """Check that ``min_len=2`` makes ``everygrams`` skip unigrams.

    ``everygram_input`` is presumably a pytest fixture defined outside this
    view; the expected values below imply it supplies the tokens
    "a", "b", "c" -- TODO confirm against the fixture definition.
    """
    # Only tuples of length >= 2 are expected; ("c",) has no longer n-gram
    # starting at it, so nothing is emitted for the last token.
    expected_output = [
        ("a", "b"),
        ("a", "b", "c"),
        ("b", "c"),
    ]
    output = list(everygrams(everygram_input, min_len=2))
    assert output == expected_output
def test_everygrams_pad_right(everygram_input):
    """Check ``everygrams`` with ``max_len=3`` and ``pad_right=True``.

    ``everygram_input`` is presumably a pytest fixture defined outside this
    view; the expected values below imply it supplies the tokens
    "a", "b", "c" -- TODO confirm against the fixture definition.
    """
    # The expected list shows two ``None`` pads after "c"; n-grams that
    # start on a pad are expected too, hence the trailing ``None``-only tuples.
    expected_output = [
        ("a",),
        ("a", "b"),
        ("a", "b", "c"),
        ("b",),
        ("b", "c"),
        ("b", "c", None),
        ("c",),
        ("c", None),
        ("c", None, None),
        (None,),
        (None, None),
        (None,),
    ]
    output = list(everygrams(everygram_input, max_len=3, pad_right=True))
    assert output == expected_output
def test_everygrams_pad_left(everygram_input):
    """Check ``everygrams`` with ``max_len=3`` and ``pad_left=True``.

    ``everygram_input`` is presumably a pytest fixture defined outside this
    view; the expected values below imply it supplies the tokens
    "a", "b", "c" -- TODO confirm against the fixture definition.
    """
    # The expected list shows two ``None`` pads before "a"; n-grams that
    # start on a pad come first, followed by the unpadded n-grams.
    expected_output = [
        (None,),
        (None, None),
        (None, None, "a"),
        (None,),
        (None, "a"),
        (None, "a", "b"),
        ("a",),
        ("a", "b"),
        ("a", "b", "c"),
        ("b",),
        ("b", "c"),
        ("c",),
    ]
    output = list(everygrams(everygram_input, max_len=3, pad_left=True))
    assert output == expected_output