# Source code for nltk.test.unit.test_brill

# -*- coding: utf-8 -*-
"""
Tests for Brill tagger.
"""

import unittest

from nltk.tag import UnigramTagger, brill, brill_trainer
from nltk.tbl import Template
from nltk.corpus import treebank

from nltk.tbl import demo


class TestBrill(unittest.TestCase):
    """Unit tests for the Brill tagger."""

    def test_pos_template(self):
        """Train a Brill tagger with one ``Pos([-1])`` template and check its tags.

        A ``UnigramTagger`` built from the first 1000 tagged Treebank
        sentences is used as the initial tagger; the Brill trainer is then
        trained on the same sentences and the resulting tagger is applied to
        a short example sentence.
        """
        train_sents = treebank.tagged_sents()[:1000]
        tagger = UnigramTagger(train_sents)
        trainer = brill_trainer.BrillTaggerTrainer(
            tagger, [brill.Template(brill.Pos([-1]))]
        )
        brill_tagger = trainer.train(train_sents)

        # Example from https://github.com/nltk/nltk/issues/769
        result = brill_tagger.tag('This is a foo bar sentence'.split())
        # 'foo' and 'sentence' are expected to come back with a ``None`` tag.
        expected = [
            ('This', 'DT'),
            ('is', 'VBZ'),
            ('a', 'DT'),
            ('foo', None),
            ('bar', 'NN'),
            ('sentence', None),
        ]
        self.assertEqual(result, expected)

    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
    def test_brill_demo(self):
        """Call the ``nltk.tbl`` demo; unconditionally skipped by the decorator."""
        demo()