# Source code for nltk.app.chunkparser_app

# Natural Language Toolkit: Regexp Chunk Parser Application
#
# Copyright (C) 2001-2019 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
"""

# Todo: Add a way to select the development set from the menubar.  This
# might just need to be a selection box (conll vs treebank etc) plus
# configuration parameters to select what's being chunked (eg VP vs NP)
# and what part of the data is being used as the development set.

from __future__ import division
import time
import textwrap
import re
import random

from six.moves.tkinter import (
    Button,
    Canvas,
    Checkbutton,
    Frame,
    IntVar,
    Label,
    Menu,
    Scrollbar,
    Text,
    Tk,
)
from six.moves.tkinter_tkfiledialog import askopenfilename, asksaveasfilename
from six.moves.tkinter_font import Font

from nltk.tree import Tree
from nltk.util import in_idle
from nltk.draw.util import ShowText
from nltk.corpus import conll2000, treebank_chunk
from nltk.chunk import ChunkScore, RegexpChunkParser
from nltk.chunk.regexp import RegexpChunkRule


class RegexpChunkApp(object):
    """
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    """

    ##/////////////////////////////////////////////////////////////////
    ##  Help Text
    ##/////////////////////////////////////////////////////////////////

    #: A dictionary mapping from part of speech tags to descriptions,
    #: which is used in the help text.  (This should probably live with
    #: the conll and/or treebank corpus instead.)
    TAGSET = {
        # Word-class tags.
        'CC': 'Coordinating conjunction',
        'PRP$': 'Possessive pronoun',
        'CD': 'Cardinal number',
        'RB': 'Adverb',
        'DT': 'Determiner',
        'RBR': 'Adverb, comparative',
        'EX': 'Existential there',
        'RBS': 'Adverb, superlative',
        'FW': 'Foreign word',
        'RP': 'Particle',
        'JJ': 'Adjective',
        'TO': 'to',
        'JJR': 'Adjective, comparative',
        'UH': 'Interjection',
        'JJS': 'Adjective, superlative',
        'VB': 'Verb, base form',
        'LS': 'List item marker',
        'VBD': 'Verb, past tense',
        'MD': 'Modal',
        'NNS': 'Noun, plural',
        'NN': 'Noun, singular or mass',
        'VBN': 'Verb, past participle',
        'VBZ': 'Verb, 3rd ps. sing. present',
        'NNP': 'Proper noun, singular',
        'NNPS': 'Proper noun plural',
        'WDT': 'wh-determiner',
        'PDT': 'Predeterminer',
        'WP': 'wh-pronoun',
        'POS': 'Possessive ending',
        'WP$': 'Possessive wh-pronoun',
        'PRP': 'Personal pronoun',
        'WRB': 'wh-adverb',
        # Punctuation and symbol tags.
        '(': 'open parenthesis',
        ')': 'close parenthesis',
        '``': 'open quote',
        ',': 'comma',
        "''": 'close quote',
        '.': 'period',
        '#': 'pound sign (currency marker)',
        '$': 'dollar sign (currency marker)',
        # Remaining word-class tags.
        'IN': 'Preposition/subord. conjunction',
        'SYM': 'Symbol (mathematical or scientific)',
        'VBG': 'Verb, gerund/present participle',
        'VBP': 'Verb, non-3rd ps. sing. present',
        ':': 'colon',
    }

    #: Contents for the help box.  This is a list of tuples, one for
    #: each help page, where each tuple has three elements:
    #:   - A title (displayed as a tab)
    #:   - A string description of tabstops (see Tkinter.Text for details)
    #:   - The text contents for the help page.  You can use expressions
    #:     like <red>...</red> to colorize the text; see ``HELP_AUTOTAG``
    #:     for a list of tags you can use for colorizing.
    HELP = [
        (
            'Help',
            '20',
            "Welcome to the regular expression chunk-parser grammar editor.  "
            "You can use this editor to develop and test chunk parser grammars "
            "based on NLTK's RegexpChunkParser class.\n\n"
            # Help box.
            "Use this box ('Help') to learn more about the editor; click on the "
            "tabs for help on specific topics:"
            "<indent>\n"
            "Rules: grammar rule types\n"
            "Regexps: regular expression syntax\n"
            "Tags: part of speech tags\n</indent>\n"
            # Grammar.
            "Use the upper-left box ('Grammar') to edit your grammar.  "
            "Each line of your grammar specifies a single 'rule', "
            "which performs an action such as creating a chunk or merging "
            "two chunks.\n\n"
            # Dev set.
            "The lower-left box ('Development Set') runs your grammar on the "
            "development set, and displays the results.  "
            "Your grammar's chunks are <highlight>highlighted</highlight>, and "
            "the correct (gold standard) chunks are "
            "<underline>underlined</underline>.  If they "
            "match, they are displayed in <green>green</green>; otherwise, "
            "they are displayed in <red>red</red>.  The box displays a single "
            "sentence from the development set at a time; use the scrollbar or "
            "the next/previous buttons to view additional sentences.\n\n"
            # Performance
            "The lower-right box ('Evaluation') tracks the performance of "
            "your grammar on the development set.  The 'precision' axis "
            "indicates how many of your grammar's chunks are correct; and "
            "the 'recall' axis indicates how many of the gold standard "
            "chunks your system generated.  Typically, you should try to "
            "design a grammar that scores high on both metrics.  The "
            "exact precision and recall of the current grammar, as well "
            "as their harmonic mean (the 'f-score'), are displayed in "
            "the status bar at the bottom of the window.",
        ),
        (
            'Rules',
            '10',
            "<h1>{...regexp...}</h1>"
            "<indent>\nChunk rule: creates new chunks from words matching "
            "regexp.</indent>\n\n"
            "<h1>}...regexp...{</h1>"
            "<indent>\nChink rule: removes words matching regexp from existing "
            "chunks.</indent>\n\n"
            "<h1>...regexp1...}{...regexp2...</h1>"
            "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
            "regexp2 in two.</indent>\n\n"
            # The header names both patterns so that it agrees with the
            # description underneath (and with the split rule above).
            "<h1>...regexp1...{}...regexp2...</h1>"
            "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
            "and regexp2</indent>\n",
        ),
        (
            'Regexps',
            '10 60',
            # "Regular Expression Syntax Summary:\n\n"
            "<h1>Pattern\t\tMatches...</h1>\n"
            "<hangindent>"
            "\t<<var>T</var>>\ta word with tag <var>T</var> "
            "(where <var>T</var> may be a regexp).\n"
            "\t<var>x</var>?\tan optional <var>x</var>\n"
            "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
            "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
            "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
            "\t.\tmatches any character\n"
            "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
            "\t# <var>x...</var>\tTreats <var>x...</var> "
            "(to the end of the line) as a comment\n"
            "\t\\<var>C</var>\tmatches character <var>C</var> "
            "(useful when <var>C</var> is a special character "
            "like + or #)\n"
            "</hangindent>"
            "\n<h1>Examples:</h1>\n"
            "<hangindent>"
            '\t<regexp><NN></regexp>\n'
            '\t\tMatches <match>"cow/NN"</match>\n'
            '\t\tMatches <match>"green/NN"</match>\n'
            '\t<regexp><VB.*></regexp>\n'
            '\t\tMatches <match>"eating/VBG"</match>\n'
            '\t\tMatches <match>"ate/VBD"</match>\n'
            '\t<regexp><IN><DT><NN></regexp>\n'
            '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
            '\t<regexp><RB>?<VBD></regexp>\n'
            '\t\tMatches <match>"ran/VBD"</match>\n'
            '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
            # The backslash is doubled so the literal holds "\#" without
            # relying on an invalid (deprecated) escape sequence.
            '\t<regexp><\\#><CD> # This is a comment...</regexp>\n'
            '\t\tMatches <match>"#/# 100/CD"</match>\n'
            "</hangindent>",
        ),
        (
            'Tags',
            '10 60',
            "<h1>Part of Speech Tags:</h1>\n"
            + '<hangindent>'
            + '<<TAGSET>>'
            + '</hangindent>\n',  # this gets auto-substituted w/ self.TAGSET
        ),
    ]

    #: (tag name, text-tag options) pairs.  Each name can be used as
    #: ``<name>...</name>`` markup inside the ``HELP`` page texts.
    HELP_AUTOTAG = [
        ('red', {'foreground': '#a00'}),
        ('green', {'foreground': '#080'}),
        ('highlight', {'background': '#ddd'}),
        ('underline', {'underline': True}),
        ('h1', {'underline': True}),
        ('indent', {'lmargin1': 20, 'lmargin2': 20}),
        ('hangindent', {'lmargin1': 0, 'lmargin2': 60}),
        ('var', {'foreground': '#88f'}),
        ('regexp', {'foreground': '#ba7'}),
        ('match', {'foreground': '#6a6'}),
    ]

    ##/////////////////////////////////////////////////////////////////
    ##  Config Parameters
    ##/////////////////////////////////////////////////////////////////

    _EVAL_DELAY = 1
    """If the user has not pressed any key for this amount of time (in
       seconds), and the current grammar has not been evaluated, then
       the eval demon will evaluate it."""

    _EVAL_CHUNK = 15
    """The number of sentences that should be evaluated by the eval
       demon each time it runs."""
    _EVAL_FREQ = 0.2
    """The frequency (in seconds) at which the eval demon is run"""
    _EVAL_DEMON_MIN = 0.02
    """The minimum amount of time that the eval demon should take each time
       it runs -- if it takes less than this time, _EVAL_CHUNK will be
       modified upwards."""
    _EVAL_DEMON_MAX = 0.04
    """The maximum amount of time that the eval demon should take each time
       it runs -- if it takes more than this time, _EVAL_CHUNK will be
       modified downwards."""

    _GRAMMARBOX_PARAMS = dict(
        width=40,
        height=12,
        background='#efe',
        highlightbackground='#efe',
        highlightthickness=1,
        relief='groove',
        border=2,
        wrap='word',
    )
    _HELPBOX_PARAMS = dict(
        width=15,
        height=15,
        background='#efe',
        highlightbackground='#efe',
        foreground='#555',
        highlightthickness=1,
        relief='groove',
        border=2,
        wrap='word',
    )
    _DEVSETBOX_PARAMS = dict(
        width=70,
        height=10,
        background='#eef',
        highlightbackground='#eef',
        highlightthickness=1,
        relief='groove',
        border=2,
        wrap='word',
        tabs=(30,),
    )
    _STATUS_PARAMS = dict(background='#9bb', relief='groove', border=2)
    _FONT_PARAMS = dict(family='helvetica', size=-20)
    _FRAME_PARAMS = dict(background='#777', padx=2, pady=2, border=3)
    _EVALBOX_PARAMS = dict(
        background='#eef',
        highlightbackground='#eef',
        highlightthickness=1,
        relief='groove',
        border=2,
        width=300,
        height=280,
    )
    _BUTTON_PARAMS = dict(
        background='#777', activebackground='#777', highlightbackground='#777'
    )
    _HELPTAB_BG_COLOR = '#aba'
    _HELPTAB_FG_COLOR = '#efe'

    _HELPTAB_FG_PARAMS = dict(background='#efe')
    _HELPTAB_BG_PARAMS = dict(background='#aba')
    _HELPTAB_SPACER = 6

    def normalize_grammar(self, grammar):
        """
        Return a canonical form of a grammar string.

        Comments are removed, runs of spaces are collapsed, whitespace
        following a newline is dropped, the result is stripped, and
        every ``$`` that is not already preceded by a backslash is
        backslash-escaped.

        :param grammar: The grammar text to normalize.
        :return: The normalized grammar text.
        """
        # Strip comments (an escaped "\#" does not start a comment).
        grammar = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', grammar)
        # Normalize whitespace
        grammar = re.sub(r' +', ' ', grammar)
        grammar = re.sub(r'\n\s+', '\n', grammar)
        grammar = grammar.strip()
        # [xx] Hack: automatically backslash $!  A lookbehind is used
        # (rather than consuming the preceding character) so that a "$"
        # at the very start, or directly after another "$", is escaped too.
        grammar = re.sub(r'(?<!\\)\$', r'\\$', grammar)
        return grammar

    def __init__(
        self,
        devset_name='conll2000',
        devset=None,
        grammar='',
        chunk_label='NP',
        tagset=None,
    ):
        """
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        """
        self._chunk_label = chunk_label

        if tagset is None:
            tagset = self.TAGSET
        self.tagset = tagset

        # Named development sets:
        if devset is None:
            if devset_name == 'conll2000':
                devset = conll2000.chunked_sents('train.txt')  # [:100]
            elif devset == 'treebank':
                devset = treebank_chunk.chunked_sents()  # [:100]
            else:
                raise ValueError('Unknown development set %s' % devset_name)

        self.chunker = None
        """The chunker built from the grammar string"""

        self.grammar = grammar
        """The unparsed grammar string"""

        self.normalized_grammar = None
        """A normalized version of ``self.grammar``."""

        self.grammar_changed = 0
        """The last time() that the grammar was changed."""

        self.devset = devset
        """The development set -- a list of chunked sentences."""

        self.devset_name = devset_name
        """The name of the development set (for save files)."""

        self.devset_index = -1
        """The index into the development set of the first instance
           that's currently being viewed."""

        self._last_keypress = 0
        """The time() when a key was most recently pressed"""

        self._history = []
        """A list of (grammar, precision, recall, fscore) tuples for
           grammars that the user has already tried."""

        self._history_index = 0
        """When the user is scrolling through previous grammars, this
           is used to keep track of which grammar they're looking at."""

        self._eval_grammar = None
        """The grammar that is being currently evaluated by the eval
           demon."""

        self._eval_normalized_grammar = None
        """A normalized copy of ``_eval_grammar``."""

        self._eval_index = 0
        """The index of the next sentence in the development set that
           should be looked at by the eval demon."""

        self._eval_score = ChunkScore(chunk_label=chunk_label)
        """The ``ChunkScore`` object that's used to keep track of the score
        of the current grammar on the development set."""

        # Set up the main window.
        top = self.top = Tk()
        top.geometry('+50+50')
        top.title('Regexp Chunk Parser App')
        top.bind('<Control-q>', self.destroy)

        # Varaible that restricts how much of the devset we look at.
        self._devset_size = IntVar(top)
        self._devset_size.set(100)

        # Set up all the tkinter widgets
        self._init_fonts(top)
        self._init_widgets(top)
        self._init_bindings(top)
        self._init_menubar(top)
        self.grammarbox.focus()

        # If a grammar was given, then display it.
        if grammar:
            self.grammarbox.insert('end', grammar + '\n')
            self.grammarbox.mark_set('insert', '1.0')

        # Display the first item in the development set
        self.show_devset(0)
        self.update()

    def _init_bindings(self, top):
        """
        Install the keyboard and resize event handlers.

        :param top: The top-level window on which the application-wide
            key bindings are registered.
        """
        # Shortcuts registered on both the window and the grammar box.
        shared_shortcuts = (
            ('<Control-n>', self._devset_next),
            ('<Control-p>', self._devset_prev),
            ('<Control-t>', self.toggle_show_trace),
        )
        for widget in (top, self.grammarbox):
            for sequence, handler in shared_shortcuts:
                widget.bind(sequence, handler)

        # Window-only bindings.
        top.bind('<KeyPress>', self.update)
        top.bind('<Control-s>', lambda event: self.save_grammar())
        top.bind('<Control-o>', lambda event: self.load_grammar())

        # Redraw the eval graph when the window size changes
        self.evalbox.bind('<Configure>', self._eval_plot)

    def _init_fonts(self, top):
        """
        Create the font-size variable and the two fonts used by the
        widgets.

        :param top: The top-level window that owns the size variable.
        """
        # The current font size; starts at 20.
        size_var = IntVar(top)
        size_var.set(20)
        self._size = size_var

        # A negative size is passed to ``Font`` for both fonts; the
        # small font is 14/20 of the main one.
        main_size = self._size.get()
        self._font = Font(family='helvetica', size=-main_size)
        small_size = int(self._size.get() * 14 // 20)
        self._smallfont = Font(family='helvetica', size=-small_size)

    def _init_menubar(self, parent):
        """
        Build the File / View / Development-Set / Help menus and attach
        the resulting menubar to ``parent``.

        :param parent: The window that receives the menubar.
        """
        bar = Menu(parent)

        # File menu: one command entry per table row, in display order.
        file_menu = Menu(bar, tearoff=0)
        file_entries = (
            {'label': 'Reset Application', 'underline': 0, 'command': self.reset},
            {
                'label': 'Save Current Grammar',
                'underline': 0,
                'accelerator': 'Ctrl-s',
                'command': self.save_grammar,
            },
            {
                'label': 'Load Grammar',
                'underline': 0,
                'accelerator': 'Ctrl-o',
                'command': self.load_grammar,
            },
            {
                'label': 'Save Grammar History',
                'underline': 13,
                'command': self.save_history,
            },
            {
                'label': 'Exit',
                'underline': 1,
                'command': self.destroy,
                'accelerator': 'Ctrl-q',
            },
        )
        for options in file_entries:
            file_menu.add_command(**options)
        bar.add_cascade(label='File', underline=0, menu=file_menu)

        # View menu: radio buttons that select the font size.
        view_menu = Menu(bar, tearoff=0)
        font_sizes = (
            ('Tiny', 10),
            ('Small', 16),
            ('Medium', 20),
            ('Large', 24),
            ('Huge', 34),
        )
        for size_label, size_value in font_sizes:
            view_menu.add_radiobutton(
                label=size_label,
                variable=self._size,
                underline=0,
                value=size_value,
                command=self.resize,
            )
        bar.add_cascade(label='View', underline=0, menu=view_menu)

        # Development-Set menu: radio buttons that select how many
        # sentences of the development set are used.
        devset_menu = Menu(bar, tearoff=0)
        for sentence_count in (50, 100, 200, 500):
            devset_menu.add_radiobutton(
                label='%d sentences' % sentence_count,
                variable=self._devset_size,
                value=sentence_count,
                command=self.set_devset_size,
            )
        bar.add_cascade(label='Development-Set', underline=0, menu=devset_menu)

        # Help menu.
        help_menu = Menu(bar, tearoff=0)
        help_menu.add_command(label='About', underline=0, command=self.about)
        bar.add_cascade(label='Help', underline=0, menu=help_menu)

        parent.config(menu=bar)

    def toggle_show_trace(self, *e):
        if self._showing_trace:
            self.show_devset()
        else:
            self.show_trace()
        return 'break'

    #: Upper bound on how many of the most recent history entries
    #: ``_eval_plot`` scans when auto-scaling the axes.
    _SCALE_N = 5  # center on the last 5 examples.
    #: NOTE(review): not referenced in the visible part of this file;
    #: ``_eval_plot`` consults ``self._eval_lines`` instead -- confirm
    #: whether this flag is still needed.
    _DRAW_LINES = False

    def _eval_plot(self, *e, **config):
        """
        Redraw the precision/recall plot on ``self.evalbox`` from
        ``self._history``.

        The canvas is cleared and then redrawn from scratch: axis
        labels, background masks, grid and axis lines, the scale
        percentages, and one dot per history entry.  The entry at
        ``self._history_index`` is drawn highlighted and its scores are
        written to ``self.status``.

        :param e: Ignored; accepted so the method can serve as an
            event callback.
        :param config: Optional ``width`` and ``height`` overrides; when
            absent, the current size of ``self.evalbox`` is used.
        """
        width = config.get('width', self.evalbox.winfo_width())
        height = config.get('height', self.evalbox.winfo_height())

        # Clear the canvas
        self.evalbox.delete('all')

        # Draw the precision & recall labels.  Their bounding boxes fix
        # the left and bottom edges of the plot area.
        tag = self.evalbox.create_text(
            10, height // 2 - 10, justify='left', anchor='w', text='Precision'
        )
        left, right = self.evalbox.bbox(tag)[2] + 5, width - 10
        tag = self.evalbox.create_text(
            left + (width - left) // 2,
            height - 10,
            anchor='s',
            text='Recall',
            justify='center',
        )
        top, bot = 10, self.evalbox.bbox(tag)[1] - 10

        # Draw masks for clipping the plot.  These are background-colored
        # rectangles over the area left of and below the plot; each is
        # passed to ``lower`` right after it is created.
        bg = self._EVALBOX_PARAMS['background']
        self.evalbox.lower(
            self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg)
        )
        self.evalbox.lower(
            self.evalbox.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg)
        )

        # Calculate the plot's scale.
        if self._autoscale.get() and len(self._history) > 1:
            max_precision = max_recall = 0
            min_precision = min_recall = 1
            # NOTE(review): this range visits at most the last
            # min(len(history) - 1, _SCALE_N) entries, so with
            # _SCALE_N or fewer entries the oldest one is left out of
            # the scale -- confirm that this is intended.
            for i in range(1, min(len(self._history), self._SCALE_N + 1)):
                grammar, precision, recall, fmeasure = self._history[-i]
                min_precision = min(precision, min_precision)
                min_recall = min(recall, min_recall)
                max_precision = max(precision, max_precision)
                max_recall = max(recall, max_recall)
            #             if max_precision-min_precision > max_recall-min_recall:
            #                 min_recall -= (max_precision-min_precision)/2
            #                 max_recall += (max_precision-min_precision)/2
            #             else:
            #                 min_precision -= (max_recall-min_recall)/2
            #                 max_precision += (max_recall-min_recall)/2
            #             if min_recall < 0:
            #                 max_recall -= min_recall
            #                 min_recall = 0
            #             if min_precision < 0:
            #                 max_precision -= min_precision
            #                 min_precision = 0
            # Pad the range by 0.01 on each side, clamped to [0, 1].
            min_precision = max(min_precision - 0.01, 0)
            min_recall = max(min_recall - 0.01, 0)
            max_precision = min(max_precision + 0.01, 1)
            max_recall = min(max_recall + 0.01, 1)
        else:
            # Without autoscaling (or with at most one history entry),
            # plot the full 0..1 range on both axes.
            min_precision = min_recall = 0
            max_precision = max_recall = 1

        # Draw the axis lines & grid lines.  Grid lines sit at multiples
        # of 10%; only those strictly inside the plot area are drawn.
        for i in range(11):
            x = left + (right - left) * (
                (i / 10.0 - min_recall) / (max_recall - min_recall)
            )
            y = bot - (bot - top) * (
                (i / 10.0 - min_precision) / (max_precision - min_precision)
            )
            if left < x < right:
                self.evalbox.create_line(x, top, x, bot, fill='#888')
            if top < y < bot:
                self.evalbox.create_line(left, y, right, y, fill='#888')
        self.evalbox.create_line(left, top, left, bot)
        self.evalbox.create_line(left, bot, right, bot)

        # Display the plot's scale
        self.evalbox.create_text(
            left - 3,
            bot,
            justify='right',
            anchor='se',
            text='%d%%' % (100 * min_precision),
        )
        self.evalbox.create_text(
            left - 3,
            top,
            justify='right',
            anchor='ne',
            text='%d%%' % (100 * max_precision),
        )
        self.evalbox.create_text(
            left,
            bot + 3,
            justify='center',
            anchor='nw',
            text='%d%%' % (100 * min_recall),
        )
        self.evalbox.create_text(
            right,
            bot + 3,
            justify='center',
            anchor='ne',
            text='%d%%' % (100 * max_recall),
        )

        # Display the scores.  Recall is mapped to x and precision to y.
        prev_x = prev_y = None
        for i, (_, precision, recall, fscore) in enumerate(self._history):
            x = left + (right - left) * (
                (recall - min_recall) / (max_recall - min_recall)
            )
            y = bot - (bot - top) * (
                (precision - min_precision) / (max_precision - min_precision)
            )
            if i == self._history_index:
                # The selected entry: bright dot, plus its scores in
                # the status bar.
                self.evalbox.create_oval(
                    x - 2, y - 2, x + 2, y + 2, fill='#0f0', outline='#000'
                )
                self.status['text'] = (
                    'Precision: %.2f%%\t' % (precision * 100)
                    + 'Recall: %.2f%%\t' % (recall * 100)
                    + 'F-score: %.2f%%' % (fscore * 100)
                )
            else:
                # Every other entry: pale dot, passed to ``lower``.
                self.evalbox.lower(
                    self.evalbox.create_oval(
                        x - 2, y - 2, x + 2, y + 2, fill='#afa', outline='#8c8'
                    )
                )
            # Optionally connect consecutive history entries.
            if prev_x is not None and self._eval_lines.get():
                self.evalbox.lower(
                    self.evalbox.create_line(prev_x, prev_y, x, y, fill='#8c8')
                )
            prev_x, prev_y = x, y

    #: Class-level default.  ``_eval_demon`` sets this to True whenever
    #: it reschedules itself and to False when it stops.
    _eval_demon_running = False

    def _eval_demon(self):
        if self.top is None:
            return
        if self.chunker is None:
            self._eval_demon_running = False
            return

        # Note our starting time.
        t0 = time.time()

        # If are still typing, then wait for them to finish.
        if (
            time.time() - self._last_keypress < self._EVAL_DELAY
            and self.normalized_grammar != self._eval_normalized_grammar
        ):
            self._eval_demon_running = True
            return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)

        # If the grammar changed, restart the evaluation.
        if self.normalized_grammar != self._eval_normalized_grammar:
            # Check if we've seen this grammar already.  If so, then
            # just use the old evaluation values.
            for (g, p, r, f) in self._history:
                if self.normalized_grammar == self.normalize_grammar(g):
                    self._history.append((g, p, r, f))
                    self._history_index = len(self._history) - 1
                    self._eval_plot()
                    self._eval_demon_running = False
                    self._eval_normalized_grammar = None
                    return
            self._eval_index = 0
            self._eval_score = ChunkScore(chunk_label=self._chunk_label)
            self._eval_grammar = self.grammar
            self._eval_normalized_grammar = self.normalized_grammar

        # If the grammar is empty, the don't bother evaluating it, or
        # recording it in history -- the score will just be 0.
        if self.normalized_grammar.strip() == '':
            # self._eval_index = self._devset_size.get()
            self._eval_demon_running = False
            return

        # Score the next set of examples
        for gold in self.devset[
            self._eval_index : min(
                self._eval_index + self._EVAL_CHUNK, self._devset_size.get()
            )
        ]:
            guess = self._chunkparse(gold.leaves())
            self._eval_score.score(gold, guess)

        # update our index in the devset.
        self._eval_index += self._EVAL_CHUNK

        # Check if we're done
        if self._eval_index >= self._devset_size.get():
            self._history.append(
                (
                    self._eval_grammar,
                    self._eval_score.precision(),
                    self._eval_score.recall(),
                    self._eval_score.f_measure(),
                )
            )
            self._history_index = len(self._history) - 1
            self._eval_plot()
            self._eval_demon_running = False
            self._eval_normalized_grammar = None
        else:
            progress = 100 * self._eval_index / self._devset_size.get()
            self.status['text'] = 'Evaluating on Development Set (%d%%)' % progress
            self._eval_demon_running = True
            self._adaptively_modify_eval_chunk(time.time() - t0)
            self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)

    def _adaptively_modify_eval_chunk(self, t):
        """
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        """
        if t > self._EVAL_DEMON_MAX and self._EVAL_CHUNK > 5:
            self._EVAL_CHUNK = min(
                self._EVAL_CHUNK - 1,
                max(
                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MAX / t)),
                    self._EVAL_CHUNK - 10,
                ),
            )
        elif t < self._EVAL_DEMON_MIN:
            self._EVAL_CHUNK = max(
                self._EVAL_CHUNK + 1,
                min(
                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MIN / t)),
                    self._EVAL_CHUNK + 10,
                ),
            )

    def _init_widgets(self, top):
        frame0 = Frame(top, **self._FRAME_PARAMS)
        frame0.grid_columnconfigure(0, weight=4)
        frame0.grid_columnconfigure(3, weight=2)
        frame0.grid_rowconfigure(1, weight=1)
        frame0.grid_rowconfigure(5, weight=1)

        # The grammar
        self.grammarbox = Text(frame0, font=self._font, **self._GRAMMARBOX_PARAMS)
        self.grammarlabel = Label(
            frame0,
            font=self._font,
            text='Grammar:',
            highlightcolor='black',
            background=self._GRAMMARBOX_PARAMS['background'],
        )
        self.grammarlabel.grid(column=0, row=0, sticky='SW')
        self.grammarbox.grid(column=0, row=1, sticky='NEWS')

        # Scroll bar for grammar
        grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview)
        grammar_scrollbar.grid(column=1, row=1, sticky='NWS')
        self.grammarbox.config(yscrollcommand=grammar_scrollbar.set)

        # grammar buttons
        bg = self._FRAME_PARAMS['background']
        frame3 = Frame(frame0, background=bg)
        frame3.grid(column=0, row=2, sticky='EW')
        Button(
            frame3,
            text='Prev Grammar',
            command=self._history_prev,
            **self._BUTTON_PARAMS
        ).pack(side='left')
        Button(
            frame3,
            text='Next Grammar',
            command=self._history_next,
            **self._BUTTON_PARAMS
        ).pack(side='left')

        # Help box
        self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS)
        self.helpbox.grid(column=3, row=1, sticky='NEWS')
        self.helptabs = {}
        bg = self._FRAME_PARAMS['background']
        helptab_frame = Frame(frame0, background=bg)
        helptab_frame.grid(column=3, row=0, sticky='SW')
        for i, (tab, tabstops, text) in enumerate(self.HELP):
            label = Label(helptab_frame, text=tab, font=self._smallfont)
            label.grid(column=i * 2, row=0, sticky='S')
            # help_frame.grid_columnconfigure(i, weight=1)
            # label.pack(side='left')
            label.bind('<ButtonPress>', lambda e, tab=tab: self.show_help(tab))
            self.helptabs[tab] = label
            Frame(
                helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg
            ).grid(column=i * 2 + 1, row=0)
        self.helptabs[self.HELP[0][0]].configure(font=self._font)
        self.helpbox.tag_config('elide', elide=True)
        for (tag, params) in self.HELP_AUTOTAG:
            self.helpbox.tag_config('tag-%s' % tag, **params)
        self.show_help(self.HELP[0][0])

        # Scroll bar for helpbox
        help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview)
        self.helpbox.config(yscrollcommand=help_scrollbar.set)
        help_scrollbar.grid(column=4, row=1, sticky='NWS')

        # The dev set
        frame4 = Frame(frame0, background=self._FRAME_PARAMS['background'])
        self.devsetbox = Text(frame4, font=self._font, **self._DEVSETBOX_PARAMS)
        self.devsetbox.pack(expand=True, fill='both')
        self.devsetlabel = Label(
            frame0,
            font=self._font,
            text='Development Set:',
            justify='right',
            background=self._DEVSETBOX_PARAMS['background'],
        )
        self.devsetlabel.grid(column=0, row=4, sticky='SW')
        frame4.grid(column=0, row=5, sticky='NEWS')

        # dev set scrollbars
        self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll)
        self.devset_scroll.grid(column=1, row=5, sticky='NWS')
        self.devset_xscroll = Scrollbar(
            frame4, command=self.devsetbox.xview, orient='horiz'
        )
        self.devsetbox['xscrollcommand'] = self.devset_xscroll.set
        self.devset_xscroll.pack(side='bottom', fill='x')

        # dev set buttons
        bg = self._FRAME_PARAMS['background']
        frame1 = Frame(frame0, background=bg)
        frame1.grid(column=0, row=7, sticky='EW')
        Button(
            frame1,
            text='Prev Example (Ctrl-p)',
            command=self._devset_prev,
            **self._BUTTON_PARAMS
        ).pack(side='left')
        Button(
            frame1,
            text='Next Example (Ctrl-n)',
            command=self._devset_next,
            **self._BUTTON_PARAMS
        ).pack(side='left')
        self.devset_button = Button(
            frame1,
            text='Show example',
            command=self.show_devset,
            state='disabled',
            **self._BUTTON_PARAMS
        )
        self.devset_button.pack(side='right')
        self.trace_button = Button(
            frame1, text='Show trace', command=self.show_trace, **self._BUTTON_PARAMS
        )
        self.trace_button.pack(side='right')

        # evaluation box
        self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS)
        label = Label(
            frame0,
            font=self._font,
            text='Evaluation:',
            justify='right',
            background=self._EVALBOX_PARAMS['background'],
        )
        label.grid(column=3, row=4, sticky='SW')
        self.evalbox.grid(column=3, row=5, sticky='NEWS', columnspan=2)

        # evaluation box buttons
        bg = self._FRAME_PARAMS['background']
        frame2 = Frame(frame0, background=bg)
        frame2.grid(column=3, row=7, sticky='EW')
        self._autoscale = IntVar(self.top)
        self._autoscale.set(False)
        Checkbutton(
            frame2,
            variable=self._autoscale,
            command=self._eval_plot,
            text='Zoom',
            **self._BUTTON_PARAMS
        ).pack(side='left')
        self._eval_lines = IntVar(self.top)
        self._eval_lines.set(False)
        Checkbutton(
            frame2,
            variable=self._eval_lines,
            command=self._eval_plot,
            text='Lines',
            **self._BUTTON_PARAMS
        ).pack(side='left')
        Button(frame2, text='History', **self._BUTTON_PARAMS).pack(side='right')

        # The status label
        self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS)
        self.status.grid(column=0, row=9, sticky='NEW', padx=3, pady=2, columnspan=5)

        # Help box & devset box can't be edited.
        self.helpbox['state'] = 'disabled'
        self.devsetbox['state'] = 'disabled'

        # Spacers
        bg = self._FRAME_PARAMS['background']
        Frame(frame0, height=10, width=0, background=bg).grid(column=0, row=3)
        Frame(frame0, height=0, width=10, background=bg).grid(column=2, row=0)
        Frame(frame0, height=6, width=0, background=bg).grid(column=0, row=8)

        # pack the frame.
        frame0.pack(fill='both', expand=True)

        # Set up colors for the devset box
        self.devsetbox.tag_config('true-pos', background='#afa', underline='True')
        self.devsetbox.tag_config('false-neg', underline='True', foreground='#800')
        self.devsetbox.tag_config('false-pos', background='#faa')
        self.devsetbox.tag_config('trace', foreground='#666', wrap='none')
        self.devsetbox.tag_config('wrapindent', lmargin2=30, wrap='none')
        self.devsetbox.tag_config('error', foreground='#800')

        # And for the grammarbox
        self.grammarbox.tag_config('error', background='#fec')
        self.grammarbox.tag_config('comment', foreground='#840')
        self.grammarbox.tag_config('angle', foreground='#00f')
        self.grammarbox.tag_config('brace', foreground='#0a0')
        self.grammarbox.tag_config('hangindent', lmargin1=0, lmargin2=40)

    #: Class-level default for the flag that records whether the devset
    #: box is currently displaying a trace; ``show_trace`` sets it to
    #: True and ``show_devset`` sets it back to False.
    _showing_trace = False

    def show_trace(self, *e):
        self._showing_trace = True
        self.trace_button['state'] = 'disabled'
        self.devset_button['state'] = 'normal'

        self.devsetbox['state'] = 'normal'
        # self.devsetbox['wrap'] = 'none'
        self.devsetbox.delete('1.0', 'end')
        self.devsetlabel['text'] = 'Development Set (%d/%d)' % (
            (self.devset_index + 1, self._devset_size.get())
        )

        if self.chunker is None:
            self.devsetbox.insert('1.0', 'Trace: waiting for a valid grammar.')
            self.devsetbox.tag_add('error', '1.0', 'end')
            return  # can't do anything more

        gold_tree = self.devset[self.devset_index]
        rules = self.chunker.rules()

        # Calculate the tag sequence
        tagseq = '\t'
        charnum = [1]
        for wordnum, (word, pos) in enumerate(gold_tree.leaves()):
            tagseq += '%s ' % pos
            charnum.append(len(tagseq))
        self.charnum = dict(
            ((i, j), charnum[j])
            for i in range(len(rules) + 1)
            for j in range(len(charnum))
        )
        self.linenum = dict((i, i * 2 + 2) for i in range(len(rules) + 1))

        for i in range(len(rules) + 1):
            if i == 0:
                self.devsetbox.insert('end', 'Start:\n')
                self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')
            else:
                self.devsetbox.insert('end', 'Apply %s:\n' % rules[i - 1])
                self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')
            # Display the tag sequence.
            self.devsetbox.insert('end', tagseq + '\n')
            self.devsetbox.tag_add('wrapindent', 'end -2c linestart', 'end -2c')
            # Run a partial parser, and extract gold & test chunks
            chunker = RegexpChunkParser(rules[:i])
            test_tree = self._chunkparse(gold_tree.leaves())
            gold_chunks = self._chunks(gold_tree)
            test_chunks = self._chunks(test_tree)
            # Compare them.
            for chunk in gold_chunks.intersection(test_chunks):
                self._color_chunk(i, chunk, 'true-pos')
            for chunk in gold_chunks - test_chunks:
                self._color_chunk(i, chunk, 'false-neg')
            for chunk in test_chunks - gold_chunks:
                self._color_chunk(i, chunk, 'false-pos')
        self.devsetbox.insert('end', 'Finished.\n')
        self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')

        # This is a hack, because the x-scrollbar isn't updating its
        # position right -- I'm not sure what the underlying cause is
        # though.  (This is on OS X w/ python 2.5)
        self.top.after(100, self.devset_xscroll.set, 0, 0.3)

    def show_help(self, tab):
        self.helpbox['state'] = 'normal'
        self.helpbox.delete('1.0', 'end')
        for (name, tabstops, text) in self.HELP:
            if name == tab:
                text = text.replace(
                    '<<TAGSET>>',
                    '\n'.join(
                        (
                            '\t%s\t%s' % item
                            for item in sorted(
                                list(self.tagset.items()),
                                key=lambda t_w: re.match('\w+', t_w[0])
                                and (0, t_w[0])
                                or (1, t_w[0]),
                            )
                        )
                    ),
                )

                self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                self.helpbox.config(tabs=tabstops)
                self.helpbox.insert('1.0', text + '\n' * 20)
                C = '1.0 + %d chars'
                for (tag, params) in self.HELP_AUTOTAG:
                    pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
                    for m in re.finditer(pattern, text):
                        self.helpbox.tag_add('elide', C % m.start(1), C % m.end(1))
                        self.helpbox.tag_add(
                            'tag-%s' % tag, C % m.start(2), C % m.end(2)
                        )
                        self.helpbox.tag_add('elide', C % m.start(3), C % m.end(3))
            else:
                self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
        self.helpbox['state'] = 'disabled'

    def _history_prev(self, *e):
        self._view_history(self._history_index - 1)
        return 'break'

    def _history_next(self, *e):
        self._view_history(self._history_index + 1)
        return 'break'

    def _view_history(self, index):
        # Bounds & sanity checking:
        index = max(0, min(len(self._history) - 1, index))
        if not self._history:
            return
        # Already viewing the requested history item?
        if index == self._history_index:
            return
        # Show the requested grammar.  It will get added to _history
        # only if they edit it (causing self.update() to get run.)
        self.grammarbox['state'] = 'normal'
        self.grammarbox.delete('1.0', 'end')
        self.grammarbox.insert('end', self._history[index][0])
        self.grammarbox.mark_set('insert', '1.0')
        self._history_index = index
        self._syntax_highlight_grammar(self._history[index][0])
        # Record the normalized grammar & regenerate the chunker.
        self.normalized_grammar = self.normalize_grammar(self._history[index][0])
        if self.normalized_grammar:
            rules = [
                RegexpChunkRule.fromstring(line)
                for line in self.normalized_grammar.split('\n')
            ]
        else:
            rules = []
        self.chunker = RegexpChunkParser(rules)
        # Show the score.
        self._eval_plot()
        # Update the devset box
        self._highlight_devset()
        if self._showing_trace:
            self.show_trace()
        # Update the grammar label
        if self._history_index < len(self._history) - 1:
            self.grammarlabel['text'] = 'Grammar %s/%s:' % (
                self._history_index + 1,
                len(self._history),
            )
        else:
            self.grammarlabel['text'] = 'Grammar:'

    def _devset_next(self, *e):
        self._devset_scroll('scroll', 1, 'page')
        return 'break'

    def _devset_prev(self, *e):
        self._devset_scroll('scroll', -1, 'page')
        return 'break'

    def destroy(self, *e):
        if self.top is None:
            return
        self.top.destroy()
        self.top = None

    def _devset_scroll(self, command, *args):
        N = 1  # size of a page -- one sentence.
        showing_trace = self._showing_trace
        if command == 'scroll' and args[1].startswith('unit'):
            self.show_devset(self.devset_index + int(args[0]))
        elif command == 'scroll' and args[1].startswith('page'):
            self.show_devset(self.devset_index + N * int(args[0]))
        elif command == 'moveto':
            self.show_devset(int(float(args[0]) * self._devset_size.get()))
        else:
            assert 0, 'bad scroll command %s %s' % (command, args)
        if showing_trace:
            self.show_trace()

    def show_devset(self, index=None):
        if index is None:
            index = self.devset_index

        # Bounds checking
        index = min(max(0, index), self._devset_size.get() - 1)

        if index == self.devset_index and not self._showing_trace:
            return
        self.devset_index = index

        self._showing_trace = False
        self.trace_button['state'] = 'normal'
        self.devset_button['state'] = 'disabled'

        # Clear the text box.
        self.devsetbox['state'] = 'normal'
        self.devsetbox['wrap'] = 'word'
        self.devsetbox.delete('1.0', 'end')
        self.devsetlabel['text'] = 'Development Set (%d/%d)' % (
            (self.devset_index + 1, self._devset_size.get())
        )

        # Add the sentences
        sample = self.devset[self.devset_index : self.devset_index + 1]
        self.charnum = {}
        self.linenum = {0: 1}
        for sentnum, sent in enumerate(sample):
            linestr = ''
            for wordnum, (word, pos) in enumerate(sent.leaves()):
                self.charnum[sentnum, wordnum] = len(linestr)
                linestr += '%s/%s ' % (word, pos)
                self.charnum[sentnum, wordnum + 1] = len(linestr)
            self.devsetbox.insert('end', linestr[:-1] + '\n\n')

        # Highlight chunks in the dev set
        if self.chunker is not None:
            self._highlight_devset()
        self.devsetbox['state'] = 'disabled'

        # Update the scrollbar
        first = self.devset_index / self._devset_size.get()
        last = (self.devset_index + 2) / self._devset_size.get()
        self.devset_scroll.set(first, last)

    def _chunks(self, tree):
        chunks = set()
        wordnum = 0
        for child in tree:
            if isinstance(child, Tree):
                if child.label() == self._chunk_label:
                    chunks.add((wordnum, wordnum + len(child)))
                wordnum += len(child)
            else:
                wordnum += 1
        return chunks

    def _syntax_highlight_grammar(self, grammar):
        if self.top is None:
            return
        self.grammarbox.tag_remove('comment', '1.0', 'end')
        self.grammarbox.tag_remove('angle', '1.0', 'end')
        self.grammarbox.tag_remove('brace', '1.0', 'end')
        self.grammarbox.tag_add('hangindent', '1.0', 'end')
        for lineno, line in enumerate(grammar.split('\n')):
            if not line.strip():
                continue
            m = re.match(r'(\\.|[^#])*(#.*)?', line)
            comment_start = None
            if m.group(2):
                comment_start = m.start(2)
                s = '%d.%d' % (lineno + 1, m.start(2))
                e = '%d.%d' % (lineno + 1, m.end(2))
                self.grammarbox.tag_add('comment', s, e)
            for m in re.finditer('[<>{}]', line):
                if comment_start is not None and m.start() >= comment_start:
                    break
                s = '%d.%d' % (lineno + 1, m.start())
                e = '%d.%d' % (lineno + 1, m.end())
                if m.group() in '<>':
                    self.grammarbox.tag_add('angle', s, e)
                else:
                    self.grammarbox.tag_add('brace', s, e)

    def _grammarcheck(self, grammar):
        if self.top is None:
            return
        self.grammarbox.tag_remove('error', '1.0', 'end')
        self._grammarcheck_errs = []
        for lineno, line in enumerate(grammar.split('\n')):
            line = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', line)
            line = line.strip()
            if line:
                try:
                    RegexpChunkRule.fromstring(line)
                except ValueError as e:
                    self.grammarbox.tag_add(
                        'error', '%s.0' % (lineno + 1), '%s.0 lineend' % (lineno + 1)
                    )
        self.status['text'] = ''

    def update(self, *event):
        # Record when update was called (for grammarcheck)
        if event:
            self._last_keypress = time.time()

        # Read the grammar from the Text box.
        self.grammar = grammar = self.grammarbox.get('1.0', 'end')

        # If the grammar hasn't changed, do nothing:
        normalized_grammar = self.normalize_grammar(grammar)
        if normalized_grammar == self.normalized_grammar:
            return
        else:
            self.normalized_grammar = normalized_grammar

        # If the grammar has changed, and we're looking at history,
        # then stop looking at history.
        if self._history_index < len(self._history) - 1:
            self.grammarlabel['text'] = 'Grammar:'

        self._syntax_highlight_grammar(grammar)

        # The grammar has changed; try parsing it.  If it doesn't
        # parse, do nothing.  (flag error location?)
        try:
            # Note: the normalized grammar has no blank lines.
            if normalized_grammar:
                rules = [
                    RegexpChunkRule.fromstring(line)
                    for line in normalized_grammar.split('\n')
                ]
            else:
                rules = []
        except ValueError as e:
            # Use the un-normalized grammar for error highlighting.
            self._grammarcheck(grammar)
            self.chunker = None
            return

        self.chunker = RegexpChunkParser(rules)
        self.grammarbox.tag_remove('error', '1.0', 'end')
        self.grammar_changed = time.time()
        # Display the results
        if self._showing_trace:
            self.show_trace()
        else:
            self._highlight_devset()
        # Start the eval demon
        if not self._eval_demon_running:
            self._eval_demon()

    def _highlight_devset(self, sample=None):
        if sample is None:
            sample = self.devset[self.devset_index : self.devset_index + 1]

        self.devsetbox.tag_remove('true-pos', '1.0', 'end')
        self.devsetbox.tag_remove('false-neg', '1.0', 'end')
        self.devsetbox.tag_remove('false-pos', '1.0', 'end')

        # Run the grammar on the test cases.
        for sentnum, gold_tree in enumerate(sample):
            # Run the chunk parser
            test_tree = self._chunkparse(gold_tree.leaves())
            # Extract gold & test chunks
            gold_chunks = self._chunks(gold_tree)
            test_chunks = self._chunks(test_tree)
            # Compare them.
            for chunk in gold_chunks.intersection(test_chunks):
                self._color_chunk(sentnum, chunk, 'true-pos')
            for chunk in gold_chunks - test_chunks:
                self._color_chunk(sentnum, chunk, 'false-neg')
            for chunk in test_chunks - gold_chunks:
                self._color_chunk(sentnum, chunk, 'false-pos')

    def _chunkparse(self, words):
        try:
            return self.chunker.parse(words)
        except (ValueError, IndexError) as e:
            # There's an error somewhere in the grammar, but we're not sure
            # exactly where, so just mark the whole grammar as bad.
            # E.g., this is caused by: "({<NN>})"
            self.grammarbox.tag_add('error', '1.0', 'end')
            # Treat it as tagging nothing:
            return words

    def _color_chunk(self, sentnum, chunk, tag):
        start, end = chunk
        self.devsetbox.tag_add(
            tag,
            '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, start]),
            '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, end] - 1),
        )

    def reset(self):
        # Clear various variables
        self.chunker = None
        self.grammar = None
        self.normalized_grammar = None
        self.grammar_changed = 0
        self._history = []
        self._history_index = 0
        # Update the on-screen display.
        self.grammarbox.delete('1.0', 'end')
        self.show_devset(0)
        self.update()
        # self._eval_plot()

    #: The ``%``-format template that ``save_grammar`` writes to a saved
    #: grammar file: a commented header (save date, development set
    #: name, precision, recall and F-score) followed by the grammar
    #: text.  It is filled in from a dict with the keys ``date``,
    #: ``devset``, ``precision``, ``recall``, ``fscore`` and ``grammar``.
    SAVE_GRAMMAR_TEMPLATE = (
        '# Regexp Chunk Parsing Grammar\n'
        '# Saved %(date)s\n'
        '#\n'
        '# Development set: %(devset)s\n'
        '#   Precision: %(precision)s\n'
        '#   Recall:    %(recall)s\n'
        '#   F-score:   %(fscore)s\n\n'
        '%(grammar)s\n'
    )

    def save_grammar(self, filename=None):
        if not filename:
            ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')]
            filename = asksaveasfilename(filetypes=ftypes, defaultextension='.chunk')
            if not filename:
                return
        if self._history and self.normalized_grammar == self.normalize_grammar(
            self._history[-1][0]
        ):
            precision, recall, fscore = [
                '%.2f%%' % (100 * v) for v in self._history[-1][1:]
            ]
        elif self.chunker is None:
            precision = recall = fscore = 'Grammar not well formed'
        else:
            precision = recall = fscore = 'Not finished evaluation yet'

        with open(filename, 'w') as outfile:
            outfile.write(
                self.SAVE_GRAMMAR_TEMPLATE
                % dict(
                    date=time.ctime(),
                    devset=self.devset_name,
                    precision=precision,
                    recall=recall,
                    fscore=fscore,
                    grammar=self.grammar.strip(),
                )
            )

    def load_grammar(self, filename=None):
        if not filename:
            ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')]
            filename = askopenfilename(filetypes=ftypes, defaultextension='.chunk')
            if not filename:
                return
        self.grammarbox.delete('1.0', 'end')
        self.update()
        with open(filename, 'r') as infile:
            grammar = infile.read()
        grammar = re.sub(
            '^\# Regexp Chunk Parsing Grammar[\s\S]*' 'F-score:.*\n', '', grammar
        ).lstrip()
        self.grammarbox.insert('1.0', grammar)
        self.update()

    def save_history(self, filename=None):
        if not filename:
            ftypes = [('Chunk Gramamr History', '.txt'), ('All files', '*')]
            filename = asksaveasfilename(filetypes=ftypes, defaultextension='.txt')
            if not filename:
                return

        with open(filename, 'w') as outfile:
            outfile.write('# Regexp Chunk Parsing Grammar History\n')
            outfile.write('# Saved %s\n' % time.ctime())
            outfile.write('# Development set: %s\n' % self.devset_name)
            for i, (g, p, r, f) in enumerate(self._history):
                hdr = (
                    'Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, '
                    'fscore=%.2f%%)'
                    % (i + 1, len(self._history), p * 100, r * 100, f * 100)
                )
                outfile.write('\n%s\n' % hdr)
                outfile.write(''.join('  %s\n' % line for line in g.strip().split()))

            if not (
                self._history
                and self.normalized_grammar
                == self.normalize_grammar(self._history[-1][0])
            ):
                if self.chunker is None:
                    outfile.write('\nCurrent Grammar (not well-formed)\n')
                else:
                    outfile.write('\nCurrent Grammar (not evaluated)\n')
                outfile.write(
                    ''.join('  %s\n' % line for line in self.grammar.strip().split())
                )

    def about(self, *e):
        """
        Show the "about" text for this application.

        A Tkinter message box is tried first; if it cannot be imported
        or shown, the text is displayed in a ``ShowText`` window
        instead.

        :param e: Ignored; accepted so the method can be used as a
            Tkinter callback.
        """
        ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper"
        TITLE = 'About: Regular Expression Chunk Parser Application'
        try:
            from six.moves.tkinter_messagebox import Message

            Message(message=ABOUT, title=TITLE).show()
        except Exception:
            # Best-effort fallback.  ``Exception`` rather than a bare
            # ``except`` so that KeyboardInterrupt and SystemExit are
            # not swallowed.
            ShowText(self.top, TITLE, ABOUT)

    def set_devset_size(self, size=None):
        if size is not None:
            self._devset_size.set(size)
        self._devset_size.set(min(len(self.devset), self._devset_size.get()))
        self.show_devset(1)
        self.show_devset(0)
        # what about history?  Evaluated at diff dev set sizes!

    def resize(self, size=None):
        if size is not None:
            self._size.set(size)
        size = self._size.get()
        self._font.configure(size=-(abs(size)))
        self._smallfont.configure(size=min(-10, -(abs(size)) * 14 // 20))

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tkinter mainloop, unless running inside IDLE.

        This function must be called if this demo is created from a
        non-interactive program (e.g. from a script); otherwise, the
        demo will close as soon as the script completes.  When
        ``in_idle()`` reports true, the method returns immediately
        without entering the mainloop.

        :param args: Positional arguments passed on to the top-level
            window's ``mainloop``.
        :param kwargs: Keyword arguments passed on to the top-level
            window's ``mainloop``.
        """
        if not in_idle():
            self.top.mainloop(*args, **kwargs)


def app():
    """Create a ``RegexpChunkApp`` and run its main loop."""
    RegexpChunkApp().mainloop()


if __name__ == '__main__':
    app()

__all__ = ['app']