Source code for nltk.app.chunkparser_app

# Natural Language Toolkit: Regexp Chunk Parser Application
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
"""

# Todo: Add a way to select the development set from the menubar.  This
# might just need to be a selection box (conll vs treebank etc) plus
# configuration parameters to select what's being chunked (eg VP vs NP)
# and what part of the data is being used as the development set.

import random
import re
import textwrap
import time
from tkinter import (
    Button,
    Canvas,
    Checkbutton,
    Frame,
    IntVar,
    Label,
    Menu,
    Scrollbar,
    Text,
    Tk,
)
from tkinter.filedialog import askopenfilename, asksaveasfilename
from tkinter.font import Font

from nltk.chunk import ChunkScore, RegexpChunkParser
from nltk.chunk.regexp import RegexpChunkRule
from nltk.corpus import conll2000, treebank_chunk
from nltk.draw.util import ShowText
from nltk.tree import Tree
from nltk.util import in_idle


class RegexpChunkApp:
    """
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    """

    ##/////////////////////////////////////////////////////////////////
    ##  Help Text
    ##/////////////////////////////////////////////////////////////////

    #: A dictionary mapping from part of speech tags to descriptions,
    #: which is used in the help text.  (This should probably live with
    #: the conll and/or treebank corpus instead.)
    TAGSET = {
        "CC": "Coordinating conjunction",
        "PRP$": "Possessive pronoun",
        "CD": "Cardinal number",
        "RB": "Adverb",
        "DT": "Determiner",
        "RBR": "Adverb, comparative",
        "EX": "Existential there",
        "RBS": "Adverb, superlative",
        "FW": "Foreign word",
        "RP": "Particle",
        "JJ": "Adjective",
        "TO": "to",
        "JJR": "Adjective, comparative",
        "UH": "Interjection",
        "JJS": "Adjective, superlative",
        "VB": "Verb, base form",
        "LS": "List item marker",
        "VBD": "Verb, past tense",
        "MD": "Modal",
        "NNS": "Noun, plural",
        "NN": "Noun, singular or mass",
        "VBN": "Verb, past participle",
        "VBZ": "Verb,3rd ps. sing. present",
        "NNP": "Proper noun, singular",
        "NNPS": "Proper noun plural",
        "WDT": "wh-determiner",
        "PDT": "Predeterminer",
        "WP": "wh-pronoun",
        "POS": "Possessive ending",
        "WP$": "Possessive wh-pronoun",
        "PRP": "Personal pronoun",
        "WRB": "wh-adverb",
        "(": "open parenthesis",
        ")": "close parenthesis",
        "``": "open quote",
        ",": "comma",
        "''": "close quote",
        ".": "period",
        "#": "pound sign (currency marker)",
        "$": "dollar sign (currency marker)",
        "IN": "Preposition/subord. conjunction",
        "SYM": "Symbol (mathematical or scientific)",
        "VBG": "Verb, gerund/present participle",
        "VBP": "Verb, non-3rd ps. sing. present",
        ":": "colon",
    }

    #: Contents for the help box.  This is a list of tuples, one for
    #: each help page, where each tuple has four elements:
    #:   - A title (displayed as a tab)
    #:   - A string description of tabstops (see Tkinter.Text for details)
    #:   - The text contents for the help page.  You can use expressions
    #:     like <red>...</red> to colorize the text; see ``HELP_AUTOTAG``
    #:     for a list of tags you can use for colorizing.
    HELP = [
        (
            "Help",
            "20",
            "Welcome to the regular expression chunk-parser grammar editor.  "
            "You can use this editor to develop and test chunk parser grammars "
            "based on NLTK's RegexpChunkParser class.\n\n"
            # Help box.
            "Use this box ('Help') to learn more about the editor; click on the "
            "tabs for help on specific topics:"
            "<indent>\n"
            "Rules: grammar rule types\n"
            "Regexps: regular expression syntax\n"
            "Tags: part of speech tags\n</indent>\n"
            # Grammar.
            "Use the upper-left box ('Grammar') to edit your grammar.  "
            "Each line of your grammar specifies a single 'rule', "
            "which performs an action such as creating a chunk or merging "
            "two chunks.\n\n"
            # Dev set.
            "The lower-left box ('Development Set') runs your grammar on the "
            "development set, and displays the results.  "
            "Your grammar's chunks are <highlight>highlighted</highlight>, and "
            "the correct (gold standard) chunks are "
            "<underline>underlined</underline>.  If they "
            "match, they are displayed in <green>green</green>; otherwise, "
            "they are displayed in <red>red</red>.  The box displays a single "
            "sentence from the development set at a time; use the scrollbar or "
            "the next/previous buttons view additional sentences.\n\n"
            # Performance
            "The lower-right box ('Evaluation') tracks the performance of "
            "your grammar on the development set.  The 'precision' axis "
            "indicates how many of your grammar's chunks are correct; and "
            "the 'recall' axis indicates how many of the gold standard "
            "chunks your system generated.  Typically, you should try to "
            "design a grammar that scores high on both metrics.  The "
            "exact precision and recall of the current grammar, as well "
            "as their harmonic mean (the 'f-score'), are displayed in "
            "the status bar at the bottom of the window.",
        ),
        (
            "Rules",
            "10",
            "<h1>{...regexp...}</h1>"
            "<indent>\nChunk rule: creates new chunks from words matching "
            "regexp.</indent>\n\n"
            "<h1>}...regexp...{</h1>"
            "<indent>\nStrip rule: removes words matching regexp from existing "
            "chunks.</indent>\n\n"
            "<h1>...regexp1...}{...regexp2...</h1>"
            "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
            "regexp2 in two.</indent>\n\n"
            "<h1>...regexp...{}...regexp...</h1>"
            "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
            "and regexp2</indent>\n",
        ),
        (
            "Regexps",
            "10 60",
            # "Regular Expression Syntax Summary:\n\n"
            "<h1>Pattern\t\tMatches...</h1>\n"
            "<hangindent>"
            "\t<<var>T</var>>\ta word with tag <var>T</var> "
            "(where <var>T</var> may be a regexp).\n"
            "\t<var>x</var>?\tan optional <var>x</var>\n"
            "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
            "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
            "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
            "\t.\tmatches any character\n"
            "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
            "\t# <var>x...</var>\tTreats <var>x...</var> "
            "(to the end of the line) as a comment\n"
            "\t\\<var>C</var>\tmatches character <var>C</var> "
            "(useful when <var>C</var> is a special character "
            "like + or #)\n"
            "</hangindent>"
            "\n<h1>Examples:</h1>\n"
            "<hangindent>"
            "\t<regexp><NN></regexp>\n"
            '\t\tMatches <match>"cow/NN"</match>\n'
            '\t\tMatches <match>"green/NN"</match>\n'
            "\t<regexp><VB.*></regexp>\n"
            '\t\tMatches <match>"eating/VBG"</match>\n'
            '\t\tMatches <match>"ate/VBD"</match>\n'
            "\t<regexp><IN><DT><NN></regexp>\n"
            '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
            "\t<regexp><RB>?<VBD></regexp>\n"
            '\t\tMatches <match>"ran/VBD"</match>\n'
            '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
            r"\t<regexp><\#><CD> # This is a comment...</regexp>\n"
            '\t\tMatches <match>"#/# 100/CD"</match>\n'
            "</hangindent>",
        ),
        (
            "Tags",
            "10 60",
            "<h1>Part of Speech Tags:</h1>\n"
            + "<hangindent>"
            + "<<TAGSET>>"
            + "</hangindent>\n",  # this gets auto-substituted w/ self.TAGSET
        ),
    ]

    HELP_AUTOTAG = [
        ("red", dict(foreground="#a00")),
        ("green", dict(foreground="#080")),
        ("highlight", dict(background="#ddd")),
        ("underline", dict(underline=True)),
        ("h1", dict(underline=True)),
        ("indent", dict(lmargin1=20, lmargin2=20)),
        ("hangindent", dict(lmargin1=0, lmargin2=60)),
        ("var", dict(foreground="#88f")),
        ("regexp", dict(foreground="#ba7")),
        ("match", dict(foreground="#6a6")),
    ]

    ##/////////////////////////////////////////////////////////////////
    ##  Config Parameters
    ##/////////////////////////////////////////////////////////////////

    _EVAL_DELAY = 1
    """If the user has not pressed any key for this amount of time (in
       seconds), and the current grammar has not been evaluated, then
       the eval demon will evaluate it."""

    _EVAL_CHUNK = 15
    """The number of sentences that should be evaluated by the eval
       demon each time it runs."""
    _EVAL_FREQ = 0.2
    """The frequency (in seconds) at which the eval demon is run"""
    _EVAL_DEMON_MIN = 0.02
    """The minimum amount of time that the eval demon should take each time
       it runs -- if it takes less than this time, _EVAL_CHUNK will be
       modified upwards."""
    _EVAL_DEMON_MAX = 0.04
    """The maximum amount of time that the eval demon should take each time
       it runs -- if it takes more than this time, _EVAL_CHUNK will be
       modified downwards."""

    _GRAMMARBOX_PARAMS = dict(
        width=40,
        height=12,
        background="#efe",
        highlightbackground="#efe",
        highlightthickness=1,
        relief="groove",
        border=2,
        wrap="word",
    )
    _HELPBOX_PARAMS = dict(
        width=15,
        height=15,
        background="#efe",
        highlightbackground="#efe",
        foreground="#555",
        highlightthickness=1,
        relief="groove",
        border=2,
        wrap="word",
    )
    _DEVSETBOX_PARAMS = dict(
        width=70,
        height=10,
        background="#eef",
        highlightbackground="#eef",
        highlightthickness=1,
        relief="groove",
        border=2,
        wrap="word",
        tabs=(30,),
    )
    _STATUS_PARAMS = dict(background="#9bb", relief="groove", border=2)
    _FONT_PARAMS = dict(family="helvetica", size=-20)
    _FRAME_PARAMS = dict(background="#777", padx=2, pady=2, border=3)
    _EVALBOX_PARAMS = dict(
        background="#eef",
        highlightbackground="#eef",
        highlightthickness=1,
        relief="groove",
        border=2,
        width=300,
        height=280,
    )
    _BUTTON_PARAMS = dict(
        background="#777", activebackground="#777", highlightbackground="#777"
    )
    _HELPTAB_BG_COLOR = "#aba"
    _HELPTAB_FG_COLOR = "#efe"

    _HELPTAB_FG_PARAMS = dict(background="#efe")
    _HELPTAB_BG_PARAMS = dict(background="#aba")
    _HELPTAB_SPACER = 6

    def normalize_grammar(self, grammar):
        # Strip comments
        grammar = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", grammar)
        # Normalize whitespace
        grammar = re.sub(" +", " ", grammar)
        grammar = re.sub(r"\n\s+", r"\n", grammar)
        grammar = grammar.strip()
        # [xx] Hack: automatically backslash $!
        grammar = re.sub(r"([^\\])\$", r"\1\\$", grammar)
        return grammar

    def __init__(
        self,
        devset_name="conll2000",
        devset=None,
        grammar="",
        chunk_label="NP",
        tagset=None,
    ):
        """
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        """
        self._chunk_label = chunk_label

        if tagset is None:
            tagset = self.TAGSET
        self.tagset = tagset

        # Named development sets:
        if devset is None:
            if devset_name == "conll2000":
                devset = conll2000.chunked_sents("train.txt")  # [:100]
            elif devset == "treebank":
                devset = treebank_chunk.chunked_sents()  # [:100]
            else:
                raise ValueError("Unknown development set %s" % devset_name)

        self.chunker = None
        """The chunker built from the grammar string"""

        self.grammar = grammar
        """The unparsed grammar string"""

        self.normalized_grammar = None
        """A normalized version of ``self.grammar``."""

        self.grammar_changed = 0
        """The last time() that the grammar was changed."""

        self.devset = devset
        """The development set -- a list of chunked sentences."""

        self.devset_name = devset_name
        """The name of the development set (for save files)."""

        self.devset_index = -1
        """The index into the development set of the first instance
           that's currently being viewed."""

        self._last_keypress = 0
        """The time() when a key was most recently pressed"""

        self._history = []
        """A list of (grammar, precision, recall, fscore) tuples for
           grammars that the user has already tried."""

        self._history_index = 0
        """When the user is scrolling through previous grammars, this
           is used to keep track of which grammar they're looking at."""

        self._eval_grammar = None
        """The grammar that is being currently evaluated by the eval
           demon."""

        self._eval_normalized_grammar = None
        """A normalized copy of ``_eval_grammar``."""

        self._eval_index = 0
        """The index of the next sentence in the development set that
           should be looked at by the eval demon."""

        self._eval_score = ChunkScore(chunk_label=chunk_label)
        """The ``ChunkScore`` object that's used to keep track of the score
        of the current grammar on the development set."""

        # Set up the main window.
        top = self.top = Tk()
        top.geometry("+50+50")
        top.title("Regexp Chunk Parser App")
        top.bind("<Control-q>", self.destroy)

        # Variable that restricts how much of the devset we look at.
        self._devset_size = IntVar(top)
        self._devset_size.set(100)

        # Set up all the tkinter widgets
        self._init_fonts(top)
        self._init_widgets(top)
        self._init_bindings(top)
        self._init_menubar(top)
        self.grammarbox.focus()

        # If a grammar was given, then display it.
        if grammar:
            self.grammarbox.insert("end", grammar + "\n")
            self.grammarbox.mark_set("insert", "1.0")

        # Display the first item in the development set
        self.show_devset(0)
        self.update()

    def _init_bindings(self, top):
        top.bind("<Control-n>", self._devset_next)
        top.bind("<Control-p>", self._devset_prev)
        top.bind("<Control-t>", self.toggle_show_trace)
        top.bind("<KeyPress>", self.update)
        top.bind("<Control-s>", lambda e: self.save_grammar())
        top.bind("<Control-o>", lambda e: self.load_grammar())
        self.grammarbox.bind("<Control-t>", self.toggle_show_trace)
        self.grammarbox.bind("<Control-n>", self._devset_next)
        self.grammarbox.bind("<Control-p>", self._devset_prev)

        # Redraw the eval graph when the window size changes
        self.evalbox.bind("<Configure>", self._eval_plot)

    def _init_fonts(self, top):
        # TWhat's our font size (default=same as sysfont)
        self._size = IntVar(top)
        self._size.set(20)
        self._font = Font(family="helvetica", size=-self._size.get())
        self._smallfont = Font(
            family="helvetica", size=-(int(self._size.get() * 14 // 20))
        )

    def _init_menubar(self, parent):
        menubar = Menu(parent)

        filemenu = Menu(menubar, tearoff=0)
        filemenu.add_command(label="Reset Application", underline=0, command=self.reset)
        filemenu.add_command(
            label="Save Current Grammar",
            underline=0,
            accelerator="Ctrl-s",
            command=self.save_grammar,
        )
        filemenu.add_command(
            label="Load Grammar",
            underline=0,
            accelerator="Ctrl-o",
            command=self.load_grammar,
        )

        filemenu.add_command(
            label="Save Grammar History", underline=13, command=self.save_history
        )

        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        viewmenu = Menu(menubar, tearoff=0)
        viewmenu.add_radiobutton(
            label="Tiny",
            variable=self._size,
            underline=0,
            value=10,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Small",
            variable=self._size,
            underline=0,
            value=16,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Medium",
            variable=self._size,
            underline=0,
            value=20,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Large",
            variable=self._size,
            underline=0,
            value=24,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Huge",
            variable=self._size,
            underline=0,
            value=34,
            command=self.resize,
        )
        menubar.add_cascade(label="View", underline=0, menu=viewmenu)

        devsetmenu = Menu(menubar, tearoff=0)
        devsetmenu.add_radiobutton(
            label="50 sentences",
            variable=self._devset_size,
            value=50,
            command=self.set_devset_size,
        )
        devsetmenu.add_radiobutton(
            label="100 sentences",
            variable=self._devset_size,
            value=100,
            command=self.set_devset_size,
        )
        devsetmenu.add_radiobutton(
            label="200 sentences",
            variable=self._devset_size,
            value=200,
            command=self.set_devset_size,
        )
        devsetmenu.add_radiobutton(
            label="500 sentences",
            variable=self._devset_size,
            value=500,
            command=self.set_devset_size,
        )
        menubar.add_cascade(label="Development-Set", underline=0, menu=devsetmenu)

        helpmenu = Menu(menubar, tearoff=0)
        helpmenu.add_command(label="About", underline=0, command=self.about)
        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

        parent.config(menu=menubar)

    def toggle_show_trace(self, *e):
        if self._showing_trace:
            self.show_devset()
        else:
            self.show_trace()
        return "break"

    _SCALE_N = 5  # center on the last 5 examples.
    _DRAW_LINES = False

    def _eval_plot(self, *e, **config):
        width = config.get("width", self.evalbox.winfo_width())
        height = config.get("height", self.evalbox.winfo_height())

        # Clear the canvas
        self.evalbox.delete("all")

        # Draw the precision & recall labels.
        tag = self.evalbox.create_text(
            10, height // 2 - 10, justify="left", anchor="w", text="Precision"
        )
        left, right = self.evalbox.bbox(tag)[2] + 5, width - 10
        tag = self.evalbox.create_text(
            left + (width - left) // 2,
            height - 10,
            anchor="s",
            text="Recall",
            justify="center",
        )
        top, bot = 10, self.evalbox.bbox(tag)[1] - 10

        # Draw masks for clipping the plot.
        bg = self._EVALBOX_PARAMS["background"]
        self.evalbox.lower(
            self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg)
        )
        self.evalbox.lower(
            self.evalbox.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg)
        )

        # Calculate the plot's scale.
        if self._autoscale.get() and len(self._history) > 1:
            max_precision = max_recall = 0
            min_precision = min_recall = 1
            for i in range(1, min(len(self._history), self._SCALE_N + 1)):
                grammar, precision, recall, fmeasure = self._history[-i]
                min_precision = min(precision, min_precision)
                min_recall = min(recall, min_recall)
                max_precision = max(precision, max_precision)
                max_recall = max(recall, max_recall)
            #             if max_precision-min_precision > max_recall-min_recall:
            #                 min_recall -= (max_precision-min_precision)/2
            #                 max_recall += (max_precision-min_precision)/2
            #             else:
            #                 min_precision -= (max_recall-min_recall)/2
            #                 max_precision += (max_recall-min_recall)/2
            #             if min_recall < 0:
            #                 max_recall -= min_recall
            #                 min_recall = 0
            #             if min_precision < 0:
            #                 max_precision -= min_precision
            #                 min_precision = 0
            min_precision = max(min_precision - 0.01, 0)
            min_recall = max(min_recall - 0.01, 0)
            max_precision = min(max_precision + 0.01, 1)
            max_recall = min(max_recall + 0.01, 1)
        else:
            min_precision = min_recall = 0
            max_precision = max_recall = 1

        # Draw the axis lines & grid lines
        for i in range(11):
            x = left + (right - left) * (
                (i / 10.0 - min_recall) / (max_recall - min_recall)
            )
            y = bot - (bot - top) * (
                (i / 10.0 - min_precision) / (max_precision - min_precision)
            )
            if left < x < right:
                self.evalbox.create_line(x, top, x, bot, fill="#888")
            if top < y < bot:
                self.evalbox.create_line(left, y, right, y, fill="#888")
        self.evalbox.create_line(left, top, left, bot)
        self.evalbox.create_line(left, bot, right, bot)

        # Display the plot's scale
        self.evalbox.create_text(
            left - 3,
            bot,
            justify="right",
            anchor="se",
            text="%d%%" % (100 * min_precision),
        )
        self.evalbox.create_text(
            left - 3,
            top,
            justify="right",
            anchor="ne",
            text="%d%%" % (100 * max_precision),
        )
        self.evalbox.create_text(
            left,
            bot + 3,
            justify="center",
            anchor="nw",
            text="%d%%" % (100 * min_recall),
        )
        self.evalbox.create_text(
            right,
            bot + 3,
            justify="center",
            anchor="ne",
            text="%d%%" % (100 * max_recall),
        )

        # Display the scores.
        prev_x = prev_y = None
        for i, (_, precision, recall, fscore) in enumerate(self._history):
            x = left + (right - left) * (
                (recall - min_recall) / (max_recall - min_recall)
            )
            y = bot - (bot - top) * (
                (precision - min_precision) / (max_precision - min_precision)
            )
            if i == self._history_index:
                self.evalbox.create_oval(
                    x - 2, y - 2, x + 2, y + 2, fill="#0f0", outline="#000"
                )
                self.status["text"] = (
                    "Precision: %.2f%%\t" % (precision * 100)
                    + "Recall: %.2f%%\t" % (recall * 100)
                    + "F-score: %.2f%%" % (fscore * 100)
                )
            else:
                self.evalbox.lower(
                    self.evalbox.create_oval(
                        x - 2, y - 2, x + 2, y + 2, fill="#afa", outline="#8c8"
                    )
                )
            if prev_x is not None and self._eval_lines.get():
                self.evalbox.lower(
                    self.evalbox.create_line(prev_x, prev_y, x, y, fill="#8c8")
                )
            prev_x, prev_y = x, y

    _eval_demon_running = False

    def _eval_demon(self):
        if self.top is None:
            return
        if self.chunker is None:
            self._eval_demon_running = False
            return

        # Note our starting time.
        t0 = time.time()

        # If are still typing, then wait for them to finish.
        if (
            time.time() - self._last_keypress < self._EVAL_DELAY
            and self.normalized_grammar != self._eval_normalized_grammar
        ):
            self._eval_demon_running = True
            return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)

        # If the grammar changed, restart the evaluation.
        if self.normalized_grammar != self._eval_normalized_grammar:
            # Check if we've seen this grammar already.  If so, then
            # just use the old evaluation values.
            for g, p, r, f in self._history:
                if self.normalized_grammar == self.normalize_grammar(g):
                    self._history.append((g, p, r, f))
                    self._history_index = len(self._history) - 1
                    self._eval_plot()
                    self._eval_demon_running = False
                    self._eval_normalized_grammar = None
                    return
            self._eval_index = 0
            self._eval_score = ChunkScore(chunk_label=self._chunk_label)
            self._eval_grammar = self.grammar
            self._eval_normalized_grammar = self.normalized_grammar

        # If the grammar is empty, the don't bother evaluating it, or
        # recording it in history -- the score will just be 0.
        if self.normalized_grammar.strip() == "":
            # self._eval_index = self._devset_size.get()
            self._eval_demon_running = False
            return

        # Score the next set of examples
        for gold in self.devset[
            self._eval_index : min(
                self._eval_index + self._EVAL_CHUNK, self._devset_size.get()
            )
        ]:
            guess = self._chunkparse(gold.leaves())
            self._eval_score.score(gold, guess)

        # update our index in the devset.
        self._eval_index += self._EVAL_CHUNK

        # Check if we're done
        if self._eval_index >= self._devset_size.get():
            self._history.append(
                (
                    self._eval_grammar,
                    self._eval_score.precision(),
                    self._eval_score.recall(),
                    self._eval_score.f_measure(),
                )
            )
            self._history_index = len(self._history) - 1
            self._eval_plot()
            self._eval_demon_running = False
            self._eval_normalized_grammar = None
        else:
            progress = 100 * self._eval_index / self._devset_size.get()
            self.status["text"] = "Evaluating on Development Set (%d%%)" % progress
            self._eval_demon_running = True
            self._adaptively_modify_eval_chunk(time.time() - t0)
            self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)

    def _adaptively_modify_eval_chunk(self, t):
        """
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        """
        if t > self._EVAL_DEMON_MAX and self._EVAL_CHUNK > 5:
            self._EVAL_CHUNK = min(
                self._EVAL_CHUNK - 1,
                max(
                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MAX / t)),
                    self._EVAL_CHUNK - 10,
                ),
            )
        elif t < self._EVAL_DEMON_MIN:
            self._EVAL_CHUNK = max(
                self._EVAL_CHUNK + 1,
                min(
                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MIN / t)),
                    self._EVAL_CHUNK + 10,
                ),
            )

    def _init_widgets(self, top):
        frame0 = Frame(top, **self._FRAME_PARAMS)
        frame0.grid_columnconfigure(0, weight=4)
        frame0.grid_columnconfigure(3, weight=2)
        frame0.grid_rowconfigure(1, weight=1)
        frame0.grid_rowconfigure(5, weight=1)

        # The grammar
        self.grammarbox = Text(frame0, font=self._font, **self._GRAMMARBOX_PARAMS)
        self.grammarlabel = Label(
            frame0,
            font=self._font,
            text="Grammar:",
            highlightcolor="black",
            background=self._GRAMMARBOX_PARAMS["background"],
        )
        self.grammarlabel.grid(column=0, row=0, sticky="SW")
        self.grammarbox.grid(column=0, row=1, sticky="NEWS")

        # Scroll bar for grammar
        grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview)
        grammar_scrollbar.grid(column=1, row=1, sticky="NWS")
        self.grammarbox.config(yscrollcommand=grammar_scrollbar.set)

        # grammar buttons
        bg = self._FRAME_PARAMS["background"]
        frame3 = Frame(frame0, background=bg)
        frame3.grid(column=0, row=2, sticky="EW")
        Button(
            frame3,
            text="Prev Grammar",
            command=self._history_prev,
            **self._BUTTON_PARAMS,
        ).pack(side="left")
        Button(
            frame3,
            text="Next Grammar",
            command=self._history_next,
            **self._BUTTON_PARAMS,
        ).pack(side="left")

        # Help box
        self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS)
        self.helpbox.grid(column=3, row=1, sticky="NEWS")
        self.helptabs = {}
        bg = self._FRAME_PARAMS["background"]
        helptab_frame = Frame(frame0, background=bg)
        helptab_frame.grid(column=3, row=0, sticky="SW")
        for i, (tab, tabstops, text) in enumerate(self.HELP):
            label = Label(helptab_frame, text=tab, font=self._smallfont)
            label.grid(column=i * 2, row=0, sticky="S")
            # help_frame.grid_columnconfigure(i, weight=1)
            # label.pack(side='left')
            label.bind("<ButtonPress>", lambda e, tab=tab: self.show_help(tab))
            self.helptabs[tab] = label
            Frame(
                helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg
            ).grid(column=i * 2 + 1, row=0)
        self.helptabs[self.HELP[0][0]].configure(font=self._font)
        self.helpbox.tag_config("elide", elide=True)
        for tag, params in self.HELP_AUTOTAG:
            self.helpbox.tag_config("tag-%s" % tag, **params)
        self.show_help(self.HELP[0][0])

        # Scroll bar for helpbox
        help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview)
        self.helpbox.config(yscrollcommand=help_scrollbar.set)
        help_scrollbar.grid(column=4, row=1, sticky="NWS")

        # The dev set
        frame4 = Frame(frame0, background=self._FRAME_PARAMS["background"])
        self.devsetbox = Text(frame4, font=self._font, **self._DEVSETBOX_PARAMS)
        self.devsetbox.pack(expand=True, fill="both")
        self.devsetlabel = Label(
            frame0,
            font=self._font,
            text="Development Set:",
            justify="right",
            background=self._DEVSETBOX_PARAMS["background"],
        )
        self.devsetlabel.grid(column=0, row=4, sticky="SW")
        frame4.grid(column=0, row=5, sticky="NEWS")

        # dev set scrollbars
        self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll)
        self.devset_scroll.grid(column=1, row=5, sticky="NWS")
        self.devset_xscroll = Scrollbar(
            frame4, command=self.devsetbox.xview, orient="horiz"
        )
        self.devsetbox["xscrollcommand"] = self.devset_xscroll.set
        self.devset_xscroll.pack(side="bottom", fill="x")

        # dev set buttons
        bg = self._FRAME_PARAMS["background"]
        frame1 = Frame(frame0, background=bg)
        frame1.grid(column=0, row=7, sticky="EW")
        Button(
            frame1,
            text="Prev Example (Ctrl-p)",
            command=self._devset_prev,
            **self._BUTTON_PARAMS,
        ).pack(side="left")
        Button(
            frame1,
            text="Next Example (Ctrl-n)",
            command=self._devset_next,
            **self._BUTTON_PARAMS,
        ).pack(side="left")
        self.devset_button = Button(
            frame1,
            text="Show example",
            command=self.show_devset,
            state="disabled",
            **self._BUTTON_PARAMS,
        )
        self.devset_button.pack(side="right")
        self.trace_button = Button(
            frame1, text="Show trace", command=self.show_trace, **self._BUTTON_PARAMS
        )
        self.trace_button.pack(side="right")

        # evaluation box
        self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS)
        label = Label(
            frame0,
            font=self._font,
            text="Evaluation:",
            justify="right",
            background=self._EVALBOX_PARAMS["background"],
        )
        label.grid(column=3, row=4, sticky="SW")
        self.evalbox.grid(column=3, row=5, sticky="NEWS", columnspan=2)

        # evaluation box buttons
        bg = self._FRAME_PARAMS["background"]
        frame2 = Frame(frame0, background=bg)
        frame2.grid(column=3, row=7, sticky="EW")
        self._autoscale = IntVar(self.top)
        self._autoscale.set(False)
        Checkbutton(
            frame2,
            variable=self._autoscale,
            command=self._eval_plot,
            text="Zoom",
            **self._BUTTON_PARAMS,
        ).pack(side="left")
        self._eval_lines = IntVar(self.top)
        self._eval_lines.set(False)
        Checkbutton(
            frame2,
            variable=self._eval_lines,
            command=self._eval_plot,
            text="Lines",
            **self._BUTTON_PARAMS,
        ).pack(side="left")
        Button(frame2, text="History", **self._BUTTON_PARAMS).pack(side="right")

        # The status label
        self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS)
        self.status.grid(column=0, row=9, sticky="NEW", padx=3, pady=2, columnspan=5)

        # Help box & devset box can't be edited.
        self.helpbox["state"] = "disabled"
        self.devsetbox["state"] = "disabled"

        # Spacers
        bg = self._FRAME_PARAMS["background"]
        Frame(frame0, height=10, width=0, background=bg).grid(column=0, row=3)
        Frame(frame0, height=0, width=10, background=bg).grid(column=2, row=0)
        Frame(frame0, height=6, width=0, background=bg).grid(column=0, row=8)

        # pack the frame.
        frame0.pack(fill="both", expand=True)

        # Set up colors for the devset box
        self.devsetbox.tag_config("true-pos", background="#afa", underline="True")
        self.devsetbox.tag_config("false-neg", underline="True", foreground="#800")
        self.devsetbox.tag_config("false-pos", background="#faa")
        self.devsetbox.tag_config("trace", foreground="#666", wrap="none")
        self.devsetbox.tag_config("wrapindent", lmargin2=30, wrap="none")
        self.devsetbox.tag_config("error", foreground="#800")

        # And for the grammarbox
        self.grammarbox.tag_config("error", background="#fec")
        self.grammarbox.tag_config("comment", foreground="#840")
        self.grammarbox.tag_config("angle", foreground="#00f")
        self.grammarbox.tag_config("brace", foreground="#0a0")
        self.grammarbox.tag_config("hangindent", lmargin1=0, lmargin2=40)

    _showing_trace = False

    def show_trace(self, *e):
        self._showing_trace = True
        self.trace_button["state"] = "disabled"
        self.devset_button["state"] = "normal"

        self.devsetbox["state"] = "normal"
        # self.devsetbox['wrap'] = 'none'
        self.devsetbox.delete("1.0", "end")
        self.devsetlabel["text"] = "Development Set (%d/%d)" % (
            (self.devset_index + 1, self._devset_size.get())
        )

        if self.chunker is None:
            self.devsetbox.insert("1.0", "Trace: waiting for a valid grammar.")
            self.devsetbox.tag_add("error", "1.0", "end")
            return  # can't do anything more

        gold_tree = self.devset[self.devset_index]
        rules = self.chunker.rules()

        # Calculate the tag sequence
        tagseq = "\t"
        charnum = [1]
        for wordnum, (word, pos) in enumerate(gold_tree.leaves()):
            tagseq += "%s " % pos
            charnum.append(len(tagseq))
        self.charnum = {
            (i, j): charnum[j]
            for i in range(len(rules) + 1)
            for j in range(len(charnum))
        }
        self.linenum = {i: i * 2 + 2 for i in range(len(rules) + 1)}

        for i in range(len(rules) + 1):
            if i == 0:
                self.devsetbox.insert("end", "Start:\n")
                self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
            else:
                self.devsetbox.insert("end", "Apply %s:\n" % rules[i - 1])
                self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
            # Display the tag sequence.
            self.devsetbox.insert("end", tagseq + "\n")
            self.devsetbox.tag_add("wrapindent", "end -2c linestart", "end -2c")
            # Run a partial parser, and extract gold & test chunks
            chunker = RegexpChunkParser(rules[:i])
            test_tree = self._chunkparse(gold_tree.leaves())
            gold_chunks = self._chunks(gold_tree)
            test_chunks = self._chunks(test_tree)
            # Compare them.
            for chunk in gold_chunks.intersection(test_chunks):
                self._color_chunk(i, chunk, "true-pos")
            for chunk in gold_chunks - test_chunks:
                self._color_chunk(i, chunk, "false-neg")
            for chunk in test_chunks - gold_chunks:
                self._color_chunk(i, chunk, "false-pos")
        self.devsetbox.insert("end", "Finished.\n")
        self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")

        # This is a hack, because the x-scrollbar isn't updating its
        # position right -- I'm not sure what the underlying cause is
        # though.  (This is on OS X w/ python 2.5)
        self.top.after(100, self.devset_xscroll.set, 0, 0.3)

    def show_help(self, tab):
        self.helpbox["state"] = "normal"
        self.helpbox.delete("1.0", "end")
        for name, tabstops, text in self.HELP:
            if name == tab:
                text = text.replace(
                    "<<TAGSET>>",
                    "\n".join(
                        "\t%s\t%s" % item
                        for item in sorted(
                            list(self.tagset.items()),
                            key=lambda t_w: re.match(r"\w+", t_w[0])
                            and (0, t_w[0])
                            or (1, t_w[0]),
                        )
                    ),
                )

                self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                self.helpbox.config(tabs=tabstops)
                self.helpbox.insert("1.0", text + "\n" * 20)
                C = "1.0 + %d chars"
                for tag, params in self.HELP_AUTOTAG:
                    pattern = f"(?s)(<{tag}>)(.*?)(</{tag}>)"
                    for m in re.finditer(pattern, text):
                        self.helpbox.tag_add("elide", C % m.start(1), C % m.end(1))
                        self.helpbox.tag_add(
                            "tag-%s" % tag, C % m.start(2), C % m.end(2)
                        )
                        self.helpbox.tag_add("elide", C % m.start(3), C % m.end(3))
            else:
                self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
        self.helpbox["state"] = "disabled"

    def _history_prev(self, *e):
        self._view_history(self._history_index - 1)
        return "break"

    def _history_next(self, *e):
        self._view_history(self._history_index + 1)
        return "break"

    def _view_history(self, index):
        # Bounds & sanity checking:
        index = max(0, min(len(self._history) - 1, index))
        if not self._history:
            return
        # Already viewing the requested history item?
        if index == self._history_index:
            return
        # Show the requested grammar.  It will get added to _history
        # only if they edit it (causing self.update() to get run.)
        self.grammarbox["state"] = "normal"
        self.grammarbox.delete("1.0", "end")
        self.grammarbox.insert("end", self._history[index][0])
        self.grammarbox.mark_set("insert", "1.0")
        self._history_index = index
        self._syntax_highlight_grammar(self._history[index][0])
        # Record the normalized grammar & regenerate the chunker.
        self.normalized_grammar = self.normalize_grammar(self._history[index][0])
        if self.normalized_grammar:
            rules = [
                RegexpChunkRule.fromstring(line)
                for line in self.normalized_grammar.split("\n")
            ]
        else:
            rules = []
        self.chunker = RegexpChunkParser(rules)
        # Show the score.
        self._eval_plot()
        # Update the devset box
        self._highlight_devset()
        if self._showing_trace:
            self.show_trace()
        # Update the grammar label
        if self._history_index < len(self._history) - 1:
            self.grammarlabel["text"] = "Grammar {}/{}:".format(
                self._history_index + 1,
                len(self._history),
            )
        else:
            self.grammarlabel["text"] = "Grammar:"

    def _devset_next(self, *e):
        self._devset_scroll("scroll", 1, "page")
        return "break"

    def _devset_prev(self, *e):
        self._devset_scroll("scroll", -1, "page")
        return "break"

    def destroy(self, *e):
        if self.top is None:
            return
        self.top.destroy()
        self.top = None

    def _devset_scroll(self, command, *args):
        N = 1  # size of a page -- one sentence.
        showing_trace = self._showing_trace
        if command == "scroll" and args[1].startswith("unit"):
            self.show_devset(self.devset_index + int(args[0]))
        elif command == "scroll" and args[1].startswith("page"):
            self.show_devset(self.devset_index + N * int(args[0]))
        elif command == "moveto":
            self.show_devset(int(float(args[0]) * self._devset_size.get()))
        else:
            assert 0, f"bad scroll command {command} {args}"
        if showing_trace:
            self.show_trace()

    def show_devset(self, index=None):
        if index is None:
            index = self.devset_index

        # Bounds checking
        index = min(max(0, index), self._devset_size.get() - 1)

        if index == self.devset_index and not self._showing_trace:
            return
        self.devset_index = index

        self._showing_trace = False
        self.trace_button["state"] = "normal"
        self.devset_button["state"] = "disabled"

        # Clear the text box.
        self.devsetbox["state"] = "normal"
        self.devsetbox["wrap"] = "word"
        self.devsetbox.delete("1.0", "end")
        self.devsetlabel["text"] = "Development Set (%d/%d)" % (
            (self.devset_index + 1, self._devset_size.get())
        )

        # Add the sentences
        sample = self.devset[self.devset_index : self.devset_index + 1]
        self.charnum = {}
        self.linenum = {0: 1}
        for sentnum, sent in enumerate(sample):
            linestr = ""
            for wordnum, (word, pos) in enumerate(sent.leaves()):
                self.charnum[sentnum, wordnum] = len(linestr)
                linestr += f"{word}/{pos} "
                self.charnum[sentnum, wordnum + 1] = len(linestr)
            self.devsetbox.insert("end", linestr[:-1] + "\n\n")

        # Highlight chunks in the dev set
        if self.chunker is not None:
            self._highlight_devset()
        self.devsetbox["state"] = "disabled"

        # Update the scrollbar
        first = self.devset_index / self._devset_size.get()
        last = (self.devset_index + 2) / self._devset_size.get()
        self.devset_scroll.set(first, last)

    def _chunks(self, tree):
        chunks = set()
        wordnum = 0
        for child in tree:
            if isinstance(child, Tree):
                if child.label() == self._chunk_label:
                    chunks.add((wordnum, wordnum + len(child)))
                wordnum += len(child)
            else:
                wordnum += 1
        return chunks

    def _syntax_highlight_grammar(self, grammar):
        if self.top is None:
            return
        self.grammarbox.tag_remove("comment", "1.0", "end")
        self.grammarbox.tag_remove("angle", "1.0", "end")
        self.grammarbox.tag_remove("brace", "1.0", "end")
        self.grammarbox.tag_add("hangindent", "1.0", "end")
        for lineno, line in enumerate(grammar.split("\n")):
            if not line.strip():
                continue
            m = re.match(r"(\\.|[^#])*(#.*)?", line)
            comment_start = None
            if m.group(2):
                comment_start = m.start(2)
                s = "%d.%d" % (lineno + 1, m.start(2))
                e = "%d.%d" % (lineno + 1, m.end(2))
                self.grammarbox.tag_add("comment", s, e)
            for m in re.finditer("[<>{}]", line):
                if comment_start is not None and m.start() >= comment_start:
                    break
                s = "%d.%d" % (lineno + 1, m.start())
                e = "%d.%d" % (lineno + 1, m.end())
                if m.group() in "<>":
                    self.grammarbox.tag_add("angle", s, e)
                else:
                    self.grammarbox.tag_add("brace", s, e)

    def _grammarcheck(self, grammar):
        if self.top is None:
            return
        self.grammarbox.tag_remove("error", "1.0", "end")
        self._grammarcheck_errs = []
        for lineno, line in enumerate(grammar.split("\n")):
            line = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", line)
            line = line.strip()
            if line:
                try:
                    RegexpChunkRule.fromstring(line)
                except ValueError as e:
                    self.grammarbox.tag_add(
                        "error", "%s.0" % (lineno + 1), "%s.0 lineend" % (lineno + 1)
                    )
        self.status["text"] = ""

    def update(self, *event):
        # Record when update was called (for grammarcheck)
        if event:
            self._last_keypress = time.time()

        # Read the grammar from the Text box.
        self.grammar = grammar = self.grammarbox.get("1.0", "end")

        # If the grammar hasn't changed, do nothing:
        normalized_grammar = self.normalize_grammar(grammar)
        if normalized_grammar == self.normalized_grammar:
            return
        else:
            self.normalized_grammar = normalized_grammar

        # If the grammar has changed, and we're looking at history,
        # then stop looking at history.
        if self._history_index < len(self._history) - 1:
            self.grammarlabel["text"] = "Grammar:"

        self._syntax_highlight_grammar(grammar)

        # The grammar has changed; try parsing it.  If it doesn't
        # parse, do nothing.  (flag error location?)
        try:
            # Note: the normalized grammar has no blank lines.
            if normalized_grammar:
                rules = [
                    RegexpChunkRule.fromstring(line)
                    for line in normalized_grammar.split("\n")
                ]
            else:
                rules = []
        except ValueError as e:
            # Use the un-normalized grammar for error highlighting.
            self._grammarcheck(grammar)
            self.chunker = None
            return

        self.chunker = RegexpChunkParser(rules)
        self.grammarbox.tag_remove("error", "1.0", "end")
        self.grammar_changed = time.time()
        # Display the results
        if self._showing_trace:
            self.show_trace()
        else:
            self._highlight_devset()
        # Start the eval demon
        if not self._eval_demon_running:
            self._eval_demon()

    def _highlight_devset(self, sample=None):
        if sample is None:
            sample = self.devset[self.devset_index : self.devset_index + 1]

        self.devsetbox.tag_remove("true-pos", "1.0", "end")
        self.devsetbox.tag_remove("false-neg", "1.0", "end")
        self.devsetbox.tag_remove("false-pos", "1.0", "end")

        # Run the grammar on the test cases.
        for sentnum, gold_tree in enumerate(sample):
            # Run the chunk parser
            test_tree = self._chunkparse(gold_tree.leaves())
            # Extract gold & test chunks
            gold_chunks = self._chunks(gold_tree)
            test_chunks = self._chunks(test_tree)
            # Compare them.
            for chunk in gold_chunks.intersection(test_chunks):
                self._color_chunk(sentnum, chunk, "true-pos")
            for chunk in gold_chunks - test_chunks:
                self._color_chunk(sentnum, chunk, "false-neg")
            for chunk in test_chunks - gold_chunks:
                self._color_chunk(sentnum, chunk, "false-pos")

    def _chunkparse(self, words):
        try:
            return self.chunker.parse(words)
        except (ValueError, IndexError) as e:
            # There's an error somewhere in the grammar, but we're not sure
            # exactly where, so just mark the whole grammar as bad.
            # E.g., this is caused by: "({<NN>})"
            self.grammarbox.tag_add("error", "1.0", "end")
            # Treat it as tagging nothing:
            return words

    def _color_chunk(self, sentnum, chunk, tag):
        start, end = chunk
        self.devsetbox.tag_add(
            tag,
            f"{self.linenum[sentnum]}.{self.charnum[sentnum, start]}",
            f"{self.linenum[sentnum]}.{self.charnum[sentnum, end] - 1}",
        )

    def reset(self):
        # Clear various variables
        self.chunker = None
        self.grammar = None
        self.normalized_grammar = None
        self.grammar_changed = 0
        self._history = []
        self._history_index = 0
        # Update the on-screen display.
        self.grammarbox.delete("1.0", "end")
        self.show_devset(0)
        self.update()
        # self._eval_plot()

    SAVE_GRAMMAR_TEMPLATE = (
        "# Regexp Chunk Parsing Grammar\n"
        "# Saved %(date)s\n"
        "#\n"
        "# Development set: %(devset)s\n"
        "#   Precision: %(precision)s\n"
        "#   Recall:    %(recall)s\n"
        "#   F-score:   %(fscore)s\n\n"
        "%(grammar)s\n"
    )

    def save_grammar(self, filename=None):
        if not filename:
            ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")]
            filename = asksaveasfilename(filetypes=ftypes, defaultextension=".chunk")
            if not filename:
                return
        if self._history and self.normalized_grammar == self.normalize_grammar(
            self._history[-1][0]
        ):
            precision, recall, fscore = (
                "%.2f%%" % (100 * v) for v in self._history[-1][1:]
            )
        elif self.chunker is None:
            precision = recall = fscore = "Grammar not well formed"
        else:
            precision = recall = fscore = "Not finished evaluation yet"

        with open(filename, "w") as outfile:
            outfile.write(
                self.SAVE_GRAMMAR_TEMPLATE
                % dict(
                    date=time.ctime(),
                    devset=self.devset_name,
                    precision=precision,
                    recall=recall,
                    fscore=fscore,
                    grammar=self.grammar.strip(),
                )
            )

    def load_grammar(self, filename=None):
        if not filename:
            ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")]
            filename = askopenfilename(filetypes=ftypes, defaultextension=".chunk")
            if not filename:
                return
        self.grammarbox.delete("1.0", "end")
        self.update()
        with open(filename) as infile:
            grammar = infile.read()
        grammar = re.sub(
            r"^\# Regexp Chunk Parsing Grammar[\s\S]*" "F-score:.*\n", "", grammar
        ).lstrip()
        self.grammarbox.insert("1.0", grammar)
        self.update()

    def save_history(self, filename=None):
        if not filename:
            ftypes = [("Chunk Gramamr History", ".txt"), ("All files", "*")]
            filename = asksaveasfilename(filetypes=ftypes, defaultextension=".txt")
            if not filename:
                return

        with open(filename, "w") as outfile:
            outfile.write("# Regexp Chunk Parsing Grammar History\n")
            outfile.write("# Saved %s\n" % time.ctime())
            outfile.write("# Development set: %s\n" % self.devset_name)
            for i, (g, p, r, f) in enumerate(self._history):
                hdr = (
                    "Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, "
                    "fscore=%.2f%%)"
                    % (i + 1, len(self._history), p * 100, r * 100, f * 100)
                )
                outfile.write("\n%s\n" % hdr)
                outfile.write("".join("  %s\n" % line for line in g.strip().split()))

            if not (
                self._history
                and self.normalized_grammar
                == self.normalize_grammar(self._history[-1][0])
            ):
                if self.chunker is None:
                    outfile.write("\nCurrent Grammar (not well-formed)\n")
                else:
                    outfile.write("\nCurrent Grammar (not evaluated)\n")
                outfile.write(
                    "".join("  %s\n" % line for line in self.grammar.strip().split())
                )

    def about(self, *e):
        ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper"
        TITLE = "About: Regular Expression Chunk Parser Application"
        try:
            from tkinter.messagebox import Message

            Message(message=ABOUT, title=TITLE).show()
        except:
            ShowText(self.top, TITLE, ABOUT)

    def set_devset_size(self, size=None):
        if size is not None:
            self._devset_size.set(size)
        self._devset_size.set(min(len(self.devset), self._devset_size.get()))
        self.show_devset(1)
        self.show_devset(0)
        # what about history?  Evaluated at diff dev set sizes!

    def resize(self, size=None):
        if size is not None:
            self._size.set(size)
        size = self._size.get()
        self._font.configure(size=-(abs(size)))
        self._smallfont.configure(size=min(-10, -(abs(size)) * 14 // 20))

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        """
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)



[docs]
def app():
    RegexpChunkApp().mainloop()



if __name__ == "__main__":
    app()

__all__ = ["app"]