# Source code for nltk.draw.dispersion

# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
A utility for displaying lexical dispersion.
"""


def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every occurrence of a target word in ``text`` is drawn as a vertical
    tick at the token's offset, on a horizontal row dedicated to that word.
    The first entry of ``words`` is placed on the top row and the last entry
    on the bottom row.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: The title shown above the plot
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raise ImportError: if matplotlib is not installed
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Row labels in bottom-to-top order: the y axis grows upwards, so reversing
    # puts the first target word on the top row.  Materialising once also lets
    # ``words`` be any iterable, not only a reversible sequence.
    row_labels = list(words)[::-1]

    # The same ordering must drive both the token -> row lookup and the tick
    # labels; otherwise each row would be labelled with the wrong word.
    word2y = {
        label.casefold() if ignore_case else label: y
        for y, label in enumerate(row_labels)
    }

    xs, ys = [], []
    for x, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        if y is not None:
            xs.append(x)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    ax.set_yticks(list(range(len(row_labels))), row_labels, color="C0")
    # One unit of padding below the bottom row and above the top row.
    ax.set_ylim(-1, len(row_labels))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax


if __name__ == "__main__":
    # Demo: plot where four character names occur across the token stream
    # of the Gutenberg corpus file "austen-sense.txt".
    import matplotlib.pyplot as plt

    from nltk.corpus import gutenberg

    words = ["Elinor", "Marianne", "Edward", "Willoughby"]
    corpus_tokens = gutenberg.words("austen-sense.txt")
    dispersion_plot(corpus_tokens, words)

    # dispersion_plot only builds the Axes; show() opens the figure window.
    plt.show()