# Source code for nltk.draw.dispersion

# Natural Language Toolkit: Dispersion Plots
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
A utility for displaying lexical dispersion.
"""

def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every token of ``text`` that matches one of the target ``words`` is
    drawn as a vertical tick at its token offset. Each target word gets its
    own row, with the first entry of ``words`` on the top row.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title set on the returned Axes
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raise ImportError: if matplotlib cannot be imported
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Materialise once so any iterable of words can be reversed and measured.
    words = list(words)
    # Rows are numbered bottom-up, so reverse to keep words[0] on the top row.
    labels = words[::-1]
    word2y = {
        label.casefold() if ignore_case else label: y
        for y, label in enumerate(labels)
    }

    xs, ys = [], []
    for x, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        if y is not None:
            xs.append(x)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    # The tick labels must use the same reversed order as the row indices;
    # passing the unreversed word list attaches labels to the wrong rows.
    ax.set_yticks(list(range(len(labels))), labels, color="C0")
    ax.set_ylim(-1, len(labels))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    from nltk.corpus import gutenberg

    # Demo: plot where four character names occur in the Gutenberg corpus
    # file "austen-sense.txt".
    words = ["Elinor", "Marianne", "Edward", "Willoughby"]
    dispersion_plot(gutenberg.words("austen-sense.txt"), words)
    # dispersion_plot only builds and returns the Axes; the figure has to be
    # shown explicitly, otherwise the demo exits without displaying anything.
    plt.show()