Source code for nltk.sem.evaluate

# Natural Language Toolkit: Models for first-order languages with lambda
#
# Copyright (C) 2001-2018 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
# URL: <http://nltk.sourceforge.net>
# For license information, see LICENSE.TXT

#TODO:
    #- fix tracing
    #- fix iterator-based approach to existentials

"""
This module provides data structures for representing first-order
models.
"""
from __future__ import print_function, unicode_literals

from pprint import pformat
import inspect
import textwrap
import re
import sys

from six import string_types

from nltk.decorators import decorator # this used in code that is commented out
from nltk.compat import python_2_unicode_compatible

from nltk.sem.logic import (AbstractVariableExpression, AllExpression, Expression,
                            AndExpression, ApplicationExpression, EqualityExpression,
                            ExistsExpression, IffExpression, ImpExpression,
                            IndividualVariableExpression, LambdaExpression,
                            NegatedExpression, OrExpression,
                            Variable, is_indvar)


[docs]class Error(Exception): pass
[docs]class Undefined(Error): pass
[docs]def trace(f, *args, **kw): if sys.version_info[0] >= 3: argspec = inspect.getfullargspec(f) else: argspec = inspect.getargspec(f) d = dict(zip(argspec[0], args)) if d.pop('trace', None): print() for item in d.items(): print("%s => %s" % item) return f(*args, **kw)
[docs]def is_rel(s): """ Check whether a set represents a relation (of any arity). :param s: a set containing tuples of str elements :type s: set :rtype: bool """ # we have the empty relation, i.e. set() if len(s) == 0: return True # all the elements are tuples of the same length elif all(isinstance(el, tuple) for el in s) and len(max(s))==len(min(s)): return True else: raise ValueError("Set %r contains sequences of different lengths" % s)
[docs]def set2rel(s): """ Convert a set containing individuals (strings or numbers) into a set of unary tuples. Any tuples of strings already in the set are passed through unchanged. For example: - set(['a', 'b']) => set([('a',), ('b',)]) - set([3, 27]) => set([('3',), ('27',)]) :type s: set :rtype: set of tuple of str """ new = set() for elem in s: if isinstance(elem, string_types): new.add((elem,)) elif isinstance(elem, int): new.add((str(elem,))) else: new.add(elem) return new
[docs]def arity(rel): """ Check the arity of a relation. :type rel: set of tuples :rtype: int of tuple of str """ if len(rel) == 0: return 0 return len(list(rel)[0])
[docs]@python_2_unicode_compatible class Valuation(dict): """ A dictionary which represents a model-theoretic Valuation of non-logical constants. Keys are strings representing the constants to be interpreted, and values correspond to individuals (represented as strings) and n-ary relations (represented as sets of tuples of strings). An instance of ``Valuation`` will raise a KeyError exception (i.e., just behave like a standard dictionary) if indexed with an expression that is not in its list of symbols. """ def __init__(self, xs): """ :param xs: a list of (symbol, value) pairs. """ super(Valuation, self).__init__() for (sym, val) in xs: if isinstance(val, string_types) or isinstance(val, bool): self[sym] = val elif isinstance(val, set): self[sym] = set2rel(val) else: msg = textwrap.fill("Error in initializing Valuation. " "Unrecognized value for symbol '%s':\n%s" % (sym, val), width=66) raise ValueError(msg) def __getitem__(self, key): if key in self: return dict.__getitem__(self, key) else: raise Undefined("Unknown expression: '%s'" % key) def __str__(self): return pformat(self) @property def domain(self): """Set-theoretic domain of the value-space of a Valuation.""" dom = [] for val in self.values(): if isinstance(val, string_types): dom.append(val) elif not isinstance(val, bool): dom.extend([elem for tuple_ in val for elem in tuple_ if elem is not None]) return set(dom) @property def symbols(self): """The non-logical constants which the Valuation recognizes.""" return sorted(self.keys())
[docs] @classmethod def fromstring(cls, s): return read_valuation(s)
########################################## # REs used by the _read_valuation function ########################################## _VAL_SPLIT_RE = re.compile(r'\s*=+>\s*') _ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*') _TUPLES_RE = re.compile(r"""\s* (\([^)]+\)) # tuple-expression \s*""", re.VERBOSE) def _read_valuation_line(s): """ Read a line in a valuation file. Lines are expected to be of the form:: noosa => n girl => {g1, g2} chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)} :param s: input line :type s: str :return: a pair (symbol, value) :rtype: tuple """ pieces = _VAL_SPLIT_RE.split(s) symbol = pieces[0] value = pieces[1] # check whether the value is meant to be a set if value.startswith('{'): value = value[1:-1] tuple_strings = _TUPLES_RE.findall(value) # are the set elements tuples? if tuple_strings: set_elements = [] for ts in tuple_strings: ts = ts[1:-1] element = tuple(_ELEMENT_SPLIT_RE.split(ts)) set_elements.append(element) else: set_elements = _ELEMENT_SPLIT_RE.split(value) value = set(set_elements) return symbol, value
[docs]def read_valuation(s, encoding=None): """ Convert a valuation string into a valuation. :param s: a valuation string :type s: str :param encoding: the encoding of the input string, if it is binary :type encoding: str :return: a ``nltk.sem`` valuation :rtype: Valuation """ if encoding is not None: s = s.decode(encoding) statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() if line.startswith('#') or line=='': continue try: statements.append(_read_valuation_line(line)) except ValueError: raise ValueError('Unable to parse line %s: %s' % (linenum, line)) return Valuation(statements)
[docs]@python_2_unicode_compatible class Assignment(dict): """ A dictionary which represents an assignment of values to variables. An assigment can only assign values from its domain. If an unknown expression *a* is passed to a model *M*\ 's interpretation function *i*, *i* will first check whether *M*\ 's valuation assigns an interpretation to *a* as a constant, and if this fails, *i* will delegate the interpretation of *a* to *g*. *g* only assigns values to individual variables (i.e., members of the class ``IndividualVariableExpression`` in the ``logic`` module. If a variable is not assigned a value by *g*, it will raise an ``Undefined`` exception. A variable *Assignment* is a mapping from individual variables to entities in the domain. Individual variables are usually indicated with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are created using the ``Assignment`` constructor, which also takes the domain as a parameter. >>> from nltk.sem.evaluate import Assignment >>> dom = set(['u1', 'u2', 'u3', 'u4']) >>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')]) >>> g3 == {'x': 'u1', 'y': 'u2'} True There is also a ``print`` format for assignments which uses a notation closer to that in logic textbooks: >>> print(g3) g[u1/x][u2/y] It is also possible to update an assignment using the ``add`` method: >>> dom = set(['u1', 'u2', 'u3', 'u4']) >>> g4 = Assignment(dom) >>> g4.add('x', 'u1') {'x': 'u1'} With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary: >>> g4.purge() >>> g4 {} :param domain: the domain of discourse :type domain: set :param assign: a list of (varname, value) associations :type assign: list """ def __init__(self, domain, assign=None): super(Assignment, self).__init__() self.domain = domain if assign: for (var, val) in assign: assert val in self.domain,\ "'%s' is not in the domain: %s" % (val, self.domain) assert is_indvar(var),\ "Wrong format for an Individual Variable: '%s'" % var self[var] = val self.variant = None self._addvariant() def __getitem__(self, key): if key in self: return dict.__getitem__(self, key) else: raise Undefined("Not recognized as a variable: '%s'" % key)
[docs] def copy(self): new = Assignment(self.domain) new.update(self) return new
[docs] def purge(self, var=None): """ Remove one or all keys (i.e. logic variables) from an assignment, and update ``self.variant``. :param var: a Variable acting as a key for the assignment. """ if var: del self[var] else: self.clear() self._addvariant() return None
def __str__(self): """ Pretty printing for assignments. {'x', 'u'} appears as 'g[u/x]' """ gstring = "g" # Deterministic output for unit testing. variant = sorted(self.variant) for (val, var) in variant: gstring += "[%s/%s]" % (val, var) return gstring def _addvariant(self): """ Create a more pretty-printable version of the assignment. """ list_ = [] for item in self.items(): pair = (item[1], item[0]) list_.append(pair) self.variant = list_ return None
[docs] def add(self, var, val): """ Add a new variable-value pair to the assignment, and update ``self.variant``. """ assert val in self.domain,\ "%s is not in the domain %s" % (val, self.domain) assert is_indvar(var),\ "Wrong format for an Individual Variable: '%s'" % var self[var] = val self._addvariant() return self
[docs]@python_2_unicode_compatible class Model(object): """ A first order model is a domain *D* of discourse and a valuation *V*. A domain *D* is a set, and a valuation *V* is a map that associates expressions with values in the model. The domain of *V* should be a subset of *D*. Construct a new ``Model``. :type domain: set :param domain: A set of entities representing the domain of discourse of the model. :type valuation: Valuation :param valuation: the valuation of the model. :param prop: If this is set, then we are building a propositional\ model and don't require the domain of *V* to be subset of *D*. """ def __init__(self, domain, valuation): assert isinstance(domain, set) self.domain = domain self.valuation = valuation if not domain.issuperset(valuation.domain): raise Error("The valuation domain, %s, must be a subset of the model's domain, %s"\ % (valuation.domain, domain)) def __repr__(self): return "(%r, %r)" % (self.domain, self.valuation) def __str__(self): return "Domain = %s,\nValuation = \n%s" % (self.domain, self.valuation)
[docs] def evaluate(self, expr, g, trace=None): """ Read input expressions, and provide a handler for ``satisfy`` that blocks further propagation of the ``Undefined`` error. :param expr: An ``Expression`` of ``logic``. :type g: Assignment :param g: an assignment to individual variables. :rtype: bool or 'Undefined' """ try: parsed = Expression.fromstring(expr) value = self.satisfy(parsed, g, trace=trace) if trace: print() print("'%s' evaluates to %s under M, %s" % (expr, value, g)) return value except Undefined: if trace: print() print("'%s' is undefined under M, %s" % (expr, g)) return 'Undefined'
[docs] def satisfy(self, parsed, g, trace=None): """ Recursive interpretation function for a formula of first-order logic. Raises an ``Undefined`` error when ``parsed`` is an atomic string but is not a symbol or an individual variable. :return: Returns a truth value or ``Undefined`` if ``parsed`` is\ complex, and calls the interpretation function ``i`` if ``parsed``\ is atomic. :param parsed: An expression of ``logic``. :type g: Assignment :param g: an assignment to individual variables. """ if isinstance(parsed, ApplicationExpression): function, arguments = parsed.uncurry() if isinstance(function, AbstractVariableExpression): #It's a predicate expression ("P(x,y)"), so used uncurried arguments funval = self.satisfy(function, g) argvals = tuple(self.satisfy(arg, g) for arg in arguments) return argvals in funval else: #It must be a lambda expression, so use curried form funval = self.satisfy(parsed.function, g) argval = self.satisfy(parsed.argument, g) return funval[argval] elif isinstance(parsed, NegatedExpression): return not self.satisfy(parsed.term, g) elif isinstance(parsed, AndExpression): return self.satisfy(parsed.first, g) and \ self.satisfy(parsed.second, g) elif isinstance(parsed, OrExpression): return self.satisfy(parsed.first, g) or \ self.satisfy(parsed.second, g) elif isinstance(parsed, ImpExpression): return (not self.satisfy(parsed.first, g)) or \ self.satisfy(parsed.second, g) elif isinstance(parsed, IffExpression): return self.satisfy(parsed.first, g) == \ self.satisfy(parsed.second, g) elif isinstance(parsed, EqualityExpression): return self.satisfy(parsed.first, g) == \ self.satisfy(parsed.second, g) elif isinstance(parsed, AllExpression): new_g = g.copy() for u in self.domain: new_g.add(parsed.variable.name, u) if not self.satisfy(parsed.term, new_g): return False return True elif isinstance(parsed, ExistsExpression): new_g = g.copy() for u in self.domain: new_g.add(parsed.variable.name, u) if self.satisfy(parsed.term, new_g): return True return False elif isinstance(parsed, LambdaExpression): cf = {} var = parsed.variable.name for u in self.domain: val = self.satisfy(parsed.term, g.add(var, u)) # NB the dict would be a lot smaller if we do this: # if val: cf[u] = val # But then need to deal with cases where f(a) should yield # a function rather than just False. cf[u] = val return cf else: return self.i(parsed, g, trace)
#@decorator(trace_eval)
[docs] def i(self, parsed, g, trace=False): """ An interpretation function. Assuming that ``parsed`` is atomic: - if ``parsed`` is a non-logical constant, calls the valuation *V* - else if ``parsed`` is an individual variable, calls assignment *g* - else returns ``Undefined``. :param parsed: an ``Expression`` of ``logic``. :type g: Assignment :param g: an assignment to individual variables. :return: a semantic value """ # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols # and also be an IndividualVariableExpression. We want to catch this first case. # So there is a procedural consequence to the ordering of clauses here: if parsed.variable.name in self.valuation.symbols: return self.valuation[parsed.variable.name] elif isinstance(parsed, IndividualVariableExpression): return g[parsed.variable.name] else: raise Undefined("Can't find a value for %s" % parsed)
[docs] def satisfiers(self, parsed, varex, g, trace=None, nesting=0): """ Generate the entities from the model's domain that satisfy an open formula. :param parsed: an open formula :type parsed: Expression :param varex: the relevant free individual variable in ``parsed``. :type varex: VariableExpression or str :param g: a variable assignment :type g: Assignment :return: a set of the entities that satisfy ``parsed``. """ spacer = ' ' indent = spacer + (spacer * nesting) candidates = [] if isinstance(varex, string_types): var = Variable(varex) else: var = varex if var in parsed.free(): if trace: print() print((spacer * nesting) + "Open formula is '%s' with assignment %s" % (parsed, g)) for u in self.domain: new_g = g.copy() new_g.add(var.name, u) if trace and trace > 1: lowtrace = trace-1 else: lowtrace = 0 value = self.satisfy(parsed, new_g, lowtrace) if trace: print(indent + "(trying assignment %s)" % new_g) # parsed == False under g[u/var]? if value == False: if trace: print(indent + "value of '%s' under %s is False" % (parsed, new_g)) # so g[u/var] is a satisfying assignment else: candidates.append(u) if trace: print(indent + "value of '%s' under %s is %s" % (parsed, new_g, value)) result = set(c for c in candidates) # var isn't free in parsed else: raise Undefined("%s is not free in %s" % (var.name, parsed)) return result
#////////////////////////////////////////////////////////////////////// # Demo.. #////////////////////////////////////////////////////////////////////// # number of spacer chars mult = 30 # Demo 1: Propositional Logic #################
[docs]def propdemo(trace=None): """Example of a propositional model.""" global val1, dom1, m1, g1 val1 = Valuation([('P', True), ('Q', True), ('R', False)]) dom1 = set([]) m1 = Model(dom1, val1) g1 = Assignment(dom1) print() print('*' * mult) print("Propositional Formulas Demo") print('*' * mult) print('(Propositional constants treated as nullary predicates)') print() print("Model m1:\n", m1) print('*' * mult) sentences = [ '(P & Q)', '(P & R)', '- P', '- R', '- - P', '- (P & R)', '(P | R)', '(R | P)', '(R | R)', '(- P | R)', '(P | - P)', '(P -> Q)', '(P -> R)', '(R -> P)', '(P <-> P)', '(R <-> R)', '(P <-> R)', ] for sent in sentences: if trace: print() m1.evaluate(sent, g1, trace) else: print("The value of '%s' is: %s" % (sent, m1.evaluate(sent, g1)))
# Demo 2: FOL Model #############
[docs]def folmodel(quiet=False, trace=None): """Example of a first-order model.""" global val2, v2, dom2, m2, g2 v2 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\ ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])), ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] val2 = Valuation(v2) dom2 = val2.domain m2 = Model(dom2, val2) g2 = Assignment(dom2, [('x', 'b1'), ('y', 'g2')]) if not quiet: print() print('*' * mult) print("Models Demo") print("*" * mult) print("Model m2:\n", "-" * 14,"\n", m2) print("Variable assignment = ", g2) exprs = ['adam', 'boy', 'love', 'walks', 'x', 'y', 'z'] parsed_exprs = [Expression.fromstring(e) for e in exprs] print() for parsed in parsed_exprs: try: print("The interpretation of '%s' in m2 is %s" % (parsed, m2.i(parsed, g2))) except Undefined: print("The interpretation of '%s' in m2 is Undefined" % parsed) applications = [('boy', ('adam')), ('walks', ('adam',)), ('love', ('adam', 'y')), ('love', ('y', 'adam'))] for (fun, args) in applications: try: funval = m2.i(Expression.fromstring(fun), g2) argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args) print("%s(%s) evaluates to %s" % (fun, args, argsval in funval)) except Undefined: print("%s(%s) evaluates to Undefined" % (fun, args))
# Demo 3: FOL #########
[docs]def foldemo(trace=None): """ Interpretation of closed expressions in a first-order model. """ folmodel(quiet=True) print() print('*' * mult) print("FOL Formulas Demo") print('*' * mult) formulas = [ 'love (adam, betty)', '(adam = mia)', '\\x. (boy(x) | girl(x))', '\\x. boy(x)(adam)', '\\x y. love(x, y)', '\\x y. love(x, y)(adam)(betty)', '\\x y. love(x, y)(adam, betty)', '\\x y. (boy(x) & love(x, y))', '\\x. exists y. (boy(x) & love(x, y))', 'exists z1. boy(z1)', 'exists x. (boy(x) & -(x = adam))', 'exists x. (boy(x) & all y. love(y, x))', 'all x. (boy(x) | girl(x))', 'all x. (girl(x) -> exists y. boy(y) & love(x, y))', #Every girl loves exists boy. 'exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', #There is exists boy that every girl loves. 'exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', #exists boy loves every girl. 'all x. (dog(x) -> - girl(x))', 'exists x. exists y. (love(x, y) & love(x, y))' ] for fmla in formulas: g2.purge() if trace: m2.evaluate(fmla, g2, trace) else: print("The value of '%s' is: %s" % (fmla, m2.evaluate(fmla, g2)))
# Demo 3: Satisfaction #############
[docs]def satdemo(trace=None): """Satisfiers of an open formula in a first order model.""" print() print('*' * mult) print("Satisfiers Demo") print('*' * mult) folmodel(quiet=True) formulas = [ 'boy(x)', '(x = x)', '(boy(x) | girl(x))', '(boy(x) & girl(x))', 'love(adam, x)', 'love(x, adam)', '-(x = adam)', 'exists z22. love(x, z22)', 'exists y. love(y, x)', 'all y. (girl(y) -> love(x, y))', 'all y. (girl(y) -> love(y, x))', 'all y. (girl(y) -> (boy(x) & love(y, x)))', '(boy(x) & all y. (girl(y) -> love(x, y)))', '(boy(x) & all y. (girl(y) -> love(y, x)))', '(boy(x) & exists y. (girl(y) & love(y, x)))', '(girl(x) -> dog(x))', 'all y. (dog(y) -> (x = y))', 'exists y. love(y, x)', 'exists y. (love(adam, y) & love(y, x))' ] if trace: print(m2) for fmla in formulas: print(fmla) Expression.fromstring(fmla) parsed = [Expression.fromstring(fmla) for fmla in formulas] for p in parsed: g2.purge() print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, 'x', g2, trace)))
[docs]def demo(num=0, trace=None): """ Run exists demos. - num = 1: propositional logic demo - num = 2: first order model demo (only if trace is set) - num = 3: first order sentences demo - num = 4: satisfaction of open formulas demo - any other value: run all the demos :param trace: trace = 1, or trace = 2 for more verbose tracing """ demos = { 1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo} try: demos[num](trace=trace) except KeyError: for num in demos: demos[num](trace=trace)
if __name__ == "__main__": demo(2, trace=0)