Source code for nltk.ccg.api

# Natural Language Toolkit: CCG Categories
#
# Copyright (C) 2001-2018 NLTK Project
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
from __future__ import unicode_literals
from functools import total_ordering

from abc import ABCMeta, abstractmethod
from six import add_metaclass

from nltk.internals import raise_unorderable_types
from nltk.compat import (python_2_unicode_compatible, unicode_repr)


@add_metaclass(ABCMeta)
@total_ordering
class AbstractCCGCategory(object):
    '''
    Abstract interface shared by all categories used in combinatory
    grammars.

    Concrete subclasses are expected to set ``_comparison_key``; equality,
    ordering and hashing are all derived from it.
    '''

    @abstractmethod
    def is_primitive(self):
        """
        Returns true if the category is primitive.
        """

    @abstractmethod
    def is_function(self):
        """
        Returns true if the category is a function application.
        """

    @abstractmethod
    def is_var(self):
        """
        Returns true if the category is a variable.
        """

    @abstractmethod
    def substitute(self, substitutions):
        """
        Takes a set of (var, category) substitutions, and replaces every
        occurrence of the variable with the corresponding category.
        """

    @abstractmethod
    def can_unify(self, other):
        """
        Determines whether two categories can be unified.
         - Returns None if they cannot be unified
         - Returns a list of necessary substitutions if they can.
        """

    # Utility functions: comparison, strings and hashing.

    @abstractmethod
    def __str__(self):
        pass

    def __eq__(self, other):
        # Instances of different classes are never equal, whatever their keys.
        if self.__class__ is not other.__class__:
            return False
        return self._comparison_key == other._comparison_key

    def __ne__(self, other):
        equal = (self == other)
        return not equal

    def __lt__(self, other):
        # Only categories can be ordered against each other.
        if not isinstance(other, AbstractCCGCategory):
            raise_unorderable_types("<", self, other)
        # Categories of different classes are ordered by class name.
        if self.__class__ is not other.__class__:
            return self.__class__.__name__ < other.__class__.__name__
        return self._comparison_key < other._comparison_key

    def __hash__(self):
        # The hash of the comparison key is computed once and cached
        # on the instance.
        try:
            cached = self._hash
        except AttributeError:
            cached = self._hash = hash(self._comparison_key)
        return cached
@python_2_unicode_compatible
class CCGVar(AbstractCCGCategory):
    '''
    A variable CCG category.

    Used for conjunctions (and possibly type-raising, if implemented as a
    unary rule).
    '''
    # Next identifier to hand out; shared by all variables.
    _maxID = 0

    def __init__(self, prim_only=False):
        """Create a variable with a freshly allocated identifier.

        :param prim_only: a boolean that determines whether the variable is
                          restricted to primitives
        :type prim_only: bool
        """
        self._id = self.new_id()
        self._prim_only = prim_only
        self._comparison_key = self._id

    @classmethod
    def new_id(cls):
        """
        Class method that hands out unique variable identifiers: returns
        the current counter value and advances the counter by one.
        """
        fresh = cls._maxID
        cls._maxID = fresh + 1
        return fresh

    @classmethod
    def reset_id(cls):
        """Restart identifier allocation from zero."""
        cls._maxID = 0

    def is_primitive(self):
        return False

    def is_function(self):
        return False

    def is_var(self):
        return True

    def substitute(self, substitutions):
        """Return the category paired with this variable in
        ``substitutions`` (first match wins), or the variable itself when
        no pair mentions it.
        """
        matches = (cat for (var, cat) in substitutions if var == self)
        return next(matches, self)

    def can_unify(self, other):
        """Return the substitution binding this variable to ``other``, or
        None when the variable is restricted to primitives and ``other``
        is not primitive.
        """
        if not other.is_primitive() and self._prim_only:
            return None
        return [(self, other)]

    def id(self):
        """Return this variable's identifier."""
        return self._id

    def __str__(self):
        suffix = str(self._id)
        return "_var" + suffix
@total_ordering
@python_2_unicode_compatible
class Direction(object):
    '''
    The direction of a function application.

    Also maintains information as to which combinators may be used with
    the category.
    '''
    def __init__(self, dir, restrictions):
        self._dir = dir
        self._restrs = restrictions
        self._comparison_key = (dir, tuple(restrictions))

    # Testing the application direction
    def is_forward(self):
        return self._dir == '/'

    def is_backward(self):
        return self._dir == '\\'

    def dir(self):
        return self._dir

    def restrs(self):
        """The restrictions on the combinators.
        '.' denotes that permuting operations are disallowed
        ',' denotes that function composition is disallowed
        '_' denotes that the direction has variable restrictions.
        (This is redundant in the current implementation of type-raising)
        """
        return self._restrs

    def is_variable(self):
        return self._restrs == '_'

    # Unification and substitution of variable directions.
    # Used only if type-raising is implemented as a unary rule, as it
    # must inherit restrictions from the argument category.
    def can_unify(self, other):
        """Return the substitutions needed to unify the two directions'
        restrictions, or None if they cannot be unified.
        """
        # A variable side takes on the restrictions of the opposite side.
        if other.is_variable():
            return [('_', self.restrs())]
        if self.is_variable():
            return [('_', other.restrs())]
        # Two fixed directions unify only if their restrictions are equal.
        if self.restrs() == other.restrs():
            return []
        return None

    def substitute(self, subs):
        """Return a direction carrying the restrictions bound to '_' in
        ``subs`` when this direction is variable; otherwise return self.
        """
        if not self.is_variable():
            return self
        for (name, bound_restrs) in subs:
            if name == '_':
                return Direction(self._dir, bound_restrs)
        return self

    # Testing permitted combinators
    def can_compose(self):
        return not (',' in self._restrs)

    def can_cross(self):
        return not ('.' in self._restrs)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self._comparison_key == other._comparison_key

    def __ne__(self, other):
        equal = (self == other)
        return not equal

    def __lt__(self, other):
        if not isinstance(other, Direction):
            raise_unorderable_types("<", self, other)
        # Instances of different Direction subclasses are ordered by
        # class name.
        if self.__class__ is not other.__class__:
            return self.__class__.__name__ < other.__class__.__name__
        return self._comparison_key < other._comparison_key

    def __hash__(self):
        # The hash of the comparison key is computed once and cached
        # on the instance.
        try:
            cached = self._hash
        except AttributeError:
            cached = self._hash = hash(self._comparison_key)
        return cached

    def __str__(self):
        r_str = "".join("%s" % r for r in self._restrs)
        return "%s%s" % (self._dir, r_str)

    # The negation operator reverses the direction of the application
    def __neg__(self):
        flipped = '\\' if self._dir == '/' else '/'
        return Direction(flipped, self._restrs)
@python_2_unicode_compatible
class PrimitiveCategory(AbstractCCGCategory):
    '''
    Class representing primitive categories.
    Takes a string representation of the category, and a
    list of strings specifying the morphological subcategories.
    '''
    def __init__(self, categ, restrictions=None):
        """
        :param categ: the base category
        :param restrictions: the morphological subcategories; defaults to
                             a new empty list for each instance
        """
        # A literal ``[]`` default would be one list object shared by every
        # instance (and exposed through ``restrs()``), so build a fresh one.
        if restrictions is None:
            restrictions = []
        self._categ = categ
        self._restrs = restrictions
        self._comparison_key = (categ, tuple(restrictions))

    def is_primitive(self):
        return True

    def is_function(self):
        return False

    def is_var(self):
        return False

    def restrs(self):
        """Return the restrictions (subcategories) of this category."""
        return self._restrs

    def categ(self):
        """Return the base category."""
        return self._categ

    # Substitution does nothing to a primitive category
    def substitute(self, subs):
        return self

    # A primitive can be unified with a class of the same
    # base category, given that the other category shares all
    # of its subclasses, or with a variable.
    def can_unify(self, other):
        """
        Determine whether this category can be unified with ``other``.
         - Returns ``[(other, self)]`` if ``other`` is a variable
         - Returns ``[]`` if ``other`` is a primitive with the same base
           category that has every restriction of this category
         - Returns None otherwise
        """
        # The variable test must come before the primitive test: variables
        # report is_primitive() == False, so checking primitives first
        # would make this branch unreachable.
        if other.is_var():
            return [(other, self)]
        if not other.is_primitive():
            return None
        if other.categ() != self.categ():
            return None
        if all(restr in other.restrs() for restr in self._restrs):
            return []
        return None

    def __str__(self):
        # Truthiness rather than ``== []`` so that any empty sequence of
        # restrictions (e.g. a tuple) prints without a trailing "[]".
        if not self._restrs:
            return "%s" % self._categ
        restrictions = "[%s]" % ",".join(unicode_repr(r) for r in
                                         self._restrs)
        return "%s%s" % (self._categ, restrictions)
@python_2_unicode_compatible
class FunctionalCategory(AbstractCCGCategory):
    '''
    Class that represents a function application category.
    Consists of argument and result categories, together with
    an application direction.
    '''
    def __init__(self, res, arg, dir):
        """
        :param res: the result category
        :param arg: the argument category
        :param dir: the application direction
        """
        self._res = res
        self._arg = arg
        self._dir = dir
        self._comparison_key = (arg, dir, res)

    def is_primitive(self):
        return False

    def is_function(self):
        return True

    def is_var(self):
        return False

    # Substitution returns the category consisting of the
    # substitution applied to each of its constituents.
    def substitute(self, subs):
        """Return a new functional category whose result, direction and
        argument have each had ``subs`` applied.
        """
        sub_res = self._res.substitute(subs)
        sub_dir = self._dir.substitute(subs)
        sub_arg = self._arg.substitute(subs)
        # Build from the substituted direction; passing the original
        # direction here would discard the substitution just computed.
        return FunctionalCategory(sub_res, sub_arg, sub_dir)

    # A function can unify with another function, so long as its
    # constituents can unify, or with an unrestricted variable.
    def can_unify(self, other):
        """
        Determine whether this category can be unified with ``other``.
         - Returns ``[(other, self)]`` if ``other`` is a variable
         - Returns the combined result and argument substitutions if
           ``other`` is a function whose result, direction and argument
           all unify with this one's
         - Returns None otherwise
        """
        if other.is_var():
            return [(other, self)]
        if other.is_function():
            sa = self._res.can_unify(other.res())
            # The direction unification is only checked for success; its
            # substitutions are not part of the returned list.
            sd = self._dir.can_unify(other.dir())
            if sa is not None and sd is not None:
                # The result's substitutions are applied to both arguments
                # before they are unified.
                sb = self._arg.substitute(sa).can_unify(
                    other.arg().substitute(sa))
                if sb is not None:
                    return sa + sb
        return None

    # Constituent accessors
    def arg(self):
        """Return the argument category."""
        return self._arg

    def res(self):
        """Return the result category."""
        return self._res

    def dir(self):
        """Return the application direction."""
        return self._dir

    def __str__(self):
        return "(%s%s%s)" % (self._res, self._dir, self._arg)