AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/patsy/desc.py

# This file is part of Patsy
# Copyright (C) 2011-2012 Nathaniel Smith <njs@pobox.com>
# See file LICENSE.txt for license information.

# This file defines the ModelDesc class, which describes a model at a high
# level, as a list of interactions of factors. It also has the code to convert
# a formula parse tree (from patsy.parse_formula) into a ModelDesc.

from __future__ import print_function

import six
from patsy import PatsyError
from patsy.parse_formula import ParseNode, Token, parse_formula
from patsy.eval import EvalEnvironment, EvalFactor
from patsy.util import uniqueify_list
from patsy.util import repr_pretty_delegate, repr_pretty_impl
from patsy.util import no_pickling, assert_no_pickling

# These are made available in the patsy.* namespace
__all__ = ["Term", "ModelDesc", "INTERCEPT"]

# One might think it would make more sense for 'factors' to be a set, rather
# than a tuple-with-guaranteed-unique-entries-that-compares-like-a-set. The
# reason we do it this way is that it preserves the order that the user typed
# and is expecting, which then ends up producing nicer names in our final
# output, nicer column ordering, etc. (A similar comment applies to the
# ordering of terms in ModelDesc objects as a whole.)
class Term(object):
    """The interaction between a collection of factor objects.

    This is one of the basic types used in representing formulas, and
    corresponds to an expression like ``"a:b:c"`` in a formula string.
    For details, see :ref:`formulas` and :ref:`expert-model-specification`.

    Terms are hashable and compare by value.

    Attributes:
    
    .. attribute:: factors

       A tuple of factor objects.
    """
    def __init__(self, factors):
        self.factors = tuple(uniqueify_list(factors))

    def __eq__(self, other):
        return (isinstance(other, Term)
                and frozenset(other.factors) == frozenset(self.factors))

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash((Term, frozenset(self.factors)))

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        assert not cycle
        repr_pretty_impl(p, self, [list(self.factors)])

    def name(self):
        """Return a human-readable name for this term."""
        if self.factors:
            return ":".join([f.name() for f in self.factors])
        else:
            return "Intercept"

    __getstate__ = no_pickling

INTERCEPT = Term([])

class _MockFactor(object):
    def __init__(self, name):
        self._name = name

    def name(self):
        return self._name

def test_Term():
    assert Term([1, 2, 1]).factors == (1, 2)
    assert Term([1, 2]) == Term([2, 1])
    assert hash(Term([1, 2])) == hash(Term([2, 1]))
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    assert Term([f1, f2]).name() == "a:b"
    assert Term([f2, f1]).name() == "b:a"
    assert Term([]).name() == "Intercept"

    assert_no_pickling(Term([]))

class ModelDesc(object):
    """A simple container representing the termlists parsed from a formula.

    This is a simple container object which has exactly the same
    representational power as a formula string, but is a Python object
    instead. You can construct one by hand, and pass it to functions like
    :func:`dmatrix` or :func:`incr_dbuilder` that are expecting a formula
    string, but without having to do any messy string manipulation. For
    details see :ref:`expert-model-specification`.

    Attributes:

    .. attribute:: lhs_termlist
                   rhs_termlist

       Two termlists representing the left- and right-hand sides of a
       formula, suitable for passing to :func:`design_matrix_builders`.
    """
    def __init__(self, lhs_termlist, rhs_termlist):
        self.lhs_termlist = uniqueify_list(lhs_termlist)
        self.rhs_termlist = uniqueify_list(rhs_termlist)

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        assert not cycle
        return repr_pretty_impl(p, self,
                                [],
                                [("lhs_termlist", self.lhs_termlist),
                                 ("rhs_termlist", self.rhs_termlist)])

    def describe(self):
        """Returns a human-readable representation of this :class:`ModelDesc`
        in pseudo-formula notation.

        .. warning:: There is no guarantee that the strings returned by this
           function can be parsed as formulas. They are best-effort
           descriptions intended for human users. However, if this ModelDesc
           was created by parsing a formula, then it should work in
           practice. If you *really* have to.
        """
        def term_code(term):
            if term == INTERCEPT:
                return "1"
            else:
                return term.name()
        result = " + ".join([term_code(term) for term in self.lhs_termlist])
        if result:
            result += " ~ "
        else:
            result += "~ "
        if self.rhs_termlist == [INTERCEPT]:
            result += term_code(INTERCEPT)
        else:
            term_names = []
            if INTERCEPT not in self.rhs_termlist:
                term_names.append("0")
            term_names += [term_code(term) for term in self.rhs_termlist
                           if term != INTERCEPT]
            result += " + ".join(term_names)
        return result
            
    @classmethod
    def from_formula(cls, tree_or_string):
        """Construct a :class:`ModelDesc` from a formula string.

        :arg tree_or_string: A formula string. (Or an unevaluated formula
          parse tree, but the API for generating those isn't public yet. Shh,
          it can be our secret.)
        :returns: A new :class:`ModelDesc`.
        """
        if isinstance(tree_or_string, ParseNode):
            tree = tree_or_string
        else:
            tree = parse_formula(tree_or_string)
        value = Evaluator().eval(tree, require_evalexpr=False)
        assert isinstance(value, cls)
        return value

    __getstate__ = no_pickling

def test_ModelDesc():
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    m = ModelDesc([INTERCEPT, Term([f1])], [Term([f1]), Term([f1, f2])])
    assert m.lhs_termlist == [INTERCEPT, Term([f1])]
    assert m.rhs_termlist == [Term([f1]), Term([f1, f2])]
    print(m.describe())
    assert m.describe() == "1 + a ~ 0 + a + a:b"

    assert_no_pickling(m)

    assert ModelDesc([], []).describe() == "~ 0"
    assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"
    assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1"
    assert (ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe()
            == "1 ~ b")

def test_ModelDesc_from_formula():
    for input in ("y ~ x", parse_formula("y ~ x")):
        md = ModelDesc.from_formula(input)
        assert md.lhs_termlist == [Term([EvalFactor("y")]),]
        assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])]

class IntermediateExpr(object):
    "This class holds an intermediate result while we're evaluating a tree."
    def __init__(self, intercept, intercept_origin, intercept_removed, terms):
        self.intercept = intercept
        self.intercept_origin = intercept_origin
        self.intercept_removed =intercept_removed
        self.terms = tuple(uniqueify_list(terms))
        if self.intercept:
            assert self.intercept_origin
        assert not (self.intercept and self.intercept_removed)

    __repr__ = repr_pretty_delegate
    def _pretty_repr_(self, p, cycle): # pragma: no cover
        assert not cycle
        return repr_pretty_impl(p, self,
                                [self.intercept, self.intercept_origin,
                                 self.intercept_removed, self.terms])

    __getstate__ = no_pickling

def _maybe_add_intercept(doit, terms):
    if doit:
        return (INTERCEPT,) + terms
    else:
        return terms

def _eval_any_tilde(evaluator, tree):
    exprs = [evaluator.eval(arg) for arg in tree.args]    
    if len(exprs) == 1:
        # Formula was like: "~ foo"
        # We pretend that instead it was like: "0 ~ foo"
        exprs.insert(0, IntermediateExpr(False, None, True, []))
    assert len(exprs) == 2
    # Note that only the RHS gets an implicit intercept:
    return ModelDesc(_maybe_add_intercept(exprs[0].intercept, exprs[0].terms),
                     _maybe_add_intercept(not exprs[1].intercept_removed,
                                          exprs[1].terms))

def _eval_binary_plus(evaluator, tree):
    left_expr = evaluator.eval(tree.args[0])
    if tree.args[1].type == "ZERO":
        return IntermediateExpr(False, None, True, left_expr.terms)
    else:
        right_expr = evaluator.eval(tree.args[1])
        if right_expr.intercept:
            return IntermediateExpr(True, right_expr.intercept_origin, False,
                                    left_expr.terms + right_expr.terms)
        else:
            return IntermediateExpr(left_expr.intercept,
                                    left_expr.intercept_origin,
                                    left_expr.intercept_removed,
                                    left_expr.terms + right_expr.terms)
    

def _eval_binary_minus(evaluator, tree):
    left_expr = evaluator.eval(tree.args[0])
    if tree.args[1].type == "ZERO":
        return IntermediateExpr(True, tree.args[1], False,
                                left_expr.terms)
    elif tree.args[1].type == "ONE":
        return IntermediateExpr(False, None, True, left_expr.terms)
    else:
        right_expr = evaluator.eval(tree.args[1])
        terms = [term for term in left_expr.terms
                 if term not in right_expr.terms]
        if right_expr.intercept:
            return IntermediateExpr(False, None, True, terms)
        else:
            return IntermediateExpr(left_expr.intercept,
                                    left_expr.intercept_origin,
                                    left_expr.intercept_removed,
                                    terms)

def _check_interactable(expr):
    if expr.intercept:
        raise PatsyError("intercept term cannot interact with "
                            "anything else", expr.intercept_origin)

def _interaction(left_expr, right_expr):
    for expr in (left_expr, right_expr):
        _check_interactable(expr)
    terms = []
    for l_term in left_expr.terms:
        for r_term in right_expr.terms:
            terms.append(Term(l_term.factors + r_term.factors))
    return IntermediateExpr(False, None, False, terms)

def _eval_binary_prod(evaluator, tree):
    exprs = [evaluator.eval(arg) for arg in tree.args]
    return IntermediateExpr(False, None, False,
                            exprs[0].terms
                            + exprs[1].terms
                            + _interaction(*exprs).terms)

# Division (nesting) is right-ward distributive:
#   a / (b + c) -> a/b + a/c -> a + a:b + a:c
# But left-ward, in S/R it has a quirky behavior:
#   (a + b)/c -> a + b + a:b:c
# This is because it's meaningless for a factor to be "nested" under two
# different factors. (This is documented in Chambers and Hastie (page 30) as a
# "Slightly more subtle..." rule, with no further elaboration. Hopefully we
# will do better.)
def _eval_binary_div(evaluator, tree):
    left_expr = evaluator.eval(tree.args[0])
    right_expr = evaluator.eval(tree.args[1])
    terms = list(left_expr.terms)
    _check_interactable(left_expr)
    # Build a single giant combined term for everything on the left:
    left_factors = []
    for term in left_expr.terms:
        left_factors += list(term.factors)
    left_combined_expr = IntermediateExpr(False, None, False,
                                          [Term(left_factors)])
    # Then interact it with everything on the right:
    terms += list(_interaction(left_combined_expr, right_expr).terms)
    return IntermediateExpr(False, None, False, terms)

def _eval_binary_interact(evaluator, tree):
    exprs = [evaluator.eval(arg) for arg in tree.args]
    return _interaction(*exprs)

def _eval_binary_power(evaluator, tree):
    left_expr = evaluator.eval(tree.args[0])
    _check_interactable(left_expr)
    power = -1
    if tree.args[1].type in ("ONE", "NUMBER"):
        expr = tree.args[1].token.extra
        try:
            power = int(expr)
        except ValueError:
            pass
    if power < 1:
        raise PatsyError("'**' requires a positive integer", tree.args[1])
    all_terms = left_expr.terms
    big_expr = left_expr
    # Small optimization: (a + b)**100 is just the same as (a + b)**2.
    power = min(len(left_expr.terms), power)
    for i in range(1, power):
        big_expr = _interaction(left_expr, big_expr)
        all_terms = all_terms + big_expr.terms
    return IntermediateExpr(False, None, False, all_terms)

def _eval_unary_plus(evaluator, tree):
    return evaluator.eval(tree.args[0])

def _eval_unary_minus(evaluator, tree):
    if tree.args[0].type == "ZERO":
        return IntermediateExpr(True, tree.origin, False, [])
    elif tree.args[0].type == "ONE":
        return IntermediateExpr(False, None, True, [])
    else:
        raise PatsyError("Unary minus can only be applied to 1 or 0", tree)

def _eval_zero(evaluator, tree):
    return IntermediateExpr(False, None, True, [])
    
def _eval_one(evaluator, tree):
    return IntermediateExpr(True, tree.origin, False, [])

def _eval_number(evaluator, tree):
    raise PatsyError("numbers besides '0' and '1' are "
                        "only allowed with **", tree)

def _eval_python_expr(evaluator, tree):
    factor = EvalFactor(tree.token.extra, origin=tree.origin)
    return IntermediateExpr(False, None, False, [Term([factor])])

class Evaluator(object):
    def __init__(self):
        self._evaluators = {}
        self.add_op("~", 2, _eval_any_tilde)
        self.add_op("~", 1, _eval_any_tilde)

        self.add_op("+", 2, _eval_binary_plus)
        self.add_op("-", 2, _eval_binary_minus)
        self.add_op("*", 2, _eval_binary_prod)
        self.add_op("/", 2, _eval_binary_div)
        self.add_op(":", 2, _eval_binary_interact)
        self.add_op("**", 2, _eval_binary_power)

        self.add_op("+", 1, _eval_unary_plus)
        self.add_op("-", 1, _eval_unary_minus)

        self.add_op("ZERO", 0, _eval_zero)
        self.add_op("ONE", 0, _eval_one)
        self.add_op("NUMBER", 0, _eval_number)
        self.add_op("PYTHON_EXPR", 0, _eval_python_expr)

        # Not used by Patsy -- provided for the convenience of eventual
        # user-defined operators.
        self.stash = {}

    # This should not be considered a public API yet (to use for actually
    # adding new operator semantics) because I wrote in some of the relevant
    # code sort of speculatively, but it isn't actually tested.
    def add_op(self, op, arity, evaluator):
        self._evaluators[op, arity] = evaluator

    def eval(self, tree, require_evalexpr=True):
        result = None
        assert isinstance(tree, ParseNode)
        key = (tree.type, len(tree.args))
        if key not in self._evaluators:
            raise PatsyError("I don't know how to evaluate this "
                                "'%s' operator" % (tree.type,),
                                tree.token)
        result = self._evaluators[key](self, tree)
        if require_evalexpr and not isinstance(result, IntermediateExpr):
            if isinstance(result, ModelDesc):
                raise PatsyError("~ can only be used once, and "
                                    "only at the top level",
                                    tree)
            else:
                raise PatsyError("custom operator returned an "
                                    "object that I don't know how to "
                                    "handle", tree)
        return result

#############

_eval_tests = {
    "": (True, []),
    " ": (True, []),
    " \n ": (True, []),
    "a": (True, ["a"]),

    "1": (True, []),
    "0": (False, []),
    "- 1": (False, []),
    "- 0": (True, []),
    "+ 1": (True, []),
    "+ 0": (False, []),
    "0 + 1": (True, []),
    "1 + 0": (False, []),
    "1 - 0": (True, []),
    "0 - 1": (False, []),
    
    "1 + a": (True, ["a"]),
    "0 + a": (False, ["a"]),
    "a - 1": (False, ["a"]),
    "a - 0": (True, ["a"]),
    "1 - a": (True, []),

    "a + b": (True, ["a", "b"]),
    "(a + b)": (True, ["a", "b"]),
    "a + ((((b))))": (True, ["a", "b"]),
    "a + ((((+b))))": (True, ["a", "b"]),
    "a + ((((b - a))))": (True, ["a", "b"]),

    "a + a + a": (True, ["a"]),

    "a + (b - a)": (True, ["a", "b"]),

    "a + np.log(a, base=10)": (True, ["a", "np.log(a, base=10)"]),
    # Note different spacing:
    "a + np.log(a, base=10) - np . log(a , base = 10)": (True, ["a"]),
    
    "a + (I(b) + c)": (True, ["a", "I(b)", "c"]),
    "a + I(b + c)": (True, ["a", "I(b + c)"]),

    "a:b": (True, [("a", "b")]),
    "a:b:a": (True, [("a", "b")]),
    "a:(b + c)": (True, [("a", "b"), ("a", "c")]),
    "(a + b):c": (True, [("a", "c"), ("b", "c")]),
    "a:(b - c)": (True, [("a", "b")]),
    "c + a:c + a:(b - c)": (True, ["c", ("a", "c"), ("a", "b")]),
    "(a - b):c": (True, [("a", "c")]),
    "b + b:c + (a - b):c": (True, ["b", ("b", "c"), ("a", "c")]),

    "a:b - a:b": (True, []),
    "a:b - b:a": (True, []),

    "1 - (a + b)": (True, []),
    "a + b - (a + b)": (True, []),

    "a * b": (True, ["a", "b", ("a", "b")]),
    "a * b * a": (True, ["a", "b", ("a", "b")]),
    "a * (b + c)": (True, ["a", "b", "c", ("a", "b"), ("a", "c")]),
    "(a + b) * c": (True, ["a", "b", "c", ("a", "c"), ("b", "c")]),
    "a * (b - c)": (True, ["a", "b", ("a", "b")]),
    "c + a:c + a * (b - c)": (True, ["c", ("a", "c"), "a", "b", ("a", "b")]),
    "(a - b) * c": (True, ["a", "c", ("a", "c")]),
    "b + b:c + (a - b) * c": (True, ["b", ("b", "c"), "a", "c", ("a", "c")]),

    "a/b": (True, ["a", ("a", "b")]),
    "(a + b)/c": (True, ["a", "b", ("a", "b", "c")]),
    "b + b:c + (a - b)/c": (True, ["b", ("b", "c"), "a", ("a", "c")]),
    "a/(b + c)": (True, ["a", ("a", "b"), ("a", "c")]),

    "a ** 2": (True, ["a"]),
    "(a + b + c + d) ** 2": (True, ["a", "b", "c", "d",
                                    ("a", "b"), ("a", "c"), ("a", "d"),
                                    ("b", "c"), ("b", "d"), ("c", "d")]),
    "(a + b + c + d) ** 3": (True, ["a", "b", "c", "d",
                                    ("a", "b"), ("a", "c"), ("a", "d"),
                                    ("b", "c"), ("b", "d"), ("c", "d"),
                                    ("a", "b", "c"), ("a", "b", "d"),
                                    ("a", "c", "d"), ("b", "c", "d")]),

    "a + +a": (True, ["a"]),

    "~ a + b": (True, ["a", "b"]),
    "~ a*b": (True, ["a", "b", ("a", "b")]),
    "~ a*b + 0": (False, ["a", "b", ("a", "b")]),
    "~ -1": (False, []),

    "0 ~ a + b": (True, ["a", "b"]),
    "1 ~ a + b": (True, [], True, ["a", "b"]),
    "y ~ a + b": (False, ["y"], True, ["a", "b"]),
    "0 + y ~ a + b": (False, ["y"], True, ["a", "b"]),
    "0 + y * z ~ a + b": (False, ["y", "z", ("y", "z")], True, ["a", "b"]),
    "-1 ~ 1": (False, [], True, []),
    "1 + y ~ a + b": (True, ["y"], True, ["a", "b"]),

    # Check precedence:
    "a + b * c": (True, ["a", "b", "c", ("b", "c")]),
    "a * b + c": (True, ["a", "b", ("a", "b"), "c"]),
    "a * b - a": (True, ["b", ("a", "b")]),
    "a + b / c": (True, ["a", "b", ("b", "c")]),
    "a / b + c": (True, ["a", ("a", "b"), "c"]),
    "a*b:c": (True, ["a", ("b", "c"), ("a", "b", "c")]),
    "a:b*c": (True, [("a", "b"), "c", ("a", "b", "c")]),

    # Intercept handling:
    "~ 1 + 1 + 0 + 1": (True, []),
    "~ 0 + 1 + 0": (False, []),
    "~ 0 - 1 - 1 + 0 + 1": (True, []),
    "~ 1 - 1": (False, []),
    "~ 0 + a + 1": (True, ["a"]),
    "~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising!
    "~ 0 + (a + 1)": (True, ["a"]), # Also correct!
    "~ 1 - (a + 1)": (False, []),
}

# <> mark off where the error should be reported:
_eval_error_tests = [
    "a <+>",
    "a + <(>",

    "b + <(-a)>",

    "a:<1>",
    "(a + <1>)*b",

    "a + <2>",
    "a + <1.0>",
    # eh, catching this is a hassle, we'll just leave the user some rope if
    # they really want it:
    #"a + <0x1>",

    "a ** <b>",
    "a ** <(1 + 1)>",
    "a ** <1.5>",

    "a + b <# asdf>",

    "<)>",
    "a + <)>",
    "<*> a",
    "a + <*>",

    "a + <foo[bar>",
    "a + <foo{bar>",
    "a + <foo(bar>",

    "a + <[bar>",
    "a + <{bar>",

    "a + <{bar[]>",

    "a + foo<]>bar",
    "a + foo[]<]>bar",
    "a + foo{}<}>bar",
    "a + foo<)>bar",

    "a + b<)>",
    "(a) <.>",

    "<(>a + b",

    "<y ~ a> ~ b",
    "y ~ <(a ~ b)>",
    "<~ a> ~ b",
    "~ <(a ~ b)>",

    "1 + <-(a + b)>",

    "<- a>",
    "a + <-a**2>",
]

def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover
    if expected_intercept:
        expecteds = [()] + expecteds
    assert len(terms) == len(expecteds)
    for term, expected in zip(terms, expecteds):
        if isinstance(term, Term):
            if isinstance(expected, str):
                expected = (expected,)
            assert term.factors == tuple([EvalFactor(s) for s in expected])
        else:
            assert term == expected

def _do_eval_formula_tests(tests): # pragma: no cover
    for code, result in six.iteritems(tests):
        if len(result) == 2:
            result = (False, []) + result
        model_desc = ModelDesc.from_formula(code)
        print(repr(code))
        print(result)
        print(model_desc)
        lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result
        _assert_terms_match(model_desc.lhs_termlist,
                            lhs_intercept, lhs_termlist)
        _assert_terms_match(model_desc.rhs_termlist,
                            rhs_intercept, rhs_termlist)

def test_eval_formula():
    _do_eval_formula_tests(_eval_tests)

def test_eval_formula_error_reporting():
    from patsy.parse_formula import _parsing_error_test
    parse_fn = lambda formula: ModelDesc.from_formula(formula)
    _parsing_error_test(parse_fn, _eval_error_tests)

def test_formula_factor_origin():
    from patsy.origin import Origin
    desc = ModelDesc.from_formula("a + b")
    assert (desc.rhs_termlist[1].factors[0].origin
            == Origin("a + b", 0, 1))
    assert (desc.rhs_termlist[2].factors[0].origin
            == Origin("a + b", 4, 5))
lab 1 is done 2024-10-02 22:15:59 +04:00			`# This file is part of Patsy`
			`# Copyright (C) 2011-2012 Nathaniel Smith <njs@pobox.com>`
			`# See file LICENSE.txt for license information.`

			`# This file defines the ModelDesc class, which describes a model at a high`
			`# level, as a list of interactions of factors. It also has the code to convert`
			`# a formula parse tree (from patsy.parse_formula) into a ModelDesc.`

			`from __future__ import print_function`

			`import six`
			`from patsy import PatsyError`
			`from patsy.parse_formula import ParseNode, Token, parse_formula`
			`from patsy.eval import EvalEnvironment, EvalFactor`
			`from patsy.util import uniqueify_list`
			`from patsy.util import repr_pretty_delegate, repr_pretty_impl`
			`from patsy.util import no_pickling, assert_no_pickling`

			`# These are made available in the patsy.* namespace`
			`__all__ = ["Term", "ModelDesc", "INTERCEPT"]`

			`# One might think it would make more sense for 'factors' to be a set, rather`
			`# than a tuple-with-guaranteed-unique-entries-that-compares-like-a-set. The`
			`# reason we do it this way is that it preserves the order that the user typed`
			`# and is expecting, which then ends up producing nicer names in our final`
			`# output, nicer column ordering, etc. (A similar comment applies to the`
			`# ordering of terms in ModelDesc objects as a whole.)`
			`class Term(object):`
			`"""The interaction between a collection of factor objects.`

			`This is one of the basic types used in representing formulas, and`
			corresponds to an expression like ``"a:b:c"`` in a formula string.
			For details, see :ref:`formulas` and :ref:`expert-model-specification`.

			`Terms are hashable and compare by value.`

			`Attributes:`

			`.. attribute:: factors`

			`A tuple of factor objects.`
			`"""`
			`def __init__(self, factors):`
			`self.factors = tuple(uniqueify_list(factors))`

			`def __eq__(self, other):`
			`return (isinstance(other, Term)`
			`and frozenset(other.factors) == frozenset(self.factors))`

			`def __ne__(self, other):`
			`return not self == other`

			`def __hash__(self):`
			`return hash((Term, frozenset(self.factors)))`

			`__repr__ = repr_pretty_delegate`
			`def _repr_pretty_(self, p, cycle):`
			`assert not cycle`
			`repr_pretty_impl(p, self, [list(self.factors)])`

			`def name(self):`
			`"""Return a human-readable name for this term."""`
			`if self.factors:`
			`return ":".join([f.name() for f in self.factors])`
			`else:`
			`return "Intercept"`

			`__getstate__ = no_pickling`

			`INTERCEPT = Term([])`

			`class _MockFactor(object):`
			`def __init__(self, name):`
			`self._name = name`

			`def name(self):`
			`return self._name`

			`def test_Term():`
			`assert Term([1, 2, 1]).factors == (1, 2)`
			`assert Term([1, 2]) == Term([2, 1])`
			`assert hash(Term([1, 2])) == hash(Term([2, 1]))`
			`f1 = _MockFactor("a")`
			`f2 = _MockFactor("b")`
			`assert Term([f1, f2]).name() == "a:b"`
			`assert Term([f2, f1]).name() == "b:a"`
			`assert Term([]).name() == "Intercept"`

			`assert_no_pickling(Term([]))`

			`class ModelDesc(object):`
			`"""A simple container representing the termlists parsed from a formula.`

			`This is a simple container object which has exactly the same`
			`representational power as a formula string, but is a Python object`
			`instead. You can construct one by hand, and pass it to functions like`
			:func:`dmatrix` or :func:`incr_dbuilder` that are expecting a formula
			`string, but without having to do any messy string manipulation. For`
			details see :ref:`expert-model-specification`.

			`Attributes:`

			`.. attribute:: lhs_termlist`
			`rhs_termlist`

			`Two termlists representing the left- and right-hand sides of a`
			formula, suitable for passing to :func:`design_matrix_builders`.
			`"""`
			`def __init__(self, lhs_termlist, rhs_termlist):`
			`self.lhs_termlist = uniqueify_list(lhs_termlist)`
			`self.rhs_termlist = uniqueify_list(rhs_termlist)`

			`__repr__ = repr_pretty_delegate`
			`def _repr_pretty_(self, p, cycle):`
			`assert not cycle`
			`return repr_pretty_impl(p, self,`
			`[],`
			`[("lhs_termlist", self.lhs_termlist),`
			`("rhs_termlist", self.rhs_termlist)])`

			`def describe(self):`
			"""Returns a human-readable representation of this :class:`ModelDesc`
			`in pseudo-formula notation.`

			`.. warning:: There is no guarantee that the strings returned by this`
			`function can be parsed as formulas. They are best-effort`
			`descriptions intended for human users. However, if this ModelDesc`
			`was created by parsing a formula, then it should work in`
			`practice. If you really have to.`
			`"""`
			`def term_code(term):`
			`if term == INTERCEPT:`
			`return "1"`
			`else:`
			`return term.name()`
			`result = " + ".join([term_code(term) for term in self.lhs_termlist])`
			`if result:`
			`result += " ~ "`
			`else:`
			`result += "~ "`
			`if self.rhs_termlist == [INTERCEPT]:`
			`result += term_code(INTERCEPT)`
			`else:`
			`term_names = []`
			`if INTERCEPT not in self.rhs_termlist:`
			`term_names.append("0")`
			`term_names += [term_code(term) for term in self.rhs_termlist`
			`if term != INTERCEPT]`
			`result += " + ".join(term_names)`
			`return result`

			`@classmethod`
			`def from_formula(cls, tree_or_string):`
			"""Construct a :class:`ModelDesc` from a formula string.

			`:arg tree_or_string: A formula string. (Or an unevaluated formula`
			`parse tree, but the API for generating those isn't public yet. Shh,`
			`it can be our secret.)`
			:returns: A new :class:`ModelDesc`.
			`"""`
			`if isinstance(tree_or_string, ParseNode):`
			`tree = tree_or_string`
			`else:`
			`tree = parse_formula(tree_or_string)`
			`value = Evaluator().eval(tree, require_evalexpr=False)`
			`assert isinstance(value, cls)`
			`return value`

			`__getstate__ = no_pickling`

			`def test_ModelDesc():`
			`f1 = _MockFactor("a")`
			`f2 = _MockFactor("b")`
			`m = ModelDesc([INTERCEPT, Term([f1])], [Term([f1]), Term([f1, f2])])`
			`assert m.lhs_termlist == [INTERCEPT, Term([f1])]`
			`assert m.rhs_termlist == [Term([f1]), Term([f1, f2])]`
			`print(m.describe())`
			`assert m.describe() == "1 + a ~ 0 + a + a:b"`

			`assert_no_pickling(m)`

			`assert ModelDesc([], []).describe() == "~ 0"`
			`assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"`
			`assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1"`
			`assert (ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe()`
			`== "1 ~ b")`

			`def test_ModelDesc_from_formula():`
			`for input in ("y ~ x", parse_formula("y ~ x")):`
			`md = ModelDesc.from_formula(input)`
			`assert md.lhs_termlist == [Term([EvalFactor("y")]),]`
			`assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])]`

			`class IntermediateExpr(object):`
			`"This class holds an intermediate result while we're evaluating a tree."`
			`def __init__(self, intercept, intercept_origin, intercept_removed, terms):`
			`self.intercept = intercept`
			`self.intercept_origin = intercept_origin`
			`self.intercept_removed =intercept_removed`
			`self.terms = tuple(uniqueify_list(terms))`
			`if self.intercept:`
			`assert self.intercept_origin`
			`assert not (self.intercept and self.intercept_removed)`

			`__repr__ = repr_pretty_delegate`
			`def _pretty_repr_(self, p, cycle): # pragma: no cover`
			`assert not cycle`
			`return repr_pretty_impl(p, self,`
			`[self.intercept, self.intercept_origin,`
			`self.intercept_removed, self.terms])`

			`__getstate__ = no_pickling`

			`def _maybe_add_intercept(doit, terms):`
			`if doit:`
			`return (INTERCEPT,) + terms`
			`else:`
			`return terms`

			`def _eval_any_tilde(evaluator, tree):`
			`exprs = [evaluator.eval(arg) for arg in tree.args]`
			`if len(exprs) == 1:`
			`# Formula was like: "~ foo"`
			`# We pretend that instead it was like: "0 ~ foo"`
			`exprs.insert(0, IntermediateExpr(False, None, True, []))`
			`assert len(exprs) == 2`
			`# Note that only the RHS gets an implicit intercept:`
			`return ModelDesc(_maybe_add_intercept(exprs[0].intercept, exprs[0].terms),`
			`_maybe_add_intercept(not exprs[1].intercept_removed,`
			`exprs[1].terms))`

			`def _eval_binary_plus(evaluator, tree):`
			`left_expr = evaluator.eval(tree.args[0])`
			`if tree.args[1].type == "ZERO":`
			`return IntermediateExpr(False, None, True, left_expr.terms)`
			`else:`
			`right_expr = evaluator.eval(tree.args[1])`
			`if right_expr.intercept:`
			`return IntermediateExpr(True, right_expr.intercept_origin, False,`
			`left_expr.terms + right_expr.terms)`
			`else:`
			`return IntermediateExpr(left_expr.intercept,`
			`left_expr.intercept_origin,`
			`left_expr.intercept_removed,`
			`left_expr.terms + right_expr.terms)`


			`def _eval_binary_minus(evaluator, tree):`
			`left_expr = evaluator.eval(tree.args[0])`
			`if tree.args[1].type == "ZERO":`
			`return IntermediateExpr(True, tree.args[1], False,`
			`left_expr.terms)`
			`elif tree.args[1].type == "ONE":`
			`return IntermediateExpr(False, None, True, left_expr.terms)`
			`else:`
			`right_expr = evaluator.eval(tree.args[1])`
			`terms = [term for term in left_expr.terms`
			`if term not in right_expr.terms]`
			`if right_expr.intercept:`
			`return IntermediateExpr(False, None, True, terms)`
			`else:`
			`return IntermediateExpr(left_expr.intercept,`
			`left_expr.intercept_origin,`
			`left_expr.intercept_removed,`
			`terms)`

			`def _check_interactable(expr):`
			`if expr.intercept:`
			`raise PatsyError("intercept term cannot interact with "`
			`"anything else", expr.intercept_origin)`

			`def _interaction(left_expr, right_expr):`
			`for expr in (left_expr, right_expr):`
			`_check_interactable(expr)`
			`terms = []`
			`for l_term in left_expr.terms:`
			`for r_term in right_expr.terms:`
			`terms.append(Term(l_term.factors + r_term.factors))`
			`return IntermediateExpr(False, None, False, terms)`

			`def _eval_binary_prod(evaluator, tree):`
			`exprs = [evaluator.eval(arg) for arg in tree.args]`
			`return IntermediateExpr(False, None, False,`
			`exprs[0].terms`
			`+ exprs[1].terms`
			`+ _interaction(*exprs).terms)`

			`# Division (nesting) is right-ward distributive:`
			`# a / (b + c) -> a/b + a/c -> a + a:b + a:c`
			`# But left-ward, in S/R it has a quirky behavior:`
			`# (a + b)/c -> a + b + a:b:c`
			`# This is because it's meaningless for a factor to be "nested" under two`
			`# different factors. (This is documented in Chambers and Hastie (page 30) as a`
			`# "Slightly more subtle..." rule, with no further elaboration. Hopefully we`
			`# will do better.)`
			`def _eval_binary_div(evaluator, tree):`
			`left_expr = evaluator.eval(tree.args[0])`
			`right_expr = evaluator.eval(tree.args[1])`
			`terms = list(left_expr.terms)`
			`_check_interactable(left_expr)`
			`# Build a single giant combined term for everything on the left:`
			`left_factors = []`
			`for term in left_expr.terms:`
			`left_factors += list(term.factors)`
			`left_combined_expr = IntermediateExpr(False, None, False,`
			`[Term(left_factors)])`
			`# Then interact it with everything on the right:`
			`terms += list(_interaction(left_combined_expr, right_expr).terms)`
			`return IntermediateExpr(False, None, False, terms)`

			`def _eval_binary_interact(evaluator, tree):`
			`exprs = [evaluator.eval(arg) for arg in tree.args]`
			`return _interaction(*exprs)`

			`def _eval_binary_power(evaluator, tree):`
			`left_expr = evaluator.eval(tree.args[0])`
			`_check_interactable(left_expr)`
			`power = -1`
			`if tree.args[1].type in ("ONE", "NUMBER"):`
			`expr = tree.args[1].token.extra`
			`try:`
			`power = int(expr)`
			`except ValueError:`
			`pass`
			`if power < 1:`
			`raise PatsyError("'**' requires a positive integer", tree.args[1])`
			`all_terms = left_expr.terms`
			`big_expr = left_expr`
			`# Small optimization: (a + b)100 is just the same as (a + b)2.`
			`power = min(len(left_expr.terms), power)`
			`for i in range(1, power):`
			`big_expr = _interaction(left_expr, big_expr)`
			`all_terms = all_terms + big_expr.terms`
			`return IntermediateExpr(False, None, False, all_terms)`

			`def _eval_unary_plus(evaluator, tree):`
			`return evaluator.eval(tree.args[0])`

			`def _eval_unary_minus(evaluator, tree):`
			`if tree.args[0].type == "ZERO":`
			`return IntermediateExpr(True, tree.origin, False, [])`
			`elif tree.args[0].type == "ONE":`
			`return IntermediateExpr(False, None, True, [])`
			`else:`
			`raise PatsyError("Unary minus can only be applied to 1 or 0", tree)`

			`def _eval_zero(evaluator, tree):`
			`return IntermediateExpr(False, None, True, [])`

			`def _eval_one(evaluator, tree):`
			`return IntermediateExpr(True, tree.origin, False, [])`

			`def _eval_number(evaluator, tree):`
			`raise PatsyError("numbers besides '0' and '1' are "`
			`"only allowed with **", tree)`

			`def _eval_python_expr(evaluator, tree):`
			`factor = EvalFactor(tree.token.extra, origin=tree.origin)`
			`return IntermediateExpr(False, None, False, [Term([factor])])`

			`class Evaluator(object):`
			`def __init__(self):`
			`self._evaluators = {}`
			`self.add_op("~", 2, _eval_any_tilde)`
			`self.add_op("~", 1, _eval_any_tilde)`

			`self.add_op("+", 2, _eval_binary_plus)`
			`self.add_op("-", 2, _eval_binary_minus)`
			`self.add_op("*", 2, _eval_binary_prod)`
			`self.add_op("/", 2, _eval_binary_div)`
			`self.add_op(":", 2, _eval_binary_interact)`
			`self.add_op("**", 2, _eval_binary_power)`

			`self.add_op("+", 1, _eval_unary_plus)`
			`self.add_op("-", 1, _eval_unary_minus)`

			`self.add_op("ZERO", 0, _eval_zero)`
			`self.add_op("ONE", 0, _eval_one)`
			`self.add_op("NUMBER", 0, _eval_number)`
			`self.add_op("PYTHON_EXPR", 0, _eval_python_expr)`

			`# Not used by Patsy -- provided for the convenience of eventual`
			`# user-defined operators.`
			`self.stash = {}`

			`# This should not be considered a public API yet (to use for actually`
			`# adding new operator semantics) because I wrote in some of the relevant`
			`# code sort of speculatively, but it isn't actually tested.`
			`def add_op(self, op, arity, evaluator):`
			`self._evaluators[op, arity] = evaluator`

			`def eval(self, tree, require_evalexpr=True):`
			`result = None`
			`assert isinstance(tree, ParseNode)`
			`key = (tree.type, len(tree.args))`
			`if key not in self._evaluators:`
			`raise PatsyError("I don't know how to evaluate this "`
			`"'%s' operator" % (tree.type,),`
			`tree.token)`
			`result = self._evaluators[key](self, tree)`
			`if require_evalexpr and not isinstance(result, IntermediateExpr):`
			`if isinstance(result, ModelDesc):`
			`raise PatsyError("~ can only be used once, and "`
			`"only at the top level",`
			`tree)`
			`else:`
			`raise PatsyError("custom operator returned an "`
			`"object that I don't know how to "`
			`"handle", tree)`
			`return result`

			`#############`

			`_eval_tests = {`
			`"": (True, []),`
			`" ": (True, []),`
			`" \n ": (True, []),`
			`"a": (True, ["a"]),`

			`"1": (True, []),`
			`"0": (False, []),`
			`"- 1": (False, []),`
			`"- 0": (True, []),`
			`"+ 1": (True, []),`
			`"+ 0": (False, []),`
			`"0 + 1": (True, []),`
			`"1 + 0": (False, []),`
			`"1 - 0": (True, []),`
			`"0 - 1": (False, []),`

			`"1 + a": (True, ["a"]),`
			`"0 + a": (False, ["a"]),`
			`"a - 1": (False, ["a"]),`
			`"a - 0": (True, ["a"]),`
			`"1 - a": (True, []),`

			`"a + b": (True, ["a", "b"]),`
			`"(a + b)": (True, ["a", "b"]),`
			`"a + ((((b))))": (True, ["a", "b"]),`
			`"a + ((((+b))))": (True, ["a", "b"]),`
			`"a + ((((b - a))))": (True, ["a", "b"]),`

			`"a + a + a": (True, ["a"]),`

			`"a + (b - a)": (True, ["a", "b"]),`

			`"a + np.log(a, base=10)": (True, ["a", "np.log(a, base=10)"]),`
			`# Note different spacing:`
			`"a + np.log(a, base=10) - np . log(a , base = 10)": (True, ["a"]),`

			`"a + (I(b) + c)": (True, ["a", "I(b)", "c"]),`
			`"a + I(b + c)": (True, ["a", "I(b + c)"]),`

			`"a:b": (True, [("a", "b")]),`
			`"a:b:a": (True, [("a", "b")]),`
			`"a:(b + c)": (True, [("a", "b"), ("a", "c")]),`
			`"(a + b):c": (True, [("a", "c"), ("b", "c")]),`
			`"a:(b - c)": (True, [("a", "b")]),`
			`"c + a:c + a:(b - c)": (True, ["c", ("a", "c"), ("a", "b")]),`
			`"(a - b):c": (True, [("a", "c")]),`
			`"b + b:c + (a - b):c": (True, ["b", ("b", "c"), ("a", "c")]),`

			`"a:b - a:b": (True, []),`
			`"a:b - b:a": (True, []),`

			`"1 - (a + b)": (True, []),`
			`"a + b - (a + b)": (True, []),`

			`"a * b": (True, ["a", "b", ("a", "b")]),`
			`"a * b * a": (True, ["a", "b", ("a", "b")]),`
			`"a * (b + c)": (True, ["a", "b", "c", ("a", "b"), ("a", "c")]),`
			`"(a + b) * c": (True, ["a", "b", "c", ("a", "c"), ("b", "c")]),`
			`"a * (b - c)": (True, ["a", "b", ("a", "b")]),`
			`"c + a:c + a * (b - c)": (True, ["c", ("a", "c"), "a", "b", ("a", "b")]),`
			`"(a - b) * c": (True, ["a", "c", ("a", "c")]),`
			`"b + b:c + (a - b) * c": (True, ["b", ("b", "c"), "a", "c", ("a", "c")]),`

			`"a/b": (True, ["a", ("a", "b")]),`
			`"(a + b)/c": (True, ["a", "b", ("a", "b", "c")]),`
			`"b + b:c + (a - b)/c": (True, ["b", ("b", "c"), "a", ("a", "c")]),`
			`"a/(b + c)": (True, ["a", ("a", "b"), ("a", "c")]),`

			`"a ** 2": (True, ["a"]),`
			`"(a + b + c + d) ** 2": (True, ["a", "b", "c", "d",`
			`("a", "b"), ("a", "c"), ("a", "d"),`
			`("b", "c"), ("b", "d"), ("c", "d")]),`
			`"(a + b + c + d) ** 3": (True, ["a", "b", "c", "d",`
			`("a", "b"), ("a", "c"), ("a", "d"),`
			`("b", "c"), ("b", "d"), ("c", "d"),`
			`("a", "b", "c"), ("a", "b", "d"),`
			`("a", "c", "d"), ("b", "c", "d")]),`

			`"a + +a": (True, ["a"]),`

			`"~ a + b": (True, ["a", "b"]),`
			`"~ a*b": (True, ["a", "b", ("a", "b")]),`
			`"~ a*b + 0": (False, ["a", "b", ("a", "b")]),`
			`"~ -1": (False, []),`

			`"0 ~ a + b": (True, ["a", "b"]),`
			`"1 ~ a + b": (True, [], True, ["a", "b"]),`
			`"y ~ a + b": (False, ["y"], True, ["a", "b"]),`
			`"0 + y ~ a + b": (False, ["y"], True, ["a", "b"]),`
			`"0 + y * z ~ a + b": (False, ["y", "z", ("y", "z")], True, ["a", "b"]),`
			`"-1 ~ 1": (False, [], True, []),`
			`"1 + y ~ a + b": (True, ["y"], True, ["a", "b"]),`

			`# Check precedence:`
			`"a + b * c": (True, ["a", "b", "c", ("b", "c")]),`
			`"a * b + c": (True, ["a", "b", ("a", "b"), "c"]),`
			`"a * b - a": (True, ["b", ("a", "b")]),`
			`"a + b / c": (True, ["a", "b", ("b", "c")]),`
			`"a / b + c": (True, ["a", ("a", "b"), "c"]),`
			`"a*b:c": (True, ["a", ("b", "c"), ("a", "b", "c")]),`
			`"a:b*c": (True, [("a", "b"), "c", ("a", "b", "c")]),`

			`# Intercept handling:`
			`"~ 1 + 1 + 0 + 1": (True, []),`
			`"~ 0 + 1 + 0": (False, []),`
			`"~ 0 - 1 - 1 + 0 + 1": (True, []),`
			`"~ 1 - 1": (False, []),`
			`"~ 0 + a + 1": (True, ["a"]),`
			`"~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising!`
			`"~ 0 + (a + 1)": (True, ["a"]), # Also correct!`
			`"~ 1 - (a + 1)": (False, []),`
			`}`

			`# <> mark off where the error should be reported:`
			`_eval_error_tests = [`
			`"a <+>",`
			`"a + <(>",`

			`"b + <(-a)>",`

			`"a:<1>",`
			`"(a + <1>)*b",`

			`"a + <2>",`
			`"a + <1.0>",`
			`# eh, catching this is a hassle, we'll just leave the user some rope if`
			`# they really want it:`
			`#"a + <0x1>",`

			`"a ** <b>",`
			`"a ** <(1 + 1)>",`
			`"a ** <1.5>",`

			`"a + b <# asdf>",`

			`"<)>",`
			`"a + <)>",`
			`"<*> a",`
			`"a + <*>",`

			`"a + <foo[bar>",`
			`"a + <foo{bar>",`
			`"a + <foo(bar>",`

			`"a + <[bar>",`
			`"a + <{bar>",`

			`"a + <{bar[]>",`

			`"a + foo<]>bar",`
			`"a + foo[]<]>bar",`
			`"a + foo{}<}>bar",`
			`"a + foo<)>bar",`

			`"a + b<)>",`
			`"(a) <.>",`

			`"<(>a + b",`

			`"<y ~ a> ~ b",`
			`"y ~ <(a ~ b)>",`
			`"<~ a> ~ b",`
			`"~ <(a ~ b)>",`

			`"1 + <-(a + b)>",`

			`"<- a>",`
			`"a + <-a**2>",`
			`]`

			`def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover`
			`if expected_intercept:`
			`expecteds = [()] + expecteds`
			`assert len(terms) == len(expecteds)`
			`for term, expected in zip(terms, expecteds):`
			`if isinstance(term, Term):`
			`if isinstance(expected, str):`
			`expected = (expected,)`
			`assert term.factors == tuple([EvalFactor(s) for s in expected])`
			`else:`
			`assert term == expected`

			`def _do_eval_formula_tests(tests): # pragma: no cover`
			`for code, result in six.iteritems(tests):`
			`if len(result) == 2:`
			`result = (False, []) + result`
			`model_desc = ModelDesc.from_formula(code)`
			`print(repr(code))`
			`print(result)`
			`print(model_desc)`
			`lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result`
			`_assert_terms_match(model_desc.lhs_termlist,`
			`lhs_intercept, lhs_termlist)`
			`_assert_terms_match(model_desc.rhs_termlist,`
			`rhs_intercept, rhs_termlist)`

			`def test_eval_formula():`
			`_do_eval_formula_tests(_eval_tests)`

			`def test_eval_formula_error_reporting():`
			`from patsy.parse_formula import _parsing_error_test`
			`parse_fn = lambda formula: ModelDesc.from_formula(formula)`
			`_parsing_error_test(parse_fn, _eval_error_tests)`

			`def test_formula_factor_origin():`
			`from patsy.origin import Origin`
			`desc = ModelDesc.from_formula("a + b")`
			`assert (desc.rhs_termlist[1].factors[0].origin`
			`== Origin("a + b", 0, 1))`
			`assert (desc.rhs_termlist[2].factors[0].origin`
			`== Origin("a + b", 4, 5))`