"""
Spline and other smoother classes for Generalized Additive Models

Author: Luca Puggini
Author: Josef Perktold

Created on Fri Jun  5 16:32:00 2015
"""

# import useful only for development
from abc import ABCMeta, abstractmethod
from statsmodels.compat.python import with_metaclass

import numpy as np
import pandas as pd
from patsy import dmatrix
from patsy.mgcv_cubic_splines import _get_all_sorted_knots

from statsmodels.tools.linalg import transf_constraints


# Obtain b splines from patsy

def _equally_spaced_knots(x, df):
    """Return ``df - 2`` evenly spaced knots spanning the range of ``x``.

    Parameters
    ----------
    x : ndarray
        Data whose ``min()`` and ``max()`` define the first and last knot.
    df : int
        The number of returned knots is ``df - 2``.

    Returns
    -------
    ndarray
        Output of ``numpy.linspace`` from the minimum to the maximum of
        ``x``, both end points included.
    """
    lower, upper = x.min(), x.max()
    return np.linspace(lower, upper, df - 2)


def _R_compat_quantile(x, probs):
    """Quantiles of ``x`` at the probabilities ``probs``.

    Each probability is converted to a percentage and evaluated separately
    with ``numpy.percentile``; the result has the same shape as ``probs``.

    Parameters
    ----------
    x : array_like
        Data for which the quantiles are computed.
    probs : array_like
        Probabilities in [0, 1], scalar or array of any shape.

    Returns
    -------
    ndarray
        Quantiles arranged in the shape of ``probs``.
    """
    # return np.percentile(x, 100 * np.asarray(probs))
    prob_arr = np.asarray(probs)
    values = []
    for prob in prob_arr.ravel(order="C"):
        values.append(np.percentile(x, 100 * prob))
    return np.asarray(values).reshape(prob_arr.shape, order="C")


# FIXME: is this copy/pasted?  If so, why do we need it?  If not, get
#  rid of the try/except for scipy import
# from patsy splines.py
def _eval_bspline_basis(x, knots, degree, deriv='all', include_intercept=True):
    """Evaluate a B-spline basis and/or its derivatives at ``x``.

    Parameters
    ----------
    x : array_like
        Points at which the basis is evaluated. Must be 1-D, or 2-D with a
        single column.
    knots : array_like
        Complete 1-D knot sequence. The knots are assumed to be already
        pre-processed, e.g. they already contain the repeated boundary
        knots. A sorted copy is used, the argument itself is not modified.
    degree : int
        Degree of the spline.
    deriv : {'all', 0, 1, 2}
        Which derivative of the basis to return. 'all' returns the basis,
        its first and its second derivative.
    include_intercept : bool
        If False, then the first basis function is dropped.

    Returns
    -------
    ndarray or tuple of ndarray
        For ``deriv='all'`` the tuple ``(basis, der1_basis, der2_basis)``,
        otherwise the single requested array. Each array has one row per
        point in ``x`` and one column per basis function.

    Raises
    ------
    ImportError
        If scipy is not available.
    ValueError
        If ``deriv`` is not one of the supported options or if ``knots``
        or ``x`` are not one-dimensional.
    NotImplementedError
        If some points in ``x`` lie outside of the outermost knots.
    """
    try:
        from scipy.interpolate import splev
    except ImportError:
        raise ImportError("spline functionality requires scipy")

    # Map the option to the derivative orders that have to be computed.
    # Validating here avoids an obscure UnboundLocalError further down.
    supported = {'all': (0, 1, 2), 0: (0,), 1: (1,), 2: (2,)}
    try:
        orders = supported[deriv]
    except (KeyError, TypeError):
        raise ValueError("deriv must be 'all', 0, 1 or 2 (not %r)"
                         % (deriv,))

    # 'knots' are assumed to be already pre-processed. E.g. usually you
    # want to include duplicate copies of boundary knots; you should do
    # that *before* calling this constructor.
    knots = np.atleast_1d(np.asarray(knots, dtype=float))
    if knots.ndim != 1:
        raise ValueError("knots must be 1 dimensional")
    # np.sort returns a copy, an in-place sort would reorder the array of
    # the caller whenever it is already a float ndarray
    knots = np.sort(knots)
    degree = int(degree)
    x = np.atleast_1d(x)
    if x.ndim == 2 and x.shape[1] == 1:
        x = x[:, 0]
    if x.ndim != 1:
        raise ValueError("x must be 1 dimensional or have a single column")
    # XX FIXME: when points fall outside of the boundaries, splev and R seem
    # to handle them differently. I do not know why yet. So until we understand
    # this and decide what to do with it, I'm going to play it safe and
    # disallow such points.
    if np.min(x) < np.min(knots) or np.max(x) > np.max(knots):
        raise NotImplementedError("some data points fall outside the "
                                  "outermost knots, and I'm not sure how "
                                  "to handle them. (Patches accepted!)")
    # Thanks to Charles Harris for explaining splev. It's not well
    # documented, but basically it computes an arbitrary b-spline basis
    # given knots and degree on some specified points (or derivatives
    # thereof), and then returns some linear combination of these basis
    # functions. To get out the basis functions themselves, we use linear
    # combinations like [1, 0, 0], [0, 1, 0], [0, 0, 1].
    # NB: This probably makes it rather inefficient (though I have not checked
    # to be sure -- maybe the fortran code actually skips computing the basis
    # function for coefficients that are zero).
    # Note: the order of a spline is the same as its degree + 1.
    # Note: there are (len(knots) - order) basis functions.

    k_const = 1 - int(include_intercept)
    n_bases = len(knots) - (degree + 1) - k_const
    results = [np.empty((x.shape[0], n_bases), dtype=float)
               for _ in orders]

    for i in range(n_bases):
        coefs = np.zeros((n_bases + k_const,))
        # we are skipping the first column of the basis to drop constant
        coefs[i + k_const] = 1
        tck = (knots, coefs, degree)
        for out, order in zip(results, orders):
            out[:, i] = splev(x, tck, der=order)

    if len(orders) > 1:
        return tuple(results)
    return results[0]


def compute_all_knots(x, df, degree):
    """Boundary and quantile based interior knots for a B-spline basis.

    Parameters
    ----------
    x : array_like
        Data used for the location of the knots.
    df : int
        Number of basis functions; ``df - (degree + 1)`` interior knots
        are created.
    degree : int
        Degree of the spline.

    Returns
    -------
    all_knots : ndarray
        The boundary knots, each repeated ``degree + 1`` times, followed
        by the interior knots. The array is not sorted.
    lower_bound : float
        Minimum of ``x``.
    upper_bound : float
        Maximum of ``x``.
    inner_knots : ndarray
        Interior knots at equally spaced quantile levels of ``x``.
    """
    order = degree + 1
    lower_bound, upper_bound = np.min(x), np.max(x)
    # quantile levels strictly between 0 and 1, one per interior knot
    probs = np.linspace(0, 1, df - order + 2)[1:-1]
    inner_knots = _R_compat_quantile(x, probs)
    boundary = [lower_bound, upper_bound] * order
    all_knots = np.concatenate((boundary, inner_knots))
    return all_knots, lower_bound, upper_bound, inner_knots


def make_bsplines_basis(x, df, degree):
    """Make a B-spline basis for ``x`` together with its derivatives.

    The knots are taken from ``compute_all_knots`` and the basis is
    evaluated by ``_eval_bspline_basis`` with its default options.

    Returns
    -------
    tuple of ndarray
        The basis, its first derivative and its second derivative.
    """
    knots = compute_all_knots(x, df, degree)[0]
    return _eval_bspline_basis(x, knots, degree)


def get_knots_bsplines(x=None, df=None, knots=None, degree=3,
                       spacing='quantile', lower_bound=None,
                       upper_bound=None, all_knots=None):
    """knots for use in B-splines

    There are two main options for the knot placement

    - quantile spacing with multiplicity of boundary knots
    - equal spacing extended to boundary or exterior knots

    The first corresponds to splines as used by patsy. the second is the
    knot spacing for P-Splines.

    Parameters
    ----------
    x : array_like
        Data used for the location of the knots. Required unless
        ``all_knots`` is provided.
    df : None or int
        Number of basis functions; implies ``df - (degree + 1)`` interior
        knots. Either ``df`` or ``knots`` has to be provided.
    knots : None or array_like
        Interior knots. If ``df`` is also given, then the number of knots
        has to agree with it.
    degree : int
        Degree of the spline, a non-negative integer.
    spacing : {'quantile', 'equal'}
        Placement of the knots. With 'equal' the interior knots are
        extended on both sides by ``degree + 1`` exterior knots that have
        the same distance as the first two interior knots, and the bounds
        are not added to the knots. Otherwise each bound is repeated
        ``degree + 1`` times.
    lower_bound, upper_bound : None or float
        Boundary knots, by default the minimum and maximum of ``x``.
    all_knots : None or array_like
        If provided, then it is returned as is and all other options are
        ignored.

    Returns
    -------
    ndarray
        Sorted array of all knots (or ``all_knots`` unchanged).

    Raises
    ------
    ValueError
        If required arguments are missing, if options are inconsistent,
        if interior knots are outside of the bounds or if
        ``spacing='equal'`` is used with fewer than two interior knots.
    """
    # based on patsy memorize_finish
    if all_knots is not None:
        return all_knots

    if x is None:
        raise ValueError("x is required if all_knots is not provided")

    if degree < 0:
        raise ValueError("degree must be greater than or equal to 0 "
                         "(not %r)" % (degree,))
    if int(degree) != degree:
        raise ValueError("degree must be an integer (not %r)"
                         % (degree,))

    if df is None and knots is None:
        raise ValueError("must specify either df or knots")
    order = degree + 1
    if df is not None:
        n_inner_knots = df - order
        if n_inner_knots < 0:
            raise ValueError("df=%r is too small for degree=%r; must be >= %s"
                             % (df, degree,
                                # We know that n_inner_knots is negative;
                                # if df were that much larger, it would
                                # have been zero, and things would work.
                                df - n_inner_knots))
        if knots is not None:
            if len(knots) != n_inner_knots:
                raise ValueError("df=%s with degree=%r implies %s knots, "
                                 "but %s knots were provided"
                                 % (df, degree,
                                    n_inner_knots, len(knots)))
        elif spacing == 'quantile':
            # Need to compute inner knots
            knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
            inner_knots = _R_compat_quantile(x, knot_quantiles)
        elif spacing == 'equal':
            # Need to compute inner knots
            x_min = np.min(x)
            x_max = np.max(x)
            grid = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
            inner_knots = x_min + grid * (x_max - x_min)
        else:
            raise ValueError("incorrect option for spacing")
    if knots is not None:
        inner_knots = knots
    if lower_bound is None:
        lower_bound = np.min(x)
    if upper_bound is None:
        upper_bound = np.max(x)

    if lower_bound > upper_bound:
        raise ValueError("lower_bound > upper_bound (%r > %r)"
                         % (lower_bound, upper_bound))
    inner_knots = np.asarray(inner_knots)
    if inner_knots.ndim > 1:
        raise ValueError("knots must be 1 dimensional")
    if np.any(inner_knots < lower_bound):
        raise ValueError("some knot values (%s) fall below lower bound "
                         "(%r)"
                         % (inner_knots[inner_knots < lower_bound],
                            lower_bound))
    if np.any(inner_knots > upper_bound):
        raise ValueError("some knot values (%s) fall above upper bound "
                         "(%r)"
                         % (inner_knots[inner_knots > upper_bound],
                            upper_bound))

    if spacing == "equal":
        # The knot distance is taken from the interior knots themselves so
        # that user provided knots work in the same way as generated knots.
        if inner_knots.size < 2:
            raise ValueError("spacing='equal' requires at least 2 interior "
                             "knots to determine the knot distance "
                             "(got %s)" % (inner_knots.size,))
        # no-op for generated knots, protects against unsorted user knots
        inner_knots = np.sort(inner_knots)
        diff_knots = inner_knots[1] - inner_knots[0]
        diffs = np.arange(1, order + 1) * diff_knots
        lower_knots = inner_knots[0] - diffs[::-1]
        upper_knots = inner_knots[-1] + diffs
        all_knots = np.concatenate((lower_knots, inner_knots, upper_knots))
    else:
        all_knots = np.concatenate(([lower_bound, upper_bound] * order,
                                    inner_knots))
    all_knots.sort()

    return all_knots


def _get_integration_points(knots, k_points=3):
    """Refine the grid defined by the knots.

    The unique knots are kept and ``k_points`` equally spaced points are
    inserted between each two consecutive unique knots.

    Parameters
    ----------
    knots : array_like
        Knots, duplicates are removed.
    k_points : int
        Number of points added inside each interval.

    Returns
    -------
    ndarray
        Sorted 1-D grid that contains the unique knots and the added
        points.
    """
    n_sub = k_points + 1
    grid = np.unique(knots)
    # relative position of the points inside an interval, 0 is the left knot
    fractions = np.arange(n_sub) / n_sub
    widths = np.diff(grid)
    offsets = widths[:, None] * fractions
    interior = (grid[:-1, None] + offsets).ravel()
    # the last knot is not the left end of any interval, append it
    return np.concatenate((interior, [grid[-1]]))


def get_covder2(smoother, k_points=3, integration_points=None,
                skip_ctransf=False, deriv=2):
    """
    Approximate integral of cross product of a derivative of the smoother

    This uses Simpson's rule from scipy.integrate to approximate the
    integral of the cross-product of the derivative of the basis functions.
    By default the integrand is evaluated at the knots plus ``k_points``
    points in between knots.

    Parameters
    ----------
    smoother : object
        Needs to have a ``knots`` attribute and a
        ``transform(x, deriv=..., skip_ctransf=...)`` method that returns a
        2-D array with points in rows and basis functions in columns.
    k_points : int
        Number of points between knots, only used if
        ``integration_points`` is None.
    integration_points : None or ndarray
        Points at which the integrand is evaluated.
    skip_ctransf : bool
        Option that is forwarded to ``smoother.transform``.
    deriv : int
        Order of the derivative, forwarded to ``smoother.transform``.

    Returns
    -------
    ndarray
        Square matrix with the approximate integral for each pair of
        basis functions.
    """
    try:
        from scipy.integrate import simpson
    except ImportError:
        # Remove after SciPy 1.7 is the minimum version
        from scipy.integrate import simps as simpson
    knots = smoother.knots
    if integration_points is not None:
        grid = integration_points
    else:
        grid = _get_integration_points(knots, k_points=k_points)
    derivs = smoother.transform(grid, deriv=deriv, skip_ctransf=skip_ctransf)
    # outer product of the basis derivatives at each grid point
    cross = derivs[:, :, None] * derivs[:, None, :]
    return simpson(cross, x=grid, axis=0)


# TODO: this function should be deleted
def make_poly_basis(x, degree, intercept=True):
    '''
    Polynomial basis and its first and second derivative.

    Parameters
    ----------
    x : array_like, 1-D
        Points at which the polynomials are evaluated.
    degree : int
        Highest power included in the basis.
    intercept : bool
        If True, then the basis is (1, x, x^2, ..., x^degree), otherwise
        the constant column is dropped.

    Returns
    -------
    basis, der_basis, der2_basis : ndarray
        The basis, its first and its second derivative with one column per
        power.
    '''
    start = 0 if intercept else 1

    x = np.asarray(x)
    nobs = len(x)
    n_columns = degree + 1 - start
    basis = np.zeros(shape=(nobs, n_columns))
    der_basis = np.zeros(shape=(nobs, n_columns))
    der2_basis = np.zeros(shape=(nobs, n_columns))

    for power in range(start, degree + 1):
        col = power - start
        basis[:, col] = x ** power
        # Derivatives that vanish identically keep their zero column.
        # Evaluating e.g. ``0 * x ** -1`` instead would produce nan at
        # x == 0 and fails for integer arrays.
        if power >= 1:
            der_basis[:, col] = power * x ** (power - 1)
        if power >= 2:
            der2_basis[:, col] = power * (power - 1) * x ** (power - 2)

    return basis, der_basis, der2_basis


# TODO: try to include other kinds of splines from patsy
# x = np.linspace(0, 1, 30)
# df = 10
# degree = 3
# from patsy.mgcv_cubic_splines import cc, cr, te
# all_knots, lower, upper, inner  = compute_all_knots(x, df, degree)
# result = cc(x, df=df, knots=all_knots, lower_bound=lower, upper_bound=upper,
#             constraints=None)
#
# import matplotlib.pyplot as plt
#
# result = np.array(result)
# print(result.shape)
# plt.plot(result.T)
# plt.show()

class UnivariateGamSmoother(metaclass=ABCMeta):
    """Base Class for single smooth component

    Subclasses implement ``_smooth_basis_for_single_variable`` which
    provides the basis, its first two derivatives and the penalty matrix.
    The constructor stores them, after applying the constraint transform
    if there is one.

    Parameters
    ----------
    x : ndarray, 1-D
        underlying explanatory variable for the smooth term.
    constraints : {None, str, array}
        If it is 'center', then the column means of the basis are used as
        constraint matrix. An array is used directly as constraint matrix.
        In both cases the transformation matrix ``ctransf`` is computed by
        ``transf_constraints``. Other strings are left to the subclass,
        which can set ``ctransf`` itself.
    variable_name : str
        Name used as prefix for the column names of the basis.
    """
    def __init__(self, x, constraints=None, variable_name='x'):
        self.x = x
        self.constraints = constraints
        self.variable_name = variable_name
        self.nobs, self.k_variables = len(x), 1

        base4 = self._smooth_basis_for_single_variable()
        # Only compare with the option name if constraints is a string.
        # ``array == 'center'`` is an elementwise comparison in recent
        # numpy and cannot be used as condition.
        if isinstance(constraints, str) and constraints == 'center':
            constraints = base4[0].mean(0)[None, :]

        if constraints is not None and not isinstance(constraints, str):
            ctransf = transf_constraints(constraints)
            self.ctransf = ctransf
        else:
            # subclasses might set ctransf directly
            # only used if constraints is None
            if not hasattr(self, 'ctransf'):
                self.ctransf = None

        self.basis, self.der_basis, self.der2_basis, self.cov_der2 = base4
        if self.ctransf is not None:
            ctransf = self.ctransf
            # transform attributes that are not None
            if base4[0] is not None:
                self.basis = base4[0].dot(ctransf)
            if base4[1] is not None:
                self.der_basis = base4[1].dot(ctransf)
            if base4[2] is not None:
                self.der2_basis = base4[2].dot(ctransf)
            if base4[3] is not None:
                self.cov_der2 = ctransf.T.dot(base4[3]).dot(ctransf)

        self.dim_basis = self.basis.shape[1]
        self.col_names = [self.variable_name + "_s" + str(i)
                          for i in range(self.dim_basis)]

    @abstractmethod
    def _smooth_basis_for_single_variable(self):
        """Return the tuple (basis, der_basis, der2_basis, cov_der2).

        Entries other than the basis can be None.
        """
        return


class UnivariateGenericSmoother(UnivariateGamSmoother):
    """Generic single smooth component

    The basis, its derivatives and the penalty matrix are provided by the
    user instead of being computed from ``x``.
    """
    def __init__(self, x, basis, der_basis, der2_basis, cov_der2,
                 variable_name='x'):
        # The constructor of the base class calls
        # ``_smooth_basis_for_single_variable``, so the arrays have to be
        # attached before it runs.
        provided = (basis, der_basis, der2_basis, cov_der2)
        (self.basis, self.der_basis,
         self.der2_basis, self.cov_der2) = provided

        super().__init__(x, variable_name=variable_name)

    def _smooth_basis_for_single_variable(self):
        """Return the user provided arrays."""
        base4 = (self.basis, self.der_basis, self.der2_basis, self.cov_der2)
        return base4


class UnivariatePolynomialSmoother(UnivariateGamSmoother):
    """polynomial single smooth component

    Parameters
    ----------
    x : ndarray, 1-D
        underlying explanatory variable for the smooth term.
    degree : int
        Highest power of the polynomial. The basis has one column for each
        power from 1 to ``degree``, there is no constant column.
    variable_name : str
        Name used as prefix for the column names of the basis.
    """
    def __init__(self, x, degree, variable_name='x'):
        self.degree = degree
        super().__init__(x, variable_name=variable_name)

    def _smooth_basis_for_single_variable(self):
        """
        Polynomial basis (x, x^2, ..., x^degree) without constant, its first
        and second derivative and the cross-product of the second
        derivative which is used as penalty matrix.
        """
        shape = (self.nobs, self.degree)
        basis = np.zeros(shape=shape)
        der_basis = np.zeros(shape=shape)
        der2_basis = np.zeros(shape=shape)
        for col, power in enumerate(range(1, self.degree + 1)):
            basis[:, col] = self.x ** power
            der_basis[:, col] = power * self.x ** (power - 1)
            # the second derivative of the linear term is zero, the column
            # was initialized with zeros
            if power > 1:
                der2_basis[:, col] = (power * (power - 1)
                                      * self.x ** (power - 2))

        cov_der2 = np.dot(der2_basis.T, der2_basis)

        return basis, der_basis, der2_basis, cov_der2


class UnivariateBSplines(UnivariateGamSmoother):
    """B-Spline single smooth component

    This creates and holds the B-Spline basis function for one
    component.

    Parameters
    ----------
    x : ndarray, 1-D
        underlying explanatory variable for smooth terms.
    df : int
        number of basis functions or degrees of freedom
    degree : int
        degree of the spline
    include_intercept : bool
        If False, then the basis functions are transformed so that they
        do not include a constant. This avoids perfect collinearity if
        a constant or several components are included in the model.
    constraints : {None, str, array}
        Constraints are used to transform the basis functions to satisfy
        those constraints.
        `constraints = 'center'` applies a linear transform to remove the
        constant and center the basis functions.
    variable_name : {None, str}
        The name for the underlying explanatory variable, x, used in for
        creating the column and parameter names for the basis functions.
    covder2_kwds : {None, dict}
        options for computing the penalty matrix from the second derivative
        of the spline. They are passed on to ``get_covder2``.
    **knot_kwds
        Keyword options for the knot selection, passed on to
        ``get_knots_bsplines``.
        By default knots are selected in the same way as in patsy, however the
        number of knots is independent of keeping or removing the constant.
        Interior knot selection is based on quantiles of the data and is the
        same in patsy and mgcv. Boundary points are at the limits of the data
        range.
        The available options are

        - knots : None or array
          interior knots
        - spacing : 'quantile' or 'equal'
        - lower_bound : None or float
          location of lower boundary knots, all boundary knots are at the same
          point
        - upper_bound : None or float
          location of upper boundary knots, all boundary knots are at the same
          point
        - all_knots : None or array
          If all knots are provided, then those will be taken as given and
          all other options will be ignored.
    """
    def __init__(self, x, df, degree=3, include_intercept=False,
                 constraints=None, variable_name='x',
                 covder2_kwds=None, **knot_kwds):
        self.degree = degree
        self.df = df
        self.include_intercept = include_intercept
        self.knots = get_knots_bsplines(x, degree=degree, df=df, **knot_kwds)
        if covder2_kwds is None:
            covder2_kwds = {}
        self.covder2_kwds = covder2_kwds
        super().__init__(
            x, constraints=constraints, variable_name=variable_name
        )

    def _smooth_basis_for_single_variable(self):
        """B-spline basis, its derivatives and the penalty matrix."""
        splines = _eval_bspline_basis(
            self.x, self.knots, self.degree,
            include_intercept=self.include_intercept)
        basis, der_basis, der2_basis = splines

        # The penalty matrix is an approximate integral and not the
        # cross-product of der2_basis at the observations. The constraint
        # transform is applied later by the base class.
        cov_der2 = get_covder2(self, skip_ctransf=True,
                               **self.covder2_kwds)

        return basis, der_basis, der2_basis, cov_der2

    def transform(self, x_new, deriv=0, skip_ctransf=False):
        """create the spline basis for new observations

        The main use of this stateful transformation is for prediction
        using the same specification of the spline basis.

        Parameters
        ----------
        x_new : ndarray
            observations of the underlying explanatory variable.
            If it is None, then the ``x`` of the instance is used.
        deriv : int
            which derivative of the spline basis to compute
            This is an option for internal computation.
        skip_ctransf : bool
            whether to skip the constraint transform
            This is an option for internal computation.

        Returns
        -------
        basis : ndarray
            design matrix for the spline basis for given ``x_new``
        """
        points = self.x if x_new is None else x_new
        exog = _eval_bspline_basis(points, self.knots, self.degree,
                                   deriv=deriv,
                                   include_intercept=self.include_intercept)

        if skip_ctransf:
            return exog
        # ctransf does not exist yet when cov_der2 is computed
        ctransf = getattr(self, 'ctransf', None)
        if ctransf is None:
            return exog
        return exog.dot(ctransf)


class UnivariateCubicSplines(UnivariateGamSmoother):
    """Cubic Spline single smooth component

    Cubic splines as described in the wood's book in chapter 3

    Parameters
    ----------
    x : ndarray, 1-D
        underlying explanatory variable for the smooth term.
    df : int
        ``df - 2`` equally spaced knots are placed on the range of the
        transformed ``x``.
    constraints : {None, str, array}
        The strings 'none' and 'no-const' are handled by this class:
        'none' skips demeaning and rescaling of the basis, 'no-const'
        additionally to demeaning and rescaling drops the constant column.
        All other values are handled by the base class.
    transform : {'domain', tuple, None}
        'domain' maps ``x`` to the unit interval using its minimum and
        maximum, a tuple provides the lower and upper limit of the domain
        directly. None leaves ``x`` unchanged.
    variable_name : str
        Name used as prefix for the column names of the basis.
    """

    def __init__(self, x, df, constraints=None, transform='domain',
                 variable_name='x'):

        self.degree = 3
        self.df = df
        self.transform_data_method = transform

        self.x = x = self.transform_data(x, initialize=True)
        self.knots = _equally_spaced_knots(x, df)
        super().__init__(
            x, constraints=constraints, variable_name=variable_name
        )

    def transform_data(self, x, initialize=False):
        """Map ``x`` to the domain of the spline.

        With ``initialize=True`` the limits of the domain are stored on
        the instance, otherwise the stored limits are used.
        """
        tm = self.transform_data_method
        if tm is None:
            return x

        if initialize is True:
            if tm == 'domain':
                self.domain_low = x.min(0)
                self.domain_upp = x.max(0)
            elif isinstance(tm, tuple):
                self.domain_low = tm[0]
                self.domain_upp = tm[1]
                self.transform_data_method = 'domain'
            else:
                raise ValueError("transform should be None, 'domain' "
                                 "or a tuple")
            self.domain_diff = self.domain_upp - self.domain_low

        if self.transform_data_method == 'domain':
            x = (x - self.domain_low) / self.domain_diff
            return x
        else:
            raise ValueError("incorrect transform_data_method")

    def _smooth_basis_for_single_variable(self):
        """Basis without the last spline column and its penalty matrix."""
        constraints = self.constraints
        # guard the comparisons, constraints can also be an array
        is_option = isinstance(constraints, str)
        skip_rescale = is_option and constraints == 'none'

        basis = self._splines_x()[:, :-1]
        # demean except for constant, does not affect derivatives
        if not skip_rescale:
            self.transf_mean = basis[:, 1:].mean(0)
            basis[:, 1:] -= self.transf_mean
        else:
            # one entry per non-constant column, the same layout as in the
            # demeaned case so that ``transform`` can subtract it
            self.transf_mean = np.zeros(basis.shape[1] - 1)
        s = self._splines_s()[:-1, :-1]
        if not skip_rescale:
            ctransf = np.diag(1/np.max(np.abs(basis), axis=0))
        else:
            ctransf = np.eye(basis.shape[1])
        # use np.eye to avoid rescaling
        # ctransf = np.eye(basis.shape[1])

        if is_option and constraints == 'no-const':
            # ctransf is applied from the right, ``basis.dot(ctransf)``,
            # dropping the constant means dropping the first column
            ctransf = ctransf[:, 1:]

        self.ctransf = ctransf

        return basis, None, None, s

    def _rk(self, x, z):
        """Kernel function that defines the spline columns and the penalty.

        Works elementwise and broadcasts ``x`` against ``z``.
        """
        p1 = ((z - 1 / 2) ** 2 - 1 / 12) * ((x - 1 / 2) ** 2 - 1 / 12) / 4
        p2 = ((np.abs(z - x) - 1 / 2) ** 4 -
              1 / 2 * (np.abs(z - x) - 1 / 2) ** 2 +
              7 / 240) / 24.
        return p1 - p2

    def _splines_x(self, x=None):
        """Full basis: constant, ``x`` and one column for each knot."""
        if x is None:
            x = self.x
        x = np.asarray(x)
        knots = np.asarray(self.knots)
        basis = np.ones(shape=(x.shape[0], len(knots) + 2))
        basis[:, 1] = x
        # equivalent to outer(x, xk, fun=rk)
        basis[:, 2:] = self._rk(x[:, None], knots[None, :])
        return basis

    def _splines_s(self):
        """Penalty matrix, zero for the constant and the linear column."""
        knots = np.asarray(self.knots)
        q = len(knots) + 2
        s = np.zeros(shape=(q, q))
        s[2:, 2:] = self._rk(knots[:, None], knots[None, :])
        return s

    def transform(self, x_new):
        """create the spline basis for new observations

        Parameters
        ----------
        x_new : ndarray
            observations of the underlying explanatory variable

        Returns
        -------
        ndarray
            design matrix for the spline basis for given ``x_new``, with
            the same column layout as the ``basis`` attribute.
        """
        x_new = self.transform_data(x_new, initialize=False)
        # drop the last column as in _smooth_basis_for_single_variable,
        # otherwise the shapes of transf_mean and ctransf do not match
        exog = self._splines_x(x_new)[:, :-1]
        exog[:, 1:] -= self.transf_mean
        if self.ctransf is not None:
            exog = exog.dot(self.ctransf)
        return exog


class UnivariateCubicCyclicSplines(UnivariateGamSmoother):
    """cyclic cubic regression spline single smooth component

    This creates and holds the Cyclic CubicSpline basis function for one
    component. The basis is created by the ``cc`` transform of patsy.

    Parameters
    ----------
    x : ndarray, 1-D
        underlying explanatory variable for smooth terms.
    df : int
        number of basis functions or degrees of freedom
    constraints : {None, str, array}
        Constraints are used to transform the basis functions to satisfy
        those constraints.
        `constraints = 'center'` applies a linear transform to remove the
        constant and center the basis functions.
    variable_name : None or str
        The name for the underlying explanatory variable, x, used in for
        creating the column and parameter names for the basis functions.
    """
    def __init__(self, x, df, constraints=None, variable_name='x'):
        self.degree = 3
        self.df = df
        self.x = x
        self.knots = _equally_spaced_knots(x, df)
        super().__init__(
            x, constraints=constraints, variable_name=variable_name
        )

    def _smooth_basis_for_single_variable(self):
        """Cyclic cubic spline basis from patsy and its penalty matrix."""
        formula = "cc(x, df={}) - 1".format(self.df)
        basis = dmatrix(formula, {"x": self.x})
        # kept for the stateful transform of new observations
        self.design_info = basis.design_info
        n_inner_knots = self.df - 2 + 1  # +n_constraints
        # TODO: from CubicRegressionSplines class
        all_knots = _get_all_sorted_knots(self.x, n_inner_knots=n_inner_knots,
                                          inner_knots=None,
                                          lower_bound=None, upper_bound=None)

        b, d = self._get_b_and_d(all_knots)
        penalty = self._get_s(b, d)

        return basis, None, None, penalty

    def _get_b_and_d(self, knots):
        """Returns mapping of cyclic cubic spline values to 2nd derivatives.

        .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006,
           pp 146-147

        Parameters
        ----------
        knots : ndarray
            The 1-d array knots used for cubic spline parametrization,
            must be sorted in ascending order.

        Returns
        -------
        b : ndarray
            The b matrix on page 146 of Wood's book.
        d : ndarray
            The d matrix on page 146 of Wood's book.

        Notes
        -----
        The penalty matrix is equal to ``s = d.T.dot(b^-1).dot(d)``
        """
        h = np.diff(knots)
        n = knots.size - 1
        last = n - 1

        # b and d are defined such that the penalty matrix is equivalent to:
        # s = d.T.dot(b^-1).dot(d)
        # reference in particular to pag 146 of Wood's book
        b = np.zeros((n, n))
        d = np.zeros((n, n))

        # entries that connect the first and the last interval
        b[0, 0] = (h[last] + h[0]) / 3.
        b[0, last] = b[last, 0] = h[last] / 6.

        d[0, 0] = -1. / h[0] - 1. / h[last]
        d[0, last] = d[last, 0] = 1. / h[last]

        # remaining diagonal and the first off-diagonals
        for i in range(1, n):
            h_left = h[i - 1]
            b[i, i] = (h_left + h[i]) / 3.
            b[i, i - 1] = b[i - 1, i] = h_left / 6.

            d[i, i] = -1. / h_left - 1. / h[i]
            d[i, i - 1] = d[i - 1, i] = 1. / h_left

        return b, d

    def _get_s(self, b, d):
        """Penalty matrix ``d.T.dot(b^-1).dot(d)``."""
        b_inv = np.linalg.inv(b)
        return d.T.dot(b_inv).dot(d)

    def transform(self, x_new):
        """create the spline basis for new observations

        The design information stored when the basis was created is
        reused, the constraint transform is applied if there is one.
        """
        exog = dmatrix(self.design_info, {"x": x_new})
        if self.ctransf is None:
            return exog
        return exog.dot(self.ctransf)


class AdditiveGamSmoother(metaclass=ABCMeta):
    """Base class for additive smooth components

    Subclasses implement ``_make_smoothers_list`` which returns one
    univariate smoother for each column of ``x``. The constructor collects
    the basis, the penalty matrices, the column names and boolean masks
    for the columns that belong to each smoother.

    Parameters
    ----------
    x : array_like, 1-D or 2-D
        underlying explanatory variables, observations in rows. 1-D data
        is converted to a single column.
    variable_names : {list[str], None}
        Names of the explanatory variables. If None, then the names are
        taken from ``x`` if it is a pandas object and are 'x0', 'x1', ...
        otherwise.
    include_intercept : {bool, list[bool]}
        A single bool is repeated for each variable, anything else is
        stored unchanged.
    **kwargs
        Not used by this class.
    """
    def __init__(self, x, variable_names=None, include_intercept=False,
                 **kwargs):

        # get pandas names before using asarray
        if isinstance(x, pd.DataFrame):
            data_names = x.columns.tolist()
        elif isinstance(x, pd.Series):
            data_names = [x.name]
        else:
            data_names = None

        x = np.asarray(x)

        if x.ndim == 1:
            # copy as a column without assigning to the shape attribute
            self.x = x.reshape(len(x), 1).copy()
        else:
            self.x = x

        self.nobs, self.k_variables = self.x.shape
        if isinstance(include_intercept, (bool, np.bool_)):
            self.include_intercept = [include_intercept] * self.k_variables
        else:
            self.include_intercept = include_intercept

        if variable_names is None:
            if data_names is not None:
                self.variable_names = data_names
            else:
                self.variable_names = ['x' + str(i)
                                       for i in range(self.k_variables)]
        else:
            self.variable_names = variable_names

        self.smoothers = self._make_smoothers_list()
        self.basis = np.hstack([smoother.basis
                                for smoother in self.smoothers])
        self.dim_basis = self.basis.shape[1]
        self.penalty_matrices = [smoother.cov_der2
                                 for smoother in self.smoothers]
        self.col_names = []
        for smoother in self.smoothers:
            self.col_names.extend(smoother.col_names)

        # one boolean mask per smoother that selects its columns in basis
        self.mask = []
        last_column = 0
        for smoother in self.smoothers:
            mask = np.zeros(self.dim_basis, dtype=bool)
            mask[last_column:smoother.dim_basis + last_column] = True
            last_column = last_column + smoother.dim_basis
            self.mask.append(mask)

    @abstractmethod
    def _make_smoothers_list(self):
        """Return the list of univariate smoothers, one for each variable."""
        pass

    def transform(self, x_new):
        """create the spline basis for new observations

        The main use of this stateful transformation is for prediction
        using the same specification of the spline basis.

        Parameters
        ----------
        x_new: array_like
            observations of the underlying explanatory variable.
            pandas objects and lists are converted with ``numpy.asarray``
            in the same way as ``x`` in the constructor.

        Returns
        -------
        basis : ndarray
            design matrix for the spline basis for given ``x_new``.
        """
        x_new = np.asarray(x_new)
        if x_new.ndim == 1 and self.k_variables == 1:
            x_new = x_new.reshape(-1, 1)
        exog = np.hstack([self.smoothers[i].transform(x_new[:, i])
                          for i in range(self.k_variables)])
        return exog


class GenericSmoothers(AdditiveGamSmoother):
    """generic class for additive smooth components for GAM

    The univariate smoothers are created by the user and are only
    collected by this class.

    Parameters
    ----------
    x : array_like
        underlying explanatory variables
    smoothers : list
        univariate smoother instances, returned unchanged by
        ``_make_smoothers_list``.
    """
    def __init__(self, x, smoothers):
        # has to be available when the base class asks for the list
        self.smoothers = smoothers
        super().__init__(x, variable_names=None)

    def _make_smoothers_list(self):
        """Return the smoothers that were provided in the constructor."""
        provided = self.smoothers
        return provided


class PolynomialSmoother(AdditiveGamSmoother):
    """additive polynomial components for GAM

    Parameters
    ----------
    x : array_like, 1-D or 2-D
        underlying explanatory variables, observations in rows.
    degrees : list[int]
        degree of the polynomial for each column of ``x``.
    variable_names : {list[str], None}
        names of the explanatory variables.
    """
    def __init__(self, x, degrees, variable_names=None):
        self.degrees = degrees
        super().__init__(x, variable_names=variable_names)

    def _make_smoothers_list(self):
        """Create one polynomial smoother for each column of ``x``."""
        return [
            UnivariatePolynomialSmoother(
                self.x[:, v],
                degree=self.degrees[v],
                variable_name=self.variable_names[v])
            for v in range(self.k_variables)
        ]


class BSplines(AdditiveGamSmoother):
    """Additive smooth components based on B-Splines.

    Builds and stores the B-Spline basis functions of several smooth
    components.

    Parameters
    ----------
    x : array_like, 1-D or 2-D
        Underlying explanatory variable for the smooth terms.
        A 2-dimensional ``x`` should have observations in rows and
        explanatory variables in columns.
    df : {int, array_like[int]}
        Number of basis functions or degrees of freedom. The length should
        equal the number of columns of `x`; a plain integer may be used if
        `x` has one column or is 1-D.
    degree : {int, array_like[int]}
        Degree(s) of the spline; the length and type rules of `df` apply.
    include_intercept : bool
        If False, then the basis functions are transformed so that they
        do not contain a constant. This avoids perfect collinearity if
        a constant or several components are included in the model.
    constraints : {None, str, array}
        Constraints that the transformed basis functions have to satisfy.
        `constraints = 'center'` applies a linear transform that removes the
        constant and centers the basis functions.
    variable_names : {list[str], None}
        Names of the underlying explanatory variables in ``x``; they are
        used to create the column and parameter names of the basis
        functions. If ``x`` is a pandas object, then the names will be
        taken from it.
    knot_kwds : None or list of dict
        Options for the knot selection, one dict per component.
        By default knots are selected in the same way as in patsy, however
        the number of knots is independent of keeping or removing the
        constant. Interior knot selection is based on quantiles of the data
        and is the same in patsy and mgcv. Boundary points are at the limits
        of the data range.
        The available options used with `get_knots_bsplines` are

        - knots : None or array
          interior knots
        - spacing : 'quantile' or 'equal'
        - lower_bound : None or float
          location of lower boundary knots, all boundary knots are at the
          same point
        - upper_bound : None or float
          location of upper boundary knots, all boundary knots are at the
          same point
        - all_knots : None or array
          If all knots are provided, then those will be taken as given and
          all other options will be ignored.


    Attributes
    ----------
    smoothers : list of univariate smooth component instances
    basis : design matrix, array of spline bases columns for all components
    penalty_matrices : list of penalty matrices, one for each smooth term
    dim_basis : number of columns in the basis
    k_variables : number of smooth components
    col_names : created names for the basis columns

    Further attributes hold the specification of the splines, and some
    attributes are mainly for internal use.

    Notes
    -----
    There are two different ways to remove a constant from the spline basis
    functions.
    The first drops one basis column and normalizes the remaining columns.
    This is obtained by the default
    ``include_intercept=False, constraints=None``
    The second uses the centering transform, a linear transformation of all
    basis functions. As a consequence of the transformation, the B-spline
    basis functions do not have locally bounded support anymore. This is
    obtained with ``constraints='center'``. In this case
    ``include_intercept`` will be automatically set to True to avoid
    dropping an additional column.
    """

    def __init__(self, x, df, degree, include_intercept=False,
                 constraints=None, variable_names=None, knot_kwds=None):
        # a plain int specification is wrapped in a length-one integer array
        self.degrees = (np.array([degree], dtype=int)
                        if isinstance(degree, int) else degree)
        self.dfs = np.array([df], dtype=int) if isinstance(df, int) else df
        self.knot_kwds = knot_kwds
        # TODO: move attaching constraints to super call
        self.constraints = constraints
        # centering already removes the constant, so the full basis is kept
        keep_intercept = True if constraints == 'center' else include_intercept

        super().__init__(
            x,
            include_intercept=keep_intercept,
            variable_names=variable_names
        )

    def _make_smoothers_list(self):
        """Create one ``UnivariateBSplines`` instance per column of x."""
        components = []
        for col in range(self.k_variables):
            # per-component knot options, none if no options were given
            knot_options = {} if not self.knot_kwds else self.knot_kwds[col]
            components.append(
                UnivariateBSplines(
                    self.x[:, col],
                    df=self.dfs[col],
                    degree=self.degrees[col],
                    include_intercept=self.include_intercept[col],
                    constraints=self.constraints,
                    variable_name=self.variable_names[col],
                    **knot_options))

        return components


class CubicSplines(AdditiveGamSmoother):
    """additive smooth components using cubic splines as in Wood 2006.

    Note, these splines do NOT use the same spline basis as
    ``Cubic Regression Splines``.

    Parameters
    ----------
    x : array_like
        underlying explanatory variables, forwarded to the parent class
        constructor.
    df : array_like[int]
        indexed by column position; ``df[v]`` is passed as ``df`` to the
        ``UnivariateCubicSplines`` instance of column ``v``.
    constraints : {None, str, array}
        passed both to the parent class constructor and to each
        ``UnivariateCubicSplines`` instance. Default is 'center'.
    transform : str
        option passed as ``transform`` to each ``UnivariateCubicSplines``
        instance. Default is 'domain'.
    variable_names : {list[str], None}
        names of the explanatory variables, forwarded to the parent class
        constructor.

    Notes
    -----
    The ``transform`` option is stored in the attribute
    ``transform_data_method``. It is not stored as ``self.transform``
    because an instance attribute of that name would shadow the inherited
    ``transform(x_new)`` method and make it uncallable.
    """
    def __init__(self, x, df, constraints='center', transform='domain',
                 variable_names=None):
        self.dfs = df
        self.constraints = constraints
        # do not assign to ``self.transform``: that would replace the
        # inherited ``transform`` method by a string on this instance
        self.transform_data_method = transform
        super().__init__(
            x, constraints=constraints, variable_names=variable_names
        )

    def _make_smoothers_list(self):
        """Create one ``UnivariateCubicSplines`` instance per column of x."""
        smoothers = []
        for v in range(self.k_variables):
            uv_smoother = UnivariateCubicSplines(
                self.x[:, v], df=self.dfs[v],
                constraints=self.constraints,
                transform=self.transform_data_method,
                variable_name=self.variable_names[v])
            smoothers.append(uv_smoother)

        return smoothers


class CyclicCubicSplines(AdditiveGamSmoother):
    """additive smooth components using cyclic cubic regression splines

    This spline basis is the same as in patsy.

    Parameters
    ----------
    x : array_like, 1-D or 2-D
        underlying explanatory variable for smooth terms.
        If 2-dimensional, then observations should be in rows and
        explanatory variables in columns.
    df : {int, array_like[int]}
        number of basis functions or degrees of freedom. An array_like is
        indexed by column position of `x`; a scalar is used for every
        component.
    constraints : {None, str, array}
        Constraints are used to transform the basis functions to satisfy
        those constraints.
    variable_names : {list[str], None}
        The names for the underlying explanatory variables, x used in for
        creating the column and parameter names for the basis functions.
        If ``x`` is a pandas object, then the names will be taken from it.
    """
    def __init__(self, x, df, constraints=None, variable_names=None):
        self.dfs = df
        # TODO: move attaching constraints to super call
        self.constraints = constraints
        super().__init__(x, variable_names=variable_names)

    def _make_smoothers_list(self):
        """Create one ``UnivariateCubicCyclicSplines`` per column of x."""
        dfs = self.dfs
        if np.ndim(dfs) == 0:
            # a scalar cannot be indexed per component, so it is
            # repeated for every smooth component
            dfs = [dfs] * self.k_variables

        smoothers = []
        for v in range(self.k_variables):
            uv_smoother = UnivariateCubicCyclicSplines(
                self.x[:, v],
                df=dfs[v], constraints=self.constraints,
                variable_name=self.variable_names[v])
            smoothers.append(uv_smoother)

        return smoothers

# class CubicRegressionSplines(BaseCubicSplines):
#     # TODO: this class is still not tested
#
#     def __init__(self, x, df=10):
#         import warnings
#         warnings.warn("This class is still not tested and it is probably"
#                       " not working properly. "
#                       "I suggest to use another smoother", Warning)
#
#         super(CubicRegressionSplines, self).__init__(x, df)
#
#         self.basis = dmatrix("cc(x, df=" + str(df) + ") - 1", {"x": x})
#         n_inner_knots = df - 2 + 1 # +n_constraints
#         # TODO: According to CubicRegressionSplines class this should be
#         #  n_inner_knots = df - 2
#         all_knots = _get_all_sorted_knots(x, n_inner_knots=n_inner_knots,
#                                           inner_knots=None,
#                                           lower_bound=None, upper_bound=None)
#
#         b, d = self._get_b_and_d(all_knots)
#         self.s = self._get_s(b, d)
#
#         self.dim_basis = self.basis.shape[1]
#
#     def _get_b_and_d(self, knots):
#
#         h = knots[1:] - knots[:-1]
#         n = knots.size - 1
#
#         # b and d are defined such that the penalty matrix is equivalent to:
#         # s = d.T.dot(b^-1).dot(d)
#         # see in particular page 146 of Wood's book
#         b = np.zeros((n, n)) # the b matrix on page 146 of Wood's book
#         d = np.zeros((n, n)) # the d matrix on page 146 of Wood's book
#
#         for i in range(n-2):
#             d[i, i] = 1/h[i]
#             d[i, i+1] = -1/h[i] - 1/h[i+1]
#             d[i, i+2] = 1/h[i+1]
#
#             b[i, i] = (h[i] + h[i+1])/3
#
#         for i in range(n-3):
#             b[i, i+1] = h[i+1]/6
#             b[i+1, i] = h[i+1]/6
#
#         return b, d
#
#     def _get_s(self, b, d):
#
#         return d.T.dot(np.linalg.pinv(b)).dot(d)