"""Treatment effect estimators

follows largely Stata's teffects in Stata 13 manual

Created on Tue Jun  9 22:45:23 2015

Author: Josef Perktold
License: BSD-3

currently available

                     ATE        POM_0        POM_1
res_ipw       230.688598  3172.774059  3403.462658
res_aipw     -230.989201  3403.355253  3172.366052
res_aipw_wls -227.195618  3403.250651  3176.055033
res_ra       -239.639211  3403.242272  3163.603060
res_ipwra    -229.967078  3403.335639  3173.368561


Lots of todos, just the beginning, but most effects are available but not
standard errors, and no code structure that has a useful pattern

see https://github.com/statsmodels/statsmodels/issues/2443

Note: script requires cattaneo2 data file from Stata 14, hardcoded file path
could be loaded with webuse

"""

import numpy as np
from statsmodels.compat.pandas import Substitution
from scipy.linalg import block_diag
from statsmodels.regression.linear_model import WLS
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.stats.contrast import ContrastResults
from statsmodels.tools.docstring import indent


def _mom_ate(params, endog, tind, prob, weighted=True):
    """moment condition for average treatment effect

    This does not include a moment condition for potential outcome mean (POM).

    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)
    if weighted:
        w0 /= w0.mean()
        w1 /= w1.mean()

    wdiff = w1 - w0

    return endog * wdiff - params


def _mom_atm(params, endog, tind, prob, weighted=True):
    """moment conditions for average treatment means (POM)

    moment conditions are POM0 and POM1
    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)
    if weighted:
        w1 /= w1.mean()
        w0 /= w0.mean()

    return np.column_stack((endog * w0 - params[0], endog * w1 - params[1]))


def _mom_ols(params, endog, tind, prob, weighted=True):
    """
    moment condition for average treatment mean based on OLS dummy regression

    moment conditions are POM0 and POM1

    """
    w = tind / prob + (1-tind) / (1 - prob)

    treat_ind = np.column_stack((1 - tind, tind))
    mom = (w * (endog - treat_ind.dot(params)))[:, None] * treat_ind

    return mom


def _mom_ols_te(tm, endog, tind, prob, weighted=True):
    """
    moment condition for average treatment mean based on OLS dummy regression

    first moment is ATE
    second moment is POM0  (control)

    """
    w = tind / prob + (1-tind) / (1 - prob)

    treat_ind = np.column_stack((tind, np.ones(len(tind))))
    mom = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind

    return mom


def _mom_olsex(params, model=None, exog=None, scale=None):
    exog = exog if exog is not None else model.exog
    fitted = model.predict(params, exog)
    resid = model.endog - fitted
    if scale is not None:
        resid /= scale
    mom = resid[:, None] * exog
    return mom


def ate_ipw(endog, tind, prob, weighted=True, probt=None):
    """average treatment effect based on basic inverse propensity weighting.

    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)

    if probt is not None:
        w1 *= probt
        w0 *= probt

    if weighted:
        w0 /= w0.mean()
        w1 /= w1.mean()

    wdiff = w1 - w0

    return (endog * wdiff).mean(), (endog * w0).mean(), (endog * w1).mean()


class _TEGMMGeneric1(GMM):
    """GMM class to get cov_params for treatment effects

    This combines moment conditions for the selection/treatment model and the
    outcome model to get the standard errors for the treatment effect that
    takes the first step estimation of the treatment model into account.

    this also matches standard errors of ATE and POM in Stata

    """

    def __init__(self, endog, res_select, mom_outcome, exclude_tmoms=False,
                 **kwargs):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome
        self.exclude_tmoms = exclude_tmoms
        self.__dict__.update(kwargs)

        # add xnames so it's not None
        # we don't have exog in init in this version
        if self.data.xnames is None:
            self.data.xnames = []

        # need information about decomposition of parameters
        if exclude_tmoms:
            self.k_select = 0
        else:
            self.k_select = len(res_select.model.data.param_names)

        if exclude_tmoms:
            # fittedvalues is still linpred
            self.prob = self.results_select.predict()
        else:
            self.prob = None

    def momcond(self, params):
        k_outcome = len(params) - self.k_select
        tm = params[:k_outcome]
        p_tm = params[k_outcome:]

        tind = self.results_select.model.endog

        if self.exclude_tmoms:
            prob = self.prob
        else:
            prob = self.results_select.model.predict(p_tm)

        moms_list = []
        mom_o = self.mom_outcome(tm, self.endog, tind, prob, weighted=True)
        moms_list.append(mom_o)

        if not self.exclude_tmoms:
            mom_t = self.results_select.model.score_obs(p_tm)
            moms_list.append(mom_t)

        moms = np.column_stack(moms_list)
        return moms


class _TEGMM(GMM):
    """GMM class to get cov_params for treatment effects

    This combines moment conditions for the selection/treatment model and the
    outcome model to get the standard errors for the treatment effect that
    takes the first step estimation of the treatment model into account.

    this also matches standard errors of ATE and POM in Stata

    """

    def __init__(self, endog, res_select, mom_outcome):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome

        # add xnames so it's not None
        # we don't have exog in init in this version
        if self.data.xnames is None:
            self.data.xnames = []

    def momcond(self, params):
        tm = params[:2]
        p_tm = params[2:]

        tind = self.results_select.model.endog
        prob = self.results_select.model.predict(p_tm)
        momt = self.mom_outcome(tm, self.endog, tind, prob)  # weighted=True)
        moms = np.column_stack((momt,
                                self.results_select.model.score_obs(p_tm)))
        return moms


class _IPWGMM(_TEGMMGeneric1):
    """ GMM for aipw treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        # Note: momcond in original order of observations
        ra = self.teff
        res_select = ra.results_select
        tind = ra.treatment
        endog = ra.model_pool.endog
        effect_group = self.effect_group

        tm = params[:2]
        ps = params[2:]

        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.01, 0.99)
        prob = prob_sel

        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
        elif isinstance(effect_group, np.ndarray):
            probt = probt
        else:
            raise ValueError("incorrect option for effect_group")

        w = tind / prob + (1 - tind) / (1 - prob)
        # Are we supposed to use scaled weights? doesn't cloesely match Stata
        # w1 = tind / prob
        # w2 = (1 - tind) / (1 - prob)
        # w = w1 / w1.sum() * tind.sum() + w2 / w2.sum() * (1 - tind).sum()
        if probt is not None:
            w *= probt

        treat_ind = np.column_stack((tind, np.ones(len(tind))))
        mm = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind

        mom_select = res_select.model.score_obs(ps)
        moms = np.column_stack((mm, mom_select))
        return moms


class _AIPWGMM(_TEGMMGeneric1):
    """ GMM for aipw treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[2*k+1:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped
        endog = ra.endog_grouped

        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.01, 0.99)

        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]
        prob = np.concatenate((prob0, prob1))

        # outcome models by treatment unweighted
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0)

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1)

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        tind = ra.treatment
        tind = np.concatenate((tind[~treat_mask], tind[treat_mask]))
        correct0 = (endog - fitted0) / (1 - prob) * (1 - tind)
        correct1 = (endog - fitted1) / prob * tind

        tmean0 = fitted0 + correct0
        tmean1 = fitted1 + correct1
        ate = tmean1 - tmean0

        mm = ate - pm
        mpom = tmean0 - ppom
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


class _AIPWWLSGMM(_TEGMMGeneric1):
    """ GMM for aipw-wls treatment effect and potential outcome

    uses weighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[-6:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped
        endog = ra.endog_grouped

        # todo: need weights in outcome models
        prob_sel = np.asarray(res_select.model.predict(ps))

        prob_sel = np.clip(prob_sel, 0.001, 0.999)

        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]
        prob = np.concatenate((prob0, prob1))

        tind = 0
        ww0 = (1 - tind) / (1 - prob0) * ((1 - tind) / (1 - prob0) - 1)
        tind = 1
        ww1 = tind / prob1 * (tind / prob1 - 1)

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * ww0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * ww1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        tind = ra.treatment
        tind = np.concatenate((tind[~treat_mask], tind[treat_mask]))

        correct0 = (endog - fitted0) / (1 - prob) * (1 - tind)
        correct1 = (endog - fitted1) / prob * tind

        tmean0 = fitted0 + correct0
        tmean1 = fitted1 + correct1
        ate = tmean1 - tmean0

        mm = ate - pm
        mpom = tmean0 - ppom
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


class _RAGMM(_TEGMMGeneric1):
    """GMM for regression adjustment treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]
        p0 = params[1:k+1]
        p1 = params[-k:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped

        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0)

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1)

        momout = block_diag(mom0, mom1)

        mm = fitted1 - fitted0 - pm
        mpom = fitted0 - ppom
        mm = np.column_stack((mm, mpom))
        if self.probt is not None:
            mm *= (self.probt / self.probt.mean())[:, None]

        moms = np.column_stack((mm, momout))
        return moms


class _IPWRAGMM(_TEGMMGeneric1):
    """ GMM for ipwra treatment effect and potential outcome
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[-6:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model

        # use reordered exog so it matches sub models by group
        exog = ra.exog_grouped
        tind = np.zeros(len(treat_mask))
        tind[-treat_mask.sum():] = 1

        # selection probability by group, propensity score
        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.001, 0.999)
        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]

        effect_group = self.effect_group
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            sind = 1
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            # for averaging effect on treated
            sind = tind / tind.mean()
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1

            sind = (1 - tind)
            sind /= sind.mean()
        else:
            raise ValueError("incorrect option for effect_group")

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * w0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * w1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        mm = (fitted1 - fitted0 - pm) * sind
        mpom = (fitted0 - ppom) * sind
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


class TreatmentEffectResults(ContrastResults):
    """
    Results class for treatment effect estimation

    Parameters
    ----------
    teff : instance of TreatmentEffect class
    results_gmm : instance of GMMResults class
    method : string
        Method and estimator of treatment effect.
    kwds: dict
        Other keywords with additional information.

    Notes
    -----
    This class is a subclass of ContrastResults and inherits methods like
    summary, summary_frame and conf_int. Attributes correspond to a z-test
    given by ``GMMResults.t_test``.
    """

    def __init__(self, teff, results_gmm, method, **kwds):
        super().__init__()
        k_params = len(results_gmm.params)
        constraints = np.zeros((3, k_params))
        constraints[0, 0] = 1
        constraints[1, 1] = 1
        constraints[2, :2] = [1, 1]
        tt = results_gmm.t_test(constraints)
        self.__dict__.update(tt.__dict__)
        self.teff = teff
        self.results_gmm = results_gmm
        self.method = method
        # TODO: make those explicit?
        self.__dict__.update(kwds)

        self.c_names = ["ATE", "POM0", "POM1"]


doc_params_returns = """\
Parameters
----------
return_results : bool
    If True, then a results instance is returned.
    If False, just ATE, POM0 and POM1 are returned.
effect_group : {"all", 0, 1}
    ``effectgroup`` determines for which population the effects are
    estimated.
    If effect_group is "all", then sample average treatment effect and
    potential outcomes are returned
    If effect_group is 1 or "treated", then effects on treated are
    returned.
    If effect_group is 0, "treated" or "control", then effects on
    untreated, i.e. control group, are returned.
disp : bool
    Indicates whether the scipy optimizer should display the
    optimization results

Returns
-------
TreatmentEffectsResults instance or tuple (ATE, POM0, POM1)
"""

doc_params_returns2 = """\
Parameters
----------
return_results : bool
    If True, then a results instance is returned.
    If False, just ATE, POM0 and POM1 are returned.
disp : bool
    Indicates whether the scipy optimizer should display the
    optimization results

Returns
-------
TreatmentEffectsResults instance or tuple (ATE, POM0, POM1)
"""


class TreatmentEffect:
    """
    Estimate average treatment effect under conditional independence

    .. versionadded:: 0.14.0

    This class estimates treatment effect and potential outcome using 5
    different methods, ipw, ra, aipw, aipw-wls, ipw-ra.
    Standard errors and inference are based on the joint GMM representation of
    selection or treatment model, outcome model and effect functions.

    Parameters
    ----------
    model : instance of a model class
        The model class should contain endog and exog for the outcome model.
    treatment : ndarray
        indicator array for observations with treatment (1) or without (0)
    results_select : results instance
        The results instance for the treatment or selection model.
    _cov_type : "HC0"
        Internal keyword. The keyword oes not affect GMMResults which always
        corresponds to HC0 standard errors.
    kwds : keyword arguments
        currently not used

    Notes
    -----
    The outcome model is currently limited to a linear model based on OLS.
    Other outcome models, like Logit and Poisson, will become available in
    future.

    See `Treatment Effect notebook
    <../examples/notebooks/generated/treatment_effect.html>`__
    for an overview.

    """

    def __init__(self, model, treatment, results_select=None, _cov_type="HC0",
                 **kwds):
        # Note _cov_type is only for preliminary estimators,
        # cov in GMM alwasy corresponds to HC0
        self.__dict__.update(kwds)  # currently not used
        self.treatment = np.asarray(treatment)
        self.treat_mask = treat_mask = (treatment == 1)

        if results_select is not None:
            self.results_select = results_select
            self.prob_select = results_select.predict()

        self.model_pool = model
        endog = model.endog
        exog = model.exog
        self.nobs = endog.shape[0]
        self._cov_type = _cov_type

        # no init keys are supported
        mod0 = model.__class__(endog[~treat_mask], exog[~treat_mask])
        self.results0 = mod0.fit(cov_type=_cov_type)
        mod1 = model.__class__(endog[treat_mask], exog[treat_mask])
        self.results1 = mod1.fit(cov_type=_cov_type)
        # self.predict_mean0 = self.model_pool.predict(self.results0.params
        #                                             ).mean()
        # self.predict_mean1 = self.model_pool.predict(self.results1.params
        #                                             ).mean()

        self.exog_grouped = np.concatenate((mod0.exog, mod1.exog), axis=0)
        self.endog_grouped = np.concatenate((mod0.endog, mod1.endog), axis=0)

    @classmethod
    def from_data(cls, endog, exog, treatment, model='ols', **kwds):
        """create models from data

        not yet implemented

        """
        raise NotImplementedError

    def ipw(self, return_results=True, effect_group="all", disp=False):
        """Inverse Probability Weighted treatment effect estimation.

        Parameters
        ----------
        return_results : bool
            If True, then a results instance is returned.
            If False, just ATE, POM0 and POM1 are returned.
        effect_group : {"all", 0, 1}
            ``effectgroup`` determines for which population the effects are
            estimated.
            If effect_group is "all", then sample average treatment effect and
            potential outcomes are returned.
            If effect_group is 1 or "treated", then effects on treated are
            returned.
            If effect_group is 0, "treated" or "control", then effects on
            untreated, i.e. control group, are returned.
        disp : bool
            Indicates whether the scipy optimizer should display the
            optimization results

        Returns
        -------
        TreatmentEffectsResults instance or tuple (ATE, POM0, POM1)

        See Also
        --------
        TreatmentEffectsResults
        """
        endog = self.model_pool.endog
        tind = self.treatment
        prob = self.prob_select
        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
            effect_group = 0  # standardize effect_group name
        elif isinstance(effect_group, np.ndarray):
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        res_ipw = ate_ipw(endog, tind, prob, weighted=True, probt=probt)

        if not return_results:
            return res_ipw

        # gmm = _TEGMMGeneric1(endog, self.results_select, _mom_ols_te,
        #                     probt=probt)
        gmm = _IPWGMM(endog, self.results_select, None, teff=self,
                      effect_group=effect_group)
        start_params = np.concatenate((res_ipw[:2],
                                       self.results_select.params))
        res_gmm = gmm.fit(start_params=start_params,
                          inv_weights=np.eye(len(start_params)),
                          optim_method='nm',
                          optim_args={"maxiter": 5000, "disp": disp},
                          maxiter=1,
                          )

        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ra(self, return_results=True, effect_group="all", disp=False):
        """
        Regression Adjustment treatment effect estimation.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults
        """
        # need indicator for reordered observations
        tind = np.zeros(len(self.treatment))
        tind[-self.treatment.sum():] = 1
        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = tind
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - tind
            effect_group = 0  # standardize effect_group name
        elif isinstance(effect_group, np.ndarray):
            # TODO: do we keep this?
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        exog = self.exog_grouped

        # weight or indicator for effect_group
        if probt is not None:
            cw = (probt / probt.mean())
        else:
            cw = 1

        pom0 = (self.results0.predict(exog) * cw).mean()
        pom1 = (self.results1.predict(exog) * cw).mean()
        if not return_results:
            return pom1 - pom0, pom0, pom1

        endog = self.model_pool.endog
        mod_gmm = _RAGMM(endog, self.results_select, None, teff=self,
                         probt=probt)
        start_params = np.concatenate((
            # ate, tt0.effect,
            [pom1 - pom0, pom0],
            self.results0.params,
            self.results1.params))
        res_gmm = mod_gmm.fit(start_params=start_params,
                              inv_weights=np.eye(len(start_params)),
                              optim_method='nm',
                              optim_args={"maxiter": 5000, "disp": disp},
                              maxiter=1,
                              )
        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """

        nobs = self.nobs
        prob = self.prob_select
        tind = self.treatment
        exog = self.model_pool.exog  # in original order
        correct0 = (self.results0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (self.results1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = self.results0.predict(exog).mean() + correct0
        tmean1 = self.results1.predict(exog).mean() + correct1
        ate = tmean1 - tmean0
        if not return_results:
            return ate, tmean0, tmean1

        endog = self.model_pool.endog
        p2_aipw = np.asarray([ate, tmean0])

        mag_aipw1 = _AIPWGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate((
            p2_aipw,
            self.results0.params, self.results1.params,
            self.results_select.params))
        res_gmm = mag_aipw1.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 5000, "disp": disp},
            maxiter=1)

        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group="all",
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw_wls(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting.

        This uses weighted outcome regression, while `aipw` uses unweighted
        outcome regression.
        Option for effect on treated or on untreated is not available.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """
        nobs = self.nobs
        prob = self.prob_select

        endog = self.model_pool.endog
        exog = self.model_pool.exog
        tind = self.treatment
        treat_mask = self.treat_mask

        ww1 = tind / prob * (tind / prob - 1)
        mod1 = WLS(endog[treat_mask], exog[treat_mask],
                   weights=ww1[treat_mask])
        result1 = mod1.fit(cov_type='HC1')
        mean1_ipw2 = result1.predict(exog).mean()

        ww0 = (1 - tind) / (1 - prob) * ((1 - tind) / (1 - prob) - 1)
        mod0 = WLS(endog[~treat_mask], exog[~treat_mask],
                   weights=ww0[~treat_mask])
        result0 = mod0.fit(cov_type='HC1')
        mean0_ipw2 = result0.predict(exog).mean()

        self.results_ipwwls0 = result0
        self.results_ipwwls1 = result1

        correct0 = (result0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (result1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = mean0_ipw2 + correct0
        tmean1 = mean1_ipw2 + correct1
        ate = tmean1 - tmean0

        if not return_results:
            return ate, tmean0, tmean1

        p2_aipw_wls = np.asarray([ate, tmean0]).squeeze()

        # GMM
        mod_gmm = _AIPWWLSGMM(endog, self.results_select, None,
                              teff=self)
        start_params = np.concatenate((
            p2_aipw_wls,
            result0.params,
            result1.params,
            self.results_select.params))
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 5000, "disp": disp},
            maxiter=1)
        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group="all",
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ipw_ra(self, return_results=True, effect_group="all", disp=False):
        """
        ATE and POM from inverse probability weighted regression adjustment.

        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectsResults

        """
        treat_mask = self.treat_mask
        endog = self.model_pool.endog
        exog = self.model_pool.exog
        prob = self.prob_select

        prob0 = prob[~treat_mask]
        prob1 = prob[treat_mask]
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            exogt = exog
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            exogt = exog[treat_mask]
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1
            exogt = exog[~treat_mask]
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        mod0 = WLS(endog[~treat_mask], exog[~treat_mask],
                   weights=w0)
        result0 = mod0.fit(cov_type='HC1')
        # mean0_ipwra = (result0.predict(exog) * (prob / prob.mean())).mean()
        mean0_ipwra = result0.predict(exogt).mean()

        mod1 = WLS(endog[treat_mask], exog[treat_mask],
                   weights=w1)
        result1 = mod1.fit(cov_type='HC1')
        # mean1_ipwra = (result1.predict(exog) * (prob / prob.mean())).mean()
        mean1_ipwra = result1.predict(exogt).mean()

        if not return_results:
            return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra

        # GMM
        mod_gmm = _IPWRAGMM(endog, self.results_select, None, teff=self,
                            effect_group=effect_group)
        start_params = np.concatenate((
            [mean1_ipwra - mean0_ipwra, mean0_ipwra],
            result0.params,
            result1.params,
            np.asarray(self.results_select.params)
            ))
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 2000, "disp": disp},
            maxiter=1
            )

        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res