from statsmodels.compat.python import lmap, lrange, lzip import copy from itertools import zip_longest import time import numpy as np from statsmodels.iolib.table import SimpleTable from statsmodels.iolib.tableformatting import ( fmt_2, fmt_2cols, fmt_params, gen_fmt, ) from .summary2 import _model_types def forg(x, prec=3): x = np.squeeze(x) if prec == 3: # for 3 decimals if (abs(x) >= 1e4) or (abs(x) < 1e-4): return '%9.3g' % x else: return '%9.3f' % x elif prec == 4: if (abs(x) >= 1e4) or (abs(x) < 1e-4): return '%10.4g' % x else: return '%10.4f' % x else: raise ValueError("`prec` argument must be either 3 or 4, not {prec}" .format(prec=prec)) def d_or_f(x, width=6): """convert number to string with either integer of float formatting This is used internally for nobs and degrees of freedom which are usually integers but can be float in some cases. Parameters ---------- x : int or float width : int only used if x is nan Returns ------- str : str number as formatted string """ if np.isnan(x): return (width - 3) * ' ' + 'NaN' if x // 1 == x: return "%#6d" % x else: return "%#8.2f" % x def summary(self, yname=None, xname=None, title=0, alpha=.05, returns='text', model_info=None): """ Parameters ---------- yname : str optional, Default is `Y` xname : list[str] optional, Default is `X.#` for # in p the number of regressors Confidance interval : (0,1) not implimented title : str optional, Defualt is 'Generalized linear model' returns : str 'text', 'table', 'csv', 'latex', 'html' Returns ------- Default : returns='print' Prints the summarirized results Option : returns='text' Prints the summarirized results Option : returns='table' SimpleTable instance : summarizing the fit of a linear model. Option : returns='csv' returns a string of csv of the results, to import into a spreadsheet Option : returns='latex' Not implimented yet Option : returns='HTML' Not implimented yet Examples (needs updating) -------- >>> import statsmodels as sm >>> data = sm.datasets.longley.load() >>> data.exog = sm.add_constant(data.exog) >>> ols_results = sm.OLS(data.endog, data.exog).results >>> print ols_results.summary() ... Notes ----- conf_int calculated from normal dist. """ if title == 0: title = _model_types[self.model.__class__.__name__] if xname is not None and len(xname) != len(self.params): # GH 2298 raise ValueError('User supplied xnames must have the same number of ' 'entries as the number of model parameters ' '({})'.format(len(self.params))) yname, xname = _getnames(self, yname, xname) time_now = time.localtime() time_of_day = [time.strftime("%H:%M:%S", time_now)] date = time.strftime("%a, %d %b %Y", time_now) modeltype = self.model.__class__.__name__ nobs = self.nobs df_model = self.df_model df_resid = self.df_resid #General part of the summary table, Applicable to all? models #------------------------------------------------------------ # TODO: define this generically, overwrite in model classes #replace definition of stubs data by single list #e.g. gen_left = [('Model type:', [modeltype]), ('Date:', [date]), ('Dependent Variable:', yname), # TODO: What happens with multiple names? ('df model', [df_model]) ] gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col gen_title = title gen_header = None gen_table_left = SimpleTable(gen_data_left, gen_header, gen_stubs_left, title=gen_title, txt_fmt=gen_fmt ) gen_stubs_right = ('Method:', 'Time:', 'Number of Obs:', 'df resid') gen_data_right = ([modeltype], #was dist family need to look at more time_of_day, [nobs], [df_resid] ) gen_table_right = SimpleTable(gen_data_right, gen_header, gen_stubs_right, title=gen_title, txt_fmt=gen_fmt ) gen_table_left.extend_right(gen_table_right) general_table = gen_table_left # Parameters part of the summary table # ------------------------------------ # Note: this is not necessary since we standardized names, # only t versus normal tstats = {'OLS': self.t(), 'GLS': self.t(), 'GLSAR': self.t(), 'WLS': self.t(), 'RLM': self.t(), 'GLM': self.t()} prob_stats = {'OLS': self.pvalues, 'GLS': self.pvalues, 'GLSAR': self.pvalues, 'WLS': self.pvalues, 'RLM': self.pvalues, 'GLM': self.pvalues } # Dictionary to store the header names for the parameter part of the # summary table. look up by modeltype alp = str((1-alpha)*100)+'%' param_header = { 'OLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLSAR' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'WLS' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], 'GLM' : ['coef', 'std err', 't', 'P>|t|', alp + ' Conf. Interval'], #glm uses t-distribution 'RLM' : ['coef', 'std err', 'z', 'P>|z|', alp + ' Conf. Interval'] #checke z } params_stubs = xname params = self.params conf_int = self.conf_int(alpha) std_err = self.bse exog_len = lrange(len(xname)) tstat = tstats[modeltype] prob_stat = prob_stats[modeltype] # Simpletable should be able to handle the formating params_data = lzip(["%#6.4g" % (params[i]) for i in exog_len], ["%#6.4f" % (std_err[i]) for i in exog_len], ["%#6.4f" % (tstat[i]) for i in exog_len], ["%#6.4f" % (prob_stat[i]) for i in exog_len], ["(%#5g, %#5g)" % tuple(conf_int[i]) for i in exog_len]) parameter_table = SimpleTable(params_data, param_header[modeltype], params_stubs, title=None, txt_fmt=fmt_2 ) #special table #------------- #TODO: exists in linear_model, what about other models #residual diagnostics #output options #-------------- #TODO: JP the rest needs to be fixed, similar to summary in linear_model def ols_printer(): """ print summary table for ols models """ table = str(general_table)+'\n'+str(parameter_table) return table def glm_printer(): table = str(general_table)+'\n'+str(parameter_table) return table printers = {'OLS': ols_printer, 'GLM': glm_printer} if returns == 'print': try: return printers[modeltype]() except KeyError: return printers['OLS']() def _getnames(self, yname=None, xname=None): '''extract names from model or construct names ''' if yname is None: if getattr(self.model, 'endog_names', None) is not None: yname = self.model.endog_names else: yname = 'y' if xname is None: if getattr(self.model, 'exog_names', None) is not None: xname = self.model.exog_names else: xname = ['var_%d' % i for i in range(len(self.params))] return yname, xname def summary_top(results, title=None, gleft=None, gright=None, yname=None, xname=None): '''generate top table(s) TODO: this still uses predefined model_methods ? allow gleft, gright to be 1 element tuples instead of filling with None? ''' #change of names ? gen_left, gen_right = gleft, gright # time and names are always included time_now = time.localtime() time_of_day = [time.strftime("%H:%M:%S", time_now)] date = time.strftime("%a, %d %b %Y", time_now) yname, xname = _getnames(results, yname=yname, xname=xname) # create dictionary with default # use lambdas because some values raise exception if they are not available default_items = dict([ ('Dependent Variable:', lambda: [yname]), ('Dep. Variable:', lambda: [yname]), ('Model:', lambda: [results.model.__class__.__name__]), ('Date:', lambda: [date]), ('Time:', lambda: time_of_day), ('Number of Obs:', lambda: [results.nobs]), ('No. Observations:', lambda: [d_or_f(results.nobs)]), ('Df Model:', lambda: [d_or_f(results.df_model)]), ('Df Residuals:', lambda: [d_or_f(results.df_resid)]), ('Log-Likelihood:', lambda: ["%#8.5g" % results.llf]) # does not exist for RLM - exception ]) if title is None: title = results.model.__class__.__name__ + 'Regression Results' if gen_left is None: # default: General part of the summary table, Applicable to all? models gen_left = [('Dep. Variable:', None), ('Model type:', None), ('Date:', None), ('No. Observations:', None), ('Df model:', None), ('Df resid:', None)] try: llf = results.llf # noqa: F841 gen_left.append(('Log-Likelihood', None)) except: # AttributeError, NotImplementedError pass gen_right = [] gen_title = title gen_header = None # replace missing (None) values with default values gen_left_ = [] for item, value in gen_left: if value is None: value = default_items[item]() # let KeyErrors raise exception gen_left_.append((item, value)) gen_left = gen_left_ if gen_right: gen_right_ = [] for item, value in gen_right: if value is None: value = default_items[item]() # let KeyErrors raise exception gen_right_.append((item, value)) gen_right = gen_right_ # check nothing was missed missing_values = [k for k,v in gen_left + gen_right if v is None] assert missing_values == [], missing_values # pad both tables to equal number of rows if gen_right: if len(gen_right) < len(gen_left): # fill up with blank lines to same length gen_right += [(' ', ' ')] * (len(gen_left) - len(gen_right)) elif len(gen_right) > len(gen_left): # fill up with blank lines to same length, just to keep it symmetric gen_left += [(' ', ' ')] * (len(gen_right) - len(gen_left)) # padding in SimpleTable does not work like I want #force extra spacing and exact string length in right table gen_right = [('%-21s' % (' '+k), v) for k,v in gen_right] gen_stubs_right, gen_data_right = zip_longest(*gen_right) #transpose row col gen_table_right = SimpleTable(gen_data_right, gen_header, gen_stubs_right, title=gen_title, txt_fmt=fmt_2cols ) else: gen_table_right = [] #because .extend_right seems works with [] #moved below so that we can pad if needed to match length of gen_right #transpose rows and columns, `unzip` gen_stubs_left, gen_data_left = zip_longest(*gen_left) #transpose row col gen_table_left = SimpleTable(gen_data_left, gen_header, gen_stubs_left, title=gen_title, txt_fmt=fmt_2cols ) gen_table_left.extend_right(gen_table_right) general_table = gen_table_left return general_table def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True, skip_header=False, title=None): '''create a summary table for the parameters Parameters ---------- res : results instance some required information is directly taken from the result instance yname : {str, None} optional name for the endogenous variable, default is "y" xname : {list[str], None} optional names for the exogenous variables, default is "var_xx" alpha : float significance level for the confidence intervals use_t : bool indicator whether the p-values are based on the Student-t distribution (if True) or on the normal distribution (if False) skip_headers : bool If false (default), then the header row is added. If true, then no header row is added. Returns ------- params_table : SimpleTable instance ''' # Parameters part of the summary table # ------------------------------------ # Note: this is not necessary since we standardized names, # only t versus normal if isinstance(results, tuple): # for multivariate endog # TODO: check whether I do not want to refactor this #we need to give parameter alpha to conf_int results, params, std_err, tvalues, pvalues, conf_int = results else: params = np.asarray(results.params) std_err = np.asarray(results.bse) tvalues = np.asarray(results.tvalues) # is this sometimes called zvalues pvalues = np.asarray(results.pvalues) conf_int = np.asarray(results.conf_int(alpha)) if params.size == 0: return SimpleTable([['No Model Parameters']]) # Dictionary to store the header names for the parameter part of the # summary table. look up by modeltype if use_t: param_header = ['coef', 'std err', 't', 'P>|t|', '[' + str(alpha/2), str(1-alpha/2) + ']'] else: param_header = ['coef', 'std err', 'z', 'P>|z|', '[' + str(alpha/2), str(1-alpha/2) + ']'] if skip_header: param_header = None _, xname = _getnames(results, yname=yname, xname=xname) if len(xname) != len(params): raise ValueError('xnames and params do not have the same length') params_stubs = xname exog_idx = lrange(len(xname)) params = np.asarray(params) std_err = np.asarray(std_err) tvalues = np.asarray(tvalues) pvalues = np.asarray(pvalues) conf_int = np.asarray(conf_int) params_data = lzip([forg(params[i], prec=4) for i in exog_idx], [forg(std_err[i]) for i in exog_idx], [forg(tvalues[i]) for i in exog_idx], ["%#6.3f" % (pvalues[i]) for i in exog_idx], [forg(conf_int[i,0]) for i in exog_idx], [forg(conf_int[i,1]) for i in exog_idx]) parameter_table = SimpleTable(params_data, param_header, params_stubs, title=title, txt_fmt=fmt_params ) return parameter_table def summary_params_frame(results, yname=None, xname=None, alpha=.05, use_t=True): """ Create a summary table for the parameters Parameters ---------- res : results instance some required information is directly taken from the result instance yname : {str, None} optional name for the endogenous variable, default is "y" xname : {list[str], None} optional names for the exogenous variables, default is "var_xx" alpha : float significance level for the confidence intervals use_t : bool indicator whether the p-values are based on the Student-t distribution (if True) or on the normal distribution (if False) skip_headers : bool If false (default), then the header row is added. If true, then no header row is added. Returns ------- params_table : SimpleTable instance """ # Parameters part of the summary table # ------------------------------------ # Note: this is not necessary since we standardized names, # only t versus normal if isinstance(results, tuple): # for multivariate endog # TODO: check whether I do not want to refactor this #we need to give parameter alpha to conf_int results, params, std_err, tvalues, pvalues, conf_int = results else: params = results.params std_err = results.bse tvalues = results.tvalues #is this sometimes called zvalues pvalues = results.pvalues conf_int = results.conf_int(alpha) # Dictionary to store the header names for the parameter part of the # summary table. look up by modeltype if use_t: param_header = ['coef', 'std err', 't', 'P>|t|', 'Conf. Int. Low', 'Conf. Int. Upp.'] else: param_header = ['coef', 'std err', 'z', 'P>|z|', 'Conf. Int. Low', 'Conf. Int. Upp.'] _, xname = _getnames(results, yname=yname, xname=xname) from pandas import DataFrame table = np.column_stack((params, std_err, tvalues, pvalues, conf_int)) return DataFrame(table, columns=param_header, index=xname) def summary_params_2d(result, extras=None, endog_names=None, exog_names=None, title=None): """create summary table of regression parameters with several equations This allows interleaving of parameters with bse and/or tvalues Parameters ---------- result : result instance the result instance with params and attributes in extras extras : list[str] additional attributes to add below a parameter row, e.g. bse or tvalues endog_names : {list[str], None} names for rows of the parameter array (multivariate endog) exog_names : {list[str], None} names for columns of the parameter array (exog) alpha : float level for confidence intervals, default 0.95 title : None or string Returns ------- tables : list of SimpleTable this contains a list of all seperate Subtables table_all : SimpleTable the merged table with results concatenated for each row of the parameter array """ if endog_names is None: # TODO: note the [1:] is specific to current MNLogit endog_names = ['endog_%d' % i for i in np.unique(result.model.endog)[1:]] if exog_names is None: exog_names = ['var%d' % i for i in range(len(result.params))] # TODO: check formatting options with different values res_params = [[forg(item, prec=4) for item in row] for row in result.params] if extras: extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')') for v in col] for col in getattr(result, what)] for what in extras ] data = lzip(res_params, *extras_list) data = [i for j in data for i in j] #flatten stubs = lzip(endog_names, *[['']*len(endog_names)]*len(extras)) stubs = [i for j in stubs for i in j] #flatten else: data = res_params stubs = endog_names txt_fmt = copy.deepcopy(fmt_params) txt_fmt["data_fmts"] = ["%s"]*result.params.shape[1] return SimpleTable(data, headers=exog_names, stubs=stubs, title=title, txt_fmt=txt_fmt) def summary_params_2dflat(result, endog_names=None, exog_names=None, alpha=0.05, use_t=True, keep_headers=True, endog_cols=False): """summary table for parameters that are 2d, e.g. multi-equation models Parameters ---------- result : result instance the result instance with params, bse, tvalues and conf_int endog_names : {list[str], None} names for rows of the parameter array (multivariate endog) exog_names : {list[str], None} names for columns of the parameter array (exog) alpha : float level for confidence intervals, default 0.95 use_t : bool indicator whether the p-values are based on the Student-t distribution (if True) or on the normal distribution (if False) keep_headers : bool If true (default), then sub-tables keep their headers. If false, then only the first headers are kept, the other headerse are blanked out endog_cols : bool If false (default) then params and other result statistics have equations by rows. If true, then equations are assumed to be in columns. Not implemented yet. Returns ------- tables : list of SimpleTable this contains a list of all seperate Subtables table_all : SimpleTable the merged table with results concatenated for each row of the parameter array """ res = result params = res.params if params.ndim == 2: # we've got multiple equations n_equ = params.shape[1] if len(endog_names) != params.shape[1]: raise ValueError('endog_names has wrong length') else: if len(endog_names) != len(params): raise ValueError('endog_names has wrong length') n_equ = 1 #VAR does not have conf_int #params = res.params.T # this is a convention for multi-eq models # check that we have the right length of names if not isinstance(endog_names, list): # TODO: this might be specific to multinomial logit type, move? if endog_names is None: endog_basename = 'endog' else: endog_basename = endog_names # TODO: note, the [1:] is specific to current MNLogit endog_names = res.model.endog_names[1:] tables = [] for eq in range(n_equ): restup = (res, res.params[:,eq], res.bse[:,eq], res.tvalues[:,eq], res.pvalues[:,eq], res.conf_int(alpha)[eq]) skiph = False tble = summary_params(restup, yname=endog_names[eq], xname=exog_names, alpha=alpha, use_t=use_t, skip_header=skiph) tables.append(tble) # add titles, they will be moved to header lines in table_extend for i in range(len(endog_names)): tables[i].title = endog_names[i] table_all = table_extend(tables, keep_headers=keep_headers) return tables, table_all def table_extend(tables, keep_headers=True): """extend a list of SimpleTables, adding titles to header of subtables This function returns the merged table as a deepcopy, in contrast to the SimpleTable extend method. Parameters ---------- tables : list of SimpleTable instances keep_headers : bool If true, then all headers are kept. If falls, then the headers of subtables are blanked out. Returns ------- table_all : SimpleTable merged tables as a single SimpleTable instance """ from copy import deepcopy for ii, t in enumerate(tables[:]): #[1:]: t = deepcopy(t) #move title to first cell of header # TODO: check if we have multiline headers if t[0].datatype == 'header': t[0][0].data = t.title t[0][0]._datatype = None t[0][0].row = t[0][1].row if not keep_headers and (ii > 0): for c in t[0][1:]: c.data = '' # add separating line and extend tables if ii == 0: table_all = t else: r1 = table_all[-1] r1.add_format('txt', row_dec_below='-') table_all.extend(t) table_all.title = None return table_all def summary_return(tables, return_fmt='text'): # join table parts then print if return_fmt == 'text': strdrop = lambda x: str(x).rsplit('\n',1)[0] # convert to string drop last line return '\n'.join(lmap(strdrop, tables[:-1]) + [str(tables[-1])]) elif return_fmt == 'tables': return tables elif return_fmt == 'csv': return '\n'.join(x.as_csv() for x in tables) elif return_fmt == 'latex': # TODO: insert \hline after updating SimpleTable table = copy.deepcopy(tables[0]) for part in tables[1:]: table.extend(part) return table.as_latex_tabular() elif return_fmt == 'html': return "\n".join(table.as_html() for table in tables) else: raise ValueError('available output formats are text, csv, latex, html') class Summary: """ Result summary Construction does not take any parameters. Tables and text can be added with the `add_` methods. Attributes ---------- tables : list of tables Contains the list of SimpleTable instances, horizontally concatenated tables are not saved separately. extra_txt : str extra lines that are added to the text output, used for warnings and explanations. """ def __init__(self): self.tables = [] self.extra_txt = None def __str__(self): return self.as_text() def __repr__(self): return str(type(self)) + '\n"""\n' + self.__str__() + '\n"""' def _repr_html_(self): """Display as HTML in IPython notebook.""" return self.as_html() def _repr_latex_(self): """Display as LaTeX when converting IPython notebook to PDF.""" return self.as_latex() def add_table_2cols(self, res, title=None, gleft=None, gright=None, yname=None, xname=None): """ Add a double table, 2 tables with one column merged horizontally Parameters ---------- res : results instance some required information is directly taken from the result instance title : str, optional if None, then a default title is used. gleft : list[tuple], optional elements for the left table, tuples are (name, value) pairs If gleft is None, then a default table is created gright : list[tuple], optional elements for the right table, tuples are (name, value) pairs yname : str, optional optional name for the endogenous variable, default is "y" xname : list[str], optional optional names for the exogenous variables, default is "var_xx". Must match the number of parameters in the model. """ table = summary_top(res, title=title, gleft=gleft, gright=gright, yname=yname, xname=xname) self.tables.append(table) def add_table_params(self, res, yname=None, xname=None, alpha=.05, use_t=True): """create and add a table for the parameter estimates Parameters ---------- res : results instance some required information is directly taken from the result instance yname : {str, None} optional name for the endogenous variable, default is "y" xname : {list[str], None} optional names for the exogenous variables, default is "var_xx" alpha : float significance level for the confidence intervals use_t : bool indicator whether the p-values are based on the Student-t distribution (if True) or on the normal distribution (if False) Returns ------- None : table is attached """ if res.params.ndim == 1: table = summary_params(res, yname=yname, xname=xname, alpha=alpha, use_t=use_t) elif res.params.ndim == 2: _, table = summary_params_2dflat(res, endog_names=yname, exog_names=xname, alpha=alpha, use_t=use_t) else: raise ValueError('params has to be 1d or 2d') self.tables.append(table) def add_extra_txt(self, etext): """add additional text that will be added at the end in text format Parameters ---------- etext : list[str] string with lines that are added to the text output. """ self.extra_txt = '\n'.join(etext) def as_text(self): """return tables as string Returns ------- txt : str summary tables and extra text as one string """ txt = summary_return(self.tables, return_fmt='text') if self.extra_txt is not None: txt = txt + '\n\n' + self.extra_txt return txt def as_latex(self): """return tables as string Returns ------- latex : str summary tables and extra text as string of Latex Notes ----- This currently merges tables with different number of columns. It is recommended to use `as_latex_tabular` directly on the individual tables. """ latex = summary_return(self.tables, return_fmt='latex') if self.extra_txt is not None: latex = latex + '\n\n' + self.extra_txt.replace('\n', ' \\newline\n ') return latex def as_csv(self): """return tables as string Returns ------- csv : str concatenated summary tables in comma delimited format """ csv = summary_return(self.tables, return_fmt='csv') if self.extra_txt is not None: csv = csv + '\n\n' + self.extra_txt return csv def as_html(self): """return tables as string Returns ------- html : str concatenated summary tables in HTML format """ html = summary_return(self.tables, return_fmt='html') if self.extra_txt is not None: html = html + '

' + self.extra_txt.replace('\n', '
') return html