"""
Information Theoretic and Entropy Measures

References
----------
Golan, A. 2008. "Information and Entropy Econometrics -- A Review and
    Synthesis." Foundations and Trends in Econometrics 2(1-2), 1-145.

Golan, A., Judge, G., and Miller, D. 1996. Maximum Entropy Econometrics.
    Wiley & Sons, Chichester.
"""
# For the Miller-Madow correction:
# Miller, G. 1955. Note on the bias of information estimates. Info. Theory
#     Psychol. Prob. Methods II-B:95-100.

# For the Chao-Shen method:
# Chao, A., and T.-J. Shen. 2003. Nonparametric estimation of Shannon's index
#     of diversity when there are unseen species in sample. Environ. Ecol.
#     Stat. 10:429-443.
# Good, I. J. 1953. The population frequencies of species and the estimation
#     of population parameters. Biometrika 40:237-264.
# Horvitz, D. G., and D. J. Thompson. 1952. A generalization of sampling
#     without replacement from a finite universe. J. Am. Stat. Assoc.
#     47:663-685.

# For the NSB method:
# Nemenman, I., F. Shafee, and W. Bialek. 2002. Entropy and inference,
#     revisited. In: Dietterich, T., S. Becker, Z. Ghahramani, eds. Advances
#     in Neural Information Processing Systems 14: 471-478. Cambridge
#     (Massachusetts): MIT Press.

# For the shrinkage method:
# Dougherty, J., Kohavi, R., and Sahami, M. (1995). Supervised and
#     unsupervised discretization of continuous features. In International
#     Conference on Machine Learning.
# Yang, Y. and Webb, G. I. (2003). Discretization for naive-Bayes learning:
#     managing discretization bias and variance. Technical Report 2003/131,
#     School of Computer Science and Software Engineering, Monash University.
from statsmodels.compat.python import lzip, lmap

import numpy as np
from scipy import stats
from scipy.special import logsumexp as sp_logsumexp
from matplotlib import pyplot as plt

#TODO: change these to use maxentutils so that over/underflow is handled
#with the logsumexp.

def logsumexp(a, axis=None):
    """
    Compute the log of the sum of exponentials, log(e^{a_1}+...+e^{a_n}),
    of the elements of `a`.

    Avoids numerical overflow.

    Parameters
    ----------
    a : array_like
        The vector to exponentiate and sum
    axis : int, optional
        The axis along which to apply the operation.  Default is None.

    Returns
    -------
    log(sum(exp(a)))

    Notes
    -----
    This function was taken from the mailing list
    http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

    This should be superseded by the ufunc when it is finished.
    """
    if axis is None:
        # Use the scipy.special version.
        return sp_logsumexp(a)
    a = np.asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    a_max = a.max(axis=axis)
    # subtract the per-slice maximum for numerical stability, then add it back
    s = np.log(np.exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse

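# A quick illustration of why the max-shift matters: the naive formula
# overflows for large entries, while logsumexp does not.
# >>> a = np.array([1000., 1000.])
# >>> np.log(np.sum(np.exp(a)))   # overflows to inf with a RuntimeWarning
# >>> logsumexp(a)                # 1000.6931..., i.e. 1000 + log(2)
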
def _isproperdist(X):
    """
    Checks to see if `X` is a proper probability distribution
    """
    X = np.asarray(X)
    if not np.allclose(np.sum(X), 1) or not np.all(X >= 0) or not np.all(X <= 1):
        return False
    else:
        return True

def discretize(X, method="ef", nbins=None):
    """
    Discretize `X`

    Parameters
    ----------
    X : array_like
        The data to be discretized
    method : str, optional
        "ef" is equal-frequency binning
        "ew" is equal-width binning
    nbins : int, optional
        Number of bins.  Default is floor(sqrt(N))
    """
    nobs = len(X)
    if nbins is None:
        nbins = int(np.floor(np.sqrt(nobs)))
    if method == "ef":
        discrete = np.ceil(nbins * stats.rankdata(X)/nobs)
    if method == "ew":
        width = np.max(X) - np.min(X)
        width = np.floor(width/nbins)
        # stats.fastsort was removed from scipy; argsort gives the same
        # (sorted values, sorting indices) pair
        X = np.asarray(X)
        ivec = np.argsort(X)
        svec = X[ivec]
        discrete = np.zeros(nobs)
        binnum = 1
        base = svec[0]
        discrete[ivec[0]] = binnum
        for i in range(1, nobs):
            if svec[i] < base + width:
                discrete[ivec[i]] = binnum
            else:
                base = svec[i]
                binnum += 1
                discrete[ivec[i]] = binnum
    return discrete
#TODO: looks okay but needs more robust tests for corner cases

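# A small usage sketch (the data values here are made up for illustration):
# >>> x = [1.2, 0.3, 2.4, 0.9, 3.7, 1.5, 0.1, 2.2, 3.1]
# >>> discretize(x, method="ef", nbins=3)  # three equal-frequency bins
# array([2., 1., 3., 1., 3., 2., 1., 2., 3.])
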
def logbasechange(a, b):
    """
    There is a one-to-one transformation of the entropy value from
    a log base a to a log base b :

    H_{b}(X)=log_{b}(a)[H_{a}(X)]

    Returns
    -------
    log_{b}(a)
    """
    # log_b(a) = ln(a)/ln(b)
    return np.log(a)/np.log(b)

def natstobits(X):
    """
    Converts from nats to bits
    """
    return logbasechange(np.e, 2) * X

def bitstonats(X):
    """
    Converts from bits to nats
    """
    return logbasechange(2, np.e) * X

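# Quick check of the conversion factors (exact constants):
# >>> natstobits(1.0)   # 1 nat = log2(e) bits ~ 1.4427
# >>> bitstonats(1.0)   # 1 bit = ln(2) nats ~ 0.6931
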
#TODO: make this entropy, and then have different measures as
#a method
def shannonentropy(px, logbase=2):
    """
    This is Shannon's entropy

    Parameters
    ----------
    px : 1d or 2d array_like
        Can be a discrete probability distribution, a 2d joint distribution,
        or a sequence of probabilities.
    logbase : int or np.e
        The base of the log

    Returns
    -------
    For log base 2 (bits) given a discrete distribution
        H(p) = sum(px * log2(1/px)) = -sum(px * log2(px)) = E[log2(1/p(X))]

    For log base 2 (bits) given a joint distribution
        H(px,py) = -sum_{k,j} w_{kj} * log2(w_{kj})

    Notes
    -----
    shannonentropy(0) is defined as 0
    """
    #TODO: have not defined the px,py case?
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    # 0 * log(0) is defined as 0, so zero out the resulting nans
    entropy = -np.sum(np.nan_to_num(px*np.log2(px)))
    if logbase != 2:
        return logbasechange(2, logbase) * entropy
    else:
        return entropy

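# A standard sanity check: a fair coin carries exactly one bit of entropy,
# and a deterministic outcome carries none.
# >>> shannonentropy([0.5, 0.5])                # 1.0 bit
# >>> shannonentropy([1.0, 0.0])                # 0.0 bits
# >>> shannonentropy([0.5, 0.5], logbase=np.e)  # ln(2) ~ 0.6931 nats
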
# Shannon's information content
def shannoninfo(px, logbase=2):
    """
    Shannon's information

    Parameters
    ----------
    px : float or array_like
        `px` is a discrete probability distribution

    Returns
    -------
    For logbase = 2
        -np.log2(px)
    """
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    if logbase != 2:
        return - logbasechange(2, logbase) * np.log2(px)
    else:
        return - np.log2(px)

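# The information content (surprisal) of an outcome grows as its probability
# shrinks; a certain event is unsurprising.
# >>> shannoninfo(0.5)    # 1.0 bit
# >>> shannoninfo(0.25)   # 2.0 bits
# >>> shannoninfo(1.0)    # 0.0 bits
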
def condentropy(px, py, pxpy=None, logbase=2):
    """
    Return the conditional entropy of X given Y.

    Parameters
    ----------
    px : array_like
    py : array_like
    pxpy : 2d array_like, optional
        Joint distribution with rows indexing Y and columns indexing X,
        the orientation produced by np.outer(py, px).  If pxpy is None,
        the distributions are assumed to be independent and
        condentropy(px,py) = shannonentropy(px)
    logbase : int or np.e

    Returns
    -------
    sum_{kj} w_{kj} * log2(q_{k}/w_{kj})

    where q_{k} = py[k]
    and w_{kj} = pxpy[k,j]
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    py = np.asarray(py)
    pxpy = np.asarray(pxpy)
    # broadcast py down the rows, which index Y in this orientation
    condent = np.sum(pxpy * np.nan_to_num(np.log2(py[:, None]/pxpy)))
    if logbase == 2:
        return condent
    else:
        return logbasechange(2, logbase) * condent

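# Conditioning on an independent Y leaves the entropy of X unchanged, so the
# default (independent) joint reproduces shannonentropy(px):
# >>> px, py = [0.5, 0.5], [0.25, 0.75]
# >>> condentropy(px, py)   # 1.0 bit, same as shannonentropy(px)
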
def mutualinfo(px, py, pxpy, logbase=2):
    """
    Returns the mutual information between X and Y.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like
        The joint probability distribution of random variables X and Y.
        Note that if X and Y are independent then the mutual information
        is zero.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    shannonentropy(px) - condentropy(px,py,pxpy)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (shannonentropy(px, logbase=logbase)
            - condentropy(px, py, pxpy, logbase=logbase))

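# For an independent joint the mutual information is zero; any dependence
# makes it positive.
# >>> px, py = [0.5, 0.5], [0.5, 0.5]
# >>> mutualinfo(px, py, np.outer(py, px))                    # 0.0 bits
# >>> mutualinfo(px, py, np.array([[0.5, 0.0], [0.0, 0.5]]))  # 1.0 bit
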
def corrent(px, py, pxpy, logbase=2):
    """
    An information theoretic correlation measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    mutualinfo(px,py,pxpy,logbase=logbase)/shannonentropy(py,logbase=logbase)

    Notes
    -----
    This is also equivalent to

    corrent(px,py,pxpy) = 1 - H(Y|X)/H(Y)

    since mutualinfo(px,py,pxpy) = H(Y) - H(Y|X).
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)

    return (mutualinfo(px, py, pxpy, logbase=logbase)
            / shannonentropy(py, logbase=logbase))

def covent(px, py, pxpy, logbase=2):
    """
    An information theoretic covariance measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    condentropy(px,py,pxpy,logbase=logbase) + condentropy(py,px,pxpy.T,
            logbase=logbase)

    Notes
    -----
    This is also equivalent to

    covent(px,py,pxpy) = H(X|Y) + H(Y|X)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    pxpy = np.asarray(pxpy)

    return (condentropy(px, py, pxpy, logbase=logbase)
            # transpose so that rows index X for the H(Y|X) term
            + condentropy(py, px, pxpy.T, logbase=logbase))

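# For independent X and Y this covariance-style measure is just the sum of
# the marginal entropies, since conditioning then changes nothing:
# >>> px, py = [0.5, 0.5], [0.25, 0.75]
# >>> covent(px, py, None)   # H(X) + H(Y) ~ 1.0 + 0.8113 = 1.8113 bits
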
#### Generalized Entropies ####

def renyientropy(px, alpha=1, logbase=2, measure='R'):
    """
    Renyi's generalized entropy

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X.  Note that
        px is assumed to be a proper probability distribution.
    alpha : float or inf
        The order of the entropy.  The default is 1, which in the limit
        is just Shannon's entropy.  2 is Renyi (Collision) entropy.  If
        the string "inf" or numpy.inf is specified the min-entropy is
        returned.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The type of entropy measure desired.  'R' returns Renyi entropy
        measure.  'T' returns the Tsallis entropy measure.

    Returns
    -------
    1/(1-alpha)*log(sum(px**alpha))

    In the limit as alpha -> 1, Shannon's entropy is returned.

    In the limit as alpha -> inf, min-entropy is returned.
    """
    #TODO: finish returns
    #TODO: add checks for measure
    if not _isproperdist(px):
        raise ValueError("px is not a proper probability distribution")
    px = np.asarray(px)
    alpha = float(alpha)
    if alpha == 1:
        genent = shannonentropy(px)
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent
    elif 'inf' in str(alpha).lower() or alpha == np.inf:
        # min-entropy in base 2, converted below so that all branches
        # respect logbase
        genent = -np.log2(np.max(px))
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent

    # gets here if alpha != (1 or inf)
    px = px**alpha
    # compute in base 2 so the logbase handling matches the other branches
    genent = np.log2(px.sum())
    if logbase == 2:
        return 1/(1-alpha) * genent
    else:
        return 1/(1-alpha) * logbasechange(2, logbase) * genent

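# The limiting behaviour, checked on a uniform five-point distribution
# (for a uniform distribution all Renyi orders coincide):
# >>> p = [0.2] * 5
# >>> renyientropy(p, alpha=1)       # Shannon limit: log2(5) ~ 2.3219 bits
# >>> renyientropy(p, alpha=2)       # collision entropy, also log2(5) here
# >>> renyientropy(p, alpha=np.inf)  # min-entropy: -log2(max(px)) ~ 2.3219
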
#TODO: before completing this, need to rethink the organization of
# (relative) entropy measures, i.e., all put into one function
# and have kwargs, etc.?
def gencrossentropy(px, py, pxpy, alpha=1, logbase=2, measure='T'):
    """
    Generalized cross-entropy measures.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    alpha : float, optional
        The order of the generalized cross-entropy.  Default is 1.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The measure is the type of generalized cross-entropy desired.  'T' is
        the cross-entropy version of the Tsallis measure.  'CR' is the
        Cressie-Read measure.
    """

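# The body above is unfinished in this module.  As a minimal sketch of what
# the two measures could look like (textbook definitions; the helper name and
# its exact form are assumptions, not part of this module's API):
def _gencrossentropy_sketch(px, py, alpha=1, measure='T'):
    px, py = np.asarray(px, dtype=float), np.asarray(py, dtype=float)
    if alpha == 1:
        # limiting case: Kullback-Leibler divergence, in nats
        return np.sum(np.nan_to_num(px * np.log(px / py)))
    if measure == 'T':
        # Tsallis relative entropy: (sum(p**a * q**(1-a)) - 1) / (a - 1)
        return (np.sum(px**alpha * py**(1 - alpha)) - 1) / (alpha - 1)
    if measure == 'CR':
        # Cressie-Read power divergence:
        # sum(p * ((p/q)**a - 1)) / (a * (a + 1))
        return np.sum(px * ((px / py)**alpha - 1)) / (alpha * (alpha + 1))
    raise ValueError("measure must be 'T' or 'CR'")
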
if __name__ == "__main__":
    print("From Golan (2008) \"Information and Entropy Econometrics -- "
          "A Review and Synthesis\"")
    print("Table 3.1")
    # Examples from Golan (2008)

    X = [.2, .2, .2, .2, .2]
    Y = [.322, .072, .511, .091, .004]

    for i in X:
        print(shannoninfo(i))
    for i in Y:
        print(shannoninfo(i))
    print(shannonentropy(X))
    print(shannonentropy(Y))

    p = [1e-5, 1e-4, .001, .01, .1, .15, .2, .25, .3, .35, .4, .45, .5]

    plt.subplot(111)
    plt.ylabel("Information")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 100001)
    plt.plot(x, shannoninfo(x))
    # plt.show()

    plt.subplot(111)
    plt.ylabel("Entropy")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 101)
    plt.plot(x, lmap(shannonentropy, lzip(x, 1 - x)))
    # plt.show()

    # define a joint probability distribution
    # from Golan (2008) table 3.3
    w = np.array([[0, 0, 1./3], [1/9., 1/9., 1/9.], [1/18., 1/9., 1/6.]])
    # table 3.4
    px = w.sum(0)
    py = w.sum(1)
    H_X = shannonentropy(px)
    H_Y = shannonentropy(py)
    H_XY = shannonentropy(w)
    H_XgivenY = condentropy(px, py, w)
    # w has rows indexing Y, so transpose for the entropy of Y given X
    H_YgivenX = condentropy(py, px, w.T)
    # note that relative entropy is not symmetric, hence not a distance
    # measure, as the following shows; stats.entropy returns nats, so
    # convert to bits
    D_YX = logbasechange(np.e, 2) * stats.entropy(px, py)
    D_XY = logbasechange(np.e, 2) * stats.entropy(py, px)
    I_XY = mutualinfo(px, py, w)
    print("Table 3.3")
    print(H_X, H_Y, H_XY, H_XgivenY, H_YgivenX, D_YX, D_XY, I_XY)

    print("discretize functions")
    X = np.array([21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9, 25.8,
                  20.2, 14.2, 24.0, 21.0, 11.3, 18.0, 16.3, 22.2, 7.8, 27.8,
                  16.3, 35.1, 14.9, 17.1, 28.2, 16.4, 16.5, 46.0, 9.5, 18.8,
                  32.1, 26.1, 16.1, 7.3, 21.4, 20.0, 29.3, 14.9, 8.3, 22.5,
                  12.8, 26.9, 25.5, 22.9, 11.2, 20.7, 26.2, 9.3, 10.8, 15.6])
    discX = discretize(X)
    # CF: R's infotheo
    #TODO: compare to pyentropy quantize?
    print()
    print("Example in section 3.6 of Golan, using table 3.3")
    print("Bounding errors using Fano's inequality")
    print("H(P_{e}) + P_{e}log(K-1) >= H(X|Y)")
    print("or, a weaker inequality")
    print("P_{e} >= [H(X|Y) - 1]/log(K)")
    print("P(x) = %s" % px)
    print("X = 3 has the highest probability, so this is the estimate Xhat")
    pe = 1 - px[2]
    print("The probability of error Pe is 1 - p(X=3) = %0.4g" % pe)
    H_pe = shannonentropy([pe, 1 - pe])
    print("H(Pe) = %0.4g and K=3" % H_pe)
    print("H(Pe) + Pe*log(K-1) = %0.4g >= H(X|Y) = %0.4g" %
          (H_pe + pe * np.log2(2), H_XgivenY))
    print("or using the weaker inequality")
    print(f"Pe = {pe:0.4g} >= [H(X) - 1]/log(K) = {(H_X - 1)/np.log2(3):0.4g}")
    print("Consider now, table 3.5, where there is additional information")
    print("The conditional probabilities of P(X|Y=y) are ")
    w2 = np.array([[0., 0., 1.], [1/3., 1/3., 1/3.], [1/6., 1/3., 1/2.]])
    print(w2)
    # each row of w2 is a proper conditional distribution P(X|Y=y), though
    # w2 itself is not a proper joint distribution
    print("The probability of error given this information is")
    print("Pe = [H(X|Y) - 1]/log(K) = %0.4g" %
          ((np.mean([0, shannonentropy(w2[1]), shannonentropy(w2[2])]) - 1)
           / np.log2(3)))
    print("such that more information lowers the error")

    ### Stochastic processes
    markovchain = np.array([[.553, .284, .163], [.465, .312, .223],
                            [.420, .322, .258]])
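    # A possible continuation of this unfinished section (an assumption, not
    # part of the original): the stationary distribution pi of a row-
    # stochastic transition matrix P solves pi = pi @ P, and the entropy
    # rate of the chain is H = -sum_i pi_i sum_j P_ij log2(P_ij).
    evals, evecs = np.linalg.eig(markovchain.T)
    stat = np.real(evecs[:, np.argmin(np.abs(evals - 1))])
    stat /= stat.sum()  # normalize the eigenvector to a distribution
    entropy_rate = -np.sum(stat[:, None]
                           * np.nan_to_num(markovchain
                                           * np.log2(markovchain)))
    print("stationary distribution: %s" % stat)
    print("entropy rate (bits): %0.4g" % entropy_rate)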