AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/graphics/tests/test_mosaicplot.py

from statsmodels.compat.python import lrange

from io import BytesIO
from itertools import product

import numpy as np
from numpy.testing import assert_, assert_raises
import pandas as pd
import pytest

from statsmodels.api import datasets

# utilities for the tests

try:
    import matplotlib.pyplot as plt  # noqa:F401
except ImportError:
    pass

# other functions to be tested for accuracy
# the main drawing function
from statsmodels.graphics.mosaicplot import (
    _hierarchical_split,
    _key_splitting,
    _normalize_split,
    _reduce_dict,
    _split_rect,
    mosaic,
)


@pytest.mark.matplotlib
def test_data_conversion(close_figures):
    # It will not reorder the elements
    # so the dictionary will look odd
    # as it key order has the c and b
    # keys swapped
    import pandas
    _, ax = plt.subplots(4, 4)
    data = {'ax': 1, 'bx': 2, 'cx': 3}
    mosaic(data, ax=ax[0, 0], title='basic dict', axes_label=False)
    data = pandas.Series(data)
    mosaic(data, ax=ax[0, 1], title='basic series', axes_label=False)
    data = [1, 2, 3]
    mosaic(data, ax=ax[0, 2], title='basic list', axes_label=False)
    data = np.asarray(data)
    mosaic(data, ax=ax[0, 3], title='basic array', axes_label=False)
    plt.close("all")

    data = {('ax', 'cx'): 1, ('bx', 'cx'): 2, ('ax', 'dx'): 3, ('bx', 'dx'): 4}
    mosaic(data, ax=ax[1, 0], title='compound dict', axes_label=False)
    mosaic(data, ax=ax[2, 0], title='inverted keys dict', index=[1, 0], axes_label=False)
    data = pandas.Series(data)
    mosaic(data, ax=ax[1, 1], title='compound series', axes_label=False)
    mosaic(data, ax=ax[2, 1], title='inverted keys series', index=[1, 0])
    data = [[1, 2], [3, 4]]
    mosaic(data, ax=ax[1, 2], title='compound list', axes_label=False)
    mosaic(data, ax=ax[2, 2], title='inverted keys list', index=[1, 0])
    data = np.array([[1, 2], [3, 4]])
    mosaic(data, ax=ax[1, 3], title='compound array', axes_label=False)
    mosaic(data, ax=ax[2, 3], title='inverted keys array', index=[1, 0], axes_label=False)
    plt.close("all")

    gender = ['male', 'male', 'male', 'female', 'female', 'female']
    pet = ['cat', 'dog', 'dog', 'cat', 'dog', 'cat']
    data = pandas.DataFrame({'gender': gender, 'pet': pet})
    mosaic(data, ['gender'], ax=ax[3, 0], title='dataframe by key 1', axes_label=False)
    mosaic(data, ['pet'], ax=ax[3, 1], title='dataframe by key 2', axes_label=False)
    mosaic(data, ['gender', 'pet'], ax=ax[3, 2], title='both keys', axes_label=False)
    mosaic(data, ['pet', 'gender'], ax=ax[3, 3], title='keys inverted', axes_label=False)
    plt.close("all")
    plt.suptitle('testing data conversion (plot 1 of 4)')


@pytest.mark.matplotlib
def test_mosaic_simple(close_figures):
    # display a simple plot of 4 categories of data, splitted in four
    # levels with increasing size for each group
    # creation of the levels
    key_set = (['male', 'female'], ['old', 'adult', 'young'],
               ['worker', 'unemployed'], ['healty', 'ill'])
    # the cartesian product of all the categories is
    # the complete set of categories
    keys = list(product(*key_set))
    data = dict(zip(keys, range(1, 1 + len(keys))))
    # which colours should I use for the various categories?
    # put it into a dict
    props = {}
    #males and females in blue and red
    props[('male',)] = {'color': 'b'}
    props[('female',)] = {'color': 'r'}
    # all the groups corresponding to ill groups have a different color
    for key in keys:
        if 'ill' in key:
            if 'male' in key:
                props[key] = {'color': 'BlueViolet' , 'hatch': '+'}
            else:
                props[key] = {'color': 'Crimson' , 'hatch': '+'}
    # mosaic of the data, with given gaps and colors
    mosaic(data, gap=0.05, properties=props, axes_label=False)
    plt.suptitle('syntetic data, 4 categories (plot 2 of 4)')


@pytest.mark.matplotlib
def test_mosaic(close_figures):
    # make the same analysis on a known dataset

    # load the data and clean it a bit
    affairs = datasets.fair.load_pandas()
    datas = affairs.exog
    # any time greater than 0 is cheating
    datas['cheated'] = affairs.endog > 0
    # sort by the marriage quality and give meaningful name
    # [rate_marriage, age, yrs_married, children,
    # religious, educ, occupation, occupation_husb]
    datas = datas.sort_values(['rate_marriage', 'religious'])

    num_to_desc = {1: 'awful', 2: 'bad', 3: 'intermediate',
                   4: 'good', 5: 'wonderful'}
    datas['rate_marriage'] = datas['rate_marriage'].map(num_to_desc)
    num_to_faith = {1: 'non religious', 2: 'poorly religious', 3: 'religious',
                    4: 'very religious'}
    datas['religious'] = datas['religious'].map(num_to_faith)
    num_to_cheat = {False: 'faithful', True: 'cheated'}
    datas['cheated'] = datas['cheated'].map(num_to_cheat)
    # finished cleaning
    _, ax = plt.subplots(2, 2)
    mosaic(datas, ['rate_marriage', 'cheated'], ax=ax[0, 0],
           title='by marriage happiness')
    mosaic(datas, ['religious', 'cheated'], ax=ax[0, 1],
           title='by religiosity')
    mosaic(datas, ['rate_marriage', 'religious', 'cheated'], ax=ax[1, 0],
           title='by both', labelizer=lambda k:'')
    ax[1, 0].set_xlabel('marriage rating')
    ax[1, 0].set_ylabel('religion status')
    mosaic(datas, ['religious', 'rate_marriage'], ax=ax[1, 1],
           title='inter-dependence', axes_label=False)
    plt.suptitle("extramarital affairs (plot 3 of 4)")


@pytest.mark.matplotlib
def test_mosaic_very_complex(close_figures):
    # make a scattermatrix of mosaic plots to show the correlations between
    # each pair of variable in a dataset. Could be easily converted into a
    # new function that does this automatically based on the type of data
    key_name = ['gender', 'age', 'health', 'work']
    key_base = (['male', 'female'], ['old', 'young'],
                ['healty', 'ill'], ['work', 'unemployed'])
    keys = list(product(*key_base))
    data = dict(zip(keys, range(1, 1 + len(keys))))
    props = {}
    props[('male', 'old')] = {'color': 'r'}
    props[('female',)] = {'color': 'pink'}
    L = len(key_base)
    _, axes = plt.subplots(L, L)
    for i in range(L):
        for j in range(L):
            m = set(range(L)).difference({i, j})
            if i == j:
                axes[i, i].text(0.5, 0.5, key_name[i],
                                ha='center', va='center')
                axes[i, i].set_xticks([])
                axes[i, i].set_xticklabels([])
                axes[i, i].set_yticks([])
                axes[i, i].set_yticklabels([])
            else:
                ji = max(i, j)
                ij = min(i, j)
                temp_data = {(k[ij], k[ji]) + tuple(k[r] for r in m): v
                                  for k, v in data.items()}

                keys = list(temp_data.keys())
                for k in keys:
                    value = _reduce_dict(temp_data, k[:2])
                    temp_data[k[:2]] = value
                    del temp_data[k]
                mosaic(temp_data, ax=axes[i, j], axes_label=False,
                       properties=props, gap=0.05, horizontal=i > j)
    plt.suptitle('old males should look bright red,  (plot 4 of 4)')


@pytest.mark.matplotlib
def test_axes_labeling(close_figures):
    from numpy.random import rand
    key_set = (['male', 'female'], ['old', 'adult', 'young'],
               ['worker', 'unemployed'], ['yes', 'no'])
    # the cartesian product of all the categories is
    # the complete set of categories
    keys = list(product(*key_set))
    data = dict(zip(keys, rand(len(keys))))
    lab = lambda k: ''.join(s[0] for s in k)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    mosaic(data, ax=ax1, labelizer=lab, horizontal=True, label_rotation=45)
    mosaic(data, ax=ax2, labelizer=lab, horizontal=False,
           label_rotation=[0, 45, 90, 0])
    #fig.tight_layout()
    fig.suptitle("correct alignment of the axes labels")


@pytest.mark.smoke
@pytest.mark.matplotlib
def test_mosaic_empty_cells(close_figures):
    # GH#2286
    import pandas as pd
    mydata = pd.DataFrame({'id2': {64: 'Angelica',
                                   65: 'DXW_UID', 66: 'casuid01',
                                   67: 'casuid01', 68: 'EC93_uid',
                                   69: 'EC93_uid', 70: 'EC93_uid',
                                   60: 'DXW_UID',  61: 'AtmosFox',
                                   62: 'DXW_UID', 63: 'DXW_UID'},
                           'id1': {64: 'TGP',
                                   65: 'Retention01', 66: 'default',
                                   67: 'default', 68: 'Musa_EC_9_3',
                                   69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',
                                   60: 'default', 61: 'default',
                                   62: 'default', 63: 'default'}})

    ct = pd.crosstab(mydata.id1, mydata.id2)
    _, vals = mosaic(ct.T.unstack())
    _, vals = mosaic(mydata, ['id1','id2'])


eq = lambda x, y: assert_(np.allclose(x, y))


def test_recursive_split():
    keys = list(product('mf'))
    data = dict(zip(keys, [1] * len(keys)))
    res = _hierarchical_split(data, gap=0)
    assert_(list(res.keys()) == keys)
    res[('m',)] = (0.0, 0.0, 0.5, 1.0)
    res[('f',)] = (0.5, 0.0, 0.5, 1.0)
    keys = list(product('mf', 'yao'))
    data = dict(zip(keys, [1] * len(keys)))
    res = _hierarchical_split(data, gap=0)
    assert_(list(res.keys()) == keys)
    res[('m', 'y')] = (0.0, 0.0, 0.5, 1 / 3)
    res[('m', 'a')] = (0.0, 1 / 3, 0.5, 1 / 3)
    res[('m', 'o')] = (0.0, 2 / 3, 0.5, 1 / 3)
    res[('f', 'y')] = (0.5, 0.0, 0.5, 1 / 3)
    res[('f', 'a')] = (0.5, 1 / 3, 0.5, 1 / 3)
    res[('f', 'o')] = (0.5, 2 / 3, 0.5, 1 / 3)


def test__reduce_dict():
    data = dict(zip(list(product('mf', 'oy', 'wn')), [1] * 8))
    eq(_reduce_dict(data, ('m',)), 4)
    eq(_reduce_dict(data, ('m', 'o')), 2)
    eq(_reduce_dict(data, ('m', 'o', 'w')), 1)
    data = dict(zip(list(product('mf', 'oy', 'wn')), lrange(8)))
    eq(_reduce_dict(data, ('m',)), 6)
    eq(_reduce_dict(data, ('m', 'o')), 1)
    eq(_reduce_dict(data, ('m', 'o', 'w')), 0)


def test__key_splitting():
    # subdivide starting with an empty tuple
    base_rect = {tuple(): (0, 0, 1, 1)}
    res = _key_splitting(base_rect, ['a', 'b'], [1, 1], tuple(), True, 0)
    assert_(list(res.keys()) == [('a',), ('b',)])
    eq(res[('a',)], (0, 0, 0.5, 1))
    eq(res[('b',)], (0.5, 0, 0.5, 1))
    # subdivide a in two sublevel
    res_bis = _key_splitting(res, ['c', 'd'], [1, 1], ('a',), False, 0)
    assert_(list(res_bis.keys()) == [('a', 'c'), ('a', 'd'), ('b',)])
    eq(res_bis[('a', 'c')], (0.0, 0.0, 0.5, 0.5))
    eq(res_bis[('a', 'd')], (0.0, 0.5, 0.5, 0.5))
    eq(res_bis[('b',)], (0.5, 0, 0.5, 1))
    # starting with a non empty tuple and uneven distribution
    base_rect = {('total',): (0, 0, 1, 1)}
    res = _key_splitting(base_rect, ['a', 'b'], [1, 2], ('total',), True, 0)
    assert_(list(res.keys()) == [('total',) + (e,) for e in ['a', 'b']])
    eq(res[('total', 'a')], (0, 0, 1 / 3, 1))
    eq(res[('total', 'b')], (1 / 3, 0, 2 / 3, 1))


def test_proportion_normalization():
    # extremes should give the whole set, as well
    # as if 0 is inserted
    eq(_normalize_split(0.), [0.0, 0.0, 1.0])
    eq(_normalize_split(1.), [0.0, 1.0, 1.0])
    eq(_normalize_split(2.), [0.0, 1.0, 1.0])
    # negative values should raise ValueError
    assert_raises(ValueError, _normalize_split, -1)
    assert_raises(ValueError, _normalize_split, [1., -1])
    assert_raises(ValueError, _normalize_split, [1., -1, 0.])
    # if everything is zero it will complain
    assert_raises(ValueError, _normalize_split, [0.])
    assert_raises(ValueError, _normalize_split, [0., 0.])
    # one-element array should return the whole interval
    eq(_normalize_split([0.5]), [0.0, 1.0])
    eq(_normalize_split([1.]), [0.0, 1.0])
    eq(_normalize_split([2.]), [0.0, 1.0])
    # simple division should give two pieces
    for x in [0.3, 0.5, 0.9]:
        eq(_normalize_split(x), [0., x, 1.0])
    # multiple division should split as the sum of the components
    for x, y in [(0.25, 0.5), (0.1, 0.8), (10., 30.)]:
        eq(_normalize_split([x, y]), [0., x / (x + y), 1.0])
    for x, y, z in [(1., 1., 1.), (0.1, 0.5, 0.7), (10., 30., 40)]:
        eq(_normalize_split(
            [x, y, z]), [0., x / (x + y + z), (x + y) / (x + y + z), 1.0])


def test_false_split():
    # if you ask it to be divided in only one piece, just return the original
    # one
    pure_square = [0., 0., 1., 1.]
    conf_h = dict(proportion=[1], gap=0.0, horizontal=True)
    conf_v = dict(proportion=[1], gap=0.0, horizontal=False)
    eq(_split_rect(*pure_square, **conf_h), pure_square)
    eq(_split_rect(*pure_square, **conf_v), pure_square)
    conf_h = dict(proportion=[1], gap=0.5, horizontal=True)
    conf_v = dict(proportion=[1], gap=0.5, horizontal=False)
    eq(_split_rect(*pure_square, **conf_h), pure_square)
    eq(_split_rect(*pure_square, **conf_v), pure_square)

    # identity on a void rectangle should not give anything strange
    null_square = [0., 0., 0., 0.]
    conf = dict(proportion=[1], gap=0.0, horizontal=True)
    eq(_split_rect(*null_square, **conf), null_square)
    conf = dict(proportion=[1], gap=1.0, horizontal=True)
    eq(_split_rect(*null_square, **conf), null_square)

    # splitting a negative rectangle should raise error
    neg_square = [0., 0., -1., 0.]
    conf = dict(proportion=[1], gap=0.0, horizontal=True)
    assert_raises(ValueError, _split_rect, *neg_square, **conf)
    conf = dict(proportion=[1, 1], gap=0.0, horizontal=True)
    assert_raises(ValueError, _split_rect, *neg_square, **conf)
    conf = dict(proportion=[1], gap=0.5, horizontal=True)
    assert_raises(ValueError, _split_rect, *neg_square, **conf)
    conf = dict(proportion=[1, 1], gap=0.5, horizontal=True)
    assert_raises(ValueError, _split_rect, *neg_square, **conf)


def test_rect_pure_split():
    pure_square = [0., 0., 1., 1.]
    # division in two equal pieces from the perfect square
    h_2split = [(0.0, 0.0, 0.5, 1.0), (0.5, 0.0, 0.5, 1.0)]
    conf_h = dict(proportion=[1, 1], gap=0.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)

    v_2split = [(0.0, 0.0, 1.0, 0.5), (0.0, 0.5, 1.0, 0.5)]
    conf_v = dict(proportion=[1, 1], gap=0.0, horizontal=False)
    eq(_split_rect(*pure_square, **conf_v), v_2split)

    # division in two non-equal pieces from the perfect square
    h_2split = [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 2 / 3, 1.0)]
    conf_h = dict(proportion=[1, 2], gap=0.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)

    v_2split = [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 2 / 3)]
    conf_v = dict(proportion=[1, 2], gap=0.0, horizontal=False)
    eq(_split_rect(*pure_square, **conf_v), v_2split)

    # division in three equal pieces from the perfect square
    h_2split = [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 1 / 3, 1.0), (2 / 3, 0.0,
                 1 / 3, 1.0)]
    conf_h = dict(proportion=[1, 1, 1], gap=0.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)

    v_2split = [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 1 / 3), (0.0, 2 / 3,
                 1.0, 1 / 3)]
    conf_v = dict(proportion=[1, 1, 1], gap=0.0, horizontal=False)
    eq(_split_rect(*pure_square, **conf_v), v_2split)

    # division in three non-equal pieces from the perfect square
    h_2split = [(0.0, 0.0, 1 / 4, 1.0), (1 / 4, 0.0, 1 / 2, 1.0), (3 / 4, 0.0,
                 1 / 4, 1.0)]
    conf_h = dict(proportion=[1, 2, 1], gap=0.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)

    v_2split = [(0.0, 0.0, 1.0, 1 / 4), (0.0, 1 / 4, 1.0, 1 / 2), (0.0, 3 / 4,
                 1.0, 1 / 4)]
    conf_v = dict(proportion=[1, 2, 1], gap=0.0, horizontal=False)
    eq(_split_rect(*pure_square, **conf_v), v_2split)

    # splitting on a void rectangle should give multiple void
    null_square = [0., 0., 0., 0.]
    conf = dict(proportion=[1, 1], gap=0.0, horizontal=True)
    eq(_split_rect(*null_square, **conf), [null_square, null_square])
    conf = dict(proportion=[1, 2], gap=1.0, horizontal=True)
    eq(_split_rect(*null_square, **conf), [null_square, null_square])


def test_rect_deformed_split():
    non_pure_square = [1., -1., 1., 0.5]
    # division in two equal pieces from the perfect square
    h_2split = [(1.0, -1.0, 0.5, 0.5), (1.5, -1.0, 0.5, 0.5)]
    conf_h = dict(proportion=[1, 1], gap=0.0, horizontal=True)
    eq(_split_rect(*non_pure_square, **conf_h), h_2split)

    v_2split = [(1.0, -1.0, 1.0, 0.25), (1.0, -0.75, 1.0, 0.25)]
    conf_v = dict(proportion=[1, 1], gap=0.0, horizontal=False)
    eq(_split_rect(*non_pure_square, **conf_v), v_2split)

    # division in two non-equal pieces from the perfect square
    h_2split = [(1.0, -1.0, 1 / 3, 0.5), (1 + 1 / 3, -1.0, 2 / 3, 0.5)]
    conf_h = dict(proportion=[1, 2], gap=0.0, horizontal=True)
    eq(_split_rect(*non_pure_square, **conf_h), h_2split)

    v_2split = [(1.0, -1.0, 1.0, 1 / 6), (1.0, 1 / 6 - 1, 1.0, 2 / 6)]
    conf_v = dict(proportion=[1, 2], gap=0.0, horizontal=False)
    eq(_split_rect(*non_pure_square, **conf_v), v_2split)


def test_gap_split():
    pure_square = [0., 0., 1., 1.]

    # null split
    conf_h = dict(proportion=[1], gap=1.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), pure_square)

    # equal split
    h_2split = [(0.0, 0.0, 0.25, 1.0), (0.75, 0.0, 0.25, 1.0)]
    conf_h = dict(proportion=[1, 1], gap=1.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)

    # disequal split
    h_2split = [(0.0, 0.0, 1 / 6, 1.0), (0.5 + 1 / 6, 0.0, 1 / 3, 1.0)]
    conf_h = dict(proportion=[1, 2], gap=1.0, horizontal=True)
    eq(_split_rect(*pure_square, **conf_h), h_2split)


@pytest.mark.matplotlib
def test_default_arg_index(close_figures):
    # 2116
    df = pd.DataFrame({'size' : ['small', 'large', 'large', 'small', 'large',
                                 'small'],
                       'length' : ['long', 'short', 'short', 'long', 'long',
                                   'short']})
    assert_raises(ValueError, mosaic, data=df, title='foobar')


@pytest.mark.matplotlib
def test_missing_category(close_figures):
    # GH5639
    animal = ['dog', 'dog', 'dog', 'cat', 'dog', 'cat', 'cat',
              'dog', 'dog', 'cat']
    size = ['medium', 'large', 'medium', 'medium', 'medium', 'medium',
            'large', 'large', 'large', 'small']
    testdata = pd.DataFrame({'animal': animal, 'size': size})
    testdata['size'] = pd.Categorical(testdata['size'],
                                      categories=['small', 'medium', 'large'])
    testdata = testdata.sort_values('size')
    fig, _ = mosaic(testdata, ['animal', 'size'])
    bio = BytesIO()
    fig.savefig(bio, format='png')
lab 1 is done 2024-10-02 22:15:59 +04:00			`from statsmodels.compat.python import lrange`

			`from io import BytesIO`
			`from itertools import product`

			`import numpy as np`
			`from numpy.testing import assert_, assert_raises`
			`import pandas as pd`
			`import pytest`

			`from statsmodels.api import datasets`

			`# utilities for the tests`

			`try:`
			`import matplotlib.pyplot as plt # noqa:F401`
			`except ImportError:`
			`pass`

			`# other functions to be tested for accuracy`
			`# the main drawing function`
			`from statsmodels.graphics.mosaicplot import (`
			`_hierarchical_split,`
			`_key_splitting,`
			`_normalize_split,`
			`_reduce_dict,`
			`_split_rect,`
			`mosaic,`
			`)`


			`@pytest.mark.matplotlib`
			`def test_data_conversion(close_figures):`
			`# It will not reorder the elements`
			`# so the dictionary will look odd`
			`# as it key order has the c and b`
			`# keys swapped`
			`import pandas`
			`_, ax = plt.subplots(4, 4)`
			`data = {'ax': 1, 'bx': 2, 'cx': 3}`
			`mosaic(data, ax=ax[0, 0], title='basic dict', axes_label=False)`
			`data = pandas.Series(data)`
			`mosaic(data, ax=ax[0, 1], title='basic series', axes_label=False)`
			`data = [1, 2, 3]`
			`mosaic(data, ax=ax[0, 2], title='basic list', axes_label=False)`
			`data = np.asarray(data)`
			`mosaic(data, ax=ax[0, 3], title='basic array', axes_label=False)`
			`plt.close("all")`

			`data = {('ax', 'cx'): 1, ('bx', 'cx'): 2, ('ax', 'dx'): 3, ('bx', 'dx'): 4}`
			`mosaic(data, ax=ax[1, 0], title='compound dict', axes_label=False)`
			`mosaic(data, ax=ax[2, 0], title='inverted keys dict', index=[1, 0], axes_label=False)`
			`data = pandas.Series(data)`
			`mosaic(data, ax=ax[1, 1], title='compound series', axes_label=False)`
			`mosaic(data, ax=ax[2, 1], title='inverted keys series', index=[1, 0])`
			`data = [[1, 2], [3, 4]]`
			`mosaic(data, ax=ax[1, 2], title='compound list', axes_label=False)`
			`mosaic(data, ax=ax[2, 2], title='inverted keys list', index=[1, 0])`
			`data = np.array([[1, 2], [3, 4]])`
			`mosaic(data, ax=ax[1, 3], title='compound array', axes_label=False)`
			`mosaic(data, ax=ax[2, 3], title='inverted keys array', index=[1, 0], axes_label=False)`
			`plt.close("all")`

			`gender = ['male', 'male', 'male', 'female', 'female', 'female']`
			`pet = ['cat', 'dog', 'dog', 'cat', 'dog', 'cat']`
			`data = pandas.DataFrame({'gender': gender, 'pet': pet})`
			`mosaic(data, ['gender'], ax=ax[3, 0], title='dataframe by key 1', axes_label=False)`
			`mosaic(data, ['pet'], ax=ax[3, 1], title='dataframe by key 2', axes_label=False)`
			`mosaic(data, ['gender', 'pet'], ax=ax[3, 2], title='both keys', axes_label=False)`
			`mosaic(data, ['pet', 'gender'], ax=ax[3, 3], title='keys inverted', axes_label=False)`
			`plt.close("all")`
			`plt.suptitle('testing data conversion (plot 1 of 4)')`


			`@pytest.mark.matplotlib`
			`def test_mosaic_simple(close_figures):`
			`# display a simple plot of 4 categories of data, splitted in four`
			`# levels with increasing size for each group`
			`# creation of the levels`
			`key_set = (['male', 'female'], ['old', 'adult', 'young'],`
			`['worker', 'unemployed'], ['healty', 'ill'])`
			`# the cartesian product of all the categories is`
			`# the complete set of categories`
			`keys = list(product(*key_set))`
			`data = dict(zip(keys, range(1, 1 + len(keys))))`
			`# which colours should I use for the various categories?`
			`# put it into a dict`
			`props = {}`
			`#males and females in blue and red`
			`props[('male',)] = {'color': 'b'}`
			`props[('female',)] = {'color': 'r'}`
			`# all the groups corresponding to ill groups have a different color`
			`for key in keys:`
			`if 'ill' in key:`
			`if 'male' in key:`
			`props[key] = {'color': 'BlueViolet' , 'hatch': '+'}`
			`else:`
			`props[key] = {'color': 'Crimson' , 'hatch': '+'}`
			`# mosaic of the data, with given gaps and colors`
			`mosaic(data, gap=0.05, properties=props, axes_label=False)`
			`plt.suptitle('syntetic data, 4 categories (plot 2 of 4)')`


			`@pytest.mark.matplotlib`
			`def test_mosaic(close_figures):`
			`# make the same analysis on a known dataset`

			`# load the data and clean it a bit`
			`affairs = datasets.fair.load_pandas()`
			`datas = affairs.exog`
			`# any time greater than 0 is cheating`
			`datas['cheated'] = affairs.endog > 0`
			`# sort by the marriage quality and give meaningful name`
			`# [rate_marriage, age, yrs_married, children,`
			`# religious, educ, occupation, occupation_husb]`
			`datas = datas.sort_values(['rate_marriage', 'religious'])`

			`num_to_desc = {1: 'awful', 2: 'bad', 3: 'intermediate',`
			`4: 'good', 5: 'wonderful'}`
			`datas['rate_marriage'] = datas['rate_marriage'].map(num_to_desc)`
			`num_to_faith = {1: 'non religious', 2: 'poorly religious', 3: 'religious',`
			`4: 'very religious'}`
			`datas['religious'] = datas['religious'].map(num_to_faith)`
			`num_to_cheat = {False: 'faithful', True: 'cheated'}`
			`datas['cheated'] = datas['cheated'].map(num_to_cheat)`
			`# finished cleaning`
			`_, ax = plt.subplots(2, 2)`
			`mosaic(datas, ['rate_marriage', 'cheated'], ax=ax[0, 0],`
			`title='by marriage happiness')`
			`mosaic(datas, ['religious', 'cheated'], ax=ax[0, 1],`
			`title='by religiosity')`
			`mosaic(datas, ['rate_marriage', 'religious', 'cheated'], ax=ax[1, 0],`
			`title='by both', labelizer=lambda k:'')`
			`ax[1, 0].set_xlabel('marriage rating')`
			`ax[1, 0].set_ylabel('religion status')`
			`mosaic(datas, ['religious', 'rate_marriage'], ax=ax[1, 1],`
			`title='inter-dependence', axes_label=False)`
			`plt.suptitle("extramarital affairs (plot 3 of 4)")`


			`@pytest.mark.matplotlib`
			`def test_mosaic_very_complex(close_figures):`
			`# make a scattermatrix of mosaic plots to show the correlations between`
			`# each pair of variable in a dataset. Could be easily converted into a`
			`# new function that does this automatically based on the type of data`
			`key_name = ['gender', 'age', 'health', 'work']`
			`key_base = (['male', 'female'], ['old', 'young'],`
			`['healty', 'ill'], ['work', 'unemployed'])`
			`keys = list(product(*key_base))`
			`data = dict(zip(keys, range(1, 1 + len(keys))))`
			`props = {}`
			`props[('male', 'old')] = {'color': 'r'}`
			`props[('female',)] = {'color': 'pink'}`
			`L = len(key_base)`
			`_, axes = plt.subplots(L, L)`
			`for i in range(L):`
			`for j in range(L):`
			`m = set(range(L)).difference({i, j})`
			`if i == j:`
			`axes[i, i].text(0.5, 0.5, key_name[i],`
			`ha='center', va='center')`
			`axes[i, i].set_xticks([])`
			`axes[i, i].set_xticklabels([])`
			`axes[i, i].set_yticks([])`
			`axes[i, i].set_yticklabels([])`
			`else:`
			`ji = max(i, j)`
			`ij = min(i, j)`
			`temp_data = {(k[ij], k[ji]) + tuple(k[r] for r in m): v`
			`for k, v in data.items()}`

			`keys = list(temp_data.keys())`
			`for k in keys:`
			`value = _reduce_dict(temp_data, k[:2])`
			`temp_data[k[:2]] = value`
			`del temp_data[k]`
			`mosaic(temp_data, ax=axes[i, j], axes_label=False,`
			`properties=props, gap=0.05, horizontal=i > j)`
			`plt.suptitle('old males should look bright red, (plot 4 of 4)')`


			`@pytest.mark.matplotlib`
			`def test_axes_labeling(close_figures):`
			`from numpy.random import rand`
			`key_set = (['male', 'female'], ['old', 'adult', 'young'],`
			`['worker', 'unemployed'], ['yes', 'no'])`
			`# the cartesian product of all the categories is`
			`# the complete set of categories`
			`keys = list(product(*key_set))`
			`data = dict(zip(keys, rand(len(keys))))`
			`lab = lambda k: ''.join(s[0] for s in k)`
			`fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))`
			`mosaic(data, ax=ax1, labelizer=lab, horizontal=True, label_rotation=45)`
			`mosaic(data, ax=ax2, labelizer=lab, horizontal=False,`
			`label_rotation=[0, 45, 90, 0])`
			`#fig.tight_layout()`
			`fig.suptitle("correct alignment of the axes labels")`


			`@pytest.mark.smoke`
			`@pytest.mark.matplotlib`
			`def test_mosaic_empty_cells(close_figures):`
			`# GH#2286`
			`import pandas as pd`
			`mydata = pd.DataFrame({'id2': {64: 'Angelica',`
			`65: 'DXW_UID', 66: 'casuid01',`
			`67: 'casuid01', 68: 'EC93_uid',`
			`69: 'EC93_uid', 70: 'EC93_uid',`
			`60: 'DXW_UID', 61: 'AtmosFox',`
			`62: 'DXW_UID', 63: 'DXW_UID'},`
			`'id1': {64: 'TGP',`
			`65: 'Retention01', 66: 'default',`
			`67: 'default', 68: 'Musa_EC_9_3',`
			`69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',`
			`60: 'default', 61: 'default',`
			`62: 'default', 63: 'default'}})`

			`ct = pd.crosstab(mydata.id1, mydata.id2)`
			`_, vals = mosaic(ct.T.unstack())`
			`_, vals = mosaic(mydata, ['id1','id2'])`


			`eq = lambda x, y: assert_(np.allclose(x, y))`


			`def test_recursive_split():`
			`keys = list(product('mf'))`
			`data = dict(zip(keys, [1] * len(keys)))`
			`res = _hierarchical_split(data, gap=0)`
			`assert_(list(res.keys()) == keys)`
			`res[('m',)] = (0.0, 0.0, 0.5, 1.0)`
			`res[('f',)] = (0.5, 0.0, 0.5, 1.0)`
			`keys = list(product('mf', 'yao'))`
			`data = dict(zip(keys, [1] * len(keys)))`
			`res = _hierarchical_split(data, gap=0)`
			`assert_(list(res.keys()) == keys)`
			`res[('m', 'y')] = (0.0, 0.0, 0.5, 1 / 3)`
			`res[('m', 'a')] = (0.0, 1 / 3, 0.5, 1 / 3)`
			`res[('m', 'o')] = (0.0, 2 / 3, 0.5, 1 / 3)`
			`res[('f', 'y')] = (0.5, 0.0, 0.5, 1 / 3)`
			`res[('f', 'a')] = (0.5, 1 / 3, 0.5, 1 / 3)`
			`res[('f', 'o')] = (0.5, 2 / 3, 0.5, 1 / 3)`


			`def test__reduce_dict():`
			`data = dict(zip(list(product('mf', 'oy', 'wn')), [1] * 8))`
			`eq(_reduce_dict(data, ('m',)), 4)`
			`eq(_reduce_dict(data, ('m', 'o')), 2)`
			`eq(_reduce_dict(data, ('m', 'o', 'w')), 1)`
			`data = dict(zip(list(product('mf', 'oy', 'wn')), lrange(8)))`
			`eq(_reduce_dict(data, ('m',)), 6)`
			`eq(_reduce_dict(data, ('m', 'o')), 1)`
			`eq(_reduce_dict(data, ('m', 'o', 'w')), 0)`


			`def test__key_splitting():`
			`# subdivide starting with an empty tuple`
			`base_rect = {tuple(): (0, 0, 1, 1)}`
			`res = _key_splitting(base_rect, ['a', 'b'], [1, 1], tuple(), True, 0)`
			`assert_(list(res.keys()) == [('a',), ('b',)])`
			`eq(res[('a',)], (0, 0, 0.5, 1))`
			`eq(res[('b',)], (0.5, 0, 0.5, 1))`
			`# subdivide a in two sublevel`
			`res_bis = _key_splitting(res, ['c', 'd'], [1, 1], ('a',), False, 0)`
			`assert_(list(res_bis.keys()) == [('a', 'c'), ('a', 'd'), ('b',)])`
			`eq(res_bis[('a', 'c')], (0.0, 0.0, 0.5, 0.5))`
			`eq(res_bis[('a', 'd')], (0.0, 0.5, 0.5, 0.5))`
			`eq(res_bis[('b',)], (0.5, 0, 0.5, 1))`
			`# starting with a non empty tuple and uneven distribution`
			`base_rect = {('total',): (0, 0, 1, 1)}`
			`res = _key_splitting(base_rect, ['a', 'b'], [1, 2], ('total',), True, 0)`
			`assert_(list(res.keys()) == [('total',) + (e,) for e in ['a', 'b']])`
			`eq(res[('total', 'a')], (0, 0, 1 / 3, 1))`
			`eq(res[('total', 'b')], (1 / 3, 0, 2 / 3, 1))`


			`def test_proportion_normalization():`
			`# extremes should give the whole set, as well`
			`# as if 0 is inserted`
			`eq(_normalize_split(0.), [0.0, 0.0, 1.0])`
			`eq(_normalize_split(1.), [0.0, 1.0, 1.0])`
			`eq(_normalize_split(2.), [0.0, 1.0, 1.0])`
			`# negative values should raise ValueError`
			`assert_raises(ValueError, _normalize_split, -1)`
			`assert_raises(ValueError, _normalize_split, [1., -1])`
			`assert_raises(ValueError, _normalize_split, [1., -1, 0.])`
			`# if everything is zero it will complain`
			`assert_raises(ValueError, _normalize_split, [0.])`
			`assert_raises(ValueError, _normalize_split, [0., 0.])`
			`# one-element array should return the whole interval`
			`eq(_normalize_split([0.5]), [0.0, 1.0])`
			`eq(_normalize_split([1.]), [0.0, 1.0])`
			`eq(_normalize_split([2.]), [0.0, 1.0])`
			`# simple division should give two pieces`
			`for x in [0.3, 0.5, 0.9]:`
			`eq(_normalize_split(x), [0., x, 1.0])`
			`# multiple division should split as the sum of the components`
			`for x, y in [(0.25, 0.5), (0.1, 0.8), (10., 30.)]:`
			`eq(_normalize_split([x, y]), [0., x / (x + y), 1.0])`
			`for x, y, z in [(1., 1., 1.), (0.1, 0.5, 0.7), (10., 30., 40)]:`
			`eq(_normalize_split(`
			`[x, y, z]), [0., x / (x + y + z), (x + y) / (x + y + z), 1.0])`


			`def test_false_split():`
			`# if you ask it to be divided in only one piece, just return the original`
			`# one`
			`pure_square = [0., 0., 1., 1.]`
			`conf_h = dict(proportion=[1], gap=0.0, horizontal=True)`
			`conf_v = dict(proportion=[1], gap=0.0, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_h), pure_square)`
			`eq(_split_rect(pure_square, *conf_v), pure_square)`
			`conf_h = dict(proportion=[1], gap=0.5, horizontal=True)`
			`conf_v = dict(proportion=[1], gap=0.5, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_h), pure_square)`
			`eq(_split_rect(pure_square, *conf_v), pure_square)`

			`# identity on a void rectangle should not give anything strange`
			`null_square = [0., 0., 0., 0.]`
			`conf = dict(proportion=[1], gap=0.0, horizontal=True)`
			`eq(_split_rect(null_square, *conf), null_square)`
			`conf = dict(proportion=[1], gap=1.0, horizontal=True)`
			`eq(_split_rect(null_square, *conf), null_square)`

			`# splitting a negative rectangle should raise error`
			`neg_square = [0., 0., -1., 0.]`
			`conf = dict(proportion=[1], gap=0.0, horizontal=True)`
			`assert_raises(ValueError, _split_rect, neg_square, *conf)`
			`conf = dict(proportion=[1, 1], gap=0.0, horizontal=True)`
			`assert_raises(ValueError, _split_rect, neg_square, *conf)`
			`conf = dict(proportion=[1], gap=0.5, horizontal=True)`
			`assert_raises(ValueError, _split_rect, neg_square, *conf)`
			`conf = dict(proportion=[1, 1], gap=0.5, horizontal=True)`
			`assert_raises(ValueError, _split_rect, neg_square, *conf)`


			`def test_rect_pure_split():`
			`pure_square = [0., 0., 1., 1.]`
			`# division in two equal pieces from the perfect square`
			`h_2split = [(0.0, 0.0, 0.5, 1.0), (0.5, 0.0, 0.5, 1.0)]`
			`conf_h = dict(proportion=[1, 1], gap=0.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`

			`v_2split = [(0.0, 0.0, 1.0, 0.5), (0.0, 0.5, 1.0, 0.5)]`
			`conf_v = dict(proportion=[1, 1], gap=0.0, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_v), v_2split)`

			`# division in two non-equal pieces from the perfect square`
			`h_2split = [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 2 / 3, 1.0)]`
			`conf_h = dict(proportion=[1, 2], gap=0.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`

			`v_2split = [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 2 / 3)]`
			`conf_v = dict(proportion=[1, 2], gap=0.0, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_v), v_2split)`

			`# division in three equal pieces from the perfect square`
			`h_2split = [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 1 / 3, 1.0), (2 / 3, 0.0,`
			`1 / 3, 1.0)]`
			`conf_h = dict(proportion=[1, 1, 1], gap=0.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`

			`v_2split = [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 1 / 3), (0.0, 2 / 3,`
			`1.0, 1 / 3)]`
			`conf_v = dict(proportion=[1, 1, 1], gap=0.0, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_v), v_2split)`

			`# division in three non-equal pieces from the perfect square`
			`h_2split = [(0.0, 0.0, 1 / 4, 1.0), (1 / 4, 0.0, 1 / 2, 1.0), (3 / 4, 0.0,`
			`1 / 4, 1.0)]`
			`conf_h = dict(proportion=[1, 2, 1], gap=0.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`

			`v_2split = [(0.0, 0.0, 1.0, 1 / 4), (0.0, 1 / 4, 1.0, 1 / 2), (0.0, 3 / 4,`
			`1.0, 1 / 4)]`
			`conf_v = dict(proportion=[1, 2, 1], gap=0.0, horizontal=False)`
			`eq(_split_rect(pure_square, *conf_v), v_2split)`

			`# splitting on a void rectangle should give multiple void`
			`null_square = [0., 0., 0., 0.]`
			`conf = dict(proportion=[1, 1], gap=0.0, horizontal=True)`
			`eq(_split_rect(null_square, *conf), [null_square, null_square])`
			`conf = dict(proportion=[1, 2], gap=1.0, horizontal=True)`
			`eq(_split_rect(null_square, *conf), [null_square, null_square])`


			`def test_rect_deformed_split():`
			`non_pure_square = [1., -1., 1., 0.5]`
			`# division in two equal pieces from the perfect square`
			`h_2split = [(1.0, -1.0, 0.5, 0.5), (1.5, -1.0, 0.5, 0.5)]`
			`conf_h = dict(proportion=[1, 1], gap=0.0, horizontal=True)`
			`eq(_split_rect(non_pure_square, *conf_h), h_2split)`

			`v_2split = [(1.0, -1.0, 1.0, 0.25), (1.0, -0.75, 1.0, 0.25)]`
			`conf_v = dict(proportion=[1, 1], gap=0.0, horizontal=False)`
			`eq(_split_rect(non_pure_square, *conf_v), v_2split)`

			`# division in two non-equal pieces from the perfect square`
			`h_2split = [(1.0, -1.0, 1 / 3, 0.5), (1 + 1 / 3, -1.0, 2 / 3, 0.5)]`
			`conf_h = dict(proportion=[1, 2], gap=0.0, horizontal=True)`
			`eq(_split_rect(non_pure_square, *conf_h), h_2split)`

			`v_2split = [(1.0, -1.0, 1.0, 1 / 6), (1.0, 1 / 6 - 1, 1.0, 2 / 6)]`
			`conf_v = dict(proportion=[1, 2], gap=0.0, horizontal=False)`
			`eq(_split_rect(non_pure_square, *conf_v), v_2split)`


			`def test_gap_split():`
			`pure_square = [0., 0., 1., 1.]`

			`# null split`
			`conf_h = dict(proportion=[1], gap=1.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), pure_square)`

			`# equal split`
			`h_2split = [(0.0, 0.0, 0.25, 1.0), (0.75, 0.0, 0.25, 1.0)]`
			`conf_h = dict(proportion=[1, 1], gap=1.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`

			`# disequal split`
			`h_2split = [(0.0, 0.0, 1 / 6, 1.0), (0.5 + 1 / 6, 0.0, 1 / 3, 1.0)]`
			`conf_h = dict(proportion=[1, 2], gap=1.0, horizontal=True)`
			`eq(_split_rect(pure_square, *conf_h), h_2split)`


			`@pytest.mark.matplotlib`
			`def test_default_arg_index(close_figures):`
			`# 2116`
			`df = pd.DataFrame({'size' : ['small', 'large', 'large', 'small', 'large',`
			`'small'],`
			`'length' : ['long', 'short', 'short', 'long', 'long',`
			`'short']})`
			`assert_raises(ValueError, mosaic, data=df, title='foobar')`


			`@pytest.mark.matplotlib`
			`def test_missing_category(close_figures):`
			`# GH5639`
			`animal = ['dog', 'dog', 'dog', 'cat', 'dog', 'cat', 'cat',`
			`'dog', 'dog', 'cat']`
			`size = ['medium', 'large', 'medium', 'medium', 'medium', 'medium',`
			`'large', 'large', 'large', 'small']`
			`testdata = pd.DataFrame({'animal': animal, 'size': size})`
			`testdata['size'] = pd.Categorical(testdata['size'],`
			`categories=['small', 'medium', 'large'])`
			`testdata = testdata.sort_values('size')`
			`fig, _ = mosaic(testdata, ['animal', 'size'])`
			`bio = BytesIO()`
			`fig.savefig(bio, format='png')`