AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/sandbox/regression/try_treewalker.py

146 lines
4.3 KiB
Python
Raw Normal View History

2024-10-02 22:15:59 +04:00
'''Trying out a tree structure for nested logit

The sum stands in for the likelihood calculation: the walkers should collect
and aggregate the likelihood contributions bottom-up.
'''
from statsmodels.compat.python import lrange
import numpy as np
# Nested-list tree of choice indices: every innermost list is a final branch.
# A singleton/degenerate branch (here ``[7]``) still has to be a list.
tree = [
    [0, 1],
    [
        [2, 3],
        [4, 5, 6],
    ],
    [7],
]

# One value per leaf index 0..7, i.e. 0, 2, ..., 14.
xb = np.arange(8) * 2

# Truthy: ``branch`` sums the ``xb`` values at the leaf indices;
# falsy (0): it sums the leaf indices themselves.
testxb = 1
def branch(tree, values=None, use_values=None):
    '''Walk a nested-list tree bottom-up and add up its leaves.

    The sum stands in for the likelihood aggregation of a nested logit model.

    Parameters
    ----------
    tree : list
        Either a list of sub-branches (nested lists) or a final branch, i.e.
        a flat list of int choice indices.  A singleton branch still has to
        be a list, e.g. ``[7]``.  An empty list is treated as a final branch
        without leaves and contributes 0.
    values : indexable, optional
        Values looked up by leaf index when ``use_values`` is true.  Defaults
        to the module-level ``xb``.
    use_values : bool, optional
        If true, ``values[i]`` is summed over the leaves, otherwise the leaf
        indices themselves are summed.  Defaults to the module-level
        ``testxb``.

    Returns
    -------
    total
        Sum over all leaves below ``tree``.
    '''
    # Resolve the module-level defaults once; the recursive calls below pass
    # both arguments explicitly, so plain ``branch(tree)`` behaves as before.
    if use_values is None:
        use_values = testxb
    if use_values and values is None:
        values = xb

    # Only the first element is inspected: an int there marks a final branch.
    # The length check keeps an empty branch from raising IndexError.
    if len(tree) > 0 and not isinstance(tree[0], int):
        branchsum = 0
        for sub in tree:
            branchsum += branch(sub, values, use_values)
        print('working on branch', tree, branchsum)
        return branchsum

    print(tree)
    print('final branch with', tree, sum(tree))
    if use_values:
        # element-wise lookup works for arrays as well as plain sequences
        return sum(values[i] for i in tree)
    return sum(tree)
# Run the list-based walker on the example tree and print the aggregate.
print(branch(tree))

# New version that also keeps track of the branch name and allows a value V_j
# for a branch itself.
# As in Greene, V_j + lambda * IV does not look the same as including the
# explanatory variables in leaf X_j: V_j is linear in X, IV is logsumexp of X.

# Falsy (0): branch2 concatenates branch/leaf names;
# truthy (1): branch2 sums the data2 entries instead.
testxb = 0
def branch2(tree, data=None, use_data=None):
    '''Walk a named tree bottom-up, aggregating branch and leaf entries.

    Parameters
    ----------
    tree : tuple or iterable of str
        A branch is a ``(name, subtree)`` tuple, where ``subtree`` is a list
        of further branch tuples or of leaf names.  Anything that is not a
        tuple is treated as leaves and iterated over; a plain string is
        therefore iterated character by character, so leaf names reached
        through a branch have to be single characters.
    data : mapping, optional
        Value for every branch and leaf name.  Defaults to the module-level
        ``data2``.  Branch names are looked up even when ``use_data`` is
        false, because the value is printed.
    use_data : bool, optional
        If true, the ``data`` values of the branch and everything below it
        are summed; otherwise the names are concatenated in visiting order.
        Defaults to the module-level ``testxb``.

    Returns
    -------
    total : number or str
        Sum of the values, or the concatenated names.
    '''
    # Resolve the module-level defaults once; the recursive calls below pass
    # both arguments explicitly, so plain ``branch2(tree)`` behaves as before.
    if use_data is None:
        use_data = testxb
    if data is None:
        data = data2

    if not isinstance(tree, tuple):
        # final branch: leaves are names (keys of ``data``), not int indices
        leavessum = sum(data[leaf] for leaf in tree)
        label = ''.join(tree)
        print('final branch with', tree, label, leavessum)
        return leavessum if use_data else label

    name, subtree = tree
    print(name, data[name])
    print('subtree', subtree)
    # start from the branch's own contribution, then add everything below it
    branchsum = data[name] if use_data else name
    for sub in subtree:
        branchsum = branchsum + branch2(sub, data, use_data)
    print('working on branch', tree, branchsum)
    return branchsum
# Nested-list tree of int choice indices (list-based representation).
tree = [[0, 1], [[2, 3], [4, 5, 6]], [7]]

# Named representation: a branch is a ``(name, subtree)`` tuple and a final
# branch lists its leaf names.
tree2 = (
    'top',
    [
        ('B1', ['a', 'b']),
        ('B2', [
            ('B21', ['c', 'd']),
            ('B22', ['e', 'f', 'g']),
        ]),
        ('B3', ['h']),
    ],
)

# One value per leaf: 'a' -> 0, ..., 'h' -> 7.  ``zip`` pairs the names with
# a lazy ``range`` directly; no intermediate list is needed.
data2 = dict(zip('abcdefgh', range(8)))

# One value per branch name.
# Alternative values tried earlier:
# {'top':1000, 'B1':100, 'B2':200, 'B21':300, 'B22':400, 'B3':400}
data2.update({'top': 1000, 'B1': 100, 'B2': 200, 'B21': 21, 'B22': 22,
              'B3': 300})

# Resulting content of data2:
# {'a': 0, 'c': 2, 'b': 1, 'e': 4, 'd': 3, 'g': 6, 'f': 5, 'h': 7,
#  'top': 1000, 'B22': 22, 'B21': 21, 'B1': 100, 'B2': 200, 'B3': 300}
# Run the dictionary-based walker on the named tree and print the aggregate
# (the walker also prints its intermediate steps).
print('\n tree with dictionary data')
print(branch2(tree2))  # results look correct for testxb=0 and testxb=1
# Coefficient names attached to every branch and leaf.  A name may be shared
# by several entries (e.g. 'p'); an empty list means the entry has no
# coefficients of its own.  The names are translated below into positions in
# one flat 1d parameter array.
# Note: nothing below relies on the ordering of this dict.
paramsind = {
    'B1': [],
    'a': ['consta', 'p'],
    'b': ['constb', 'p'],
    'B2': ['const2', 'x2'],
    'B21': [],
    'c': ['consta', 'p', 'time'],
    'd': ['consta', 'p', 'time'],
    'B22': ['x22'],
    'e': ['conste', 'p', 'hince'],
    'f': ['constt', 'p', 'hincf'],
    'g': ['p', 'hincg'],
    'B3': [],
    'h': ['consth', 'p', 'h'],
    'top': [],
}

# Unique coefficient names; the alphabetical order fixes the layout of the
# parameter array and is meant to be internal only.
paramsnames = sorted(set().union(*paramsind.values()))

# Coefficient name -> position in the parameter array.
paramsidx = dict(zip(paramsnames, range(len(paramsnames))))

# Branch or leaf name -> positions of its coefficients in the parameter array.
inddict = {
    node: [paramsidx[coef] for coef in coefs]
    for node, coefs in paramsind.items()
}
'''
>>> paramsnames
['const2', 'consta', 'constb', 'conste', 'consth', 'constt', 'h', 'hince',
'hincf', 'hincg', 'p', 'time', 'x2', 'x22']
>>> paramsidx
{'conste': 3, 'consta': 1, 'constb': 2, 'h': 6, 'time': 11, 'consth': 4,
'p': 10, 'constt': 5, 'const2': 0, 'x2': 12, 'x22': 13, 'hince': 7,
'hincg': 9, 'hincf': 8}
>>> inddict
{'a': [1, 10], 'c': [1, 10, 11], 'b': [2, 10], 'e': [3, 10, 7],
'd': [1, 10, 11], 'g': [10, 9], 'f': [5, 10, 8], 'h': [4, 10, 6],
'top': [], 'B22': [13], 'B21': [], 'B1': [], 'B2': [0, 12], 'B3': []}
>>> paramsind
{'a': ['consta', 'p'], 'c': ['consta', 'p', 'time'], 'b': ['constb', 'p'],
'e': ['conste', 'p', 'hince'], 'd': ['consta', 'p', 'time'],
'g': ['p', 'hincg'], 'f': ['constt', 'p', 'hincf'], 'h': ['consth', 'p', 'h'],
'top': [], 'B22': ['x22'], 'B21': [], 'B1': [], 'B2': ['const2', 'x2'],
'B3': []}
'''