78 lines
3.2 KiB
Python
78 lines
3.2 KiB
Python
import pandas as pd # 0 tabs
|
|
import numpy as np # 0 tabs
|
|
|
|
class DecisionTree:
    """A minimal CART-style decision tree classifier split by information gain.

    Expects ``X`` as a 2-D numpy array of numeric features and ``y`` as a 1-D
    array of non-negative integer class labels (``np.bincount`` is used for
    the majority vote at leaves). The fitted tree is a nested dict: internal
    nodes hold ``'feature'``, ``'threshold'``, ``'left'``, ``'right'``;
    leaves hold ``'class'``.
    """

    def __init__(self, max_depth=None):
        # max_depth=None means "grow until every leaf is pure".
        self.max_depth = max_depth
        self.tree = {}

    def entropy(self, y):
        """Return the Shannon entropy (in bits) of the label array ``y``."""
        if len(y) == 0:
            # Bug fix: an empty partition (e.g. threshold == column minimum)
            # previously divided by zero; it carries no information.
            return 0.0
        classes, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return float(sum(-p * np.log2(p) for p in probabilities))

    def information_gain(self, X, y, feature, threshold):
        """Entropy reduction achieved by splitting on ``X[:, feature] < threshold``."""
        left_indices = X[:, feature] < threshold
        right_indices = ~left_indices
        left_weight = np.sum(left_indices) / len(y)
        right_weight = 1 - left_weight
        weighted_child_entropy = (left_weight * self.entropy(y[left_indices])
                                  + right_weight * self.entropy(y[right_indices]))
        return self.entropy(y) - weighted_child_entropy

    def best_split(self, X, y):
        """Return ``(feature, threshold)`` with the highest positive gain.

        Returns ``(None, None)`` when no candidate split improves on zero gain
        (callers must treat that as "make a leaf").
        """
        best_feature = None
        best_threshold = None
        best_gain = 0.0
        for feature in range(X.shape[1]):
            # Candidate thresholds are the distinct observed values of the feature.
            for threshold in np.unique(X[:, feature]):
                gain = self.information_gain(X, y, feature, threshold)
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def build_tree(self, X, y, depth=0):
        """Recursively build the tree dict for samples ``(X, y)`` at ``depth``."""
        if len(np.unique(y)) == 1 or (self.max_depth is not None and depth >= self.max_depth):
            # Pure node or depth limit reached: majority-class leaf.
            return {'class': np.argmax(np.bincount(y))}
        best_feature, best_threshold = self.best_split(X, y)
        if best_feature is None:
            # Bug fix: no split yields positive gain (e.g. all features constant).
            # The original indexed X[:, None] here and crashed; emit a leaf instead.
            return {'class': np.argmax(np.bincount(y))}
        left_indices = X[:, best_feature] < best_threshold
        right_indices = ~left_indices
        tree = {'feature': best_feature, 'threshold': best_threshold}
        tree['left'] = self.build_tree(X[left_indices], y[left_indices], depth + 1)
        tree['right'] = self.build_tree(X[right_indices], y[right_indices], depth + 1)
        return tree

    def fit(self, X, y):
        """Fit the tree on features ``X`` and integer labels ``y``."""
        self.tree = self.build_tree(X, y)

    def predict_instance(self, tree, x):
        """Walk ``tree`` for a single sample ``x`` and return its predicted class."""
        if 'class' in tree:
            return tree['class']
        if x[tree['feature']] < tree['threshold']:
            return self.predict_instance(tree['left'], x)
        else:
            return self.predict_instance(tree['right'], x)

    def predict(self, X):
        """Return a list with the predicted class for each row of ``X``."""
        return [self.predict_instance(self.tree, x) for x in X]
|
|
|
|
# Usage example: fit the tree on a tiny labelled dataset and print predictions.
df = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5],
    'feature2': [0, 0, 1, 1, 0],
    'target': [0, 0, 1, 1, 1],
})

X = df[['feature1', 'feature2']].values
y = df['target'].values

model = DecisionTree(max_depth=3)
model.fit(X, y)
predictions = model.predict(X)
print(predictions)