import numpy as np
import pandas as pd


class DecisionTree:
    """A minimal decision-tree classifier using entropy and information gain."""

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = {}

    def entropy(self, y):
        # Shannon entropy of the label distribution at this node
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return sum(-p * np.log2(p) for p in probabilities)

    def information_gain(self, X, y, feature, threshold):
        # Reduction in entropy achieved by splitting on `feature < threshold`
        left_indices = X[:, feature] < threshold
        right_indices = ~left_indices
        left_entropy = self.entropy(y[left_indices])
        right_entropy = self.entropy(y[right_indices])
        left_weight = np.sum(left_indices) / len(y)
        right_weight = 1 - left_weight
        return self.entropy(y) - (left_weight * left_entropy + right_weight * right_entropy)

    def best_split(self, X, y):
        # Exhaustively search every feature/threshold pair for the highest gain
        best_feature = None
        best_threshold = None
        best_gain = 0
        for feature in range(X.shape[1]):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                gain = self.information_gain(X, y, feature, threshold)
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def build_tree(self, X, y, depth=0):
        # Stop when the node is pure or the depth limit is reached
        if len(np.unique(y)) == 1 or (self.max_depth is not None and depth >= self.max_depth):
            return {'class': np.argmax(np.bincount(y))}
        best_feature, best_threshold = self.best_split(X, y)
        # No split improves purity: return a majority-class leaf instead of recursing
        if best_feature is None:
            return {'class': np.argmax(np.bincount(y))}
        left_indices = X[:, best_feature] < best_threshold
        right_indices = ~left_indices
        tree = {'feature': best_feature, 'threshold': best_threshold}
        tree['left'] = self.build_tree(X[left_indices], y[left_indices], depth + 1)
        tree['right'] = self.build_tree(X[right_indices], y[right_indices], depth + 1)
        return tree

    def fit(self, X, y):
        self.tree = self.build_tree(X, y)

    def predict_instance(self, tree, x):
        # Walk the tree until a leaf (a node holding 'class') is reached
        if 'class' in tree:
            return tree['class']
        if x[tree['feature']] < tree['threshold']:
            return self.predict_instance(tree['left'], x)
        return self.predict_instance(tree['right'], x)

    def predict(self, X):
        return [self.predict_instance(self.tree, x) for x in X]


# Example usage
data = {
    'feature1': [1, 2, 3, 4, 5],
    'feature2': [0, 0, 1, 1, 0],
    'target': [0, 0, 1, 1, 1]
}
df = pd.DataFrame(data)
X = df[['feature1', 'feature2']].values
y = df['target'].values

model = DecisionTree(max_depth=3)
model.fit(X, y)
predictions = model.predict(X)
print(predictions)
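
# Optional sanity check (an addition, not part of the original snippet): compare the toy
# predictions above against scikit-learn's DecisionTreeClassifier trained on the same data
# with the entropy criterion. On this small, cleanly separable dataset both trees should
# split on feature1 and produce the same labels. Skipped if scikit-learn is not installed.
try:
    from sklearn.tree import DecisionTreeClassifier

    reference = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
    reference.fit(X, y)
    print(list(reference.predict(X)))  # should match `predictions` on this toy dataset
except ImportError:
    pass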