# IIS_2023_1/malkova_anastasia_lab_1/dataset.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


def generate_dataset(*, n_samples: int = 500, noise_scale: float = 2.5,
                     random_state: int = 0, noise_seed: int = 2):
    """Build a noisy synthetic 2-feature classification dataset.

    The samples come from ``make_classification`` with two informative
    features, no redundant features and one cluster per class.  Every
    coordinate is then shifted by an independent uniform draw from
    ``[0, noise_scale)`` so the classes overlap more.

    Called without arguments, the function returns exactly the same data
    as before the parameters were introduced.

    Args:
        n_samples: Number of points to generate.
        noise_scale: Upper bound of the uniform shift added to each
            coordinate.
        random_state: Seed forwarded to ``make_classification``.
        noise_seed: Seed of the generator that produces the shift.

    Returns:
        A ``(x, y)`` pair: the feature matrix of shape ``(n_samples, 2)``
        and the vector of class labels.
    """
    x, y = make_classification(
        n_samples=n_samples,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=random_state,
        n_clusters_per_class=1,
    )
    # A dedicated RandomState keeps the noise reproducible and independent
    # of NumPy's global random state.
    noise_rng = np.random.RandomState(noise_seed)
    x += noise_scale * noise_rng.uniform(size=x.shape)
    return x, y


def split_dataset(x, y, *, test_size=.05, random_state=42):
    """Split features and labels into train and test subsets.

    Thin wrapper around ``train_test_split``.  The defaults keep the
    original behaviour: 5% of the samples go to the test subset and the
    shuffle is seeded with 42.

    Args:
        x: Feature matrix.
        y: Labels aligned with the rows of ``x``.
        test_size: Test-subset size, passed straight to
            ``train_test_split`` (a float is a fraction of the data, an
            int is an absolute sample count).
        random_state: Seed that makes the shuffle reproducible.

    Returns:
        The list ``[x_train, x_test, y_train, y_test]``.
    """
    return train_test_split(
        x, y, test_size=test_size, random_state=random_state)