lab_2 is modified

This commit is contained in:
DyCTaTOR 2024-10-12 10:08:57 +04:00
parent b957001b5a
commit e87fd960c4

View File

@ -30,13 +30,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import seaborn as sns\n", "import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"\n", "\n",
"# Загрузка данных\n", "# Загрузка данных\n",
"stores_pd = pd.read_csv('static/csv/Stores.csv')" "stores_pd = pd.read_csv('static/csv/Stores.csv')"
@ -51,7 +52,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -71,6 +72,10 @@
"sns.scatterplot(x='Store_Area', y='Store_Sales', data=stores_pd)\n", "sns.scatterplot(x='Store_Area', y='Store_Sales', data=stores_pd)\n",
"plt.title('Store_Area vs Store_Sales')\n", "plt.title('Store_Area vs Store_Sales')\n",
"\n", "\n",
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
"train_df, test_df = train_test_split(stores_pd, test_size=0.4, random_state=42)\n",
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
"\n",
"# Диаграмма рассеяния для Items_Available и Store_Sales\n", "# Диаграмма рассеяния для Items_Available и Store_Sales\n",
"plt.subplot(2, 2, 2)\n", "plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x='Items_Available', y='Store_Sales', data=stores_pd)\n", "sns.scatterplot(x='Items_Available', y='Store_Sales', data=stores_pd)\n",
@ -172,13 +177,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import seaborn as sns\n", "import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"\n", "\n",
"# Загрузка данных\n", "# Загрузка данных\n",
"economic_df = pd.read_csv('static/csv/Economic Data - 9 Countries (1980-2020).csv')" "economic_df = pd.read_csv('static/csv/Economic Data - 9 Countries (1980-2020).csv')"
@ -193,7 +199,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -218,6 +224,10 @@
"sns.scatterplot(x='inflationrate', y='index price', data=economic_df)\n", "sns.scatterplot(x='inflationrate', y='index price', data=economic_df)\n",
"plt.title('Inflation Rate vs Index Price')\n", "plt.title('Inflation Rate vs Index Price')\n",
"\n", "\n",
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
"train_df, test_df = train_test_split(economic_df, test_size=0.4, random_state=42)\n",
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
"\n",
"# Диаграмма рассеяния для oil prices и index price\n", "# Диаграмма рассеяния для oil prices и index price\n",
"plt.subplot(2, 2, 3)\n", "plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x='oil prices', y='index price', data=economic_df)\n", "sns.scatterplot(x='oil prices', y='index price', data=economic_df)\n",
@ -325,13 +335,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import seaborn as sns\n", "import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"\n", "\n",
"# Загрузка данных\n", "# Загрузка данных\n",
"salaries_df = pd.read_csv('static/csv/ds_salaries.csv')\n", "salaries_df = pd.read_csv('static/csv/ds_salaries.csv')\n",
@ -382,7 +393,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -413,6 +424,10 @@
"outliers = data[(data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))]\n", "outliers = data[(data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))]\n",
"print(f'Количество выбросов: {len(outliers)}')\n", "print(f'Количество выбросов: {len(outliers)}')\n",
"\n", "\n",
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
"train_df, test_df = train_test_split(salaries_df, test_size=0.4, random_state=42)\n",
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
"\n",
"# Удаление выбросов\n", "# Удаление выбросов\n",
"filtered_data = data[(data >= (Q1 - 1.5 * IQR)) & (data <= (Q3 + 1.5 * IQR))]\n", "filtered_data = data[(data >= (Q1 - 1.5 * IQR)) & (data <= (Q3 + 1.5 * IQR))]\n",
"\n", "\n",