lab_2 is modified
This commit is contained in:
parent
b957001b5a
commit
e87fd960c4
@ -30,13 +30,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"# Загрузка данных\n",
|
||||
"stores_pd = pd.read_csv('static/csv/Stores.csv')"
|
||||
@ -51,7 +52,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -71,6 +72,10 @@
|
||||
"sns.scatterplot(x='Store_Area', y='Store_Sales', data=stores_pd)\n",
|
||||
"plt.title('Store_Area vs Store_Sales')\n",
|
||||
"\n",
|
||||
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||
"train_df, test_df = train_test_split(stores_pd, test_size=0.4, random_state=42)\n",
|
||||
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||
"\n",
|
||||
"# Диаграмма рассеяния для Items_Available и Store_Sales\n",
|
||||
"plt.subplot(2, 2, 2)\n",
|
||||
"sns.scatterplot(x='Items_Available', y='Store_Sales', data=stores_pd)\n",
|
||||
@ -172,13 +177,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"# Загрузка данных\n",
|
||||
"economic_df = pd.read_csv('static/csv/Economic Data - 9 Countries (1980-2020).csv')"
|
||||
@ -193,7 +199,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -218,6 +224,10 @@
|
||||
"sns.scatterplot(x='inflationrate', y='index price', data=economic_df)\n",
|
||||
"plt.title('Inflation Rate vs Index Price')\n",
|
||||
"\n",
|
||||
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||
"train_df, test_df = train_test_split(economic_df, test_size=0.4, random_state=42)\n",
|
||||
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||
"\n",
|
||||
"# Диаграмма рассеяния для oil prices и index price\n",
|
||||
"plt.subplot(2, 2, 3)\n",
|
||||
"sns.scatterplot(x='oil prices', y='index price', data=economic_df)\n",
|
||||
@ -325,13 +335,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"# Загрузка данных\n",
|
||||
"salaries_df = pd.read_csv('static/csv/ds_salaries.csv')\n",
|
||||
@ -382,7 +393,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -413,6 +424,10 @@
|
||||
"outliers = data[(data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))]\n",
|
||||
"print(f'Количество выбросов: {len(outliers)}')\n",
|
||||
"\n",
|
||||
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||
"train_df, test_df = train_test_split(salaries_df, test_size=0.4, random_state=42)\n",
|
||||
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||
"\n",
|
||||
"# Удаление выбросов\n",
|
||||
"filtered_data = data[(data >= (Q1 - 1.5 * IQR)) & (data <= (Q3 + 1.5 * IQR))]\n",
|
||||
"\n",
|
||||
|
Loading…
Reference in New Issue
Block a user