lab_2 is modified
This commit is contained in:
parent
b957001b5a
commit
e87fd960c4
@ -30,13 +30,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"import seaborn as sns\n",
|
"import seaborn as sns\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Загрузка данных\n",
|
"# Загрузка данных\n",
|
||||||
"stores_pd = pd.read_csv('static/csv/Stores.csv')"
|
"stores_pd = pd.read_csv('static/csv/Stores.csv')"
|
||||||
@ -51,7 +52,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -71,6 +72,10 @@
|
|||||||
"sns.scatterplot(x='Store_Area', y='Store_Sales', data=stores_pd)\n",
|
"sns.scatterplot(x='Store_Area', y='Store_Sales', data=stores_pd)\n",
|
||||||
"plt.title('Store_Area vs Store_Sales')\n",
|
"plt.title('Store_Area vs Store_Sales')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||||
|
"train_df, test_df = train_test_split(stores_pd, test_size=0.4, random_state=42)\n",
|
||||||
|
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||||
|
"\n",
|
||||||
"# Диаграмма рассеяния для Items_Available и Store_Sales\n",
|
"# Диаграмма рассеяния для Items_Available и Store_Sales\n",
|
||||||
"plt.subplot(2, 2, 2)\n",
|
"plt.subplot(2, 2, 2)\n",
|
||||||
"sns.scatterplot(x='Items_Available', y='Store_Sales', data=stores_pd)\n",
|
"sns.scatterplot(x='Items_Available', y='Store_Sales', data=stores_pd)\n",
|
||||||
@ -172,13 +177,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"import seaborn as sns\n",
|
"import seaborn as sns\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Загрузка данных\n",
|
"# Загрузка данных\n",
|
||||||
"economic_df = pd.read_csv('static/csv/Economic Data - 9 Countries (1980-2020).csv')"
|
"economic_df = pd.read_csv('static/csv/Economic Data - 9 Countries (1980-2020).csv')"
|
||||||
@ -193,7 +199,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -218,6 +224,10 @@
|
|||||||
"sns.scatterplot(x='inflationrate', y='index price', data=economic_df)\n",
|
"sns.scatterplot(x='inflationrate', y='index price', data=economic_df)\n",
|
||||||
"plt.title('Inflation Rate vs Index Price')\n",
|
"plt.title('Inflation Rate vs Index Price')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||||
|
"train_df, test_df = train_test_split(economic_df, test_size=0.4, random_state=42)\n",
|
||||||
|
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||||
|
"\n",
|
||||||
"# Диаграмма рассеяния для oil prices и index price\n",
|
"# Диаграмма рассеяния для oil prices и index price\n",
|
||||||
"plt.subplot(2, 2, 3)\n",
|
"plt.subplot(2, 2, 3)\n",
|
||||||
"sns.scatterplot(x='oil prices', y='index price', data=economic_df)\n",
|
"sns.scatterplot(x='oil prices', y='index price', data=economic_df)\n",
|
||||||
@ -325,13 +335,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"import seaborn as sns\n",
|
"import seaborn as sns\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Загрузка данных\n",
|
"# Загрузка данных\n",
|
||||||
"salaries_df = pd.read_csv('static/csv/ds_salaries.csv')\n",
|
"salaries_df = pd.read_csv('static/csv/ds_salaries.csv')\n",
|
||||||
@ -382,7 +393,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -413,6 +424,10 @@
|
|||||||
"outliers = data[(data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))]\n",
|
"outliers = data[(data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))]\n",
|
||||||
"print(f'Количество выбросов: {len(outliers)}')\n",
|
"print(f'Количество выбросов: {len(outliers)}')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Разбиение на обучающую, контрольную и тестовую выборки\n",
|
||||||
|
"train_df, test_df = train_test_split(salaries_df, test_size=0.4, random_state=42)\n",
|
||||||
|
"val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)\n",
|
||||||
|
"\n",
|
||||||
"# Удаление выбросов\n",
|
"# Удаление выбросов\n",
|
||||||
"filtered_data = data[(data >= (Q1 - 1.5 * IQR)) & (data <= (Q3 + 1.5 * IQR))]\n",
|
"filtered_data = data[(data >= (Q1 - 1.5 * IQR)) & (data <= (Q3 + 1.5 * IQR))]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user