{ "cells": [ { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n", " 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n", " 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n", " 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n", " dtype='object')" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Third-party libraries: numerics, data frames, plotting, scikit-learn helpers.\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn import set_config\n", "from sklearn.model_selection import train_test_split\n", "\n", "# Ask scikit-learn transformers to return pandas containers.\n", "set_config(transform_output=\"pandas\")\n", "\n", "# Fixed seed value; it is only defined here, not used in this cell.\n", "random_state = 42\n", "\n", "# Read the house data CSV into a DataFrame.\n", "df = pd.read_csv(\"data/house_data.csv\")\n", "\n", "# Display the column labels of the loaded frame.\n", "df.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Удалим ненужные столбцы" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | sqft_living | \n", "price | \n", "floors | \n", "bathrooms | \n", "bedrooms | \n", "sqft_basement | \n", "
---|---|---|---|---|---|---|
0 | \n", "1180 | \n", "221900.0 | \n", "1.0 | \n", "1.00 | \n", "3 | \n", "0 | \n", "
1 | \n", "2570 | \n", "538000.0 | \n", "2.0 | \n", "2.25 | \n", "3 | \n", "400 | \n", "
2 | \n", "770 | \n", "180000.0 | \n", "1.0 | \n", "1.00 | \n", "2 | \n", "0 | \n", "
3 | \n", "1960 | \n", "604000.0 | \n", "1.0 | \n", "3.00 | \n", "4 | \n", "910 | \n", "
4 | \n", "1680 | \n", "510000.0 | \n", "1.0 | \n", "2.00 | \n", "3 | \n", "0 | \n", "
5 | \n", "5420 | \n", "1225000.0 | \n", "1.0 | \n", "4.50 | \n", "4 | \n", "1530 | \n", "
6 | \n", "1715 | \n", "257500.0 | \n", "2.0 | \n", "2.25 | \n", "3 | \n", "0 | \n", "
7 | \n", "1060 | \n", "291850.0 | \n", "1.0 | \n", "1.50 | \n", "3 | \n", "0 | \n", "
8 | \n", "1780 | \n", "229500.0 | \n", "1.0 | \n", "1.00 | \n", "3 | \n", "730 | \n", "
9 | \n", "1890 | \n", "323000.0 | \n", "2.0 | \n", "2.50 | \n", "3 | \n", "0 | \n", "
\n", " | sqft_living | \n", "price | \n", "floors | \n", "bathrooms | \n", "bedrooms | \n", "sqft_basement | \n", "
---|---|---|---|---|---|---|
0 | \n", "1180 | \n", "221900.0 | \n", "1.0 | \n", "1.00 | \n", "3 | \n", "0 | \n", "
1 | \n", "2570 | \n", "538000.0 | \n", "2.0 | \n", "2.25 | \n", "3 | \n", "400 | \n", "
2 | \n", "770 | \n", "180000.0 | \n", "1.0 | \n", "1.00 | \n", "2 | \n", "0 | \n", "
3 | \n", "1960 | \n", "604000.0 | \n", "1.0 | \n", "3.00 | \n", "4 | \n", "910 | \n", "
4 | \n", "1680 | \n", "510000.0 | \n", "1.0 | \n", "2.00 | \n", "3 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
21608 | \n", "1530 | \n", "360000.0 | \n", "3.0 | \n", "2.50 | \n", "3 | \n", "0 | \n", "
21609 | \n", "2310 | \n", "400000.0 | \n", "2.0 | \n", "2.50 | \n", "4 | \n", "0 | \n", "
21610 | \n", "1020 | \n", "402101.0 | \n", "2.0 | \n", "0.75 | \n", "2 | \n", "0 | \n", "
21611 | \n", "1600 | \n", "400000.0 | \n", "2.0 | \n", "2.50 | \n", "3 | \n", "0 | \n", "
21612 | \n", "1020 | \n", "325000.0 | \n", "2.0 | \n", "0.75 | \n", "2 | \n", "0 | \n", "
21613 rows × 6 columns
\n", "