{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Начинаем работу ... \n", "\n", "Датасет: Продажа домов в округе Кинг (вариант-6)  \n", "https://www.kaggle.com/datasets/harlfoxem/housesalesprediction" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n", " 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n", " 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n", " 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n", " dtype='object')\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n", "from sklearn.cluster import KMeans\n", "from sklearn.decomposition import PCA\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import silhouette_score\n", "\n", "# Подключим датафрейм и выгрузим данные\n", "df = pd.read_csv(\".//static//csv//kc_house_data.csv\")\n", "\n", "print(df.columns)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "date | \n", "price | \n", "bedrooms | \n", "bathrooms | \n", "sqft_living | \n", "sqft_lot | \n", "floors | \n", "waterfront | \n", "view | \n", "... | \n", "grade | \n", "sqft_above | \n", "sqft_basement | \n", "yr_built | \n", "yr_renovated | \n", "zipcode | \n", "lat | \n", "long | \n", "sqft_living15 | \n", "sqft_lot15 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7129300520 | \n", "20141013T000000 | \n", "221900.0 | \n", "3 | \n", "1.00 | \n", "1180 | \n", "5650 | \n", "1.0 | \n", "0 | \n", "0 | \n", "... | \n", "7 | \n", "1180 | \n", "0 | \n", "1955 | \n", "0 | \n", "98178 | \n", "47.5112 | \n", "-122.257 | \n", "1340 | \n", "5650 | \n", "
1 | \n", "6414100192 | \n", "20141209T000000 | \n", "538000.0 | \n", "3 | \n", "2.25 | \n", "2570 | \n", "7242 | \n", "2.0 | \n", "0 | \n", "0 | \n", "... | \n", "7 | \n", "2170 | \n", "400 | \n", "1951 | \n", "1991 | \n", "98125 | \n", "47.7210 | \n", "-122.319 | \n", "1690 | \n", "7639 | \n", "
2 | \n", "5631500400 | \n", "20150225T000000 | \n", "180000.0 | \n", "2 | \n", "1.00 | \n", "770 | \n", "10000 | \n", "1.0 | \n", "0 | \n", "0 | \n", "... | \n", "6 | \n", "770 | \n", "0 | \n", "1933 | \n", "0 | \n", "98028 | \n", "47.7379 | \n", "-122.233 | \n", "2720 | \n", "8062 | \n", "
3 | \n", "2487200875 | \n", "20141209T000000 | \n", "604000.0 | \n", "4 | \n", "3.00 | \n", "1960 | \n", "5000 | \n", "1.0 | \n", "0 | \n", "0 | \n", "... | \n", "7 | \n", "1050 | \n", "910 | \n", "1965 | \n", "0 | \n", "98136 | \n", "47.5208 | \n", "-122.393 | \n", "1360 | \n", "5000 | \n", "
4 | \n", "1954400510 | \n", "20150218T000000 | \n", "510000.0 | \n", "3 | \n", "2.00 | \n", "1680 | \n", "8080 | \n", "1.0 | \n", "0 | \n", "0 | \n", "... | \n", "8 | \n", "1680 | \n", "0 | \n", "1987 | \n", "0 | \n", "98074 | \n", "47.6168 | \n", "-122.045 | \n", "1800 | \n", "7503 | \n", "
5 rows × 21 columns
\n", "\n", " | id | \n", "price | \n", "bedrooms | \n", "bathrooms | \n", "sqft_living | \n", "sqft_lot | \n", "floors | \n", "waterfront | \n", "view | \n", "condition | \n", "grade | \n", "sqft_above | \n", "sqft_basement | \n", "yr_built | \n", "yr_renovated | \n", "zipcode | \n", "lat | \n", "long | \n", "sqft_living15 | \n", "sqft_lot15 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "2.161300e+04 | \n", "2.161300e+04 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "2.161300e+04 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "21613.000000 | \n", "
mean | \n", "4.580302e+09 | \n", "5.400881e+05 | \n", "3.370842 | \n", "2.114757 | \n", "2079.899736 | \n", "1.510697e+04 | \n", "1.494309 | \n", "0.007542 | \n", "0.234303 | \n", "3.409430 | \n", "7.656873 | \n", "1788.390691 | \n", "291.509045 | \n", "1971.005136 | \n", "84.402258 | \n", "98077.939805 | \n", "47.560053 | \n", "-122.213896 | \n", "1986.552492 | \n", "12768.455652 | \n", "
std | \n", "2.876566e+09 | \n", "3.671272e+05 | \n", "0.930062 | \n", "0.770163 | \n", "918.440897 | \n", "4.142051e+04 | \n", "0.539989 | \n", "0.086517 | \n", "0.766318 | \n", "0.650743 | \n", "1.175459 | \n", "828.090978 | \n", "442.575043 | \n", "29.373411 | \n", "401.679240 | \n", "53.505026 | \n", "0.138564 | \n", "0.140828 | \n", "685.391304 | \n", "27304.179631 | \n", "
min | \n", "1.000102e+06 | \n", "7.500000e+04 | \n", "0.000000 | \n", "0.000000 | \n", "290.000000 | \n", "5.200000e+02 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "290.000000 | \n", "0.000000 | \n", "1900.000000 | \n", "0.000000 | \n", "98001.000000 | \n", "47.155900 | \n", "-122.519000 | \n", "399.000000 | \n", "651.000000 | \n", "
25% | \n", "2.123049e+09 | \n", "3.219500e+05 | \n", "3.000000 | \n", "1.750000 | \n", "1427.000000 | \n", "5.040000e+03 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "3.000000 | \n", "7.000000 | \n", "1190.000000 | \n", "0.000000 | \n", "1951.000000 | \n", "0.000000 | \n", "98033.000000 | \n", "47.471000 | \n", "-122.328000 | \n", "1490.000000 | \n", "5100.000000 | \n", "
50% | \n", "3.904930e+09 | \n", "4.500000e+05 | \n", "3.000000 | \n", "2.250000 | \n", "1910.000000 | \n", "7.618000e+03 | \n", "1.500000 | \n", "0.000000 | \n", "0.000000 | \n", "3.000000 | \n", "7.000000 | \n", "1560.000000 | \n", "0.000000 | \n", "1975.000000 | \n", "0.000000 | \n", "98065.000000 | \n", "47.571800 | \n", "-122.230000 | \n", "1840.000000 | \n", "7620.000000 | \n", "
75% | \n", "7.308900e+09 | \n", "6.450000e+05 | \n", "4.000000 | \n", "2.500000 | \n", "2550.000000 | \n", "1.068800e+04 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "4.000000 | \n", "8.000000 | \n", "2210.000000 | \n", "560.000000 | \n", "1997.000000 | \n", "0.000000 | \n", "98118.000000 | \n", "47.678000 | \n", "-122.125000 | \n", "2360.000000 | \n", "10083.000000 | \n", "
max | \n", "9.900000e+09 | \n", "7.700000e+06 | \n", "33.000000 | \n", "8.000000 | \n", "13540.000000 | \n", "1.651359e+06 | \n", "3.500000 | \n", "1.000000 | \n", "4.000000 | \n", "5.000000 | \n", "13.000000 | \n", "9410.000000 | \n", "4820.000000 | \n", "2015.000000 | \n", "2015.000000 | \n", "98199.000000 | \n", "47.777600 | \n", "-121.315000 | \n", "6210.000000 | \n", "871200.000000 | \n", "