{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### Бизнес-цель: кластеризация пациентов для выявления групп со схожими характеристиками здоровья и рисками инсульта. Результаты кластеризации могут использоваться, например, для следующего:\n", "\n", "- определение того, какие группы людей имеют бо́льшую предрасположенность к возникновению инсульта;\n", "- помощь медицинским специалистам в разработке на основе полученных данных рекомендаций для людей с повышенным риском возникновения инсульта." ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | gender | \n", "age | \n", "hypertension | \n", "heart_disease | \n", "ever_married | \n", "work_type | \n", "Residence_type | \n", "avg_glucose_level | \n", "bmi | \n", "smoking_status | \n", "stroke | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
9046 | \n", "Male | \n", "67.0 | \n", "0 | \n", "1 | \n", "Yes | \n", "Private | \n", "Urban | \n", "228.69 | \n", "36.6 | \n", "formerly smoked | \n", "1 | \n", "
51676 | \n", "Female | \n", "61.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Self-employed | \n", "Rural | \n", "202.21 | \n", "NaN | \n", "never smoked | \n", "1 | \n", "
31112 | \n", "Male | \n", "80.0 | \n", "0 | \n", "1 | \n", "Yes | \n", "Private | \n", "Rural | \n", "105.92 | \n", "32.5 | \n", "never smoked | \n", "1 | \n", "
60182 | \n", "Female | \n", "49.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Private | \n", "Urban | \n", "171.23 | \n", "34.4 | \n", "smokes | \n", "1 | \n", "
1665 | \n", "Female | \n", "79.0 | \n", "1 | \n", "0 | \n", "Yes | \n", "Self-employed | \n", "Rural | \n", "174.12 | \n", "24.0 | \n", "never smoked | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
34383 | \n", "Male | \n", "46.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Private | \n", "Urban | \n", "88.23 | \n", "25.8 | \n", "Unknown | \n", "0 | \n", "
8646 | \n", "Female | \n", "54.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Private | \n", "Rural | \n", "97.47 | \n", "26.7 | \n", "never smoked | \n", "0 | \n", "
46653 | \n", "Female | \n", "81.0 | \n", "1 | \n", "1 | \n", "Yes | \n", "Private | \n", "Rural | \n", "59.28 | \n", "28.1 | \n", "never smoked | \n", "0 | \n", "
1099 | \n", "Female | \n", "15.0 | \n", "0 | \n", "0 | \n", "No | \n", "children | \n", "Rural | \n", "101.15 | \n", "22.2 | \n", "Unknown | \n", "0 | \n", "
61676 | \n", "Male | \n", "77.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Self-employed | \n", "Urban | \n", "68.38 | \n", "25.1 | \n", "Unknown | \n", "0 | \n", "
2500 rows × 11 columns
\n", "\n", " | age | \n", "hypertension | \n", "heart_disease | \n", "avg_glucose_level | \n", "bmi | \n", "stroke | \n", "
---|---|---|---|---|---|---|
count | \n", "2500.000000 | \n", "2500.000000 | \n", "2500.000000 | \n", "2500.000000 | \n", "2500.000000 | \n", "2500.000000 | \n", "
mean | \n", "44.605296 | \n", "0.108000 | \n", "0.062400 | \n", "108.630440 | \n", "29.102840 | \n", "0.099600 | \n", "
std | \n", "22.817713 | \n", "0.310443 | \n", "0.241929 | \n", "47.124712 | \n", "7.804786 | \n", "0.299526 | \n", "
min | \n", "0.080000 | \n", "0.000000 | \n", "0.000000 | \n", "55.220000 | \n", "10.300000 | \n", "0.000000 | \n", "
25% | \n", "26.000000 | \n", "0.000000 | \n", "0.000000 | \n", "77.902500 | \n", "23.975000 | \n", "0.000000 | \n", "
50% | \n", "47.000000 | \n", "0.000000 | \n", "0.000000 | \n", "93.200000 | \n", "28.200000 | \n", "0.000000 | \n", "
75% | \n", "63.000000 | \n", "0.000000 | \n", "0.000000 | \n", "117.510000 | \n", "33.000000 | \n", "0.000000 | \n", "
max | \n", "82.000000 | \n", "1.000000 | \n", "1.000000 | \n", "271.740000 | \n", "97.600000 | \n", "1.000000 | \n", "
\n", " | age | \n", "avg_glucose_level | \n", "bmi | \n", "hypertension | \n", "
---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " |
9046 | \n", "0.981658 | \n", "2.548208 | \n", "0.960777 | \n", "0 | \n", "
51676 | \n", "0.718652 | \n", "1.986183 | \n", "-0.115701 | \n", "0 | \n", "
31112 | \n", "1.551505 | \n", "-0.057528 | \n", "0.435353 | \n", "0 | \n", "
60182 | \n", "0.192639 | \n", "1.328647 | \n", "0.678842 | \n", "0 | \n", "
1665 | \n", "1.507670 | \n", "1.389985 | \n", "-0.653940 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
34383 | \n", "0.061136 | \n", "-0.432990 | \n", "-0.423266 | \n", "0 | \n", "
8646 | \n", "0.411811 | \n", "-0.236875 | \n", "-0.307929 | \n", "0 | \n", "
46653 | \n", "1.595339 | \n", "-1.047440 | \n", "-0.128516 | \n", "1 | \n", "
1099 | \n", "-1.297729 | \n", "-0.158769 | \n", "-0.884614 | \n", "0 | \n", "
61676 | \n", "1.420001 | \n", "-0.854297 | \n", "-0.512973 | \n", "0 | \n", "
2500 rows × 4 columns
\n", "