PIbd-33_Dyakonov_R_R_MAI/lab1.ipynb
dyakonovr 851ccb3e02 done
2024-10-07 12:45:24 +04:00

303 KiB
Raw Permalink Blame History

Тестим Pandas

Считываем csv:

In [30]:
import pandas

# Считаем csv
data_frame = pandas.read_csv("data/kc_house_data.csv", index_col="id")
# Сохраняем data_frame в новый csv
data_frame.to_csv("data/new_kc_house_data.csv")

Получение сведений о data_frame:

In [31]:
# Общая информация о data_frame
print("Общая информация о data_frame:")
print(data_frame.info(), "\n")

# Первые строки
print("Первые строки data_frame:")
print(data_frame.head(), "\n")

# Описание данных
print("Описание данных data_frame:")
print(data_frame.describe(), "\n")

# Количество строк и столбцов 
print(f"Количество строк и столбцов data_frame: {data_frame.shape}\n")

# Названия столбцов
print(f"Названия столбцов: {data_frame.columns}\n")

# Типы данных каждого столбца
print("Типы данных каждого столбца:")
print(data_frame.dtypes, "\n")

# Количество пропущенных значений
print("Количество пропущенных значений в каждом столбце:")
print(data_frame.isnull().sum(), "\n")
Общая информация о data_frame:
<class 'pandas.core.frame.DataFrame'>
Index: 21613 entries, 7129300520 to 1523300157
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           21613 non-null  object 
 1   price          21613 non-null  float64
 2   bedrooms       21613 non-null  int64  
 3   bathrooms      21613 non-null  float64
 4   sqft_living    21613 non-null  int64  
 5   sqft_lot       21613 non-null  int64  
 6   floors         21613 non-null  float64
 7   waterfront     21613 non-null  int64  
 8   view           21613 non-null  int64  
 9   condition      21613 non-null  int64  
 10  grade          21613 non-null  int64  
 11  sqft_above     21613 non-null  int64  
 12  sqft_basement  21613 non-null  int64  
 13  yr_built       21613 non-null  int64  
 14  yr_renovated   21613 non-null  int64  
 15  zipcode        21613 non-null  int64  
 16  lat            21613 non-null  float64
 17  long           21613 non-null  float64
 18  sqft_living15  21613 non-null  int64  
 19  sqft_lot15     21613 non-null  int64  
dtypes: float64(5), int64(14), object(1)
memory usage: 3.5+ MB
None 

Первые строки data_frame:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
7129300520      5650     1.0           0     0          3      7        1180   
6414100192      7242     2.0           0     0          3      7        2170   
5631500400     10000     1.0           0     0          3      6         770   
2487200875      5000     1.0           0     0          5      7        1050   
1954400510      8080     1.0           0     0          3      8        1680   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
7129300520              0      1955             0    98178  47.5112 -122.257   
6414100192            400      1951          1991    98125  47.7210 -122.319   
5631500400              0      1933             0    98028  47.7379 -122.233   
2487200875            910      1965             0    98136  47.5208 -122.393   
1954400510              0      1987             0    98074  47.6168 -122.045   

            sqft_living15  sqft_lot15  
id                                     
7129300520           1340        5650  
6414100192           1690        7639  
5631500400           2720        8062  
2487200875           1360        5000  
1954400510           1800        7503   

Описание данных data_frame:
              price      bedrooms     bathrooms   sqft_living      sqft_lot  \
count  2.161300e+04  21613.000000  21613.000000  21613.000000  2.161300e+04   
mean   5.400881e+05      3.370842      2.114757   2079.899736  1.510697e+04   
std    3.671272e+05      0.930062      0.770163    918.440897  4.142051e+04   
min    7.500000e+04      0.000000      0.000000    290.000000  5.200000e+02   
25%    3.219500e+05      3.000000      1.750000   1427.000000  5.040000e+03   
50%    4.500000e+05      3.000000      2.250000   1910.000000  7.618000e+03   
75%    6.450000e+05      4.000000      2.500000   2550.000000  1.068800e+04   
max    7.700000e+06     33.000000      8.000000  13540.000000  1.651359e+06   

             floors    waterfront          view     condition         grade  \
count  21613.000000  21613.000000  21613.000000  21613.000000  21613.000000   
mean       1.494309      0.007542      0.234303      3.409430      7.656873   
std        0.539989      0.086517      0.766318      0.650743      1.175459   
min        1.000000      0.000000      0.000000      1.000000      1.000000   
25%        1.000000      0.000000      0.000000      3.000000      7.000000   
50%        1.500000      0.000000      0.000000      3.000000      7.000000   
75%        2.000000      0.000000      0.000000      4.000000      8.000000   
max        3.500000      1.000000      4.000000      5.000000     13.000000   

         sqft_above  sqft_basement      yr_built  yr_renovated       zipcode  \
count  21613.000000   21613.000000  21613.000000  21613.000000  21613.000000   
mean    1788.390691     291.509045   1971.005136     84.402258  98077.939805   
std      828.090978     442.575043     29.373411    401.679240     53.505026   
min      290.000000       0.000000   1900.000000      0.000000  98001.000000   
25%     1190.000000       0.000000   1951.000000      0.000000  98033.000000   
50%     1560.000000       0.000000   1975.000000      0.000000  98065.000000   
75%     2210.000000     560.000000   1997.000000      0.000000  98118.000000   
max     9410.000000    4820.000000   2015.000000   2015.000000  98199.000000   

                lat          long  sqft_living15     sqft_lot15  
count  21613.000000  21613.000000   21613.000000   21613.000000  
mean      47.560053   -122.213896    1986.552492   12768.455652  
std        0.138564      0.140828     685.391304   27304.179631  
min       47.155900   -122.519000     399.000000     651.000000  
25%       47.471000   -122.328000    1490.000000    5100.000000  
50%       47.571800   -122.230000    1840.000000    7620.000000  
75%       47.678000   -122.125000    2360.000000   10083.000000  
max       47.777600   -121.315000    6210.000000  871200.000000   

Количество строк и столбцов data_frame: (21613, 20)

Названия столбцов: Index(['date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15'],
      dtype='object')

Типы данных каждого столбца:
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object 

Количество пропущенных значений в каждом столбце:
date             0
price            0
bedrooms         0
bathrooms        0
sqft_living      0
sqft_lot         0
floors           0
waterfront       0
view             0
condition        0
grade            0
sqft_above       0
sqft_basement    0
yr_built         0
yr_renovated     0
zipcode          0
lat              0
long             0
sqft_living15    0
sqft_lot15       0
dtype: int64 

Получение сведений о колонках

In [32]:
# Список всех столбцов
print(f"Список всех столбцов: {data_frame.columns}\n")

# Типы данных каждого столбца
print("Типы данных каждого столбца:")
print(data_frame.dtypes, "\n")

# Описание всех столбцов
print("Описание всех столбцов DataFrame:")
print(data_frame.describe(include="all"), "\n")

# Количество пропущенных значений в каждом столбце
print("Количество пропущенных значений в каждом столбце:")
print(data_frame.isnull().sum(), "\n")

# Количество уникальных значений в столбце 'date'
print("Количество уникальных значений в столбце 'date':")
print(data_frame["date"].value_counts(), "\n")
Список всех столбцов: Index(['date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15'],
      dtype='object')

Типы данных каждого столбца:
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object 

Описание всех столбцов DataFrame:
                   date         price      bedrooms     bathrooms  \
count             21613  2.161300e+04  21613.000000  21613.000000   
unique              372           NaN           NaN           NaN   
top     20140623T000000           NaN           NaN           NaN   
freq                142           NaN           NaN           NaN   
mean                NaN  5.400881e+05      3.370842      2.114757   
std                 NaN  3.671272e+05      0.930062      0.770163   
min                 NaN  7.500000e+04      0.000000      0.000000   
25%                 NaN  3.219500e+05      3.000000      1.750000   
50%                 NaN  4.500000e+05      3.000000      2.250000   
75%                 NaN  6.450000e+05      4.000000      2.500000   
max                 NaN  7.700000e+06     33.000000      8.000000   

         sqft_living      sqft_lot        floors    waterfront          view  \
count   21613.000000  2.161300e+04  21613.000000  21613.000000  21613.000000   
unique           NaN           NaN           NaN           NaN           NaN   
top              NaN           NaN           NaN           NaN           NaN   
freq             NaN           NaN           NaN           NaN           NaN   
mean     2079.899736  1.510697e+04      1.494309      0.007542      0.234303   
std       918.440897  4.142051e+04      0.539989      0.086517      0.766318   
min       290.000000  5.200000e+02      1.000000      0.000000      0.000000   
25%      1427.000000  5.040000e+03      1.000000      0.000000      0.000000   
50%      1910.000000  7.618000e+03      1.500000      0.000000      0.000000   
75%      2550.000000  1.068800e+04      2.000000      0.000000      0.000000   
max     13540.000000  1.651359e+06      3.500000      1.000000      4.000000   

           condition         grade    sqft_above  sqft_basement      yr_built  \
count   21613.000000  21613.000000  21613.000000   21613.000000  21613.000000   
unique           NaN           NaN           NaN            NaN           NaN   
top              NaN           NaN           NaN            NaN           NaN   
freq             NaN           NaN           NaN            NaN           NaN   
mean        3.409430      7.656873   1788.390691     291.509045   1971.005136   
std         0.650743      1.175459    828.090978     442.575043     29.373411   
min         1.000000      1.000000    290.000000       0.000000   1900.000000   
25%         3.000000      7.000000   1190.000000       0.000000   1951.000000   
50%         3.000000      7.000000   1560.000000       0.000000   1975.000000   
75%         4.000000      8.000000   2210.000000     560.000000   1997.000000   
max         5.000000     13.000000   9410.000000    4820.000000   2015.000000   

        yr_renovated       zipcode           lat          long  sqft_living15  \
count   21613.000000  21613.000000  21613.000000  21613.000000   21613.000000   
unique           NaN           NaN           NaN           NaN            NaN   
top              NaN           NaN           NaN           NaN            NaN   
freq             NaN           NaN           NaN           NaN            NaN   
mean       84.402258  98077.939805     47.560053   -122.213896    1986.552492   
std       401.679240     53.505026      0.138564      0.140828     685.391304   
min         0.000000  98001.000000     47.155900   -122.519000     399.000000   
25%         0.000000  98033.000000     47.471000   -122.328000    1490.000000   
50%         0.000000  98065.000000     47.571800   -122.230000    1840.000000   
75%         0.000000  98118.000000     47.678000   -122.125000    2360.000000   
max      2015.000000  98199.000000     47.777600   -121.315000    6210.000000   

           sqft_lot15  
count    21613.000000  
unique            NaN  
top               NaN  
freq              NaN  
mean     12768.455652  
std      27304.179631  
min        651.000000  
25%       5100.000000  
50%       7620.000000  
75%      10083.000000  
max     871200.000000   

Количество пропущенных значений в каждом столбце:
date             0
price            0
bedrooms         0
bathrooms        0
sqft_living      0
sqft_lot         0
floors           0
waterfront       0
view             0
condition        0
grade            0
sqft_above       0
sqft_basement    0
yr_built         0
yr_renovated     0
zipcode          0
lat              0
long             0
sqft_living15    0
sqft_lot15       0
dtype: int64 

Количество уникальных значений в столбце 'date':
date
20140623T000000    142
20140626T000000    131
20140625T000000    131
20140708T000000    127
20150427T000000    126
                  ... 
20150131T000000      1
20150117T000000      1
20150308T000000      1
20150515T000000      1
20140803T000000      1
Name: count, Length: 372, dtype: int64 

Вывод строки и стобца

In [33]:
# Вывод столбца 'date'
print("Столбец 'date':")
print(data_frame["date"], "\n")

# Вывод строки с индексом 2
print("Строка с индексом 2:")
print(data_frame.iloc[2], "\n")

# Вывод значения в первой строке и столбце 'date'
print("Значение в первой строке и столбце 'date':")
print(data_frame.iloc[0, 2], "\n")

# Вывод конкретного значения по метке строки и имени столбца
print("Значение в строке с индексом 1 и столбце 'date':")
print(data_frame.loc[7129300520, "date"], "\n")

# Вывод нескольких столбцов 'date' и 'price'
print("Столбцы 'date' и 'price':")
print(data_frame[["date", "price"]], "\n")

# Вывод первых двух строк
print("Первые две строки DataFrame:")
print(data_frame.iloc[:2], "\n")
Столбец 'date':
id
7129300520    20141013T000000
6414100192    20141209T000000
5631500400    20150225T000000
2487200875    20141209T000000
1954400510    20150218T000000
                   ...       
263000018     20140521T000000
6600060120    20150223T000000
1523300141    20140623T000000
291310100     20150116T000000
1523300157    20141015T000000
Name: date, Length: 21613, dtype: object 

Строка с индексом 2:
date             20150225T000000
price                   180000.0
bedrooms                       2
bathrooms                    1.0
sqft_living                  770
sqft_lot                   10000
floors                       1.0
waterfront                     0
view                           0
condition                      3
grade                          6
sqft_above                   770
sqft_basement                  0
yr_built                    1933
yr_renovated                   0
zipcode                    98028
lat                      47.7379
long                    -122.233
sqft_living15               2720
sqft_lot15                  8062
Name: 5631500400, dtype: object 

Значение в первой строке и столбце 'date':
3 

Значение в строке с индексом 1 и столбце 'date':
20141013T000000 

Столбцы 'date' и 'price':
                       date     price
id                                   
7129300520  20141013T000000  221900.0
6414100192  20141209T000000  538000.0
5631500400  20150225T000000  180000.0
2487200875  20141209T000000  604000.0
1954400510  20150218T000000  510000.0
...                     ...       ...
263000018   20140521T000000  360000.0
6600060120  20150223T000000  400000.0
1523300141  20140623T000000  402101.0
291310100   20150116T000000  400000.0
1523300157  20141015T000000  325000.0

[21613 rows x 2 columns] 

Первые две строки DataFrame:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
7129300520      5650     1.0           0     0          3      7        1180   
6414100192      7242     2.0           0     0          3      7        2170   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
7129300520              0      1955             0    98178  47.5112 -122.257   
6414100192            400      1951          1991    98125  47.7210 -122.319   

            sqft_living15  sqft_lot15  
id                                     
7129300520           1340        5650  
6414100192           1690        7639   

Группировка и агрегация данных

In [34]:
# 1. Средняя цена по количеству спален
print("Средняя цена по количеству спален:")
print(data_frame.groupby("bedrooms")["price"].mean())

# 2. Количество продаж по почтовому индексу
print("\nКоличество продаж по почтовому индексу:")
print(data_frame.groupby("zipcode")["price"].count())

# 3. Максимальная цена по количеству ванных комнат
print("\nМаксимальная цена по количеству ванных комнат:")
print(data_frame.groupby("bathrooms")["price"].max())

# 4. Общая цена по количеству этажей
print("\nОбщая цена по количеству этажей:")
print(data_frame.groupby("floors")["price"].sum())
Средняя цена по количеству спален:
bedrooms
0     4.095038e+05
1     3.176429e+05
2     4.013727e+05
3     4.662321e+05
4     6.354195e+05
5     7.865998e+05
6     8.255206e+05
7     9.511847e+05
8     1.105077e+06
9     8.939998e+05
10    8.193333e+05
11    5.200000e+05
33    6.400000e+05
Name: price, dtype: float64

Количество продаж по почтовому индексу:
zipcode
98001    362
98002    199
98003    280
98004    317
98005    168
        ... 
98177    255
98178    262
98188    136
98198    280
98199    317
Name: price, Length: 70, dtype: int64

Максимальная цена по количеству ванных комнат:
bathrooms
0.00    1295650.0
0.50     312500.0
0.75     785000.0
1.00    1300000.0
1.25    1388000.0
1.50    1500000.0
1.75    3278000.0
2.00    2200000.0
2.25    2400000.0
2.50    3070000.0
2.75    2700000.0
3.00    4489000.0
3.25    3640900.0
3.50    3710000.0
3.75    3650000.0
4.00    3400000.0
4.25    3850000.0
4.50    7062500.0
4.75    3650000.0
5.00    5350000.0
5.25    5110800.0
5.50    4500000.0
5.75    5570000.0
6.00    5300000.0
6.25    3300000.0
6.50    2238890.0
6.75    4668000.0
7.50     450000.0
7.75    6885000.0
8.00    7700000.0
Name: price, dtype: float64

Общая цена по количеству этажей:
floors
1.0    4.722489e+09
1.5    1.067653e+09
2.0    5.347512e+09
2.5    1.707158e+08
3.0    3.570885e+08
3.5    7.466500e+06
Name: price, dtype: float64

Сортировка данных

In [35]:
print("Сортировка по цене (возрастание):")
print(data_frame.sort_values(by="price"))

# 2. Сортировка по цене (убывание)
print("\nСортировка по цене (убывание):")
print(data_frame.sort_values(by="price", ascending=False))

# 3. Сортировка по количеству спален и затем по цене (возрастание)
print("\nСортировка по количеству спален и затем по цене (возрастание):")
print(data_frame.sort_values(by=["bedrooms", "price"]))

# 4. Сортировка по почтовому индексу и количеству ванных комнат (убывание)
print("\nСортировка по почтовому индексу и количеству ванных комнат (убывание):")
print(data_frame.sort_values(by=["zipcode", "bathrooms"], ascending=[False, False]))
Сортировка по цене (возрастание):
                       date      price  bedrooms  bathrooms  sqft_living  \
id                                                                         
3421079032  20150217T000000    75000.0         1       0.00          670   
40000362    20140506T000000    78000.0         2       1.00          780   
8658300340  20140523T000000    80000.0         1       0.75          430   
3028200080  20150324T000000    81000.0         2       1.00          730   
3883800011  20141105T000000    82000.0         3       1.00          860   
...                     ...        ...       ...        ...          ...   
8907500070  20150413T000000  5350000.0         5       5.00         8000   
2470100110  20140804T000000  5570000.0         5       5.75         9200   
9208900037  20140919T000000  6885000.0         6       7.75         9890   
9808700762  20140611T000000  7062500.0         5       4.50        10040   
6762700020  20141013T000000  7700000.0         6       8.00        12050   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
3421079032     43377     1.0           0     0          3      3         670   
40000362       16344     1.0           0     0          1      5         780   
8658300340      5050     1.0           0     0          2      4         430   
3028200080      9975     1.0           0     0          1      5         730   
3883800011     10426     1.0           0     0          3      6         860   
...              ...     ...         ...   ...        ...    ...         ...   
8907500070     23985     2.0           0     4          3     12        6720   
2470100110     35069     2.0           0     0          3     13        6200   
9208900037     31374     2.0           0     4          3     13        8860   
9808700762     37325     2.0           1     2          3     11        7680   
6762700020     27600     2.5           0     3          4     13        8570   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
3421079032              0      1966             0    98022  47.2638 -121.906   
40000362                0      1942             0    98168  47.4739 -122.280   
8658300340              0      1912             0    98014  47.6499 -121.909   
3028200080              0      1943             0    98168  47.4808 -122.315   
3883800011              0      1954             0    98146  47.4987 -122.341   
...                   ...       ...           ...      ...      ...      ...   
8907500070           1280      2009             0    98004  47.6232 -122.220   
2470100110           3000      2001             0    98039  47.6289 -122.233   
9208900037           1030      2001             0    98039  47.6305 -122.240   
9808700762           2360      1940          2001    98004  47.6500 -122.214   
6762700020           3480      1910          1987    98102  47.6298 -122.323   

            sqft_living15  sqft_lot15  
id                                     
3421079032           1160       42882  
40000362             1700       10387  
8658300340           1200        7500  
3028200080            860        9000  
3883800011           1140       11250  
...                   ...         ...  
8907500070           4600       21750  
2470100110           3560       24345  
9208900037           4540       42730  
9808700762           3930       25449  
6762700020           3940        8800  

[21613 rows x 20 columns]

Сортировка по цене (убывание):
                       date      price  bedrooms  bathrooms  sqft_living  \
id                                                                         
6762700020  20141013T000000  7700000.0         6       8.00        12050   
9808700762  20140611T000000  7062500.0         5       4.50        10040   
9208900037  20140919T000000  6885000.0         6       7.75         9890   
2470100110  20140804T000000  5570000.0         5       5.75         9200   
8907500070  20150413T000000  5350000.0         5       5.00         8000   
...                     ...        ...       ...        ...          ...   
3883800011  20141105T000000    82000.0         3       1.00          860   
3028200080  20150324T000000    81000.0         2       1.00          730   
8658300340  20140523T000000    80000.0         1       0.75          430   
40000362    20140506T000000    78000.0         2       1.00          780   
3421079032  20150217T000000    75000.0         1       0.00          670   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
6762700020     27600     2.5           0     3          4     13        8570   
9808700762     37325     2.0           1     2          3     11        7680   
9208900037     31374     2.0           0     4          3     13        8860   
2470100110     35069     2.0           0     0          3     13        6200   
8907500070     23985     2.0           0     4          3     12        6720   
...              ...     ...         ...   ...        ...    ...         ...   
3883800011     10426     1.0           0     0          3      6         860   
3028200080      9975     1.0           0     0          1      5         730   
8658300340      5050     1.0           0     0          2      4         430   
40000362       16344     1.0           0     0          1      5         780   
3421079032     43377     1.0           0     0          3      3         670   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
6762700020           3480      1910          1987    98102  47.6298 -122.323   
9808700762           2360      1940          2001    98004  47.6500 -122.214   
9208900037           1030      2001             0    98039  47.6305 -122.240   
2470100110           3000      2001             0    98039  47.6289 -122.233   
8907500070           1280      2009             0    98004  47.6232 -122.220   
...                   ...       ...           ...      ...      ...      ...   
3883800011              0      1954             0    98146  47.4987 -122.341   
3028200080              0      1943             0    98168  47.4808 -122.315   
8658300340              0      1912             0    98014  47.6499 -121.909   
40000362                0      1942             0    98168  47.4739 -122.280   
3421079032              0      1966             0    98022  47.2638 -121.906   

            sqft_living15  sqft_lot15  
id                                     
6762700020           3940        8800  
9808700762           3930       25449  
9208900037           4540       42730  
2470100110           3560       24345  
8907500070           4600       21750  
...                   ...         ...  
3883800011           1140       11250  
3028200080            860        9000  
8658300340           1200        7500  
40000362             1700       10387  
3421079032           1160       42882  

[21613 rows x 20 columns]

Сортировка по количеству спален и затем по цене (возрастание):
                       date      price  bedrooms  bathrooms  sqft_living  \
id                                                                         
9543000205  20150413T000000   139950.0         0       0.00          844   
3980300371  20140926T000000   142000.0         0       0.00          290   
6896300380  20141002T000000   228000.0         0       1.00          390   
7849202190  20141223T000000   235000.0         0       0.00         1470   
2310060040  20140925T000000   240000.0         0       2.50         1810   
...                     ...        ...       ...        ...          ...   
5566100170  20141029T000000   650000.0        10       2.00         3610   
8812401450  20141229T000000   660000.0        10       3.00         2920   
627300145   20140814T000000  1148000.0        10       5.25         4590   
1773100755  20140821T000000   520000.0        11       3.00         3000   
2402100895  20140625T000000   640000.0        33       1.75         1620   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
9543000205      4269     1.0           0     0          4      7         844   
3980300371     20875     1.0           0     0          1      1         290   
6896300380      5900     1.0           0     0          2      4         390   
7849202190      4800     2.0           0     0          3      7        1470   
2310060040      5669     2.0           0     0          3      7        1810   
...              ...     ...         ...   ...        ...    ...         ...   
5566100170     11914     2.0           0     0          4      7        3010   
8812401450      3745     2.0           0     0          4      7        1860   
627300145      10920     1.0           0     2          3      9        2500   
1773100755      4960     2.0           0     0          3      7        2400   
2402100895      6000     1.0           0     0          5      7        1040   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
9543000205              0      1913             0    98001  47.2781 -122.250   
3980300371              0      1963             0    98024  47.5308 -121.888   
6896300380              0      1953             0    98118  47.5260 -122.261   
7849202190              0      1996             0    98065  47.5265 -121.828   
2310060040              0      2003             0    98038  47.3493 -122.053   
...                   ...       ...           ...      ...      ...      ...   
5566100170            600      1958             0    98006  47.5705 -122.175   
8812401450           1060      1913             0    98105  47.6635 -122.320   
627300145            2090      2008             0    98004  47.5861 -122.113   
1773100755            600      1918          1999    98106  47.5560 -122.363   
2402100895            580      1947             0    98103  47.6878 -122.331   

            sqft_living15  sqft_lot15  
id                                     
9543000205           1380        9600  
3980300371           1620       22850  
6896300380           2170        6000  
7849202190           1060        7200  
2310060040           1810        5685  
...                   ...         ...  
5566100170           2040       11914  
8812401450           1810        3745  
627300145            2730       10400  
1773100755           1420        4960  
2402100895           1330        4700  

[21613 rows x 20 columns]

Сортировка по почтовому индексу и количеству ванных комнат (убывание):
                       date      price  bedrooms  bathrooms  sqft_living  \
id                                                                         
1068000375  20140923T000000  3200000.0         6       5.00         7100   
3271800295  20150203T000000  1569500.0         5       4.50         5620   
1370802115  20141205T000000  1925000.0         3       4.50         3950   
1370802455  20140813T000000  1050000.0         4       4.50         3180   
2771604190  20140617T000000   824000.0         7       4.25         3670   
...                     ...        ...       ...        ...          ...   
1312900180  20150325T000000   225000.0         3       1.00         1250   
3356403400  20140724T000000   159000.0         3       1.00         1360   
1278000210  20150311T000000   110000.0         2       1.00          828   
4045700455  20150316T000000   363000.0         3       0.75         2510   
9543000205  20150413T000000   139950.0         0       0.00          844   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
1068000375     18200     2.5           0     0          3     13        5240   
3271800295      5800     3.0           0     3          3     11        4700   
1370802115      6134     2.0           0     3          3     11        2880   
1370802455      4606     2.0           0     3          4      9        1990   
2771604190      4000     2.0           0     1          3      8        2800   
...              ...     ...         ...   ...        ...    ...         ...   
1312900180      7820     1.0           0     0          3      7        1250   
3356403400     20000     1.0           0     0          4      7        1360   
1278000210      4524     1.0           0     0          3      6         828   
4045700455     20000     2.0           0     0          4      7        2510   
9543000205      4269     1.0           0     0          4      7         844   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
1068000375           1860      1933          2002    98199  47.6427 -122.408   
3271800295            920      1999             0    98199  47.6482 -122.412   
1370802115           1070      1998             0    98199  47.6413 -122.405   
1370802455           1190      1929             0    98199  47.6402 -122.405   
2771604190            870      1964             0    98199  47.6375 -122.388   
...                   ...       ...           ...      ...      ...      ...   
1312900180              0      1967             0    98001  47.3397 -122.291   
3356403400              0      1953             0    98001  47.2861 -122.253   
1278000210              0      1968          2007    98001  47.2655 -122.244   
4045700455              0      1961             0    98001  47.2871 -122.287   
9543000205              0      1913             0    98001  47.2781 -122.250   

            sqft_living15  sqft_lot15  
id                                     
1068000375           3130        6477  
3271800295           2360        5800  
1370802115           3050        5281  
1370802455           2110        5323  
2771604190           2010        4000  
...                   ...         ...  
1312900180           1300        7920  
3356403400           1530        9997  
1278000210            828        5402  
4045700455           2130       20000  
9543000205           1380        9600  

[21613 rows x 20 columns]

Удаление строк и столбцов

In [36]:
print("Удаление строки с индексом 1736800520:")
print(data_frame.drop(index=1736800520))

# 2. Удаление нескольких строк по индексам (например, удаляем строки с индексами 0 и 2)
print("\nУдаление строк с индексами 1736800520 и 6300500875:")
print(data_frame.drop(index=[1736800520, 6300500875]))

# 3. Удаление столбца по имени (например, удаляем столбец 'zipcode')
print("\nУдаление столбца 'zipcode':")
print(data_frame.drop(columns="zipcode"))

# 4. Удаление нескольких столбцов (например, 'bathrooms' и 'floors')
print("\nУдаление столбцов 'bathrooms' и 'floors':")
print(data_frame.drop(columns=["bathrooms", "floors"]))
Удаление строки с индексом 1736800520:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
7129300520      5650     1.0           0     0          3      7        1180   
6414100192      7242     2.0           0     0          3      7        2170   
5631500400     10000     1.0           0     0          3      6         770   
2487200875      5000     1.0           0     0          5      7        1050   
1954400510      8080     1.0           0     0          3      8        1680   
...              ...     ...         ...   ...        ...    ...         ...   
263000018       1131     3.0           0     0          3      8        1530   
6600060120      5813     2.0           0     0          3      8        2310   
1523300141      1350     2.0           0     0          3      7        1020   
291310100       2388     2.0           0     0          3      8        1600   
1523300157      1076     2.0           0     0          3      7        1020   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
7129300520              0      1955             0    98178  47.5112 -122.257   
6414100192            400      1951          1991    98125  47.7210 -122.319   
5631500400              0      1933             0    98028  47.7379 -122.233   
2487200875            910      1965             0    98136  47.5208 -122.393   
1954400510              0      1987             0    98074  47.6168 -122.045   
...                   ...       ...           ...      ...      ...      ...   
263000018               0      2009             0    98103  47.6993 -122.346   
6600060120              0      2014             0    98146  47.5107 -122.362   
1523300141              0      2009             0    98144  47.5944 -122.299   
291310100               0      2004             0    98027  47.5345 -122.069   
1523300157              0      2008             0    98144  47.5941 -122.299   

            sqft_living15  sqft_lot15  
id                                     
7129300520           1340        5650  
6414100192           1690        7639  
5631500400           2720        8062  
2487200875           1360        5000  
1954400510           1800        7503  
...                   ...         ...  
263000018            1530        1509  
6600060120           1830        7200  
1523300141           1020        2007  
291310100            1410        1287  
1523300157           1020        1357  

[21612 rows x 20 columns]

Удаление строк с индексами 1736800520 и 6300500875:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
7129300520      5650     1.0           0     0          3      7        1180   
6414100192      7242     2.0           0     0          3      7        2170   
5631500400     10000     1.0           0     0          3      6         770   
2487200875      5000     1.0           0     0          5      7        1050   
1954400510      8080     1.0           0     0          3      8        1680   
...              ...     ...         ...   ...        ...    ...         ...   
263000018       1131     3.0           0     0          3      8        1530   
6600060120      5813     2.0           0     0          3      8        2310   
1523300141      1350     2.0           0     0          3      7        1020   
291310100       2388     2.0           0     0          3      8        1600   
1523300157      1076     2.0           0     0          3      7        1020   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
7129300520              0      1955             0    98178  47.5112 -122.257   
6414100192            400      1951          1991    98125  47.7210 -122.319   
5631500400              0      1933             0    98028  47.7379 -122.233   
2487200875            910      1965             0    98136  47.5208 -122.393   
1954400510              0      1987             0    98074  47.6168 -122.045   
...                   ...       ...           ...      ...      ...      ...   
263000018               0      2009             0    98103  47.6993 -122.346   
6600060120              0      2014             0    98146  47.5107 -122.362   
1523300141              0      2009             0    98144  47.5944 -122.299   
291310100               0      2004             0    98027  47.5345 -122.069   
1523300157              0      2008             0    98144  47.5941 -122.299   

            sqft_living15  sqft_lot15  
id                                     
7129300520           1340        5650  
6414100192           1690        7639  
5631500400           2720        8062  
2487200875           1360        5000  
1954400510           1800        7503  
...                   ...         ...  
263000018            1530        1509  
6600060120           1830        7200  
1523300141           1020        2007  
291310100            1410        1287  
1523300157           1020        1357  

[21611 rows x 20 columns]

Удаление столбца 'zipcode':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  grade  sqft_above  \
id                                                                             
7129300520      5650     1.0           0     0          3      7        1180   
6414100192      7242     2.0           0     0          3      7        2170   
5631500400     10000     1.0           0     0          3      6         770   
2487200875      5000     1.0           0     0          5      7        1050   
1954400510      8080     1.0           0     0          3      8        1680   
...              ...     ...         ...   ...        ...    ...         ...   
263000018       1131     3.0           0     0          3      8        1530   
6600060120      5813     2.0           0     0          3      8        2310   
1523300141      1350     2.0           0     0          3      7        1020   
291310100       2388     2.0           0     0          3      8        1600   
1523300157      1076     2.0           0     0          3      7        1020   

            sqft_basement  yr_built  yr_renovated      lat     long  \
id                                                                    
7129300520              0      1955             0  47.5112 -122.257   
6414100192            400      1951          1991  47.7210 -122.319   
5631500400              0      1933             0  47.7379 -122.233   
2487200875            910      1965             0  47.5208 -122.393   
1954400510              0      1987             0  47.6168 -122.045   
...                   ...       ...           ...      ...      ...   
263000018               0      2009             0  47.6993 -122.346   
6600060120              0      2014             0  47.5107 -122.362   
1523300141              0      2009             0  47.5944 -122.299   
291310100               0      2004             0  47.5345 -122.069   
1523300157              0      2008             0  47.5941 -122.299   

            sqft_living15  sqft_lot15  
id                                     
7129300520           1340        5650  
6414100192           1690        7639  
5631500400           2720        8062  
2487200875           1360        5000  
1954400510           1800        7503  
...                   ...         ...  
263000018            1530        1509  
6600060120           1830        7200  
1523300141           1020        2007  
291310100            1410        1287  
1523300157           1020        1357  

[21613 rows x 19 columns]

Удаление столбцов 'bathrooms' и 'floors':
                       date     price  bedrooms  sqft_living  sqft_lot  \
id                                                                       
7129300520  20141013T000000  221900.0         3         1180      5650   
6414100192  20141209T000000  538000.0         3         2570      7242   
5631500400  20150225T000000  180000.0         2          770     10000   
2487200875  20141209T000000  604000.0         4         1960      5000   
1954400510  20150218T000000  510000.0         3         1680      8080   
...                     ...       ...       ...          ...       ...   
263000018   20140521T000000  360000.0         3         1530      1131   
6600060120  20150223T000000  400000.0         4         2310      5813   
1523300141  20140623T000000  402101.0         2         1020      1350   
291310100   20150116T000000  400000.0         3         1600      2388   
1523300157  20141015T000000  325000.0         2         1020      1076   

            waterfront  view  condition  grade  sqft_above  sqft_basement  \
id                                                                          
7129300520           0     0          3      7        1180              0   
6414100192           0     0          3      7        2170            400   
5631500400           0     0          3      6         770              0   
2487200875           0     0          5      7        1050            910   
1954400510           0     0          3      8        1680              0   
...                ...   ...        ...    ...         ...            ...   
263000018            0     0          3      8        1530              0   
6600060120           0     0          3      8        2310              0   
1523300141           0     0          3      7        1020              0   
291310100            0     0          3      8        1600              0   
1523300157           0     0          3      7        1020              0   

            yr_built  yr_renovated  zipcode      lat     long  sqft_living15  \
id                                                                             
7129300520      1955             0    98178  47.5112 -122.257           1340   
6414100192      1951          1991    98125  47.7210 -122.319           1690   
5631500400      1933             0    98028  47.7379 -122.233           2720   
2487200875      1965             0    98136  47.5208 -122.393           1360   
1954400510      1987             0    98074  47.6168 -122.045           1800   
...              ...           ...      ...      ...      ...            ...   
263000018       2009             0    98103  47.6993 -122.346           1530   
6600060120      2014             0    98146  47.5107 -122.362           1830   
1523300141      2009             0    98144  47.5944 -122.299           1020   
291310100       2004             0    98027  47.5345 -122.069           1410   
1523300157      2008             0    98144  47.5941 -122.299           1020   

            sqft_lot15  
id                      
7129300520        5650  
6414100192        7639  
5631500400        8062  
2487200875        5000  
1954400510        7503  
...                ...  
263000018         1509  
6600060120        7200  
1523300141        2007  
291310100         1287  
1523300157        1357  

[21613 rows x 18 columns]

Создание новых столбцов на основе данных из существующих столбцов

In [37]:
# 1. Создание нового столбца 'price_per_bedroom'
print("Создание нового столбца 'price_per_bedroom':")
data_frame["price_per_bedroom"] = data_frame["price"] / data_frame["bedrooms"]
print(data_frame)

# 2. Создание нового столбца 'total_rooms' (сумма спален и ванных комнат)
print("\nСоздание нового столбца 'total_rooms':")
data_frame["total_rooms"] = data_frame["bedrooms"] + data_frame["bathrooms"]
print(data_frame)

# 3. Создание нового столбца 'is_expensive' (определяем, дорогой ли дом)
print("\nСоздание нового столбца 'is_expensive':")
data_frame["is_expensive"] = data_frame["price"] > 300000
print(data_frame)

# 4. Создание нового столбца 'floor_area_ratio' (соотношение этажей к количеству спален)
print("\nСоздание нового столбца 'floor_area_ratio':")
data_frame["floor_area_ratio"] = data_frame["floors"] / data_frame["bedrooms"]
print(data_frame)
Создание нового столбца 'price_per_bedroom':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  sqft_above  \
id                                                         ...               
7129300520      5650     1.0           0     0          3  ...        1180   
6414100192      7242     2.0           0     0          3  ...        2170   
5631500400     10000     1.0           0     0          3  ...         770   
2487200875      5000     1.0           0     0          5  ...        1050   
1954400510      8080     1.0           0     0          3  ...        1680   
...              ...     ...         ...   ...        ...  ...         ...   
263000018       1131     3.0           0     0          3  ...        1530   
6600060120      5813     2.0           0     0          3  ...        2310   
1523300141      1350     2.0           0     0          3  ...        1020   
291310100       2388     2.0           0     0          3  ...        1600   
1523300157      1076     2.0           0     0          3  ...        1020   

            sqft_basement  yr_built  yr_renovated  zipcode      lat     long  \
id                                                                             
7129300520              0      1955             0    98178  47.5112 -122.257   
6414100192            400      1951          1991    98125  47.7210 -122.319   
5631500400              0      1933             0    98028  47.7379 -122.233   
2487200875            910      1965             0    98136  47.5208 -122.393   
1954400510              0      1987             0    98074  47.6168 -122.045   
...                   ...       ...           ...      ...      ...      ...   
263000018               0      2009             0    98103  47.6993 -122.346   
6600060120              0      2014             0    98146  47.5107 -122.362   
1523300141              0      2009             0    98144  47.5944 -122.299   
291310100               0      2004             0    98027  47.5345 -122.069   
1523300157              0      2008             0    98144  47.5941 -122.299   

            sqft_living15  sqft_lot15  price_per_bedroom  
id                                                        
7129300520           1340        5650       73966.666667  
6414100192           1690        7639      179333.333333  
5631500400           2720        8062       90000.000000  
2487200875           1360        5000      151000.000000  
1954400510           1800        7503      170000.000000  
...                   ...         ...                ...  
263000018            1530        1509      120000.000000  
6600060120           1830        7200      100000.000000  
1523300141           1020        2007      201050.500000  
291310100            1410        1287      133333.333333  
1523300157           1020        1357      162500.000000  

[21613 rows x 21 columns]

Создание нового столбца 'total_rooms':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  sqft_basement  \
id                                                         ...                  
7129300520      5650     1.0           0     0          3  ...              0   
6414100192      7242     2.0           0     0          3  ...            400   
5631500400     10000     1.0           0     0          3  ...              0   
2487200875      5000     1.0           0     0          5  ...            910   
1954400510      8080     1.0           0     0          3  ...              0   
...              ...     ...         ...   ...        ...  ...            ...   
263000018       1131     3.0           0     0          3  ...              0   
6600060120      5813     2.0           0     0          3  ...              0   
1523300141      1350     2.0           0     0          3  ...              0   
291310100       2388     2.0           0     0          3  ...              0   
1523300157      1076     2.0           0     0          3  ...              0   

            yr_built  yr_renovated  zipcode      lat     long  sqft_living15  \
id                                                                             
7129300520      1955             0    98178  47.5112 -122.257           1340   
6414100192      1951          1991    98125  47.7210 -122.319           1690   
5631500400      1933             0    98028  47.7379 -122.233           2720   
2487200875      1965             0    98136  47.5208 -122.393           1360   
1954400510      1987             0    98074  47.6168 -122.045           1800   
...              ...           ...      ...      ...      ...            ...   
263000018       2009             0    98103  47.6993 -122.346           1530   
6600060120      2014             0    98146  47.5107 -122.362           1830   
1523300141      2009             0    98144  47.5944 -122.299           1020   
291310100       2004             0    98027  47.5345 -122.069           1410   
1523300157      2008             0    98144  47.5941 -122.299           1020   

            sqft_lot15  price_per_bedroom  total_rooms  
id                                                      
7129300520        5650       73966.666667         4.00  
6414100192        7639      179333.333333         5.25  
5631500400        8062       90000.000000         3.00  
2487200875        5000      151000.000000         7.00  
1954400510        7503      170000.000000         5.00  
...                ...                ...          ...  
263000018         1509      120000.000000         5.50  
6600060120        7200      100000.000000         6.50  
1523300141        2007      201050.500000         2.75  
291310100         1287      133333.333333         5.50  
1523300157        1357      162500.000000         2.75  

[21613 rows x 22 columns]

Создание нового столбца 'is_expensive':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_built  \
id                                                         ...             
7129300520      5650     1.0           0     0          3  ...      1955   
6414100192      7242     2.0           0     0          3  ...      1951   
5631500400     10000     1.0           0     0          3  ...      1933   
2487200875      5000     1.0           0     0          5  ...      1965   
1954400510      8080     1.0           0     0          3  ...      1987   
...              ...     ...         ...   ...        ...  ...       ...   
263000018       1131     3.0           0     0          3  ...      2009   
6600060120      5813     2.0           0     0          3  ...      2014   
1523300141      1350     2.0           0     0          3  ...      2009   
291310100       2388     2.0           0     0          3  ...      2004   
1523300157      1076     2.0           0     0          3  ...      2008   

            yr_renovated  zipcode      lat     long  sqft_living15  \
id                                                                   
7129300520             0    98178  47.5112 -122.257           1340   
6414100192          1991    98125  47.7210 -122.319           1690   
5631500400             0    98028  47.7379 -122.233           2720   
2487200875             0    98136  47.5208 -122.393           1360   
1954400510             0    98074  47.6168 -122.045           1800   
...                  ...      ...      ...      ...            ...   
263000018              0    98103  47.6993 -122.346           1530   
6600060120             0    98146  47.5107 -122.362           1830   
1523300141             0    98144  47.5944 -122.299           1020   
291310100              0    98027  47.5345 -122.069           1410   
1523300157             0    98144  47.5941 -122.299           1020   

            sqft_lot15  price_per_bedroom  total_rooms  is_expensive  
id                                                                    
7129300520        5650       73966.666667         4.00         False  
6414100192        7639      179333.333333         5.25          True  
5631500400        8062       90000.000000         3.00         False  
2487200875        5000      151000.000000         7.00          True  
1954400510        7503      170000.000000         5.00          True  
...                ...                ...          ...           ...  
263000018         1509      120000.000000         5.50          True  
6600060120        7200      100000.000000         6.50          True  
1523300141        2007      201050.500000         2.75          True  
291310100         1287      133333.333333         5.50          True  
1523300157        1357      162500.000000         2.75          True  

[21613 rows x 23 columns]

Создание нового столбца 'floor_area_ratio':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_renovated  \
id                                                         ...                 
7129300520      5650     1.0           0     0          3  ...             0   
6414100192      7242     2.0           0     0          3  ...          1991   
5631500400     10000     1.0           0     0          3  ...             0   
2487200875      5000     1.0           0     0          5  ...             0   
1954400510      8080     1.0           0     0          3  ...             0   
...              ...     ...         ...   ...        ...  ...           ...   
263000018       1131     3.0           0     0          3  ...             0   
6600060120      5813     2.0           0     0          3  ...             0   
1523300141      1350     2.0           0     0          3  ...             0   
291310100       2388     2.0           0     0          3  ...             0   
1523300157      1076     2.0           0     0          3  ...             0   

            zipcode      lat     long  sqft_living15  sqft_lot15  \
id                                                                 
7129300520    98178  47.5112 -122.257           1340        5650   
6414100192    98125  47.7210 -122.319           1690        7639   
5631500400    98028  47.7379 -122.233           2720        8062   
2487200875    98136  47.5208 -122.393           1360        5000   
1954400510    98074  47.6168 -122.045           1800        7503   
...             ...      ...      ...            ...         ...   
263000018     98103  47.6993 -122.346           1530        1509   
6600060120    98146  47.5107 -122.362           1830        7200   
1523300141    98144  47.5944 -122.299           1020        2007   
291310100     98027  47.5345 -122.069           1410        1287   
1523300157    98144  47.5941 -122.299           1020        1357   

            price_per_bedroom  total_rooms  is_expensive  floor_area_ratio  
id                                                                          
7129300520       73966.666667         4.00         False          0.333333  
6414100192      179333.333333         5.25          True          0.666667  
5631500400       90000.000000         3.00         False          0.500000  
2487200875      151000.000000         7.00          True          0.250000  
1954400510      170000.000000         5.00          True          0.333333  
...                       ...          ...           ...               ...  
263000018       120000.000000         5.50          True          1.000000  
6600060120      100000.000000         6.50          True          0.500000  
1523300141      201050.500000         2.75          True          1.000000  
291310100       133333.333333         5.50          True          0.666667  
1523300157      162500.000000         2.75          True          1.000000  

[21613 rows x 24 columns]

Удаление строк с пустыми значениями

In [38]:
# 1. Исходный DataFrame с пустыми значениями
print("Исходный DataFrame с пустыми значениями:")
print(data_frame)

# 2. Удаление строк с любыми пустыми значениями
print("\nУдаление строк с любыми пустыми значениями:")
print(data_frame.dropna())

# 3. Удаление строк только с пустыми значениями в определенном столбце (например, 'price')
print("\nУдаление строк с пустыми значениями в столбце 'price':")
print(data_frame.dropna(subset=["price"]))

# 4. Удаление строк, где все значения пустые
print("\nУдаление строк, где все значения пустые:")
print(data_frame.dropna(how="all"))
Исходный DataFrame с пустыми значениями:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_renovated  \
id                                                         ...                 
7129300520      5650     1.0           0     0          3  ...             0   
6414100192      7242     2.0           0     0          3  ...          1991   
5631500400     10000     1.0           0     0          3  ...             0   
2487200875      5000     1.0           0     0          5  ...             0   
1954400510      8080     1.0           0     0          3  ...             0   
...              ...     ...         ...   ...        ...  ...           ...   
263000018       1131     3.0           0     0          3  ...             0   
6600060120      5813     2.0           0     0          3  ...             0   
1523300141      1350     2.0           0     0          3  ...             0   
291310100       2388     2.0           0     0          3  ...             0   
1523300157      1076     2.0           0     0          3  ...             0   

            zipcode      lat     long  sqft_living15  sqft_lot15  \
id                                                                 
7129300520    98178  47.5112 -122.257           1340        5650   
6414100192    98125  47.7210 -122.319           1690        7639   
5631500400    98028  47.7379 -122.233           2720        8062   
2487200875    98136  47.5208 -122.393           1360        5000   
1954400510    98074  47.6168 -122.045           1800        7503   
...             ...      ...      ...            ...         ...   
263000018     98103  47.6993 -122.346           1530        1509   
6600060120    98146  47.5107 -122.362           1830        7200   
1523300141    98144  47.5944 -122.299           1020        2007   
291310100     98027  47.5345 -122.069           1410        1287   
1523300157    98144  47.5941 -122.299           1020        1357   

            price_per_bedroom  total_rooms  is_expensive  floor_area_ratio  
id                                                                          
7129300520       73966.666667         4.00         False          0.333333  
6414100192      179333.333333         5.25          True          0.666667  
5631500400       90000.000000         3.00         False          0.500000  
2487200875      151000.000000         7.00          True          0.250000  
1954400510      170000.000000         5.00          True          0.333333  
...                       ...          ...           ...               ...  
263000018       120000.000000         5.50          True          1.000000  
6600060120      100000.000000         6.50          True          0.500000  
1523300141      201050.500000         2.75          True          1.000000  
291310100       133333.333333         5.50          True          0.666667  
1523300157      162500.000000         2.75          True          1.000000  

[21613 rows x 24 columns]

Удаление строк с любыми пустыми значениями:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_renovated  \
id                                                         ...                 
7129300520      5650     1.0           0     0          3  ...             0   
6414100192      7242     2.0           0     0          3  ...          1991   
5631500400     10000     1.0           0     0          3  ...             0   
2487200875      5000     1.0           0     0          5  ...             0   
1954400510      8080     1.0           0     0          3  ...             0   
...              ...     ...         ...   ...        ...  ...           ...   
263000018       1131     3.0           0     0          3  ...             0   
6600060120      5813     2.0           0     0          3  ...             0   
1523300141      1350     2.0           0     0          3  ...             0   
291310100       2388     2.0           0     0          3  ...             0   
1523300157      1076     2.0           0     0          3  ...             0   

            zipcode      lat     long  sqft_living15  sqft_lot15  \
id                                                                 
7129300520    98178  47.5112 -122.257           1340        5650   
6414100192    98125  47.7210 -122.319           1690        7639   
5631500400    98028  47.7379 -122.233           2720        8062   
2487200875    98136  47.5208 -122.393           1360        5000   
1954400510    98074  47.6168 -122.045           1800        7503   
...             ...      ...      ...            ...         ...   
263000018     98103  47.6993 -122.346           1530        1509   
6600060120    98146  47.5107 -122.362           1830        7200   
1523300141    98144  47.5944 -122.299           1020        2007   
291310100     98027  47.5345 -122.069           1410        1287   
1523300157    98144  47.5941 -122.299           1020        1357   

            price_per_bedroom  total_rooms  is_expensive  floor_area_ratio  
id                                                                          
7129300520       73966.666667         4.00         False          0.333333  
6414100192      179333.333333         5.25          True          0.666667  
5631500400       90000.000000         3.00         False          0.500000  
2487200875      151000.000000         7.00          True          0.250000  
1954400510      170000.000000         5.00          True          0.333333  
...                       ...          ...           ...               ...  
263000018       120000.000000         5.50          True          1.000000  
6600060120      100000.000000         6.50          True          0.500000  
1523300141      201050.500000         2.75          True          1.000000  
291310100       133333.333333         5.50          True          0.666667  
1523300157      162500.000000         2.75          True          1.000000  

[21613 rows x 24 columns]

Удаление строк с пустыми значениями в столбце 'price':
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_renovated  \
id                                                         ...                 
7129300520      5650     1.0           0     0          3  ...             0   
6414100192      7242     2.0           0     0          3  ...          1991   
5631500400     10000     1.0           0     0          3  ...             0   
2487200875      5000     1.0           0     0          5  ...             0   
1954400510      8080     1.0           0     0          3  ...             0   
...              ...     ...         ...   ...        ...  ...           ...   
263000018       1131     3.0           0     0          3  ...             0   
6600060120      5813     2.0           0     0          3  ...             0   
1523300141      1350     2.0           0     0          3  ...             0   
291310100       2388     2.0           0     0          3  ...             0   
1523300157      1076     2.0           0     0          3  ...             0   

            zipcode      lat     long  sqft_living15  sqft_lot15  \
id                                                                 
7129300520    98178  47.5112 -122.257           1340        5650   
6414100192    98125  47.7210 -122.319           1690        7639   
5631500400    98028  47.7379 -122.233           2720        8062   
2487200875    98136  47.5208 -122.393           1360        5000   
1954400510    98074  47.6168 -122.045           1800        7503   
...             ...      ...      ...            ...         ...   
263000018     98103  47.6993 -122.346           1530        1509   
6600060120    98146  47.5107 -122.362           1830        7200   
1523300141    98144  47.5944 -122.299           1020        2007   
291310100     98027  47.5345 -122.069           1410        1287   
1523300157    98144  47.5941 -122.299           1020        1357   

            price_per_bedroom  total_rooms  is_expensive  floor_area_ratio  
id                                                                          
7129300520       73966.666667         4.00         False          0.333333  
6414100192      179333.333333         5.25          True          0.666667  
5631500400       90000.000000         3.00         False          0.500000  
2487200875      151000.000000         7.00          True          0.250000  
1954400510      170000.000000         5.00          True          0.333333  
...                       ...          ...           ...               ...  
263000018       120000.000000         5.50          True          1.000000  
6600060120      100000.000000         6.50          True          0.500000  
1523300141      201050.500000         2.75          True          1.000000  
291310100       133333.333333         5.50          True          0.666667  
1523300157      162500.000000         2.75          True          1.000000  

[21613 rows x 24 columns]

Удаление строк, где все значения пустые:
                       date     price  bedrooms  bathrooms  sqft_living  \
id                                                                        
7129300520  20141013T000000  221900.0         3       1.00         1180   
6414100192  20141209T000000  538000.0         3       2.25         2570   
5631500400  20150225T000000  180000.0         2       1.00          770   
2487200875  20141209T000000  604000.0         4       3.00         1960   
1954400510  20150218T000000  510000.0         3       2.00         1680   
...                     ...       ...       ...        ...          ...   
263000018   20140521T000000  360000.0         3       2.50         1530   
6600060120  20150223T000000  400000.0         4       2.50         2310   
1523300141  20140623T000000  402101.0         2       0.75         1020   
291310100   20150116T000000  400000.0         3       2.50         1600   
1523300157  20141015T000000  325000.0         2       0.75         1020   

            sqft_lot  floors  waterfront  view  condition  ...  yr_renovated  \
id                                                         ...                 
7129300520      5650     1.0           0     0          3  ...             0   
6414100192      7242     2.0           0     0          3  ...          1991   
5631500400     10000     1.0           0     0          3  ...             0   
2487200875      5000     1.0           0     0          5  ...             0   
1954400510      8080     1.0           0     0          3  ...             0   
...              ...     ...         ...   ...        ...  ...           ...   
263000018       1131     3.0           0     0          3  ...             0   
6600060120      5813     2.0           0     0          3  ...             0   
1523300141      1350     2.0           0     0          3  ...             0   
291310100       2388     2.0           0     0          3  ...             0   
1523300157      1076     2.0           0     0          3  ...             0   

            zipcode      lat     long  sqft_living15  sqft_lot15  \
id                                                                 
7129300520    98178  47.5112 -122.257           1340        5650   
6414100192    98125  47.7210 -122.319           1690        7639   
5631500400    98028  47.7379 -122.233           2720        8062   
2487200875    98136  47.5208 -122.393           1360        5000   
1954400510    98074  47.6168 -122.045           1800        7503   
...             ...      ...      ...            ...         ...   
263000018     98103  47.6993 -122.346           1530        1509   
6600060120    98146  47.5107 -122.362           1830        7200   
1523300141    98144  47.5944 -122.299           1020        2007   
291310100     98027  47.5345 -122.069           1410        1287   
1523300157    98144  47.5941 -122.299           1020        1357   

            price_per_bedroom  total_rooms  is_expensive  floor_area_ratio  
id                                                                          
7129300520       73966.666667         4.00         False          0.333333  
6414100192      179333.333333         5.25          True          0.666667  
5631500400       90000.000000         3.00         False          0.500000  
2487200875      151000.000000         7.00          True          0.250000  
1954400510      170000.000000         5.00          True          0.333333  
...                       ...          ...           ...               ...  
263000018       120000.000000         5.50          True          1.000000  
6600060120      100000.000000         6.50          True          0.500000  
1523300141      201050.500000         2.75          True          1.000000  
291310100       133333.333333         5.50          True          0.666667  
1523300157      162500.000000         2.75          True          1.000000  

[21613 rows x 24 columns]

Matplotlib

In [39]:
import matplotlib.pyplot as plt

# 1. Столбчатая диаграмма: средняя цена по количеству спален
data_frame.groupby("bedrooms")["price"].mean().plot.bar(color="skyblue")
plt.title("Средняя цена по количеству спален")
plt.xlabel("Количество спален")
plt.ylabel("Средняя цена")
plt.show()

# 2. Гистограмма: распределение цен
data_frame["price"].plot.hist(bins=30, color="orange", alpha=0.7)
plt.title("Гистограмма цен")
plt.xlabel("Цена")
plt.ylabel("Частота")
plt.show()

# 3. Ящик с усами: цена по количеству ванных комнат
data_frame.boxplot(column="price", by="bathrooms")
plt.title("Ящик с усами цен по количеству ванных комнат")
plt.suptitle("")
plt.xlabel("Количество ванных комнат")
plt.ylabel("Цена")
plt.show()

# 4. Диаграмма с областями: суммарная цена по количеству этажей
data_frame.groupby("floors")["price"].sum().plot.area(color="lightgreen", alpha=0.5)
plt.title("Суммарная цена по количеству этажей")
plt.xlabel("Количество этажей")
plt.ylabel("Суммарная цена")
plt.show()

# 5. Диаграмма рассеяния: цена vs. площадь
data_frame.plot.scatter(x="sqft_living", y="price", color="purple", alpha=0.5)
plt.title("Диаграмма рассеяния: Цена vs Площадь")
plt.xlabel("Площадь (sqft)")
plt.ylabel("Цена")
plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image