TSK_AIBA/LR1.ipynb
2025-01-07 15:22:33 +04:00

414 KiB
Raw Permalink Blame History

Лабораторная работа №1

Вариант №7

In [ ]:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('data.csv')
In [70]:
print(df.info()) 
print(df.describe())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 369 entries, 0 to 368
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   stock index          369 non-null    object 
 1   country              369 non-null    object 
 2   year                 369 non-null    float64
 3   index price          317 non-null    float64
 4   log_indexprice       369 non-null    float64
 5   inflationrate        326 non-null    float64
 6   oil prices           369 non-null    float64
 7   exchange_rate        367 non-null    float64
 8   gdppercent           350 non-null    float64
 9   percapitaincome      368 non-null    float64
 10  unemploymentrate     348 non-null    float64
 11  manufacturingoutput  278 non-null    float64
 12  tradebalance         365 non-null    float64
 13  USTreasury           369 non-null    float64
dtypes: float64(12), object(2)
memory usage: 40.5+ KB
None
              year   index price  log_indexprice  inflationrate  oil prices  \
count   369.000000    317.000000      369.000000     326.000000  369.000000   
mean   2000.000000   7898.648297        3.610542       0.041748   39.743171   
std      11.848225   7811.336862        0.482481       0.039579   25.452654   
min    1980.000000    168.610000        2.230000      -0.040000   11.350000   
25%    1990.000000   2407.100000        3.320000       0.020000   19.410000   
50%    2000.000000   5160.100000        3.600000       0.030000   28.520000   
75%    2010.000000  10279.500000        3.980000       0.057500   57.880000   
max    2020.000000  47751.330000        4.680000       0.240000   98.560000   

       exchange_rate  gdppercent  percapitaincome  unemploymentrate  \
count     367.000000  350.000000       368.000000        348.000000   
mean       27.897548    0.037114     20719.964674          0.068908   
std        49.620521    0.037850     17435.037783          0.043207   
min         0.900000   -0.110000        27.000000          0.020000   
25%         1.330000    0.020000      2090.250000          0.040000   
50%         5.440000    0.030000     19969.500000          0.060000   
75%        15.055000    0.060000     36384.000000          0.090000   
max       249.050000    0.150000     65280.000000          0.260000   

       manufacturingoutput  tradebalance  USTreasury  
count           278.000000    365.000000  369.000000  
mean            328.084820    -15.996384    0.059024  
std             622.395923    154.557170    0.033086  
min               0.590000   -770.930000    0.010000  
25%              80.380000    -25.370000    0.030000  
50%             188.160000     -0.140000    0.050000  
75%             271.977500     19.080000    0.080000  
max            3868.460000    366.140000    0.140000  
In [71]:
print(df.columns)
Index(['stock index', 'country', 'year', 'index price', 'log_indexprice',
       'inflationrate', 'oil prices', 'exchange_rate', 'gdppercent',
       'percapitaincome', 'unemploymentrate', 'manufacturingoutput',
       'tradebalance', 'USTreasury'],
      dtype='object')
In [72]:
print(df.iloc[0]) 
print(df['country'])
stock index                              NASDAQ
country                United States of America
year                                     1980.0
index price                              168.61
log_indexprice                             2.23
inflationrate                              0.14
oil prices                                21.59
exchange_rate                               1.0
gdppercent                                 0.09
percapitaincome                         12575.0
unemploymentrate                           0.07
manufacturingoutput                         NaN
tradebalance                             -13.06
USTreasury                                 0.11
Name: 0, dtype: object
0      United States of America
1      United States of America
2      United States of America
3      United States of America
4      United States of America
                 ...           
364                       Spain
365                       Spain
366                       Spain
367                       Spain
368                       Spain
Name: country, Length: 369, dtype: object
In [73]:
grouped = df.groupby('country').agg({'index price': 'mean'})
print(grouped)
                           index price
country                               
China                      2329.655417
France                     3166.182683
Germany                    5030.647561
Hong Kong                 14790.129091
India                     14204.314516
Japan                     16142.580000
Spain                      7735.470294
United Kingdom             5323.457097
United States of America   2282.994878
In [74]:
sorted_df = df.sort_values(by='stock index', ascending=True) 
print(sorted_df)
    stock index country    year  index price  log_indexprice  inflationrate  \
307      CAC 40  France  2000.0      5926.42            3.77           0.02   
304      CAC 40  France  1997.0      2998.90            3.48           0.01   
303      CAC 40  France  1996.0      2315.70            3.36           0.02   
302      CAC 40  France  1995.0      1872.00            3.27           0.02   
301      CAC 40  France  1994.0      1881.20            3.27           0.02   
..          ...     ...     ...          ...             ...            ...   
228      SZCOMP   China  2003.0      1467.88            3.17           0.01   
227      SZCOMP   China  2002.0      1561.31            3.19          -0.01   
226      SZCOMP   China  2001.0      1940.96            3.29           0.01   
235      SZCOMP   China  2010.0      2795.88            3.45           0.03   
244      SZCOMP   China  2019.0      2928.94            3.47           0.03   

     oil prices  exchange_rate  gdppercent  percapitaincome  unemploymentrate  \
307       28.44           0.92        0.04          22420.0              0.10   
304       18.33           5.84        0.02           3694.0              0.12   
303       25.23           5.12        0.01           4097.0              0.12   
302       19.03           4.99        0.02           4099.0              0.12   
301       17.16           5.55        0.02           3582.0              0.12   
..          ...            ...         ...              ...               ...   
228       32.13           8.28        0.10           1289.0              0.04   
227       29.46           8.28        0.09           1149.0              0.03   
226       19.39           8.28        0.08           1053.0              0.03   
235       89.15           6.77        0.11           4550.0              0.04   
244       59.88           6.91        0.06          10217.0              0.05   

     manufacturingoutput  tradebalance  USTreasury  
307               197.71         18.15        0.06  
304                32.50          6.67        0.06  
303                35.44          4.36        0.06  
302                36.32          3.90        0.07  
301                31.23          3.18        0.07  
..                   ...           ...         ...  
228                  NaN         35.82        0.04  
227                  NaN         37.38        0.05  
226                  NaN         28.09        0.05  
235              1924.32        222.40        0.03  
244              3823.41        164.99        0.02  

[369 rows x 14 columns]
In [75]:
df_dropped = df.drop(columns=['USTreasury'])  
df_dropped = df_dropped.drop(index=[0, 1])
In [76]:
df['real_index_price'] = df['index price'] / (1 + df['inflationrate'])
In [77]:
df_cleaned = df.dropna()
In [78]:
df_filled = df.fillna(value=0)
In [79]:
plt.figure(figsize=(10,6))
for country in df['country'].unique():
    plt.plot(df[df['country'] == country]['year'], df[df['country'] == country]['index price'], label=country)
plt.title('Индексные цены по годам')
plt.xlabel('Год')
plt.ylabel('Индексная цена')
plt.legend()
plt.show()
No description has been provided for this image
In [80]:
df.groupby('country')['index price'].mean().plot(kind='bar')
plt.title('Средние индексные цены по странам')
plt.ylabel('Средняя индексная цена')
plt.show()
No description has been provided for this image
In [81]:
df['index price'].hist(bins=30)
plt.title('Гистограмма индексных цен')
plt.xlabel('Индексная цена')
plt.ylabel('Частота')
plt.show()
No description has been provided for this image
In [82]:
df.boxplot(column='index price')
plt.title('Ящик с усами для индексных цен')
plt.ylabel('Индексная цена')
plt.show()
No description has been provided for this image
In [83]:
df.plot.area(x='year', y='percapitaincome', alpha=0.5)
plt.title('Диаграмма с областями для ВВП на душу населения')
plt.xlabel('Год')
plt.ylabel('ВВП на душу населения')
plt.show()
No description has been provided for this image
In [84]:
plt.scatter(df['oil prices'], df['index price'])
plt.title('Диаграмма рассеяния: Цены на нефть vs Индексная цена')
plt.xlabel('Цены на нефть')
plt.ylabel('Индексная цена')
plt.show()
No description has been provided for this image
In [85]:
df['country'].value_counts().plot.pie(autopct='%1.1f%%', startangle=90)
plt.title('Распределение стран по индексной цене')
plt.ylabel('')
plt.show()
No description has been provided for this image