ну вроде бы всё

2024-11-02 00:19:57 +04:00 · 2024-11-02 00:19:57 +04:00 · 7f7102c559
commit 7f7102c559
parent d4cdc8ab91
1 changed files with 624 additions and 1 deletions
--- a/lab_3/lab3.ipynb
+++ b/lab_3/lab3.ipynb
@ -436,7 +436,8 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Приведём пример использования future tools"
+    "# Приведём пример использования featuretools\n",
    "## Попробую вынести страну в отдельную таблицу"
   ]
  },
  {
@ -544,6 +545,628 @@
   "source": [
    "pip install --upgrade setuptools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index billioner_id not found in dataframe, creating new integer column\n",
      "  warnings.warn(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function max at 0x000001952157A520> is currently using SeriesGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n",
      "  ).agg(to_agg)\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function std at 0x000001952157B060> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"std\" instead.\n",
      "  ).agg(to_agg)\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function sum at 0x0000019521579B20> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
      "  ).agg(to_agg)\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function min at 0x000001952157A660> is currently using SeriesGroupBy.min. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"min\" instead.\n",
      "  ).agg(to_agg)\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function mean at 0x000001952157AF20> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n",
      "  ).agg(to_agg)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Rank</th>\n",
       "      <th>Networth</th>\n",
       "      <th>Age</th>\n",
       "      <th>Industry</th>\n",
       "      <th>id</th>\n",
       "      <th>country_id</th>\n",
       "      <th>country_table.id</th>\n",
       "      <th>country_table.Country</th>\n",
       "      <th>country_table.COUNT(other_about_billioner)</th>\n",
       "      <th>country_table.MAX(other_about_billioner.Age)</th>\n",
       "      <th>...</th>\n",
       "      <th>country_table.SKEW(other_about_billioner.Rank )</th>\n",
       "      <th>country_table.SKEW(other_about_billioner.id)</th>\n",
       "      <th>country_table.STD(other_about_billioner.Age)</th>\n",
       "      <th>country_table.STD(other_about_billioner.Networth)</th>\n",
       "      <th>country_table.STD(other_about_billioner.Rank )</th>\n",
       "      <th>country_table.STD(other_about_billioner.id)</th>\n",
       "      <th>country_table.SUM(other_about_billioner.Age)</th>\n",
       "      <th>country_table.SUM(other_about_billioner.Networth)</th>\n",
       "      <th>country_table.SUM(other_about_billioner.Rank )</th>\n",
       "      <th>country_table.SUM(other_about_billioner.id)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>billioner_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>219</td>\n",
       "      <td>50</td>\n",
       "      <td>Automotive</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>United States</td>\n",
       "      <td>1</td>\n",
       "      <td>50.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50.0</td>\n",
       "      <td>219.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>171</td>\n",
       "      <td>58</td>\n",
       "      <td>Technology</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>United States</td>\n",
       "      <td>1</td>\n",
       "      <td>58.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>58.0</td>\n",
       "      <td>171.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>158</td>\n",
       "      <td>73</td>\n",
       "      <td>Fashion &amp; Retail</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>France</td>\n",
       "      <td>1</td>\n",
       "      <td>73.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73.0</td>\n",
       "      <td>158.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>129</td>\n",
       "      <td>66</td>\n",
       "      <td>Technology</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>United States</td>\n",
       "      <td>1</td>\n",
       "      <td>66.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>66.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>118</td>\n",
       "      <td>91</td>\n",
       "      <td>Finance &amp; Investments</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>United States</td>\n",
       "      <td>1</td>\n",
       "      <td>91.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>91.0</td>\n",
       "      <td>118.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2595</th>\n",
       "      <td>2578</td>\n",
       "      <td>1</td>\n",
       "      <td>80</td>\n",
       "      <td>Healthcare</td>\n",
       "      <td>2595</td>\n",
       "      <td>2595</td>\n",
       "      <td>2595</td>\n",
       "      <td>Spain</td>\n",
       "      <td>1</td>\n",
       "      <td>80.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2578.0</td>\n",
       "      <td>2595.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2596</th>\n",
       "      <td>2578</td>\n",
       "      <td>1</td>\n",
       "      <td>82</td>\n",
       "      <td>Fashion &amp; Retail</td>\n",
       "      <td>2596</td>\n",
       "      <td>2596</td>\n",
       "      <td>2596</td>\n",
       "      <td>Philippines</td>\n",
       "      <td>1</td>\n",
       "      <td>82.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2578.0</td>\n",
       "      <td>2596.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2597</th>\n",
       "      <td>2578</td>\n",
       "      <td>1</td>\n",
       "      <td>71</td>\n",
       "      <td>Fashion &amp; Retail</td>\n",
       "      <td>2597</td>\n",
       "      <td>2597</td>\n",
       "      <td>2597</td>\n",
       "      <td>Philippines</td>\n",
       "      <td>1</td>\n",
       "      <td>71.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>71.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2578.0</td>\n",
       "      <td>2597.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598</th>\n",
       "      <td>2578</td>\n",
       "      <td>1</td>\n",
       "      <td>68</td>\n",
       "      <td>Fashion &amp; Retail</td>\n",
       "      <td>2598</td>\n",
       "      <td>2598</td>\n",
       "      <td>2598</td>\n",
       "      <td>Philippines</td>\n",
       "      <td>1</td>\n",
       "      <td>68.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2578.0</td>\n",
       "      <td>2598.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2599</th>\n",
       "      <td>2578</td>\n",
       "      <td>1</td>\n",
       "      <td>69</td>\n",
       "      <td>Food &amp; Beverage</td>\n",
       "      <td>2599</td>\n",
       "      <td>2599</td>\n",
       "      <td>2599</td>\n",
       "      <td>Germany</td>\n",
       "      <td>1</td>\n",
       "      <td>69.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2578.0</td>\n",
       "      <td>2599.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2600 rows × 35 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              Rank   Networth  Age                Industry    id  country_id  \\\n",
       "billioner_id                                                                   \n",
       "0                 1       219   50             Automotive      0           0   \n",
       "1                 2       171   58             Technology      1           1   \n",
       "2                 3       158   73       Fashion & Retail      2           2   \n",
       "3                 4       129   66             Technology      3           3   \n",
       "4                 5       118   91  Finance & Investments      4           4   \n",
       "...             ...       ...  ...                     ...   ...         ...   \n",
       "2595           2578         1   80             Healthcare   2595        2595   \n",
       "2596           2578         1   82       Fashion & Retail   2596        2596   \n",
       "2597           2578         1   71       Fashion & Retail   2597        2597   \n",
       "2598           2578         1   68       Fashion & Retail   2598        2598   \n",
       "2599           2578         1   69        Food & Beverage   2599        2599   \n",
       "\n",
       "              country_table.id country_table.Country  \\\n",
       "billioner_id                                           \n",
       "0                            0         United States   \n",
       "1                            1         United States   \n",
       "2                            2                France   \n",
       "3                            3         United States   \n",
       "4                            4         United States   \n",
       "...                        ...                   ...   \n",
       "2595                      2595                 Spain   \n",
       "2596                      2596           Philippines   \n",
       "2597                      2597           Philippines   \n",
       "2598                      2598           Philippines   \n",
       "2599                      2599               Germany   \n",
       "\n",
       "              country_table.COUNT(other_about_billioner)  \\\n",
       "billioner_id                                               \n",
       "0                                                      1   \n",
       "1                                                      1   \n",
       "2                                                      1   \n",
       "3                                                      1   \n",
       "4                                                      1   \n",
       "...                                                  ...   \n",
       "2595                                                   1   \n",
       "2596                                                   1   \n",
       "2597                                                   1   \n",
       "2598                                                   1   \n",
       "2599                                                   1   \n",
       "\n",
       "              country_table.MAX(other_about_billioner.Age)  ...  \\\n",
       "billioner_id                                                ...   \n",
       "0                                                     50.0  ...   \n",
       "1                                                     58.0  ...   \n",
       "2                                                     73.0  ...   \n",
       "3                                                     66.0  ...   \n",
       "4                                                     91.0  ...   \n",
       "...                                                    ...  ...   \n",
       "2595                                                  80.0  ...   \n",
       "2596                                                  82.0  ...   \n",
       "2597                                                  71.0  ...   \n",
       "2598                                                  68.0  ...   \n",
       "2599                                                  69.0  ...   \n",
       "\n",
       "              country_table.SKEW(other_about_billioner.Rank )  \\\n",
       "billioner_id                                                    \n",
       "0                                                         NaN   \n",
       "1                                                         NaN   \n",
       "2                                                         NaN   \n",
       "3                                                         NaN   \n",
       "4                                                         NaN   \n",
       "...                                                       ...   \n",
       "2595                                                      NaN   \n",
       "2596                                                      NaN   \n",
       "2597                                                      NaN   \n",
       "2598                                                      NaN   \n",
       "2599                                                      NaN   \n",
       "\n",
       "              country_table.SKEW(other_about_billioner.id)  \\\n",
       "billioner_id                                                 \n",
       "0                                                      NaN   \n",
       "1                                                      NaN   \n",
       "2                                                      NaN   \n",
       "3                                                      NaN   \n",
       "4                                                      NaN   \n",
       "...                                                    ...   \n",
       "2595                                                   NaN   \n",
       "2596                                                   NaN   \n",
       "2597                                                   NaN   \n",
       "2598                                                   NaN   \n",
       "2599                                                   NaN   \n",
       "\n",
       "              country_table.STD(other_about_billioner.Age)  \\\n",
       "billioner_id                                                 \n",
       "0                                                      NaN   \n",
       "1                                                      NaN   \n",
       "2                                                      NaN   \n",
       "3                                                      NaN   \n",
       "4                                                      NaN   \n",
       "...                                                    ...   \n",
       "2595                                                   NaN   \n",
       "2596                                                   NaN   \n",
       "2597                                                   NaN   \n",
       "2598                                                   NaN   \n",
       "2599                                                   NaN   \n",
       "\n",
       "              country_table.STD(other_about_billioner.Networth)  \\\n",
       "billioner_id                                                      \n",
       "0                                                           NaN   \n",
       "1                                                           NaN   \n",
       "2                                                           NaN   \n",
       "3                                                           NaN   \n",
       "4                                                           NaN   \n",
       "...                                                         ...   \n",
       "2595                                                        NaN   \n",
       "2596                                                        NaN   \n",
       "2597                                                        NaN   \n",
       "2598                                                        NaN   \n",
       "2599                                                        NaN   \n",
       "\n",
       "              country_table.STD(other_about_billioner.Rank )  \\\n",
       "billioner_id                                                   \n",
       "0                                                        NaN   \n",
       "1                                                        NaN   \n",
       "2                                                        NaN   \n",
       "3                                                        NaN   \n",
       "4                                                        NaN   \n",
       "...                                                      ...   \n",
       "2595                                                     NaN   \n",
       "2596                                                     NaN   \n",
       "2597                                                     NaN   \n",
       "2598                                                     NaN   \n",
       "2599                                                     NaN   \n",
       "\n",
       "              country_table.STD(other_about_billioner.id)  \\\n",
       "billioner_id                                                \n",
       "0                                                     NaN   \n",
       "1                                                     NaN   \n",
       "2                                                     NaN   \n",
       "3                                                     NaN   \n",
       "4                                                     NaN   \n",
       "...                                                   ...   \n",
       "2595                                                  NaN   \n",
       "2596                                                  NaN   \n",
       "2597                                                  NaN   \n",
       "2598                                                  NaN   \n",
       "2599                                                  NaN   \n",
       "\n",
       "              country_table.SUM(other_about_billioner.Age)  \\\n",
       "billioner_id                                                 \n",
       "0                                                     50.0   \n",
       "1                                                     58.0   \n",
       "2                                                     73.0   \n",
       "3                                                     66.0   \n",
       "4                                                     91.0   \n",
       "...                                                    ...   \n",
       "2595                                                  80.0   \n",
       "2596                                                  82.0   \n",
       "2597                                                  71.0   \n",
       "2598                                                  68.0   \n",
       "2599                                                  69.0   \n",
       "\n",
       "              country_table.SUM(other_about_billioner.Networth)  \\\n",
       "billioner_id                                                      \n",
       "0                                                         219.0   \n",
       "1                                                         171.0   \n",
       "2                                                         158.0   \n",
       "3                                                         129.0   \n",
       "4                                                         118.0   \n",
       "...                                                         ...   \n",
       "2595                                                        1.0   \n",
       "2596                                                        1.0   \n",
       "2597                                                        1.0   \n",
       "2598                                                        1.0   \n",
       "2599                                                        1.0   \n",
       "\n",
       "              country_table.SUM(other_about_billioner.Rank )  \\\n",
       "billioner_id                                                   \n",
       "0                                                        1.0   \n",
       "1                                                        2.0   \n",
       "2                                                        3.0   \n",
       "3                                                        4.0   \n",
       "4                                                        5.0   \n",
       "...                                                      ...   \n",
       "2595                                                  2578.0   \n",
       "2596                                                  2578.0   \n",
       "2597                                                  2578.0   \n",
       "2598                                                  2578.0   \n",
       "2599                                                  2578.0   \n",
       "\n",
       "              country_table.SUM(other_about_billioner.id)  \n",
       "billioner_id                                               \n",
       "0                                                     0.0  \n",
       "1                                                     1.0  \n",
       "2                                                     2.0  \n",
       "3                                                     3.0  \n",
       "4                                                     4.0  \n",
       "...                                                   ...  \n",
       "2595                                               2595.0  \n",
       "2596                                               2596.0  \n",
       "2597                                               2597.0  \n",
       "2598                                               2598.0  \n",
       "2599                                               2599.0  \n",
       "\n",
       "[2600 rows x 35 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import featuretools as ft\n",
    "from woodwork.logical_types import Categorical, Integer\n",
    "import pandas as pd\n",
    "\n",
    "# NOTE(review): absolute, machine-specific path - the cell only runs where this file exists.\n",
    "df = pd.read_csv(\"C://Users//annal//aim//static//csv//Forbes_Billionaires.csv\")\n",
    "df['id'] = pd.Series(range(len(df)))\n",
    "\n",
    "# Parent table: exactly one row per distinct country.\n",
    "# Deduplicating on ['id', 'Country'] removes nothing because 'id' is unique per row;\n",
    "# that gave every billionaire a private country row (COUNT == 1, STD/SKEW == NaN).\n",
    "country_df = df[['Country']].drop_duplicates().reset_index(drop=True)\n",
    "country_df['country_id'] = country_df.index\n",
    "\n",
    "# Child table: every remaining column plus the foreign key, looked up by country name.\n",
    "# The left merge keeps one row per billionaire in the original order; validate= raises\n",
    "# if the country keys on the right side are not unique.\n",
    "other_df = df.merge(\n",
    "    country_df, on='Country', how='left', validate='many_to_one'\n",
    ").drop(columns=['Country'])\n",
    "\n",
    "es = ft.EntitySet(id=\"orders\")\n",
    "es = es.add_dataframe(\n",
    "    dataframe_name=\"country_table\",\n",
    "    dataframe=country_df,\n",
    "    index=\"country_id\",  # primary key of the country table\n",
    "    logical_types={\n",
    "        \"Country\": Categorical  # country name is a categorical label\n",
    "    },\n",
    ")\n",
    "es = es.add_dataframe(\n",
    "    dataframe_name=\"other_about_billioner\",\n",
    "    dataframe=other_df,\n",
    "    index=\"billioner_id\",  # not a column of other_df, so it is generated (see make_index)\n",
    "    make_index=True,  # create the integer index explicitly instead of relying on the warning fallback\n",
    "    logical_types={\n",
    "        \"Rank \": Integer,  # column name carries a trailing space in the CSV\n",
    "        \"Networth\": Integer,\n",
    "        \"Age\": Integer,\n",
    "        \"country_id\": Integer,  # foreign key into country_table\n",
    "    },\n",
    ")\n",
    "# country_table is the parent (one row per country), other_about_billioner the child.\n",
    "es = es.add_relationship(\"country_table\", \"country_id\", \"other_about_billioner\", \"country_id\")\n",
    "\n",
    "# Deep feature synthesis: per-billionaire rows enriched with per-country aggregates.\n",
    "feature_matrix, feature_defs = ft.dfs(\n",
    "    entityset=es,\n",
    "    target_dataframe_name=\"other_about_billioner\"\n",
    ")\n",
    "\n",
    "feature_matrix"
   ]
  }
 ],
 "metadata": {