ну вроде бы всё
This commit is contained in:
parent
d4cdc8ab91
commit
7f7102c559
625
lab_3/lab3.ipynb
625
lab_3/lab3.ipynb
@ -436,7 +436,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Приведём пример использования future tools"
|
||||
"# Приведём пример использования Featuretools\n",
|
||||
"## Попробую вынести страну в отдельную таблицу"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -544,6 +545,628 @@
|
||||
"source": [
|
||||
"%pip install --upgrade setuptools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index billioner_id not found in dataframe, creating new integer column\n",
|
||||
" warnings.warn(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||||
" pd.to_datetime(\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function max at 0x000001952157A520> is currently using SeriesGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n",
|
||||
" ).agg(to_agg)\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function std at 0x000001952157B060> is currently using SeriesGroupBy.std. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"std\" instead.\n",
|
||||
" ).agg(to_agg)\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function sum at 0x0000019521579B20> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
|
||||
" ).agg(to_agg)\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function min at 0x000001952157A660> is currently using SeriesGroupBy.min. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"min\" instead.\n",
|
||||
" ).agg(to_agg)\n",
|
||||
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable <function mean at 0x000001952157AF20> is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n",
|
||||
" ).agg(to_agg)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Rank</th>\n",
|
||||
" <th>Networth</th>\n",
|
||||
" <th>Age</th>\n",
|
||||
" <th>Industry</th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>country_id</th>\n",
|
||||
" <th>country_table.id</th>\n",
|
||||
" <th>country_table.Country</th>\n",
|
||||
" <th>country_table.COUNT(other_about_billioner)</th>\n",
|
||||
" <th>country_table.MAX(other_about_billioner.Age)</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>country_table.SKEW(other_about_billioner.Rank )</th>\n",
|
||||
" <th>country_table.SKEW(other_about_billioner.id)</th>\n",
|
||||
" <th>country_table.STD(other_about_billioner.Age)</th>\n",
|
||||
" <th>country_table.STD(other_about_billioner.Networth)</th>\n",
|
||||
" <th>country_table.STD(other_about_billioner.Rank )</th>\n",
|
||||
" <th>country_table.STD(other_about_billioner.id)</th>\n",
|
||||
" <th>country_table.SUM(other_about_billioner.Age)</th>\n",
|
||||
" <th>country_table.SUM(other_about_billioner.Networth)</th>\n",
|
||||
" <th>country_table.SUM(other_about_billioner.Rank )</th>\n",
|
||||
" <th>country_table.SUM(other_about_billioner.id)</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>billioner_id</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>219</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>Automotive</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>United States</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>50.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>50.0</td>\n",
|
||||
" <td>219.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>171</td>\n",
|
||||
" <td>58</td>\n",
|
||||
" <td>Technology</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>United States</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>58.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>58.0</td>\n",
|
||||
" <td>171.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>158</td>\n",
|
||||
" <td>73</td>\n",
|
||||
" <td>Fashion & Retail</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>73.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>73.0</td>\n",
|
||||
" <td>158.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>129</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" <td>Technology</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>United States</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>66.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>66.0</td>\n",
|
||||
" <td>129.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>118</td>\n",
|
||||
" <td>91</td>\n",
|
||||
" <td>Finance & Investments</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>United States</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>91.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>91.0</td>\n",
|
||||
" <td>118.0</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2595</th>\n",
|
||||
" <td>2578</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>80</td>\n",
|
||||
" <td>Healthcare</td>\n",
|
||||
" <td>2595</td>\n",
|
||||
" <td>2595</td>\n",
|
||||
" <td>2595</td>\n",
|
||||
" <td>Spain</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>80.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>80.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2578.0</td>\n",
|
||||
" <td>2595.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2596</th>\n",
|
||||
" <td>2578</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>82</td>\n",
|
||||
" <td>Fashion & Retail</td>\n",
|
||||
" <td>2596</td>\n",
|
||||
" <td>2596</td>\n",
|
||||
" <td>2596</td>\n",
|
||||
" <td>Philippines</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>82.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>82.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2578.0</td>\n",
|
||||
" <td>2596.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2597</th>\n",
|
||||
" <td>2578</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>71</td>\n",
|
||||
" <td>Fashion & Retail</td>\n",
|
||||
" <td>2597</td>\n",
|
||||
" <td>2597</td>\n",
|
||||
" <td>2597</td>\n",
|
||||
" <td>Philippines</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>71.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>71.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2578.0</td>\n",
|
||||
" <td>2597.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2598</th>\n",
|
||||
" <td>2578</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>68</td>\n",
|
||||
" <td>Fashion & Retail</td>\n",
|
||||
" <td>2598</td>\n",
|
||||
" <td>2598</td>\n",
|
||||
" <td>2598</td>\n",
|
||||
" <td>Philippines</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2578.0</td>\n",
|
||||
" <td>2598.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2599</th>\n",
|
||||
" <td>2578</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>69</td>\n",
|
||||
" <td>Food & Beverage</td>\n",
|
||||
" <td>2599</td>\n",
|
||||
" <td>2599</td>\n",
|
||||
" <td>2599</td>\n",
|
||||
" <td>Germany</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>69.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>69.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2578.0</td>\n",
|
||||
" <td>2599.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>2600 rows × 35 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Rank Networth Age Industry id country_id \\\n",
|
||||
"billioner_id \n",
|
||||
"0 1 219 50 Automotive 0 0 \n",
|
||||
"1 2 171 58 Technology 1 1 \n",
|
||||
"2 3 158 73 Fashion & Retail 2 2 \n",
|
||||
"3 4 129 66 Technology 3 3 \n",
|
||||
"4 5 118 91 Finance & Investments 4 4 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"2595 2578 1 80 Healthcare 2595 2595 \n",
|
||||
"2596 2578 1 82 Fashion & Retail 2596 2596 \n",
|
||||
"2597 2578 1 71 Fashion & Retail 2597 2597 \n",
|
||||
"2598 2578 1 68 Fashion & Retail 2598 2598 \n",
|
||||
"2599 2578 1 69 Food & Beverage 2599 2599 \n",
|
||||
"\n",
|
||||
" country_table.id country_table.Country \\\n",
|
||||
"billioner_id \n",
|
||||
"0 0 United States \n",
|
||||
"1 1 United States \n",
|
||||
"2 2 France \n",
|
||||
"3 3 United States \n",
|
||||
"4 4 United States \n",
|
||||
"... ... ... \n",
|
||||
"2595 2595 Spain \n",
|
||||
"2596 2596 Philippines \n",
|
||||
"2597 2597 Philippines \n",
|
||||
"2598 2598 Philippines \n",
|
||||
"2599 2599 Germany \n",
|
||||
"\n",
|
||||
" country_table.COUNT(other_about_billioner) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 1 \n",
|
||||
"1 1 \n",
|
||||
"2 1 \n",
|
||||
"3 1 \n",
|
||||
"4 1 \n",
|
||||
"... ... \n",
|
||||
"2595 1 \n",
|
||||
"2596 1 \n",
|
||||
"2597 1 \n",
|
||||
"2598 1 \n",
|
||||
"2599 1 \n",
|
||||
"\n",
|
||||
" country_table.MAX(other_about_billioner.Age) ... \\\n",
|
||||
"billioner_id ... \n",
|
||||
"0 50.0 ... \n",
|
||||
"1 58.0 ... \n",
|
||||
"2 73.0 ... \n",
|
||||
"3 66.0 ... \n",
|
||||
"4 91.0 ... \n",
|
||||
"... ... ... \n",
|
||||
"2595 80.0 ... \n",
|
||||
"2596 82.0 ... \n",
|
||||
"2597 71.0 ... \n",
|
||||
"2598 68.0 ... \n",
|
||||
"2599 69.0 ... \n",
|
||||
"\n",
|
||||
" country_table.SKEW(other_about_billioner.Rank ) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.SKEW(other_about_billioner.id) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.STD(other_about_billioner.Age) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.STD(other_about_billioner.Networth) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.STD(other_about_billioner.Rank ) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.STD(other_about_billioner.id) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"2595 NaN \n",
|
||||
"2596 NaN \n",
|
||||
"2597 NaN \n",
|
||||
"2598 NaN \n",
|
||||
"2599 NaN \n",
|
||||
"\n",
|
||||
" country_table.SUM(other_about_billioner.Age) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 50.0 \n",
|
||||
"1 58.0 \n",
|
||||
"2 73.0 \n",
|
||||
"3 66.0 \n",
|
||||
"4 91.0 \n",
|
||||
"... ... \n",
|
||||
"2595 80.0 \n",
|
||||
"2596 82.0 \n",
|
||||
"2597 71.0 \n",
|
||||
"2598 68.0 \n",
|
||||
"2599 69.0 \n",
|
||||
"\n",
|
||||
" country_table.SUM(other_about_billioner.Networth) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 219.0 \n",
|
||||
"1 171.0 \n",
|
||||
"2 158.0 \n",
|
||||
"3 129.0 \n",
|
||||
"4 118.0 \n",
|
||||
"... ... \n",
|
||||
"2595 1.0 \n",
|
||||
"2596 1.0 \n",
|
||||
"2597 1.0 \n",
|
||||
"2598 1.0 \n",
|
||||
"2599 1.0 \n",
|
||||
"\n",
|
||||
" country_table.SUM(other_about_billioner.Rank ) \\\n",
|
||||
"billioner_id \n",
|
||||
"0 1.0 \n",
|
||||
"1 2.0 \n",
|
||||
"2 3.0 \n",
|
||||
"3 4.0 \n",
|
||||
"4 5.0 \n",
|
||||
"... ... \n",
|
||||
"2595 2578.0 \n",
|
||||
"2596 2578.0 \n",
|
||||
"2597 2578.0 \n",
|
||||
"2598 2578.0 \n",
|
||||
"2599 2578.0 \n",
|
||||
"\n",
|
||||
" country_table.SUM(other_about_billioner.id) \n",
|
||||
"billioner_id \n",
|
||||
"0 0.0 \n",
|
||||
"1 1.0 \n",
|
||||
"2 2.0 \n",
|
||||
"3 3.0 \n",
|
||||
"4 4.0 \n",
|
||||
"... ... \n",
|
||||
"2595 2595.0 \n",
|
||||
"2596 2596.0 \n",
|
||||
"2597 2597.0 \n",
|
||||
"2598 2598.0 \n",
|
||||
"2599 2599.0 \n",
|
||||
"\n",
|
||||
"[2600 rows x 35 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import featuretools as ft\n",
|
||||
"from woodwork.logical_types import Categorical, Integer\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# Load the Forbes billionaires dataset and give every row a sequential id.\n",
|
||||
"# NOTE(review): absolute, machine-specific path - the cell only runs where this file exists.\n",
|
||||
"df = pd.read_csv(\"C://Users//annal//aim//static//csv//Forbes_Billionaires.csv\")\n",
|
||||
"df['id'] = pd.Series(range(len(df)))\n",
|
||||
"\n",
|
||||
"# Split the data into two tables: a country lookup table and the remaining billionaire attributes.\n",
|
||||
"# Deduplicate on 'Country' alone: 'id' is unique per row, so keeping it in the subset would\n",
|
||||
"# retain every row and each billionaire would end up with a private country record.\n",
|
||||
"country_df = df[['Country']].drop_duplicates().reset_index(drop=True)\n",
|
||||
"other_df = df.drop(columns=['Country'])\n",
|
||||
"\n",
|
||||
"# Surrogate key linking each billionaire to a country (one key per distinct country name).\n",
|
||||
"# NOTE(review): assumes 'Country' has no missing values - confirm against the CSV.\n",
|
||||
"country_df['country_id'] = country_df.index\n",
|
||||
"other_df['country_id'] = df['Country'].map(country_df.set_index('Country')['country_id'])\n",
|
||||
"\n",
|
||||
"es = ft.EntitySet(id=\"orders\")\n",
|
||||
|
||||
"es = es.add_dataframe(\n",
|
||||
"    dataframe_name=\"country_table\",\n",
|
||||
"    dataframe=country_df,\n",
|
||||
"    index=\"country_id\",  # unique identifier of a country\n",
|
||||
"    logical_types={\n",
|
||||
"        \"Country\": Categorical  # the country name is treated as a categorical value\n",
|
||||
"    },\n",
|
||||
")\n",
|
||||
"es = es.add_dataframe(\n",
|
||||
"    dataframe_name=\"other_about_billioner\",\n",
|
||||
"    dataframe=other_df,\n",
|
||||
"    index=\"billioner_id\",  # unique identifier of a billionaire\n",
|
||||
"    make_index=True,  # the column is not in other_df; create it explicitly instead of via the warning fallback\n",
|
||||
"    logical_types={\n",
|
||||
"        \"Rank \": Integer,  # position in the ranking (the column name has a trailing space)\n",
|
||||
"        \"Networth\": Integer,  # net worth (numeric feature)\n",
|
||||
"        \"Age\": Integer,\n",
|
||||
"        \"country_id\": Integer,  # foreign key into country_table\n",
|
||||
"    },\n",
|
||||
")\n",
|
||||
"es = es.add_relationship(\"country_table\", \"country_id\", \"other_about_billioner\", \"country_id\")\n",
|
||||
"\n",
|
||||
"# Deep Feature Synthesis targeting the billionaire table; features of the related\n",
|
||||
"# country_table (including aggregates over its billionaires) are joined in.\n",
|
||||
"feature_matrix, feature_defs = ft.dfs(\n",
|
||||
"    entityset=es,\n",
|
||||
"    target_dataframe_name=\"other_about_billioner\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"feature_matrix"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
Reference in New Issue
Block a user