lab2 done #3
2601
lab_2/datasetlab2/Forbes Billionaires.csv
Normal file
2601
lab_2/datasetlab2/Forbes Billionaires.csv
Normal file
File diff suppressed because it is too large
Load Diff
897
lab_2/datasetlab2/Stores.csv
Normal file
897
lab_2/datasetlab2/Stores.csv
Normal file
@ -0,0 +1,897 @@
|
|||||||
|
Store ID ,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
|
||||||
|
1,1659,1961,530,66490
|
||||||
|
2,1461,1752,210,39820
|
||||||
|
3,1340,1609,720,54010
|
||||||
|
4,1451,1748,620,53730
|
||||||
|
5,1770,2111,450,46620
|
||||||
|
6,1442,1733,760,45260
|
||||||
|
7,1542,1858,1030,72240
|
||||||
|
8,1261,1507,1020,37720
|
||||||
|
9,1090,1321,680,46310
|
||||||
|
10,1030,1235,1130,44150
|
||||||
|
11,1187,1439,1090,71280
|
||||||
|
12,1751,2098,720,57620
|
||||||
|
13,1746,2064,1050,60470
|
||||||
|
14,1615,1931,1160,59130
|
||||||
|
15,1469,1756,770,66360
|
||||||
|
16,1644,1950,790,78870
|
||||||
|
17,1578,1907,1440,77250
|
||||||
|
18,1703,2045,670,38170
|
||||||
|
19,1438,1731,1030,63540
|
||||||
|
20,1940,2340,980,40190
|
||||||
|
21,1421,1700,370,43460
|
||||||
|
22,1458,1746,690,68890
|
||||||
|
23,1719,2065,950,52780
|
||||||
|
24,1449,1752,620,50680
|
||||||
|
25,1234,1488,840,41880
|
||||||
|
26,1732,2073,820,70050
|
||||||
|
27,1475,1777,1100,25820
|
||||||
|
28,1390,1648,980,60530
|
||||||
|
29,1642,1943,710,78100
|
||||||
|
30,1715,2071,650,84860
|
||||||
|
31,1439,1746,990,80140
|
||||||
|
32,1250,1508,990,14920
|
||||||
|
33,1331,1608,880,60460
|
||||||
|
34,1784,2163,620,74560
|
||||||
|
35,1375,1648,1020,72430
|
||||||
|
36,1871,2230,700,45460
|
||||||
|
37,1442,1744,610,41570
|
||||||
|
38,1174,1411,1080,62870
|
||||||
|
39,1839,2204,1010,55170
|
||||||
|
40,1270,1516,10,45480
|
||||||
|
41,1435,1725,1250,49550
|
||||||
|
42,965,1152,600,48140
|
||||||
|
43,1665,2001,730,67640
|
||||||
|
44,1780,2117,780,39730
|
||||||
|
45,1009,1194,520,35800
|
||||||
|
46,1227,1471,870,49270
|
||||||
|
47,1769,2087,690,66510
|
||||||
|
48,1660,1982,910,62530
|
||||||
|
49,1472,1776,1260,59980
|
||||||
|
50,1408,1688,1040,76350
|
||||||
|
51,1514,1820,910,81820
|
||||||
|
52,1565,1880,1300,57830
|
||||||
|
53,1074,1288,320,70450
|
||||||
|
54,1864,2240,530,67000
|
||||||
|
55,1570,1898,980,64090
|
||||||
|
56,1417,1701,740,48670
|
||||||
|
57,1734,2060,1240,66210
|
||||||
|
58,1470,1763,1080,83660
|
||||||
|
59,1761,2104,1080,70770
|
||||||
|
60,1756,2070,460,53870
|
||||||
|
61,1704,2045,300,71300
|
||||||
|
62,2011,2391,530,46100
|
||||||
|
63,1472,1748,600,49100
|
||||||
|
64,1310,1561,860,65920
|
||||||
|
65,1544,1821,590,58660
|
||||||
|
66,1707,2052,920,69130
|
||||||
|
67,1881,2262,570,49080
|
||||||
|
68,1416,1681,290,72710
|
||||||
|
69,1631,1941,650,33430
|
||||||
|
70,1318,1576,710,42430
|
||||||
|
71,1692,2019,850,56650
|
||||||
|
72,1152,1380,530,33580
|
||||||
|
73,891,1073,630,67370
|
||||||
|
74,1468,1749,700,71780
|
||||||
|
75,1539,1833,650,84840
|
||||||
|
76,1635,1956,720,82070
|
||||||
|
77,1267,1520,450,26770
|
||||||
|
78,1250,1475,1390,65560
|
||||||
|
79,1720,2044,960,38660
|
||||||
|
80,1462,1761,600,65660
|
||||||
|
81,1431,1711,620,40700
|
||||||
|
82,1539,1858,1020,88910
|
||||||
|
83,1441,1723,330,57860
|
||||||
|
84,1572,1884,1410,42670
|
||||||
|
85,1287,1525,1200,90180
|
||||||
|
86,1468,1760,280,51280
|
||||||
|
87,1931,2342,940,97260
|
||||||
|
88,1252,1506,850,39650
|
||||||
|
89,1238,1468,960,45720
|
||||||
|
90,1479,1758,420,42060
|
||||||
|
91,1590,1912,830,65350
|
||||||
|
92,2169,2617,600,67080
|
||||||
|
93,1838,2205,400,54030
|
||||||
|
94,1385,1655,760,56360
|
||||||
|
95,1921,2305,1470,77120
|
||||||
|
96,1975,2385,500,50810
|
||||||
|
97,1853,2235,1120,60960
|
||||||
|
98,1816,2171,1160,61180
|
||||||
|
99,1785,2147,820,63660
|
||||||
|
100,1579,1899,1140,41190
|
||||||
|
101,1096,1321,900,78420
|
||||||
|
102,1919,2294,760,65580
|
||||||
|
103,1262,1500,1170,89080
|
||||||
|
104,1374,1655,1080,94170
|
||||||
|
105,1309,1587,1000,50950
|
||||||
|
106,1207,1434,690,65180
|
||||||
|
107,1692,2031,810,69310
|
||||||
|
108,1929,2311,630,79210
|
||||||
|
109,1573,1878,650,23740
|
||||||
|
110,1415,1700,920,36330
|
||||||
|
111,1162,1382,1260,51700
|
||||||
|
112,1485,1787,800,62950
|
||||||
|
113,1897,2248,1330,56010
|
||||||
|
114,1607,1927,940,45080
|
||||||
|
115,1909,2287,1210,46830
|
||||||
|
116,1274,1503,660,64750
|
||||||
|
117,1157,1379,770,80780
|
||||||
|
118,1712,2046,460,31180
|
||||||
|
119,1500,1798,860,56710
|
||||||
|
120,1682,2017,780,49390
|
||||||
|
121,1441,1727,890,66000
|
||||||
|
122,1525,1835,900,32770
|
||||||
|
123,1408,1669,530,46580
|
||||||
|
124,1947,2333,790,79780
|
||||||
|
125,1164,1390,370,35510
|
||||||
|
126,1787,2137,610,80970
|
||||||
|
127,1871,2241,500,61150
|
||||||
|
128,1718,2051,750,49210
|
||||||
|
129,1365,1636,980,79950
|
||||||
|
130,1368,1654,530,68740
|
||||||
|
131,1342,1595,910,57480
|
||||||
|
132,1076,1270,620,72630
|
||||||
|
133,1396,1672,1170,50070
|
||||||
|
134,1713,2071,900,40490
|
||||||
|
135,1370,1638,980,51850
|
||||||
|
136,1667,1993,740,42840
|
||||||
|
137,1638,1972,810,60940
|
||||||
|
138,1581,1905,810,62280
|
||||||
|
139,1795,2187,300,76530
|
||||||
|
140,1179,1412,790,85130
|
||||||
|
141,1978,2374,800,48590
|
||||||
|
142,1688,2042,760,73080
|
||||||
|
143,1214,1456,530,48950
|
||||||
|
144,1504,1805,540,48560
|
||||||
|
145,1498,1770,620,59380
|
||||||
|
146,1462,1762,1010,51190
|
||||||
|
147,1442,1750,130,58920
|
||||||
|
148,1250,1486,730,50360
|
||||||
|
149,1229,1480,830,38070
|
||||||
|
150,1936,2300,1060,49170
|
||||||
|
151,1369,1629,770,39740
|
||||||
|
152,1662,1986,70,63730
|
||||||
|
153,1548,1855,670,85330
|
||||||
|
154,1649,1963,490,27410
|
||||||
|
155,1393,1663,670,37320
|
||||||
|
156,1450,1734,380,71120
|
||||||
|
157,1613,1921,1200,72800
|
||||||
|
158,1408,1696,350,34410
|
||||||
|
159,775,932,1090,42530
|
||||||
|
160,1275,1534,1230,54300
|
||||||
|
161,1740,2078,680,50780
|
||||||
|
162,1372,1657,580,45020
|
||||||
|
163,1414,1723,680,69600
|
||||||
|
164,2044,2474,340,80340
|
||||||
|
165,1823,2176,700,37810
|
||||||
|
166,955,1133,580,46140
|
||||||
|
167,1465,1763,680,99570
|
||||||
|
168,1331,1606,630,38650
|
||||||
|
169,1232,1487,860,49800
|
||||||
|
170,1481,1765,490,69910
|
||||||
|
171,1343,1599,870,44910
|
||||||
|
172,1539,1837,990,78470
|
||||||
|
173,1007,1207,670,47460
|
||||||
|
174,1762,2145,490,33460
|
||||||
|
175,1527,1832,580,44090
|
||||||
|
176,1356,1619,700,42620
|
||||||
|
177,1536,1848,670,69450
|
||||||
|
178,1605,1902,390,73120
|
||||||
|
179,1704,2032,590,48300
|
||||||
|
180,1626,1941,1350,58090
|
||||||
|
181,1612,1939,840,74250
|
||||||
|
182,1174,1396,1100,40930
|
||||||
|
183,1923,2339,950,70930
|
||||||
|
184,1702,2053,950,64670
|
||||||
|
185,1398,1692,650,77420
|
||||||
|
186,1437,1717,230,32330
|
||||||
|
187,1524,1796,1060,41080
|
||||||
|
188,1660,1985,1180,42860
|
||||||
|
189,1302,1569,710,68450
|
||||||
|
190,1666,2000,480,39730
|
||||||
|
191,1391,1649,810,83750
|
||||||
|
192,1778,2148,1140,69940
|
||||||
|
193,1462,1770,1070,67710
|
||||||
|
194,1751,2115,790,67360
|
||||||
|
195,1652,1982,690,52460
|
||||||
|
196,1841,2215,610,88760
|
||||||
|
197,1496,1791,1240,67030
|
||||||
|
198,1504,1827,840,78230
|
||||||
|
199,1524,1808,460,62270
|
||||||
|
200,1148,1371,940,49760
|
||||||
|
201,1468,1744,590,73660
|
||||||
|
202,1310,1558,890,72320
|
||||||
|
203,1321,1579,770,68890
|
||||||
|
204,992,1192,900,34180
|
||||||
|
205,1540,1857,1020,58260
|
||||||
|
206,1807,2149,910,38120
|
||||||
|
207,1526,1853,660,49070
|
||||||
|
208,1406,1677,480,61660
|
||||||
|
209,1703,2055,1080,37830
|
||||||
|
210,1575,1872,690,52270
|
||||||
|
211,1309,1572,510,52280
|
||||||
|
212,1488,1807,1030,70810
|
||||||
|
213,1658,1988,370,71530
|
||||||
|
214,1863,2245,640,77260
|
||||||
|
215,1458,1725,750,75550
|
||||||
|
216,1604,1909,370,33730
|
||||||
|
217,1575,1899,840,66270
|
||||||
|
218,1525,1829,840,55820
|
||||||
|
219,1451,1737,890,68430
|
||||||
|
220,1390,1687,620,73990
|
||||||
|
221,1442,1742,310,62800
|
||||||
|
222,1620,1922,550,33740
|
||||||
|
223,1251,1527,380,63830
|
||||||
|
224,1318,1606,1200,24410
|
||||||
|
225,1647,1962,800,70020
|
||||||
|
226,1829,2175,870,92240
|
||||||
|
227,1852,2227,1220,68230
|
||||||
|
228,1699,2053,1080,81870
|
||||||
|
229,1325,1595,540,73860
|
||||||
|
230,1350,1634,880,77120
|
||||||
|
231,1347,1628,120,72350
|
||||||
|
232,1397,1661,1410,49160
|
||||||
|
233,1245,1499,570,45650
|
||||||
|
234,1366,1649,940,52780
|
||||||
|
235,1378,1658,760,90960
|
||||||
|
236,1767,2110,1200,64950
|
||||||
|
237,1184,1434,670,47230
|
||||||
|
238,1257,1505,950,83250
|
||||||
|
239,1863,2247,480,51950
|
||||||
|
240,1881,2244,920,66030
|
||||||
|
241,1329,1609,1150,68590
|
||||||
|
242,1539,1848,750,47140
|
||||||
|
243,1557,1861,370,69940
|
||||||
|
244,2007,2397,610,65890
|
||||||
|
245,1185,1418,1150,89310
|
||||||
|
246,1657,2003,1070,58540
|
||||||
|
247,1294,1539,790,78130
|
||||||
|
248,1296,1559,1070,92300
|
||||||
|
249,1733,2097,730,56170
|
||||||
|
250,1641,1976,620,46050
|
||||||
|
251,1373,1648,530,43390
|
||||||
|
252,1550,1845,700,61750
|
||||||
|
253,1583,1907,680,21830
|
||||||
|
254,1428,1719,1060,39800
|
||||||
|
255,1604,1925,670,54370
|
||||||
|
256,1439,1735,400,62470
|
||||||
|
257,1648,2003,910,82930
|
||||||
|
258,1025,1231,760,63720
|
||||||
|
259,2001,2394,540,79180
|
||||||
|
260,1145,1370,350,38210
|
||||||
|
261,1174,1426,980,25950
|
||||||
|
262,913,1106,860,56610
|
||||||
|
263,1199,1433,1020,73710
|
||||||
|
264,1875,2254,1120,70400
|
||||||
|
265,1153,1397,1020,50440
|
||||||
|
266,1240,1492,940,66840
|
||||||
|
267,1381,1660,970,50170
|
||||||
|
268,1701,2030,830,60140
|
||||||
|
269,1206,1456,920,37130
|
||||||
|
270,1476,1777,660,42890
|
||||||
|
271,1189,1439,780,26220
|
||||||
|
272,1837,2220,340,50840
|
||||||
|
273,1319,1571,1190,25630
|
||||||
|
274,1617,1901,490,60770
|
||||||
|
275,1631,1967,1090,69600
|
||||||
|
276,1517,1805,1040,41740
|
||||||
|
277,1764,2109,1210,50130
|
||||||
|
278,1572,1869,1030,21750
|
||||||
|
279,1855,2197,1170,80490
|
||||||
|
280,1327,1571,730,34020
|
||||||
|
281,1270,1515,720,60240
|
||||||
|
282,1734,2073,500,39460
|
||||||
|
283,1533,1848,1070,56440
|
||||||
|
284,1390,1646,800,46840
|
||||||
|
285,1856,2216,1020,64820
|
||||||
|
286,1000,1215,1070,52520
|
||||||
|
287,1313,1586,420,45940
|
||||||
|
288,1494,1799,510,38970
|
||||||
|
289,1386,1674,1210,58610
|
||||||
|
290,1979,2364,660,30810
|
||||||
|
291,1057,1264,360,47730
|
||||||
|
292,902,1093,1210,64640
|
||||||
|
293,1347,1622,560,44860
|
||||||
|
294,1314,1576,360,55660
|
||||||
|
295,1513,1803,970,57530
|
||||||
|
296,1305,1548,480,75200
|
||||||
|
297,1180,1436,690,37330
|
||||||
|
298,1142,1352,710,35280
|
||||||
|
299,1471,1768,780,70610
|
||||||
|
300,1075,1288,630,49720
|
||||||
|
301,1578,1885,220,68850
|
||||||
|
302,1585,1916,1110,50740
|
||||||
|
303,1391,1648,720,77070
|
||||||
|
304,1577,1892,560,74730
|
||||||
|
305,1092,1314,600,76530
|
||||||
|
306,1375,1681,440,68900
|
||||||
|
307,1523,1813,520,44960
|
||||||
|
308,1373,1654,410,41490
|
||||||
|
309,1550,1871,590,74320
|
||||||
|
310,1614,1946,740,73800
|
||||||
|
311,1566,1889,610,56400
|
||||||
|
312,2019,2396,540,71570
|
||||||
|
313,1494,1806,1450,43640
|
||||||
|
314,1659,2008,620,35120
|
||||||
|
315,1766,2131,340,58670
|
||||||
|
316,1293,1554,970,75800
|
||||||
|
317,1375,1659,1080,76640
|
||||||
|
318,1236,1484,560,31890
|
||||||
|
319,1332,1586,630,61670
|
||||||
|
320,1513,1825,980,75950
|
||||||
|
321,1208,1459,930,41490
|
||||||
|
322,1190,1429,470,66170
|
||||||
|
323,1448,1734,680,37980
|
||||||
|
324,1771,2147,430,62710
|
||||||
|
325,1365,1645,830,60470
|
||||||
|
326,1510,1810,950,35230
|
||||||
|
327,1458,1736,870,48550
|
||||||
|
328,1808,2157,730,56810
|
||||||
|
329,1615,1954,760,41080
|
||||||
|
330,1640,1948,960,51270
|
||||||
|
331,1060,1273,860,57500
|
||||||
|
332,1633,1968,330,81470
|
||||||
|
333,1222,1473,630,49570
|
||||||
|
334,1619,1957,1280,45580
|
||||||
|
335,1624,1973,1440,44660
|
||||||
|
336,1887,2278,570,76240
|
||||||
|
337,1320,1583,540,43720
|
||||||
|
338,1450,1750,480,46700
|
||||||
|
339,1455,1764,390,84690
|
||||||
|
340,966,1172,900,85470
|
||||||
|
341,1922,2290,290,80410
|
||||||
|
342,1678,1999,740,46650
|
||||||
|
343,1638,1952,690,81840
|
||||||
|
344,1145,1375,950,63590
|
||||||
|
345,2004,2390,930,50130
|
||||||
|
346,1954,2378,810,45820
|
||||||
|
347,1577,1879,760,86710
|
||||||
|
348,1766,2138,580,49980
|
||||||
|
349,1362,1634,770,82940
|
||||||
|
350,1886,2228,1530,40350
|
||||||
|
351,1291,1546,420,93950
|
||||||
|
352,1584,1897,1210,47310
|
||||||
|
353,1397,1686,850,21300
|
||||||
|
354,1445,1709,1340,62180
|
||||||
|
355,1433,1707,1160,61460
|
||||||
|
356,1269,1511,500,54360
|
||||||
|
357,1798,2134,820,72050
|
||||||
|
358,1514,1822,670,48090
|
||||||
|
359,1015,1216,460,27310
|
||||||
|
360,1495,1799,950,57160
|
||||||
|
361,1759,2095,980,34190
|
||||||
|
362,1219,1468,850,35600
|
||||||
|
363,1571,1877,580,54670
|
||||||
|
364,1404,1670,620,76730
|
||||||
|
365,1124,1369,650,63540
|
||||||
|
366,1514,1837,1130,36690
|
||||||
|
367,1207,1476,720,87370
|
||||||
|
368,1484,1774,940,59800
|
||||||
|
369,1398,1678,920,48030
|
||||||
|
370,1769,2112,660,96650
|
||||||
|
371,1111,1322,610,65500
|
||||||
|
372,1078,1305,1190,55530
|
||||||
|
373,1876,2254,1340,21650
|
||||||
|
374,1909,2306,820,31940
|
||||||
|
375,1940,2343,1130,84690
|
||||||
|
376,1391,1683,890,68390
|
||||||
|
377,1496,1774,810,75490
|
||||||
|
378,1412,1699,680,39200
|
||||||
|
379,1121,1345,320,85670
|
||||||
|
380,1691,2026,700,60530
|
||||||
|
381,1599,1946,940,78090
|
||||||
|
382,1208,1467,910,50720
|
||||||
|
383,1454,1737,870,23090
|
||||||
|
384,1555,1881,1260,91360
|
||||||
|
385,1554,1852,440,48120
|
||||||
|
386,1491,1800,980,75620
|
||||||
|
387,1415,1697,1200,39420
|
||||||
|
388,1487,1801,890,51130
|
||||||
|
389,1339,1589,1050,33890
|
||||||
|
390,1320,1562,610,87170
|
||||||
|
391,1509,1799,960,38600
|
||||||
|
392,1406,1680,860,60980
|
||||||
|
393,1264,1502,800,79410
|
||||||
|
394,1905,2280,1060,82350
|
||||||
|
395,1209,1464,600,36740
|
||||||
|
396,1546,1829,380,27720
|
||||||
|
397,1689,2044,1140,32260
|
||||||
|
398,1153,1381,730,53270
|
||||||
|
399,2063,2493,810,51480
|
||||||
|
400,1848,2254,1000,59970
|
||||||
|
401,1718,2056,1220,83600
|
||||||
|
402,1480,1773,360,63020
|
||||||
|
403,1439,1724,900,50920
|
||||||
|
404,1576,1896,750,56450
|
||||||
|
405,1948,2374,930,89540
|
||||||
|
406,1613,1893,1180,46030
|
||||||
|
407,896,1059,870,75110
|
||||||
|
408,1625,1943,680,74520
|
||||||
|
409,1303,1587,1310,102310
|
||||||
|
410,1340,1605,1000,53400
|
||||||
|
411,1410,1704,1130,59760
|
||||||
|
412,1432,1719,990,49540
|
||||||
|
413,1891,2280,360,51560
|
||||||
|
414,1322,1583,720,49510
|
||||||
|
415,1378,1670,950,58610
|
||||||
|
416,1462,1732,840,68260
|
||||||
|
417,1440,1733,1120,65310
|
||||||
|
418,1421,1724,920,52090
|
||||||
|
419,1280,1530,1240,43860
|
||||||
|
420,1431,1710,840,74170
|
||||||
|
421,1161,1404,430,58380
|
||||||
|
422,1175,1405,810,91200
|
||||||
|
423,1395,1662,920,90940
|
||||||
|
424,1443,1755,880,49330
|
||||||
|
425,1247,1523,1350,53500
|
||||||
|
426,1788,2133,1000,54590
|
||||||
|
427,1138,1375,1220,57450
|
||||||
|
428,1709,2042,430,33240
|
||||||
|
429,1777,2145,520,80790
|
||||||
|
430,1612,1918,580,61000
|
||||||
|
431,1618,1943,460,47620
|
||||||
|
432,1311,1571,470,72090
|
||||||
|
433,1365,1638,680,102920
|
||||||
|
434,1249,1503,950,61970
|
||||||
|
435,1373,1635,840,61040
|
||||||
|
436,1536,1836,990,52060
|
||||||
|
437,1744,2101,520,69570
|
||||||
|
438,1513,1820,520,66020
|
||||||
|
439,1297,1561,1070,40000
|
||||||
|
440,1908,2304,990,79500
|
||||||
|
441,1721,2076,710,76300
|
||||||
|
442,1243,1491,430,69030
|
||||||
|
443,1472,1766,1290,57140
|
||||||
|
444,1307,1570,1080,41710
|
||||||
|
445,1628,1959,890,71480
|
||||||
|
446,1556,1869,1000,33010
|
||||||
|
447,1179,1428,1290,74570
|
||||||
|
448,1768,2123,860,49590
|
||||||
|
449,1378,1656,1010,73170
|
||||||
|
450,1685,2015,1160,79220
|
||||||
|
451,1474,1759,880,75880
|
||||||
|
452,1794,2137,670,67610
|
||||||
|
453,1086,1291,930,69090
|
||||||
|
454,1808,2154,990,35220
|
||||||
|
455,1501,1790,450,53940
|
||||||
|
456,1353,1598,690,56660
|
||||||
|
457,1455,1757,650,67520
|
||||||
|
458,1165,1411,1020,38620
|
||||||
|
459,1332,1610,880,38890
|
||||||
|
460,1396,1668,760,79270
|
||||||
|
461,1513,1821,690,42880
|
||||||
|
462,1618,1940,630,44240
|
||||||
|
463,1845,2233,370,43190
|
||||||
|
464,1172,1411,460,74550
|
||||||
|
465,1436,1737,770,57090
|
||||||
|
466,1738,2065,750,56480
|
||||||
|
467,2229,2667,660,87410
|
||||||
|
468,1490,1783,730,81370
|
||||||
|
469,1060,1279,670,97360
|
||||||
|
470,2015,2436,680,77960
|
||||||
|
471,1611,1919,960,71240
|
||||||
|
472,1187,1417,1230,58940
|
||||||
|
473,1430,1690,800,78950
|
||||||
|
474,1543,1840,450,36380
|
||||||
|
475,1836,2195,940,45160
|
||||||
|
476,1463,1764,1060,69050
|
||||||
|
477,1213,1462,560,56830
|
||||||
|
478,1244,1480,860,93530
|
||||||
|
479,1745,2108,730,46920
|
||||||
|
480,933,1121,940,55990
|
||||||
|
481,1764,2132,920,40840
|
||||||
|
482,1675,2002,1050,64990
|
||||||
|
483,1688,2046,380,53550
|
||||||
|
484,1842,2204,930,51320
|
||||||
|
485,1316,1597,980,36560
|
||||||
|
486,1440,1719,580,66050
|
||||||
|
487,1760,2111,680,52400
|
||||||
|
488,1323,1571,850,27970
|
||||||
|
489,1230,1466,730,67100
|
||||||
|
490,1540,1838,570,43710
|
||||||
|
491,1167,1388,620,38600
|
||||||
|
492,1429,1695,890,53890
|
||||||
|
493,1491,1770,800,52610
|
||||||
|
494,1313,1574,920,43130
|
||||||
|
495,1609,1943,920,40300
|
||||||
|
496,1109,1342,760,49750
|
||||||
|
497,1207,1440,500,43840
|
||||||
|
498,902,1087,680,56820
|
||||||
|
499,1191,1422,770,36350
|
||||||
|
500,1335,1601,460,50820
|
||||||
|
501,1382,1660,1070,83720
|
||||||
|
502,1588,1906,450,46970
|
||||||
|
503,1918,2284,310,78020
|
||||||
|
504,1484,1774,880,45080
|
||||||
|
505,1334,1607,370,55160
|
||||||
|
506,1556,1846,760,72020
|
||||||
|
507,1784,2142,950,64010
|
||||||
|
508,1244,1477,890,27840
|
||||||
|
509,1496,1787,800,58070
|
||||||
|
510,1719,2058,700,51760
|
||||||
|
511,1678,2022,1050,66050
|
||||||
|
512,1247,1490,330,65750
|
||||||
|
513,1191,1421,980,65820
|
||||||
|
514,1832,2208,1090,46760
|
||||||
|
515,1271,1523,1140,50940
|
||||||
|
516,1735,2084,820,56440
|
||||||
|
517,1627,1948,890,32610
|
||||||
|
518,1351,1616,650,62770
|
||||||
|
519,1520,1817,850,63600
|
||||||
|
520,1490,1788,360,45840
|
||||||
|
521,1777,2117,780,38280
|
||||||
|
522,1688,2037,590,50960
|
||||||
|
523,1537,1836,670,39480
|
||||||
|
524,1622,1968,340,69610
|
||||||
|
525,1148,1384,730,47800
|
||||||
|
526,1001,1194,1210,44890
|
||||||
|
527,1857,2236,1280,67420
|
||||||
|
528,1552,1869,710,78870
|
||||||
|
529,1700,2064,940,70310
|
||||||
|
530,1554,1844,670,38530
|
||||||
|
531,1482,1777,800,77570
|
||||||
|
532,1275,1517,790,59920
|
||||||
|
533,1642,1981,720,54450
|
||||||
|
534,1381,1633,1270,50250
|
||||||
|
535,1381,1634,930,30790
|
||||||
|
536,1057,1262,1490,35420
|
||||||
|
537,1192,1445,810,43470
|
||||||
|
538,1601,1920,600,61000
|
||||||
|
539,1622,1968,210,64780
|
||||||
|
540,1607,1909,460,39030
|
||||||
|
541,2214,2647,740,65900
|
||||||
|
542,1633,1936,1320,46050
|
||||||
|
543,1546,1845,760,59070
|
||||||
|
544,1475,1753,920,44670
|
||||||
|
545,1270,1519,920,58390
|
||||||
|
546,1185,1420,880,80370
|
||||||
|
547,1614,1938,1110,53230
|
||||||
|
548,1141,1353,1370,72000
|
||||||
|
549,1244,1481,410,84040
|
||||||
|
550,869,1050,850,52540
|
||||||
|
551,2049,2465,720,63510
|
||||||
|
552,1883,2262,570,42240
|
||||||
|
553,1526,1842,690,39580
|
||||||
|
554,1165,1390,1220,54610
|
||||||
|
555,1832,2185,840,87330
|
||||||
|
556,1723,2072,560,88410
|
||||||
|
557,932,1138,820,89760
|
||||||
|
558,1137,1374,700,101780
|
||||||
|
559,1231,1472,810,70290
|
||||||
|
560,1237,1512,1070,88210
|
||||||
|
561,1371,1650,540,87160
|
||||||
|
562,1767,2158,530,41540
|
||||||
|
563,1748,2092,580,49170
|
||||||
|
564,1212,1440,500,63950
|
||||||
|
565,1466,1743,1200,70810
|
||||||
|
566,1152,1386,980,49590
|
||||||
|
567,1439,1703,1000,67290
|
||||||
|
568,2026,2400,720,51240
|
||||||
|
569,1772,2146,1030,48540
|
||||||
|
570,1511,1822,420,72410
|
||||||
|
571,1199,1461,1070,54370
|
||||||
|
572,1834,2184,830,94460
|
||||||
|
573,1143,1375,940,85160
|
||||||
|
574,1494,1794,550,52130
|
||||||
|
575,1770,2131,1140,54650
|
||||||
|
576,1455,1747,750,69320
|
||||||
|
577,1141,1372,620,51480
|
||||||
|
578,1586,1886,660,50060
|
||||||
|
579,1701,2034,660,62180
|
||||||
|
580,1860,2246,410,79780
|
||||||
|
581,1167,1406,440,42860
|
||||||
|
582,1424,1716,630,54410
|
||||||
|
583,1710,2053,730,69390
|
||||||
|
584,1408,1708,220,42810
|
||||||
|
585,1517,1831,610,30840
|
||||||
|
586,1227,1476,720,56260
|
||||||
|
587,1609,1930,740,76470
|
||||||
|
588,1553,1831,740,35680
|
||||||
|
589,1814,2174,770,90070
|
||||||
|
590,1240,1493,590,33120
|
||||||
|
591,1206,1437,1330,54060
|
||||||
|
592,1847,2186,910,75120
|
||||||
|
593,1009,1202,330,41600
|
||||||
|
594,1624,1946,870,20270
|
||||||
|
595,1612,1931,790,60060
|
||||||
|
596,1498,1805,1270,82270
|
||||||
|
597,946,1125,590,29170
|
||||||
|
598,1563,1872,1080,68420
|
||||||
|
599,1664,2016,830,59130
|
||||||
|
600,1619,1947,910,74330
|
||||||
|
601,1433,1722,830,77080
|
||||||
|
602,1241,1489,1380,76250
|
||||||
|
603,1429,1720,1180,59540
|
||||||
|
604,1241,1488,770,54690
|
||||||
|
605,1078,1306,680,84360
|
||||||
|
606,1690,2065,910,51420
|
||||||
|
607,1289,1536,540,65120
|
||||||
|
608,1581,1894,760,49380
|
||||||
|
609,1608,1945,760,37830
|
||||||
|
610,1344,1608,730,35980
|
||||||
|
611,1513,1804,430,69190
|
||||||
|
612,1529,1839,1000,50590
|
||||||
|
613,1677,2014,660,60800
|
||||||
|
614,1015,1229,930,31180
|
||||||
|
615,1438,1751,760,77790
|
||||||
|
616,1426,1718,370,47570
|
||||||
|
617,1412,1701,630,69130
|
||||||
|
618,1622,1944,360,75970
|
||||||
|
619,1503,1791,630,68350
|
||||||
|
620,1501,1789,670,41680
|
||||||
|
621,1971,2342,690,86560
|
||||||
|
622,1383,1687,830,81390
|
||||||
|
623,1371,1635,720,50730
|
||||||
|
624,1508,1823,520,71290
|
||||||
|
625,1057,1284,750,70110
|
||||||
|
626,1411,1680,1070,61590
|
||||||
|
627,1466,1746,590,69370
|
||||||
|
628,1545,1888,600,67110
|
||||||
|
629,2044,2408,380,82020
|
||||||
|
630,1887,2264,830,62050
|
||||||
|
631,1505,1836,940,61730
|
||||||
|
632,1422,1722,560,58660
|
||||||
|
633,1564,1869,1030,53370
|
||||||
|
634,1510,1810,730,39700
|
||||||
|
635,1568,1920,890,53750
|
||||||
|
636,1933,2338,1140,44730
|
||||||
|
637,1501,1822,590,49350
|
||||||
|
638,1593,1911,580,43340
|
||||||
|
639,1812,2189,310,78090
|
||||||
|
640,1580,1895,720,54950
|
||||||
|
641,1440,1749,490,75530
|
||||||
|
642,1100,1331,1010,57330
|
||||||
|
643,1534,1841,680,87930
|
||||||
|
644,1299,1555,1020,56850
|
||||||
|
645,1767,2121,1050,78430
|
||||||
|
646,1368,1649,740,63660
|
||||||
|
647,1393,1670,410,62960
|
||||||
|
648,1327,1590,770,81870
|
||||||
|
649,1514,1794,1400,54820
|
||||||
|
650,1989,2414,860,116320
|
||||||
|
651,1334,1584,840,57200
|
||||||
|
652,1533,1817,950,84360
|
||||||
|
653,1809,2145,940,36530
|
||||||
|
654,1607,1933,930,81260
|
||||||
|
655,1165,1387,1060,82350
|
||||||
|
656,1193,1430,560,80830
|
||||||
|
657,1709,2065,670,30610
|
||||||
|
658,1525,1839,540,51310
|
||||||
|
659,1348,1623,1010,72940
|
||||||
|
660,1132,1366,1340,52450
|
||||||
|
661,1667,2020,980,66070
|
||||||
|
662,1427,1720,630,43190
|
||||||
|
663,1211,1447,1110,40730
|
||||||
|
664,1717,2048,700,78530
|
||||||
|
665,1766,2111,580,94690
|
||||||
|
666,1086,1299,1050,44400
|
||||||
|
667,1410,1692,790,73800
|
||||||
|
668,1476,1760,600,37390
|
||||||
|
669,1068,1278,440,64120
|
||||||
|
670,1485,1785,1340,66160
|
||||||
|
671,1461,1739,1250,22310
|
||||||
|
672,1685,2010,990,62380
|
||||||
|
673,1624,1958,290,63850
|
||||||
|
674,1658,2000,350,36210
|
||||||
|
675,1427,1677,210,54590
|
||||||
|
676,1755,2072,810,69610
|
||||||
|
677,1211,1472,790,65390
|
||||||
|
678,1591,1896,780,78130
|
||||||
|
679,1797,2126,730,55710
|
||||||
|
680,1519,1823,1040,69210
|
||||||
|
681,1637,1958,760,59940
|
||||||
|
682,1451,1750,570,72550
|
||||||
|
683,1203,1446,620,44260
|
||||||
|
684,1884,2262,310,56910
|
||||||
|
685,1540,1820,310,82390
|
||||||
|
686,1121,1332,790,54590
|
||||||
|
687,1307,1562,490,69990
|
||||||
|
688,1475,1775,230,72740
|
||||||
|
689,1160,1401,900,35360
|
||||||
|
690,1078,1276,640,94370
|
||||||
|
691,1191,1436,840,43520
|
||||||
|
692,1317,1569,780,36000
|
||||||
|
693,1548,1858,480,99480
|
||||||
|
694,1560,1883,1040,83220
|
||||||
|
695,1297,1529,870,52940
|
||||||
|
696,1645,1958,530,93360
|
||||||
|
697,1225,1455,750,73590
|
||||||
|
698,1421,1704,840,53840
|
||||||
|
699,1655,1956,800,47350
|
||||||
|
700,1615,1928,660,65080
|
||||||
|
701,1872,2262,560,62050
|
||||||
|
702,1317,1581,910,30020
|
||||||
|
703,1434,1729,480,49510
|
||||||
|
704,1791,2167,700,64320
|
||||||
|
705,932,1120,660,35590
|
||||||
|
706,1609,1924,1170,63050
|
||||||
|
707,1495,1793,1020,65300
|
||||||
|
708,1769,2153,580,69560
|
||||||
|
709,1693,2032,610,41910
|
||||||
|
710,1247,1497,590,28330
|
||||||
|
711,1502,1815,190,55980
|
||||||
|
712,1360,1612,490,61080
|
||||||
|
713,1542,1844,680,51380
|
||||||
|
714,1631,1947,670,84410
|
||||||
|
715,1246,1482,1070,60680
|
||||||
|
716,1990,2384,1110,64690
|
||||||
|
717,967,1154,560,45780
|
||||||
|
718,1582,1894,1100,41800
|
||||||
|
719,1430,1743,970,53230
|
||||||
|
720,1827,2160,930,36160
|
||||||
|
721,1118,1338,1040,40450
|
||||||
|
722,1766,2109,1120,57910
|
||||||
|
723,1799,2173,910,36280
|
||||||
|
724,1167,1411,440,39190
|
||||||
|
725,1493,1795,530,62380
|
||||||
|
726,1445,1734,900,21470
|
||||||
|
727,1033,1237,740,34610
|
||||||
|
728,1440,1711,1020,88120
|
||||||
|
729,1487,1773,970,59190
|
||||||
|
730,1854,2205,890,36290
|
||||||
|
731,1748,2086,550,53760
|
||||||
|
732,1937,2310,520,66300
|
||||||
|
733,1641,1999,950,93000
|
||||||
|
734,1659,1999,650,65660
|
||||||
|
735,1743,2061,860,81930
|
||||||
|
736,1449,1733,320,60060
|
||||||
|
737,1098,1309,860,59530
|
||||||
|
738,1121,1351,900,46380
|
||||||
|
739,1526,1858,550,76200
|
||||||
|
740,1358,1645,770,56860
|
||||||
|
741,1336,1616,710,86620
|
||||||
|
742,1502,1802,840,49730
|
||||||
|
743,1534,1858,860,88370
|
||||||
|
744,1418,1699,870,49160
|
||||||
|
745,854,1018,660,77740
|
||||||
|
746,1450,1728,930,38560
|
||||||
|
747,1474,1776,1020,51990
|
||||||
|
748,1524,1819,1190,39970
|
||||||
|
749,1361,1638,1140,46040
|
||||||
|
750,1398,1683,490,49500
|
||||||
|
751,1085,1308,1170,76670
|
||||||
|
752,1660,1979,480,75800
|
||||||
|
753,1648,2017,930,81720
|
||||||
|
754,1453,1749,890,58440
|
||||||
|
755,1323,1591,680,85720
|
||||||
|
756,1385,1643,740,70940
|
||||||
|
757,1250,1506,990,62420
|
||||||
|
758,1389,1683,680,56880
|
||||||
|
759,1486,1758,820,101820
|
||||||
|
760,1655,1993,440,86890
|
||||||
|
761,1645,1963,900,47300
|
||||||
|
762,1464,1771,1080,31270
|
||||||
|
763,1197,1428,830,65410
|
||||||
|
764,1878,2264,310,54200
|
||||||
|
765,1150,1378,730,67390
|
||||||
|
766,1562,1881,740,54530
|
||||||
|
767,1596,1939,960,79760
|
||||||
|
768,1119,1345,790,78060
|
||||||
|
769,1116,1347,700,74080
|
||||||
|
770,1934,2349,750,52990
|
||||||
|
771,1299,1540,590,70580
|
||||||
|
772,1417,1689,570,34310
|
||||||
|
773,1235,1503,660,74160
|
||||||
|
774,1497,1815,700,59190
|
||||||
|
775,1430,1704,1070,43370
|
||||||
|
776,1537,1877,660,17670
|
||||||
|
777,1444,1742,840,56710
|
||||||
|
778,1477,1798,850,59820
|
||||||
|
779,1041,1246,600,36190
|
||||||
|
780,1226,1472,710,60440
|
||||||
|
781,1489,1783,450,75300
|
||||||
|
782,1549,1871,740,74080
|
||||||
|
783,1073,1280,1240,60440
|
||||||
|
784,1473,1785,570,80720
|
||||||
|
785,2013,2396,580,47060
|
||||||
|
786,1975,2368,450,86830
|
||||||
|
787,1561,1877,790,56790
|
||||||
|
788,1427,1723,1040,67090
|
||||||
|
789,1441,1747,670,44370
|
||||||
|
790,1275,1548,370,82970
|
||||||
|
791,1574,1876,620,56230
|
||||||
|
792,1511,1791,1010,53760
|
||||||
|
793,1428,1713,550,55390
|
||||||
|
794,1388,1672,800,73500
|
||||||
|
795,1057,1280,610,41050
|
||||||
|
796,1440,1747,1090,67320
|
||||||
|
797,1349,1610,700,65890
|
||||||
|
798,1536,1808,830,56380
|
||||||
|
799,2019,2420,850,85670
|
||||||
|
800,1236,1508,1260,70830
|
||||||
|
801,1436,1715,1030,48180
|
||||||
|
802,1862,2248,1160,51910
|
||||||
|
803,1200,1442,880,44320
|
||||||
|
804,1360,1650,420,58940
|
||||||
|
805,1722,2078,770,73610
|
||||||
|
806,1577,1902,910,54060
|
||||||
|
807,1850,2214,1110,85000
|
||||||
|
808,1447,1730,510,49030
|
||||||
|
809,1496,1800,780,63300
|
||||||
|
810,1679,2008,790,84300
|
||||||
|
811,994,1194,1090,81390
|
||||||
|
812,1354,1635,1270,95900
|
||||||
|
813,1597,1918,1260,71830
|
||||||
|
814,1873,2252,330,79310
|
||||||
|
815,1218,1459,540,87890
|
||||||
|
816,1458,1746,720,48610
|
||||||
|
817,1546,1860,670,73160
|
||||||
|
818,1608,1962,770,36280
|
||||||
|
819,1822,2160,860,49720
|
||||||
|
820,1716,2038,410,44400
|
||||||
|
821,1072,1296,900,47590
|
||||||
|
822,1330,1604,480,51460
|
||||||
|
823,1588,1892,540,57750
|
||||||
|
824,1425,1733,760,66000
|
||||||
|
825,1778,2133,280,45950
|
||||||
|
826,1363,1630,1120,53900
|
||||||
|
827,1609,1928,160,37920
|
||||||
|
828,1671,2024,620,63100
|
||||||
|
829,1379,1636,440,36770
|
||||||
|
830,1218,1452,870,43910
|
||||||
|
831,1724,2101,900,66390
|
||||||
|
832,986,1179,710,59160
|
||||||
|
833,1330,1606,590,38510
|
||||||
|
834,1437,1725,910,46220
|
||||||
|
835,1327,1609,1320,41500
|
||||||
|
836,1651,2009,1000,58160
|
||||||
|
837,1211,1462,670,38530
|
||||||
|
838,1916,2277,610,55880
|
||||||
|
839,1638,1937,1050,70940
|
||||||
|
840,1172,1413,480,53940
|
||||||
|
841,1350,1606,770,43030
|
||||||
|
842,1528,1843,340,59820
|
||||||
|
843,1305,1557,580,55500
|
||||||
|
844,1463,1751,900,49990
|
||||||
|
845,1409,1727,700,42980
|
||||||
|
846,1419,1743,860,65970
|
||||||
|
847,1535,1819,540,59290
|
||||||
|
848,1474,1745,970,63020
|
||||||
|
849,919,1099,1560,73810
|
||||||
|
850,2067,2492,790,70230
|
||||||
|
851,1977,2362,1020,59950
|
||||||
|
852,1293,1558,790,78100
|
||||||
|
853,1477,1790,880,16370
|
||||||
|
854,1582,1906,550,92640
|
||||||
|
855,1481,1789,550,63540
|
||||||
|
856,1214,1455,950,87220
|
||||||
|
857,1206,1460,810,41990
|
||||||
|
858,1653,1982,390,79410
|
||||||
|
859,1152,1393,860,54380
|
||||||
|
860,1458,1757,850,58600
|
||||||
|
861,1249,1510,660,48950
|
||||||
|
862,1939,2333,830,40670
|
||||||
|
863,1591,1919,640,52340
|
||||||
|
864,1180,1397,750,39140
|
||||||
|
865,1846,2195,1170,41090
|
||||||
|
866,780,951,790,25600
|
||||||
|
867,1565,1854,900,100900
|
||||||
|
868,1648,1959,370,77080
|
||||||
|
869,1775,2104,980,105150
|
||||||
|
870,1439,1732,1170,80580
|
||||||
|
871,1487,1776,800,46230
|
||||||
|
872,1800,2158,1100,98260
|
||||||
|
873,1690,2024,1070,75930
|
||||||
|
874,1209,1452,830,52050
|
||||||
|
875,1859,2222,1210,87000
|
||||||
|
876,1691,2023,540,60270
|
||||||
|
877,1259,1493,100,88270
|
||||||
|
878,1771,2138,820,57820
|
||||||
|
879,1205,1468,1210,61210
|
||||||
|
880,1792,2131,810,76420
|
||||||
|
881,1263,1516,780,70980
|
||||||
|
882,1344,1605,1160,76740
|
||||||
|
883,1819,2187,590,47920
|
||||||
|
884,1357,1625,1140,52160
|
||||||
|
885,1396,1673,690,32740
|
||||||
|
886,1118,1337,560,72270
|
||||||
|
887,1655,1986,1150,77430
|
||||||
|
888,1156,1398,140,92370
|
||||||
|
889,1451,1734,670,34880
|
||||||
|
890,1539,1829,650,46580
|
||||||
|
891,1549,1851,1220,70620
|
||||||
|
892,1582,1910,1080,66390
|
||||||
|
893,1387,1663,850,82080
|
||||||
|
894,1200,1436,1060,76440
|
||||||
|
895,1299,1560,770,96610
|
||||||
|
896,1174,1429,1110,54340
|
|
21614
lab_2/datasetlab2/kc_house_data.csv
Normal file
21614
lab_2/datasetlab2/kc_house_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
599
lab_2/lab2.ipynb
599
lab_2/lab2.ipynb
@ -0,0 +1,599 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"**Lab2 Pibd-31 Malafeev**"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"*Загрузка трёх других датасетов (не своего варианта)*"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 97,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
||||||
|
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
||||||
|
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Далее в Markdown будут выполнены пункты 2-8 лабораторной с пометкой каждого пункта."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"2.Проблемными областями являются: датасет Stores.csv - магазины, kc_house_data.csv - датасет продажи домов и Forbes Billionaires.csv - датасет миллиардеров."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"3.Объектами наблюдения являются магазины, дома и миллиардеры. Связи между объектами нет, единственная схожесть - магазин и дом являются зданиями."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"4.Датасет миллиардеров нужен, например, для сайта Forbes - чтобы составить тир-лист. В целом, другие датасеты тоже подходят для составления тир-листа."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"5.Технический проект - тир-лист, на входе датасет."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"6.Пункт будем выполнять в коде, к каждому фрагменту оставлю комментарии:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 98,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"id 0\n",
|
||||||
|
"date 0\n",
|
||||||
|
"price 0\n",
|
||||||
|
"bedrooms 0\n",
|
||||||
|
"bathrooms 0\n",
|
||||||
|
"sqft_living 0\n",
|
||||||
|
"sqft_lot 0\n",
|
||||||
|
"floors 0\n",
|
||||||
|
"waterfront 0\n",
|
||||||
|
"view 0\n",
|
||||||
|
"condition 0\n",
|
||||||
|
"grade 0\n",
|
||||||
|
"sqft_above 0\n",
|
||||||
|
"sqft_basement 0\n",
|
||||||
|
"yr_built 0\n",
|
||||||
|
"yr_renovated 0\n",
|
||||||
|
"zipcode 0\n",
|
||||||
|
"lat 0\n",
|
||||||
|
"long 0\n",
|
||||||
|
"sqft_living15 0\n",
|
||||||
|
"sqft_lot15 0\n",
|
||||||
|
"dtype: int64\n",
|
||||||
|
"Store ID 0\n",
|
||||||
|
"Store_Area 0\n",
|
||||||
|
"Items_Available 0\n",
|
||||||
|
"Daily_Customer_Count 0\n",
|
||||||
|
"Store_Sales 0\n",
|
||||||
|
"dtype: int64\n",
|
||||||
|
"Rank 0\n",
|
||||||
|
"Name 0\n",
|
||||||
|
"Networth 0\n",
|
||||||
|
"Age 0\n",
|
||||||
|
"Country 0\n",
|
||||||
|
"Source 0\n",
|
||||||
|
"Industry 0\n",
|
||||||
|
"dtype: int64\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Проверка на пропущенные значения\n",
|
||||||
|
"print(df.isnull().sum())\n",
|
||||||
|
"print(df2.isnull().sum())\n",
|
||||||
|
"print(df3.isnull().sum())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 99,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" id price bedrooms bathrooms sqft_living \\\n",
|
||||||
|
"count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
|
||||||
|
"mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
|
||||||
|
"std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
|
||||||
|
"min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
|
||||||
|
"25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
|
||||||
|
"50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
|
||||||
|
"75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
|
||||||
|
"max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
|
||||||
|
"\n",
|
||||||
|
" sqft_lot floors waterfront view condition \\\n",
|
||||||
|
"count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||||
|
"mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
|
||||||
|
"std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
|
||||||
|
"min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
|
||||||
|
"25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
|
||||||
|
"50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
|
||||||
|
"75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
|
||||||
|
"max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
|
||||||
|
"\n",
|
||||||
|
" grade sqft_above sqft_basement yr_built yr_renovated \\\n",
|
||||||
|
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||||
|
"mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
|
||||||
|
"std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
|
||||||
|
"min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
|
||||||
|
"25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
|
||||||
|
"50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
|
||||||
|
"75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
|
||||||
|
"max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
|
||||||
|
"\n",
|
||||||
|
" zipcode lat long sqft_living15 sqft_lot15 \n",
|
||||||
|
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||||
|
"mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
|
||||||
|
"std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
|
||||||
|
"min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
|
||||||
|
"25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
|
||||||
|
"50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
|
||||||
|
"75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
|
||||||
|
"max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 \n",
|
||||||
|
" Store ID Store_Area Items_Available Daily_Customer_Count \\\n",
|
||||||
|
"count 896.000000 896.000000 896.000000 896.000000 \n",
|
||||||
|
"mean 448.500000 1485.409598 1782.035714 786.350446 \n",
|
||||||
|
"std 258.797218 250.237011 299.872053 265.389281 \n",
|
||||||
|
"min 1.000000 775.000000 932.000000 10.000000 \n",
|
||||||
|
"25% 224.750000 1316.750000 1575.500000 600.000000 \n",
|
||||||
|
"50% 448.500000 1477.000000 1773.500000 780.000000 \n",
|
||||||
|
"75% 672.250000 1653.500000 1982.750000 970.000000 \n",
|
||||||
|
"max 896.000000 2229.000000 2667.000000 1560.000000 \n",
|
||||||
|
"\n",
|
||||||
|
" Store_Sales \n",
|
||||||
|
"count 896.000000 \n",
|
||||||
|
"mean 59351.305804 \n",
|
||||||
|
"std 17190.741895 \n",
|
||||||
|
"min 14920.000000 \n",
|
||||||
|
"25% 46530.000000 \n",
|
||||||
|
"50% 58605.000000 \n",
|
||||||
|
"75% 71872.500000 \n",
|
||||||
|
"max 116320.000000 \n",
|
||||||
|
" Rank Networth Age\n",
|
||||||
|
"count 2600.000000 2600.000000 2600.000000\n",
|
||||||
|
"mean 1269.570769 4.860750 64.271923\n",
|
||||||
|
"std 728.146364 10.659671 13.220607\n",
|
||||||
|
"min 1.000000 1.000000 19.000000\n",
|
||||||
|
"25% 637.000000 1.500000 55.000000\n",
|
||||||
|
"50% 1292.000000 2.400000 64.000000\n",
|
||||||
|
"75% 1929.000000 4.500000 74.000000\n",
|
||||||
|
"max 2578.000000 219.000000 100.000000\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Статистика по числовым данным для выявления аномальных распределений\n",
|
||||||
|
"print(df.describe())\n",
|
||||||
|
"print(df2.describe())\n",
|
||||||
|
"print(df3.describe())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"7.По первым трём строкам кода, т.е. после проверки на пропущенные значения, выявлено, что их нет. А дальше я обнаружил аномалию: в датасете миллиардеров есть столбец Networth - чистое состояние во всех формах (в миллиардах), в этом столбце минимальное значение равно единице, медиана в районе 2.4, среднее около 4.86, а максимальное - 219, то есть распределение сильно скошено вправо. В ЭТОМ СТОЛБЦЕ АНОМАЛИЯ."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"8.Наши датасеты довольно информативные. Например, в датасете миллиардеров можно посмотреть ФИО, сколько у человека денег и чем он занимается. Датасет по продаже домов гораздо информативнее, является лидером по наполненности и соответствует реальности. А вот датасет магазинов слабоват, можно, например, добавить: количество филиалов, работников, прибыль."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"9.Возьмём датасет магазинов, будем удалять строки, где площадь ниже 1500 (по ТЗ надо)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 100,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales\n",
|
||||||
|
"0 1 1659 1961 530 66490\n",
|
||||||
|
"4 5 1770 2111 450 46620\n",
|
||||||
|
"6 7 1542 1858 1030 72240\n",
|
||||||
|
"11 12 1751 2098 720 57620\n",
|
||||||
|
"12 13 1746 2064 1050 60470\n",
|
||||||
|
".. ... ... ... ... ...\n",
|
||||||
|
"882 883 1819 2187 590 47920\n",
|
||||||
|
"886 887 1655 1986 1150 77430\n",
|
||||||
|
"889 890 1539 1829 650 46580\n",
|
||||||
|
"890 891 1549 1851 1220 70620\n",
|
||||||
|
"891 892 1582 1910 1080 66390\n",
|
||||||
|
"\n",
|
||||||
|
"[415 rows x 5 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df2_filtered = df2[df2['Store_Area'] >= 1500]\n",
|
||||||
|
"print(df2_filtered)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Теперь в датасете домов price заменим у всех на константное значение - 1 500 000"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 101,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" id date price bedrooms bathrooms sqft_living \\\n",
|
||||||
|
"0 7129300520 20141013T000000 1500000 3 1.00 1180 \n",
|
||||||
|
"1 6414100192 20141209T000000 1500000 3 2.25 2570 \n",
|
||||||
|
"2 5631500400 20150225T000000 1500000 2 1.00 770 \n",
|
||||||
|
"3 2487200875 20141209T000000 1500000 4 3.00 1960 \n",
|
||||||
|
"4 1954400510 20150218T000000 1500000 3 2.00 1680 \n",
|
||||||
|
"... ... ... ... ... ... ... \n",
|
||||||
|
"21608 263000018 20140521T000000 1500000 3 2.50 1530 \n",
|
||||||
|
"21609 6600060120 20150223T000000 1500000 4 2.50 2310 \n",
|
||||||
|
"21610 1523300141 20140623T000000 1500000 2 0.75 1020 \n",
|
||||||
|
"21611 291310100 20150116T000000 1500000 3 2.50 1600 \n",
|
||||||
|
"21612 1523300157 20141015T000000 1500000 2 0.75 1020 \n",
|
||||||
|
"\n",
|
||||||
|
" sqft_lot floors waterfront view ... grade sqft_above \\\n",
|
||||||
|
"0 5650 1.0 0 0 ... 7 1180 \n",
|
||||||
|
"1 7242 2.0 0 0 ... 7 2170 \n",
|
||||||
|
"2 10000 1.0 0 0 ... 6 770 \n",
|
||||||
|
"3 5000 1.0 0 0 ... 7 1050 \n",
|
||||||
|
"4 8080 1.0 0 0 ... 8 1680 \n",
|
||||||
|
"... ... ... ... ... ... ... ... \n",
|
||||||
|
"21608 1131 3.0 0 0 ... 8 1530 \n",
|
||||||
|
"21609 5813 2.0 0 0 ... 8 2310 \n",
|
||||||
|
"21610 1350 2.0 0 0 ... 7 1020 \n",
|
||||||
|
"21611 2388 2.0 0 0 ... 8 1600 \n",
|
||||||
|
"21612 1076 2.0 0 0 ... 7 1020 \n",
|
||||||
|
"\n",
|
||||||
|
" sqft_basement yr_built yr_renovated zipcode lat long \\\n",
|
||||||
|
"0 0 1955 0 98178 47.5112 -122.257 \n",
|
||||||
|
"1 400 1951 1991 98125 47.7210 -122.319 \n",
|
||||||
|
"2 0 1933 0 98028 47.7379 -122.233 \n",
|
||||||
|
"3 910 1965 0 98136 47.5208 -122.393 \n",
|
||||||
|
"4 0 1987 0 98074 47.6168 -122.045 \n",
|
||||||
|
"... ... ... ... ... ... ... \n",
|
||||||
|
"21608 0 2009 0 98103 47.6993 -122.346 \n",
|
||||||
|
"21609 0 2014 0 98146 47.5107 -122.362 \n",
|
||||||
|
"21610 0 2009 0 98144 47.5944 -122.299 \n",
|
||||||
|
"21611 0 2004 0 98027 47.5345 -122.069 \n",
|
||||||
|
"21612 0 2008 0 98144 47.5941 -122.299 \n",
|
||||||
|
"\n",
|
||||||
|
" sqft_living15 sqft_lot15 \n",
|
||||||
|
"0 1340 5650 \n",
|
||||||
|
"1 1690 7639 \n",
|
||||||
|
"2 2720 8062 \n",
|
||||||
|
"3 1360 5000 \n",
|
||||||
|
"4 1800 7503 \n",
|
||||||
|
"... ... ... \n",
|
||||||
|
"21608 1530 1509 \n",
|
||||||
|
"21609 1830 7200 \n",
|
||||||
|
"21610 1020 2007 \n",
|
||||||
|
"21611 1410 1287 \n",
|
||||||
|
"21612 1020 1357 \n",
|
||||||
|
"\n",
|
||||||
|
"[21613 rows x 21 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df['price'] = 1500000\n",
|
||||||
|
"print(df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Теперь у миллиардеров в Networth подставим среднее по столбцу:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 102,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" Networth\n",
|
||||||
|
"0 4.86075\n",
|
||||||
|
"1 4.86075\n",
|
||||||
|
"2 4.86075\n",
|
||||||
|
"3 4.86075\n",
|
||||||
|
"4 4.86075\n",
|
||||||
|
"... ...\n",
|
||||||
|
"2595 4.86075\n",
|
||||||
|
"2596 4.86075\n",
|
||||||
|
"2597 4.86075\n",
|
||||||
|
"2598 4.86075\n",
|
||||||
|
"2599 4.86075\n",
|
||||||
|
"\n",
|
||||||
|
"[2600 rows x 1 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"networth_mean = df3['Networth'].mean()\n",
|
||||||
|
"df3['Networth'] = networth_mean\n",
|
||||||
|
"print(df3[['Networth']])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"10.КОД"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 103,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Train df: (15129, 21), Validation df: (3242, 21), Test df: (3242, 21)\n",
|
||||||
|
"Train df2: (627, 5), Validation df2: (134, 5), Test df2: (135, 5)\n",
|
||||||
|
"Train df3: (1820, 7), Validation df3: (390, 7), Test df3: (390, 7)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)\n",
|
||||||
|
"val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42) \n",
|
||||||
|
"\n",
|
||||||
|
"train_df2, temp_df2 = train_test_split(df2, test_size=0.3, random_state=42)\n",
|
||||||
|
"val_df2, test_df2 = train_test_split(temp_df2, test_size=0.5, random_state=42)\n",
|
||||||
|
"\n",
|
||||||
|
"train_df3, temp_df3 = train_test_split(df3, test_size=0.3, random_state=42)\n",
|
||||||
|
"val_df3, test_df3 = train_test_split(temp_df3, test_size=0.5, random_state=42)\n",
|
||||||
|
"print(f\"Train df: {train_df.shape}, Validation df: {val_df.shape}, Test df: {test_df.shape}\")\n",
|
||||||
|
"print(f\"Train df2: {train_df2.shape}, Validation df2: {val_df2.shape}, Test df2: {test_df2.shape}\")\n",
|
||||||
|
"print(f\"Train df3: {train_df3.shape}, Validation df3: {val_df3.shape}, Test df3: {test_df3.shape}\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Было сделано разбиение на три выборки: 70%, 15% и 15%. Была подключена библиотека scikit-learn и функция train_test_split, как сказано в пункте 15. Выборки вполне сбалансированные."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"12.Устанавливаем библиотеку imbalanced-learn, импортируем RandomOverSampler и RandomUnderSampler и выполняем oversampling и undersampling:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 104,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Class distribution after oversampling (df):\n",
|
||||||
|
"price_category\n",
|
||||||
|
"Low 10787\n",
|
||||||
|
"Medium 10787\n",
|
||||||
|
"High 10787\n",
|
||||||
|
"Luxury 10787\n",
|
||||||
|
"Name: count, dtype: int64\n",
|
||||||
|
"Class distribution after undersampling (df):\n",
|
||||||
|
"price_category\n",
|
||||||
|
"Low 1465\n",
|
||||||
|
"Medium 1465\n",
|
||||||
|
"High 1465\n",
|
||||||
|
"Luxury 1465\n",
|
||||||
|
"Name: count, dtype: int64\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from imblearn.over_sampling import RandomOverSampler\n",
|
||||||
|
"from imblearn.under_sampling import RandomUnderSampler\n",
|
||||||
|
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
||||||
|
"df['price_category'] = pd.cut(df['price'], bins=[0, 300000, 600000, 1000000, float('inf')],\n",
|
||||||
|
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
||||||
|
"\n",
|
||||||
|
"y = df['price_category']\n",
|
||||||
|
"X = df.drop(columns=['price', 'price_category'])\n",
|
||||||
|
"\n",
|
||||||
|
"oversampler = RandomOverSampler(random_state=42)\n",
|
||||||
|
"X_resampled, y_resampled = oversampler.fit_resample(X, y)\n",
|
||||||
|
"\n",
|
||||||
|
"undersampler = RandomUnderSampler(random_state=42)\n",
|
||||||
|
"X_resampled_under, y_resampled_under = undersampler.fit_resample(X, y)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after oversampling (df):\")\n",
|
||||||
|
"print(pd.Series(y_resampled).value_counts())\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after undersampling (df):\")\n",
|
||||||
|
"print(pd.Series(y_resampled_under).value_counts())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 105,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Class distribution after oversampling (df3):\n",
|
||||||
|
"AGE_category\n",
|
||||||
|
"Young 1401\n",
|
||||||
|
"Middle-aged 1401\n",
|
||||||
|
"Senior 1401\n",
|
||||||
|
"Elderly 1401\n",
|
||||||
|
"Name: count, dtype: int64\n",
|
||||||
|
"Class distribution after undersampling (df3):\n",
|
||||||
|
"AGE_category\n",
|
||||||
|
"Young 15\n",
|
||||||
|
"Middle-aged 15\n",
|
||||||
|
"Senior 15\n",
|
||||||
|
"Elderly 15\n",
|
||||||
|
"Name: count, dtype: int64\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")\n",
|
||||||
|
"\n",
|
||||||
|
"df3['AGE_category'] = pd.cut(df3['Age'], bins=[0, 30, 50, 70, float('inf')],\n",
|
||||||
|
" labels=['Young', 'Middle-aged', 'Senior', 'Elderly'])\n",
|
||||||
|
"\n",
|
||||||
|
"y3 = df3['AGE_category']\n",
|
||||||
|
"X3 = df3.drop(columns=['Age', 'AGE_category'])\n",
|
||||||
|
"\n",
|
||||||
|
"oversampler3 = RandomOverSampler(random_state=42)\n",
|
||||||
|
"X_resampled_3, y_resampled_3 = oversampler3.fit_resample(X3, y3)\n",
|
||||||
|
"\n",
|
||||||
|
"undersampler3 = RandomUnderSampler(random_state=42)\n",
|
||||||
|
"X_resampled_3_under, y_resampled_3_under = undersampler3.fit_resample(X3, y3)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after oversampling (df3):\")\n",
|
||||||
|
"print(pd.Series(y_resampled_3).value_counts())\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after undersampling (df3):\")\n",
|
||||||
|
"print(pd.Series(y_resampled_3_under).value_counts())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 106,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Class distribution after oversampling (df2):\n",
|
||||||
|
"Sales_category\n",
|
||||||
|
"Low 598\n",
|
||||||
|
"Medium 598\n",
|
||||||
|
"High 598\n",
|
||||||
|
"Luxury 0\n",
|
||||||
|
"Name: count, dtype: int64\n",
|
||||||
|
"Class distribution after undersampling (df2):\n",
|
||||||
|
"Sales_category\n",
|
||||||
|
"Low 7\n",
|
||||||
|
"Medium 7\n",
|
||||||
|
"High 7\n",
|
||||||
|
"Luxury 0\n",
|
||||||
|
"Name: count, dtype: int64\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
||||||
|
"\n",
|
||||||
|
"df2['Sales_category'] = pd.cut(df2['Store_Sales'], bins=[0, 50000, 100000, 200000, float('inf')],\n",
|
||||||
|
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
||||||
|
"\n",
|
||||||
|
"y2 = df2['Sales_category']\n",
|
||||||
|
"X2 = df2.drop(columns=['Store_Sales', 'Sales_category'])\n",
|
||||||
|
"\n",
|
||||||
|
"oversampler2 = RandomOverSampler(random_state=42)\n",
|
||||||
|
"X_resampled_2, y_resampled_2 = oversampler2.fit_resample(X2, y2)\n",
|
||||||
|
"\n",
|
||||||
|
"undersampler2 = RandomUnderSampler(random_state=42)\n",
|
||||||
|
"X_resampled_2_under, y_resampled_2_under = undersampler2.fit_resample(X2, y2)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after oversampling (df2):\")\n",
|
||||||
|
"print(pd.Series(y_resampled_2).value_counts())\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Class distribution after undersampling (df2):\")\n",
|
||||||
|
"print(pd.Series(y_resampled_2_under).value_counts())"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "miivenv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user