Compare commits
No commits in common. "main" and "lab1" have entirely different histories.
File diff suppressed because it is too large
Load Diff
@ -1,897 +0,0 @@
|
|||||||
Store ID ,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
|
|
||||||
1,1659,1961,530,66490
|
|
||||||
2,1461,1752,210,39820
|
|
||||||
3,1340,1609,720,54010
|
|
||||||
4,1451,1748,620,53730
|
|
||||||
5,1770,2111,450,46620
|
|
||||||
6,1442,1733,760,45260
|
|
||||||
7,1542,1858,1030,72240
|
|
||||||
8,1261,1507,1020,37720
|
|
||||||
9,1090,1321,680,46310
|
|
||||||
10,1030,1235,1130,44150
|
|
||||||
11,1187,1439,1090,71280
|
|
||||||
12,1751,2098,720,57620
|
|
||||||
13,1746,2064,1050,60470
|
|
||||||
14,1615,1931,1160,59130
|
|
||||||
15,1469,1756,770,66360
|
|
||||||
16,1644,1950,790,78870
|
|
||||||
17,1578,1907,1440,77250
|
|
||||||
18,1703,2045,670,38170
|
|
||||||
19,1438,1731,1030,63540
|
|
||||||
20,1940,2340,980,40190
|
|
||||||
21,1421,1700,370,43460
|
|
||||||
22,1458,1746,690,68890
|
|
||||||
23,1719,2065,950,52780
|
|
||||||
24,1449,1752,620,50680
|
|
||||||
25,1234,1488,840,41880
|
|
||||||
26,1732,2073,820,70050
|
|
||||||
27,1475,1777,1100,25820
|
|
||||||
28,1390,1648,980,60530
|
|
||||||
29,1642,1943,710,78100
|
|
||||||
30,1715,2071,650,84860
|
|
||||||
31,1439,1746,990,80140
|
|
||||||
32,1250,1508,990,14920
|
|
||||||
33,1331,1608,880,60460
|
|
||||||
34,1784,2163,620,74560
|
|
||||||
35,1375,1648,1020,72430
|
|
||||||
36,1871,2230,700,45460
|
|
||||||
37,1442,1744,610,41570
|
|
||||||
38,1174,1411,1080,62870
|
|
||||||
39,1839,2204,1010,55170
|
|
||||||
40,1270,1516,10,45480
|
|
||||||
41,1435,1725,1250,49550
|
|
||||||
42,965,1152,600,48140
|
|
||||||
43,1665,2001,730,67640
|
|
||||||
44,1780,2117,780,39730
|
|
||||||
45,1009,1194,520,35800
|
|
||||||
46,1227,1471,870,49270
|
|
||||||
47,1769,2087,690,66510
|
|
||||||
48,1660,1982,910,62530
|
|
||||||
49,1472,1776,1260,59980
|
|
||||||
50,1408,1688,1040,76350
|
|
||||||
51,1514,1820,910,81820
|
|
||||||
52,1565,1880,1300,57830
|
|
||||||
53,1074,1288,320,70450
|
|
||||||
54,1864,2240,530,67000
|
|
||||||
55,1570,1898,980,64090
|
|
||||||
56,1417,1701,740,48670
|
|
||||||
57,1734,2060,1240,66210
|
|
||||||
58,1470,1763,1080,83660
|
|
||||||
59,1761,2104,1080,70770
|
|
||||||
60,1756,2070,460,53870
|
|
||||||
61,1704,2045,300,71300
|
|
||||||
62,2011,2391,530,46100
|
|
||||||
63,1472,1748,600,49100
|
|
||||||
64,1310,1561,860,65920
|
|
||||||
65,1544,1821,590,58660
|
|
||||||
66,1707,2052,920,69130
|
|
||||||
67,1881,2262,570,49080
|
|
||||||
68,1416,1681,290,72710
|
|
||||||
69,1631,1941,650,33430
|
|
||||||
70,1318,1576,710,42430
|
|
||||||
71,1692,2019,850,56650
|
|
||||||
72,1152,1380,530,33580
|
|
||||||
73,891,1073,630,67370
|
|
||||||
74,1468,1749,700,71780
|
|
||||||
75,1539,1833,650,84840
|
|
||||||
76,1635,1956,720,82070
|
|
||||||
77,1267,1520,450,26770
|
|
||||||
78,1250,1475,1390,65560
|
|
||||||
79,1720,2044,960,38660
|
|
||||||
80,1462,1761,600,65660
|
|
||||||
81,1431,1711,620,40700
|
|
||||||
82,1539,1858,1020,88910
|
|
||||||
83,1441,1723,330,57860
|
|
||||||
84,1572,1884,1410,42670
|
|
||||||
85,1287,1525,1200,90180
|
|
||||||
86,1468,1760,280,51280
|
|
||||||
87,1931,2342,940,97260
|
|
||||||
88,1252,1506,850,39650
|
|
||||||
89,1238,1468,960,45720
|
|
||||||
90,1479,1758,420,42060
|
|
||||||
91,1590,1912,830,65350
|
|
||||||
92,2169,2617,600,67080
|
|
||||||
93,1838,2205,400,54030
|
|
||||||
94,1385,1655,760,56360
|
|
||||||
95,1921,2305,1470,77120
|
|
||||||
96,1975,2385,500,50810
|
|
||||||
97,1853,2235,1120,60960
|
|
||||||
98,1816,2171,1160,61180
|
|
||||||
99,1785,2147,820,63660
|
|
||||||
100,1579,1899,1140,41190
|
|
||||||
101,1096,1321,900,78420
|
|
||||||
102,1919,2294,760,65580
|
|
||||||
103,1262,1500,1170,89080
|
|
||||||
104,1374,1655,1080,94170
|
|
||||||
105,1309,1587,1000,50950
|
|
||||||
106,1207,1434,690,65180
|
|
||||||
107,1692,2031,810,69310
|
|
||||||
108,1929,2311,630,79210
|
|
||||||
109,1573,1878,650,23740
|
|
||||||
110,1415,1700,920,36330
|
|
||||||
111,1162,1382,1260,51700
|
|
||||||
112,1485,1787,800,62950
|
|
||||||
113,1897,2248,1330,56010
|
|
||||||
114,1607,1927,940,45080
|
|
||||||
115,1909,2287,1210,46830
|
|
||||||
116,1274,1503,660,64750
|
|
||||||
117,1157,1379,770,80780
|
|
||||||
118,1712,2046,460,31180
|
|
||||||
119,1500,1798,860,56710
|
|
||||||
120,1682,2017,780,49390
|
|
||||||
121,1441,1727,890,66000
|
|
||||||
122,1525,1835,900,32770
|
|
||||||
123,1408,1669,530,46580
|
|
||||||
124,1947,2333,790,79780
|
|
||||||
125,1164,1390,370,35510
|
|
||||||
126,1787,2137,610,80970
|
|
||||||
127,1871,2241,500,61150
|
|
||||||
128,1718,2051,750,49210
|
|
||||||
129,1365,1636,980,79950
|
|
||||||
130,1368,1654,530,68740
|
|
||||||
131,1342,1595,910,57480
|
|
||||||
132,1076,1270,620,72630
|
|
||||||
133,1396,1672,1170,50070
|
|
||||||
134,1713,2071,900,40490
|
|
||||||
135,1370,1638,980,51850
|
|
||||||
136,1667,1993,740,42840
|
|
||||||
137,1638,1972,810,60940
|
|
||||||
138,1581,1905,810,62280
|
|
||||||
139,1795,2187,300,76530
|
|
||||||
140,1179,1412,790,85130
|
|
||||||
141,1978,2374,800,48590
|
|
||||||
142,1688,2042,760,73080
|
|
||||||
143,1214,1456,530,48950
|
|
||||||
144,1504,1805,540,48560
|
|
||||||
145,1498,1770,620,59380
|
|
||||||
146,1462,1762,1010,51190
|
|
||||||
147,1442,1750,130,58920
|
|
||||||
148,1250,1486,730,50360
|
|
||||||
149,1229,1480,830,38070
|
|
||||||
150,1936,2300,1060,49170
|
|
||||||
151,1369,1629,770,39740
|
|
||||||
152,1662,1986,70,63730
|
|
||||||
153,1548,1855,670,85330
|
|
||||||
154,1649,1963,490,27410
|
|
||||||
155,1393,1663,670,37320
|
|
||||||
156,1450,1734,380,71120
|
|
||||||
157,1613,1921,1200,72800
|
|
||||||
158,1408,1696,350,34410
|
|
||||||
159,775,932,1090,42530
|
|
||||||
160,1275,1534,1230,54300
|
|
||||||
161,1740,2078,680,50780
|
|
||||||
162,1372,1657,580,45020
|
|
||||||
163,1414,1723,680,69600
|
|
||||||
164,2044,2474,340,80340
|
|
||||||
165,1823,2176,700,37810
|
|
||||||
166,955,1133,580,46140
|
|
||||||
167,1465,1763,680,99570
|
|
||||||
168,1331,1606,630,38650
|
|
||||||
169,1232,1487,860,49800
|
|
||||||
170,1481,1765,490,69910
|
|
||||||
171,1343,1599,870,44910
|
|
||||||
172,1539,1837,990,78470
|
|
||||||
173,1007,1207,670,47460
|
|
||||||
174,1762,2145,490,33460
|
|
||||||
175,1527,1832,580,44090
|
|
||||||
176,1356,1619,700,42620
|
|
||||||
177,1536,1848,670,69450
|
|
||||||
178,1605,1902,390,73120
|
|
||||||
179,1704,2032,590,48300
|
|
||||||
180,1626,1941,1350,58090
|
|
||||||
181,1612,1939,840,74250
|
|
||||||
182,1174,1396,1100,40930
|
|
||||||
183,1923,2339,950,70930
|
|
||||||
184,1702,2053,950,64670
|
|
||||||
185,1398,1692,650,77420
|
|
||||||
186,1437,1717,230,32330
|
|
||||||
187,1524,1796,1060,41080
|
|
||||||
188,1660,1985,1180,42860
|
|
||||||
189,1302,1569,710,68450
|
|
||||||
190,1666,2000,480,39730
|
|
||||||
191,1391,1649,810,83750
|
|
||||||
192,1778,2148,1140,69940
|
|
||||||
193,1462,1770,1070,67710
|
|
||||||
194,1751,2115,790,67360
|
|
||||||
195,1652,1982,690,52460
|
|
||||||
196,1841,2215,610,88760
|
|
||||||
197,1496,1791,1240,67030
|
|
||||||
198,1504,1827,840,78230
|
|
||||||
199,1524,1808,460,62270
|
|
||||||
200,1148,1371,940,49760
|
|
||||||
201,1468,1744,590,73660
|
|
||||||
202,1310,1558,890,72320
|
|
||||||
203,1321,1579,770,68890
|
|
||||||
204,992,1192,900,34180
|
|
||||||
205,1540,1857,1020,58260
|
|
||||||
206,1807,2149,910,38120
|
|
||||||
207,1526,1853,660,49070
|
|
||||||
208,1406,1677,480,61660
|
|
||||||
209,1703,2055,1080,37830
|
|
||||||
210,1575,1872,690,52270
|
|
||||||
211,1309,1572,510,52280
|
|
||||||
212,1488,1807,1030,70810
|
|
||||||
213,1658,1988,370,71530
|
|
||||||
214,1863,2245,640,77260
|
|
||||||
215,1458,1725,750,75550
|
|
||||||
216,1604,1909,370,33730
|
|
||||||
217,1575,1899,840,66270
|
|
||||||
218,1525,1829,840,55820
|
|
||||||
219,1451,1737,890,68430
|
|
||||||
220,1390,1687,620,73990
|
|
||||||
221,1442,1742,310,62800
|
|
||||||
222,1620,1922,550,33740
|
|
||||||
223,1251,1527,380,63830
|
|
||||||
224,1318,1606,1200,24410
|
|
||||||
225,1647,1962,800,70020
|
|
||||||
226,1829,2175,870,92240
|
|
||||||
227,1852,2227,1220,68230
|
|
||||||
228,1699,2053,1080,81870
|
|
||||||
229,1325,1595,540,73860
|
|
||||||
230,1350,1634,880,77120
|
|
||||||
231,1347,1628,120,72350
|
|
||||||
232,1397,1661,1410,49160
|
|
||||||
233,1245,1499,570,45650
|
|
||||||
234,1366,1649,940,52780
|
|
||||||
235,1378,1658,760,90960
|
|
||||||
236,1767,2110,1200,64950
|
|
||||||
237,1184,1434,670,47230
|
|
||||||
238,1257,1505,950,83250
|
|
||||||
239,1863,2247,480,51950
|
|
||||||
240,1881,2244,920,66030
|
|
||||||
241,1329,1609,1150,68590
|
|
||||||
242,1539,1848,750,47140
|
|
||||||
243,1557,1861,370,69940
|
|
||||||
244,2007,2397,610,65890
|
|
||||||
245,1185,1418,1150,89310
|
|
||||||
246,1657,2003,1070,58540
|
|
||||||
247,1294,1539,790,78130
|
|
||||||
248,1296,1559,1070,92300
|
|
||||||
249,1733,2097,730,56170
|
|
||||||
250,1641,1976,620,46050
|
|
||||||
251,1373,1648,530,43390
|
|
||||||
252,1550,1845,700,61750
|
|
||||||
253,1583,1907,680,21830
|
|
||||||
254,1428,1719,1060,39800
|
|
||||||
255,1604,1925,670,54370
|
|
||||||
256,1439,1735,400,62470
|
|
||||||
257,1648,2003,910,82930
|
|
||||||
258,1025,1231,760,63720
|
|
||||||
259,2001,2394,540,79180
|
|
||||||
260,1145,1370,350,38210
|
|
||||||
261,1174,1426,980,25950
|
|
||||||
262,913,1106,860,56610
|
|
||||||
263,1199,1433,1020,73710
|
|
||||||
264,1875,2254,1120,70400
|
|
||||||
265,1153,1397,1020,50440
|
|
||||||
266,1240,1492,940,66840
|
|
||||||
267,1381,1660,970,50170
|
|
||||||
268,1701,2030,830,60140
|
|
||||||
269,1206,1456,920,37130
|
|
||||||
270,1476,1777,660,42890
|
|
||||||
271,1189,1439,780,26220
|
|
||||||
272,1837,2220,340,50840
|
|
||||||
273,1319,1571,1190,25630
|
|
||||||
274,1617,1901,490,60770
|
|
||||||
275,1631,1967,1090,69600
|
|
||||||
276,1517,1805,1040,41740
|
|
||||||
277,1764,2109,1210,50130
|
|
||||||
278,1572,1869,1030,21750
|
|
||||||
279,1855,2197,1170,80490
|
|
||||||
280,1327,1571,730,34020
|
|
||||||
281,1270,1515,720,60240
|
|
||||||
282,1734,2073,500,39460
|
|
||||||
283,1533,1848,1070,56440
|
|
||||||
284,1390,1646,800,46840
|
|
||||||
285,1856,2216,1020,64820
|
|
||||||
286,1000,1215,1070,52520
|
|
||||||
287,1313,1586,420,45940
|
|
||||||
288,1494,1799,510,38970
|
|
||||||
289,1386,1674,1210,58610
|
|
||||||
290,1979,2364,660,30810
|
|
||||||
291,1057,1264,360,47730
|
|
||||||
292,902,1093,1210,64640
|
|
||||||
293,1347,1622,560,44860
|
|
||||||
294,1314,1576,360,55660
|
|
||||||
295,1513,1803,970,57530
|
|
||||||
296,1305,1548,480,75200
|
|
||||||
297,1180,1436,690,37330
|
|
||||||
298,1142,1352,710,35280
|
|
||||||
299,1471,1768,780,70610
|
|
||||||
300,1075,1288,630,49720
|
|
||||||
301,1578,1885,220,68850
|
|
||||||
302,1585,1916,1110,50740
|
|
||||||
303,1391,1648,720,77070
|
|
||||||
304,1577,1892,560,74730
|
|
||||||
305,1092,1314,600,76530
|
|
||||||
306,1375,1681,440,68900
|
|
||||||
307,1523,1813,520,44960
|
|
||||||
308,1373,1654,410,41490
|
|
||||||
309,1550,1871,590,74320
|
|
||||||
310,1614,1946,740,73800
|
|
||||||
311,1566,1889,610,56400
|
|
||||||
312,2019,2396,540,71570
|
|
||||||
313,1494,1806,1450,43640
|
|
||||||
314,1659,2008,620,35120
|
|
||||||
315,1766,2131,340,58670
|
|
||||||
316,1293,1554,970,75800
|
|
||||||
317,1375,1659,1080,76640
|
|
||||||
318,1236,1484,560,31890
|
|
||||||
319,1332,1586,630,61670
|
|
||||||
320,1513,1825,980,75950
|
|
||||||
321,1208,1459,930,41490
|
|
||||||
322,1190,1429,470,66170
|
|
||||||
323,1448,1734,680,37980
|
|
||||||
324,1771,2147,430,62710
|
|
||||||
325,1365,1645,830,60470
|
|
||||||
326,1510,1810,950,35230
|
|
||||||
327,1458,1736,870,48550
|
|
||||||
328,1808,2157,730,56810
|
|
||||||
329,1615,1954,760,41080
|
|
||||||
330,1640,1948,960,51270
|
|
||||||
331,1060,1273,860,57500
|
|
||||||
332,1633,1968,330,81470
|
|
||||||
333,1222,1473,630,49570
|
|
||||||
334,1619,1957,1280,45580
|
|
||||||
335,1624,1973,1440,44660
|
|
||||||
336,1887,2278,570,76240
|
|
||||||
337,1320,1583,540,43720
|
|
||||||
338,1450,1750,480,46700
|
|
||||||
339,1455,1764,390,84690
|
|
||||||
340,966,1172,900,85470
|
|
||||||
341,1922,2290,290,80410
|
|
||||||
342,1678,1999,740,46650
|
|
||||||
343,1638,1952,690,81840
|
|
||||||
344,1145,1375,950,63590
|
|
||||||
345,2004,2390,930,50130
|
|
||||||
346,1954,2378,810,45820
|
|
||||||
347,1577,1879,760,86710
|
|
||||||
348,1766,2138,580,49980
|
|
||||||
349,1362,1634,770,82940
|
|
||||||
350,1886,2228,1530,40350
|
|
||||||
351,1291,1546,420,93950
|
|
||||||
352,1584,1897,1210,47310
|
|
||||||
353,1397,1686,850,21300
|
|
||||||
354,1445,1709,1340,62180
|
|
||||||
355,1433,1707,1160,61460
|
|
||||||
356,1269,1511,500,54360
|
|
||||||
357,1798,2134,820,72050
|
|
||||||
358,1514,1822,670,48090
|
|
||||||
359,1015,1216,460,27310
|
|
||||||
360,1495,1799,950,57160
|
|
||||||
361,1759,2095,980,34190
|
|
||||||
362,1219,1468,850,35600
|
|
||||||
363,1571,1877,580,54670
|
|
||||||
364,1404,1670,620,76730
|
|
||||||
365,1124,1369,650,63540
|
|
||||||
366,1514,1837,1130,36690
|
|
||||||
367,1207,1476,720,87370
|
|
||||||
368,1484,1774,940,59800
|
|
||||||
369,1398,1678,920,48030
|
|
||||||
370,1769,2112,660,96650
|
|
||||||
371,1111,1322,610,65500
|
|
||||||
372,1078,1305,1190,55530
|
|
||||||
373,1876,2254,1340,21650
|
|
||||||
374,1909,2306,820,31940
|
|
||||||
375,1940,2343,1130,84690
|
|
||||||
376,1391,1683,890,68390
|
|
||||||
377,1496,1774,810,75490
|
|
||||||
378,1412,1699,680,39200
|
|
||||||
379,1121,1345,320,85670
|
|
||||||
380,1691,2026,700,60530
|
|
||||||
381,1599,1946,940,78090
|
|
||||||
382,1208,1467,910,50720
|
|
||||||
383,1454,1737,870,23090
|
|
||||||
384,1555,1881,1260,91360
|
|
||||||
385,1554,1852,440,48120
|
|
||||||
386,1491,1800,980,75620
|
|
||||||
387,1415,1697,1200,39420
|
|
||||||
388,1487,1801,890,51130
|
|
||||||
389,1339,1589,1050,33890
|
|
||||||
390,1320,1562,610,87170
|
|
||||||
391,1509,1799,960,38600
|
|
||||||
392,1406,1680,860,60980
|
|
||||||
393,1264,1502,800,79410
|
|
||||||
394,1905,2280,1060,82350
|
|
||||||
395,1209,1464,600,36740
|
|
||||||
396,1546,1829,380,27720
|
|
||||||
397,1689,2044,1140,32260
|
|
||||||
398,1153,1381,730,53270
|
|
||||||
399,2063,2493,810,51480
|
|
||||||
400,1848,2254,1000,59970
|
|
||||||
401,1718,2056,1220,83600
|
|
||||||
402,1480,1773,360,63020
|
|
||||||
403,1439,1724,900,50920
|
|
||||||
404,1576,1896,750,56450
|
|
||||||
405,1948,2374,930,89540
|
|
||||||
406,1613,1893,1180,46030
|
|
||||||
407,896,1059,870,75110
|
|
||||||
408,1625,1943,680,74520
|
|
||||||
409,1303,1587,1310,102310
|
|
||||||
410,1340,1605,1000,53400
|
|
||||||
411,1410,1704,1130,59760
|
|
||||||
412,1432,1719,990,49540
|
|
||||||
413,1891,2280,360,51560
|
|
||||||
414,1322,1583,720,49510
|
|
||||||
415,1378,1670,950,58610
|
|
||||||
416,1462,1732,840,68260
|
|
||||||
417,1440,1733,1120,65310
|
|
||||||
418,1421,1724,920,52090
|
|
||||||
419,1280,1530,1240,43860
|
|
||||||
420,1431,1710,840,74170
|
|
||||||
421,1161,1404,430,58380
|
|
||||||
422,1175,1405,810,91200
|
|
||||||
423,1395,1662,920,90940
|
|
||||||
424,1443,1755,880,49330
|
|
||||||
425,1247,1523,1350,53500
|
|
||||||
426,1788,2133,1000,54590
|
|
||||||
427,1138,1375,1220,57450
|
|
||||||
428,1709,2042,430,33240
|
|
||||||
429,1777,2145,520,80790
|
|
||||||
430,1612,1918,580,61000
|
|
||||||
431,1618,1943,460,47620
|
|
||||||
432,1311,1571,470,72090
|
|
||||||
433,1365,1638,680,102920
|
|
||||||
434,1249,1503,950,61970
|
|
||||||
435,1373,1635,840,61040
|
|
||||||
436,1536,1836,990,52060
|
|
||||||
437,1744,2101,520,69570
|
|
||||||
438,1513,1820,520,66020
|
|
||||||
439,1297,1561,1070,40000
|
|
||||||
440,1908,2304,990,79500
|
|
||||||
441,1721,2076,710,76300
|
|
||||||
442,1243,1491,430,69030
|
|
||||||
443,1472,1766,1290,57140
|
|
||||||
444,1307,1570,1080,41710
|
|
||||||
445,1628,1959,890,71480
|
|
||||||
446,1556,1869,1000,33010
|
|
||||||
447,1179,1428,1290,74570
|
|
||||||
448,1768,2123,860,49590
|
|
||||||
449,1378,1656,1010,73170
|
|
||||||
450,1685,2015,1160,79220
|
|
||||||
451,1474,1759,880,75880
|
|
||||||
452,1794,2137,670,67610
|
|
||||||
453,1086,1291,930,69090
|
|
||||||
454,1808,2154,990,35220
|
|
||||||
455,1501,1790,450,53940
|
|
||||||
456,1353,1598,690,56660
|
|
||||||
457,1455,1757,650,67520
|
|
||||||
458,1165,1411,1020,38620
|
|
||||||
459,1332,1610,880,38890
|
|
||||||
460,1396,1668,760,79270
|
|
||||||
461,1513,1821,690,42880
|
|
||||||
462,1618,1940,630,44240
|
|
||||||
463,1845,2233,370,43190
|
|
||||||
464,1172,1411,460,74550
|
|
||||||
465,1436,1737,770,57090
|
|
||||||
466,1738,2065,750,56480
|
|
||||||
467,2229,2667,660,87410
|
|
||||||
468,1490,1783,730,81370
|
|
||||||
469,1060,1279,670,97360
|
|
||||||
470,2015,2436,680,77960
|
|
||||||
471,1611,1919,960,71240
|
|
||||||
472,1187,1417,1230,58940
|
|
||||||
473,1430,1690,800,78950
|
|
||||||
474,1543,1840,450,36380
|
|
||||||
475,1836,2195,940,45160
|
|
||||||
476,1463,1764,1060,69050
|
|
||||||
477,1213,1462,560,56830
|
|
||||||
478,1244,1480,860,93530
|
|
||||||
479,1745,2108,730,46920
|
|
||||||
480,933,1121,940,55990
|
|
||||||
481,1764,2132,920,40840
|
|
||||||
482,1675,2002,1050,64990
|
|
||||||
483,1688,2046,380,53550
|
|
||||||
484,1842,2204,930,51320
|
|
||||||
485,1316,1597,980,36560
|
|
||||||
486,1440,1719,580,66050
|
|
||||||
487,1760,2111,680,52400
|
|
||||||
488,1323,1571,850,27970
|
|
||||||
489,1230,1466,730,67100
|
|
||||||
490,1540,1838,570,43710
|
|
||||||
491,1167,1388,620,38600
|
|
||||||
492,1429,1695,890,53890
|
|
||||||
493,1491,1770,800,52610
|
|
||||||
494,1313,1574,920,43130
|
|
||||||
495,1609,1943,920,40300
|
|
||||||
496,1109,1342,760,49750
|
|
||||||
497,1207,1440,500,43840
|
|
||||||
498,902,1087,680,56820
|
|
||||||
499,1191,1422,770,36350
|
|
||||||
500,1335,1601,460,50820
|
|
||||||
501,1382,1660,1070,83720
|
|
||||||
502,1588,1906,450,46970
|
|
||||||
503,1918,2284,310,78020
|
|
||||||
504,1484,1774,880,45080
|
|
||||||
505,1334,1607,370,55160
|
|
||||||
506,1556,1846,760,72020
|
|
||||||
507,1784,2142,950,64010
|
|
||||||
508,1244,1477,890,27840
|
|
||||||
509,1496,1787,800,58070
|
|
||||||
510,1719,2058,700,51760
|
|
||||||
511,1678,2022,1050,66050
|
|
||||||
512,1247,1490,330,65750
|
|
||||||
513,1191,1421,980,65820
|
|
||||||
514,1832,2208,1090,46760
|
|
||||||
515,1271,1523,1140,50940
|
|
||||||
516,1735,2084,820,56440
|
|
||||||
517,1627,1948,890,32610
|
|
||||||
518,1351,1616,650,62770
|
|
||||||
519,1520,1817,850,63600
|
|
||||||
520,1490,1788,360,45840
|
|
||||||
521,1777,2117,780,38280
|
|
||||||
522,1688,2037,590,50960
|
|
||||||
523,1537,1836,670,39480
|
|
||||||
524,1622,1968,340,69610
|
|
||||||
525,1148,1384,730,47800
|
|
||||||
526,1001,1194,1210,44890
|
|
||||||
527,1857,2236,1280,67420
|
|
||||||
528,1552,1869,710,78870
|
|
||||||
529,1700,2064,940,70310
|
|
||||||
530,1554,1844,670,38530
|
|
||||||
531,1482,1777,800,77570
|
|
||||||
532,1275,1517,790,59920
|
|
||||||
533,1642,1981,720,54450
|
|
||||||
534,1381,1633,1270,50250
|
|
||||||
535,1381,1634,930,30790
|
|
||||||
536,1057,1262,1490,35420
|
|
||||||
537,1192,1445,810,43470
|
|
||||||
538,1601,1920,600,61000
|
|
||||||
539,1622,1968,210,64780
|
|
||||||
540,1607,1909,460,39030
|
|
||||||
541,2214,2647,740,65900
|
|
||||||
542,1633,1936,1320,46050
|
|
||||||
543,1546,1845,760,59070
|
|
||||||
544,1475,1753,920,44670
|
|
||||||
545,1270,1519,920,58390
|
|
||||||
546,1185,1420,880,80370
|
|
||||||
547,1614,1938,1110,53230
|
|
||||||
548,1141,1353,1370,72000
|
|
||||||
549,1244,1481,410,84040
|
|
||||||
550,869,1050,850,52540
|
|
||||||
551,2049,2465,720,63510
|
|
||||||
552,1883,2262,570,42240
|
|
||||||
553,1526,1842,690,39580
|
|
||||||
554,1165,1390,1220,54610
|
|
||||||
555,1832,2185,840,87330
|
|
||||||
556,1723,2072,560,88410
|
|
||||||
557,932,1138,820,89760
|
|
||||||
558,1137,1374,700,101780
|
|
||||||
559,1231,1472,810,70290
|
|
||||||
560,1237,1512,1070,88210
|
|
||||||
561,1371,1650,540,87160
|
|
||||||
562,1767,2158,530,41540
|
|
||||||
563,1748,2092,580,49170
|
|
||||||
564,1212,1440,500,63950
|
|
||||||
565,1466,1743,1200,70810
|
|
||||||
566,1152,1386,980,49590
|
|
||||||
567,1439,1703,1000,67290
|
|
||||||
568,2026,2400,720,51240
|
|
||||||
569,1772,2146,1030,48540
|
|
||||||
570,1511,1822,420,72410
|
|
||||||
571,1199,1461,1070,54370
|
|
||||||
572,1834,2184,830,94460
|
|
||||||
573,1143,1375,940,85160
|
|
||||||
574,1494,1794,550,52130
|
|
||||||
575,1770,2131,1140,54650
|
|
||||||
576,1455,1747,750,69320
|
|
||||||
577,1141,1372,620,51480
|
|
||||||
578,1586,1886,660,50060
|
|
||||||
579,1701,2034,660,62180
|
|
||||||
580,1860,2246,410,79780
|
|
||||||
581,1167,1406,440,42860
|
|
||||||
582,1424,1716,630,54410
|
|
||||||
583,1710,2053,730,69390
|
|
||||||
584,1408,1708,220,42810
|
|
||||||
585,1517,1831,610,30840
|
|
||||||
586,1227,1476,720,56260
|
|
||||||
587,1609,1930,740,76470
|
|
||||||
588,1553,1831,740,35680
|
|
||||||
589,1814,2174,770,90070
|
|
||||||
590,1240,1493,590,33120
|
|
||||||
591,1206,1437,1330,54060
|
|
||||||
592,1847,2186,910,75120
|
|
||||||
593,1009,1202,330,41600
|
|
||||||
594,1624,1946,870,20270
|
|
||||||
595,1612,1931,790,60060
|
|
||||||
596,1498,1805,1270,82270
|
|
||||||
597,946,1125,590,29170
|
|
||||||
598,1563,1872,1080,68420
|
|
||||||
599,1664,2016,830,59130
|
|
||||||
600,1619,1947,910,74330
|
|
||||||
601,1433,1722,830,77080
|
|
||||||
602,1241,1489,1380,76250
|
|
||||||
603,1429,1720,1180,59540
|
|
||||||
604,1241,1488,770,54690
|
|
||||||
605,1078,1306,680,84360
|
|
||||||
606,1690,2065,910,51420
|
|
||||||
607,1289,1536,540,65120
|
|
||||||
608,1581,1894,760,49380
|
|
||||||
609,1608,1945,760,37830
|
|
||||||
610,1344,1608,730,35980
|
|
||||||
611,1513,1804,430,69190
|
|
||||||
612,1529,1839,1000,50590
|
|
||||||
613,1677,2014,660,60800
|
|
||||||
614,1015,1229,930,31180
|
|
||||||
615,1438,1751,760,77790
|
|
||||||
616,1426,1718,370,47570
|
|
||||||
617,1412,1701,630,69130
|
|
||||||
618,1622,1944,360,75970
|
|
||||||
619,1503,1791,630,68350
|
|
||||||
620,1501,1789,670,41680
|
|
||||||
621,1971,2342,690,86560
|
|
||||||
622,1383,1687,830,81390
|
|
||||||
623,1371,1635,720,50730
|
|
||||||
624,1508,1823,520,71290
|
|
||||||
625,1057,1284,750,70110
|
|
||||||
626,1411,1680,1070,61590
|
|
||||||
627,1466,1746,590,69370
|
|
||||||
628,1545,1888,600,67110
|
|
||||||
629,2044,2408,380,82020
|
|
||||||
630,1887,2264,830,62050
|
|
||||||
631,1505,1836,940,61730
|
|
||||||
632,1422,1722,560,58660
|
|
||||||
633,1564,1869,1030,53370
|
|
||||||
634,1510,1810,730,39700
|
|
||||||
635,1568,1920,890,53750
|
|
||||||
636,1933,2338,1140,44730
|
|
||||||
637,1501,1822,590,49350
|
|
||||||
638,1593,1911,580,43340
|
|
||||||
639,1812,2189,310,78090
|
|
||||||
640,1580,1895,720,54950
|
|
||||||
641,1440,1749,490,75530
|
|
||||||
642,1100,1331,1010,57330
|
|
||||||
643,1534,1841,680,87930
|
|
||||||
644,1299,1555,1020,56850
|
|
||||||
645,1767,2121,1050,78430
|
|
||||||
646,1368,1649,740,63660
|
|
||||||
647,1393,1670,410,62960
|
|
||||||
648,1327,1590,770,81870
|
|
||||||
649,1514,1794,1400,54820
|
|
||||||
650,1989,2414,860,116320
|
|
||||||
651,1334,1584,840,57200
|
|
||||||
652,1533,1817,950,84360
|
|
||||||
653,1809,2145,940,36530
|
|
||||||
654,1607,1933,930,81260
|
|
||||||
655,1165,1387,1060,82350
|
|
||||||
656,1193,1430,560,80830
|
|
||||||
657,1709,2065,670,30610
|
|
||||||
658,1525,1839,540,51310
|
|
||||||
659,1348,1623,1010,72940
|
|
||||||
660,1132,1366,1340,52450
|
|
||||||
661,1667,2020,980,66070
|
|
||||||
662,1427,1720,630,43190
|
|
||||||
663,1211,1447,1110,40730
|
|
||||||
664,1717,2048,700,78530
|
|
||||||
665,1766,2111,580,94690
|
|
||||||
666,1086,1299,1050,44400
|
|
||||||
667,1410,1692,790,73800
|
|
||||||
668,1476,1760,600,37390
|
|
||||||
669,1068,1278,440,64120
|
|
||||||
670,1485,1785,1340,66160
|
|
||||||
671,1461,1739,1250,22310
|
|
||||||
672,1685,2010,990,62380
|
|
||||||
673,1624,1958,290,63850
|
|
||||||
674,1658,2000,350,36210
|
|
||||||
675,1427,1677,210,54590
|
|
||||||
676,1755,2072,810,69610
|
|
||||||
677,1211,1472,790,65390
|
|
||||||
678,1591,1896,780,78130
|
|
||||||
679,1797,2126,730,55710
|
|
||||||
680,1519,1823,1040,69210
|
|
||||||
681,1637,1958,760,59940
|
|
||||||
682,1451,1750,570,72550
|
|
||||||
683,1203,1446,620,44260
|
|
||||||
684,1884,2262,310,56910
|
|
||||||
685,1540,1820,310,82390
|
|
||||||
686,1121,1332,790,54590
|
|
||||||
687,1307,1562,490,69990
|
|
||||||
688,1475,1775,230,72740
|
|
||||||
689,1160,1401,900,35360
|
|
||||||
690,1078,1276,640,94370
|
|
||||||
691,1191,1436,840,43520
|
|
||||||
692,1317,1569,780,36000
|
|
||||||
693,1548,1858,480,99480
|
|
||||||
694,1560,1883,1040,83220
|
|
||||||
695,1297,1529,870,52940
|
|
||||||
696,1645,1958,530,93360
|
|
||||||
697,1225,1455,750,73590
|
|
||||||
698,1421,1704,840,53840
|
|
||||||
699,1655,1956,800,47350
|
|
||||||
700,1615,1928,660,65080
|
|
||||||
701,1872,2262,560,62050
|
|
||||||
702,1317,1581,910,30020
|
|
||||||
703,1434,1729,480,49510
|
|
||||||
704,1791,2167,700,64320
|
|
||||||
705,932,1120,660,35590
|
|
||||||
706,1609,1924,1170,63050
|
|
||||||
707,1495,1793,1020,65300
|
|
||||||
708,1769,2153,580,69560
|
|
||||||
709,1693,2032,610,41910
|
|
||||||
710,1247,1497,590,28330
|
|
||||||
711,1502,1815,190,55980
|
|
||||||
712,1360,1612,490,61080
|
|
||||||
713,1542,1844,680,51380
|
|
||||||
714,1631,1947,670,84410
|
|
||||||
715,1246,1482,1070,60680
|
|
||||||
716,1990,2384,1110,64690
|
|
||||||
717,967,1154,560,45780
|
|
||||||
718,1582,1894,1100,41800
|
|
||||||
719,1430,1743,970,53230
|
|
||||||
720,1827,2160,930,36160
|
|
||||||
721,1118,1338,1040,40450
|
|
||||||
722,1766,2109,1120,57910
|
|
||||||
723,1799,2173,910,36280
|
|
||||||
724,1167,1411,440,39190
|
|
||||||
725,1493,1795,530,62380
|
|
||||||
726,1445,1734,900,21470
|
|
||||||
727,1033,1237,740,34610
|
|
||||||
728,1440,1711,1020,88120
|
|
||||||
729,1487,1773,970,59190
|
|
||||||
730,1854,2205,890,36290
|
|
||||||
731,1748,2086,550,53760
|
|
||||||
732,1937,2310,520,66300
|
|
||||||
733,1641,1999,950,93000
|
|
||||||
734,1659,1999,650,65660
|
|
||||||
735,1743,2061,860,81930
|
|
||||||
736,1449,1733,320,60060
|
|
||||||
737,1098,1309,860,59530
|
|
||||||
738,1121,1351,900,46380
|
|
||||||
739,1526,1858,550,76200
|
|
||||||
740,1358,1645,770,56860
|
|
||||||
741,1336,1616,710,86620
|
|
||||||
742,1502,1802,840,49730
|
|
||||||
743,1534,1858,860,88370
|
|
||||||
744,1418,1699,870,49160
|
|
||||||
745,854,1018,660,77740
|
|
||||||
746,1450,1728,930,38560
|
|
||||||
747,1474,1776,1020,51990
|
|
||||||
748,1524,1819,1190,39970
|
|
||||||
749,1361,1638,1140,46040
|
|
||||||
750,1398,1683,490,49500
|
|
||||||
751,1085,1308,1170,76670
|
|
||||||
752,1660,1979,480,75800
|
|
||||||
753,1648,2017,930,81720
|
|
||||||
754,1453,1749,890,58440
|
|
||||||
755,1323,1591,680,85720
|
|
||||||
756,1385,1643,740,70940
|
|
||||||
757,1250,1506,990,62420
|
|
||||||
758,1389,1683,680,56880
|
|
||||||
759,1486,1758,820,101820
|
|
||||||
760,1655,1993,440,86890
|
|
||||||
761,1645,1963,900,47300
|
|
||||||
762,1464,1771,1080,31270
|
|
||||||
763,1197,1428,830,65410
|
|
||||||
764,1878,2264,310,54200
|
|
||||||
765,1150,1378,730,67390
|
|
||||||
766,1562,1881,740,54530
|
|
||||||
767,1596,1939,960,79760
|
|
||||||
768,1119,1345,790,78060
|
|
||||||
769,1116,1347,700,74080
|
|
||||||
770,1934,2349,750,52990
|
|
||||||
771,1299,1540,590,70580
|
|
||||||
772,1417,1689,570,34310
|
|
||||||
773,1235,1503,660,74160
|
|
||||||
774,1497,1815,700,59190
|
|
||||||
775,1430,1704,1070,43370
|
|
||||||
776,1537,1877,660,17670
|
|
||||||
777,1444,1742,840,56710
|
|
||||||
778,1477,1798,850,59820
|
|
||||||
779,1041,1246,600,36190
|
|
||||||
780,1226,1472,710,60440
|
|
||||||
781,1489,1783,450,75300
|
|
||||||
782,1549,1871,740,74080
|
|
||||||
783,1073,1280,1240,60440
|
|
||||||
784,1473,1785,570,80720
|
|
||||||
785,2013,2396,580,47060
|
|
||||||
786,1975,2368,450,86830
|
|
||||||
787,1561,1877,790,56790
|
|
||||||
788,1427,1723,1040,67090
|
|
||||||
789,1441,1747,670,44370
|
|
||||||
790,1275,1548,370,82970
|
|
||||||
791,1574,1876,620,56230
|
|
||||||
792,1511,1791,1010,53760
|
|
||||||
793,1428,1713,550,55390
|
|
||||||
794,1388,1672,800,73500
|
|
||||||
795,1057,1280,610,41050
|
|
||||||
796,1440,1747,1090,67320
|
|
||||||
797,1349,1610,700,65890
|
|
||||||
798,1536,1808,830,56380
|
|
||||||
799,2019,2420,850,85670
|
|
||||||
800,1236,1508,1260,70830
|
|
||||||
801,1436,1715,1030,48180
|
|
||||||
802,1862,2248,1160,51910
|
|
||||||
803,1200,1442,880,44320
|
|
||||||
804,1360,1650,420,58940
|
|
||||||
805,1722,2078,770,73610
|
|
||||||
806,1577,1902,910,54060
|
|
||||||
807,1850,2214,1110,85000
|
|
||||||
808,1447,1730,510,49030
|
|
||||||
809,1496,1800,780,63300
|
|
||||||
810,1679,2008,790,84300
|
|
||||||
811,994,1194,1090,81390
|
|
||||||
812,1354,1635,1270,95900
|
|
||||||
813,1597,1918,1260,71830
|
|
||||||
814,1873,2252,330,79310
|
|
||||||
815,1218,1459,540,87890
|
|
||||||
816,1458,1746,720,48610
|
|
||||||
817,1546,1860,670,73160
|
|
||||||
818,1608,1962,770,36280
|
|
||||||
819,1822,2160,860,49720
|
|
||||||
820,1716,2038,410,44400
|
|
||||||
821,1072,1296,900,47590
|
|
||||||
822,1330,1604,480,51460
|
|
||||||
823,1588,1892,540,57750
|
|
||||||
824,1425,1733,760,66000
|
|
||||||
825,1778,2133,280,45950
|
|
||||||
826,1363,1630,1120,53900
|
|
||||||
827,1609,1928,160,37920
|
|
||||||
828,1671,2024,620,63100
|
|
||||||
829,1379,1636,440,36770
|
|
||||||
830,1218,1452,870,43910
|
|
||||||
831,1724,2101,900,66390
|
|
||||||
832,986,1179,710,59160
|
|
||||||
833,1330,1606,590,38510
|
|
||||||
834,1437,1725,910,46220
|
|
||||||
835,1327,1609,1320,41500
|
|
||||||
836,1651,2009,1000,58160
|
|
||||||
837,1211,1462,670,38530
|
|
||||||
838,1916,2277,610,55880
|
|
||||||
839,1638,1937,1050,70940
|
|
||||||
840,1172,1413,480,53940
|
|
||||||
841,1350,1606,770,43030
|
|
||||||
842,1528,1843,340,59820
|
|
||||||
843,1305,1557,580,55500
|
|
||||||
844,1463,1751,900,49990
|
|
||||||
845,1409,1727,700,42980
|
|
||||||
846,1419,1743,860,65970
|
|
||||||
847,1535,1819,540,59290
|
|
||||||
848,1474,1745,970,63020
|
|
||||||
849,919,1099,1560,73810
|
|
||||||
850,2067,2492,790,70230
|
|
||||||
851,1977,2362,1020,59950
|
|
||||||
852,1293,1558,790,78100
|
|
||||||
853,1477,1790,880,16370
|
|
||||||
854,1582,1906,550,92640
|
|
||||||
855,1481,1789,550,63540
|
|
||||||
856,1214,1455,950,87220
|
|
||||||
857,1206,1460,810,41990
|
|
||||||
858,1653,1982,390,79410
|
|
||||||
859,1152,1393,860,54380
|
|
||||||
860,1458,1757,850,58600
|
|
||||||
861,1249,1510,660,48950
|
|
||||||
862,1939,2333,830,40670
|
|
||||||
863,1591,1919,640,52340
|
|
||||||
864,1180,1397,750,39140
|
|
||||||
865,1846,2195,1170,41090
|
|
||||||
866,780,951,790,25600
|
|
||||||
867,1565,1854,900,100900
|
|
||||||
868,1648,1959,370,77080
|
|
||||||
869,1775,2104,980,105150
|
|
||||||
870,1439,1732,1170,80580
|
|
||||||
871,1487,1776,800,46230
|
|
||||||
872,1800,2158,1100,98260
|
|
||||||
873,1690,2024,1070,75930
|
|
||||||
874,1209,1452,830,52050
|
|
||||||
875,1859,2222,1210,87000
|
|
||||||
876,1691,2023,540,60270
|
|
||||||
877,1259,1493,100,88270
|
|
||||||
878,1771,2138,820,57820
|
|
||||||
879,1205,1468,1210,61210
|
|
||||||
880,1792,2131,810,76420
|
|
||||||
881,1263,1516,780,70980
|
|
||||||
882,1344,1605,1160,76740
|
|
||||||
883,1819,2187,590,47920
|
|
||||||
884,1357,1625,1140,52160
|
|
||||||
885,1396,1673,690,32740
|
|
||||||
886,1118,1337,560,72270
|
|
||||||
887,1655,1986,1150,77430
|
|
||||||
888,1156,1398,140,92370
|
|
||||||
889,1451,1734,670,34880
|
|
||||||
890,1539,1829,650,46580
|
|
||||||
891,1549,1851,1220,70620
|
|
||||||
892,1582,1910,1080,66390
|
|
||||||
893,1387,1663,850,82080
|
|
||||||
894,1200,1436,1060,76440
|
|
||||||
895,1299,1560,770,96610
|
|
||||||
896,1174,1429,1110,54340
|
|
|
File diff suppressed because it is too large
Load Diff
599
lab_2/lab2.ipynb
599
lab_2/lab2.ipynb
@ -1,599 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"**Lab2 Pibd-31 Malafeev**"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"*Загрузка трёх других датасетов(не своего варианта)*"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 97,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
|
||||||
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
|
||||||
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Далее будут выполнены в Markdown пукнты лабораторной 2-8 с пометкой каждого пункта."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"2.Проблемной областью явлются: датасет stores.csv - магазины, kc_house_data.csv - датасет продажи домов и Forber Billionares.csv - датасет миллионеров."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"3.Объектами наблюдениями явлются магазины, дома и миллионеры. Связи между объектами нет, единственная схожесть - магазин и дом являются зданиями."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"4.Датасет миллионеров нужны например для сайта forbes - чтобы составить тир лист. В целом, другие датасеты тоже подходят для составления тир листа)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"5.Технический проект - тир лист, на входе датасет"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"6.Пункт будем выполнять в коде, оставлю к каждому комменты:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 98,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"id 0\n",
|
|
||||||
"date 0\n",
|
|
||||||
"price 0\n",
|
|
||||||
"bedrooms 0\n",
|
|
||||||
"bathrooms 0\n",
|
|
||||||
"sqft_living 0\n",
|
|
||||||
"sqft_lot 0\n",
|
|
||||||
"floors 0\n",
|
|
||||||
"waterfront 0\n",
|
|
||||||
"view 0\n",
|
|
||||||
"condition 0\n",
|
|
||||||
"grade 0\n",
|
|
||||||
"sqft_above 0\n",
|
|
||||||
"sqft_basement 0\n",
|
|
||||||
"yr_built 0\n",
|
|
||||||
"yr_renovated 0\n",
|
|
||||||
"zipcode 0\n",
|
|
||||||
"lat 0\n",
|
|
||||||
"long 0\n",
|
|
||||||
"sqft_living15 0\n",
|
|
||||||
"sqft_lot15 0\n",
|
|
||||||
"dtype: int64\n",
|
|
||||||
"Store ID 0\n",
|
|
||||||
"Store_Area 0\n",
|
|
||||||
"Items_Available 0\n",
|
|
||||||
"Daily_Customer_Count 0\n",
|
|
||||||
"Store_Sales 0\n",
|
|
||||||
"dtype: int64\n",
|
|
||||||
"Rank 0\n",
|
|
||||||
"Name 0\n",
|
|
||||||
"Networth 0\n",
|
|
||||||
"Age 0\n",
|
|
||||||
"Country 0\n",
|
|
||||||
"Source 0\n",
|
|
||||||
"Industry 0\n",
|
|
||||||
"dtype: int64\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Проверка на пропущенные значения\n",
|
|
||||||
"print(df.isnull().sum())\n",
|
|
||||||
"print(df2.isnull().sum())\n",
|
|
||||||
"print(df3.isnull().sum())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 99,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" id price bedrooms bathrooms sqft_living \\\n",
|
|
||||||
"count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
|
|
||||||
"mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
|
|
||||||
"std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
|
|
||||||
"min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
|
|
||||||
"25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
|
|
||||||
"50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
|
|
||||||
"75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
|
|
||||||
"max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
|
|
||||||
"\n",
|
|
||||||
" sqft_lot floors waterfront view condition \\\n",
|
|
||||||
"count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|
||||||
"mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
|
|
||||||
"std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
|
|
||||||
"min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
|
|
||||||
"25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
|
|
||||||
"50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
|
|
||||||
"75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
|
|
||||||
"max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
|
|
||||||
"\n",
|
|
||||||
" grade sqft_above sqft_basement yr_built yr_renovated \\\n",
|
|
||||||
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|
||||||
"mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
|
|
||||||
"std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
|
|
||||||
"min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
|
|
||||||
"25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
|
|
||||||
"50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
|
|
||||||
"75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
|
|
||||||
"max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
|
|
||||||
"\n",
|
|
||||||
" zipcode lat long sqft_living15 sqft_lot15 \n",
|
|
||||||
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|
||||||
"mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
|
|
||||||
"std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
|
|
||||||
"min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
|
|
||||||
"25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
|
|
||||||
"50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
|
|
||||||
"75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
|
|
||||||
"max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 \n",
|
|
||||||
" Store ID Store_Area Items_Available Daily_Customer_Count \\\n",
|
|
||||||
"count 896.000000 896.000000 896.000000 896.000000 \n",
|
|
||||||
"mean 448.500000 1485.409598 1782.035714 786.350446 \n",
|
|
||||||
"std 258.797218 250.237011 299.872053 265.389281 \n",
|
|
||||||
"min 1.000000 775.000000 932.000000 10.000000 \n",
|
|
||||||
"25% 224.750000 1316.750000 1575.500000 600.000000 \n",
|
|
||||||
"50% 448.500000 1477.000000 1773.500000 780.000000 \n",
|
|
||||||
"75% 672.250000 1653.500000 1982.750000 970.000000 \n",
|
|
||||||
"max 896.000000 2229.000000 2667.000000 1560.000000 \n",
|
|
||||||
"\n",
|
|
||||||
" Store_Sales \n",
|
|
||||||
"count 896.000000 \n",
|
|
||||||
"mean 59351.305804 \n",
|
|
||||||
"std 17190.741895 \n",
|
|
||||||
"min 14920.000000 \n",
|
|
||||||
"25% 46530.000000 \n",
|
|
||||||
"50% 58605.000000 \n",
|
|
||||||
"75% 71872.500000 \n",
|
|
||||||
"max 116320.000000 \n",
|
|
||||||
" Rank Networth Age\n",
|
|
||||||
"count 2600.000000 2600.000000 2600.000000\n",
|
|
||||||
"mean 1269.570769 4.860750 64.271923\n",
|
|
||||||
"std 728.146364 10.659671 13.220607\n",
|
|
||||||
"min 1.000000 1.000000 19.000000\n",
|
|
||||||
"25% 637.000000 1.500000 55.000000\n",
|
|
||||||
"50% 1292.000000 2.400000 64.000000\n",
|
|
||||||
"75% 1929.000000 4.500000 74.000000\n",
|
|
||||||
"max 2578.000000 219.000000 100.000000\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Статистика по числовым данным для выявления аномальных распределений\n",
|
|
||||||
"print(df.describe())\n",
|
|
||||||
"print(df2.describe())\n",
|
|
||||||
"print(df3.describe())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"7.По перым трём строкам кода, т.е после проверки на пропущенные значения выявлено, что их нет. А дальше я обнаружил аномалию: в датасете миллионеров есть столбец networth - чистое количество денег во всех формах ( в миллиардах ), в этом солбце минимальное значение является единицей, медиана в районе 2.4, а максимальное - 219. В ЭТОМ СТОЛБЦЕ АНОМАЛИЯ"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"8.Наши датасеты довольно информационные. Например у миллионер датасета можно посмотреть фио, сколько денег, что он сделал. Датасет по продаже домов гораздо информационнее, является лидером по наполненности и соответствует реальности. А вот датасет магазинов слабоват, можно например добавить: количество филлиалов, работников, прибыль"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"9.Возьмём датасет магазинов, будем удалять столбцы, где площадь ниже 1500 (по тз надо)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 100,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales\n",
|
|
||||||
"0 1 1659 1961 530 66490\n",
|
|
||||||
"4 5 1770 2111 450 46620\n",
|
|
||||||
"6 7 1542 1858 1030 72240\n",
|
|
||||||
"11 12 1751 2098 720 57620\n",
|
|
||||||
"12 13 1746 2064 1050 60470\n",
|
|
||||||
".. ... ... ... ... ...\n",
|
|
||||||
"882 883 1819 2187 590 47920\n",
|
|
||||||
"886 887 1655 1986 1150 77430\n",
|
|
||||||
"889 890 1539 1829 650 46580\n",
|
|
||||||
"890 891 1549 1851 1220 70620\n",
|
|
||||||
"891 892 1582 1910 1080 66390\n",
|
|
||||||
"\n",
|
|
||||||
"[415 rows x 5 columns]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df2_filtered = df2[df2['Store_Area'] >= 1500]\n",
|
|
||||||
"print(df2_filtered)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Теперь в датасете магазнов price заменим у всех на константное значение - 1 500 000"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 101,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" id date price bedrooms bathrooms sqft_living \\\n",
|
|
||||||
"0 7129300520 20141013T000000 1500000 3 1.00 1180 \n",
|
|
||||||
"1 6414100192 20141209T000000 1500000 3 2.25 2570 \n",
|
|
||||||
"2 5631500400 20150225T000000 1500000 2 1.00 770 \n",
|
|
||||||
"3 2487200875 20141209T000000 1500000 4 3.00 1960 \n",
|
|
||||||
"4 1954400510 20150218T000000 1500000 3 2.00 1680 \n",
|
|
||||||
"... ... ... ... ... ... ... \n",
|
|
||||||
"21608 263000018 20140521T000000 1500000 3 2.50 1530 \n",
|
|
||||||
"21609 6600060120 20150223T000000 1500000 4 2.50 2310 \n",
|
|
||||||
"21610 1523300141 20140623T000000 1500000 2 0.75 1020 \n",
|
|
||||||
"21611 291310100 20150116T000000 1500000 3 2.50 1600 \n",
|
|
||||||
"21612 1523300157 20141015T000000 1500000 2 0.75 1020 \n",
|
|
||||||
"\n",
|
|
||||||
" sqft_lot floors waterfront view ... grade sqft_above \\\n",
|
|
||||||
"0 5650 1.0 0 0 ... 7 1180 \n",
|
|
||||||
"1 7242 2.0 0 0 ... 7 2170 \n",
|
|
||||||
"2 10000 1.0 0 0 ... 6 770 \n",
|
|
||||||
"3 5000 1.0 0 0 ... 7 1050 \n",
|
|
||||||
"4 8080 1.0 0 0 ... 8 1680 \n",
|
|
||||||
"... ... ... ... ... ... ... ... \n",
|
|
||||||
"21608 1131 3.0 0 0 ... 8 1530 \n",
|
|
||||||
"21609 5813 2.0 0 0 ... 8 2310 \n",
|
|
||||||
"21610 1350 2.0 0 0 ... 7 1020 \n",
|
|
||||||
"21611 2388 2.0 0 0 ... 8 1600 \n",
|
|
||||||
"21612 1076 2.0 0 0 ... 7 1020 \n",
|
|
||||||
"\n",
|
|
||||||
" sqft_basement yr_built yr_renovated zipcode lat long \\\n",
|
|
||||||
"0 0 1955 0 98178 47.5112 -122.257 \n",
|
|
||||||
"1 400 1951 1991 98125 47.7210 -122.319 \n",
|
|
||||||
"2 0 1933 0 98028 47.7379 -122.233 \n",
|
|
||||||
"3 910 1965 0 98136 47.5208 -122.393 \n",
|
|
||||||
"4 0 1987 0 98074 47.6168 -122.045 \n",
|
|
||||||
"... ... ... ... ... ... ... \n",
|
|
||||||
"21608 0 2009 0 98103 47.6993 -122.346 \n",
|
|
||||||
"21609 0 2014 0 98146 47.5107 -122.362 \n",
|
|
||||||
"21610 0 2009 0 98144 47.5944 -122.299 \n",
|
|
||||||
"21611 0 2004 0 98027 47.5345 -122.069 \n",
|
|
||||||
"21612 0 2008 0 98144 47.5941 -122.299 \n",
|
|
||||||
"\n",
|
|
||||||
" sqft_living15 sqft_lot15 \n",
|
|
||||||
"0 1340 5650 \n",
|
|
||||||
"1 1690 7639 \n",
|
|
||||||
"2 2720 8062 \n",
|
|
||||||
"3 1360 5000 \n",
|
|
||||||
"4 1800 7503 \n",
|
|
||||||
"... ... ... \n",
|
|
||||||
"21608 1530 1509 \n",
|
|
||||||
"21609 1830 7200 \n",
|
|
||||||
"21610 1020 2007 \n",
|
|
||||||
"21611 1410 1287 \n",
|
|
||||||
"21612 1020 1357 \n",
|
|
||||||
"\n",
|
|
||||||
"[21613 rows x 21 columns]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df['price'] = 1500000\n",
|
|
||||||
"print(df)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Теперь у миллионеров в networth подставим среднее по столбцу:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 102,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" Networth\n",
|
|
||||||
"0 4.86075\n",
|
|
||||||
"1 4.86075\n",
|
|
||||||
"2 4.86075\n",
|
|
||||||
"3 4.86075\n",
|
|
||||||
"4 4.86075\n",
|
|
||||||
"... ...\n",
|
|
||||||
"2595 4.86075\n",
|
|
||||||
"2596 4.86075\n",
|
|
||||||
"2597 4.86075\n",
|
|
||||||
"2598 4.86075\n",
|
|
||||||
"2599 4.86075\n",
|
|
||||||
"\n",
|
|
||||||
"[2600 rows x 1 columns]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"networth_mean = df3['Networth'].mean()\n",
|
|
||||||
"df3['Networth'] = networth_mean\n",
|
|
||||||
"print(df3[['Networth']])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"10.КОД"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 103,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Train df: (15129, 21), Validation df: (3242, 21), Test df: (3242, 21)\n",
|
|
||||||
"Train df2: (627, 5), Validation df2: (134, 5), Test df2: (135, 5)\n",
|
|
||||||
"Train df3: (1820, 7), Validation df3: (390, 7), Test df3: (390, 7)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)\n",
|
|
||||||
"val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42) \n",
|
|
||||||
"\n",
|
|
||||||
"train_df2, temp_df2 = train_test_split(df2, test_size=0.3, random_state=42)\n",
|
|
||||||
"val_df2, test_df2 = train_test_split(temp_df2, test_size=0.5, random_state=42)\n",
|
|
||||||
"\n",
|
|
||||||
"train_df3, temp_df3 = train_test_split(df3, test_size=0.3, random_state=42)\n",
|
|
||||||
"val_df3, test_df3 = train_test_split(temp_df3, test_size=0.5, random_state=42)\n",
|
|
||||||
"print(f\"Train df: {train_df.shape}, Validation df: {val_df.shape}, Test df: {test_df.shape}\")\n",
|
|
||||||
"print(f\"Train df2: {train_df2.shape}, Validation df2: {val_df2.shape}, Test df2: {test_df2.shape}\")\n",
|
|
||||||
"print(f\"Train df3: {train_df3.shape}, Validation df3: {val_df3.shape}, Test df3: {test_df3.shape}\")\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Было сделаное разбиение на три выборки: 70%, 15% и 15%. Подключена была библиотека scikit-learn и функция train_test_split , как сказано в пункте 15. Вполне сбалансированные"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"12.Качаем библиотеку imbalanced-learn, достаём нужные функции и погнали"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 104,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Class distribution after oversampling (df):\n",
|
|
||||||
"price_category\n",
|
|
||||||
"Low 10787\n",
|
|
||||||
"Medium 10787\n",
|
|
||||||
"High 10787\n",
|
|
||||||
"Luxury 10787\n",
|
|
||||||
"Name: count, dtype: int64\n",
|
|
||||||
"Class distribution after undersampling (df):\n",
|
|
||||||
"price_category\n",
|
|
||||||
"Low 1465\n",
|
|
||||||
"Medium 1465\n",
|
|
||||||
"High 1465\n",
|
|
||||||
"Luxury 1465\n",
|
|
||||||
"Name: count, dtype: int64\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from imblearn.over_sampling import RandomOverSampler\n",
|
|
||||||
"from imblearn.under_sampling import RandomUnderSampler\n",
|
|
||||||
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
|
||||||
"df['price_category'] = pd.cut(df['price'], bins=[0, 300000, 600000, 1000000, float('inf')],\n",
|
|
||||||
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
|
||||||
"\n",
|
|
||||||
"y = df['price_category']\n",
|
|
||||||
"X = df.drop(columns=['price', 'price_category'])\n",
|
|
||||||
"\n",
|
|
||||||
"oversampler = RandomOverSampler(random_state=42)\n",
|
|
||||||
"X_resampled, y_resampled = oversampler.fit_resample(X, y)\n",
|
|
||||||
"\n",
|
|
||||||
"undersampler = RandomUnderSampler(random_state=42)\n",
|
|
||||||
"X_resampled_under, y_resampled_under = undersampler.fit_resample(X, y)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after oversampling (df):\")\n",
|
|
||||||
"print(pd.Series(y_resampled).value_counts())\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after undersampling (df):\")\n",
|
|
||||||
"print(pd.Series(y_resampled_under).value_counts())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 105,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Class distribution after oversampling (df3):\n",
|
|
||||||
"AGE_category\n",
|
|
||||||
"Young 1401\n",
|
|
||||||
"Middle-aged 1401\n",
|
|
||||||
"Senior 1401\n",
|
|
||||||
"Elderly 1401\n",
|
|
||||||
"Name: count, dtype: int64\n",
|
|
||||||
"Class distribution after undersampling (df3):\n",
|
|
||||||
"AGE_category\n",
|
|
||||||
"Young 15\n",
|
|
||||||
"Middle-aged 15\n",
|
|
||||||
"Senior 15\n",
|
|
||||||
"Elderly 15\n",
|
|
||||||
"Name: count, dtype: int64\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")\n",
|
|
||||||
"\n",
|
|
||||||
"df3['AGE_category'] = pd.cut(df3['Age'], bins=[0, 30, 50, 70, float('inf')],\n",
|
|
||||||
" labels=['Young', 'Middle-aged', 'Senior', 'Elderly'])\n",
|
|
||||||
"\n",
|
|
||||||
"y3 = df3['AGE_category']\n",
|
|
||||||
"X3 = df3.drop(columns=['Age', 'AGE_category'])\n",
|
|
||||||
"\n",
|
|
||||||
"oversampler3 = RandomOverSampler(random_state=42)\n",
|
|
||||||
"X_resampled_3, y_resampled_3 = oversampler3.fit_resample(X3, y3)\n",
|
|
||||||
"\n",
|
|
||||||
"undersampler3 = RandomUnderSampler(random_state=42)\n",
|
|
||||||
"X_resampled_3_under, y_resampled_3_under = undersampler3.fit_resample(X3, y3)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after oversampling (df3):\")\n",
|
|
||||||
"print(pd.Series(y_resampled_3).value_counts())\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after undersampling (df3):\")\n",
|
|
||||||
"print(pd.Series(y_resampled_3_under).value_counts())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 106,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Class distribution after oversampling (df2):\n",
|
|
||||||
"Sales_category\n",
|
|
||||||
"Low 598\n",
|
|
||||||
"Medium 598\n",
|
|
||||||
"High 598\n",
|
|
||||||
"Luxury 0\n",
|
|
||||||
"Name: count, dtype: int64\n",
|
|
||||||
"Class distribution after undersampling (df2):\n",
|
|
||||||
"Sales_category\n",
|
|
||||||
"Low 7\n",
|
|
||||||
"Medium 7\n",
|
|
||||||
"High 7\n",
|
|
||||||
"Luxury 0\n",
|
|
||||||
"Name: count, dtype: int64\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
|
||||||
"\n",
|
|
||||||
"df2['Sales_category'] = pd.cut(df2['Store_Sales'], bins=[0, 50000, 100000, 200000, float('inf')],\n",
|
|
||||||
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
|
||||||
"\n",
|
|
||||||
"y2 = df2['Sales_category']\n",
|
|
||||||
"X2 = df2.drop(columns=['Store_Sales', 'Sales_category'])\n",
|
|
||||||
"\n",
|
|
||||||
"oversampler2 = RandomOverSampler(random_state=42)\n",
|
|
||||||
"X_resampled_2, y_resampled_2 = oversampler2.fit_resample(X2, y2)\n",
|
|
||||||
"\n",
|
|
||||||
"undersampler2 = RandomUnderSampler(random_state=42)\n",
|
|
||||||
"X_resampled_2_under, y_resampled_2_under = undersampler2.fit_resample(X2, y2)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after oversampling (df2):\")\n",
|
|
||||||
"print(pd.Series(y_resampled_2).value_counts())\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Class distribution after undersampling (df2):\")\n",
|
|
||||||
"print(pd.Series(y_resampled_2_under).value_counts())"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "miivenv",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
1000
lab_3/lab3.ipynb
1000
lab_3/lab3.ipynb
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user