Compare commits
No commits in common. "main" and "lab1" have entirely different histories.
File diff suppressed because it is too large
Load Diff
@ -1,897 +0,0 @@
|
||||
Store ID ,Store_Area,Items_Available,Daily_Customer_Count,Store_Sales
|
||||
1,1659,1961,530,66490
|
||||
2,1461,1752,210,39820
|
||||
3,1340,1609,720,54010
|
||||
4,1451,1748,620,53730
|
||||
5,1770,2111,450,46620
|
||||
6,1442,1733,760,45260
|
||||
7,1542,1858,1030,72240
|
||||
8,1261,1507,1020,37720
|
||||
9,1090,1321,680,46310
|
||||
10,1030,1235,1130,44150
|
||||
11,1187,1439,1090,71280
|
||||
12,1751,2098,720,57620
|
||||
13,1746,2064,1050,60470
|
||||
14,1615,1931,1160,59130
|
||||
15,1469,1756,770,66360
|
||||
16,1644,1950,790,78870
|
||||
17,1578,1907,1440,77250
|
||||
18,1703,2045,670,38170
|
||||
19,1438,1731,1030,63540
|
||||
20,1940,2340,980,40190
|
||||
21,1421,1700,370,43460
|
||||
22,1458,1746,690,68890
|
||||
23,1719,2065,950,52780
|
||||
24,1449,1752,620,50680
|
||||
25,1234,1488,840,41880
|
||||
26,1732,2073,820,70050
|
||||
27,1475,1777,1100,25820
|
||||
28,1390,1648,980,60530
|
||||
29,1642,1943,710,78100
|
||||
30,1715,2071,650,84860
|
||||
31,1439,1746,990,80140
|
||||
32,1250,1508,990,14920
|
||||
33,1331,1608,880,60460
|
||||
34,1784,2163,620,74560
|
||||
35,1375,1648,1020,72430
|
||||
36,1871,2230,700,45460
|
||||
37,1442,1744,610,41570
|
||||
38,1174,1411,1080,62870
|
||||
39,1839,2204,1010,55170
|
||||
40,1270,1516,10,45480
|
||||
41,1435,1725,1250,49550
|
||||
42,965,1152,600,48140
|
||||
43,1665,2001,730,67640
|
||||
44,1780,2117,780,39730
|
||||
45,1009,1194,520,35800
|
||||
46,1227,1471,870,49270
|
||||
47,1769,2087,690,66510
|
||||
48,1660,1982,910,62530
|
||||
49,1472,1776,1260,59980
|
||||
50,1408,1688,1040,76350
|
||||
51,1514,1820,910,81820
|
||||
52,1565,1880,1300,57830
|
||||
53,1074,1288,320,70450
|
||||
54,1864,2240,530,67000
|
||||
55,1570,1898,980,64090
|
||||
56,1417,1701,740,48670
|
||||
57,1734,2060,1240,66210
|
||||
58,1470,1763,1080,83660
|
||||
59,1761,2104,1080,70770
|
||||
60,1756,2070,460,53870
|
||||
61,1704,2045,300,71300
|
||||
62,2011,2391,530,46100
|
||||
63,1472,1748,600,49100
|
||||
64,1310,1561,860,65920
|
||||
65,1544,1821,590,58660
|
||||
66,1707,2052,920,69130
|
||||
67,1881,2262,570,49080
|
||||
68,1416,1681,290,72710
|
||||
69,1631,1941,650,33430
|
||||
70,1318,1576,710,42430
|
||||
71,1692,2019,850,56650
|
||||
72,1152,1380,530,33580
|
||||
73,891,1073,630,67370
|
||||
74,1468,1749,700,71780
|
||||
75,1539,1833,650,84840
|
||||
76,1635,1956,720,82070
|
||||
77,1267,1520,450,26770
|
||||
78,1250,1475,1390,65560
|
||||
79,1720,2044,960,38660
|
||||
80,1462,1761,600,65660
|
||||
81,1431,1711,620,40700
|
||||
82,1539,1858,1020,88910
|
||||
83,1441,1723,330,57860
|
||||
84,1572,1884,1410,42670
|
||||
85,1287,1525,1200,90180
|
||||
86,1468,1760,280,51280
|
||||
87,1931,2342,940,97260
|
||||
88,1252,1506,850,39650
|
||||
89,1238,1468,960,45720
|
||||
90,1479,1758,420,42060
|
||||
91,1590,1912,830,65350
|
||||
92,2169,2617,600,67080
|
||||
93,1838,2205,400,54030
|
||||
94,1385,1655,760,56360
|
||||
95,1921,2305,1470,77120
|
||||
96,1975,2385,500,50810
|
||||
97,1853,2235,1120,60960
|
||||
98,1816,2171,1160,61180
|
||||
99,1785,2147,820,63660
|
||||
100,1579,1899,1140,41190
|
||||
101,1096,1321,900,78420
|
||||
102,1919,2294,760,65580
|
||||
103,1262,1500,1170,89080
|
||||
104,1374,1655,1080,94170
|
||||
105,1309,1587,1000,50950
|
||||
106,1207,1434,690,65180
|
||||
107,1692,2031,810,69310
|
||||
108,1929,2311,630,79210
|
||||
109,1573,1878,650,23740
|
||||
110,1415,1700,920,36330
|
||||
111,1162,1382,1260,51700
|
||||
112,1485,1787,800,62950
|
||||
113,1897,2248,1330,56010
|
||||
114,1607,1927,940,45080
|
||||
115,1909,2287,1210,46830
|
||||
116,1274,1503,660,64750
|
||||
117,1157,1379,770,80780
|
||||
118,1712,2046,460,31180
|
||||
119,1500,1798,860,56710
|
||||
120,1682,2017,780,49390
|
||||
121,1441,1727,890,66000
|
||||
122,1525,1835,900,32770
|
||||
123,1408,1669,530,46580
|
||||
124,1947,2333,790,79780
|
||||
125,1164,1390,370,35510
|
||||
126,1787,2137,610,80970
|
||||
127,1871,2241,500,61150
|
||||
128,1718,2051,750,49210
|
||||
129,1365,1636,980,79950
|
||||
130,1368,1654,530,68740
|
||||
131,1342,1595,910,57480
|
||||
132,1076,1270,620,72630
|
||||
133,1396,1672,1170,50070
|
||||
134,1713,2071,900,40490
|
||||
135,1370,1638,980,51850
|
||||
136,1667,1993,740,42840
|
||||
137,1638,1972,810,60940
|
||||
138,1581,1905,810,62280
|
||||
139,1795,2187,300,76530
|
||||
140,1179,1412,790,85130
|
||||
141,1978,2374,800,48590
|
||||
142,1688,2042,760,73080
|
||||
143,1214,1456,530,48950
|
||||
144,1504,1805,540,48560
|
||||
145,1498,1770,620,59380
|
||||
146,1462,1762,1010,51190
|
||||
147,1442,1750,130,58920
|
||||
148,1250,1486,730,50360
|
||||
149,1229,1480,830,38070
|
||||
150,1936,2300,1060,49170
|
||||
151,1369,1629,770,39740
|
||||
152,1662,1986,70,63730
|
||||
153,1548,1855,670,85330
|
||||
154,1649,1963,490,27410
|
||||
155,1393,1663,670,37320
|
||||
156,1450,1734,380,71120
|
||||
157,1613,1921,1200,72800
|
||||
158,1408,1696,350,34410
|
||||
159,775,932,1090,42530
|
||||
160,1275,1534,1230,54300
|
||||
161,1740,2078,680,50780
|
||||
162,1372,1657,580,45020
|
||||
163,1414,1723,680,69600
|
||||
164,2044,2474,340,80340
|
||||
165,1823,2176,700,37810
|
||||
166,955,1133,580,46140
|
||||
167,1465,1763,680,99570
|
||||
168,1331,1606,630,38650
|
||||
169,1232,1487,860,49800
|
||||
170,1481,1765,490,69910
|
||||
171,1343,1599,870,44910
|
||||
172,1539,1837,990,78470
|
||||
173,1007,1207,670,47460
|
||||
174,1762,2145,490,33460
|
||||
175,1527,1832,580,44090
|
||||
176,1356,1619,700,42620
|
||||
177,1536,1848,670,69450
|
||||
178,1605,1902,390,73120
|
||||
179,1704,2032,590,48300
|
||||
180,1626,1941,1350,58090
|
||||
181,1612,1939,840,74250
|
||||
182,1174,1396,1100,40930
|
||||
183,1923,2339,950,70930
|
||||
184,1702,2053,950,64670
|
||||
185,1398,1692,650,77420
|
||||
186,1437,1717,230,32330
|
||||
187,1524,1796,1060,41080
|
||||
188,1660,1985,1180,42860
|
||||
189,1302,1569,710,68450
|
||||
190,1666,2000,480,39730
|
||||
191,1391,1649,810,83750
|
||||
192,1778,2148,1140,69940
|
||||
193,1462,1770,1070,67710
|
||||
194,1751,2115,790,67360
|
||||
195,1652,1982,690,52460
|
||||
196,1841,2215,610,88760
|
||||
197,1496,1791,1240,67030
|
||||
198,1504,1827,840,78230
|
||||
199,1524,1808,460,62270
|
||||
200,1148,1371,940,49760
|
||||
201,1468,1744,590,73660
|
||||
202,1310,1558,890,72320
|
||||
203,1321,1579,770,68890
|
||||
204,992,1192,900,34180
|
||||
205,1540,1857,1020,58260
|
||||
206,1807,2149,910,38120
|
||||
207,1526,1853,660,49070
|
||||
208,1406,1677,480,61660
|
||||
209,1703,2055,1080,37830
|
||||
210,1575,1872,690,52270
|
||||
211,1309,1572,510,52280
|
||||
212,1488,1807,1030,70810
|
||||
213,1658,1988,370,71530
|
||||
214,1863,2245,640,77260
|
||||
215,1458,1725,750,75550
|
||||
216,1604,1909,370,33730
|
||||
217,1575,1899,840,66270
|
||||
218,1525,1829,840,55820
|
||||
219,1451,1737,890,68430
|
||||
220,1390,1687,620,73990
|
||||
221,1442,1742,310,62800
|
||||
222,1620,1922,550,33740
|
||||
223,1251,1527,380,63830
|
||||
224,1318,1606,1200,24410
|
||||
225,1647,1962,800,70020
|
||||
226,1829,2175,870,92240
|
||||
227,1852,2227,1220,68230
|
||||
228,1699,2053,1080,81870
|
||||
229,1325,1595,540,73860
|
||||
230,1350,1634,880,77120
|
||||
231,1347,1628,120,72350
|
||||
232,1397,1661,1410,49160
|
||||
233,1245,1499,570,45650
|
||||
234,1366,1649,940,52780
|
||||
235,1378,1658,760,90960
|
||||
236,1767,2110,1200,64950
|
||||
237,1184,1434,670,47230
|
||||
238,1257,1505,950,83250
|
||||
239,1863,2247,480,51950
|
||||
240,1881,2244,920,66030
|
||||
241,1329,1609,1150,68590
|
||||
242,1539,1848,750,47140
|
||||
243,1557,1861,370,69940
|
||||
244,2007,2397,610,65890
|
||||
245,1185,1418,1150,89310
|
||||
246,1657,2003,1070,58540
|
||||
247,1294,1539,790,78130
|
||||
248,1296,1559,1070,92300
|
||||
249,1733,2097,730,56170
|
||||
250,1641,1976,620,46050
|
||||
251,1373,1648,530,43390
|
||||
252,1550,1845,700,61750
|
||||
253,1583,1907,680,21830
|
||||
254,1428,1719,1060,39800
|
||||
255,1604,1925,670,54370
|
||||
256,1439,1735,400,62470
|
||||
257,1648,2003,910,82930
|
||||
258,1025,1231,760,63720
|
||||
259,2001,2394,540,79180
|
||||
260,1145,1370,350,38210
|
||||
261,1174,1426,980,25950
|
||||
262,913,1106,860,56610
|
||||
263,1199,1433,1020,73710
|
||||
264,1875,2254,1120,70400
|
||||
265,1153,1397,1020,50440
|
||||
266,1240,1492,940,66840
|
||||
267,1381,1660,970,50170
|
||||
268,1701,2030,830,60140
|
||||
269,1206,1456,920,37130
|
||||
270,1476,1777,660,42890
|
||||
271,1189,1439,780,26220
|
||||
272,1837,2220,340,50840
|
||||
273,1319,1571,1190,25630
|
||||
274,1617,1901,490,60770
|
||||
275,1631,1967,1090,69600
|
||||
276,1517,1805,1040,41740
|
||||
277,1764,2109,1210,50130
|
||||
278,1572,1869,1030,21750
|
||||
279,1855,2197,1170,80490
|
||||
280,1327,1571,730,34020
|
||||
281,1270,1515,720,60240
|
||||
282,1734,2073,500,39460
|
||||
283,1533,1848,1070,56440
|
||||
284,1390,1646,800,46840
|
||||
285,1856,2216,1020,64820
|
||||
286,1000,1215,1070,52520
|
||||
287,1313,1586,420,45940
|
||||
288,1494,1799,510,38970
|
||||
289,1386,1674,1210,58610
|
||||
290,1979,2364,660,30810
|
||||
291,1057,1264,360,47730
|
||||
292,902,1093,1210,64640
|
||||
293,1347,1622,560,44860
|
||||
294,1314,1576,360,55660
|
||||
295,1513,1803,970,57530
|
||||
296,1305,1548,480,75200
|
||||
297,1180,1436,690,37330
|
||||
298,1142,1352,710,35280
|
||||
299,1471,1768,780,70610
|
||||
300,1075,1288,630,49720
|
||||
301,1578,1885,220,68850
|
||||
302,1585,1916,1110,50740
|
||||
303,1391,1648,720,77070
|
||||
304,1577,1892,560,74730
|
||||
305,1092,1314,600,76530
|
||||
306,1375,1681,440,68900
|
||||
307,1523,1813,520,44960
|
||||
308,1373,1654,410,41490
|
||||
309,1550,1871,590,74320
|
||||
310,1614,1946,740,73800
|
||||
311,1566,1889,610,56400
|
||||
312,2019,2396,540,71570
|
||||
313,1494,1806,1450,43640
|
||||
314,1659,2008,620,35120
|
||||
315,1766,2131,340,58670
|
||||
316,1293,1554,970,75800
|
||||
317,1375,1659,1080,76640
|
||||
318,1236,1484,560,31890
|
||||
319,1332,1586,630,61670
|
||||
320,1513,1825,980,75950
|
||||
321,1208,1459,930,41490
|
||||
322,1190,1429,470,66170
|
||||
323,1448,1734,680,37980
|
||||
324,1771,2147,430,62710
|
||||
325,1365,1645,830,60470
|
||||
326,1510,1810,950,35230
|
||||
327,1458,1736,870,48550
|
||||
328,1808,2157,730,56810
|
||||
329,1615,1954,760,41080
|
||||
330,1640,1948,960,51270
|
||||
331,1060,1273,860,57500
|
||||
332,1633,1968,330,81470
|
||||
333,1222,1473,630,49570
|
||||
334,1619,1957,1280,45580
|
||||
335,1624,1973,1440,44660
|
||||
336,1887,2278,570,76240
|
||||
337,1320,1583,540,43720
|
||||
338,1450,1750,480,46700
|
||||
339,1455,1764,390,84690
|
||||
340,966,1172,900,85470
|
||||
341,1922,2290,290,80410
|
||||
342,1678,1999,740,46650
|
||||
343,1638,1952,690,81840
|
||||
344,1145,1375,950,63590
|
||||
345,2004,2390,930,50130
|
||||
346,1954,2378,810,45820
|
||||
347,1577,1879,760,86710
|
||||
348,1766,2138,580,49980
|
||||
349,1362,1634,770,82940
|
||||
350,1886,2228,1530,40350
|
||||
351,1291,1546,420,93950
|
||||
352,1584,1897,1210,47310
|
||||
353,1397,1686,850,21300
|
||||
354,1445,1709,1340,62180
|
||||
355,1433,1707,1160,61460
|
||||
356,1269,1511,500,54360
|
||||
357,1798,2134,820,72050
|
||||
358,1514,1822,670,48090
|
||||
359,1015,1216,460,27310
|
||||
360,1495,1799,950,57160
|
||||
361,1759,2095,980,34190
|
||||
362,1219,1468,850,35600
|
||||
363,1571,1877,580,54670
|
||||
364,1404,1670,620,76730
|
||||
365,1124,1369,650,63540
|
||||
366,1514,1837,1130,36690
|
||||
367,1207,1476,720,87370
|
||||
368,1484,1774,940,59800
|
||||
369,1398,1678,920,48030
|
||||
370,1769,2112,660,96650
|
||||
371,1111,1322,610,65500
|
||||
372,1078,1305,1190,55530
|
||||
373,1876,2254,1340,21650
|
||||
374,1909,2306,820,31940
|
||||
375,1940,2343,1130,84690
|
||||
376,1391,1683,890,68390
|
||||
377,1496,1774,810,75490
|
||||
378,1412,1699,680,39200
|
||||
379,1121,1345,320,85670
|
||||
380,1691,2026,700,60530
|
||||
381,1599,1946,940,78090
|
||||
382,1208,1467,910,50720
|
||||
383,1454,1737,870,23090
|
||||
384,1555,1881,1260,91360
|
||||
385,1554,1852,440,48120
|
||||
386,1491,1800,980,75620
|
||||
387,1415,1697,1200,39420
|
||||
388,1487,1801,890,51130
|
||||
389,1339,1589,1050,33890
|
||||
390,1320,1562,610,87170
|
||||
391,1509,1799,960,38600
|
||||
392,1406,1680,860,60980
|
||||
393,1264,1502,800,79410
|
||||
394,1905,2280,1060,82350
|
||||
395,1209,1464,600,36740
|
||||
396,1546,1829,380,27720
|
||||
397,1689,2044,1140,32260
|
||||
398,1153,1381,730,53270
|
||||
399,2063,2493,810,51480
|
||||
400,1848,2254,1000,59970
|
||||
401,1718,2056,1220,83600
|
||||
402,1480,1773,360,63020
|
||||
403,1439,1724,900,50920
|
||||
404,1576,1896,750,56450
|
||||
405,1948,2374,930,89540
|
||||
406,1613,1893,1180,46030
|
||||
407,896,1059,870,75110
|
||||
408,1625,1943,680,74520
|
||||
409,1303,1587,1310,102310
|
||||
410,1340,1605,1000,53400
|
||||
411,1410,1704,1130,59760
|
||||
412,1432,1719,990,49540
|
||||
413,1891,2280,360,51560
|
||||
414,1322,1583,720,49510
|
||||
415,1378,1670,950,58610
|
||||
416,1462,1732,840,68260
|
||||
417,1440,1733,1120,65310
|
||||
418,1421,1724,920,52090
|
||||
419,1280,1530,1240,43860
|
||||
420,1431,1710,840,74170
|
||||
421,1161,1404,430,58380
|
||||
422,1175,1405,810,91200
|
||||
423,1395,1662,920,90940
|
||||
424,1443,1755,880,49330
|
||||
425,1247,1523,1350,53500
|
||||
426,1788,2133,1000,54590
|
||||
427,1138,1375,1220,57450
|
||||
428,1709,2042,430,33240
|
||||
429,1777,2145,520,80790
|
||||
430,1612,1918,580,61000
|
||||
431,1618,1943,460,47620
|
||||
432,1311,1571,470,72090
|
||||
433,1365,1638,680,102920
|
||||
434,1249,1503,950,61970
|
||||
435,1373,1635,840,61040
|
||||
436,1536,1836,990,52060
|
||||
437,1744,2101,520,69570
|
||||
438,1513,1820,520,66020
|
||||
439,1297,1561,1070,40000
|
||||
440,1908,2304,990,79500
|
||||
441,1721,2076,710,76300
|
||||
442,1243,1491,430,69030
|
||||
443,1472,1766,1290,57140
|
||||
444,1307,1570,1080,41710
|
||||
445,1628,1959,890,71480
|
||||
446,1556,1869,1000,33010
|
||||
447,1179,1428,1290,74570
|
||||
448,1768,2123,860,49590
|
||||
449,1378,1656,1010,73170
|
||||
450,1685,2015,1160,79220
|
||||
451,1474,1759,880,75880
|
||||
452,1794,2137,670,67610
|
||||
453,1086,1291,930,69090
|
||||
454,1808,2154,990,35220
|
||||
455,1501,1790,450,53940
|
||||
456,1353,1598,690,56660
|
||||
457,1455,1757,650,67520
|
||||
458,1165,1411,1020,38620
|
||||
459,1332,1610,880,38890
|
||||
460,1396,1668,760,79270
|
||||
461,1513,1821,690,42880
|
||||
462,1618,1940,630,44240
|
||||
463,1845,2233,370,43190
|
||||
464,1172,1411,460,74550
|
||||
465,1436,1737,770,57090
|
||||
466,1738,2065,750,56480
|
||||
467,2229,2667,660,87410
|
||||
468,1490,1783,730,81370
|
||||
469,1060,1279,670,97360
|
||||
470,2015,2436,680,77960
|
||||
471,1611,1919,960,71240
|
||||
472,1187,1417,1230,58940
|
||||
473,1430,1690,800,78950
|
||||
474,1543,1840,450,36380
|
||||
475,1836,2195,940,45160
|
||||
476,1463,1764,1060,69050
|
||||
477,1213,1462,560,56830
|
||||
478,1244,1480,860,93530
|
||||
479,1745,2108,730,46920
|
||||
480,933,1121,940,55990
|
||||
481,1764,2132,920,40840
|
||||
482,1675,2002,1050,64990
|
||||
483,1688,2046,380,53550
|
||||
484,1842,2204,930,51320
|
||||
485,1316,1597,980,36560
|
||||
486,1440,1719,580,66050
|
||||
487,1760,2111,680,52400
|
||||
488,1323,1571,850,27970
|
||||
489,1230,1466,730,67100
|
||||
490,1540,1838,570,43710
|
||||
491,1167,1388,620,38600
|
||||
492,1429,1695,890,53890
|
||||
493,1491,1770,800,52610
|
||||
494,1313,1574,920,43130
|
||||
495,1609,1943,920,40300
|
||||
496,1109,1342,760,49750
|
||||
497,1207,1440,500,43840
|
||||
498,902,1087,680,56820
|
||||
499,1191,1422,770,36350
|
||||
500,1335,1601,460,50820
|
||||
501,1382,1660,1070,83720
|
||||
502,1588,1906,450,46970
|
||||
503,1918,2284,310,78020
|
||||
504,1484,1774,880,45080
|
||||
505,1334,1607,370,55160
|
||||
506,1556,1846,760,72020
|
||||
507,1784,2142,950,64010
|
||||
508,1244,1477,890,27840
|
||||
509,1496,1787,800,58070
|
||||
510,1719,2058,700,51760
|
||||
511,1678,2022,1050,66050
|
||||
512,1247,1490,330,65750
|
||||
513,1191,1421,980,65820
|
||||
514,1832,2208,1090,46760
|
||||
515,1271,1523,1140,50940
|
||||
516,1735,2084,820,56440
|
||||
517,1627,1948,890,32610
|
||||
518,1351,1616,650,62770
|
||||
519,1520,1817,850,63600
|
||||
520,1490,1788,360,45840
|
||||
521,1777,2117,780,38280
|
||||
522,1688,2037,590,50960
|
||||
523,1537,1836,670,39480
|
||||
524,1622,1968,340,69610
|
||||
525,1148,1384,730,47800
|
||||
526,1001,1194,1210,44890
|
||||
527,1857,2236,1280,67420
|
||||
528,1552,1869,710,78870
|
||||
529,1700,2064,940,70310
|
||||
530,1554,1844,670,38530
|
||||
531,1482,1777,800,77570
|
||||
532,1275,1517,790,59920
|
||||
533,1642,1981,720,54450
|
||||
534,1381,1633,1270,50250
|
||||
535,1381,1634,930,30790
|
||||
536,1057,1262,1490,35420
|
||||
537,1192,1445,810,43470
|
||||
538,1601,1920,600,61000
|
||||
539,1622,1968,210,64780
|
||||
540,1607,1909,460,39030
|
||||
541,2214,2647,740,65900
|
||||
542,1633,1936,1320,46050
|
||||
543,1546,1845,760,59070
|
||||
544,1475,1753,920,44670
|
||||
545,1270,1519,920,58390
|
||||
546,1185,1420,880,80370
|
||||
547,1614,1938,1110,53230
|
||||
548,1141,1353,1370,72000
|
||||
549,1244,1481,410,84040
|
||||
550,869,1050,850,52540
|
||||
551,2049,2465,720,63510
|
||||
552,1883,2262,570,42240
|
||||
553,1526,1842,690,39580
|
||||
554,1165,1390,1220,54610
|
||||
555,1832,2185,840,87330
|
||||
556,1723,2072,560,88410
|
||||
557,932,1138,820,89760
|
||||
558,1137,1374,700,101780
|
||||
559,1231,1472,810,70290
|
||||
560,1237,1512,1070,88210
|
||||
561,1371,1650,540,87160
|
||||
562,1767,2158,530,41540
|
||||
563,1748,2092,580,49170
|
||||
564,1212,1440,500,63950
|
||||
565,1466,1743,1200,70810
|
||||
566,1152,1386,980,49590
|
||||
567,1439,1703,1000,67290
|
||||
568,2026,2400,720,51240
|
||||
569,1772,2146,1030,48540
|
||||
570,1511,1822,420,72410
|
||||
571,1199,1461,1070,54370
|
||||
572,1834,2184,830,94460
|
||||
573,1143,1375,940,85160
|
||||
574,1494,1794,550,52130
|
||||
575,1770,2131,1140,54650
|
||||
576,1455,1747,750,69320
|
||||
577,1141,1372,620,51480
|
||||
578,1586,1886,660,50060
|
||||
579,1701,2034,660,62180
|
||||
580,1860,2246,410,79780
|
||||
581,1167,1406,440,42860
|
||||
582,1424,1716,630,54410
|
||||
583,1710,2053,730,69390
|
||||
584,1408,1708,220,42810
|
||||
585,1517,1831,610,30840
|
||||
586,1227,1476,720,56260
|
||||
587,1609,1930,740,76470
|
||||
588,1553,1831,740,35680
|
||||
589,1814,2174,770,90070
|
||||
590,1240,1493,590,33120
|
||||
591,1206,1437,1330,54060
|
||||
592,1847,2186,910,75120
|
||||
593,1009,1202,330,41600
|
||||
594,1624,1946,870,20270
|
||||
595,1612,1931,790,60060
|
||||
596,1498,1805,1270,82270
|
||||
597,946,1125,590,29170
|
||||
598,1563,1872,1080,68420
|
||||
599,1664,2016,830,59130
|
||||
600,1619,1947,910,74330
|
||||
601,1433,1722,830,77080
|
||||
602,1241,1489,1380,76250
|
||||
603,1429,1720,1180,59540
|
||||
604,1241,1488,770,54690
|
||||
605,1078,1306,680,84360
|
||||
606,1690,2065,910,51420
|
||||
607,1289,1536,540,65120
|
||||
608,1581,1894,760,49380
|
||||
609,1608,1945,760,37830
|
||||
610,1344,1608,730,35980
|
||||
611,1513,1804,430,69190
|
||||
612,1529,1839,1000,50590
|
||||
613,1677,2014,660,60800
|
||||
614,1015,1229,930,31180
|
||||
615,1438,1751,760,77790
|
||||
616,1426,1718,370,47570
|
||||
617,1412,1701,630,69130
|
||||
618,1622,1944,360,75970
|
||||
619,1503,1791,630,68350
|
||||
620,1501,1789,670,41680
|
||||
621,1971,2342,690,86560
|
||||
622,1383,1687,830,81390
|
||||
623,1371,1635,720,50730
|
||||
624,1508,1823,520,71290
|
||||
625,1057,1284,750,70110
|
||||
626,1411,1680,1070,61590
|
||||
627,1466,1746,590,69370
|
||||
628,1545,1888,600,67110
|
||||
629,2044,2408,380,82020
|
||||
630,1887,2264,830,62050
|
||||
631,1505,1836,940,61730
|
||||
632,1422,1722,560,58660
|
||||
633,1564,1869,1030,53370
|
||||
634,1510,1810,730,39700
|
||||
635,1568,1920,890,53750
|
||||
636,1933,2338,1140,44730
|
||||
637,1501,1822,590,49350
|
||||
638,1593,1911,580,43340
|
||||
639,1812,2189,310,78090
|
||||
640,1580,1895,720,54950
|
||||
641,1440,1749,490,75530
|
||||
642,1100,1331,1010,57330
|
||||
643,1534,1841,680,87930
|
||||
644,1299,1555,1020,56850
|
||||
645,1767,2121,1050,78430
|
||||
646,1368,1649,740,63660
|
||||
647,1393,1670,410,62960
|
||||
648,1327,1590,770,81870
|
||||
649,1514,1794,1400,54820
|
||||
650,1989,2414,860,116320
|
||||
651,1334,1584,840,57200
|
||||
652,1533,1817,950,84360
|
||||
653,1809,2145,940,36530
|
||||
654,1607,1933,930,81260
|
||||
655,1165,1387,1060,82350
|
||||
656,1193,1430,560,80830
|
||||
657,1709,2065,670,30610
|
||||
658,1525,1839,540,51310
|
||||
659,1348,1623,1010,72940
|
||||
660,1132,1366,1340,52450
|
||||
661,1667,2020,980,66070
|
||||
662,1427,1720,630,43190
|
||||
663,1211,1447,1110,40730
|
||||
664,1717,2048,700,78530
|
||||
665,1766,2111,580,94690
|
||||
666,1086,1299,1050,44400
|
||||
667,1410,1692,790,73800
|
||||
668,1476,1760,600,37390
|
||||
669,1068,1278,440,64120
|
||||
670,1485,1785,1340,66160
|
||||
671,1461,1739,1250,22310
|
||||
672,1685,2010,990,62380
|
||||
673,1624,1958,290,63850
|
||||
674,1658,2000,350,36210
|
||||
675,1427,1677,210,54590
|
||||
676,1755,2072,810,69610
|
||||
677,1211,1472,790,65390
|
||||
678,1591,1896,780,78130
|
||||
679,1797,2126,730,55710
|
||||
680,1519,1823,1040,69210
|
||||
681,1637,1958,760,59940
|
||||
682,1451,1750,570,72550
|
||||
683,1203,1446,620,44260
|
||||
684,1884,2262,310,56910
|
||||
685,1540,1820,310,82390
|
||||
686,1121,1332,790,54590
|
||||
687,1307,1562,490,69990
|
||||
688,1475,1775,230,72740
|
||||
689,1160,1401,900,35360
|
||||
690,1078,1276,640,94370
|
||||
691,1191,1436,840,43520
|
||||
692,1317,1569,780,36000
|
||||
693,1548,1858,480,99480
|
||||
694,1560,1883,1040,83220
|
||||
695,1297,1529,870,52940
|
||||
696,1645,1958,530,93360
|
||||
697,1225,1455,750,73590
|
||||
698,1421,1704,840,53840
|
||||
699,1655,1956,800,47350
|
||||
700,1615,1928,660,65080
|
||||
701,1872,2262,560,62050
|
||||
702,1317,1581,910,30020
|
||||
703,1434,1729,480,49510
|
||||
704,1791,2167,700,64320
|
||||
705,932,1120,660,35590
|
||||
706,1609,1924,1170,63050
|
||||
707,1495,1793,1020,65300
|
||||
708,1769,2153,580,69560
|
||||
709,1693,2032,610,41910
|
||||
710,1247,1497,590,28330
|
||||
711,1502,1815,190,55980
|
||||
712,1360,1612,490,61080
|
||||
713,1542,1844,680,51380
|
||||
714,1631,1947,670,84410
|
||||
715,1246,1482,1070,60680
|
||||
716,1990,2384,1110,64690
|
||||
717,967,1154,560,45780
|
||||
718,1582,1894,1100,41800
|
||||
719,1430,1743,970,53230
|
||||
720,1827,2160,930,36160
|
||||
721,1118,1338,1040,40450
|
||||
722,1766,2109,1120,57910
|
||||
723,1799,2173,910,36280
|
||||
724,1167,1411,440,39190
|
||||
725,1493,1795,530,62380
|
||||
726,1445,1734,900,21470
|
||||
727,1033,1237,740,34610
|
||||
728,1440,1711,1020,88120
|
||||
729,1487,1773,970,59190
|
||||
730,1854,2205,890,36290
|
||||
731,1748,2086,550,53760
|
||||
732,1937,2310,520,66300
|
||||
733,1641,1999,950,93000
|
||||
734,1659,1999,650,65660
|
||||
735,1743,2061,860,81930
|
||||
736,1449,1733,320,60060
|
||||
737,1098,1309,860,59530
|
||||
738,1121,1351,900,46380
|
||||
739,1526,1858,550,76200
|
||||
740,1358,1645,770,56860
|
||||
741,1336,1616,710,86620
|
||||
742,1502,1802,840,49730
|
||||
743,1534,1858,860,88370
|
||||
744,1418,1699,870,49160
|
||||
745,854,1018,660,77740
|
||||
746,1450,1728,930,38560
|
||||
747,1474,1776,1020,51990
|
||||
748,1524,1819,1190,39970
|
||||
749,1361,1638,1140,46040
|
||||
750,1398,1683,490,49500
|
||||
751,1085,1308,1170,76670
|
||||
752,1660,1979,480,75800
|
||||
753,1648,2017,930,81720
|
||||
754,1453,1749,890,58440
|
||||
755,1323,1591,680,85720
|
||||
756,1385,1643,740,70940
|
||||
757,1250,1506,990,62420
|
||||
758,1389,1683,680,56880
|
||||
759,1486,1758,820,101820
|
||||
760,1655,1993,440,86890
|
||||
761,1645,1963,900,47300
|
||||
762,1464,1771,1080,31270
|
||||
763,1197,1428,830,65410
|
||||
764,1878,2264,310,54200
|
||||
765,1150,1378,730,67390
|
||||
766,1562,1881,740,54530
|
||||
767,1596,1939,960,79760
|
||||
768,1119,1345,790,78060
|
||||
769,1116,1347,700,74080
|
||||
770,1934,2349,750,52990
|
||||
771,1299,1540,590,70580
|
||||
772,1417,1689,570,34310
|
||||
773,1235,1503,660,74160
|
||||
774,1497,1815,700,59190
|
||||
775,1430,1704,1070,43370
|
||||
776,1537,1877,660,17670
|
||||
777,1444,1742,840,56710
|
||||
778,1477,1798,850,59820
|
||||
779,1041,1246,600,36190
|
||||
780,1226,1472,710,60440
|
||||
781,1489,1783,450,75300
|
||||
782,1549,1871,740,74080
|
||||
783,1073,1280,1240,60440
|
||||
784,1473,1785,570,80720
|
||||
785,2013,2396,580,47060
|
||||
786,1975,2368,450,86830
|
||||
787,1561,1877,790,56790
|
||||
788,1427,1723,1040,67090
|
||||
789,1441,1747,670,44370
|
||||
790,1275,1548,370,82970
|
||||
791,1574,1876,620,56230
|
||||
792,1511,1791,1010,53760
|
||||
793,1428,1713,550,55390
|
||||
794,1388,1672,800,73500
|
||||
795,1057,1280,610,41050
|
||||
796,1440,1747,1090,67320
|
||||
797,1349,1610,700,65890
|
||||
798,1536,1808,830,56380
|
||||
799,2019,2420,850,85670
|
||||
800,1236,1508,1260,70830
|
||||
801,1436,1715,1030,48180
|
||||
802,1862,2248,1160,51910
|
||||
803,1200,1442,880,44320
|
||||
804,1360,1650,420,58940
|
||||
805,1722,2078,770,73610
|
||||
806,1577,1902,910,54060
|
||||
807,1850,2214,1110,85000
|
||||
808,1447,1730,510,49030
|
||||
809,1496,1800,780,63300
|
||||
810,1679,2008,790,84300
|
||||
811,994,1194,1090,81390
|
||||
812,1354,1635,1270,95900
|
||||
813,1597,1918,1260,71830
|
||||
814,1873,2252,330,79310
|
||||
815,1218,1459,540,87890
|
||||
816,1458,1746,720,48610
|
||||
817,1546,1860,670,73160
|
||||
818,1608,1962,770,36280
|
||||
819,1822,2160,860,49720
|
||||
820,1716,2038,410,44400
|
||||
821,1072,1296,900,47590
|
||||
822,1330,1604,480,51460
|
||||
823,1588,1892,540,57750
|
||||
824,1425,1733,760,66000
|
||||
825,1778,2133,280,45950
|
||||
826,1363,1630,1120,53900
|
||||
827,1609,1928,160,37920
|
||||
828,1671,2024,620,63100
|
||||
829,1379,1636,440,36770
|
||||
830,1218,1452,870,43910
|
||||
831,1724,2101,900,66390
|
||||
832,986,1179,710,59160
|
||||
833,1330,1606,590,38510
|
||||
834,1437,1725,910,46220
|
||||
835,1327,1609,1320,41500
|
||||
836,1651,2009,1000,58160
|
||||
837,1211,1462,670,38530
|
||||
838,1916,2277,610,55880
|
||||
839,1638,1937,1050,70940
|
||||
840,1172,1413,480,53940
|
||||
841,1350,1606,770,43030
|
||||
842,1528,1843,340,59820
|
||||
843,1305,1557,580,55500
|
||||
844,1463,1751,900,49990
|
||||
845,1409,1727,700,42980
|
||||
846,1419,1743,860,65970
|
||||
847,1535,1819,540,59290
|
||||
848,1474,1745,970,63020
|
||||
849,919,1099,1560,73810
|
||||
850,2067,2492,790,70230
|
||||
851,1977,2362,1020,59950
|
||||
852,1293,1558,790,78100
|
||||
853,1477,1790,880,16370
|
||||
854,1582,1906,550,92640
|
||||
855,1481,1789,550,63540
|
||||
856,1214,1455,950,87220
|
||||
857,1206,1460,810,41990
|
||||
858,1653,1982,390,79410
|
||||
859,1152,1393,860,54380
|
||||
860,1458,1757,850,58600
|
||||
861,1249,1510,660,48950
|
||||
862,1939,2333,830,40670
|
||||
863,1591,1919,640,52340
|
||||
864,1180,1397,750,39140
|
||||
865,1846,2195,1170,41090
|
||||
866,780,951,790,25600
|
||||
867,1565,1854,900,100900
|
||||
868,1648,1959,370,77080
|
||||
869,1775,2104,980,105150
|
||||
870,1439,1732,1170,80580
|
||||
871,1487,1776,800,46230
|
||||
872,1800,2158,1100,98260
|
||||
873,1690,2024,1070,75930
|
||||
874,1209,1452,830,52050
|
||||
875,1859,2222,1210,87000
|
||||
876,1691,2023,540,60270
|
||||
877,1259,1493,100,88270
|
||||
878,1771,2138,820,57820
|
||||
879,1205,1468,1210,61210
|
||||
880,1792,2131,810,76420
|
||||
881,1263,1516,780,70980
|
||||
882,1344,1605,1160,76740
|
||||
883,1819,2187,590,47920
|
||||
884,1357,1625,1140,52160
|
||||
885,1396,1673,690,32740
|
||||
886,1118,1337,560,72270
|
||||
887,1655,1986,1150,77430
|
||||
888,1156,1398,140,92370
|
||||
889,1451,1734,670,34880
|
||||
890,1539,1829,650,46580
|
||||
891,1549,1851,1220,70620
|
||||
892,1582,1910,1080,66390
|
||||
893,1387,1663,850,82080
|
||||
894,1200,1436,1060,76440
|
||||
895,1299,1560,770,96610
|
||||
896,1174,1429,1110,54340
|
|
File diff suppressed because it is too large
Load Diff
599
lab_2/lab2.ipynb
599
lab_2/lab2.ipynb
@ -1,599 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Lab2 Pibd-31 Malafeev**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"*Загрузка трёх других датасетов(не своего варианта)*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
||||
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
||||
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Далее будут выполнены в Markdown пукнты лабораторной 2-8 с пометкой каждого пункта."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2.Проблемной областью явлются: датасет stores.csv - магазины, kc_house_data.csv - датасет продажи домов и Forber Billionares.csv - датасет миллионеров."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3.Объектами наблюдениями явлются магазины, дома и миллионеры. Связи между объектами нет, единственная схожесть - магазин и дом являются зданиями."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"4.Датасет миллионеров нужны например для сайта forbes - чтобы составить тир лист. В целом, другие датасеты тоже подходят для составления тир листа)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"5.Технический проект - тир лист, на входе датасет"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"6.Пункт будем выполнять в коде, оставлю к каждому комменты:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"id 0\n",
|
||||
"date 0\n",
|
||||
"price 0\n",
|
||||
"bedrooms 0\n",
|
||||
"bathrooms 0\n",
|
||||
"sqft_living 0\n",
|
||||
"sqft_lot 0\n",
|
||||
"floors 0\n",
|
||||
"waterfront 0\n",
|
||||
"view 0\n",
|
||||
"condition 0\n",
|
||||
"grade 0\n",
|
||||
"sqft_above 0\n",
|
||||
"sqft_basement 0\n",
|
||||
"yr_built 0\n",
|
||||
"yr_renovated 0\n",
|
||||
"zipcode 0\n",
|
||||
"lat 0\n",
|
||||
"long 0\n",
|
||||
"sqft_living15 0\n",
|
||||
"sqft_lot15 0\n",
|
||||
"dtype: int64\n",
|
||||
"Store ID 0\n",
|
||||
"Store_Area 0\n",
|
||||
"Items_Available 0\n",
|
||||
"Daily_Customer_Count 0\n",
|
||||
"Store_Sales 0\n",
|
||||
"dtype: int64\n",
|
||||
"Rank 0\n",
|
||||
"Name 0\n",
|
||||
"Networth 0\n",
|
||||
"Age 0\n",
|
||||
"Country 0\n",
|
||||
"Source 0\n",
|
||||
"Industry 0\n",
|
||||
"dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Проверка на пропущенные значения\n",
|
||||
"print(df.isnull().sum())\n",
|
||||
"print(df2.isnull().sum())\n",
|
||||
"print(df3.isnull().sum())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" id price bedrooms bathrooms sqft_living \\\n",
|
||||
"count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
|
||||
"mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
|
||||
"std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
|
||||
"min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
|
||||
"25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
|
||||
"50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
|
||||
"75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
|
||||
"max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
|
||||
"\n",
|
||||
" sqft_lot floors waterfront view condition \\\n",
|
||||
"count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||
"mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
|
||||
"std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
|
||||
"min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
|
||||
"25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
|
||||
"50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
|
||||
"75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
|
||||
"max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
|
||||
"\n",
|
||||
" grade sqft_above sqft_basement yr_built yr_renovated \\\n",
|
||||
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||
"mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
|
||||
"std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
|
||||
"min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
|
||||
"25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
|
||||
"50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
|
||||
"75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
|
||||
"max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
|
||||
"\n",
|
||||
" zipcode lat long sqft_living15 sqft_lot15 \n",
|
||||
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
||||
"mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
|
||||
"std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
|
||||
"min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
|
||||
"25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
|
||||
"50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
|
||||
"75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
|
||||
"max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 \n",
|
||||
" Store ID Store_Area Items_Available Daily_Customer_Count \\\n",
|
||||
"count 896.000000 896.000000 896.000000 896.000000 \n",
|
||||
"mean 448.500000 1485.409598 1782.035714 786.350446 \n",
|
||||
"std 258.797218 250.237011 299.872053 265.389281 \n",
|
||||
"min 1.000000 775.000000 932.000000 10.000000 \n",
|
||||
"25% 224.750000 1316.750000 1575.500000 600.000000 \n",
|
||||
"50% 448.500000 1477.000000 1773.500000 780.000000 \n",
|
||||
"75% 672.250000 1653.500000 1982.750000 970.000000 \n",
|
||||
"max 896.000000 2229.000000 2667.000000 1560.000000 \n",
|
||||
"\n",
|
||||
" Store_Sales \n",
|
||||
"count 896.000000 \n",
|
||||
"mean 59351.305804 \n",
|
||||
"std 17190.741895 \n",
|
||||
"min 14920.000000 \n",
|
||||
"25% 46530.000000 \n",
|
||||
"50% 58605.000000 \n",
|
||||
"75% 71872.500000 \n",
|
||||
"max 116320.000000 \n",
|
||||
" Rank Networth Age\n",
|
||||
"count 2600.000000 2600.000000 2600.000000\n",
|
||||
"mean 1269.570769 4.860750 64.271923\n",
|
||||
"std 728.146364 10.659671 13.220607\n",
|
||||
"min 1.000000 1.000000 19.000000\n",
|
||||
"25% 637.000000 1.500000 55.000000\n",
|
||||
"50% 1292.000000 2.400000 64.000000\n",
|
||||
"75% 1929.000000 4.500000 74.000000\n",
|
||||
"max 2578.000000 219.000000 100.000000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Статистика по числовым данным для выявления аномальных распределений\n",
|
||||
"print(df.describe())\n",
|
||||
"print(df2.describe())\n",
|
||||
"print(df3.describe())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"7.По перым трём строкам кода, т.е после проверки на пропущенные значения выявлено, что их нет. А дальше я обнаружил аномалию: в датасете миллионеров есть столбец networth - чистое количество денег во всех формах ( в миллиардах ), в этом солбце минимальное значение является единицей, медиана в районе 2.4, а максимальное - 219. В ЭТОМ СТОЛБЦЕ АНОМАЛИЯ"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"8.Наши датасеты довольно информационные. Например у миллионер датасета можно посмотреть фио, сколько денег, что он сделал. Датасет по продаже домов гораздо информационнее, является лидером по наполненности и соответствует реальности. А вот датасет магазинов слабоват, можно например добавить: количество филлиалов, работников, прибыль"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"9.Возьмём датасет магазинов, будем удалять столбцы, где площадь ниже 1500 (по тз надо)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales\n",
|
||||
"0 1 1659 1961 530 66490\n",
|
||||
"4 5 1770 2111 450 46620\n",
|
||||
"6 7 1542 1858 1030 72240\n",
|
||||
"11 12 1751 2098 720 57620\n",
|
||||
"12 13 1746 2064 1050 60470\n",
|
||||
".. ... ... ... ... ...\n",
|
||||
"882 883 1819 2187 590 47920\n",
|
||||
"886 887 1655 1986 1150 77430\n",
|
||||
"889 890 1539 1829 650 46580\n",
|
||||
"890 891 1549 1851 1220 70620\n",
|
||||
"891 892 1582 1910 1080 66390\n",
|
||||
"\n",
|
||||
"[415 rows x 5 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df2_filtered = df2[df2['Store_Area'] >= 1500]\n",
|
||||
"print(df2_filtered)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Теперь в датасете магазнов price заменим у всех на константное значение - 1 500 000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" id date price bedrooms bathrooms sqft_living \\\n",
|
||||
"0 7129300520 20141013T000000 1500000 3 1.00 1180 \n",
|
||||
"1 6414100192 20141209T000000 1500000 3 2.25 2570 \n",
|
||||
"2 5631500400 20150225T000000 1500000 2 1.00 770 \n",
|
||||
"3 2487200875 20141209T000000 1500000 4 3.00 1960 \n",
|
||||
"4 1954400510 20150218T000000 1500000 3 2.00 1680 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"21608 263000018 20140521T000000 1500000 3 2.50 1530 \n",
|
||||
"21609 6600060120 20150223T000000 1500000 4 2.50 2310 \n",
|
||||
"21610 1523300141 20140623T000000 1500000 2 0.75 1020 \n",
|
||||
"21611 291310100 20150116T000000 1500000 3 2.50 1600 \n",
|
||||
"21612 1523300157 20141015T000000 1500000 2 0.75 1020 \n",
|
||||
"\n",
|
||||
" sqft_lot floors waterfront view ... grade sqft_above \\\n",
|
||||
"0 5650 1.0 0 0 ... 7 1180 \n",
|
||||
"1 7242 2.0 0 0 ... 7 2170 \n",
|
||||
"2 10000 1.0 0 0 ... 6 770 \n",
|
||||
"3 5000 1.0 0 0 ... 7 1050 \n",
|
||||
"4 8080 1.0 0 0 ... 8 1680 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"21608 1131 3.0 0 0 ... 8 1530 \n",
|
||||
"21609 5813 2.0 0 0 ... 8 2310 \n",
|
||||
"21610 1350 2.0 0 0 ... 7 1020 \n",
|
||||
"21611 2388 2.0 0 0 ... 8 1600 \n",
|
||||
"21612 1076 2.0 0 0 ... 7 1020 \n",
|
||||
"\n",
|
||||
" sqft_basement yr_built yr_renovated zipcode lat long \\\n",
|
||||
"0 0 1955 0 98178 47.5112 -122.257 \n",
|
||||
"1 400 1951 1991 98125 47.7210 -122.319 \n",
|
||||
"2 0 1933 0 98028 47.7379 -122.233 \n",
|
||||
"3 910 1965 0 98136 47.5208 -122.393 \n",
|
||||
"4 0 1987 0 98074 47.6168 -122.045 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"21608 0 2009 0 98103 47.6993 -122.346 \n",
|
||||
"21609 0 2014 0 98146 47.5107 -122.362 \n",
|
||||
"21610 0 2009 0 98144 47.5944 -122.299 \n",
|
||||
"21611 0 2004 0 98027 47.5345 -122.069 \n",
|
||||
"21612 0 2008 0 98144 47.5941 -122.299 \n",
|
||||
"\n",
|
||||
" sqft_living15 sqft_lot15 \n",
|
||||
"0 1340 5650 \n",
|
||||
"1 1690 7639 \n",
|
||||
"2 2720 8062 \n",
|
||||
"3 1360 5000 \n",
|
||||
"4 1800 7503 \n",
|
||||
"... ... ... \n",
|
||||
"21608 1530 1509 \n",
|
||||
"21609 1830 7200 \n",
|
||||
"21610 1020 2007 \n",
|
||||
"21611 1410 1287 \n",
|
||||
"21612 1020 1357 \n",
|
||||
"\n",
|
||||
"[21613 rows x 21 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df['price'] = 1500000\n",
|
||||
"print(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Теперь у миллионеров в networth подставим среднее по столбцу:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Networth\n",
|
||||
"0 4.86075\n",
|
||||
"1 4.86075\n",
|
||||
"2 4.86075\n",
|
||||
"3 4.86075\n",
|
||||
"4 4.86075\n",
|
||||
"... ...\n",
|
||||
"2595 4.86075\n",
|
||||
"2596 4.86075\n",
|
||||
"2597 4.86075\n",
|
||||
"2598 4.86075\n",
|
||||
"2599 4.86075\n",
|
||||
"\n",
|
||||
"[2600 rows x 1 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"networth_mean = df3['Networth'].mean()\n",
|
||||
"df3['Networth'] = networth_mean\n",
|
||||
"print(df3[['Networth']])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"10.КОД"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Train df: (15129, 21), Validation df: (3242, 21), Test df: (3242, 21)\n",
|
||||
"Train df2: (627, 5), Validation df2: (134, 5), Test df2: (135, 5)\n",
|
||||
"Train df3: (1820, 7), Validation df3: (390, 7), Test df3: (390, 7)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)\n",
|
||||
"val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42) \n",
|
||||
"\n",
|
||||
"train_df2, temp_df2 = train_test_split(df2, test_size=0.3, random_state=42)\n",
|
||||
"val_df2, test_df2 = train_test_split(temp_df2, test_size=0.5, random_state=42)\n",
|
||||
"\n",
|
||||
"train_df3, temp_df3 = train_test_split(df3, test_size=0.3, random_state=42)\n",
|
||||
"val_df3, test_df3 = train_test_split(temp_df3, test_size=0.5, random_state=42)\n",
|
||||
"print(f\"Train df: {train_df.shape}, Validation df: {val_df.shape}, Test df: {test_df.shape}\")\n",
|
||||
"print(f\"Train df2: {train_df2.shape}, Validation df2: {val_df2.shape}, Test df2: {test_df2.shape}\")\n",
|
||||
"print(f\"Train df3: {train_df3.shape}, Validation df3: {val_df3.shape}, Test df3: {test_df3.shape}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Было сделаное разбиение на три выборки: 70%, 15% и 15%. Подключена была библиотека scikit-learn и функция train_test_split , как сказано в пункте 15. Вполне сбалансированные"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"12.Качаем библиотеку imbalanced-learn, достаём нужные функции и погнали"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Class distribution after oversampling (df):\n",
|
||||
"price_category\n",
|
||||
"Low 10787\n",
|
||||
"Medium 10787\n",
|
||||
"High 10787\n",
|
||||
"Luxury 10787\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Class distribution after undersampling (df):\n",
|
||||
"price_category\n",
|
||||
"Low 1465\n",
|
||||
"Medium 1465\n",
|
||||
"High 1465\n",
|
||||
"Luxury 1465\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from imblearn.over_sampling import RandomOverSampler\n",
|
||||
"from imblearn.under_sampling import RandomUnderSampler\n",
|
||||
"df = pd.read_csv(\".//datasetlab2//kc_house_data.csv\", sep=\",\")\n",
|
||||
"df['price_category'] = pd.cut(df['price'], bins=[0, 300000, 600000, 1000000, float('inf')],\n",
|
||||
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
||||
"\n",
|
||||
"y = df['price_category']\n",
|
||||
"X = df.drop(columns=['price', 'price_category'])\n",
|
||||
"\n",
|
||||
"oversampler = RandomOverSampler(random_state=42)\n",
|
||||
"X_resampled, y_resampled = oversampler.fit_resample(X, y)\n",
|
||||
"\n",
|
||||
"undersampler = RandomUnderSampler(random_state=42)\n",
|
||||
"X_resampled_under, y_resampled_under = undersampler.fit_resample(X, y)\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after oversampling (df):\")\n",
|
||||
"print(pd.Series(y_resampled).value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after undersampling (df):\")\n",
|
||||
"print(pd.Series(y_resampled_under).value_counts())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Class distribution after oversampling (df3):\n",
|
||||
"AGE_category\n",
|
||||
"Young 1401\n",
|
||||
"Middle-aged 1401\n",
|
||||
"Senior 1401\n",
|
||||
"Elderly 1401\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Class distribution after undersampling (df3):\n",
|
||||
"AGE_category\n",
|
||||
"Young 15\n",
|
||||
"Middle-aged 15\n",
|
||||
"Senior 15\n",
|
||||
"Elderly 15\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df3 = pd.read_csv(\".//datasetlab2//Forbes Billionaires.csv\", sep=\",\")\n",
|
||||
"\n",
|
||||
"df3['AGE_category'] = pd.cut(df3['Age'], bins=[0, 30, 50, 70, float('inf')],\n",
|
||||
" labels=['Young', 'Middle-aged', 'Senior', 'Elderly'])\n",
|
||||
"\n",
|
||||
"y3 = df3['AGE_category']\n",
|
||||
"X3 = df3.drop(columns=['Age', 'AGE_category'])\n",
|
||||
"\n",
|
||||
"oversampler3 = RandomOverSampler(random_state=42)\n",
|
||||
"X_resampled_3, y_resampled_3 = oversampler3.fit_resample(X3, y3)\n",
|
||||
"\n",
|
||||
"undersampler3 = RandomUnderSampler(random_state=42)\n",
|
||||
"X_resampled_3_under, y_resampled_3_under = undersampler3.fit_resample(X3, y3)\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after oversampling (df3):\")\n",
|
||||
"print(pd.Series(y_resampled_3).value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after undersampling (df3):\")\n",
|
||||
"print(pd.Series(y_resampled_3_under).value_counts())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Class distribution after oversampling (df2):\n",
|
||||
"Sales_category\n",
|
||||
"Low 598\n",
|
||||
"Medium 598\n",
|
||||
"High 598\n",
|
||||
"Luxury 0\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Class distribution after undersampling (df2):\n",
|
||||
"Sales_category\n",
|
||||
"Low 7\n",
|
||||
"Medium 7\n",
|
||||
"High 7\n",
|
||||
"Luxury 0\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df2 = pd.read_csv(\".//datasetlab2//Stores.csv\", sep=\",\")\n",
|
||||
"\n",
|
||||
"df2['Sales_category'] = pd.cut(df2['Store_Sales'], bins=[0, 50000, 100000, 200000, float('inf')],\n",
|
||||
" labels=['Low', 'Medium', 'High', 'Luxury'])\n",
|
||||
"\n",
|
||||
"y2 = df2['Sales_category']\n",
|
||||
"X2 = df2.drop(columns=['Store_Sales', 'Sales_category'])\n",
|
||||
"\n",
|
||||
"oversampler2 = RandomOverSampler(random_state=42)\n",
|
||||
"X_resampled_2, y_resampled_2 = oversampler2.fit_resample(X2, y2)\n",
|
||||
"\n",
|
||||
"undersampler2 = RandomUnderSampler(random_state=42)\n",
|
||||
"X_resampled_2_under, y_resampled_2_under = undersampler2.fit_resample(X2, y2)\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after oversampling (df2):\")\n",
|
||||
"print(pd.Series(y_resampled_2).value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Class distribution after undersampling (df2):\")\n",
|
||||
"print(pd.Series(y_resampled_2_under).value_counts())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "miivenv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because it is too large
Load Diff
1000
lab_3/lab3.ipynb
1000
lab_3/lab3.ipynb
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
496
lab_4/lab4.ipynb
496
lab_4/lab4.ipynb
@ -1,496 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Lab 4 Malafeev PIbd-31**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1.Для начала выберем бизнес-цели для задач регрессии и классификации."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Классификация. Цель: определить, откликнется ли клиент на маркетинговую кампанию. Столбец целевой переменной - Response, 1 - откликнулся, 0 - нет. Признаки - Возраст, Уровень дохода. (Age, Income)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Регрессия. Цель: прогноз расходов клиента. Столбец целевой переменной: Total_Spending - общие расходы, будут считаться по всем расходам. Признаки такие же."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2.Достижимый уровень качества:\n",
|
||||
"Классификация:\n",
|
||||
"Оценка метрики accuracy: ориентир 70-80% (с учетом ограниченных признаков).\n",
|
||||
"Регрессия:\n",
|
||||
"MSE (среднеквадратичная ошибка): минимизация, ориентир в зависимости от разброса целевой переменной.\n",
|
||||
"R^2 > 0.6"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3.Ориентир. Классификация:\n",
|
||||
"DummyClassifier, предсказывающий самый частый класс, даст accuracy ~50-60%.\n",
|
||||
"Регрессия:\n",
|
||||
"Прогноз среднего значения целевой переменной."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"X_class_train: (1568, 2), y_class_train: (1568,)\n",
|
||||
"X_reg_train: (1568, 2), y_reg_train: (1568,)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"\n",
|
||||
"data = pd.read_csv(\".//datasetlab1//marketing_campaign.csv\", sep=\"\\t\")\n",
|
||||
"data2 = pd.read_csv(\".//datasetlab1//marketing_campaign2.csv\", sep=\"\\t\")\n",
|
||||
"\n",
|
||||
"# Преобразуем данные для классификации (дата для отклика на кампанию)\n",
|
||||
"data['Age'] = 2024 - data['Year_Birth'] \n",
|
||||
"data = data[['Age', 'Income', 'Response']] \n",
|
||||
"\n",
|
||||
"X_class = data[['Age', 'Income']]\n",
|
||||
"y_class = data['Response']\n",
|
||||
"\n",
|
||||
"# Преобразуем данные для регрессии (прогноз расходов)\n",
|
||||
"data2['Age'] = 2024 - data2['Year_Birth'] \n",
|
||||
"data2['Total_Spending'] = (data2['MntWines'] + data2['MntFruits'] + data2['MntMeatProducts'] +\n",
|
||||
" data2['MntFishProducts'] + data2['MntSweetProducts'] + data2['MntGoldProds'])\n",
|
||||
"data2 = data2[['Age', 'Income', 'Total_Spending']] \n",
|
||||
"\n",
|
||||
"# Разделение на признаки и целевую переменную для регрессии\n",
|
||||
"X_reg = data2[['Age', 'Income']]\n",
|
||||
"y_reg = data2['Total_Spending']\n",
|
||||
"\n",
|
||||
"# Масштабирование данных\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"X_class_scaled = scaler.fit_transform(X_class)\n",
|
||||
"X_reg_scaled = scaler.fit_transform(X_reg)\n",
|
||||
"\n",
|
||||
"# Разделение на тренировочные и тестовые выборки\n",
|
||||
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class_scaled, y_class, test_size=0.3, random_state=42)\n",
|
||||
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg_scaled, y_reg, test_size=0.3, random_state=42)\n",
|
||||
"\n",
|
||||
"# Проверим, что все выглядит правильно\n",
|
||||
"print(f\"X_class_train: {X_train_class.shape}, y_class_train: {y_train_class.shape}\")\n",
|
||||
"print(f\"X_reg_train: {X_train_reg.shape}, y_reg_train: {y_train_reg.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"5-6.Выбор трёх моделей и построение конвейера"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Logistic Regression - Средняя точность модели: 0.8475 ± 0.0027\n",
|
||||
"Random Forest - Средняя точность модели: 0.8258 ± 0.0099\n",
|
||||
"SVM - Средняя точность модели: 0.8529 ± 0.0027\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import train_test_split, cross_val_score\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"\n",
|
||||
"# Удаляем строки с пропущенными значениями\n",
|
||||
"X_class_scaled = X_class.dropna()\n",
|
||||
"y_class = y_class[X_class_scaled.index]\n",
|
||||
"\n",
|
||||
"models = [\n",
|
||||
" ('Logistic Regression', LogisticRegression(max_iter=1000)),\n",
|
||||
" ('Random Forest', RandomForestClassifier(n_estimators=100)),\n",
|
||||
" ('SVM', SVC())\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Создаем конвейер\n",
|
||||
"imputer = SimpleImputer(strategy='mean') \n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"\n",
|
||||
"for name, model in models:\n",
|
||||
" pipe = Pipeline([\n",
|
||||
" ('imputer', imputer),\n",
|
||||
" ('scaler', scaler),\n",
|
||||
" ('classifier', model)\n",
|
||||
" ])\n",
|
||||
" \n",
|
||||
" scores = cross_val_score(pipe, X_class_scaled, y_class, cv=5, scoring='accuracy')\n",
|
||||
" print(f\"{name} - Средняя точность модели: {scores.mean():.4f} ± {scores.std():.4f}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Вот такие модели и конвейр я выбрал: Imputer: Заполняет пропущенные значения средним (если они есть).\n",
|
||||
"Scaler: Масштабирует данные с помощью StandardScaler.\n",
|
||||
"Classifier: Используются три модели:\n",
|
||||
"LogisticRegression: Логистическая регрессия.\n",
|
||||
"RandomForestClassifier: Случайный лес.\n",
|
||||
"SVC: Метод опорных векторов (SVM)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"7.Теперь сделаем настройку гиперпараметров."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fitting 5 folds for each of 6 candidates, totalling 30 fits\n",
|
||||
"Logistic Regression - Лучшие гиперпараметры: {'classifier__C': 0.1, 'classifier__solver': 'lbfgs'}\n",
|
||||
"Logistic Regression - Лучшая точность: 0.8484\n",
|
||||
"--------------------------------------------------\n",
|
||||
"Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
|
||||
"Random Forest - Лучшие гиперпараметры: {'classifier__max_depth': 10, 'classifier__n_estimators': 100}\n",
|
||||
"Random Forest - Лучшая точность: 0.8520\n",
|
||||
"--------------------------------------------------\n",
|
||||
"Fitting 5 folds for each of 6 candidates, totalling 30 fits\n",
|
||||
"SVM - Лучшие гиперпараметры: {'classifier__C': 1, 'classifier__kernel': 'rbf'}\n",
|
||||
"SVM - Лучшая точность: 0.8529\n",
|
||||
"--------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import GridSearchCV\n",
|
||||
"models = [\n",
|
||||
" ('Logistic Regression', LogisticRegression(max_iter=1000), {'classifier__C': [0.1, 1, 10], 'classifier__solver': ['lbfgs', 'liblinear']}),\n",
|
||||
" ('Random Forest', RandomForestClassifier(n_estimators=100), {'classifier__n_estimators': [50, 100, 200], 'classifier__max_depth': [10, 20, None]}),\n",
|
||||
" ('SVM', SVC(), {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['linear', 'rbf']})\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for name, model, param_grid in models:\n",
|
||||
" pipe = Pipeline([\n",
|
||||
" ('imputer', imputer),\n",
|
||||
" ('scaler', scaler),\n",
|
||||
" ('classifier', model)\n",
|
||||
" ])\n",
|
||||
" \n",
|
||||
" grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=1)\n",
|
||||
" grid_search.fit(X_class_scaled, y_class)\n",
|
||||
"\n",
|
||||
" print(f\"{name} - Лучшие гиперпараметры: {grid_search.best_params_}\")\n",
|
||||
" print(f\"{name} - Лучшая точность: {grid_search.best_score_:.4f}\")\n",
|
||||
" print(\"-\" * 50)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Тут мы проходим по моделям и настраиваем гиперпараметры с помощью GridSearchCV с помощью кросс-валидации. Параметры: cv=5: 5 фолдов для кросс-валидации.\n",
|
||||
"scoring='accuracy': Мы используем точность как метрику.\n",
|
||||
"n_jobs=-1: Используем все доступные процессоры для ускорения вычислений.\n",
|
||||
"verbose=1: Подробный вывод процесса."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"8.Обучим модели"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fitting 5 folds for each of 6 candidates, totalling 30 fits\n",
|
||||
"Fitting 5 folds for each of 6 candidates, totalling 30 fits\n",
|
||||
"Fitting 5 folds for each of 6 candidates, totalling 30 fits\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"best_models = {} \n",
|
||||
"for name, model, param_grid in models: \n",
|
||||
" grid_search.fit(X_class_scaled, y_class)\n",
|
||||
" best_models[name] = grid_search.best_estimator_ \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"9.Оценим модели."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n",
|
||||
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
||||
"\n",
|
||||
"# Оценка качества классификации\n",
|
||||
"for name, model in best_models.items():\n",
|
||||
" y_pred_class = model.predict(X_class_scaled) # Предсказание для классификации\n",
|
||||
"\n",
|
||||
"# Оценка качества регрессии\n",
|
||||
"for name, model in best_models.items():\n",
|
||||
" y_pred_reg = model.predict(X_reg_scaled) # Предсказание для регрессии\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Т.к. вывод слишком длинный, приложу его тут (в ячейке появляется сообщение «Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...»):<br>Оценка качества для модели Logistic Regression:<br>\n",
|
||||
"Accuracy: 0.8528880866425993<br>\n",
|
||||
"Precision: 0.8181818181818182<br>\n",
|
||||
"Recall: 0.02702702702702703<br>\n",
|
||||
"F1-Score: 0.05232558139534884<br>\n",
|
||||
"ROC AUC: Не поддерживается для этой модели<br>\n",
|
||||
"<br>\n",
|
||||
"<br>\n",
|
||||
"Оценка качества для модели Random Forest:<br>\n",
|
||||
"Accuracy: 0.8528880866425993<br>\n",
|
||||
"Precision: 0.8181818181818182<br>\n",
|
||||
"Recall: 0.02702702702702703<br>\n",
|
||||
"F1-Score: 0.05232558139534884<br>\n",
|
||||
"ROC AUC: Не поддерживается для этой модели<br>\n",
|
||||
"<br>\n",
|
||||
"<br>\n",
|
||||
"Оценка качества для модели SVM:<br>\n",
|
||||
"Accuracy: 0.8528880866425993<br>\n",
|
||||
"Precision: 0.8181818181818182<br>\n",
|
||||
"Recall: 0.02702702702702703<br>\n",
|
||||
"F1-Score: 0.05232558139534884<br>\n",
|
||||
"ROC AUC: Не поддерживается для этой модели<br>\n",
|
||||
"<br>\n",
|
||||
"<br>Задача регрессии: <br>\n",
|
||||
"Оценка качества для модели Logistic Regression:<br>\n",
|
||||
"MAE: 605.7982142857143<br>\n",
|
||||
"MSE: 729533.7598214286<br>\n",
|
||||
"RMSE: 854.1274845252485<br>\n",
|
||||
"R²: -1.0122722045012051<br>\n",
|
||||
"<br>\n",
|
||||
"<br>\n",
|
||||
"Оценка качества для модели Random Forest:<br>\n",
|
||||
"MAE: 605.7982142857143<br>\n",
|
||||
"MSE: 729533.7598214286<br>\n",
|
||||
"RMSE: 854.1274845252485<br>\n",
|
||||
"R²: -1.0122722045012051<br>\n",
|
||||
"<br>\n",
|
||||
"<br>\n",
|
||||
"Оценка качества для модели SVM:<br>\n",
|
||||
"MAE: 605.7982142857143<br>\n",
|
||||
"MSE: 729533.7598214286<br>\n",
|
||||
"RMSE: 854.1274845252485<br>\n",
|
||||
"R²: -1.0122722045012051<br>\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Почему выбрал эти метрики:<br>Классификация (Отклик на предложение)<br>\n",
|
||||
"Целевая переменная — бинарная (0 и 1), где 1 — откликнулся, а 0 — не откликнулся. Для классификации подходящими метриками являются:<br>\n",
|
||||
"<br>\n",
|
||||
"Accuracy (Точность):\n",
|
||||
"Это доля правильно классифицированных объектов среди всех. \n",
|
||||
"Подходит для оценки общей эффективности модели. Однако важно учитывать, что если классы несбалансированы, точность может быть обманчивой.<br>\n",
|
||||
"Precision (Точность):\n",
|
||||
"\n",
|
||||
"Это доля истинных положительных случаев среди всех предсказанных положительных случаев.\n",
|
||||
"Важна для задач, где нужно минимизировать количество ложных срабатываний, например, когда модель ошибочно классифицирует клиента как откликнувшегося (False Positive).<br>\n",
|
||||
"Recall (Полнота):\n",
|
||||
"\n",
|
||||
"Это доля истинных положительных случаев среди всех фактически положительных случаев.\n",
|
||||
"Важна для задач, где нужно не пропустить откликнувшихся клиентов (минимизировать False Negatives).<br>\n",
|
||||
"F1-Score:\n",
|
||||
"\n",
|
||||
"Это гармоническое среднее между точностью и полнотой.\n",
|
||||
"Подходит для оценки моделей в случаях, когда важно иметь баланс между точностью и полнотой, особенно в ситуациях с несбалансированными классами.<br>\n",
|
||||
"ROC AUC:\n",
|
||||
"Площадь под кривой ROC, которая отображает способность модели различать положительные и отрицательные классы.\n",
|
||||
"Чем выше значение AUC, тем лучше модель справляется с разделением классов."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Регрессия (Прогноз расходов)<br>\n",
|
||||
"Целевая переменная — это числовое значение (расходы клиента). Для задач регрессии используются другие метрики:<br>\n",
|
||||
"\n",
|
||||
"Mean Absolute Error (MAE):\n",
|
||||
"\n",
|
||||
"Это средняя абсолютная ошибка предсказания.\n",
|
||||
"Простой и интерпретируемый показатель, который описывает среднее отклонение предсказанных значений от фактических.<br>\n",
|
||||
"Mean Squared Error (MSE):\n",
|
||||
"\n",
|
||||
"Это средняя квадратичная ошибка.\n",
|
||||
"Чувствителен к большим ошибкам, так как квадратичный штраф увеличивает вес больших отклонений, что полезно, если вы хотите минимизировать большие ошибки.<br>\n",
|
||||
"Root Mean Squared Error (RMSE):\n",
|
||||
"\n",
|
||||
"Это квадратный корень из MSE.\n",
|
||||
"Выражается в тех же единицах, что и целевая переменная. Подходит для задач, где важно учитывать большие ошибки, так как, подобно MSE, более чувствителен к выбросам, чем MAE.<br>\n",
|
||||
"R-squared (R²):\n",
|
||||
"\n",
|
||||
"Это коэффициент детерминации, который показывает, какая доля дисперсии целевой переменной объясняется моделью.\n",
|
||||
"R² может быть полезен для оценки того, насколько хорошо модель объясняет вариацию целевой переменной, но не всегда подходит, если в данных много выбросов или модель некорректно подогнана.<br>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"10.Оценка смещения и дисперсии"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating model: Logistic Regression\n",
|
||||
"Train Accuracy: 0.8476, Test Accuracy: 0.8586\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n",
|
||||
"Train Error (MSE): 732205.3240, Test Error (MSE): 723300.1101\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n",
|
||||
"Evaluating model: Random Forest\n",
|
||||
"Train Accuracy: 0.8476, Test Accuracy: 0.8586\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n",
|
||||
"Train Error (MSE): 732205.3240, Test Error (MSE): 723300.1101\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n",
|
||||
"Evaluating model: SVM\n",
|
||||
"Train Accuracy: 0.8476, Test Accuracy: 0.8586\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n",
|
||||
"Train Error (MSE): 732205.3240, Test Error (MSE): 723300.1101\n",
|
||||
"Bias: 0.0000, Variance: 0.0000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from sklearn.metrics import mean_squared_error, accuracy_score\n",
|
||||
"\n",
|
||||
"# Оценка смещения и дисперсии для классификации и регрессии\n",
|
||||
"def evaluate_bias_variance(model, X_train, y_train, X_test, y_test, task='classification'):\n",
|
||||
" # Прогнозы на обучающих и тестовых данных\n",
|
||||
" y_train_pred = model.predict(X_train)\n",
|
||||
" y_test_pred = model.predict(X_test)\n",
|
||||
"\n",
|
||||
" if task == 'classification':\n",
|
||||
" # Для классификации считаем точность\n",
|
||||
" train_accuracy = accuracy_score(y_train, y_train_pred)\n",
|
||||
" test_accuracy = accuracy_score(y_test, y_test_pred)\n",
|
||||
" print(f\"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}\")\n",
|
||||
" elif task == 'regression':\n",
|
||||
" # Для регрессии считаем среднеквадратичную ошибку (MSE)\n",
|
||||
" train_error = mean_squared_error(y_train, y_train_pred)\n",
|
||||
" test_error = mean_squared_error(y_test, y_test_pred)\n",
|
||||
" print(f\"Train Error (MSE): {train_error:.4f}, Test Error (MSE): {test_error:.4f}\")\n",
|
||||
"\n",
|
||||
" # Для оценки смещения и дисперсии на тестовых данных\n",
|
||||
" bias = np.mean(y_test_pred - y_train_pred[:len(y_test_pred)]) # Смещение: разница между тестом и обучением\n",
|
||||
" variance = np.var(y_test_pred - y_train_pred[:len(y_test_pred)]) # Дисперсия: варьирование прогнозов\n",
|
||||
"\n",
|
||||
" print(f\"Bias: {bias:.4f}, Variance: {variance:.4f}\")\n",
|
||||
"\n",
|
||||
"# Оценим для каждой из моделей\n",
|
||||
"for name, model in best_models.items():\n",
|
||||
" print(f\"Evaluating model: {name}\")\n",
|
||||
" # Для классификации\n",
|
||||
" evaluate_bias_variance(model, X_train_class, y_train_class, X_test_class, y_test_class, task='classification') \n",
|
||||
" # Для регрессии\n",
|
||||
" evaluate_bias_variance(model, X_train_reg, y_train_reg, X_test_reg, y_test_reg, task='regression') \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Конец"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "miivenv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
277
lab_5/lab5.ipynb
277
lab_5/lab5.ipynb
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user