From cc642627db633388859bd1ea1dc7a80b2e7ebded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A2=D1=83=D0=BA=D0=B0=D0=B5=D0=B2=D0=B0=20=D0=90=D0=BB?= =?UTF-8?q?=D1=8C=D1=84=D0=B8=D1=8F?= Date: Tue, 7 Jan 2025 00:58:14 +0400 Subject: [PATCH] LAB_3 --- .vs/ProjectSettings.json | 3 - .vs/slnx.sqlite | Bin 90112 -> 0 bytes data/students_education.csv | 2561 +++++++++++++++++------------------ lab3.ipynb | 1881 +++++++++++++++++++++++++ 4 files changed, 3087 insertions(+), 1358 deletions(-) delete mode 100644 .vs/ProjectSettings.json delete mode 100644 .vs/slnx.sqlite create mode 100644 lab3.ipynb diff --git a/.vs/ProjectSettings.json b/.vs/ProjectSettings.json deleted file mode 100644 index f8b4888..0000000 --- a/.vs/ProjectSettings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "CurrentProjectSetting": null -} \ No newline at end of file diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite deleted file mode 100644 index 5549866b267a494f50540cdc7e592f130c8c716c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 90112 zcmeI4e{37qeZcQ@6e&{TJ=wBE+p?WYlfh!`lf~a+g8-o^%4RH6GDSHtJeE7&9c?BQ zsgOs-QBxiq(Vhr93=V@&=c={-}k-m`@HY>-MjZlZr)gHYO2uabhjldx?{5OaH=xYYQ@zD$XpLM+7?`?otZ2Wkf6h=QMWsaktPkm!*dh%N6?ciGz-wSa5 z8{C)1-W@BCJ-=TSkpHnEFhMNcdzueKUJ%*68_l+&-rwBau6J51YNOe1YRyi&X1s0R zS#7q|qEagkD}?gpLatH}ilvppi^A;#)lm(H1pHbr4VJG83%A|PR-=2f;Xovh&Ac+) z%#yUN4mXY;VVtcw!X#3#D&*JK)<|DV>-pSfA^N#zrvedCWZx7usoqk(UH4vxO=Bv# z%WH)pJr9Q#3@!r@i%J#qtrV^l%ECsucs*CXC0r}q5^`IW^odQjlR@sHkzt3tOECVP`zu| zOKe`W$5fh=WwqC9)|)NTMGLF9n8ybP%Gq6Z0wjU5x!cp!Z403{i=Y&2?ZdEK>&Oy` z&vNJ0{X0MlyBAwA+=uj&!!)$(3Xx+Bgj#_iLH^NO4?bAX652XvUt1D@lG zIKAD`4)bXKLc`kGDeCVoYiL2sd1=N#{0|TE%I*%HCFx8>o1l?bX>O}MP1@e63S{!A z6tCl%6wg=06UAU4aw*C_K)NT=W=VZDlF$spu#h>dTczTSE$hxh%UDVd_o6bhWB49J za}1L`6Nx3gsdjrf&<1Ay2h2nu5{VQ*c!p)L z(H(o5T^c#d(sr$`9z>Z8*=GWQNI|4S=5^%ShSXKtT8&KGUfIOZ@$;5)*5DXQZ!xl9 z7leB}D*UoWwfOy4@5b=r*7b=F$wwE7Z zC9>}r*mIpekCF8hc^zU&@Mtr9u}9cch0DdkW#F3B)7I-fwR=y+%OZQ`HCKzmN?|p( zwN?@2epioJjZV`z$U9sh5}RlDp3uW&Q`NL)`;H#d=6c)6@wxqJu$=Bj`f}e;*e__K z8)N>+g$wL0-E(@c;5C|B>#48w)iz$Wxi2}F-*|)zZS4FPv1WAd3LA*b&$ACyy-w3M z^WT`Oj@xFg!{yAJ$L47_rB)x0qsKe9@Uz;nP#C9N_-73IArKFN}>d&%D8r$9r9B95-zm8~DA1 zs>)00xGct!>4ccfEH8;tLRLgIorNh)Hdo~SD+B|-G#Vltjl z#O17%6f?QX@#SX#G2Tcmr|XS+V=0pnm4=*2Wn)=!S&^{e^`tD;m1Ihc zrBh10E;rH*%tKAZQt71H5R<880<#elmoxDOMv!TwQj(fYV=}6;97`s%88NYxN@1dM zOw2aaB@s6!%h^<7DG^%|8<~{6oT)E~ih^M$8|fu6o5sT;D-C5Sy^K2*kI%6_=6qxm zpTKNO=B{jPF|n8mqkm(@(03X1UG(Eml#4@WfB+Bx0zd!=00AHX1b_e#00KY&2z;Um zOtRcOYktIl!#$l08Xs{vF~*6k_tP%tLbwEnzQv$#p}$8z{X~O-&H(`+00e*l5C8%| z00;m9AOHk_01)_i5}4&Ku>&9C;O4n!*nK|0F*ePe54t`CKtBI}Zt6P>`U(0+^yld7 z=uOl|zl4e?h0Y?5{}KN;{2%ha!9U>d@YnfIlkW(G|2F)4;kUz`@OpS5jHdnzE5i#2 z00AHX1b_e#00KY&2mk>f@c0RQ>YSfR1%rP4srHvtSu>A8dy6miI_-&nS$rcj|K@|! zXZ_4m?1aBo+u4=%-=wKc6It+&?)@Zk#?PE(gZ{qOZ1om*b}hj#%=?)v7WA~ff`5Xd zS5uX~@q;;wa_?S{>YqF9XD;AkJ9j&6wYJl3%Bo9#_LQHAV)@-pzo*s|Nt0aSGbjDb zc`TNt?hxfqMf^+xOU=%aU(%BLEp74sc58+j@bsLY$q)m&od*8)eh+`qUH^W4&sAz} z*3Zo2Quky%-%qtqpCD+0{_T#^$I3*KOJ;h@qiL|3?-CegR>=Ly)^2=1~t=p{kHMK{j$2r3oBBgn=W5gJYyU~(fR+nk^G1fqhZ6sNtQ{TKo&i~Kx z=gIT`576J?9e}6NpP+A|chT4Q=lHLpIrJ6&3;h40FQ8vRZT_eHee_vW;=j$m!=Fdb zpafs#H~8;A{xF9YfdCKy0zd!=00AHX1b_e#00KbZLn3gJOR?s*gKJOa9O9m09UCcU zr?|5$-8?ySn#;1oH%#VdxC^XjljQVqF3P$$Mot~$&a^odxEt#F^==xldSzzek{Pvuz0g#nwaa}p+Ka1 z$F>CTxF&lMVN#R50WoD>n!65@gdLXZTMeNJZk8qc3qfK&-Bbv25&RzSBn0$#Wc$GH zwr-qQ<=!apk*Y`*r&ncc35@xyTMqfo=L0?9D=$G-Q1UjgRWb{S!#)sr{P%RJu0zd!=00AHX z1b_e#00KY&2mpbP2>~A$WY3LrEIWNxPtTAvIIpLt_4JgUo+Rl^L{H~P>Yvrq6M8zM zr^og5n2%%FX_6q4@FWS7WQrt{BngouNRkPX1W4j1$v8=TB;iOh#&JwgfBp}@$Dsc} z|Bn6|{|dly^atp7(Ep&{ij?P6?6kV$NxV6TTu-?P{;OP^-DC;@i51yLwooC$#{K1o_d;5g{_pE&7@u|tmf{WHk-1UA9?#=`Mw-;=E4xZi(l!Z*X{ z2mUj@DC0fxpY}b4^XzXT@XieV%pdulWSoco^qQXfhka*txpC|t^vyEHx&M@Jp0N)8 zL%s;PASeIETm9%?ugd;p!nkgr=kQ;zP(S@QDm0J(1GrP<{GWXPpP4?gApji*0zd!= z00AHX1b_e#00KY&2mk>fa3lzj^M82$KN3R?odg0v00;m9AOHk_01yBIKmZ5;fg?-+ z&i{|_;6vAe01yBIKmZ5;0U!VbfB+Bx0zlx%5Pf00fQ<0XY9ZGD8g=1p+_-2mk>f z00e*l5C8%|00;nqBTN9E|BvwCL)U=-5C8%|00;m9AOHk_01yBIK;Xy_fY1LQnW2V` z0s$ZZ1b_e#00KY&2mk>f00e-*5hg&M|3d+eL23R6d~fO-Q`3{zLT?A(n)qIT^WWgU zH1_UTdF=Tk3<$dZfeHBDiM$}PdpDYGMZLedyIt?JR@6qb-PD?$cFlO(zO&kFsYRt$ z999VB%Y|H}AQVe0g%^d}2dbkQ4hi_RUK%W46&7y0o2^FoX2XF<9-Db(xS1tsTODp3 zKf*X$bA(BxU{%PkudR{3me%vR%|i5Z&rSs*qR74}YEr$Wdb{qu4x7eQa+lW%LwX(# zEf`z|AQqJ>=36OTDU^kca`Aeud`q}ixFzJaD(l4(uJ(GNRKZr^zpb^ki)Jkw-OjzH zqIR9yw2V=N3_`iES|}Gv`NF1fn`#u=T2tGtE?613im0uE$8Bq4g@C{q^0}3QOSDp0 zE7&4xi~+E8Ggr=E&6OA8sZ`W5*u#~otoB>2-5Y(W)oe6XWmpC7@t}Iwu$S1pXpgBh zC(CND*Q_^Nq>C0-Z!wP#4wSRI>;y;xWplTusoNGpZx%r**xH9-xz>>-5})PHtNV9= z7Rq;8{q6Q(xo{}DcWE*Zso}V)>2al7DogFQ+RaPyU9~o7x)EjGf>Z7&^NRPa#}RX0 z9qfC9tyiNDw?cZvy-W99%6XAbBvn+H6{6>)mIqaEha z{Dp?Kvs2XHUDnWomh;k#f%qRDGbx_0 zh$o7{K;%-CeSmaNq|K80Y9yfeS4oy%#J&kMFY(e zD#H!-k1*KQ9AVrr@X?1?ea0l6widusQdikC(9%xkU|g_Xi;ZfmU~$o;M! zu^OGGagcYoKqNNL?meN0$)>7l&GsEVrp@)Xk>hjw(_lH>jr8Tdp|D@jMmNU%kqZ~t zTe|1;UcqZLwboN#>8owLYI9$5F2C^z7uwkQFJjH;-W4_wnV)ALsCu2IZRWo*R~@&_ zT!+hSG*GlWa5-4#~L>(>Wb zwSyKfMX-D4$YO?ANnVDf_BF{{-CP{B+*}_KIeFnSAKd1FX7oP1_L^}17tag$-^QZV zSKJwA*R{L0uF*-)c#_f^c9~+Sgr7p6$IqPuI7rFWP5fAImmb<-;aR6WWYVGR@stw| z(IMVk{PabS-95`Y!lCyw_8>+M9}L~LK|jJ?dKEt=5BC_MxT!B>_ZDUXk=s#r@0Mp( zu_9^GzE456Ly%%~?y1x*+_#RQw`o!xnP}}gDCTPP*X|$J7qFkd;JHT=C0l`9&AR7E zx6mkzluM||?8i)Cl;=wpuSu1^aZ-FUy9l=pj*vTp;U z*YSr~)|fY&O5sKK#!*G;-chw-ccFcDYVw`7rrtk@26=FZAH%y23D>TJzC0|e4HZuY zvRbp=5r3c?kG5}E;uHG?i&bKHRYSwY(!rx;y*CWN4-E0?QAV$*J8D~z+S&-CXGR-6 zw5pLtd$g+2*XL0nl1Q*`Id)6jq=zt_-Y?uOCPQQ1k`4y$0iI&%{s|46VevIQ?y1J; zK>NU3mR_F!zcZ2Dox^MaTDVOIwQc)oA4FVskyT+;u%?%NCXWwswy}1e-1}1Ip3~EO z01iKj\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Institution TypeGenderAgeDeviceIT StudentLocationFinancial ConditionInternet TypeNetwork TypeFlexibility Leveleducation_Collegeeducation_Schooleducation_University
0PrivateMale23TabNoTownMidWifi4GModerateFalseFalseTrue
1PrivateFemale23MobileNoTownMidMobile Data4GModerateFalseFalseTrue
2PublicFemale18MobileNoTownMidWifi4GModerateTrueFalseFalse
3PrivateFemale11MobileNoTownMidMobile Data4GModerateFalseTrueFalse
4PrivateFemale18MobileNoTownPoorMobile Data3GLowFalseTrueFalse
..........................................
1200PrivateFemale18MobileNoTownMidWifi4GLowTrueFalseFalse
1201PrivateFemale18MobileNoRuralMidWifi4GModerateTrueFalseFalse
1202PrivateMale11MobileNoTownMidMobile Data3GModerateFalseTrueFalse
1203PrivateFemale18MobileNoRuralMidWifi4GLowTrueFalseFalse
1204PrivateFemale11MobileNoTownPoorMobile Data3GModerateFalseTrueFalse
\n", + "

1205 rows × 13 columns

\n", + "" + ], + "text/plain": [ + " Institution Type Gender Age Device IT Student Location \\\n", + "0 Private Male 23 Tab No Town \n", + "1 Private Female 23 Mobile No Town \n", + "2 Public Female 18 Mobile No Town \n", + "3 Private Female 11 Mobile No Town \n", + "4 Private Female 18 Mobile No Town \n", + "... ... ... ... ... ... ... \n", + "1200 Private Female 18 Mobile No Town \n", + "1201 Private Female 18 Mobile No Rural \n", + "1202 Private Male 11 Mobile No Town \n", + "1203 Private Female 18 Mobile No Rural \n", + "1204 Private Female 11 Mobile No Town \n", + "\n", + " Financial Condition Internet Type Network Type Flexibility Level \\\n", + "0 Mid Wifi 4G Moderate \n", + "1 Mid Mobile Data 4G Moderate \n", + "2 Mid Wifi 4G Moderate \n", + "3 Mid Mobile Data 4G Moderate \n", + "4 Poor Mobile Data 3G Low \n", + "... ... ... ... ... \n", + "1200 Mid Wifi 4G Low \n", + "1201 Mid Wifi 4G Moderate \n", + "1202 Mid Mobile Data 3G Moderate \n", + "1203 Mid Wifi 4G Low \n", + "1204 Poor Mobile Data 3G Moderate \n", + "\n", + " education_College education_School education_University \n", + "0 False False True \n", + "1 False False True \n", + "2 True False False \n", + "3 False True False \n", + "4 False True False \n", + "... ... ... ... \n", + "1200 True False False \n", + "1201 True False False \n", + "1202 False True False \n", + "1203 True False False \n", + "1204 False True False \n", + "\n", + "[1205 rows x 13 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Применение one-hot encoding\n", + "df_encoded = pd.get_dummies(df, columns=['Education Level'], prefix='education')\n", + "\n", + "# Результат\n", + "df_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Institution TypeGenderAgeDeviceIT StudentLocationFinancial ConditionInternet TypeNetwork TypeFlexibility Leveleducation_Collegeeducation_Schooleducation_Universityage_group
0PrivateMale23TabNoTownMidWifi4GModerateFalseFalseTrue19-23
1PrivateFemale23MobileNoTownMidMobile Data4GModerateFalseFalseTrue19-23
2PublicFemale18MobileNoTownMidWifi4GModerateTrueFalseFalse10-18
3PrivateFemale11MobileNoTownMidMobile Data4GModerateFalseTrueFalse10-18
4PrivateFemale18MobileNoTownPoorMobile Data3GLowFalseTrueFalse10-18
.............................................
1200PrivateFemale18MobileNoTownMidWifi4GLowTrueFalseFalse10-18
1201PrivateFemale18MobileNoRuralMidWifi4GModerateTrueFalseFalse10-18
1202PrivateMale11MobileNoTownMidMobile Data3GModerateFalseTrueFalse10-18
1203PrivateFemale18MobileNoRuralMidWifi4GLowTrueFalseFalse10-18
1204PrivateFemale11MobileNoTownPoorMobile Data3GModerateFalseTrueFalse10-18
\n", + "

1205 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Institution Type Gender Age Device IT Student Location \\\n", + "0 Private Male 23 Tab No Town \n", + "1 Private Female 23 Mobile No Town \n", + "2 Public Female 18 Mobile No Town \n", + "3 Private Female 11 Mobile No Town \n", + "4 Private Female 18 Mobile No Town \n", + "... ... ... ... ... ... ... \n", + "1200 Private Female 18 Mobile No Town \n", + "1201 Private Female 18 Mobile No Rural \n", + "1202 Private Male 11 Mobile No Town \n", + "1203 Private Female 18 Mobile No Rural \n", + "1204 Private Female 11 Mobile No Town \n", + "\n", + " Financial Condition Internet Type Network Type Flexibility Level \\\n", + "0 Mid Wifi 4G Moderate \n", + "1 Mid Mobile Data 4G Moderate \n", + "2 Mid Wifi 4G Moderate \n", + "3 Mid Mobile Data 4G Moderate \n", + "4 Poor Mobile Data 3G Low \n", + "... ... ... ... ... \n", + "1200 Mid Wifi 4G Low \n", + "1201 Mid Wifi 4G Moderate \n", + "1202 Mid Mobile Data 3G Moderate \n", + "1203 Mid Wifi 4G Low \n", + "1204 Poor Mobile Data 3G Moderate \n", + "\n", + " education_College education_School education_University age_group \n", + "0 False False True 19-23 \n", + "1 False False True 19-23 \n", + "2 True False False 10-18 \n", + "3 False True False 10-18 \n", + "4 False True False 10-18 \n", + "... ... ... ... ... \n", + "1200 True False False 10-18 \n", + "1201 True False False 10-18 \n", + "1202 False True False 10-18 \n", + "1203 True False False 10-18 \n", + "1204 False True False 10-18 \n", + "\n", + "[1205 rows x 14 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Дискретизация признака 'age'\n", + "bins = [0, 18, 23, 28]\n", + "labels = ['10-18', '19-23', '24-28']\n", + "df_encoded['age_group'] = pd.cut(df['Age'], bins=bins, labels=labels)\n", + "\n", + "# Результат\n", + "df_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Institution TypeGenderAgeDeviceIT StudentLocationFinancial ConditionInternet TypeNetwork TypeFlexibility Leveleducation_Collegeeducation_Schooleducation_Universityage_groupinternet
0PrivateMale23TabNoTownMidWifi4GModerateFalseFalseTrue19-23Wifi_4G
1PrivateFemale23MobileNoTownMidMobile Data4GModerateFalseFalseTrue19-23Mobile Data_4G
2PublicFemale18MobileNoTownMidWifi4GModerateTrueFalseFalse10-18Wifi_4G
3PrivateFemale11MobileNoTownMidMobile Data4GModerateFalseTrueFalse10-18Mobile Data_4G
4PrivateFemale18MobileNoTownPoorMobile Data3GLowFalseTrueFalse10-18Mobile Data_3G
................................................
1200PrivateFemale18MobileNoTownMidWifi4GLowTrueFalseFalse10-18Wifi_4G
1201PrivateFemale18MobileNoRuralMidWifi4GModerateTrueFalseFalse10-18Wifi_4G
1202PrivateMale11MobileNoTownMidMobile Data3GModerateFalseTrueFalse10-18Mobile Data_3G
1203PrivateFemale18MobileNoRuralMidWifi4GLowTrueFalseFalse10-18Wifi_4G
1204PrivateFemale11MobileNoTownPoorMobile Data3GModerateFalseTrueFalse10-18Mobile Data_3G
\n", + "

1205 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " Institution Type Gender Age Device IT Student Location \\\n", + "0 Private Male 23 Tab No Town \n", + "1 Private Female 23 Mobile No Town \n", + "2 Public Female 18 Mobile No Town \n", + "3 Private Female 11 Mobile No Town \n", + "4 Private Female 18 Mobile No Town \n", + "... ... ... ... ... ... ... \n", + "1200 Private Female 18 Mobile No Town \n", + "1201 Private Female 18 Mobile No Rural \n", + "1202 Private Male 11 Mobile No Town \n", + "1203 Private Female 18 Mobile No Rural \n", + "1204 Private Female 11 Mobile No Town \n", + "\n", + " Financial Condition Internet Type Network Type Flexibility Level \\\n", + "0 Mid Wifi 4G Moderate \n", + "1 Mid Mobile Data 4G Moderate \n", + "2 Mid Wifi 4G Moderate \n", + "3 Mid Mobile Data 4G Moderate \n", + "4 Poor Mobile Data 3G Low \n", + "... ... ... ... ... \n", + "1200 Mid Wifi 4G Low \n", + "1201 Mid Wifi 4G Moderate \n", + "1202 Mid Mobile Data 3G Moderate \n", + "1203 Mid Wifi 4G Low \n", + "1204 Poor Mobile Data 3G Moderate \n", + "\n", + " education_College education_School education_University age_group \\\n", + "0 False False True 19-23 \n", + "1 False False True 19-23 \n", + "2 True False False 10-18 \n", + "3 False True False 10-18 \n", + "4 False True False 10-18 \n", + "... ... ... ... ... \n", + "1200 True False False 10-18 \n", + "1201 True False False 10-18 \n", + "1202 False True False 10-18 \n", + "1203 True False False 10-18 \n", + "1204 False True False 10-18 \n", + "\n", + " internet \n", + "0 Wifi_4G \n", + "1 Mobile Data_4G \n", + "2 Wifi_4G \n", + "3 Mobile Data_4G \n", + "4 Mobile Data_3G \n", + "... ... \n", + "1200 Wifi_4G \n", + "1201 Wifi_4G \n", + "1202 Mobile Data_3G \n", + "1203 Wifi_4G \n", + "1204 Mobile Data_3G \n", + "\n", + "[1205 rows x 15 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Создание нового признака 'internet'\n", + "df_encoded['internet'] = df_encoded['Internet Type'] + '_' + df_encoded['Network Type'] \n", + "df_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Institution TypeGenderAgeDeviceIT StudentLocationFinancial ConditionInternet TypeNetwork TypeFlexibility Leveleducation_Collegeeducation_Schooleducation_Universityage_groupinternetage_normalizedage_standardized
0PrivateMale23TabNoTownMidWifi4GModerateFalseFalseTrue19-23Wifi_4G0.7777781.018272
1PrivateFemale23MobileNoTownMidMobile Data4GModerateFalseFalseTrue19-23Mobile Data_4G0.7777781.018272
2PublicFemale18MobileNoTownMidWifi4GModerateTrueFalseFalse10-18Wifi_4G0.5000000.160338
3PrivateFemale11MobileNoTownMidMobile Data4GModerateFalseTrueFalse10-18Mobile Data_4G0.111111-1.040771
4PrivateFemale18MobileNoTownPoorMobile Data3GLowFalseTrueFalse10-18Mobile Data_3G0.5000000.160338
......................................................
1200PrivateFemale18MobileNoTownMidWifi4GLowTrueFalseFalse10-18Wifi_4G0.5000000.160338
1201PrivateFemale18MobileNoRuralMidWifi4GModerateTrueFalseFalse10-18Wifi_4G0.5000000.160338
1202PrivateMale11MobileNoTownMidMobile Data3GModerateFalseTrueFalse10-18Mobile Data_3G0.111111-1.040771
1203PrivateFemale18MobileNoRuralMidWifi4GLowTrueFalseFalse10-18Wifi_4G0.5000000.160338
1204PrivateFemale11MobileNoTownPoorMobile Data3GModerateFalseTrueFalse10-18Mobile Data_3G0.111111-1.040771
\n", + "

1205 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " Institution Type Gender Age Device IT Student Location \\\n", + "0 Private Male 23 Tab No Town \n", + "1 Private Female 23 Mobile No Town \n", + "2 Public Female 18 Mobile No Town \n", + "3 Private Female 11 Mobile No Town \n", + "4 Private Female 18 Mobile No Town \n", + "... ... ... ... ... ... ... \n", + "1200 Private Female 18 Mobile No Town \n", + "1201 Private Female 18 Mobile No Rural \n", + "1202 Private Male 11 Mobile No Town \n", + "1203 Private Female 18 Mobile No Rural \n", + "1204 Private Female 11 Mobile No Town \n", + "\n", + " Financial Condition Internet Type Network Type Flexibility Level \\\n", + "0 Mid Wifi 4G Moderate \n", + "1 Mid Mobile Data 4G Moderate \n", + "2 Mid Wifi 4G Moderate \n", + "3 Mid Mobile Data 4G Moderate \n", + "4 Poor Mobile Data 3G Low \n", + "... ... ... ... ... \n", + "1200 Mid Wifi 4G Low \n", + "1201 Mid Wifi 4G Moderate \n", + "1202 Mid Mobile Data 3G Moderate \n", + "1203 Mid Wifi 4G Low \n", + "1204 Poor Mobile Data 3G Moderate \n", + "\n", + " education_College education_School education_University age_group \\\n", + "0 False False True 19-23 \n", + "1 False False True 19-23 \n", + "2 True False False 10-18 \n", + "3 False True False 10-18 \n", + "4 False True False 10-18 \n", + "... ... ... ... ... \n", + "1200 True False False 10-18 \n", + "1201 True False False 10-18 \n", + "1202 False True False 10-18 \n", + "1203 True False False 10-18 \n", + "1204 False True False 10-18 \n", + "\n", + " internet age_normalized age_standardized \n", + "0 Wifi_4G 0.777778 1.018272 \n", + "1 Mobile Data_4G 0.777778 1.018272 \n", + "2 Wifi_4G 0.500000 0.160338 \n", + "3 Mobile Data_4G 0.111111 -1.040771 \n", + "4 Mobile Data_3G 0.500000 0.160338 \n", + "... ... ... ... \n", + "1200 Wifi_4G 0.500000 0.160338 \n", + "1201 Wifi_4G 0.500000 0.160338 \n", + "1202 Mobile Data_3G 0.111111 -1.040771 \n", + "1203 Wifi_4G 0.500000 0.160338 \n", + "1204 Mobile Data_3G 0.111111 -1.040771 \n", + "\n", + "[1205 rows x 17 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "\n", + "# Создаем экземпляры масштабировщиков\n", + "minmax_scaler = MinMaxScaler()\n", + "standard_scaler = StandardScaler()\n", + "\n", + "# Нормировка\n", + "df_encoded['age_normalized'] = minmax_scaler.fit_transform(df_encoded[['Age']])\n", + "\n", + "# Стандартизация\n", + "df_encoded['age_standardized'] = standard_scaler.fit_transform(df_encoded[['Age']])\n", + "\n", + "df_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Institution TypeGenderAgeDeviceIT StudentLocationFinancial ConditionInternet TypeNetwork TypeFlexibility Leveleducation_Collegeeducation_Schooleducation_Universityage_groupinternetage_normalizedage_standardized
001232010122001150.7777781.018272
100231010022001120.7777781.018272
210181010122100050.5000000.160338
300111010022010020.111111-1.040771
400181011011010010.5000000.160338
......................................................
120000181010121100050.5000000.160338
120100181000122100050.5000000.160338
120201111010012010010.111111-1.040771
120300181000121100050.5000000.160338
120400111011012010010.111111-1.040771
\n", + "

1205 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " Institution Type Gender Age Device IT Student Location \\\n", + "0 0 1 23 2 0 1 \n", + "1 0 0 23 1 0 1 \n", + "2 1 0 18 1 0 1 \n", + "3 0 0 11 1 0 1 \n", + "4 0 0 18 1 0 1 \n", + "... ... ... ... ... ... ... \n", + "1200 0 0 18 1 0 1 \n", + "1201 0 0 18 1 0 0 \n", + "1202 0 1 11 1 0 1 \n", + "1203 0 0 18 1 0 0 \n", + "1204 0 0 11 1 0 1 \n", + "\n", + " Financial Condition Internet Type Network Type Flexibility Level \\\n", + "0 0 1 2 2 \n", + "1 0 0 2 2 \n", + "2 0 1 2 2 \n", + "3 0 0 2 2 \n", + "4 1 0 1 1 \n", + "... ... ... ... ... \n", + "1200 0 1 2 1 \n", + "1201 0 1 2 2 \n", + "1202 0 0 1 2 \n", + "1203 0 1 2 1 \n", + "1204 1 0 1 2 \n", + "\n", + " education_College education_School education_University age_group \\\n", + "0 0 0 1 1 \n", + "1 0 0 1 1 \n", + "2 1 0 0 0 \n", + "3 0 1 0 0 \n", + "4 0 1 0 0 \n", + "... ... ... ... ... \n", + "1200 1 0 0 0 \n", + "1201 1 0 0 0 \n", + "1202 0 1 0 0 \n", + "1203 1 0 0 0 \n", + "1204 0 1 0 0 \n", + "\n", + " internet age_normalized age_standardized \n", + "0 5 0.777778 1.018272 \n", + "1 2 0.777778 1.018272 \n", + "2 5 0.500000 0.160338 \n", + "3 2 0.111111 -1.040771 \n", + "4 1 0.500000 0.160338 \n", + "... ... ... ... \n", + "1200 5 0.500000 0.160338 \n", + "1201 5 0.500000 0.160338 \n", + "1202 1 0.111111 -1.040771 \n", + "1203 5 0.500000 0.160338 \n", + "1204 1 0.111111 -1.040771 \n", + "\n", + "[1205 rows x 17 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "# Преобразование категориальных переменных в числовые \n", + "label_encoder = LabelEncoder()\n", + "df_encoded['education_College'] = label_encoder.fit_transform(df_encoded['education_College'])\n", + "df_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Предсказательная способность': np.float64(0.941908713692946),\n", + " 'Скорость вычисления (с):': 1.1279711723327637,\n", + " 'Надежность': np.float64(0.005626647816237885),\n", + " 'Корреляция': array([0.01320147, 0.01701565, 0.11855808, 0.07147731, 0.00064522,\n", + " 0.01339934, 0.0080009 , 0.02973182, 0.00393624, 0. ,\n", + " 0.09713269, 0.14855715, 0.12622933, 0.03761528, 0.13008942,\n", + " 0.14516678]),\n", + " 'Цельность (%)': Education Level 0.0\n", + " Institution Type 0.0\n", + " Gender 0.0\n", + " Age 0.0\n", + " Device 0.0\n", + " IT Student 0.0\n", + " Location 0.0\n", + " Financial Condition 0.0\n", + " Internet Type 0.0\n", + " Network Type 0.0\n", + " Flexibility Level 0.0\n", + " age_group 0.0\n", + " age_normalized 0.0\n", + " age_standardized 0.0\n", + " dtype: float64}" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from time import time\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.feature_selection import mutual_info_classif\n", + "\n", + "\n", + "# Определение данных\n", + "X = df_encoded.drop('IT Student', axis=1) # Набор признаков\n", + "y = df_encoded['IT Student'] # Целевая переменная\n", + "\n", + "# Предсказательная способность\n", + "start_time = time()\n", + "model = RandomForestClassifier() \n", + "scores = cross_val_score(model, X, y, cv=5)\n", + "end_time = time()\n", + "\n", + "# Надежность \n", + "bootstrap_scores = []\n", + "for _ in range(100):\n", + " sample = df_encoded.sample(frac=1, replace=True)\n", + " sample_X = sample.drop('IT Student', axis=1)\n", + " sample_y = sample['IT Student']\n", + " bootstrap_score = accuracy_score(sample_y, model.fit(sample_X, sample_y).predict(sample_X))\n", + " bootstrap_scores.append(bootstrap_score)\n", + "\n", + "# Корреляция\n", + "correlations = mutual_info_classif(X, y, discrete_features='auto')\n", + "\n", + "# Цельность\n", + "null_percent = df.isnull().mean() * 100\n", + "\n", + "# Сборка всех метрик\n", + "quality_metrics = {\n", + " 'Предсказательная способность': scores.mean(),\n", + " 'Скорость вычисления (с):': end_time - start_time,\n", + " 'Надежность': np.std(bootstrap_scores),\n", + " 'Корреляция': correlations,\n", + " 'Цельность (%)': null_percent\n", + "}\n", + "\n", + "quality_metrics\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}