From 6a292ad36495d10258fda288fd0ee94f8f80bd78 Mon Sep 17 00:00:00 2001 From: "annalyovushkina@yandex.ru" Date: Fri, 29 Nov 2024 03:01:55 +0400 Subject: [PATCH] =?UTF-8?q?=D1=84=D0=B8=D0=BD=D0=B0=D0=BB=20=D1=82=D0=BE?= =?UTF-8?q?=D1=87=D0=BD=D0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lab_4/__pycache__/utils.cpython-312.pyc | Bin 2826 -> 0 bytes lab_4/utils.py | 79 ------------------------ 2 files changed, 79 deletions(-) delete mode 100644 lab_4/__pycache__/utils.cpython-312.pyc delete mode 100644 lab_4/utils.py diff --git a/lab_4/__pycache__/utils.cpython-312.pyc b/lab_4/__pycache__/utils.cpython-312.pyc deleted file mode 100644 index 8d965a85f693d9f63443297b54e49ad0ec6741b7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2826 zcma)8O>7fK6yEjP>$M&KBnX00RHg;e7#RZv8k!nV3IxKRfbi2sk*tk(94}e#nw@om zqqP*F9wKqct*Jn2r1p>s4v|__tw1VLFO_<+>7mvNq)ME63nC{@eKX#59Jny{?##}- z_vXFteQ$REiblg6w8YK9sb669l0JAX>_7AT41BF}}|KP9LHFpvEkzk-+qdj+2&ejOMOR(&byKe|zhcLLWcWes0~QRtLx z$s?v*!tju=Oj*@)7A97Xlr`0IL=#&TQ_qW3(QQHK1-M>^ebigqXA5)iX#441;im^V z&g`~$cwkNOQ3=~$5az(jLwEizZ}T>H3oN|dW_UB!p)J__TY6kgv^krf;%RiXpcU{e z*rFY@g9>l^<^W+k@W}trU7PTOq*M#l!nKGkD1qr6;NumGM#7ri_N!w~@_g->3wd)i z*69I`BffJS#3ET<4!K(_&#qy&d+7Fd$PT}!WTYUD0j3l=y4PF;BIxRfHDpKN*Y!@6 z(ORq)uO&Kse&5)low0T3$2#XbW45Dq%nsXeJ3)O-iE=-{97fQ_D0v8y(PddzWP%hR zS%H!ask&t#t7u}3NM({>kQ!jDOQRWCOCyj+lz%B^r(kHBF{|oR2(#%94V zO9aF`*C<*RjS@mJE4ogHZBPAY+tr#F8zV2dJVnx@71$jR2#oY;laP zjp`tx=W*N64N7uq3h*q;%mZ40Z=Lf4DVJ7P*>S&#Q_K_)is_^gj37GJGh~O_Tb=TX zoMuwD04T;|fYhybb||#snQ9$~YbWMXUS$)sr;v6pq!>syEF>!ms+3(M=63CYY9*}& zk1SBVo066i9LXhH)d{uUWa=Dtx;haw>1!t?I$?&+Y11@JC)naIG3hJA5j3njzOrJ% ziN>Y69m%UGC*fK5%EAe6kkXMnCOV;$VVXdMv- zbfU*QRdQ_;j@W|T)gqP?sM0jFbcjW|VGBy@P3=uKKQwV2jzuyd>u_>T$ZBa~c=TfK z^5~?p@6f^Ag{m=gofM~4T^`O1Pe7gJ4o_4pRU?^l)!EH>51jK3PB>F6qvb5!oP8&l z2zHj>BIn@EKF_T|C-p3zZ3O$4s*T{l%E@QJ{m*0Dn_Yv=cyj6J@>liP{#ODYJ@8r( z;^8$;2!+?eoV0!E&ObZ%H{j32>&fHwFRnI{V}Hl4)un6yN{RX5rj%%gl67gvE5R2M z=ZDwB+}@$46k0gAczbzlWvH>|$kT~+!M9gz`1|M2E=(=XE{kgtCnXjS-mN{4_beX0 zSNlO&KKp(2XJPfsPhAa^eHPEwrEEQ(ZN_^a^nBO%ZQuP5R(e-Ie)7rh+268{kN^2; zJ$|JwU0D;k9lMr`tI0q{f+39c)?aOd8!2Pc=M`xhUbTooUlZ}bg5+4bwx<8-5MEpgRtxn=w(ifq21vP7z}yPm)FIYho^S_$oN&vhe&SL}azv|IhDtN(H<=@r;V_Vi z8G{E#UZ`lsu&eHcIXMRY Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]: - """ - Splits a Pandas dataframe into three subsets (train, val, and test) - following fractional ratios provided by the user, where each subset is - stratified by the values in a specific column (that is, each subset has - the same relative frequency of the values in the column). It performs this - splitting by running train_test_split() twice. - - Parameters - ---------- - df_input : Pandas dataframe - Input dataframe to be split. - stratify_colname : str - The name of the column that will be used for stratification. Usually - this column would be for the label. - frac_train : float - frac_val : float - frac_test : float - The ratios with which the dataframe will be split into train, val, and - test data. The values should be expressed as float fractions and should - sum to 1.0. - random_state : int, None, or RandomStateInstance - Value to be passed to train_test_split(). - - Returns - ------- - df_train, df_val, df_test : - Dataframes containing the three splits. - """ - - if frac_train + frac_val + frac_test != 1.0: - raise ValueError( - "fractions %f, %f, %f do not add up to 1.0" - % (frac_train, frac_val, frac_test) - ) - - if stratify_colname not in df_input.columns: - raise ValueError("%s is not a column in the dataframe" % (stratify_colname)) - - X = df_input # Contains all columns. - y = df_input[ - [stratify_colname] - ] # Dataframe of just the column on which to stratify. - - # Split original dataframe into train and temp dataframes. - df_train, df_temp, y_train, y_temp = train_test_split( - X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state - ) - - if frac_val <= 0: - assert len(df_input) == len(df_train) + len(df_temp) - return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp - - # Split the temp dataframe into val and test dataframes. - relative_frac_test = frac_test / (frac_val + frac_test) - df_val, df_test, y_val, y_test = train_test_split( - df_temp, - y_temp, - stratify=y_temp, - test_size=relative_frac_test, - random_state=random_state, - ) - - assert len(df_input) == len(df_train) + len(df_val) + len(df_test) - return df_train, df_val, df_test, y_train, y_val, y_test