# MAI/LabWork01/LabWork6/ConvertorDataFrame.py
import pandas as pd
def _labelByThresholds(value, low, high, labels):
    """Return the small/average/big label for *value*.

    ``labels`` is a ``(small, average, big)`` triple. ``value <= low`` maps to
    the first label, ``low < value <= high`` to the second, and everything
    else to the third. NaN fails both comparisons and therefore gets the
    third ("big") label.
    """
    small, average, big = labels
    if value <= low:
        return small
    if value <= high:
        return average
    return big


def covertorDataFrame(csvPath="../../res/Stores.csv", countMainRows=35,
                      countGeneralRows=25):
    """Load the stores CSV and return textual category samples.

    The first ``countMainRows`` rows of the CSV are read, and the numeric
    columns ``Store_Area``, ``Store_Sales`` and ``Daily_Customer_Count`` are
    each mapped to a three-level text label stored in ``TextStoreArea``,
    ``TextStoreSales`` and ``TextDailyCustomerCount``.

    Args:
        csvPath: Path of the CSV file to read. The default is resolved
            relative to the current working directory.
        countMainRows: Number of leading rows of the CSV to use.
        countGeneralRows: Number of those rows that go to the general
            sample; the remaining rows form the validation sample.

    Returns:
        A two-element list ``[general, support]`` of DataFrames, each holding
        only the columns ``TextDailyCustomerCount``, ``TextStoreArea`` and
        ``TextStoreSales``.

    Raises:
        KeyError: If one of the expected columns is missing from the CSV.
    """
    df = pd.read_csv(csvPath)

    # Take the requested number of rows. The explicit copy makes the column
    # assignments below write to an independent frame instead of a slice of
    # ``df`` (avoids pandas' chained-assignment warning).
    mainDF = df.head(countMainRows).copy()

    mainDF['TextStoreArea'] = mainDF['Store_Area'].apply(
        lambda x: _labelByThresholds(
            x, 1100, 1700, ('Small_Area', 'Average_Area', 'Big_Area')))
    mainDF['TextStoreSales'] = mainDF['Store_Sales'].apply(
        lambda x: _labelByThresholds(
            x, 50000, 80000, ('Small_Sales', 'Average_Sales', 'Big_Sales')))
    mainDF['TextDailyCustomerCount'] = mainDF['Daily_Customer_Count'].apply(
        lambda x: _labelByThresholds(
            x, 400, 900,
            ('Small_Customer', 'Average_Customer', 'Big_Customer')))

    # Convert the original numeric columns to strings.
    convert_dict = {
        'Store_ID': str,
        'Store_Area': str,
        'Items_Available': str,
        'Daily_Customer_Count': str,
        'Store_Sales': str,
    }
    mainDF = mainDF.astype(convert_dict)

    resultColumns = ['TextDailyCustomerCount', 'TextStoreArea',
                     'TextStoreSales']

    # General sample.
    newDfGeneral = mainDF.iloc[0:countGeneralRows]

    # Validation sample.
    newDfSupport = mainDF.iloc[countGeneralRows:countMainRows]

    # Debug output of the validation sample. The original listed
    # 'TextStoreSales' twice; print the same three label columns that are
    # returned instead.
    print(newDfSupport[resultColumns])

    return [newDfGeneral[resultColumns], newDfSupport[resultColumns]]
# Alternative column selections (commented out, kept for reference):
# [['Store_Area', 'Store_Sales', 'Daily_Customer_Count', 'TextStoreArea']]
# [['Store_ID', 'Store_Area', 'TextStoreArea', 'Items_Available', 'Daily_Customer_Count', 'Store_Sales']]