Compare commits
123 Commits
alexandrov
...
podkorytov
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a492e2a6df | ||
| b26c54a7e4 | |||
| 9e6286a3a4 | |||
| 4a6bb8139e | |||
| 8b9050cce3 | |||
| 3e08abf42b | |||
| c6d41e1157 | |||
| 6a9310501a | |||
| bed476a27b | |||
| 2607c0dbfd | |||
| be253bf939 | |||
| 9ab1a0f1ca | |||
| 8bd93ee83e | |||
| 1fddfd2362 | |||
| 994129b8a9 | |||
| 79b5e5bb12 | |||
| 08aa85abbc | |||
| de50a5f08d | |||
| c37eca50a6 | |||
| 2906d3886f | |||
| e034d93062 | |||
| d19941c6ec | |||
| 2a51665e61 | |||
| 879a1c5730 | |||
|
|
78bec04c10 | ||
| c212c98a90 | |||
|
|
25acce2c79 | ||
|
|
db918284b5 | ||
| 71cad406c2 | |||
| a076fd78ae | |||
| 124f682c8b | |||
| 8834f99ecf | |||
| dd0d45ef93 | |||
| c7060e6719 | |||
|
|
23bc64c816 | ||
|
|
be1b6a74ae | ||
|
|
32821e551a | ||
|
|
231aa0d062 | ||
|
|
10799cb639 | ||
|
|
0f61b37f8b | ||
|
|
3a68c16a44 | ||
|
|
481361b7e0 | ||
| 0c414d7ab4 | |||
| d61b7c24f2 | |||
| b5fa7754bb | |||
| d575910860 | |||
| 5894881f24 | |||
| 92ec657bcd | |||
| 346241253f | |||
|
|
ed5c549a0b | ||
| 65b47c7d0e | |||
| f7af263316 | |||
| c45de91019 | |||
| 4fad5585c1 | |||
| c9d485daca | |||
|
|
1638a80b4a | ||
| 6a9602359c | |||
| cee99b90a5 | |||
| bb7b8e6ac0 | |||
| 18ea7ee729 | |||
| 200d8dee7e | |||
| 4e1980e638 | |||
| a43eb72079 | |||
|
|
464b437c69 | ||
| 0b422e70f9 | |||
| b0accdaf06 | |||
|
|
716e7b7ee6 | ||
| 145b7336b8 | |||
| bea977d84c | |||
|
|
1e03e8b1d2 | ||
| ad5ed23a4c | |||
|
|
1e1a73de10 | ||
|
|
226dd4efe9 | ||
|
|
c0217ad0d3 | ||
|
|
caab9f2f8b | ||
|
|
d2580ffa9e | ||
| a98d914e7c | |||
| a4985e4d76 | |||
| 3bb04b059b | |||
| a9e1145b0e | |||
| f44ba0d0a2 | |||
| ccf3bfb561 | |||
| 4f349a1d49 | |||
| f8075403a3 | |||
| c20695af79 | |||
| 33dba33cc4 | |||
| 41e0e8598f | |||
| 53a25975f9 | |||
| 5e00a83340 | |||
| 2239c15572 | |||
| 07333219ed | |||
| 5891b16f9d | |||
| 81874f0f84 | |||
| ce6105bee6 | |||
| ca3b734361 | |||
| 2f1d67dc8f | |||
| b9ec1fd145 | |||
| f84f7abaa9 | |||
| 5445cef67d | |||
| b967af636c | |||
| ad60c6221e | |||
|
|
8942f824d5 | ||
| 106e02f76b | |||
|
|
abd650a641 | ||
| 15936c6996 | |||
|
|
c03b5e3a94 | ||
| 16db685d3d | |||
| 84fe84a15a | |||
| 406315ddf7 | |||
| d592186245 | |||
| 1f70bc7eb8 | |||
| 7ccd400417 | |||
|
|
c15ab42cd4 | ||
| 5eb35fe26d | |||
|
|
ef485bf514 | ||
|
|
3a868e5545 | ||
| fc2fe74052 | |||
| 35826f2461 | |||
| 7781a379c3 | |||
| adca415462 | |||
| 72507eb3af | |||
| 516c7aea4f | |||
|
|
f11ba4d365 |
6
.idea/IIS_2023_1.iml
generated
@@ -1,8 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.8 (venv)" jdkType="Python SDK" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.9 (PyCharmProjects)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
7
.idea/discord.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DiscordProjectSettings">
|
||||
<option name="show" value="ASK" />
|
||||
<option name="description" value="" />
|
||||
</component>
|
||||
</project>
|
||||
8
.idea/misc.xml
generated
@@ -1,4 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (venv)" project-jdk-type="Python SDK" />
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.9 (PyCharmProjects)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (PyCharmProjects)" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
</project>
|
||||
72
.idea/workspace.xml
generated
@@ -1,7 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="AutoImportSettings">
|
||||
<option name="autoReloadType" value="SELECTIVE" />
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="0ceb130e-88da-4a20-aad6-17f5ab4226ac" name="Changes" comment="" />
|
||||
<list default="true" id="0ceb130e-88da-4a20-aad6-17f5ab4226ac" name="Changes" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
@@ -15,30 +22,50 @@
|
||||
</option>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_BRANCH_BY_REPOSITORY">
|
||||
<map>
|
||||
<entry key="$PROJECT_DIR$" value="main" />
|
||||
</map>
|
||||
</option>
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="MarkdownSettingsMigration">
|
||||
<option name="stateVersion" value="1" />
|
||||
</component>
|
||||
<component name="ProjectColorInfo">{
|
||||
"associatedIndex": 2
|
||||
}</component>
|
||||
<component name="ProjectId" id="2VlZqWiOX68aCf0o2y0AtYJWURS" />
|
||||
<component name="ProjectLevelVcsManager">
|
||||
<ConfirmationsSetting value="1" id="Add" />
|
||||
</component>
|
||||
<component name="ProjectViewState">
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent"><![CDATA[{
|
||||
"keyToString": {
|
||||
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"last_opened_file_path": "D:/ulstukek/Course4/IIS/labs"
|
||||
<component name="PropertiesComponent">{
|
||||
"keyToString": {
|
||||
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"WebServerToolWindowFactoryState": "false",
|
||||
"git-widget-placeholder": "senkin__alexander__lab__1",
|
||||
"last_opened_file_path": "D:/ulstukek/Course4/IIS/labs",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "reference.settings.ide.settings.new.ui",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
}
|
||||
}]]></component>
|
||||
}</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="D:\ulstukek\Course4\IIS\IISLabs\IIS_2023_1\zavrazhnova_svetlana_lab_3" />
|
||||
<recent name="D:\ulstukek\Course4\IIS\IISLabs\IIS_2023_1\zavrazhnova_svetlana_lab_1" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.zavrazhnova_svetlana_lab3_2">
|
||||
<component name="RunManager">
|
||||
<configuration name="zavrazhnova_svetlana_lab3_2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="IIS_2023_1" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
@@ -51,6 +78,7 @@
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/zavrazhnova_svetlana_lab_3/zavrazhnova_svetlana_lab3_2.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
@@ -72,6 +100,7 @@
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/zavrazhnova_svetlana_lab_2/zavrazhnova_svetlana_lab_2.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
@@ -93,6 +122,7 @@
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/zavrazhnova_svetlana_lab_3/zavrazhnova_svetlana_lab_3_1.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
@@ -104,9 +134,11 @@
|
||||
</configuration>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab_3_1" />
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab_2" />
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
|
||||
<item itemvalue="Python.zavrazhnova_svetlana_lab_3_1" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
@@ -118,18 +150,38 @@
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1695412818437</updated>
|
||||
<workItem from="1697735437405" duration="1706000" />
|
||||
<workItem from="1697740229646" duration="3802000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TypeScriptGeneratedFilesManager">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
<component name="Vcs.Log.Tabs.Properties">
|
||||
<option name="TAB_STATES">
|
||||
<map>
|
||||
<entry key="MAIN">
|
||||
<value>
|
||||
<State />
|
||||
<State>
|
||||
<option name="FILTERS">
|
||||
<map>
|
||||
<entry key="branch">
|
||||
<value>
|
||||
<list>
|
||||
<option value="HEAD" />
|
||||
</list>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</State>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||
<SUITE FILE_PATH="coverage/PyCharmProjects$senkin_alexander_lab_1.coverage" NAME="senkin_alexander_lab_1 Coverage Results" MODIFIED="1697744262965" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/senkin_alexander_lab_1" />
|
||||
</component>
|
||||
</project>
|
||||
47
abanin_daniil_lab_1/README.md
Normal file
@@ -0,0 +1,47 @@
|
||||
## Лабораторная работа №1
|
||||
|
||||
### Работа с типовыми наборами данных и различными моделями
|
||||
|
||||
### ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, numpy, matplotlib, sklearn
|
||||
* запустить проект (стартовая точка класс lab1)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`,
|
||||
* Библиотеки numpy, matplotlib, sklearn
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
|
||||
* Программа генерирует данные с помощью make_moons (noise=0.3, random_state=rs)
|
||||
* Сравнивает три типа моделей: линейная, полиномиальная, гребневая полиномиальная регрессии
|
||||
|
||||
### Примеры работы:
|
||||
|
||||
#### Результаты:
|
||||
MAE - средняя абсолютная ошибка, измеряет среднюю абсолютную разницу между прогнозируемыми значениями модели и фактическими значениями целевой переменной
|
||||
MSE - средняя квадратическая ошибка, измеряет среднюю квадратичную разницу между прогнозируемыми значениями модели и фактическими значениями целевой переменной
|
||||
|
||||
Чем меньше значения показателей, тем лучше модель справляется с предсказанием
|
||||
|
||||
Линейная регрессия
|
||||
MAE 0.2959889435199454
|
||||
MSE 0.13997968555679302
|
||||
|
||||
Полиномиальная регрессия
|
||||
MAE 0.21662135861071705
|
||||
MSE 0.08198825629271855
|
||||
|
||||
Гребневая полиномиальная регрессия
|
||||
MAE 0.2102788716636562
|
||||
MSE 0.07440133949387796
|
||||
|
||||
Лучший результат показала модель **Гребневая полиномиальная регрессия**
|
||||
|
||||

|
||||

|
||||

|
||||
BIN
abanin_daniil_lab_1/greb_reg.jpg
Normal file
|
After Width: | Height: | Size: 59 KiB |
66
abanin_daniil_lab_1/lab1.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.colors import ListedColormap
|
||||
from sklearn.linear_model import LinearRegression, Ridge
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.preprocessing import PolynomialFeatures
|
||||
from sklearn.datasets import make_moons
|
||||
from sklearn import metrics
|
||||
|
||||
# Two-colour map (dark red / red) used for the training points in the scatter plots.
cm_bright = ListedColormap(['#8B0000', '#FF0000'])
|
||||
# Two-colour map (orange-red / orange) used for the test points in the scatter plots.
cm_bright1 = ListedColormap(['#FF4500', '#FFA500'])
|
||||
|
||||
|
||||
def create_moons():
    """Generate the two-moons data set, split it, and run all three models.

    The data comes from ``make_moons(noise=0.3, random_state=0)``; 40 % of
    the samples are held out as the test set (``random_state=42``).  Each
    model function receives the same split and is responsible for its own
    plotting and metric printing.
    """
    features, labels = make_moons(noise=0.3, random_state=0)
    # train_test_split returns [X_train, X_test, y_train, y_test], which is
    # exactly the positional order every model function expects.
    split = train_test_split(features, labels, test_size=.4, random_state=42)

    for run_model in (linear_regretion, polynomial_regretion, ridge_regretion):
        run_model(*split)
|
||||
|
||||
|
||||
def linear_regretion(x_train, x_test, y_train, y_test):
    """Fit a linear regression on the training split and report test error.

    Plots the training and test points, overlays the predictions for the
    test set, and prints MAE / MSE computed on the test set.

    Parameters
    ----------
    x_train, x_test : array-like of shape (n_samples, 2)
        Feature matrices; both columns are used as plot coordinates.
    y_train, y_test : array-like of shape (n_samples,)
        Target values, also used to colour the scatter points.
    """
    model = LinearRegression().fit(x_train, y_train)
    # Use the estimator's own predict(): it evaluates intercept + X @ coef.
    # The previous element-wise `intercept + coef * x_test` produced an
    # (n_samples, 2) array whose columns are not the model's predictions.
    y_predict = model.predict(x_test)

    plt.title('Линейная регрессия')
    print('Линейная регрессия')
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright1, alpha=0.7)
    plt.plot(x_test, y_predict, color='red')
    print('MAE', metrics.mean_absolute_error(y_test, y_predict))
    print('MSE', metrics.mean_squared_error(y_test, y_predict))
    plt.show()
|
||||
|
||||
|
||||
def polynomial_regretion(x_train, x_test, y_train, y_test):
    """Fit a degree-3 polynomial regression and report test error.

    The polynomial expansion is fitted on the training features and then
    applied unchanged to the test features.  Metrics are computed on the
    test split so that they are comparable with the linear and ridge
    models, which are also scored on held-out data.

    Parameters
    ----------
    x_train, x_test : array-like of shape (n_samples, 2)
        Feature matrices; both columns are used as plot coordinates.
    y_train, y_test : array-like of shape (n_samples,)
        Target values, also used to colour the scatter points.
    """
    polynomial_features = PolynomialFeatures(degree=3)
    train_polynomial = polynomial_features.fit_transform(x_train)
    # Re-use the already fitted expansion for the held-out samples.
    test_polynomial = polynomial_features.transform(x_test)

    base_model = LinearRegression()
    base_model.fit(train_polynomial, y_train)
    y_predict = base_model.predict(test_polynomial)

    plt.title('Полиномиальная регрессия')
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright1, alpha=0.7)
    plt.plot(x_test, y_predict, color='blue')
    plt.show()

    print('Полиномиальная регрессия')
    print('MAE', metrics.mean_absolute_error(y_test, y_predict))
    print('MSE', metrics.mean_squared_error(y_test, y_predict))
|
||||
|
||||
|
||||
def ridge_regretion(X_train, X_test, y_train, y_test):
    """Fit a degree-3 polynomial ridge regression and report test error.

    A two-step pipeline (polynomial expansion, then ``Ridge(alpha=1.0)``)
    is trained on the training split.  The function plots both splits,
    overlays the test-set predictions, and prints MAE / MSE on the test set.
    """
    steps = [
        ('poly', PolynomialFeatures(degree=3)),
        ('ridge', Ridge(alpha=1.0)),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)
    predicted = pipeline.predict(X_test)

    plt.title('Гребневая полиномиальная регрессия')
    # Training points first, then semi-transparent test points on top.
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright1, alpha=0.7)
    plt.plot(X_test, predicted, color='blue')
    plt.show()

    print('Гребневая полиномиальная регрессия')
    mae = metrics.mean_absolute_error(y_test, predicted)
    print('MAE', mae)
    mse = metrics.mean_squared_error(y_test, predicted)
    print('MSE', mse)
|
||||
|
||||
|
||||
# Runs the whole comparison at module level: there is no __main__ guard,
# so importing this module also triggers the plots and prints.
create_moons()
|
||||
BIN
abanin_daniil_lab_1/lin_reg.jpg
Normal file
|
After Width: | Height: | Size: 30 KiB |
BIN
abanin_daniil_lab_1/pol_reg.jpg
Normal file
|
After Width: | Height: | Size: 63 KiB |
41
abanin_daniil_lab_2/README.md
Normal file
@@ -0,0 +1,41 @@
|
||||
## Лабораторная работа №2
|
||||
|
||||
### Ранжирование признаков
|
||||
|
||||
## ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, numpy, matplotlib, sklearn
|
||||
* запустить проект (стартовая точка lab2)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`, библиотеки numpy, matplotlib, sklearn
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
|
||||
* Генерирует данные и обучает такие модели, как: LinearRegression, RandomizedLasso, Recursive Feature Elimination (RFE)
|
||||
* Производится ранжирование признаков с помощью моделей LinearRegression, RandomizedLasso, Recursive Feature Elimination (RFE)
|
||||
* Отображение получившихся результатов: 4 самых важных признака по среднему значению, значения признаков для каждой модели
|
||||
|
||||
|
||||
### 4 самых важных признака по среднему значению
|
||||
* Параметр - x4, значение - 0.56
|
||||
* Параметр - x1, значение - 0.45
|
||||
* Параметр - x2, значение - 0.33
|
||||
* Параметр - x9, значение - 0.33
|
||||
|
||||
#### Linear Regression
|
||||
[('x1', 1.0), ('x4', 0.69), ('x2', 0.61), ('x11', 0.59), ('x3', 0.51), ('x13', 0.48), ('x5', 0.19), ('x12', 0.19), ('x14', 0.12), ('x8', 0.03), ('x6', 0.02), ('x10', 0.01), ('x7', 0.0), ('x9', 0.0)]
|
||||
|
||||
#### Recursive Feature Elimination
|
||||
[('x9', 1.0), ('x7', 0.86), ('x10', 0.71), ('x6', 0.57), ('x8', 0.43), ('x14', 0.29), ('x12', 0.14), ('x1', 0.0), ('x2', 0.0), ('x3', 0.0), ('x4', 0.0), ('x5', 0.0), ('x11', 0.0), ('x13', 0.0)]
|
||||
|
||||
#### Randomize Lasso
|
||||
[('x4', 1.0), ('x2', 0.37), ('x1', 0.36), ('x5', 0.32), ('x6', 0.02), ('x8', 0.02), ('x3', 0.01), ('x7', 0.0), ('x9', 0.0), ('x10', 0.0), ('x11', 0.0), ('x12', 0.0), ('x13', 0.0), ('x14', 0.0)]
|
||||
|
||||
#### Результаты:
|
||||
|
||||

|
||||
76
abanin_daniil_lab_2/RadomizedLasso.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from sklearn.utils import check_X_y, check_random_state
|
||||
from sklearn.linear_model import Lasso
|
||||
from scipy.sparse import issparse
|
||||
from scipy import sparse
|
||||
|
||||
|
||||
def _rescale_data(x, weights):
|
||||
if issparse(x):
|
||||
size = weights.shape[0]
|
||||
weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
|
||||
x_rescaled = x * weight_dia
|
||||
else:
|
||||
x_rescaled = x * (1 - weights)
|
||||
|
||||
return x_rescaled
|
||||
|
||||
|
||||
class RandomizedLasso(Lasso):
    """Lasso fitted on randomly down-scaled feature columns.

    Before delegating to :class:`sklearn.linear_model.Lasso`, ``fit``
    multiplies every feature column by either ``1`` or ``weakness``, chosen
    independently per feature.  The original description of this estimator
    states that this corresponds to drawing each coefficient's penalty from
    the range ``[alpha, alpha / weakness]``.

    Parameters
    ----------
    weakness : float
        Scaling applied to the randomly selected columns.  Must be a real
        number in ``(0, 1]``; ``1`` disables the perturbation.

    All remaining parameters are forwarded unchanged to ``Lasso``.

    See also
    --------
    sklearn.linear_model.Lasso : the underlying estimator.
    """

    def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True,
                 precompute=False, copy_X=True, max_iter=1000,
                 tol=1e-4, warm_start=False, positive=False,
                 random_state=None, selection='cyclic'):
        self.weakness = weakness
        super().__init__(
            alpha=alpha, fit_intercept=fit_intercept, precompute=precompute,
            copy_X=copy_X, max_iter=max_iter, tol=tol, warm_start=warm_start,
            positive=positive, random_state=random_state,
            selection=selection)

    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.

        Returns
        -------
        Whatever ``Lasso.fit`` returns for the rescaled data.

        Raises
        ------
        ValueError
            If ``weakness`` is not a real number in ``(0, 1]``.
        """
        import numbers

        weakness = self.weakness
        # Accept any real number (the integer 1 is a valid weakness) but not
        # bool, which is technically an int subclass.
        is_real = (isinstance(weakness, numbers.Real)
                   and not isinstance(weakness, bool))
        if not is_real or not (0.0 < weakness <= 1.0):
            raise ValueError(
                f'weakness should be a float in (0, 1], got {weakness}')

        X, y = check_X_y(X, y, accept_sparse=True)

        n_features = X.shape[1]
        random_state = check_random_state(self.random_state)

        # Each feature draws 0 or 1; a 1 yields weight (1 - weakness), so
        # _rescale_data multiplies that column by `weakness`.
        weights = (1. - weakness) * random_state.randint(0, 1 + 1, size=(n_features,))

        # NOTE(review): rescaling happens before any centering/intercept
        # handling done inside Lasso.fit; the original author flagged this
        # ordering as unverified.
        X_rescaled = _rescale_data(X, weights)
        return super().fit(X_rescaled, y)
|
||||
BIN
abanin_daniil_lab_2/__pycache__/RadomizedLasso.cpython-39.pyc
Normal file
81
abanin_daniil_lab_2/lab2.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from RadomizedLasso import RandomizedLasso
|
||||
from sklearn.feature_selection import RFE
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
import numpy as np
|
||||
|
||||
# Feature labels x1 .. x14, one per column of the generated data set.
names = ["x%s" % i for i in range(1, 15)]
|
||||
|
||||
|
||||
def start_point():
    """Fit the three ranking models and print their feature scores.

    Generates the synthetic data, fits a linear regression, recursive
    feature elimination (wrapping a linear regression) and a randomized
    Lasso, converts each model's output to a 0..1 importance score per
    feature, then prints the mean scores and the per-model rankings.
    """
    X, Y = generation_data()

    # Linear model
    lr = LinearRegression()
    lr.fit(X, Y)

    # Recursive feature elimination
    rfe = RFE(lr)
    rfe.fit(X, Y)

    # Randomized Lasso
    randomized_lasso = RandomizedLasso(alpha=.01)
    randomized_lasso.fit(X, Y)

    # RFE.ranking_ assigns 1 to the selected (best) features and larger
    # numbers to worse ones, while rank_to_dict treats larger values as more
    # important.  Flip the ranking so the best features score highest.
    rfe_scores = rfe.ranking_.max() - rfe.ranking_

    ranks = {
        "Linear Regression": rank_to_dict(lr.coef_),
        "Recursive Feature Elimination": rank_to_dict(rfe_scores),
        "Randomize Lasso": rank_to_dict(randomized_lasso.coef_),
    }

    get_estimation(ranks)
    print_sorted_data(ranks)
|
||||
|
||||
|
||||
def generation_data():
    """Build the deterministic 750 x 14 synthetic regression data set.

    The global NumPy generator is seeded with 0.  ``Y`` depends only on the
    first five columns of ``X`` plus a single scalar noise draw shared by
    all samples.  Columns 10-13 are then overwritten with noisy copies of
    columns 0-3.

    Returns
    -------
    (X, Y) : tuple of ndarray
        ``X`` has shape (750, 14) and ``Y`` has shape (750,).
    """
    n_samples, n_features = 750, 14
    np.random.seed(0)
    X = np.random.uniform(0, 1, (n_samples, n_features))

    interaction = 10 * np.sin(np.pi * X[:, 0] * X[:, 1])
    quadratic = 20 * (X[:, 2] - .5) ** 2
    linear = 10 * X[:, 3]
    quintic = 5 * X[:, 4] ** 5
    # One scalar draw: the same offset is added to every sample.
    noise = np.random.normal(0, 1)
    Y = interaction + quadratic + linear + quintic + noise

    # Make the last four columns near-duplicates of the first four.
    X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (n_samples, 4))
    return X, Y
|
||||
|
||||
|
||||
def rank_to_dict(ranks):
    """Map each feature name to its min-max-scaled absolute score.

    Parameters
    ----------
    ranks : array-like of shape (len(names),)
        One raw score per feature, in the same order as ``names``.

    Returns
    -------
    dict
        ``{feature_name: score}`` where the scores are the absolute values
        of ``ranks`` rescaled to ``[0, 1]`` and rounded to two decimals.

    Raises
    ------
    ValueError
        If the number of scores differs from the number of feature names.
    """
    magnitudes = np.abs(ranks).ravel()
    # Derive the expected length from `names` instead of hard-coding 14, but
    # keep failing loudly on a mismatch as the fixed reshape used to do.
    if magnitudes.shape[0] != len(names):
        raise ValueError(
            'expected %d scores, got %d' % (len(names), magnitudes.shape[0]))

    scaled = MinMaxScaler().fit_transform(magnitudes.reshape(-1, 1)).ravel()
    return {name: round(score, 2) for name, score in zip(names, scaled)}
|
||||
|
||||
|
||||
def get_estimation(ranks: dict):
    """Print and return the mean score of every feature across all models.

    Parameters
    ----------
    ranks : dict
        ``{model_name: {feature_name: score}}``.

    Returns
    -------
    list of (feature_name, mean_score)
        Sorted by mean score, highest first; means are rounded to two
        decimals.  The same list is printed, followed by its first four
        entries.
    """
    totals = {}
    # Walk through every model's scores and accumulate them per feature.
    for scores in ranks.values():
        for feature, score in scores.items():
            totals[feature] = totals.get(feature, 0) + score

    # The divisor is the number of models, even for a feature that is
    # missing from some of them (same as before).
    mean = {feature: round(total / len(ranks), 2)
            for feature, total in totals.items()}

    mean_sorted = sorted(mean.items(), key=lambda item: item[1], reverse=True)
    print("Средние значения")
    print(mean_sorted)

    print("4 самых важных признака по среднему значению")
    for feature, score in mean_sorted[:4]:
        print('Параметр - {0}, значение - {1}'.format(feature, score))

    return mean_sorted
|
||||
|
||||
|
||||
|
||||
def print_sorted_data(ranks: {}):
    """Sort each model's scores in descending order and print them.

    Note that ``ranks`` is modified in place: every ``{feature: score}``
    mapping is replaced by a list of ``(feature, score)`` pairs ordered from
    highest to lowest score.  Output starts with a blank line, then for each
    model its name on one line and the sorted list on the next.
    """
    print()
    # First pass: replace every mapping by its sorted list of pairs.
    for model_name in ranks:
        pairs = ranks[model_name].items()
        ranks[model_name] = sorted(pairs, key=lambda pair: pair[1], reverse=True)
    # Second pass: print only after everything has been sorted.
    for model_name, ordered in ranks.items():
        print(model_name, ordered, sep="\n")
|
||||
|
||||
|
||||
# Runs the full ranking at module level: there is no __main__ guard, so
# importing this module also triggers the computation and prints.
start_point()
|
||||
BIN
abanin_daniil_lab_2/result.png
Normal file
|
After Width: | Height: | Size: 178 KiB |
27
abanin_daniil_lab_3/README.md
Normal file
@@ -0,0 +1,27 @@
|
||||
## Лабораторная работа №3
|
||||
|
||||
### Деревья решений
|
||||
|
||||
## Cтудент группы ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, numpy, matplotlib, sklearn
|
||||
* запустить проект (lab3)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`, библиотеки numpy, matplotlib, sklearn
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
|
||||
* Выполняет ранжирование признаков для регрессионной модели
|
||||
* По данным "Eligibility Prediction for Loan" решает задачу классификации (с помощью дерева решений), в которой необходимо выявить риски выдачи кредита и определить его статус (выдан или отказ). В качестве исходных данных используются три признака: Credit_History - соответствие кредитной истории стандартам банка, ApplicantIncome - доход заявителя, LoanAmount - сумма кредита.
|
||||
|
||||
### Примеры работы:
|
||||
|
||||
#### Результаты:
|
||||
* Наиболее важным параметром при выдаче кредита оказался доход заявителя - ApplicantIncome, затем LoanAmount - сумма выдаваемого кредита
|
||||
|
||||

|
||||
33
abanin_daniil_lab_3/lab3.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Turn off pandas' chained-assignment warning for the whole script.
pd.options.mode.chained_assignment = None
|
||||
|
||||
# CSV file read by the script; being a relative path, it is resolved against
# the current working directory.
FILE_PATH = "loan.csv"
|
||||
# Columns of the CSV used as classifier inputs, in this order.
REQUIRED_COLUMNS = ['Credit_History', 'LoanAmount', 'ApplicantIncome']
|
||||
# Column of the CSV the classifier is trained to predict.
TARGET_COLUMN = 'Loan_Status'
|
||||
|
||||
|
||||
def print_classifier_info(feature_importance, feature_names=None):
    """Print and return each feature's importance rounded to two decimals.

    Parameters
    ----------
    feature_importance : sequence of float
        One importance value per feature, in the same order as
        ``feature_names``.
    feature_names : sequence of str, optional
        Labels for the importances.  Defaults to ``REQUIRED_COLUMNS``, the
        column order the classifier in this script is trained on.

    Returns
    -------
    dict
        ``{feature_name: rounded_importance}``; the same dict is printed.
    """
    if feature_names is None:
        feature_names = REQUIRED_COLUMNS

    # Pair every name with its own importance.  The previous version summed
    # the last three entries into the third slot, which with exactly three
    # features is the sum of all importances.
    scores = {name: round(score, 2)
              for name, score in zip(feature_names, feature_importance)}
    print(scores)
    return scores
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Load the loan data and keep only the model inputs and the target.
    data = pd.read_csv(FILE_PATH)
    features = data[REQUIRED_COLUMNS]
    target = data[TARGET_COLUMN]

    # Hold out 20 % of the rows for evaluation; fixed seed for repeatability.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42)

    classifier_tree = DecisionTreeClassifier(random_state=42)
    classifier_tree.fit(X_train, y_train)

    print_classifier_info(classifier_tree.feature_importances_)
    test_score = classifier_tree.score(X_test, y_test)
    print("Оценка качества (задача классификации) - ", test_score)
|
||||
615
abanin_daniil_lab_3/loan.csv
Normal file
@@ -0,0 +1,615 @@
|
||||
Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
|
||||
LP001002,Male,No,0,1,No,5849,0.0,360.0,1.0,0,Y,0.0
|
||||
LP001003,Male,Yes,1,1,No,4583,1508.0,128.0,360.0,1,Rural,0.0
|
||||
LP001005,Male,Yes,0,1,Yes,3000,0.0,66.0,360.0,1,Urban,1.0
|
||||
LP001006,Male,Yes,0,0,No,2583,2358.0,120.0,360.0,1,Urban,1.0
|
||||
LP001008,Male,No,0,1,No,6000,0.0,141.0,360.0,1,Urban,1.0
|
||||
LP001011,Male,Yes,2,1,Yes,5417,4196.0,267.0,360.0,1,Urban,1.0
|
||||
LP001013,Male,Yes,0,0,No,2333,1516.0,95.0,360.0,1,Urban,1.0
|
||||
LP001014,Male,Yes,3+,1,No,3036,2504.0,158.0,360.0,0,Semiurban,0.0
|
||||
LP001018,Male,Yes,2,1,No,4006,1526.0,168.0,360.0,1,Urban,1.0
|
||||
LP001020,Male,Yes,1,1,No,12841,10968.0,349.0,360.0,1,Semiurban,0.0
|
||||
LP001024,Male,Yes,2,1,No,3200,700.0,70.0,360.0,1,Urban,1.0
|
||||
LP001027,Male,Yes,2,1,,2500,1840.0,109.0,360.0,1,Urban,1.0
|
||||
LP001028,Male,Yes,2,1,No,3073,8106.0,200.0,360.0,1,Urban,1.0
|
||||
LP001029,Male,No,0,1,No,1853,2840.0,114.0,360.0,1,Rural,0.0
|
||||
LP001030,Male,Yes,2,1,No,1299,1086.0,17.0,120.0,1,Urban,1.0
|
||||
LP001032,Male,No,0,1,No,4950,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP001034,Male,No,1,0,No,3596,0.0,100.0,240.0,0,Urban,1.0
|
||||
LP001036,Female,No,0,1,No,3510,0.0,76.0,360.0,0,Urban,0.0
|
||||
LP001038,Male,Yes,0,0,No,4887,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP001041,Male,Yes,0,1,,2600,3500.0,115.0,,1,Urban,1.0
|
||||
LP001043,Male,Yes,0,0,No,7660,0.0,104.0,360.0,0,Urban,0.0
|
||||
LP001046,Male,Yes,1,1,No,5955,5625.0,315.0,360.0,1,Urban,1.0
|
||||
LP001047,Male,Yes,0,0,No,2600,1911.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001050,,Yes,2,0,No,3365,1917.0,112.0,360.0,0,Rural,0.0
|
||||
LP001052,Male,Yes,1,1,,3717,2925.0,151.0,360.0,0,Semiurban,0.0
|
||||
LP001066,Male,Yes,0,1,Yes,9560,0.0,191.0,360.0,1,Semiurban,1.0
|
||||
LP001068,Male,Yes,0,1,No,2799,2253.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001073,Male,Yes,2,0,No,4226,1040.0,110.0,360.0,1,Urban,1.0
|
||||
LP001086,Male,No,0,0,No,1442,0.0,35.0,360.0,1,Urban,0.0
|
||||
LP001087,Female,No,2,1,,3750,2083.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001091,Male,Yes,1,1,,4166,3369.0,201.0,360.0,0,Urban,0.0
|
||||
LP001095,Male,No,0,1,No,3167,0.0,74.0,360.0,1,Urban,0.0
|
||||
LP001097,Male,No,1,1,Yes,4692,0.0,106.0,360.0,1,Rural,0.0
|
||||
LP001098,Male,Yes,0,1,No,3500,1667.0,114.0,360.0,1,Semiurban,1.0
|
||||
LP001100,Male,No,3+,1,No,12500,3000.0,320.0,360.0,1,Rural,0.0
|
||||
LP001106,Male,Yes,0,1,No,2275,2067.0,0.0,360.0,1,Urban,1.0
|
||||
LP001109,Male,Yes,0,1,No,1828,1330.0,100.0,,0,Urban,0.0
|
||||
LP001112,Female,Yes,0,1,No,3667,1459.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001114,Male,No,0,1,No,4166,7210.0,184.0,360.0,1,Urban,1.0
|
||||
LP001116,Male,No,0,0,No,3748,1668.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP001119,Male,No,0,1,No,3600,0.0,80.0,360.0,1,Urban,0.0
|
||||
LP001120,Male,No,0,1,No,1800,1213.0,47.0,360.0,1,Urban,1.0
|
||||
LP001123,Male,Yes,0,1,No,2400,0.0,75.0,360.0,0,Urban,1.0
|
||||
LP001131,Male,Yes,0,1,No,3941,2336.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001136,Male,Yes,0,0,Yes,4695,0.0,96.0,,1,Urban,1.0
|
||||
LP001137,Female,No,0,1,No,3410,0.0,88.0,,1,Urban,1.0
|
||||
LP001138,Male,Yes,1,1,No,5649,0.0,44.0,360.0,1,Urban,1.0
|
||||
LP001144,Male,Yes,0,1,No,5821,0.0,144.0,360.0,1,Urban,1.0
|
||||
LP001146,Female,Yes,0,1,No,2645,3440.0,120.0,360.0,0,Urban,0.0
|
||||
LP001151,Female,No,0,1,No,4000,2275.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001155,Female,Yes,0,0,No,1928,1644.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001157,Female,No,0,1,No,3086,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001164,Female,No,0,1,No,4230,0.0,112.0,360.0,1,Semiurban,0.0
|
||||
LP001179,Male,Yes,2,1,No,4616,0.0,134.0,360.0,1,Urban,0.0
|
||||
LP001186,Female,Yes,1,1,Yes,11500,0.0,286.0,360.0,0,Urban,0.0
|
||||
LP001194,Male,Yes,2,1,No,2708,1167.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001195,Male,Yes,0,1,No,2132,1591.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001197,Male,Yes,0,1,No,3366,2200.0,135.0,360.0,1,Rural,0.0
|
||||
LP001198,Male,Yes,1,1,No,8080,2250.0,180.0,360.0,1,Urban,1.0
|
||||
LP001199,Male,Yes,2,0,No,3357,2859.0,144.0,360.0,1,Urban,1.0
|
||||
LP001205,Male,Yes,0,1,No,2500,3796.0,120.0,360.0,1,Urban,1.0
|
||||
LP001206,Male,Yes,3+,1,No,3029,0.0,99.0,360.0,1,Urban,1.0
|
||||
LP001207,Male,Yes,0,0,Yes,2609,3449.0,165.0,180.0,0,Rural,0.0
|
||||
LP001213,Male,Yes,1,1,No,4945,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP001222,Female,No,0,1,No,4166,0.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001225,Male,Yes,0,1,No,5726,4595.0,258.0,360.0,1,Semiurban,0.0
|
||||
LP001228,Male,No,0,0,No,3200,2254.0,126.0,180.0,0,Urban,0.0
|
||||
LP001233,Male,Yes,1,1,No,10750,0.0,312.0,360.0,1,Urban,1.0
|
||||
LP001238,Male,Yes,3+,0,Yes,7100,0.0,125.0,60.0,1,Urban,1.0
|
||||
LP001241,Female,No,0,1,No,4300,0.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP001243,Male,Yes,0,1,No,3208,3066.0,172.0,360.0,1,Urban,1.0
|
||||
LP001245,Male,Yes,2,0,Yes,1875,1875.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001248,Male,No,0,1,No,3500,0.0,81.0,300.0,1,Semiurban,1.0
|
||||
LP001250,Male,Yes,3+,0,No,4755,0.0,95.0,,0,Semiurban,0.0
|
||||
LP001253,Male,Yes,3+,1,Yes,5266,1774.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001255,Male,No,0,1,No,3750,0.0,113.0,480.0,1,Urban,0.0
|
||||
LP001256,Male,No,0,1,No,3750,4750.0,176.0,360.0,1,Urban,0.0
|
||||
LP001259,Male,Yes,1,1,Yes,1000,3022.0,110.0,360.0,1,Urban,0.0
|
||||
LP001263,Male,Yes,3+,1,No,3167,4000.0,180.0,300.0,0,Semiurban,0.0
|
||||
LP001264,Male,Yes,3+,0,Yes,3333,2166.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP001265,Female,No,0,1,No,3846,0.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP001266,Male,Yes,1,1,Yes,2395,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001267,Female,Yes,2,1,No,1378,1881.0,167.0,360.0,1,Urban,0.0
|
||||
LP001273,Male,Yes,0,1,No,6000,2250.0,265.0,360.0,0,Semiurban,0.0
|
||||
LP001275,Male,Yes,1,1,No,3988,0.0,50.0,240.0,1,Urban,1.0
|
||||
LP001279,Male,No,0,1,No,2366,2531.0,136.0,360.0,1,Semiurban,1.0
|
||||
LP001280,Male,Yes,2,0,No,3333,2000.0,99.0,360.0,0,Semiurban,1.0
|
||||
LP001282,Male,Yes,0,1,No,2500,2118.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001289,Male,No,0,1,No,8566,0.0,210.0,360.0,1,Urban,1.0
|
||||
LP001310,Male,Yes,0,1,No,5695,4167.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP001316,Male,Yes,0,1,No,2958,2900.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001318,Male,Yes,2,1,No,6250,5654.0,188.0,180.0,1,Semiurban,1.0
|
||||
LP001319,Male,Yes,2,0,No,3273,1820.0,81.0,360.0,1,Urban,1.0
|
||||
LP001322,Male,No,0,1,No,4133,0.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001325,Male,No,0,0,No,3620,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001326,Male,No,0,1,,6782,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP001327,Female,Yes,0,1,No,2484,2302.0,137.0,360.0,1,Semiurban,1.0
|
||||
LP001333,Male,Yes,0,1,No,1977,997.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP001334,Male,Yes,0,0,No,4188,0.0,115.0,180.0,1,Semiurban,1.0
|
||||
LP001343,Male,Yes,0,1,No,1759,3541.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001345,Male,Yes,2,0,No,4288,3263.0,133.0,180.0,1,Urban,1.0
|
||||
LP001349,Male,No,0,1,No,4843,3806.0,151.0,360.0,1,Semiurban,1.0
|
||||
LP001350,Male,Yes,,1,No,13650,0.0,0.0,360.0,1,Urban,1.0
|
||||
LP001356,Male,Yes,0,1,No,4652,3583.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001357,Male,,,1,No,3816,754.0,160.0,360.0,1,Urban,1.0
|
||||
LP001367,Male,Yes,1,1,No,3052,1030.0,100.0,360.0,1,Urban,1.0
|
||||
LP001369,Male,Yes,2,1,No,11417,1126.0,225.0,360.0,1,Urban,1.0
|
||||
LP001370,Male,No,0,0,,7333,0.0,120.0,360.0,1,Rural,0.0
|
||||
LP001379,Male,Yes,2,1,No,3800,3600.0,216.0,360.0,0,Urban,0.0
|
||||
LP001384,Male,Yes,3+,0,No,2071,754.0,94.0,480.0,1,Semiurban,1.0
|
||||
LP001385,Male,No,0,1,No,5316,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001387,Female,Yes,0,1,,2929,2333.0,139.0,360.0,1,Semiurban,1.0
|
||||
LP001391,Male,Yes,0,0,No,3572,4114.0,152.0,,0,Rural,0.0
|
||||
LP001392,Female,No,1,1,Yes,7451,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001398,Male,No,0,1,,5050,0.0,118.0,360.0,1,Semiurban,1.0
|
||||
LP001401,Male,Yes,1,1,No,14583,0.0,185.0,180.0,1,Rural,1.0
|
||||
LP001404,Female,Yes,0,1,No,3167,2283.0,154.0,360.0,1,Semiurban,1.0
|
||||
LP001405,Male,Yes,1,1,No,2214,1398.0,85.0,360.0,0,Urban,1.0
|
||||
LP001421,Male,Yes,0,1,No,5568,2142.0,175.0,360.0,1,Rural,0.0
|
||||
LP001422,Female,No,0,1,No,10408,0.0,259.0,360.0,1,Urban,1.0
|
||||
LP001426,Male,Yes,,1,No,5667,2667.0,180.0,360.0,1,Rural,1.0
|
||||
LP001430,Female,No,0,1,No,4166,0.0,44.0,360.0,1,Semiurban,1.0
|
||||
LP001431,Female,No,0,1,No,2137,8980.0,137.0,360.0,0,Semiurban,1.0
|
||||
LP001432,Male,Yes,2,1,No,2957,0.0,81.0,360.0,1,Semiurban,1.0
|
||||
LP001439,Male,Yes,0,0,No,4300,2014.0,194.0,360.0,1,Rural,1.0
|
||||
LP001443,Female,No,0,1,No,3692,0.0,93.0,360.0,0,Rural,1.0
|
||||
LP001448,,Yes,3+,1,No,23803,0.0,370.0,360.0,1,Rural,1.0
|
||||
LP001449,Male,No,0,1,No,3865,1640.0,0.0,360.0,1,Rural,1.0
|
||||
LP001451,Male,Yes,1,1,Yes,10513,3850.0,160.0,180.0,0,Urban,0.0
|
||||
LP001465,Male,Yes,0,1,No,6080,2569.0,182.0,360.0,0,Rural,0.0
|
||||
LP001469,Male,No,0,1,Yes,20166,0.0,650.0,480.0,0,Urban,1.0
|
||||
LP001473,Male,No,0,1,No,2014,1929.0,74.0,360.0,1,Urban,1.0
|
||||
LP001478,Male,No,0,1,No,2718,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP001482,Male,Yes,0,1,Yes,3459,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001487,Male,No,0,1,No,4895,0.0,102.0,360.0,1,Semiurban,1.0
|
||||
LP001488,Male,Yes,3+,1,No,4000,7750.0,290.0,360.0,1,Semiurban,0.0
|
||||
LP001489,Female,Yes,0,1,No,4583,0.0,84.0,360.0,1,Rural,0.0
|
||||
LP001491,Male,Yes,2,1,Yes,3316,3500.0,88.0,360.0,1,Urban,1.0
|
||||
LP001492,Male,No,0,1,No,14999,0.0,242.0,360.0,0,Semiurban,0.0
|
||||
LP001493,Male,Yes,2,0,No,4200,1430.0,129.0,360.0,1,Rural,0.0
|
||||
LP001497,Male,Yes,2,1,No,5042,2083.0,185.0,360.0,1,Rural,0.0
|
||||
LP001498,Male,No,0,1,No,5417,0.0,168.0,360.0,1,Urban,1.0
|
||||
LP001504,Male,No,0,1,Yes,6950,0.0,175.0,180.0,1,Semiurban,1.0
|
||||
LP001507,Male,Yes,0,1,No,2698,2034.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001508,Male,Yes,2,1,No,11757,0.0,187.0,180.0,1,Urban,1.0
|
||||
LP001514,Female,Yes,0,1,No,2330,4486.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001516,Female,Yes,2,1,No,14866,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP001518,Male,Yes,1,1,No,1538,1425.0,30.0,360.0,1,Urban,1.0
|
||||
LP001519,Female,No,0,1,No,10000,1666.0,225.0,360.0,1,Rural,0.0
|
||||
LP001520,Male,Yes,0,1,No,4860,830.0,125.0,360.0,1,Semiurban,1.0
|
||||
LP001528,Male,No,0,1,No,6277,0.0,118.0,360.0,0,Rural,0.0
|
||||
LP001529,Male,Yes,0,1,Yes,2577,3750.0,152.0,360.0,1,Rural,1.0
|
||||
LP001531,Male,No,0,1,No,9166,0.0,244.0,360.0,1,Urban,0.0
|
||||
LP001532,Male,Yes,2,0,No,2281,0.0,113.0,360.0,1,Rural,0.0
|
||||
LP001535,Male,No,0,1,No,3254,0.0,50.0,360.0,1,Urban,1.0
|
||||
LP001536,Male,Yes,3+,1,No,39999,0.0,600.0,180.0,0,Semiurban,1.0
|
||||
LP001541,Male,Yes,1,1,No,6000,0.0,160.0,360.0,0,Rural,1.0
|
||||
LP001543,Male,Yes,1,1,No,9538,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP001546,Male,No,0,1,,2980,2083.0,120.0,360.0,1,Rural,1.0
|
||||
LP001552,Male,Yes,0,1,No,4583,5625.0,255.0,360.0,1,Semiurban,1.0
|
||||
LP001560,Male,Yes,0,0,No,1863,1041.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP001562,Male,Yes,0,1,No,7933,0.0,275.0,360.0,1,Urban,0.0
|
||||
LP001565,Male,Yes,1,1,No,3089,1280.0,121.0,360.0,0,Semiurban,0.0
|
||||
LP001570,Male,Yes,2,1,No,4167,1447.0,158.0,360.0,1,Rural,1.0
|
||||
LP001572,Male,Yes,0,1,No,9323,0.0,75.0,180.0,1,Urban,1.0
|
||||
LP001574,Male,Yes,0,1,No,3707,3166.0,182.0,,1,Rural,1.0
|
||||
LP001577,Female,Yes,0,1,No,4583,0.0,112.0,360.0,1,Rural,0.0
|
||||
LP001578,Male,Yes,0,1,No,2439,3333.0,129.0,360.0,1,Rural,1.0
|
||||
LP001579,Male,No,0,1,No,2237,0.0,63.0,480.0,0,Semiurban,0.0
|
||||
LP001580,Male,Yes,2,1,No,8000,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001581,Male,Yes,0,0,,1820,1769.0,95.0,360.0,1,Rural,1.0
|
||||
LP001585,,Yes,3+,1,No,51763,0.0,700.0,300.0,1,Urban,1.0
|
||||
LP001586,Male,Yes,3+,0,No,3522,0.0,81.0,180.0,1,Rural,0.0
|
||||
LP001594,Male,Yes,0,1,No,5708,5625.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001603,Male,Yes,0,0,Yes,4344,736.0,87.0,360.0,1,Semiurban,0.0
|
||||
LP001606,Male,Yes,0,1,No,3497,1964.0,116.0,360.0,1,Rural,1.0
|
||||
LP001608,Male,Yes,2,1,No,2045,1619.0,101.0,360.0,1,Rural,1.0
|
||||
LP001610,Male,Yes,3+,1,No,5516,11300.0,495.0,360.0,0,Semiurban,0.0
|
||||
LP001616,Male,Yes,1,1,No,3750,0.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001630,Male,No,0,0,No,2333,1451.0,102.0,480.0,0,Urban,0.0
|
||||
LP001633,Male,Yes,1,1,No,6400,7250.0,180.0,360.0,0,Urban,0.0
|
||||
LP001634,Male,No,0,1,No,1916,5063.0,67.0,360.0,0,Rural,0.0
|
||||
LP001636,Male,Yes,0,1,No,4600,0.0,73.0,180.0,1,Semiurban,1.0
|
||||
LP001637,Male,Yes,1,1,No,33846,0.0,260.0,360.0,1,Semiurban,0.0
|
||||
LP001639,Female,Yes,0,1,No,3625,0.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP001640,Male,Yes,0,1,Yes,39147,4750.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001641,Male,Yes,1,1,Yes,2178,0.0,66.0,300.0,0,Rural,0.0
|
||||
LP001643,Male,Yes,0,1,No,2383,2138.0,58.0,360.0,0,Rural,1.0
|
||||
LP001644,,Yes,0,1,Yes,674,5296.0,168.0,360.0,1,Rural,1.0
|
||||
LP001647,Male,Yes,0,1,No,9328,0.0,188.0,180.0,1,Rural,1.0
|
||||
LP001653,Male,No,0,0,No,4885,0.0,48.0,360.0,1,Rural,1.0
|
||||
LP001656,Male,No,0,1,No,12000,0.0,164.0,360.0,1,Semiurban,0.0
|
||||
LP001657,Male,Yes,0,0,No,6033,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP001658,Male,No,0,1,No,3858,0.0,76.0,360.0,1,Semiurban,1.0
|
||||
LP001664,Male,No,0,1,No,4191,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001665,Male,Yes,1,1,No,3125,2583.0,170.0,360.0,1,Semiurban,0.0
|
||||
LP001666,Male,No,0,1,No,8333,3750.0,187.0,360.0,1,Rural,1.0
|
||||
LP001669,Female,No,0,0,No,1907,2365.0,120.0,,1,Urban,1.0
|
||||
LP001671,Female,Yes,0,1,No,3416,2816.0,113.0,360.0,0,Semiurban,1.0
|
||||
LP001673,Male,No,0,1,Yes,11000,0.0,83.0,360.0,1,Urban,0.0
|
||||
LP001674,Male,Yes,1,0,No,2600,2500.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001677,Male,No,2,1,No,4923,0.0,166.0,360.0,0,Semiurban,1.0
|
||||
LP001682,Male,Yes,3+,0,No,3992,0.0,0.0,180.0,1,Urban,0.0
|
||||
LP001688,Male,Yes,1,0,No,3500,1083.0,135.0,360.0,1,Urban,1.0
|
||||
LP001691,Male,Yes,2,0,No,3917,0.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP001692,Female,No,0,0,No,4408,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001693,Female,No,0,1,No,3244,0.0,80.0,360.0,1,Urban,1.0
|
||||
LP001698,Male,No,0,0,No,3975,2531.0,55.0,360.0,1,Rural,1.0
|
||||
LP001699,Male,No,0,1,No,2479,0.0,59.0,360.0,1,Urban,1.0
|
||||
LP001702,Male,No,0,1,No,3418,0.0,127.0,360.0,1,Semiurban,0.0
|
||||
LP001708,Female,No,0,1,No,10000,0.0,214.0,360.0,1,Semiurban,0.0
|
||||
LP001711,Male,Yes,3+,1,No,3430,1250.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001713,Male,Yes,1,1,Yes,7787,0.0,240.0,360.0,1,Urban,1.0
|
||||
LP001715,Male,Yes,3+,0,Yes,5703,0.0,130.0,360.0,1,Rural,1.0
|
||||
LP001716,Male,Yes,0,1,No,3173,3021.0,137.0,360.0,1,Urban,1.0
|
||||
LP001720,Male,Yes,3+,0,No,3850,983.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001722,Male,Yes,0,1,No,150,1800.0,135.0,360.0,1,Rural,0.0
|
||||
LP001726,Male,Yes,0,1,No,3727,1775.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001732,Male,Yes,2,1,,5000,0.0,72.0,360.0,0,Semiurban,0.0
|
||||
LP001734,Female,Yes,2,1,No,4283,2383.0,127.0,360.0,0,Semiurban,1.0
|
||||
LP001736,Male,Yes,0,1,No,2221,0.0,60.0,360.0,0,Urban,0.0
|
||||
LP001743,Male,Yes,2,1,No,4009,1717.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001744,Male,No,0,1,No,2971,2791.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001749,Male,Yes,0,1,No,7578,1010.0,175.0,,1,Semiurban,1.0
|
||||
LP001750,Male,Yes,0,1,No,6250,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001751,Male,Yes,0,1,No,3250,0.0,170.0,360.0,1,Rural,0.0
|
||||
LP001754,Male,Yes,,0,Yes,4735,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001758,Male,Yes,2,1,No,6250,1695.0,210.0,360.0,1,Semiurban,1.0
|
||||
LP001760,Male,,,1,No,4758,0.0,158.0,480.0,1,Semiurban,1.0
|
||||
LP001761,Male,No,0,1,Yes,6400,0.0,200.0,360.0,1,Rural,1.0
|
||||
LP001765,Male,Yes,1,1,No,2491,2054.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001768,Male,Yes,0,1,,3716,0.0,42.0,180.0,1,Rural,1.0
|
||||
LP001770,Male,No,0,0,No,3189,2598.0,120.0,,1,Rural,1.0
|
||||
LP001776,Female,No,0,1,No,8333,0.0,280.0,360.0,1,Semiurban,1.0
|
||||
LP001778,Male,Yes,1,1,No,3155,1779.0,140.0,360.0,1,Semiurban,1.0
|
||||
LP001784,Male,Yes,1,1,No,5500,1260.0,170.0,360.0,1,Rural,1.0
|
||||
LP001786,Male,Yes,0,1,,5746,0.0,255.0,360.0,0,Urban,0.0
|
||||
LP001788,Female,No,0,1,Yes,3463,0.0,122.0,360.0,0,Urban,1.0
|
||||
LP001790,Female,No,1,1,No,3812,0.0,112.0,360.0,1,Rural,1.0
|
||||
LP001792,Male,Yes,1,1,No,3315,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001798,Male,Yes,2,1,No,5819,5000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001800,Male,Yes,1,0,No,2510,1983.0,140.0,180.0,1,Urban,0.0
|
||||
LP001806,Male,No,0,1,No,2965,5701.0,155.0,60.0,1,Urban,1.0
|
||||
LP001807,Male,Yes,2,1,Yes,6250,1300.0,108.0,360.0,1,Rural,1.0
|
||||
LP001811,Male,Yes,0,0,No,3406,4417.0,123.0,360.0,1,Semiurban,1.0
|
||||
LP001813,Male,No,0,1,Yes,6050,4333.0,120.0,180.0,1,Urban,0.0
|
||||
LP001814,Male,Yes,2,1,No,9703,0.0,112.0,360.0,1,Urban,1.0
|
||||
LP001819,Male,Yes,1,0,No,6608,0.0,137.0,180.0,1,Urban,1.0
|
||||
LP001824,Male,Yes,1,1,No,2882,1843.0,123.0,480.0,1,Semiurban,1.0
|
||||
LP001825,Male,Yes,0,1,No,1809,1868.0,90.0,360.0,1,Urban,1.0
|
||||
LP001835,Male,Yes,0,0,No,1668,3890.0,201.0,360.0,0,Semiurban,0.0
|
||||
LP001836,Female,No,2,1,No,3427,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001841,Male,No,0,0,Yes,2583,2167.0,104.0,360.0,1,Rural,1.0
|
||||
LP001843,Male,Yes,1,0,No,2661,7101.0,279.0,180.0,1,Semiurban,1.0
|
||||
LP001844,Male,No,0,1,Yes,16250,0.0,192.0,360.0,0,Urban,0.0
|
||||
LP001846,Female,No,3+,1,No,3083,0.0,255.0,360.0,1,Rural,1.0
|
||||
LP001849,Male,No,0,0,No,6045,0.0,115.0,360.0,0,Rural,0.0
|
||||
LP001854,Male,Yes,3+,1,No,5250,0.0,94.0,360.0,1,Urban,0.0
|
||||
LP001859,Male,Yes,0,1,No,14683,2100.0,304.0,360.0,1,Rural,0.0
|
||||
LP001864,Male,Yes,3+,0,No,4931,0.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001865,Male,Yes,1,1,No,6083,4250.0,330.0,360.0,0,Urban,1.0
|
||||
LP001868,Male,No,0,1,No,2060,2209.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001870,Female,No,1,1,No,3481,0.0,155.0,36.0,1,Semiurban,0.0
|
||||
LP001871,Female,No,0,1,No,7200,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001872,Male,No,0,1,Yes,5166,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001875,Male,No,0,1,No,4095,3447.0,151.0,360.0,1,Rural,1.0
|
||||
LP001877,Male,Yes,2,1,No,4708,1387.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001882,Male,Yes,3+,1,No,4333,1811.0,160.0,360.0,0,Urban,1.0
|
||||
LP001883,Female,No,0,1,,3418,0.0,135.0,360.0,1,Rural,0.0
|
||||
LP001884,Female,No,1,1,No,2876,1560.0,90.0,360.0,1,Urban,1.0
|
||||
LP001888,Female,No,0,1,No,3237,0.0,30.0,360.0,1,Urban,1.0
|
||||
LP001891,Male,Yes,0,1,No,11146,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001892,Male,No,0,1,No,2833,1857.0,126.0,360.0,1,Rural,1.0
|
||||
LP001894,Male,Yes,0,1,No,2620,2223.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001896,Male,Yes,2,1,No,3900,0.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001900,Male,Yes,1,1,No,2750,1842.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001903,Male,Yes,0,1,No,3993,3274.0,207.0,360.0,1,Semiurban,1.0
|
||||
LP001904,Male,Yes,0,1,No,3103,1300.0,80.0,360.0,1,Urban,1.0
|
||||
LP001907,Male,Yes,0,1,No,14583,0.0,436.0,360.0,1,Semiurban,1.0
|
||||
LP001908,Female,Yes,0,0,No,4100,0.0,124.0,360.0,0,Rural,1.0
|
||||
LP001910,Male,No,1,0,Yes,4053,2426.0,158.0,360.0,0,Urban,0.0
|
||||
LP001914,Male,Yes,0,1,No,3927,800.0,112.0,360.0,1,Semiurban,1.0
|
||||
LP001915,Male,Yes,2,1,No,2301,985.7999878,78.0,180.0,1,Urban,1.0
|
||||
LP001917,Female,No,0,1,No,1811,1666.0,54.0,360.0,1,Urban,1.0
|
||||
LP001922,Male,Yes,0,1,No,20667,0.0,0.0,360.0,1,Rural,0.0
|
||||
LP001924,Male,No,0,1,No,3158,3053.0,89.0,360.0,1,Rural,1.0
|
||||
LP001925,Female,No,0,1,Yes,2600,1717.0,99.0,300.0,1,Semiurban,0.0
|
||||
LP001926,Male,Yes,0,1,No,3704,2000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001931,Female,No,0,1,No,4124,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001935,Male,No,0,1,No,9508,0.0,187.0,360.0,1,Rural,1.0
|
||||
LP001936,Male,Yes,0,1,No,3075,2416.0,139.0,360.0,1,Rural,1.0
|
||||
LP001938,Male,Yes,2,1,No,4400,0.0,127.0,360.0,0,Semiurban,0.0
|
||||
LP001940,Male,Yes,2,1,No,3153,1560.0,134.0,360.0,1,Urban,1.0
|
||||
LP001945,Female,No,,1,No,5417,0.0,143.0,480.0,0,Urban,0.0
|
||||
LP001947,Male,Yes,0,1,No,2383,3334.0,172.0,360.0,1,Semiurban,1.0
|
||||
LP001949,Male,Yes,3+,1,,4416,1250.0,110.0,360.0,1,Urban,1.0
|
||||
LP001953,Male,Yes,1,1,No,6875,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001954,Female,Yes,1,1,No,4666,0.0,135.0,360.0,1,Urban,1.0
|
||||
LP001955,Female,No,0,1,No,5000,2541.0,151.0,480.0,1,Rural,0.0
|
||||
LP001963,Male,Yes,1,1,No,2014,2925.0,113.0,360.0,1,Urban,0.0
|
||||
LP001964,Male,Yes,0,0,No,1800,2934.0,93.0,360.0,0,Urban,0.0
|
||||
LP001972,Male,Yes,,0,No,2875,1750.0,105.0,360.0,1,Semiurban,1.0
|
||||
LP001974,Female,No,0,1,No,5000,0.0,132.0,360.0,1,Rural,1.0
|
||||
LP001977,Male,Yes,1,1,No,1625,1803.0,96.0,360.0,1,Urban,1.0
|
||||
LP001978,Male,No,0,1,No,4000,2500.0,140.0,360.0,1,Rural,1.0
|
||||
LP001990,Male,No,0,0,No,2000,0.0,0.0,360.0,1,Urban,0.0
|
||||
LP001993,Female,No,0,1,No,3762,1666.0,135.0,360.0,1,Rural,1.0
|
||||
LP001994,Female,No,0,1,No,2400,1863.0,104.0,360.0,0,Urban,0.0
|
||||
LP001996,Male,No,0,1,No,20233,0.0,480.0,360.0,1,Rural,0.0
|
||||
LP001998,Male,Yes,2,0,No,7667,0.0,185.0,360.0,0,Rural,1.0
|
||||
LP002002,Female,No,0,1,No,2917,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002004,Male,No,0,0,No,2927,2405.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP002006,Female,No,0,1,No,2507,0.0,56.0,360.0,1,Rural,1.0
|
||||
LP002008,Male,Yes,2,1,Yes,5746,0.0,144.0,84.0,0,Rural,1.0
|
||||
LP002024,,Yes,0,1,No,2473,1843.0,159.0,360.0,1,Rural,0.0
|
||||
LP002031,Male,Yes,1,0,No,3399,1640.0,111.0,180.0,1,Urban,1.0
|
||||
LP002035,Male,Yes,2,1,No,3717,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP002036,Male,Yes,0,1,No,2058,2134.0,88.0,360.0,0,Urban,1.0
|
||||
LP002043,Female,No,1,1,No,3541,0.0,112.0,360.0,0,Semiurban,1.0
|
||||
LP002050,Male,Yes,1,1,Yes,10000,0.0,155.0,360.0,1,Rural,0.0
|
||||
LP002051,Male,Yes,0,1,No,2400,2167.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002053,Male,Yes,3+,1,No,4342,189.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP002054,Male,Yes,2,0,No,3601,1590.0,0.0,360.0,1,Rural,1.0
|
||||
LP002055,Female,No,0,1,No,3166,2985.0,132.0,360.0,0,Rural,1.0
|
||||
LP002065,Male,Yes,3+,1,No,15000,0.0,300.0,360.0,1,Rural,1.0
|
||||
LP002067,Male,Yes,1,1,Yes,8666,4983.0,376.0,360.0,0,Rural,0.0
|
||||
LP002068,Male,No,0,1,No,4917,0.0,130.0,360.0,0,Rural,1.0
|
||||
LP002082,Male,Yes,0,1,Yes,5818,2160.0,184.0,360.0,1,Semiurban,1.0
|
||||
LP002086,Female,Yes,0,1,No,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002087,Female,No,0,1,No,2500,0.0,67.0,360.0,1,Urban,1.0
|
||||
LP002097,Male,No,1,1,No,4384,1793.0,117.0,360.0,1,Urban,1.0
|
||||
LP002098,Male,No,0,1,No,2935,0.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002100,Male,No,,1,No,2833,0.0,71.0,360.0,1,Urban,1.0
|
||||
LP002101,Male,Yes,0,1,,63337,0.0,490.0,180.0,1,Urban,1.0
|
||||
LP002103,,Yes,1,1,Yes,9833,1833.0,182.0,180.0,1,Urban,1.0
|
||||
LP002106,Male,Yes,,1,Yes,5503,4490.0,70.0,,1,Semiurban,1.0
|
||||
LP002110,Male,Yes,1,1,,5250,688.0,160.0,360.0,1,Rural,1.0
|
||||
LP002112,Male,Yes,2,1,Yes,2500,4600.0,176.0,360.0,1,Rural,1.0
|
||||
LP002113,Female,No,3+,0,No,1830,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP002114,Female,No,0,1,No,4160,0.0,71.0,360.0,1,Semiurban,1.0
|
||||
LP002115,Male,Yes,3+,0,No,2647,1587.0,173.0,360.0,1,Rural,0.0
|
||||
LP002116,Female,No,0,1,No,2378,0.0,46.0,360.0,1,Rural,0.0
|
||||
LP002119,Male,Yes,1,0,No,4554,1229.0,158.0,360.0,1,Urban,1.0
|
||||
LP002126,Male,Yes,3+,0,No,3173,0.0,74.0,360.0,1,Semiurban,1.0
|
||||
LP002128,Male,Yes,2,1,,2583,2330.0,125.0,360.0,1,Rural,1.0
|
||||
LP002129,Male,Yes,0,1,No,2499,2458.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002130,Male,Yes,,0,No,3523,3230.0,152.0,360.0,0,Rural,0.0
|
||||
LP002131,Male,Yes,2,0,No,3083,2168.0,126.0,360.0,1,Urban,1.0
|
||||
LP002137,Male,Yes,0,1,No,6333,4583.0,259.0,360.0,0,Semiurban,1.0
|
||||
LP002138,Male,Yes,0,1,No,2625,6250.0,187.0,360.0,1,Rural,1.0
|
||||
LP002139,Male,Yes,0,1,No,9083,0.0,228.0,360.0,1,Semiurban,1.0
|
||||
LP002140,Male,No,0,1,No,8750,4167.0,308.0,360.0,1,Rural,0.0
|
||||
LP002141,Male,Yes,3+,1,No,2666,2083.0,95.0,360.0,1,Rural,1.0
|
||||
LP002142,Female,Yes,0,1,Yes,5500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002143,Female,Yes,0,1,No,2423,505.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002144,Female,No,,1,No,3813,0.0,116.0,180.0,1,Urban,1.0
|
||||
LP002149,Male,Yes,2,1,No,8333,3167.0,165.0,360.0,1,Rural,1.0
|
||||
LP002151,Male,Yes,1,1,No,3875,0.0,67.0,360.0,1,Urban,0.0
|
||||
LP002158,Male,Yes,0,0,No,3000,1666.0,100.0,480.0,0,Urban,0.0
|
||||
LP002160,Male,Yes,3+,1,No,5167,3167.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP002161,Female,No,1,1,No,4723,0.0,81.0,360.0,1,Semiurban,0.0
|
||||
LP002170,Male,Yes,2,1,No,5000,3667.0,236.0,360.0,1,Semiurban,1.0
|
||||
LP002175,Male,Yes,0,1,No,4750,2333.0,130.0,360.0,1,Urban,1.0
|
||||
LP002178,Male,Yes,0,1,No,3013,3033.0,95.0,300.0,0,Urban,1.0
|
||||
LP002180,Male,No,0,1,Yes,6822,0.0,141.0,360.0,1,Rural,1.0
|
||||
LP002181,Male,No,0,0,No,6216,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP002187,Male,No,0,1,No,2500,0.0,96.0,480.0,1,Semiurban,0.0
|
||||
LP002188,Male,No,0,1,No,5124,0.0,124.0,,0,Rural,0.0
|
||||
LP002190,Male,Yes,1,1,No,6325,0.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP002191,Male,Yes,0,1,No,19730,5266.0,570.0,360.0,1,Rural,0.0
|
||||
LP002194,Female,No,0,1,Yes,15759,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002197,Male,Yes,2,1,No,5185,0.0,155.0,360.0,1,Semiurban,1.0
|
||||
LP002201,Male,Yes,2,1,Yes,9323,7873.0,380.0,300.0,1,Rural,1.0
|
||||
LP002205,Male,No,1,1,No,3062,1987.0,111.0,180.0,0,Urban,0.0
|
||||
LP002209,Female,No,0,1,,2764,1459.0,110.0,360.0,1,Urban,1.0
|
||||
LP002211,Male,Yes,0,1,No,4817,923.0,120.0,180.0,1,Urban,1.0
|
||||
LP002219,Male,Yes,3+,1,No,8750,4996.0,130.0,360.0,1,Rural,1.0
|
||||
LP002223,Male,Yes,0,1,No,4310,0.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP002224,Male,No,0,1,No,3069,0.0,71.0,480.0,1,Urban,0.0
|
||||
LP002225,Male,Yes,2,1,No,5391,0.0,130.0,360.0,1,Urban,1.0
|
||||
LP002226,Male,Yes,0,1,,3333,2500.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002229,Male,No,0,1,No,5941,4232.0,296.0,360.0,1,Semiurban,1.0
|
||||
LP002231,Female,No,0,1,No,6000,0.0,156.0,360.0,1,Urban,1.0
|
||||
LP002234,Male,No,0,1,Yes,7167,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002236,Male,Yes,2,1,No,4566,0.0,100.0,360.0,1,Urban,0.0
|
||||
LP002237,Male,No,1,1,,3667,0.0,113.0,180.0,1,Urban,1.0
|
||||
LP002239,Male,No,0,0,No,2346,1600.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002243,Male,Yes,0,0,No,3010,3136.0,0.0,360.0,0,Urban,0.0
|
||||
LP002244,Male,Yes,0,1,No,2333,2417.0,136.0,360.0,1,Urban,1.0
|
||||
LP002250,Male,Yes,0,1,No,5488,0.0,125.0,360.0,1,Rural,1.0
|
||||
LP002255,Male,No,3+,1,No,9167,0.0,185.0,360.0,1,Rural,1.0
|
||||
LP002262,Male,Yes,3+,1,No,9504,0.0,275.0,360.0,1,Rural,1.0
|
||||
LP002263,Male,Yes,0,1,No,2583,2115.0,120.0,360.0,0,Urban,1.0
|
||||
LP002265,Male,Yes,2,0,No,1993,1625.0,113.0,180.0,1,Semiurban,1.0
|
||||
LP002266,Male,Yes,2,1,No,3100,1400.0,113.0,360.0,1,Urban,1.0
|
||||
LP002272,Male,Yes,2,1,No,3276,484.0,135.0,360.0,0,Semiurban,1.0
|
||||
LP002277,Female,No,0,1,No,3180,0.0,71.0,360.0,0,Urban,0.0
|
||||
LP002281,Male,Yes,0,1,No,3033,1459.0,95.0,360.0,1,Urban,1.0
|
||||
LP002284,Male,No,0,0,No,3902,1666.0,109.0,360.0,1,Rural,1.0
|
||||
LP002287,Female,No,0,1,No,1500,1800.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002288,Male,Yes,2,0,No,2889,0.0,45.0,180.0,0,Urban,0.0
|
||||
LP002296,Male,No,0,0,No,2755,0.0,65.0,300.0,1,Rural,0.0
|
||||
LP002297,Male,No,0,1,No,2500,20000.0,103.0,360.0,1,Semiurban,1.0
|
||||
LP002300,Female,No,0,0,No,1963,0.0,53.0,360.0,1,Semiurban,1.0
|
||||
LP002301,Female,No,0,1,Yes,7441,0.0,194.0,360.0,1,Rural,0.0
|
||||
LP002305,Female,No,0,1,No,4547,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002308,Male,Yes,0,0,No,2167,2400.0,115.0,360.0,1,Urban,1.0
|
||||
LP002314,Female,No,0,0,No,2213,0.0,66.0,360.0,1,Rural,1.0
|
||||
LP002315,Male,Yes,1,1,No,8300,0.0,152.0,300.0,0,Semiurban,0.0
|
||||
LP002317,Male,Yes,3+,1,No,81000,0.0,360.0,360.0,0,Rural,0.0
|
||||
LP002318,Female,No,1,0,Yes,3867,0.0,62.0,360.0,1,Semiurban,0.0
|
||||
LP002319,Male,Yes,0,1,,6256,0.0,160.0,360.0,0,Urban,1.0
|
||||
LP002328,Male,Yes,0,0,No,6096,0.0,218.0,360.0,0,Rural,0.0
|
||||
LP002332,Male,Yes,0,0,No,2253,2033.0,110.0,360.0,1,Rural,1.0
|
||||
LP002335,Female,Yes,0,0,No,2149,3237.0,178.0,360.0,0,Semiurban,0.0
|
||||
LP002337,Female,No,0,1,No,2995,0.0,60.0,360.0,1,Urban,1.0
|
||||
LP002341,Female,No,1,1,No,2600,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP002342,Male,Yes,2,1,Yes,1600,20000.0,239.0,360.0,1,Urban,0.0
|
||||
LP002345,Male,Yes,0,1,No,1025,2773.0,112.0,360.0,1,Rural,1.0
|
||||
LP002347,Male,Yes,0,1,No,3246,1417.0,138.0,360.0,1,Semiurban,1.0
|
||||
LP002348,Male,Yes,0,1,No,5829,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002357,Female,No,0,0,No,2720,0.0,80.0,,0,Urban,0.0
|
||||
LP002361,Male,Yes,0,1,No,1820,1719.0,100.0,360.0,1,Urban,1.0
|
||||
LP002362,Male,Yes,1,1,No,7250,1667.0,110.0,,0,Urban,0.0
|
||||
LP002364,Male,Yes,0,1,No,14880,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002366,Male,Yes,0,1,No,2666,4300.0,121.0,360.0,1,Rural,1.0
|
||||
LP002367,Female,No,1,0,No,4606,0.0,81.0,360.0,1,Rural,0.0
|
||||
LP002368,Male,Yes,2,1,No,5935,0.0,133.0,360.0,1,Semiurban,1.0
|
||||
LP002369,Male,Yes,0,1,No,2920,16.12000084,87.0,360.0,1,Rural,1.0
|
||||
LP002370,Male,No,0,0,No,2717,0.0,60.0,180.0,1,Urban,1.0
|
||||
LP002377,Female,No,1,1,Yes,8624,0.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP002379,Male,No,0,1,No,6500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002386,Male,No,0,1,,12876,0.0,405.0,360.0,1,Semiurban,1.0
|
||||
LP002387,Male,Yes,0,1,No,2425,2340.0,143.0,360.0,1,Semiurban,1.0
|
||||
LP002390,Male,No,0,1,No,3750,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002393,Female,,,1,No,10047,0.0,0.0,240.0,1,Semiurban,1.0
|
||||
LP002398,Male,No,0,1,No,1926,1851.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP002401,Male,Yes,0,1,No,2213,1125.0,0.0,360.0,1,Urban,1.0
|
||||
LP002403,Male,No,0,1,Yes,10416,0.0,187.0,360.0,0,Urban,0.0
|
||||
LP002407,Female,Yes,0,0,Yes,7142,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002408,Male,No,0,1,No,3660,5064.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP002409,Male,Yes,0,1,No,7901,1833.0,180.0,360.0,1,Rural,1.0
|
||||
LP002418,Male,No,3+,0,No,4707,1993.0,148.0,360.0,1,Semiurban,1.0
|
||||
LP002422,Male,No,1,1,No,37719,0.0,152.0,360.0,1,Semiurban,1.0
|
||||
LP002424,Male,Yes,0,1,No,7333,8333.0,175.0,300.0,0,Rural,1.0
|
||||
LP002429,Male,Yes,1,1,Yes,3466,1210.0,130.0,360.0,1,Rural,1.0
|
||||
LP002434,Male,Yes,2,0,No,4652,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002435,Male,Yes,0,1,,3539,1376.0,55.0,360.0,1,Rural,0.0
|
||||
LP002443,Male,Yes,2,1,No,3340,1710.0,150.0,360.0,0,Rural,0.0
|
||||
LP002444,Male,No,1,0,Yes,2769,1542.0,190.0,360.0,0,Semiurban,0.0
|
||||
LP002446,Male,Yes,2,0,No,2309,1255.0,125.0,360.0,0,Rural,0.0
|
||||
LP002447,Male,Yes,2,0,No,1958,1456.0,60.0,300.0,0,Urban,1.0
|
||||
LP002448,Male,Yes,0,1,No,3948,1733.0,149.0,360.0,0,Rural,0.0
|
||||
LP002449,Male,Yes,0,1,No,2483,2466.0,90.0,180.0,0,Rural,1.0
|
||||
LP002453,Male,No,0,1,Yes,7085,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002455,Male,Yes,2,1,No,3859,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002459,Male,Yes,0,1,No,4301,0.0,118.0,360.0,1,Urban,1.0
|
||||
LP002467,Male,Yes,0,1,No,3708,2569.0,173.0,360.0,1,Urban,0.0
|
||||
LP002472,Male,No,2,1,No,4354,0.0,136.0,360.0,1,Rural,1.0
|
||||
LP002473,Male,Yes,0,1,No,8334,0.0,160.0,360.0,1,Semiurban,0.0
|
||||
LP002478,,Yes,0,1,Yes,2083,4083.0,160.0,360.0,0,Semiurban,1.0
|
||||
LP002484,Male,Yes,3+,1,No,7740,0.0,128.0,180.0,1,Urban,1.0
|
||||
LP002487,Male,Yes,0,1,No,3015,2188.0,153.0,360.0,1,Rural,1.0
|
||||
LP002489,Female,No,1,0,,5191,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002493,Male,No,0,1,No,4166,0.0,98.0,360.0,0,Semiurban,0.0
|
||||
LP002494,Male,No,0,1,No,6000,0.0,140.0,360.0,1,Rural,1.0
|
||||
LP002500,Male,Yes,3+,0,No,2947,1664.0,70.0,180.0,0,Urban,0.0
|
||||
LP002501,,Yes,0,1,No,16692,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002502,Female,Yes,2,0,,210,2917.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002505,Male,Yes,0,1,No,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002515,Male,Yes,1,1,Yes,3450,2079.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002517,Male,Yes,1,0,No,2653,1500.0,113.0,180.0,0,Rural,0.0
|
||||
LP002519,Male,Yes,3+,1,No,4691,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002522,Female,No,0,1,Yes,2500,0.0,93.0,360.0,0,Urban,1.0
|
||||
LP002524,Male,No,2,1,No,5532,4648.0,162.0,360.0,1,Rural,1.0
|
||||
LP002527,Male,Yes,2,1,Yes,16525,1014.0,150.0,360.0,1,Rural,1.0
|
||||
LP002529,Male,Yes,2,1,No,6700,1750.0,230.0,300.0,1,Semiurban,1.0
|
||||
LP002530,,Yes,2,1,No,2873,1872.0,132.0,360.0,0,Semiurban,0.0
|
||||
LP002531,Male,Yes,1,1,Yes,16667,2250.0,86.0,360.0,1,Semiurban,1.0
|
||||
LP002533,Male,Yes,2,1,No,2947,1603.0,0.0,360.0,1,Urban,0.0
|
||||
LP002534,Female,No,0,0,No,4350,0.0,154.0,360.0,1,Rural,1.0
|
||||
LP002536,Male,Yes,3+,0,No,3095,0.0,113.0,360.0,1,Rural,1.0
|
||||
LP002537,Male,Yes,0,1,No,2083,3150.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002541,Male,Yes,0,1,No,10833,0.0,234.0,360.0,1,Semiurban,1.0
|
||||
LP002543,Male,Yes,2,1,No,8333,0.0,246.0,360.0,1,Semiurban,1.0
|
||||
LP002544,Male,Yes,1,0,No,1958,2436.0,131.0,360.0,1,Rural,1.0
|
||||
LP002545,Male,No,2,1,No,3547,0.0,80.0,360.0,0,Rural,0.0
|
||||
LP002547,Male,Yes,1,1,No,18333,0.0,500.0,360.0,1,Urban,0.0
|
||||
LP002555,Male,Yes,2,1,Yes,4583,2083.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002556,Male,No,0,1,No,2435,0.0,75.0,360.0,1,Urban,0.0
|
||||
LP002560,Male,No,0,0,No,2699,2785.0,96.0,360.0,0,Semiurban,1.0
|
||||
LP002562,Male,Yes,1,0,No,5333,1131.0,186.0,360.0,0,Urban,1.0
|
||||
LP002571,Male,No,0,0,No,3691,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002582,Female,No,0,0,Yes,17263,0.0,225.0,360.0,1,Semiurban,1.0
|
||||
LP002585,Male,Yes,0,1,No,3597,2157.0,119.0,360.0,0,Rural,0.0
|
||||
LP002586,Female,Yes,1,1,No,3326,913.0,105.0,84.0,1,Semiurban,1.0
|
||||
LP002587,Male,Yes,0,0,No,2600,1700.0,107.0,360.0,1,Rural,1.0
|
||||
LP002588,Male,Yes,0,1,No,4625,2857.0,111.0,12.0,0,Urban,1.0
|
||||
LP002600,Male,Yes,1,1,Yes,2895,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002602,Male,No,0,1,No,6283,4416.0,209.0,360.0,0,Rural,0.0
|
||||
LP002603,Female,No,0,1,No,645,3683.0,113.0,480.0,1,Rural,1.0
|
||||
LP002606,Female,No,0,1,No,3159,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002615,Male,Yes,2,1,No,4865,5624.0,208.0,360.0,1,Semiurban,1.0
|
||||
LP002618,Male,Yes,1,0,No,4050,5302.0,138.0,360.0,0,Rural,0.0
|
||||
LP002619,Male,Yes,0,0,No,3814,1483.0,124.0,300.0,1,Semiurban,1.0
|
||||
LP002622,Male,Yes,2,1,No,3510,4416.0,243.0,360.0,1,Rural,1.0
|
||||
LP002624,Male,Yes,0,1,No,20833,6667.0,480.0,360.0,0,Urban,1.0
|
||||
LP002625,,No,0,1,No,3583,0.0,96.0,360.0,1,Urban,0.0
|
||||
LP002626,Male,Yes,0,1,Yes,2479,3013.0,188.0,360.0,1,Urban,1.0
|
||||
LP002634,Female,No,1,1,No,13262,0.0,40.0,360.0,1,Urban,1.0
|
||||
LP002637,Male,No,0,0,No,3598,1287.0,100.0,360.0,1,Rural,0.0
|
||||
LP002640,Male,Yes,1,1,No,6065,2004.0,250.0,360.0,1,Semiurban,1.0
|
||||
LP002643,Male,Yes,2,1,No,3283,2035.0,148.0,360.0,1,Urban,1.0
|
||||
LP002648,Male,Yes,0,1,No,2130,6666.0,70.0,180.0,1,Semiurban,0.0
|
||||
LP002652,Male,No,0,1,No,5815,3666.0,311.0,360.0,1,Rural,0.0
|
||||
LP002659,Male,Yes,3+,1,No,3466,3428.0,150.0,360.0,1,Rural,1.0
|
||||
LP002670,Female,Yes,2,1,No,2031,1632.0,113.0,480.0,1,Semiurban,1.0
|
||||
LP002682,Male,Yes,,0,No,3074,1800.0,123.0,360.0,0,Semiurban,0.0
|
||||
LP002683,Male,No,0,1,No,4683,1915.0,185.0,360.0,1,Semiurban,0.0
|
||||
LP002684,Female,No,0,0,No,3400,0.0,95.0,360.0,1,Rural,0.0
|
||||
LP002689,Male,Yes,2,0,No,2192,1742.0,45.0,360.0,1,Semiurban,1.0
|
||||
LP002690,Male,No,0,1,No,2500,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002692,Male,Yes,3+,1,Yes,5677,1424.0,100.0,360.0,1,Rural,1.0
|
||||
LP002693,Male,Yes,2,1,Yes,7948,7166.0,480.0,360.0,1,Rural,1.0
|
||||
LP002697,Male,No,0,1,No,4680,2087.0,0.0,360.0,1,Semiurban,0.0
|
||||
LP002699,Male,Yes,2,1,Yes,17500,0.0,400.0,360.0,1,Rural,1.0
|
||||
LP002705,Male,Yes,0,1,No,3775,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002706,Male,Yes,1,0,No,5285,1430.0,161.0,360.0,0,Semiurban,1.0
|
||||
LP002714,Male,No,1,0,No,2679,1302.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002716,Male,No,0,0,No,6783,0.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002717,Male,Yes,0,1,No,1025,5500.0,216.0,360.0,0,Rural,1.0
|
||||
LP002720,Male,Yes,3+,1,No,4281,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002723,Male,No,2,1,No,3588,0.0,110.0,360.0,0,Rural,0.0
|
||||
LP002729,Male,No,1,1,No,11250,0.0,196.0,360.0,0,Semiurban,0.0
|
||||
LP002731,Female,No,0,0,Yes,18165,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP002732,Male,No,0,0,,2550,2042.0,126.0,360.0,1,Rural,1.0
|
||||
LP002734,Male,Yes,0,1,No,6133,3906.0,324.0,360.0,1,Urban,1.0
|
||||
LP002738,Male,No,2,1,No,3617,0.0,107.0,360.0,1,Semiurban,1.0
|
||||
LP002739,Male,Yes,0,0,No,2917,536.0,66.0,360.0,1,Rural,0.0
|
||||
LP002740,Male,Yes,3+,1,No,6417,0.0,157.0,180.0,1,Rural,1.0
|
||||
LP002741,Female,Yes,1,1,No,4608,2845.0,140.0,180.0,1,Semiurban,1.0
|
||||
LP002743,Female,No,0,1,No,2138,0.0,99.0,360.0,0,Semiurban,0.0
|
||||
LP002753,Female,No,1,1,,3652,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002755,Male,Yes,1,0,No,2239,2524.0,128.0,360.0,1,Urban,1.0
|
||||
LP002757,Female,Yes,0,0,No,3017,663.0,102.0,360.0,0,Semiurban,1.0
|
||||
LP002767,Male,Yes,0,1,No,2768,1950.0,155.0,360.0,1,Rural,1.0
|
||||
LP002768,Male,No,0,0,No,3358,0.0,80.0,36.0,1,Semiurban,0.0
|
||||
LP002772,Male,No,0,1,No,2526,1783.0,145.0,360.0,1,Rural,1.0
|
||||
LP002776,Female,No,0,1,No,5000,0.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002777,Male,Yes,0,1,No,2785,2016.0,110.0,360.0,1,Rural,1.0
|
||||
LP002778,Male,Yes,2,1,Yes,6633,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP002784,Male,Yes,1,0,No,2492,2375.0,0.0,360.0,1,Rural,1.0
|
||||
LP002785,Male,Yes,1,1,No,3333,3250.0,158.0,360.0,1,Urban,1.0
|
||||
LP002788,Male,Yes,0,0,No,2454,2333.0,181.0,360.0,0,Urban,0.0
|
||||
LP002789,Male,Yes,0,1,No,3593,4266.0,132.0,180.0,0,Rural,0.0
|
||||
LP002792,Male,Yes,1,1,No,5468,1032.0,26.0,360.0,1,Semiurban,1.0
|
||||
LP002794,Female,No,0,1,No,2667,1625.0,84.0,360.0,0,Urban,1.0
|
||||
LP002795,Male,Yes,3+,1,Yes,10139,0.0,260.0,360.0,1,Semiurban,1.0
|
||||
LP002798,Male,Yes,0,1,No,3887,2669.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002804,Female,Yes,0,1,No,4180,2306.0,182.0,360.0,1,Semiurban,1.0
|
||||
LP002807,Male,Yes,2,0,No,3675,242.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP002813,Female,Yes,1,1,Yes,19484,0.0,600.0,360.0,1,Semiurban,1.0
|
||||
LP002820,Male,Yes,0,1,No,5923,2054.0,211.0,360.0,1,Rural,1.0
|
||||
LP002821,Male,No,0,0,Yes,5800,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002832,Male,Yes,2,1,No,8799,0.0,258.0,360.0,0,Urban,0.0
|
||||
LP002833,Male,Yes,0,0,No,4467,0.0,120.0,360.0,0,Rural,1.0
|
||||
LP002836,Male,No,0,1,No,3333,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP002837,Male,Yes,3+,1,No,3400,2500.0,123.0,360.0,0,Rural,0.0
|
||||
LP002840,Female,No,0,1,No,2378,0.0,9.0,360.0,1,Urban,0.0
|
||||
LP002841,Male,Yes,0,1,No,3166,2064.0,104.0,360.0,0,Urban,0.0
|
||||
LP002842,Male,Yes,1,1,No,3417,1750.0,186.0,360.0,1,Urban,1.0
|
||||
LP002847,Male,Yes,,1,No,5116,1451.0,165.0,360.0,0,Urban,0.0
|
||||
LP002855,Male,Yes,2,1,No,16666,0.0,275.0,360.0,1,Urban,1.0
|
||||
LP002862,Male,Yes,2,0,No,6125,1625.0,187.0,480.0,1,Semiurban,0.0
|
||||
LP002863,Male,Yes,3+,1,No,6406,0.0,150.0,360.0,1,Semiurban,0.0
|
||||
LP002868,Male,Yes,2,1,No,3159,461.0,108.0,84.0,1,Urban,1.0
|
||||
LP002872,,Yes,0,1,No,3087,2210.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP002874,Male,No,0,1,No,3229,2739.0,110.0,360.0,1,Urban,1.0
|
||||
LP002877,Male,Yes,1,1,No,1782,2232.0,107.0,360.0,1,Rural,1.0
|
||||
LP002888,Male,No,0,1,,3182,2917.0,161.0,360.0,1,Urban,1.0
|
||||
LP002892,Male,Yes,2,1,No,6540,0.0,205.0,360.0,1,Semiurban,1.0
|
||||
LP002893,Male,No,0,1,No,1836,33837.0,90.0,360.0,1,Urban,0.0
|
||||
LP002894,Female,Yes,0,1,No,3166,0.0,36.0,360.0,1,Semiurban,1.0
|
||||
LP002898,Male,Yes,1,1,No,1880,0.0,61.0,360.0,0,Rural,0.0
|
||||
LP002911,Male,Yes,1,1,No,2787,1917.0,146.0,360.0,0,Rural,0.0
|
||||
LP002912,Male,Yes,1,1,No,4283,3000.0,172.0,84.0,1,Rural,0.0
|
||||
LP002916,Male,Yes,0,1,No,2297,1522.0,104.0,360.0,1,Urban,1.0
|
||||
LP002917,Female,No,0,0,No,2165,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP002925,,No,0,1,No,4750,0.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002926,Male,Yes,2,1,Yes,2726,0.0,106.0,360.0,0,Semiurban,0.0
|
||||
LP002928,Male,Yes,0,1,No,3000,3416.0,56.0,180.0,1,Semiurban,1.0
|
||||
LP002931,Male,Yes,2,1,Yes,6000,0.0,205.0,240.0,1,Semiurban,0.0
|
||||
LP002933,,No,3+,1,Yes,9357,0.0,292.0,360.0,1,Semiurban,1.0
|
||||
LP002936,Male,Yes,0,1,No,3859,3300.0,142.0,180.0,1,Rural,1.0
|
||||
LP002938,Male,Yes,0,1,Yes,16120,0.0,260.0,360.0,1,Urban,1.0
|
||||
LP002940,Male,No,0,0,No,3833,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002941,Male,Yes,2,0,Yes,6383,1000.0,187.0,360.0,1,Rural,0.0
|
||||
LP002943,Male,No,,1,No,2987,0.0,88.0,360.0,0,Semiurban,0.0
|
||||
LP002945,Male,Yes,0,1,Yes,9963,0.0,180.0,360.0,1,Rural,1.0
|
||||
LP002948,Male,Yes,2,1,No,5780,0.0,192.0,360.0,1,Urban,1.0
|
||||
LP002949,Female,No,3+,1,,416,41667.0,350.0,180.0,0,Urban,0.0
|
||||
LP002950,Male,Yes,0,0,,2894,2792.0,155.0,360.0,1,Rural,1.0
|
||||
LP002953,Male,Yes,3+,1,No,5703,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002958,Male,No,0,1,No,3676,4301.0,172.0,360.0,1,Rural,1.0
|
||||
LP002959,Female,Yes,1,1,No,12000,0.0,496.0,360.0,1,Semiurban,1.0
|
||||
LP002960,Male,Yes,0,0,No,2400,3800.0,0.0,180.0,1,Urban,0.0
|
||||
LP002961,Male,Yes,1,1,No,3400,2500.0,173.0,360.0,1,Semiurban,1.0
|
||||
LP002964,Male,Yes,2,0,No,3987,1411.0,157.0,360.0,1,Rural,1.0
|
||||
LP002974,Male,Yes,0,1,No,3232,1950.0,108.0,360.0,1,Rural,1.0
|
||||
LP002978,Female,No,0,1,No,2900,0.0,71.0,360.0,1,Rural,1.0
|
||||
LP002979,Male,Yes,3+,1,No,4106,0.0,40.0,180.0,1,Rural,1.0
|
||||
LP002983,Male,Yes,1,1,No,8072,240.0,253.0,360.0,1,Urban,1.0
|
||||
LP002984,Male,Yes,2,1,No,7583,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP002990,Female,No,0,1,Yes,4583,0.0,133.0,360.0,0,Semiurban,0.0
|
||||
|
BIN
abanin_daniil_lab_3/result.png
Normal file
|
After Width: | Height: | Size: 27 KiB |
26
abanin_daniil_lab_4/README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
## Лабораторная работа №4
|
||||
|
||||
### Кластеризация
|
||||
|
||||
## ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, pandas, matplotlib, scipy
|
||||
* запустить проект (стартовая точка — файл `lab4.py`)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`, библиотеки pandas, matplotlib, scipy
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
Программа читает данные из CSV-файла и на основе имеющейся информации разбивает заявителей на группы (кластеры) по риску выдачи кредита.
|
||||
При кластеризации используются такие признаки, как ApplicantIncome — доход заявителя, LoanAmount — сумма займа в тысячах, Credit_History —
|
||||
статус кредитной истории заявителя (соответствие рекомендациям), Self_Employed — самозанятость (Да/Нет), Education — наличие образования.
|
||||
|
||||
### Тест
|
||||
|
||||

|
||||
|
||||
По дендрограмме, построенной в результате кластеризации, видно, что данные были эффективно разбиты на кластеры. На диаграмме показаны различные группы заявителей по рискам выдачи кредита.
|
||||
23
abanin_daniil_lab_4/lab4.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from scipy.cluster import hierarchy
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
def start(csv_path=None):
    """Cluster loan applicants hierarchically and show the dendrogram.

    Reads the applicant table, keeps the five feature columns used for
    clustering, builds a single-linkage hierarchy over them and draws a
    dendrogram truncated to the last 20 merged clusters.

    Args:
        csv_path: Path to the CSV file with applicant data. When omitted,
            ``loan.csv`` located next to this script is used, so the
            program no longer depends on the current working directory.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If any of the required feature columns is missing.
    """
    # Imported locally so this block does not rely on a new top-level import.
    from pathlib import Path

    if csv_path is None:
        csv_path = Path(__file__).resolve().parent / 'loan.csv'

    data = pd.read_csv(csv_path)
    feature_columns = ['ApplicantIncome', 'LoanAmount', 'Credit_History',
                       'Self_Employed', 'Education']
    # linkage() refuses non-finite values, and the loan data set has blank
    # cells, so rows with a missing feature are dropped. This is a no-op
    # when the selected columns are complete.
    x = data[feature_columns].dropna()

    # NOTE(review): the features are not scaled, so with the default
    # Euclidean metric ApplicantIncome presumably dominates the distances.
    # Confirm whether that is intended before changing it.
    plt.figure(1, figsize=(16, 9))
    plt.title('Дендрограмма кластеризации заявителей')

    hierarchy.dendrogram(hierarchy.linkage(x, method='single'),
                         truncate_mode='lastp',
                         p=20,
                         orientation='top',
                         leaf_rotation=90,
                         leaf_font_size=8,
                         show_contracted=True)

    plt.show()


# Run only when executed as a script, so importing the module has no
# side effects (no file read, no plot window).
if __name__ == '__main__':
    start()
|
||||
615
abanin_daniil_lab_4/loan.csv
Normal file
@@ -0,0 +1,615 @@
|
||||
Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
|
||||
LP001002,Male,No,0,1,0.0,5849,0.0,360.0,1.0,0,Y,0.0
|
||||
LP001003,Male,Yes,1,1,0.0,4583,1508.0,128.0,360.0,1,Rural,0.0
|
||||
LP001005,Male,Yes,0,1,1.0,3000,0.0,66.0,360.0,1,Urban,1.0
|
||||
LP001006,Male,Yes,0,0,0.0,2583,2358.0,120.0,360.0,1,Urban,1.0
|
||||
LP001008,Male,No,0,1,0.0,6000,0.0,141.0,360.0,1,Urban,1.0
|
||||
LP001011,Male,Yes,2,1,1.0,5417,4196.0,267.0,360.0,1,Urban,1.0
|
||||
LP001013,Male,Yes,0,0,0.0,2333,1516.0,95.0,360.0,1,Urban,1.0
|
||||
LP001014,Male,Yes,3+,1,0.0,3036,2504.0,158.0,360.0,0,Semiurban,0.0
|
||||
LP001018,Male,Yes,2,1,0.0,4006,1526.0,168.0,360.0,1,Urban,1.0
|
||||
LP001020,Male,Yes,1,1,0.0,12841,10968.0,349.0,360.0,1,Semiurban,0.0
|
||||
LP001024,Male,Yes,2,1,0.0,3200,700.0,70.0,360.0,1,Urban,1.0
|
||||
LP001027,Male,Yes,2,1,0.0,2500,1840.0,109.0,360.0,1,Urban,1.0
|
||||
LP001028,Male,Yes,2,1,0.0,3073,8106.0,200.0,360.0,1,Urban,1.0
|
||||
LP001029,Male,No,0,1,0.0,1853,2840.0,114.0,360.0,1,Rural,0.0
|
||||
LP001030,Male,Yes,2,1,0.0,1299,1086.0,17.0,120.0,1,Urban,1.0
|
||||
LP001032,Male,No,0,1,0.0,4950,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP001034,Male,No,1,0,0.0,3596,0.0,100.0,240.0,0,Urban,1.0
|
||||
LP001036,Female,No,0,1,0.0,3510,0.0,76.0,360.0,0,Urban,0.0
|
||||
LP001038,Male,Yes,0,0,0.0,4887,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP001041,Male,Yes,0,1,0.0,2600,3500.0,115.0,,1,Urban,1.0
|
||||
LP001043,Male,Yes,0,0,0.0,7660,0.0,104.0,360.0,0,Urban,0.0
|
||||
LP001046,Male,Yes,1,1,0.0,5955,5625.0,315.0,360.0,1,Urban,1.0
|
||||
LP001047,Male,Yes,0,0,0.0,2600,1911.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001050,,Yes,2,0,0.0,3365,1917.0,112.0,360.0,0,Rural,0.0
|
||||
LP001052,Male,Yes,1,1,0.0,3717,2925.0,151.0,360.0,0,Semiurban,0.0
|
||||
LP001066,Male,Yes,0,1,1.0,9560,0.0,191.0,360.0,1,Semiurban,1.0
|
||||
LP001068,Male,Yes,0,1,0.0,2799,2253.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001073,Male,Yes,2,0,0.0,4226,1040.0,110.0,360.0,1,Urban,1.0
|
||||
LP001086,Male,No,0,0,0.0,1442,0.0,35.0,360.0,1,Urban,0.0
|
||||
LP001087,Female,No,2,1,0.0,3750,2083.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001091,Male,Yes,1,1,0.0,4166,3369.0,201.0,360.0,0,Urban,0.0
|
||||
LP001095,Male,No,0,1,0.0,3167,0.0,74.0,360.0,1,Urban,0.0
|
||||
LP001097,Male,No,1,1,1.0,4692,0.0,106.0,360.0,1,Rural,0.0
|
||||
LP001098,Male,Yes,0,1,0.0,3500,1667.0,114.0,360.0,1,Semiurban,1.0
|
||||
LP001100,Male,No,3+,1,0.0,12500,3000.0,320.0,360.0,1,Rural,0.0
|
||||
LP001106,Male,Yes,0,1,0.0,2275,2067.0,0.0,360.0,1,Urban,1.0
|
||||
LP001109,Male,Yes,0,1,0.0,1828,1330.0,100.0,,0,Urban,0.0
|
||||
LP001112,Female,Yes,0,1,0.0,3667,1459.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001114,Male,No,0,1,0.0,4166,7210.0,184.0,360.0,1,Urban,1.0
|
||||
LP001116,Male,No,0,0,0.0,3748,1668.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP001119,Male,No,0,1,0.0,3600,0.0,80.0,360.0,1,Urban,0.0
|
||||
LP001120,Male,No,0,1,0.0,1800,1213.0,47.0,360.0,1,Urban,1.0
|
||||
LP001123,Male,Yes,0,1,0.0,2400,0.0,75.0,360.0,0,Urban,1.0
|
||||
LP001131,Male,Yes,0,1,0.0,3941,2336.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001136,Male,Yes,0,0,1.0,4695,0.0,96.0,,1,Urban,1.0
|
||||
LP001137,Female,No,0,1,0.0,3410,0.0,88.0,,1,Urban,1.0
|
||||
LP001138,Male,Yes,1,1,0.0,5649,0.0,44.0,360.0,1,Urban,1.0
|
||||
LP001144,Male,Yes,0,1,0.0,5821,0.0,144.0,360.0,1,Urban,1.0
|
||||
LP001146,Female,Yes,0,1,0.0,2645,3440.0,120.0,360.0,0,Urban,0.0
|
||||
LP001151,Female,No,0,1,0.0,4000,2275.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001155,Female,Yes,0,0,0.0,1928,1644.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001157,Female,No,0,1,0.0,3086,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001164,Female,No,0,1,0.0,4230,0.0,112.0,360.0,1,Semiurban,0.0
|
||||
LP001179,Male,Yes,2,1,0.0,4616,0.0,134.0,360.0,1,Urban,0.0
|
||||
LP001186,Female,Yes,1,1,1.0,11500,0.0,286.0,360.0,0,Urban,0.0
|
||||
LP001194,Male,Yes,2,1,0.0,2708,1167.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001195,Male,Yes,0,1,0.0,2132,1591.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001197,Male,Yes,0,1,0.0,3366,2200.0,135.0,360.0,1,Rural,0.0
|
||||
LP001198,Male,Yes,1,1,0.0,8080,2250.0,180.0,360.0,1,Urban,1.0
|
||||
LP001199,Male,Yes,2,0,0.0,3357,2859.0,144.0,360.0,1,Urban,1.0
|
||||
LP001205,Male,Yes,0,1,0.0,2500,3796.0,120.0,360.0,1,Urban,1.0
|
||||
LP001206,Male,Yes,3+,1,0.0,3029,0.0,99.0,360.0,1,Urban,1.0
|
||||
LP001207,Male,Yes,0,0,1.0,2609,3449.0,165.0,180.0,0,Rural,0.0
|
||||
LP001213,Male,Yes,1,1,0.0,4945,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP001222,Female,No,0,1,0.0,4166,0.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001225,Male,Yes,0,1,0.0,5726,4595.0,258.0,360.0,1,Semiurban,0.0
|
||||
LP001228,Male,No,0,0,0.0,3200,2254.0,126.0,180.0,0,Urban,0.0
|
||||
LP001233,Male,Yes,1,1,0.0,10750,0.0,312.0,360.0,1,Urban,1.0
|
||||
LP001238,Male,Yes,3+,0,1.0,7100,0.0,125.0,60.0,1,Urban,1.0
|
||||
LP001241,Female,No,0,1,0.0,4300,0.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP001243,Male,Yes,0,1,0.0,3208,3066.0,172.0,360.0,1,Urban,1.0
|
||||
LP001245,Male,Yes,2,0,1.0,1875,1875.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001248,Male,No,0,1,0.0,3500,0.0,81.0,300.0,1,Semiurban,1.0
|
||||
LP001250,Male,Yes,3+,0,0.0,4755,0.0,95.0,,0,Semiurban,0.0
|
||||
LP001253,Male,Yes,3+,1,1.0,5266,1774.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001255,Male,No,0,1,0.0,3750,0.0,113.0,480.0,1,Urban,0.0
|
||||
LP001256,Male,No,0,1,0.0,3750,4750.0,176.0,360.0,1,Urban,0.0
|
||||
LP001259,Male,Yes,1,1,1.0,1000,3022.0,110.0,360.0,1,Urban,0.0
|
||||
LP001263,Male,Yes,3+,1,0.0,3167,4000.0,180.0,300.0,0,Semiurban,0.0
|
||||
LP001264,Male,Yes,3+,0,1.0,3333,2166.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP001265,Female,No,0,1,0.0,3846,0.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP001266,Male,Yes,1,1,1.0,2395,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001267,Female,Yes,2,1,0.0,1378,1881.0,167.0,360.0,1,Urban,0.0
|
||||
LP001273,Male,Yes,0,1,0.0,6000,2250.0,265.0,360.0,0,Semiurban,0.0
|
||||
LP001275,Male,Yes,1,1,0.0,3988,0.0,50.0,240.0,1,Urban,1.0
|
||||
LP001279,Male,No,0,1,0.0,2366,2531.0,136.0,360.0,1,Semiurban,1.0
|
||||
LP001280,Male,Yes,2,0,0.0,3333,2000.0,99.0,360.0,0,Semiurban,1.0
|
||||
LP001282,Male,Yes,0,1,0.0,2500,2118.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001289,Male,No,0,1,0.0,8566,0.0,210.0,360.0,1,Urban,1.0
|
||||
LP001310,Male,Yes,0,1,0.0,5695,4167.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP001316,Male,Yes,0,1,0.0,2958,2900.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001318,Male,Yes,2,1,0.0,6250,5654.0,188.0,180.0,1,Semiurban,1.0
|
||||
LP001319,Male,Yes,2,0,0.0,3273,1820.0,81.0,360.0,1,Urban,1.0
|
||||
LP001322,Male,No,0,1,0.0,4133,0.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001325,Male,No,0,0,0.0,3620,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001326,Male,No,0,1,0.0,6782,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP001327,Female,Yes,0,1,0.0,2484,2302.0,137.0,360.0,1,Semiurban,1.0
|
||||
LP001333,Male,Yes,0,1,0.0,1977,997.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP001334,Male,Yes,0,0,0.0,4188,0.0,115.0,180.0,1,Semiurban,1.0
|
||||
LP001343,Male,Yes,0,1,0.0,1759,3541.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001345,Male,Yes,2,0,0.0,4288,3263.0,133.0,180.0,1,Urban,1.0
|
||||
LP001349,Male,No,0,1,0.0,4843,3806.0,151.0,360.0,1,Semiurban,1.0
|
||||
LP001350,Male,Yes,,1,0.0,13650,0.0,0.0,360.0,1,Urban,1.0
|
||||
LP001356,Male,Yes,0,1,0.0,4652,3583.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001357,Male,,,1,0.0,3816,754.0,160.0,360.0,1,Urban,1.0
|
||||
LP001367,Male,Yes,1,1,0.0,3052,1030.0,100.0,360.0,1,Urban,1.0
|
||||
LP001369,Male,Yes,2,1,0.0,11417,1126.0,225.0,360.0,1,Urban,1.0
|
||||
LP001370,Male,No,0,0,0.0,7333,0.0,120.0,360.0,1,Rural,0.0
|
||||
LP001379,Male,Yes,2,1,0.0,3800,3600.0,216.0,360.0,0,Urban,0.0
|
||||
LP001384,Male,Yes,3+,0,0.0,2071,754.0,94.0,480.0,1,Semiurban,1.0
|
||||
LP001385,Male,No,0,1,0.0,5316,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001387,Female,Yes,0,1,0.0,2929,2333.0,139.0,360.0,1,Semiurban,1.0
|
||||
LP001391,Male,Yes,0,0,0.0,3572,4114.0,152.0,,0,Rural,0.0
|
||||
LP001392,Female,No,1,1,1.0,7451,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001398,Male,No,0,1,0.0,5050,0.0,118.0,360.0,1,Semiurban,1.0
|
||||
LP001401,Male,Yes,1,1,0.0,14583,0.0,185.0,180.0,1,Rural,1.0
|
||||
LP001404,Female,Yes,0,1,0.0,3167,2283.0,154.0,360.0,1,Semiurban,1.0
|
||||
LP001405,Male,Yes,1,1,0.0,2214,1398.0,85.0,360.0,0,Urban,1.0
|
||||
LP001421,Male,Yes,0,1,0.0,5568,2142.0,175.0,360.0,1,Rural,0.0
|
||||
LP001422,Female,No,0,1,0.0,10408,0.0,259.0,360.0,1,Urban,1.0
|
||||
LP001426,Male,Yes,,1,0.0,5667,2667.0,180.0,360.0,1,Rural,1.0
|
||||
LP001430,Female,No,0,1,0.0,4166,0.0,44.0,360.0,1,Semiurban,1.0
|
||||
LP001431,Female,No,0,1,0.0,2137,8980.0,137.0,360.0,0,Semiurban,1.0
|
||||
LP001432,Male,Yes,2,1,0.0,2957,0.0,81.0,360.0,1,Semiurban,1.0
|
||||
LP001439,Male,Yes,0,0,0.0,4300,2014.0,194.0,360.0,1,Rural,1.0
|
||||
LP001443,Female,No,0,1,0.0,3692,0.0,93.0,360.0,0,Rural,1.0
|
||||
LP001448,,Yes,3+,1,0.0,23803,0.0,370.0,360.0,1,Rural,1.0
|
||||
LP001449,Male,No,0,1,0.0,3865,1640.0,0.0,360.0,1,Rural,1.0
|
||||
LP001451,Male,Yes,1,1,1.0,10513,3850.0,160.0,180.0,0,Urban,0.0
|
||||
LP001465,Male,Yes,0,1,0.0,6080,2569.0,182.0,360.0,0,Rural,0.0
|
||||
LP001469,Male,No,0,1,1.0,20166,0.0,650.0,480.0,0,Urban,1.0
|
||||
LP001473,Male,No,0,1,0.0,2014,1929.0,74.0,360.0,1,Urban,1.0
|
||||
LP001478,Male,No,0,1,0.0,2718,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP001482,Male,Yes,0,1,1.0,3459,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001487,Male,No,0,1,0.0,4895,0.0,102.0,360.0,1,Semiurban,1.0
|
||||
LP001488,Male,Yes,3+,1,0.0,4000,7750.0,290.0,360.0,1,Semiurban,0.0
|
||||
LP001489,Female,Yes,0,1,0.0,4583,0.0,84.0,360.0,1,Rural,0.0
|
||||
LP001491,Male,Yes,2,1,1.0,3316,3500.0,88.0,360.0,1,Urban,1.0
|
||||
LP001492,Male,No,0,1,0.0,14999,0.0,242.0,360.0,0,Semiurban,0.0
|
||||
LP001493,Male,Yes,2,0,0.0,4200,1430.0,129.0,360.0,1,Rural,0.0
|
||||
LP001497,Male,Yes,2,1,0.0,5042,2083.0,185.0,360.0,1,Rural,0.0
|
||||
LP001498,Male,No,0,1,0.0,5417,0.0,168.0,360.0,1,Urban,1.0
|
||||
LP001504,Male,No,0,1,1.0,6950,0.0,175.0,180.0,1,Semiurban,1.0
|
||||
LP001507,Male,Yes,0,1,0.0,2698,2034.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001508,Male,Yes,2,1,0.0,11757,0.0,187.0,180.0,1,Urban,1.0
|
||||
LP001514,Female,Yes,0,1,0.0,2330,4486.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001516,Female,Yes,2,1,0.0,14866,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP001518,Male,Yes,1,1,0.0,1538,1425.0,30.0,360.0,1,Urban,1.0
|
||||
LP001519,Female,No,0,1,0.0,10000,1666.0,225.0,360.0,1,Rural,0.0
|
||||
LP001520,Male,Yes,0,1,0.0,4860,830.0,125.0,360.0,1,Semiurban,1.0
|
||||
LP001528,Male,No,0,1,0.0,6277,0.0,118.0,360.0,0,Rural,0.0
|
||||
LP001529,Male,Yes,0,1,1.0,2577,3750.0,152.0,360.0,1,Rural,1.0
|
||||
LP001531,Male,No,0,1,0.0,9166,0.0,244.0,360.0,1,Urban,0.0
|
||||
LP001532,Male,Yes,2,0,0.0,2281,0.0,113.0,360.0,1,Rural,0.0
|
||||
LP001535,Male,No,0,1,0.0,3254,0.0,50.0,360.0,1,Urban,1.0
|
||||
LP001536,Male,Yes,3+,1,0.0,39999,0.0,600.0,180.0,0,Semiurban,1.0
|
||||
LP001541,Male,Yes,1,1,0.0,6000,0.0,160.0,360.0,0,Rural,1.0
|
||||
LP001543,Male,Yes,1,1,0.0,9538,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP001546,Male,No,0,1,0.0,2980,2083.0,120.0,360.0,1,Rural,1.0
|
||||
LP001552,Male,Yes,0,1,0.0,4583,5625.0,255.0,360.0,1,Semiurban,1.0
|
||||
LP001560,Male,Yes,0,0,0.0,1863,1041.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP001562,Male,Yes,0,1,0.0,7933,0.0,275.0,360.0,1,Urban,0.0
|
||||
LP001565,Male,Yes,1,1,0.0,3089,1280.0,121.0,360.0,0,Semiurban,0.0
|
||||
LP001570,Male,Yes,2,1,0.0,4167,1447.0,158.0,360.0,1,Rural,1.0
|
||||
LP001572,Male,Yes,0,1,0.0,9323,0.0,75.0,180.0,1,Urban,1.0
|
||||
LP001574,Male,Yes,0,1,0.0,3707,3166.0,182.0,,1,Rural,1.0
|
||||
LP001577,Female,Yes,0,1,0.0,4583,0.0,112.0,360.0,1,Rural,0.0
|
||||
LP001578,Male,Yes,0,1,0.0,2439,3333.0,129.0,360.0,1,Rural,1.0
|
||||
LP001579,Male,No,0,1,0.0,2237,0.0,63.0,480.0,0,Semiurban,0.0
|
||||
LP001580,Male,Yes,2,1,0.0,8000,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001581,Male,Yes,0,0,0.0,1820,1769.0,95.0,360.0,1,Rural,1.0
|
||||
LP001585,,Yes,3+,1,0.0,51763,0.0,700.0,300.0,1,Urban,1.0
|
||||
LP001586,Male,Yes,3+,0,0.0,3522,0.0,81.0,180.0,1,Rural,0.0
|
||||
LP001594,Male,Yes,0,1,0.0,5708,5625.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001603,Male,Yes,0,0,1.0,4344,736.0,87.0,360.0,1,Semiurban,0.0
|
||||
LP001606,Male,Yes,0,1,0.0,3497,1964.0,116.0,360.0,1,Rural,1.0
|
||||
LP001608,Male,Yes,2,1,0.0,2045,1619.0,101.0,360.0,1,Rural,1.0
|
||||
LP001610,Male,Yes,3+,1,0.0,5516,11300.0,495.0,360.0,0,Semiurban,0.0
|
||||
LP001616,Male,Yes,1,1,0.0,3750,0.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001630,Male,No,0,0,0.0,2333,1451.0,102.0,480.0,0,Urban,0.0
|
||||
LP001633,Male,Yes,1,1,0.0,6400,7250.0,180.0,360.0,0,Urban,0.0
|
||||
LP001634,Male,No,0,1,0.0,1916,5063.0,67.0,360.0,0,Rural,0.0
|
||||
LP001636,Male,Yes,0,1,0.0,4600,0.0,73.0,180.0,1,Semiurban,1.0
|
||||
LP001637,Male,Yes,1,1,0.0,33846,0.0,260.0,360.0,1,Semiurban,0.0
|
||||
LP001639,Female,Yes,0,1,0.0,3625,0.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP001640,Male,Yes,0,1,1.0,39147,4750.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001641,Male,Yes,1,1,1.0,2178,0.0,66.0,300.0,0,Rural,0.0
|
||||
LP001643,Male,Yes,0,1,0.0,2383,2138.0,58.0,360.0,0,Rural,1.0
|
||||
LP001644,,Yes,0,1,1.0,674,5296.0,168.0,360.0,1,Rural,1.0
|
||||
LP001647,Male,Yes,0,1,0.0,9328,0.0,188.0,180.0,1,Rural,1.0
|
||||
LP001653,Male,No,0,0,0.0,4885,0.0,48.0,360.0,1,Rural,1.0
|
||||
LP001656,Male,No,0,1,0.0,12000,0.0,164.0,360.0,1,Semiurban,0.0
|
||||
LP001657,Male,Yes,0,0,0.0,6033,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP001658,Male,No,0,1,0.0,3858,0.0,76.0,360.0,1,Semiurban,1.0
|
||||
LP001664,Male,No,0,1,0.0,4191,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001665,Male,Yes,1,1,0.0,3125,2583.0,170.0,360.0,1,Semiurban,0.0
|
||||
LP001666,Male,No,0,1,0.0,8333,3750.0,187.0,360.0,1,Rural,1.0
|
||||
LP001669,Female,No,0,0,0.0,1907,2365.0,120.0,,1,Urban,1.0
|
||||
LP001671,Female,Yes,0,1,0.0,3416,2816.0,113.0,360.0,0,Semiurban,1.0
|
||||
LP001673,Male,No,0,1,1.0,11000,0.0,83.0,360.0,1,Urban,0.0
|
||||
LP001674,Male,Yes,1,0,0.0,2600,2500.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001677,Male,No,2,1,0.0,4923,0.0,166.0,360.0,0,Semiurban,1.0
|
||||
LP001682,Male,Yes,3+,0,0.0,3992,0.0,0.0,180.0,1,Urban,0.0
|
||||
LP001688,Male,Yes,1,0,0.0,3500,1083.0,135.0,360.0,1,Urban,1.0
|
||||
LP001691,Male,Yes,2,0,0.0,3917,0.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP001692,Female,No,0,0,0.0,4408,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001693,Female,No,0,1,0.0,3244,0.0,80.0,360.0,1,Urban,1.0
|
||||
LP001698,Male,No,0,0,0.0,3975,2531.0,55.0,360.0,1,Rural,1.0
|
||||
LP001699,Male,No,0,1,0.0,2479,0.0,59.0,360.0,1,Urban,1.0
|
||||
LP001702,Male,No,0,1,0.0,3418,0.0,127.0,360.0,1,Semiurban,0.0
|
||||
LP001708,Female,No,0,1,0.0,10000,0.0,214.0,360.0,1,Semiurban,0.0
|
||||
LP001711,Male,Yes,3+,1,0.0,3430,1250.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001713,Male,Yes,1,1,1.0,7787,0.0,240.0,360.0,1,Urban,1.0
|
||||
LP001715,Male,Yes,3+,0,1.0,5703,0.0,130.0,360.0,1,Rural,1.0
|
||||
LP001716,Male,Yes,0,1,0.0,3173,3021.0,137.0,360.0,1,Urban,1.0
|
||||
LP001720,Male,Yes,3+,0,0.0,3850,983.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001722,Male,Yes,0,1,0.0,150,1800.0,135.0,360.0,1,Rural,0.0
|
||||
LP001726,Male,Yes,0,1,0.0,3727,1775.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001732,Male,Yes,2,1,0.0,5000,0.0,72.0,360.0,0,Semiurban,0.0
|
||||
LP001734,Female,Yes,2,1,0.0,4283,2383.0,127.0,360.0,0,Semiurban,1.0
|
||||
LP001736,Male,Yes,0,1,0.0,2221,0.0,60.0,360.0,0,Urban,0.0
|
||||
LP001743,Male,Yes,2,1,0.0,4009,1717.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001744,Male,No,0,1,0.0,2971,2791.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001749,Male,Yes,0,1,0.0,7578,1010.0,175.0,,1,Semiurban,1.0
|
||||
LP001750,Male,Yes,0,1,0.0,6250,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001751,Male,Yes,0,1,0.0,3250,0.0,170.0,360.0,1,Rural,0.0
|
||||
LP001754,Male,Yes,,0,1.0,4735,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001758,Male,Yes,2,1,0.0,6250,1695.0,210.0,360.0,1,Semiurban,1.0
|
||||
LP001760,Male,,,1,0.0,4758,0.0,158.0,480.0,1,Semiurban,1.0
|
||||
LP001761,Male,No,0,1,1.0,6400,0.0,200.0,360.0,1,Rural,1.0
|
||||
LP001765,Male,Yes,1,1,0.0,2491,2054.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001768,Male,Yes,0,1,0.0,3716,0.0,42.0,180.0,1,Rural,1.0
|
||||
LP001770,Male,No,0,0,0.0,3189,2598.0,120.0,,1,Rural,1.0
|
||||
LP001776,Female,No,0,1,0.0,8333,0.0,280.0,360.0,1,Semiurban,1.0
|
||||
LP001778,Male,Yes,1,1,0.0,3155,1779.0,140.0,360.0,1,Semiurban,1.0
|
||||
LP001784,Male,Yes,1,1,0.0,5500,1260.0,170.0,360.0,1,Rural,1.0
|
||||
LP001786,Male,Yes,0,1,0.0,5746,0.0,255.0,360.0,0,Urban,0.0
|
||||
LP001788,Female,No,0,1,1.0,3463,0.0,122.0,360.0,0,Urban,1.0
|
||||
LP001790,Female,No,1,1,0.0,3812,0.0,112.0,360.0,1,Rural,1.0
|
||||
LP001792,Male,Yes,1,1,0.0,3315,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001798,Male,Yes,2,1,0.0,5819,5000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001800,Male,Yes,1,0,0.0,2510,1983.0,140.0,180.0,1,Urban,0.0
|
||||
LP001806,Male,No,0,1,0.0,2965,5701.0,155.0,60.0,1,Urban,1.0
|
||||
LP001807,Male,Yes,2,1,1.0,6250,1300.0,108.0,360.0,1,Rural,1.0
|
||||
LP001811,Male,Yes,0,0,0.0,3406,4417.0,123.0,360.0,1,Semiurban,1.0
|
||||
LP001813,Male,No,0,1,1.0,6050,4333.0,120.0,180.0,1,Urban,0.0
|
||||
LP001814,Male,Yes,2,1,0.0,9703,0.0,112.0,360.0,1,Urban,1.0
|
||||
LP001819,Male,Yes,1,0,0.0,6608,0.0,137.0,180.0,1,Urban,1.0
|
||||
LP001824,Male,Yes,1,1,0.0,2882,1843.0,123.0,480.0,1,Semiurban,1.0
|
||||
LP001825,Male,Yes,0,1,0.0,1809,1868.0,90.0,360.0,1,Urban,1.0
|
||||
LP001835,Male,Yes,0,0,0.0,1668,3890.0,201.0,360.0,0,Semiurban,0.0
|
||||
LP001836,Female,No,2,1,0.0,3427,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001841,Male,No,0,0,1.0,2583,2167.0,104.0,360.0,1,Rural,1.0
|
||||
LP001843,Male,Yes,1,0,0.0,2661,7101.0,279.0,180.0,1,Semiurban,1.0
|
||||
LP001844,Male,No,0,1,1.0,16250,0.0,192.0,360.0,0,Urban,0.0
|
||||
LP001846,Female,No,3+,1,0.0,3083,0.0,255.0,360.0,1,Rural,1.0
|
||||
LP001849,Male,No,0,0,0.0,6045,0.0,115.0,360.0,0,Rural,0.0
|
||||
LP001854,Male,Yes,3+,1,0.0,5250,0.0,94.0,360.0,1,Urban,0.0
|
||||
LP001859,Male,Yes,0,1,0.0,14683,2100.0,304.0,360.0,1,Rural,0.0
|
||||
LP001864,Male,Yes,3+,0,0.0,4931,0.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001865,Male,Yes,1,1,0.0,6083,4250.0,330.0,360.0,0,Urban,1.0
|
||||
LP001868,Male,No,0,1,0.0,2060,2209.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001870,Female,No,1,1,0.0,3481,0.0,155.0,36.0,1,Semiurban,0.0
|
||||
LP001871,Female,No,0,1,0.0,7200,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001872,Male,No,0,1,1.0,5166,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001875,Male,No,0,1,0.0,4095,3447.0,151.0,360.0,1,Rural,1.0
|
||||
LP001877,Male,Yes,2,1,0.0,4708,1387.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001882,Male,Yes,3+,1,0.0,4333,1811.0,160.0,360.0,0,Urban,1.0
|
||||
LP001883,Female,No,0,1,0.0,3418,0.0,135.0,360.0,1,Rural,0.0
|
||||
LP001884,Female,No,1,1,0.0,2876,1560.0,90.0,360.0,1,Urban,1.0
|
||||
LP001888,Female,No,0,1,0.0,3237,0.0,30.0,360.0,1,Urban,1.0
|
||||
LP001891,Male,Yes,0,1,0.0,11146,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001892,Male,No,0,1,0.0,2833,1857.0,126.0,360.0,1,Rural,1.0
|
||||
LP001894,Male,Yes,0,1,0.0,2620,2223.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001896,Male,Yes,2,1,0.0,3900,0.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001900,Male,Yes,1,1,0.0,2750,1842.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001903,Male,Yes,0,1,0.0,3993,3274.0,207.0,360.0,1,Semiurban,1.0
|
||||
LP001904,Male,Yes,0,1,0.0,3103,1300.0,80.0,360.0,1,Urban,1.0
|
||||
LP001907,Male,Yes,0,1,0.0,14583,0.0,436.0,360.0,1,Semiurban,1.0
|
||||
LP001908,Female,Yes,0,0,0.0,4100,0.0,124.0,360.0,0,Rural,1.0
|
||||
LP001910,Male,No,1,0,1.0,4053,2426.0,158.0,360.0,0,Urban,0.0
|
||||
LP001914,Male,Yes,0,1,0.0,3927,800.0,112.0,360.0,1,Semiurban,1.0
|
||||
LP001915,Male,Yes,2,1,0.0,2301,985.7999878,78.0,180.0,1,Urban,1.0
|
||||
LP001917,Female,No,0,1,0.0,1811,1666.0,54.0,360.0,1,Urban,1.0
|
||||
LP001922,Male,Yes,0,1,0.0,20667,0.0,0.0,360.0,1,Rural,0.0
|
||||
LP001924,Male,No,0,1,0.0,3158,3053.0,89.0,360.0,1,Rural,1.0
|
||||
LP001925,Female,No,0,1,1.0,2600,1717.0,99.0,300.0,1,Semiurban,0.0
|
||||
LP001926,Male,Yes,0,1,0.0,3704,2000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001931,Female,No,0,1,0.0,4124,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001935,Male,No,0,1,0.0,9508,0.0,187.0,360.0,1,Rural,1.0
|
||||
LP001936,Male,Yes,0,1,0.0,3075,2416.0,139.0,360.0,1,Rural,1.0
|
||||
LP001938,Male,Yes,2,1,0.0,4400,0.0,127.0,360.0,0,Semiurban,0.0
|
||||
LP001940,Male,Yes,2,1,0.0,3153,1560.0,134.0,360.0,1,Urban,1.0
|
||||
LP001945,Female,No,,1,0.0,5417,0.0,143.0,480.0,0,Urban,0.0
|
||||
LP001947,Male,Yes,0,1,0.0,2383,3334.0,172.0,360.0,1,Semiurban,1.0
|
||||
LP001949,Male,Yes,3+,1,0.0,4416,1250.0,110.0,360.0,1,Urban,1.0
|
||||
LP001953,Male,Yes,1,1,0.0,6875,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001954,Female,Yes,1,1,0.0,4666,0.0,135.0,360.0,1,Urban,1.0
|
||||
LP001955,Female,No,0,1,0.0,5000,2541.0,151.0,480.0,1,Rural,0.0
|
||||
LP001963,Male,Yes,1,1,0.0,2014,2925.0,113.0,360.0,1,Urban,0.0
|
||||
LP001964,Male,Yes,0,0,0.0,1800,2934.0,93.0,360.0,0,Urban,0.0
|
||||
LP001972,Male,Yes,,0,0.0,2875,1750.0,105.0,360.0,1,Semiurban,1.0
|
||||
LP001974,Female,No,0,1,0.0,5000,0.0,132.0,360.0,1,Rural,1.0
|
||||
LP001977,Male,Yes,1,1,0.0,1625,1803.0,96.0,360.0,1,Urban,1.0
|
||||
LP001978,Male,No,0,1,0.0,4000,2500.0,140.0,360.0,1,Rural,1.0
|
||||
LP001990,Male,No,0,0,0.0,2000,0.0,0.0,360.0,1,Urban,0.0
|
||||
LP001993,Female,No,0,1,0.0,3762,1666.0,135.0,360.0,1,Rural,1.0
|
||||
LP001994,Female,No,0,1,0.0,2400,1863.0,104.0,360.0,0,Urban,0.0
|
||||
LP001996,Male,No,0,1,0.0,20233,0.0,480.0,360.0,1,Rural,0.0
|
||||
LP001998,Male,Yes,2,0,0.0,7667,0.0,185.0,360.0,0,Rural,1.0
|
||||
LP002002,Female,No,0,1,0.0,2917,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002004,Male,No,0,0,0.0,2927,2405.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP002006,Female,No,0,1,0.0,2507,0.0,56.0,360.0,1,Rural,1.0
|
||||
LP002008,Male,Yes,2,1,1.0,5746,0.0,144.0,84.0,0,Rural,1.0
|
||||
LP002024,,Yes,0,1,0.0,2473,1843.0,159.0,360.0,1,Rural,0.0
|
||||
LP002031,Male,Yes,1,0,0.0,3399,1640.0,111.0,180.0,1,Urban,1.0
|
||||
LP002035,Male,Yes,2,1,0.0,3717,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP002036,Male,Yes,0,1,0.0,2058,2134.0,88.0,360.0,0,Urban,1.0
|
||||
LP002043,Female,No,1,1,0.0,3541,0.0,112.0,360.0,0,Semiurban,1.0
|
||||
LP002050,Male,Yes,1,1,1.0,10000,0.0,155.0,360.0,1,Rural,0.0
|
||||
LP002051,Male,Yes,0,1,0.0,2400,2167.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002053,Male,Yes,3+,1,0.0,4342,189.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP002054,Male,Yes,2,0,0.0,3601,1590.0,0.0,360.0,1,Rural,1.0
|
||||
LP002055,Female,No,0,1,0.0,3166,2985.0,132.0,360.0,0,Rural,1.0
|
||||
LP002065,Male,Yes,3+,1,0.0,15000,0.0,300.0,360.0,1,Rural,1.0
|
||||
LP002067,Male,Yes,1,1,1.0,8666,4983.0,376.0,360.0,0,Rural,0.0
|
||||
LP002068,Male,No,0,1,0.0,4917,0.0,130.0,360.0,0,Rural,1.0
|
||||
LP002082,Male,Yes,0,1,1.0,5818,2160.0,184.0,360.0,1,Semiurban,1.0
|
||||
LP002086,Female,Yes,0,1,0.0,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002087,Female,No,0,1,0.0,2500,0.0,67.0,360.0,1,Urban,1.0
|
||||
LP002097,Male,No,1,1,0.0,4384,1793.0,117.0,360.0,1,Urban,1.0
|
||||
LP002098,Male,No,0,1,0.0,2935,0.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002100,Male,No,,1,0.0,2833,0.0,71.0,360.0,1,Urban,1.0
|
||||
LP002101,Male,Yes,0,1,0.0,63337,0.0,490.0,180.0,1,Urban,1.0
|
||||
LP002103,,Yes,1,1,1.0,9833,1833.0,182.0,180.0,1,Urban,1.0
|
||||
LP002106,Male,Yes,,1,1.0,5503,4490.0,70.0,,1,Semiurban,1.0
|
||||
LP002110,Male,Yes,1,1,0.0,5250,688.0,160.0,360.0,1,Rural,1.0
|
||||
LP002112,Male,Yes,2,1,1.0,2500,4600.0,176.0,360.0,1,Rural,1.0
|
||||
LP002113,Female,No,3+,0,0.0,1830,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP002114,Female,No,0,1,0.0,4160,0.0,71.0,360.0,1,Semiurban,1.0
|
||||
LP002115,Male,Yes,3+,0,0.0,2647,1587.0,173.0,360.0,1,Rural,0.0
|
||||
LP002116,Female,No,0,1,0.0,2378,0.0,46.0,360.0,1,Rural,0.0
|
||||
LP002119,Male,Yes,1,0,0.0,4554,1229.0,158.0,360.0,1,Urban,1.0
|
||||
LP002126,Male,Yes,3+,0,0.0,3173,0.0,74.0,360.0,1,Semiurban,1.0
|
||||
LP002128,Male,Yes,2,1,0.0,2583,2330.0,125.0,360.0,1,Rural,1.0
|
||||
LP002129,Male,Yes,0,1,0.0,2499,2458.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002130,Male,Yes,,0,0.0,3523,3230.0,152.0,360.0,0,Rural,0.0
|
||||
LP002131,Male,Yes,2,0,0.0,3083,2168.0,126.0,360.0,1,Urban,1.0
|
||||
LP002137,Male,Yes,0,1,0.0,6333,4583.0,259.0,360.0,0,Semiurban,1.0
|
||||
LP002138,Male,Yes,0,1,0.0,2625,6250.0,187.0,360.0,1,Rural,1.0
|
||||
LP002139,Male,Yes,0,1,0.0,9083,0.0,228.0,360.0,1,Semiurban,1.0
|
||||
LP002140,Male,No,0,1,0.0,8750,4167.0,308.0,360.0,1,Rural,0.0
|
||||
LP002141,Male,Yes,3+,1,0.0,2666,2083.0,95.0,360.0,1,Rural,1.0
|
||||
LP002142,Female,Yes,0,1,1.0,5500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002143,Female,Yes,0,1,0.0,2423,505.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002144,Female,No,,1,0.0,3813,0.0,116.0,180.0,1,Urban,1.0
|
||||
LP002149,Male,Yes,2,1,0.0,8333,3167.0,165.0,360.0,1,Rural,1.0
|
||||
LP002151,Male,Yes,1,1,0.0,3875,0.0,67.0,360.0,1,Urban,0.0
|
||||
LP002158,Male,Yes,0,0,0.0,3000,1666.0,100.0,480.0,0,Urban,0.0
|
||||
LP002160,Male,Yes,3+,1,0.0,5167,3167.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP002161,Female,No,1,1,0.0,4723,0.0,81.0,360.0,1,Semiurban,0.0
|
||||
LP002170,Male,Yes,2,1,0.0,5000,3667.0,236.0,360.0,1,Semiurban,1.0
|
||||
LP002175,Male,Yes,0,1,0.0,4750,2333.0,130.0,360.0,1,Urban,1.0
|
||||
LP002178,Male,Yes,0,1,0.0,3013,3033.0,95.0,300.0,0,Urban,1.0
|
||||
LP002180,Male,No,0,1,1.0,6822,0.0,141.0,360.0,1,Rural,1.0
|
||||
LP002181,Male,No,0,0,0.0,6216,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP002187,Male,No,0,1,0.0,2500,0.0,96.0,480.0,1,Semiurban,0.0
|
||||
LP002188,Male,No,0,1,0.0,5124,0.0,124.0,,0,Rural,0.0
|
||||
LP002190,Male,Yes,1,1,0.0,6325,0.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP002191,Male,Yes,0,1,0.0,19730,5266.0,570.0,360.0,1,Rural,0.0
|
||||
LP002194,Female,No,0,1,1.0,15759,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002197,Male,Yes,2,1,0.0,5185,0.0,155.0,360.0,1,Semiurban,1.0
|
||||
LP002201,Male,Yes,2,1,1.0,9323,7873.0,380.0,300.0,1,Rural,1.0
|
||||
LP002205,Male,No,1,1,0.0,3062,1987.0,111.0,180.0,0,Urban,0.0
|
||||
LP002209,Female,No,0,1,0.0,2764,1459.0,110.0,360.0,1,Urban,1.0
|
||||
LP002211,Male,Yes,0,1,0.0,4817,923.0,120.0,180.0,1,Urban,1.0
|
||||
LP002219,Male,Yes,3+,1,0.0,8750,4996.0,130.0,360.0,1,Rural,1.0
|
||||
LP002223,Male,Yes,0,1,0.0,4310,0.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP002224,Male,No,0,1,0.0,3069,0.0,71.0,480.0,1,Urban,0.0
|
||||
LP002225,Male,Yes,2,1,0.0,5391,0.0,130.0,360.0,1,Urban,1.0
|
||||
LP002226,Male,Yes,0,1,0.0,3333,2500.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002229,Male,No,0,1,0.0,5941,4232.0,296.0,360.0,1,Semiurban,1.0
|
||||
LP002231,Female,No,0,1,0.0,6000,0.0,156.0,360.0,1,Urban,1.0
|
||||
LP002234,Male,No,0,1,1.0,7167,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002236,Male,Yes,2,1,0.0,4566,0.0,100.0,360.0,1,Urban,0.0
|
||||
LP002237,Male,No,1,1,0.0,3667,0.0,113.0,180.0,1,Urban,1.0
|
||||
LP002239,Male,No,0,0,0.0,2346,1600.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002243,Male,Yes,0,0,0.0,3010,3136.0,0.0,360.0,0,Urban,0.0
|
||||
LP002244,Male,Yes,0,1,0.0,2333,2417.0,136.0,360.0,1,Urban,1.0
|
||||
LP002250,Male,Yes,0,1,0.0,5488,0.0,125.0,360.0,1,Rural,1.0
|
||||
LP002255,Male,No,3+,1,0.0,9167,0.0,185.0,360.0,1,Rural,1.0
|
||||
LP002262,Male,Yes,3+,1,0.0,9504,0.0,275.0,360.0,1,Rural,1.0
|
||||
LP002263,Male,Yes,0,1,0.0,2583,2115.0,120.0,360.0,0,Urban,1.0
|
||||
LP002265,Male,Yes,2,0,0.0,1993,1625.0,113.0,180.0,1,Semiurban,1.0
|
||||
LP002266,Male,Yes,2,1,0.0,3100,1400.0,113.0,360.0,1,Urban,1.0
|
||||
LP002272,Male,Yes,2,1,0.0,3276,484.0,135.0,360.0,0,Semiurban,1.0
|
||||
LP002277,Female,No,0,1,0.0,3180,0.0,71.0,360.0,0,Urban,0.0
|
||||
LP002281,Male,Yes,0,1,0.0,3033,1459.0,95.0,360.0,1,Urban,1.0
|
||||
LP002284,Male,No,0,0,0.0,3902,1666.0,109.0,360.0,1,Rural,1.0
|
||||
LP002287,Female,No,0,1,0.0,1500,1800.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002288,Male,Yes,2,0,0.0,2889,0.0,45.0,180.0,0,Urban,0.0
|
||||
LP002296,Male,No,0,0,0.0,2755,0.0,65.0,300.0,1,Rural,0.0
|
||||
LP002297,Male,No,0,1,0.0,2500,20000.0,103.0,360.0,1,Semiurban,1.0
|
||||
LP002300,Female,No,0,0,0.0,1963,0.0,53.0,360.0,1,Semiurban,1.0
|
||||
LP002301,Female,No,0,1,1.0,7441,0.0,194.0,360.0,1,Rural,0.0
|
||||
LP002305,Female,No,0,1,0.0,4547,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002308,Male,Yes,0,0,0.0,2167,2400.0,115.0,360.0,1,Urban,1.0
|
||||
LP002314,Female,No,0,0,0.0,2213,0.0,66.0,360.0,1,Rural,1.0
|
||||
LP002315,Male,Yes,1,1,0.0,8300,0.0,152.0,300.0,0,Semiurban,0.0
|
||||
LP002317,Male,Yes,3+,1,0.0,81000,0.0,360.0,360.0,0,Rural,0.0
|
||||
LP002318,Female,No,1,0,1.0,3867,0.0,62.0,360.0,1,Semiurban,0.0
|
||||
LP002319,Male,Yes,0,1,0.0,6256,0.0,160.0,360.0,0,Urban,1.0
|
||||
LP002328,Male,Yes,0,0,0.0,6096,0.0,218.0,360.0,0,Rural,0.0
|
||||
LP002332,Male,Yes,0,0,0.0,2253,2033.0,110.0,360.0,1,Rural,1.0
|
||||
LP002335,Female,Yes,0,0,0.0,2149,3237.0,178.0,360.0,0,Semiurban,0.0
|
||||
LP002337,Female,No,0,1,0.0,2995,0.0,60.0,360.0,1,Urban,1.0
|
||||
LP002341,Female,No,1,1,0.0,2600,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP002342,Male,Yes,2,1,1.0,1600,20000.0,239.0,360.0,1,Urban,0.0
|
||||
LP002345,Male,Yes,0,1,0.0,1025,2773.0,112.0,360.0,1,Rural,1.0
|
||||
LP002347,Male,Yes,0,1,0.0,3246,1417.0,138.0,360.0,1,Semiurban,1.0
|
||||
LP002348,Male,Yes,0,1,0.0,5829,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002357,Female,No,0,0,0.0,2720,0.0,80.0,,0,Urban,0.0
|
||||
LP002361,Male,Yes,0,1,0.0,1820,1719.0,100.0,360.0,1,Urban,1.0
|
||||
LP002362,Male,Yes,1,1,0.0,7250,1667.0,110.0,,0,Urban,0.0
|
||||
LP002364,Male,Yes,0,1,0.0,14880,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002366,Male,Yes,0,1,0.0,2666,4300.0,121.0,360.0,1,Rural,1.0
|
||||
LP002367,Female,No,1,0,0.0,4606,0.0,81.0,360.0,1,Rural,0.0
|
||||
LP002368,Male,Yes,2,1,0.0,5935,0.0,133.0,360.0,1,Semiurban,1.0
|
||||
LP002369,Male,Yes,0,1,0.0,2920,16.12000084,87.0,360.0,1,Rural,1.0
|
||||
LP002370,Male,No,0,0,0.0,2717,0.0,60.0,180.0,1,Urban,1.0
|
||||
LP002377,Female,No,1,1,1.0,8624,0.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP002379,Male,No,0,1,0.0,6500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002386,Male,No,0,1,0.0,12876,0.0,405.0,360.0,1,Semiurban,1.0
|
||||
LP002387,Male,Yes,0,1,0.0,2425,2340.0,143.0,360.0,1,Semiurban,1.0
|
||||
LP002390,Male,No,0,1,0.0,3750,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002393,Female,,,1,0.0,10047,0.0,0.0,240.0,1,Semiurban,1.0
|
||||
LP002398,Male,No,0,1,0.0,1926,1851.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP002401,Male,Yes,0,1,0.0,2213,1125.0,0.0,360.0,1,Urban,1.0
|
||||
LP002403,Male,No,0,1,1.0,10416,0.0,187.0,360.0,0,Urban,0.0
|
||||
LP002407,Female,Yes,0,0,1.0,7142,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002408,Male,No,0,1,0.0,3660,5064.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP002409,Male,Yes,0,1,0.0,7901,1833.0,180.0,360.0,1,Rural,1.0
|
||||
LP002418,Male,No,3+,0,0.0,4707,1993.0,148.0,360.0,1,Semiurban,1.0
|
||||
LP002422,Male,No,1,1,0.0,37719,0.0,152.0,360.0,1,Semiurban,1.0
|
||||
LP002424,Male,Yes,0,1,0.0,7333,8333.0,175.0,300.0,0,Rural,1.0
|
||||
LP002429,Male,Yes,1,1,1.0,3466,1210.0,130.0,360.0,1,Rural,1.0
|
||||
LP002434,Male,Yes,2,0,0.0,4652,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002435,Male,Yes,0,1,0.0,3539,1376.0,55.0,360.0,1,Rural,0.0
|
||||
LP002443,Male,Yes,2,1,0.0,3340,1710.0,150.0,360.0,0,Rural,0.0
|
||||
LP002444,Male,No,1,0,1.0,2769,1542.0,190.0,360.0,0,Semiurban,0.0
|
||||
LP002446,Male,Yes,2,0,0.0,2309,1255.0,125.0,360.0,0,Rural,0.0
|
||||
LP002447,Male,Yes,2,0,0.0,1958,1456.0,60.0,300.0,0,Urban,1.0
|
||||
LP002448,Male,Yes,0,1,0.0,3948,1733.0,149.0,360.0,0,Rural,0.0
|
||||
LP002449,Male,Yes,0,1,0.0,2483,2466.0,90.0,180.0,0,Rural,1.0
|
||||
LP002453,Male,No,0,1,1.0,7085,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002455,Male,Yes,2,1,0.0,3859,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002459,Male,Yes,0,1,0.0,4301,0.0,118.0,360.0,1,Urban,1.0
|
||||
LP002467,Male,Yes,0,1,0.0,3708,2569.0,173.0,360.0,1,Urban,0.0
|
||||
LP002472,Male,No,2,1,0.0,4354,0.0,136.0,360.0,1,Rural,1.0
|
||||
LP002473,Male,Yes,0,1,0.0,8334,0.0,160.0,360.0,1,Semiurban,0.0
|
||||
LP002478,,Yes,0,1,1.0,2083,4083.0,160.0,360.0,0,Semiurban,1.0
|
||||
LP002484,Male,Yes,3+,1,0.0,7740,0.0,128.0,180.0,1,Urban,1.0
|
||||
LP002487,Male,Yes,0,1,0.0,3015,2188.0,153.0,360.0,1,Rural,1.0
|
||||
LP002489,Female,No,1,0,0.0,5191,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002493,Male,No,0,1,0.0,4166,0.0,98.0,360.0,0,Semiurban,0.0
|
||||
LP002494,Male,No,0,1,0.0,6000,0.0,140.0,360.0,1,Rural,1.0
|
||||
LP002500,Male,Yes,3+,0,0.0,2947,1664.0,70.0,180.0,0,Urban,0.0
|
||||
LP002501,,Yes,0,1,0.0,16692,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002502,Female,Yes,2,0,0.0,210,2917.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002505,Male,Yes,0,1,0.0,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002515,Male,Yes,1,1,1.0,3450,2079.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002517,Male,Yes,1,0,0.0,2653,1500.0,113.0,180.0,0,Rural,0.0
|
||||
LP002519,Male,Yes,3+,1,0.0,4691,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002522,Female,No,0,1,1.0,2500,0.0,93.0,360.0,0,Urban,1.0
|
||||
LP002524,Male,No,2,1,0.0,5532,4648.0,162.0,360.0,1,Rural,1.0
|
||||
LP002527,Male,Yes,2,1,1.0,16525,1014.0,150.0,360.0,1,Rural,1.0
|
||||
LP002529,Male,Yes,2,1,0.0,6700,1750.0,230.0,300.0,1,Semiurban,1.0
|
||||
LP002530,,Yes,2,1,0.0,2873,1872.0,132.0,360.0,0,Semiurban,0.0
|
||||
LP002531,Male,Yes,1,1,1.0,16667,2250.0,86.0,360.0,1,Semiurban,1.0
|
||||
LP002533,Male,Yes,2,1,0.0,2947,1603.0,0.0,360.0,1,Urban,0.0
|
||||
LP002534,Female,No,0,0,0.0,4350,0.0,154.0,360.0,1,Rural,1.0
|
||||
LP002536,Male,Yes,3+,0,0.0,3095,0.0,113.0,360.0,1,Rural,1.0
|
||||
LP002537,Male,Yes,0,1,0.0,2083,3150.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002541,Male,Yes,0,1,0.0,10833,0.0,234.0,360.0,1,Semiurban,1.0
|
||||
LP002543,Male,Yes,2,1,0.0,8333,0.0,246.0,360.0,1,Semiurban,1.0
|
||||
LP002544,Male,Yes,1,0,0.0,1958,2436.0,131.0,360.0,1,Rural,1.0
|
||||
LP002545,Male,No,2,1,0.0,3547,0.0,80.0,360.0,0,Rural,0.0
|
||||
LP002547,Male,Yes,1,1,0.0,18333,0.0,500.0,360.0,1,Urban,0.0
|
||||
LP002555,Male,Yes,2,1,1.0,4583,2083.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002556,Male,No,0,1,0.0,2435,0.0,75.0,360.0,1,Urban,0.0
|
||||
LP002560,Male,No,0,0,0.0,2699,2785.0,96.0,360.0,0,Semiurban,1.0
|
||||
LP002562,Male,Yes,1,0,0.0,5333,1131.0,186.0,360.0,0,Urban,1.0
|
||||
LP002571,Male,No,0,0,0.0,3691,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002582,Female,No,0,0,1.0,17263,0.0,225.0,360.0,1,Semiurban,1.0
|
||||
LP002585,Male,Yes,0,1,0.0,3597,2157.0,119.0,360.0,0,Rural,0.0
|
||||
LP002586,Female,Yes,1,1,0.0,3326,913.0,105.0,84.0,1,Semiurban,1.0
|
||||
LP002587,Male,Yes,0,0,0.0,2600,1700.0,107.0,360.0,1,Rural,1.0
|
||||
LP002588,Male,Yes,0,1,0.0,4625,2857.0,111.0,12.0,0,Urban,1.0
|
||||
LP002600,Male,Yes,1,1,1.0,2895,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002602,Male,No,0,1,0.0,6283,4416.0,209.0,360.0,0,Rural,0.0
|
||||
LP002603,Female,No,0,1,0.0,645,3683.0,113.0,480.0,1,Rural,1.0
|
||||
LP002606,Female,No,0,1,0.0,3159,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002615,Male,Yes,2,1,0.0,4865,5624.0,208.0,360.0,1,Semiurban,1.0
|
||||
LP002618,Male,Yes,1,0,0.0,4050,5302.0,138.0,360.0,0,Rural,0.0
|
||||
LP002619,Male,Yes,0,0,0.0,3814,1483.0,124.0,300.0,1,Semiurban,1.0
|
||||
LP002622,Male,Yes,2,1,0.0,3510,4416.0,243.0,360.0,1,Rural,1.0
|
||||
LP002624,Male,Yes,0,1,0.0,20833,6667.0,480.0,360.0,0,Urban,1.0
|
||||
LP002625,,No,0,1,0.0,3583,0.0,96.0,360.0,1,Urban,0.0
|
||||
LP002626,Male,Yes,0,1,1.0,2479,3013.0,188.0,360.0,1,Urban,1.0
|
||||
LP002634,Female,No,1,1,0.0,13262,0.0,40.0,360.0,1,Urban,1.0
|
||||
LP002637,Male,No,0,0,0.0,3598,1287.0,100.0,360.0,1,Rural,0.0
|
||||
LP002640,Male,Yes,1,1,0.0,6065,2004.0,250.0,360.0,1,Semiurban,1.0
|
||||
LP002643,Male,Yes,2,1,0.0,3283,2035.0,148.0,360.0,1,Urban,1.0
|
||||
LP002648,Male,Yes,0,1,0.0,2130,6666.0,70.0,180.0,1,Semiurban,0.0
|
||||
LP002652,Male,No,0,1,0.0,5815,3666.0,311.0,360.0,1,Rural,0.0
|
||||
LP002659,Male,Yes,3+,1,0.0,3466,3428.0,150.0,360.0,1,Rural,1.0
|
||||
LP002670,Female,Yes,2,1,0.0,2031,1632.0,113.0,480.0,1,Semiurban,1.0
|
||||
LP002682,Male,Yes,,0,0.0,3074,1800.0,123.0,360.0,0,Semiurban,0.0
|
||||
LP002683,Male,No,0,1,0.0,4683,1915.0,185.0,360.0,1,Semiurban,0.0
|
||||
LP002684,Female,No,0,0,0.0,3400,0.0,95.0,360.0,1,Rural,0.0
|
||||
LP002689,Male,Yes,2,0,0.0,2192,1742.0,45.0,360.0,1,Semiurban,1.0
|
||||
LP002690,Male,No,0,1,0.0,2500,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002692,Male,Yes,3+,1,1.0,5677,1424.0,100.0,360.0,1,Rural,1.0
|
||||
LP002693,Male,Yes,2,1,1.0,7948,7166.0,480.0,360.0,1,Rural,1.0
|
||||
LP002697,Male,No,0,1,0.0,4680,2087.0,0.0,360.0,1,Semiurban,0.0
|
||||
LP002699,Male,Yes,2,1,1.0,17500,0.0,400.0,360.0,1,Rural,1.0
|
||||
LP002705,Male,Yes,0,1,0.0,3775,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002706,Male,Yes,1,0,0.0,5285,1430.0,161.0,360.0,0,Semiurban,1.0
|
||||
LP002714,Male,No,1,0,0.0,2679,1302.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002716,Male,No,0,0,0.0,6783,0.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002717,Male,Yes,0,1,0.0,1025,5500.0,216.0,360.0,0,Rural,1.0
|
||||
LP002720,Male,Yes,3+,1,0.0,4281,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002723,Male,No,2,1,0.0,3588,0.0,110.0,360.0,0,Rural,0.0
|
||||
LP002729,Male,No,1,1,0.0,11250,0.0,196.0,360.0,0,Semiurban,0.0
|
||||
LP002731,Female,No,0,0,1.0,18165,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP002732,Male,No,0,0,0.0,2550,2042.0,126.0,360.0,1,Rural,1.0
|
||||
LP002734,Male,Yes,0,1,0.0,6133,3906.0,324.0,360.0,1,Urban,1.0
|
||||
LP002738,Male,No,2,1,0.0,3617,0.0,107.0,360.0,1,Semiurban,1.0
|
||||
LP002739,Male,Yes,0,0,0.0,2917,536.0,66.0,360.0,1,Rural,0.0
|
||||
LP002740,Male,Yes,3+,1,0.0,6417,0.0,157.0,180.0,1,Rural,1.0
|
||||
LP002741,Female,Yes,1,1,0.0,4608,2845.0,140.0,180.0,1,Semiurban,1.0
|
||||
LP002743,Female,No,0,1,0.0,2138,0.0,99.0,360.0,0,Semiurban,0.0
|
||||
LP002753,Female,No,1,1,0.0,3652,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002755,Male,Yes,1,0,0.0,2239,2524.0,128.0,360.0,1,Urban,1.0
|
||||
LP002757,Female,Yes,0,0,0.0,3017,663.0,102.0,360.0,0,Semiurban,1.0
|
||||
LP002767,Male,Yes,0,1,0.0,2768,1950.0,155.0,360.0,1,Rural,1.0
|
||||
LP002768,Male,No,0,0,0.0,3358,0.0,80.0,36.0,1,Semiurban,0.0
|
||||
LP002772,Male,No,0,1,0.0,2526,1783.0,145.0,360.0,1,Rural,1.0
|
||||
LP002776,Female,No,0,1,0.0,5000,0.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002777,Male,Yes,0,1,0.0,2785,2016.0,110.0,360.0,1,Rural,1.0
|
||||
LP002778,Male,Yes,2,1,1.0,6633,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP002784,Male,Yes,1,0,0.0,2492,2375.0,0.0,360.0,1,Rural,1.0
|
||||
LP002785,Male,Yes,1,1,0.0,3333,3250.0,158.0,360.0,1,Urban,1.0
|
||||
LP002788,Male,Yes,0,0,0.0,2454,2333.0,181.0,360.0,0,Urban,0.0
|
||||
LP002789,Male,Yes,0,1,0.0,3593,4266.0,132.0,180.0,0,Rural,0.0
|
||||
LP002792,Male,Yes,1,1,0.0,5468,1032.0,26.0,360.0,1,Semiurban,1.0
|
||||
LP002794,Female,No,0,1,0.0,2667,1625.0,84.0,360.0,0,Urban,1.0
|
||||
LP002795,Male,Yes,3+,1,1.0,10139,0.0,260.0,360.0,1,Semiurban,1.0
|
||||
LP002798,Male,Yes,0,1,0.0,3887,2669.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002804,Female,Yes,0,1,0.0,4180,2306.0,182.0,360.0,1,Semiurban,1.0
|
||||
LP002807,Male,Yes,2,0,0.0,3675,242.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP002813,Female,Yes,1,1,1.0,19484,0.0,600.0,360.0,1,Semiurban,1.0
|
||||
LP002820,Male,Yes,0,1,0.0,5923,2054.0,211.0,360.0,1,Rural,1.0
|
||||
LP002821,Male,No,0,0,1.0,5800,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002832,Male,Yes,2,1,0.0,8799,0.0,258.0,360.0,0,Urban,0.0
|
||||
LP002833,Male,Yes,0,0,0.0,4467,0.0,120.0,360.0,0,Rural,1.0
|
||||
LP002836,Male,No,0,1,0.0,3333,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP002837,Male,Yes,3+,1,0.0,3400,2500.0,123.0,360.0,0,Rural,0.0
|
||||
LP002840,Female,No,0,1,0.0,2378,0.0,9.0,360.0,1,Urban,0.0
|
||||
LP002841,Male,Yes,0,1,0.0,3166,2064.0,104.0,360.0,0,Urban,0.0
|
||||
LP002842,Male,Yes,1,1,0.0,3417,1750.0,186.0,360.0,1,Urban,1.0
|
||||
LP002847,Male,Yes,,1,0.0,5116,1451.0,165.0,360.0,0,Urban,0.0
|
||||
LP002855,Male,Yes,2,1,0.0,16666,0.0,275.0,360.0,1,Urban,1.0
|
||||
LP002862,Male,Yes,2,0,0.0,6125,1625.0,187.0,480.0,1,Semiurban,0.0
|
||||
LP002863,Male,Yes,3+,1,0.0,6406,0.0,150.0,360.0,1,Semiurban,0.0
|
||||
LP002868,Male,Yes,2,1,0.0,3159,461.0,108.0,84.0,1,Urban,1.0
|
||||
LP002872,,Yes,0,1,0.0,3087,2210.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP002874,Male,No,0,1,0.0,3229,2739.0,110.0,360.0,1,Urban,1.0
|
||||
LP002877,Male,Yes,1,1,0.0,1782,2232.0,107.0,360.0,1,Rural,1.0
|
||||
LP002888,Male,No,0,1,0.0,3182,2917.0,161.0,360.0,1,Urban,1.0
|
||||
LP002892,Male,Yes,2,1,0.0,6540,0.0,205.0,360.0,1,Semiurban,1.0
|
||||
LP002893,Male,No,0,1,0.0,1836,33837.0,90.0,360.0,1,Urban,0.0
|
||||
LP002894,Female,Yes,0,1,0.0,3166,0.0,36.0,360.0,1,Semiurban,1.0
|
||||
LP002898,Male,Yes,1,1,0.0,1880,0.0,61.0,360.0,0,Rural,0.0
|
||||
LP002911,Male,Yes,1,1,0.0,2787,1917.0,146.0,360.0,0,Rural,0.0
|
||||
LP002912,Male,Yes,1,1,0.0,4283,3000.0,172.0,84.0,1,Rural,0.0
|
||||
LP002916,Male,Yes,0,1,0.0,2297,1522.0,104.0,360.0,1,Urban,1.0
|
||||
LP002917,Female,No,0,0,0.0,2165,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP002925,,No,0,1,0.0,4750,0.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002926,Male,Yes,2,1,1.0,2726,0.0,106.0,360.0,0,Semiurban,0.0
|
||||
LP002928,Male,Yes,0,1,0.0,3000,3416.0,56.0,180.0,1,Semiurban,1.0
|
||||
LP002931,Male,Yes,2,1,1.0,6000,0.0,205.0,240.0,1,Semiurban,0.0
|
||||
LP002933,,No,3+,1,1.0,9357,0.0,292.0,360.0,1,Semiurban,1.0
|
||||
LP002936,Male,Yes,0,1,0.0,3859,3300.0,142.0,180.0,1,Rural,1.0
|
||||
LP002938,Male,Yes,0,1,1.0,16120,0.0,260.0,360.0,1,Urban,1.0
|
||||
LP002940,Male,No,0,0,0.0,3833,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002941,Male,Yes,2,0,1.0,6383,1000.0,187.0,360.0,1,Rural,0.0
|
||||
LP002943,Male,No,,1,0.0,2987,0.0,88.0,360.0,0,Semiurban,0.0
|
||||
LP002945,Male,Yes,0,1,1.0,9963,0.0,180.0,360.0,1,Rural,1.0
|
||||
LP002948,Male,Yes,2,1,0.0,5780,0.0,192.0,360.0,1,Urban,1.0
|
||||
LP002949,Female,No,3+,1,0.0,416,41667.0,350.0,180.0,0,Urban,0.0
|
||||
LP002950,Male,Yes,0,0,0.0,2894,2792.0,155.0,360.0,1,Rural,1.0
|
||||
LP002953,Male,Yes,3+,1,0.0,5703,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002958,Male,No,0,1,0.0,3676,4301.0,172.0,360.0,1,Rural,1.0
|
||||
LP002959,Female,Yes,1,1,0.0,12000,0.0,496.0,360.0,1,Semiurban,1.0
|
||||
LP002960,Male,Yes,0,0,0.0,2400,3800.0,0.0,180.0,1,Urban,0.0
|
||||
LP002961,Male,Yes,1,1,0.0,3400,2500.0,173.0,360.0,1,Semiurban,1.0
|
||||
LP002964,Male,Yes,2,0,0.0,3987,1411.0,157.0,360.0,1,Rural,1.0
|
||||
LP002974,Male,Yes,0,1,0.0,3232,1950.0,108.0,360.0,1,Rural,1.0
|
||||
LP002978,Female,No,0,1,0.0,2900,0.0,71.0,360.0,1,Rural,1.0
|
||||
LP002979,Male,Yes,3+,1,0.0,4106,0.0,40.0,180.0,1,Rural,1.0
|
||||
LP002983,Male,Yes,1,1,0.0,8072,240.0,253.0,360.0,1,Urban,1.0
|
||||
LP002984,Male,Yes,2,1,0.0,7583,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP002990,Female,No,0,1,1.0,4583,0.0,133.0,360.0,0,Semiurban,0.0
|
||||
|
BIN
abanin_daniil_lab_4/result.png
Normal file
|
After Width: | Height: | Size: 92 KiB |
38
abanin_daniil_lab_5/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
## Лабораторная работа №5
|
||||
|
||||
### Ранжирование признаков
|
||||
|
||||
## ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, pandas, matplotlib, sklearn
|
||||
* запустить проект (стартовая точка — lab5.py)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`, библиотеки pandas, matplotlib, sklearn
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
Программа решает задачу регрессии, используя полиномиальную регрессию.
|
||||
Цель - предсказать сумму займа (LoanAmount), используя имеющиеся признаки: ApplicantIncome - доход заявителя, Credit_History - статус соответствия кредитной истории стандартам банка,
|
||||
Education - наличие образования, Married - заявитель женат/замужем (Да/Нет), Self_Employed - самозанятый (Да/Нет)
|
||||
|
||||
### Тест
|
||||
Зелёные маркеры на графике - тестовые результаты
|
||||
Красные маркеры на графике - предсказанные результаты
|
||||
|
||||
При небольшом объёме тестовых данных алгоритм показывает неплохие результаты обучения
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Но при увеличении объёма данных алгоритм теряет свою эффективность
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Вывод: на малых объёмах данных алгоритм показывает свою эффективность, но при большем объёме стоит использовать другие методы для данного набора информации.
|
||||
BIN
abanin_daniil_lab_5/grade_1.png
Normal file
|
After Width: | Height: | Size: 13 KiB |
BIN
abanin_daniil_lab_5/grade_2.png
Normal file
|
After Width: | Height: | Size: 10 KiB |
33
abanin_daniil_lab_5/lab5.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn import metrics
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import PolynomialFeatures
|
||||
from sklearn.pipeline import Pipeline
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def start(csv_path='loan.csv', test_size=0.1, degree=3, random_state=42):
    """Fit a polynomial regression for ``LoanAmount`` and plot the hold-out results.

    Reads the CSV, keeps five feature columns plus the ``LoanAmount`` target,
    splits the rows into train and test parts, fits a
    ``PolynomialFeatures`` -> ``LinearRegression`` pipeline, prints the R^2
    score on the test part and shows a scatter plot in which green markers
    are the actual test targets and red markers are the predictions.

    Args:
        csv_path: Path to the CSV file to load; defaults to ``'loan.csv'``
            resolved against the current working directory.
        test_size: Share (or absolute number) of rows held out for testing,
            passed through to ``train_test_split``.
        degree: Degree of the polynomial feature expansion.
        random_state: Seed for the train/test split, so runs are repeatable.

    Returns:
        None. The score is printed and the figure is displayed.
    """
    feature_columns = ['ApplicantIncome', 'Credit_History', 'Education', 'Married', 'Self_Employed']
    target_column = 'LoanAmount'

    data = pd.read_csv(csv_path)
    # Empty CSV cells are loaded as NaN, and LinearRegression does not accept
    # missing values, so incomplete rows are removed up front. When the used
    # columns are complete this is a no-op and the split is unchanged.
    data = data.dropna(subset=feature_columns + [target_column])

    x = data[feature_columns]
    # Double brackets keep the target as a single-column DataFrame.
    y = data[[target_column]]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)

    poly = Pipeline([('poly', PolynomialFeatures(degree=degree)),
                     ('linear', LinearRegression())])
    poly.fit(x_train, y_train)

    y_predicted = poly.predict(x_test)

    print('Оценка обучения:')
    print(metrics.r2_score(y_test, y_predicted))

    # Both point clouds share the same x axis: the ordinal position of each
    # test sample, so an actual value and its prediction line up vertically.
    positions = list(range(len(x_test)))

    plt.figure(1, figsize=(16, 9))
    plt.title('Сравнение результатов обучения')
    plt.scatter(x=positions, y=y_test, c='green', s=5)
    plt.scatter(x=positions, y=y_predicted, c='red', s=5)
    plt.show()
|
||||
|
||||
|
||||
# Script entry point: the experiment runs as soon as the module is loaded.
# There is no `if __name__ == "__main__"` guard, so importing this module
# also triggers the run.
start()
|
||||
615
abanin_daniil_lab_5/loan.csv
Normal file
@@ -0,0 +1,615 @@
|
||||
Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
|
||||
LP001002,Male,0.0,0,1,0.0,5849,0.0,360.0,1.0,0,Y,0.0
|
||||
LP001003,Male,1.0,1,1,0.0,4583,1508.0,128.0,360.0,1,Rural,0.0
|
||||
LP001005,Male,1.0,0,1,1.0,3000,0.0,66.0,360.0,1,Urban,1.0
|
||||
LP001006,Male,1.0,0,0,0.0,2583,2358.0,120.0,360.0,1,Urban,1.0
|
||||
LP001008,Male,0.0,0,1,0.0,6000,0.0,141.0,360.0,1,Urban,1.0
|
||||
LP001011,Male,1.0,2,1,1.0,5417,4196.0,267.0,360.0,1,Urban,1.0
|
||||
LP001013,Male,1.0,0,0,0.0,2333,1516.0,95.0,360.0,1,Urban,1.0
|
||||
LP001014,Male,1.0,3+,1,0.0,3036,2504.0,158.0,360.0,0,Semiurban,0.0
|
||||
LP001018,Male,1.0,2,1,0.0,4006,1526.0,168.0,360.0,1,Urban,1.0
|
||||
LP001020,Male,1.0,1,1,0.0,12841,10968.0,349.0,360.0,1,Semiurban,0.0
|
||||
LP001024,Male,1.0,2,1,0.0,3200,700.0,70.0,360.0,1,Urban,1.0
|
||||
LP001027,Male,1.0,2,1,0.0,2500,1840.0,109.0,360.0,1,Urban,1.0
|
||||
LP001028,Male,1.0,2,1,0.0,3073,8106.0,200.0,360.0,1,Urban,1.0
|
||||
LP001029,Male,0.0,0,1,0.0,1853,2840.0,114.0,360.0,1,Rural,0.0
|
||||
LP001030,Male,1.0,2,1,0.0,1299,1086.0,17.0,120.0,1,Urban,1.0
|
||||
LP001032,Male,0.0,0,1,0.0,4950,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP001034,Male,0.0,1,0,0.0,3596,0.0,100.0,240.0,0,Urban,1.0
|
||||
LP001036,Female,0.0,0,1,0.0,3510,0.0,76.0,360.0,0,Urban,0.0
|
||||
LP001038,Male,1.0,0,0,0.0,4887,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP001041,Male,1.0,0,1,0.0,2600,3500.0,115.0,,1,Urban,1.0
|
||||
LP001043,Male,1.0,0,0,0.0,7660,0.0,104.0,360.0,0,Urban,0.0
|
||||
LP001046,Male,1.0,1,1,0.0,5955,5625.0,315.0,360.0,1,Urban,1.0
|
||||
LP001047,Male,1.0,0,0,0.0,2600,1911.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001050,,1.0,2,0,0.0,3365,1917.0,112.0,360.0,0,Rural,0.0
|
||||
LP001052,Male,1.0,1,1,0.0,3717,2925.0,151.0,360.0,0,Semiurban,0.0
|
||||
LP001066,Male,1.0,0,1,1.0,9560,0.0,191.0,360.0,1,Semiurban,1.0
|
||||
LP001068,Male,1.0,0,1,0.0,2799,2253.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001073,Male,1.0,2,0,0.0,4226,1040.0,110.0,360.0,1,Urban,1.0
|
||||
LP001086,Male,0.0,0,0,0.0,1442,0.0,35.0,360.0,1,Urban,0.0
|
||||
LP001087,Female,0.0,2,1,0.0,3750,2083.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001091,Male,1.0,1,1,0.0,4166,3369.0,201.0,360.0,0,Urban,0.0
|
||||
LP001095,Male,0.0,0,1,0.0,3167,0.0,74.0,360.0,1,Urban,0.0
|
||||
LP001097,Male,0.0,1,1,1.0,4692,0.0,106.0,360.0,1,Rural,0.0
|
||||
LP001098,Male,1.0,0,1,0.0,3500,1667.0,114.0,360.0,1,Semiurban,1.0
|
||||
LP001100,Male,0.0,3+,1,0.0,12500,3000.0,320.0,360.0,1,Rural,0.0
|
||||
LP001106,Male,1.0,0,1,0.0,2275,2067.0,0.0,360.0,1,Urban,1.0
|
||||
LP001109,Male,1.0,0,1,0.0,1828,1330.0,100.0,,0,Urban,0.0
|
||||
LP001112,Female,1.0,0,1,0.0,3667,1459.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001114,Male,0.0,0,1,0.0,4166,7210.0,184.0,360.0,1,Urban,1.0
|
||||
LP001116,Male,0.0,0,0,0.0,3748,1668.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP001119,Male,0.0,0,1,0.0,3600,0.0,80.0,360.0,1,Urban,0.0
|
||||
LP001120,Male,0.0,0,1,0.0,1800,1213.0,47.0,360.0,1,Urban,1.0
|
||||
LP001123,Male,1.0,0,1,0.0,2400,0.0,75.0,360.0,0,Urban,1.0
|
||||
LP001131,Male,1.0,0,1,0.0,3941,2336.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001136,Male,1.0,0,0,1.0,4695,0.0,96.0,,1,Urban,1.0
|
||||
LP001137,Female,0.0,0,1,0.0,3410,0.0,88.0,,1,Urban,1.0
|
||||
LP001138,Male,1.0,1,1,0.0,5649,0.0,44.0,360.0,1,Urban,1.0
|
||||
LP001144,Male,1.0,0,1,0.0,5821,0.0,144.0,360.0,1,Urban,1.0
|
||||
LP001146,Female,1.0,0,1,0.0,2645,3440.0,120.0,360.0,0,Urban,0.0
|
||||
LP001151,Female,0.0,0,1,0.0,4000,2275.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001155,Female,1.0,0,0,0.0,1928,1644.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001157,Female,0.0,0,1,0.0,3086,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001164,Female,0.0,0,1,0.0,4230,0.0,112.0,360.0,1,Semiurban,0.0
|
||||
LP001179,Male,1.0,2,1,0.0,4616,0.0,134.0,360.0,1,Urban,0.0
|
||||
LP001186,Female,1.0,1,1,1.0,11500,0.0,286.0,360.0,0,Urban,0.0
|
||||
LP001194,Male,1.0,2,1,0.0,2708,1167.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001195,Male,1.0,0,1,0.0,2132,1591.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001197,Male,1.0,0,1,0.0,3366,2200.0,135.0,360.0,1,Rural,0.0
|
||||
LP001198,Male,1.0,1,1,0.0,8080,2250.0,180.0,360.0,1,Urban,1.0
|
||||
LP001199,Male,1.0,2,0,0.0,3357,2859.0,144.0,360.0,1,Urban,1.0
|
||||
LP001205,Male,1.0,0,1,0.0,2500,3796.0,120.0,360.0,1,Urban,1.0
|
||||
LP001206,Male,1.0,3+,1,0.0,3029,0.0,99.0,360.0,1,Urban,1.0
|
||||
LP001207,Male,1.0,0,0,1.0,2609,3449.0,165.0,180.0,0,Rural,0.0
|
||||
LP001213,Male,1.0,1,1,0.0,4945,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP001222,Female,0.0,0,1,0.0,4166,0.0,116.0,360.0,0,Semiurban,0.0
|
||||
LP001225,Male,1.0,0,1,0.0,5726,4595.0,258.0,360.0,1,Semiurban,0.0
|
||||
LP001228,Male,0.0,0,0,0.0,3200,2254.0,126.0,180.0,0,Urban,0.0
|
||||
LP001233,Male,1.0,1,1,0.0,10750,0.0,312.0,360.0,1,Urban,1.0
|
||||
LP001238,Male,1.0,3+,0,1.0,7100,0.0,125.0,60.0,1,Urban,1.0
|
||||
LP001241,Female,0.0,0,1,0.0,4300,0.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP001243,Male,1.0,0,1,0.0,3208,3066.0,172.0,360.0,1,Urban,1.0
|
||||
LP001245,Male,1.0,2,0,1.0,1875,1875.0,97.0,360.0,1,Semiurban,1.0
|
||||
LP001248,Male,0.0,0,1,0.0,3500,0.0,81.0,300.0,1,Semiurban,1.0
|
||||
LP001250,Male,1.0,3+,0,0.0,4755,0.0,95.0,,0,Semiurban,0.0
|
||||
LP001253,Male,1.0,3+,1,1.0,5266,1774.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001255,Male,0.0,0,1,0.0,3750,0.0,113.0,480.0,1,Urban,0.0
|
||||
LP001256,Male,0.0,0,1,0.0,3750,4750.0,176.0,360.0,1,Urban,0.0
|
||||
LP001259,Male,1.0,1,1,1.0,1000,3022.0,110.0,360.0,1,Urban,0.0
|
||||
LP001263,Male,1.0,3+,1,0.0,3167,4000.0,180.0,300.0,0,Semiurban,0.0
|
||||
LP001264,Male,1.0,3+,0,1.0,3333,2166.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP001265,Female,0.0,0,1,0.0,3846,0.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP001266,Male,1.0,1,1,1.0,2395,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001267,Female,1.0,2,1,0.0,1378,1881.0,167.0,360.0,1,Urban,0.0
|
||||
LP001273,Male,1.0,0,1,0.0,6000,2250.0,265.0,360.0,0,Semiurban,0.0
|
||||
LP001275,Male,1.0,1,1,0.0,3988,0.0,50.0,240.0,1,Urban,1.0
|
||||
LP001279,Male,0.0,0,1,0.0,2366,2531.0,136.0,360.0,1,Semiurban,1.0
|
||||
LP001280,Male,1.0,2,0,0.0,3333,2000.0,99.0,360.0,0,Semiurban,1.0
|
||||
LP001282,Male,1.0,0,1,0.0,2500,2118.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001289,Male,0.0,0,1,0.0,8566,0.0,210.0,360.0,1,Urban,1.0
|
||||
LP001310,Male,1.0,0,1,0.0,5695,4167.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP001316,Male,1.0,0,1,0.0,2958,2900.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001318,Male,1.0,2,1,0.0,6250,5654.0,188.0,180.0,1,Semiurban,1.0
|
||||
LP001319,Male,1.0,2,0,0.0,3273,1820.0,81.0,360.0,1,Urban,1.0
|
||||
LP001322,Male,0.0,0,1,0.0,4133,0.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001325,Male,0.0,0,0,0.0,3620,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001326,Male,0.0,0,1,0.0,6782,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP001327,Female,1.0,0,1,0.0,2484,2302.0,137.0,360.0,1,Semiurban,1.0
|
||||
LP001333,Male,1.0,0,1,0.0,1977,997.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP001334,Male,1.0,0,0,0.0,4188,0.0,115.0,180.0,1,Semiurban,1.0
|
||||
LP001343,Male,1.0,0,1,0.0,1759,3541.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001345,Male,1.0,2,0,0.0,4288,3263.0,133.0,180.0,1,Urban,1.0
|
||||
LP001349,Male,0.0,0,1,0.0,4843,3806.0,151.0,360.0,1,Semiurban,1.0
|
||||
LP001350,Male,1.0,,1,0.0,13650,0.0,0.0,360.0,1,Urban,1.0
|
||||
LP001356,Male,1.0,0,1,0.0,4652,3583.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001357,Male,0.0,,1,0.0,3816,754.0,160.0,360.0,1,Urban,1.0
|
||||
LP001367,Male,1.0,1,1,0.0,3052,1030.0,100.0,360.0,1,Urban,1.0
|
||||
LP001369,Male,1.0,2,1,0.0,11417,1126.0,225.0,360.0,1,Urban,1.0
|
||||
LP001370,Male,0.0,0,0,0.0,7333,0.0,120.0,360.0,1,Rural,0.0
|
||||
LP001379,Male,1.0,2,1,0.0,3800,3600.0,216.0,360.0,0,Urban,0.0
|
||||
LP001384,Male,1.0,3+,0,0.0,2071,754.0,94.0,480.0,1,Semiurban,1.0
|
||||
LP001385,Male,0.0,0,1,0.0,5316,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001387,Female,1.0,0,1,0.0,2929,2333.0,139.0,360.0,1,Semiurban,1.0
|
||||
LP001391,Male,1.0,0,0,0.0,3572,4114.0,152.0,,0,Rural,0.0
|
||||
LP001392,Female,0.0,1,1,1.0,7451,0.0,0.0,360.0,1,Semiurban,1.0
|
||||
LP001398,Male,0.0,0,1,0.0,5050,0.0,118.0,360.0,1,Semiurban,1.0
|
||||
LP001401,Male,1.0,1,1,0.0,14583,0.0,185.0,180.0,1,Rural,1.0
|
||||
LP001404,Female,1.0,0,1,0.0,3167,2283.0,154.0,360.0,1,Semiurban,1.0
|
||||
LP001405,Male,1.0,1,1,0.0,2214,1398.0,85.0,360.0,0,Urban,1.0
|
||||
LP001421,Male,1.0,0,1,0.0,5568,2142.0,175.0,360.0,1,Rural,0.0
|
||||
LP001422,Female,0.0,0,1,0.0,10408,0.0,259.0,360.0,1,Urban,1.0
|
||||
LP001426,Male,1.0,,1,0.0,5667,2667.0,180.0,360.0,1,Rural,1.0
|
||||
LP001430,Female,0.0,0,1,0.0,4166,0.0,44.0,360.0,1,Semiurban,1.0
|
||||
LP001431,Female,0.0,0,1,0.0,2137,8980.0,137.0,360.0,0,Semiurban,1.0
|
||||
LP001432,Male,1.0,2,1,0.0,2957,0.0,81.0,360.0,1,Semiurban,1.0
|
||||
LP001439,Male,1.0,0,0,0.0,4300,2014.0,194.0,360.0,1,Rural,1.0
|
||||
LP001443,Female,0.0,0,1,0.0,3692,0.0,93.0,360.0,0,Rural,1.0
|
||||
LP001448,,1.0,3+,1,0.0,23803,0.0,370.0,360.0,1,Rural,1.0
|
||||
LP001449,Male,0.0,0,1,0.0,3865,1640.0,0.0,360.0,1,Rural,1.0
|
||||
LP001451,Male,1.0,1,1,1.0,10513,3850.0,160.0,180.0,0,Urban,0.0
|
||||
LP001465,Male,1.0,0,1,0.0,6080,2569.0,182.0,360.0,0,Rural,0.0
|
||||
LP001469,Male,0.0,0,1,1.0,20166,0.0,650.0,480.0,0,Urban,1.0
|
||||
LP001473,Male,0.0,0,1,0.0,2014,1929.0,74.0,360.0,1,Urban,1.0
|
||||
LP001478,Male,0.0,0,1,0.0,2718,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP001482,Male,1.0,0,1,1.0,3459,0.0,25.0,120.0,1,Semiurban,1.0
|
||||
LP001487,Male,0.0,0,1,0.0,4895,0.0,102.0,360.0,1,Semiurban,1.0
|
||||
LP001488,Male,1.0,3+,1,0.0,4000,7750.0,290.0,360.0,1,Semiurban,0.0
|
||||
LP001489,Female,1.0,0,1,0.0,4583,0.0,84.0,360.0,1,Rural,0.0
|
||||
LP001491,Male,1.0,2,1,1.0,3316,3500.0,88.0,360.0,1,Urban,1.0
|
||||
LP001492,Male,0.0,0,1,0.0,14999,0.0,242.0,360.0,0,Semiurban,0.0
|
||||
LP001493,Male,1.0,2,0,0.0,4200,1430.0,129.0,360.0,1,Rural,0.0
|
||||
LP001497,Male,1.0,2,1,0.0,5042,2083.0,185.0,360.0,1,Rural,0.0
|
||||
LP001498,Male,0.0,0,1,0.0,5417,0.0,168.0,360.0,1,Urban,1.0
|
||||
LP001504,Male,0.0,0,1,1.0,6950,0.0,175.0,180.0,1,Semiurban,1.0
|
||||
LP001507,Male,1.0,0,1,0.0,2698,2034.0,122.0,360.0,1,Semiurban,1.0
|
||||
LP001508,Male,1.0,2,1,0.0,11757,0.0,187.0,180.0,1,Urban,1.0
|
||||
LP001514,Female,1.0,0,1,0.0,2330,4486.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001516,Female,1.0,2,1,0.0,14866,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP001518,Male,1.0,1,1,0.0,1538,1425.0,30.0,360.0,1,Urban,1.0
|
||||
LP001519,Female,0.0,0,1,0.0,10000,1666.0,225.0,360.0,1,Rural,0.0
|
||||
LP001520,Male,1.0,0,1,0.0,4860,830.0,125.0,360.0,1,Semiurban,1.0
|
||||
LP001528,Male,0.0,0,1,0.0,6277,0.0,118.0,360.0,0,Rural,0.0
|
||||
LP001529,Male,1.0,0,1,1.0,2577,3750.0,152.0,360.0,1,Rural,1.0
|
||||
LP001531,Male,0.0,0,1,0.0,9166,0.0,244.0,360.0,1,Urban,0.0
|
||||
LP001532,Male,1.0,2,0,0.0,2281,0.0,113.0,360.0,1,Rural,0.0
|
||||
LP001535,Male,0.0,0,1,0.0,3254,0.0,50.0,360.0,1,Urban,1.0
|
||||
LP001536,Male,1.0,3+,1,0.0,39999,0.0,600.0,180.0,0,Semiurban,1.0
|
||||
LP001541,Male,1.0,1,1,0.0,6000,0.0,160.0,360.0,0,Rural,1.0
|
||||
LP001543,Male,1.0,1,1,0.0,9538,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP001546,Male,0.0,0,1,0.0,2980,2083.0,120.0,360.0,1,Rural,1.0
|
||||
LP001552,Male,1.0,0,1,0.0,4583,5625.0,255.0,360.0,1,Semiurban,1.0
|
||||
LP001560,Male,1.0,0,0,0.0,1863,1041.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP001562,Male,1.0,0,1,0.0,7933,0.0,275.0,360.0,1,Urban,0.0
|
||||
LP001565,Male,1.0,1,1,0.0,3089,1280.0,121.0,360.0,0,Semiurban,0.0
|
||||
LP001570,Male,1.0,2,1,0.0,4167,1447.0,158.0,360.0,1,Rural,1.0
|
||||
LP001572,Male,1.0,0,1,0.0,9323,0.0,75.0,180.0,1,Urban,1.0
|
||||
LP001574,Male,1.0,0,1,0.0,3707,3166.0,182.0,,1,Rural,1.0
|
||||
LP001577,Female,1.0,0,1,0.0,4583,0.0,112.0,360.0,1,Rural,0.0
|
||||
LP001578,Male,1.0,0,1,0.0,2439,3333.0,129.0,360.0,1,Rural,1.0
|
||||
LP001579,Male,0.0,0,1,0.0,2237,0.0,63.0,480.0,0,Semiurban,0.0
|
||||
LP001580,Male,1.0,2,1,0.0,8000,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001581,Male,1.0,0,0,0.0,1820,1769.0,95.0,360.0,1,Rural,1.0
|
||||
LP001585,,1.0,3+,1,0.0,51763,0.0,700.0,300.0,1,Urban,1.0
|
||||
LP001586,Male,1.0,3+,0,0.0,3522,0.0,81.0,180.0,1,Rural,0.0
|
||||
LP001594,Male,1.0,0,1,0.0,5708,5625.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP001603,Male,1.0,0,0,1.0,4344,736.0,87.0,360.0,1,Semiurban,0.0
|
||||
LP001606,Male,1.0,0,1,0.0,3497,1964.0,116.0,360.0,1,Rural,1.0
|
||||
LP001608,Male,1.0,2,1,0.0,2045,1619.0,101.0,360.0,1,Rural,1.0
|
||||
LP001610,Male,1.0,3+,1,0.0,5516,11300.0,495.0,360.0,0,Semiurban,0.0
|
||||
LP001616,Male,1.0,1,1,0.0,3750,0.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001630,Male,0.0,0,0,0.0,2333,1451.0,102.0,480.0,0,Urban,0.0
|
||||
LP001633,Male,1.0,1,1,0.0,6400,7250.0,180.0,360.0,0,Urban,0.0
|
||||
LP001634,Male,0.0,0,1,0.0,1916,5063.0,67.0,360.0,0,Rural,0.0
|
||||
LP001636,Male,1.0,0,1,0.0,4600,0.0,73.0,180.0,1,Semiurban,1.0
|
||||
LP001637,Male,1.0,1,1,0.0,33846,0.0,260.0,360.0,1,Semiurban,0.0
|
||||
LP001639,Female,1.0,0,1,0.0,3625,0.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP001640,Male,1.0,0,1,1.0,39147,4750.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001641,Male,1.0,1,1,1.0,2178,0.0,66.0,300.0,0,Rural,0.0
|
||||
LP001643,Male,1.0,0,1,0.0,2383,2138.0,58.0,360.0,0,Rural,1.0
|
||||
LP001644,,1.0,0,1,1.0,674,5296.0,168.0,360.0,1,Rural,1.0
|
||||
LP001647,Male,1.0,0,1,0.0,9328,0.0,188.0,180.0,1,Rural,1.0
|
||||
LP001653,Male,0.0,0,0,0.0,4885,0.0,48.0,360.0,1,Rural,1.0
|
||||
LP001656,Male,0.0,0,1,0.0,12000,0.0,164.0,360.0,1,Semiurban,0.0
|
||||
LP001657,Male,1.0,0,0,0.0,6033,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP001658,Male,0.0,0,1,0.0,3858,0.0,76.0,360.0,1,Semiurban,1.0
|
||||
LP001664,Male,0.0,0,1,0.0,4191,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001665,Male,1.0,1,1,0.0,3125,2583.0,170.0,360.0,1,Semiurban,0.0
|
||||
LP001666,Male,0.0,0,1,0.0,8333,3750.0,187.0,360.0,1,Rural,1.0
|
||||
LP001669,Female,0.0,0,0,0.0,1907,2365.0,120.0,,1,Urban,1.0
|
||||
LP001671,Female,1.0,0,1,0.0,3416,2816.0,113.0,360.0,0,Semiurban,1.0
|
||||
LP001673,Male,0.0,0,1,1.0,11000,0.0,83.0,360.0,1,Urban,0.0
|
||||
LP001674,Male,1.0,1,0,0.0,2600,2500.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001677,Male,0.0,2,1,0.0,4923,0.0,166.0,360.0,0,Semiurban,1.0
|
||||
LP001682,Male,1.0,3+,0,0.0,3992,0.0,0.0,180.0,1,Urban,0.0
|
||||
LP001688,Male,1.0,1,0,0.0,3500,1083.0,135.0,360.0,1,Urban,1.0
|
||||
LP001691,Male,1.0,2,0,0.0,3917,0.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP001692,Female,0.0,0,0,0.0,4408,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP001693,Female,0.0,0,1,0.0,3244,0.0,80.0,360.0,1,Urban,1.0
|
||||
LP001698,Male,0.0,0,0,0.0,3975,2531.0,55.0,360.0,1,Rural,1.0
|
||||
LP001699,Male,0.0,0,1,0.0,2479,0.0,59.0,360.0,1,Urban,1.0
|
||||
LP001702,Male,0.0,0,1,0.0,3418,0.0,127.0,360.0,1,Semiurban,0.0
|
||||
LP001708,Female,0.0,0,1,0.0,10000,0.0,214.0,360.0,1,Semiurban,0.0
|
||||
LP001711,Male,1.0,3+,1,0.0,3430,1250.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001713,Male,1.0,1,1,1.0,7787,0.0,240.0,360.0,1,Urban,1.0
|
||||
LP001715,Male,1.0,3+,0,1.0,5703,0.0,130.0,360.0,1,Rural,1.0
|
||||
LP001716,Male,1.0,0,1,0.0,3173,3021.0,137.0,360.0,1,Urban,1.0
|
||||
LP001720,Male,1.0,3+,0,0.0,3850,983.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP001722,Male,1.0,0,1,0.0,150,1800.0,135.0,360.0,1,Rural,0.0
|
||||
LP001726,Male,1.0,0,1,0.0,3727,1775.0,131.0,360.0,1,Semiurban,1.0
|
||||
LP001732,Male,1.0,2,1,0.0,5000,0.0,72.0,360.0,0,Semiurban,0.0
|
||||
LP001734,Female,1.0,2,1,0.0,4283,2383.0,127.0,360.0,0,Semiurban,1.0
|
||||
LP001736,Male,1.0,0,1,0.0,2221,0.0,60.0,360.0,0,Urban,0.0
|
||||
LP001743,Male,1.0,2,1,0.0,4009,1717.0,116.0,360.0,1,Semiurban,1.0
|
||||
LP001744,Male,0.0,0,1,0.0,2971,2791.0,144.0,360.0,1,Semiurban,1.0
|
||||
LP001749,Male,1.0,0,1,0.0,7578,1010.0,175.0,,1,Semiurban,1.0
|
||||
LP001750,Male,1.0,0,1,0.0,6250,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001751,Male,1.0,0,1,0.0,3250,0.0,170.0,360.0,1,Rural,0.0
|
||||
LP001754,Male,1.0,,0,1.0,4735,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001758,Male,1.0,2,1,0.0,6250,1695.0,210.0,360.0,1,Semiurban,1.0
|
||||
LP001760,Male,0.0,,1,0.0,4758,0.0,158.0,480.0,1,Semiurban,1.0
|
||||
LP001761,Male,0.0,0,1,1.0,6400,0.0,200.0,360.0,1,Rural,1.0
|
||||
LP001765,Male,1.0,1,1,0.0,2491,2054.0,104.0,360.0,1,Semiurban,1.0
|
||||
LP001768,Male,1.0,0,1,0.0,3716,0.0,42.0,180.0,1,Rural,1.0
|
||||
LP001770,Male,0.0,0,0,0.0,3189,2598.0,120.0,,1,Rural,1.0
|
||||
LP001776,Female,0.0,0,1,0.0,8333,0.0,280.0,360.0,1,Semiurban,1.0
|
||||
LP001778,Male,1.0,1,1,0.0,3155,1779.0,140.0,360.0,1,Semiurban,1.0
|
||||
LP001784,Male,1.0,1,1,0.0,5500,1260.0,170.0,360.0,1,Rural,1.0
|
||||
LP001786,Male,1.0,0,1,0.0,5746,0.0,255.0,360.0,0,Urban,0.0
|
||||
LP001788,Female,0.0,0,1,1.0,3463,0.0,122.0,360.0,0,Urban,1.0
|
||||
LP001790,Female,0.0,1,1,0.0,3812,0.0,112.0,360.0,1,Rural,1.0
|
||||
LP001792,Male,1.0,1,1,0.0,3315,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP001798,Male,1.0,2,1,0.0,5819,5000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001800,Male,1.0,1,0,0.0,2510,1983.0,140.0,180.0,1,Urban,0.0
|
||||
LP001806,Male,0.0,0,1,0.0,2965,5701.0,155.0,60.0,1,Urban,1.0
|
||||
LP001807,Male,1.0,2,1,1.0,6250,1300.0,108.0,360.0,1,Rural,1.0
|
||||
LP001811,Male,1.0,0,0,0.0,3406,4417.0,123.0,360.0,1,Semiurban,1.0
|
||||
LP001813,Male,0.0,0,1,1.0,6050,4333.0,120.0,180.0,1,Urban,0.0
|
||||
LP001814,Male,1.0,2,1,0.0,9703,0.0,112.0,360.0,1,Urban,1.0
|
||||
LP001819,Male,1.0,1,0,0.0,6608,0.0,137.0,180.0,1,Urban,1.0
|
||||
LP001824,Male,1.0,1,1,0.0,2882,1843.0,123.0,480.0,1,Semiurban,1.0
|
||||
LP001825,Male,1.0,0,1,0.0,1809,1868.0,90.0,360.0,1,Urban,1.0
|
||||
LP001835,Male,1.0,0,0,0.0,1668,3890.0,201.0,360.0,0,Semiurban,0.0
|
||||
LP001836,Female,0.0,2,1,0.0,3427,0.0,138.0,360.0,1,Urban,0.0
|
||||
LP001841,Male,0.0,0,0,1.0,2583,2167.0,104.0,360.0,1,Rural,1.0
|
||||
LP001843,Male,1.0,1,0,0.0,2661,7101.0,279.0,180.0,1,Semiurban,1.0
|
||||
LP001844,Male,0.0,0,1,1.0,16250,0.0,192.0,360.0,0,Urban,0.0
|
||||
LP001846,Female,0.0,3+,1,0.0,3083,0.0,255.0,360.0,1,Rural,1.0
|
||||
LP001849,Male,0.0,0,0,0.0,6045,0.0,115.0,360.0,0,Rural,0.0
|
||||
LP001854,Male,1.0,3+,1,0.0,5250,0.0,94.0,360.0,1,Urban,0.0
|
||||
LP001859,Male,1.0,0,1,0.0,14683,2100.0,304.0,360.0,1,Rural,0.0
|
||||
LP001864,Male,1.0,3+,0,0.0,4931,0.0,128.0,360.0,0,Semiurban,0.0
|
||||
LP001865,Male,1.0,1,1,0.0,6083,4250.0,330.0,360.0,0,Urban,1.0
|
||||
LP001868,Male,0.0,0,1,0.0,2060,2209.0,134.0,360.0,1,Semiurban,1.0
|
||||
LP001870,Female,0.0,1,1,0.0,3481,0.0,155.0,36.0,1,Semiurban,0.0
|
||||
LP001871,Female,0.0,0,1,0.0,7200,0.0,120.0,360.0,1,Rural,1.0
|
||||
LP001872,Male,0.0,0,1,1.0,5166,0.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP001875,Male,0.0,0,1,0.0,4095,3447.0,151.0,360.0,1,Rural,1.0
|
||||
LP001877,Male,1.0,2,1,0.0,4708,1387.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001882,Male,1.0,3+,1,0.0,4333,1811.0,160.0,360.0,0,Urban,1.0
|
||||
LP001883,Female,0.0,0,1,0.0,3418,0.0,135.0,360.0,1,Rural,0.0
|
||||
LP001884,Female,0.0,1,1,0.0,2876,1560.0,90.0,360.0,1,Urban,1.0
|
||||
LP001888,Female,0.0,0,1,0.0,3237,0.0,30.0,360.0,1,Urban,1.0
|
||||
LP001891,Male,1.0,0,1,0.0,11146,0.0,136.0,360.0,1,Urban,1.0
|
||||
LP001892,Male,0.0,0,1,0.0,2833,1857.0,126.0,360.0,1,Rural,1.0
|
||||
LP001894,Male,1.0,0,1,0.0,2620,2223.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP001896,Male,1.0,2,1,0.0,3900,0.0,90.0,360.0,1,Semiurban,1.0
|
||||
LP001900,Male,1.0,1,1,0.0,2750,1842.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001903,Male,1.0,0,1,0.0,3993,3274.0,207.0,360.0,1,Semiurban,1.0
|
||||
LP001904,Male,1.0,0,1,0.0,3103,1300.0,80.0,360.0,1,Urban,1.0
|
||||
LP001907,Male,1.0,0,1,0.0,14583,0.0,436.0,360.0,1,Semiurban,1.0
|
||||
LP001908,Female,1.0,0,0,0.0,4100,0.0,124.0,360.0,0,Rural,1.0
|
||||
LP001910,Male,0.0,1,0,1.0,4053,2426.0,158.0,360.0,0,Urban,0.0
|
||||
LP001914,Male,1.0,0,1,0.0,3927,800.0,112.0,360.0,1,Semiurban,1.0
|
||||
LP001915,Male,1.0,2,1,0.0,2301,985.7999878,78.0,180.0,1,Urban,1.0
|
||||
LP001917,Female,0.0,0,1,0.0,1811,1666.0,54.0,360.0,1,Urban,1.0
|
||||
LP001922,Male,1.0,0,1,0.0,20667,0.0,0.0,360.0,1,Rural,0.0
|
||||
LP001924,Male,0.0,0,1,0.0,3158,3053.0,89.0,360.0,1,Rural,1.0
|
||||
LP001925,Female,0.0,0,1,1.0,2600,1717.0,99.0,300.0,1,Semiurban,0.0
|
||||
LP001926,Male,1.0,0,1,0.0,3704,2000.0,120.0,360.0,1,Rural,1.0
|
||||
LP001931,Female,0.0,0,1,0.0,4124,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP001935,Male,0.0,0,1,0.0,9508,0.0,187.0,360.0,1,Rural,1.0
|
||||
LP001936,Male,1.0,0,1,0.0,3075,2416.0,139.0,360.0,1,Rural,1.0
|
||||
LP001938,Male,1.0,2,1,0.0,4400,0.0,127.0,360.0,0,Semiurban,0.0
|
||||
LP001940,Male,1.0,2,1,0.0,3153,1560.0,134.0,360.0,1,Urban,1.0
|
||||
LP001945,Female,0.0,,1,0.0,5417,0.0,143.0,480.0,0,Urban,0.0
|
||||
LP001947,Male,1.0,0,1,0.0,2383,3334.0,172.0,360.0,1,Semiurban,1.0
|
||||
LP001949,Male,1.0,3+,1,0.0,4416,1250.0,110.0,360.0,1,Urban,1.0
|
||||
LP001953,Male,1.0,1,1,0.0,6875,0.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP001954,Female,1.0,1,1,0.0,4666,0.0,135.0,360.0,1,Urban,1.0
|
||||
LP001955,Female,0.0,0,1,0.0,5000,2541.0,151.0,480.0,1,Rural,0.0
|
||||
LP001963,Male,1.0,1,1,0.0,2014,2925.0,113.0,360.0,1,Urban,0.0
|
||||
LP001964,Male,1.0,0,0,0.0,1800,2934.0,93.0,360.0,0,Urban,0.0
|
||||
LP001972,Male,1.0,,0,0.0,2875,1750.0,105.0,360.0,1,Semiurban,1.0
|
||||
LP001974,Female,0.0,0,1,0.0,5000,0.0,132.0,360.0,1,Rural,1.0
|
||||
LP001977,Male,1.0,1,1,0.0,1625,1803.0,96.0,360.0,1,Urban,1.0
|
||||
LP001978,Male,0.0,0,1,0.0,4000,2500.0,140.0,360.0,1,Rural,1.0
|
||||
LP001990,Male,0.0,0,0,0.0,2000,0.0,0.0,360.0,1,Urban,0.0
|
||||
LP001993,Female,0.0,0,1,0.0,3762,1666.0,135.0,360.0,1,Rural,1.0
|
||||
LP001994,Female,0.0,0,1,0.0,2400,1863.0,104.0,360.0,0,Urban,0.0
|
||||
LP001996,Male,0.0,0,1,0.0,20233,0.0,480.0,360.0,1,Rural,0.0
|
||||
LP001998,Male,1.0,2,0,0.0,7667,0.0,185.0,360.0,0,Rural,1.0
|
||||
LP002002,Female,0.0,0,1,0.0,2917,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002004,Male,0.0,0,0,0.0,2927,2405.0,111.0,360.0,1,Semiurban,1.0
|
||||
LP002006,Female,0.0,0,1,0.0,2507,0.0,56.0,360.0,1,Rural,1.0
|
||||
LP002008,Male,1.0,2,1,1.0,5746,0.0,144.0,84.0,0,Rural,1.0
|
||||
LP002024,,1.0,0,1,0.0,2473,1843.0,159.0,360.0,1,Rural,0.0
|
||||
LP002031,Male,1.0,1,0,0.0,3399,1640.0,111.0,180.0,1,Urban,1.0
|
||||
LP002035,Male,1.0,2,1,0.0,3717,0.0,120.0,360.0,1,Semiurban,1.0
|
||||
LP002036,Male,1.0,0,1,0.0,2058,2134.0,88.0,360.0,0,Urban,1.0
|
||||
LP002043,Female,0.0,1,1,0.0,3541,0.0,112.0,360.0,0,Semiurban,1.0
|
||||
LP002050,Male,1.0,1,1,1.0,10000,0.0,155.0,360.0,1,Rural,0.0
|
||||
LP002051,Male,1.0,0,1,0.0,2400,2167.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002053,Male,1.0,3+,1,0.0,4342,189.0,124.0,360.0,1,Semiurban,1.0
|
||||
LP002054,Male,1.0,2,0,0.0,3601,1590.0,0.0,360.0,1,Rural,1.0
|
||||
LP002055,Female,0.0,0,1,0.0,3166,2985.0,132.0,360.0,0,Rural,1.0
|
||||
LP002065,Male,1.0,3+,1,0.0,15000,0.0,300.0,360.0,1,Rural,1.0
|
||||
LP002067,Male,1.0,1,1,1.0,8666,4983.0,376.0,360.0,0,Rural,0.0
|
||||
LP002068,Male,0.0,0,1,0.0,4917,0.0,130.0,360.0,0,Rural,1.0
|
||||
LP002082,Male,1.0,0,1,1.0,5818,2160.0,184.0,360.0,1,Semiurban,1.0
|
||||
LP002086,Female,1.0,0,1,0.0,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002087,Female,0.0,0,1,0.0,2500,0.0,67.0,360.0,1,Urban,1.0
|
||||
LP002097,Male,0.0,1,1,0.0,4384,1793.0,117.0,360.0,1,Urban,1.0
|
||||
LP002098,Male,0.0,0,1,0.0,2935,0.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002100,Male,0.0,,1,0.0,2833,0.0,71.0,360.0,1,Urban,1.0
|
||||
LP002101,Male,1.0,0,1,0.0,63337,0.0,490.0,180.0,1,Urban,1.0
|
||||
LP002103,,1.0,1,1,1.0,9833,1833.0,182.0,180.0,1,Urban,1.0
|
||||
LP002106,Male,1.0,,1,1.0,5503,4490.0,70.0,,1,Semiurban,1.0
|
||||
LP002110,Male,1.0,1,1,0.0,5250,688.0,160.0,360.0,1,Rural,1.0
|
||||
LP002112,Male,1.0,2,1,1.0,2500,4600.0,176.0,360.0,1,Rural,1.0
|
||||
LP002113,Female,0.0,3+,0,0.0,1830,0.0,0.0,360.0,0,Urban,0.0
|
||||
LP002114,Female,0.0,0,1,0.0,4160,0.0,71.0,360.0,1,Semiurban,1.0
|
||||
LP002115,Male,1.0,3+,0,0.0,2647,1587.0,173.0,360.0,1,Rural,0.0
|
||||
LP002116,Female,0.0,0,1,0.0,2378,0.0,46.0,360.0,1,Rural,0.0
|
||||
LP002119,Male,1.0,1,0,0.0,4554,1229.0,158.0,360.0,1,Urban,1.0
|
||||
LP002126,Male,1.0,3+,0,0.0,3173,0.0,74.0,360.0,1,Semiurban,1.0
|
||||
LP002128,Male,1.0,2,1,0.0,2583,2330.0,125.0,360.0,1,Rural,1.0
|
||||
LP002129,Male,1.0,0,1,0.0,2499,2458.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002130,Male,1.0,,0,0.0,3523,3230.0,152.0,360.0,0,Rural,0.0
|
||||
LP002131,Male,1.0,2,0,0.0,3083,2168.0,126.0,360.0,1,Urban,1.0
|
||||
LP002137,Male,1.0,0,1,0.0,6333,4583.0,259.0,360.0,0,Semiurban,1.0
|
||||
LP002138,Male,1.0,0,1,0.0,2625,6250.0,187.0,360.0,1,Rural,1.0
|
||||
LP002139,Male,1.0,0,1,0.0,9083,0.0,228.0,360.0,1,Semiurban,1.0
|
||||
LP002140,Male,0.0,0,1,0.0,8750,4167.0,308.0,360.0,1,Rural,0.0
|
||||
LP002141,Male,1.0,3+,1,0.0,2666,2083.0,95.0,360.0,1,Rural,1.0
|
||||
LP002142,Female,1.0,0,1,1.0,5500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002143,Female,1.0,0,1,0.0,2423,505.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002144,Female,0.0,,1,0.0,3813,0.0,116.0,180.0,1,Urban,1.0
|
||||
LP002149,Male,1.0,2,1,0.0,8333,3167.0,165.0,360.0,1,Rural,1.0
|
||||
LP002151,Male,1.0,1,1,0.0,3875,0.0,67.0,360.0,1,Urban,0.0
|
||||
LP002158,Male,1.0,0,0,0.0,3000,1666.0,100.0,480.0,0,Urban,0.0
|
||||
LP002160,Male,1.0,3+,1,0.0,5167,3167.0,200.0,360.0,1,Semiurban,1.0
|
||||
LP002161,Female,0.0,1,1,0.0,4723,0.0,81.0,360.0,1,Semiurban,0.0
|
||||
LP002170,Male,1.0,2,1,0.0,5000,3667.0,236.0,360.0,1,Semiurban,1.0
|
||||
LP002175,Male,1.0,0,1,0.0,4750,2333.0,130.0,360.0,1,Urban,1.0
|
||||
LP002178,Male,1.0,0,1,0.0,3013,3033.0,95.0,300.0,0,Urban,1.0
|
||||
LP002180,Male,0.0,0,1,1.0,6822,0.0,141.0,360.0,1,Rural,1.0
|
||||
LP002181,Male,0.0,0,0,0.0,6216,0.0,133.0,360.0,1,Rural,0.0
|
||||
LP002187,Male,0.0,0,1,0.0,2500,0.0,96.0,480.0,1,Semiurban,0.0
|
||||
LP002188,Male,0.0,0,1,0.0,5124,0.0,124.0,,0,Rural,0.0
|
||||
LP002190,Male,1.0,1,1,0.0,6325,0.0,175.0,360.0,1,Semiurban,1.0
|
||||
LP002191,Male,1.0,0,1,0.0,19730,5266.0,570.0,360.0,1,Rural,0.0
|
||||
LP002194,Female,0.0,0,1,1.0,15759,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002197,Male,1.0,2,1,0.0,5185,0.0,155.0,360.0,1,Semiurban,1.0
|
||||
LP002201,Male,1.0,2,1,1.0,9323,7873.0,380.0,300.0,1,Rural,1.0
|
||||
LP002205,Male,0.0,1,1,0.0,3062,1987.0,111.0,180.0,0,Urban,0.0
|
||||
LP002209,Female,0.0,0,1,0.0,2764,1459.0,110.0,360.0,1,Urban,1.0
|
||||
LP002211,Male,1.0,0,1,0.0,4817,923.0,120.0,180.0,1,Urban,1.0
|
||||
LP002219,Male,1.0,3+,1,0.0,8750,4996.0,130.0,360.0,1,Rural,1.0
|
||||
LP002223,Male,1.0,0,1,0.0,4310,0.0,130.0,360.0,0,Semiurban,1.0
|
||||
LP002224,Male,0.0,0,1,0.0,3069,0.0,71.0,480.0,1,Urban,0.0
|
||||
LP002225,Male,1.0,2,1,0.0,5391,0.0,130.0,360.0,1,Urban,1.0
|
||||
LP002226,Male,1.0,0,1,0.0,3333,2500.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002229,Male,0.0,0,1,0.0,5941,4232.0,296.0,360.0,1,Semiurban,1.0
|
||||
LP002231,Female,0.0,0,1,0.0,6000,0.0,156.0,360.0,1,Urban,1.0
|
||||
LP002234,Male,0.0,0,1,1.0,7167,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002236,Male,1.0,2,1,0.0,4566,0.0,100.0,360.0,1,Urban,0.0
|
||||
LP002237,Male,0.0,1,1,0.0,3667,0.0,113.0,180.0,1,Urban,1.0
|
||||
LP002239,Male,0.0,0,0,0.0,2346,1600.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002243,Male,1.0,0,0,0.0,3010,3136.0,0.0,360.0,0,Urban,0.0
|
||||
LP002244,Male,1.0,0,1,0.0,2333,2417.0,136.0,360.0,1,Urban,1.0
|
||||
LP002250,Male,1.0,0,1,0.0,5488,0.0,125.0,360.0,1,Rural,1.0
|
||||
LP002255,Male,0.0,3+,1,0.0,9167,0.0,185.0,360.0,1,Rural,1.0
|
||||
LP002262,Male,1.0,3+,1,0.0,9504,0.0,275.0,360.0,1,Rural,1.0
|
||||
LP002263,Male,1.0,0,1,0.0,2583,2115.0,120.0,360.0,0,Urban,1.0
|
||||
LP002265,Male,1.0,2,0,0.0,1993,1625.0,113.0,180.0,1,Semiurban,1.0
|
||||
LP002266,Male,1.0,2,1,0.0,3100,1400.0,113.0,360.0,1,Urban,1.0
|
||||
LP002272,Male,1.0,2,1,0.0,3276,484.0,135.0,360.0,0,Semiurban,1.0
|
||||
LP002277,Female,0.0,0,1,0.0,3180,0.0,71.0,360.0,0,Urban,0.0
|
||||
LP002281,Male,1.0,0,1,0.0,3033,1459.0,95.0,360.0,1,Urban,1.0
|
||||
LP002284,Male,0.0,0,0,0.0,3902,1666.0,109.0,360.0,1,Rural,1.0
|
||||
LP002287,Female,0.0,0,1,0.0,1500,1800.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002288,Male,1.0,2,0,0.0,2889,0.0,45.0,180.0,0,Urban,0.0
|
||||
LP002296,Male,0.0,0,0,0.0,2755,0.0,65.0,300.0,1,Rural,0.0
|
||||
LP002297,Male,0.0,0,1,0.0,2500,20000.0,103.0,360.0,1,Semiurban,1.0
|
||||
LP002300,Female,0.0,0,0,0.0,1963,0.0,53.0,360.0,1,Semiurban,1.0
|
||||
LP002301,Female,0.0,0,1,1.0,7441,0.0,194.0,360.0,1,Rural,0.0
|
||||
LP002305,Female,0.0,0,1,0.0,4547,0.0,115.0,360.0,1,Semiurban,1.0
|
||||
LP002308,Male,1.0,0,0,0.0,2167,2400.0,115.0,360.0,1,Urban,1.0
|
||||
LP002314,Female,0.0,0,0,0.0,2213,0.0,66.0,360.0,1,Rural,1.0
|
||||
LP002315,Male,1.0,1,1,0.0,8300,0.0,152.0,300.0,0,Semiurban,0.0
|
||||
LP002317,Male,1.0,3+,1,0.0,81000,0.0,360.0,360.0,0,Rural,0.0
|
||||
LP002318,Female,0.0,1,0,1.0,3867,0.0,62.0,360.0,1,Semiurban,0.0
|
||||
LP002319,Male,1.0,0,1,0.0,6256,0.0,160.0,360.0,0,Urban,1.0
|
||||
LP002328,Male,1.0,0,0,0.0,6096,0.0,218.0,360.0,0,Rural,0.0
|
||||
LP002332,Male,1.0,0,0,0.0,2253,2033.0,110.0,360.0,1,Rural,1.0
|
||||
LP002335,Female,1.0,0,0,0.0,2149,3237.0,178.0,360.0,0,Semiurban,0.0
|
||||
LP002337,Female,0.0,0,1,0.0,2995,0.0,60.0,360.0,1,Urban,1.0
|
||||
LP002341,Female,0.0,1,1,0.0,2600,0.0,160.0,360.0,1,Urban,0.0
|
||||
LP002342,Male,1.0,2,1,1.0,1600,20000.0,239.0,360.0,1,Urban,0.0
|
||||
LP002345,Male,1.0,0,1,0.0,1025,2773.0,112.0,360.0,1,Rural,1.0
|
||||
LP002347,Male,1.0,0,1,0.0,3246,1417.0,138.0,360.0,1,Semiurban,1.0
|
||||
LP002348,Male,1.0,0,1,0.0,5829,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002357,Female,0.0,0,0,0.0,2720,0.0,80.0,,0,Urban,0.0
|
||||
LP002361,Male,1.0,0,1,0.0,1820,1719.0,100.0,360.0,1,Urban,1.0
|
||||
LP002362,Male,1.0,1,1,0.0,7250,1667.0,110.0,,0,Urban,0.0
|
||||
LP002364,Male,1.0,0,1,0.0,14880,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002366,Male,1.0,0,1,0.0,2666,4300.0,121.0,360.0,1,Rural,1.0
|
||||
LP002367,Female,0.0,1,0,0.0,4606,0.0,81.0,360.0,1,Rural,0.0
|
||||
LP002368,Male,1.0,2,1,0.0,5935,0.0,133.0,360.0,1,Semiurban,1.0
|
||||
LP002369,Male,1.0,0,1,0.0,2920,16.12000084,87.0,360.0,1,Rural,1.0
|
||||
LP002370,Male,0.0,0,0,0.0,2717,0.0,60.0,180.0,1,Urban,1.0
|
||||
LP002377,Female,0.0,1,1,1.0,8624,0.0,150.0,360.0,1,Semiurban,1.0
|
||||
LP002379,Male,0.0,0,1,0.0,6500,0.0,105.0,360.0,0,Rural,0.0
|
||||
LP002386,Male,0.0,0,1,0.0,12876,0.0,405.0,360.0,1,Semiurban,1.0
|
||||
LP002387,Male,1.0,0,1,0.0,2425,2340.0,143.0,360.0,1,Semiurban,1.0
|
||||
LP002390,Male,0.0,0,1,0.0,3750,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002393,Female,0.0,,1,0.0,10047,0.0,0.0,240.0,1,Semiurban,1.0
|
||||
LP002398,Male,0.0,0,1,0.0,1926,1851.0,50.0,360.0,1,Semiurban,1.0
|
||||
LP002401,Male,1.0,0,1,0.0,2213,1125.0,0.0,360.0,1,Urban,1.0
|
||||
LP002403,Male,0.0,0,1,1.0,10416,0.0,187.0,360.0,0,Urban,0.0
|
||||
LP002407,Female,1.0,0,0,1.0,7142,0.0,138.0,360.0,1,Rural,1.0
|
||||
LP002408,Male,0.0,0,1,0.0,3660,5064.0,187.0,360.0,1,Semiurban,1.0
|
||||
LP002409,Male,1.0,0,1,0.0,7901,1833.0,180.0,360.0,1,Rural,1.0
|
||||
LP002418,Male,0.0,3+,0,0.0,4707,1993.0,148.0,360.0,1,Semiurban,1.0
|
||||
LP002422,Male,0.0,1,1,0.0,37719,0.0,152.0,360.0,1,Semiurban,1.0
|
||||
LP002424,Male,1.0,0,1,0.0,7333,8333.0,175.0,300.0,0,Rural,1.0
|
||||
LP002429,Male,1.0,1,1,1.0,3466,1210.0,130.0,360.0,1,Rural,1.0
|
||||
LP002434,Male,1.0,2,0,0.0,4652,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002435,Male,1.0,0,1,0.0,3539,1376.0,55.0,360.0,1,Rural,0.0
|
||||
LP002443,Male,1.0,2,1,0.0,3340,1710.0,150.0,360.0,0,Rural,0.0
|
||||
LP002444,Male,0.0,1,0,1.0,2769,1542.0,190.0,360.0,0,Semiurban,0.0
|
||||
LP002446,Male,1.0,2,0,0.0,2309,1255.0,125.0,360.0,0,Rural,0.0
|
||||
LP002447,Male,1.0,2,0,0.0,1958,1456.0,60.0,300.0,0,Urban,1.0
|
||||
LP002448,Male,1.0,0,1,0.0,3948,1733.0,149.0,360.0,0,Rural,0.0
|
||||
LP002449,Male,1.0,0,1,0.0,2483,2466.0,90.0,180.0,0,Rural,1.0
|
||||
LP002453,Male,0.0,0,1,1.0,7085,0.0,84.0,360.0,1,Semiurban,1.0
|
||||
LP002455,Male,1.0,2,1,0.0,3859,0.0,96.0,360.0,1,Semiurban,1.0
|
||||
LP002459,Male,1.0,0,1,0.0,4301,0.0,118.0,360.0,1,Urban,1.0
|
||||
LP002467,Male,1.0,0,1,0.0,3708,2569.0,173.0,360.0,1,Urban,0.0
|
||||
LP002472,Male,0.0,2,1,0.0,4354,0.0,136.0,360.0,1,Rural,1.0
|
||||
LP002473,Male,1.0,0,1,0.0,8334,0.0,160.0,360.0,1,Semiurban,0.0
|
||||
LP002478,,1.0,0,1,1.0,2083,4083.0,160.0,360.0,0,Semiurban,1.0
|
||||
LP002484,Male,1.0,3+,1,0.0,7740,0.0,128.0,180.0,1,Urban,1.0
|
||||
LP002487,Male,1.0,0,1,0.0,3015,2188.0,153.0,360.0,1,Rural,1.0
|
||||
LP002489,Female,0.0,1,0,0.0,5191,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002493,Male,0.0,0,1,0.0,4166,0.0,98.0,360.0,0,Semiurban,0.0
|
||||
LP002494,Male,0.0,0,1,0.0,6000,0.0,140.0,360.0,1,Rural,1.0
|
||||
LP002500,Male,1.0,3+,0,0.0,2947,1664.0,70.0,180.0,0,Urban,0.0
|
||||
LP002501,,1.0,0,1,0.0,16692,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002502,Female,1.0,2,0,0.0,210,2917.0,98.0,360.0,1,Semiurban,1.0
|
||||
LP002505,Male,1.0,0,1,0.0,4333,2451.0,110.0,360.0,1,Urban,0.0
|
||||
LP002515,Male,1.0,1,1,1.0,3450,2079.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002517,Male,1.0,1,0,0.0,2653,1500.0,113.0,180.0,0,Rural,0.0
|
||||
LP002519,Male,1.0,3+,1,0.0,4691,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002522,Female,0.0,0,1,1.0,2500,0.0,93.0,360.0,0,Urban,1.0
|
||||
LP002524,Male,0.0,2,1,0.0,5532,4648.0,162.0,360.0,1,Rural,1.0
|
||||
LP002527,Male,1.0,2,1,1.0,16525,1014.0,150.0,360.0,1,Rural,1.0
|
||||
LP002529,Male,1.0,2,1,0.0,6700,1750.0,230.0,300.0,1,Semiurban,1.0
|
||||
LP002530,,1.0,2,1,0.0,2873,1872.0,132.0,360.0,0,Semiurban,0.0
|
||||
LP002531,Male,1.0,1,1,1.0,16667,2250.0,86.0,360.0,1,Semiurban,1.0
|
||||
LP002533,Male,1.0,2,1,0.0,2947,1603.0,0.0,360.0,1,Urban,0.0
|
||||
LP002534,Female,0.0,0,0,0.0,4350,0.0,154.0,360.0,1,Rural,1.0
|
||||
LP002536,Male,1.0,3+,0,0.0,3095,0.0,113.0,360.0,1,Rural,1.0
|
||||
LP002537,Male,1.0,0,1,0.0,2083,3150.0,128.0,360.0,1,Semiurban,1.0
|
||||
LP002541,Male,1.0,0,1,0.0,10833,0.0,234.0,360.0,1,Semiurban,1.0
|
||||
LP002543,Male,1.0,2,1,0.0,8333,0.0,246.0,360.0,1,Semiurban,1.0
|
||||
LP002544,Male,1.0,1,0,0.0,1958,2436.0,131.0,360.0,1,Rural,1.0
|
||||
LP002545,Male,0.0,2,1,0.0,3547,0.0,80.0,360.0,0,Rural,0.0
|
||||
LP002547,Male,1.0,1,1,0.0,18333,0.0,500.0,360.0,1,Urban,0.0
|
||||
LP002555,Male,1.0,2,1,1.0,4583,2083.0,160.0,360.0,1,Semiurban,1.0
|
||||
LP002556,Male,0.0,0,1,0.0,2435,0.0,75.0,360.0,1,Urban,0.0
|
||||
LP002560,Male,0.0,0,0,0.0,2699,2785.0,96.0,360.0,0,Semiurban,1.0
|
||||
LP002562,Male,1.0,1,0,0.0,5333,1131.0,186.0,360.0,0,Urban,1.0
|
||||
LP002571,Male,0.0,0,0,0.0,3691,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002582,Female,0.0,0,0,1.0,17263,0.0,225.0,360.0,1,Semiurban,1.0
|
||||
LP002585,Male,1.0,0,1,0.0,3597,2157.0,119.0,360.0,0,Rural,0.0
|
||||
LP002586,Female,1.0,1,1,0.0,3326,913.0,105.0,84.0,1,Semiurban,1.0
|
||||
LP002587,Male,1.0,0,0,0.0,2600,1700.0,107.0,360.0,1,Rural,1.0
|
||||
LP002588,Male,1.0,0,1,0.0,4625,2857.0,111.0,12.0,0,Urban,1.0
|
||||
LP002600,Male,1.0,1,1,1.0,2895,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002602,Male,0.0,0,1,0.0,6283,4416.0,209.0,360.0,0,Rural,0.0
|
||||
LP002603,Female,0.0,0,1,0.0,645,3683.0,113.0,480.0,1,Rural,1.0
|
||||
LP002606,Female,0.0,0,1,0.0,3159,0.0,100.0,360.0,1,Semiurban,1.0
|
||||
LP002615,Male,1.0,2,1,0.0,4865,5624.0,208.0,360.0,1,Semiurban,1.0
|
||||
LP002618,Male,1.0,1,0,0.0,4050,5302.0,138.0,360.0,0,Rural,0.0
|
||||
LP002619,Male,1.0,0,0,0.0,3814,1483.0,124.0,300.0,1,Semiurban,1.0
|
||||
LP002622,Male,1.0,2,1,0.0,3510,4416.0,243.0,360.0,1,Rural,1.0
|
||||
LP002624,Male,1.0,0,1,0.0,20833,6667.0,480.0,360.0,0,Urban,1.0
|
||||
LP002625,,0.0,0,1,0.0,3583,0.0,96.0,360.0,1,Urban,0.0
|
||||
LP002626,Male,1.0,0,1,1.0,2479,3013.0,188.0,360.0,1,Urban,1.0
|
||||
LP002634,Female,0.0,1,1,0.0,13262,0.0,40.0,360.0,1,Urban,1.0
|
||||
LP002637,Male,0.0,0,0,0.0,3598,1287.0,100.0,360.0,1,Rural,0.0
|
||||
LP002640,Male,1.0,1,1,0.0,6065,2004.0,250.0,360.0,1,Semiurban,1.0
|
||||
LP002643,Male,1.0,2,1,0.0,3283,2035.0,148.0,360.0,1,Urban,1.0
|
||||
LP002648,Male,1.0,0,1,0.0,2130,6666.0,70.0,180.0,1,Semiurban,0.0
|
||||
LP002652,Male,0.0,0,1,0.0,5815,3666.0,311.0,360.0,1,Rural,0.0
|
||||
LP002659,Male,1.0,3+,1,0.0,3466,3428.0,150.0,360.0,1,Rural,1.0
|
||||
LP002670,Female,1.0,2,1,0.0,2031,1632.0,113.0,480.0,1,Semiurban,1.0
|
||||
LP002682,Male,1.0,,0,0.0,3074,1800.0,123.0,360.0,0,Semiurban,0.0
|
||||
LP002683,Male,0.0,0,1,0.0,4683,1915.0,185.0,360.0,1,Semiurban,0.0
|
||||
LP002684,Female,0.0,0,0,0.0,3400,0.0,95.0,360.0,1,Rural,0.0
|
||||
LP002689,Male,1.0,2,0,0.0,2192,1742.0,45.0,360.0,1,Semiurban,1.0
|
||||
LP002690,Male,0.0,0,1,0.0,2500,0.0,55.0,360.0,1,Semiurban,1.0
|
||||
LP002692,Male,1.0,3+,1,1.0,5677,1424.0,100.0,360.0,1,Rural,1.0
|
||||
LP002693,Male,1.0,2,1,1.0,7948,7166.0,480.0,360.0,1,Rural,1.0
|
||||
LP002697,Male,0.0,0,1,0.0,4680,2087.0,0.0,360.0,1,Semiurban,0.0
|
||||
LP002699,Male,1.0,2,1,1.0,17500,0.0,400.0,360.0,1,Rural,1.0
|
||||
LP002705,Male,1.0,0,1,0.0,3775,0.0,110.0,360.0,1,Semiurban,1.0
|
||||
LP002706,Male,1.0,1,0,0.0,5285,1430.0,161.0,360.0,0,Semiurban,1.0
|
||||
LP002714,Male,0.0,1,0,0.0,2679,1302.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002716,Male,0.0,0,0,0.0,6783,0.0,130.0,360.0,1,Semiurban,1.0
|
||||
LP002717,Male,1.0,0,1,0.0,1025,5500.0,216.0,360.0,0,Rural,1.0
|
||||
LP002720,Male,1.0,3+,1,0.0,4281,0.0,100.0,360.0,1,Urban,1.0
|
||||
LP002723,Male,0.0,2,1,0.0,3588,0.0,110.0,360.0,0,Rural,0.0
|
||||
LP002729,Male,0.0,1,1,0.0,11250,0.0,196.0,360.0,0,Semiurban,0.0
|
||||
LP002731,Female,0.0,0,0,1.0,18165,0.0,125.0,360.0,1,Urban,1.0
|
||||
LP002732,Male,0.0,0,0,0.0,2550,2042.0,126.0,360.0,1,Rural,1.0
|
||||
LP002734,Male,1.0,0,1,0.0,6133,3906.0,324.0,360.0,1,Urban,1.0
|
||||
LP002738,Male,0.0,2,1,0.0,3617,0.0,107.0,360.0,1,Semiurban,1.0
|
||||
LP002739,Male,1.0,0,0,0.0,2917,536.0,66.0,360.0,1,Rural,0.0
|
||||
LP002740,Male,1.0,3+,1,0.0,6417,0.0,157.0,180.0,1,Rural,1.0
|
||||
LP002741,Female,1.0,1,1,0.0,4608,2845.0,140.0,180.0,1,Semiurban,1.0
|
||||
LP002743,Female,0.0,0,1,0.0,2138,0.0,99.0,360.0,0,Semiurban,0.0
|
||||
LP002753,Female,0.0,1,1,0.0,3652,0.0,95.0,360.0,1,Semiurban,1.0
|
||||
LP002755,Male,1.0,1,0,0.0,2239,2524.0,128.0,360.0,1,Urban,1.0
|
||||
LP002757,Female,1.0,0,0,0.0,3017,663.0,102.0,360.0,0,Semiurban,1.0
|
||||
LP002767,Male,1.0,0,1,0.0,2768,1950.0,155.0,360.0,1,Rural,1.0
|
||||
LP002768,Male,0.0,0,0,0.0,3358,0.0,80.0,36.0,1,Semiurban,0.0
|
||||
LP002772,Male,0.0,0,1,0.0,2526,1783.0,145.0,360.0,1,Rural,1.0
|
||||
LP002776,Female,0.0,0,1,0.0,5000,0.0,103.0,360.0,0,Semiurban,0.0
|
||||
LP002777,Male,1.0,0,1,0.0,2785,2016.0,110.0,360.0,1,Rural,1.0
|
||||
LP002778,Male,1.0,2,1,1.0,6633,0.0,0.0,360.0,0,Rural,0.0
|
||||
LP002784,Male,1.0,1,0,0.0,2492,2375.0,0.0,360.0,1,Rural,1.0
|
||||
LP002785,Male,1.0,1,1,0.0,3333,3250.0,158.0,360.0,1,Urban,1.0
|
||||
LP002788,Male,1.0,0,0,0.0,2454,2333.0,181.0,360.0,0,Urban,0.0
|
||||
LP002789,Male,1.0,0,1,0.0,3593,4266.0,132.0,180.0,0,Rural,0.0
|
||||
LP002792,Male,1.0,1,1,0.0,5468,1032.0,26.0,360.0,1,Semiurban,1.0
|
||||
LP002794,Female,0.0,0,1,0.0,2667,1625.0,84.0,360.0,0,Urban,1.0
|
||||
LP002795,Male,1.0,3+,1,1.0,10139,0.0,260.0,360.0,1,Semiurban,1.0
|
||||
LP002798,Male,1.0,0,1,0.0,3887,2669.0,162.0,360.0,1,Semiurban,1.0
|
||||
LP002804,Female,1.0,0,1,0.0,4180,2306.0,182.0,360.0,1,Semiurban,1.0
|
||||
LP002807,Male,1.0,2,0,0.0,3675,242.0,108.0,360.0,1,Semiurban,1.0
|
||||
LP002813,Female,1.0,1,1,1.0,19484,0.0,600.0,360.0,1,Semiurban,1.0
|
||||
LP002820,Male,1.0,0,1,0.0,5923,2054.0,211.0,360.0,1,Rural,1.0
|
||||
LP002821,Male,0.0,0,0,1.0,5800,0.0,132.0,360.0,1,Semiurban,1.0
|
||||
LP002832,Male,1.0,2,1,0.0,8799,0.0,258.0,360.0,0,Urban,0.0
|
||||
LP002833,Male,1.0,0,0,0.0,4467,0.0,120.0,360.0,0,Rural,1.0
|
||||
LP002836,Male,0.0,0,1,0.0,3333,0.0,70.0,360.0,1,Urban,1.0
|
||||
LP002837,Male,1.0,3+,1,0.0,3400,2500.0,123.0,360.0,0,Rural,0.0
|
||||
LP002840,Female,0.0,0,1,0.0,2378,0.0,9.0,360.0,1,Urban,0.0
|
||||
LP002841,Male,1.0,0,1,0.0,3166,2064.0,104.0,360.0,0,Urban,0.0
|
||||
LP002842,Male,1.0,1,1,0.0,3417,1750.0,186.0,360.0,1,Urban,1.0
|
||||
LP002847,Male,1.0,,1,0.0,5116,1451.0,165.0,360.0,0,Urban,0.0
|
||||
LP002855,Male,1.0,2,1,0.0,16666,0.0,275.0,360.0,1,Urban,1.0
|
||||
LP002862,Male,1.0,2,0,0.0,6125,1625.0,187.0,480.0,1,Semiurban,0.0
|
||||
LP002863,Male,1.0,3+,1,0.0,6406,0.0,150.0,360.0,1,Semiurban,0.0
|
||||
LP002868,Male,1.0,2,1,0.0,3159,461.0,108.0,84.0,1,Urban,1.0
|
||||
LP002872,,1.0,0,1,0.0,3087,2210.0,136.0,360.0,0,Semiurban,0.0
|
||||
LP002874,Male,0.0,0,1,0.0,3229,2739.0,110.0,360.0,1,Urban,1.0
|
||||
LP002877,Male,1.0,1,1,0.0,1782,2232.0,107.0,360.0,1,Rural,1.0
|
||||
LP002888,Male,0.0,0,1,0.0,3182,2917.0,161.0,360.0,1,Urban,1.0
|
||||
LP002892,Male,1.0,2,1,0.0,6540,0.0,205.0,360.0,1,Semiurban,1.0
|
||||
LP002893,Male,0.0,0,1,0.0,1836,33837.0,90.0,360.0,1,Urban,0.0
|
||||
LP002894,Female,1.0,0,1,0.0,3166,0.0,36.0,360.0,1,Semiurban,1.0
|
||||
LP002898,Male,1.0,1,1,0.0,1880,0.0,61.0,360.0,0,Rural,0.0
|
||||
LP002911,Male,1.0,1,1,0.0,2787,1917.0,146.0,360.0,0,Rural,0.0
|
||||
LP002912,Male,1.0,1,1,0.0,4283,3000.0,172.0,84.0,1,Rural,0.0
|
||||
LP002916,Male,1.0,0,1,0.0,2297,1522.0,104.0,360.0,1,Urban,1.0
|
||||
LP002917,Female,0.0,0,0,0.0,2165,0.0,70.0,360.0,1,Semiurban,1.0
|
||||
LP002925,,0.0,0,1,0.0,4750,0.0,94.0,360.0,1,Semiurban,1.0
|
||||
LP002926,Male,1.0,2,1,1.0,2726,0.0,106.0,360.0,0,Semiurban,0.0
|
||||
LP002928,Male,1.0,0,1,0.0,3000,3416.0,56.0,180.0,1,Semiurban,1.0
|
||||
LP002931,Male,1.0,2,1,1.0,6000,0.0,205.0,240.0,1,Semiurban,0.0
|
||||
LP002933,,0.0,3+,1,1.0,9357,0.0,292.0,360.0,1,Semiurban,1.0
|
||||
LP002936,Male,1.0,0,1,0.0,3859,3300.0,142.0,180.0,1,Rural,1.0
|
||||
LP002938,Male,1.0,0,1,1.0,16120,0.0,260.0,360.0,1,Urban,1.0
|
||||
LP002940,Male,0.0,0,0,0.0,3833,0.0,110.0,360.0,1,Rural,1.0
|
||||
LP002941,Male,1.0,2,0,1.0,6383,1000.0,187.0,360.0,1,Rural,0.0
|
||||
LP002943,Male,0.0,,1,0.0,2987,0.0,88.0,360.0,0,Semiurban,0.0
|
||||
LP002945,Male,1.0,0,1,1.0,9963,0.0,180.0,360.0,1,Rural,1.0
|
||||
LP002948,Male,1.0,2,1,0.0,5780,0.0,192.0,360.0,1,Urban,1.0
|
||||
LP002949,Female,0.0,3+,1,0.0,416,41667.0,350.0,180.0,0,Urban,0.0
|
||||
LP002950,Male,1.0,0,0,0.0,2894,2792.0,155.0,360.0,1,Rural,1.0
|
||||
LP002953,Male,1.0,3+,1,0.0,5703,0.0,128.0,360.0,1,Urban,1.0
|
||||
LP002958,Male,0.0,0,1,0.0,3676,4301.0,172.0,360.0,1,Rural,1.0
|
||||
LP002959,Female,1.0,1,1,0.0,12000,0.0,496.0,360.0,1,Semiurban,1.0
|
||||
LP002960,Male,1.0,0,0,0.0,2400,3800.0,0.0,180.0,1,Urban,0.0
|
||||
LP002961,Male,1.0,1,1,0.0,3400,2500.0,173.0,360.0,1,Semiurban,1.0
|
||||
LP002964,Male,1.0,2,0,0.0,3987,1411.0,157.0,360.0,1,Rural,1.0
|
||||
LP002974,Male,1.0,0,1,0.0,3232,1950.0,108.0,360.0,1,Rural,1.0
|
||||
LP002978,Female,0.0,0,1,0.0,2900,0.0,71.0,360.0,1,Rural,1.0
|
||||
LP002979,Male,1.0,3+,1,0.0,4106,0.0,40.0,180.0,1,Rural,1.0
|
||||
LP002983,Male,1.0,1,1,0.0,8072,240.0,253.0,360.0,1,Urban,1.0
|
||||
LP002984,Male,1.0,2,1,0.0,7583,0.0,187.0,360.0,1,Urban,1.0
|
||||
LP002990,Female,0.0,0,1,1.0,4583,0.0,133.0,360.0,0,Semiurban,0.0
|
||||
|
BIN
abanin_daniil_lab_5/result_1.png
Normal file
|
After Width: | Height: | Size: 66 KiB |
BIN
abanin_daniil_lab_5/result_2.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
40
alexandrov_dmitrii_lab_4/lab4.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from scipy.cluster import hierarchy
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
def start():
    """Count hand-picked price classes and show a single-linkage dendrogram.

    Reads 'sberbank_data.csv' (indexed by its 'id' column), prints how many
    records fall into each manually chosen class of 'price_doc' / 'full_sq',
    then draws a dendrogram built from the raw 'full_sq' and 'price_doc'
    columns (truncate_mode='lastp', p=15) and displays it.
    """
    frame = pd.read_csv('sberbank_data.csv', index_col='id')
    x = frame[['full_sq', 'price_doc']]
    plt.figure(1, figsize=(16, 9))
    plt.title('Дендрограмма кластеризации цен')

    # Exclusive upper bounds of the first three classes; a value that is not
    # below any of them is counted in the fourth (last) class.
    upper_bounds = (100000, 300000, 500000)
    prices = [0] * (len(upper_bounds) + 1)
    for _, row in x.iterrows():
        unit_price = row['price_doc'] / row['full_sq']
        bucket = len(upper_bounds)
        for position, bound in enumerate(upper_bounds):
            if unit_price < bound:
                bucket = position
                break
        prices[bucket] += 1

    print('Результаты подчсёта ручного распределения:')
    captions = ('низких цен:', 'средних цен:', 'высоких цен:', 'премиальных цен:')
    for caption, amount in zip(captions, prices):
        print(caption + str(amount))

    # Single-linkage merge table computed on the two raw columns.
    merges = hierarchy.linkage(x, method='single')
    hierarchy.dendrogram(
        merges,
        truncate_mode='lastp',
        p=15,
        orientation='top',
        leaf_rotation=90,
        leaf_font_size=8,
        show_contracted=True,
    )

    plt.show()
|
||||
|
||||
|
||||
start()
|
||||
27
alexandrov_dmitrii_lab_4/readme.md
Normal file
@@ -0,0 +1,27 @@
|
||||
### Задание
|
||||
Использовать метод кластеризации по варианту для выбранных данных по варианту, самостоятельно сформулировав задачу.
|
||||
Интерпретировать результаты и оценить, насколько хорошо он подходит для
|
||||
решения сформулированной вами задачи.
|
||||
|
||||
Вариант 1: dendrogram
|
||||
|
||||
Была сформулирована следующая задача: необходимо разбить записи на кластеры в зависимости от цен и площади.
|
||||
|
||||
### Запуск программы
|
||||
Файл lab4.py содержит и запускает программу, аргументов и настройки ~~вроде~~ не требует.
|
||||
|
||||
### Описание программы
|
||||
Программа считывает цены и площади из файла статистики сбербанка по рынку недвижимости.
|
||||
Поскольку по заданию требуется оценить машинную кластеризацию, для сравнения программа подсчитывает и выводит в консоль количество записей в каждом из выделенных вручную классов цен.
|
||||
Далее программа кластеризует данные с помощью алгоритма ближайших точек (на другие памяти нету) и выводит дендрограмму на основе кластеризации.
|
||||
Выводимая дендрограмма ограничена 15 последними (верхними) объединениями.
|
||||
|
||||
### Результаты тестирования
|
||||
По результатам тестирования, можно сказать следующее:
|
||||
* Последние объединения в дендрограмме - объединения выбросов с 'основным' кластером, то есть 10-20 записей с кластером с более чем 28000 записями.
|
||||
* Это правильная информация, так как ручная классификация показывает, что премиальных (аномально больших) цен как раз порядка 20, остальные относятся к другим классам.
|
||||
* Поскольку в имеющихся данных нет ограничений по ценам, выбросы аномально высоких цен при использовании данного алгоритма формируют отдельные кластеры, что негативно сказывается на наглядности.
|
||||
* Ценовое ограничение также не дало положительных результатов: снова сформировался 'основной' кластер, с которым последними объединялись отдельные значения.
|
||||
* Значит, сам алгоритм не эффективен.
|
||||
|
||||
Итого: Алгоритм ближайших точек слишком чувствителен к выбросам, поэтому можно признать его неэффективным для необработанных данных. Дендрограмма как средство визуализации скорее уступает по наглядности диаграмме рассеяния.
|
||||
28896
alexandrov_dmitrii_lab_4/sberbank_data.csv
Normal file
76
alexandrov_dmitrii_lab_6/lab6.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Load the data set; the 'id' column becomes the index.
data = pd.read_csv('sberbank_data.csv', index_col='id')
x = data[['timestamp', 'full_sq', 'floor', 'max_floor', 'build_year', 'num_room', 'material', 'kremlin_km']]

# Encode missing values (literal 'NA' strings and NaN) as 0.
x = x.replace('NA', 0).fillna(0)

# Keep only the text before the first '-' of each timestamp and re-attach the
# column at the end of the frame.
col_date = [stamp.split('-', 1)[0] for stamp in x['timestamp']]
x = x.drop(columns='timestamp')
x['timestamp'] = col_date

# Price classes as (exclusive upper bound, label); a price that is not below
# the last bound is labelled 'oligarch'.
_price_classes = (
    (1500000, 'low'),
    (3000000, 'medium'),
    (5500000, 'high'),
    (10000000, 'premium'),
)
y = [
    next((label for bound, label in _price_classes if price < bound), 'oligarch')
    for price in data['price_doc']
]

# 99% of the records are used for training, 1% for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.01, random_state=42)

# Minimum, mean and maximum scores collected per tested setting.
min_scores, med_scores, max_scores = [], [], []
|
||||
|
||||
|
||||
def do_test(iters_num):
    """Train and score ten MLP classifiers capped at ``iters_num`` iterations.

    Each classifier is fitted on the module-level ``x_train``/``y_train`` and
    scored on ``x_test``/``y_test``. Every score and the mean are printed, and
    the minimum, mean and maximum are appended to the module-level
    ``min_scores``, ``med_scores`` and ``max_scores`` lists.

    :param iters_num: value passed to ``MLPClassifier(max_iter=...)``.
    """
    print("Testing iterations number " + str(iters_num) + ":")
    scores = []

    for i in range(10):
        # Honour the requested cap: with a hard-coded 200 every tested
        # setting would train exactly the same model.
        neuro = MLPClassifier(max_iter=iters_num)
        neuro.fit(x_train, y_train)
        scr = neuro.score(x_test, y_test)
        print("res" + str(i + 1) + ": " + str(scr))
        scores.append(scr)

    mean_score = np.mean(scores)
    print("Medium result: " + str(mean_score))

    # The result lists are mutated in place, so no ``global`` statement is needed.
    min_scores.append(np.min(scores))
    med_scores.append(mean_score)
    max_scores.append(np.max(scores))
|
||||
|
||||
|
||||
def start():
    """Run do_test() for every iteration cap and plot the collected scores.

    The x axis holds the tested iteration caps; the three curves are the
    minimum, mean and maximum scores stored in the module-level
    ``min_scores``, ``med_scores`` and ``max_scores`` lists.
    """
    iter_nums = [200, 400, 600, 800, 1000]

    for num in iter_nums:
        do_test(num)

    plt.figure(1, figsize=(16, 9))
    plt.plot(iter_nums, min_scores, c='r', label='min')
    plt.plot(iter_nums, med_scores, c='b', label='mean')
    # A colour of its own, so the max curve can be told apart from the mean one.
    plt.plot(iter_nums, max_scores, c='g', label='max')
    plt.legend()
    plt.show()
|
||||
|
||||
|
||||
start()
|
||||
149
alexandrov_dmitrii_lab_6/readme.md
Normal file
@@ -0,0 +1,149 @@
|
||||
### Задание
|
||||
Использовать нейронную сеть по варианту для выбранных данных по варианту, самостоятельно сформулировав задачу.
|
||||
Интерпретировать результаты и оценить, насколько хорошо она подходит для
|
||||
решения сформулированной вами задачи.
|
||||
|
||||
Вариант 1: MLPClassifier
|
||||
|
||||
Была сформулирована следующая задача: необходимо классифицировать жильё по стоимости на основе избранных признаков при помощи нейронной сети.
|
||||
|
||||
### Запуск программы
|
||||
Файл lab6.py содержит и запускает программу, аргументов и настройки ~~вроде~~ не требует.
|
||||
|
||||
### Описание программы
|
||||
Программа считывает цены на жильё как выходные данные и следующие данные как входные: год размещения объявления, площадь, этаж, количество этажей, год постройки, количество комнат, материал, расстояние до кремля (условного центра).
|
||||
Далее она обрабатывает данные (цифровизирует нулевые данные), оставляет только год объявления. Цены распределяются по пяти классам.
|
||||
|
||||
После обработки программа делит данные на 99% обучающего материала и 1% тестового.
|
||||
Эти данные обрабатываются по 10 раз для идентичных моделей нейронных сетей, использующих метод градиентного спуска "adam", с разной настройкой максимального количества поколений: 200, 400, 600, 800, 1000.
|
||||
Считается оценка модели. Для каждой модели запоминаются минимальный, максимальный и средний результаты. В консоль выводятся все результаты.
|
||||
В конце программа показывает графики зависимости результатов от максимального количества поколений модели.
|
||||
|
||||
### Результаты тестирования
|
||||
По результатам тестирования, можно сказать следующее:
|
||||
* В общем, модель даёт средний результат в районе 40-50% точности, что недостаточно.
|
||||
* Увеличение максимального количества поколений влияет сильнее всего на минимальные оценки, сужая разброс точности.
|
||||
* Нельзя сказать, что увеличение максимального количества поколений сильно улучшит модель: максимум на 10% точности.
|
||||
|
||||
Пример консольного вывода:
|
||||
>Testing iterations number 200:
|
||||
>
|
||||
>res1: 0.3806228373702422
|
||||
>
|
||||
>res2: 0.6055363321799307
|
||||
>
|
||||
>res3: 0.4809688581314879
|
||||
>
|
||||
>res4: 0.4913494809688581
|
||||
>
|
||||
>res5: 0.4844290657439446
|
||||
>
|
||||
>res6: 0.2975778546712803
|
||||
>
|
||||
>res7: 0.48788927335640137
|
||||
>
|
||||
>res8: 0.06228373702422145
|
||||
>
|
||||
>res9: 0.6193771626297578
|
||||
>
|
||||
>res10: 0.47750865051903113
|
||||
>
|
||||
>Medium result: 0.4387543252595155
|
||||
>
|
||||
>Testing iterations number 400:
|
||||
>
|
||||
>res1: 0.6124567474048442
|
||||
>
|
||||
>res2: 0.4290657439446367
|
||||
>
|
||||
>res3: 0.3217993079584775
|
||||
>
|
||||
>res4: 0.5467128027681661
|
||||
>
|
||||
>res5: 0.48788927335640137
|
||||
>
|
||||
>res6: 0.40484429065743943
|
||||
>
|
||||
>res7: 0.6020761245674741
|
||||
>
|
||||
>res8: 0.4186851211072664
|
||||
>
|
||||
>res9: 0.42214532871972316
|
||||
>
|
||||
>res10: 0.370242214532872
|
||||
>
|
||||
>Medium result: 0.46159169550173
|
||||
>
|
||||
>Testing iterations number 600:
|
||||
>
|
||||
>res1: 0.4359861591695502
|
||||
>
|
||||
>res2: 0.2560553633217993
|
||||
>
|
||||
>res3: 0.5363321799307958
|
||||
>
|
||||
>res4: 0.5778546712802768
|
||||
>
|
||||
>res5: 0.35986159169550175
|
||||
>
|
||||
>res6: 0.356401384083045
|
||||
>
|
||||
>res7: 0.49480968858131485
|
||||
>
|
||||
>res8: 0.5121107266435986
|
||||
>
|
||||
>res9: 0.5224913494809689
|
||||
>
|
||||
>res10: 0.5190311418685121
|
||||
>
|
||||
>Medium result: 0.4570934256055363
|
||||
>
|
||||
>Testing iterations number 800:
|
||||
>
|
||||
>res1: 0.25951557093425603
|
||||
>
|
||||
>res2: 0.4083044982698962
|
||||
>
|
||||
>res3: 0.5224913494809689
|
||||
>
|
||||
>res4: 0.5986159169550173
|
||||
>
|
||||
>res5: 0.24567474048442905
|
||||
>
|
||||
>res6: 0.4013840830449827
|
||||
>
|
||||
>res7: 0.21453287197231835
|
||||
>
|
||||
>res8: 0.4671280276816609
|
||||
>
|
||||
>res9: 0.40484429065743943
|
||||
>
|
||||
>res10: 0.38408304498269896
|
||||
>
|
||||
>Medium result: 0.3906574394463667
|
||||
>
|
||||
>Testing iterations number 1000:
|
||||
>
|
||||
>res1: 0.4186851211072664
|
||||
>
|
||||
>res2: 0.5017301038062284
|
||||
>
|
||||
>res3: 0.5121107266435986
|
||||
>
|
||||
>res4: 0.3806228373702422
|
||||
>
|
||||
>res5: 0.44982698961937717
|
||||
>
|
||||
>res6: 0.5986159169550173
|
||||
>
|
||||
>res7: 0.5570934256055363
|
||||
>
|
||||
>res8: 0.4290657439446367
|
||||
>
|
||||
>res9: 0.32525951557093424
|
||||
>
|
||||
>res10: 0.41522491349480967
|
||||
>
|
||||
>Medium result: 0.4588235294117647
|
||||
|
||||
Итого: Для отобранных данных нейронная модель с методом градиентного спуска "adam" показала себя не лучшим образом. Возможно, другие методы могут выдать лучшие результаты, либо необходима более обширная модификация модели.
|
||||
28896
alexandrov_dmitrii_lab_6/sberbank_data.csv
Normal file
2795
alexandrov_dmitrii_lab_7/data.txt
Normal file
96
alexandrov_dmitrii_lab_7/lab7.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import numpy as np
|
||||
from keras_preprocessing.sequence import pad_sequences
|
||||
from keras_preprocessing.text import Tokenizer
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, LSTM, Embedding, Dropout
|
||||
from keras.callbacks import ModelCheckpoint
|
||||
|
||||
|
||||
def recreate_model(predictors, labels, model, filepath, epoch_num):
    """Compile ``model`` and train it for ``epoch_num`` epochs.

    The model is compiled with sparse categorical cross-entropy loss, the
    'adam' optimizer and the accuracy metric, then handed to append_epochs().

    :param predictors: training inputs passed through to ``model.fit``.
    :param labels: training targets passed through to ``model.fit``.
    :param model: the Keras model to compile and train.
    :param filepath: checkpoint path forwarded to append_epochs().
    :param epoch_num: number of epochs to train.
    """
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # append_epochs() takes five arguments; leaving out ``filepath`` raised a
    # TypeError on every call.
    append_epochs(predictors, labels, model, filepath, epoch_num)
|
||||
|
||||
|
||||
def append_epochs(predictors, labels, model, filepath, epoch_num):
    """Fit ``model`` for ``epoch_num`` more epochs, checkpointing to ``filepath``.

    A single ModelCheckpoint callback is attached; it monitors the training
    'loss' in 'min' mode with ``save_best_only=True``.
    """
    model.fit(
        predictors,
        labels,
        epochs=epoch_num,
        verbose=1,
        callbacks=[
            ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min'),
        ],
    )
|
||||
|
||||
|
||||
def generate_text(tokenizer, seed_text, next_words, model, max_seq_length):
    """Extend ``seed_text`` with ``next_words`` words predicted by ``model``.

    On every step the text produced so far is converted to token indices,
    left-padded to ``max_seq_length - 1`` entries, fed to ``model.predict`` and
    the index with the highest score is mapped back to a word and appended
    after a space. If the predicted index has no word in the vocabulary, an
    empty word is appended.

    :param tokenizer: object providing ``texts_to_sequences`` and ``word_index``.
    :param seed_text: initial text to continue.
    :param next_words: number of words to generate.
    :param model: object providing ``predict``.
    :param max_seq_length: sequence length used when the model was built.
    :return: the seed text followed by the generated words.
    """
    # Invert the vocabulary once instead of scanning it for every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_length - 1, padding='pre')
        # A single sequence is predicted, so take the first (only) argmax as a plain int.
        predicted = int(np.argmax(model.predict(token_list), axis=-1)[0])
        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text
|
||||
|
||||
|
||||
def _ask_epochs(question):
    """Ask ``question`` with a second confirmation and return an epoch count.

    Returns the entered number when both answers are exactly 'yes' and the
    number lies strictly between 0 and 100; otherwise returns None.
    """
    if input(question) != 'yes':
        return None
    if input("Are you sure? (print yes): ") != 'yes':
        return None
    try:
        num = int(input("Select number of epoch: "))
    except ValueError:
        # Non-numeric input is treated like an out-of-range number: skip.
        return None
    return num if 0 < num < 100 else None


def start():
    """Interactive entry point: pick a corpus, prepare the model, generate text.

    Steps: ask which text/model pair to use, tokenize the text into n-gram
    training sequences, build the Embedding -> LSTM -> Dropout -> Dense model,
    optionally train it from scratch, load the weights stored in the chosen
    checkpoint file, optionally train further, then repeatedly ask for a seed
    and print a 25-word continuation.
    """
    flag = -1
    while flag < 1 or flag > 2:
        try:
            flag = int(input("Select model and text (1 - eng, 2 - ru): "))
        except ValueError:
            # Ask again instead of crashing on non-numeric input.
            flag = -1

    # The loop above only exits with flag equal to 1 or 2.
    if flag == 1:
        text_path, filepath = "data.txt", "model_eng.hdf5"
    else:
        text_path, filepath = "rus_data.txt", "model_rus.hdf5"
    # The context manager closes the corpus file once it has been read.
    with open(text_path) as corpus:
        file = corpus.read()

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([file])
    words_count = len(tokenizer.word_index) + 1

    # Every prefix of at least two tokens of every line becomes one sample.
    input_sequences = []
    for line in file.split('\n'):
        token_list = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            input_sequences.append(token_list[:i + 1])

    max_seq_length = max(len(seq) for seq in input_sequences)
    input_sequences = pad_sequences(input_sequences, maxlen=max_seq_length, padding='pre')

    # The last token of each padded sequence is the label, the rest the input.
    predictors, labels = input_sequences[:, :-1], input_sequences[:, -1]

    model = Sequential()
    model.add(Embedding(words_count, 100, input_length=max_seq_length - 1))
    model.add(LSTM(150))
    model.add(Dropout(0.15))
    model.add(Dense(words_count, activation='softmax'))

    num = _ask_epochs("Do you want to recreate the model ? (print yes): ")
    if num is not None:
        recreate_model(predictors, labels, model, filepath, num)

    model.load_weights(filepath)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    num = _ask_epochs("Do you want to train the model ? (print yes): ")
    if num is not None:
        append_epochs(predictors, labels, model, filepath, num)

    flag = 'y'
    while flag == 'y':
        seed = input("Enter seed: ")
        print(generate_text(tokenizer, seed, 25, model, max_seq_length))
        flag = input("Continue? (print 'y'): ")
|
||||
flag = input("Continue? (print \'y\'): ")
|
||||
|
||||
|
||||
start()
|
||||
BIN
alexandrov_dmitrii_lab_7/model_eng.hdf5
Normal file
BIN
alexandrov_dmitrii_lab_7/model_rus.hdf5
Normal file
49
alexandrov_dmitrii_lab_7/readme.md
Normal file
@@ -0,0 +1,49 @@
|
||||
### Задание
|
||||
Выбрать художественный текст (четные варианты – русскоязычный, нечетные – англоязычный) и обучить на нем рекуррентную нейронную сеть для решения задачи генерации. Подобрать архитектуру и параметры так, чтобы приблизиться к максимально осмысленному результату. Далее разбиться на пары четный-нечетный вариант, обменяться разработанными сетями и проверить, как архитектура товарища справляется с вашим текстом.
|
||||
|
||||
Вариант 1: первостепенно - английский текст. Кооперироваться, впрочем, не с кем.
|
||||
|
||||
### Запуск программы
|
||||
Файл lab7.py содержит и запускает программу, аргументов и настройки ~~вроде~~ не требует.
|
||||
|
||||
### Описание программы
|
||||
Программа представляет собой консольное приложение-инструмент для работы с моделями. Она может создавать и обучать однородные модели для разных текстов.
|
||||
В файлах хранятся два текста: англоязычный data.txt (Остров сокровищ) и русскоязычный rus_data.txt (Хоббит). Также там хранятся две сохранённые обученные модели:
|
||||
* model_eng - модель, обученная на английском тексте. На текущий момент 27 эпох обучения.
|
||||
* model_rus - модель, обученная на русском тексте. На текущий момент 12 эпох обучения.
|
||||
Обучение проходило 1 день.
|
||||
|
||||
В программе необходимо выбрать загружаемый текст и соответствующую модель, в данный момент подключается русскоязычная модель.
|
||||
|
||||
Программа содержит методы пересоздания модели и дообучения модели (передаётся модель и количество эпох дообучения). Оба метода отключены и могут быть подключены обратно при необходимости.
|
||||
|
||||
После возможных пересоздания и дообучения моделей программа запрашивает текст-кодовое слово, которое модели будет необходимо продолжить, сгенерировав свой текст.
|
||||
|
||||
Сама модель имеет следующую архитектуру:
|
||||
* слой Embedding, преобразующий слова в плотные векторы: размер входа равен числу слов в словаре плюс один, размер выхода — 100, длина ввода равна длине самой длинной последовательности слов минус один.
|
||||
* слой LSTM со 150 блоками долгой краткосрочной памяти, составляющий рекуррентную сеть.
|
||||
* слой, задающий степень разрыва нейронных связей между соседними слоями, Dropout с процентом разрыва 15.
|
||||
* слой вычисления взвешенных сумм Dense с числом нейронов, равным числу слов в тексте и функцией активации 'softmax'
|
||||
|
||||
### Результаты тестирования
|
||||
По результатам дневного обучения можно сказать следующее:
|
||||
|
||||
Модель успешно генерирует бессмысленные последовательности слов, которые либо состоят из обрывков фраз, либо случайно (но достаточно часто) складываются в осмысленные словосочетания, но не более.
|
||||
|
||||
Примеры генераций (первое слово - код генерации):
|
||||
|
||||
Модель, обученная на 'Острове сокровищ', 27 эпох обучения:
|
||||
>ship that he said with the buccaneers a gentleman and neither can read and figure but what is it anyway ah 'deposed' that's it is a
|
||||
>
|
||||
>chest said the doctor touching the black spot mind by the arm who is the ship there's long john now you are the first that were
|
||||
>
|
||||
>silver said the doctor if you can get the treasure you can find the ship there's been a man that has lost his score out he
|
||||
|
||||
Модель, обученная на 'Хоббите', 12 эпох обучения:
|
||||
>дракон и тут они услыхали про смога он понял что он стал видел и разозлился как слоны у гэндальфа хороши но все это было бы он
|
||||
>
|
||||
>поле он не мог сообразить что он делал то в живых и слышал бильбо как раз доедал пуще прежнего а бильбо все таки уж не мог
|
||||
>
|
||||
>паук направился к нему толстому из свертков они добрались до рассвета и даже дальше не останавливаясь а именно что гоблины обидело бильбо они не мог ничего
|
||||
|
||||
Итого: Даже такая простая модель с таким малым количеством эпох обучения может иногда сгенерировать нечто осмысленное. Однако для генерации нормального текста необходимо длительное обучение и более сложная модель, из нескольких слоёв LSTM и Dropout после них, что, однако, потребовало бы вычислительные мощности, которых у меня нет в наличии. Иначе следует взять очень маленький текст.
|
||||
6838
alexandrov_dmitrii_lab_7/rus_data.txt
Normal file
115
antonov_dmitry_lab_7/README.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# Лаб 7 RNN
|
||||
|
||||
Выбрать художественный текст (четные варианты – русскоязычный,
|
||||
нечетные – англоязычный) и обучить на нем рекуррентную нейронную сеть
|
||||
для решения задачи генерации. Подобрать архитектуру и параметры так,
|
||||
чтобы приблизиться к максимально осмысленному результату. Далее
|
||||
разбиться на пары четный-нечетный вариант, обменяться разработанными
|
||||
сетями и проверить, как архитектура товарища справляется с вашим текстом.
|
||||
В завершении подобрать компромиссную архитектуру, справляющуюся
|
||||
достаточно хорошо с обоими видами текстов.
|
||||
|
||||
# Вариант 3
|
||||
|
||||
Рекуррентная нейронная сеть и задача
|
||||
генерации текста
|
||||
|
||||
# Запуск
|
||||
|
||||
Выполнением скрипта файла (вывод в консоль).
|
||||
|
||||
# Описание модели:
|
||||
|
||||
Использованы библиотеки:
|
||||
* numpy (np): популярная библиотека для научных вычислений.
|
||||
* tensorflow (tf): библиотека для тренировки нейросетей.
|
||||
* Sequential: тип модели Keras, который позволяет создавать нейросети слой за слоем.
|
||||
* Embedding, LSTM, Dense: различные типы слоев в нейросетях.
|
||||
* Tokenizer: класс для конвертации слов в числовой понятный для нейросети формат.
|
||||
<p></p>
|
||||
Каждая строка текста переводится в числа с помощью Tokenizer.
|
||||
Класс Tokenizer в Keras - это утилита обработки текста, которая преобразует текст в
|
||||
последовательность целых чисел. Он присваивает уникальное целое число (индекс) каждому слову
|
||||
в тексте и создает словарь, который сопоставляет каждое слово с соответствующим индексом.
|
||||
Это позволяет вам работать с текстовыми данными в формате, который может быть передан в нейронную сеть.
|
||||
Все это записывается в input_sequences.
|
||||
|
||||
Строим RNN модель используя Keras:
|
||||
|
||||
* Embedding: Этот слой превращает числа в плотные векторы фиксированного размера, также известные
|
||||
как "word embeddings". Вложения слов - это плотные векторные представления слов в непрерывном
|
||||
векторном пространстве. Они позволяют нейронной сети изучать и понимать взаимосвязи между словами
|
||||
на основе их контекста в содержании текста.
|
||||
* LSTM: это тип рекуррентной нейронной сети (RNN), которая предназначена для обработки
|
||||
зависимостей в последовательностях.
|
||||
* Dense: полносвязный слой с множеством нейронов, нейронов столько же сколько и уникальных слов.
|
||||
Он выводит вероятность следующего слова.
|
||||
|
||||
* Модель обучаем на разном количестве эпох, по умолчанию epochs = 100 (итераций по всему набору данных).
|
||||
|
||||
Определяем функцию generate_text, которая принимает стартовое слово, а также число слов для генерации.
|
||||
Модель генерирует текст путем многократного предсказания следующего слова на основе предыдущих слов в
|
||||
начальном тексте.
|
||||
|
||||
* В конце мы получаем сгенерированную на основе текста последовательность.
|
||||
|
||||
# Задача генерации англоязычного текста
|
||||
На вход подаем историю с похожими повторяющимися словами. Историю сохраняем в файл.
|
||||
Задача проверить насколько сеть не станет повторять текст, а будет действительно генерировать
|
||||
относительно новый текст.
|
||||
|
||||
# Результаты
|
||||
Тестируется английский текст, приложенный в репозитории.
|
||||
* на 50 эпохах ответ на I want
|
||||
* I want to soar high up in the sky like to glide through the clouds feeling the wind beneath my wings i want to fly i want to fly i want to fly i want to fly i want to fly i want to fly i want to fly i want to
|
||||
* на 100 эпох ответ на I want
|
||||
* I want to fly i want to soar high up in the sky like a bird to glide through the clouds feeling the wind beneath my wings i want to fly i want to fly i want to spread my wings and soar into the open sky to glide far above the
|
||||
* на 150 эпохах ответ на I want
|
||||
* I want to fly i want to spread my wings and soar into the open sky to glide far above the earth unbounded by gravity i want to fly i want to fly i want to fly i want to soar high up in the sky like a bird to glide through
|
||||
* на 220 эпохах ответ на I want
|
||||
* I want to fly i want to soar high up in the sky like a bird to glide through the clouds feeling the wind beneath my wings i want to fly i want to fly i want to fly i want to fly i want to fly i want to fly i
|
||||
|
||||
* На 220 эпохах результаты хуже, это произошло, скорее всего, из-за переобучения (грубый повтор).
|
||||
* На 50 эпохах нейронная сеть плохо обучена (из 1 места плюс повтор)
|
||||
* На 100 эпохах средний результат (из 2 мест)
|
||||
* На 150 эпохах нейронная сеть показывает наилучший результат (из 3 разных мест без повтора)
|
||||
|
||||
Так же модель работает и на русском тексте. Вот что сгенерировала модель на 150 эпохах.
|
||||
Предложения взяты из разных мест и выглядят осмысленно.
|
||||
"Я хочу летать потому что в этом заложено желание преодолевать границы хочу чувствовать себя
|
||||
свободным словно ветер несущим меня к новым приключениям я хочу летать и продолжать этот бескрайний
|
||||
полет вперед ибо в этом полете заключена вся суть моего существования существования существования
|
||||
существования существования трудности трудности трудности неважными хочу летать потому что."
|
||||
|
||||
Чем больше текст мы берем, тем более интересные результаты получаем, но моих вычислительных мощностей уже не хватит.
|
||||
Так же чем больше прогонов, тем лучше модель, но тоже не до бесконечности можно получить хороший результат.
|
||||
<p>
|
||||
<div>Обучение</div>
|
||||
<img src="screens/img_2.png" width="650" title="Обучение">
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<div>Результат</div>
|
||||
<img src="screens/img_3.png" width="650" title="Результат">
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<div>Обучение 1</div>
|
||||
<img src="screens/step1.png" width="650" title="Обучение 1">
|
||||
</p>
|
||||
<p>
|
||||
<div>Обучение 2</div>
|
||||
<img src="screens/step2.png" width="650" title="Обучение 2">
|
||||
</p>
|
||||
<p>
|
||||
<div>Обучение 3</div>
|
||||
<img src="screens/step3.png" width="650" title="Обучение 3">
|
||||
</p>
|
||||
<p>
|
||||
<div>Обучение 4</div>
|
||||
<img src="screens/step4.png" width="650" title="Обучение 4">
|
||||
</p>
|
||||
<p>
|
||||
<div>Обучение 5</div>
|
||||
<img src="screens/step5.png" width="650" title="Обучение 5">
|
||||
</p>
|
||||
55
antonov_dmitry_lab_7/lab7.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import numpy as np
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Embedding, LSTM, Dense
|
||||
from keras.preprocessing.text import Tokenizer
|
||||
from keras_preprocessing.sequence import pad_sequences
|
||||
|
||||
# Load the training corpus.
with open('rus.txt', encoding='utf-8') as file:
    text = file.read()

# Fit the tokenizer on the whole text; the vocabulary size is the number of
# distinct words plus one (presumably because Keras word indices start at 1
# and 0 is left for padding - confirm against the Tokenizer docs).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# For every line, emit all of its prefixes of length >= 2 as training samples.
input_sequences = []
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(token_list[:end] for end in range(2, len(token_list) + 1))

# Left-pad every sample to the length of the longest one.
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# The last token of each sample is the label; everything before it is the input.
predictors = input_sequences[:, :-1]
labels = input_sequences[:, -1]

# Build the RNN model: embedding -> LSTM -> softmax over the vocabulary.
model = Sequential()
for layer in (
    Embedding(total_words, 100, input_length=max_sequence_length - 1),
    LSTM(150),
    Dense(total_words, activation='softmax'),
):
    model.add(layer)

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model.
model.fit(predictors, labels, epochs=150, verbose=1)
|
||||
|
||||
|
||||
# генерация текста на основе модели
|
||||
def generate_text(seed_text, next_words, model, max_sequence_length):
    """Extend ``seed_text`` with ``next_words`` words predicted by ``model``.

    On every step the current text is tokenized with the module-level
    ``tokenizer``, left-padded to ``max_sequence_length - 1`` tokens, and the
    word whose index has the highest predicted score is appended.

    Args:
        seed_text: Initial text the generation starts from.
        next_words: Number of words to append.
        model: Object exposing ``predict``; called with the padded sequence.
        max_sequence_length: Sequence length used during training; the model
            input is one token shorter.

    Returns:
        ``seed_text`` followed by the generated words, separated by spaces.
        An index that has no word in the vocabulary contributes an empty
        string (so only a space is appended for it).
    """
    # Invert the vocabulary once instead of scanning word_index for every
    # generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
        # A single sequence is passed in, so take element 0 of the argmax and
        # turn it into a plain int before using it as a dictionary key.
        predicted = int(np.argmax(model.predict(padded), axis=-1)[0])
        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text
|
||||
|
||||
|
||||
# Generate 50 words that continue the seed phrase and print the result.
generated_text = generate_text(
    seed_text="Я хочу",
    next_words=50,
    model=model,
    max_sequence_length=max_sequence_length,
)
print(generated_text)
|
||||
BIN
antonov_dmitry_lab_7/my_model.h5
Normal file
11
antonov_dmitry_lab_7/rus.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
Я хочу летать. Почувствовать ветер в лицо, свободно парить в небесах. Я хочу летать, словно птица, освободившись от земных оков. Летать, словно орел, покоряя небесные просторы. Я хочу летать, чувствовать каждый момент поднятия в воздух, каждый поворот, каждое крыло, взмахнувшее в танце с аэродинамикой.
|
||||
|
||||
Я хочу летать над горами, смотреть на вершины, которые кажутся такими далекими с земли. Хочу летать над океанами, наблюдая за волнами, встречая закаты, окрашивающие водную гладь в огонь. Я хочу летать над городами, где жизнь бурлит своим ритмом, а улицы выглядят как мозаика, расстилающаяся под ногами.
|
||||
|
||||
Я хочу летать, ощущать тот подъем, когда ты понимаешь, что земля осталась позади, а ты – свободен, как никогда. Я хочу летать и видеть этот мир с высоты, где все проблемы кажутся такими маленькими и неважными. Хочу летать и чувствовать себя частью этого огромного космического танца, где звезды танцуют свои вечерние вальсы.
|
||||
|
||||
Я хочу летать, несмотря ни на что, преодолевая любые преграды. Хочу летать, потому что в этом чувствую свое настоящее "я". Летать – значит освобождаться от гравитации рутины, подниматься над повседневностью, смотреть на мир с высоты своей мечты.
|
||||
|
||||
Я хочу летать, потому что в этом заключена свобода души. Хочу ощутить, как воздух обволакивает меня, как каждая клетка моего тела ощущает эту свободу. Хочу летать, потому что это моя мечта, которая дает мне силы двигаться вперед, преодолевая все трудности.
|
||||
|
||||
Я хочу летать, потому что в этом заложено желание преодолевать границы. Хочу чувствовать себя свободным, словно ветер, несущим меня к новым приключениям. Я хочу летать и продолжать этот бескрайний полет вперед, ибо в этом полете заключена вся суть моего существования.
|
||||
BIN
antonov_dmitry_lab_7/screens/img.png
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
antonov_dmitry_lab_7/screens/img_1.png
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
antonov_dmitry_lab_7/screens/img_2.png
Normal file
|
After Width: | Height: | Size: 44 KiB |
BIN
antonov_dmitry_lab_7/screens/img_3.png
Normal file
|
After Width: | Height: | Size: 13 KiB |
BIN
antonov_dmitry_lab_7/screens/img_4.png
Normal file
|
After Width: | Height: | Size: 46 KiB |
BIN
antonov_dmitry_lab_7/screens/step1.png
Normal file
|
After Width: | Height: | Size: 76 KiB |
BIN
antonov_dmitry_lab_7/screens/step2.png
Normal file
|
After Width: | Height: | Size: 81 KiB |
BIN
antonov_dmitry_lab_7/screens/step3.png
Normal file
|
After Width: | Height: | Size: 72 KiB |
BIN
antonov_dmitry_lab_7/screens/step4.png
Normal file
|
After Width: | Height: | Size: 63 KiB |
BIN
antonov_dmitry_lab_7/screens/step5.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
16
antonov_dmitry_lab_7/small.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
I want to fly. I want to soar high up in the sky like a bird. To glide through the clouds, feeling the wind beneath my wings. I want to fly.
|
||||
|
||||
I imagine what it would be like, to be able to spread my arms and take off into the endless blue. To swoop and dive and twirl through the air unencumbered by gravity's pull. I want to fly.
|
||||
|
||||
I watch the birds outside my window, floating effortlessly on the breeze. How I wish I could join them up there. To break free of the bounds of this earth and taste the freedom of flight. I want to fly.
|
||||
|
||||
Over and over I dream of flying. I flap my arms but remain stuck to the ground. Still I gaze up hopefully at the sky. One day, I tell myself. One day I will fly. I want to fly.
|
||||
I want to fly. I want to spread my wings and soar into the open sky. To glide far above the earth unbounded by gravity. I want to fly.
|
||||
|
||||
Ever since I was a child I've dreamed of flying. I would flap my arms trying in vain to take off. I envied the birds and their gift of flight. On windy days, I'd run with the breeze, hoping it would lift me up. But my feet stayed planted. Still my desire to fly remained.
|
||||
|
||||
As I grew up, my dreams of flying never left. I'd gaze out plane windows high above the earth and ache to sprout wings. I'd watch birds for hours wishing I could join their effortless flight. At night I'd have vivid dreams of gliding among the clouds. Then I'd awake still earthbound and sigh. My longing to fly unchanged.
|
||||
|
||||
I want to know what it feels like to swoop and dive through the air. To loop and twirl on the wind currents with ease. To soar untethered by gravity's grip. But I'm trapped on the ground, wings useless and weighted. Still I stare upwards hoping. Still I imagine what could be. Still I want to fly.
|
||||
|
||||
They say it's impossible, that humans aren't meant for flight. But I refuse to let go of this dream. I gaze up, envying the way the birds own the sky while my feet stay planted. I flap and I hope. And still I want to fly.
|
||||
109
arutunyan_dmitry_lab_2/README.md
Normal file
@@ -0,0 +1,109 @@
|
||||
|
||||
## Лабораторная работа 2. Вариант 4.
|
||||
### Задание
|
||||
Выполнить ранжирование признаков. Отобразить получившиеся значения\оценки каждого признака каждым методом\моделью и среднюю оценку. Провести анализ получившихся результатов. Какие четыре признака оказались самыми важными по среднему значению?
|
||||
|
||||
Модели:
|
||||
|
||||
- Гребневая регрессия `Ridge`,
|
||||
- Случайное Лассо `RandomizedLasso`,
|
||||
- Рекурсивное сокращение признаков `Recursive Feature Elimination – RFE`
|
||||
|
||||
> **Warning**
|
||||
>
|
||||
> Модель "случайное лассо" `RandomizedLasso` была признана устаревшей в библиотеке `scikit-learn` версии 0.20. Её безболезненной заменой назван регрессор случайного леса `RandomForestRegressor`. Он будет использоваться в данной лабораторной вместо устаревшей функции.
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
|
||||
### Используемые технологии
|
||||
- Библиотека `numpy`, используемая для обработки массивов данных и вычислений
|
||||
- Библиотека `sklearn` - большой набор функционала для анализа данных. Из неё были использованы инструменты:
|
||||
- `LinearRegression` - инструмент работы с моделью "Линейная регрессия"
|
||||
- `Ridge` - инструмент работы с моделью "Гребневая регрессия"
|
||||
- `RFE` - инструмент оценки важности признаков "Рекурсивное сокращение признаков"
|
||||
- `RandomForestRegressor` - инструмент работы с моделью "Регрессор случайного леса"
|
||||
- `MinMaxScaler` - инструмент масштабирования значений в заданный диапазон
|
||||
|
||||
### Описание работы
|
||||
Программа генерирует данные для обучения моделей. Сначала генерируются признаки в количестве 14-ти штук, важность которых модели предстоит выявить.
|
||||
```python
|
||||
np.random.seed(0)
|
||||
size = 750
|
||||
X = np.random.uniform(0, 1, (size, 14))
|
||||
```
|
||||
Затем задаётся функция зависимости выходных параметров от входных, представляющая собой регрессионную проблему Фридмана.
|
||||
```python
|
||||
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
|
||||
10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1))
|
||||
```
|
||||
После чего, задаются зависимости переменных `x11 x12 x13 x14` от переменных `x1 x2 x3 x4`.
|
||||
```python
|
||||
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
|
||||
```
|
||||
Первая группа переменных должна быть обозначена моделями как наименее значимая.
|
||||
|
||||
#### Работа с моделями
|
||||
Первая модель `Ridge` - модель гребневой регрессии.
|
||||
```python
|
||||
ridge = Ridge(alpha=1)
|
||||
ridge.fit(X, Y)
|
||||
```
|
||||
Данная модель не предоставляет прямого способа оценки важности признаков, так как она использует линейную комбинацию всех признаков с коэффициентами, которые оптимизируются во время обучения модели. Можно лишь оценить относительную важность признаков на основе абсолютных значений коэффициентов, которые были найдены в процессе обучения. Получить данные коэффициенты от модели можно с помощью атрибута `.coef_`.
|
||||
|
||||
Вторая модель `RandomForestRegressor` - алгоритм ансамбля случайных деревьев решений. Он строит множество деревьев, каждое из которых обучается на случайной подвыборке данных и случайном подмножестве признаков.
|
||||
```python
|
||||
rfr = RandomForestRegressor()
|
||||
rfr.fit(X, Y)
|
||||
```
|
||||
Важность признаков в Random Forest Regressor определяется на основе того, как сильно каждый признак влияет на уменьшение неопределенности в предсказаниях модели. Для получения оценок важности в данной модели используется атрибут `.feature_importances_`.
|
||||
|
||||
Третий инструмент `Recursive Feature Elimination – RFE` - алгоритм отбора признаков, который используется для оценки и ранжирования признаков по их важности.
|
||||
```python
|
||||
lr = LinearRegression()
|
||||
lr.fit(X, Y)
|
||||
rfe = RFE(lr)
|
||||
rfe.fit(X,Y)
|
||||
```
|
||||
Оценка важности признаков в RFE происходит путем анализа того, как изменяется производительность модели при удалении каждого признака. В зависимости от этого каждый признак получает ранг. Массив рангов признаков извлекается из атрибута `.ranking_`.
|
||||
|
||||
#### Нормализация оценок
|
||||
Модели `Ridge` и `RandomForestRegressor` работают по одинаковой логике вывода значимости оценок. В данных моделях оценки значимости параметров - веса значимости, которые они представляют для модели. Очевидно, что чем выше данный показатель, тем более значимым является признак. Для нормализации оценок необходимо взять их по модулю и привести их к диапазону от 0 до 1.
|
||||
```python
|
||||
ranks = np.abs(ranks)
|
||||
minmax = MinMaxScaler()
|
||||
ranks = minmax.fit_transform(np.array(ranks).reshape(14, 1)).ravel()
|
||||
ranks = map(lambda x: round(x, 2), ranks)
|
||||
```
|
||||
Инструмент `Recursive Feature Elimination – RFE` работает иначе. Класс выдает не веса при коэффициентах регрессии, а именно ранг для каждого признака. Так наиболее важные признаки будут иметь ранг – "1", а менее важные признаки ранг больше "1". Коэффициенты остальных моделей тем важнее, чем больше их абсолютное значение. Для нормализации таких рангов от 0 до 1, необходимо просто взять обратное число от величины ранга признака.
|
||||
```python
|
||||
new_ranks = [float(1 / x) for x in ranks]
|
||||
new_ranks = map(lambda x: round(x, 2), new_ranks)
|
||||
```
|
||||
|
||||
#### Оценка работы моделей
|
||||
Для оценки результатов выведем выявленные оценки значимости признаков каждой модели, а также средние оценки значимости признаков всех моделей.
|
||||
```
|
||||
Ridge
|
||||
[('x4', 1.0), ('x1', 0.98), ('x2', 0.8), ('x14', 0.61), ('x5', 0.54), ('x12', 0.39), ('x3', 0.25), ('x13', 0.19), ('x11', 0.16), ('x6', 0.08), ('x8', 0.07), ('x7', 0.02), ('x10', 0.02), ('x9', 0.0)]
|
||||
Recursive Feature Elimination
|
||||
[('x1', 1.0), ('x2', 1.0), ('x3', 1.0), ('x4', 1.0), ('x5', 1.0), ('x11', 1.0), ('x13', 1.0), ('x12', 0.5), ('x14', 0.33), ('x8', 0.25), ('x6', 0.2), ('x10', 0.17), ('x7', 0.14), ('x9', 0.12)]
|
||||
Random Forest Regression
|
||||
[('x14', 1.0), ('x2', 0.84), ('x4', 0.77), ('x1', 0.74), ('x11', 0.36), ('x12', 0.35), ('x5', 0.28), ('x3', 0.12), ('x13', 0.12), ('x6', 0.01), ('x7', 0.01), ('x8', 0.01), ('x9', 0.01), ('x10', 0.0)]
|
||||
Mean
|
||||
[('x4', 0.92), ('x1', 0.91), ('x2', 0.88), ('x14', 0.65), ('x5', 0.61), ('x11', 0.51), ('x3', 0.46), ('x13', 0.44), ('x12', 0.41), ('x8', 0.11), ('x6', 0.1), ('x7', 0.06), ('x10', 0.06), ('x9', 0.04)]
|
||||
|
||||
```
|
||||
- Модель `Ridge` верно выявила значимость признаков `x1, x2, x4, х5`, но потеряла значимый признак `x3` и ошибочно включила признак `x14` в значимые.
|
||||
- Модель `RandomForestRegressor` также верно выявила значимость признаков `x1, x2, x4`, но потеряла значимые признаки `x3, х5` и ошибочно включила признак `x14` в значимые.
|
||||
- Инструмент `Recursive Feature Elimination – RFE` безошибочно выделил все значимые признаки `x1, x2, х3, x4, x5`, но ошибочно отметил признаки `x11, x13` как значимые.
|
||||
- В среднем значимыми признаками были верно выявлены `x1, x2, x4, х5`, но значимый признак `x3` был потерян, а признаки `x11, х14` были признаны ошибочно значимыми.
|
||||
|
||||
|
||||
### Вывод
|
||||
Хуже всех показала себя модель `RandomForestRegressor`, потеряв два значимых признака и добавив один лишний. Модель `Ridge` и инструмент `Recursive Feature Elimination – RFE` допустили по одной ошибке, однако последний не потерял ни одного значимого признака. Значимость в среднем получилась неудовлетворительной и выдала три ошибки, как и первая модель.
|
||||
|
||||
Исходя из этого, можно сделать вывод, что для ранжирования признаков лучше использовать специально созданные для этого инструменты по типу `Recursive Feature Elimination – RFE`, а не использовать коэффициенты признаков регрессионных моделей.
|
||||
69
arutunyan_dmitry_lab_2/main.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from operator import itemgetter
|
||||
import numpy as np
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.feature_selection import RFE
|
||||
from sklearn.linear_model import LinearRegression, Ridge
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
|
||||
|
||||
np.random.seed(0)
size = 750

# Source data: 750 observation rows and 14 feature columns.
X = np.random.uniform(0, 1, (size, 14))

# Output function: the Friedman regression problem. The terms are summed in
# the same left-to-right order as before so the floating-point result and the
# random-number call order stay unchanged.
sine_term = 10 * np.sin(np.pi * X[:, 0] * X[:, 1])
quadratic_term = 20 * (X[:, 2] - .5) ** 2
linear_term = 10 * X[:, 3]
power_term = 5 * X[:, 4] ** 5
noise = np.random.normal(0, 1)
Y = sine_term + quadratic_term + linear_term + power_term + noise

# Make the last four features noisy copies of the first four.
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))

# Ridge regression model.
ridge = Ridge(alpha=1).fit(X, Y)

# Linear regression model, then recursive feature elimination on top of it.
lr = LinearRegression().fit(X, Y)
rfe = RFE(lr).fit(X, Y)

# Random forest regressor (used instead of the deprecated randomized lasso).
rfr = RandomForestRegressor().fit(X, Y)
|
||||
|
||||
|
||||
def rank_ridge_rfr_to_dict(ranks, names):
    """Normalize importance scores of the ridge and random-forest models.

    The scores are taken by absolute value, rescaled with ``MinMaxScaler``
    and rounded to two decimals.

    Args:
        ranks: One importance score (or coefficient) per feature.
        names: Feature names, in the same order as ``ranks``.

    Returns:
        Dict mapping each feature name to its normalized score.
    """
    magnitudes = np.abs(ranks)
    # reshape(-1, 1) turns the scores into a single column for the scaler and
    # works for any number of features (previously hard-coded to 14).
    column = np.array(magnitudes).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()
    return dict(zip(names, (round(x, 2) for x in scaled)))
|
||||
|
||||
|
||||
def rank_rfe_to_dict(ranks, names):
    """Normalize recursive-feature-elimination ranks.

    Each rank is replaced by its reciprocal, rounded to two decimals, so that
    rank 1 maps to 1.0 and larger ranks map to smaller scores.

    Args:
        ranks: One rank per feature.
        names: Feature names, in the same order as ``ranks``.

    Returns:
        Dict mapping each feature name to its normalized score.
    """
    inverted = [round(float(1 / rank), 2) for rank in ranks]
    return dict(zip(names, inverted))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    names = ["x%s" % i for i in range(1, 15)]

    # Normalized importance scores of every model, keyed by model name.
    ranks = dict()
    ranks["Ridge"] = rank_ridge_rfr_to_dict(ridge.coef_, names)
    ranks["Recursive Feature Elimination"] = rank_rfe_to_dict(rfe.ranking_, names)
    ranks["Random Forest Regression"] = rank_ridge_rfr_to_dict(rfr.feature_importances_, names)

    # Replace each model's dict with a list of (feature, score) pairs sorted
    # by score in descending order, then print them.
    for model_name in ranks:
        ranks[model_name] = sorted(ranks[model_name].items(), key=itemgetter(1), reverse=True)
    for model_name, scores in ranks.items():
        print(model_name)
        print(scores)

    # Mean importance of every feature over the three models.
    totals = {}
    for scores in ranks.values():
        for feature, score in scores:
            totals[feature] = totals.get(feature, 0) + score
    mean = {feature: round(total / len(ranks), 2) for feature, total in totals.items()}
    mean = sorted(mean.items(), key=itemgetter(1), reverse=True)
    print("Mean")
    print(mean)
|
||||
BIN
arutunyan_dmitry_lab_3/FullParam.png
Normal file
|
After Width: | Height: | Size: 154 KiB |
BIN
arutunyan_dmitry_lab_3/ImpParam.png
Normal file
|
After Width: | Height: | Size: 178 KiB |
170
arutunyan_dmitry_lab_3/README.md
Normal file
@@ -0,0 +1,170 @@
|
||||
|
||||
## Лабораторная работа 3. Вариант 4.
|
||||
### Задание
|
||||
Выполнить ранжирование признаков и решить с помощью библиотечной реализации дерева решений
|
||||
задачу классификации на 99% данных из курсовой работы. Проверить
|
||||
работу модели на оставшемся проценте, сделать вывод.
|
||||
|
||||
Модель:
|
||||
|
||||
- Дерево решений `DecisionTreeClassifier`.
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
После этого в папке `static` сгенерируются 2 графика, по которым оценивается результат выполнения программы.
|
||||
|
||||
### Используемые технологии
|
||||
- Библиотека `numpy`, используемая для обработки массивов данных и вычислений
|
||||
- Библиотека `pyplot`, используемая для построения графиков.
|
||||
- Библиотека `pandas`, используемая для работы с данными для анализа в формате csv.
|
||||
- Библиотека `sklearn` - большой набор функционала для анализа данных. Из неё были использованы инструменты:
|
||||
- `DecisionTreeClassifier` - инструмент работы с моделью "Дерево решений"
|
||||
- `metrics` - набор инструментов для оценки моделей
|
||||
- `MinMaxScaler` - инструмент масштабирования значений в заданный диапазон
|
||||
|
||||
### Описание работы
|
||||
#### Описание набора данных
|
||||
Набор данных - набор для определения возможности наличия ССЗ заболеваний у человека
|
||||
|
||||
Названия столбцов набора данных и их описание:
|
||||
|
||||
* HeartDisease - Имеет ли человек ССЗ (No / Yes),
|
||||
* BMI - Индекс массы тела человека (float),
|
||||
* Smoking - Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (No / Yes),
|
||||
* AlcoholDrinking - Сильно ли человек употребляет алкоголь (No / Yes),
|
||||
* Stroke - Был ли у человека инсульт (No / Yes),
|
||||
* PhysicalHealth - Сколько дней за последний месяц человек чувствовал себя плохо (0-30),
|
||||
* MentalHealth - Сколько дней за последний месяц человек чувствовал себя удручённо (0-30),
|
||||
* DiffWalking - Испытывает ли человек трудности при ходьбе (No / Yes),
|
||||
* Sex - Пол (female, male),
|
||||
* AgeCategory - Возрастная категория (18-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59, 60-64, 65-69, 70-74, 75-79, 80 or older),
|
||||
* Race - Национальная принадлежность человека (White, Black, Hispanic, American Indian/Alaskan Native, Asian, Other),
|
||||
* Diabetic - Был ли у человека диабет (No / Yes),
|
||||
* PhysicalActivity - Занимался ли человек спортом за последний месяц (No / Yes),
|
||||
* GenHealth - Общее самочувствие человека (Excellent, Very good, Good, Fair, Poor),
|
||||
* SleepTime - Сколько человек в среднем спит за 24 часа (0-24),
|
||||
* Asthma - Была ли у человека астма (No / Yes),
|
||||
* KidneyDisease - Было ли у человека заболевание почек (No / Yes),
|
||||
* SkinCancer - Был ли у человека рак кожи (No / Yes).
|
||||
|
||||
Ссылка на страницу набора на Kaggle: [Indicators of Heart Disease](https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease/data)
|
||||
|
||||
#### Оцифровка и нормализация данных
|
||||
Для нормальной работы с данными необходимо исключить из них все нечисловые значения. После этого представить все строковые значения параметров как числовые и очистить датасет от "мусора". Для удаления нечисловых значений воспользуемся функцией `.dropna()`. Мы исключаем строки с нечисловыми значениями, поскольку данные предварительно были очищены (указано в описании датасета) и строк данных достаточно с избытком для обучения модели: `400.000`.
|
||||
|
||||
После этого, переведём все строковые значения данных в числовые методами прямой оцифровки, разделения на группы, ранжирования.
|
||||
|
||||
Процесс оцифровки данных столбцов со строковыми значениями:
|
||||
|
||||
- Имеет ли человек ССЗ (0 / 1)
|
||||
- Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (0 / 1)
|
||||
- Сильно ли человек употребляет алкоголь (0 / 1)
|
||||
- Был ли у человека инсульт (0 / 1)
|
||||
- Испытывает ли человек трудности при ходьбе (0 / 1)
|
||||
- Пол (Ж - 0 / М - 1)
|
||||
- Возрастная категория (средний возраст каждого диапазона)
|
||||
- Национальная принадлежность человека
|
||||
- White - Европеоиды - 0
|
||||
- Black - Негроиды - 1
|
||||
- Hispanic - Испанцы - 2
|
||||
- American Indian/Alaskan Native - Индейцы / коренные жители Аляски - 3
|
||||
- Asian - Азиаты - 4
|
||||
- Other - Другие - 5
|
||||
- Был ли у человека диабет (0 / 1)
|
||||
- Занимался ли человек спортом за последний месяц (0 / 1)
|
||||
- Общее самочувствие человека
|
||||
- Excellent - Отлично - 4
|
||||
- Very good - Очень хорошо - 3
|
||||
- Good - Хорошо - 2
|
||||
- Fair - Нормально - 1
|
||||
- "Poor" / "Other..." - Плохое или другое - 0
|
||||
- Была ли у человека астма (0 / 1)
|
||||
- Было ли у человека заболевание почек (0 /1)
|
||||
- Был ли у человека рак кожи (0 / 1)
|
||||
|
||||
После оцифровки значений необходимо избавиться от строк с возможными остаточными данными ("мусором"). Для этого переведём автоматически все значения датасета в числовые функцией `to_numeric` и непереводимые отметим как `NaN` (параметр `errors='coerce'`). После снова сотрём строки, содержащие нечисловые значения, методом `.dropna()` и сохраним нормализованный датасет в новый csv файл:
|
||||
```python
|
||||
df = df.applymap(pd.to_numeric, errors='coerce').dropna()
|
||||
df.to_csv(fileout, index=False)
|
||||
```
|
||||
|
||||
#### Выявление значимых параметров
|
||||
В выбранном датасете параметром предсказания `y` выступает столбец данных `HeartDisease`. Остальные столбцы считаются параметрами для решения задачи предсказания `x`, которые необходимо проранжировать по важности. Чтобы разделить выборку данных на обучаемую и тестовую, воспользуемся функцией `.iloc`.
|
||||
```python
|
||||
x_train = df[["BMI", "Smoking", "AlcoholDrinking", "Stroke", "PhysicalHealth",
|
||||
"MentalHealth", "DiffWalking", "Sex", "AgeCategory", "Race", "Diabetic",
|
||||
"PhysicalActivity", "GenHealth", "SleepTime", "Asthma", "KidneyDisease", "SkinCancer"]].iloc[
|
||||
0:round(len(df) / 100 * 99)]
|
||||
y_train = df["HeartDisease"].iloc[0:round(len(df) / 100 * 99)]
|
||||
x_test = df[["BMI", "Smoking", "AlcoholDrinking", "Stroke", "PhysicalHealth",
|
||||
"MentalHealth", "DiffWalking", "Sex", "AgeCategory", "Race", "Diabetic",
|
||||
"PhysicalActivity", "GenHealth", "SleepTime", "Asthma", "KidneyDisease", "SkinCancer"]].iloc[
|
||||
round(len(df) / 100 * 99):len(df)]
|
||||
y_test = df["HeartDisease"].iloc[round(len(df) / 100 * 99):len(df)]
|
||||
```
|
||||
Где `round(len(df) / 100 * 99)` - 99ти процентная строка в датасете.
|
||||
|
||||
Теперь, обучим модель на данных `x_train` и `y_train` и получим значимость каждого признака в модели с помощью метода `.feature_importances_`. После отмасштабируем значения важности признаков.
|
||||
```python
|
||||
ranks = np.abs(dtc.feature_importances_)
|
||||
minmax = MinMaxScaler()
|
||||
ranks = minmax.fit_transform(np.array(ranks).reshape(len(x_train.columns), 1)).ravel()
|
||||
ranks = map(lambda x: round(x, 2), ranks)
|
||||
ranks = dict(zip(x_train.columns, ranks))
|
||||
ranks = dict(sorted(ranks.items(), key=lambda x: x[1], reverse=True))
|
||||
```
|
||||
|
||||
Чтобы отсеять значимые параметры от незначимых, условимся, что параметры, с оценкой значимости меньше `0.05` будут считаться незначимыми. Выведем список параметров с пометками:
|
||||
```
|
||||
X ranging results:
|
||||
* BMI: 1.0 - Approved
|
||||
* SleepTime: 0.26 - Approved
|
||||
* PhysicalHealth: 0.18 - Approved
|
||||
* GenHealth: 0.16 - Approved
|
||||
* MentalHealth: 0.15 - Approved
|
||||
* AgeCategory: 0.14 - Approved
|
||||
* Race: 0.07 - Approved
|
||||
* PhysicalActivity: 0.06 - Approved
|
||||
* Stroke: 0.04 - Eliminated
|
||||
* Smoking: 0.03 - Eliminated
|
||||
* Asthma: 0.03 - Eliminated
|
||||
* SkinCancer: 0.03 - Eliminated
|
||||
* DiffWalking: 0.02 - Eliminated
|
||||
* Sex: 0.02 - Eliminated
|
||||
* AlcoholDrinking: 0.0 - Eliminated
|
||||
* Diabetic: 0.0 - Eliminated
|
||||
* KidneyDisease: 0.0 - Eliminated
|
||||
```
|
||||
|
||||
Где `Approved` - параметр значим и будет использоваться в предсказании, а `Eliminated` - параметр незначим и будет исключён.
|
||||
|
||||
#### Решение задачи классификации на полном наборе признаков
|
||||
Чтобы решить задачу классификации моделью `DecisionTreeClassifier`, воспользуемся методом `.predict()`. Оценку качества решения и графики будем строить теми же методами, что и в 1-й лабораторной работе.
|
||||
|
||||
График решения задачи классификации на полном наборе признаков:
|
||||
|
||||

|
||||
|
||||
#### Решение задачи классификации, используя только значимые признаки
|
||||
Согласно предыдущему пункту, значимыми признаками модели были выявлены:
|
||||
* BMI
|
||||
* SleepTime
|
||||
* PhysicalHealth
|
||||
* GenHealth
|
||||
* MentalHealth
|
||||
* AgeCategory
|
||||
* Race
|
||||
* PhysicalActivity
|
||||
Обучим модель только с их использованием, решим задачу классификации и построим график.
|
||||
|
||||
График решения задачи классификации, используя только значимые признаки:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
Согласно среднеквадратической ошибке и коэффициенту детерминации, модель, обученная только на значимых признаках, отработала точнее, чем модель, обученная на полном наборе признаков. Это значит, что ранжирование было проведено верно и дало полезный результат. О логической оценке исключённых данных сказать ничего не получится, поскольку действительную зависимость результата от параметров знает только медицинский эксперт.
|
||||
|
||||
Исходя из общих значений точности, обе модели показали хорошие результаты и могут быть применимы к решению задачи классификации на данном наборе данных.
|
||||
221
arutunyan_dmitry_lab_3/main.py
Normal file
@@ -0,0 +1,221 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn import metrics
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
|
||||
'''
|
||||
Названия столбцов набора данных и их описание:
|
||||
|
||||
* HeartDisease - Имеет ли человек ССЗ (No / Yes),
|
||||
* BMI - Индекс массы тела человека (float),
|
||||
* Smoking - Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (No / Yes),
|
||||
* AlcoholDrinking - Сильно ли человек употребляет алкоголь (No / Yes),
|
||||
* Stroke - Был ли у человека инсульт (No / Yes),
|
||||
* PhysicalHealth - Сколько дней за последний месяц человек чувствовал себя плохо (0-30),
|
||||
* MentalHealth - Сколько дней за последний месяц человек чувствовал себя удручённо (0-30),
|
||||
* DiffWalking - Испытывает ли человек трудности при ходьбе (No / Yes),
|
||||
* Sex - Пол (female, male),
|
||||
* AgeCategory - Возрастная категория (18-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59, 60-64, 65-69, 70-74, 75-79, 80 or older),
|
||||
* Race - Национальная принадлежность человека (White, Black, Hispanic, American Indian/Alaskan Native, Asian, Other),
|
||||
* Diabetic - Был ли у человека диабет (No / Yes),
|
||||
* PhysicalActivity - Занимался ли человек спортом за последний месяц (No / Yes),
|
||||
* GenHealth - Общее самочувствие человека (Excellent, Very good, Good, Fair, Poor),
|
||||
* SleepTime - Сколько человек в среднем спит за 24 часа (0-24),
|
||||
* Asthma - Была ли у человека астма (No / Yes),
|
||||
* KidneyDisease - Было ли у человека заболевание почек (No / Yes),
|
||||
* SkinCancer - Был ли у человека рак кожи (No / Yes).
|
||||
'''
|
||||
|
||||
|
||||
# Digitisation and normalisation of the raw data set
def _flag_by_substring(column, needle):
    """Return 1 where *needle* occurs in the cell text and 0 elsewhere."""
    return column.astype(str).str.contains(needle, regex=False).astype(int)


def _encode_by_substring(column, rules, default):
    """Encode a text column with the code of the first matching substring rule.

    *rules* is an ordered list of ``(substring, code)`` pairs; cells matching
    no rule receive *default*.
    """
    text = column.astype(str)
    encoded = pd.Series([default] * len(column), index=column.index)
    # Apply the rules back to front so that an earlier rule overwrites a later
    # one, which reproduces the "first match wins" order of an if/elif chain.
    for needle, code in reversed(rules):
        encoded.loc[text.str.contains(needle, regex=False)] = code
    return encoded


def normalisation(filename, fileout=None):
    """Convert the raw heart-disease CSV into an all-numeric CSV.

    Rows containing NaN are dropped, every categorical column is replaced by
    a numeric code, rows that still hold non-numeric values are dropped, and
    the result is written to *fileout*.

    Encoding:
      * Yes/No columns -> 1 if the text contains "Yes", otherwise 0;
      * Sex -> 0 if the text contains "Female", otherwise 1;
      * AgeCategory -> midpoint of the age band; the open-ended oldest band
        (and any unrecognised value) -> 82.0, i.e. the midpoint of an assumed
        80-84 band.  The previous code assigned 27.0 here, which placed the
        oldest respondents in the 25-29 band;
      * Race -> White 0, Black 1, Hispanic 2, American Indian/Alaskan
        Native 3, Asian 4, anything else 5;
      * GenHealth -> Excellent 4, Very good 3, Good 2, Fair 1, anything
        else 0.

    :param filename: path of the raw CSV file.
    :param fileout: path of the normalised CSV file; when omitted, the
        original hard-coded location is used.
    :return: the path the normalised data was written to.
    """
    if fileout is None:
        fileout = "P:\\ULSTU\\ИИС\\Datasets\\heart_2020_norm.csv"

    # Read the data and drop the rows that contain NaN
    df = pd.read_csv(filename, sep=',').dropna()

    yes_no_columns = ("HeartDisease", "Smoking", "AlcoholDrinking", "Stroke",
                      "DiffWalking", "Diabetic", "PhysicalActivity", "Asthma",
                      "KidneyDisease", "SkinCancer")
    encoded = {name: _flag_by_substring(df[name], "Yes") for name in yes_no_columns}

    # Female -> 0, everything else -> 1
    encoded["Sex"] = 1 - _flag_by_substring(df["Sex"], "Female")

    age_bands = [(18, 24), (25, 29), (30, 34), (35, 39), (40, 44), (45, 49),
                 (50, 54), (55, 59), (60, 64), (65, 69), (70, 74), (75, 79)]
    age_rules = [(f"{low}-{high}", (low + high) / 2) for low, high in age_bands]
    encoded["AgeCategory"] = _encode_by_substring(df["AgeCategory"], age_rules, 82.0)

    race_rules = [("White", 0), ("Black", 1), ("Hispanic", 2),
                  ("American Indian/Alaskan Native", 3), ("Asian", 4)]
    encoded["Race"] = _encode_by_substring(df["Race"], race_rules, 5)

    health_rules = [("Excellent", 4), ("Very good", 3), ("Good", 2), ("Fair", 1)]
    encoded["GenHealth"] = _encode_by_substring(df["GenHealth"], health_rules, 0)

    # assign() keeps the original column order and returns a fresh frame
    df = df.assign(**encoded)

    # Make sure no non-numeric value survives in the data set
    df = df.apply(pd.to_numeric, errors='coerce').dropna()
    # Store the normalised data set for the following steps
    df.to_csv(fileout, index=False)
    return fileout
|
||||
|
||||
|
||||
# Ranking of the features by importance
def param_range(filename, elim_kp):
    """Rank the features with a decision tree and keep the significant ones.

    The normalised data set is split 99% / 1% (first rows train, last rows
    test), a ``DecisionTreeClassifier`` is fitted on all features, its test
    prediction is plotted, and the feature importances are min-max scaled,
    rounded to two decimals and printed in descending order.

    :param filename: path of the normalised CSV file.
    :param elim_kp: importance threshold; features scoring below it are
        reported as eliminated and left out of the result.
    :return: ``(filename, names)`` where *names* is a keys view of the
        approved features, most important first.
    """
    feature_names = ["BMI", "Smoking", "AlcoholDrinking", "Stroke", "PhysicalHealth",
                     "MentalHealth", "DiffWalking", "Sex", "AgeCategory", "Race", "Diabetic",
                     "PhysicalActivity", "GenHealth", "SleepTime", "Asthma", "KidneyDisease",
                     "SkinCancer"]

    # Read the normalised data and split it into train and test parts
    data = pd.read_csv(filename, sep=',')
    split = round(len(data) / 100 * 99)
    features = data[feature_names]
    target = data["HeartDisease"]
    x_train, x_test = features.iloc[0:split], features.iloc[split:len(data)]
    y_train, y_test = target.iloc[0:split], target.iloc[split:len(data)]

    # Fit the tree and classify the test part on the full feature set
    tree = DecisionTreeClassifier(random_state=241)
    tree.fit(x_train.values, y_train.values)
    predicted = tree.predict(x_test.values)
    errors = pred_errors(predicted, y_test.values)
    make_plots(y_test.values, predicted, errors[0], errors[1], "Полный набор данных")

    # Scale the importances to [0, 1], round them and sort them descending
    importances = np.abs(tree.feature_importances_)
    scaled = MinMaxScaler().fit_transform(
        np.array(importances).reshape(len(x_train.columns), 1)).ravel()
    scored = zip(x_train.columns, (round(score, 2) for score in scaled))
    ranks = dict(sorted(scored, key=lambda pair: pair[1], reverse=True))

    print("X ranging results: \n")
    approved = {}
    for name, score in ranks.items():
        keep = score >= elim_kp
        verdict = "Approved" if keep else "Eliminated"
        print(" * " + name + ": " + str(score) + " - " + verdict)
        if keep:
            approved[name] = score

    return filename, approved.keys()
|
||||
|
||||
|
||||
# Classification that relies on the significant features only
def most_valuable_prediction(params):
    """Fit and evaluate a decision tree on the selected features.

    :param params: pair ``(filename, columns)``: the path of the normalised
        CSV file and the feature columns to train on.

    The data is split 99% / 1% (first rows train, last rows test); the test
    prediction is scored with ``pred_errors`` and plotted with ``make_plots``.
    """
    source_path = params[0]
    selected = params[1]

    data = pd.read_csv(source_path, sep=',')
    split = round(len(data) / 100 * 99)
    features = data[selected]
    target = data["HeartDisease"]

    x_train, x_test = features.iloc[0:split], features.iloc[split:len(data)]
    y_train, y_test = target.iloc[0:split], target.iloc[split:len(data)]

    tree = DecisionTreeClassifier(random_state=241)
    tree.fit(x_train.values, y_train.values)
    predicted = tree.predict(x_test.values)

    errors = pred_errors(predicted, y_test.values)
    make_plots(y_test.values, predicted, errors[0], errors[1], "Только важные параметры")
|
||||
|
||||
|
||||
# Quality metrics of a prediction
def pred_errors(y_predict, y_test):
    """Return ``(rmse, accuracy)`` for a prediction.

    :param y_predict: predicted labels.
    :param y_test: reference labels.
    :return: the root of the mean squared error rounded to 3 decimals and
        the accuracy score rounded to 2 decimals.  Note that the second
        value is the classification accuracy, not a coefficient of
        determination.
    """
    rmse = np.sqrt(metrics.mean_squared_error(y_test, y_predict))
    accuracy = metrics.accuracy_score(y_test, y_predict)
    return np.round(rmse, 3), np.round(accuracy, 2)
|
||||
|
||||
|
||||
# Plotting of the reference and predicted series
def make_plots(y_test, y_predict, mid_sqrt, det_kp, title):
    """Plot reference against predicted values and save the figure.

    The figure is written to ``static/<title>.png``; the ``static`` directory
    is created when it is missing, and the figure is closed even if saving
    fails.

    :param y_test: reference values (drawn in red).
    :param y_predict: predicted values (drawn in green).
    :param mid_sqrt: value shown in the legend after "Ср^2 = ".
    :param det_kp: value shown in the legend after "Кд = ".
    :param title: plot title, also used as the file name.
    """
    import os  # imported locally: the module header does not import os

    predicted_label = ("\"y\" предсказанная \n"
                       "Ср^2 = " + str(mid_sqrt) + "\n"
                       "Кд = " + str(det_kp))
    try:
        plt.plot(y_test, c="red", label="\"y\" исходная")
        plt.plot(y_predict, c="green", label=predicted_label)
        plt.legend(loc='lower left')
        plt.title(title)
        # savefig does not create missing directories
        os.makedirs('static', exist_ok=True)
        plt.savefig('static/' + title + '.png')
    finally:
        plt.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Full pipeline: normalise -> rank the features -> classify on the
    # significant ones.  0.05 is the importance threshold passed as elim_kp.
    normalised_path = normalisation("P:\\ULSTU\\ИИС\\Datasets\\heart_2020_cleaned.csv")
    ranking = param_range(normalised_path, 0.05)
    most_valuable_prediction(ranking)
|
||||
|
||||
|
||||
131
arutunyan_dmitry_lab_4/README.md
Normal file
@@ -0,0 +1,131 @@
|
||||
|
||||
## Лабораторная работа 4. Вариант 4.
|
||||
### Задание
|
||||
Использовать метод кластеризации по варианту для данных из курсовой работы, самостоятельно сформулировав задачу. Интерпретировать результаты и оценить, насколько хорошо он подходит для
|
||||
решения сформулированной задачи.
|
||||
|
||||
Алгоритм кластеризации:
|
||||
|
||||
- Пространственная кластеризация данных с шумом на основе плотности `DBSCAN`.
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
После этого в папке `static` сгенерируются 3 графика, по которым оценивается результат выполнения программы.
|
||||
|
||||
### Используемые технологии
|
||||
- Библиотека `numpy`, используемая для обработки массивов данных и вычислений
|
||||
- Библиотека `pyplot`, используемая для построения графиков.
|
||||
- Библиотека `pandas`, используемая для работы с данными для анализа csv-формата.
|
||||
- Библиотека `sklearn` - большой набор функционала для анализа данных. Из неё были использованы инструменты:
|
||||
- `DBSCAN` - инструмент работы с моделью "Пространственная кластеризация данных с шумом на основе плотности"
|
||||
- `metrics` - набор инструментов для оценки моделей
|
||||
- `LinearRegression` - инструмент работы с моделью "Линейная регрессия"
|
||||
|
||||
`DBSCAN` - это алгоритм кластеризации, который используется для кластеризации данных на основе плотности, что позволяет обнаруживать кластеры произвольной формы и обнаруживать выбросы (шум). `DBSCAN` может быть полезным при предварительной обработке данных перед задачей предсказания:
|
||||
- Удаление выбросов (шума): `DBSCAN` может помочь в идентификации и удалении выбросов из данных.
|
||||
- Генерация новых признаков: `DBSCAN` может быть использован для генерации новых признаков на основе кластеров.
|
||||
|
||||
### Описание работы
|
||||
#### Описание набора данных
|
||||
Набор данных - набор для определения возможности наличия ССЗ у человека.
|
||||
|
||||
Названия столбцов набора данных и их описание:
|
||||
|
||||
* HeartDisease - Имеет ли человек ССЗ (No / Yes),
|
||||
* BMI - Индекс массы тела человека (float),
|
||||
* Smoking - Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (No / Yes),
|
||||
* AlcoholDrinking - Сильно ли человек употребляет алкоголь (No / Yes),
|
||||
* Stroke - Был ли у человека инсульт (No / Yes),
|
||||
* PhysicalHealth - Сколько дней за последний месяц человек чувствовал себя плохо (0-30),
|
||||
* MentalHealth - Сколько дней за последний месяц человек чувствовал себя удручённо (0-30),
|
||||
* DiffWalking - Испытывает ли человек трудности при ходьбе (No / Yes),
|
||||
* Sex - Пол (female, male),
|
||||
* AgeCategory - Возрастная категория (18-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59, 60-64, 65-69, 70-74, 75-79, 80 or older),
|
||||
* Race - Национальная принадлежность человека (White, Black, Hispanic, American Indian/Alaskan Native, Asian, Other),
|
||||
* Diabetic - Был ли у человека диабет (No / Yes),
|
||||
* PhysicalActivity - Занимался ли человек спортом за последний месяц (No / Yes),
|
||||
* GenHealth - Общее самочувствие человека (Excellent, Very good, Good, Fair, Poor),
|
||||
* SleepTime - Сколько человек в среднем спит за 24 часа (0-24),
|
||||
* Asthma - Была ли у человека астма (No / Yes),
|
||||
* KidneyDisease - Было ли у человека заболевание почек (No / Yes),
|
||||
* SkinCancer - Был ли у человека рак кожи (No / Yes).
|
||||
|
||||
Ссылка на страницу набора на Kaggle: [Indicators of Heart Disease](https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease/data)
|
||||
|
||||
#### Формулировка задачи
|
||||
Согласно прописанным в литературе вариантам использования, `DBSCAN` может помочь в идентификации и удалении выбросов из данных, а также может быть использован для генерации новых признаков на основе кластеров. Исходя из этого, сформулируем задачу:
|
||||
> "В наборе данных с помощью `DBSCAN` определить и исключить строки, содержащие шум, а также сгенерировать новый признак для данных на основе кластеров. Проверить результат через решение задачи предсказания моделью линейной регрессии на исходных и модифицированных данных"
|
||||
|
||||
#### Использование алгоритма `DBSCAN`
|
||||
Чтобы эффективно использовать алгоритм `DBSCAN` необходимо правильно определить два параметра: `eps` - радиус окрестности вокруг каждой точки и `min_samples` - минимальное количество точек, которые должны находиться в окрестности, чтобы рассматривать ее как ядро кластера.
|
||||
|
||||
Начнём с получения датасета из csv файла и признаков кластеризации:
|
||||
```python
|
||||
df = pd.read_csv(filein, sep=',').iloc[0:10000]
|
||||
x = df.drop("HeartDisease", axis=1)
|
||||
```
|
||||
> **Warning**
|
||||
>
|
||||
> Алгоритм `DBSCAN` очень требователен к памяти. В худшем случае алгоритм может занимать O(N^2) оперативной памяти устройства, поэтому исследование получится провести лишь на частичной выборке в 10000 строк данных.
|
||||
|
||||
Для нахождения оптимального значения параметра `eps` воспользуемся методом расчёта средней плотности данных. Для этого необходимо найти средние значения максимумов и минимумов всех признаков и взять среднее арифметическое этих двух значений:
|
||||
|
||||
```python
|
||||
eps_opt = (x.max().values.mean() + x.min().values.mean()) / 2
|
||||
```
|
||||
Оптимальное значение параметра `min_samples` будем искать эмпирически. Условимся, что нам будет достаточно разделить все данные на 6 кластеров (пусть это будут степени риска возникновения ССЗ), но нам нельзя терять в качестве выбросов более 10% данных. Тогда мы будем варьировать параметр `min_samples` от 1 до кол-ва всех данных и закончим эксперимент при выполнении одного из указанных условий:
|
||||
|
||||
```python
|
||||
developed_data = []
|
||||
for i in range(len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
dbscan = DBSCAN(eps=eps_opt, min_samples=i)
|
||||
clusters = dbscan.fit_predict(x.values)
|
||||
if len(set(clusters)) <= 7:
|
||||
developed_data = clusters
|
||||
break
|
||||
if list(clusters).count(-1) / len(clusters) >= 0.1:
|
||||
developed_data = clusters
|
||||
break
|
||||
```
|
||||
|
||||
Таким образом, в массиве `developed_data` мы получим значение кластера для каждой строки датасета. Добавим его как дополнительный признак.
|
||||
|
||||
График кластеров для значений датасета:
|
||||
|
||||

|
||||
|
||||
#### Решение задачи предсказания
|
||||
Создадим два обучающих модуля. В 1м удалим все строки с кластером `-1`, что указывает на то, что они шум и воспользуемся дополнительным признаком `DBSCAN`:
|
||||
```python
|
||||
df_mod = df.loc[df["DBSCAN"] != -1]
|
||||
x_train_mod = df_mod.drop("HeartDisease", axis=1).iloc[0:round(len(df) / 100 * 99)]
|
||||
y_train_mod = df_mod["HeartDisease"].iloc[0:round(len(df) / 100 * 99)]
|
||||
x_test_mod = df_mod.drop("HeartDisease", axis=1).iloc[round(len(df) / 100 * 99):len(df)]
|
||||
y_test_mod = df_mod["HeartDisease"].iloc[round(len(df) / 100 * 99):len(df)]
|
||||
```
|
||||
Во 2м модуле для разделения на выборки оставим исходные данные:
|
||||
```python
|
||||
x_train = df.drop(["HeartDisease", "DBSCAN"], axis=1).iloc[0:round(len(df) / 100 * 99)]
|
||||
y_train = df["HeartDisease"].iloc[0:round(len(df) / 100 * 99)]
|
||||
x_test = df.drop(["HeartDisease", "DBSCAN"], axis=1).iloc[round(len(df) / 100 * 99):len(df)]
|
||||
y_test = df["HeartDisease"].iloc[round(len(df) / 100 * 99):len(df)]
|
||||
```
|
||||
Создадим две модели регрессии и на каждой решим задачу предсказания. Вычислим ошибки и построим графики.
|
||||
|
||||
График решения задачи предсказания на модифицированных данных:
|
||||
|
||||

|
||||
|
||||
График решения задачи предсказания на исходных данных:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
Согласно графикам, модель, обученная на исходных данных, показала результат лучше, чем модель, обученная на модифицированных данных. Получается, что на данном наборе, используя алгоритм `DBSCAN`, мы не только невероятно увеличиваем затратность памяти на обучение модели, но и отрицательно влияем на результат её работы. Это означает, что использование алгоритма на таком наборе данных абсолютно нецелесообразно.
|
||||
|
||||
Связано это может быть с большим количеством бинарных признаков в данных. В таких случаях задачи кластеризации решаются сравнительно хуже.
|
||||
BIN
arutunyan_dmitry_lab_4/dbscan.png
Normal file
|
After Width: | Height: | Size: 47 KiB |
96
arutunyan_dmitry_lab_4/main.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn import metrics
|
||||
from sklearn.cluster import DBSCAN
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
filein = "P:\\ULSTU\\ИИС\\Datasets\\heart_2020_norm.csv"
|
||||
fileout = "P:\\ULSTU\\ИИС\\Datasets\\heart_2020_classified.csv"
|
||||
|
||||
|
||||
# Noise detection and clustering of the data with DBSCAN
def dbscan():
    """Cluster the first 10000 rows with DBSCAN and store the labels.

    ``eps`` is the average of the mean per-feature maximum and the mean
    per-feature minimum.  ``min_samples`` is increased from 1 until the
    labelling has at most 7 distinct labels (noise label included) or at
    least 10% of the rows are labelled as noise (-1).  If neither criterion
    is reached, the last labelling computed is kept; if no labelling could be
    computed at all, every row is marked as noise.

    The labels are plotted with ``make_plot`` and written, as an extra
    ``DBSCAN`` column, to the CSV file named by the module-level ``fileout``.
    """
    df = pd.read_csv(filein, sep=',').iloc[0:10000]   # read the data set
    x = df.drop("HeartDisease", axis=1)               # features to cluster

    # Neighbourhood radius
    eps_opt = (x.max().values.mean() + x.min().values.mean()) / 2

    points = x.values
    # Fallback labelling: without it an empty list would reach the plotting
    # and column assignment below and fail with a length mismatch.
    developed_data = np.full(len(x), -1)
    for min_samples in range(1, len(x)):
        model = DBSCAN(eps=eps_opt, min_samples=min_samples)
        developed_data = model.fit_predict(points)
        few_enough_clusters = len(set(developed_data)) <= 7
        noise_share = list(developed_data).count(-1) / len(developed_data)
        if few_enough_clusters or noise_share >= 0.1:
            break

    make_plot(x, developed_data)
    df["DBSCAN"] = developed_data
    df.to_csv(fileout, index=False)   # store the labels as an extra column
|
||||
|
||||
|
||||
# Evaluation of the DBSCAN clustering through linear regression
def linear_reg():
    """Compare linear regression on DBSCAN-modified and on original data.

    Two data sets are built from the file named by the module-level
    ``fileout``:

      * modified - rows labelled as noise (``DBSCAN == -1``) are removed and
        the ``DBSCAN`` column is kept as a feature;
      * original - all rows, without the ``DBSCAN`` column.

    Each set is split 99% / 1% (first rows train, last rows test) relative
    to its OWN length.  The previous code split the modified set at 99% of
    the unfiltered row count, which shrank or emptied its test part once
    noise rows had been removed.  A ``LinearRegression`` is fitted on each
    set, scored with ``pred_errors`` and plotted with ``make_plots``.
    """
    df = pd.read_csv(fileout, sep=',')

    # Modified set: no noise rows, cluster label used as a feature
    df_mod = df.loc[df["DBSCAN"] != -1]
    split_mod = round(len(df_mod) / 100 * 99)
    features_mod = df_mod.drop("HeartDisease", axis=1)
    target_mod = df_mod["HeartDisease"]
    x_train_mod = features_mod.iloc[0:split_mod]
    y_train_mod = target_mod.iloc[0:split_mod]
    x_test_mod = features_mod.iloc[split_mod:len(df_mod)]
    y_test_mod = target_mod.iloc[split_mod:len(df_mod)]

    # Original set: plain data without the cluster label
    split = round(len(df) / 100 * 99)
    features = df.drop(["HeartDisease", "DBSCAN"], axis=1)
    target = df["HeartDisease"]
    x_train = features.iloc[0:split]
    y_train = target.iloc[0:split]
    x_test = features.iloc[split:len(df)]
    y_test = target.iloc[split:len(df)]

    # Model trained without noise and with the cluster feature
    lr_mod = LinearRegression()
    lr_mod.fit(x_train_mod.values, y_train_mod.values)
    y_mod_pred = lr_mod.predict(x_test_mod.values)
    err = pred_errors(y_mod_pred, y_test_mod.values)
    make_plots(y_test_mod.values, y_mod_pred, err[0], err[1], "Регрессия с кластеризацией dbscan")

    # Model trained on the original data
    lr = LinearRegression()
    lr.fit(x_train.values, y_train.values)
    y_pred = lr.predict(x_test.values)
    err = pred_errors(y_pred, y_test.values)
    make_plots(y_test.values, y_pred, err[0], err[1], "Чистая линейная регрессия")
|
||||
|
||||
|
||||
# Quality metrics of a prediction
def pred_errors(y_predict, y_test):
    """Return ``(rmse, r2)`` for a prediction.

    :param y_predict: predicted values.
    :param y_test: reference values.
    :return: the root of the mean squared error rounded to 3 decimals and
        the coefficient of determination rounded to 2 decimals.
    """
    rmse = np.sqrt(metrics.mean_squared_error(y_test, y_predict))
    r_squared = metrics.r2_score(y_test, y_predict)
    return np.round(rmse, 3), np.round(r_squared, 2)
|
||||
|
||||
|
||||
# Plotting of the reference and predicted series
def make_plots(y_test, y_predict, mid_sqrt, det_kp, title):
    """Plot reference against predicted values and save the figure.

    The figure is written to ``static/<title>.png``; the ``static`` directory
    is created when it is missing, and the figure is closed even if saving
    fails.

    :param y_test: reference values (drawn in red).
    :param y_predict: predicted values (drawn in green).
    :param mid_sqrt: value shown in the legend after "Ср^2 = ".
    :param det_kp: value shown in the legend after "Кд = ".
    :param title: plot title, also used as the file name.
    """
    import os  # imported locally: the module header does not import os

    predicted_label = ("\"y\" предсказанная \n"
                       "Ср^2 = " + str(mid_sqrt) + "\n"
                       "Кд = " + str(det_kp))
    try:
        plt.plot(y_test, c="red", label="\"y\" исходная")
        plt.plot(y_predict, c="green", label=predicted_label)
        plt.legend(loc='lower left')
        plt.title(title)
        # savefig does not create missing directories
        os.makedirs('static', exist_ok=True)
        plt.savefig('static/' + title + '.png')
    finally:
        plt.close()
|
||||
|
||||
|
||||
# Scatter plot of the clustering result
def make_plot(x, c):
    """Draw the cluster labels over two feature columns and save the figure.

    Columns 0 and 13 of *x* are used as the axes and labelled "BMI" and
    "SleepTime".  The figure is written to ``static/dbscan.png``; the
    ``static`` directory is created when it is missing, and the figure is
    closed even if saving fails.

    :param x: feature table exposing ``.values`` with at least 14 columns.
        NOTE(review): columns 0 and 13 are assumed to be BMI and SleepTime;
        confirm against the column order of the input file.
    :param c: one cluster label per row of *x*, used as the point colour.
    """
    import os  # imported locally: the module header does not import os

    points = x.values
    try:
        plt.scatter(points[:, 0], points[:, 13], c=c, cmap='viridis')
        plt.xlabel('BMI')
        plt.ylabel('SleepTime')
        plt.colorbar()
        plt.title('DBSCAN Clustering')
        # savefig does not create missing directories
        os.makedirs('static', exist_ok=True)
        plt.savefig('static/dbscan.png')
    finally:
        plt.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Cluster first: linear_reg() reads the file that dbscan() writes
    for stage in (dbscan, linear_reg):
        stage()
|
||||
BIN
arutunyan_dmitry_lab_4/reg.png
Normal file
|
After Width: | Height: | Size: 50 KiB |
BIN
arutunyan_dmitry_lab_4/regdbscan.png
Normal file
|
After Width: | Height: | Size: 46 KiB |
94
arutunyan_dmitry_lab_5/README.md
Normal file
@@ -0,0 +1,94 @@
|
||||
|
||||
## Лабораторная работа 5. Вариант 4.
|
||||
### Задание
|
||||
Использовать регрессию по варианту для данных из курсовой работы, самостоятельно сформулировав задачу. Интерпретировать результаты и оценить, насколько хорошо он подходит для
|
||||
решения сформулированной задачи.
|
||||
|
||||
Модель регрессии:
|
||||
|
||||
- Гребневая регрессия `Ridge`.
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
После этого в папке `static` сгенерируются 2 графика, по которым оценивается результат выполнения программы.
|
||||
|
||||
### Используемые технологии
|
||||
- Библиотека `numpy`, используемая для обработки массивов данных и вычислений
|
||||
- Библиотека `pyplot`, используемая для построения графиков.
|
||||
- Библиотека `pandas`, используемая для работы с данными для анализа csv-формата.
|
||||
- Библиотека `sklearn` - большой набор функционала для анализа данных. Из неё были использованы инструменты:
|
||||
- `Ridge` - инструмент работы с моделью "Гребневая регрессия"
|
||||
- `metrics` - набор инструментов для оценки моделей
|
||||
|
||||
`Ridge` - это линейная регрессионная модель с регуляризацией L2, которая может быть использована для решения задачи регрессии.
|
||||
|
||||
### Описание работы
|
||||
#### Описание набора данных
|
||||
Набор данных - набор для определения возможности наличия ССЗ у человека.
|
||||
|
||||
Названия столбцов набора данных и их описание:
|
||||
|
||||
* HeartDisease - Имеет ли человек ССЗ (No / Yes),
|
||||
* BMI - Индекс массы тела человека (float),
|
||||
* Smoking - Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (No / Yes),
|
||||
* AlcoholDrinking - Сильно ли человек употребляет алкоголь (No / Yes),
|
||||
* Stroke - Был ли у человека инсульт (No / Yes),
|
||||
* PhysicalHealth - Сколько дней за последний месяц человек чувствовал себя плохо (0-30),
|
||||
* MentalHealth - Сколько дней за последний месяц человек чувствовал себя удручённо (0-30),
|
||||
* DiffWalking - Испытывает ли человек трудности при ходьбе (No / Yes),
|
||||
* Sex - Пол (female, male),
|
||||
* AgeCategory - Возрастная категория (18-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59, 60-64, 65-69, 70-74, 75-79, 80 or older),
|
||||
* Race - Национальная принадлежность человека (White, Black, Hispanic, American Indian/Alaskan Native, Asian, Other),
|
||||
* Diabetic - Был ли у человека диабет (No / Yes),
|
||||
* PhysicalActivity - Занимался ли человек спортом за последний месяц (No / Yes),
|
||||
* GenHealth - Общее самочувствие человека (Excellent, Very good, Good, Fair, Poor),
|
||||
* SleepTime - Сколько человек в среднем спит за 24 часа (0-24),
|
||||
* Asthma - Была ли у человека астма (No / Yes),
|
||||
* KidneyDisease - Было ли у человека заболевание почек (No / Yes),
|
||||
* SkinCancer - Был ли у человека рак кожи (No / Yes).
|
||||
|
||||
Ссылка на страницу набора на Kaggle: [Indicators of Heart Disease](https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease/data)
|
||||
|
||||
#### Формулировка задачи
|
||||
Поскольку модель гребневой регрессии используется для решения задачи регрессии, то попробуем на ней предсказать поведение параметров при обучении на всех признаках и на значимых признаках, найденных ранее в лабораторной №3. Сформулируем задачу:
|
||||
> "Решить задачу предсказания с помощью моделей гребневой регрессии, обученных на всех признаках и только на значимых признаках. Сравнить результаты работы моделей"
|
||||
|
||||
#### Решение задачи предсказания
|
||||
|
||||
Создадим два обучающих модуля. В 1-й включим все признаки. Разделим данные на выборки. Пусть обучающая выборка будет 99% данных, а тестовая - 1% соответственно:
|
||||
```python
|
||||
x_train = df.drop("HeartDisease", axis=1).iloc[0:round(len(df) / 100 * 99)]
|
||||
y_train = df["HeartDisease"].iloc[0:round(len(df) / 100 * 99)]
|
||||
x_test = df.drop("HeartDisease", axis=1).iloc[round(len(df) / 100 * 99):len(df)]
|
||||
y_test = df["HeartDisease"].iloc[round(len(df) / 100 * 99):len(df)]
|
||||
```
|
||||
Тогда во 2м модуле используем только признаки, названные значимыми в 3й лабораторной, а именно:
|
||||
* BMI
|
||||
* SleepTime
|
||||
* PhysicalHealth
|
||||
* GenHealth
|
||||
* MentalHealth
|
||||
* AgeCategory
|
||||
* Race
|
||||
* PhysicalActivity
|
||||
|
||||
Обучим две модели гребневой регрессии на данных из разных модулей. Решим задачу предсказания, найдём ошибки и построим графики.
|
||||
|
||||
График решения задачи предсказания моделью гребневой регрессии с использованием всех признаков:
|
||||
|
||||

|
||||
|
||||
График решения задачи предсказания моделью гребневой регрессии с использованием значимых признаков:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
Согласно графикам, среднеквадратическая ошибка обеих моделей достаточно низкая, что свидетельствует о достаточно точном соответствии истинных и полученных значений, однако коэффициент детерминации моделей имеет очень низкое значение, что свидетельствует о практически полном непонимании моделью зависимостей в данных.
|
||||
> **Note**
|
||||
>
|
||||
> Модель `Ridge` имеет коэффициент регуляризации `alpha`, который помогает избавиться модели от переобучения, однако даже при стандартном его значении в единицу, модель показывает очень низкий коэффициент детерминации, поэтому варьирование его значения не принесёт никаких результатов.
|
||||
|
||||
Исходя из полученных результатов можно сделать вывод, что модель гребневой регрессии неприменима к данному набору данных.
|
||||
BIN
arutunyan_dmitry_lab_5/all.png
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
arutunyan_dmitry_lab_5/imp.png
Normal file
|
After Width: | Height: | Size: 38 KiB |
65
arutunyan_dmitry_lab_5/main.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn import metrics
|
||||
from sklearn.linear_model import Ridge
|
||||
|
||||
filein = "P:\\ULSTU\\ИИС\\Datasets\\heart_2020_norm.csv"
|
||||
|
||||
|
||||
# Prediction with ridge regression on all features
def ridge_all():
    """Fit and evaluate a ridge regression that uses every feature.

    The data set named by the module-level ``filein`` is split 99% / 1%
    (first rows train, last rows test); ``Ridge(alpha=1.0)`` is fitted on all
    columns except ``HeartDisease``, and the test prediction is scored with
    ``pred_errors`` and plotted with ``make_plots``.
    """
    data = pd.read_csv(filein, sep=',')
    split = round(len(data) / 100 * 99)

    features = data.drop("HeartDisease", axis=1)
    target = data["HeartDisease"]
    x_train, x_test = features.iloc[0:split], features.iloc[split:len(data)]
    y_train, y_test = target.iloc[0:split], target.iloc[split:len(data)]

    model = Ridge(alpha=1.0)
    model.fit(x_train.values, y_train.values)
    predicted = model.predict(x_test.values)

    errors = pred_errors(predicted, y_test.values)
    make_plots(y_test.values, predicted, errors[0], errors[1], "Гребневая регрессия (все признаки)")
|
||||
|
||||
|
||||
# Prediction with ridge regression on the significant features
def ridge_valuable():
    """Fit and evaluate a ridge regression on a fixed subset of features.

    Same procedure as the all-feature variant (99% / 1% split of the data
    set named by ``filein``, ``Ridge(alpha=1.0)``, scoring with
    ``pred_errors`` and plotting with ``make_plots``), but only the eight
    columns listed below are used as features.
    """
    significant = ["BMI", "PhysicalHealth", "MentalHealth", "AgeCategory", "Race",
                   "PhysicalActivity", "GenHealth", "SleepTime"]

    data = pd.read_csv(filein, sep=',')
    split = round(len(data) / 100 * 99)

    features = data[significant]
    target = data["HeartDisease"]
    x_train, x_test = features.iloc[0:split], features.iloc[split:len(data)]
    y_train, y_test = target.iloc[0:split], target.iloc[split:len(data)]

    model = Ridge(alpha=1.0)
    model.fit(x_train.values, y_train.values)
    predicted = model.predict(x_test.values)

    errors = pred_errors(predicted, y_test.values)
    make_plots(y_test.values, predicted, errors[0], errors[1], "Гребневая регрессия (значимые признаки)")
|
||||
|
||||
|
||||
# Compute the prediction error metrics
def pred_errors(y_predict, y_test):
    """Return ``(rmse, r2)`` for predictions compared with the true values.

    :param y_predict: values predicted by the model.
    :param y_test: true target values.
    :return: tuple of the root mean squared error rounded to 3 decimals and
        the coefficient of determination rounded to 2 decimals.
    """
    mse = metrics.mean_squared_error(y_test, y_predict)
    rmse = np.round(np.sqrt(mse), 3)  # root of the mean squared error

    r2 = metrics.r2_score(y_test, y_predict)  # coefficient of determination
    return rmse, np.round(r2, 2)
|
||||
|
||||
|
||||
# Draw the comparison plot and save it to disk
def make_plots(y_test, y_predict, mid_sqrt, det_kp, title):
    """Plot true versus predicted values and save the figure as a PNG.

    :param y_test: true target values (drawn in red).
    :param y_predict: predicted values (drawn in green).
    :param mid_sqrt: error value shown in the legend after ``Ср^2 =``.
    :param det_kp: coefficient of determination shown after ``Кд =``.
    :param title: plot title; also used as the output file name, so the
        figure is written to ``static/<title>.png``.
    """
    import os

    plt.plot(y_test, c="red", label="\"y\" исходная")  # true values
    plt.plot(y_predict, c="green", label="\"y\" предсказанная \n"
                                         "Ср^2 = " + str(mid_sqrt) + "\n"
                                         "Кд = " + str(det_kp))  # predicted values
    plt.legend(loc='lower left')
    plt.title(title)

    # savefig does not create missing directories, so make sure the output
    # folder exists instead of failing on a fresh checkout.
    os.makedirs('static', exist_ok=True)
    plt.savefig('static/' + title + '.png')
    plt.close()
|
||||
|
||||
|
||||
# Script entry point: run both experiments, all features first, then the subset.
if __name__ == "__main__":
    ridge_all()
    ridge_valuable()
|
||||
BIN
arutunyan_dmitry_lab_6/1.png
Normal file
|
After Width: | Height: | Size: 216 KiB |
BIN
arutunyan_dmitry_lab_6/2.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
110
arutunyan_dmitry_lab_6/README.md
Normal file
@@ -0,0 +1,110 @@
|
||||
|
||||
## Лабораторная работа 6. Вариант 4.
|
||||
### Задание
|
||||
Использовать нейронную сеть `MLPRegressor` для данных из курсовой работы, самостоятельно сформулировав задачу. Интерпретировать результаты и оценить, насколько хорошо она подходит для решения сформулированной задачи.
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
После этого в папке `static` сгенерируется график, по которому оценивается результат выполнения программы.
|
||||
|
||||
### Используемые технологии
|
||||
- Библиотека `numpy`, используемая для обработки массивов данных и вычислений
|
||||
- Библиотека `pyplot`, используемая для построения графиков.
|
||||
- Библиотека `pandas`, используемая для работы с данными для анализа csv формата.
|
||||
- Библиотека `sklearn` - большой набор функционала для анализа данных. Из неё были использованы инструменты:
|
||||
- `train_test_split` - разделитель данных на обучающую и тестовую выборки
|
||||
- `metrics` - набор инструментов для оценки моделей
|
||||
- `MLPRegressor` - инструмент работы с моделью "Многослойный перцептрон для задачи регрессии"
|
||||
|
||||
`MLPRegressor` - это тип искусственной нейронной сети, состоящей из нескольких слоев нейронов, включая входной слой, скрытые слои и выходной слой.
|
||||
Этот класс позволяет создавать и обучать MLP-модель для предсказания непрерывных числовых значений.
|
||||
|
||||
### Описание работы
|
||||
#### Описание набора данных
|
||||
Набор данных - набор для определения возможности наличия ССЗ у человека
|
||||
|
||||
Названия столбцов набора данных и их описание:
|
||||
|
||||
* HeartDisease - Имеет ли человек ССЗ (No / Yes),
|
||||
* BMI - Индекс массы тела человека (float),
|
||||
* Smoking - Выкурил ли человек хотя бы 5 пачек сигарет за всю жизнь (No / Yes),
|
||||
* AlcoholDrinking - Сильно ли человек употребляет алкоголь (No / Yes),
|
||||
* Stroke - Был ли у человека инсульт (No / Yes),
|
||||
* PhysicalHealth - Сколько дней за последний месяц человек чувствовал себя плохо (0-30),
|
||||
* MentalHealth - Сколько дней за последний месяц человек чувствовал себя удручённо (0-30),
|
||||
* DiffWalking - Испытывает ли человек трудности при ходьбе (No / Yes),
|
||||
* Sex - Пол (female, male),
|
||||
* AgeCategory - Возрастная категория (18-24, 25-29, 30-34, 35-39, 40-44, 45-49, 50-54, 55-59, 60-64, 65-69, 70-74, 75-79, 80 or older),
|
||||
* Race - Национальная принадлежность человека (White, Black, Hispanic, American Indian/Alaskan Native, Asian, Other),
|
||||
* Diabetic - Был ли у человека диабет (No / Yes),
|
||||
* PhysicalActivity - Занимался ли человек спортом за последний месяц (No / Yes),
|
||||
* GenHealth - Общее самочувствие человека (Excellent, Very good, Good, Fair, Poor),
|
||||
* SleepTime - Сколько человек в среднем спит за 24 часа (0-24),
|
||||
* Asthma - Была ли у человека астма (No / Yes),
|
||||
* KidneyDisease - Было ли у человека заболевание почек (No / Yes),
|
||||
* SkinCancer - Был ли у человека рак кожи (No / Yes).
|
||||
|
||||
Ссылка на страницу набора на Kaggle: [Indicators of Heart Disease](https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease/data)
|
||||
|
||||
#### Формулировка задачи
|
||||
Поскольку модель `MLPRegressor` используется для решения задачи регрессии, то попробуем на ней предсказать поведение параметров при обучении на всех признаках, варьируя конфигурации модели. Сформулируем задачу:
|
||||
> "Решить задачу предсказания с помощью нейронной сети, обученной на всех признаках при различных конфигурациях. Сравнить результаты работы моделей"
|
||||
|
||||
#### Решение задачи предсказания
|
||||
Из csv файла выгрузим набор данных, выделим параметр для предсказания - (столбец `HeartDisease`), и его признаки - все остальные столбцы. Разделим данные на обучающую и тестовую выборки, при условии, что 99.9% данных - для обучения, а остальные для тестов:
|
||||
```python
|
||||
x, y = [df.drop("HeartDisease", axis=1).values, df["HeartDisease"].values]
|
||||
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.001, random_state=42)
|
||||
```
|
||||
Создадим класс нейронной сети и определим варьируемые конфигурации.
|
||||
|
||||
`hidden_layer_sizes ` - параметр, принимающий на вход количество скрытых слоёв нейронной сети и количество нейронов в каждом слое. Для определения его наилучшего значения необходимо взять минимальное количество слоёв и нейронов в слое и постепенно увеличивать его, до тех пор, пока качество модели не перестанет улучшаться или не будет достаточным.
|
||||
> **Note**
|
||||
>
|
||||
> Экспериментально для нейронной сети `MLPRegressor` была выявлена наилучшая конфигурация `(100, 50)`: два скрытых слоя по 100 и 50 нейронов соответственно. Для предоставления данных процесс оказался очень длительным, поэтому будет указан только наилучший результат.
|
||||
|
||||
`activation` - функция активации. В классе представлена 4мя решениями:
|
||||
- `identity` - функция `f(x) = x`, абсолютно линейная идентичная функция для приведения работы нейронной сети ближе к модели линейной регрессии,
|
||||
- `logistic` - логистическая сигмовидная функция вида `f(x) = 1 / (1 + exp(-x))`,
|
||||
- `tanh` - гиперболическая функция тангенса `f(x) = tanh(x)`,
|
||||
- `relu` - функция выпрямленной линейной единицы измерения `f(x) = max(0, x)`, проверяет больше ли х нуля (используется чаще всего).
|
||||
|
||||
`solver` - метод оптимизации весов. Существует в 3х вариациях:
|
||||
- `lbfgs` - оптимизатор из семейства квазиньютоновских методов,
|
||||
> **Warning**
|
||||
>
|
||||
> Оптимизатор из семейства квазиньютоновских методов показал себя как очень затратный по времени выполнения алгоритм, при этом использующий большие коэффициенты весов, что приводило к единичным, но слишком большим погрешностям на данных. Поэтому в эксперименте варьирования он не принимал участия.
|
||||
|
||||
- `sgd` - метод стохастического градиентного спуска (классика),
|
||||
- `adam` - оптимизированный метод стохастического градиентного спуска, предложенный Дидериком Кингмой и Джимми Ба.
|
||||
|
||||
```python
|
||||
mlp = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42)
|
||||
mlp.fit(x_train, y_train)
|
||||
y_predict = mlp.predict(x_test)
|
||||
err = pred_errors(y_predict, y_test)
|
||||
```
|
||||
Проведём эксперимент варьирования конфигураций, посчитаем ошибки предсказания и выберем наилучшую нейронную сеть.
|
||||
|
||||
#### Эксперимент варьирования
|
||||
Рассмотрим различные функции активации.
|
||||
|
||||
Графики решения задачи предсказания на разных функциях активации:
|
||||
|
||||

|
||||
|
||||
Теперь для выбранной функции подберём лучший метод оптимизации весов.
|
||||
|
||||
Графики решения задачи предсказания на разных методах оптимизации весов:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
Согласно графикам, наилучшие результаты показала нейронная сеть с функцией активации гиперболического тангенса `tanh` и методом оптимизации весов `adam` - оптимизированным методом стохастического градиентного спуска, предложенным Дидериком Кингмой и Джимми Ба.
|
||||
|
||||
В целом нейронная сеть справилась неудовлетворительно с задачей предсказания, показав хоть и небольшую среднеквадратическую ошибку в 0.25, но очень низкий коэффициент детерминации - максимум 0.23.
|
||||
|
||||
Это значит, что теоретически модель может предсказать результат по признакам, однако понимания зависимостей результата от последних у неё мало.
|
||||
46
arutunyan_dmitry_lab_6/main.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn import metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.neural_network import MLPRegressor
|
||||
|
||||
# Input CSV path. It is a hard-coded absolute Windows path, so change it to the
# dataset's location on your machine before running the script.
filein = "P:\\ULSTU\\ИИС\\Datasets\\heart_2020_norm.csv"
|
||||
|
||||
|
||||
# Train the neural network and plot its predictions
def reg_neural_net():
    """Fit an ``MLPRegressor`` on all features and plot its test predictions.

    Reads the CSV at ``filein``, predicts the ``HeartDisease`` column from all
    remaining columns, and holds out 0.1% of the rows for testing
    (``test_size=0.001``). The predictions and both error metrics are handed
    to ``make_plots``.
    """
    frame = pd.read_csv(filein, sep=',')
    x = frame.drop("HeartDisease", axis=1).values
    y = frame["HeartDisease"].values

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.001, random_state=42)

    network = MLPRegressor(hidden_layer_sizes=(100, 50), activation='tanh', solver='adam', random_state=15000)
    network.fit(x_train, y_train)
    predicted = network.predict(x_test)

    rmse, r2 = pred_errors(predicted, y_test)
    make_plots(y_test, predicted, rmse, r2, "Нейронная сеть")
|
||||
|
||||
|
||||
# Compute the prediction error metrics
def pred_errors(y_predict, y_test):
    """Return ``(rmse, r2)`` for predictions compared with the true values.

    :param y_predict: values predicted by the model.
    :param y_test: true target values.
    :return: tuple of the root mean squared error rounded to 3 decimals and
        the coefficient of determination rounded to 2 decimals.
    """
    mse = metrics.mean_squared_error(y_test, y_predict)
    rmse = np.round(np.sqrt(mse), 3)  # root of the mean squared error

    r2 = metrics.r2_score(y_test, y_predict)  # coefficient of determination
    return rmse, np.round(r2, 2)
|
||||
|
||||
|
||||
# Draw the comparison plot and save it to disk
def make_plots(y_test, y_predict, mid_sqrt, det_kp, title):
    """Plot true versus predicted values and save the figure as a PNG.

    :param y_test: true target values (drawn in red).
    :param y_predict: predicted values (drawn in green).
    :param mid_sqrt: error value shown in the legend after ``Ср^2 =``.
    :param det_kp: coefficient of determination shown after ``Кд =``.
    :param title: plot title; also used as the output file name, so the
        figure is written to ``static/<title>.png``.
    """
    import os

    plt.plot(y_test, c="red", label="\"y\" исходная")  # true values
    plt.plot(y_predict, c="green", label="\"y\" предсказанная \n"
                                         "Ср^2 = " + str(mid_sqrt) + "\n"
                                         "Кд = " + str(det_kp))  # predicted values
    plt.legend(loc='lower left')
    plt.title(title)

    # savefig does not create missing directories, so make sure the output
    # folder exists instead of failing on a fresh checkout.
    os.makedirs('static', exist_ok=True)
    plt.savefig('static/' + title + '.png')
    plt.close()
|
||||
|
||||
|
||||
# Script entry point: train the network and save the resulting plot.
if __name__ == "__main__":
    reg_neural_net()
|
||||
83
basharin_sevastyan_lab_1/README.md
Normal file
@@ -0,0 +1,83 @@
|
||||
## Лабораторная работа 1. Вариант 4.
|
||||
### Задание
|
||||
Построить графики, отобразить
|
||||
качество моделей, объяснить полученные результаты.
|
||||
|
||||
Данные: `make_circles (noise=0.2, factor=0.5, random_state=rs)`
|
||||
|
||||
Модели:
|
||||
- Линейная регрессия
|
||||
- Полиномиальная регрессия (со степенью 4)
|
||||
- Гребневая полиномиальная регрессия (со степенью 4, alpha = 1.0)
|
||||
|
||||
### Как запустить
|
||||
Для запуска программы необходимо с помощью командной строки в корневой директории файлов проекта прописать:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
После будет запущена программа и сгенерированы 3 графика.
|
||||
|
||||
### Используемые технологии
|
||||
- `numpy` (псевдоним `np`): NumPy - это библиотека для научных вычислений в Python.
|
||||
- `matplotlib.pyplot` (псевдоним `plt`): Matplotlib - это библиотека для создания статических, анимированных и интерактивных визуализаций в Python. `pyplot` - это модуль Matplotlib, который используется для создания графиков и диаграмм.
|
||||
- `matplotlib.colors.ListedColormap` - этот модуль Matplotlib используется для создания цветных схем цветовых карт, которые могут быть использованы для визуализации данных.
|
||||
- `sklearn` (scikit-learn): Scikit-learn - это библиотека для машинного обучения и анализа данных в Python. Из данной библиотеки были использованы следующие модули:
|
||||
- `model_selection` - Этот модуль scikit-learn предоставляет инструменты для разделения данных на обучающие и тестовые наборы.
|
||||
- `linear_model` - содержит реализации линейных моделей, таких как линейная регрессия, логистическая регрессия и другие.
|
||||
- `pipeline` - позволяет объединить несколько этапов обработки данных и построения моделей в одну конвейерную цепочку.
|
||||
- `PolynomialFeatures` - Этот класс scikit-learn используется для генерации полиномиальных признаков, позволяя моделям учитывать нелинейные зависимости в данных.
|
||||
- `make_circles` - Эта функция scikit-learn создает набор данных, представляющий собой два класса, расположенных в форме двух концентрических окружностей (большая окружность содержит меньшую). Это удобно для демонстрации работы различных моделей классификации.
|
||||
- `LinearRegression` - линейная регрессия - это алгоритм машинного обучения, используемый для задач регрессии, то есть для предсказания непрерывных числовых значений.
|
||||
|
||||
### Описание работы
|
||||
Программа генерирует данные, разделяет данные на тестовые и обучающие для моделей по заданию.
|
||||
```python
|
||||
rs = randrange(50)
|
||||
X, y = make_circles(noise=0.2, factor=0.5, random_state=rs)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=rs)
|
||||
```
|
||||
`X_train` и `y_train` используются для обучения, а на данных `X_test` и `y_test` - оценка их качества.
|
||||
|
||||
Поскольку все модели в задании регрессионные, результаты работы будем оценивать через решение задачи предсказания.
|
||||
|
||||
Для оценки будем использовать следующие критерии: среднеквадратическое отклонение и коэффициент детерминации. Чем ошибка меньше и чем коэффициент детерминации больше, тем лучше.
|
||||
```python
|
||||
np.round(np.sqrt(metrics.mean_squared_error(y_test, y_predict)),3) #среднеквадратическое отклонение
|
||||
np.round(metrics.r2_score(y_test, y_predict), 2) #коэфициент детерминации
|
||||
```
|
||||
Оценочные параметры округлены с помощью функции `round` до 3х и 2х знаков после запятой.
|
||||
|
||||
### Линейная регрессия
|
||||
Для создания модели линейной регрессии воспользуемся `LinearRegression`.
|
||||
```python
|
||||
linear_reg = LinearRegression()
|
||||
```
|
||||
Обучим её и предскажем с её помощью `y` на тестовой выборке `X_test`.
|
||||
```python
|
||||
model.fit(X_train, y_train)
|
||||
y_predict = model.predict(X_test)
|
||||
```
|
||||
График для оценки результатов:
|
||||

|
||||
|
||||
#### Полиномиальная регрессия
|
||||
Добавим 3 недостающих члена к линейной модели, возведённых в соответствующие степени 2, 3 и 4.
|
||||
```python
|
||||
poly_reg = make_pipeline(PolynomialFeatures(degree=4), StandardScaler(), LogisticRegression(random_state=rs))
|
||||
```
|
||||
График для оценки результатов:
|
||||

|
||||
|
||||
#### Полиномиальная гребневая регрессия
|
||||
Линейная регрессия является разновидностью полиномиальной регрессии со степенью ведущего члена равной 1.
|
||||
```python
|
||||
ridge_poly_reg = make_pipeline(PolynomialFeatures(degree=4), StandardScaler(), LogisticRegression(penalty='l2', C=1.0, random_state=rs))
|
||||
```
|
||||
График для оценки результатов:
|
||||

|
||||
|
||||
Точность измерений:
|
||||

|
||||
|
||||
### Вывод
|
||||
Наиболее низкое среднеквадратичное отклонение и наиболее высокий коэффициент детерминации показала модель полиномиальной и полиномиальной гребневой регрессии. Это означает, что они являются лучшими моделями для данного набора данных.
|
||||
BIN
basharin_sevastyan_lab_1/linear.png
Normal file
|
After Width: | Height: | Size: 47 KiB |
60
basharin_sevastyan_lab_1/main.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from random import randrange
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.colors import ListedColormap
|
||||
from sklearn import metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import LinearRegression, LogisticRegression
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
|
||||
from sklearn.datasets import make_circles
|
||||
|
||||
# Random seed shared by data generation, the split and the models; it is drawn
# anew on every run, so results differ between runs.
rs = randrange(50)

# Generate the data
X, y = make_circles(noise=0.2, factor=0.5, random_state=rs)

# Split the data into training and test sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rs)

# Linear model
linear_reg = LinearRegression()

# NOTE(review): despite the "regression" naming, both degree-4 pipelines below
# end in LogisticRegression (a classifier). penalty='l2' and C=1.0 appear to be
# LogisticRegression's defaults, so the two pipelines look identically
# configured - confirm this is intended.

# Polynomial model (degree 4)
poly_reg = make_pipeline(
    PolynomialFeatures(degree=4),
    StandardScaler(),
    LogisticRegression(random_state=rs),
)

# Ridge-style polynomial model (degree 4, L2 penalty)
ridge_poly_reg = make_pipeline(
    PolynomialFeatures(degree=4),
    StandardScaler(),
    LogisticRegression(penalty='l2', C=1.0, random_state=rs),
)
|
||||
|
||||
|
||||
# Train a model and report its quality
def mid_sq_n_det(name, model):
    """Fit ``model``, print its test-set metrics and return ``(name, model)``.

    The model is trained on the module-level ``X_train``/``y_train`` and
    evaluated on ``X_test``/``y_test``. Two lines are printed: the root mean
    squared error (3 decimals) and the coefficient of determination
    (2 decimals).

    :param name: display name used in the printed messages.
    :param model: estimator exposing ``fit`` and ``predict``.
    :return: the ``(name, model)`` pair, with ``model`` now fitted.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    # Root mean squared error of the model
    rmse = np.round(np.sqrt(metrics.mean_squared_error(y_test, predicted)), 3)
    print(f'Рассчёт среднеквадратичной ошибки для {name}: {rmse}')

    # Coefficient of determination of the model
    r2 = np.round(metrics.r2_score(y_test, predicted), 2)
    print(f'Рассчёт коэфициента детерминации для {name}: {r2}')

    return name, model
|
||||
|
||||
|
||||
# Plots: fit every model (printing its metrics) and keep (name, model) pairs
models = [
    mid_sq_n_det("Линейная регрессия", linear_reg),
    mid_sq_n_det("Полиномиальная регрессия (со степенью 4)", poly_reg),
    mid_sq_n_det("Гребневая полиномиальная регрессия (со степенью 4, alpha = 1.0)", ridge_poly_reg),
]

cmap_background = ListedColormap(['#FFAAAA', '#AAAAFF'])
cmap_points = ListedColormap(['#FF0000', '#0000FF'])

# The evaluation grid depends only on the data, so build it once: a 100x100
# mesh covering the data range plus a margin of 1 on every side.
xx, yy = np.meshgrid(
    np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
    np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]

plt.figure(figsize=(15, 4))
for position, (name, model) in enumerate(models, start=1):
    plt.subplot(1, 3, position)

    # Model output over the grid, drawn as the coloured background.
    Z = model.predict(grid_points).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=cmap_background, alpha=0.5)

    # Test points as circles, training points as crosses.
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap_points, marker='o', label='Тестовые точки')
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cmap_points, marker='x', label='Обучающие точки')
    plt.legend()
    plt.title(name)
    plt.text(0.5, -1.2, 'Красный класс', color='r', fontsize=12)
    plt.text(0.5, -1.7, 'Синий класс', color='b', fontsize=12)

plt.tight_layout()
plt.show()
|
||||
BIN
basharin_sevastyan_lab_1/poly.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
basharin_sevastyan_lab_1/result.png
Normal file
|
After Width: | Height: | Size: 31 KiB |
BIN
basharin_sevastyan_lab_1/ridge.png
Normal file
|
After Width: | Height: | Size: 44 KiB |
55
belyaeva_ekaterina_lab_2/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
## Задание
|
||||
|
||||
Используя код из пункта «Решение задачи ранжирования признаков», выполните ранжирование признаков с помощью указанных по варианту моделей. Отобразите получившиеся оценки каждого признака каждой моделью и среднюю оценку. Проведите анализ получившихся результатов. Какие четыре признака оказались самыми важными по среднему значению? (Названия\индексы признаков и будут ответом на задание).
|
||||
|
||||
Вариант 6:
|
||||
|
||||
- Гребневая регрессия (Ridge)
|
||||
- Сокращение признаков Случайными деревьями (Random Forest Regressor)
|
||||
- Линейная корреляция (f_regression)
|
||||
|
||||
|
||||
## Как запустить лабораторную
|
||||
Запустить файл main.py
|
||||
## Используемые технологии
|
||||
Библиотеки numpy, scikit-learn, их компоненты
|
||||
## Описание лабораторной (программы)
|
||||
Данный код выполняет оценку важности признаков в задаче регрессии.
|
||||
|
||||
Сначала генерируются исходные данные с использованием 14 признаков (X) и функции-выхода (Y), которая представляет собой регрессионную проблему Фридмана. Затем используются две модели - гребневая регрессия (Ridge) и случайный лес (Random Forest) - для обучения на данных и оценки важности признаков.
|
||||
|
||||
Затем вычисляются коэффициенты корреляции между признаками и целевой переменной, и результаты сохраняются в словаре ranks с ключом "Correlation".
|
||||
|
||||
Далее в цикле вычисляются средние значения оценок важности признаков для каждого признака. Результаты сохраняются в словаре mean.
|
||||
|
||||
Как результат, программа выводит оценки важности для каждой модели и средние значения важности для каждого признака
|
||||
## Результат
|
||||
|
||||
В результате получаем следующее:
|
||||
|
||||
Ridge
|
||||
[('x4', 1.0), ('x14', 0.92), ('x1', 0.76), ('x2', 0.75), ('x12', 0.67), ('x5', 0.61), ('x11', 0.59), ('x6', 0.08), ('x8', 0.08), ('x3', 0.06), ('x7', 0.03), ('x10', 0.01), ('x9', 0.0), ('x13', 0.0)]
|
||||
Random Forest
|
||||
[('x14', 1.0), ('x2', 0.76), ('x1', 0.66), ('x4', 0.55), ('x11', 0.29), ('x12', 0.28), ('x5', 0.23), ('x3', 0.1), ('x13', 0.09), ('x7', 0.01), ('x6', 0.0), ('x8', 0.0), ('x9', 0.0), ('x10', 0.0)]
|
||||
Correlation
|
||||
[('x4', 1.0), ('x14', 0.98), ('x2', 0.45), ('x12', 0.44), ('x1', 0.3), ('x11', 0.29), ('x5', 0.04), ('x8', 0.02), ('x7', 0.01), ('x9', 0.01), ('x3', 0.0), ('x6', 0.0), ('x10', 0.0), ('x13', 0.0)]
|
||||
Mean Importance:
|
||||
x14 : 0.97
|
||||
x4 : 0.85
|
||||
x2 : 0.65
|
||||
x1 : 0.57
|
||||
x12 : 0.46
|
||||
x11 : 0.39
|
||||
x5 : 0.29
|
||||
x3 : 0.05
|
||||
x6 : 0.03
|
||||
x8 : 0.03
|
||||
x13 : 0.03
|
||||
x7 : 0.02
|
||||
x9 : 0.0
|
||||
x10 : 0.0
|
||||
|
||||
Вывод: Самым важным признаком в среднем оказался х14, потом х4 и далее по убывающей - х2, х1, х12, х11, х5. Остальные признаки показали минимальную значимость или не имеют ее совсем.
|
||||
|
||||
Но стоит отметить, что несмотря на среднюю оценку признаков, разные модели выявили их значимость по-разному, что можно увидеть в тексте выше.
|
||||
Корреляция и гребневая регрессия показали чуть более схожий результат, нежели сокращение признаков случайными деревьями, хотя стоит заметить, что результаты всех моделей все равно отличаются.
|
||||
74
belyaeva_ekaterina_lab_2/main.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from sklearn.linear_model import Ridge
|
||||
from sklearn.feature_selection import f_regression
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
import numpy as np
|
||||
|
||||
# Generate the source data: 750 observation rows and 14 feature columns
np.random.seed(0)
size = 750
X = np.random.uniform(0, 1, (size, 14))

# Define the target: the Friedman regression problem.
# NOTE(review): np.random.normal(0, 1) is called without a size, so a single
# scalar noise value is added to every row - confirm this is intended.
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
     + 20 * (X[:, 2] - .5) ** 2
     + 10 * X[:, 3]
     + 5 * X[:, 4] ** 5
     + np.random.normal(0, 1))

# Make features x11..x14 noisy copies of x1..x4 (adds feature dependence)
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))

# Ridge regression
ridge = Ridge(alpha=7)
ridge.fit(X, Y)

# Random forest
rf = RandomForestRegressor(n_estimators=100, random_state=0)
rf.fit(X, Y)

# Per-model feature scores, filled in further down
ranks = {}

# Feature names x1..x14
names = [f"x{i}" for i in range(1, 15)]
|
||||
|
||||
def rank_to_dict(ranks, names):
    """Map feature names to importance scores scaled to the range [0, 1].

    The absolute values of ``ranks`` are min-max scaled and rounded to two
    decimals. The column shape is inferred from the input instead of being
    fixed at 14, so any number of features is supported.

    :param ranks: one raw score per feature (coefficients, importances, ...).
    :param names: feature names, in the same order as ``ranks``.
    :return: dict ``{feature_name: scaled_score}``.
    """
    magnitudes = np.abs(ranks)
    # MinMaxScaler expects a 2-D array, so use one column with one row per
    # feature; -1 lets numpy infer the row count.
    column = np.array(magnitudes).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()
    return dict(zip(names, (round(score, 2) for score in scaled)))
|
||||
|
||||
# Scores from the two fitted models
ranks["Ridge"] = rank_to_dict(ridge.coef_, names)
ranks["Random Forest"] = rank_to_dict(rf.feature_importances_, names)

# Univariate linear relationship between each feature and the target.
# f_regression returns (F-statistics, p-values); only the statistics are used.
correlation_coeffs = f_regression(X, Y)[0]
ranks["Correlation"] = rank_to_dict(correlation_coeffs, names)

# Sum the scores of every feature across all models
totals = {}
for scores in ranks.values():
    for feature, score in scores.items():
        totals[feature] = totals.get(feature, 0) + score

# Average over the number of models, round, and order from most to least
# important
model_count = len(ranks)
mean = sorted(
    ((feature, round(total / model_count, 2)) for feature, total in totals.items()),
    key=lambda pair: pair[1],
    reverse=True,
)

# Order each model's own scores the same way, then print everything
for model_name in list(ranks):
    ranks[model_name] = sorted(ranks[model_name].items(), key=lambda pair: pair[1], reverse=True)

for model_name, ordered_scores in ranks.items():
    print(model_name)
    print(ordered_scores)

print("Mean Importance:")
for feature, score in mean:
    print(feature, ":", score)
|
||||
BIN
gordeeva_anna_lab_2/Lasso_screen.png
Normal file
|
After Width: | Height: | Size: 61 KiB |
BIN
gordeeva_anna_lab_2/RFE_screen.png
Normal file
|
After Width: | Height: | Size: 43 KiB |
BIN
gordeeva_anna_lab_2/RandLasso_screen.png
Normal file
|
After Width: | Height: | Size: 43 KiB |
97
gordeeva_anna_lab_2/lab2.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import streamlit as st
|
||||
import numpy as np
|
||||
from sklearn.linear_model import Lasso
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from sklearn.linear_model import LassoCV
|
||||
from sklearn.feature_selection import SelectFromModel
|
||||
from sklearn.feature_selection import RFE
|
||||
|
||||
st.header("Лабораторная работа 2. Вариант 7. Лассо, случайное лассо, рекурсивное сокращение признаков")

# Generate the source data: 750 observation rows and 14 feature columns.
# Seeding makes the random numbers identical on every rerun of the script.
np.random.seed(0)
size = 750
X = np.random.uniform(0, 1, (size, 14))

# Define the target: the Friedman regression problem.
# NOTE(review): np.random.normal(0, 1) is called without a size, so a single
# scalar noise value is added to every row - confirm this is intended.
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
     + 20 * (X[:, 2] - .5) ** 2
     + 10 * X[:, 3]
     + 5 * X[:, 4] ** 5
     + np.random.normal(0, 1))

# Make features x11..x14 noisy copies of x1..x4 (adds feature dependence)
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))

# Feature names x1..x14, later paired with the scores
names = [f"x{i}" for i in range(1, 15)]
|
||||
|
||||
def random_lasso(X, Y, n_subsets=100):
    """Return indices of features that Lasso keeps across random feature subsets.

    On each of ``n_subsets`` rounds a random 70% of the feature columns is
    drawn without replacement and a ``LassoCV`` (single alpha 0.05) is fitted
    on them. Each feature's counter is increased whenever its coefficient is
    non-zero.

    :param X: 2-D feature matrix (rows are samples, columns are features).
    :param Y: target values.
    :param n_subsets: number of random subsets to evaluate.
    :return: array of indices of the features whose counter exceeds
        ``n_subsets / 2``.
    """
    _, n_features = X.shape
    subset_size = int(n_features * 0.7)
    selection_counts = np.zeros(n_features)

    for _ in range(n_subsets):
        # Draw a random subset of feature columns
        chosen = np.random.choice(n_features, subset_size, replace=False)

        # Fit Lasso on that subset only
        model = LassoCV(alphas=[0.05])
        model.fit(X[:, chosen], Y)

        # Count the features that kept a non-zero coefficient
        selection_counts[chosen] += (model.coef_ != 0)

    # Keep the features selected in more than half of the rounds
    return np.where(selection_counts > n_subsets / 2)[0]
|
||||
|
||||
def rank_to_dict(ranks, name):
    """Pair feature names with importance scores scaled to the range [0, 1].

    The absolute values of ``ranks`` are min-max scaled and rounded to two
    decimals.

    :param ranks: one raw score per feature.
    :param name: feature names, in the same order as ``ranks``.
    :return: list of ``(feature_name, scaled_score)`` tuples; pairing stops at
        the shorter of the two inputs.
    """
    magnitudes = np.abs(ranks)
    # MinMaxScaler expects a 2-D array: one column, one row per feature.
    column = np.array(magnitudes).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()
    rounded = [round(score, 2) for score in scaled]
    return list(zip(name, rounded))
|
||||
|
||||
def mean_rank(ranks):
    """Return the arithmetic mean of the scores in ``ranks``.

    :param ranks: sequence of ``(name, score)`` pairs.
    :return: sum of the scores divided by the number of pairs.
    :raises ZeroDivisionError: if ``ranks`` is empty.
    """
    scores = [score for _, score in ranks]
    return sum(scores) / len(ranks)
|
||||
|
||||
# Toggles: each checkbox enables one feature-ranking method
lasso_check = st.checkbox("Лассо")
random_lasso_check = st.checkbox("Случайное лассо")
RFE_check = st.checkbox("Рекурсивное сокращение признаков")

# Results

# Plain Lasso: score all 14 features by the magnitude of their coefficients
if lasso_check:
    model_lasso = Lasso(alpha=.05)
    model_lasso.fit(X, Y)
    rank = rank_to_dict(model_lasso.coef_, names)
    mean = mean_rank(rank)
    st.write("Получившиеся оценки для каждого признака")
    st.table(rank)
    st.write("Средняя оценка: ", mean)

# Randomized Lasso: keep the frequently selected features, then refit on them
# to obtain a score for each
if random_lasso_check:
    selected_features = random_lasso(X, Y)
    X_subset = X[:, selected_features]
    lasso_cv = LassoCV(alphas=[0.05])
    lasso_cv.fit(X_subset, Y)
    rank = rank_to_dict(lasso_cv.coef_, [names[i] for i in selected_features])
    mean = mean_rank(rank)
    st.write("Получившиеся оценки")
    st.table(rank)
    st.write("Средняя оценка: ", mean)

# Recursive feature elimination down to 4 features
if RFE_check:
    model_lasso = Lasso(alpha=0.05)
    rfe = RFE(model_lasso, n_features_to_select=4)
    rfe.fit(X, Y)
    selected_feature_indices = rfe.support_
    selected_feature_names = [name for name, keep in zip(names, selected_feature_indices) if keep]
    # rfe.ranking_ holds one entry per ORIGINAL feature (14 of them), so
    # zipping it with the 4 selected names attached the ranks of x1..x4 to
    # whichever features were selected. The estimator refitted on the selected
    # columns (rfe.estimator_) has exactly one coefficient per selected
    # feature, in the same column order as support_, so score with that.
    rank = rank_to_dict(rfe.estimator_.coef_, selected_feature_names)
    mean = mean_rank(rank)
    st.write("Получившиеся оценки")
    st.table(rank)
    st.write("Средняя оценка: ", mean)
|
||||
|
||||
56
gordeeva_anna_lab_2/readme.md
Normal file
@@ -0,0 +1,56 @@
|
||||
## Задание
|
||||
Модели:
|
||||
* Лассо (Lasso)
|
||||
* Случайное лассо (RandomizedLasso)
|
||||
* Рекурсивное сокращение признаков (Recursive Feature Elimination – RFE)
|
||||
|
||||
## В чем различие каждой модели
|
||||
|
||||
Лассо (Lasso) автоматически отбирает наиболее важные признаки и уменьшает влияние менее важных.
|
||||
|
||||
Случайное лассо (RandomizedLasso) случайным образом выбирает подмножества признаков из исходных данных и применяет Лассо к каждому из них. Затем он объединяет результаты и определяет, какие признаки были выбраны чаще всего.
|
||||
|
||||
Рекурсивное сокращение признаков (Recursive Feature Elimination – RFE) оценивает важность каждого признака. Затем он удаляет наименее важный признак и повторяет процесс, пока не останется желаемое количество признаков.
|
||||
|
||||
|
||||
## Библиотеки
|
||||
Streamlit. Предоставляет простой способ создания веб-приложений для визуализации данных.
|
||||
|
||||
Numpy. Предоставляет возможность работать с массивами и матрицами.
|
||||
|
||||
Sklearn. Предоставляет инструменты и алгоритмы, которые упрощают задачи, связанные с машинным обучением.
|
||||
|
||||
## Функционал
|
||||
* Генерация исходных данных из 750 строк-наблюдений и 14 столбцов-признаков
|
||||
* Создание и обучение таких моделей, как лассо, случайное лассо и рекурсивное сокращение признаков.
|
||||
* Вывод получившихся оценок для признаков и средней оценки.
|
||||
|
||||
## Запуск
|
||||
Перед запуском необходимо запустить виртуальную среду venv. Так как я использую streamlit, то для запуска необходимо в терминал прописать следующую строку:
|
||||
```
|
||||
streamlit run lab2.py
|
||||
```
|
||||
Приложение развернется на локальном сервере и автоматически откроется в браузере.
|
||||
|
||||
## Скриншоты работы программы
|
||||
Лассо (Lasso)
|
||||
|
||||

|
||||
|
||||
Случайное лассо (RandomizedLasso)
|
||||
|
||||

|
||||
|
||||
Рекурсивное сокращение признаков (Recursive Feature Elimination – RFE)
|
||||
|
||||

|
||||
|
||||
## Вывод
|
||||
Модель лассо выводит все 14 признаков, наиболее важными признаками оказались под индексом
|
||||
1, 2, 4 и 5. Самый важный признак под номером 4. Средняя оценка по всем признакам 0.19.
|
||||
|
||||
Модель случайное лассо выводит наиболее важные признаки, такими признаками являются 1, 2, 4 и 5. Средняя оценка же по этим признакам равна 0.53. Она выше, так как мы исключаем маловажные признаки.
|
||||
|
||||
Модель рекурсивного сокращения признаков выводит 4 признака, так как я указала именно вывод 4 признаков в коде программы. Таким образом, модель отсекает маловажные признаки. Самым важным признаком оказался под номером 4. Средняя оценка: 0.25.
|
||||
|
||||
Как итог, можно сказать, что наиболее важными признаками являются 1, 2, 4 и 5. А самым важным из них является признак под номером 4.
|
||||
36
gusev_vladislav_lab_2/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
### Вариант 9
|
||||
### Задание на лабораторную работу:
|
||||
Выполнить ранжирование признаков с помощью указанных по варианту моделей:
|
||||
- Лассо (Lasso)
|
||||
- Сокращение признаков Случайными деревьями (Random Forest Regressor)
|
||||
- Линейная корреляция (f_regression)
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
Выполняем файл gusev_vladislav_lab_2.py, в консоль будут выведены результаты.
|
||||
|
||||
### Технологии
|
||||
NumPy - библиотека для работы с многомерными массивами. Sklearn - библиотека с большим количеством алгоритмов машинного обучения.
|
||||
|
||||
### По коду
|
||||
В начале генерируем исходные данные: 750 строк-наблюдений и 14 столбцов-признаков, задаем функцию-выход: регрессионную проблему Фридмана, добавляем зависимость признаков
|
||||
|
||||
Далее создаем пустой словарь для хранения рангов признаков, используем методы из библиотеки Sklearn: Lasso, RandomForestRegressor и f_regression для задания по варианту.
|
||||
|
||||
Далее необходимо объявить функцию def rank_to_dict(ranks, names): для соотнесения списка оценок со списком имён признаков. Она возвращает словарь вида (имя_признака: оценка_признака), в котором оценки приведены к единому диапазону от 0 до 1 и округлены до сотых.
|
||||
|
||||
В конце формируем среднее по каждому признаку, сортируем по убыванию и выводим на экран.
|
||||
|
||||
Пример:
|
||||
|
||||

|
||||
|
||||
Признаки х4 и х14 имеют наивысшие ранги, что говорит об их наибольшей значимости для решения задачи
|
||||
|
||||
Далее x2 и x12 занимают второе место по значимости (средняя значимость)
|
||||
|
||||
х1, х11 ниже среднего
|
||||
|
||||
х5, х8, х7 низкая значимость
|
||||
|
||||
х9, х3, х13, х10, х6 очень низкая значимость
|
||||
|
||||
53
gusev_vladislav_lab_2/gusev_vladislav_lab_2.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from sklearn.linear_model import Lasso
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.feature_selection import f_regression
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
import numpy as np
|
||||
|
||||
# Generate the input data: 750 observations (rows) by 14 features (columns)
np.random.seed(0)
size = 750
X = np.random.uniform(0, 1, (size, 14))

# Target: the Friedman regression problem, built from the first five features.
# The noise is drawn once per observation: np.random.normal(0, 1) without a
# size argument returns a single scalar, which would only shift every target
# by the same constant instead of adding independent noise.
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
     10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1, size))

# Make the last four features (x11..x14) noisy copies of the first four (x1..x4)
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))

names = ["x%s" % i for i in range(1, 15)]

# Per-method feature scores: {method name: {feature name: raw score}}
ranks = {}

# Lasso: coefficients of the fitted linear model
lasso = Lasso(alpha=0.5)
lasso.fit(X, Y)
ranks["Lasso"] = dict(zip(names, lasso.coef_))

# Random forest: feature importances of the fitted ensemble
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X, Y)
ranks["Random Forest"] = dict(zip(names, rf.feature_importances_))

# Linear correlation: univariate F-statistics (the p-values are not used)
f_scores, p_values = f_regression(X, Y)
ranks["f_regression"] = dict(zip(names, f_scores))
|
||||
|
||||
def rank_to_dict(ranks, names):
    """Map feature names to scores rescaled to the [0, 1] range.

    Args:
        ranks: One raw score per feature; the sign is discarded.
        names: Feature names, in the same order as ``ranks``.

    Returns:
        dict of feature name -> min-max scaled absolute score, rounded
        to two decimals.
    """
    magnitudes = np.abs(ranks)
    # MinMaxScaler scales column-wise on 2-D input, so the scores are shaped
    # into a single column; -1 lets the row count follow the input length
    # instead of being fixed at 14 features.
    column = np.asarray(magnitudes, dtype=float).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()
    return {name: round(float(score), 2) for name, score in zip(names, scaled)}
|
||||
|
||||
# Average the per-method scores for every feature.
# The raw scores live on incomparable scales (Lasso coefficients, forest
# importances, F-statistics), so each method is first rescaled to [0, 1]
# with rank_to_dict; otherwise the largest-scale method dominates the result.
mean = {}
for method_scores in ranks.values():
    scaled = rank_to_dict(list(method_scores.values()), list(method_scores.keys()))
    for feature, score in scaled.items():
        mean[feature] = mean.get(feature, 0) + score

# Turn the accumulated sums into actual means over the number of methods
for feature in mean:
    mean[feature] = round(mean[feature] / len(ranks), 2)

# Most significant features first
sorted_mean = sorted(mean.items(), key=lambda x: x[1], reverse=True)
result = dict(sorted_mean)
for feature, score in sorted_mean:
    print(f'{feature}: {score}')
|
||||
|
||||
BIN
gusev_vladislav_lab_2/img.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
27
gusev_vladislav_lab_3/README.md
Normal file
@@ -0,0 +1,27 @@
|
||||
### Вариант 9
|
||||
### Задание на лабораторную работу:
|
||||
Решите с помощью библиотечной реализации дерева решений задачу: Запрограммировать дерево решений как минимум на 99% ваших данных для задачи: Зависимость глубины алмаза (depth) от длины (x), ширины (y) и высоты алмаза (z) . Проверить работу модели на оставшемся проценте, сделать вывод.
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
Выполняем файл gusev_vladislav_lab_3.py, решение будет в консоли.
|
||||
|
||||
### Технологии
|
||||
Sklearn - библиотека с большим количеством алгоритмов машинного обучения. Нам понадобится библиотека для дерева решения регрессии sklearn.tree.DecisionTreeRegressor.
|
||||
|
||||
### По коду
|
||||
1) Для начала загружаем данные из csv файла
|
||||
2) Разделяем данные на признаки (X) и целевую переменную (y)
|
||||
3) Разделяем данные на обучающие и тестовые
|
||||
4) Обучаем дерево регрессии (model)
|
||||
5) Выводим важность признаков, предсказание значений на тестовой выборке и оценку производительности модели
|
||||
|
||||
Пример:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
- score: ~0.88. Это мера того, насколько хорошо модель соответствует данным. По значению 88% можно сказать, что модель хорошо соответствует данным.
|
||||
- feature_importances: ~0.26, ~0.34, ~0.39. Это говорит о важности признаков для нашей модели. Можно сказать, что высота (z) имеет наибольшую важность.
|
||||
- Mean Squared Error: 0.22. Это среднеквадратическая ошибка модели: средний квадрат отклонения предсказанной глубины от фактической на тестовой выборке равен 0.22 (это не доля ошибочных случаев).
|
||||
|
||||
По итогу можно сказать, что модель отработала хорошо, из-за score ~0.88.
|
||||
53944
gusev_vladislav_lab_3/diamonds_prices.csv
Normal file
31
gusev_vladislav_lab_3/gusev_vladislav_lab_3.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
# Load the data from the CSV file
data = pd.read_csv('diamonds_prices.csv', index_col='diamond_id')

# Split the data into features (X) and the target variable (y)
X = data[['x', 'y', 'z']]

print(X.head())
y = data['depth']

# Hold out 1% of the rows for testing; the tree is trained on the other 99%
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=42)

# Fit a regression tree. random_state is fixed so that repeated runs build
# the same tree and report the same metrics (the split above is seeded too).
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)
test_score = model.score(X_test, y_test)

# Feature importances of the fitted tree
feature_importances = model.feature_importances_

# Predict the target on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model on the held-out rows
mse = mean_squared_error(y_test, y_pred)

print("score", test_score)
print("feature_importances", feature_importances)
print("Mean Squared Error: {:.2f}".format(mse))
|
||||
BIN
gusev_vladislav_lab_3/img.png
Normal file
|
After Width: | Height: | Size: 9.9 KiB |
20
gusev_vladislav_lab_4/README.md
Normal file
@@ -0,0 +1,20 @@
|
||||
### Вариант 9
|
||||
### Задание на лабораторную работу:
|
||||
Использовать метод кластеризации DBSCAN, самостоятельно сформулировав задачу. Интерпретировать результаты и оценить, насколько хорошо он подходит для решения сформулированной задачи.
|
||||
### Как запустить лабораторную работу:
|
||||
Выполняем файл gusev_vladislav_lab_4.py, на экране будет нарисован график кластеров
|
||||
### Технологии
|
||||
Pandas - библиотека для анализа данных. Она предоставляет структуры данных и функции для работы с табличными данными. Matplotlib - библиотека для визуализации данных двумерной и трехмерной графикой. Sklearn - библиотека с большим количеством алгоритмов машинного обучения.
|
||||
### По коду
|
||||
1) Загружаем данные из csv файла
|
||||
2) Выбираем 10000 данных (потому что при сильном увеличении данных метод DBSCAN сильно загружает систему и программа начинает виснуть)
|
||||
3) Создаем модель DBSCAN, предварительно выбрав нужные данные
|
||||
4) Применяем DBSCAN к данным и создаём график
|
||||
|
||||
Что получаем:
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
- По данному графику можно сказать, что в основном глубина алмазов разнится от ~57 до ~66, а карат находится в районе 1 (0.6-1.4)
|
||||
- В целом на графике видно очень много шума (фиолетовые точки), но также немало более светлых - близких к красным. Визуально можно сказать, что эффективность этого метода 30%-40%.
|
||||
53944
gusev_vladislav_lab_4/diamonds_prices.csv
Normal file
25
gusev_vladislav_lab_4/gusev_vladislav_lab_4.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.cluster import DBSCAN
|
||||
# Load the data from the CSV file
data = pd.read_csv('diamonds_prices.csv', index_col='diamond_id')

# Take the first 10000 rows. The explicit copy makes data_subset an
# independent frame, so adding the 'cluster' column below does not assign
# onto a slice of `data` (which triggers pandas' SettingWithCopyWarning).
data_subset = data.head(10000).copy()

# Select the features (carat and depth)
features = data_subset[['carat', 'depth']]

# Create the DBSCAN model
dbscan = DBSCAN(eps=0.1, min_samples=5)

# Run DBSCAN and store the cluster label of every row
data_subset['cluster'] = dbscan.fit_predict(features)

# Scatter plot of the points coloured by cluster label
plt.scatter(data_subset['carat'], data_subset['depth'], c=data_subset['cluster'], cmap='rainbow')
plt.xlabel('Карат (carat)')
plt.ylabel('Глубина (depth)')
plt.title('Кластеризация данных о карате и глубине алмазов')
plt.show()
|
||||
BIN
gusev_vladislav_lab_4/img.png
Normal file
|
After Width: | Height: | Size: 68 KiB |
24
gusev_vladislav_lab_5/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
### Вариант 9
|
||||
### Задание на лабораторную работу:
|
||||
Использовать регрессию по варианту для данных из курсовой работы. Самостоятельно сформулировав задачу. Интерпретировать результаты и оценить, насколько хорошо он подходит для решения сформулированной задачи.
|
||||
### Как запустить лабораторную работу:
|
||||
Выполняем файл gusev_vladislav_lab_5.py, будет выведен график на экран.
|
||||
|
||||
### Технологии
|
||||
NumPy - библиотека для работы с многомерными массивами. Matplotlib - библиотека для визуализации данных двумерной и трехмерной графикой. Sklearn - библиотека с большим количеством алгоритмов машинного обучения.
|
||||
|
||||
### Задача
|
||||
Мною было принято решение посмотреть, как зависит
|
||||
### По коду
|
||||
1) Для начала загружаем данные из csv файла
|
||||
2) Разделяем данные на обучающие и тестовые
|
||||
3) Рескейлим данные из столбца price, который был в диапазоне от 370 до 2700, к диапазону от 0 до 1
|
||||
4) Обучаем модель, находим среднеквадратическую ошибку (MSE) и коэффициент детерминации (R^2)
|
||||
5) Выводим графики
|
||||
|
||||
|
||||

|
||||
|
||||
### Вывод
|
||||
- Среднеквадратическая ошибка получилась довольно низкой, что говорит о близости предсказанных значений к тестовым, однако коэффициент детерминации получился крайне низким, даже отрицательным. Это значит, что модель не улавливает зависимости в данных.
|
||||
- Итог: гребневая модель регрессии неприменима к нашей задаче
|
||||