zavrazhnova_svetlana_lab_4 is ready
This commit is contained in:
parent
9644582307
commit
1e03e8b1d2
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
9
.idea/IIS_2023_1.iml
Normal file
9
.idea/IIS_2023_1.iml
Normal file
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/IIS_2023_1.iml" filepath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
24
zavrazhnova_svetlana_lab_4/README.md
Normal file
24
zavrazhnova_svetlana_lab_4/README.md
Normal file
@ -0,0 +1,24 @@
|
||||
# Задание:
|
||||
Использовать метод кластеризации linkage.
|
||||
|
||||
Задача: Группировка транзакций на основе их суммы, возраста и пола клиента с целью выявления схожих поведенческих характеристик и обнаружения возможных случаев мошенничества.
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
ЛР запускается из файла zavrazhnova_svetlana_lab_4.py через Run: сначала появится окно с графиком, а затем в консоли должны появиться вычисления.
|
||||
|
||||
### Технологии
|
||||
Метод AgglomerativeClustering из библиотеки sklearn, который можно использовать для кластеризации данных, чтобы найти внутреннюю структуру или группы в данных, основываясь на их сходстве.
|
||||
Библиотека scipy для выполнения иерархической кластеризации и построения dendrogram
|
||||
|
||||
### Что делает лабораторная:
|
||||
Выполняет кластеризацию данных и анализ мошеннических операций в каждом кластере.
|
||||
|
||||
### Пример выходных значений:
|
||||
Отрисовывается в отдельном окне dendrogram
|
||||
![dendrogram.png](dendrogram.png)
|
||||
В консоли затем выводятся значения признаков "transaction_amount", "age" и "cluster_label" для каждой точки данных
|
||||
![signs.png](signs.png)
|
||||
а также среднее значение метки мошенничества для каждого кластера и количество мошеннических транзакций в каждом кластере
|
||||
![cluster.png](cluster.png)
|
||||
Еще выводятся значения точек данных, принадлежащих каждому кластеру, чтобы выявить характеристики и структуру каждого кластера.
|
||||
![characteristics.png](characteristics.png)
|
BIN
zavrazhnova_svetlana_lab_4/characteristics.png
Normal file
BIN
zavrazhnova_svetlana_lab_4/characteristics.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 45 KiB |
BIN
zavrazhnova_svetlana_lab_4/cluster.png
Normal file
BIN
zavrazhnova_svetlana_lab_4/cluster.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.1 KiB |
BIN
zavrazhnova_svetlana_lab_4/dendrogram.png
Normal file
BIN
zavrazhnova_svetlana_lab_4/dendrogram.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 28 KiB |
87
zavrazhnova_svetlana_lab_4/fraud_dataset.csv
Normal file
87
zavrazhnova_svetlana_lab_4/fraud_dataset.csv
Normal file
@ -0,0 +1,87 @@
|
||||
transaction_id,transaction_amount,location,merchant,age,gender,fraud_label
|
||||
1,1000.00,New York,ABC Corp,35,M,0
|
||||
2,500.00,Chicago,XYZ Inc,45,F,0
|
||||
3,2000.00,Los Angeles,ABC Corp,28,M,1
|
||||
4,1500.00,San Francisco,XYZ Inc,30,F,0
|
||||
5,800.00,Chicago,ABC Corp,50,F,0
|
||||
6,3000.00,New York,XYZ Inc,42,M,1
|
||||
7,1200.00,San Francisco,ABC Corp,55,F,0
|
||||
8,900.00,Los Angeles,XYZ Inc,37,M,0
|
||||
9,2500.00,Chicago,ABC Corp,33,F,1
|
||||
10,1800.00,New York,XYZ Inc,48,M,0
|
||||
11,750.00,San Francisco,ABC Corp,29,F,0
|
||||
12,2200.00,Chicago,XYZ Inc,51,M,0
|
||||
13,900.00,New York,ABC Corp,40,F,0
|
||||
14,1600.00,Los Angeles,XYZ Inc,26,M,0
|
||||
15,3000.00,San Francisco,ABC Corp,45,F,1
|
||||
16,1200.00,Chicago,XYZ Inc,34,M,0
|
||||
17,800.00,New York,ABC Corp,47,F,0
|
||||
18,1900.00,Los Angeles,XYZ Inc,32,M,0
|
||||
19,1100.00,San Francisco,ABC Corp,52,F,0
|
||||
20,4000.00,Chicago,XYZ Inc,38,M,1
|
||||
21,900.00,New York,ABC Corp,31,F,0
|
||||
22,1700.00,Los Angeles,XYZ Inc,49,M,0
|
||||
23,1000.00,San Francisco,ABC Corp,36,F,0
|
||||
24,2300.00,Chicago,XYZ Inc,27,M,1
|
||||
25,950.00,New York,ABC Corp,41,F,0
|
||||
26,1400.00,Los Angeles,XYZ Inc,54,M,0
|
||||
27,2800.00,San Francisco,ABC Corp,39,F,1
|
||||
28,1100.00,Chicago,XYZ Inc,44,M,0
|
||||
29,750.00,New York,ABC Corp,30,F,0
|
||||
30,2000.00,Los Angeles,XYZ Inc,46,M,0
|
||||
31,1250.00,San Francisco,ABC Corp,35,F,0
|
||||
32,2100.00,Chicago,XYZ Inc,43,M,0
|
||||
33,950.00,New York,ABC Corp,56,F,0
|
||||
34,1800.00,Los Angeles,XYZ Inc,29,M,0
|
||||
35,3200.00,San Francisco,ABC Corp,48,F,1
|
||||
36,1300.00,Chicago,XYZ Inc,37,M,0
|
||||
37,900.00,New York,ABC Corp,51,F,0
|
||||
38,2000.00,Los Angeles,XYZ Inc,33,M,0
|
||||
39,1050.00,San Francisco,ABC Corp,42,F,0
|
||||
40,2400.00,Chicago,XYZ Inc,26,M,0
|
||||
41,800.00,New York,ABC Corp,45,F,0
|
||||
42,1500.00,Los Angeles,XYZ Inc,31,M,0
|
||||
43,2800.00,San Francisco,ABC Corp,50,F,1
|
||||
44,1350.00,Chicago,XYZ Inc,28,M,0
|
||||
45,920.00,New York,ABC Corp,47,F,0
|
||||
46,2000.00,Los Angeles,XYZ Inc,36,M,0
|
||||
47,1125.00,San Francisco,ABC Corp,52,F,0
|
||||
48,1900.00,Chicago,XYZ Inc,38,M,1
|
||||
49,850.00,New York,ABC Corp,32,F,0
|
||||
50,1750.00,Los Angeles,XYZ Inc,49,M,0
|
||||
51,950.00,San Francisco,ABC Corp,27,F,0
|
||||
52,2300.00,Chicago,XYZ Inc,41,M,0
|
||||
53,850.00,New York,ABC Corp,54,F,0
|
||||
54,1600.00,Los Angeles,XYZ Inc,39,M,0
|
||||
55,3000.00,San Francisco,ABC Corp,46,F,1
|
||||
56,1250.00,Chicago,XYZ Inc,35,M,0
|
||||
57,800.00,New York,ABC Corp,56,F,0
|
||||
58,2200.00,Los Angeles,XYZ Inc,29,M,0
|
||||
59,1050.00,San Francisco,ABC Corp,48,F,0
|
||||
60,4000.00,Chicago,XYZ Inc,37,M,1
|
||||
61,950.00,New York,ABC Corp,30,F,0
|
||||
62,1700.00,Los Angeles,XYZ Inc,49,M,0
|
||||
63,1000.00,San Francisco,ABC Corp,36,F,0
|
||||
64,2800.00,Chicago,XYZ Inc,27,M,1
|
||||
65,900.00,New York,ABC Corp,41,F,0
|
||||
66,1400.00,Los Angeles,XYZ Inc,54,M,0
|
||||
67,3200.00,San Francisco,ABC Corp,39,F,1
|
||||
68,1100.00,Chicago,XYZ Inc,44,M,0
|
||||
69,750.00,New York,ABC Corp,30,F,0
|
||||
70,2000.00,Los Angeles,XYZ Inc,46,M,0
|
||||
71,1250.00,San Francisco,ABC Corp,35,F,0
|
||||
72,2100.00,Chicago,XYZ Inc,43,M,0
|
||||
73,950.00,New York,ABC Corp,56,F,0
|
||||
74,1800.00,Los Angeles,XYZ Inc,29,M,0
|
||||
75,3200.00,San Francisco,ABC Corp,48,F,1
|
||||
76,1300.00,Chicago,XYZ Inc,37,M,0
|
||||
77,900.00,New York,ABC Corp,51,F,0
|
||||
78,2000.00,Los Angeles,XYZ Inc,33,M,0
|
||||
79,1050.00,San Francisco,ABC Corp,42,F,0
|
||||
80,2400.00,Chicago,XYZ Inc,26,M,0
|
||||
81,800.00,New York,ABC Corp,45,F,0
|
||||
82,1500.00,Los Angeles,XYZ Inc,31,M,0
|
||||
83,2800.00,San Francisco,ABC Corp,50,F,1
|
||||
84,1350.00,Chicago,XYZ Inc,28,M,0
|
||||
85,920.00,New York,ABC Corp,47,F,0
|
||||
86,2000.00,Los Angeles,XYZ Inc,36,M,0
|
|
BIN
zavrazhnova_svetlana_lab_4/signs.png
Normal file
BIN
zavrazhnova_svetlana_lab_4/signs.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
37
zavrazhnova_svetlana_lab_4/zavrazhnova_svetlana_lab_4.py
Normal file
37
zavrazhnova_svetlana_lab_4/zavrazhnova_svetlana_lab_4.py
Normal file
@ -0,0 +1,37 @@
|
||||
import pandas as pd
|
||||
from sklearn.cluster import AgglomerativeClustering
|
||||
import matplotlib.pyplot as plt
|
||||
import scipy.cluster.hierarchy as sch
|
||||
|
||||
# Number of clusters requested from the agglomerative model.
N_CLUSTERS = 3
# Columns that are one-hot encoded before clustering.
CATEGORICAL_COLUMNS = ["location", "merchant", "gender"]
# Columns that are used as-is.
NUMERIC_COLUMNS = ["transaction_amount", "age"]


def prepare_features(raw):
    """Encode the raw transactions and build the clustering matrix.

    Args:
        raw: DataFrame with the columns of ``fraud_dataset.csv``
            (``transaction_id``, ``transaction_amount``, ``location``,
            ``merchant``, ``age``, ``gender``, ``fraud_label``).

    Returns:
        A ``(data, X)`` tuple: ``data`` is a copy of ``raw`` without
        ``transaction_id`` and with the categorical columns one-hot
        encoded; ``X`` is a float array holding the numeric columns
        followed by every one-hot column.
    """
    data = raw.drop("transaction_id", axis=1)
    data = pd.get_dummies(data, columns=CATEGORICAL_COLUMNS)

    # Collect the one-hot columns from the frame instead of hard-coding
    # their names, so a dataset with other cities/merchants still works.
    prefixes = tuple(name + "_" for name in CATEGORICAL_COLUMNS)
    dummy_columns = [col for col in data.columns if col.startswith(prefixes)]
    features = NUMERIC_COLUMNS + dummy_columns

    # Explicit float conversion: depending on the pandas version the dummy
    # columns are uint8 or bool, which would otherwise give a mixed array.
    X = data[features].to_numpy(dtype=float)
    return data, X


def main():
    """Cluster the transactions, show the dendrogram and print statistics."""
    from pathlib import Path

    # Resolve the dataset next to this script so the run does not depend
    # on the current working directory.
    csv_path = Path(__file__).resolve().parent / "fraud_dataset.csv"
    data, X = prepare_features(pd.read_csv(csv_path))

    # NOTE(review): the features are not standardized, so the Euclidean/Ward
    # distances are presumably dominated by transaction_amount (hundreds to
    # thousands) over age and the 0/1 dummies. Consider scaling; left as-is
    # here so the documented output does not change.

    # Compute the distances between points and draw the dendrogram.
    sch.dendrogram(sch.linkage(X, method='ward'))
    plt.xlabel('Instances')
    plt.ylabel('Euclidean distances')
    plt.title('Dendrogram')
    plt.show()

    clustering_model = AgglomerativeClustering(n_clusters=N_CLUSTERS, linkage="ward")
    data["cluster_label"] = clustering_model.fit_predict(X)

    print(data[["transaction_amount", "age", "cluster_label"]])

    # Mean of the 0/1 fraud label per cluster.
    fraud_rate = data.groupby("cluster_label")["fraud_label"].mean()
    print(fraud_rate)
    # Row counts for every (fraud_label, cluster_label) combination.
    print(data.groupby(['fraud_label', "cluster_label"])["fraud_label"].count())

    # Distinct rows (with their counts) that fall into each cluster.
    for label in range(N_CLUSTERS):
        print(data[data['cluster_label'] == label].value_counts())


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user