zavrazhnova_svetlana_lab_4 is ready

This commit is contained in:
Svetlnkk 2023-10-19 21:51:39 +04:00
parent 9644582307
commit 1e03e8b1d2
11 changed files with 174 additions and 0 deletions

3
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

9
.idea/IIS_2023_1.iml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/IIS_2023_1.iml" filepath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,24 @@
# Задание:
Использовать метод кластеризации linkage.
Задача: Группировка транзакций на основе их суммы, возраста и пола клиента с целью выявления схожих поведенческих характеристик и обнаружения возможных случаев мошенничества.
### Как запустить лабораторную работу:
ЛР запускается в файле zavrazhnova_svetlana_lab_4.py через Run: сначала появится окно с графиком, а затем (после закрытия окна) в консоли должны появиться вычисления.
### Технологии
Метод AgglomerativeClustering из библиотеки sklearn, который можно использовать для кластеризации данных, чтобы найти внутреннюю структуру или группы в данных, основываясь на их сходстве.
Библиотека scipy для выполнения иерархической кластеризации и построения dendrogram
### Что делает лабораторная:
Выполняет кластеризацию данных и анализ мошеннических операций в каждом кластере.
### Пример выходных значений:
Отрисовывается в отдельном окне dendrogram
![dendrogram.png](dendrogram.png)
В консоли затем выводятся значения признаков "transaction_amount", "age" и "cluster_label" для каждой точки данных
![signs.png](signs.png)
а также среднее значение метки мошенничества для каждого кластера и количество транзакций мошенничества в каждом кластере
![cluster.png](cluster.png)
Еще выводятся значения точек данных, принадлежащих каждому кластеру, чтобы выявить характеристики и структуру каждого кластера.
![characteristics.png](characteristics.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -0,0 +1,87 @@
transaction_id,transaction_amount,location,merchant,age,gender,fraud_label
1,1000.00,New York,ABC Corp,35,M,0
2,500.00,Chicago,XYZ Inc,45,F,0
3,2000.00,Los Angeles,ABC Corp,28,M,1
4,1500.00,San Francisco,XYZ Inc,30,F,0
5,800.00,Chicago,ABC Corp,50,F,0
6,3000.00,New York,XYZ Inc,42,M,1
7,1200.00,San Francisco,ABC Corp,55,F,0
8,900.00,Los Angeles,XYZ Inc,37,M,0
9,2500.00,Chicago,ABC Corp,33,F,1
10,1800.00,New York,XYZ Inc,48,M,0
11,750.00,San Francisco,ABC Corp,29,F,0
12,2200.00,Chicago,XYZ Inc,51,M,0
13,900.00,New York,ABC Corp,40,F,0
14,1600.00,Los Angeles,XYZ Inc,26,M,0
15,3000.00,San Francisco,ABC Corp,45,F,1
16,1200.00,Chicago,XYZ Inc,34,M,0
17,800.00,New York,ABC Corp,47,F,0
18,1900.00,Los Angeles,XYZ Inc,32,M,0
19,1100.00,San Francisco,ABC Corp,52,F,0
20,4000.00,Chicago,XYZ Inc,38,M,1
21,900.00,New York,ABC Corp,31,F,0
22,1700.00,Los Angeles,XYZ Inc,49,M,0
23,1000.00,San Francisco,ABC Corp,36,F,0
24,2300.00,Chicago,XYZ Inc,27,M,1
25,950.00,New York,ABC Corp,41,F,0
26,1400.00,Los Angeles,XYZ Inc,54,M,0
27,2800.00,San Francisco,ABC Corp,39,F,1
28,1100.00,Chicago,XYZ Inc,44,M,0
29,750.00,New York,ABC Corp,30,F,0
30,2000.00,Los Angeles,XYZ Inc,46,M,0
31,1250.00,San Francisco,ABC Corp,35,F,0
32,2100.00,Chicago,XYZ Inc,43,M,0
33,950.00,New York,ABC Corp,56,F,0
34,1800.00,Los Angeles,XYZ Inc,29,M,0
35,3200.00,San Francisco,ABC Corp,48,F,1
36,1300.00,Chicago,XYZ Inc,37,M,0
37,900.00,New York,ABC Corp,51,F,0
38,2000.00,Los Angeles,XYZ Inc,33,M,0
39,1050.00,San Francisco,ABC Corp,42,F,0
40,2400.00,Chicago,XYZ Inc,26,M,0
41,800.00,New York,ABC Corp,45,F,0
42,1500.00,Los Angeles,XYZ Inc,31,M,0
43,2800.00,San Francisco,ABC Corp,50,F,1
44,1350.00,Chicago,XYZ Inc,28,M,0
45,920.00,New York,ABC Corp,47,F,0
46,2000.00,Los Angeles,XYZ Inc,36,M,0
47,1125.00,San Francisco,ABC Corp,52,F,0
48,1900.00,Chicago,XYZ Inc,38,M,1
49,850.00,New York,ABC Corp,32,F,0
50,1750.00,Los Angeles,XYZ Inc,49,M,0
51,950.00,San Francisco,ABC Corp,27,F,0
52,2300.00,Chicago,XYZ Inc,41,M,0
53,850.00,New York,ABC Corp,54,F,0
54,1600.00,Los Angeles,XYZ Inc,39,M,0
55,3000.00,San Francisco,ABC Corp,46,F,1
56,1250.00,Chicago,XYZ Inc,35,M,0
57,800.00,New York,ABC Corp,56,F,0
58,2200.00,Los Angeles,XYZ Inc,29,M,0
59,1050.00,San Francisco,ABC Corp,48,F,0
60,4000.00,Chicago,XYZ Inc,37,M,1
61,950.00,New York,ABC Corp,30,F,0
62,1700.00,Los Angeles,XYZ Inc,49,M,0
63,1000.00,San Francisco,ABC Corp,36,F,0
64,2800.00,Chicago,XYZ Inc,27,M,1
65,900.00,New York,ABC Corp,41,F,0
66,1400.00,Los Angeles,XYZ Inc,54,M,0
67,3200.00,San Francisco,ABC Corp,39,F,1
68,1100.00,Chicago,XYZ Inc,44,M,0
69,750.00,New York,ABC Corp,30,F,0
70,2000.00,Los Angeles,XYZ Inc,46,M,0
71,1250.00,San Francisco,ABC Corp,35,F,0
72,2100.00,Chicago,XYZ Inc,43,M,0
73,950.00,New York,ABC Corp,56,F,0
74,1800.00,Los Angeles,XYZ Inc,29,M,0
75,3200.00,San Francisco,ABC Corp,48,F,1
76,1300.00,Chicago,XYZ Inc,37,M,0
77,900.00,New York,ABC Corp,51,F,0
78,2000.00,Los Angeles,XYZ Inc,33,M,0
79,1050.00,San Francisco,ABC Corp,42,F,0
80,2400.00,Chicago,XYZ Inc,26,M,0
81,800.00,New York,ABC Corp,45,F,0
82,1500.00,Los Angeles,XYZ Inc,31,M,0
83,2800.00,San Francisco,ABC Corp,50,F,1
84,1350.00,Chicago,XYZ Inc,28,M,0
85,920.00,New York,ABC Corp,47,F,0
86,2000.00,Los Angeles,XYZ Inc,36,M,0
1 transaction_id transaction_amount location merchant age gender fraud_label
2 1 1000.00 New York ABC Corp 35 M 0
3 2 500.00 Chicago XYZ Inc 45 F 0
4 3 2000.00 Los Angeles ABC Corp 28 M 1
5 4 1500.00 San Francisco XYZ Inc 30 F 0
6 5 800.00 Chicago ABC Corp 50 F 0
7 6 3000.00 New York XYZ Inc 42 M 1
8 7 1200.00 San Francisco ABC Corp 55 F 0
9 8 900.00 Los Angeles XYZ Inc 37 M 0
10 9 2500.00 Chicago ABC Corp 33 F 1
11 10 1800.00 New York XYZ Inc 48 M 0
12 11 750.00 San Francisco ABC Corp 29 F 0
13 12 2200.00 Chicago XYZ Inc 51 M 0
14 13 900.00 New York ABC Corp 40 F 0
15 14 1600.00 Los Angeles XYZ Inc 26 M 0
16 15 3000.00 San Francisco ABC Corp 45 F 1
17 16 1200.00 Chicago XYZ Inc 34 M 0
18 17 800.00 New York ABC Corp 47 F 0
19 18 1900.00 Los Angeles XYZ Inc 32 M 0
20 19 1100.00 San Francisco ABC Corp 52 F 0
21 20 4000.00 Chicago XYZ Inc 38 M 1
22 21 900.00 New York ABC Corp 31 F 0
23 22 1700.00 Los Angeles XYZ Inc 49 M 0
24 23 1000.00 San Francisco ABC Corp 36 F 0
25 24 2300.00 Chicago XYZ Inc 27 M 1
26 25 950.00 New York ABC Corp 41 F 0
27 26 1400.00 Los Angeles XYZ Inc 54 M 0
28 27 2800.00 San Francisco ABC Corp 39 F 1
29 28 1100.00 Chicago XYZ Inc 44 M 0
30 29 750.00 New York ABC Corp 30 F 0
31 30 2000.00 Los Angeles XYZ Inc 46 M 0
32 31 1250.00 San Francisco ABC Corp 35 F 0
33 32 2100.00 Chicago XYZ Inc 43 M 0
34 33 950.00 New York ABC Corp 56 F 0
35 34 1800.00 Los Angeles XYZ Inc 29 M 0
36 35 3200.00 San Francisco ABC Corp 48 F 1
37 36 1300.00 Chicago XYZ Inc 37 M 0
38 37 900.00 New York ABC Corp 51 F 0
39 38 2000.00 Los Angeles XYZ Inc 33 M 0
40 39 1050.00 San Francisco ABC Corp 42 F 0
41 40 2400.00 Chicago XYZ Inc 26 M 0
42 41 800.00 New York ABC Corp 45 F 0
43 42 1500.00 Los Angeles XYZ Inc 31 M 0
44 43 2800.00 San Francisco ABC Corp 50 F 1
45 44 1350.00 Chicago XYZ Inc 28 M 0
46 45 920.00 New York ABC Corp 47 F 0
47 46 2000.00 Los Angeles XYZ Inc 36 M 0
48 47 1125.00 San Francisco ABC Corp 52 F 0
49 48 1900.00 Chicago XYZ Inc 38 M 1
50 49 850.00 New York ABC Corp 32 F 0
51 50 1750.00 Los Angeles XYZ Inc 49 M 0
52 51 950.00 San Francisco ABC Corp 27 F 0
53 52 2300.00 Chicago XYZ Inc 41 M 0
54 53 850.00 New York ABC Corp 54 F 0
55 54 1600.00 Los Angeles XYZ Inc 39 M 0
56 55 3000.00 San Francisco ABC Corp 46 F 1
57 56 1250.00 Chicago XYZ Inc 35 M 0
58 57 800.00 New York ABC Corp 56 F 0
59 58 2200.00 Los Angeles XYZ Inc 29 M 0
60 59 1050.00 San Francisco ABC Corp 48 F 0
61 60 4000.00 Chicago XYZ Inc 37 M 1
62 61 950.00 New York ABC Corp 30 F 0
63 62 1700.00 Los Angeles XYZ Inc 49 M 0
64 63 1000.00 San Francisco ABC Corp 36 F 0
65 64 2800.00 Chicago XYZ Inc 27 M 1
66 65 900.00 New York ABC Corp 41 F 0
67 66 1400.00 Los Angeles XYZ Inc 54 M 0
68 67 3200.00 San Francisco ABC Corp 39 F 1
69 68 1100.00 Chicago XYZ Inc 44 M 0
70 69 750.00 New York ABC Corp 30 F 0
71 70 2000.00 Los Angeles XYZ Inc 46 M 0
72 71 1250.00 San Francisco ABC Corp 35 F 0
73 72 2100.00 Chicago XYZ Inc 43 M 0
74 73 950.00 New York ABC Corp 56 F 0
75 74 1800.00 Los Angeles XYZ Inc 29 M 0
76 75 3200.00 San Francisco ABC Corp 48 F 1
77 76 1300.00 Chicago XYZ Inc 37 M 0
78 77 900.00 New York ABC Corp 51 F 0
79 78 2000.00 Los Angeles XYZ Inc 33 M 0
80 79 1050.00 San Francisco ABC Corp 42 F 0
81 80 2400.00 Chicago XYZ Inc 26 M 0
82 81 800.00 New York ABC Corp 45 F 0
83 82 1500.00 Los Angeles XYZ Inc 31 M 0
84 83 2800.00 San Francisco ABC Corp 50 F 1
85 84 1350.00 Chicago XYZ Inc 28 M 0
86 85 920.00 New York ABC Corp 47 F 0
87 86 2000.00 Los Angeles XYZ Inc 36 M 0

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1,37 @@
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
# Cluster the transactions hierarchically (Ward linkage) and report how the
# known fraud labels are distributed across the resulting clusters.

# Load the dataset; transaction_id is only a row identifier, so it is dropped
# before clustering.
data = pd.read_csv('fraud_dataset.csv')
data = data.drop("transaction_id", axis=1)

# One-hot encode the categorical columns so they can take part in the
# distance computation.
data = pd.get_dummies(data, columns=["location", "merchant", "gender"])

# Every column except the target is a feature. Deriving the list from the
# frame (instead of hard-coding the dummy column names) keeps the script
# working when the dataset contains other cities or merchants.
features = [column for column in data.columns if column != "fraud_label"]

# Cast explicitly to float: recent pandas versions emit boolean dummy columns,
# and mixing them with numeric columns would otherwise give an object array.
X = data[features].to_numpy(dtype=float)
# NOTE(review): the features are not scaled, so transaction_amount (hundreds
# to thousands) dominates the Euclidean distances over age and the 0/1
# dummies. Consider standardizing the columns - confirm the intended behavior.

# Compute the pairwise merge hierarchy and draw the dendrogram.
linkage_matrix = sch.linkage(X, method='ward')
sch.dendrogram(linkage_matrix)
plt.xlabel('Instances')
plt.ylabel('Euclidean distances')
plt.title('Dendrogram')
plt.show()

# Cut the hierarchy into a fixed number of clusters and label every row.
n_clusters = 3
clustering_model = AgglomerativeClustering(n_clusters=n_clusters, linkage="ward")
data["cluster_label"] = clustering_model.fit_predict(X)
print(data[["transaction_amount", "age", "cluster_label"]])

# Share of fraudulent transactions per cluster (mean of the 0/1 label).
fraud_rate = data.groupby("cluster_label")["fraud_label"].mean()
print(fraud_rate)

# Number of transactions for every (fraud_label, cluster_label) combination.
print(data.groupby(['fraud_label', "cluster_label"])["fraud_label"].count())

# Show the distinct rows of each cluster with their frequencies to expose the
# cluster's characteristics.
for cluster_id in range(n_clusters):
    cluster_rows = data[data['cluster_label'] == cluster_id]
    print(cluster_rows.value_counts())