Lab 11
Task: multiclass image classification into 5 categories (daisy, dandelion, rose, sunflower, tulip)
Dataset link: https://www.kaggle.com/datasets/rahmasleam/flowers-dataset
In [24]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def load_images_from_folder(folder, target_size=(512, 512)):
    images = []
    labels = []
    for label in os.listdir(folder):
        if label in ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']:
            label_folder = os.path.join(folder, label)
            if os.path.isdir(label_folder):
                for filename in os.listdir(label_folder):
                    img_path = os.path.join(label_folder, filename)
                    img = cv2.imread(img_path)
                    if img is not None:
                        img_resized = cv2.resize(img, target_size)
                        images.append(img_resized)
                        labels.append(label)
    return images, labels
folder_path = "./static/csv/dataset_flower"
images, labels = load_images_from_folder(folder_path)
num_images_to_display = min(8, len(images))
def display_images(images, labels, max_images=10):
    if not images:
        print("No images to display.")
        return
    count = min(max_images, len(images))
    cols = 4
    rows = (count + cols - 1) // cols
    plt.figure(figsize=(15, 5 * rows))
    for i in range(count):
        plt.subplot(rows, cols, i + 1)
        # cv2 loads images in BGR order; convert to RGB for matplotlib
        plt.imshow(cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB))
        plt.title(labels[i])
        plt.axis('off')
    plt.tight_layout()
    plt.show()
display_images(images, labels)

# Convert the loaded lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)
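As a quick sanity check (not part of the original notebook), the number of loaded images per class can be printed; the snippet below only assumes the images and labels arrays built above.

# Hedged sanity check: images per class after loading
unique_labels, counts = np.unique(labels, return_counts=True)
for name, count in zip(unique_labels, counts):
    print(f"{name}: {count} images")
print(f"Total: {len(images)} images, shape of one image: {images[0].shape}")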
Image preprocessing
In [25]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def preprocess_images(images):
    processed_images = []
    for img in images:
        img_resized = cv2.resize(img, (128, 128))
        img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
        img_eq = cv2.equalizeHist(img_gray)
        img_bin = cv2.adaptiveThreshold(img_eq, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY, 11, 2)
        processed_images.append(img_bin)
    return np.array(processed_images)
processed_images = preprocess_images(images)
def display_single_image(original, processed, index):
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(original[index], cv2.COLOR_BGR2RGB))
    plt.title('Original image')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(processed[index], cmap='gray')
    plt.title('Processed image')
    plt.axis('off')
    plt.show()

index = 0
display_single_image(images, processed_images, index)
Deep learning
In [26]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.image import imread
import os
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
PIC_SIZE = 128
BATCH_SIZE = 8
def load_and_label_images_from_directory(directory, target_size=(PIC_SIZE, PIC_SIZE), allowed_labels=None):
    images = []
    labels = []
    if allowed_labels is None:
        allowed_labels = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    for label in os.listdir(directory):
        if label in allowed_labels:
            label_folder = os.path.join(directory, label)
            if os.path.isdir(label_folder):
                for filename in os.listdir(label_folder):
                    img_path = os.path.join(label_folder, filename)
                    if img_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                        img = cv2.imread(img_path)
                        if img is not None:
                            img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
                            images.append(img_resized)
                            labels.append(label)
    return np.array(images), np.array(labels)
train_images_raw, train_labels_raw = load_and_label_images_from_directory(folder_path)
def preprocess_images(images):
    processed_images = []
    for image in images:
        normalized_image = image.astype(np.float32) / 255.0
        processed_images.append(normalized_image)
    return np.array(processed_images)
processed_images = preprocess_images(train_images_raw)
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(train_labels_raw)
labels_categorical = tf.keras.utils.to_categorical(labels_encoded)
X_train, X_test, y_train, y_test = train_test_split(processed_images, labels_categorical, test_size=0.2, random_state=42)
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
def convert_to_grayscale(image, label):
    image = tf.image.rgb_to_grayscale(image)
    return image, label
train_ds = train_ds.map(convert_to_grayscale)
val_ds = val_ds.map(convert_to_grayscale)
train_ds = train_ds.shuffle(1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
print("Train dataset and validation dataset successfully prepared.")
print(f"Train shape: {X_train.shape}, Validation shape: {X_test.shape}")
print(f"Классы: {label_encoder.classes_}")
Model training
In [27]:
from keras_tuner import HyperModel, RandomSearch
from tensorflow.keras import layers # type: ignore
import tensorflow as tf
class ImageClassifier(HyperModel):
    def build(self, hp):
        model = tf.keras.Sequential([  # type: ignore
            layers.Conv2D(hp.Int('filters_1', 32, 128, step=32), (3, 3), activation='relu', padding='same', kernel_initializer="he_normal", input_shape=(PIC_SIZE, PIC_SIZE, 3)),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(hp.Int('filters_2', 32, 128, step=32), (3, 3), activation='relu', padding='same', kernel_initializer="he_normal"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(hp.Int('filters_3', 64, 256, step=64), (3, 3), activation='relu', padding='same', kernel_initializer="he_normal"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.GlobalAveragePooling2D(),
            layers.Dense(hp.Int('units', 64, 256, step=64), activation='relu', kernel_initializer="he_normal"),
            layers.Dropout(hp.Float('dropout', 0.1, 0.5, step=0.1)),
            layers.Dense(len(label_encoder.classes_), activation='softmax')
        ])
        model.compile(
            optimizer=tf.keras.optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')),  # type: ignore
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        return model
tuner = RandomSearch(
    ImageClassifier(),
    objective="val_accuracy",
    max_trials=5,
    executions_per_trial=1,
    directory="D:/ktune",
    project_name="image_classifier",
)

tuner.search(X_train, y_train, epochs=5, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]
test_loss, test_acc = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")
Best model information
In [28]:
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()
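Besides the layer summary, the hyperparameter values chosen by the search can also be printed; a minimal sketch, assuming the tuner object from the training cell is still available:

# Hedged sketch: hyperparameters of the best trial
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
for name, value in best_hps.values.items():
    print(f"{name}: {value}")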
Prediction quality for each class
In [29]:
X_train, X_val, y_train, y_val = train_test_split(processed_images, labels_categorical, test_size=0.2, random_state=42)
tuner.search(X_train, y_train, epochs=20, validation_data=(X_val, y_val))
best_model = tuner.get_best_models(num_models=1)[0]
y_pred = best_model.predict(X_val, batch_size=32)
prediction = y_pred[0:5]
images_list = X_val[0:5]
plt.figure(figsize=(20, 5))
predict_label = []
for i in range(5):
    plt.subplot(1, 5, i + 1)
    image = images_list[i]
    # images were loaded with cv2 in BGR order; reverse the channels for correct colors
    plt.imshow(image[..., ::-1])
    prob = prediction[i]
    class_idx = np.argmax(prob)
    label = label_encoder.inverse_transform([class_idx])[0]
    confidence = prob[class_idx]
    predict_label.append(f"{label}: {confidence * 100:.2f}%")
    plt.title(predict_label[i], fontsize=16)
    plt.axis('off')
plt.tight_layout()
plt.show()
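Five sample images give only an anecdotal view; per-class quality is easier to judge from aggregate metrics. A minimal sketch using scikit-learn's classification_report over the whole validation split, reusing y_val, y_pred and label_encoder defined above:

from sklearn.metrics import classification_report

# Hedged sketch: per-class precision, recall and F1 on the validation split
y_true_classes = np.argmax(y_val, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_true_classes, y_pred_classes, target_names=label_encoder.classes_))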
Model evaluation
In [31]:
tuner.search(X_train, y_train, epochs=20, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]
history = best_model.fit(X_train, y_train, epochs=20, validation_data=(X_val, y_val))
test_loss, test_acc = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")
train_loss, train_acc = best_model.evaluate(X_train, y_train)
print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
val_loss, val_acc = best_model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")
predictions = best_model.predict(X_val)
predicted_classes = np.argmax(predictions, axis=1)
predicted_confidences = np.max(predictions, axis=1)
pd.DataFrame(history.history).plot(figsize=(15,8)) # type: ignore
plt.grid(True)
plt.gca().set_ylim(0,4)
plt.show()
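The loss and accuracy curves can be complemented with a confusion matrix over the validation split; a minimal sketch, reusing predicted_classes and y_val from this cell:

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Hedged sketch: confusion matrix on the validation split
true_classes = np.argmax(y_val, axis=1)
cm = confusion_matrix(true_classes, predicted_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(xticks_rotation=45)
plt.tight_layout()
plt.show()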