Skip to content
Snippets Groups Projects
Commit e3183085 authored by Victor Demessance's avatar Victor Demessance
Browse files

[+] Add some analysis

parent 79a278f2
No related branches found
No related tags found
1 merge request!2Machine learning implementation
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode des SVM ## Entrainement d'un modèle avec la méthode des SVM
%% Cell type:markdown id: tags:
#### Chargement des données d'entrainement
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import os import os
import cv2
import numpy as np import numpy as np
import random import random
from PIL import Image
``` ```
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
""" AVERAGE_SIZE_IMAGE = (127, 145) # Thanks to the stats, we know that size of bbox will be (127, 145) -> Average size of labels
We will create a dict with all the usefull datas of the training dataset
datas = {
"XXXX" (name of the file) : {
"img" : ndarray of the image,
"labels" (data of the labels): {
"X" index of the label (0,1,...,n) : {
"name" : name of the label,
"coord" : coord of the label like xmin, ymin, xmax, ymax,
"img" : crooped img of the label,
}
}
}
}
"""
def generate_empty_bbox(image_width, image_height): def generate_empty_bbox(image_width, image_height):
# Thanks to the stats, we know that size of bbox will be (127, 145) -> Average size of labels """
# Génération de coordonnées aléatoires pour le coin supérieur gauche de la boundebox Generate an empty box for images without label
x_min = random.randint(0, image_width - 127) """
y_min = random.randint(0, image_height - 145) # Generating random coords for the bbox
x_min = random.randint(0, image_width - AVERAGE_SIZE_IMAGE[0])
# Calcul des coordonnées du coin inférieur droit de la boundebox y_min = random.randint(0, image_height - AVERAGE_SIZE_IMAGE[1])
x_max = x_min + 127
y_max = y_min + 145 # Compute complete coords of the bbox
x_max = x_min + AVERAGE_SIZE_IMAGE[0]
y_max = y_min + AVERAGE_SIZE_IMAGE[1]
return (x_min, y_min, x_max, y_max) return (x_min, y_min, x_max, y_max)
def load_data(image_dir, label_dir): def load_data(image_dir, label_dir):
"""
Create a dict with all the usefull datas of the dataset
datas = {
"XXXX" (name of the file) : {
"img" : image as an array,
"labels" (data of the labels): {
"X" index of the label (0,1,...,n) : {
"name" : name of the label,
"coord" : coord of the label like xmin, ymin, xmax, ymax,
"img" : crooped img of the label,
}
}
}
}
"""
datas = {} datas = {}
for image_file in os.listdir(image_dir): for image_file in os.listdir(image_dir):
# Computing name and files paths # Computing name and files paths
image_path = image_dir + '/' + image_file image_path = image_dir + '/' + image_file
name = image_file.split('.')[0] name = image_file.split('.')[0]
label_path = label_dir + '/' + name + '.csv' label_path = label_dir + '/' + name + '.csv'
# Import image as array # Import image as array
image = cv2.imread(image_path) image = np.array(Image.open(image_path))
# Import labels as array # Import labels as array
with open(label_path, 'r') as file: with open(label_path, 'r') as file:
rows = file.readlines() rows = file.readlines()
label_data = {} label_data = {}
if rows == ['\n']: # Create a random empty label to balance model if rows == ['\n']: # Create a random empty label to balance model
# Create random coords for empty label # Create random coords for empty label
xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0]) xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0])
# Get the cropped image (as array) of the label # Get the cropped image (as array) of the label
cropped_image = image[ymin:ymax, xmin:xmax] cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
label_data[0] = { label_data[0] = {
"name":"empty", "name":"empty",
"coord": (xmin, ymin, xmax, ymax), "coord": (xmin, ymin, xmax, ymax),
"img":cropped_image "img":cropped_image
} }
else: else:
for i, row in enumerate(rows): # One image can contain several labels for i, row in enumerate(rows): # One image can contain several labels
row = row.strip().split(",") row = row.strip().split(",")
# Compute coords of the label # Compute coords of the label
xmin, ymin, xmax, ymax = map(int, row[0:4]) xmin, ymin, xmax, ymax = map(int, row[0:4])
# Get the label name # Get the label name
class_name = row[4] class_name = row[4]
# Get the cropped image (as array) of the label # Get the cropped image (as array) of the label
cropped_image = image[ymin:ymax, xmin:xmax] cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
# Adding to the json # Adding to the json
label_data[i] = { label_data[i] = {
"name":class_name, "name":class_name,
"coord": (xmin, ymin, xmax, ymax), "coord": (xmin, ymin, xmax, ymax),
"img":cropped_image "img":cropped_image
} }
datas[name] = { datas[name] = {
"img" : image, "img" : image,
"labels" : label_data, "labels" : label_data,
} }
return datas return datas
# Lookup table: class name (str) -> integer id; ids follow the order below
name_to_int = {
    class_name: class_id
    for class_id, class_name in enumerate((
        "danger",
        "interdiction",
        "obligation",
        "stop",
        "ceder",
        "frouge",
        "forange",
        "fvert",
        "ff",
        "empty",
    ))
}
``` ```
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Creating the dict of the datas def create_xy(datas):
# Creating arrays with all labels datas & classes
X = []
Y = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(row["img"]).flatten()
X.append(image_as_array)
Y.append(name_to_int[row["name"]])
X = np.array(X)
Y = np.array(Y)
return X, Y
```
datas = load_data("../data/train/images", "../data/train/labels") %% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
``` python
# Training dataset
datas_train = load_data("../../data/train/images", "../../data/train/labels")
X_train, Y_train = create_xy(datas=datas_train)
# Validation dataset
datas_val = load_data("../../data/val/images", "../../data/val/labels")
X_val, Y_val = create_xy(datas=datas_val)
``` ```
%% Cell type:markdown id: tags:
### 4) Application de la méthode Adaboost
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def extract_features(img): from sklearn.ensemble import AdaBoostClassifier
# Convertion to gray level
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Color Hist adaboost_clf = AdaBoostClassifier(n_estimators=10) # To change
hist_color = cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]) adaboost_clf.fit(X_train, Y_train)
hist_color = cv2.normalize(hist_color, hist_color).flatten() y = adaboost_clf.predict(X_val)
# Gradient Hist print(f"Taux d'erreur : {np.mean(y != Y_val)}")
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5) ```
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
grad_mag = np.sqrt(sobelx**2 + sobely**2)
hist_gradient = cv2.calcHist([grad_mag.astype(np.uint8)], [0], None, [16], [0, 256])
hist_gradient = cv2.normalize(hist_gradient, hist_gradient).flatten()
return np.concatenate((hist_color, hist_gradient)) %% Output
c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
warnings.warn(
# Dict to convert str class name to int Taux d'erreur : 0.6302521008403361
name_to_int = {
"danger": 0,
"interdiction": 1,
"obligation": 2,
"stop": 3,
"ceder": 4,
"frouge": 5,
"forange": 6,
"fvert": 7,
"ff": 8,
"empty": 9
}
%% Cell type:markdown id: tags:
# Creating arrays with all labels datas & classes ### 5) Test de la méthode Adaboost avec application des caractéristiques HOG
X_train = []
Y_train = []
for name, data in datas.items(): %% Cell type:code id: tags:
for row in data["labels"].values():
X_train.append(extract_features(row["img"]))
Y_train.append(name_to_int[row["name"]])
X_train = np.array(X_train) ``` python
Y_train = np.array(Y_train) from skimage.feature import hog
from skimage.color import rgb2gray
def extract_hog(datas):
    """
    Compute the HOG descriptor of every label crop in ``datas``.

    Each crop (``row["img"]``) is converted to grayscale, passed to ``hog``
    and flattened; the descriptors are stacked in iteration order of the dict.
    """
    descriptors = [
        np.array(hog(rgb2gray(label["img"]))).flatten()
        for entry in datas.values()
        for label in entry["labels"].values()
    ]
    return np.array(descriptors)
# Update training dataset
X_train_HOG = extract_hog(datas=datas_train)
# Update validation dataset
X_val_HOG = extract_hog(datas=datas_val)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from sklearn import svm adaboost_clf = AdaBoostClassifier(n_estimators=10)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score adaboost_clf.fit(X_train_HOG, Y_train)
y_HOG = adaboost_clf.predict(X_val_HOG)
svm_model = svm.SVC(kernel='linear') # Choix du noyau linéaire print(f"Taux d'erreur : {np.mean(y_HOG != Y_val)}")
svm_model.fit(X_train, Y_train)
print(svm_model)
``` ```
%% Output %% Output
[1 1 0 ... 1 5 7] c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
warnings.warn(
Taux d'erreur : 0.5378151260504201
......
This diff is collapsed.
%% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode des SVM
%% Cell type:code id: tags:
``` python
import os
import numpy as np
import random
from PIL import Image
```
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags:
``` python
# (width, height) of a box: per the dataset stats, the average size of the labels
AVERAGE_SIZE_IMAGE = (127, 145)


def generate_empty_bbox(image_width, image_height):
    """
    Generate a random "empty" box for images without label.

    The box has the average label size (AVERAGE_SIZE_IMAGE) and lies fully
    inside the image. If the image is smaller than that size along an axis,
    the box is clamped to the image on that axis (the original code raised
    ValueError from random.randint in that case).

    Returns the tuple (x_min, y_min, x_max, y_max).
    """
    # Never ask for a box larger than the image itself
    box_width = min(AVERAGE_SIZE_IMAGE[0], image_width)
    box_height = min(AVERAGE_SIZE_IMAGE[1], image_height)

    # Random top-left corner such that the whole box stays inside the image
    x_min = random.randint(0, image_width - box_width)
    y_min = random.randint(0, image_height - box_height)

    # Bottom-right corner follows from the box size
    return (x_min, y_min, x_min + box_width, y_min + box_height)
def _crop_label(image, coord):
    """Return the region ``coord`` of ``image`` resized to AVERAGE_SIZE_IMAGE."""
    xmin, ymin, xmax, ymax = coord
    crop = Image.fromarray(image[ymin:ymax, xmin:xmax])
    return np.array(crop.resize(AVERAGE_SIZE_IMAGE))


def load_data(image_dir, label_dir):
    """
    Create a dict with all the useful data of a dataset.

    For every file of ``image_dir`` the label file ``<name>.csv`` is read
    from ``label_dir``. Each non-blank row is expected to look like
    ``xmin,ymin,xmax,ymax,class_name``. When the label file has no non-blank
    row, one random "empty" box is generated instead so that the model also
    gets background samples.

    datas = {
        "XXXX" (file name without extension): {
            "img": image as an array,
            "labels" (data of the labels): {
                i (index of the label: 0, 1, ..., n): {
                    "name": name of the label,
                    "coord": coord of the label as (xmin, ymin, xmax, ymax),
                    "img": cropped image of the label, resized to AVERAGE_SIZE_IMAGE,
                }
            }
        }
    }
    """
    datas = {}

    # os.listdir gives no ordering guarantee; sort to keep the sample order reproducible
    for image_file in sorted(os.listdir(image_dir)):
        # Computing name and file paths
        image_path = os.path.join(image_dir, image_file)
        # splitext only removes the extension, so dots inside the name are kept
        name = os.path.splitext(image_file)[0]
        label_path = os.path.join(label_dir, name + '.csv')

        # Import image as array; the context manager closes the file handle
        with Image.open(image_path) as pil_image:
            image = np.array(pil_image)

        # Import label rows, ignoring blank lines (e.g. a trailing newline)
        with open(label_path, 'r') as label_file:
            rows = [line.strip() for line in label_file]
        rows = [row for row in rows if row]

        label_data = {}
        if not rows:
            # No label: create a random empty label to balance the model
            coord = generate_empty_bbox(image.shape[1], image.shape[0])
            label_data[0] = {
                "name": "empty",
                "coord": coord,
                "img": _crop_label(image, coord),
            }
        else:
            # One image can contain several labels
            for i, row in enumerate(rows):
                fields = row.split(",")
                coord = tuple(map(int, fields[0:4]))
                label_data[i] = {
                    "name": fields[4],
                    "coord": coord,
                    "img": _crop_label(image, coord),
                }

        datas[name] = {
            "img": image,
            "labels": label_data,
        }

    return datas
# Lookup table: class name (str) -> integer id; ids follow the order below
name_to_int = {
    class_name: class_id
    for class_id, class_name in enumerate((
        "danger",
        "interdiction",
        "obligation",
        "stop",
        "ceder",
        "frouge",
        "forange",
        "fvert",
        "ff",
        "empty",
    ))
}
```
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags:
``` python
def create_xy(datas):
    """
    Turn the dict built by load_data into arrays usable by a classifier.

    Returns (X, Y): X stacks the flattened crop of every label, Y holds the
    integer class of each label (looked up in name_to_int), in the same order.
    """
    # Every label of every image, in dict iteration order
    labels = [
        label
        for entry in datas.values()
        for label in entry["labels"].values()
    ]
    X = np.array([np.array(label["img"]).flatten() for label in labels])
    Y = np.array([name_to_int[label["name"]] for label in labels])
    return X, Y
```
%% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
``` python
# Training split: load images + labels, then flatten the crops into (X, Y)
datas_train = load_data(
    image_dir="../../data/train/images",
    label_dir="../../data/train/labels",
)
X_train, Y_train = create_xy(datas_train)

# Validation split: same pipeline on the held-out images
datas_val = load_data(
    image_dir="../../data/val/images",
    label_dir="../../data/val/labels",
)
X_val, Y_val = create_xy(datas_val)
```
%% Cell type:markdown id: tags:
### 4) Application de la méthode des SVM
%% Cell type:code id: tags:
``` python
from sklearn import svm
# Linear-kernel SVM on the raw flattened crops (fit() returns the estimator itself)
svm_model = svm.SVC(kernel='linear').fit(X_train, Y_train)

# Error rate = share of validation samples whose predicted class is wrong
y = svm_model.predict(X_val)
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
```
%% Output
Taux d'erreur : 0.226890756302521
%% Cell type:markdown id: tags:
### 5) Test de la méthode SVM avec application des caractéristiques HOG
%% Cell type:code id: tags:
``` python
from skimage.feature import hog
from skimage.color import rgb2gray
def extract_hog(datas):
    """
    Compute the HOG descriptor of every label crop in ``datas``.

    Each crop (``row["img"]``) is converted to grayscale, passed to ``hog``
    and flattened; the descriptors are stacked in iteration order of the dict.
    """
    descriptors = [
        np.array(hog(rgb2gray(label["img"]))).flatten()
        for entry in datas.values()
        for label in entry["labels"].values()
    ]
    return np.array(descriptors)
# HOG features of the training crops (classes stay in Y_train)
X_train_HOG = extract_hog(datas_train)

# HOG features of the validation crops (classes stay in Y_val)
X_val_HOG = extract_hog(datas_val)
```
%% Cell type:code id: tags:
``` python
# Same linear SVM, trained on HOG descriptors instead of raw pixels
svm_model = svm.SVC(kernel='linear').fit(X_train_HOG, Y_train)

# Error rate on the validation split
y_HOG = svm_model.predict(X_val_HOG)
print(f"Taux d'erreur : {np.mean(y_HOG != Y_val)}")
```
%% Output
Taux d'erreur : 0.15966386554621848
%% Cell type:markdown id: tags:
### 6) Test de la méthode SVM avec application des LBP
%% Cell type:code id: tags:
``` python
import cv2
def extract_SIFT(datas):
    """
    Build one fixed-length SIFT feature vector per label crop.

    SIFT yields a variable number of descriptors per image, so stacking the
    flattened descriptors produces rows of different lengths that np.array()
    cannot turn into a 2-D matrix. Each crop is therefore summarised by the
    mean of its descriptors. A crop without any descriptor gets an all-zero
    vector instead of being skipped, so the result keeps exactly one row per
    label, in the same iteration order as create_xy uses for the classes.
    """
    sift = cv2.SIFT_create()
    descriptor_size = sift.descriptorSize()

    X = []
    for data in datas.values():
        for row in data["labels"].values():
            # Describe the label crop itself, not the full image it comes from
            gray_image = cv2.cvtColor(row["img"], cv2.COLOR_RGB2GRAY)
            _, descriptors = sift.detectAndCompute(gray_image, None)
            if descriptors is None or len(descriptors) == 0:
                # No keypoint found: keep a placeholder row to stay aligned with Y
                X.append(np.zeros(descriptor_size, dtype=np.float32))
            else:
                X.append(descriptors.mean(axis=0))
    return np.array(X)
# SIFT-based features of the training crops (classes stay in Y_train)
X_train_LBP = extract_SIFT(datas_train)

# SIFT-based features of the validation crops (classes stay in Y_val)
X_val_LBP = extract_SIFT(datas_val)
```
%% Output
c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\skimage\feature\texture.py:360: UserWarning: Applying `local_binary_pattern` to floating-point images may give unexpected results when small numerical differences between adjacent pixels are present. It is recommended to use this function with images of integer dtype.
warnings.warn(
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[34], line 17
13 return np.array(X)
16 # Update training dataset
---> 17 X_train_LBP = extract_LBP(datas=datas_train)
19 # Update validation dataset
20 X_val_LBP = extract_LBP(datas=datas_val)
Cell In[34], line 13, in extract_LBP(datas)
10 image_as_array = np.array(hog(local_binary_pattern(rgb2gray(data["img"]), P = 8, R = 1))).flatten()
11 X.append(image_as_array)
---> 13 return np.array(X)
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (1071,) + inhomogeneous part.
%% Cell type:code id: tags:
``` python
# Same linear SVM, trained on the features stored in X_train_LBP
svm_model = svm.SVC(kernel='linear').fit(X_train_LBP, Y_train)

# Error rate on the validation split
y_LBP = svm_model.predict(X_val_LBP)
print(f"Taux d'erreur : {np.mean(y_LBP != Y_val)}")
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment