Skip to content
Snippets Groups Projects
Commit 2c9c47dd authored by Victor Demessance's avatar Victor Demessance
Browse files

[*] Update last commit

parent e3183085
No related branches found
No related tags found
1 merge request!2Machine learning implementation
%% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode des SVM
## Entrainement d'un modèle avec la méthode Adaboost
%% Cell type:code id: tags:
``` python
import os
import numpy as np
import random
from PIL import Image
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags:
``` python
# Size of the label crops: the dataset statistics give (127, 145) as the
# average size of the labelled bounding boxes. Index 0 is used along x and
# index 1 along y.
AVERAGE_SIZE_IMAGE = (127, 145)


def generate_empty_bbox(image_width, image_height):
    """Generate a random bounding box for an image that has no label.

    The box has the size given by ``AVERAGE_SIZE_IMAGE`` and is placed at a
    random position inside the image. When the image is smaller than that
    size along an axis, the box starts at 0 on that axis and is clipped to
    the image border instead of raising ``ValueError``.

    Args:
        image_width: Width of the image in pixels.
        image_height: Height of the image in pixels.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    box_width, box_height = AVERAGE_SIZE_IMAGE
    # random.randint raises ValueError on an empty range, so the upper bound
    # is clamped to 0 for images smaller than the box.
    x_min = random.randint(0, max(0, image_width - box_width))
    y_min = random.randint(0, max(0, image_height - box_height))
    # Far corner, clipped so the box never extends past the image.
    x_max = min(x_min + box_width, image_width)
    y_max = min(y_min + box_height, image_height)
    return (x_min, y_min, x_max, y_max)
def load_data(image_dir, label_dir):
Create a dict with all the usefull datas of the dataset
datas = {
"XXXX" (name of the file) : {
"img" : image as an array,
"labels" (data of the labels): {
"X" index of the label (0,1,...,n) : {
"name" : name of the label,
"coord" : coord of the label like xmin, ymin, xmax, ymax,
"img" : crooped img of the label,
datas = {}
for image_file in os.listdir(image_dir):
# Computing name and files paths
image_path = image_dir + '/' + image_file
name = image_file.split('.')[0]
label_path = label_dir + '/' + name + '.csv'
# Import image as array
image = np.array(
# Import labels as array
with open(label_path, 'r') as file:
rows = file.readlines()
label_data = {}
if rows == ['\n']: # Create a random empty label to balance model
# Create random coords for empty label
xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0])
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
label_data[0] = {
"coord": (xmin, ymin, xmax, ymax),
for i, row in enumerate(rows): # One image can contain several labels
row = row.strip().split(",")
# Compute coords of the label
xmin, ymin, xmax, ymax = map(int, row[0:4])
# Get the label name
class_name = row[4]
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
# Adding to the json
label_data[i] = {
"coord": (xmin, ymin, xmax, ymax),
datas[name] = {
"img" : image,
"labels" : label_data,
return datas
# Lookup table converting a class name (str) to its integer id.
name_to_int = {
    "danger": 0,
    "interdiction": 1,
    "obligation": 2,
    "stop": 3,
    "ceder": 4,
    "frouge": 5,
    "forange": 6,
    "fvert": 7,
    "ff": 8,
    "empty": 9,
}
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags:
``` python
def create_xy(datas):
# Creating arrays with all labels datas & classes
X = []
Y = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(row["img"]).flatten()
X = np.array(X)
Y = np.array(Y)
return X, Y
%% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
``` python
# Training dataset
datas_train = load_data("../../data/train/images", "../../data/train/labels")
X_train, Y_train = create_xy(datas=datas_train)
# Validation dataset
datas_val = load_data("../../data/val/images", "../../data/val/labels")
X_val, Y_val = create_xy(datas=datas_val)
%% Cell type:markdown id: tags:
### 4) Application de la méthode Adaboost
%% Cell type:code id: tags:
``` python
from sklearn.ensemble import AdaBoostClassifier
adaboost_clf = AdaBoostClassifier(n_estimators=10) # To change, Y_train)
y = adaboost_clf.predict(X_val)
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
%% Output
c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\ FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
Taux d'erreur : 0.6302521008403361
Taux d'erreur : 0.5294117647058824
%% Cell type:markdown id: tags:
### 5) Test de la méthode Adaboost avec application des caractéristiques HOG
%% Cell type:code id: tags:
``` python
from skimage.feature import hog
from skimage.color import rgb2gray
def extract_hog(datas):
# Creating X array with all HOG information of images
X = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(hog(rgb2gray(row["img"]))).flatten()
return np.array(X)
# Update training dataset
X_train_HOG = extract_hog(datas=datas_train)
# Update validation dataset
X_val_HOG = extract_hog(datas=datas_val)
%% Cell type:code id: tags:
``` python
adaboost_clf = AdaBoostClassifier(n_estimators=10), Y_train)
y_HOG = adaboost_clf.predict(X_val_HOG)
print(f"Taux d'erreur : {np.mean(y_HOG != Y_val)}")
%% Output
c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\ FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
Taux d'erreur : 0.5378151260504201
%% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode Random Forest
%% Cell type:code id: tags:
``` python
import os
import numpy as np
import random
from PIL import Image
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags:
``` python
# Size of the label crops: the dataset statistics give (127, 145) as the
# average size of the labelled bounding boxes. Index 0 is used along x and
# index 1 along y.
AVERAGE_SIZE_IMAGE = (127, 145)


def generate_empty_bbox(image_width, image_height):
    """Generate a random bounding box for an image that has no label.

    The box has the size given by ``AVERAGE_SIZE_IMAGE`` and is placed at a
    random position inside the image. When the image is smaller than that
    size along an axis, the box starts at 0 on that axis and is clipped to
    the image border instead of raising ``ValueError``.

    Args:
        image_width: Width of the image in pixels.
        image_height: Height of the image in pixels.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    box_width, box_height = AVERAGE_SIZE_IMAGE
    # random.randint raises ValueError on an empty range, so the upper bound
    # is clamped to 0 for images smaller than the box.
    x_min = random.randint(0, max(0, image_width - box_width))
    y_min = random.randint(0, max(0, image_height - box_height))
    # Far corner, clipped so the box never extends past the image.
    x_max = min(x_min + box_width, image_width)
    y_max = min(y_min + box_height, image_height)
    return (x_min, y_min, x_max, y_max)
def load_data(image_dir, label_dir):
Create a dict with all the usefull datas of the dataset
datas = {
"XXXX" (name of the file) : {
"img" : image as an array,
"labels" (data of the labels): {
"X" index of the label (0,1,...,n) : {
"name" : name of the label,
"coord" : coord of the label like xmin, ymin, xmax, ymax,
"img" : crooped img of the label,
datas = {}
for image_file in os.listdir(image_dir):
# Computing name and files paths
image_path = image_dir + '/' + image_file
name = image_file.split('.')[0]
label_path = label_dir + '/' + name + '.csv'
# Import image as array
image = np.array(
# Import labels as array
with open(label_path, 'r') as file:
rows = file.readlines()
label_data = {}
if rows == ['\n']: # Create a random empty label to balance model
# Create random coords for empty label
xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0])
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
label_data[0] = {
"coord": (xmin, ymin, xmax, ymax),
for i, row in enumerate(rows): # One image can contain several labels
row = row.strip().split(",")
# Compute coords of the label
xmin, ymin, xmax, ymax = map(int, row[0:4])
# Get the label name
class_name = row[4]
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
# Adding to the json
label_data[i] = {
"coord": (xmin, ymin, xmax, ymax),
datas[name] = {
"img" : image,
"labels" : label_data,
return datas
# Lookup table converting a class name (str) to its integer id.
name_to_int = {
    "danger": 0,
    "interdiction": 1,
    "obligation": 2,
    "stop": 3,
    "ceder": 4,
    "frouge": 5,
    "forange": 6,
    "fvert": 7,
    "ff": 8,
    "empty": 9,
}
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags:
``` python
def create_xy(datas):
# Creating arrays with all labels datas & classes
X = []
Y = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(row["img"]).flatten()
X = np.array(X)
Y = np.array(Y)
return X, Y
%% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
``` python
# Training dataset
datas_train = load_data("../../data/train/images", "../../data/train/labels")
X_train, Y_train = create_xy(datas=datas_train)
# Validation dataset
datas_val = load_data("../../data/val/images", "../../data/val/labels")
X_val, Y_val = create_xy(datas=datas_val)
%% Cell type:markdown id: tags:
### 4) Application de la méthode Random Forest
%% Cell type:code id: tags:
``` python
from sklearn.ensemble import RandomForestClassifier
adaboost_clf = RandomForestClassifier(n_estimators=50) # To change, Y_train)
y = adaboost_clf.predict(X_val)
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
%% Output
Taux d'erreur : 0.20168067226890757
%% Cell type:markdown id: tags:
### 5) Test de la méthode Random Forest avec application des caractéristiques HOG
%% Cell type:code id: tags:
``` python
from skimage.feature import hog
from skimage.color import rgb2gray
def extract_hog(datas):
# Creating X array with all HOG information of images
X = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(hog(rgb2gray(row["img"]))).flatten()
return np.array(X)
# Update training dataset
X_train_HOG = extract_hog(datas=datas_train)
# Update validation dataset
X_val_HOG = extract_hog(datas=datas_val)
%% Cell type:code id: tags:
``` python
adaboost_clf = RandomForestClassifier(n_estimators=10)
adaboost_clf = RandomForestClassifier(n_estimators=90), Y_train)
y_HOG = adaboost_clf.predict(X_val_HOG)
print(f"Taux d'erreur : {np.mean(y_HOG != Y_val)}")
%% Output
Taux d'erreur : 0.2689075630252101
Taux d'erreur : 0.18487394957983194
%% Cell type:markdown id: tags:
### 6) Détermination du meilleur paramètre
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
tab = []
for i in range(1, 100, 10):
adaboost_clf = RandomForestClassifier(n_estimators=i) # To change, Y_train)
y = adaboost_clf.predict(X_val)
tab.append(np.mean(y != Y_val)), tab, color='skyblue')
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
%% Output
KeyboardInterrupt Traceback (most recent call last)
Cell In[33], line 7
5 for i in range(1, 100, 10):
6 adaboost_clf = RandomForestClassifier(n_estimators=i) # To change
----> 7, Y_train)
8 y = adaboost_clf.predict(X_val)
9 tab.append(np.mean(y != Y_val))
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1467 estimator._validate_params()
1469 with config_context(
1470 skip_parameter_validation=(
1471 prefer_skip_nested_validation or global_skip_validation
1472 )
1473 ):
-> 1474 return fit_method(estimator, *args, **kwargs)
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\, in, X, y, sample_weight)
478 trees = [
479 self._make_estimator(append=False, random_state=random_state)
480 for i in range(n_more_estimators)
481 ]
483 # Parallel loop: we prefer the threading backend as the Cython code
484 # for fitting the trees is internally releasing the Python GIL
485 # making threading more efficient than multiprocessing in
486 # that case. However, for joblib 0.12+ we respect any
487 # parallel_backend contexts set at a higher level,
488 # since correctness does not rely on using threads.
--> 489 trees = Parallel(
490 n_jobs=self.n_jobs,
491 verbose=self.verbose,
492 prefer="threads",
493 )(
494 delayed(_parallel_build_trees)(
495 t,
496 self.bootstrap,
497 X,
498 y,
499 sample_weight,
500 i,
501 len(trees),
502 verbose=self.verbose,
503 class_weight=self.class_weight,
504 n_samples_bootstrap=n_samples_bootstrap,
505 missing_values_in_feature_mask=missing_values_in_feature_mask,
506 )
507 for i, t in enumerate(trees)
508 )
510 # Collect newly grown trees
511 self.estimators_.extend(trees)
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\, in Parallel.__call__(self, iterable)
62 config = get_config()
63 iterable_with_config = (
64 (_with_config(delayed_func, config), args, kwargs)
65 for delayed_func, args, kwargs in iterable
66 )
---> 67 return super().__call__(iterable_with_config)
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\, in Parallel.__call__(self, iterable)
1861 output = self._get_sequential_output(iterable)
1862 next(output)
-> 1863 return output if self.return_generator else list(output)
1865 # Let's create an ID that uniquely identifies the current call. If the
1866 # call is interrupted early and that the same instance is immediately
1867 # re-used, this id will be used to prevent workers that were
1868 # concurrently finalizing a task from the previous call to run the
1869 # callback.
1870 with self._lock:
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\, in Parallel._get_sequential_output(self, iterable)
1790 self.n_dispatched_batches += 1
1791 self.n_dispatched_tasks += 1
-> 1792 res = func(*args, **kwargs)
1793 self.n_completed_tasks += 1
1794 self.print_progress()
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\, in _FuncWrapper.__call__(self, *args, **kwargs)
127 config = {}
128 with config_context(**config):
--> 129 return self.function(*args, **kwargs)
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\ensemble\, in _parallel_build_trees(tree, bootstrap, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap, missing_values_in_feature_mask)
189 elif class_weight == "balanced_subsample":
190 curr_sample_weight *= compute_sample_weight("balanced", y, indices=indices)
--> 192 tree._fit(
193 X,
194 y,
195 sample_weight=curr_sample_weight,
196 check_input=False,
197 missing_values_in_feature_mask=missing_values_in_feature_mask,
198 )
199 else:
200 tree._fit(
201 X,
202 y,
205 missing_values_in_feature_mask=missing_values_in_feature_mask,
206 )
File c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\tree\, in BaseDecisionTree._fit(self, X, y, sample_weight, check_input, missing_values_in_feature_mask)
461 else:
462 builder = BestFirstTreeBuilder(
463 splitter,
464 min_samples_split,
469 self.min_impurity_decrease,
470 )
--> 472, X, y, sample_weight, missing_values_in_feature_mask)
474 if self.n_outputs_ == 1 and is_classifier(self):
475 self.n_classes_ = self.n_classes_[0]
[0.5126050420168067, 0.23529411764705882, 0.20168067226890757, 0.18487394957983194, 0.19327731092436976, 0.19327731092436976, 0.16806722689075632, 0.18487394957983194, 0.15126050420168066, 0.17647058823529413]
Taux d'erreur : 0.17647058823529413
%% Cell type:code id: tags:
``` python
tab = []
for i in range(1, 100, 10):
adaboost_clf = RandomForestClassifier(n_estimators=i), Y_train)
y_HOG = adaboost_clf.predict(X_val_HOG)
tab.append(np.mean(y_HOG != Y_val)), tab, color='skyblue')
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
%% Output
[0.5042016806722689, 0.2605042016806723, 0.20168067226890757, 0.18487394957983194, 0.226890756302521, 0.18487394957983194, 0.18487394957983194, 0.16806722689075632, 0.21008403361344538, 0.16806722689075632]
Taux d'erreur : 0.17647058823529413
%% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode des SVM
%% Cell type:code id: tags:
``` python
import os
import numpy as np
import random
from PIL import Image
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags:
``` python
# Size of the label crops: the dataset statistics give (127, 145) as the
# average size of the labelled bounding boxes. Index 0 is used along x and
# index 1 along y.
AVERAGE_SIZE_IMAGE = (127, 145)


def generate_empty_bbox(image_width, image_height):
    """Generate a random bounding box for an image that has no label.

    The box has the size given by ``AVERAGE_SIZE_IMAGE`` and is placed at a
    random position inside the image. When the image is smaller than that
    size along an axis, the box starts at 0 on that axis and is clipped to
    the image border instead of raising ``ValueError``.

    Args:
        image_width: Width of the image in pixels.
        image_height: Height of the image in pixels.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    box_width, box_height = AVERAGE_SIZE_IMAGE
    # random.randint raises ValueError on an empty range, so the upper bound
    # is clamped to 0 for images smaller than the box.
    x_min = random.randint(0, max(0, image_width - box_width))
    y_min = random.randint(0, max(0, image_height - box_height))
    # Far corner, clipped so the box never extends past the image.
    x_max = min(x_min + box_width, image_width)
    y_max = min(y_min + box_height, image_height)
    return (x_min, y_min, x_max, y_max)
def load_data(image_dir, label_dir):
Create a dict with all the usefull datas of the dataset
datas = {
"XXXX" (name of the file) : {
"img" : image as an array,
"labels" (data of the labels): {
"X" index of the label (0,1,...,n) : {
"name" : name of the label,
"coord" : coord of the label like xmin, ymin, xmax, ymax,
"img" : crooped img of the label,
datas = {}
for image_file in os.listdir(image_dir):
# Computing name and files paths
image_path = image_dir + '/' + image_file
name = image_file.split('.')[0]
label_path = label_dir + '/' + name + '.csv'
# Import image as array
image = np.array(
# Import labels as array
with open(label_path, 'r') as file:
rows = file.readlines()
label_data = {}
if rows == ['\n']: # Create a random empty label to balance model
# Create random coords for empty label
xmin, ymin, xmax, ymax = generate_empty_bbox(image.shape[1], image.shape[0])
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
label_data[0] = {
"coord": (xmin, ymin, xmax, ymax),
for i, row in enumerate(rows): # One image can contain several labels
row = row.strip().split(",")
# Compute coords of the label
xmin, ymin, xmax, ymax = map(int, row[0:4])
# Get the label name
class_name = row[4]
# Get the cropped image (as array) of the label
cropped_image = np.array(Image.fromarray(image[ymin:ymax, xmin:xmax]).resize(AVERAGE_SIZE_IMAGE))
# Adding to the json
label_data[i] = {
"coord": (xmin, ymin, xmax, ymax),
datas[name] = {
"img" : image,
"labels" : label_data,
return datas
# Lookup table converting a class name (str) to its integer id.
name_to_int = {
    "danger": 0,
    "interdiction": 1,
    "obligation": 2,
    "stop": 3,
    "ceder": 4,
    "frouge": 5,
    "forange": 6,
    "fvert": 7,
    "ff": 8,
    "empty": 9,
}
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags:
``` python
def create_xy(datas):
# Creating arrays with all labels datas & classes
X = []
Y = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(row["img"]).flatten()
X = np.array(X)
Y = np.array(Y)
return X, Y
%% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
``` python
# Training dataset
datas_train = load_data("../../data/train/images", "../../data/train/labels")
X_train, Y_train = create_xy(datas=datas_train)
# Validation dataset
datas_val = load_data("../../data/val/images", "../../data/val/labels")
X_val, Y_val = create_xy(datas=datas_val)
%% Cell type:markdown id: tags:
### 4) Application de la méthode des SVM
%% Cell type:code id: tags:
``` python
from sklearn import svm
svm_model = svm.SVC(kernel='linear'), Y_train)
y = svm_model.predict(X_val)
print(f"Taux d'erreur : {np.mean(y != Y_val)}")
%% Output
Taux d'erreur : 0.226890756302521
%% Cell type:markdown id: tags:
### 5) Test de la méthode SVM avec application des caractéristiques HOG
%% Cell type:code id: tags:
``` python
from skimage.feature import hog
from skimage.color import rgb2gray
def extract_hog(datas):
# Creating X array with all HOG information of images
X = []
for name, data in datas.items():
for row in data["labels"].values():
image_as_array = np.array(hog(rgb2gray(row["img"]))).flatten()
return np.array(X)
# Update training dataset
X_train_HOG = extract_hog(datas=datas_train)
# Update validation dataset
X_val_HOG = extract_hog(datas=datas_val)
%% Cell type:code id: tags:
``` python
svm_model = svm.SVC(kernel='linear'), Y_train)
y_HOG = svm_model.predict(X_val_HOG)
print(f"Taux d'erreur : {np.mean(y_HOG != Y_val)}")
%% Output
Taux d'erreur : 0.15966386554621848
%% Cell type:markdown id: tags:
### 6) Test de la méthode SVM avec application des LBP
%% Cell type:code id: tags:
``` python
import cv2
def extract_SIFT(datas):
# Creating X array with all HOG information of images
X = []
sift = cv2.SIFT_create()
for name, data in datas.items():
for row in data["labels"].values():
gray_image = cv2.cvtColor(data["img"], cv2.COLOR_RGB2GRAY)
keypoints, descriptors = sift.detectAndCompute(gray_image, None)
if descriptors is not None:
return np.array(X)
# Update training dataset
X_train_LBP = extract_SIFT(datas=datas_train)
# Update validation dataset
X_val_LBP = extract_SIFT(datas=datas_val)
%% Output
c:\Users\victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\skimage\feature\ UserWarning: Applying `local_binary_pattern` to floating-point images may give unexpected results when small numerical differences between adjacent pixels are present. It is recommended to use this function with images of integer dtype.
ValueError Traceback (most recent call last)
Cell In[34], line 17
13 return np.array(X)
16 # Update training dataset
---> 17 X_train_LBP = extract_LBP(datas=datas_train)
19 # Update validation dataset
20 X_val_LBP = extract_LBP(datas=datas_val)
Cell In[34], line 13, in extract_LBP(datas)
10 image_as_array = np.array(hog(local_binary_pattern(rgb2gray(data["img"]), P = 8, R = 1))).flatten()
11 X.append(image_as_array)
---> 13 return np.array(X)
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (1071,) + inhomogeneous part.
%% Cell type:code id: tags:
``` python
svm_model = svm.SVC(kernel='linear'), Y_train)
y_LBP = svm_model.predict(X_val_LBP)
print(f"Taux d'erreur : {np.mean(y_LBP != Y_val)}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment