Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SY32_project
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Victor Demessance
SY32_project
Commits
34470249
Commit
34470249
authored
10 months ago
by
Victor Demessance
Browse files
Options
Downloads
Patches
Plain Diff
[*] Change knn best parameter computation output display
parent
14e7fec2
No related branches found
Branches containing commit
No related tags found
1 merge request
!2
Machine learning implementation
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
machine_learning/classification/knn.ipynb
+5
-5
5 additions, 5 deletions
machine_learning/classification/knn.ipynb
with
5 additions
and
5 deletions
machine_learning/classification/knn.ipynb
+
5
−
5
View file @
34470249
...
...
@@ -351,7 +351,7 @@
},
{
"cell_type": "code",
"execution_count": 3
1
,
"execution_count": 3
8
,
"metadata": {},
"outputs": [
{
...
...
@@ -369,7 +369,7 @@
"output_type": "stream",
"text": [
"[0.18487394957983194, 0.16806722689075632, 0.11764705882352941, 0.13445378151260504, 0.12605042016806722, 0.12605042016806722, 0.11764705882352941, 0.13445378151260504, 0.14285714285714285, 0.14285714285714285, 0.13445378151260504, 0.14285714285714285, 0.13445378151260504, 0.13445378151260504, 0.15966386554621848, 0.14285714285714285, 0.15966386554621848, 0.15126050420168066, 0.15966386554621848]\n",
"Taux d'erreur
: 0.31092436974789917
\n"
"Taux d'erreur
minimum : 0.11764705882352941
\n"
]
}
],
...
...
@@ -379,14 +379,14 @@
"for i in range(1, 20, 1):\n",
" clf = KNeighborsClassifier(n_neighbors=i)\n",
" clf.fit(X_train_combined, Y_train)\n",
" y_
HOG
= clf.predict(X_val_combined)\n",
" tab.append(np.mean(y_
HOG
!= Y_val))\n",
" y_
combined
= clf.predict(X_val_combined)\n",
" tab.append(np.mean(y_
combined
!= Y_val))\n",
"\n",
"plt.bar(range(len(tab)), tab, color='skyblue')\n",
"plt.show()\n",
"\n",
"print(tab)\n",
"print(f\"Taux d'erreur
: {np.mean(y != Y_val
)}\")"
"print(f\"Taux d'erreur
minimum : {min(tab
)}\")"
]
}
],
...
...
%% Cell type:markdown id: tags:
## Entrainement d'un modèle avec la méthode Knn
%% Cell type:code id: tags:
```
python
import
os
import
numpy
as
np
import
random
from
PIL
import
Image
import
matplotlib.pyplot
as
plt
```
%% Cell type:markdown id: tags:
### 1) Fonctions de Preprocessing des datasets
%% Cell type:code id: tags:
```
python
AVERAGE_SIZE_IMAGE
=
(
127
,
145
)
# Thanks to the stats, we know that size of bbox will be (127, 145) -> Average size of labels
def
generate_empty_bbox
(
image_width
,
image_height
):
"""
Generate an empty box for images without label
"""
# Generating random coords for the bbox
x_min
=
random
.
randint
(
0
,
image_width
-
AVERAGE_SIZE_IMAGE
[
0
])
y_min
=
random
.
randint
(
0
,
image_height
-
AVERAGE_SIZE_IMAGE
[
1
])
# Compute complete coords of the bbox
x_max
=
x_min
+
AVERAGE_SIZE_IMAGE
[
0
]
y_max
=
y_min
+
AVERAGE_SIZE_IMAGE
[
1
]
return
(
x_min
,
y_min
,
x_max
,
y_max
)
def
load_data
(
image_dir
,
label_dir
):
"""
Create a dict with all the usefull datas of the dataset
datas = {
"
XXXX
"
(name of the file) : {
"
img
"
: image as an array,
"
labels
"
(data of the labels): {
"
X
"
index of the label (0,1,...,n) : {
"
name
"
: name of the label,
"
coord
"
: coord of the label like xmin, ymin, xmax, ymax,
"
img
"
: crooped img of the label,
}
}
}
}
"""
datas
=
{}
for
image_file
in
os
.
listdir
(
image_dir
):
# Computing name and files paths
image_path
=
image_dir
+
'
/
'
+
image_file
name
=
image_file
.
split
(
'
.
'
)[
0
]
label_path
=
label_dir
+
'
/
'
+
name
+
'
.csv
'
# Import image as array
image
=
np
.
array
(
Image
.
open
(
image_path
))
# Import labels as array
with
open
(
label_path
,
'
r
'
)
as
file
:
rows
=
file
.
readlines
()
label_data
=
{}
if
rows
==
[
'
\n
'
]:
# Create a random empty label to balance model
# Create random coords for empty label
xmin
,
ymin
,
xmax
,
ymax
=
generate_empty_bbox
(
image
.
shape
[
1
],
image
.
shape
[
0
])
# Get the cropped image (as array) of the label
cropped_image
=
np
.
array
(
Image
.
fromarray
(
image
[
ymin
:
ymax
,
xmin
:
xmax
]).
resize
(
AVERAGE_SIZE_IMAGE
))
label_data
[
0
]
=
{
"
name
"
:
"
empty
"
,
"
coord
"
:
(
xmin
,
ymin
,
xmax
,
ymax
),
"
img
"
:
cropped_image
}
else
:
for
i
,
row
in
enumerate
(
rows
):
# One image can contain several labels
row
=
row
.
strip
().
split
(
"
,
"
)
# Compute coords of the label
xmin
,
ymin
,
xmax
,
ymax
=
map
(
int
,
row
[
0
:
4
])
# Get the label name
class_name
=
row
[
4
]
# Get the cropped image (as array) of the label
cropped_image
=
np
.
array
(
Image
.
fromarray
(
image
[
ymin
:
ymax
,
xmin
:
xmax
]).
resize
(
AVERAGE_SIZE_IMAGE
))
# Adding to the json
label_data
[
i
]
=
{
"
name
"
:
class_name
,
"
coord
"
:
(
xmin
,
ymin
,
xmax
,
ymax
),
"
img
"
:
cropped_image
}
datas
[
name
]
=
{
"
img
"
:
image
,
"
labels
"
:
label_data
,
}
return
datas
# Dict to convert str class name to int
name_to_int
=
{
"
danger
"
:
0
,
"
interdiction
"
:
1
,
"
obligation
"
:
2
,
"
stop
"
:
3
,
"
ceder
"
:
4
,
"
frouge
"
:
5
,
"
forange
"
:
6
,
"
fvert
"
:
7
,
"
ff
"
:
8
,
"
empty
"
:
9
}
```
%% Cell type:markdown id: tags:
### 2) Fonction de création des datasets
%% Cell type:code id: tags:
```
python
def
create_xy
(
datas
):
# Creating arrays with all labels datas & classes
X
=
[]
Y
=
[]
for
name
,
data
in
datas
.
items
():
for
row
in
data
[
"
labels
"
].
values
():
image_as_array
=
np
.
array
(
row
[
"
img
"
]).
flatten
()
X
.
append
(
image_as_array
)
Y
.
append
(
name_to_int
[
row
[
"
name
"
]])
X
=
np
.
array
(
X
)
Y
=
np
.
array
(
Y
)
return
X
,
Y
```
%% Cell type:markdown id: tags:
### 3) Création des datasets
%% Cell type:code id: tags:
```
python
# Training dataset
datas_train
=
load_data
(
"
../../data/train/images
"
,
"
../../data/train/labels
"
)
X_train
,
Y_train
=
create_xy
(
datas
=
datas_train
)
# Validation dataset
datas_val
=
load_data
(
"
../../data/val/images
"
,
"
../../data/val/labels
"
)
X_val
,
Y_val
=
create_xy
(
datas
=
datas_val
)
```
%% Cell type:markdown id: tags:
### 4) Application de la méthode Knn
%% Cell type:code id: tags:
```
python
from
sklearn.neighbors
import
KNeighborsClassifier
clf
=
KNeighborsClassifier
(
n_neighbors
=
5
)
# To change
clf
.
fit
(
X_train
,
Y_train
)
y
=
clf
.
predict
(
X_val
)
print
(
f
"
Taux d
'
erreur :
{
np
.
mean
(
y
!
=
Y_val
)
}
"
)
```
%% Output
Taux d'erreur : 0.31092436974789917
%% Cell type:markdown id: tags:
### 5) Test de la méthode Knn avec application des caractéristiques HOG
%% Cell type:code id: tags:
```
python
from
skimage.feature
import
hog
from
skimage.color
import
rgb2gray
def
extract_hog
(
datas
):
# Creating X array with all HOG information of images
X
=
[]
for
name
,
data
in
datas
.
items
():
for
row
in
data
[
"
labels
"
].
values
():
image_as_array
=
np
.
array
(
hog
(
rgb2gray
(
row
[
"
img
"
]))).
flatten
()
X
.
append
(
image_as_array
)
return
np
.
array
(
X
)
# Update training dataset
X_train_HOG
=
extract_hog
(
datas
=
datas_train
)
# Update validation dataset
X_val_HOG
=
extract_hog
(
datas
=
datas_val
)
```
%% Cell type:code id: tags:
```
python
clf
=
KNeighborsClassifier
(
n_neighbors
=
8
)
clf
.
fit
(
X_train_HOG
,
Y_train
)
y_HOG
=
clf
.
predict
(
X_val_HOG
)
print
(
f
"
Taux d
'
erreur :
{
np
.
mean
(
y_HOG
!
=
Y_val
)
}
"
)
```
%% Output
Taux d'erreur : 0.17647058823529413
%% Cell type:markdown id: tags:
### 6) Test avec concaténation des informations de couleur
%% Cell type:code id: tags:
```
python
from
skimage.color
import
rgb2hsv
def
extract_color_features
(
datas
):
# Creating X array with all HOG information of images
X
=
[]
for
name
,
data
in
datas
.
items
():
for
row
in
data
[
"
labels
"
].
values
():
# Convertir l'image en espace colorimétrique HSV
hsv_image
=
rgb2hsv
(
row
[
"
img
"
])
# Calculer l'histogramme de couleur pour chaque canal
hue_hist
=
np
.
histogram
(
hsv_image
[:,:,
0
],
bins
=
10
,
range
=
(
0
,
1
),
density
=
True
)[
0
]
saturation_hist
=
np
.
histogram
(
hsv_image
[:,:,
1
],
bins
=
10
,
range
=
(
0
,
1
),
density
=
True
)[
0
]
value_hist
=
np
.
histogram
(
hsv_image
[:,:,
2
],
bins
=
10
,
range
=
(
0
,
1
),
density
=
True
)[
0
]
# Concaténer les histogrammes de couleur
color_features
=
np
.
concatenate
((
hue_hist
,
saturation_hist
,
value_hist
))
X
.
append
(
color_features
)
return
np
.
array
(
X
)
# Update training dataset
X_train_COLORS
=
extract_color_features
(
datas
=
datas_train
)
# Update validation dataset
X_val_COLORS
=
extract_color_features
(
datas
=
datas_val
)
```
%% Cell type:code id: tags:
```
python
X_train_combined
=
np
.
concatenate
((
X_train_HOG
,
X_train_COLORS
),
axis
=
1
)
X_val_combined
=
np
.
concatenate
((
X_val_HOG
,
X_val_COLORS
),
axis
=
1
)
clf
=
KNeighborsClassifier
(
n_neighbors
=
7
)
clf
.
fit
(
X_train_combined
,
Y_train
)
y_combined
=
clf
.
predict
(
X_val_combined
)
print
(
f
"
Taux d
'
erreur :
{
np
.
mean
(
y_combined
!
=
Y_val
)
}
"
)
```
%% Output
Taux d'erreur : 0.11764705882352941
%% Cell type:markdown id: tags:
### 7) Test des meilleurs paramètres à utiliser :
%% Cell type:code id: tags:
```
python
tab
=
[]
for
i
in
range
(
1
,
20
,
1
):
clf
=
KNeighborsClassifier
(
n_neighbors
=
i
)
clf
.
fit
(
X_train_combined
,
Y_train
)
y_
HOG
=
clf
.
predict
(
X_val_combined
)
tab
.
append
(
np
.
mean
(
y_
HOG
!=
Y_val
))
y_
combined
=
clf
.
predict
(
X_val_combined
)
tab
.
append
(
np
.
mean
(
y_
combined
!=
Y_val
))
plt
.
bar
(
range
(
len
(
tab
)),
tab
,
color
=
'
skyblue
'
)
plt
.
show
()
print
(
tab
)
print
(
f
"
Taux d
'
erreur
:
{
np
.
mean
(
y
!
=
Y_val
)
}
"
)
print
(
f
"
Taux d
'
erreur
minimum :
{
min
(
tab
)
}
"
)
```
%% Output
[0.18487394957983194, 0.16806722689075632, 0.11764705882352941, 0.13445378151260504, 0.12605042016806722, 0.12605042016806722, 0.11764705882352941, 0.13445378151260504, 0.14285714285714285, 0.14285714285714285, 0.13445378151260504, 0.14285714285714285, 0.13445378151260504, 0.13445378151260504, 0.15966386554621848, 0.14285714285714285, 0.15966386554621848, 0.15126050420168066, 0.15966386554621848]
Taux d'erreur
: 0.31092436974789917
Taux d'erreur
minimum : 0.11764705882352941
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment