Commit 87c2b59b authored by Victor Demessance

[+] Create dataset & bbox visualisation

parent 61238b48
Merge request !3: Merge deep learning on main
File deleted
File deleted
File deleted
@@ -6,11 +6,7 @@ from utils import *
# Creating a dataset object
dataset = Dataset(
image_dir="./data/train/images/",
label_dir="./data/train/labels/",
grid_sizes=[13, 26, 52],
anchors=ANCHORS,
transform=test_transform
)
label_dir="./data/train/labels/")
# Creating a dataloader object
loader = torch.utils.data.DataLoader(
@@ -19,26 +15,13 @@ loader = torch.utils.data.DataLoader(
shuffle=True,
)
# Defining the grid size and the scaled anchors
GRID_SIZE = [13, 26, 52]
scaled_anchors = torch.tensor(ANCHORS) / (
1 / torch.tensor(GRID_SIZE).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)
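For reference, the unsqueeze/repeat expression above is just a broadcast multiply of each anchor pair by its grid size; a minimal sketch (not part of the commit), assuming the ANCHORS and GRID_SIZE values defined here:

import torch

ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
GRID_SIZE = [13, 26, 52]

# Same result as the division above: a (3, 3, 2) tensor where each
# (w, h) pair is multiplied by its scale's grid size.
equivalent = torch.tensor(ANCHORS) * torch.tensor(GRID_SIZE).view(3, 1, 1)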
# Getting a batch from the dataloader
x, y = next(iter(loader))
# Getting the boxes coordinates from the labels
# and converting them into bounding boxes without scaling
boxes = []
for i in range(y[0].shape[1]):
anchor = scaled_anchors[i]
boxes += convert_cells_to_bboxes(
y[i], is_predictions=False, s=y[i].shape[2], anchors=anchor
)[0]
# Applying non-maximum suppression
boxes = nms(boxes, iou_threshold=1, threshold=0.7)
# Getting the boxes coordinates and converting them into bounding boxes
bboxes = []
for bbox in y:
bboxes.append([value.item() for value in bbox])
# Plotting the image with the bounding boxes
plot_image(x[0].permute(1,2,0).to("cpu"), boxes)
\ No newline at end of file
plot_bbox_image(x[0].to("cpu"), bboxes)
import torch
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2
# Device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load and save model variable
load_model = False
save_model = True
# model checkpoint file name
checkpoint_file = "checkpoint.pth.tar"
# Anchor boxes for each feature map scaled between 0 and 1
# 3 feature maps at 3 different scales based on YOLOv3 paper
ANCHORS = [
[(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
[(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
[(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
# Batch size for training
batch_size = 32
# Learning rate for training
learning_rate = 1e-5
# Number of epochs for training
epochs = 20
# Image size
image_size = 416
# Grid cell sizes
s = [image_size // 32, image_size // 16, image_size // 8] # = [13, 26, 52] for a 416 input
# Class labels
class_labels = [
"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
"chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor"
]
# Function to save checkpoint
def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
print("==> Saving checkpoint")
checkpoint = {
"state_dict": model.state_dict(),
"optimizer": optimizer.state_dict(),
}
torch.save(checkpoint, filename)
# Function to load checkpoint
def load_checkpoint(checkpoint_file, model, optimizer, lr):
print("==> Loading checkpoint")
checkpoint = torch.load(checkpoint_file, map_location=device)
model.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
for param_group in optimizer.param_groups:
param_group["lr"] = lr
# Transform for testing
test_transform = A.Compose(
[
# Rescale an image so that maximum side is equal to image_size
A.LongestMaxSize(max_size=image_size),
# Pad remaining areas with zeros
A.PadIfNeeded(
min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT, value=[0, 0, 0]
),
# Normalize the image
A.Normalize(
mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255
),
# Convert the image to PyTorch tensor
ToTensorV2()
],
# Augmentation for bounding boxes
bbox_params=A.BboxParams(
format="yolo",
min_visibility=0.4,
label_fields=[]
)
)
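A minimal sketch of running test_transform, assuming a made-up HxWx3 uint8 image and one YOLO-format box (center x, center y, width, height, all in [0, 1], with the class id as a trailing value):

import numpy as np

sample_img = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)  # fake image
sample_boxes = [(0.5, 0.5, 0.2, 0.3, 1)]  # (x_c, y_c, w, h, class_id)
augs = test_transform(image=sample_img, bboxes=sample_boxes)
img_tensor = augs["image"]   # CHW torch tensor, 416 x 416 after resize + pad
boxes_out = augs["bboxes"]   # boxes re-expressed for the transformed image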
DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # Device
NB_EPOCHS = 20 # Number of epochs for training
IMAGE_SIZE = 416 # Image size
@@ -14,27 +14,16 @@ from PIL import Image
# Create a dataset class to load the images and labels from the folder
class Dataset(torch.utils.data.Dataset):
def __init__(
self, image_dir, label_dir, anchors,
image_size=416, grid_sizes=[13, 26, 52]
, transform=None):
self, image_dir, label_dir,
image_size=416):
# Image and label directories
self.image_dir = image_dir
self.label_dir = label_dir
# Image size
self.image_size = image_size
# Transformations
self.transform = transform
# Grid sizes for each scale
self.grid_sizes = grid_sizes
# Anchor boxes
self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # the 3 scales' anchor lists concatenated into one (9, 2) tensor
# Number of anchor boxes
self.num_anchors = self.anchors.shape[0] # 9 anchors in total
# Number of anchor boxes per scale
self.num_anchors_per_scale = self.num_anchors // 3 # 3 anchors at each scale
# Number of classes
self.num_classes = len(name_to_int)
self.num_classes = NB_CLASSES
# Ignore IoU threshold
self.iou_threshold = 0.5
@@ -44,104 +33,38 @@ class Dataset(torch.utils.data.Dataset):
def __getitem__(self, idx):
# Getting the label path
label_path = os.path.join(self.label_dir, (str(idx).zfill(4)+".csv"))
if os.path.exists(label_path):
try:
# Getting the image path
img_path = os.path.join(self.image_dir, (str(idx).zfill(4)+".jpg"))
image = np.array(Image.open(img_path).convert("RGB"))
# Creating the label array
# 5 columns: x0, y0, width (currently x1), height (currently y1), class_label (currently as str)
# 5 columns: x0, y0, x1, y1, class_label (currently as str)
with open(label_path, "r") as file:
reader = csv.reader(file)
bboxes = list(reader)
if not bboxes:
# Create an empty bbox with the "empty" class (not implemented yet)
pass
# Process changes on bbox definition
for box in bboxes:
box[4] = name_to_int.get(box[4]) # Get the class name as int
box[:] = map(int, box)
box[2] = box[2] - box[0] # Compute width
box[3] = box[3] - box[1] # Compute height
# Normalise box
box[0] = box[0] / image.shape[1]
box[1] = box[1] / image.shape[0]
box[2] = box[2] / image.shape[1]
box[3] = box[3] / image.shape[0]
# Albumentations augmentations
if self.transform:
augs = self.transform(image=image, bboxes=bboxes)
image = augs["image"]
bboxes = augs["bboxes"]
# Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
# target : [probabilities, x, y, width, height, class_label]
targets = [torch.zeros((self.num_anchors_per_scale, s, s, 6)) for s in self.grid_sizes]
# Identify anchor box and cell for each bounding box
for box in bboxes:
# Calculate iou of bounding box with anchor boxes
iou_anchors = iou(torch.tensor(box[2:4]), self.anchors, is_pred=False)
# Selecting the best anchor box
anchor_indices = iou_anchors.argsort(descending=True, dim=0)
x, y, width, height, class_label = box
# At each scale, assigning the bounding box to the best matching anchor box
has_anchor = [False] * 3
for anchor_idx in anchor_indices:
scale_idx = anchor_idx // self.num_anchors_per_scale
anchor_on_scale = anchor_idx % self.num_anchors_per_scale
# Identifying the grid size for the scale
s = self.grid_sizes[scale_idx]
# Identifying the cell to which the bounding box belongs
i, j = int(s * y), int(s * x)
anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
# Check if the anchor box is already assigned
if not anchor_taken and not has_anchor[scale_idx]:
# Set the probability to 1
targets[scale_idx][anchor_on_scale, i, j, 0] = 1
# Calculating the center of the bounding box relative
# to the cell
x_cell, y_cell = s * x - j, s * y - i
# Calculating the width and height of the bounding box
# relative to the cell
width_cell, height_cell = (width * s, height * s)
# Identify the box coordinates
box_coordinates = torch.tensor(
[x_cell, y_cell, width_cell,
height_cell]
)
if box:
# Get the class name as int
box[4] = CLASSE_TO_INT.get(box[4])
# Assigning the box coordinates to the target
targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
# Convert values to int
box[:] = map(int, box)
# Assigning the class label to the target
targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
# Normalise coords
box[0] = box[0] / image.shape[1]
box[1] = box[1] / image.shape[0]
box[2] = box[2] / image.shape[1]
box[3] = box[3] / image.shape[0]
# Set the anchor box as assigned for the scale
has_anchor[scale_idx] = True
return image, bboxes
except Exception as e:
print(f"Erreur when processing index {idx}: {e}")
return self.__getitem__((idx + 1) % self.__len__())
# If the anchor box is already assigned, check if the
# IoU is greater than the threshold
elif not anchor_taken and iou_anchors[anchor_idx] > self.iou_threshold:
# Set the probability to -1 to ignore the anchor box
targets[scale_idx][anchor_on_scale, i, j, 0] = -1
# Return the image and the target
return image, tuple(targets)
else:
return
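A hypothetical quick check of the new __getitem__ contract (the directory paths are the ones used in this commit's main script):

ds = Dataset(image_dir="./data/train/images/", label_dir="./data/train/labels/")
img, boxes = ds[0]
# img:   HxWx3 RGB numpy array
# boxes: list of [x0, y0, x1, y1, class_id] with corners normalized
#        by the image width/height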
# Defining CNN Block
class CNNBlock(nn.Module):
...
import torch
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# Dictionary mapping class names to integers
name_to_int = {
# Dictionary for mapping class names to integers
CLASSE_TO_INT = {
"danger": 0,
"interdiction": 1,
"obligation": 2,
@@ -19,7 +18,8 @@ name_to_int = {
"empty": 9
}
int_to_class = {
# Dictionary for mapping integers to class names
INT_TO_CLASSE = {
0: "danger",
1: "interdiction",
2: "obligation",
@@ -32,150 +32,96 @@ int_to_class = {
9: "empty"
}
# Defining a function to calculate Intersection over Union (IoU)
def iou(box1, box2, is_pred=True):
if is_pred:
# IoU score for prediction and label
# box1 (prediction) and box2 (label) are both in [x, y, width, height] format
# Box coordinates of prediction
b1_x1 = box1[..., 0:1] - box1[..., 2:3] / 2
b1_y1 = box1[..., 1:2] - box1[..., 3:4] / 2
b1_x2 = box1[..., 0:1] + box1[..., 2:3] / 2
b1_y2 = box1[..., 1:2] + box1[..., 3:4] / 2
# Box coordinates of ground truth
b2_x1 = box2[..., 0:1] - box2[..., 2:3] / 2
b2_y1 = box2[..., 1:2] - box2[..., 3:4] / 2
b2_x2 = box2[..., 0:1] + box2[..., 2:3] / 2
b2_y2 = box2[..., 1:2] + box2[..., 3:4] / 2
# Get the coordinates of the intersection rectangle
x1 = torch.max(b1_x1, b2_x1)
y1 = torch.max(b1_y1, b2_y1)
x2 = torch.min(b1_x2, b2_x2)
y2 = torch.min(b1_y2, b2_y2)
# Make sure the intersection is at least 0
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
# Calculate the union area
box1_area = abs((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
box2_area = abs((b2_x2 - b2_x1) * (b2_y2 - b2_y1))
union = box1_area + box2_area - intersection
# Calculate the IoU score
epsilon = 1e-6
iou_score = intersection / (union + epsilon)
# Return IoU score
return iou_score
else:
# IoU score based on width and height of bounding boxes
# Calculate intersection area
intersection_area = torch.min(box1[..., 0], box2[..., 0]) * torch.min(box1[..., 1], box2[..., 1])
# Calculate union area
box1_area = box1[..., 0] * box1[..., 1]
box2_area = box2[..., 0] * box2[..., 1]
union_area = box1_area + box2_area - intersection_area
# Calculate IoU score
iou_score = intersection_area / union_area
# Return IoU score
return iou_score
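A quick hedged sanity check of the two IoU modes above, with toy tensors:

# Midpoint-format boxes: identical boxes give IoU ~ 1.0
b1 = torch.tensor([0.5, 0.5, 0.4, 0.4])  # x, y, w, h
b2 = torch.tensor([0.5, 0.5, 0.4, 0.4])
print(iou(b1, b2))  # ~1.0 (slightly less because of epsilon)

# Width/height-only mode, as used for anchor matching
wh1 = torch.tensor([0.4, 0.4])
wh2 = torch.tensor([0.2, 0.2])
print(iou(wh1, wh2, is_pred=False))  # 0.04 / (0.16 + 0.04 - 0.04) = 0.25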
# Non-maximum suppression function to remove overlapping bounding boxes
def nms(bboxes, iou_threshold, threshold):
# Filter out bounding boxes with confidence below the threshold.
bboxes = [box for box in bboxes if box[1] > threshold]
# Sort the bounding boxes by confidence in descending order.
bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
# Initialize the list of bounding boxes after non-maximum suppression.
bboxes_nms = []
while bboxes:
# Get the first bounding box.
first_box = bboxes.pop(0)
# Iterate over the remaining bounding boxes.
for box in bboxes:
# If the bounding boxes do not overlap or if the first bounding box has
# a higher confidence, then add the second bounding box to the list of
# bounding boxes after non-maximum suppression.
if box[0] != first_box[0] or iou(
torch.tensor(first_box[2:]),
torch.tensor(box[2:]),
) < iou_threshold:
# Check if box is not in bboxes_nms
if box not in bboxes_nms:
# Add box to bboxes_nms
bboxes_nms.append(box)
# Return bounding boxes after non-maximum suppression.
return bboxes_nms
# Function to convert cells to bounding boxes
def convert_cells_to_bboxes(predictions, anchors, s, is_predictions=True):
# Batch size used on predictions
batch_size = predictions.shape[0]
# Number of anchors
num_anchors = len(anchors)
# List of all the predictions
box_predictions = predictions[..., 1:5]
# If the input is predictions then we will pass the x and y coordinate
# through sigmoid function and width and height to exponent function and
# calculate the score and best class.
if is_predictions:
anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
box_predictions[..., 2:] = torch.exp(
box_predictions[..., 2:]) * anchors
scores = torch.sigmoid(predictions[..., 0:1])
best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
# Else we will just calculate scores and best class.
else:
scores = predictions[..., 0:1]
best_class = predictions[..., 5:6]
# Calculate cell indices
cell_indices = (
torch.arange(s)
.repeat(predictions.shape[0], 3, s, 1)
.unsqueeze(-1)
.to(predictions.device)
)
# Calculate x, y, width and height with proper scaling
x = 1 / s * (box_predictions[..., 0:1] + cell_indices)
y = 1 / s * (box_predictions[..., 1:2] +
cell_indices.permute(0, 1, 3, 2, 4))
width_height = 1 / s * box_predictions[..., 2:4]
# Concatenating the values and reshaping them in
# (BATCH_SIZE, num_anchors * S * S, 6) shape
converted_bboxes = torch.cat(
(best_class, scores, x, y, width_height), dim=-1
).reshape(batch_size, num_anchors * s * s, 6)
# Returning the reshaped and converted bounding box list
return converted_bboxes.tolist()
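A hedged shape sketch for convert_cells_to_bboxes, using a toy prediction tensor (10 classes, as in this repo's label map; the anchors are placeholders):

preds = torch.randn(2, 3, 13, 13, 5 + 10)  # (batch, anchors, S, S, 5 + classes)
toy_anchors = torch.rand(3, 2) * 13        # placeholder scaled anchors for S = 13
out = convert_cells_to_bboxes(preds, anchors=toy_anchors, s=13)
# len(out) == 2 and len(out[0]) == 3 * 13 * 13 == 507;
# each entry is [class, score, x, y, w, h] scaled to the whole image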
# Data labels key
CLASSES = ["danger", "interdiction", "obligation", "stop", "ceder", "frouge", "forange", "fvert", "ff", "empty"]
# Number of classes
NB_CLASSES = len(CLASSES)
# Function to calculate Intersection over Union (IoU)
def iou(box1, box2):
"""
Calcule l'Intersection over Union (IoU) entre deux boîtes englobantes.
Parameters:
box1 (tuple): Une boîte englobante sous la forme (x1, y1, x2, y2) où (x1, y1) est le coin supérieur gauche et (x2, y2) est le coin inférieur droit.
box2 (tuple): Une deuxième boîte englobante sous la même forme (x1, y1, x2, y2).
Returns:
float: La valeur IoU entre les deux boîtes englobantes.
"""
# Corner coordinates of the two boxes
x1_box1, y1_box1, x2_box1, y2_box1 = box1
x1_box2, y1_box2, x2_box2, y2_box2 = box2
# Coordinates of the intersection rectangle
x1_inter = max(x1_box1, x1_box2)
y1_inter = max(y1_box1, y1_box2)
x2_inter = min(x2_box1, x2_box2)
y2_inter = min(y2_box1, y2_box2)
# Area of the intersection
inter_area = max(0, x2_inter - x1_inter) * max(0, y2_inter - y1_inter)
# Areas of the two boxes
box1_area = (x2_box1 - x1_box1) * (y2_box1 - y1_box1)
box2_area = (x2_box2 - x1_box2) * (y2_box2 - y1_box2)
# Area of the union
union_area = box1_area + box2_area - inter_area
# IoU (0 when the union is empty)
iou = inter_area / union_area if union_area > 0 else 0
return iou
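A quick sanity check of the corner-format IoU above, with toy boxes:

box_a = (0, 0, 10, 10)
box_b = (5, 5, 15, 15)
print(iou(box_a, box_b))  # intersection 25, union 175 -> ~0.143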
# Function to calculate Non Maximum Suppression (NMS)
def nms(bboxes, iou_threshold, score_threshold):
"""
Applique la Non-Maximum Suppression (NMS) pour supprimer les boîtes englobantes redondantes.
Parameters:
bboxes (list of tuples): Une liste de tuples sous la forme (x1, y1, x2, y2, score) où (x1, y1) est le coin supérieur gauche, (x2, y2) est le coin inférieur droit et score est la confiance de la détection.
iou_threshold (float): Le seuil d'IoU pour supprimer les boîtes redondantes.
score_threshold (float): Le seuil de confiance pour garder les boîtes.
Returns:
list of tuples: Les boîtes filtrées après l'application de la NMS.
"""
# Filtrer les boîtes avec un score inférieur au seuil de confiance
bboxes = [box for box in bboxes if box[4] >= score_threshold]
if len(bboxes) == 0:
return []
# Trier les boîtes par score de confiance décroissant
bboxes = sorted(bboxes, key=lambda x: x[4], reverse=True)
# Liste des boîtes conservées
selected_bboxes = []
while bboxes:
# Prendre la boîte avec le score le plus élevé
current_box = bboxes.pop(0)
selected_bboxes.append(current_box)
# Filtrer les boîtes restantes par IoU
bboxes = [
box for box in bboxes
if iou(current_box, box) < iou_threshold
]
return selected_bboxes
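A small hedged example of this NMS, with made-up (x1, y1, x2, y2, score) detections (note that the iou call above is fed only the first four values of each box):

dets = [
    (0, 0, 10, 10, 0.9),    # kept: highest score
    (1, 1, 10, 10, 0.8),    # dropped: IoU with the first box is 0.81 > 0.5
    (20, 20, 30, 30, 0.7),  # kept: no overlap with anything kept
    (0, 0, 10, 10, 0.3),    # dropped: below the score threshold
]
print(nms(dets, iou_threshold=0.5, score_threshold=0.5))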
# Function to plot images with bounding boxes and class labels
def plot_image(image, boxes):
def plot_bbox_image(image, boxes):
# Getting the color map from matplotlib
colour_map = plt.get_cmap("tab20b")
# Getting 20 different colors from the color map for 20 different classes
colors = [colour_map(i) for i in np.linspace(0, 1, len(name_to_int))]
# Getting a different color from the color map for each class
colors = [colour_map(i) for i in np.linspace(0, 1, NB_CLASSES)]
# Reading the image with OpenCV
img = np.array(image)
@@ -191,13 +137,12 @@ def plot_image(image, boxes):
# Plotting the bounding boxes and labels over the image
for box in boxes:
# Get the class from the box
class_pred = box[0]
class_pred = box[4]
box = box[2:]
x = box[0] * w
y = box[1] * h
width = box[2] * w
height = box[3] * h
width = box[2] * w - x
height = box[3] * h - y
# Create a Rectangle patch with the bounding box
rect = patches.Rectangle(
@@ -210,15 +155,15 @@
# Add the patch to the Axes
ax.add_patch(rect)
"""# Add class name to the patch
# Add class name to the patch
plt.text(
x,
y,
s=int_to_class[int(class_pred)],
s=INT_TO_CLASSE[int(class_pred)],
color="white",
verticalalignment="top",
bbox={"color": colors[int(class_pred)], "pad": 0},
) """
)
# Display the plot
plt.show()
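A hedged usage sketch of plot_bbox_image, with a toy tensor image and one made-up box in the new [x1, y1, x2, y2, class] normalized format (mirroring the call in this commit's main script):

img = torch.rand(3, 416, 416)          # CHW tensor, as yielded by the dataloader
toy_boxes = [[0.2, 0.2, 0.6, 0.7, 1]]  # normalized corners + class id
plot_bbox_image(img.to("cpu"), toy_boxes)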
...