Commit 87c2b59b authored by Victor Demessance

[+] Create dataset & bbox visualisation

parent 61238b48
Merge request !3: Merge deep learning on main
File deleted
File deleted
File deleted
@@ -6,11 +6,7 @@ from utils import *
# Creating a dataset object
dataset = Dataset(
image_dir="./data/train/images/",
label_dir="./data/train/labels/",
grid_sizes=[13, 26, 52],
anchors=ANCHORS,
transform=test_transform
)
label_dir="./data/train/labels/")
# Creating a dataloader object
loader = torch.utils.data.DataLoader(
@@ -19,26 +15,13 @@ loader = torch.utils.data.DataLoader(
shuffle=True,
)
# Defining the grid size and the scaled anchors
GRID_SIZE = [13, 26, 52]
scaled_anchors = torch.tensor(ANCHORS) / (
1 / torch.tensor(GRID_SIZE).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)
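For reference, the unsqueeze/repeat expression above is just a broadcast multiply of each anchor pair by its grid size; a minimal sketch (not part of the commit), assuming the ANCHORS and GRID_SIZE values defined here:

import torch

ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
GRID_SIZE = [13, 26, 52]

# Same result as the division above: a (3, 3, 2) tensor where each
# (w, h) pair is multiplied by its scale's grid size.
equivalent = torch.tensor(ANCHORS) * torch.tensor(GRID_SIZE).view(3, 1, 1)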
# Getting a batch from the dataloader
x, y = next(iter(loader))
# Getting the boxes coordinates from the labels
# and converting them into bounding boxes without scaling
boxes = []
for i in range(y[0].shape[1]):
anchor = scaled_anchors[i]
boxes += convert_cells_to_bboxes(
y[i], is_predictions=False, s=y[i].shape[2], anchors=anchor
)[0]
# Applying non-maximum suppression
boxes = nms(boxes, iou_threshold=1, threshold=0.7)
# Getting the boxes coordinates and converting them into bounding boxes
bboxes = []
for bbox in y:
bboxes.append([value.item() for value in bbox])
# Plotting the image with the bounding boxes
plot_image(x[0].permute(1,2,0).to("cpu"), boxes)
\ No newline at end of file
plot_bbox_image(x[0].to("cpu"), bboxes)
import torch
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2
# Device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load and save model variable
load_model = False
save_model = True
# model checkpoint file name
checkpoint_file = "checkpoint.pth.tar"
# Anchor boxes for each feature map scaled between 0 and 1
# 3 feature maps at 3 different scales based on YOLOv3 paper
ANCHORS = [
[(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
[(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
[(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
# Batch size for training
batch_size = 32
# Learning rate for training
learning_rate = 1e-5
# Number of epochs for training
epochs = 20
# Image size
image_size = 416
# Grid cell sizes
s = [image_size // 32, image_size // 16, image_size // 8] # = [13, 26, 52] for a 416 input
# Class labels
class_labels = [
"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
"chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor"
]
# Function to save checkpoint
def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
print("==> Saving checkpoint")
checkpoint = {
"state_dict": model.state_dict(),
"optimizer": optimizer.state_dict(),
}
torch.save(checkpoint, filename)
# Function to load checkpoint
def load_checkpoint(checkpoint_file, model, optimizer, lr):
print("==> Loading checkpoint")
checkpoint = torch.load(checkpoint_file, map_location=device)
model.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
for param_group in optimizer.param_groups:
param_group["lr"] = lr
# Transform for testing
test_transform = A.Compose(
[
# Rescale an image so that maximum side is equal to image_size
A.LongestMaxSize(max_size=image_size),
# Pad remaining areas with zeros
A.PadIfNeeded(
min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT, value=[0, 0, 0]
),
# Normalize the image
A.Normalize(
mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255
),
# Convert the image to PyTorch tensor
ToTensorV2()
],
# Augmentation for bounding boxes
bbox_params=A.BboxParams(
format="yolo",
min_visibility=0.4,
label_fields=[]
)
)
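A minimal sketch of running test_transform, assuming a made-up HxWx3 uint8 image and one YOLO-format box (center x, center y, width, height, all in [0, 1], with the class id as a trailing value):

import numpy as np

sample_img = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)  # fake image
sample_boxes = [(0.5, 0.5, 0.2, 0.3, 1)]  # (x_c, y_c, w, h, class_id)
augs = test_transform(image=sample_img, bboxes=sample_boxes)
img_tensor = augs["image"]   # CHW torch tensor, 416 x 416 after resize + pad
boxes_out = augs["bboxes"]   # boxes re-expressed for the transformed image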
DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # Device
NB_EPOCHS = 20 # Number of epochs for training
IMAGE_SIZE = 416 # Image size
@@ -14,27 +14,16 @@ from PIL import Image
# Create a dataset class to load the images and labels from the folder
class Dataset(torch.utils.data.Dataset):
def __init__(
self, image_dir, label_dir, anchors,
image_size=416, grid_sizes=[13, 26, 52]
, transform=None):
self, image_dir, label_dir,
image_size=416):
# Image and label directories
self.image_dir = image_dir
self.label_dir = label_dir
# Image size
self.image_size = image_size
# Transformations
self.transform = transform
# Grid sizes for each scale
self.grid_sizes = grid_sizes
# Anchor boxes
self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # the 3 scales' anchor lists concatenated into one (9, 2) tensor
# Number of anchor boxes
self.num_anchors = self.anchors.shape[0] # 9 anchors in total
# Number of anchor boxes per scale
self.num_anchors_per_scale = self.num_anchors // 3 # 3 anchors at each scale
# Number of classes
self.num_classes = len(name_to_int)
self.num_classes = NB_CLASSES
# Ignore IoU threshold
self.iou_threshold = 0.5
@@ -44,104 +33,38 @@ class Dataset(torch.utils.data.Dataset):
def __getitem__(self, idx):
# Getting the label path
label_path = os.path.join(self.label_dir, (str(idx).zfill(4)+".csv"))
if os.path.exists(label_path):
try:
# Getting the image path
img_path = os.path.join(self.image_dir, (str(idx).zfill(4)+".jpg"))
image = np.array(Image.open(img_path).convert("RGB"))
# Creating the label array
# 5 columns: x0, y0, width (currently x1), height (currently y1), class_label (currently as str)
# 5 columns: x0, y0, x1, y1, class_label (currently as str)
with open(label_path, "r") as file:
reader = csv.reader(file)
bboxes = list(reader)
if not bboxes:
# Create an empty bbox with the "empty" class (not implemented yet)
pass
# Process changes on bbox definition
for box in bboxes:
box[4] = name_to_int.get(box[4]) # Get the class name as int
box[:] = map(int, box)
box[2] = box[2] - box[0] # Compute width
box[3] = box[3] - box[1] # Compute height
# Normalise box
box[0] = box[0] / image.shape[1]
box[1] = box[1] / image.shape[0]
box[2] = box[2] / image.shape[1]
box[3] = box[3] / image.shape[0]
# Albumentations augmentations
if self.transform:
augs = self.transform(image=image, bboxes=bboxes)
image = augs["image"]
bboxes = augs["bboxes"]
# Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
# target : [probabilities, x, y, width, height, class_label]
targets = [torch.zeros((self.num_anchors_per_scale, s, s, 6)) for s in self.grid_sizes]
# Identify anchor box and cell for each bounding box
for box in bboxes:
# Calculate iou of bounding box with anchor boxes
iou_anchors = iou(torch.tensor(box[2:4]), self.anchors, is_pred=False)
# Selecting the best anchor box
anchor_indices = iou_anchors.argsort(descending=True, dim=0)
x, y, width, height, class_label = box
# At each scale, assigning the bounding box to the best matching anchor box
has_anchor = [False] * 3
for anchor_idx in anchor_indices:
scale_idx = anchor_idx // self.num_anchors_per_scale
anchor_on_scale = anchor_idx % self.num_anchors_per_scale
# Identifying the grid size for the scale
s = self.grid_sizes[scale_idx]
# Identifying the cell to which the bounding box belongs
i, j = int(s * y), int(s * x)
anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
# Check if the anchor box is already assigned
if not anchor_taken and not has_anchor[scale_idx]:
# Set the probability to 1
targets[scale_idx][anchor_on_scale, i, j, 0] = 1
# Calculating the center of the bounding box relative
# to the cell
x_cell, y_cell = s * x - j, s * y - i
# Calculating the width and height of the bounding box
# relative to the cell
width_cell, height_cell = (width * s, height * s)
# Identify the box coordinates
box_coordinates = torch.tensor(
[x_cell, y_cell, width_cell,
height_cell]
)
if box:
# Get the class name as int
box[4] = CLASSE_TO_INT.get(box[4])
# Assigning the box coordinates to the target
targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
# Convert values to int
box[:] = map(int, box)
# Assigning the class label to the target
targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
# Normalise coords
box[0] = box[0] / image.shape[1]
box[1] = box[1] / image.shape[0]
box[2] = box[2] / image.shape[1]
box[3] = box[3] / image.shape[0]
# Set the anchor box as assigned for the scale
has_anchor[scale_idx] = True
return image, bboxes
except Exception as e:
print(f"Erreur when processing index {idx}: {e}")
return self.__getitem__((idx + 1) % self.__len__())
# If the anchor box is already assigned, check if the
# IoU is greater than the threshold
elif not anchor_taken and iou_anchors[anchor_idx] > self.iou_threshold:
# Set the probability to -1 to ignore the anchor box
targets[scale_idx][anchor_on_scale, i, j, 0] = -1
# Return the image and the target
return image, tuple(targets)
else:
return
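A hypothetical quick check of the new __getitem__ contract (the directory paths are the ones used in this commit's main script):

ds = Dataset(image_dir="./data/train/images/", label_dir="./data/train/labels/")
img, boxes = ds[0]
# img:   HxWx3 RGB numpy array
# boxes: list of [x0, y0, x1, y1, class_id] with corners normalized
#        by the image width/height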
# Defining CNN Block
class CNNBlock(nn.Module):
...
import torch
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# Dictionary mapping class names to integers
name_to_int = {
# Dictionary for mapping class names to integers
CLASSE_TO_INT = {
"danger": 0,
"interdiction": 1,
"obligation": 2,
@@ -19,7 +18,8 @@ name_to_int = {
"empty": 9
}
int_to_class = {
# Dictionary for mapping integers to class names
INT_TO_CLASSE = {
0: "danger",
1: "interdiction",
2: "obligation",
@@ -32,150 +32,96 @@ int_to_class = {
9: "empty"
}
# Defining a function to calculate Intersection over Union (IoU)
def iou(box1, box2, is_pred=True):
if is_pred:
# IoU score for prediction and label
# box1 (prediction) and box2 (label) are both in [x, y, width, height] format
# Box coordinates of prediction
b1_x1 = box1[..., 0:1] - box1[..., 2:3] / 2
b1_y1 = box1[..., 1:2] - box1[..., 3:4] / 2
b1_x2 = box1[..., 0:1] + box1[..., 2:3] / 2
b1_y2 = box1[..., 1:2] + box1[..., 3:4] / 2
# Box coordinates of ground truth
b2_x1 = box2[..., 0:1] - box2[..., 2:3] / 2
b2_y1 = box2[..., 1:2] - box2[..., 3:4] / 2
b2_x2 = box2[..., 0:1] + box2[..., 2:3] / 2
b2_y2 = box2[..., 1:2] + box2[..., 3:4] / 2
# Get the coordinates of the intersection rectangle
x1 = torch.max(b1_x1, b2_x1)
y1 = torch.max(b1_y1, b2_y1)
x2 = torch.min(b1_x2, b2_x2)
y2 = torch.min(b1_y2, b2_y2)
# Make sure the intersection is at least 0
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
# Calculate the union area
box1_area = abs((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
box2_area = abs((b2_x2 - b2_x1) * (b2_y2 - b2_y1))
union = box1_area + box2_area - intersection
# Calculate the IoU score
epsilon = 1e-6
iou_score = intersection / (union + epsilon)
# Return IoU score
return iou_score
else:
# IoU score based on width and height of bounding boxes
# Calculate intersection area
intersection_area = torch.min(box1[..., 0], box2[..., 0]) * torch.min(box1[..., 1], box2[..., 1])
# Calculate union area
box1_area = box1[..., 0] * box1[..., 1]
box2_area = box2[..., 0] * box2[..., 1]
union_area = box1_area + box2_area - intersection_area
# Calculate IoU score
iou_score = intersection_area / union_area
# Return IoU score
return iou_score
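A quick hedged sanity check of the two IoU modes above, with toy tensors:

# Midpoint-format boxes: identical boxes give IoU ~ 1.0
b1 = torch.tensor([0.5, 0.5, 0.4, 0.4])  # x, y, w, h
b2 = torch.tensor([0.5, 0.5, 0.4, 0.4])
print(iou(b1, b2))  # ~1.0 (slightly less because of epsilon)

# Width/height-only mode, as used for anchor matching
wh1 = torch.tensor([0.4, 0.4])
wh2 = torch.tensor([0.2, 0.2])
print(iou(wh1, wh2, is_pred=False))  # 0.04 / (0.16 + 0.04 - 0.04) = 0.25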
# Non-maximum suppression function to remove overlapping bounding boxes
def nms(bboxes, iou_threshold, threshold):
# Filter out bounding boxes with confidence below the threshold.
bboxes = [box for box in bboxes if box[1] > threshold]
# Sort the bounding boxes by confidence in descending order.
bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
# Initialize the list of bounding boxes after non-maximum suppression.
bboxes_nms = []
while bboxes:
# Get the first bounding box.
first_box = bboxes.pop(0)
# Iterate over the remaining bounding boxes.
for box in bboxes:
# If the bounding boxes do not overlap or if the first bounding box has
# a higher confidence, then add the second bounding box to the list of
# bounding boxes after non-maximum suppression.
if box[0] != first_box[0] or iou(
torch.tensor(first_box[2:]),
torch.tensor(box[2:]),
) < iou_threshold:
# Check if box is not in bboxes_nms
if box not in bboxes_nms:
# Add box to bboxes_nms
bboxes_nms.append(box)
# Return bounding boxes after non-maximum suppression.
return bboxes_nms
# Function to convert cells to bounding boxes
def convert_cells_to_bboxes(predictions, anchors, s, is_predictions=True):
# Batch size used on predictions
batch_size = predictions.shape[0]
# Number of anchors
num_anchors = len(anchors)
# List of all the predictions
box_predictions = predictions[..., 1:5]
# If the input is predictions then we will pass the x and y coordinate
# through sigmoid function and width and height to exponent function and
# calculate the score and best class.
if is_predictions:
anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
box_predictions[..., 2:] = torch.exp(
box_predictions[..., 2:]) * anchors
scores = torch.sigmoid(predictions[..., 0:1])
best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
# Else we will just calculate scores and best class.
else:
scores = predictions[..., 0:1]
best_class = predictions[..., 5:6]
# Calculate cell indices
cell_indices = (
torch.arange(s)
.repeat(predictions.shape[0], 3, s, 1)
.unsqueeze(-1)
.to(predictions.device)
)
# Calculate x, y, width and height with proper scaling
x = 1 / s * (box_predictions[..., 0:1] + cell_indices)
y = 1 / s * (box_predictions[..., 1:2] +
cell_indices.permute(0, 1, 3, 2, 4))
width_height = 1 / s * box_predictions[..., 2:4]
# Concatenating the values and reshaping them in
# (BATCH_SIZE, num_anchors * S * S, 6) shape
converted_bboxes = torch.cat(
(best_class, scores, x, y, width_height), dim=-1
).reshape(batch_size, num_anchors * s * s, 6)
# Returning the reshaped and converted bounding box list
return converted_bboxes.tolist()
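A hedged shape sketch for convert_cells_to_bboxes, using a toy prediction tensor (10 classes, as in this repo's label map; the anchors are placeholders):

preds = torch.randn(2, 3, 13, 13, 5 + 10)  # (batch, anchors, S, S, 5 + classes)
toy_anchors = torch.rand(3, 2) * 13        # placeholder scaled anchors for S = 13
out = convert_cells_to_bboxes(preds, anchors=toy_anchors, s=13)
# len(out) == 2 and len(out[0]) == 3 * 13 * 13 == 507;
# each entry is [class, score, x, y, w, h] scaled to the whole image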
# Data labels key
CLASSES = ["danger", "interdiction", "obligation", "stop", "ceder", "frouge", "forange", "fvert", "ff", "empty"]
# Number of classes
NB_CLASSES = len(CLASSES)
# Function to calculate Intersection over Union (IoU)
def iou(box1, box2):
"""
Calcule l'Intersection over Union (IoU) entre deux boîtes englobantes.
Parameters:
box1 (tuple): Une boîte englobante sous la forme (x1, y1, x2, y2) où (x1, y1) est le coin supérieur gauche et (x2, y2) est le coin inférieur droit.
box2 (tuple): Une deuxième boîte englobante sous la même forme (x1, y1, x2, y2).
Returns:
float: La valeur IoU entre les deux boîtes englobantes.
"""
# Corner coordinates of the two boxes
x1_box1, y1_box1, x2_box1, y2_box1 = box1
x1_box2, y1_box2, x2_box2, y2_box2 = box2
# Coordinates of the intersection rectangle
x1_inter = max(x1_box1, x1_box2)
y1_inter = max(y1_box1, y1_box2)
x2_inter = min(x2_box1, x2_box2)
y2_inter = min(y2_box1, y2_box2)
# Area of the intersection
inter_area = max(0, x2_inter - x1_inter) * max(0, y2_inter - y1_inter)
# Areas of the two boxes
box1_area = (x2_box1 - x1_box1) * (y2_box1 - y1_box1)
box2_area = (x2_box2 - x1_box2) * (y2_box2 - y1_box2)
# Area of the union
union_area = box1_area + box2_area - inter_area
# IoU (0 when the union is empty)
iou = inter_area / union_area if union_area > 0 else 0
return iou
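A quick sanity check of the corner-format IoU above, with toy boxes:

box_a = (0, 0, 10, 10)
box_b = (5, 5, 15, 15)
print(iou(box_a, box_b))  # intersection 25, union 175 -> ~0.143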
# Function to calculate Non Maximum Suppression (NMS)
def nms(bboxes, iou_threshold, score_threshold):
"""
Applique la Non-Maximum Suppression (NMS) pour supprimer les boîtes englobantes redondantes.
Parameters:
bboxes (list of tuples): Une liste de tuples sous la forme (x1, y1, x2, y2, score) où (x1, y1) est le coin supérieur gauche, (x2, y2) est le coin inférieur droit et score est la confiance de la détection.
iou_threshold (float): Le seuil d'IoU pour supprimer les boîtes redondantes.
score_threshold (float): Le seuil de confiance pour garder les boîtes.
Returns:
list of tuples: Les boîtes filtrées après l'application de la NMS.
"""
# Filtrer les boîtes avec un score inférieur au seuil de confiance
bboxes = [box for box in bboxes if box[4] >= score_threshold]
if len(bboxes) == 0:
return []
# Trier les boîtes par score de confiance décroissant
bboxes = sorted(bboxes, key=lambda x: x[4], reverse=True)
# Liste des boîtes conservées
selected_bboxes = []
while bboxes:
# Prendre la boîte avec le score le plus élevé
current_box = bboxes.pop(0)
selected_bboxes.append(current_box)
# Filtrer les boîtes restantes par IoU
bboxes = [
box for box in bboxes
if iou(current_box, box) < iou_threshold
]
return selected_bboxes
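A small hedged example of this NMS, with made-up (x1, y1, x2, y2, score) detections (note that the iou call above is fed only the first four values of each box):

dets = [
    (0, 0, 10, 10, 0.9),    # kept: highest score
    (1, 1, 10, 10, 0.8),    # dropped: IoU with the first box is 0.81 > 0.5
    (20, 20, 30, 30, 0.7),  # kept: no overlap with anything kept
    (0, 0, 10, 10, 0.3),    # dropped: below the score threshold
]
print(nms(dets, iou_threshold=0.5, score_threshold=0.5))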
# Function to plot images with bounding boxes and class labels
def plot_image(image, boxes):
def plot_bbox_image(image, boxes):
# Getting the color map from matplotlib
colour_map = plt.get_cmap("tab20b")
# Getting 20 different colors from the color map for 20 different classes
colors = [colour_map(i) for i in np.linspace(0, 1, len(name_to_int))]
# Getting a different color from the color map for each class
colors = [colour_map(i) for i in np.linspace(0, 1, NB_CLASSES)]
# Reading the image with OpenCV
img = np.array(image)
@@ -191,13 +137,12 @@ def plot_image(image, boxes):
# Plotting the bounding boxes and labels over the image
for box in boxes:
# Get the class from the box
class_pred = box[0]
class_pred = box[4]
box = box[2:]
x = box[0] * w
y = box[1] * h
width = box[2] * w
height = box[3] * h
width = box[2] * w - x
height = box[3] * h - y
# Create a Rectangle patch with the bounding box
rect = patches.Rectangle(
@@ -210,15 +155,15 @@
# Add the patch to the Axes
ax.add_patch(rect)
"""# Add class name to the patch
# Add class name to the patch
plt.text(
x,
y,
s=int_to_class[int(class_pred)],
s=INT_TO_CLASSE[int(class_pred)],
color="white",
verticalalignment="top",
bbox={"color": colors[int(class_pred)], "pad": 0},
) """
)
# Display the plot
plt.show()
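A hedged usage sketch of plot_bbox_image, with a toy tensor image and one made-up box in the new [x1, y1, x2, y2, class] normalized format (mirroring the call in this commit's main script):

img = torch.rand(3, 416, 416)          # CHW tensor, as yielded by the dataloader
toy_boxes = [[0.2, 0.2, 0.6, 0.7, 1]]  # normalized corners + class id
plot_bbox_image(img.to("cpu"), toy_boxes)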
...