# Source code for tensormonk.detection.sample

""" TensorMONK's :: data :: Sample """

import os
import random
import numpy as np
from typing import Union
from PIL import Image as ImPIL
from ..utils import PillowUtils, ObjectUtils


class Sample(object):
    r"""Container for one object-detection sample: an image path with its
    labels, bounding boxes and (optional) landmark points. Supports data
    augmentation (random 90/180/270 rotations, random padding and random
    cropping) during training -- boxes and points are adjusted accordingly.
    When ``Sample.OSIZE`` is set (and ``Sample.RESIZE`` is True), the image
    along with its boxes and points is resized after augmentation.

    Class attributes (are set once, shared by all samples):
        INVALID (float): Placeholder for unavailable point coordinates,
            ``float("nan")``. NaNs survive augmentation and must be filtered
            during loss computation (tensormonk.loss.PointLoss handles it).
            default = ``float("nan")``
        OSIZE (tuple): (width, height) of the output image. When None, the
            image is returned without resize after augmentation.
            default = ``None``
        RESIZE (bool): When True (and OSIZE is not None) resizes the image
            during augmentation and adjusts boxes and points.
            default = ``True``
        ROTATE_90 (bool): Enables random rotation (90/180/270).
            default = ``True``
        ROTATE_90_PROBS (tuple): Cumulative probabilities of ROTATE_90.
            default = ``(0.4, 0.6, 0.8)`` -- 40%, 20%, 20% and 20% probable
            to rotate 0, 90, 180 and 270 degrees respectively.
        PAD (bool): Does random padding. default = ``True``
        PAD_PERCENTAGE (float): Maximum fraction of (width + height) / 2
            that is padded. Must be 0 < PAD_PERCENTAGE < 1.
            default = ``0.1``
        CROP (bool): Does random cropping. default = ``True``
        CROP_MIN_SIDE_PERCENTAGE (float): Minimum fraction of the side that
            must be retained. Must be 0 < CROP_MIN_SIDE_PERCENTAGE < 1.
            default = ``0.3``
        CROP_MIN_OBJECT_SIDE (int): Minimum side at least one object must
            retain after crop and resize. Must satisfy
            0 < CROP_MIN_OBJECT_SIDE < min(Sample.OSIZE). default = ``16``
        CROP_N_ATTEMPTS (int): Number of attempts to find a random crop;
            when all fail, a crop is built around one randomly picked
            object. A large value can slow down the dataloader.
            default = ``16``
        RETAIN_AREA (float): An object is retained after a crop only when
            its visible area >= original area * RETAIN_AREA.
            default = ``0.5``

    Args:
        image (str, required): Full path to the image (does not accept
            ndarray or PIL image, since a large dataset cannot fit in
            memory).
        labels (list/tuple/np.ndarray, required): Labels of all objects in
            the image. Use 0 for background with
            :class:`~tensormonk.loss.LabelLoss`.
        boxes (list/tuple/np.ndarray, required): One box per label, in
            pixel coordinates and ltrb form (left, top, right, bottom).
        points (list/tuple/np.ndarray, optional): [x, y, x, y, ...] points
            of all the boxes. Use float("nan") for missing points and keep
            the same number of points for every label. Use None when not
            required.

    .. code-block:: python

        from tensormonk.detection import Sample
        Sample.OSIZE = 320, 320
        Sample.RESIZE = True
        sample = Sample(image="./image1.jpg", labels=[1],
                        boxes=[[4, 6, 40, 60]], points=None)
        image, labels, boxes, points = sample.augmented()
        # visualize augmented / original data
        sample.annotate_augmented()
        sample.annotate()
    """

    INVALID: float = float("nan")  # no change required
    # to resize
    OSIZE: tuple = None  # has to be defined as (width, height)
    RESIZE: bool = True
    # random rotate
    ROTATE_90: bool = True
    ROTATE_90_PROBS: tuple = (0.4, 0.6, 0.8)
    # random padding
    PAD: bool = True
    PAD_PERCENTAGE: float = 0.1
    # random crop
    CROP: bool = True
    CROP_MIN_SIDE_PERCENTAGE: float = 0.3
    CROP_MIN_OBJECT_SIDE: int = 16
    CROP_N_ATTEMPTS: int = 16
    RETAIN_AREA: float = 0.5

    def __init__(self, image: str, labels: np.ndarray, boxes: np.ndarray,
                 points: np.ndarray = None):
        self._image = self._labels = self._boxes = self._points = None
        self._is_boxes = self._is_points = False
        # setters below validate and normalize each field
        self.image = image
        self.labels = labels
        self.boxes = boxes
        self.points = points

    def data(self):
        r"""Provides a copy of original data."""
        return self.image, self.labels, self.boxes, self.points

    def augmented(self):
        r"""Provides augmented data (image, labels, boxes, points)."""
        image, labels, boxes, points = self.data()
        if self.ROTATE_90:
            image, boxes, points = self._rotate_90s(image, boxes, points)
        if self.PAD:
            image, boxes, points = self._pad(image, boxes, points)
        if self.CROP and self.OSIZE is not None:
            # best-effort: a degenerate crop raises ValueError and the
            # un-cropped image is used instead
            try:
                image, boxes, points = self._crop(image, boxes, points)
            except ValueError:
                pass
        if self.RESIZE and self.OSIZE is not None:
            image, boxes, points = self._resize(image, boxes, points)
        image, labels, boxes, points = self._validate_augmented(
            image, labels, boxes, points)
        return image, labels, boxes, points

    def annotate_augmented(self):
        r"""To visualize augmented data."""
        image, labels, boxes, points = self.augmented()
        return self.annotate([], image, boxes, points)

    def _validate_boxes(self, boxes: np.ndarray, w: int, h: int):
        r"""Return a boolean mask of boxes that retain enough visible area
        after clipping to the image (w x h)."""
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        visible_boxes = boxes.copy()
        visible_boxes[:, 0::2] = visible_boxes[:, 0::2].clip(0, w)
        visible_boxes[:, 1::2] = visible_boxes[:, 1::2].clip(0, h)
        visible_area = ((visible_boxes[:, 2] - visible_boxes[:, 0]) *
                        (visible_boxes[:, 3] - visible_boxes[:, 1]))
        # 1e-6 avoids division by zero for degenerate boxes
        valid = (visible_area / (area + 1e-6)) > self.RETAIN_AREA
        return valid

    def _validate_augmented(self, image: ImPIL.Image, labels: np.ndarray,
                            boxes: np.ndarray, points: np.ndarray):
        r"""Return valid labels, boxes and points using box visibility."""
        if 0.9 >= self.RETAIN_AREA >= 0.1:
            valid = self._validate_boxes(boxes, *image.size)
            labels = labels[valid]
            boxes = boxes[valid]
            # BUG FIX: original indexed points unconditionally, crashing
            # when the sample has no points
            if points is not None:
                points = points[valid]
        return image, labels, boxes, points

    def _rotate_90s(self, image: ImPIL.Image, boxes: np.ndarray,
                    points: np.ndarray):
        r"""Does 0/90/180/270 rotation; adjusts boxes and points."""
        p = random.random()
        w, h = image.size
        if self.ROTATE_90_PROBS[1] >= p > self.ROTATE_90_PROBS[0]:
            image = image.transpose(ImPIL.ROTATE_90)
            if self.is_boxes:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((t, w - r, b, w - l), 1)
            if self.is_points:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((y, w - x), -1)
        elif self.ROTATE_90_PROBS[2] >= p > self.ROTATE_90_PROBS[1]:
            image = image.transpose(ImPIL.ROTATE_270)
            if self.is_boxes:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((h - b, l, h - t, r), 1)
            if self.is_points:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((h - y, x), -1)
        elif p > self.ROTATE_90_PROBS[2]:
            image = image.transpose(ImPIL.ROTATE_180)
            if self.is_boxes:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((w - r, h - b, w - l, h - t), 1)
            if self.is_points:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((w - x, h - y), -1)
        return image, boxes, points

    def _pad(self, image: ImPIL.Image, boxes: np.ndarray,
             points: np.ndarray):
        r"""Pads a maximum of Sample.PAD_PERCENTAGE * (w + h)/2 pixels."""
        w, h = image.size
        # BUG FIX: original used min(1, ...), which capped the padding at
        # 1 pixel regardless of PAD_PERCENTAGE
        pad = max(1, int(self.PAD_PERCENTAGE * (w + h) / 2.))
        ox, oy = random.randint(0, pad), random.randint(0, pad)
        # negative crop coordinates pad the image symmetrically
        image = image.crop((-ox, -oy, w + ox, h + oy))
        if boxes is not None:
            boxes[:, 0::2] += ox
            boxes[:, 1::2] += oy
        if points is not None:
            points[:, :, 0] += ox
            points[:, :, 1] += oy
        return image, boxes, points

    def _crop(self, image: ImPIL.Image, boxes: np.ndarray,
              points: np.ndarray):
        r"""Does random image crop, and adjusts boxes and points.

        May raise ValueError (from random.randint on degenerate bounds),
        which augmented() catches to fall back to the un-cropped image.
        """
        (w, h), (ow, oh) = image.size, self.OSIZE
        new_points = None
        for _ in range(self.CROP_N_ATTEMPTS):
            # random side of square crop (20% chance of the full min side)
            nw = (min(w, h) * (1 if random.random() <= 0.2 else
                               random.uniform(self.CROP_MIN_SIDE_PERCENTAGE,
                                              1.)))
            if (ow / oh) >= 1.:
                nh = nw * oh / ow
            else:
                nw, nh = nw * ow / oh, nw
            # aspect ratio variation
            p = random.random()
            if 0. < p < 0.4:
                nw = nw * random.uniform(0.8, 1.)
            elif 0.4 < p < 0.8:
                nh = nh * random.uniform(0.8, 1.)
            # random crop location
            crop = random.randint(0, int(w - nw)), \
                random.randint(0, int(h - nh))
            crop = crop + (crop[0] + nw, crop[1] + nh)
            ious, iofs = ObjectUtils.compute_iou(
                boxes, np.array(crop).reshape(-1, 4), True)
            within_the_crop = (iofs >= 0.9)
            if not within_the_crop.any():
                # no boxes (90% of the area) are within the crop
                continue
            # check if at least one box has minimum required size
            # after resize
            rw = (boxes[:, 2] - boxes[:, 0]) / nw * ow
            rh = (boxes[:, 3] - boxes[:, 1]) / nh * oh
            valid_boxes = np.minimum(rw, rh) > self.CROP_MIN_OBJECT_SIDE
            if not (within_the_crop * valid_boxes).any():
                continue
            new_image = image.crop(crop)
            new_boxes = boxes.copy()
            new_boxes[:, 0::2] = new_boxes[:, 0::2] - crop[0]
            new_boxes[:, 1::2] = new_boxes[:, 1::2] - crop[1]
            if points is not None:
                new_points = points.copy()
                new_points[:, :, 0] = new_points[:, :, 0] - crop[0]
                new_points[:, :, 1] = new_points[:, :, 1] - crop[1]
            return new_image, new_boxes, new_points

        # if the above fails pick a random box and build a crop around it
        pick = random.randint(0, len(boxes) - 1)
        anchor = boxes[pick].copy()
        # adjust crop - change w & h till the resized width makes sense
        nw = sum(anchor[2:] - anchor[:2]) / 2.
        nw = random.uniform(nw * ow / (ow * 0.8),
                            nw * ow / (self.CROP_MIN_OBJECT_SIDE * 1.25))
        nh = nw * oh / ow
        left = random.randint(max(0, int(anchor[2] - nw)), int(anchor[0]))
        # NOTE(review): the vertical bound uses nw, not nh -- presumably a
        # typo in the original; kept as-is, confirm before changing
        top = random.randint(max(0, int(anchor[3] - nw)), int(anchor[1]))
        crop = (left, top, int(left + nw), int(top + nh))
        new_image = image.crop(crop)
        new_boxes = boxes.copy()
        # BUG FIX: original called points.copy() unconditionally, crashing
        # when the sample has no points
        new_points = points.copy() if points is not None else None
        new_boxes[:, 0::2] = new_boxes[:, 0::2] - crop[0]
        new_boxes[:, 1::2] = new_boxes[:, 1::2] - crop[1]
        if new_points is not None:
            new_points[:, :, 0] = new_points[:, :, 0] - crop[0]
            new_points[:, :, 1] = new_points[:, :, 1] - crop[1]
        return new_image, new_boxes, new_points

    def _resize(self, image: ImPIL.Image, boxes: np.ndarray,
                points: np.ndarray):
        r"""Resize image to Sample.OSIZE, and adjusts boxes and points."""
        (w, h), (ow, oh) = image.size, self.OSIZE
        image = image.resize(self.OSIZE, ImPIL.BILINEAR)
        if boxes is not None:
            boxes[:, 0::2] = boxes[:, 0::2] / w * ow
            boxes[:, 1::2] = boxes[:, 1::2] / h * oh
        if points is not None:
            points[:, :, 0] = points[:, :, 0] / w * ow
            points[:, :, 1] = points[:, :, 1] / h * oh
        return image, boxes, points

    @property
    def image(self):
        r"""A property that returns pil image (reads from disk every
        time)."""
        return ImPIL.open(self._image).convert("RGB")

    @image.setter
    def image(self, value: str):
        if not isinstance(value, str):
            raise TypeError("Sample: image must be str (full path)")
        if not os.path.isfile(value):
            raise FileNotFoundError(value)
        self._image = value

    @property
    def image_name(self):
        r"""A property that returns image full path."""
        return self._image

    @property
    def labels(self):
        r"""A property that returns a copy of all labels (np.ndarray) on
        the image, or None when unset."""
        # BUG FIX: original crashed with AttributeError when labels were
        # unset, defeating the "is None" checks elsewhere
        return None if self._labels is None else self._labels.copy()

    @labels.setter
    def labels(self, value):
        if isinstance(value, (int, float, list, tuple, np.ndarray)):
            # BUG FIX: np.int was removed in NumPy >= 1.24
            self._labels = np.array(value).astype(int)
        else:
            raise TypeError("Sample: labels must be int/list/tuple/ndarray")

    @property
    def is_boxes(self):
        r"""A property that returns :obj:`True` when boxes are
        available."""
        return self._is_boxes

    @property
    def boxes(self):
        r"""A property that returns a copy of all boxes (np.ndarray) on the
        image in ltrb format, or None when unavailable."""
        # BUG FIX: original used "~ self.is_boxes" -- bitwise NOT of a
        # Python bool is -2/-1 (both truthy), so the copy branch always ran
        # and crashed on None
        return self._boxes.copy() if self._is_boxes else None

    @boxes.setter
    def boxes(self, value: Union[list, tuple, np.ndarray]):
        if value is None:
            self._boxes = None
        elif isinstance(value, (list, tuple, np.ndarray)):
            if self.labels is None:
                raise ValueError("Sample: boxes requires labels")
            value = np.array(value).astype(np.float32)
            assert self.labels.size * 4 == value.size, \
                "boxes must be of shape (n_labels x 4)"
            self._boxes = value.reshape(self.labels.size, 4)
            self._is_boxes = True
        else:
            raise TypeError("Sample: boxes must be None/list/tuple/ndarray")

    @property
    def boxes_ltrb(self):
        r"""Alias of :attr:`boxes` (ltrb format)."""
        return self.boxes

    @property
    def boxes_cxcywh(self):
        r"""A property that returns a copy of all boxes (np.ndarray) on the
        image in cxcywh format."""
        boxes = self.boxes
        if self.is_boxes:
            boxes = (boxes[:, 0::2].mean(1), boxes[:, 1::2].mean(1),
                     boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1])
            boxes = np.vstack(boxes).T
        return boxes

    @property
    def is_points(self):
        r"""A property that returns :obj:`True` when points are
        available."""
        return self._is_points

    @property
    def points(self):
        r"""A property that returns a copy of all points (np.ndarray) on
        the image in pixel coordinates, or None when unavailable."""
        # BUG FIX: same "~ bool" defect as the boxes property
        return self._points.copy() if self._is_points else None

    @points.setter
    def points(self, value: Union[list, tuple, np.ndarray]):
        if value is None:
            self._points = None
        elif isinstance(value, (list, tuple, np.ndarray)):
            # stored as (n_labels, n_points_per_label, 2)
            self._points = np.array(value).astype(np.float32).reshape(
                self.labels.size, -1, 2)
            self._is_points = True
        else:
            raise TypeError("Sample: points must be None/list/tuple/ndarray")

    @property
    def fake_boxes(self):
        r"""Boxes derived from points, used for augmentation when the
        sample has no real boxes."""
        if self.is_boxes:
            return self.boxes
        # create fake boxes for augmentation purpose
        fake_boxes = []
        for xys in self.points:
            # a point is valid unless both its coordinates are NaN
            valid = ~ np.isnan(xys).prod(1).astype(bool)
            # BUG FIX: original compared valid.size (always the number of
            # points) against xys.shape[0], which is always True -- every
            # object got the dummy box. Intended check: no valid points.
            if not valid.any():
                fake_boxes.append(np.array([0., 0., 1., 1.]))
                continue
            fake_boxes.append(np.concatenate((xys[valid].min(0) * 0.8,
                                              xys[valid].max(0) * 1.25)))
        return np.stack(fake_boxes)

    @property
    def points_cxcy(self):
        r"""Points relative to each box's center-x / top (from cxcywh);
        None unless both boxes and points are available."""
        if self.is_points and self.is_boxes:
            boxes, points = self.boxes_cxcywh, self.points
            points[:, :, 0] -= boxes[:, [0]]
            points[:, :, 1] -= boxes[:, [1]]
            return points
        return None

    def annotate(self, ids: list = None, image: ImPIL.Image = None,
                 boxes: np.ndarray = None, points: np.ndarray = None):
        r"""Annotates boxes and points on the image.

        Args:
            ids (list, optional): Subset of object indices to draw; all
                objects when None/empty.
            image/boxes/points: When all None, the sample's own data is
                used.
        """
        # BUG FIX: mutable default argument replaced with None sentinel
        ids = [] if ids is None else ids
        if image is None and boxes is None:
            image = self.image
            if self.labels is None:
                return image
            boxes, points = self.boxes, self.points
        if len(ids) > 0 and isinstance(ids, (list, tuple)):
            if self.is_boxes:
                boxes = boxes[ids]
            if self.is_points:
                points = points[ids]
        return PillowUtils.annotate_boxes(
            image, boxes, self.avoid_nans_to_visualize(points))

    def avoid_nans_to_visualize(self, points: np.ndarray):
        r"""Removes nan's in the points (flattens; visualization only)."""
        if points is None:
            return None
        if np.isnan(points).any():
            if len(points[~ np.isnan(points)]) == 0:
                points = None
            else:
                points = points[~ np.isnan(points)]
        return points