""" TensorMONK's :: data :: Sample """
import os
import random
import numpy as np
from typing import Union
from PIL import Image as ImPIL
from ..utils import PillowUtils, ObjectUtils
class Sample(object):
    r"""Sample is an object that contains image path, labels, bounding boxes
    and points for object detection tasks that can localize landmarks. It can
    augment data (random 90/180/270 rotates, random pad and random cropping)
    during training -- boxes and points are adjusted accordingly. The image
    can be resized along with boxes and points if Sample.OSIZE is initialized.

    Attributes (are set once):
        INVALID (float): In cases where some points are not available set the
            value to float("nan"). This allows to track those points after
            augmentation (must be filtered during loss computation --
            tensormonk.loss.PointLoss automatically handles it).
            default = :obj:`float("nan")`
        OSIZE (tuple): (width, height) of output image, when not set returns
            image without resize along with its attributes (boxes and
            points) after augmentation.
            default = :obj:`None`
        RESIZE (bool): When True along with OSIZE != None will resize the
            image during augmentation and adjust the boxes and points to new
            image size.
            default = :obj:`True`
        ROTATE_90 (bool): Enables random rotation (90/180/270).
            default = :obj:`True`
        ROTATE_90_PROBS (tuple): Three increasing thresholds in (0, 1) that
            split a uniform random draw into four bands (no rotation and the
            three rotations).
            default = :obj:`(0.4, 0.6, 0.8)`, i.e., 40% no rotation and 20%
            for each of the 90, 180 and 270 degree rotations.
        PAD (bool): Does random padding.
            default = :obj:`True`
        PAD_PERCENTAGE (float): Maximum percentage of height and width that
            is padded. Must be 0 < PAD_PERCENTAGE < 1.
            default = :obj:`0.1`
        CROP (bool): Does random cropping (requires OSIZE and boxes).
            default = :obj:`True`
        CROP_MIN_SIDE_PERCENTAGE (float): Minimum percentage of the size that
            must be retained. Must be 0 < CROP_MIN_SIDE_PERCENTAGE < 1.
            default = :obj:`0.3`
        CROP_MIN_OBJECT_SIDE (int): Minimum side of the object that has to be
            maintained after crop and resize. In case of multiple objects, at
            least one object will have min(w, h) >= CROP_MIN_OBJECT_SIDE.
            Must be 0 < CROP_MIN_OBJECT_SIDE < min(Sample.OSIZE).
            default = :obj:`16`
        CROP_N_ATTEMPTS (int): Number of attempts to find random crop, when
            failed randomly selects one object and extracts a crop around it.
            Depends on cpu (a larger number can slow down dataloader).
            default = :obj:`16`
        RETAIN_AREA (float): An object is retained only if
            visible area after augmentation > original area * RETAIN_AREA.
            The filter is applied only when 0.1 <= RETAIN_AREA <= 0.9.
            default = :obj:`0.5`

    Args:
        image (str, required): Full path to image (does not accept ndarray or
            pillow image since large dataset can not fit in memory)
        labels (list/tuple/np.ndarray, required): labels of all the objects in
            the image. In order to use :class:`~tensormonk.loss.LabelLoss`
            use 0 for background.
        boxes (list/tuple/np.ndarray, required): bounding boxes for all the
            labels. Must be in pixel coordinates and ltrb form (:obj:`left`,
            :obj:`top`, :obj:`right`, :obj:`bottom`)
        points (list/tuple/np.ndarray, optional): [x, y, x, y, ...] points of
            all the bounding boxes. If points for some objects are missing use
            float("nan") and maintain all the labels to have same number of
            points. When not required use None.

    Raises:
        TypeError: When image is not a str, or labels/boxes/points are of an
            unsupported type.
        FileNotFoundError: When image is not an existing file.

    .. code-block:: python

        import torch
        from tensormonk.detection import Sample
        from torchvision import transforms
        Sample.OSIZE = 320, 320
        Sample.RESIZE = True
        Sample.ROTATE_90 = False
        Sample.PAD = False
        Sample.CROP = True
        Sample.CROP_MIN_SIDE_PERCENTAGE = 0.3
        Sample.CROP_MIN_OBJECT_SIDE = 16
        Sample.CROP_N_ATTEMPTS = 8

        data = [["./image1.jpg", [1], [[4, 6, 4, 6]]],
                ["./image2.jpg", [4, 6], [[4, 6, 4, 6], [2, 6, 3, 6]]]]

        class SomeDB(object):
            def __init__(self, data, osize: tuple):
                self.samples = []
                for x in data:
                    self.samples.append(
                        Sample(image=x[0], labels=x[1], boxes=x[2],
                               points=None))
                self.transforms = transforms.Compose(
                    [transforms.RandomApply(
                        [transforms.ColorJitter(0.1, 0.1, 0.1, 0.1)]),
                     transforms.RandomGrayscale(p=0.25),
                     transforms.ToTensor()])

            def __len__(self):
                return len(self.samples)

            def __getitem__(self, idx):
                image, labels, boxes, points = self.samples[idx].augmented()
                tensor = self.transforms(image)
                labels = torch.from_numpy(labels).long()
                boxes = torch.from_numpy(boxes).float()
                if points is None:
                    return tensor, labels, boxes
                points = torch.from_numpy(points).float()
                return tensor, labels, boxes, points

        dataset = SomeDB(data, (320, 320))
        # To check how augmentation is working use the following to visualize
        dataset.samples[0].annotate_augmented()
        # To visualize original data
        dataset.samples[0].annotate()
    """

    INVALID: float = float("nan")  # no change required
    # to resize
    OSIZE: tuple = None  # Has to be defined (width, height)
    RESIZE: bool = True
    # random rotate
    ROTATE_90: bool = True
    ROTATE_90_PROBS: tuple = (0.4, 0.6, 0.8)
    # random padding
    PAD: bool = True
    PAD_PERCENTAGE: float = 0.1
    # random crop
    CROP: bool = True
    CROP_MIN_SIDE_PERCENTAGE: float = 0.3
    CROP_MIN_OBJECT_SIDE: int = 16
    CROP_N_ATTEMPTS: int = 16
    RETAIN_AREA: float = 0.5

    def __init__(self,
                 image: str,
                 labels: np.ndarray,
                 boxes: np.ndarray,
                 points: np.ndarray = None):
        self._image = self._labels = self._boxes = self._points = None
        self._is_boxes = self._is_points = False
        # Order matters: the boxes and points setters use the label count.
        self.image = image
        self.labels = labels
        self.boxes = boxes
        self.points = points

    def data(self):
        r"""Provides a copy of original data as a tuple of
        (pil image, labels, boxes, points); boxes/points are None when they
        were not provided."""
        return self.image, self.labels, self.boxes, self.points

    def augmented(self):
        r"""Provides augmented data as (image, labels, boxes, points).

        Applies, in order and when enabled, random 90 degree rotations,
        random padding, random cropping (needs OSIZE and at least one box)
        and a resize to OSIZE. Finally, objects that lost too much area are
        filtered (see RETAIN_AREA).
        """
        image, labels, boxes, points = self.data()
        if self.ROTATE_90:
            image, boxes, points = self._rotate_90s(image, boxes, points)
        if self.PAD:
            image, boxes, points = self._pad(image, boxes, points)
        # The crop is guided by boxes -- without them there is nothing to
        # anchor the crop on, so it is skipped.
        has_boxes = boxes is not None and len(boxes) > 0
        if self.CROP and self.OSIZE is not None and has_boxes:
            try:
                image, boxes, points = self._crop(image, boxes, points)
            except ValueError:
                # Best effort: random.randint raises ValueError when no
                # feasible crop range exists -- continue without cropping.
                pass
        if self.RESIZE and self.OSIZE is not None:
            image, boxes, points = self._resize(image, boxes, points)
        image, labels, boxes, points = self._validate_augmented(
            image, labels, boxes, points)
        return image, labels, boxes, points

    def annotate_augmented(self):
        r"""To visualize augmented data."""
        image, labels, boxes, points = self.augmented()
        return self.annotate([], image, boxes, points)

    def _validate_boxes(self, boxes: np.ndarray, w: int, h: int):
        r"""Return a boolean mask of boxes whose visible area (area inside
        the w x h image) is more than RETAIN_AREA of their full area."""
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        visible_boxes = boxes.copy()
        visible_boxes[:, 0::2] = visible_boxes[:, 0::2].clip(0, w)
        visible_boxes[:, 1::2] = visible_boxes[:, 1::2].clip(0, h)
        visible_area = ((visible_boxes[:, 2] - visible_boxes[:, 0]) *
                        (visible_boxes[:, 3] - visible_boxes[:, 1]))
        # 1e-6 avoids a division by zero for degenerate boxes
        valid = (visible_area / (area + 1e-6)) > self.RETAIN_AREA
        return valid

    def _validate_augmented(self, image: "ImPIL.Image", labels: np.ndarray,
                            boxes: np.ndarray, points: np.ndarray):
        r"""Return valid boxes, points, labels using boxes.

        Nothing is filtered when boxes are not available or when RETAIN_AREA
        is outside [0.1, 0.9].
        """
        if boxes is not None and 0.9 >= self.RETAIN_AREA >= 0.1:
            valid = self._validate_boxes(boxes, *image.size)
            labels = labels[valid]
            boxes = boxes[valid]
            if points is not None:
                points = points[valid]
        return image, labels, boxes, points

    def _rotate_90s(self, image: "ImPIL.Image", boxes: np.ndarray,
                    points: np.ndarray):
        r"""Does 0/90/180/270 rotation.

        A single uniform draw p selects the rotation using ROTATE_90_PROBS:
        p <= probs[0] keeps the image, (probs[0], probs[1]] uses ROTATE_90,
        (probs[1], probs[2]] uses ROTATE_270 and p > probs[2] uses
        ROTATE_180. Boxes (n x 4, ltrb) and points (n x k x 2) are mapped to
        the rotated image.
        """
        p = random.random()
        w, h = image.size  # size before rotation
        probs = self.ROTATE_90_PROBS
        if probs[1] >= p > probs[0]:
            image = image.transpose(ImPIL.ROTATE_90)
            # (x, y) -> (y, w - x); left/right swap roles to keep ltrb order
            if boxes is not None:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((t, w - r, b, w - l), 1)
            if points is not None:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((y, w - x), -1)
        elif probs[2] >= p > probs[1]:
            image = image.transpose(ImPIL.ROTATE_270)
            # (x, y) -> (h - y, x)
            if boxes is not None:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((h - b, l, h - t, r), 1)
            if points is not None:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((h - y, x), -1)
        elif p > probs[2]:
            image = image.transpose(ImPIL.ROTATE_180)
            # (x, y) -> (w - x, h - y)
            if boxes is not None:
                l, t, r, b = np.split(boxes, 4, 1)
                boxes = np.concatenate((w - r, h - b, w - l, h - t), 1)
            if points is not None:
                x, y = np.split(points, 2, 2)
                points = np.concatenate((w - x, h - y), -1)
        return image, boxes, points

    def _pad(self, image: "ImPIL.Image", boxes: np.ndarray,
             points: np.ndarray):
        r"""Pads a maximum of Sample.PAD_PERCENTAGE * (w + h)/2 pixels.

        The same random offset is added on both sides of each axis (ox on
        left and right, oy on top and bottom); boxes and points are shifted
        in place by (ox, oy).
        """
        w, h = image.size
        # upper limit of the random pad (at least one pixel)
        pad = max(1, int(self.PAD_PERCENTAGE * (w + h) / 2.))
        ox, oy = random.randint(0, pad), random.randint(0, pad)
        # cropping beyond the image bounds extends (pads) the image
        image = image.crop((-ox, -oy, w + ox, h + oy))
        if boxes is not None:
            boxes[:, 0::2] += ox
            boxes[:, 1::2] += oy
        if points is not None:
            points[:, :, 0] += ox
            points[:, :, 1] += oy
        return image, boxes, points

    def _crop(self, image: "ImPIL.Image", boxes: np.ndarray,
              points: np.ndarray):
        r"""Does random image crop, and adjusts boxes and points.

        Tries CROP_N_ATTEMPTS random crops with an aspect ratio close to
        OSIZE and accepts the first crop that (almost) contains a box that
        is larger than CROP_MIN_OBJECT_SIDE after resize. When all attempts
        fail, a crop is built around a randomly picked box.

        Raises:
            ValueError: From random.randint, when there are no boxes or no
                feasible crop around the picked box exists.
        """
        (w, h), (ow, oh) = image.size, self.OSIZE
        new_points = None
        for _ in range(self.CROP_N_ATTEMPTS):
            # random side of square crop (20% of the time use the full side)
            nw = (min(w, h) * (1 if random.random() <= 0.2 else
                               random.uniform(self.CROP_MIN_SIDE_PERCENTAGE,
                                              1.)))
            # shrink one side to match the aspect ratio of OSIZE
            if (ow / oh) >= 1.:
                nh = nw * oh / ow
            else:
                nw, nh = nw * ow / oh, nw
            # aspect ratio variation
            p = random.random()
            if 0. < p < 0.4:
                nw = nw * random.uniform(0.8, 1.)
            elif 0.4 < p < 0.8:
                nh = nh * random.uniform(0.8, 1.)
            # random crop
            crop = random.randint(0, int(w-nw)), random.randint(0, int(h-nh))
            crop = crop + (crop[0] + nw, crop[1] + nh)
            # NOTE(review): iofs is presumably the fraction of each box that
            # lies inside the crop -- confirm with ObjectUtils.compute_iou.
            ious, iofs = ObjectUtils.compute_iou(
                boxes, np.array(crop).reshape(-1, 4), True)
            within_the_crop = (iofs >= 0.9)
            if not within_the_crop.any():
                # No boxes (90% of the area) are within the crop
                continue
            # check if at least one box has minimum required size after resize
            rw = (boxes[:, 2] - boxes[:, 0]) / nw * ow
            rh = (boxes[:, 3] - boxes[:, 1]) / nh * oh
            valid_boxes = np.minimum(rw, rh) > self.CROP_MIN_OBJECT_SIDE
            if not (within_the_crop * valid_boxes).any():
                continue

            new_image = image.crop(crop)
            new_boxes = boxes.copy()
            new_boxes[:, 0::2] = new_boxes[:, 0::2] - crop[0]
            new_boxes[:, 1::2] = new_boxes[:, 1::2] - crop[1]
            if points is not None:
                new_points = points.copy()
                new_points[:, :, 0] = new_points[:, :, 0] - crop[0]
                new_points[:, :, 1] = new_points[:, :, 1] - crop[1]
            return new_image, new_boxes, new_points

        # if the above fails pick a random box and build a crop around it
        pick = random.randint(0, len(boxes)-1)
        anchor = boxes[pick].copy()
        # adjust crop - change w & h till the resized width makes sense
        nw = sum(anchor[2:] - anchor[:2]) / 2.  # mean side of the box
        nw = random.uniform(nw * ow / (ow * 0.8),
                            nw * ow / (self.CROP_MIN_OBJECT_SIDE * 1.25))
        nh = nw * oh / ow
        # left/top are chosen such that the crop can contain the box: the
        # horizontal range depends on the crop width and the vertical range
        # on the crop height.
        left = random.randint(max(0, int(anchor[2] - nw)), int(anchor[0]))
        top = random.randint(max(0, int(anchor[3] - nh)), int(anchor[1]))
        crop = (left, top, int(left + nw), int(top + nh))

        new_image = image.crop(crop)
        new_boxes = boxes.copy()
        new_boxes[:, 0::2] = new_boxes[:, 0::2] - crop[0]
        new_boxes[:, 1::2] = new_boxes[:, 1::2] - crop[1]
        if points is not None:
            new_points = points.copy()
            new_points[:, :, 0] = new_points[:, :, 0] - crop[0]
            new_points[:, :, 1] = new_points[:, :, 1] - crop[1]
        return new_image, new_boxes, new_points

    def _resize(self, image: "ImPIL.Image", boxes: np.ndarray,
                points: np.ndarray):
        r"""Resize image to Sample.OSIZE, and adjusts boxes and points
        (in place) to the new size."""
        (w, h), (ow, oh) = image.size, self.OSIZE
        image = image.resize(self.OSIZE, ImPIL.BILINEAR)
        if boxes is not None:
            boxes[:, 0::2] = boxes[:, 0::2] / w * ow
            boxes[:, 1::2] = boxes[:, 1::2] / h * oh
        if points is not None:
            points[:, :, 0] = points[:, :, 0] / w * ow
            points[:, :, 1] = points[:, :, 1] / h * oh
        return image, boxes, points

    @property
    def image(self):
        r"""A property that returns pil image (reads every time)."""
        return ImPIL.open(self._image).convert("RGB")

    @image.setter
    def image(self, value: str):
        if not isinstance(value, str):
            raise TypeError("Sample: image must be str (full path)")
        if not os.path.isfile(value):
            raise FileNotFoundError(f"Sample: image not found - {value}")
        self._image = value

    @property
    def image_name(self):
        r"""A property that returns image full path."""
        return self._image

    @property
    def labels(self):
        r"""A property that returns a copy all of labels (np.ndarray) on the
        image (None when labels are not set)."""
        return None if self._labels is None else self._labels.copy()

    @labels.setter
    def labels(self, value):
        if isinstance(value, (int, float, list, tuple, np.ndarray)):
            # np.int was removed from numpy -- builtin int is its equivalent.
            # atleast_1d promotes a scalar label so that it can be masked.
            self._labels = np.atleast_1d(np.array(value).astype(int))
        else:
            raise TypeError("Sample: labels must be int/list/tuple/ndarray")

    @property
    def is_boxes(self):
        r"""A property that returns :obj:`True` when boxes are available."""
        return self._is_boxes

    @property
    def boxes(self):
        r"""A property that returns a copy all of boxes (np.ndarray) on the
        image in ltrb format (None when boxes are not available)."""
        return self._boxes.copy() if self.is_boxes else None

    @boxes.setter
    def boxes(self, value: Union[list, tuple, np.ndarray]):
        if value is None:
            self._boxes = None
            self._is_boxes = False
        elif isinstance(value, (list, tuple, np.ndarray)):
            labels = self.labels
            if labels is None:
                raise ValueError("Sample: boxes requires labels")
            value = np.array(value).astype(np.float32)
            assert labels.size * 4 == value.size, \
                "boxes must be of shape (n_labels x 4)"
            self._boxes = value.reshape(labels.size, 4)
            self._is_boxes = True
        else:
            raise TypeError("Sample: boxes must be None/list/tuple/ndarray")

    @property
    def boxes_ltrb(self):
        r"""Same as boxes -- a copy of boxes in ltrb format."""
        return self.boxes

    @property
    def boxes_cxcywh(self):
        r"""A property that returns a copy all of boxes (np.ndarray) on the
        image in cxcywh format (None when boxes are not available)."""
        boxes = self.boxes
        if boxes is not None:
            boxes = (boxes[:, 0::2].mean(1), boxes[:, 1::2].mean(1),
                     boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1])
            boxes = np.vstack(boxes).T
        return boxes

    @property
    def is_points(self):
        r"""A property that returns :obj:`True` when points are available."""
        return self._is_points

    @property
    def points(self):
        r"""A property that returns a copy all of points (np.ndarray of shape
        n_labels x n_points x 2) on the image in pixel coordinates (None when
        points are not available)."""
        return self._points.copy() if self.is_points else None

    @points.setter
    def points(self, value: Union[list, tuple, np.ndarray]):
        if value is None:
            self._points = None
            self._is_points = False
        elif isinstance(value, (list, tuple, np.ndarray)):
            labels = self.labels
            if labels is None:
                raise ValueError("Sample: points requires labels")
            self._points = np.array(value).astype(np.float32).reshape(
                labels.size, -1, 2)
            self._is_points = True
        else:
            raise TypeError("Sample: points must be None/list/tuple/ndarray")

    @property
    def fake_boxes(self):
        r"""A property that returns boxes when available, otherwise boxes
        built from the points of each object (min(x, y) * 0.8 and
        max(x, y) * 1.25 of its valid points). Objects without any valid
        point get a dummy [0, 0, 1, 1] box. Returns None when neither boxes
        nor points are available."""
        if self.is_boxes:
            return self.boxes
        if not self.is_points:
            return None
        # create fake boxes for augmentation purpose
        fake_boxes = []
        for xys in self.points:
            # a point is usable only when both of its coordinates are known
            valid = ~ np.isnan(xys).any(1)
            if not valid.any():
                fake_boxes.append(np.array([0., 0., 1., 1.]))
                continue
            fake_boxes.append(np.concatenate((xys[valid].min(0) * 0.8,
                                              xys[valid].max(0) * 1.25)))
        if len(fake_boxes) == 0:
            return np.zeros((0, 4), dtype=np.float32)
        return np.stack(fake_boxes)

    @property
    def points_cxcy(self):
        r"""A property that returns a copy of points relative to the center
        of their box (None when boxes or points are not available)."""
        if self.is_points and self.is_boxes:
            boxes, points = self.boxes_cxcywh, self.points
            points[:, :, 0] -= boxes[:, [0]]
            points[:, :, 1] -= boxes[:, [1]]
            return points
        return None

    def annotate(self, ids: list = None, image: "ImPIL.Image" = None,
                 boxes: np.ndarray = None, points: np.ndarray = None):
        r"""Annotates boxes and points on the image.

        Args:
            ids (list/tuple, optional): Indices of objects to annotate, when
                None or empty all the objects are annotated.
            image (pil image, optional): When image and boxes are both None,
                the original image, boxes and points of the sample are used.
            boxes (np.ndarray, optional): boxes (ltrb) to annotate.
            points (np.ndarray, optional): points to annotate.

        Returns:
            The output of PillowUtils.annotate_boxes (the unannotated image
            when the sample has no labels).
        """
        if image is None and boxes is None:
            image = self.image
            if self.labels is None:
                return image
            boxes, points = self.boxes, self.points
        elif image is None:
            image = self.image
        if isinstance(ids, (list, tuple)) and len(ids) > 0:
            if boxes is not None:
                boxes = boxes[ids]
            if points is not None:
                points = points[ids]
        return PillowUtils.annotate_boxes(
            image, boxes, self.avoid_nans_to_visualize(points))

    def avoid_nans_to_visualize(self, points: np.ndarray):
        r"""Removes nan's in the points.

        Returns points unchanged when there are no nan's. Otherwise, every
        (x, y) pair with a nan is dropped and the remaining pairs are
        returned as a flat [x, y, x, y, ...] array (None when nothing
        remains or points is None).
        """
        if points is None:
            return None
        if not np.isnan(points).any():
            return points
        # filter whole (x, y) pairs so that x's and y's stay aligned
        pairs = points.reshape(-1, 2)
        keep = ~ np.isnan(pairs).any(1)
        if not keep.any():
            return None
        return pairs[keep].reshape(-1)