pengfeidip
4/16/2019 - 12:18 PM

[deep learning] Useful deep learning code! #dl #object detection

# Ground-truth-to-prior-box matching function for SSD-style detectors, with a detailed walkthrough:
# https://hellozhaozheng.github.io/z_post/PyTorch-SSD/#MultiBox
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    # threshold: (float) IoU threshold used to decide whether a prior box matches a ground truth
    # truths: (tensor: [num_obj, 4]) coordinates of the ground-truth boxes
    # priors: (tensor: [num_priors, 4], i.e. [8732, 4]) coordinates of the prior boxes. Note that
    #         these are the default boxes, not the boxes predicted by the SSD network; the
    #         predictions live in loc_data, whose shape is [batches, 8732, 4].
    # variances: cfg['variance'], [0.1, 0.2], used to transform the coordinates into a form that
    #            is easier to train on (cf. the box-coordinate encoding in the R-CNN family)
    # labels: (tensor: [num_obj]) class index of each ground-truth box
    # loc_t: (tensor: [batches, 8732, 4]) buffer the encoded location targets are written into
    # conf_t: (tensor: [batches, 8732]) buffer the class targets are written into
    # idx: index of the image currently being processed within the batch
    overlaps = jaccard(truths, point_form(priors)) # [A, B]: pairwise IoU; overlaps[i][j] is the IoU between the i-th box of box_a and the j-th box of box_b.

    # Bipartite matching
    # [num_objs, 1]: for each gt box, the best-matching prior box; the first tensor holds the IoU,
    # the second holds the prior box's index among the num_priors priors
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) # keepdim=True, so the shape is [num_objs, 1]
    # [1, num_priors], i.e. [1, 8732]: likewise, for each prior box, the best-matching gt box
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
    best_prior_idx.squeeze_(1) # the dimensions deliberately kept above (keepdim=True) are all squeezed away again here; the default keepdim=False would have avoided these four squeeze_ calls.
    best_prior_overlap.squeeze_(1)
    best_truth_idx.squeeze_(0)
    best_truth_overlap.squeeze_(0)

    best_truth_overlap.index_fill_(0, best_prior_idx, 2)
    # After squeezing, best_truth_overlap has shape [num_priors] and best_prior_idx has shape [num_objs].
    # This statement sets the overlap of every prior box that is some gt box's best match to 2,
    # guaranteeing it is the maximum, so that no gt box is left without a matched prior box.

    # Consider an extreme case: every prior box has IoU 1 with one particular gt box (call it G),
    # while each of the other gt boxes still has its own best-matching prior box, necessarily with
    # IoU below 1 (those priors already have IoU 1 with G). Every prior box would then be matched
    # to G. To prevent this, each gt box and its best prior box are forced to match each other,
    # i.e. best_truth_idx[best_prior_idx[j]] = j; see the for loop below.
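    # Tiny worked example (hypothetical numbers): with 2 gt boxes and 4 priors, suppose
    # best_prior_idx = [2, 0], i.e. gt0's best prior is prior 2 and gt1's best prior is prior 0.
    # The loop below then forces best_truth_idx[2] = 0 and best_truth_idx[0] = 1, regardless of
    # which gt those two priors originally matched best.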

    # Note!!: because there are far fewer gt boxes than prior boxes, the same gt box is matched to
    # multiple prior boxes.
    for j in range(best_prior_idx.size(0)): # range: 0 ~ num_obj-1
        best_truth_idx[best_prior_idx[j]] = j
        # best_prior_idx[j] is the index of the prior box with the highest IoU against the j-th
        # gt box; setting best_truth_idx at that index to j pairs the j-th gt box with that prior.
        # The loop runs only num_obj times; every other entry of best_truth_idx keeps its original value.
        # The case being handled here: prior box i has its highest IoU with gt box k,
        # i.e. best_truth_idx[i] = k,
        # but gt box k has its highest IoU with prior box l,
        # i.e. best_prior_idx[k] = l,
        # while another gt box j has its highest IoU with prior box i,
        # i.e. best_prior_idx[j] = i.
        # In that case best_truth_idx[i] = k is overwritten with best_truth_idx[i] = j,
        # pairing prior box i with gt box j.
        # The reason for doing this: it guarantees every gt box has at least one matched prior box.
    matches = truths[best_truth_idx]
    # truths has shape [num_objs, 4], and best_truth_idx is an index tensor of length 8732 whose
    # values range over 0 ~ num_objs-1: the index of the gt box matched to each prior box.
    # The expression above therefore returns a tensor of shape [num_priors, 4], i.e. [8732, 4]:
    # the coordinates of the gt box matched to each prior box.
    conf = labels[best_truth_idx] + 1 # likewise, the class index matched to each prior box, shifted by 1 so that 0 is reserved for background; shape [8732]
    conf[best_truth_overlap < threshold] = 0 # priors whose best IoU is below the threshold are treated as background
    loc = encode(matches, priors, variances) # encoded center coordinates and width/height
    loc_t[idx] = loc # store the encoded gt coordinates for the idx-th image
    conf_t[idx] = conf # store the class labels for the idx-th image (> 0 means an object; 0 means background)
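
# The helpers used above come from ssd.pytorch-style box_utils: jaccard computes pairwise IoU
# (essentially the compute_iou function further down this page). For reference, a minimal sketch
# of what point_form and encode do, reconstructed in the style of that code base (an
# approximation, not the original source):
import torch

def point_form(boxes):
    # convert priors from (cx, cy, w, h) to (xmin, ymin, xmax, ymax)
    return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2,
                      boxes[:, :2] + boxes[:, 2:] / 2), 1)

def encode(matched, priors, variances):
    # matched: [num_priors, 4] gt boxes in point form; priors: [num_priors, 4] as (cx, cy, w, h)
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]  # center offsets
    g_cxcy /= (variances[0] * priors[:, 2:])
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]        # width/height ratios
    g_wh = torch.log(g_wh) / variances[1]
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]
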
"""
Visualization of bounding boxes from the ground-truth file
"""

import csv
import cv2
import matplotlib.pyplot as plt
from random import randint
import os
%matplotlib inline



def bboxes_visualization(bboxes_file, num_saves, save_path):
    """
    bboxes_file:csv file contains all objects. 
    			format:[[im_full_name, xmin,ymin,xmax,ymax,class_name], [......], .....]
                check readme.md
    num_saves: number of images to be saved 
    save_path: path to save images 
    """
    if not os.path.exists(bboxes_file):
        raise Exception('No specified bounding boxes files !')
        
    if not os.path.exists(save_path):
        os.mkdir(save_path)
        print('Create the folder automatically .... \n')
    
    with open(bboxes_file) as f:
        info = list(csv.reader(f))
    
    color_set = [[255, 0, 0], 
                 [0, 255, 0],
                 [0, 0, 255],
                 [125, 125, 0]]

    im_name = None  # sentinel: no image loaded yet
    k = 0
    for ind, i in enumerate(info):
        # if this row starts a new image, load it
        if im_name != i[0]:
            k = k + 1
            im_name = i[0]
            im = cv2.imread(im_name)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # draw the rectangle and class name on the image
        coord = list(map(int, i[1:5]))
        color = color_set[randint(0, len(color_set) - 1)]
        cv2.rectangle(im, (coord[0], coord[1]), (coord[2], coord[3]), color, 3)
        cv2.putText(im, i[5], (coord[0] + 20, coord[1] + 20), cv2.FONT_HERSHEY_COMPLEX, 0.9, color, 2)
        
        # If the next object belongs to a different image (or this is the last row), every object
        # of the current image has been drawn, so save the figure.
        if ind == len(info) - 1 or info[ind + 1][0] != im_name:
            plt.imshow(im)
            plt.savefig(os.path.join(save_path, im_name.split('/')[-1]))

        if k == num_saves + 1:
            print("All images have been saved to " + save_path)
            break
            
if __name__ == '__main__':
    bboxes_visualization(bboxes_file='./refineDet/CSV/voc/07_test.csv', num_saves=30, save_path='saved_images')
        
# Compute the IoU of two sets of boxes (PyTorch tensors).
import torch

def compute_iou(box1, box2):
    '''Compute the intersection over union of two sets of boxes, each box being [x1, y1, x2, y2].
    Args:
        box1: (tensor) bounding boxes, sized [N, 4].
        box2: (tensor) bounding boxes, sized [M, 4].
    Return:
        (tensor) iou, sized [N, M].
    '''
    N = box1.size(0)
    M = box2.size(0)

    # top-left corner of every pairwise intersection
    lt = torch.max(
        box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
        box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
    )

    # bottom-right corner of every pairwise intersection
    rb = torch.min(
        box1[:, 2:].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
        box2[:, 2:].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
    )

    wh = rb - lt  # [N,M,2]
    wh[wh < 0] = 0  # clip at 0: non-overlapping pairs get zero width/height
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [N,]
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M,]
    area1 = area1.unsqueeze(1).expand_as(inter)  # [N,] -> [N,1] -> [N,M]
    area2 = area2.unsqueeze(0).expand_as(inter)  # [M,] -> [1,M] -> [N,M]

    iou = inter.float() / (area1 + area2 - inter + 1e-5)
    return iou
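
# Quick sanity check on two hypothetical boxes: the intersection is 5*5 = 25 and the
# union is 100 + 100 - 25 = 175, so the IoU should be roughly 25/175 ~= 0.1429.
box_a = torch.tensor([[0., 0., 10., 10.]])
box_b = torch.tensor([[5., 5., 15., 15.]])
print(compute_iou(box_a, box_b))  # ~tensor([[0.1429]]) (slightly lower due to the 1e-5 epsilon)
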
# Data augmentation for classification: mixup
# CODE:https://github.com/facebookresearch/mixup-cifar10/blob/master/train.py
# BLOG:https://blog.csdn.net/ly244855983/article/details/78938667

# details of mixup 
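# mixup draws lam ~ Beta(alpha, alpha) and mixes random pairs within a batch:
#     x_tilde = lam * x_i + (1 - lam) * x_j
# and the loss is the lam-weighted sum of the criterion against both original labels,
# as implemented by mixup_criterion below.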
import numpy as np
import torch

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda.
       x: a batch of images
       y: a batch of labels
    '''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index] 
    return mixed_x, y_a, y_b, lam
  
# mixup in training step for loss computation 
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
 
# train 
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets,
                                                       args.alpha, use_cuda) # build the mixed batch
        inputs, targets_a, targets_b = map(Variable, (inputs,
                                                      targets_a, targets_b)) # Variable is a no-op in PyTorch >= 0.4 and can be dropped
        outputs = net(inputs)  # network output on the mixed inputs
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam) # loss of the mixed output against both original label sets, weighted by lam
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (lam * predicted.eq(targets_a.data).cpu().sum().float()
                    + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress_bar(batch_idx, len(trainloader),
                     'Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)'
                     % (train_loss/(batch_idx+1), reg_loss/(batch_idx+1),
                        100.*correct/total, correct, total))
    return (train_loss/(batch_idx+1), reg_loss/(batch_idx+1), 100.*correct/total)

Object detection CSV file format:
[im_full_name, xmin, ymin, xmax, ymax, class_name]

/root/voc/vocdevkit/07VOC/test/005167.jpg   242 169 270 215 bird
/root/voc/vocdevkit/07VOC/test/006505.jpg   5   51  494 277 sofa
/root/voc/vocdevkit/07VOC/test/006505.jpg   1   33  188 256 person
/root/voc/vocdevkit/07VOC/test/006505.jpg   132 10  321 277 person
/root/voc/vocdevkit/07VOC/test/006505.jpg   266 12  495 277 person
/root/voc/vocdevkit/07VOC/test/007977.jpg   82  28  500 375 person
/root/voc/vocdevkit/07VOC/test/003201.jpg   1   53  166 260 cow
/root/voc/vocdevkit/07VOC/test/003201.jpg   137 25  416 298 cow
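
A CSV in this format can be generated from the VOC XML annotations. A minimal sketch, assuming the
standard VOC layout (the helper name voc_to_csv and its directory arguments are hypothetical):

import csv
import os
import xml.etree.ElementTree as ET

def voc_to_csv(ann_dir, img_dir, out_csv):
    # one row per object: [im_full_name, xmin, ymin, xmax, ymax, class_name]
    with open(out_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        for xml_name in sorted(os.listdir(ann_dir)):
            root = ET.parse(os.path.join(ann_dir, xml_name)).getroot()
            im_full_name = os.path.join(img_dir, root.find('filename').text)
            for obj in root.iter('object'):
                box = obj.find('bndbox')
                writer.writerow([im_full_name,
                                 box.find('xmin').text, box.find('ymin').text,
                                 box.find('xmax').text, box.find('ymax').text,
                                 obj.find('name').text])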