SSD讲解以及代码解读(一)
阅读原文时间:2021年04月20日阅读:1

   本节首先讲解ssd网络特征提取部分(使用的是tensorflow2)。
   以下为SSD的总体框架图。前面是VGG-16网络(也可以换成其它骨干网络),之后从网络中提取若干特征层,分别用于类别预测(分类)和边界框位置预测(回归),详见代码。

   为了便于理解,再看以下图:

注意以上的网络结构(特征提取部分)和代码可能稍稍不一样。接下来上代码(特征提取部分):

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import os
from tensorflow import keras
import numpy as np
from tensorflow.keras.layers import Dense, Flatten, Conv2D, SeparableConv2D,\
 BatchNormalization, ReLU, GlobalAveragePooling2D, ZeroPadding2D,\
 Activation, Softmax, DepthwiseConv2D, Dropout, MaxPool2D, Reshape
from tensorflow.keras import Model, Input
import math

atrous_conv2d = tf.nn.atrous_conv2d

# Default hyper-parameters of the SSD-300 model.
default_params = {
    # Shape of the input image.
    "img_shape": (300, 300, 3),
    # 20 object classes + 1 background class.
    "num_classes": 21,
    "no_annotation_label": 21,
    # Names of the feature layers that default boxes are generated from.
    "feat_layers": [
        'block4',
        'block7',
        'block8',
        'block9',
        'block10',
        'block11',
    ],
    # Spatial size of the feature map of each layer listed above.
    "feat_shapes": [
        (38, 38),
        (19, 19),
        (10, 10),
        (5, 5),
        (3, 3),
        (1, 1),
    ],
    # Smin = 0.15, Smax = 0.9 (an alternative setting is [0.20, 0.90]).
    "anchor_size_bounds": [0.15, 0.90],
    # Default-box sizes (the S_k values) for the current and the next
    # feature layer; they do not follow the paper's formula exactly.
    "anchor_sizes": [
        (21.0, 45.0),
        (45.0, 99.0),
        (99.0, 153.0),
        (153.0, 207.0),
        (207.0, 261.0),
        (261.0, 315.0),
    ],
    # Aspect ratios of the default boxes; the 1:1 ratio is not listed here.
    "anchor_ratios": [
        [2, 0.5],
        [2, 0.5, 3, 1.0 / 3],
        [2, 0.5, 3, 1.0 / 3],
        [2, 0.5, 3, 1.0 / 3],
        [2, 0.5],
        [2, 0.5],
    ],
    # Stride, in input-image pixels, of one step on each feature map;
    # step * feature-map size is roughly the 300-pixel input size.
    "anchor_steps": [8, 16, 32, 64, 100, 300],
    # Offset applied to the default-box centres.
    "anchor_offset": 0.5,
    # Per-feature-layer normalisation switch: a value > 0 enables it.
    "normalizations": [20, -1, -1, -1, -1, -1],
    # Scaling factors for the offsets between default and ground-truth boxes.
    "prior_scaling": [0.1, 0.1, 0.2, 0.2],
}
# Pascal VOC label table: class name -> (class id, category).
# The class id is simply the position of the entry in the ordered table
# below; id 0 is reserved for the background.
_VOC_CLASSES = (
    ('none', 'Background'),
    ('aeroplane', 'Vehicle'),
    ('bicycle', 'Vehicle'),
    ('bird', 'Animal'),
    ('boat', 'Vehicle'),
    ('bottle', 'Indoor'),
    ('bus', 'Vehicle'),
    ('car', 'Vehicle'),
    ('cat', 'Animal'),
    ('chair', 'Indoor'),
    ('cow', 'Animal'),
    ('diningtable', 'Indoor'),
    ('dog', 'Animal'),
    ('horse', 'Animal'),
    ('motorbike', 'Vehicle'),
    ('person', 'Person'),
    ('pottedplant', 'Indoor'),
    ('sheep', 'Animal'),
    ('sofa', 'Indoor'),
    ('train', 'Vehicle'),
    ('tvmonitor', 'Indoor'),
)
VOC_LABELS = {
    class_name: (class_id, category)
    for class_id, (class_name, category) in enumerate(_VOC_CLASSES)
}

# Build the SSD network.
def ssd_net(inputs,
            num_classes=default_params["num_classes"],
            feat_layers=default_params["feat_layers"],
            anchor_sizes=default_params["anchor_sizes"],
            anchor_ratios=default_params["anchor_ratios"],
            normalizations=default_params["normalizations"],
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=Softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """Build the SSD-300 feature extractor and its multibox prediction heads.

    With a 300x300 input the selected feature layers come out as
    38, 19, 10, 5, 3 and 1 cells per side, i.e. the sizes listed in
    ``default_params["feat_shapes"]``.

    Args:
        inputs: Keras input tensor holding the image batch.
        num_classes: number of classes to predict (background included).
        feat_layers: names of the ``end_points`` entries that get a
            prediction head.
        anchor_sizes: per feature layer, the default-box sizes.
        anchor_ratios: per feature layer, the default-box aspect ratios.
        normalizations: per feature layer, normalisation switch forwarded
            to ``ssd_multibox_layer`` (a value > 0 enables it).
        is_training: accepted for interface compatibility; not used here.
        dropout_keep_prob: probability of *keeping* a unit in the two
            dropout layers.
        prediction_fn: either a callable applied to the class logits, or a
            Keras ``Layer`` class (such as the default ``Softmax``) that is
            instantiated and then applied to them.
        reuse: accepted for interface compatibility; not used here.
        scope: accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(predictions, localisations, logits, end_points)``. The
        first three are lists with one entry per name in ``feat_layers``;
        ``end_points`` maps block names to the corresponding activations.
    """
    def conv(filters, kernel_size, **kwargs):
        # Every backbone / extra-layer convolution is followed by a ReLU;
        # Keras' Conv2D applies no activation unless asked to.
        return Conv2D(filters, kernel_size, activation='relu', **kwargs)

    # end_points collects the output of every block for external use.
    end_points = {}
    net = inputs

    # VGG-16 blocks 1-5: (name, filters, number of 3x3 convolutions).
    vgg_blocks = [('block1', 64, 2),
                  ('block2', 128, 2),
                  ('block3', 256, 3),
                  ('block4', 512, 3),
                  ('block5', 512, 3)]
    for name, filters, num_convs in vgg_blocks:
        for _ in range(num_convs):
            net = conv(filters, 3, padding='SAME')(net)
        end_points[name] = net
        # 'SAME' pooling rounds odd sizes up (75 -> 38), which is what the
        # declared feature-map sizes require; the default 'valid' padding
        # would give 37 and shrink every later feature map.
        if name == 'block5':
            net = MaxPool2D(3, strides=1, padding='SAME')(net)
        else:
            net = MaxPool2D(2, strides=2, padding='SAME')(net)

    # Blocks 6-7 replace the fully connected layers of VGG.
    # Dropout expects the fraction to DROP, hence 1 - keep probability.
    drop_rate = 1.0 - dropout_keep_prob
    net = conv(1024, 3, dilation_rate=6, padding='SAME')(net)
    end_points['block6'] = net
    net = Dropout(rate=drop_rate)(net)
    net = conv(1024, 1, padding='SAME')(net)
    end_points['block7'] = net
    net = Dropout(rate=drop_rate)(net)

    # Blocks 8-9: 1x1 bottleneck, explicit 1-pixel padding, stride-2 conv.
    net = conv(256, 1, padding='SAME')(net)
    net = ZeroPadding2D()(net)
    net = conv(512, 3, strides=2)(net)
    end_points['block8'] = net

    net = conv(128, 1, padding='SAME')(net)
    net = ZeroPadding2D()(net)
    net = conv(256, 3, strides=2)(net)
    end_points['block9'] = net

    # Blocks 10-11: unpadded 3x3 convolutions (5 -> 3 -> 1).
    net = conv(128, 1, padding='SAME')(net)
    net = conv(256, 3)(net)
    end_points['block10'] = net

    net = conv(128, 1, padding='SAME')(net)
    net = conv(256, 3)(net)
    end_points['block11'] = net

    # A Layer class must be instantiated before use: calling Softmax(p)
    # would build a layer with axis=p instead of applying softmax to p.
    fn_is_layer_class = (isinstance(prediction_fn, type)
                         and issubclass(prediction_fn, keras.layers.Layer))

    # Prediction and localisation heads, one per selected feature layer.
    predictions = []
    logits = []
    localisations = []
    for i, layer in enumerate(feat_layers):
        # Turn the feature layer into class scores and box offsets.
        p, l = ssd_multibox_layer(end_points[layer],
                                  num_classes,
                                  anchor_sizes[i],
                                  anchor_ratios[i],
                                  normalizations[i])
        activation = prediction_fn() if fn_is_layer_class else prediction_fn
        predictions.append(activation(p))  # class probabilities
        logits.append(p)  # raw class scores
        localisations.append(l)  # predicted box offsets

    return predictions, localisations, logits, end_points

# Produces cls_pred and loc_pred, i.e. the class and box-regression
# predictions for one feature layer.
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=(1,),
                       normalization=-1,
                       bn_normalization=False):
    """Construct a multibox layer, return class and localisation predictions.

    Args:
        inputs: feature layer the predictions are computed from.
        num_classes: number of classes scored per default box.
        sizes: default-box sizes of this layer; only its length is used here.
        ratios: default-box aspect ratios; only its length is used here.
        normalization: a value > 0 applies ``BatchNormalization`` to the
            feature layer before prediction; the magnitude is not used.
        bn_normalization: accepted for interface compatibility; not used.

    Returns:
        ``(cls_pred, loc_pred)`` where the channel axis of each 1x1
        convolution output has been split into ``(num_anchors, num_classes)``
        and ``(num_anchors, 4)`` respectively.
    """
    net = inputs
    if normalization > 0:
        net = BatchNormalization()(net)

    # Number of default boxes per feature-map cell.
    num_anchors = len(sizes) + len(ratios)

    # Location: four offsets per default box.
    loc_pred = Conv2D(num_anchors * 4, 1)(net)
    # Build the target shape as a plain tuple: `.shape` may be a TensorShape
    # or an ordinary tuple depending on the Keras version, and
    # `tuple + list` raises TypeError.
    loc_shape = tuple(loc_pred.shape[1:-1]) + (num_anchors, 4)
    loc_pred = Reshape(loc_shape)(loc_pred)

    # Class prediction: one score per class per default box.
    cls_pred = Conv2D(num_anchors * num_classes, 1)(net)
    cls_shape = tuple(cls_pred.shape[1:-1]) + (num_anchors, num_classes)
    cls_pred = Reshape(cls_shape)(cls_pred)
    return cls_pred, loc_pred
# Symbolic input sized for the default SSD-300 configuration.
inputs = keras.Input(shape=default_params["img_shape"])

# Wire up the whole network; the structural settings are read straight
# from default_params, the remaining options are given explicitly.
predictions, localisations, logits, end_points = ssd_net(
    inputs,
    is_training=True,
    dropout_keep_prob=0.5,
    prediction_fn=Softmax,
    reuse=None,
    scope='ssd_300_vgg',
    **{key: default_params[key]
       for key in ("num_classes",
                   "feat_layers",
                   "anchor_sizes",
                   "anchor_ratios",
                   "normalizations")})

# The Keras model outputs the box offsets and the raw class scores of
# every feature layer.
model = keras.Model(inputs=inputs, outputs=[localisations, logits])

这一部分相对于后面的标签提取(编码)部分还是比较简单的,大致就是构造一个特征提取网络,并把感兴趣的特征层保存到 end_points 中。
SSD讲解以及代码解读(二)

参考文献
tensorflow官方地址
https://blog.csdn.net/qq_37541097/article/details/80917536
https://blog.csdn.net/u010712012/article/details/86555814

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章