TensorFlow Study Notes 11: ResNet
ResNet
ResNet was proposed by Kaiming He and three colleagues. Using Residual Units, they successfully trained a 152-layer deep neural network and won ILSVRC 2015 with a top-5 error rate of 3.57%, while using fewer parameters than VGGNet. The ResNet structure dramatically speeds up the training of very deep networks and also improves accuracy considerably. Many later methods build on ResNet, and it is widely used in detection, segmentation, recognition, and other areas. Shortly after ResNet appeared, Google borrowed its core idea to propose Inception V4 and Inception-ResNet-v2, and by combining the two models achieved a remarkable 3.08% error rate on the ILSVRC dataset. This shows just how useful ResNet is.
Network structure
TensorFlow implementation
Import packages and design the Block module group
```python
import collections
```
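The rest of the code in this post also relies on TensorFlow, TF-Slim, and a few standard-library modules. A minimal sketch of the remaining imports, assuming TensorFlow 1.x where TF-Slim is available as tf.contrib.slim:

```python
import math
import time
from datetime import datetime

import tensorflow as tf

# TF-Slim ships with TensorFlow 1.x as tf.contrib.slim
slim = tf.contrib.slim
```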
We use collections.namedtuple to define the named tuple for ResNet's basic Block module group.
```python
'''
scope is the name of the generated Block
unit_fn is the function that generates a residual learning unit
args is a sequence whose length equals the number of units in the Block; each element
is a tuple (depth, depth_bottleneck, stride): the third layer's channel count,
the first two layers' channel count, and the middle layer's stride
'''
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    'A named tuple describing a ResNet block'
```
Take Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]) as an example (see the short snippet after this list):
- block1: the name of this Block
- bottleneck: the residual learning unit generation function (each unit has three layers), defined further below
- [(256, 64, 1)] * 2 + [(256, 64, 2)]: a list in which each element corresponds to one bottleneck residual unit; the first two elements are (256, 64, 1) and the last is (256, 64, 2). Each element is a 3-tuple (depth, depth_bottleneck, stride), meaning that in the bottleneck unit it builds, the third layer has 256 output channels (depth), the first two layers have 64 output channels (depth_bottleneck), and the middle layer has stride 1 (stride)
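Because Block is just a namedtuple, its fields can be inspected directly. A hypothetical check (run it after Block and bottleneck have been defined):

```python
# A Block is an ordinary namedtuple with three fields.
blk = Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)])
print(blk.scope)    # 'block1'
print(blk.unit_fn)  # the bottleneck function
for depth, depth_bottleneck, stride in blk.args:
    print(depth, depth_bottleneck, stride)  # (256, 64, 1) twice, then (256, 64, 2)
```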
Defining helper functions
Define a downsampling function, subsample
```python
'''
Downsampling function
inputs is the input tensor
factor is the downsampling factor
Implemented with slim.max_pool2d
'''
def subsample(inputs, factor, scope=None):
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
```
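A quick shape check of subsample (a sketch, assuming an NHWC float tensor): with factor=1 the input is returned unchanged, while factor=2 halves the spatial dimensions through a 1x1 max pool with stride 2:

```python
x = tf.random_uniform((1, 56, 56, 256))
print(subsample(x, 1).get_shape())  # (1, 56, 56, 256) -- returned as-is
print(subsample(x, 2).get_shape())  # (1, 28, 28, 256) -- spatially halved
```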
Define a conv2d_same function to create convolutional layers

```python
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)
    else:
        # Explicitly pad with zeros
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # Pad the image with tf.pad
        inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)
```
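The explicit zero padding matters because, for stride > 1, padding='SAME' pads asymmetrically in a way that depends on the input size, whereas padding by a total of kernel_size - 1 and then convolving with 'VALID' behaves like a stride-1 'SAME' convolution followed by subsampling. A small shape check (a sketch; the scope name is arbitrary):

```python
x = tf.random_uniform((1, 224, 224, 3))
y = conv2d_same(x, 64, 7, stride=2, scope='conv2d_same_check')
print(y.get_shape())  # (1, 112, 112, 64) -- 224 is halved by the stride-2 convolution
```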
Define a function that stacks Blocks

```python
'''
net is the input
blocks is a list of Block instances as defined above
outputs_collections collects the end_points
'''
def stack_blocks_dense(net, blocks, outputs_collections=None):
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            # Iterate over the args of every residual unit in this Block
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    # Unpack each unit's parameters: third-layer channels,
                    # first-two-layer channels, and middle-layer stride
                    unit_depth, unit_depth_bottleneck, unit_stride = unit
                    # unit_fn is the Block's residual-unit generation function;
                    # it creates the residual units in order and chains them together
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)
            # Add the output net to the collection
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net
```
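A minimal usage sketch (the block name is arbitrary, and it requires the bottleneck function defined below): stacking a single two-unit Block, where the last unit's stride of 2 halves the spatial resolution:

```python
x = tf.random_uniform((1, 56, 56, 256))
demo_blocks = [Block('demo_block', bottleneck, [(256, 64, 1), (256, 64, 2)])]
net = stack_blocks_dense(x, demo_blocks)
print(net.get_shape())  # (1, 28, 28, 256)
```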
Create the generic arg_scope for ResNet

```python
def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    batch_norm_params = {
        # Pass is_training through so that resnet_arg_scope(is_training=False)
        # actually switches batch norm to inference mode
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
        'fused': None,  # Use fused batch norm if possible.
    }
    # Set default arguments for slim.conv2d
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
```
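resnet_arg_scope only builds a nested arg_scope object; it takes effect once entered with slim.arg_scope, after which every slim.conv2d call inside it picks up the L2 regularizer, the variance-scaling initializer, ReLU, and batch normalization configured above. A brief sketch (the scope name is arbitrary):

```python
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    x = tf.random_uniform((1, 32, 32, 3))
    # Batch norm and ReLU are applied automatically through the arg_scope defaults
    y = slim.conv2d(x, 16, [3, 3], scope='arg_scope_demo')
```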
Define the core bottleneck residual learning unit

```python
# add_arg_scope lets bottleneck receive default arguments set via slim.arg_scope
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        # Get the last dimension of the input, i.e. the number of input channels;
        # min_rank=4 requires the input to have at least 4 dimensions
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        # Batch-normalize the input first, then apply the nonlinearity (pre-activation)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
        # If the unit's output depth equals the input depth, simply downsample the input
        # so the shortcut matches the spatial size of the output of the three conv layers
        if depth == depth_in:
            shortcut = subsample(inputs, stride, 'shortcut')
        # Otherwise, use a 1x1 convolution to bring the shortcut to the same depth
        else:
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        # Then define the residual branch: three convolutional layers
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride, scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None, scope='conv3')
        # The output is the sum of the shortcut and the residual
        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, output)
```
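As a shape sketch (hypothetical values): for a 28x28x512 input and a unit configured as (depth=1024, depth_bottleneck=256, stride=2), the shortcut is a 1x1 convolution to 1024 channels with stride 2, and the residual branch goes 1x1/256 -> 3x3/256 (stride 2) -> 1x1/1024, so both branches end up at 14x14x1024 and can be added:

```python
x = tf.random_uniform((1, 28, 28, 512))
out = bottleneck(x, depth=1024, depth_bottleneck=256, stride=2, scope='bottleneck_demo')
print(out.get_shape())  # (1, 14, 14, 1024)
```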
Define the main function that generates ResNet V2
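The resnet_v2 generator ties the pieces above together: an optional root block (a 7x7/64 convolution with stride 2 followed by a 3x3 max pool with stride 2), the stacked residual blocks, a final batch norm plus ReLU, optional global average pooling, and an optional 1x1 'logits' convolution. A sketch following the standard TF-Slim ResNet V2 structure (the scope names 'postnorm', 'pool5', 'logits', the end-points handling, and the reliance on the @slim.add_arg_scope decorator on bottleneck are assumptions here, not the original listing):

```python
def resnet_v2(inputs, blocks, num_classes=None, global_pool=True,
              include_root_block=True, reuse=None, scope=None):
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Route the end points of slim.conv2d and bottleneck into one collection
        with slim.arg_scope([slim.conv2d, bottleneck],
                            outputs_collections=end_points_collection):
            net = inputs
            if include_root_block:
                # Root block: 7x7/64 convolution with stride 2, then 3x3 max pool with
                # stride 2; no batch norm or ReLU here, the first unit pre-activates
                with slim.arg_scope([slim.conv2d],
                                    activation_fn=None, normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            net = stack_blocks_dense(net, blocks, end_points_collection)
            # Final batch norm + ReLU (the units themselves use pre-activation)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pool:
                # Global average pooling
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                # 1x1 convolution as the classification layer
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            # Convert the collected end points into a dictionary
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
```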
Defining ResNet architectures of different depths
50-layer ResNet
```python
# Configuration of the 50-layer ResNet
def resnet_v2_50(inputs, num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
101-layer ResNet
```python
# Configuration of the 101-layer ResNet
def resnet_v2_101(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
152-layer ResNet
```python
# Configuration of the 152-layer ResNet
def resnet_v2_152(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
200-layer ResNet
```python
# Configuration of the 200-layer ResNet
def resnet_v2_200(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
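The nominal depth of each variant follows from the unit counts above: every bottleneck unit contributes three convolutional layers, and the root 7x7 convolution plus the final logits layer add two more. A quick arithmetic check (the dictionary is just for illustration):

```python
unit_counts = {
    'resnet_v2_50':  [3, 4, 6, 3],
    'resnet_v2_101': [3, 4, 23, 3],
    'resnet_v2_152': [3, 8, 36, 3],
    'resnet_v2_200': [3, 24, 36, 3],
}
for name, units in unit_counts.items():
    # 3 conv layers per unit, plus the root conv and the logits layer
    print(name, 3 * sum(units) + 2)  # 50, 101, 152, 200
```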
Timing test
Define the timing function
```python
def time_tensorflow_run(session, target, info_string):
    num_steps_burn_in = 10        # warm-up iterations, excluded from the statistics
    total_duration = 0.0          # accumulated time over the measured iterations
    total_duration_squared = 0.0  # accumulated squared time over the measured iterations
    # num_batches is a global set before this function is called
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time  # time taken by this iteration
        if i >= num_steps_burn_in:
            if not i % 10:
                print("%s : step %d, duration = %.3f" %
                      (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches                    # mean
    vr = total_duration_squared / num_batches - mn * mn  # variance
    sd = math.sqrt(vr)                                   # standard deviation
    print("%s : %s across %d steps, %.3f +/- %.3f sec/batch" %
          (datetime.now(), info_string, num_batches, mn, sd))
```
Run the timing test
```python
batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    num_batches = 100
    time_tensorflow_run(sess, net, "Forward")
```
Test results
```
2019-01-26 08:10:51.879413 : step 0, duration = 0.486
2019-01-26 08:10:56.748640 : step 10, duration = 0.487
2019-01-26 08:11:01.628659 : step 20, duration = 0.489
2019-01-26 08:11:06.511324 : step 30, duration = 0.489
2019-01-26 08:11:11.410210 : step 40, duration = 0.490
2019-01-26 08:11:16.311633 : step 50, duration = 0.491
2019-01-26 08:11:21.219118 : step 60, duration = 0.493
2019-01-26 08:11:26.133231 : step 70, duration = 0.492
2019-01-26 08:11:31.054586 : step 80, duration = 0.493
2019-01-26 08:11:35.984226 : step 90, duration = 0.494
2019-01-26 08:11:40.435636 : Forward across 100 steps, 0.490 +/- 0.002 sec/batch
```