TensorFlow Study Notes 11: ResNet
ResNet
ResNet was proposed by Kaiming He and three colleagues. Using Residual Units, they successfully trained a 152-layer deep neural network and won ILSVRC 2015 with a top-5 error rate of 3.57%, while using fewer parameters than VGGNet. The ResNet structure dramatically speeds up the training of very deep networks and also improves accuracy considerably. Many later methods build on ResNet, and it is widely used in detection, segmentation, recognition, and other areas. Shortly after ResNet appeared, Google borrowed its core idea to propose Inception V4 and Inception-ResNet-v2, and by combining the two models achieved a remarkable 3.08% error rate on the ILSVRC dataset. This shows just how useful ResNet is.
Network structure
TensorFlow implementation
Import packages and design the Block module group
```python
import collections
```
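The rest of the code in this post also relies on TensorFlow, TF-Slim, and a few standard-library modules. A minimal sketch of the remaining imports, assuming TensorFlow 1.x where TF-Slim is available as tf.contrib.slim:

```python
import math
import time
from datetime import datetime

import tensorflow as tf

# TF-Slim ships with TensorFlow 1.x as tf.contrib.slim
slim = tf.contrib.slim
```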
We use collections.namedtuple to define the named tuple for ResNet's basic Block module group.
```python
'''
scope is the name of the generated Block
unit_fn is the function that generates a residual learning unit
args is a sequence whose length equals the number of units in the Block; each element
is a tuple (depth, depth_bottleneck, stride): the third layer's channel count,
the first two layers' channel count, and the middle layer's stride
'''
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    'A named tuple describing a ResNet block'
```
Take Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]) as an example (see the short snippet after this list):
- block1: the name of this Block
- bottleneck: the residual learning unit generation function (each unit has three layers), defined further below
- [(256, 64, 1)] * 2 + [(256, 64, 2)]: a list in which each element corresponds to one bottleneck residual unit; the first two elements are (256, 64, 1) and the last is (256, 64, 2). Each element is a 3-tuple (depth, depth_bottleneck, stride), meaning that in the bottleneck unit it builds, the third layer has 256 output channels (depth), the first two layers have 64 output channels (depth_bottleneck), and the middle layer has stride 1 (stride)
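Because Block is just a namedtuple, its fields can be inspected directly. A hypothetical check (run it after Block and bottleneck have been defined):

```python
# A Block is an ordinary namedtuple with three fields.
blk = Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)])
print(blk.scope)    # 'block1'
print(blk.unit_fn)  # the bottleneck function
for depth, depth_bottleneck, stride in blk.args:
    print(depth, depth_bottleneck, stride)  # (256, 64, 1) twice, then (256, 64, 2)
```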
Defining helper functions
Define a downsampling function, subsample
```python
'''
Downsampling function
inputs is the input tensor
factor is the downsampling factor
Implemented with slim.max_pool2d
'''
def subsample(inputs, factor, scope=None):
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
```
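A quick shape check of subsample (a sketch, assuming an NHWC float tensor): with factor=1 the input is returned unchanged, while factor=2 halves the spatial dimensions through a 1x1 max pool with stride 2:

```python
x = tf.random_uniform((1, 56, 56, 256))
print(subsample(x, 1).get_shape())  # (1, 56, 56, 256) -- returned as-is
print(subsample(x, 2).get_shape())  # (1, 28, 28, 256) -- spatially halved
```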
Define a conv2d_same function to create convolutional layers

```python
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)
    else:
        # Explicitly pad with zeros
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # Pad the image with tf.pad
        inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)
```
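The explicit zero padding matters because, for stride > 1, padding='SAME' pads asymmetrically in a way that depends on the input size, whereas padding by a total of kernel_size - 1 and then convolving with 'VALID' behaves like a stride-1 'SAME' convolution followed by subsampling. A small shape check (a sketch; the scope name is arbitrary):

```python
x = tf.random_uniform((1, 224, 224, 3))
y = conv2d_same(x, 64, 7, stride=2, scope='conv2d_same_check')
print(y.get_shape())  # (1, 112, 112, 64) -- 224 is halved by the stride-2 convolution
```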
Define a function that stacks Blocks

```python
'''
net is the input
blocks is a list of Block instances as defined above
outputs_collections collects the end_points
'''
def stack_blocks_dense(net, blocks, outputs_collections=None):
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            # Iterate over the args of every residual unit in this Block
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    # Unpack each unit's parameters: third-layer channels,
                    # first-two-layer channels, and middle-layer stride
                    unit_depth, unit_depth_bottleneck, unit_stride = unit
                    # unit_fn is the Block's residual-unit generation function;
                    # it creates the residual units in order and chains them together
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)
            # Add the output net to the collection
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net
```
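A minimal usage sketch (the block name is arbitrary, and it requires the bottleneck function defined below): stacking a single two-unit Block, where the last unit's stride of 2 halves the spatial resolution:

```python
x = tf.random_uniform((1, 56, 56, 256))
demo_blocks = [Block('demo_block', bottleneck, [(256, 64, 1), (256, 64, 2)])]
net = stack_blocks_dense(x, demo_blocks)
print(net.get_shape())  # (1, 28, 28, 256)
```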
Create the generic arg_scope for ResNet

```python
def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    batch_norm_params = {
        # Pass is_training through so that resnet_arg_scope(is_training=False)
        # actually switches batch norm to inference mode
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
        'fused': None,  # Use fused batch norm if possible.
    }
    # Set default arguments for slim.conv2d
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
```
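resnet_arg_scope only builds a nested arg_scope object; it takes effect once entered with slim.arg_scope, after which every slim.conv2d call inside it picks up the L2 regularizer, the variance-scaling initializer, ReLU, and batch normalization configured above. A brief sketch (the scope name is arbitrary):

```python
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    x = tf.random_uniform((1, 32, 32, 3))
    # Batch norm and ReLU are applied automatically through the arg_scope defaults
    y = slim.conv2d(x, 16, [3, 3], scope='arg_scope_demo')
```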
Define the core bottleneck residual learning unit

```python
# add_arg_scope lets bottleneck receive default arguments set via slim.arg_scope
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        # Get the last dimension of the input, i.e. the number of input channels;
        # min_rank=4 requires the input to have at least 4 dimensions
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        # Batch-normalize the input first, then apply the nonlinearity (pre-activation)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
        # If the unit's output depth equals the input depth, simply downsample the input
        # so the shortcut matches the spatial size of the output of the three conv layers
        if depth == depth_in:
            shortcut = subsample(inputs, stride, 'shortcut')
        # Otherwise, use a 1x1 convolution to bring the shortcut to the same depth
        else:
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        # Then define the residual branch: three convolutional layers
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride, scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None, scope='conv3')
        # The output is the sum of the shortcut and the residual
        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, output)
```
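As a shape sketch (hypothetical values): for a 28x28x512 input and a unit configured as (depth=1024, depth_bottleneck=256, stride=2), the shortcut is a 1x1 convolution to 1024 channels with stride 2, and the residual branch goes 1x1/256 -> 3x3/256 (stride 2) -> 1x1/1024, so both branches end up at 14x14x1024 and can be added:

```python
x = tf.random_uniform((1, 28, 28, 512))
out = bottleneck(x, depth=1024, depth_bottleneck=256, stride=2, scope='bottleneck_demo')
print(out.get_shape())  # (1, 14, 14, 1024)
```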
Define the main function that generates ResNet V2
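The resnet_v2 generator ties the pieces above together: an optional root block (a 7x7/64 convolution with stride 2 followed by a 3x3 max pool with stride 2), the stacked residual blocks, a final batch norm plus ReLU, optional global average pooling, and an optional 1x1 'logits' convolution. A sketch following the standard TF-Slim ResNet V2 structure (the scope names 'postnorm', 'pool5', 'logits', the end-points handling, and the reliance on the @slim.add_arg_scope decorator on bottleneck are assumptions here, not the original listing):

```python
def resnet_v2(inputs, blocks, num_classes=None, global_pool=True,
              include_root_block=True, reuse=None, scope=None):
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Route the end points of slim.conv2d and bottleneck into one collection
        with slim.arg_scope([slim.conv2d, bottleneck],
                            outputs_collections=end_points_collection):
            net = inputs
            if include_root_block:
                # Root block: 7x7/64 convolution with stride 2, then 3x3 max pool with
                # stride 2; no batch norm or ReLU here, the first unit pre-activates
                with slim.arg_scope([slim.conv2d],
                                    activation_fn=None, normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            net = stack_blocks_dense(net, blocks, end_points_collection)
            # Final batch norm + ReLU (the units themselves use pre-activation)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pool:
                # Global average pooling
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                # 1x1 convolution as the classification layer
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            # Convert the collected end points into a dictionary
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
```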
Defining ResNet architectures of different depths
50-layer ResNet
```python
# Configuration of the 50-layer ResNet
def resnet_v2_50(inputs, num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
101-layer ResNet
```python
# Configuration of the 101-layer ResNet
def resnet_v2_101(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
152-layer ResNet
```python
# Configuration of the 152-layer ResNet
def resnet_v2_152(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
200-layer ResNet
```python
# Configuration of the 200-layer ResNet
def resnet_v2_200(inputs, num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse,
                     scope=scope)
```
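The nominal depth of each variant follows from the unit counts above: every bottleneck unit contributes three convolutional layers, and the root 7x7 convolution plus the final logits layer add two more. A quick arithmetic check (the dictionary is just for illustration):

```python
unit_counts = {
    'resnet_v2_50':  [3, 4, 6, 3],
    'resnet_v2_101': [3, 4, 23, 3],
    'resnet_v2_152': [3, 8, 36, 3],
    'resnet_v2_200': [3, 24, 36, 3],
}
for name, units in unit_counts.items():
    # 3 conv layers per unit, plus the root conv and the logits layer
    print(name, 3 * sum(units) + 2)  # 50, 101, 152, 200
```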
Timing test
Define the timing function
```python
def time_tensorflow_run(session, target, info_string):
    num_steps_burn_in = 10        # warm-up iterations, excluded from the statistics
    total_duration = 0.0          # accumulated time over the measured iterations
    total_duration_squared = 0.0  # accumulated squared time over the measured iterations
    # num_batches is a global set before this function is called
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time  # time taken by this iteration
        if i >= num_steps_burn_in:
            if not i % 10:
                print("%s : step %d, duration = %.3f" %
                      (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches                    # mean
    vr = total_duration_squared / num_batches - mn * mn  # variance
    sd = math.sqrt(vr)                                   # standard deviation
    print("%s : %s across %d steps, %.3f +/- %.3f sec/batch" %
          (datetime.now(), info_string, num_batches, mn, sd))
```
Run the timing test
```python
batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    num_batches = 100
    time_tensorflow_run(sess, net, "Forward")
```
Test results
```
2019-01-26 08:10:51.879413 : step 0, duration = 0.486
2019-01-26 08:10:56.748640 : step 10, duration = 0.487
2019-01-26 08:11:01.628659 : step 20, duration = 0.489
2019-01-26 08:11:06.511324 : step 30, duration = 0.489
2019-01-26 08:11:11.410210 : step 40, duration = 0.490
2019-01-26 08:11:16.311633 : step 50, duration = 0.491
2019-01-26 08:11:21.219118 : step 60, duration = 0.493
2019-01-26 08:11:26.133231 : step 70, duration = 0.492
2019-01-26 08:11:31.054586 : step 80, duration = 0.493
2019-01-26 08:11:35.984226 : step 90, duration = 0.494
2019-01-26 08:11:40.435636 : Forward across 100 steps, 0.490 +/- 0.002 sec/batch
```