TensorFlow Study Notes 10: Inception V3
The Inception V3 Architecture
CNN design ideas and tricks worth borrowing:
- Factorization into small convolutions is very effective: it reduces the parameter count, mitigates overfitting, and adds nonlinearity to the network's representational power (see the parameter-count sketch after this list).
- From input to output, a convolutional network should gradually shrink the spatial size of its feature maps while increasing the number of output channels, i.e. simplify the spatial structure and convert spatial information into higher-level, abstract feature information.
- The Inception Module's idea of using multiple branches to extract high-level features at different levels of abstraction is very effective and enriches the network's representational power.
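To make the first point concrete, here is a quick back-of-the-envelope check (my own illustration, not from the original note): replacing one 7×7 convolution with a 1×7 plus a 7×1 convolution cuts the weight count from 49C² to 14C².

```python
# Parameter count of a 7x7 convolution versus its 1x7 + 7x1 factorization,
# mapping C input channels to C output channels (biases ignored).
C = 192
full_7x7 = 7 * 7 * C * C                # 1,806,336 weights
factorized = (1 * 7 + 7 * 1) * C * C    # 516,096 weights
print(factorized / float(full_7x7))     # 0.2857... -> roughly 3.5x fewer parameters,
                                        # plus one extra ReLU between the two convs
```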
TensorFlow Implementation
Defining the inception_v3_arg_scope function
The function inception_v3_arg_scope generates the default arguments for functions used throughout the network, such as the convolution layers' activation function, weight initialization scheme, and normalizer. Inside it we nest a slim.arg_scope that assigns default values to several arguments of the convolution-layer generator slim.conv2d, and finally return the fully defined scope.
```python
import tensorflow as tf
import tensorflow.contrib.slim as slim

# A small helper that produces a truncated-normal initializer
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

def inception_v3_arg_scope(weight_decay=0.00004, stddev=0.1,
                           batch_norm_var_collection="moving_vars"):
    """Generates the default arguments for functions used throughout the network.

    weight_decay: L2-regularization strength, 0.00004 by default
    stddev: standard deviation of the weight initializer, 0.1 by default
    batch_norm_var_collection: "moving_vars" by default

    Batch-normalization parameter dict:
        decay: 0.9997
        epsilon: 0.001
        updates_collections: tf.GraphKeys.UPDATE_OPS
        variables_collections: beta and gamma set to None;
            moving_mean and moving_variance placed in batch_norm_var_collection
    """
    batch_norm_params = {
        "decay": 0.9997,
        "epsilon": 0.001,
        "updates_collections": tf.GraphKeys.UPDATE_OPS,
        "variables_collections": {
            "beta": None,
            "gamma": None,
            "moving_mean": [batch_norm_var_collection],
            "moving_variance": [batch_norm_var_collection]
        }
    }
    # Nested slim.arg_scope:
    # the weight initializer is set to trunc_normal(stddev),
    # the activation function to ReLU,
    # the normalizer to slim.batch_norm,
    # and the normalizer's parameters to batch_norm_params.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        # Assign default values to several arguments of the conv-layer generator
        with slim.arg_scope([slim.conv2d],
                            weights_initializer=trunc_normal(stddev),
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params) as sc:
            return sc
```
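A minimal usage sketch (my own, assuming TensorFlow 1.x with contrib.slim available): any slim.conv2d created inside the returned scope automatically picks up the regularizer, initializer, ReLU, and batch-norm defaults.

```python
# Hypothetical usage: layers created inside the scope inherit the defaults.
images = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(inception_v3_arg_scope()):
    net = slim.conv2d(images, num_outputs=32, kernel_size=[3, 3], stride=2)
    # net now uses trunc_normal init, ReLU, batch norm and L2 regularization
```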
Defining the convolutional part of Inception V3
The function inception_v3_base generates the convolutional part of the Inception V3 network.
```python
def inception_v3_base(inputs, scope=None):
    """Defines the convolutional part of Inception V3.

    inputs: tensor of input image data
    scope: a scope carrying the default function arguments
    end_points: a dict that stores key nodes for later use
    """
    end_points = {}
    with tf.variable_scope(scope, "InceptionV3", [inputs]):
        # Set default values for slim.conv2d, slim.max_pool2d and slim.avg_pool2d
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding="VALID"):
            # Arguments: input tensor, output channels, kernel size, stride, padding mode
            net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=2, scope="Conv2d_1a_3x3")
            net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], scope="Conv2d_2a_3x3")
            net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], padding="SAME", scope="Conv2d_2b_3x3")
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="MaxPool_3a_3x3")
            net = slim.conv2d(net, num_outputs=80, kernel_size=[1, 1], scope="Conv2d_3b_1x1")
            net = slim.conv2d(net, num_outputs=192, kernel_size=[3, 3], scope="Conv2d_4a_3x3")
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="MaxPool_5a_3x3")
```
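Assuming the standard 299×299×3 input, the output sizes of these layers can be traced with the usual VALID/SAME formulas; the small helper below (my own sketch, not part of the network) reproduces the 35×35×192 shape that feeds the first Inception module group.

```python
import math

# Helper (for illustration only): output spatial size of a conv/pool layer.
def out_size(in_size, kernel, stride, padding):
    if padding == "VALID":
        return math.ceil((in_size - kernel + 1) / stride)
    return math.ceil(in_size / stride)  # SAME

s = 299                          # assumed 299x299x3 input
s = out_size(s, 3, 2, "VALID")   # Conv2d_1a_3x3  -> 149
s = out_size(s, 3, 1, "VALID")   # Conv2d_2a_3x3  -> 147
s = out_size(s, 3, 1, "SAME")    # Conv2d_2b_3x3  -> 147
s = out_size(s, 3, 2, "VALID")   # MaxPool_3a_3x3 -> 73
s = out_size(s, 1, 1, "VALID")   # Conv2d_3b_1x1  -> 73
s = out_size(s, 3, 1, "VALID")   # Conv2d_4a_3x3  -> 71
s = out_size(s, 3, 2, "VALID")   # MaxPool_5a_3x3 -> 35
print(s)  # 35: with 192 channels, the 35x35x192 input to the Inception modules
```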
Defining the first Inception module group
The first Inception Module
The first Inception Module is named Mixed_5b. It has four branches:
- The first branch is a 1×1 convolution with 64 output channels.
- The second branch is a 1×1 convolution with 48 output channels, followed by a 5×5 convolution with 64 output channels.
- The third branch is a 1×1 convolution with 64 output channels, followed by two consecutive 3×3 convolutions with 96 output channels each.
- The fourth branch is a 3×3 average pooling, followed by a 1×1 convolution with 32 output channels.
Finally, tf.concat merges the outputs of the four branches into the module's final output. The four branches' output channels sum to 64+64+96+32=256, so the final output size is 35×35×256.
```python
        # Define the first Inception module group
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding="SAME"):
            with tf.variable_scope("Mixed_5b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0b_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=32, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
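If the concatenation step seems opaque, a toy check (independent of the network code) shows that tf.concat along axis 3 simply sums the channel dimensions of the four branches:

```python
# Toy check: merging branch outputs of 64, 64, 96 and 32 channels gives 256.
branches = [tf.zeros([1, 35, 35, c]) for c in (64, 64, 96, 32)]
merged = tf.concat(branches, axis=3)
print(merged.get_shape())  # (1, 35, 35, 256)
```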
The second Inception Module
The second Inception Module is named Mixed_5c. It also has four branches; the only difference is that the fourth branch ends in a 1×1 convolution with 64 output channels. The output tensor therefore has size 35×35×288.
```python
            # The second Inception module
            with tf.variable_scope("Mixed_5c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
The third Inception Module
The third Inception Module is named Mixed_5d and is identical to the previous one.
```python
            # The third Inception module
            with tf.variable_scope("Mixed_5d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
Defining the second Inception module group
The second Inception module group is a very large one, containing five Inception Modules, of which the second through the fifth are structurally very similar.
The first Inception Module
The first Inception Module is named Mixed_6a and has three branches:
- The first branch is a 3×3 convolution with 384 output channels and stride 2.
- The second branch has three layers: a 1×1 convolution with 64 output channels, then two 3×3 convolutions with 96 output channels each, the last of which has stride 2.
- The third branch is a 3×3 max pooling layer with stride 2.
Finally, tf.concat merges the three branches along the channel dimension; the output size is 17×17×(384+96+288)=17×17×768.
```python
            # Second Inception module group: the first Inception module
            with tf.variable_scope("Mixed_6a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=384, kernel_size=[3, 3],
                                          stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3],
                                          stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID",
                                              scope="MaxPool_1a_3x3")
                net = tf.concat([batch_0, batch_1, batch_2], 3)
```
The second Inception Module
It is named Mixed_6b and has four branches:
- The first branch is a 1×1 convolution with 192 output channels.
- The second branch has three convolution layers: a 1×1 convolution with 128 output channels, a 1×7 convolution with 128 output channels, and a 7×1 convolution with 192 output channels. This applies the Factorization into small convolutions idea: the chained 1×7 and 7×1 convolutions together act like a 7×7 convolution, greatly reducing the parameter count and mitigating overfitting, while the extra activation function adds another nonlinear feature transformation.
- The third branch has five convolution layers: a 1×1 convolution with 128 output channels, a 7×1 convolution with 128 output channels, a 1×7 convolution with 128 output channels, a 7×1 convolution with 128 output channels, and a 1×7 convolution with 192 output channels.
- The fourth branch is a 3×3 average pooling layer followed by a 1×1 convolution with 192 output channels.
Finally the four branches are merged; the output tensor has size 17×17×(192+192+192+192)=17×17×768.
```python
            # Second Inception module group: the second Inception module
            with tf.variable_scope("Mixed_6b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
The third Inception Module
It is named Mixed_6c and is very similar to the previous Inception Module; the only difference is that the leading convolution layers of the second and third branches have 160 output channels instead of 128 (the final 192-channel layers are unchanged).
```python
            # Second Inception module group: the third Inception module
            with tf.variable_scope("Mixed_6c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    # The final layer keeps 192 output channels, as in Mixed_6b
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
The fourth Inception Module
It is named Mixed_6d and is identical to Mixed_6c.
```python
            # Second Inception module group: the fourth Inception module
            with tf.variable_scope("Mixed_6d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
The fifth Inception Module
It is named Mixed_6e and is likewise identical to the previous two Inception Modules. This is the last Inception Module of the second module group; we store Mixed_6e in end_points so that the Auxiliary Classifier can use it later for its auxiliary classification.
```python
            # Second Inception module group: the fifth Inception module
            with tf.variable_scope("Mixed_6e"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
            # Last Inception module of the second group: store Mixed_6e in end_points
            end_points["Mixed_6e"] = net
```
Defining the third Inception module group
The third Inception module group contains three Inception Modules, of which the last two are structurally very similar.
The first Inception Module
It is named Mixed_7a and has three branches:
- The first branch is a 1×1 convolution with 192 output channels, followed by a 3×3 convolution with 320 output channels and stride 2.
- The second branch has four convolution layers: a 1×1 convolution with 192 output channels, a 1×7 convolution with 192 output channels, a 7×1 convolution with 192 output channels, and a 3×3 convolution with 192 output channels; the last convolution has stride 2 and VALID padding.
- The third branch is a 3×3 max pooling layer with stride 2 and VALID padding.
The merged tensor has size 8×8×(320+192+768)=8×8×1280.
```python
            # Third Inception module group: the first Inception module
            with tf.variable_scope("Mixed_7a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    # Note: this layer takes batch_0 as input, not net
                    batch_0 = slim.conv2d(batch_0, num_outputs=320, kernel_size=[3, 3], stride=2,
                                          padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[3, 3], stride=2,
                                          padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID",
                                              scope="MaxPool_1a_3x3")
                net = tf.concat([batch_0, batch_1, batch_2], 3)
```
The second Inception Module
It is named Mixed_7b and has four branches:
- The first branch is a 1×1 convolution with 320 output channels.
- The second branch starts with a 1×1 convolution with 384 output channels, then splits into two sub-branches: a 1×3 convolution with 384 output channels and a 3×1 convolution with 384 output channels. These are merged with tf.concat into a tensor of size 8×8×(384+384)=8×8×768.
- The third branch starts with a 1×1 convolution with 448 output channels, followed by a 3×3 convolution with 384 output channels, and then likewise splits into two sub-branches: a 1×3 convolution with 384 output channels and a 3×1 convolution with 384 output channels.
- The fourth branch is a 3×3 average pooling layer followed by a 1×1 convolution with 192 output channels.
The merged tensor has size 8×8×(320+768+768+192)=8×8×2048.
```python
            # Third Inception module group: the second Inception module
            with tf.variable_scope("Mixed_7b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    # Split into two sub-branches and merge them along the channel axis
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
```
The third Inception Module
It is named Mixed_7c and is identical to the previous Inception Module. Finally we return its result as the output of the inception_v3_base function.
```python
            # Third Inception module group: the third Inception module
            with tf.variable_scope("Mixed_7c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
            return net, end_points
```
Implementing the inception_v3 function
This implements the last part of the Inception V3 network: global average pooling, Softmax, and the Auxiliary Logits.
Input parameters of the inception_v3 function
```python
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope="InceptionV3"):
    """Builds the entire Inception V3 network.

    Args:
        inputs -- input tensor
        num_classes -- number of final classes
        is_training -- whether this is the training phase
        dropout_keep_prob -- fraction of units kept by dropout
        prediction_fn -- final classification function, softmax by default
        spatial_squeeze -- whether to squeeze size-1 dimensions from the output
        reuse -- whether to reuse the network and its Variables
        scope -- scope carrying the default function arguments
    Returns:
        logits -- the final output
        end_points -- dict of important nodes, including the auxiliary ones
    """
```
The logic of the Auxiliary Logits part
The Auxiliary Logits serve as an auxiliary classification node and help the classification result considerably. After fetching Mixed_6e from end_points:
- Connect a 5×5 average pooling with stride 3 and VALID padding, which turns the 17×17×768 output into 5×5×768.
- Then connect a 1×1 convolution with 128 output channels and a 5×5 convolution with 768 output channels; here the weight initializer is reset to a normal distribution with standard deviation 0.01 and the padding set to VALID, so the output becomes 1×1×768.
- Then connect a 1×1 convolution with num_classes output channels, with no activation function or normalizer and the weight initializer reset to a normal distribution with standard deviation 0.001; the output becomes 1×1×1000.
- Finally use tf.squeeze to remove the two leading size-1 dimensions of the output tensor, and store the auxiliary classifier's output aux_logits in the end_points dict.
```python
    # Fetch Mixed_6e from end_points
    aux_logits = end_points["Mixed_6e"]
    with tf.variable_scope("AuxLogits"):
        aux_logits = slim.avg_pool2d(aux_logits, kernel_size=[5, 5], stride=3,
                                     padding="VALID", scope="Avgpool_1a_5x5")
        aux_logits = slim.conv2d(aux_logits, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_1b_1x1")
        aux_logits = slim.conv2d(aux_logits, num_outputs=768, kernel_size=[5, 5],
                                 weights_initializer=trunc_normal(0.01), padding="VALID",
                                 scope="Conv2d_2a_5x5")
        # A distinct scope name is needed here to avoid clashing with Conv2d_1b_1x1 above
        aux_logits = slim.conv2d(aux_logits, num_outputs=num_classes, kernel_size=[1, 1],
                                 activation_fn=None, normalizer_fn=None,
                                 weights_initializer=trunc_normal(0.001), scope="Conv2d_2b_1x1")
        # Remove the two leading size-1 dimensions of the tensor
        if spatial_squeeze:
            aux_logits = tf.squeeze(aux_logits, axis=[1, 2], name="SpatialSqueeze")
        # Store the auxiliary classifier's output aux_logits in end_points
        end_points["AuxLogits"] = aux_logits
```
The logic of the classification part
After obtaining the output of Mixed_7c, the last convolutional layer:
- Connect an 8×8 global average pooling with VALID padding, turning the tensor into 1×1×2048.
- Then connect a Dropout layer with keep probability dropout_keep_prob.
- Then connect a 1×1 convolution with 1000 output channels, with the activation function and normalizer set to None.
- Then use tf.squeeze to remove the size-1 dimensions of the output tensor.
- Finally connect a Softmax to produce the class predictions, and store the result in end_points.
```python
    # Regular classification prediction
    with tf.variable_scope("Logits"):
        net = slim.avg_pool2d(net, kernel_size=[8, 8], padding="VALID",
                              scope="Avgpool_1a_8x8")
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope="Dropout_1b")
        # Store the pre-logits activations (the original assigned "Logits" here,
        # which the final assignment below would have silently overwritten)
        end_points["PreLogits"] = net
        logits = slim.conv2d(net, num_outputs=num_classes, kernel_size=[1, 1],
                             activation_fn=None, normalizer_fn=None, scope="Conv2d_1c_1x1")
        if spatial_squeeze:
            logits = tf.squeeze(logits, axis=[1, 2], name="SpatialSqueeze")
    end_points["Logits"] = logits
    end_points["Predictions"] = prediction_fn(logits, scope="Predictions")
```
Complete code of the inception_v3 function:
```python
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope="InceptionV3"):
    """Builds the entire Inception V3 network.

    Args:
        inputs -- input tensor
        num_classes -- number of final classes
        is_training -- whether this is the training phase
        dropout_keep_prob -- fraction of units kept by dropout
        prediction_fn -- final classification function, softmax by default
        spatial_squeeze -- whether to squeeze size-1 dimensions from the output
        reuse -- whether to reuse the network and its Variables
        scope -- scope carrying the default function arguments
    Returns:
        logits -- the final output
        end_points -- dict of important nodes, including the auxiliary ones
    """
    with tf.variable_scope(scope, "InceptionV3", [inputs, num_classes],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            # The convolutional part defined above
            net, end_points = inception_v3_base(inputs, scope=scope)
            # The auxiliary classification node
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding="SAME"):
                # Fetch Mixed_6e from end_points
                aux_logits = end_points["Mixed_6e"]
                with tf.variable_scope("AuxLogits"):
                    aux_logits = slim.avg_pool2d(aux_logits, kernel_size=[5, 5], stride=3,
                                                 padding="VALID", scope="Avgpool_1a_5x5")
                    aux_logits = slim.conv2d(aux_logits, num_outputs=128, kernel_size=[1, 1],
                                             scope="Conv2d_1b_1x1")
                    aux_logits = slim.conv2d(aux_logits, num_outputs=768, kernel_size=[5, 5],
                                             weights_initializer=trunc_normal(0.01), padding="VALID",
                                             scope="Conv2d_2a_5x5")
                    aux_logits = slim.conv2d(aux_logits, num_outputs=num_classes, kernel_size=[1, 1],
                                             activation_fn=None, normalizer_fn=None,
                                             weights_initializer=trunc_normal(0.001), scope="Conv2d_2b_1x1")
                    # Remove the two leading size-1 dimensions of the tensor
                    if spatial_squeeze:
                        aux_logits = tf.squeeze(aux_logits, axis=[1, 2], name="SpatialSqueeze")
                    # Store the auxiliary classifier's output in end_points
                    end_points["AuxLogits"] = aux_logits
                # Regular classification prediction
                with tf.variable_scope("Logits"):
                    net = slim.avg_pool2d(net, kernel_size=[8, 8], padding="VALID",
                                          scope="Avgpool_1a_8x8")
                    net = slim.dropout(net, keep_prob=dropout_keep_prob, scope="Dropout_1b")
                    end_points["PreLogits"] = net
                    logits = slim.conv2d(net, num_outputs=num_classes, kernel_size=[1, 1],
                                         activation_fn=None, normalizer_fn=None, scope="Conv2d_1c_1x1")
                    if spatial_squeeze:
                        logits = tf.squeeze(logits, axis=[1, 2], name="SpatialSqueeze")
                end_points["Logits"] = logits
                end_points["Predictions"] = prediction_fn(logits, scope="Predictions")
    return logits, end_points
```
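To close, a minimal smoke test, sketched under the assumption that the two functions above live in the same file: it pushes random data through the graph and checks the output shapes.

```python
# Smoke test (my own sketch): build the network on random inputs.
batch_size, height, width = 8, 299, 299
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(inputs, is_training=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    predictions = sess.run(end_points["Predictions"])
    print(logits.get_shape())   # (8, 1000) after the spatial squeeze
    print(predictions.shape)    # (8, 1000)
```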