Define a Model

TensorLayer provides two ways to define a model. A static model offers an intuitive coding style for describing the architecture, while a dynamic model gives you full control over the forward pass.

Static model

import tensorflow as tf
from tensorlayer.layers import Input, Dropout, Dense
from tensorlayer.models import Model

def get_model(inputs_shape):
    ni = Input(inputs_shape)
    nn = Dropout(keep=0.8)(ni)  # keep is the probability of keeping each unit
    nn = Dense(n_units=800, act=tf.nn.relu, name="dense1")(nn)
    nn = Dropout(keep=0.8)(nn)
    nn = Dense(n_units=800, act=tf.nn.relu, name="dense2")(nn)
    nn = Dropout(keep=0.8)(nn)
    nn = Dense(n_units=10, act=tf.nn.relu, name="dense3")(nn)
    M = Model(inputs=ni, outputs=nn, name="mlp")
    return M

MLP = get_model([None, 784])
MLP.eval()  # switch to evaluation (inference) mode
outputs = MLP(data)
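
Here data stands for a batch of inputs. As a minimal sketch, a random batch of flattened 28x28 images (a hypothetical stand-in for real MNIST data) can be fed through the model:

import numpy as np

data = np.random.random([32, 784]).astype(np.float32)  # hypothetical batch of flattened 28x28 images
outputs = MLP(data)  # a tensor of shape (32, 10)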

Dynamic model

In a dynamic model, the output size of each layer has to be passed manually to the next layer (via the in_channels argument) so that the next layer's weights can be initialized.

class CustomModel(Model):

    def __init__(self):
        super(CustomModel, self).__init__()

        self.dropout1 = Dropout(keep=0.8)
        self.dense1 = Dense(n_units=800, act=tf.nn.relu, in_channels=784)
        self.dropout2 = Dropout(keep=0.8)
        self.dense2 = Dense(n_units=800, act=tf.nn.relu, in_channels=800)
        self.dropout3 = Dropout(keep=0.8)
        self.dense3 = Dense(n_units=10, act=tf.nn.relu, in_channels=800)

    def forward(self, x, foo=False):
        z = self.dropout1(x)
        z = self.dense1(z)
        z = self.dropout2(z)
        z = self.dense2(z)
        z = self.dropout3(z)
        out = self.dense3(z)
        if foo:
            out = tf.nn.relu(out)
        return out

MLP = CustomModel()
MLP.eval()
outputs = MLP(data, foo=True)  # the foo flag controls the forward pass
outputs = MLP(data, foo=False)

Switching between training and testing modes

# method 1: switch the mode before the forward pass
MLP.train() # enable dropout, batch norm moving avg ...
output = MLP(train_data)
... # training code here
MLP.eval()  # disable dropout, batch norm moving avg ...
output = MLP(test_data)
... # testing code here

# method 2: switch the mode while calling the forward pass
output = MLP(train_data, is_train=True)
output = MLP(test_data, is_train=False)
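
The elided training code above usually amounts to a gradient-descent loop. Below is a minimal sketch, assuming a cross-entropy loss and an Adam optimizer; train_batches is a hypothetical iterator over (inputs, labels) pairs:

import tensorflow as tf
import tensorlayer as tl

optimizer = tf.optimizers.Adam(learning_rate=0.0001)

MLP.train()  # enable dropout during training
for X_batch, y_batch in train_batches:  # hypothetical batch iterator
    with tf.GradientTape() as tape:
        logits = MLP(X_batch)
        loss = tl.cost.cross_entropy(logits, y_batch, name='train_loss')
    grads = tape.gradient(loss, MLP.weights)
    optimizer.apply_gradients(zip(grads, MLP.weights))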

Reusing parameters (layers)

In a static model, layer reuse can be implemented as follows.

# create a siamese network

from tensorlayer.layers import Flatten  # Flatten is also needed here

def create_base_network(input_shape):
    '''Base network to be shared (eq. to feature extraction).
    '''
    input = Input(shape=input_shape)
    x = Flatten()(input)
    x = Dense(128, act=tf.nn.relu)(x)
    x = Dropout(0.9)(x)
    x = Dense(128, act=tf.nn.relu)(x)
    x = Dropout(0.9)(x)
    x = Dense(128, act=tf.nn.relu)(x)
    return Model(input, x)


def get_siamese_network(input_shape):
    """Create a siamese network with the shared base network as a layer.
    """
    base_layer = create_base_network(input_shape).as_layer() # convert the model into a layer

    ni_1 = Input(input_shape)
    ni_2 = Input(input_shape)
    nn_1 = base_layer(ni_1) # call base_layer twice to share its weights
    nn_2 = base_layer(ni_2)
    return Model(inputs=[ni_1, ni_2], outputs=[nn_1, nn_2])

siamese_net = get_siamese_network([None, 784])
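
As a quick sanity check, both branches can be evaluated on a pair of (hypothetical) random batches; the two embeddings are produced by the same shared weights:

import numpy as np

siamese_net.eval()
x1 = np.random.random([8, 784]).astype(np.float32)  # hypothetical first batch
x2 = np.random.random([8, 784]).astype(np.float32)  # hypothetical second batch
emb1, emb2 = siamese_net([x1, x2])  # both embeddings come from the shared base network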

In a dynamic model, layer reuse is achieved simply by calling the same layer multiple times inside forward.

from tensorlayer.layers import Concat  # Concat is also needed here

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.dense_shared = Dense(n_units=800, act=tf.nn.relu, in_channels=784)
        self.dense1 = Dense(n_units=10, act=tf.nn.relu, in_channels=800)
        self.dense2 = Dense(n_units=10, act=tf.nn.relu, in_channels=800)
        self.cat = Concat()

    def forward(self, x):
        x1 = self.dense_shared(x) # call dense_shared twice
        x2 = self.dense_shared(x)
        x1 = self.dense1(x1)
        x2 = self.dense2(x2)
        out = self.cat([x1, x2])
        return out

model = MyModel()
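
A forward pass then runs the shared layer twice and concatenates the two 10-unit branches, e.g. with a hypothetical random batch:

import numpy as np

model.eval()
x = np.random.random([8, 784]).astype(np.float32)  # hypothetical input batch
out = model(x)  # shape (8, 20): the two 10-unit branches concatenated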

Printing model information

print(MLP) # simply call the print function

# Model(
#   (_inputlayer): Input(shape=[None, 784], name='_inputlayer')
#   (dropout): Dropout(keep=0.8, name='dropout')
#   (dense1): Dense(n_units=800, relu, in_channels='784', name='dense1')
#   (dropout_1): Dropout(keep=0.8, name='dropout_1')
#   (dense2): Dense(n_units=800, relu, in_channels='800', name='dense2')
#   (dropout_2): Dropout(keep=0.8, name='dropout_2')
#   (dense3): Dense(n_units=10, relu, in_channels='800', name='dense3')
# )

Getting specific weights

Specific weights can be retrieved by indexing the model's weight list or by the layer's name.

# indexing
all_weights = MLP.weights
some_weights = MLP.weights[1:3]

# naming
some_weights = MLP.get_layer('dense1').weights

Saving and restoring models

Two approaches are provided for saving and restoring a model.

Saving weights only

MLP.save_weights('./model_weights.h5') # by default, file will be in hdf5 format
MLP.load_weights('./model_weights.h5')
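
Note that load_weights only restores values into a model that has already been built, so the same architecture must be constructed first. For example, reusing get_model from above:

# weights can only be restored into a model with a matching architecture
MLP2 = get_model([None, 784])
MLP2.load_weights('./model_weights.h5')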

Saving weights and architecture

# When using Model.load(), there is no need to reimplement or declare the architecture of the model explicitly in code
MLP.save('./model.h5', save_weights=True)
MLP = Model.load('./model.h5', load_weights=True)

Customizing layers

For reference, the fully connected (Dense) layer is implemented as follows. It computes

z = f(xW + b)

where W is the weight matrix, b is the bias, and f is the activation function.

import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import Layer

class Dense(Layer):
    def __init__(self, n_units, act=None, b_init=tl.initializers.constant(value=0.0), in_channels=None, name=None):
        super(Dense, self).__init__(name)

        self.n_units = n_units
        self.act = act
        self.b_init = b_init  # bias initializer; pass None to disable the bias
        self.in_channels = in_channels

        # for a dynamic model, the input shape is needed here to build the shape of W
        if self.in_channels is not None:
            self.build(self.in_channels)
            self._built = True
    def build(self, inputs_shape):
        if self.in_channels is None and len(inputs_shape) != 2:
            raise AssertionError("The input dimension must be rank 2, please reshape or flatten it")
        if self.in_channels:
            shape = [self.in_channels, self.n_units]
        else:
            self.in_channels = inputs_shape[1]
            shape = [inputs_shape[1], self.n_units]
        self.W = self._get_weights("weights", shape=tuple(shape))
        if self.b_init:  # the bias is optional
            self.b = self._get_weights("biases", shape=(self.n_units, ), init=self.b_init)

    @tf.function
    def forward(self, inputs):
        z = tf.matmul(inputs, self.W)
        if self.b_init:
            z = tf.add(z, self.b)
        if self.act:
            z = self.act(z)
        return z
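
The custom layer can then be used like any built-in layer, for instance in a dynamic model. A minimal sketch (TwoLayerNet is a hypothetical name):

from tensorlayer.models import Model

class TwoLayerNet(Model):
    def __init__(self):
        super(TwoLayerNet, self).__init__()
        self.dense1 = Dense(n_units=64, act=tf.nn.relu, in_channels=784)  # the custom Dense defined above
        self.dense2 = Dense(n_units=10, in_channels=64)

    def forward(self, x):
        return self.dense2(self.dense1(x))

net = TwoLayerNet()
net.eval()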