Understanding LSTMs:
http://colah.github.io/posts/2015-08-Understanding-LSTMs/
Recognizing handwritten digits (MNIST) with an LSTM; test accuracy reaches 98.4%.
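Before the full script, it helps to see how a flat 784-pixel image becomes a sequence: the LSTM reads the image one row at a time, so each image is 28 timesteps of 28-dimensional inputs. A minimal standalone sketch of that mapping (NumPy only; the random image is just a stand-in for a real MNIST sample):

import numpy as np

# A flattened MNIST image: 784 pixels (random values as a stand-in).
flat_image = np.random.rand(784).astype(np.float32)

# Reshape to (timestep_size, input_size) = (28, 28):
# the LSTM consumes row 0, then row 1, ..., then row 27.
sequence = flat_image.reshape(28, 28)
print(sequence.shape)     # (28, 28)
print(sequence[0].shape)  # (28,) -- the input at the first timestep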
# -*- coding:utf-8 -*-
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data

# First load the data and take a look at its shape.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
print("load mnist finished!")

LEARNING_RATE = 1e-3
INPUT_SIZE = 28
TIMESTEP_SIZE = 28
HIDDEN_SIZE = 256
LAYER_NUMBER = 2
CLASS_NUMBER = 10
ITER_NUMBER = 2000

batch_size = tf.placeholder(tf.int32)
keep_prob = tf.placeholder(tf.float32)

xs = tf.placeholder(tf.float32, [None, 784])
# Step 1: reshape the input to [batch_size, timestep_size, input_size],
# the shape the RNN expects. -1 lets that dimension be inferred from the rest.
image_xs = tf.reshape(xs, [-1, TIMESTEP_SIZE, INPUT_SIZE])
ys = tf.placeholder(tf.float32, [None, CLASS_NUMBER])

def get_drop():
    # Step 2: define one LSTM cell. Only hidden_size needs to be given;
    # the cell automatically matches the dimension of the input X.
    lstm_cell = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE, forget_bias=1.0, state_is_tuple=True)
    # Step 3: add a dropout layer; usually only output_keep_prob is set.
    drop = rnn.DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=keep_prob)
    return drop

# Step 4: stack multiple LSTM layers with MultiRNNCell.
multi_lstm_cell = rnn.MultiRNNCell([get_drop() for _ in range(LAYER_NUMBER)], state_is_tuple=True)

# Step 5: initialize the state with zeros.
state = multi_lstm_cell.zero_state(batch_size, dtype=tf.float32)

# Step 6, method one: when time_major == False,
# outputs.shape = [batch_size, timestep_size, hidden_size],
# so h_state = outputs[:, -1, :] can serve as the final output.
# state has shape [layer_num, 2, batch_size, hidden_size],
# so h_state = state[-1][1] works as well.
# Either way the final output has shape [batch_size, hidden_size].
# outputs, state = tf.nn.dynamic_rnn(multi_lstm_cell, inputs=image_xs, initial_state=state, time_major=False)
# h_state = outputs[:, -1, :]  # or h_state = state[-1][1]

# *** To better understand how the LSTM works, we implement step 6 ourselves. ***
# If you check the docs, every RNNCell provides a __call__() method,
# which we can use to unroll the LSTM one timestep at a time.
# Step 6, method two: unroll the computation timestep by timestep.
outputs = list()
with tf.variable_scope('RNN'):
    for timestep in range(TIMESTEP_SIZE):
        if timestep > 0:
            tf.get_variable_scope().reuse_variables()
        (cell_output, state) = multi_lstm_cell(image_xs[:, timestep, :], state)
        outputs.append(cell_output)
h_state = outputs[-1]

# The LSTM above outputs a [batch_size, hidden_size] tensor; for classification
# we still need a softmax layer on top, so define its weights and bias first.
weights = tf.Variable(tf.truncated_normal([HIDDEN_SIZE, CLASS_NUMBER], stddev=0.1), dtype=tf.float32)
biases = tf.Variable(tf.constant(0.1, shape=[CLASS_NUMBER]), dtype=tf.float32)
predict_ys = tf.nn.softmax(tf.matmul(h_state, weights) + biases)

# Loss and evaluation.
cross_entropy = -tf.reduce_mean(ys * tf.log(predict_ys))
train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(predict_ys, 1), tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

init_op = tf.global_variables_initializer()

# Start training and testing. Let GPU memory grow on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(init_op)

[test_xs, test_ys] = [mnist.test.images, mnist.test.labels]
for i in range(ITER_NUMBER):
    [train_xs, train_ys] = mnist.train.next_batch(128)
    sess.run(train_op, feed_dict={xs: train_xs, ys: train_ys, keep_prob: 0.5, batch_size: 128})
    if (i + 1) % 50 == 0:
        train_accuracy = sess.run(accuracy, feed_dict={
            xs: train_xs, ys: train_ys, keep_prob: 1.0, batch_size: 128})
        test_accuracy = sess.run(accuracy, feed_dict={
            xs: test_xs, ys: test_ys, keep_prob: 1.0, batch_size: mnist.test.images.shape[0]})
        # mnist.train.epochs_completed is the number of completed epochs.
        print("Iter%d, step %d, train-test %g-%g" %
              (mnist.train.epochs_completed, (i + 1), train_accuracy, test_accuracy))
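The commented-out "method one" above does the same unrolling with tf.nn.dynamic_rnn instead of a manual loop. A sketch of that variant, reusing image_xs, multi_lstm_cell, and state from the script above (run it in a fresh graph, since the manual loop already created the LSTM variables under the 'RNN' scope):

# Step 6, method one: let dynamic_rnn iterate over the timesteps.
outputs, final_state = tf.nn.dynamic_rnn(multi_lstm_cell, inputs=image_xs,
                                         initial_state=state, time_major=False)
# outputs has shape [batch_size, timestep_size, hidden_size];
# take the last timestep as the sequence representation.
h_state = outputs[:, -1, :]
# Equivalently, use the h of the top layer's final LSTMStateTuple:
# h_state = final_state[-1].h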
Output:
Iter0, step 50, train-test 0.828125-0.7162
Iter0, step 100, train-test 0.859375-0.8559
Iter0, step 150, train-test 0.90625-0.8824
Iter0, step 200, train-test 0.953125-0.924
Iter0, step 250, train-test 0.976562-0.9277
Iter0, step 300, train-test 0.945312-0.9409
Iter0, step 350, train-test 0.96875-0.945
Iter0, step 400, train-test 0.96875-0.9606
Iter1, step 450, train-test 0.96875-0.9478
Iter1, step 500, train-test 0.984375-0.9668
Iter1, step 550, train-test 0.984375-0.9595
Iter1, step 600, train-test 0.960938-0.969
Iter1, step 650, train-test 0.984375-0.9665
Iter1, step 700, train-test 1-0.9682
Iter1, step 750, train-test 0.976562-0.9642
Iter1, step 800, train-test 0.984375-0.9736
Iter1, step 850, train-test 0.96875-0.9679
Iter2, step 900, train-test 0.945312-0.9705
Iter2, step 950, train-test 0.992188-0.9744
Iter2, step 1000, train-test 0.96875-0.9776
Iter2, step 1050, train-test 0.976562-0.9772
Iter2, step 1100, train-test 0.992188-0.9804
Iter2, step 1150, train-test 0.992188-0.9766
Iter2, step 1200, train-test 1-0.9786
Iter2, step 1250, train-test 0.960938-0.9789
Iter3, step 1300, train-test 0.96875-0.9831
Iter3, step 1350, train-test 0.992188-0.9754
Iter3, step 1400, train-test 0.984375-0.9813
Iter3, step 1450, train-test 0.984375-0.9806
Iter3, step 1500, train-test 1-0.9824
Iter3, step 1550, train-test 1-0.9794
Iter3, step 1600, train-test 0.984375-0.9819
Iter3, step 1650, train-test 1-0.9843
Iter3, step 1700, train-test 0.992188-0.9805
Iter4, step 1750, train-test 0.96875-0.9819
Iter4, step 1800, train-test 1-0.9792
Iter4, step 1850, train-test 1-0.9805
Iter4, step 1900, train-test 0.984375-0.9831
Iter4, step 1950, train-test 1-0.9805
Iter4, step 2000, train-test 0.992188-0.9844
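One caveat worth noting: the hand-rolled loss above computes log(softmax(...)) directly, which can produce NaNs once a predicted probability underflows to 0. A sketch of the numerically stable drop-in, assuming the same h_state, weights, biases, and ys as in the script (tf.nn.softmax_cross_entropy_with_logits works on the raw logits):

# Feed raw logits to the fused op instead of taking log(softmax(...)) by hand.
logits = tf.matmul(h_state, weights) + biases
predict_ys = tf.nn.softmax(logits)  # still available for computing accuracy
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))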
References:
http://blog.csdn.net/jerr__y/article/details/61195257
http://blog.csdn.net/u014595019/article/details/52759104
LSTM multi-class text classification: http://blog.csdn.net/u010223750/article/details/53334313?locationNum=7&fps=1