# Recurrent Neural Network (LSTM) example: classify MNIST digits with TensorFlow 2.
from __future__ import absolute_import, division, print_function
# Import TensorFlow v2.
import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np
tf.compat.v1.enable_eager_execution()  # added by the author; redundant in TF2, where eager execution is the default
# MNIST dataset parameters.
num_classes = 10  # total classes (digits 0-9).
num_features = 784  # data features (image shape: 28*28).
# Training parameters.
learning_rate = 0.001
training_steps = 1000
batch_size = 32
display_step = 100
# Network parameters.
# MNIST images are 28*28 px, so we process each sample as 28 timesteps of 28 features.
num_input = 28  # number of features per timestep (pixels per image row).
timesteps = 28  # number of timesteps (image rows).
num_units = 32  # number of neurons in the LSTM layer.
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Reshape each image into a sequence of 28 rows of 28 pixels (timesteps x features).
x_train, x_test = x_train.reshape([-1, timesteps, num_input]), x_test.reshape([-1, timesteps, num_input])
# Normalize pixel values from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.
# Use the tf.data API to shuffle and batch the data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
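# A quick shape check (a sketch, not part of the original script): pull one batch
# from the pipeline and confirm it matches (batch_size, timesteps, num_input),
# which is what the LSTM layer below expects.
for sanity_x, sanity_y in train_data.take(1):
    print(sanity_x.shape, sanity_y.shape)  # expected: (32, 28, 28) (32,)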
# Create the LSTM model.
class LSTM(Model):
    # Set layers.
    def __init__(self):
        super(LSTM, self).__init__()
        # RNN (LSTM) hidden layer.
        self.lstm_layer = layers.LSTM(units=num_units)
        self.out = layers.Dense(num_classes)

    # Forward pass.
    def call(self, x, is_training=False):
        # LSTM layer.
        x = self.lstm_layer(x)
        # Output layer (num_classes).
        x = self.out(x)
        if not is_training:
            # The tf cross-entropy below expects raw logits (no softmax),
            # so apply softmax only when not training.
            x = tf.nn.softmax(x)
        return x

# Build the LSTM model.
lstm_net = LSTM()
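# Optional sanity check (an assumption, not in the original): run one dummy batch
# through the untrained network to verify the output shape is (batch_size, num_classes).
dummy = lstm_net(np.zeros([batch_size, timesteps, num_input], np.float32))
print(dummy.shape)  # expected: (32, 10)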
# Cross-entropy loss.
# Note that this will apply 'softmax' to the logits.
def cross_entropy_loss(x, y):
    # Convert labels to int64 for the tf cross-entropy function.
    y = tf.cast(y, tf.int64)
    # Apply softmax to the logits and compute cross-entropy.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x)
    # Average loss across the batch.
    return tf.reduce_mean(loss)

# Accuracy metric.
def accuracy(y_pred, y_true):
    # The predicted class is the index of the highest score in the prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
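# A tiny worked example (illustrative only, not in the original): with scores
# [[0.1, 0.9], [0.8, 0.2]] the argmax predictions are [1, 0]; against true labels
# [1, 1] that is 1 of 2 correct, so this should print 0.5.
print(accuracy(tf.constant([[0.1, 0.9], [0.8, 0.2]]), tf.constant([1, 1])))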
# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate)

# Optimization process.
def run_optimization(x, y):
    # Wrap the computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        # Forward pass.
        pred = lstm_net(x, is_training=True)
        # Compute loss.
        loss = cross_entropy_loss(pred, y)
    # Variables to update, i.e. the trainable variables.
    trainable_variables = lstm_net.trainable_variables
    # Compute gradients.
    gradients = g.gradient(loss, trainable_variables)
    # Update W and b following the gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))

# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    run_optimization(batch_x, batch_y)
    if step % display_step == 0:
        pred = lstm_net(batch_x, is_training=True)
        loss = cross_entropy_loss(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
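# Test-set evaluation sketch (not in the original script, which leaves x_test and
# y_test unused): run the trained model on the test images and report accuracy.
# Predicting all 10,000 test images in one call fits in memory for a model this small.
pred = lstm_net(x_test, is_training=False)
print("Test Accuracy: %f" % accuracy(pred, y_test))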