Dataset description: MNIST is an entry-level computer vision dataset of handwritten digit images, where each image carries a label indicating which digit it shows. The dataset is split into two parts, and each MNIST sample consists of a handwritten digit image "xs" and its corresponding label "ys":

  • A training set of 60,000 examples: images in mnist.train.images, labels in mnist.train.labels.
  • A test set of 10,000 examples: images in mnist.test.images, labels in mnist.test.labels.

Each image "xs" contains 28×28 pixels. The image is stored as an array of numbers, which is flattened into a vector of length 28×28 = 784 (this discards the 2-D structure of the image), so every image in the dataset is a point in a 784-dimensional vector space.

mnist.train.images is a tensor of shape [60000, 784] and mnist.test.images is a tensor of shape [10000, 784]. The first dimension indexes the images and the second indexes the pixels of each image (each element of the vector is the intensity of one pixel of one image, with values between 0 and 1).

mnist.train.labels is a tensor of shape [60000, 10]. Each label is a one-hot vector: the component corresponding to the digit (0 through 9) is 1 and all other components are 0; for example, the digit 3 is encoded as [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].
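To make these shapes concrete, here is a minimal sketch using the same read_data_sets helper that the programs below use (note that read_data_sets holds 5,000 of the 60,000 training images out as a validation set):

from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

mnist = read_data_sets("MNIST_data/", one_hot=True)

print mnist.train.images.shape  # (55000, 784): 5000 images are held out for validation
print mnist.train.labels.shape  # (55000, 10): one-hot labels
print mnist.test.images.shape   # (10000, 784)
print mnist.train.labels[0]     # a one-hot vector, e.g. with a single 1.0 at the position of the digit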
Program description: handwritten digit recognition is implemented with several different neural network approaches.

1. A neural network implemented from scratch

For the algorithm theory, see: Neural network algorithms
Ipynb demo: Ipynb file
Python code: Python code

import numpy as np

def sigmoid(z):
    """The sigmoid (logistic) function."""
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z) * (1 - sigmoid(z))

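For reference, these two functions implement the sigmoid and its derivative:

$$\sigma(z) = \frac{1}{1+e^{-z}}, \qquad \sigma'(z) = \frac{e^{-z}}{(1+e^{-z})^2} = \sigma(z)\bigl(1-\sigma(z)\bigr)$$
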
class QuadraticCost(object):
    """Quadratic cost function."""

    @staticmethod
    def fn(a, y):
        """Return the total cost for an output a and desired output y."""
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error for the quadratic cost:
        z is the weighted input, a the activation (actual output), y the desired output."""
        return (a - y) * sigmoid_prime(z)

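The two static methods correspond to the quadratic cost of a single example and its output-layer error. With weighted input z, activation a = σ(z) and target y:

$$C = \frac{1}{2}\lVert a-y\rVert^2, \qquad \delta^L_j = \frac{\partial C}{\partial a_j}\,\sigma'(z_j) = (a_j - y_j)\,\sigma'(z_j)$$
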
class CrossEntropyCost(object):
    """Cross-entropy cost function."""

    @staticmethod
    def fn(a, y):
        """Return the total cost for an output a and desired output y;
        np.nan_to_num converts nan to 0.0."""
        return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error for the cross-entropy cost:
        z is the weighted input, a the activation (actual output), y the desired output."""
        return a - y

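For the cross-entropy cost the σ'(z) factor cancels, which is why delta returns simply a − y:

$$C = -\sum_j\bigl[y_j\ln a_j + (1-y_j)\ln(1-a_j)\bigr], \qquad \delta^L_j = \frac{a_j-y_j}{a_j(1-a_j)}\,\sigma'(z_j) = a_j - y_j$$
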
class Network(object):
    def __init__(self, sizes=list(), cost=CrossEntropyCost, eta=3.0, mini_batch_size=25, epochs=20, lmbda=0.02):
        # The length of sizes equals the number of layers in the network.
        self.num_layers = len(sizes)

        # For example, sizes=[2, 3, 1] creates a Network with 2 neurons in the first layer,
        # 3 in the second layer and 1 in the last layer.
        self.sizes = sizes

        # Cost function of the network.
        self.cost = cost

        # Randomly initialize the biases: np.random.randn draws from a Gaussian with mean 0 and standard deviation 1.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]

        # Randomly initialize the weights: Gaussian with mean 0 and standard deviation 1/sqrt(n_in);
        # zip pairs the sizes of consecutive layers and returns a list of tuples.
        self.weights = [np.random.randn(y, x) / np.sqrt(x) for x, y in zip(sizes[:-1], sizes[1:])]

        self.eta = eta  # Learning rate.
        self.mini_batch_size = mini_batch_size  # Number of samples per mini-batch.
        self.epochs = epochs  # Number of training epochs.
        self.lmbda = lmbda  # L2 regularization parameter.

    def fit(self, train_data, test_data=None):
        self.sgd(train_data, test_data)

    def predict(self, x_predictions):
        predictions = [np.argmax(self.feed_forward(x.reshape(784, 1))) for x in x_predictions]
        return predictions

    def score(self, test_data):
        """Evaluate the network: run the forward pass on the test set,
        compare the outputs with the test labels and return the accuracy."""
        n_test = len(test_data)
        test_results = [(np.argmax(self.feed_forward(x.reshape(784, 1))), np.argmax(y))
                        for (x, y) in test_data]

        return sum(int(x == y) for (x, y) in test_results) / float(n_test)

    def feed_forward(self, x):
        """Forward pass: compute the activations sigma(wx + b) layer by layer."""
        for b, w in zip(self.biases, self.weights):
            x = sigmoid(np.dot(w, x) + b)

        return x

    def sgd(self, train_data, test_data):
        """Mini-batch stochastic gradient descent: train_data is a list of (x, y) pairs;
        test_data is used to report the accuracy after each epoch."""
        import random
        n = len(train_data)
        for j in xrange(self.epochs):
            # Shuffle the training data, then split it into mini-batches of size mini_batch_size.
            random.shuffle(train_data)
            mini_batches = [train_data[k:(k + self.mini_batch_size)]
                            for k in xrange(0, n, self.mini_batch_size)]

            # Apply one gradient-descent step per mini-batch, updating the network's weights and biases.
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, n)

            if test_data:
                print "Epoch %s/%s accuracy: %s" % (j + 1, self.epochs, self.score(test_data))

    def update_mini_batch(self, mini_batch, train_dataset_length):
        """Apply one gradient-descent step for a mini-batch, updating the weights and biases."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]  # np.zeros returns an array of the given shape filled with 0.
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Run backpropagation for every (x, y) pair in the mini-batch and accumulate
        # the per-layer gradients of the cost function.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.back_prop(x.reshape(784, 1), y.reshape(10, 1))
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Mini-batch stochastic gradient descent update (with L2 regularization on the weights).
        self.weights = [(1 - self.eta * self.lmbda / train_dataset_length) * w - (self.eta / len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (self.eta / len(mini_batch)) * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def back_prop(self, x, y):
        """Backpropagation: compute the partial derivatives of the cost with respect to
        the weights and biases, layer by layer."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: compute and store the weighted inputs and activations of every layer.
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: first compute and store the output-layer error (the bias gradient)
        # and the output-layer weight gradient.
        delta = self.cost.delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Backward pass: starting from the second-to-last layer, propagate the error backwards
        # and store the bias and weight gradients of each layer.
        for l in xrange(2, self.num_layers):
            z = zs[-l]
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sigmoid_prime(z)
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return nabla_b, nabla_w

    def load(self, filename='NeuralNetwork.pickle'):
        import pickle
        with open(filename, 'rb') as f:
            data = pickle.load(f)
        self.weights = data["weights"]
        self.biases = data["biases"]
        self.sizes = data["sizes"]
        self.cost = data["cost"]
        self.eta = data["eta"]
        self.mini_batch_size = data["mini_batch_size"]
        self.epochs = data["epochs"]
        print "Model loaded successfully!"

    def save(self, filename='NeuralNetwork.pickle'):
        data = {
            "weights": self.weights,
            "biases": self.biases,
            "sizes": self.sizes,
            "cost": self.cost,
            "eta": self.eta,
            "mini_batch_size": self.mini_batch_size,
            "epochs": self.epochs,
        }
        import pickle
        with open(filename, 'wb') as f:
            pickle.dump(data, f)
        print "Model saved successfully!"

if __name__ == '__main__':
    # Note: read_data_sets returns 55,000 training images and 10,000 test images
    # (5,000 images are held out for validation).
    # Load the MNIST dataset.
    from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
    mnist = read_data_sets("MNIST_data/", one_hot=True)

    # zip pairs corresponding elements of its arguments into tuples and returns a list of those tuples.
    train_data_set = zip(mnist.train.images, mnist.train.labels)
    test_data_set = zip(mnist.test.images, mnist.test.labels)

    cls = Network([784, 400, 10])

    cls.fit(train_data_set, test_data_set)

    cls.save()

    print "Accuracy: %s" % cls.score(test_data_set)

2. A simple softmax network implemented with TensorFlow

For the algorithm theory, see: Neural network algorithms
Ipynb demo: Ipynb file
Python code: Python code

# Number of epochs of mini-batch stochastic gradient descent
hm_epochs = 20

# Number of samples per mini-batch
batch_size = 25

# Load the MNIST dataset
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
mnist = read_data_sets("MNIST_data/", one_hot=True)

import tensorflow as tf

# Placeholder x: the shape [None, 784] means the first dimension can hold any number of inputs,
# and the second dimension is the 784-pixel image vector.
x = tf.placeholder(tf.float32, [None, 784])

# Weight variable
W = tf.Variable(tf.zeros([784, 10]))

# Bias variable
b = tf.Variable(tf.zeros([10]))

# Output layer: apply softmax
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Placeholder for the true labels of x
y_ = tf.placeholder("float", [None, 10])

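The model is a single-layer softmax classifier: for an input image x it computes the evidence xW + b and turns it into class probabilities with the softmax function:

$$y = \mathrm{softmax}(xW + b), \qquad \mathrm{softmax}(z)_i = \frac{e^{z_i}}{\sum_j e^{z_j}}$$
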
# Define op: cross-entropy cost of the softmax output
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))

# Define op: gradient-descent optimizer with learning rate 0.01, minimizing the cross-entropy cost
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cross_entropy)

# Define op: per-example correctness; tf.argmax returns the index of the largest value
correct_predictions = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

# Define op: initialize all variables
init = tf.global_variables_initializer()

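Computing -tf.reduce_sum(y_ * tf.log(y)) on top of an explicit tf.nn.softmax can produce log(0) for very confident predictions. A more numerically stable variant, sketched here under the same placeholders, is to keep the raw logits and let TensorFlow fuse softmax and cross-entropy (the same API used in section 3):

# Numerically more stable alternative (sketch): work with raw logits instead of softmax probabilities.
logits = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
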
# Create a session and run all the operations in the graph.
with tf.Session() as sess:
    # Run op: initialize all variables
    sess.run(init)

    for epoch in range(hm_epochs):
        epoch_loss = 0
        for _ in range(int(mnist.train.num_examples / float(batch_size))):
            # Fetch a mini-batch of size batch_size
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Run ops: one optimization step and the cross-entropy cost
            _, c = sess.run([optimizer, cross_entropy], feed_dict={x: batch_xs, y_: batch_ys})

            # Accumulate the loss
            epoch_loss += c

        print "Epoch %d/%d, loss: %s" % (epoch + 1, hm_epochs, epoch_loss)

    # Evaluate on the test set: correct_predictions marks which test images are classified correctly;
    # accuracy casts the booleans to floats and takes the mean.
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'))

    print "Accuracy: ", accuracy.eval({x: mnist.test.images, y_: mnist.test.labels})

3. A DNN softmax network implemented with TensorFlow

For the algorithm theory, see: Neural network algorithms
Ipynb demo: Ipynb file
Python code: Python code

import tensorflow as tf

def neural_network_mode(x_train, sizes=list((784, 400, 10))):
    """Define the neural network model."""
    num_layers = len(sizes)
    activation = x_train
    for i in range(num_layers - 1):
        layer = {'weights': tf.Variable(tf.random_normal([sizes[i], sizes[i + 1]])),
                 'biases': tf.Variable(tf.random_normal([sizes[i + 1]]))}

        # input_data * weights + biases; then apply the activation function
        activation = tf.add(tf.matmul(activation, layer['weights']), layer['biases'])
        activation = tf.nn.sigmoid(activation)

    return activation

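For the default sizes=(784, 400, 10), the loop creates two weight matrices and two bias vectors: a 784x400 weight matrix with a 400-dimensional bias for the hidden layer, and a 400x10 weight matrix with a 10-dimensional bias for the output layer. An input of shape [None, 784] therefore becomes [None, 400] after the hidden layer and [None, 10] at the output.
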
def train_neural_network(x_train, y_train, sizes, batch_size=25, hm_epochs=20):
    output = neural_network_mode(x_train, sizes)

    # Define op: softmax cross-entropy cost.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y_train))

    # Define op: AdamOptimizer (Adam: A Method for Stochastic Optimization).
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Define op: initialize all variables.
    init = tf.global_variables_initializer()

    # Create a session and run all the operations in the graph.
    with tf.Session() as sess:
        # Run op: initialize all variables
        sess.run(init)

        # train neural network
        for epoch in range(hm_epochs):
            epoch_loss = 0

            for _ in range(int(mnist.train.num_examples / float(batch_size))):
                # Fetch a mini-batch of size batch_size
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)

                # Run ops: one optimization step and the cross-entropy cost
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})

                # Accumulate the loss
                epoch_loss += c

            print "Epoch %d/%d, loss: %f" % (epoch + 1, hm_epochs, epoch_loss)

        # Find which test-set predictions are correct.
        correct_predictions = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))

        # Determine the fraction of correct predictions: cast the booleans to floats and take the mean.
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'))

        print "Accuracy: ", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})

if __name__ == '__main__':
    # Load the MNIST dataset
    from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
    mnist = read_data_sets("MNIST_data/", one_hot=True)

    # Define the network architecture
    layers = [784, 600, 10]

    # height x width
    x = tf.placeholder('float', [None, 784])
    y = tf.placeholder('float')

    train_neural_network(x, y, layers)

4. A convolutional neural network implemented with TensorFlow

For the algorithm theory, see: Neural network algorithms
Ipynb demo: Ipynb file
Python code: Python code

# Convolution: stride 1, with zero padding ('SAME') so the output has the same spatial size as the input.
import tensorflow as tf
def conv2d(xs, w):
    return tf.nn.conv2d(input=xs, filter=w, strides=[1, 1, 1, 1], padding='SAME')

# Pooling: max pooling with a 2x2 window (ksize) and a stride of 2 (strides).
def max_pool_2x2(xs):
    return tf.nn.max_pool(xs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

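With 'SAME' padding and the strides above, the convolutions preserve the spatial dimensions and each pooling step halves them, which is where the 7x7x64 figure in the network definition below comes from: 28x28 -> conv1 -> 28x28 -> pool1 -> 14x14 -> conv2 -> 14x14 -> pool2 -> 7x7, with 64 feature maps after the second convolution.
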
# Weight initialization: add a small amount of noise to break symmetry and avoid zero gradients.
def weight_variable(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.1)
    return tf.Variable(initial)

# Bias initialization: with ReLU neurons, use a small positive value so that neurons do not start out with a constant 0 output.
def bias_variable(shape):
    initial = tf.constant(shape=shape, value=0.1)
    return tf.Variable(initial)

# Network definition: input layer 28x28, first conv layer 5x5, first pooling layer 2x2,
# second conv layer 5x5, second pooling layer 2x2, fully connected layer with 1024 neurons, output layer with 10 neurons.
def convolution_neural_network(xs, n_classes=10):
    weights = {
        'w_conv1': weight_variable([5, 5, 1, 32]),    # First conv layer: 5x5 kernels, 1 input channel, 32 output feature maps.
        'w_conv2': weight_variable([5, 5, 32, 64]),   # Second conv layer: 5x5 kernels, 32 input channels, 64 output feature maps.
        'w_fc': weight_variable([7 * 7 * 64, 1024]),  # Fully connected layer: the feature maps have shrunk to 7x7; feed them into 1024 neurons.
        'w_out': weight_variable([1024, n_classes])
    }

    bias = {
        'b_conv1': bias_variable([32]),
        'b_conv2': bias_variable([64]),
        'b_fc': bias_variable([1024]),
        'b_out': bias_variable([n_classes])
    }

    # Reshape the flat input into a 4-D tensor: the first dimension holds any number of inputs,
    # the second and third are the image width and height, and the last is the number of
    # channels (colors). Grayscale images have 1 channel; RGB images would have 3.
    xs = tf.reshape(xs, shape=[-1, 28, 28, 1])

    # First conv layer: convolve, add the bias, then feed through ReLU neurons.
    conv1 = tf.nn.relu(conv2d(xs, weights['w_conv1']) + bias['b_conv1'])
    pool1 = max_pool_2x2(conv1)  # First pooling layer
    conv2 = tf.nn.relu(conv2d(pool1, weights['w_conv2']) + bias['b_conv2'])  # Second conv layer
    pool2 = max_pool_2x2(conv2)  # Second pooling layer

    # Fully connected layer: flatten the pooling output into a 2-D tensor; the first dimension holds
    # any number of inputs, the second is the 64 feature maps of size 7x7 flattened into one vector.
    fc_input = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc_output = tf.nn.relu(tf.matmul(fc_input, weights['w_fc']) + bias['b_fc'])
    fc_output = tf.nn.dropout(fc_output, keep_prob)  # Dropout: keep each neuron output with probability keep_prob to reduce overfitting.

    # Output layer
    output = tf.matmul(fc_output, weights['w_out']) + bias['b_out']

    return output

# Train the convolutional network
def train_neural_network(xs, ys, n_classes=10, batch_size=128, hm_epochs=10, keep_rate=0.8):
    output = convolution_neural_network(xs, n_classes)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=ys))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y, keep_prob: keep_rate})
                epoch_loss += c

            print 'Epoch', epoch + 1, 'completed out of', hm_epochs, 'loss:', epoch_loss

        correct = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        # Disable dropout (keep_prob=1.0) when evaluating on the test set.
        print 'Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})

from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

if __name__ == '__main__':
    mnist = read_data_sets("MNIST_data/", one_hot=True)  # Load the MNIST dataset

    x = tf.placeholder('float', [None, 784])
    y = tf.placeholder('float')
    keep_prob = tf.placeholder('float')

    train_neural_network(x, y, n_classes=10, batch_size=128, hm_epochs=10, keep_rate=0.8)

5. A convolutional neural network implemented with TFLearn

For the algorithm theory, see: Neural network algorithms
Ipynb demo: Ipynb file
Python code: Python code

import os
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import tflearn.datasets.mnist as mnist
if __name__ == '__main__':
    x, y, test_x, test_y = mnist.load_data(one_hot=True)  # Load the MNIST dataset
    x = x.reshape([-1, 28, 28, 1])
    test_x = test_x.reshape([-1, 28, 28, 1])

    # Input layer (stored under a new name so it does not shadow the imported input_data function)
    net_input = input_data(shape=[None, 28, 28, 1], name='input')

    # nb_filter is the number of output channels, filter_size the 5x5 kernel size
    conv1 = conv_2d(net_input, nb_filter=32, filter_size=5, activation='relu')

    # kernel_size is the 2x2 pooling window size
    pool1 = max_pool_2d(conv1, kernel_size=2)

    conv2 = conv_2d(pool1, nb_filter=64, filter_size=5, activation='relu')

    pool2 = max_pool_2d(conv2, kernel_size=2)

    fc = fully_connected(pool2, n_units=1024, activation='relu')
    fc = dropout(fc, 0.8)

    output = fully_connected(fc, n_units=10, activation='softmax')

    network = regression(output, optimizer='adam', learning_rate=0.01, loss='categorical_crossentropy', name='targets')

    model = tflearn.DNN(network, tensorboard_dir='log')

    if os.path.exists('tflearncnn.model.index') and os.path.exists('tflearncnn.model.meta'):
        model.load('tflearncnn.model')
    else:
        model.fit({'input': x}, {'targets': y}, n_epoch=10,
                  validation_set=({'input': test_x}, {'targets': test_y}),
                  snapshot_step=500, batch_size=128, show_metric=True, run_id='mnist')
        model.save('tflearncnn.model')

    # Make a prediction
    print model.predict([test_x[1]])
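
model.predict returns a set of 10 class probabilities per input (one per digit, since the output layer is a 10-unit softmax). A small sketch of turning that into a predicted digit and checking it against the one-hot test label:

    # Sketch: convert the softmax probabilities into a digit and compare with the true label.
    import numpy as np
    prediction = model.predict([test_x[1]])
    print 'Predicted digit:', np.argmax(prediction[0])
    print 'True digit:', np.argmax(test_y[1])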