import numpy as np
import struct
import matplotlib.pyplot as plt

# Training set image file
train_images_idx3_ubyte_file = 'minist_data/train-images.idx3-ubyte'
# Training set label file
train_labels_idx1_ubyte_file = 'minist_data/train-labels.idx1-ubyte'
# Test set image file
test_images_idx3_ubyte_file = 'minist_data/t10k-images.idx3-ubyte'
# Test set label file
test_labels_idx1_ubyte_file = 'minist_data/t10k-labels.idx1-ubyte'
def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Generic parser for idx3 (image) files.
    :param idx3_ubyte_file: path to the idx3 file
    :return: the parsed image data set
    """
    # Read the raw binary data
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()
    # Parse the header: magic number, number of images, rows per image, columns per image
    offset = 0
    # All 4 header fields are big-endian 32-bit integers, hence four 'i's here;
    # the label files below only need two ('>ii').
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
    # print('magic: %d, images: %d, size: %d*%d' % (magic_number, num_images, num_rows, num_cols))
    # Parse the image data
    image_size = num_rows * num_cols
    # Advance the offset past the header; per the format described below, it now points at 0016.
    offset += struct.calcsize(fmt_header)
    # Each pixel is an unsigned char, format 'B'. The count (784) must be included so
    # that unpack_from reads a whole image instead of a single pixel value.
    fmt_image = '>' + str(image_size) + 'B'
    # print(fmt_image, offset, struct.calcsize(fmt_image))
    images = np.empty((num_images, num_rows, num_cols))
    for i in range(num_images):
        # if (i + 1) % 10000 == 0:
        #     print('parsed %d images' % (i + 1))
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
        # plt.imshow(images[i], 'gray')
        # plt.pause(0.00001)
    # plt.show()
    return images
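# The per-image unpack loop above is easy to follow but slow for 60,000 images.
# A minimal vectorized sketch (my addition, assuming the same IDX layout parsed
# above): decode the whole pixel block in a single np.frombuffer call.
def decode_idx3_ubyte_fast(idx3_ubyte_file):
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()
    _, num_images, num_rows, num_cols = struct.unpack_from('>iiii', bin_data, 0)
    # Skip the 16-byte header and reinterpret the remaining bytes as unsigned chars.
    pixels = np.frombuffer(bin_data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_images, num_rows, num_cols).astype(np.float64)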
def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Generic parser for idx1 (label) files.
    :param idx1_ubyte_file: path to the idx1 file
    :return: the parsed label data set
    """
    # Read the raw binary data
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()
    # Parse the header: magic number and number of labels
    offset = 0
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    # print('magic: %d, labels: %d' % (magic_number, num_images))
    # Parse the label data
    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    labels = np.empty(num_images)
    for i in range(num_images):
        # if (i + 1) % 10000 == 0:
        #     print('parsed %d labels' % (i + 1))
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels
def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """
    TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
    [offset] [type]          [value]           [description]
    0000     32 bit integer  0x00000803(2051)  magic number
    0004     32 bit integer  60000             number of images
    0008     32 bit integer  28                number of rows
    0012     32 bit integer  28                number of columns
    0016     unsigned byte   ??                pixel
    0017     unsigned byte   ??                pixel
    ........
    xxxx     unsigned byte   ??                pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)
def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """
    TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
    [offset] [type]          [value]           [description]
    0000     32 bit integer  0x00000801(2049)  magic number (MSB first)
    0004     32 bit integer  60000             number of items
    0008     unsigned byte   ??                label
    0009     unsigned byte   ??                label
    ........
    xxxx     unsigned byte   ??                label
    The labels values are 0 to 9.
    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*1, where n is the number of images
    """
    return decode_idx1_ubyte(idx_ubyte_file)
def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """
    TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
    [offset] [type]          [value]           [description]
    0000     32 bit integer  0x00000803(2051)  magic number
    0004     32 bit integer  10000             number of images
    0008     32 bit integer  28                number of rows
    0012     32 bit integer  28                number of columns
    0016     unsigned byte   ??                pixel
    0017     unsigned byte   ??                pixel
    ........
    xxxx     unsigned byte   ??                pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)
def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """
    TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
    [offset] [type]          [value]           [description]
    0000     32 bit integer  0x00000801(2049)  magic number (MSB first)
    0004     32 bit integer  10000             number of items
    0008     unsigned byte   ??                label
    0009     unsigned byte   ??                label
    ........
    xxxx     unsigned byte   ??                label
    The labels values are 0 to 9.
    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*1, where n is the number of images
    """
    return decode_idx1_ubyte(idx_ubyte_file)
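# A quick sanity check for the loaders (an illustrative sketch, not in the original
# post): load the training set and display the first digit with its label.
if __name__ == '__main__':
    imgs = load_train_images()
    lbls = load_train_labels()
    print(imgs.shape, lbls.shape)  # (60000, 28, 28) (60000,)
    plt.imshow(imgs[0], cmap='gray')
    plt.title('label: %d' % lbls[0])
    plt.show()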
import math

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    m = X.shape[0]  # number of samples in the data set
    mini_batches = []
    np.random.seed(seed)
    # Step 1: shuffle the data.
    # Given an integer m, np.random.permutation returns a random permutation of
    # 0..m-1; given an array, it returns a shuffled copy of that array.
    permutation = list(np.random.permutation(m))
    shuffled_X = X[permutation, :, :, :]
    shuffled_Y = Y[permutation, :]
    # Step 2: partition the shuffled data set according to the batch size.
    num_complete_minibatches = math.floor(m / mini_batch_size)  # number of full batches
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :, :, :]
        mini_batch_Y = shuffled_Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # If the data set size is not divisible by the batch size, the leftover samples
    # form one final, smaller batch.
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size : m, :, :, :]
        mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
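# For example (a small check of mine, not from the original post): 60,000 samples
# with a batch size of 64 yield math.floor(60000/64) = 937 full batches plus one
# leftover batch of 60000 - 937*64 = 32 samples, i.e. 938 batches in total:
#   batches = random_mini_batches(np.zeros((60000, 28, 28, 1)), np.zeros((60000, 10)))
#   len(batches)          -> 938
#   batches[0][0].shape   -> (64, 28, 28, 1)
#   batches[-1][0].shape  -> (32, 28, 28, 1)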
def convert_to_one_hot(Y, C):
    # 1. np.eye(C) builds the C x C identity matrix.
    # 2. Y.reshape(-1) flattens Y into a 1-D array of length m.
    # 3. np.eye(C)[Y.reshape(-1)] takes, for each entry of Y in turn, the matching
    #    row of the identity matrix, so the result has shape m x C.
    Y = np.eye(C)[Y.reshape(-1)]
    return Y
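# A tiny example (my illustration) makes the row-indexing trick concrete:
#   convert_to_one_hot(np.array([1, 0, 2]), 4)
#   -> [[0. 1. 0. 0.]
#       [1. 0. 0. 0.]
#       [0. 0. 1. 0.]]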
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import time
import cv2
import minist_project.cnn_utils as cnn_utils
from minist_project.input_data import load_train_images, load_train_labels, load_test_images, load_test_labels
# Create placeholders so that we can feed training data into the network later.
def create_placeholders(n_H0, n_W0, n_C0, n_y):
    X = tf.placeholder(tf.float32, [None, n_H0, n_W0, n_C0])
    Y = tf.placeholder(tf.float32, [None, n_y])
    return X, Y
# Initialize the conv-layer weights. The fully connected layers' parameters need
# no manual initialization; TensorFlow takes care of that part.
def initialize_parameters():
    tf.set_random_seed(1)
    # The second argument of tf.get_variable is the variable's shape, [f, f, c, filters]:
    # f is the kernel size, c the kernel's channel count (same as the previous
    # layer's), and filters the number of kernels.
    # W1 holds the first conv layer's weights, W2 the second's.
    W1 = tf.get_variable("W1", [5, 5, 1, 32], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    W2 = tf.get_variable("W2", [5, 5, 32, 64], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    parameters = {"W1": W1, "W2": W2}
    return parameters
# Forward propagation: a LeNet-5 style network.
def forward_propagation(X, parameters):
    # Build the network from the LeNet-5 parameters defined above.
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    # conv1
    # tf.nn.conv2d's first argument is the input, the second the kernel (the
    # weights initialized above). The third is the stride in
    # [batch, height, width, channels] order; since the kernel only slides across
    # the image's height and width, the first and fourth entries are normally 1.
    Z1 = tf.nn.conv2d(input=X, filter=W1, strides=[1, 1, 1, 1], padding="VALID")
    A1 = tf.nn.relu(Z1)  # activation
    # pool1 (the arguments mirror those of the convolution above)
    P1 = tf.nn.max_pool(value=A1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # conv2
    Z2 = tf.nn.conv2d(input=P1, filter=W2, strides=[1, 1, 1, 1], padding="VALID")
    A2 = tf.nn.relu(Z2)
    # pool2
    P2 = tf.nn.max_pool(value=A2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # flatten
    P = tf.contrib.layers.flatten(P2)
    # fc1: the first argument is the input, the second the number of units in the
    # layer; the default activation is relu.
    f1 = tf.contrib.layers.fully_connected(P, 120)
    # fc2
    f2 = tf.contrib.layers.fully_connected(f1, 84)
    # fc3 (output layer): no activation function here, because the softmax is
    # applied inside the cost computation.
    Z = tf.contrib.layers.fully_connected(f2, 10, activation_fn=None)
    return Z
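# Tracing the tensor shapes checks the wiring (a sketch I added; for a VALID
# convolution out = in - f + 1, and SAME 2x2 pooling with stride 2 halves H and W):
def trace_lenet_shapes(h=28, w=28):
    h, w = h - 5 + 1, w - 5 + 1  # conv1 5x5 VALID -> 24x24x32
    h, w = h // 2, w // 2        # pool1 2x2 /2    -> 12x12x32
    h, w = h - 5 + 1, w - 5 + 1  # conv2 5x5 VALID -> 8x8x64
    h, w = h // 2, w // 2        # pool2 2x2 /2    -> 4x4x64
    print("%dx%dx64 -> flatten %d -> fc 120 -> fc 84 -> fc 10" % (h, w, h * w * 64))
# trace_lenet_shapes() prints: 4x4x64 -> flatten 1024 -> fc 120 -> fc 84 -> fc 10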
def compute_cost(Z3, Y):
    # softmax_cross_entropy_with_logits applies the softmax itself, which is why
    # the output layer above has no activation.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3, labels=Y))
    return cost
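# In numpy terms (an illustrative sketch, not from the post), the per-example cost
# is the cross-entropy of the softmax of the logits:
def softmax_cross_entropy_np(z, y):
    p = np.exp(z) / np.sum(np.exp(z))  # softmax over the logits
    return -np.sum(y * np.log(p))      # cross-entropy against the one-hot label
# softmax_cross_entropy_np(np.array([2.0, 1.0, 0.1]), np.array([1.0, 0.0, 0.0])) ~ 0.417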
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001, num_epochs=150, minibatch_size=64, print_cost=True, isPlot=True):
    tf.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    (m, n_H0, n_W0, n_C0) = X_train.shape  # dimensions of the training set
    n_y = Y_train.shape[1]
    costs = []  # stores the cost of the logged epochs
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    parameters = initialize_parameters()
    Z5 = forward_propagation(X, parameters)
    cost = compute_cost(Z5, Y)
    # Adam refines plain minibatch gradient descent and converges faster.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()  # saver for persisting the trained model
    total_time = 0  # accumulates the time of every 5 epochs
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(1, num_epochs + 1):
            start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
            minibatch_cost = 0
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            # Split the data set into minibatches.
            minibatches = cnn_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                # Feed each batch into the network and take one gradient step.
                _, temp_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches
            end_time = time.perf_counter()
            epoch_time = end_time - start_time
            total_time += epoch_time
            if print_cost:
                if epoch % 5 == 0:
                    print("epoch " + str(epoch) + ", cost: " + str(minibatch_cost) + " ; time for this epoch: " + str(epoch_time) + " s, total for the last 5 epochs: " + str(total_time))
                    total_time = 0
            if epoch % 5 == 0:
                costs.append(minibatch_cost)
        # Save the model.
        saver.save(sess, "model/model_LeNet5/minist-model")
        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.ylabel("cost")
            plt.xlabel("epochs (per 5)")
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()
        # tf.argmax with axis 1 returns the index of the largest value in each row of Z.
        predict_op = tf.argmax(Z5, 1)
        # tf.equal compares the two tensors elementwise: True where equal, False otherwise.
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        # tf.cast converts the booleans to floats (True -> 1.0, False -> 0.0),
        # and tf.reduce_mean averages them.
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("correct_prediction accuracy = " + str(accuracy))
        # Feed the full data sets through the accuracy tensor.
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("train accuracy: " + str(train_accuracy))
        print("test accuracy: " + str(test_accuracy))
    return (train_accuracy, test_accuracy, parameters)
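# The model is saved above but never reloaded in the post. A minimal restore
# sketch (my addition, assuming the graph-building functions and the checkpoint
# path used above): rebuild the same graph, restore the weights, run predictions.
def predict(images):
    tf.reset_default_graph()
    X, _ = create_placeholders(28, 28, 1, 10)
    parameters = initialize_parameters()
    Z5 = forward_propagation(X, parameters)
    predict_op = tf.argmax(Z5, 1)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # This works because the rebuilt graph creates variables with the same names.
        saver.restore(sess, "model/model_LeNet5/minist-model")
        return sess.run(predict_op, feed_dict={X: images})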
# Load the data sets.
train_x = load_train_images()
train_y = load_train_labels()
test_x = load_test_images()
test_y = load_test_labels()
# Each MNIST image is 28x28; the network expects 28x28x1.
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], train_x.shape[2], 1)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], test_x.shape[2], 1)
# The labels also need converting to int; the loader returns floats such as 9.0.
train_y = train_y.reshape(len(train_y), 1).astype(int)
test_y = test_y.reshape(len(test_y), 1).astype(int)
train_y = cnn_utils.convert_to_one_hot(train_y, 10)
test_y = cnn_utils.convert_to_one_hot(test_y, 10)
print("train x:", train_x.shape)
print("train y:", train_y.shape)
print("test x:", test_x.shape)
print("test y:", test_y.shape)
# Train the model.
tf.reset_default_graph()
np.random.seed(1)
_, _, parameters = model(train_x, train_y, test_x, test_y, num_epochs=30)
train x: (60000, 28, 28, 1)
train y: (60000, 10)
test x: (10000, 28, 28, 1)
test y: (10000, 10)
epoch 5, cost: 0.13700463391617798 ; time for this epoch: 2.6696909999999434 s, total for the last 5 epochs: 13.63106999999988
epoch 10, cost: 0.051648345611684646 ; time for this epoch: 2.728288999999961 s, total for the last 5 epochs: 13.54936499999991
epoch 15, cost: 0.02288695655724883 ; time for this epoch: 2.730446000000029 s, total for the last 5 epochs: 13.464165000000037
epoch 20, cost: 0.010089922937483614 ; time for this epoch: 2.6733050000000276 s, total for the last 5 epochs: 13.692039999999963
epoch 25, cost: 0.0053147655032918335 ; time for this epoch: 2.714560000000006 s, total for the last 5 epochs: 13.653553000000045
epoch 30, cost: 0.004936797035458563 ; time for this epoch: 2.7167780000000334 s, total for the last 5 epochs: 13.743517000000054
train accuracy: 0.99885
test accuracy: 0.9837
Original article: https://blog.csdn.net/CarryLvan/article/details/103693489
Source: https://www.cnblogs.com/benming/p/12106954.html