第一步:构造列表,使用with open() as f: pickle.load进行数据的载入, 使用.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')进行维度变换和类型转换

第二步:使用np.concatenate()将列表进行串接, 选出前5000个数据作为训练集,选择第5000到5500个数据作为验证集,从测试集的数据中挑选出前500个数据作为测试集


第四步:将图像样本减去均值, 即X -= np.mean(train_X, axis=0) ,并使用transpose()将样本数据的维度进行变换



import pickle as pickle
import numpy as np
import os
import importlib
import sys
#from scipy.misc import imread

def load_CIFAR_batch(filename):
  """Load a single pickled CIFAR batch file.

  Args:
    filename: Path to one batch file in the pickled "python version" layout,
      i.e. a dict with a 'data' array and a 'labels' list.

  Returns:
    A tuple (X, Y). X is a float array of shape (N, 32, 32, 3) (channels
    last) and Y is a numpy array built from the batch's label list.
  """
  # NOTE(review): unpickling can execute arbitrary code; only load batch
  # files that come from a trusted source.
  with open(filename, 'rb') as f:
    datadict = pickle.load(f, encoding='latin1')
  X = datadict['data']
  Y = datadict['labels']
  # Each stored row is a flattened image: reshape it to (N, C, H, W), then
  # move the channel axis last. -1 lets the batch size follow the data
  # (10000 for the standard batches).
  X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
  Y = np.array(Y)
  return X, Y

def load_CIFAR10(ROOT):
  """Load CIFAR-10 training and test arrays from a directory of batch files.

  Only the first training batch (``data_batch_1``) is read, because the loop
  covers ``range(1, 2)``; the test data comes from ``test_batch``.

  Args:
    ROOT: Directory containing the ``data_batch_*`` and ``test_batch`` files.

  Returns:
    A tuple (Xtr, Ytr, Xte, Yte) of training images, training labels, test
    images and test labels, as produced by load_CIFAR_batch.
  """
  xs = []
  ys = []
  for b in range(1, 2):
    f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
    X, Y = load_CIFAR_batch(f)
    # Collect every batch; without these appends np.concatenate below would
    # be handed an empty list and raise.
    xs.append(X)
    ys.append(Y)
  # Join the per-batch arrays along the first (sample) axis.
  Xtr = np.concatenate(xs)
  Ytr = np.concatenate(ys)
  # Load the test data.
  Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
  return Xtr, Ytr, Xte, Yte

def get_CIFAR10_data(num_training=5000, num_validation=500, num_test=500,
                     cifar10_dir=None):
    """Load CIFAR-10 from disk and preprocess it for the classifiers.

    The training array returned by load_CIFAR10 is split into a training
    part (the first ``num_training`` samples) and a validation part (the
    following ``num_validation`` samples); the first ``num_test`` test
    samples are kept. The mean training image is subtracted from all three
    splits and the images are transposed to channels-first layout.

    Args:
        num_training: Number of training samples to keep.
        num_validation: Number of validation samples, taken right after the
            training samples.
        num_test: Number of test samples to keep.
        cifar10_dir: Directory holding the CIFAR-10 batch files. When None,
            the original hard-coded location is used.

    Returns:
        A dict with keys 'X_train', 'y_train', 'X_val', 'y_val', 'X_test'
        and 'y_test'.
    """
    if cifar10_dir is None:
        # Original hard-coded dataset location, kept as the default so that
        # existing callers behave as before.
        cifar10_dir = 'D://BaiduNetdiskDownload//神经网络入门基础(PPT,代码)//绁炵粡缃戠粶鍏ラ棬鍩虹锛圥PT锛屼唬鐮侊級//cifar-10-batches-py//'

    # Load the raw CIFAR-10 data.
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data. Indexing with a range is fancy indexing, so each
    # split is a copy and the in-place mean subtraction below cannot touch
    # the arrays returned by the loader.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image of the training split from
    # every split.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Transpose so that channels come first: (N, H, W, C) -> (N, C, H, W).
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package the splits into a dictionary.
    return {
      'X_train': X_train, 'y_train': y_train,
      'X_val': X_val, 'y_val': y_val,
      'X_test': X_test, 'y_test': y_test,
    }


第一步:def __init__(数据维度,隐藏层维度,输出层维度,权重初始值范围,正则化惩罚项初始化)

第二步:构造初始化的self.params用于存放权重参数,初始化权重参数w1,b1, w2, b2 



                   第一步:对输入的X进行第一次的前向传播,包括x * w + b 线性变化和relu激活层函数np.maximum(0, x)

                   第二步:对第一层的输出结果,在第二层进行线性变化x * w + b, 获得各个类别得分


                   第四步:计算类别的概率值softmax, e^(x-max(x)) / ∑( e^(x-max(x)) ),使用np.sum(-np.log(prob[np.arange(N), y])) / N 来表示交叉熵损失函数

                   第五步:求得损失值对x的梯度: 将softmax概率在正确类别处减1, 即prob[np.arange(N), y] -= 1, 再除以N; 将损失值和对应于x的梯度进行返回

                   第一步:对于前向传播求得的softmax/dx获得的导数值dout,将其回传到第二层,求得dx(用于第一层的回传),dw2, db2, 即 dx = dout · w^T(w为第二层的权重), dw2 = x^T · dout(x为第二层输入), db2 = np.sum(dout, axis=0)

                   第二步:对于第二层回传的dx,进行第一层的回传,第一层进行了两步操作,第一步是线性变化,第二步是relu激活层,先对激活层进行回传,对于激活层的回传,输入值大于0的,回传的结果不变,输入值小于等于0的,回传的结果为0,即dx[x<=0] = 0 , 将回传的结果用于线性dx, dw1, db1与上述步骤相同

                    第三步:将求得的dw2,db2,dw1, db1保存在grads中,将loss和梯度值进行返回



from layer_utils import *
import numpy as np
class TwoLayerNet(object):
    """Two-layer fully-connected network: affine - ReLU - affine - softmax.

    Learnable parameters live in the ``self.params`` dictionary under the
    keys 'W1', 'b1', 'W2' and 'b2'.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """Initialize a new network.

        Args:
            input_dim: An integer giving the size of the input.
            hidden_dim: An integer giving the size of the hidden layer.
            num_classes: An integer giving the number of classes to classify.
            weight_scale: Scalar giving the standard deviation for random
                initialization of the weights.
            reg: Scalar giving L2 regularization strength.
        """
        # Weights are drawn from a scaled standard normal; biases start at
        # zero and are stored as row vectors of shape (1, dim).
        self.params = {}
        self.reg = reg
        self.params['W1'] = weight_scale * np.random.randn(input_dim, hidden_dim)
        self.params['b1'] = np.zeros((1, hidden_dim))
        self.params['W2'] = weight_scale * np.random.randn(hidden_dim, num_classes)
        self.params['b2'] = np.zeros((1, num_classes))

    def loss(self, X, y=None):
        """Compute loss and gradient for a minibatch of data.

        Args:
            X: Array of input data of shape (N, d_1, ..., d_k).
            y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
            If y is None, runs a test-time forward pass and returns:
            - scores: Array of shape (N, C) giving classification scores,
              where scores[i, c] is the score for X[i] and class c.

            If y is not None, runs a training-time forward and backward pass
            and returns a tuple of:
            - loss: Scalar value giving the loss.
            - grads: Dictionary with the same keys as self.params, mapping
              parameter names to gradients of the loss with respect to
              those parameters.
        """
        # Unpack the current parameter values.
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass, layer 1: affine transform followed by ReLU.
        h1, cache1 = affine_relu_forward(X, W1, b1)
        # Forward pass, layer 2: affine transform producing class scores.
        scores, cache2 = affine_forward(h1, W2, b2)   # (N, C)

        # Without labels, return the scores as the prediction result.
        if y is None:
            return scores

        # Softmax data loss plus its gradient with respect to the scores.
        grads = {}
        data_loss, dscores = softmax_loss(scores, y)
        # Add the L2 regularization penalty on both weight matrices.
        reg_loss = 0.5 * self.reg * np.sum(W1*W1) + 0.5 * self.reg * np.sum(W2*W2)
        loss = data_loss + reg_loss

        # Backward pass through layer 2 (affine), then layer 1 (ReLU + affine).
        dh1, dW2, db2 = affine_backward(dscores, cache2)
        dX, dW1, db1 = affine_relu_backward(dh1, cache1)

        # Add the gradient contribution of the regularization term.
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        grads['W1'] = dW1
        grads['b1'] = db1
        grads['W2'] = dW2
        grads['b2'] = db2

        return loss, grads


from layers import *

def affine_relu_forward(x, w, b):
  """Convenience layer that performs an affine transform followed by a ReLU.

  Args:
    x: Input to the affine layer.
    w, b: Weights and biases for the affine layer.

  Returns:
    A tuple of:
    - out: Output from the ReLU.
    - cache: (fc_cache, relu_cache), the object to give to the backward pass.
  """
  a, fc_cache = affine_forward(x, w, b)
  out, relu_cache = relu_forward(a)
  cache = (fc_cache, relu_cache)
  return out, cache

def affine_relu_backward(dout, cache):
  """Backward pass for the affine-relu convenience layer.

  Args:
    dout: Upstream derivative with respect to the ReLU output.
    cache: The (fc_cache, relu_cache) tuple from affine_relu_forward.

  Returns:
    A tuple (dx, dw, db) as returned by affine_backward.
  """
  fc_cache, relu_cache = cache
  # Undo the layers in reverse order: ReLU first, then the affine transform.
  da = relu_backward(dout, relu_cache)
  dx, dw, db = affine_backward(da, fc_cache)
  return dx, dw, db


def conv_relu_forward(x, w, b, conv_param):
  """A convenience layer that performs a convolution followed by a ReLU.

  Args:
    x: Input to the convolutional layer.
    w, b, conv_param: Weights and parameters for the convolutional layer,
      passed straight through to conv_forward_fast.

  Returns:
    A tuple of:
    - out: Output from the ReLU.
    - cache: (conv_cache, relu_cache), the object to give to the backward
      pass.
  """
  a, conv_cache = conv_forward_fast(x, w, b, conv_param)
  out, relu_cache = relu_forward(a)
  cache = (conv_cache, relu_cache)
  return out, cache

def conv_relu_backward(dout, cache):
  """Backward pass for the conv-relu convenience layer.

  Args:
    dout: Upstream derivative with respect to the ReLU output.
    cache: The (conv_cache, relu_cache) tuple from conv_relu_forward.

  Returns:
    A tuple (dx, dw, db) as returned by conv_backward_fast.
  """
  conv_cache, relu_cache = cache
  # Undo the layers in reverse order: ReLU first, then the convolution.
  da = relu_backward(dout, relu_cache)
  dx, dw, db = conv_backward_fast(da, conv_cache)
  return dx, dw, db

def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
  """Convenience layer that performs a convolution, a ReLU, and a pool.

  Args:
    x: Input to the convolutional layer.
    w, b, conv_param: Weights and parameters for the convolutional layer,
      passed straight through to conv_forward_fast.
    pool_param: Parameters for the pooling layer, passed straight through to
      max_pool_forward_fast.

  Returns:
    A tuple of:
    - out: Output from the pooling layer.
    - cache: (conv_cache, relu_cache, pool_cache), the object to give to the
      backward pass.
  """
  a, conv_cache = conv_forward_fast(x, w, b, conv_param)
  s, relu_cache = relu_forward(a)
  out, pool_cache = max_pool_forward_fast(s, pool_param)
  cache = (conv_cache, relu_cache, pool_cache)
  return out, cache

def conv_relu_pool_backward(dout, cache):
  """Backward pass for the conv-relu-pool convenience layer.

  Args:
    dout: Upstream derivative with respect to the pooling output.
    cache: The (conv_cache, relu_cache, pool_cache) tuple from
      conv_relu_pool_forward.

  Returns:
    A tuple (dx, dw, db) as returned by conv_backward_fast.
  """
  conv_cache, relu_cache, pool_cache = cache
  # Undo the layers in reverse order: pool, then ReLU, then convolution.
  ds = max_pool_backward_fast(dout, pool_cache)
  da = relu_backward(ds, relu_cache)
  dx, dw, db = conv_backward_fast(da, conv_cache)
  return dx, dw, db


import numpy as np

def affine_forward(x, w, b):
    """Compute the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
    examples, where each example x[i] has shape (d_1, ..., d_k). Each input
    is reshaped into a vector of dimension D = d_1 * ... * d_k and then
    transformed to an output vector of dimension M.

    Args:
        x: A numpy array containing input data, of shape (N, d_1, ..., d_k).
        w: A numpy array of weights, of shape (D, M).
        b: A numpy array of biases, broadcastable against (N, M), e.g. shape
            (M,) or (1, M).

    Returns:
        A tuple of:
        - out: Output, of shape (N, M).
        - cache: (x, w, b), with x in its original (unflattened) shape.
    """
    # Flatten every example into a row vector.
    N = x.shape[0]
    x_row = x.reshape(N, -1)         # (N, D)
    out = np.dot(x_row, w) + b       # (N, M)
    cache = (x, w, b)

    return out, cache

def affine_backward(dout, cache):
    """Compute the backward pass for an affine layer.

    Args:
        dout: Upstream derivative, of shape (N, M).
        cache: Tuple of (x, w, b) as returned by affine_forward:
            - x: Input data, of shape (N, d_1, ..., d_k).
            - w: Weights, of shape (D, M).
            - b: Biases (unused here; only unpacked).

    Returns:
        A tuple of:
        - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k).
        - dw: Gradient with respect to w, of shape (D, M).
        - db: Gradient with respect to b, of shape (1, M) because the sum
          over the batch axis keeps its dimension.
    """
    x, w, b = cache
    dx = np.dot(dout, w.T)                       # (N, D)
    dx = np.reshape(dx, x.shape)                 # (N, d_1, ..., d_k)
    x_row = x.reshape(x.shape[0], -1)            # (N, D)
    dw = np.dot(x_row.T, dout)                   # (D, M)
    db = np.sum(dout, axis=0, keepdims=True)     # (1, M)

    return dx, dw, db

def relu_forward(x):
    """Compute the forward pass for a layer of rectified linear units (ReLUs).

    Args:
        x: Inputs, of any shape.

    Returns:
        A tuple of:
        - out: Output, of the same shape as x, equal to max(0, x)
          element-wise.
        - cache: x itself, kept for the backward pass.
    """
    # Element-wise max(0, x), the same expression the file's ReLU helper uses.
    out = np.maximum(0, x)
    cache = x

    return out, cache

def relu_backward(dout, cache):
    """Compute the backward pass for a layer of rectified linear units (ReLUs).

    Args:
        dout: Upstream derivatives, of any shape.
        cache: Input x of the forward pass, of the same shape as dout.

    Returns:
        dx: Gradient with respect to x. It equals dout where x > 0 and is 0
        where x <= 0.
    """
    x = cache
    # Work on a copy: assigning through an alias of dout would zero entries
    # of the caller's upstream gradient in place.
    dx = np.array(dout)
    dx[x <= 0] = 0

    return dx

def svm_loss(x, y):
    """Compute the loss and gradient for multiclass SVM classification.

    Args:
        x: Input data, of shape (N, C) where x[i, j] is the score for the
            jth class for the ith input.
        y: Vector of labels, of shape (N,) where y[i] is the label for x[i]
            and 0 <= y[i] < C.

    Returns:
        A tuple of:
        - loss: Scalar giving the loss (hinge margins with margin 1.0,
          averaged over the batch).
        - dx: Gradient of the loss with respect to x.
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    # The correct class never contributes to its own margin.
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    # Each violating class pushes its own score up by 1 and the correct
    # class score down by 1.
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N

    return loss, dx

def softmax_loss(x, y):
    """Compute the loss and gradient for softmax classification.

    Args:
        x: Input data, of shape (N, C) where x[i, j] is the score for the
            jth class for the ith input.
        y: Vector of labels, of shape (N,) where y[i] is the label for x[i]
            and 0 <= y[i] < C.

    Returns:
        A tuple of:
        - loss: Scalar giving the mean cross-entropy loss over the batch.
        - dx: Gradient of the loss with respect to x.
    """
    # Class probabilities; subtracting the row maximum before exponentiating
    # keeps np.exp from overflowing without changing the result.
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = x.shape[0]
    # Mean negative log-probability of the correct classes.
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    # Gradient: probabilities minus the one-hot labels, averaged over N.
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N

    return loss, dx

def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    rectified = np.maximum(0, x)
    return rectified









