# Hand-written RNN: forward and backward propagation
#
# Posted: 2020-09-20 20:42:10 | Views: 63 | Comments: 0 | Favorites: 0 [Add to favorites +]
import numpy as np

# Softmax函数
def softmax(x, axis=1):
    """Compute a numerically stable softmax of ``x`` along ``axis``.

    :param x: array-like of scores; must have at least ``axis + 1`` dimensions.
    :param axis: axis along which the values are normalised (default 1,
        i.e. each row of a 2-D array sums to 1).
    :return: array of the same shape as ``x`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtract the maximum along the normalised axis so np.exp cannot
    # overflow to inf. keepdims=True makes the maximum broadcast back onto x
    # for any axis and any rank (a fixed reshape(-1, 1) is only valid for
    # 2-D input with axis=1).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
def rnn_cell_forward(x_t, s_prev, parameters):
    """Run the forward pass of a single RNN cell.

    Computes::

        s_next   = tanh(U @ x_t + W @ s_prev + ba)
        out_pred = softmax(V @ s_next + by)

    :param x_t: input at the current time step (the demo in this file passes
        a column vector of shape (3, 1)).
    :param s_prev: hidden state coming from the previous cell (the demo
        passes a column vector of shape (5, 1)).
    :param parameters: dict holding the cell weights under the keys
        'U', 'W', 'V', 'ba' and 'by'.
    :return: tuple ``(s_next, out_pred, cache)`` where ``cache`` is
        ``(s_next, s_prev, x_t, parameters)``, kept for the backward pass.
    """
    # Unpack the weights.
    U = parameters['U']
    W = parameters['W']
    V = parameters['V']
    ba = parameters['ba']
    by = parameters['by']

    # Hidden state of this cell.
    s_next = np.tanh(np.dot(U, x_t) + np.dot(W, s_prev) + ba)

    # Cell output. The logits are laid out with one class per row (axis 0),
    # so the softmax must normalise over axis 0. With the default axis=1
    # every row of a column vector holds a single element and the result
    # would be all ones.
    out_pred = softmax(np.dot(V, s_next) + by, axis=0)

    # Values needed later by the backward pass.
    cache = (s_next, s_prev, x_t, parameters)
    return s_next, out_pred, cache
def rnn_forward(x, s0, parameters):
    """Run the forward pass over every time step of the sequence.

    :param x: input sequence, a 3-D array whose last axis is time; slice
        ``x[:, :, t]`` is fed to the cell at step ``t`` (the demo in this
        file uses shape (3, 1, 4)).
    :param s0: initial hidden state fed to the first cell.
    :param parameters: weights shared by all cells, a dict with the keys
        'U', 'W', 'V', 'ba' and 'by'.
    :return: tuple ``(s, y, caches)`` where ``s`` has shape
        ``(V.shape[1], 1, T)`` and stores every hidden state, ``y`` has shape
        ``(V.shape[0], 1, T)`` and stores every cell output, and ``caches``
        is the list of per-step caches returned by ``rnn_cell_forward``.
    """
    caches = []
    # Only the sequence length is needed from the input shape.
    _, _, T = x.shape
    # V maps the hidden state to the output, so its shape gives both the
    # output size (rows) and the hidden size (columns).
    n_out, n_hidden = parameters['V'].shape
    # The first cell receives the initial state.
    s_next = s0
    # Buffers for the hidden states and outputs of all cells.
    s = np.zeros((n_hidden, 1, T))
    y = np.zeros((n_out, 1, T))
    for t in range(T):
        # Forward through the cell at time step t.
        s_next, out_pred, cache = rnn_cell_forward(x[:, :, t], s_next, parameters)

        s[:, :, t] = s_next
        y[:, :, t] = out_pred

        # Keep the cache of every step for back-propagation.
        caches.append(cache)
    return s, y, caches
def rnn_cell_backward(ds_next, cache):
    """Back-propagate through a single RNN cell.

    :param ds_next: gradient of the loss with respect to this cell's hidden
        state ``s_next``.
    :param cache: tuple ``(s_next, s_prev, x_t, parameters)`` as produced by
        the cell's forward pass; only 'U' and 'W' are read from
        ``parameters``.
    :return: dict with the keys 'dtanh', 'dU', 'dW', 'dba', 'dx_t' and
        'ds_prev' holding the gradients for this cell.
    """
    (s_next, s_prev, x_t, parameters) = cache

    U = parameters['U']
    W = parameters['W']

    # 1. Back through tanh: d tanh(z)/dz = 1 - tanh(z)^2, and s_next = tanh(z).
    dtanh = (1 - s_next ** 2) * ds_next
    # 2. Gradient of U.
    dU = np.dot(dtanh, x_t.T)
    # 3. Gradient of W.
    dW = np.dot(dtanh, s_prev.T)
    # 4. Gradient of the bias ba: sum over axis 1, keeping a column shape.
    dba = np.sum(dtanh, axis=1, keepdims=True)
    # 5. Gradient with respect to the input x_t.
    dx_t = np.dot(U.T, dtanh)
    # 6. Gradient with respect to the previous hidden state.
    ds_prev = np.dot(W.T, dtanh)

    gradients = {
        'dtanh': dtanh,
        'dU': dU,
        'dW': dW,
        'dba': dba,
        'dx_t': dx_t,
        'ds_prev': ds_prev,
    }
    return gradients
def rnn_backward(ds, caches):
    """Back-propagate through all cells of the RNN (back-propagation through time).

    :param ds: gradient of the loss with respect to the hidden state at
        every time step, assumed to be given; shape (n, 1, T).
    :param caches: list of per-step caches produced by the forward pass,
        each a tuple ``(s_next, s_prev, x_t, parameters)``.
    :return: dict with the accumulated parameter gradients 'dU', 'dW' and
        'dba', plus 'dx', the gradient with respect to the input at every
        time step, of shape (m, 1, T).
    """
    # Only the first input is needed here, to read the input size.
    (_, _, x_1, _) = caches[0]

    # n: hidden size, T: sequence length, m: input size.
    n, _, T = ds.shape
    m, _ = x_1.shape
    # Gradient flowing into s from the following cell; nothing follows the
    # last cell, so it starts at zero.
    ds_prevt = np.zeros((n, 1))
    # Accumulators for the shared-parameter gradients.
    dU = np.zeros((n, m))
    dW = np.zeros((n, n))
    dba = np.zeros((n, 1))

    # Gradients with respect to the inputs (not used for parameter updates).
    dx = np.zeros((m, 1, T))
    # Walk the time steps from last to first.
    for t in reversed(range(T)):
        # The gradient of s at step t has two parts: the one coming directly
        # from the loss at this step and the one coming from step t + 1.
        gradients = rnn_cell_backward(ds[:, :, t] + ds_prevt, caches[t])
        ds_prevt = gradients['ds_prev']
        # The parameters are shared across steps, so their gradients add up.
        dU += gradients['dU']
        dW += gradients['dW']
        dba += gradients['dba']

        # Store the input gradient of this step.
        dx[:, :, t] = gradients['dx_t']
    # Parameter gradients plus the input gradients.
    gradients = {'dU': dU, 'dW': dW, 'dba': dba, 'dx': dx}
    return gradients
# Demo / manual tests
if __name__ == '__main__':
    # Test a single cell
    # np.random.seed(1)
    #
    # x_t = np.random.rand(3, 1)
    # s_prev = np.random.randn(5, 1)
    # U = np.random.randn(5, 3)
    # W = np.random.randn(5, 5)
    # V = np.random.randn(3, 5)
    # ba = np.random.randn(5, 1)
    # by = np.random.randn(3, 1)
    # parameters = {"U":U, "W":W, "V":V, "ba":ba, "by":by}
    #
    #
    # s_next, out_pred, cache = rnn_cell_forward(x_t, s_prev, parameters)
    # print(s_next)
    # print(s_next.shape)
    # print(out_pred)
    # print(out_pred.shape)

    # Test the forward pass of the whole RNN
    # np.random.seed(1)
    #
    # x = np.random.randn(3, 1, 4)
    # s0 = np.random.randn(5, 1)
    # U = np.random.randn(5, 3)
    # W = np.random.randn(5, 5)
    # V = np.random.randn(3, 5)
    # ba = np.random.randn(5, 1)
    # by = np.random.randn(3, 1)
    # parameters = {"U":U, "W":W, "V":V, "ba":ba, "by":by}
    #
    # s, y, caches = rnn_forward(x, s0, parameters)
    # print(s)
    # print(s.shape)
    # print(y)
    # print(y.shape)

    # Test the full backward pass
    np.random.seed(1)

    x = np.random.randn(3, 1, 4)
    s0 = np.random.randn(5, 1)
    U = np.random.randn(5, 3)
    W = np.random.randn(5, 5)
    V = np.random.randn(3, 5)
    ba = np.random.randn(5, 1)
    by = np.random.randn(3, 1)
    parameters = {"U":U, "W":W, "V":V, "ba":ba, "by":by}

    s, y, caches = rnn_forward(x, s0, parameters)
    # Random stand-in for the hidden-state gradients of the 4 cells (a real
    # run would derive these from the loss).
    ds = np.random.randn(5, 1, 4)
    gradients = rnn_backward(ds, caches)
    print(gradients)
# Hand-written RNN: forward and backward propagation
# Original article: https://www.cnblogs.com/yuganwj/p/13700543.html
踩
(0)
评论一句话评论（0）
分享档案
更多>
2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)