1. 归一化和标准化
import numpy as np
from numpy import sqrt


def normalize(mtx, a=0, b=1):
    """
    Min-max normalization: rescale all elements of ``mtx`` into [a, b].

    The minimum and maximum are taken over the whole matrix (not per
    row or column), so the result is dimensionless.

    formula: x* = a + k(x - min), k = (b - a) / (max - min)

    :param mtx: array-like of numbers (ndarray, nested list, ...)
    :param a: lower bound of the target range, default 0
    :param b: upper bound of the target range, default 1
    :return: float ndarray with the same shape as ``mtx``
    """
    data = np.asarray(mtx, dtype=float)
    if data.size == 0:
        # Nothing to scale; np.min/np.max would raise on an empty array.
        return data.copy()

    mn = data.min()
    span = data.max() - mn
    if span == 0:
        # Constant input: k would be a division by zero (NaN output),
        # so map every element to the lower bound instead.
        return np.full(data.shape, float(a))

    k = (b - a) / span  # scale factor between the two ranges
    # Vectorized arithmetic keeps the original shape, no ravel/reshape needed.
    return a + k * (data - mn)


def standard(mtx):
    """
    Z-score standardization over the whole matrix.

    Works best when the data is approximately Gaussian; the result then
    follows N(0, 1).

    formula: X* = (X - X_mean) / sqrt(var), with the population variance
    (sum of squared deviations divided by the element count).

    :param mtx: array-like of numbers (ndarray, nested list, ...)
    :return: float ndarray with the same shape as ``mtx``
    """
    data = np.asarray(mtx, dtype=float)
    if data.size == 0:
        # Mean/variance of an empty array are undefined.
        return data.copy()

    centered = data - data.mean()
    var = np.mean(centered ** 2)  # population variance
    if var == 0:
        # Constant input: every element equals the mean, so the z-score
        # is defined as 0 rather than the NaN produced by 0 / 0.
        return np.zeros(data.shape)

    return centered / sqrt(var)

# ------------------------Test Part-------------------------------
# if __name__ == '__main__':
#     arr = np.array(([1.12, 0.78, 2.33, 3.45, 4.11, 5],
#                     [1, 3, 4, 5, 7, 6.66]))
#
#     norm_arr = normalize(arr)
#     standard_arr = standard(arr)
#     print(norm_arr)
#     print(standard_arr)
Notes:
# 矩阵拉伸:将矩阵拉伸成行向量
ravel(): 返回数组的视图(不复制数据,修改视图会影响原数组)
flatten(): 返回数组的副本,需要重新分配空间
# 矩阵分割:
np.hsplit(mtx, arg)  # 水平分割,arg:一般是列数
np.vsplit(mtx, arg)  # 垂直分割,arg:一般是行数
# 矩阵组合:
np.hstack((a, b))               # 水平组合
np.concatenate((a, b), axis=1)  # 水平组合(等价写法)
np.vstack((a, b))               # 垂直组合
np.concatenate((a, b), axis=0)  # 垂直组合(等价写法)
# list_to_ndarray:
np.array(list)
arr.tolist()
2. SVD分解
import numpy as np
from numpy import linalg, sqrt


def mtx_svd(mtx):
    """
    Singular value decomposition of a real matrix via eigendecomposition.

    For an m x n matrix M of numerical rank r the function returns

    * U -- m x m orthogonal matrix, columns are left singular vectors
    * D -- r x r diagonal matrix of the non-zero singular values, descending
    * V -- n x n orthogonal matrix, columns are right singular vectors

    such that M = U[:, :r] . D . V[:, :r]^T (note the transpose on V).

    :param mtx: 2-D array-like of real numbers
    :return: tuple (U, D, V)
    :raises ValueError: if ``mtx`` is not two-dimensional
    """
    M = np.asarray(mtx, dtype=float)
    if M.ndim != 2:
        raise ValueError("mtx_svd expects a 2-D matrix")
    rows, cols = M.shape

    # M^T M is symmetric, so eigh is the right solver: it guarantees real
    # eigenvalues and orthonormal eigenvectors (eig may return complex ones).
    e_val, e_vecs = linalg.eigh(np.dot(M.T, M))
    sorted_eval_idx = np.argsort(e_val)[::-1]  # descending eigenvalue order
    sorted_eval = e_val[sorted_eval_idx]

    # Right singular vectors: eigenvectors of M^T M in the same order.
    V = e_vecs[:, sorted_eval_idx]

    # Eigenvalues that are zero in exact arithmetic come back as tiny
    # positive or negative numbers, so compare against a tolerance instead
    # of `!= 0` (which would also let sqrt produce NaN for negatives).
    largest = sorted_eval[0] if sorted_eval.size else 0.0
    tol = max(rows, cols) * np.finfo(float).eps * largest
    rank = min(int(np.sum(sorted_eval > tol)), rows)

    # Singular values are the square roots of the non-zero eigenvalues.
    sin_val = sqrt(sorted_eval[:rank])
    D = np.diag(sin_val)

    # Eigenvectors of M M^T give an orthonormal basis of R^m; the ones that
    # belong to the zero eigenvalues span the left null space of M.
    val_u, vecs_u = linalg.eigh(np.dot(M, M.T))
    u_sorted_idx = np.argsort(val_u)[::-1]
    U = vecs_u[:, u_sorted_idx]

    # Eigenvectors are only defined up to sign, so taking U and V from two
    # independent decompositions does not reconstruct M. Derive the leading
    # left singular vectors from V instead: u_i = M v_i / sigma_i.
    U[:, :rank] = np.dot(M, V[:, :rank]) / sin_val

    return U, D, V

# -------------------------------Test Part-------------------------------
# if __name__ == '__main__':
#     mtx = np.array(([1, 2, 3],
#                     [2, 4, 7],
#                     [3, 7, 10],
#                     [4, 8, 5],
#                     [6, 9, 7]))
#     u, d, v = mtx_svd(mtx)
#     print(u, d, v)
Notes:
# argsort(): 将矩阵从小到大排序,并提取对应的index list
argsort()[::-1]: 将索引逆置
# 按照特征值顺序对对应特征向量排序
1. 降序排列特征值,并得到其索引
eval_sorted_index = np.argsort(A)[::-1]
2. 利用列表表达式排序特征值
sorted_eval = [eval[i] for i in eval_sorted_index]
3. 排序对应特征向量
sorted_evecs = evecs[:, eval_sorted_index]
原文:https://www.cnblogs.com/KrianJ/p/12178169.html