from scipy.stats import beta
def confidence(n_bad, n_good, tol=2):
    """Estimate the bad rate and how much probability lies within tol std-devs.

    The bad rate is described by a Beta(n_bad + 1, n_good + 1) distribution
    (each count plus one, which is what a uniform prior would give).

    Args:
        n_bad: Number of bad samples observed.
        n_good: Number of good samples observed.
        tol: Half-width of the interval around the mean, expressed as a
            multiple of the standard deviation.

    Returns:
        A tuple ``(p, v, d)`` where ``p`` is the mean of the Beta
        distribution (the estimated bad rate), ``v`` is its standard
        deviation, and ``d`` is the probability mass the distribution puts
        on ``[p - tol*v, p + tol*v]`` after clipping that interval to
        ``[0, 1]``.
    """
    a, b = n_bad + 1, n_good + 1
    p = a / (a + b)  # mean of Beta(a, b)
    v = beta.std(a, b)
    # A rate lives in [0, 1], so clip the interval before integrating.
    up = min(p + v * tol, 1)
    low = max(0, p - v * tol)
    d = beta.cdf(up, a, b) - beta.cdf(low, a, b)
    return p, v, d
# Sample cases to tabulate, each given as (n_bad, n_good, tol).
# NOTE(review): the recorded output pasted below this script shows
# "total = 11000" for the sixth case, i.e. it was produced with
# n_good=10000 rather than the 100000 listed here -- confirm which is intended.
test_set = [
    (500, 20000, 2),
    (1000, 200000, 2),
    (2000, 200000, 2),
    (5000, 200000, 2),
    (500, 100000, 2),
    (1000, 100000, 2),
    (2000, 100000, 2),
    (5000, 100000, 2),
    (2000, 10000, 2),
]

# Print one summary line per case: mean, std, coefficient of variation
# (v / p), relative error (tol * v / p) and the interval probability in percent.
for (n_bad, n_good, tol) in test_set:
    p, v, d = confidence(n_bad, n_good, tol)
    ss = ('bad = {}; total = {}; 均值p = {p:0.4f}; 标准差v = {v:0.6f}; 变异系数c = {c:0.3f}; 均值的相对误差为 {e:0.3f};'
          + '均值落在区间[p - {t}v, p + {t}v]下的概率为{d:2.2f}%'
          ).format(n_bad, n_bad + n_good, p=p, v=v, c=v / p, d=d * 100, t=tol, e=tol * v / p)
    print(ss)
bad = 500; total = 20500; 均值p = 0.0244; 标准差v = 0.001078; 变异系数c = 0.044; 均值的相对误差为 0.088;均值落在区间[p - 2v, p + 2v]下的概率为95.46%
bad = 1000; total = 201000; 均值p = 0.0050; 标准差v = 0.000157; 变异系数c = 0.032; 均值的相对误差为 0.063;均值落在区间[p - 2v, p + 2v]下的概率为95.46%
bad = 2000; total = 202000; 均值p = 0.0099; 标准差v = 0.000220; 变异系数c = 0.022; 均值的相对误差为 0.044;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
bad = 5000; total = 205000; 均值p = 0.0244; 标准差v = 0.000341; 变异系数c = 0.014; 均值的相对误差为 0.028;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
bad = 500; total = 100500; 均值p = 0.0050; 标准差v = 0.000222; 变异系数c = 0.045; 均值的相对误差为 0.089;均值落在区间[p - 2v, p + 2v]下的概率为95.46%
bad = 1000; total = 11000; 均值p = 0.0910; 标准差v = 0.002742; 变异系数c = 0.030; 均值的相对误差为 0.060;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
bad = 2000; total = 102000; 均值p = 0.0196; 标准差v = 0.000434; 变异系数c = 0.022; 均值的相对误差为 0.044;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
bad = 5000; total = 105000; 均值p = 0.0476; 标准差v = 0.000657; 变异系数c = 0.014; 均值的相对误差为 0.028;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
bad = 2000; total = 12000; 均值p = 0.1667; 标准差v = 0.003402; 变异系数c = 0.020; 均值的相对误差为 0.041;均值落在区间[p - 2v, p + 2v]下的概率为95.45%
原文:https://www.cnblogs.com/bregman/p/10510308.html