import sys import numpy as np f=open(sys.argv[2],"r") f2=open(sys.argv[1],"r") f1=open("out.txt","w") import math # 隐状态 def compute(obs, states, start_p, trans_p, emit_p): max_p = np.zeros((len(obs), len(states))) path = np.zeros((len(states), len(obs))) for i in range(len(states)): max_p[0][i] = math.log(start_p[i]) + emit_p[i][obs[0]] path[i][0] = i for t in range(1, len(obs)): #print(t) newpath = np.zeros((len(states), len(obs))) for y in range(len(states)): prob = -100000000 for y0 in range(len(states)): nprob = max_p[t-1][y0] + trans_p[y0][y] + emit_p[y][obs[t]] if nprob > prob: prob = nprob state = y0 max_p[t][y] = prob for m in range(t): newpath[y][m] = path[state][m] newpath[y][t] = y path = newpath max_prob = -100000000 path_state = 0 for y in range(len(states)): if max_p[len(obs)-1][y] > max_prob: max_prob = max_p[len(obs)-1][y] path_state = y #print(max_prob) return path[path_state] state_s = [] obser=[]#fasta file alp=[]#alphabet #get hmm file lines=f2.readlines() wordlist=lines[0].split()#将每一行的数字分开放在列表中 #for a in wordlist:#遍历每一行的数字 #print (a) statenum=int(wordlist[0]) hidden_state = [] for k in range(statenum): state_s.append(k) hidden_state.append(chr(ord(‘A‘)+k)) letternum=wordlist[1] #print((wordlist[2])) for i in range(len(wordlist[2])): alp.append((wordlist[2][i]).lower()) #print(alp[i]) wordlist=lines[1].split()#将每一行的数字分开放在列表中 start_probability =[] for a in wordlist:#遍历每一行的数字 #print (a) start_probability.append(float(a)) #print(statenum) #print(letternum) transititon_probability=np.zeros((int(statenum),int(statenum))) emission_probability=np.zeros((int(statenum),int(letternum))) #print(emission_probability[2][2]) for nn in range(int(statenum)): wordlist=lines[nn+2].split()#将每一行的数字分开放在列表中 #print(len(wordlist)) for k in range(int(statenum)): transititon_probability[nn][k]=math.log(float(wordlist[k])) #print(transititon_probability[nn][k]) for k in range(int(letternum)): emission_probability[nn][k]=math.log(float(wordlist[k+statenum])) #print("emi") #print(emission_probability[nn][k]) #print("read%d"%(nn)) for line in f:#get fasta file for a in line: if(a==">"): break for k in range(len(alp)): if(a.lower()==alp[k]): obser.append(k) #transititon_probability = np.array([[math.log(0.999), math.log(0.001)], [math.log(0.01), math.log(0.99)]])#transition table #emission_probability = np.array([[math.log(0.35), math.log(0.15), math.log(0.15),math.log(0.35)], [math.log(0.15), math.log(0.35), math.log(0.35),math.log(0.15)]])#emission table result = compute(obser, state_s, start_probability, transititon_probability, emission_probability) coun=0 sta=-1 pos=1 #print(result) for k in range(len(result)): if (sta != -1 and sta != int(result[k])): print("%7d%7d state %s" % (pos, k, hidden_state[sta])) f1.write("%7d%7d state %s\n" % (pos, k, hidden_state[sta])) if (sta == 1): coun += 1 pos = k+1 sta = int(result[k]) print("%7d%7d state %s" % (pos, k+1, hidden_state[sta])) f1.write("%7d%7d state %s\n" % (pos, k+1, hidden_state[sta]))
原文:https://www.cnblogs.com/hyffff/p/12643543.html