""" dev数据集有错误;改变一下错误索引 """ BEGIN_DOCUMENT_REGEX = re.compile(r"#begin document \((.*)\); part (\d+)") def devtoture(gold_path): with open(gold_path, "r") as gold_file,open(‘users_new.txt‘, ‘w‘) as output_file: for line in gold_file.readlines(): row = line.split() if len(row) == 0: output_file.write("\n") elif row[0].startswith(‘#‘): begin_match = re.match(BEGIN_DOCUMENT_REGEX, line) if begin_match: partnum = row[4] print(partnum) output_file.write(line) else: if row[1] != partnum: print("不相等") len0 = len(row[0])+3 lineout=line[:len0]+partnum[0]+line[len0+2:] output_file.write(lineout) else: output_file.write(line)
数据集的错误:#begin document 行里的 part 值和数据行的第二列对不上,需要把第二列改成与 part 值一致。
学到的方法:
Python 修改字符串:字符串是不可变的,不能原地修改;可以先转成 list 修改后再拼回去,用切片拼接出新字符串,或者用 replace。
lineout=line[:len0]+partnum[0]+line[len0+2:]
参考:https://www.cnblogs.com/yvonnes/p/10020911.html
原文:https://www.cnblogs.com/yttas/p/11266341.html