https://github.com/zhangbo2008/perfect_batch_generator_for_pyton
核心代码如下:
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in fixed-size batches.

    The file is consumed one line at a time, so only the batch currently
    being assembled is held in memory.

    Args:
        fimename: Path of the file to read. It is passed to ``open`` with
            its default (text) mode.
        batchsize: Number of lines per batch. A value below 1 is never
            matched by the batch length, so every line is then returned
            together as one single batch.

    Yields:
        Lists of lines, each line keeping its trailing newline. Every
        list holds ``batchsize`` lines except possibly the last one,
        which holds whatever lines remain. An empty list is never
        yielded.
    """
    with open(fimename) as f:
        batch = []
        for line in f:
            batch.append(line)
            if len(batch) == batchsize:
                yield batch
                # Start a fresh list so the batch already handed to the
                # caller is not mutated afterwards.
                batch = []
        # Flush the final partial batch. Skip it when it is empty (empty
        # file, or a line count that is an exact multiple of batchsize).
        if batch:
            yield batch


if __name__ == "__main__":
    # `read` is a generator object; nothing is read until it is iterated.
    read = bylineread('1', batchsize=2)
    for batch in read:
        print(batch)
    print('over')
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in fixed-size batches.

    The file is consumed one line at a time, so only the batch currently
    being assembled is held in memory.

    Args:
        fimename: Path of the file to read. It is passed to ``open`` with
            its default (text) mode.
        batchsize: Number of lines per batch. A value below 1 is never
            matched by the batch length, so every line is then returned
            together as one single batch.

    Yields:
        Lists of lines, each line keeping its trailing newline. Every
        list holds ``batchsize`` lines except possibly the last one,
        which holds whatever lines remain. An empty list is never
        yielded.
    """
    with open(fimename) as f:
        batch = []
        for line in f:
            batch.append(line)
            if len(batch) == batchsize:
                yield batch
                # Start a fresh list so the batch already handed to the
                # caller is not mutated afterwards.
                batch = []
        # Flush the final partial batch. Skip it when it is empty (empty
        # file, or a line count that is an exact multiple of batchsize).
        if batch:
            yield batch
# Demo: `read` is a generator object; nothing is read from the file until
# it is iterated. Guarded so that importing this module does not run it.
if __name__ == "__main__":
    read = bylineread('1', batchsize=2)
    # A for-loop ends cleanly when the generator is exhausted, so real
    # errors (for example a missing file) are no longer silently swallowed.
    for batch in read:
        print(batch)
    print('over')
带 batch_size 的迭代器按批读取文件:解决大数据处理时内存不足的问题,完美解决!
原文:https://www.cnblogs.com/zhangbo2008/p/13373587.html