首页 > 其他 > 详细

日常记录

时间:2020-06-04 21:10:49      阅读:36      评论:0      收藏:0      [点我收藏+]
# -*-encoding:utf-8-*-
import os
import re
import random


def find_line_exist_num(file_dir, save_path, pattern):
    """Copy every line matching *pattern* from the files under *file_dir*.

    The directory tree is walked recursively. Each file is read as UTF-16
    text, and every line for which ``pattern.search`` succeeds is written to
    *save_path*, exactly one output line per match.

    The output is written as UTF-16 so that it can be read back by code that
    decodes *save_path* as UTF-16.

    Args:
        file_dir: Root directory whose files are scanned.
        save_path: Path of the output file; it is created or truncated.
        pattern: Compiled regular expression (any object with ``search``).

    Returns:
        The number of matching lines, which equals the number of lines
        written to *save_path*.

    Raises:
        OSError: If a file cannot be opened.
        UnicodeError: If a scanned file is not valid UTF-16.
    """
    count = 0
    # Resolved once so the output file can be recognised during the walk.
    save_abspath = os.path.abspath(save_path)
    with open(save_path, "w", encoding="utf-16") as result_f:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                file_path = os.path.join(root, name)
                if os.path.abspath(file_path) == save_abspath:
                    # Never scan the file we are currently writing.
                    continue
                # Decode at the file level: splitting raw UTF-16 bytes on
                # b"\n" would cut a code unit in half.
                with open(file_path, encoding="utf-16") as f:
                    for line in f:
                        if pattern.search(line):
                            # Normalise the line ending so each match is
                            # exactly one line in the output.
                            result_f.write(line.rstrip("\r\n") + "\n")
                            count += 1
    return count


def get_random_line(file_path, num, result_file_path):
    """Write a random sample of lines from *file_path* to *result_file_path*.

    Up to 2000 distinct zero-based line numbers are drawn from
    ``range(num)``. The source file is then read once as UTF-16 text, and
    each selected line is printed and written to the result file, in source
    order.

    Args:
        file_path: UTF-16 encoded source file.
        num: Number of lines to sample from (normally the line count of
            *file_path*). If it is below 2000, every line number in
            ``range(num)`` is selected.
        result_file_path: Output file; it is created or truncated and
            written as UTF-8.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeError: If *file_path* is not valid UTF-16.
    """
    # Cap the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    sample_size = min(2000, max(num, 0))
    # A set gives O(1) membership tests while scanning the file.
    selected = set(random.sample(range(num), sample_size))
    with open(result_file_path, "w", encoding="utf-8") as result:
        with open(file_path, encoding="utf-16") as f:
            # Iterating the file ends at EOF; enumerate supplies the
            # line number.
            for cur_num, line in enumerate(f):
                if cur_num in selected:
                    print(r"行号:%s, 内容:%s" % (str(cur_num), line))
                    result.write(line)


if __name__ == "__main__":
    # Directory to scan; must be filled in before running.
    file_dir = ""
    # Output file that receives every line containing a number.
    save_path = ""
    # Output file that receives the 2000 randomly sampled lines.
    result_file_path = ""
    # Plain ASCII quotes are required here; typographic quotes are a
    # syntax error.
    pattern = re.compile(r"\d+")  # match any run of digits
    count = find_line_exist_num(file_dir, save_path, pattern)
    get_random_line(save_path, count, result_file_path)

日常记录

原文:https://www.cnblogs.com/fuchenjie/p/13045607.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!