日常记录

时间：2020-06-04 21:10:49 阅读：38 评论：0 收藏：0 [点我收藏+]

# -*-encoding:utf-8-*-
import os
import re
import random


def find_line_exist_num(file_dir, save_path, pattern, encoding="utf-16"):
    """Collect every line under ``file_dir`` that matches ``pattern``.

    Walks ``file_dir`` recursively, reads each file as text using
    ``encoding`` and copies every line for which ``pattern.search`` finds a
    match into ``save_path`` (one line per match, written with the same
    ``encoding``). Requires Python 3 (uses ``open(..., encoding=...)``).

    Args:
        file_dir: Root directory to scan.
        save_path: File that receives the matching lines; it is truncated.
        pattern: Compiled regular expression (anything with ``.search``).
        encoding: Text encoding of the input files and of the output file.

    Returns:
        The number of lines written to ``save_path``.
    """
    count = 0
    save_abs = os.path.abspath(save_path)
    with open(save_path, "w", encoding=encoding) as result_f:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                file_path = os.path.join(root, name)
                # The output file may live inside file_dir; never feed it
                # back into itself while it is being written.
                if os.path.abspath(file_path) == save_abs:
                    continue
                # Decode at the file level: decoding individual byte-level
                # readline() chunks breaks UTF-16 (the split lands in the
                # middle of a code unit and the BOM is only on line one).
                with open(file_path, encoding=encoding) as f:
                    for line in f:
                        text = line.rstrip("\r\n")
                        if pattern.search(text):
                            # Exactly one terminator per match, so `count`
                            # equals the number of lines in the output.
                            result_f.write(text + "\n")
                            count += 1
    return count


def get_random_line(file_path, num, result_file_path, sample_size=2000,
                    encoding="utf-16"):
    """Copy a random sample of lines from ``file_path`` to another file.

    Picks up to ``sample_size`` distinct 0-based line indexes from
    ``range(num)``, then streams ``file_path`` once, printing and writing
    every line whose index was picked. Lines keep their original order.
    Requires Python 3 (uses ``open(..., encoding=...)``).

    Args:
        file_path: Text file to sample from.
        num: Number of lines to sample among (normally the line count of
            ``file_path``). Indexes past the real end of the file simply
            produce no output.
        result_file_path: File that receives the sampled lines; truncated.
        sample_size: Maximum number of lines to pick. When ``num`` is
            smaller, every one of the ``num`` lines is picked.
        encoding: Text encoding used to read ``file_path`` and to write
            ``result_file_path``.
    """
    population = range(num)
    # random.sample raises ValueError if asked for more items than exist.
    wanted = set(random.sample(population, min(sample_size, len(population))))
    last_wanted = max(wanted, default=-1)
    with open(result_file_path, "w", encoding=encoding) as result:
        with open(file_path, encoding=encoding) as f:
            for cur_num, line in enumerate(f):
                if cur_num > last_wanted:
                    # Every picked index has been passed; stop reading.
                    break
                if cur_num in wanted:
                    print(r"行号：%s, 内容：%s" % (str(cur_num), line))
                    result.write(line)

if __name__ == "__main__":
    # Placeholders: fill in real paths before running the script.
    # Directory to scan.
    file_dir = ""
    # Where all lines that contain a number are stored.
    save_path = ""
    # Where the random sample (2000 lines) is stored.
    result_file_path = ""
    # Match any run of digits. The original used typographic quotes here,
    # which is a SyntaxError; plain ASCII quotes are required.
    pattern = re.compile(r'\d+')
    count = find_line_exist_num(file_dir, save_path, pattern)
    get_random_line(save_path, count, result_file_path)

日常记录

原文：https://www.cnblogs.com/fuchenjie/p/13045607.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)