记录瞬间
=====================其一=====================
# coding:UTF-8
"""Count the lines of the log files in the current directory using threads."""
import os
import threading
from time import ctime


def loop(loops, list):
    """Print the line count of every file assigned to one worker thread.

    Args:
        loops: Zero-based index of the worker thread (displayed one-based).
        list: Names of the files this thread has to process. The parameter
            name shadows the builtin; it is kept so existing callers that
            pass it by keyword keep working.
    """
    print('线程 %d 处理的文件列表 %s \n' % (loops + 1, list))
    for name in list:
        # Iterate the file lazily instead of calling readlines(), so a large
        # file is never held in memory as a whole; ``with`` also closes the
        # handle when decoding fails half-way through.
        with open(name, mode="r", encoding="UTF-8") as f:
            rows = sum(1 for _ in f)
        print('文件 %s __ %d 行\n' % (name, rows))


def main():
    """Find the log files in the working directory and count their lines.

    The files are dealt out round-robin to ``threads_num`` threads. The
    previous contiguous chunking used ``len(files) // threads_num`` files per
    thread and gave the remainder to the last one, so with fewer files than
    threads a single thread ended up doing all the work.
    """
    print('all start at:', ctime())
    cwd = os.getcwd()
    dir_list = os.listdir(cwd)
    # The label names the folder, so show the path (the directory listing
    # itself used to be formatted into this message).
    print('当前文件夹 {} 下的所有txt文件:'.format(cwd))
    # Regular files whose name contains "log" are selected, even though the
    # message above speaks of txt files.
    file_list = []
    for entry in dir_list:
        if 'log' in entry and os.path.isfile(entry):
            print(' ', entry)
            file_list.append(entry)

    threads_num = 4  # change this to compare multi- and single-threaded runs
    print('共有线程数:%d个' % threads_num)
    per_thread = len(file_list) / threads_num  # average number of files per thread
    print(per_thread)

    threads = [
        threading.Thread(target=loop, args=(i, file_list[i::threads_num]))
        for i in range(threads_num)
    ]
    for t in threads:
        t.start()
    for t in threads:  # wait for every worker to finish
        t.join()
    print('all end at:', ctime())


if __name__ == '__main__':
    main()
上述代码主要演示了多线程读取文件时的一些技巧。同样的思路也可以用在写文件上:让每个线程各自写一个文件,最后再把这些文件合并起来。这样可以较好地解决执行速度的问题。
=====================其二=====================
混合使用多进程和多线程的例子。
#!/usr/bin/python
"""Helpers that list and download the RPM packages of a mirror directory."""
import re
import subprocess  # replaces ``commands``, which was removed in Python 3
import time
import multiprocessing
import threading


def _run_wget(*args):
    """Run ``wget`` with *args* and discard everything it prints.

    The arguments are passed as a list, without a shell, so characters in a
    scraped URL can never be interpreted as shell syntax.

    Raises:
        OSError: if the ``wget`` executable cannot be started.
    """
    proc = subprocess.Popen(
        ('wget',) + args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    proc.communicate()


def download_image(url):
    """Download *url* into the current directory with wget."""
    print('*****the %s rpm begin to download *******' % url)
    _run_wget(url)


def parse_rpm_links(url, index_html):
    """Return the absolute URLs of the ``.rpm`` links found in *index_html*.

    Args:
        url: Directory URL the index page was fetched from; every link
            target is appended to it, so it should end with ``/``.
        index_html: HTML text of the directory index.

    Returns:
        One URL per ``<a href="...">`` whose target ends in ``.rpm``, in page
        order. Navigation links such as ``../`` or the column-sort links are
        skipped instead of being treated as packages.
    """
    hrefs = re.findall(r'<a href="([^"]*)"', index_html)
    return [url + href for href in hrefs if href.endswith('.rpm')]


def get_rpm_url_list(url):
    """Fetch the directory index at *url* and return its RPM URLs.

    The index is always written to ``index.html`` in the current directory
    (``wget -O``); without that wget stores it as ``index.html.1`` when the
    file already exists and a stale listing would be parsed.
    """
    _run_wget('-O', 'index.html', url)
    with open('index.html') as index_file:
        rpm_info_str = index_file.read()
    rpm_url_list = parse_rpm_links(url, rpm_info_str)
    print('the count of rpm list is:  %d' % len(rpm_url_list))
    return rpm_url_list
基础方法定义 =↑=
def multi_thread(rpm_url_list, thread_num=5, worker=None):
    """Process every URL with threads, at most *thread_num* at a time.

    The URLs are handled in consecutive batches: each batch is started
    together and fully joined before the next one begins.

    Args:
        rpm_url_list: List of URLs to process.
        thread_num: Maximum number of threads running at once (default 5,
            the value that used to be hard-coded).
        worker: Callable invoked as ``worker(url)`` in its own thread.
            Defaults to ``download_image``.

    Raises:
        ValueError: if *thread_num* is smaller than 1.
    """
    if thread_num < 1:
        raise ValueError('thread_num must be at least 1')
    if worker is None:
        # Looked up at call time so the default follows the module's function.
        worker = download_image

    for rpm_url in rpm_url_list:
        print('rpm_url is: %s' % (rpm_url,))

    for start in range(0, len(rpm_url_list), thread_num):
        batch = [
            threading.Thread(target=worker, args=(rpm_url,))
            for rpm_url in rpm_url_list[start:start + thread_num]
        ]
        print('**********count********* %d' % len(batch))
        for one_thread in batch:
            one_thread.start()
        for one_thread in batch:
            one_thread.join()
多线程的定义 =↑=
def multi_process(rpm_url_list, process_num=4):
    """Split the URLs over worker processes that each run ``multi_thread``.

    URL ``i`` goes to group ``i % process_num``, the same round-robin split
    the four hand-written groups produced. Groups that end up empty do not
    get a process.

    Args:
        rpm_url_list: List of URLs to download.
        process_num: Number of groups / processes (default 4, the value that
            used to be hard-coded).

    Raises:
        ValueError: if *process_num* is smaller than 1.
    """
    if process_num < 1:
        raise ValueError('process_num must be at least 1')

    rpm_url_groups = [rpm_url_list[offset::process_num] for offset in range(process_num)]
    processes = [
        multiprocessing.Process(target=multi_thread, args=(each_rpm_group,))
        for each_rpm_group in rpm_url_groups
        if each_rpm_group
    ]
    for one_process in processes:
        one_process.start()
    for one_process in processes:
        one_process.join()
多进程调用多线程的定义 =↑=
def main():
    """Download every RPM listed on the mirror page and print the elapsed time."""
    # Other mirror directories that can be substituted here:
    #   https://mirrors.ustc.edu.cn/centos/7/os/x86_64/Packages/
    #   http://mirrors.ustc.edu.cn/fedora/development/26/Server/x86_64/os/Packages/u/
    url_paas = 'http://mirrors.ustc.edu.cn/centos/7.3.1611/paas/x86_64/openshift-origin/'
    start_time = time.time()
    rpm_list = get_rpm_url_list(url_paas)
    # multi_process() returns nothing, so its result is no longer printed.
    multi_process(rpm_list)
    end_time = time.time()
    print('the download time is: %s' % (end_time - start_time))


# Guard the entry point: when multiprocessing starts workers by re-importing
# this module, an unguarded call would launch the whole download again.
if __name__ == '__main__':
    main()
主方法 =↑=
代码来源:https://blog.csdn.net/nfzhlk/article/details/76946281
其中获取cpu核数的方法可以使用
from multiprocessing import cpu_count

# Number of CPUs reported by the system, e.g. to size a process pool.
print(cpu_count())
一般来说,使用多线程是为了更快地完成任务,因此能不加锁就不加锁;加锁之后,执行效率很容易退化到与单线程差不多的水平。
这样做并不划算;当然,是否需要给多线程加锁,还要看具体的需求。
原文:https://www.cnblogs.com/wozijisun/p/10371579.html