本文主要描述了普通文件的读写,json和CSV文件的读写解析,初步介绍了进程与线程的用法
seek(offset [,from])方法改变当前文件的位置。Offset变量表示要移动的字节数。From变量指定开始移动字节的参考位置。
如f.seek(0,0)返回文件起始位置,f.seek(0,2)表示定位至文件末尾
f.read(num)表示读取几个字符
注意:写入或读取之后,文件指针默认停留在已读/写内容的末尾,需要用seek重新定位
def main():
    """Read '致橡树.txt' as UTF-8 and print its contents.

    Demonstrates try/except/finally: the three except clauses report the
    common failure modes, and finally guarantees the handle is closed.
    """
    f = None
    try:
        f = open('致橡树.txt', 'r', encoding='utf-8')
        print(f.read())
    except FileNotFoundError:
        print('无法打开指定的文件!')
    except LookupError:
        print('指定了未知的编码!')
    except UnicodeDecodeError:
        print('读取文件时解码错误!')
    finally:
        # close only if open() succeeded (f is still None otherwise)
        if f:
            f.close()


if __name__ == '__main__':
    main()
import json


def main():
    """Serialize a sample nested dict to data.json (UTF-8)."""
    mydict = {
        'name': '骆昊',
        'age': 38,
        'qq': 957658,
        'friends': ['王大锤', '白元芳'],
        'cars': [
            {'brand': 'BYD', 'max_speed': 180},
            {'brand': 'Audi', 'max_speed': 280},
            {'brand': 'Benz', 'max_speed': 320}
        ]
    }
    try:
        with open('data.json', 'w', encoding='utf-8') as fs:
            json.dump(mydict, fs)
    except IOError as e:
        print(e)
    print('保存数据完成!')


if __name__ == '__main__':
    main()
辨析常用json四种方式:json.dumps(obj)把对象序列化为字符串,json.loads(s)把字符串解析为对象
json.dump(data, open(filename, 'w'))把对象写入文件
data = json.load(open(filename))从文件读取并解析为对象
summary:先打开文件 -> 创建写writer(f) or 读reader(f)对象,并对对象进行写入或读取操作
import csv

# Write rows to a CSV file with a header via DictWriter, then seek back to
# the start of the same handle and read them back with csv.reader.
# NOTE: the original used encoding='ANSI', which is not a valid Python codec
# and raises LookupError; utf-8 is used instead.
with open(r'filename', 'w+', newline='', encoding='utf-8') as f:
    data = [['aa7', '测试7'], ['aa8', '测试8'], ['aa9', '测试9']]
    # writer = csv.writer(f)
    # writer.writerows(data)
    field_data = []
    # fieldname -> keys
    fieldname = ['123', 'text']
    # pair each row with the field names to build one dict per row
    for row in data:
        field_data.append(dict(zip(fieldname, row)))
    print(field_data)
    # DictWriter writes the header row from fieldnames
    writer = csv.DictWriter(f, fieldnames=fieldname)
    writer.writeheader()
    # write the dict rows
    writer.writerows(field_data)
    # rewind so the same w+ handle can be read back
    f.seek(0, 0)
    cc = csv.reader(f)
    for row in cc:
        print(row)
windows中无fork函数,使用multiprocessing模块下的Process方法调用函数,并且利用start和join方法使用
注意:python中函数名加括号表示调用该函数并得到其返回值,不加括号表示引用函数对象本身(例如作为target参数传递)
from multiprocessing import Process
import os


def run_proc(name):
    """Child-process entry point: print its name and pid."""
    print('run process is %s (%s)' % (name, os.getpid()))


if __name__ == '__main__':
    print('parents pid is %s' % os.getpid())
    # args must be a tuple, passed via the keyword 'args='
    p = Process(target=run_proc, args=('test',))
    print('child process start')
    p.start()
    p.join()
    print('child process end')
from multiprocessing import Process, Pool
import os, time, random


def long_time_task(name):
    """Sleep a random 0-3 s between start/end markers and report the duration."""
    print('run task %s (%s)' % (name, os.getpid()))
    start = time.time()
    time.sleep(random.random() * 3)
    end = time.time()
    print('task %s run %0.2f seconds' % (name, (end - start)))


if __name__ == '__main__':
    print('parent process is %s' % os.getpid())
    p = Pool()
    # submit all tasks to the pool asynchronously (p.apply_async)
    for i in range(5):
        p.apply_async(long_time_task, args=(i,))
    print('wait for all subprocess done')
    # the Pool must be closed (no new tasks accepted) before join()
    p.close()
    p.join()
    print('all subprocess done')
import subprocess

# Drive an interactive nslookup session through stdin/stdout pipes.
print('$ nslookup')
p = subprocess.Popen(['nslookup'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, err = p.communicate(b'set q=mx\npython.org\nexit\n')
# NOTE(review): decoding with 'gbk' assumes a Chinese-locale Windows console;
# on other systems nslookup output is typically utf-8 — confirm before reuse.
print(output.decode('gbk'))
print('Exit code:', p.returncode)
from multiprocessing import Process, Queue
import os, time, random


def write(q):
    """Producer: put A, B, C on the queue with a random pause after each."""
    print('Process to write:%s' % os.getpid())
    for value in ['A', 'B', 'C']:
        print('put %s to queue..' % value)
        q.put(value)
        time.sleep(random.random())


def read(q):
    """Consumer: loop forever printing whatever arrives on the queue."""
    print('Process to read:%s' % os.getpid())
    while True:
        value = q.get()
        print('Get %s from queue' % value)


if __name__ == '__main__':
    q = Queue()
    pw = Process(target=write, args=(q,))
    pr = Process(target=read, args=(q,))
    pw.start()
    pr.start()
    pw.join()
    # read() never returns, so the consumer must be killed explicitly
    pr.terminate()
import threading
import time


def T1_job():
    """Sleep 0.5 s between start/end markers."""
    print('T1 start')
    time.sleep(0.5)
    print('T1 end')


def T2_job():
    """Sleep 0.1 s between start/end markers."""
    print('T2 start')
    time.sleep(0.1)
    print('T2 end')


def text():
    """Start both jobs concurrently and show active_count()/enumerate().

    The two triple-quoted blocks below illustrate the two possible join
    orders; both are left disabled, so 'all done' prints immediately.
    """
    # fixed: the original started T1_job twice, contradicting its own timing comments
    thread1 = threading.Thread(target=T1_job, name='T1')
    thread2 = threading.Thread(target=T2_job, name='T2')
    thread1.start()  # -> 0.5s
    thread2.start()  # -> 0.1s
    print(threading.active_count())
    print(threading.enumerate())
    '''
    # T1 start ----------0.5s-----------.join()->all done
    # T2 start --0.1s----|.join()-------
    thread2.join()
    thread1.join()
    '''
    '''
    # T1 start ----------0.5s-----------.join()->all done
    # T2 start --0.1s----| .join()->all done
    thread1.join()
    thread2.join()
    '''
    print('all done\n')
注:由于CPython的GIL和线程切换粒度,循环次数较小时往往观察不到竞态条件,必须使循环次数大于某一限制才能复现共享变量被破坏的现象
# Shared balance plus a lock that serializes every update to it.
balance = 0
lock = threading.Lock()


def change_it(n):
    # deposit then withdraw — the net result should always be 0
    global balance
    balance = balance + n
    balance = balance - n


# def run_thread(n):
#     for i in range(800000):
#         change_it(n)


def run_thread(n):
    """Hammer change_it under the lock so balance stays consistent."""
    for i in range(800000):
        lock.acquire()
        try:
            change_it(n)
        finally:
            # always release, even if change_it raises
            lock.release()


t1 = threading.Thread(target=run_thread, args=(5,))
t2 = threading.Thread(target=run_thread, args=(8,))
t1.start()
t2.start()
t1.join()
t2.join()
print(balance)
import threading
from queue import Queue


def job(l, q):
    """Square every element of l in place, then push the list onto q."""
    for i in range(len(l)):
        l[i] = l[i] ** 2
    q.put(l)


def multithread():
    """Run job over each sub-list in its own thread and print the collected results."""
    # the Queue collects one result list per worker thread
    q = Queue()
    data = [[1, 2, 3], [44, 33]]
    threads = []  # fixed: was never initialized, so threads.append raised NameError
    for i in range(len(data)):
        t = threading.Thread(target=job, args=(data[i], q))
        t.start()
        threads.append(t)
    # wait for every worker to finish
    for thread in threads:
        thread.join()
    result = []
    # drain the queue
    for each in range(q.qsize()):
        result.append(q.get())
    print(result)
import threading
import requests
from bs4 import BeautifulSoup

# thread-local storage: getText and parse on the same thread share it
spider = threading.local()


def getText():
    """Fetch baidu's homepage and hand the HTML to parse() via thread-local state."""
    url = 'http://www.baidu.com'
    r = requests.get(url)
    try:
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        # pass the fetched content to parse() through the thread-local object
        spider.text = r.text
        parse()
    except Exception:
        spider.text = 'error'


def parse():
    """Pretty-print the HTML stored by getText() for the current thread."""
    print(threading.current_thread())
    html = spider.text
    try:
        soup = BeautifulSoup(html, 'html.parser')
        # html -> print-format
        print(soup.prettify())
    except Exception:
        print('parse error')


if __name__ == '__main__':
    thread1 = threading.Thread(target=getText, name='begin parse')
    thread1.start()
    thread1.join()
多进程:process:稳定性高,一个子进程崩溃不会影响其他子进程:进程只能通过queue进行进程间通信,而不能共享内存,并且创建进程的代价较大
线程:threading:连携性,一崩全崩,共享进程的内存:互锁与local
原文:https://www.cnblogs.com/WheelCode/p/12835424.html