关于excel,大部分操作应该主要用于数据分析,其中,最主流的方式应该是采用pandas库,这里主要介绍几种方式来读写excel
处理一般的excel,尤其没有合并单元格之类的情况时,我们可以采用普通的方法
这里采用xlrd库
无单元格合并的情况下
import xlrd
def parse_list(data):
"""
parse a list
:param data: list
:return:
"""
data_list = []
for i in range(1, len(data)):
data_dict = {}
for index, key in enumerate(data[0]):
data_dict[key] = data[i][index]
data_list.append(data_dict)
return data_list
def read_excel(filename, sheet=None):
"""
read excel and parse every line
:param name: file of absolute path
:return: some dict which is about everyline of list
"""
workbook = xlrd.open_workbook(filename)
total_data = []
if sheet:
sh = workbook.sheet_by_name(sheet)
for row in range(0, sh.nrows):
row_list = sh.row_values(row)
total_data.append(row_list)
return parse_list(total_data)
上述方法主要针对特定的sheet,当然,根据上述方法也也可进行改变,读取所有的sheet
关键方法在于read_excel,所以我们来改写该方法
import xlrd
def read_excel(filename):
"""
read excel and parse every line
:param name: file of absolute path
:return: some dict which is about everyline of list
"""
workbook = xlrd.open_workbook(filename)
total_data = []
name_sheets = workbook.sheet_names()
for sheet in name_sheets:
sh = workbook.sheet_by_name(sheet)
for row in range(0, sh.nrows):
row_list = sh.row_values(row)
total_data.append(row_list)
return parse_list(total_data)
主要用于处理合并存在合并单元格的情况
import xlrd
def get_excel():
data_list = []
with xlrd.open_workbook(r‘D:\0325.xlsx‘) as workbook:
name_sheets = workbook.sheet_names() # 获取Excel的sheet表列表,存储是sheet表名
for index in name_sheets: # for 循环读取每一个sheet表的内容
apply_dic = []
sheet_info = workbook.sheet_by_name(index) # 根据表名获取表中的所有内容,sheet_info也是列表,列表中的值是每个单元格里值
first_line = sheet_info.row_values(0) # 获取首行,我这里的首行是表头,我打算用表头作为字典的key,每一行数据对应表头的value,每一行组成一个字典
values_merge_cell = merge_cell(sheet_info) # 这里是调用处理合并单元格的函数
for i in range(1, sheet_info.nrows): # 开始为组成字典准备数据
other_line = sheet_info.row_values(i)
for key in values_merge_cell.keys():
if key[0] == i:
other_line[key[1]] = values_merge_cell[key]
# print(other_line)
dic = list_dic(first_line, other_line) # 调用组合字典的函数,传入key和value,字典生成
apply_dic.append(dic)
data_list.append({‘index‘: index, ‘data‘: apply_dic})
return data_list
def list_dic(list1, list2):
‘‘‘
two lists merge a dict,a list as key,other list as value
:param list1:key
:param list2:value
:return:dict
‘‘‘
dic = dict(map(lambda x, y: [x, y], list1, list2))
return dic
def merge_cell(sheet_info):
‘‘‘
#handle Merge transverse cells and handle Merge Vertical Cells, assign empty cells,
:param rlow:row, include row exclusive of row_range
:param rhigh:row_range
:param clow:col, include col exclusive of col_range
:param chigh:col_range
:param sheet_info:object of sheet
:return:dic contain all of empty cells value
‘‘‘
merge = {}
merge_cells = sheet_info.merged_cells
for (rlow, rhigh, clow, chigh) in merge_cells:
value_mg_cell = sheet_info.cell_value(rlow, clow)
if rhigh - rlow == 1:
# Merge transverse cells
for n in range(chigh - clow - 1):
merge[(rlow, clow + n + 1)] = value_mg_cell
elif chigh - clow == 1:
# Merge Vertical Cells
for n in range(rhigh - rlow - 1):
merge[(rlow + n + 1, clow)] = value_mg_cell
return merge
if __name__ == ‘__main__‘:
get_excel()
生成xlsx,使用pandas
import pandas as pd
def write_excel(data, filename, keys=‘‘):
data_df = pd.DataFrame(data)
if keys != ‘‘:
data_df.columns = list(keys)
writer = pd.ExcelWriter(‘{}.xlsx‘.format(filename))
data_df.to_excel(writer, index=None)
writer.save()
生成csv
import csv
def write_csv(name, data_list):
fieldnames = data_list[0]
print(fieldnames)
with open(name + ‘.csv‘, mode=‘w‘, newline=‘‘, encoding=‘utf-8-sig‘) as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for data in data_list:
writer.writerow(data)
当然,可以通过json去生成csv
import json
def json_to_csv(name):
with open(name) as f:
a = json.loads(f.read())
write_csv(name, a)
这里依旧可以使用pandas
def data_to_csv(data_list, csv_key, name):
"""
save data to csv
:param data_list: data
:param csv_key: csv column
:param name: file name
:return:
"""
final_data = []
for data in data_list:
sign_key = []
for key, value in data.items():
sign_key.append(value)
final_data.append(sign_key)
# 将总数据转化为data frame再输出
df = pd.DataFrame(data=final_data,
columns=csv_key)
df.to_csv(name + ‘.csv‘, index=False, encoding=‘utf-8_sig‘)
原文:https://www.cnblogs.com/ZSMblog/p/13154984.html