注意:要正常使用本文中提到的各种方法,需要安装numpy、pandas、openpyxl、xlrd、xlwt库
场景:现有工作簿test.xlsx中有Sheet1、Sheet2、Sheet3,打算创建一张空白工作表Sheet4、复制工作表Sheet1、删除工作表Sheet2、最后将工作表Sheet3在工作簿中的顺序向左移1位
import numpy as np
import pandas as pd
from openpyxl import load_workbook
with pd.ExcelWriter('test.xlsx') as writer:
book = load_workbook(writer.path) # 获得workbook对象
sheets = {ws.title:ws for ws in book.worksheets} # 获得一个字典,形如{'Sheet1':<Worksheet "Sheet1">,'Sheet2':<Worksheet "Sheet2">},其中每个键都是str格式的工作表名,对应的值为Worksheet对象
writer.book = book
writer.sheets = sheets
book.create_sheet('Sheet4')
book.copy_worksheet(sheets['Sheet1'])
book.remove(sheets['Sheet2'])
book.move_sheet(sheets['Sheet3'], offset=-1) # 值为正数时向右移,值为负数时向左移。若offset的值超过了该工作表可移动的最大偏移量,则该工作表会停留在最右(或左)侧,不会报错
pd.read_csv()语法为:
pd.read_csv('文件路径', sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal: str = '.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, dialect=None, error_bad_lines=True, warn_bad_lines=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)
pd.read_excel()语法为:
pd.read_excel('文件路径',sheet_name=0, header=0, names=None, index_col=None, usecols=None, squeeze=False, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, keep_default_na=True, verbose=False, parse_dates=False, date_parser=None, thousands=None, comment=None, skip_footer=0, skipfooter=0, convert_float=True, mangle_dupe_cols=True)
部分重要参数:
sheet_name:int或str,表示读取excel文件中的哪张工作表,默认值为0(读取第一张),值为int类型时表示读取第n+1张工作表,值为str类型时表示读取该名称的工作表(如sheet_name=‘Sheet2‘)
index_col:指定哪列作为行标签索引(输入int类型则按列位置索引找,输入str类型则按列标签索引找)。若不指定,则Pandas会自动为返回的DataFrame增加行位置索引:0、1、2……
注意:索引号是从0开始的,如果想指定第一列为行标签索引应输入index_col=0
parse_dates:是否对列标签索引进行日期解析,默认值为False,此时列标签索引的数据类型为str;若parse_dates=True,则会将列标签索引解析为pandas._libs.tslibs.timestamps.Timestamp数据类型
注意:仅对列标签索引进行日期解析,不对除它以外的数据字段进行日期解析。因此即使数据的某些字段也是类似于日期类型的,也不会对这些字段进行日期解析,仍保留其str格式
sep:分隔符(str类型),默认分隔符是半角逗号‘,‘
,可以通过此参数手动指定其他分隔符
header:int类型,指定csv文件的第几行作为columns,默认值‘infer‘
表示自动推断,可以令header=0指定第一行,也可以令header=None将读取到的全部作为表格内容而不要columns。注意:若指定header=N,则DataFrame的内容将从第N+1行开始,即第N-1行之前的内容将被丢弃
names:list类型,自定义的DataFrame.columns。当指定了此参数后,header参数指向的列名将自动失效
usecols:list类型,读取哪些列,list里面的元素可以是int(列位置索引)或者str(列标签索引)
dtype:dict类型,自定义某些列的数据格式,字典的键是int格式的列位置索引或str格式的列标签索引,字典的值是该列的数据类型,用法如:dtype={‘volume‘:int}
、dtype={5:np.int64}
import pandas as pd
import numpy as np
data = pd.read_csv('000001.csv')
print(data)
print(type(data))
执行结果:
date open high close low volume
0 2017/2/27 9.50 9.50 9.43 9.42 407341.12
1 2017/2/28 9.43 9.51 9.48 9.42 369719.69
.. ... ... ... ... ... ...
611 2019/8/23 14.37 14.74 14.65 14.35 1636867.62
612 2019/8/26 14.42 14.50 14.25 14.15 1415122.50
[613 rows x 6 columns]
<class 'pandas.core.frame.DataFrame'>
import pandas as pd
import numpy as np
# 不进行日期解析
data = pd.read_csv('000001.csv', index_col=0)
# data = pd.read_csv('000001.csv', index_col='date') # 这样也行
print(data.index[0])
print(type(data.index[0])); print('===========')
# 进行日期解析
data = pd.read_csv('000001.csv', index_col=0, parse_dates=True)
print(data.index[0])
print(type(data.index[0])); print('===========')
print(data)
print(type(data))
执行结果:
2017/2/27
<class 'str'>
===========
2017-02-27 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
===========
open high close low volume
date
2017-02-27 9.50 9.50 9.43 9.42 407341.12
2017-02-28 9.43 9.51 9.48 9.42 369719.69
... ... ... ... ... ...
2019-08-23 14.37 14.74 14.65 14.35 1636867.62
2019-08-26 14.42 14.50 14.25 14.15 1415122.50
[613 rows x 5 columns]
<class pandas.core.frame.DataFrame'>
注意:此方法会覆盖原有同名工作簿
语法:
df.to_excel('文件路径', sheet_name="Sheet1", na_rep="", float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep="inf", verbose=True, freeze_panes=None)
参数:
columns=[‘c1‘,‘c3‘]
);否则,list中的元素应为int表示的列位置索引(如columns=[0,2]
)代码示例:
# df.to_excel()代码示例
# 注意,此方法会将原有magic_square.xls工作簿覆盖
import numpy as np
import pandas as pd
li = [[2,9,4],[7,5,3],[6,2,8]]
df = pd.DataFrame(li,columns=['c1','c2','c3'],index=['t1','t2','t3'])
df.to_excel('magic_square.xls')
注意:此方法会覆盖原有同名工作簿
# 方法一:基于文件句柄writer(繁琐)
import numpy as np
import pandas as pd
df1 = pd.DataFrame([[1,2,3],[4,5,6]],columns=['c1','c2','c3'],index=['t1','t2'])
df2 = pd.DataFrame([['a','b','c'],['d','e','f']],columns=['c1','c2','c3'],index=['t1','t2'])
writer = pd.ExcelWriter('test.xlsx')
df1.to_excel(writer, sheet_name='Sheet1')
df2.to_excel(writer, sheet_name='Sheet2')
writer.save()
writer.close()
# 方法二:基于with上下文管理(推荐,简便)
import numpy as np
import pandas as pd
df1 = pd.DataFrame([[1,2,3],[4,5,6]],columns=['c1','c2','c3'],index=['t1','t2'])
df2 = pd.DataFrame([['a','b','c'],['d','e','f']],columns=['c1','c2','c3'],index=['t1','t2'])
with pd.ExcelWriter('test.xlsx') as writer:
df1.to_excel(writer, sheet_name='Sheet1')
df2.to_excel(writer, sheet_name='Sheet2')
场景:现有工作簿中有Sheet1和Sheet2,打算将df3写至新建的Sheet3,将df4写至新建的Sheet4
注意:若原工作簿已存在Sheet3和Sheet4,则它们不会被覆盖,而是自动将df3写至Sheet31,将df4写至Sheet41
import numpy as np
import pandas as pd
from openpyxl import load_workbook
df3 = pd.DataFrame([[1,2,3],[4,5,6]],columns=['c1','c2','c3'],index=['t1','t2'])
df4 = pd.DataFrame([['a','b','c'],['d','e','f']],columns=['c1','c2','c3'],index=['t1','t2'])
with pd.ExcelWriter('test.xlsx') as writer:
book = load_workbook(writer.path)
writer.book = book
df3.to_excel(writer, sheet_name='Sheet3')
df4.to_excel(writer, sheet_name='Sheet4')
场景:现有工作簿中有Sheet1和Sheet2,打算将df1写至Sheet1,将df2写至Sheet2
注意:若startrow、startcol指向的是现有工作表外部区域,将呈现“追加”的效果;反之,将呈现“替换”的效果
import numpy as np
import pandas as pd
from openpyxl import load_workbook
df1 = pd.DataFrame([[1,2,3],[4,5,6]],columns=['c1','c2','c3'],index=['t1','t2'])
df2 = pd.DataFrame([['a','b','c'],['d','e','f']],columns=['c1','c2','c3'],index=['t1','t2'])
with pd.ExcelWriter('test.xlsx') as writer:
book = load_workbook(writer.path)
sheets = {ws.title:ws for ws in book.worksheets}
writer.book = book
writer.sheets = sheets
# 自动将df1写到Sheet1已使用区域右下的区域
max_row = book.get_sheet_by_name('Sheet1').max_row # 获取Sheet1已使用的行数
max_column = book.get_sheet_by_name('Sheet1').max_column # 获取Sheet1已使用的列数
df1.to_excel(writer, sheet_name="Sheet1", startrow=max_row, startcol=max_column)
# 手动将df2写到Sheet2第4行、第5列开始的区域(注意对应的参数值减1)
df2.to_excel(writer, sheet_name="Sheet2", startrow=3, startcol=4)
原文:https://www.cnblogs.com/oddgod/p/12310220.html