from pandas import Series
import numpy as np
# Hierarchical indexing on a Series.
#
# Build a Series whose index has two levels: the outer level holds the letters
# and the inner level holds the integers.  The values come from
# np.random.randn, so the numbers in the sample outputs below are only
# illustrative and will differ from run to run.
data = Series(
    np.random.randn(10),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
print(data)
# a  1    0.050239
#    2    1.886958
#    3   -1.366131
# b  1    1.678755
#    2    0.029100
#    3   -1.121555
# c  1    1.732161
#    2    0.401984
# d  2    1.368133
#    3   -0.631580
# dtype: float64

# The index is a MultiIndex made of (outer, inner) tuples.
print(data.index)
# MultiIndex([('a', 1),
#             ('a', 2),
#             ('a', 3),
#             ('b', 1),
#             ('b', 2),
#             ('b', 3),
#             ('c', 1),
#             ('c', 2),
#             ('d', 2),
#             ('d', 3)],
#            )

# Partial indexing: selecting an outer label returns the inner-level Series.
print(data['b'])
# 1    1.678755
# 2    0.029100
# 3   -1.121555
# dtype: float64

# Label slices on the outer level include both end points.
print(data['b':'c'])
# b  1    1.678755
#    2    0.029100
#    3   -1.121555
# c  1    1.732161
#    2    0.401984
# dtype: float64

# A list of outer labels selects several groups at once.
print(data.loc[['b', 'd']])
# b  1    1.678755
#    2    0.029100
#    3   -1.121555
# d  2    1.368133
#    3   -0.631580
# dtype: float64

# Select on the inner level: every outer label that has an inner key of 2.
print(data[:, 2])
# a    1.886958
# b    0.029100
# c    0.401984
# d    1.368133
# dtype: float64

# unstack() pivots the inner level into columns; (outer, inner) combinations
# that are absent from the Series show up as NaN.
print(data.unstack())
#           1         2         3
# a  0.050239  1.886958 -1.366131
# b  1.678755  0.029100 -1.121555
# c  1.732161  0.401984       NaN
# d       NaN  1.368133 -0.631580

# stack() is the inverse of unstack().  The trailing dropna() removes the NaN
# cells that unstack() introduced, so the round trip has the same ten rows as
# `data` even on pandas versions whose stack() keeps missing values.
print(data.unstack().stack().dropna())
# a  1    0.050239
#    2    1.886958
#    3   -1.366131
# b  1    1.678755
#    2    0.029100
#    3   -1.121555
# c  1    1.732161
#    2    0.401984
# d  2    1.368133
#    3   -0.631580
# dtype: float64
from pandas import Series, DataFrame, MultiIndex
import pandas as pd
import numpy as np
# Hierarchical indexing on a DataFrame: either axis may carry a MultiIndex.
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[
        ['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green'],
    ],
)
print(frame)
#      Ohio     Colorado
#     Green Red    Green
# a 1     0   1        2
#   2     3   4        5
# b 1     6   7        8
#   2     9  10       11

# Each level can be given a name; the names appear in the printed output.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# state      Ohio     Colorado
# color     Green Red    Green
# key1 key2
# a    1        0   1        2
#      2        3   4        5
# b    1        6   7        8
#      2        9  10       11

# Partial column indexing selects a whole group of columns.
print(frame['Ohio'])
# color      Green  Red
# key1 key2
# a    1         0    1
#      2         3    4
# b    1         6    7
#      2         9   10

# A MultiIndex can also be built on its own and reused.
mIndex = MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'],
)
frame2 = DataFrame(np.arange(12).reshape((4, 3)), columns=mIndex)
print(frame2)
# state  Ohio     Colorado
# color Green Red    Green
# 0         0   1        2
# 1         3   4        5
# 2         6   7        8
# 3         9  10       11

# Reorder the index levels: swaplevel() exchanges two levels and leaves the
# data itself untouched.
frame3 = frame.swaplevel('key1', 'key2')
print(frame3)
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
# 2    a        3   4        5
# 1    b        6   7        8
# 2    b        9  10       11

# Swap the levels and then sort the rows.  `axis` is passed by keyword because
# pandas 2.x no longer accepts it positionally.
frame4 = frame.swaplevel(0, 1).sort_index(axis=0)
print(frame4)
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
#      b        6   7        8
# 2    a        3   4        5
#      b        9  10       11

# Summary statistics by level.  The `level` argument of sum() was removed in
# pandas 2.0, so group by the level explicitly and aggregate.
frame5 = frame.groupby(level='key2').sum()
print(frame5)
# state  Ohio     Colorado
# color Green Red    Green
# key2
# 1         6   8       10
# 2        12  14       16

# Same aggregation along the columns: transpose, group the (now row) level
# 'color', sum, and transpose back.
frame6 = frame.T.groupby(level='color').sum().T
print(frame6)
# color      Green  Red
# key1 key2
# a    1         2    1
#      2         8    4
# b    1        14    7
#      2        20   10
from pandas import DataFrame
# Moving columns into the index and back again.
frame = DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# set_index turns one or more columns into the row index and returns a new
# DataFrame.
frame2 = frame.set_index(['c', 'd'])
print(frame2)
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# By default those columns are removed from the DataFrame; pass drop=False to
# keep them as regular columns as well.
frame3 = frame.set_index(['c', 'd'], drop=False)
print(frame3)
#        a  b    c  d
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

# reset_index is the inverse of set_index: the hierarchical index levels are
# moved back into columns.
frame4 = frame2.reset_index()
print(frame4)
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1
# Original article: https://www.cnblogs.com/nicole-zhang/p/12955106.html