Python pandas

时间：2018-06-28 23:46:44 阅读：246 评论：0 收藏：0 [点我收藏+]

1.得到指定行的索引值。

In [7]:  
# Build a small two-column frame whose row labels run from 10 to 14, so that
# a row's label and its position are easy to tell apart.
dframe = pd.DataFrame(
    dict(A=list("abcde"), B=list("fghij")),
    index=list(range(10, 15)),
)
# Displaying dframe in a notebook shows:
# Out[7]:
#     A   B
# 10  a   f
# 11  b   g
# 12  c   h
# 13  d   i
# 14  e   j

# Get the index label stored at a given row position.
dframe.index[2]
# output: 12

# Drop the row that carries that label. The result is only displayed here;
# it is not assigned back to dframe.
dframe.drop(dframe.index[2])
# Out[99]:
#     A  B
# 10  a  f
# 11  b  g
# 13  d  i
# 14  e  j

2.一些作业记录

Question 1:

import pandas as pd
import string
import re
# Question 1: build the merged table of the 15 top-ranked countries.
#
# Three sources are loaded, their country labels are normalised so the same
# country carries the same name in each table, and the tables are then
# inner-joined on the country index.

# Matches one parenthesised fragment, e.g. the "(...)" suffix of a label.
_PARENTHETICAL = re.compile(r"\(.*?\)")


def _energy_column_name(col):
    """Return the downstream column name for a raw energy-sheet header."""
    if col.startswith('Unnamed'):
        return 'Country'
    if col.endswith('capita'):
        return col[:-6] + 'Capita'
    if col.endswith('Production'):
        # Only the first nine characters of the header are kept after '% '.
        return '% ' + col[:9]
    return col


def _energy_country_name(name):
    """Return the normalised country label for one energy-table row."""
    if name.startswith("Republic of Korea"):
        name = "South Korea"
    elif name.startswith("United States of America"):
        name = "United States"
    elif name.startswith("United Kingdom"):
        name = "United Kingdom"
    elif name.startswith("China, Hong Kong"):
        name = "Hong Kong"
    # Order matters: strip parenthesised text first, then trailing digits,
    # and finally whatever trailing whitespace those removals left behind.
    name = _PARENTHETICAL.sub("", name)
    return name.rstrip(string.digits).rstrip()


def _gdp_country_name(name):
    """Return the normalised country label for one GDP-table row.

    Intended mappings:
        "Korea, Rep."          -> "South Korea"
        "Iran, Islamic Rep."   -> "Iran"
        "Hong Kong SAR, China" -> "Hong Kong"
    """
    if name.startswith("Korea, Rep."):
        return "South Korea"
    if name.startswith("Iran, Islamic Rep."):
        return "Iran"
    # The previous check compared against 'Hong kong' (lower-case k), which
    # cannot match "Hong Kong SAR, China"; both spellings are accepted now.
    if name.startswith(("Hong Kong", "Hong kong")):
        return "Hong Kong"
    return name


# ==== energy indicators (Excel) ====
energy = pd.read_excel('Energy Indicators.xls', usecols=[2, 3, 4, 5],
                       skiprows=16, skipfooter=38, na_values=['...'])
# Drop the row labelled 0 before renaming the columns.
energy1 = energy.drop([0])
energy1 = energy1.rename(columns=_energy_column_name)
energy1 = energy1.set_index('Country')
# A single callable rename replaces the earlier one-label-at-a-time loops.
energy1 = energy1.rename(index=_energy_country_name)

# ==== GDP (CSV) ====
GDP = pd.read_csv('world_bank.csv', skiprows=4)
GDP = GDP.rename(columns={'Country Name': 'Country'})
GDP = GDP.set_index('Country')
GDP = GDP.rename(index=_gdp_country_name)

# ==== journal rankings (Excel) ====
ScimEn = pd.read_excel('scimagojr-3.xlsx')
ScimEn = ScimEn.set_index('Country')

# ==== merge ====
# Inner joins keep only the countries present in all three tables.
df_merged = pd.merge(energy1, GDP, how='inner', left_index=True, right_index=True)
df_merged = pd.merge(ScimEn, df_merged, how='inner', left_index=True, right_index=True)
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
df_merged = df_merged.sort_values(['Rank'], ascending=True)

df_merged = df_merged[df_merged['Rank'] <= 15]


def answer_one():
    """Return the merged table restricted to rows with Rank <= 15.

    A copy is returned so that callers which add columns to the result do
    not modify the module-level ``df_merged``.
    """
    return df_merged.copy()

Question 2:

def answer_two():
    """Return the size of the outer join of the three datasets, minus 15.

    The energy, GDP and journal-ranking tables are loaded and their country
    labels normalised, then the tables are outer-joined on the country
    index. The result is the number of distinct index labels in that join
    minus 15.

    Returns:
        int: ``len(unique countries in the outer join) - 15``.
    """
    parenthetical = re.compile(r"\(.*?\)")

    def energy_column_name(col):
        # Translate a raw spreadsheet header into the downstream column name.
        if col.startswith('Unnamed'):
            return 'Country'
        if col.endswith('capita'):
            return col[:-6] + 'Capita'
        if col.endswith('Production'):
            return '% ' + col[:9]
        return col

    def energy_country_name(name):
        # Apply the fixed renames first, then the generic clean-up.
        if name.startswith("Republic of Korea"):
            name = "South Korea"
        elif name.startswith("United States of America"):
            name = "United States"
        elif name.startswith("United Kingdom"):
            name = "United Kingdom"
        elif name.startswith("China, Hong Kong"):
            name = "Hong Kong"
        # Strip parenthesised text, then trailing digits, then whitespace.
        name = parenthetical.sub("", name)
        return name.rstrip(string.digits).rstrip()

    def gdp_country_name(name):
        if name.startswith("Korea, Rep."):
            return "South Korea"
        if name.startswith("Iran, Islamic Rep."):
            return "Iran"
        # The previous check used 'Hong kong' (lower-case k) and therefore
        # never matched a label beginning "Hong Kong"; accept both spellings.
        if name.startswith(("Hong Kong", "Hong kong")):
            return "Hong Kong"
        return name

    # ==== energy indicators (Excel) ====
    energy = pd.read_excel('Energy Indicators.xls', usecols=[2, 3, 4, 5],
                           skiprows=16, skipfooter=38, na_values=['...'])
    energy1 = energy.drop([0])
    energy1 = energy1.rename(columns=energy_column_name)
    energy1 = energy1.set_index('Country')
    # One callable rename instead of a separate rename call per label.
    energy1 = energy1.rename(index=energy_country_name)

    # ==== GDP (CSV) ====
    GDP = pd.read_csv('world_bank.csv', skiprows=4)
    GDP = GDP.rename(columns={'Country Name': 'Country'})
    GDP = GDP.set_index('Country')
    GDP = GDP.rename(index=gdp_country_name)

    # ==== journal rankings (Excel) ====
    ScimEn = pd.read_excel('scimagojr-3.xlsx')
    ScimEn = ScimEn.set_index('Country')

    # ==== merge ====
    # Outer joins keep every country that appears in at least one table.
    df_merged = pd.merge(energy1, GDP, how='outer', left_index=True, right_index=True)
    df_merged = pd.merge(ScimEn, df_merged, how='outer', left_index=True, right_index=True)

    return len(df_merged.index.unique()) - 15

View Code

Question 3:

def answer_three():
    """Return the row-wise average of the non-descriptive columns, largest first.

    Every column listed in ``non_gdp_cols`` is dropped from the table returned
    by ``answer_one()``; the remaining columns (presumably the per-year GDP
    values -- confirm against world_bank.csv) are averaged per row.

    Returns:
        pandas.Series: named ``'avgGDP'``, carrying the index of the
        ``answer_one()`` table, sorted in descending order.
    """
    Top15 = answer_one()
    non_gdp_cols = [
        'Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',
        'Citations per document', 'H index', 'Energy Supply',
        'Energy Supply per Capita', '% Renewable', 'Country Code',
        'Indicator Name', 'Indicator Code',
    ]
    gdp = Top15.drop(non_gdp_cols, axis=1)
    gdp['avgGDP'] = gdp.mean(axis=1)
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    gdp = gdp.sort_values(['avgGDP'], ascending=False)
    return gdp['avgGDP']

Question 4:

def answer_four():
    """Return the relative change of the United Kingdom's '2015' value over '2006'.

    Computed as ``(v2015 - v2006) / v2006`` on the 'United Kingdom' row of the
    table returned by ``answer_one()``.

    Returns:
        pandas.Series: a one-element Series labelled 'United Kingdom'
        (the row is selected with a list, so the result stays a Series).
    """
    Top15 = answer_one()
    # Select the row once instead of repeating the lookup three times.
    uk = Top15.loc[['United Kingdom']]
    result = (uk['2015'] - uk['2006']) / uk['2006']
    return result

Question 5:

def answer_five():
    """Return the mean of the 'Energy Supply per Capita' column.

    The mean is taken over the rows of the table returned by ``answer_one()``.
    """
    Top15 = answer_one()
    return Top15['Energy Supply per Capita'].mean()


print(answer_five())

Question 6:

def answer_six():
    """Return the country with the maximum % Renewable and that percentage.

    Returns:
        tuple: ``(country, percentage)`` where ``country`` is the index label
        of the row with the largest '% Renewable' value in the table returned
        by ``answer_one()`` and ``percentage`` is that value.
    """
    Top15 = answer_one()
    # Work on the column as a Series: idxmax()/max() on a one-column
    # DataFrame would each return a Series rather than a single value.
    renewable = Top15['% Renewable']
    return (renewable.idxmax(), renewable.max())


answer_six()

View Code

Question 7:

def answer_seven():
    """Return the country with the highest self-citation ratio and that ratio.

    The ratio is 'Self-citations' divided by 'Citations', computed per row of
    the table returned by ``answer_one()``.

    Returns:
        tuple: ``(country, ratio)`` -- the index label of the row with the
        largest ratio, and the ratio itself.
    """
    Top15 = answer_one()
    ratio = Top15['Self-citations'].div(Top15['Citations'])
    # The earlier version discarded the results of reset_index()/set_index()
    # and then selected a 'Country' column that did not exist; the ratio
    # Series already carries the table's index, so it is used directly.
    return (ratio.idxmax(), ratio.max())


answer_seven()

Question 8:

def answer_eight():
    """Return the index label of the row with the third-largest 'populations'.

    'populations' is computed as 'Energy Supply' divided by
    'Energy Supply per Capita' for each row of the table returned by
    ``answer_one()``. The sorted one-column table is printed as a side effect.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    Top15 = Top15.sort_values(['populations'], ascending=False)
    Top15 = Top15.loc[:, ['populations']]
    print(Top15)
    # Position 2 of the descending sort is the third-largest entry.
    return Top15.index[2]


answer_eight()

Question 9: 出现错误： AttributeError: 'float' object has no attribute 'sqrt'

# 出现错误
"""
AttributeError: 'float' object has no attribute 'sqrt'
"""

def answer_nine():
    """Correlate 'Docs per Capita' with 'Energy Supply per Capita' (failing variant).

    This is the version the accompanying note reports as raising
    ``AttributeError: 'float' object has no attribute 'sqrt'``. It is kept
    without a dtype cast on purpose; the corrected definition that follows
    in this file casts both columns to float64 before calling ``corr``.

    'populations' is 'Energy Supply' / 'Energy Supply per Capita', and
    'Docs per Capita' is 'Documents' / 'populations'. The two compared
    columns are printed as a side effect.

    Returns:
        The value of ``Series.corr`` between the two columns.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    # NOTE(review): presumably the columns are object dtype here, which is
    # what triggers the reported error -- confirm against the loaded data.
    corr = Top15['Docs per Capita'].corr(Top15['Energy Supply per Capita'])
    return corr


print(answer_nine())

修改后：

def answer_nine():
    """Return the correlation of 'Docs per Capita' with 'Energy Supply per Capita'.

    'populations' is 'Energy Supply' / 'Energy Supply per Capita', and
    'Docs per Capita' is 'Documents' / 'populations', both computed on the
    table returned by ``answer_one()``. The two compared columns are printed
    as a side effect.

    Returns:
        The value of ``Series.corr`` between the two columns after both have
        been cast to float64.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    # The fix: cast both columns to float64 before correlating. Without the
    # cast, corr() was reported to fail with
    # "'float' object has no attribute 'sqrt'".
    docs_per_capita = Top15['Docs per Capita'].astype('float64')
    energy_per_capita = Top15['Energy Supply per Capita'].astype('float64')
    correlation = docs_per_capita.corr(energy_per_capita)
    return correlation


print(answer_nine())

Question 10:

def answer_ten():
    """Flag each row as at/above (1) or below (0) the median '% Renewable'.

    A 'HighRenew' column is added to the table returned by ``answer_one()``:
    1 where '% Renewable' is greater than or equal to the column's median,
    otherwise 0. The table is printed twice as a side effect: once sorted by
    '% Renewable' descending, and once sorted by 'Rank' ascending.

    Returns:
        pandas.Series: the 'HighRenew' column, ordered by ascending 'Rank'.
    """
    Top15 = answer_one()
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    Top15 = Top15.sort_values(['% Renewable'], ascending=False)
    print(Top15)
    # Compute the median directly. The earlier `[7]` lookup was an integer
    # key on a label index and was only the median for exactly 15 rows.
    median = Top15['% Renewable'].median()
    # A missing value compares False against the median and so maps to 0.
    Top15['HighRenew'] = (Top15['% Renewable'] >= median).astype(int)
    Top15 = Top15.sort_values(['Rank'], ascending=True)
    print(Top15)
    return Top15['HighRenew']


answer_ten()

Python pandas

原文：https://www.cnblogs.com/Shinered/p/9239476.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)