首页 > 编程语言 > 详细

Python pandas

时间:2018-06-28 23:46:44      阅读:246      评论:0      收藏:0      [点我收藏+]

1.得到指定行的索引值。

In [7]:  
# Build a five-row, two-column frame whose index labels run 10..14
# instead of the default 0..4.
dframe = pd.DataFrame(
    {"A": list("abcde"), "B": list("fghij")},
    index=list(range(10, 15)),
)
# dframe
"""
Out[7]:
    A   B
10  a   f
11  b   g
12  c   h
13  d   i
14  e   j
"""
# Look up the index label stored at position 2.
dframe.index[2]
# output: 12
# Drop the row at that position by handing its label to drop(); the result
# is a new frame and dframe itself is left untouched.
dframe.drop(dframe.index[2])
"""
Out[99]:
    A  B
10  a  f
11  b  g
13  d  i
14  e  j
"""

2.一些作业记录

Question 1:

import pandas as pd
import string
import re
def _alias_by_prefix(name, aliases):
    """Return the alias of the first prefix in *aliases* that *name* starts with, else *name*."""
    for prefix, alias in aliases:
        if name.startswith(prefix):
            return alias
    return name


def _clean_energy_country(name):
    """Normalize one country label from the energy sheet.

    Applies the fixed aliases first, then removes any "(...)" group,
    trailing digits, and trailing whitespace, in that order.
    """
    aliases = (
        ("Republic of Korea", "South Korea"),
        ("United States of America", "United States"),
        ("United Kingdom", "United Kingdom"),
        ("China, Hong Kong", "Hong Kong"),
    )
    name = _alias_by_prefix(name, aliases)
    name = re.sub(r"\(.*?\)", "", name)
    return name.rstrip(string.digits).rstrip()


# ---- energy indicators (Excel) ----
# Keep spreadsheet columns 2-5, skip the first 16 rows and the last 38 rows,
# and treat the literal "..." cells as missing values.
energy = pd.read_excel('Energy Indicators.xls', usecols=[2, 3, 4, 5],
                       skiprows=16, skipfooter=38, na_values=['...'])
# Row 0 is discarded (presumably a units row under the header -- confirm
# against the spreadsheet).
energy1 = energy.drop([0])

# Derive the final column names from the raw header text.
column_names = {}
for col in energy1.columns:
    if col.startswith('Unnamed'):
        column_names[col] = 'Country'
    elif col.endswith('capita'):
        column_names[col] = col[:-6] + 'Capita'
    elif col.endswith('Production'):
        column_names[col] = '% ' + col[:9]
energy1 = energy1.rename(columns=column_names)

energy1 = energy1.set_index('Country')
# One pass over the index instead of one in-place rename per row.
energy1 = energy1.rename(index=_clean_energy_country)

# ---- GDP (CSV) ----
GDP = pd.read_csv('world_bank.csv', skiprows=4)
GDP = GDP.rename(columns={'Country Name': 'Country'}).set_index('Country')
# Aliases applied to the GDP index:
#   "Korea, Rep."          -> "South Korea"
#   "Iran, Islamic Rep."   -> "Iran"
#   "Hong Kong SAR, China" -> "Hong Kong"
_gdp_aliases = (
    ("Korea, Rep.", "South Korea"),
    ("Iran, Islamic Rep.", "Iran"),
    # Prefix must be spelled "Hong Kong"; a lowercase "k" never matches.
    ("Hong Kong", "Hong Kong"),
)
GDP = GDP.rename(index=lambda name: _alias_by_prefix(name, _gdp_aliases))

# ---- journal ranking (Excel) ----
ScimEn = pd.read_excel('scimagojr-3.xlsx')
ScimEn = ScimEn.set_index('Country')

# ---- merge on the shared country index ----
df_merged = pd.merge(energy1, GDP, how='inner', left_index=True, right_index=True)
df_merged = pd.merge(ScimEn, df_merged, how='inner', left_index=True, right_index=True)
# DataFrame.sort was removed from pandas; sort_values is its replacement.
df_merged = df_merged.sort_values(['Rank'], ascending=True)

# Keep only the rows ranked 15 or better.
df_merged = df_merged[df_merged['Rank'] <= 15]

Question 2:

技术分享图片
def answer_two():
    """Return the number of distinct countries in the outer join of the three datasets, minus 15.

    Loads 'Energy Indicators.xls', 'world_bank.csv' and 'scimagojr-3.xlsx'
    from the working directory, normalizes the country labels of the first
    two, indexes all three by country, and outer-joins them on that index.

    Returns:
        int: count of unique index labels in the outer join, less 15.
    """
    def alias_by_prefix(name, aliases):
        # First matching prefix wins; unmatched names pass through unchanged.
        for prefix, alias in aliases:
            if name.startswith(prefix):
                return alias
        return name

    energy_aliases = (
        ("Republic of Korea", "South Korea"),
        ("United States of America", "United States"),
        ("United Kingdom", "United Kingdom"),
        ("China, Hong Kong", "Hong Kong"),
    )
    gdp_aliases = (
        ("Korea, Rep.", "South Korea"),
        ("Iran, Islamic Rep.", "Iran"),
        # Prefix must be spelled "Hong Kong"; a lowercase "k" never matches.
        ("Hong Kong", "Hong Kong"),
    )

    def clean_energy_country(name):
        # Alias, then drop "(...)" groups, trailing digits and trailing
        # whitespace, in that order.
        name = alias_by_prefix(name, energy_aliases)
        name = re.sub(r"\(.*?\)", "", name)
        return name.rstrip(string.digits).rstrip()

    # ---- energy indicators (Excel) ----
    energy = pd.read_excel('Energy Indicators.xls', usecols=[2, 3, 4, 5],
                           skiprows=16, skipfooter=38, na_values=['...'])
    energy1 = energy.drop([0])

    column_names = {}
    for col in energy1.columns:
        if col.startswith('Unnamed'):
            column_names[col] = 'Country'
        elif col.endswith('capita'):
            column_names[col] = col[:-6] + 'Capita'
        elif col.endswith('Production'):
            column_names[col] = '% ' + col[:9]
    energy1 = energy1.rename(columns=column_names).set_index('Country')
    energy1 = energy1.rename(index=clean_energy_country)

    # ---- GDP (CSV) ----
    GDP = pd.read_csv('world_bank.csv', skiprows=4)
    GDP = GDP.rename(columns={'Country Name': 'Country'}).set_index('Country')
    GDP = GDP.rename(index=lambda name: alias_by_prefix(name, gdp_aliases))

    # ---- journal ranking (Excel) ----
    ScimEn = pd.read_excel('scimagojr-3.xlsx')
    ScimEn = ScimEn.set_index('Country')

    # ---- outer merge on the shared country index ----
    df_merged = pd.merge(energy1, GDP, how='outer', left_index=True, right_index=True)
    df_merged = pd.merge(ScimEn, df_merged, how='outer', left_index=True, right_index=True)

    # NOTE(review): 15 is a fixed baseline here, not the size of the inner
    # join -- confirm this is the intended "entries lost" definition.
    return len(df_merged.index.unique()) - 15
View Code

Question 3:

def answer_three():
    """Return the per-country row mean of the non-metadata columns, largest first.

    Every ranking, energy and identifier column listed in ``cols`` is
    dropped from the frame returned by answer_one(); the row-wise mean of
    what remains (presumably the per-year GDP columns -- confirm against
    answer_one) is stored as 'avgGDP'.

    Returns:
        pandas.Series: 'avgGDP' values sorted in descending order.
    """
    Top15 = answer_one()
    cols = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',
            'Citations per document', 'H index', 'Energy Supply',
            'Energy Supply per Capita', '% Renewable', 'Country Code',
            'Indicator Name', 'Indicator Code']
    Top15_new = Top15.drop(cols, axis=1)
    Top15_new['avgGDP'] = Top15_new.mean(axis=1)
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    Top15_new = Top15_new.sort_values(['avgGDP'], ascending=False)
    result = pd.Series(Top15_new['avgGDP'])
    return result

Question 4:

def answer_four():
    """Return the relative change of the '2015' column versus '2006' for the United Kingdom.

    Returns:
        pandas.Series: one element, labelled 'United Kingdom', holding
        (value_2015 - value_2006) / value_2006.
    """
    Top15 = answer_one()
    # Selecting with a list keeps the result a one-row frame, so the
    # arithmetic below yields a labelled one-element Series.
    uk = Top15.loc[['United Kingdom']]
    result = (uk['2015'] - uk['2006']) / uk['2006']
    return result

Question 5:

def answer_five():
    """Return the mean of the 'Energy Supply per Capita' column of answer_one()'s frame."""
    Top15 = answer_one()
    mean = Top15['Energy Supply per Capita'].mean()

    return mean
print(answer_five())

Question 6:

技术分享图片
def answer_six():
    """
    What country has the maximum % Renewable and what is the percentage?

    This function should return a tuple with the name of the country and the percentage

    Returns:
        tuple: (index label of the largest '% Renewable' value, that value).
    """
    Top15 = answer_one()
    # Work on the column itself: on a Series, idxmax()/max() give the plain
    # label and value the tuple is documented to hold, whereas on a
    # one-column frame they each give a Series.
    renewable = Top15['% Renewable']
    return (renewable.idxmax(), renewable.max())

answer_six()
View Code

Question 7:

def answer_seven():
    """Return the country with the highest Self-citations / Citations ratio, and that ratio.

    Returns:
        tuple: (index label of the largest ratio, the ratio value).
    """
    Top15 = answer_one()
    # The earlier reset_index()/set_index() results were discarded, so the
    # ['Country', 'ratio'] selection pointed at a column that did not exist.
    # The ratio Series already carries the frame's index, so take the
    # maximum from it directly.
    ratio = Top15['Self-citations'].div(Top15['Citations'])
    return (ratio.idxmax(), ratio.max())
answer_seven()

Question 8:

def answer_eight():
    """Return the index label with the third-largest estimated population.

    Population is estimated as 'Energy Supply' divided by
    'Energy Supply per Capita'. The sorted one-column frame is printed as
    a side effect.

    Returns:
        The index label at position 2 after sorting populations descending.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    Top15 = Top15.sort_values(['populations'], ascending=False)
    Top15 = Top15.loc[:, ['populations']]
    print(Top15)
    return Top15.index[2]
answer_eight()

Question 9: 出现错误: AttributeError: 'float' object has no attribute 'sqrt'

#出现错误
"""
AttributeError: 'float' object has no attribute 'sqrt'
"""

def answer_nine():
    """First attempt at correlating documents per capita with energy supply per capita.

    NOTE: this is the version the surrounding text reports as failing with
    "AttributeError: 'float' object has no attribute 'sqrt'". The corr()
    call is deliberately left as written so the failure stays reproducible;
    presumably the two columns are object dtype rather than float --
    confirm against answer_one.

    Returns:
        The correlation between 'Docs per Capita' and
        'Energy Supply per Capita' (when corr() succeeds).
    """
    Top15 = answer_one()
    # Estimated population = total energy supply / per-capita energy supply.
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    corr = Top15['Docs per Capita'].corr(Top15['Energy Supply per Capita'])
    return corr
print(answer_nine())

修改后:

def answer_nine():
    """Return the correlation between documents per capita and energy supply per capita.

    Population is estimated as 'Energy Supply' divided by
    'Energy Supply per Capita'; 'Docs per Capita' is 'Documents' divided by
    that estimate. The two compared columns are printed as a side effect.

    Returns:
        The value of Series.corr() between the two columns.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    # Cast both columns to float64 before corr(); this is the change that
    # resolved "'float' object has no attribute 'sqrt'".
    docs_per_capita = Top15['Docs per Capita'].astype('float64')
    energy_per_capita = Top15['Energy Supply per Capita'].astype('float64')
    correlation = docs_per_capita.corr(energy_per_capita)
    return correlation
print(answer_nine())

Question 10:

def answer_ten():
    """Flag each country 1 if its '% Renewable' is at or above the middle value, else 0.

    The threshold is the value at position 7 of the descending
    '% Renewable' sort (the middle row, assuming the frame has 15 rows --
    confirm against answer_one). The frame is printed twice as a side
    effect.

    Returns:
        pandas.Series: the 'HighRenew' column, ordered by ascending 'Rank'.
    """
    Top15 = answer_one()
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    Top15 = Top15.sort_values(['% Renewable'], ascending=False)
    print(Top15)
    # .iloc makes the positional lookup explicit; a bare [7] on a
    # label-indexed Series relies on a deprecated positional fallback.
    median = Top15['% Renewable'].iloc[7]
    Top15['HighRenew'] = (Top15['% Renewable'] >= median).astype(int)
    Top15 = Top15.sort_values(['Rank'], ascending=True)
    print(Top15)
    return Top15['HighRenew']
answer_ten()

 

Python pandas

原文:https://www.cnblogs.com/Shinered/p/9239476.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!