首页 > 编程语言 > 详细

Up and Down the Python Data and Web Visualization Stack

时间:2016-01-04 10:05:55      阅读:241      评论:0      收藏:0      [点我收藏+]

摘自:http://nbviewer.ipython.org/gist/wrobstory/1eb8cb704a52d18b9ee8/Up%20and%20Down%20PyData%202014.ipynb

USGS dataset listing every wind turbine in the United States:

from collections import OrderedDict

import bearcart
import bokeh
import bokeh.plotting as bp
from bokeh.plotting import output_notebook
import folium
import ggplot as gg
from ggplot import ggplot
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import vincent

%matplotlib inline
mpld3.enable_notebook()
bearcart.initialize_notebook()
vincent.initialize_notebook()
folium.initialize_notebook()

# axis_color = ‘black‘
axis_color = ‘#d0d0d0‘
 
df = pd.read_csv(‘USGS_WindTurbine_201307_cleaned.csv‘)
df.head()
 Unnamed: 0Unique IDSite NameOnline YearTurbine ManufacturerTurbine ModelTower TypeTurbine MWTotal HeightTower/Hub HeightBlade LengthRotor DiameterRotor Swept AreaLatitude-Decimal DegreesLongitude-Decimal DegreesStateCountyAttribute ConfidenceLocation ConfidenceWENDI Site Name 
0 0 1836 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3436 -160.4891 AK Aleutians East 2 2 unknown ...
1 1 1837 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3448 -160.4906 AK Aleutians East 2 2 unknown ...
2 2 1838 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1571 -170.2352 AK Aleutians West 2 2 St. Paul Island ...
3 3 1839 St. Paul Island 1999 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2359 AK Aleutians West 2 2 St. Paul Island ...
4 4 1840 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2373 AK Aleutians West 2 2 St. Paul Island ...

5 rows × 27 columns

ws = pd.read_table(‘CO_WS_2011_2012.txt‘)
ws = ws.set_index(‘Date & Time Stamp‘)
ws.index = ws.index.to_datetime()
ws.head()
 WS1_50mMeanWS1_50mStdevWS1_50mMaxWS1_50mMinWS2_50mMeanWS2_50mStDevWS2_50mMaxWS2_50mMinWS3_30mMeanWS3_30mStDevWS3_30mMaxWS3_30mMinWS4_40mMeanWS4_40mStDevWS4_40mMaxWS4_40mMinWD1_49mMeanWD1_49mStDevWD1_49mMaxWD1_49mMin 
2011-06-03 00:00:00 9.50 1.12 11.70 6.40 9.39 1.03 11.39 6.43 7.96 1.33 10.97 4.51 8.76 1.17 11.01 4.95 172 7 173 240 ...
2011-06-03 00:10:00 8.37 0.56 10.19 6.78 8.27 0.56 9.86 6.43 6.83 0.76 9.06 3.75 7.60 0.64 9.49 5.70 172 5 169 240 ...
2011-06-03 00:20:00 8.38 0.43 9.44 7.17 8.28 0.41 9.48 7.20 7.13 0.64 8.69 4.89 7.81 0.45 9.10 6.83 169 5 177 240 ...
2011-06-03 00:30:00 7.48 1.17 9.44 3.76 7.43 1.12 9.48 3.77 6.43 1.03 8.29 3.37 6.93 1.03 9.10 3.43 163 11 166 240 ...
2011-06-03 00:40:00 7.13 0.79 9.06 5.26 7.10 0.76 9.09 5.28 6.21 0.67 7.91 2.99 6.61 0.66 8.34 3.81 172 12 166 240 ...

5 rows × 24 columns

# Rotor Diameter vs. Turbine Manufacturer
mf_grouped = df.groupby(‘Turbine Manufacturer‘)
mean_grouped = mf_grouped.mean().dropna()
mean_rd = mean_grouped.sort(‘Rotor Diameter‘)[‘Rotor Diameter‘]
rotor_diam = vincent.Bar(mean_rd)
rotor_diam.axis_titles(x=‘Turbine Manufacturer‘, y=‘Rotor Diameter‘)
# The Hard Way
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef
for axis in rotor_diam.axes:
    axis.properties = AxisProperties()
    for prop in [‘ticks‘, ‘axis‘, ‘major_ticks‘, ‘minor_ticks‘]:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=axis_color)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=axis_color))
    axis.properties.labels = PropertySet(fill=ValueRef(value=axis_color))
rotor_diam.axes[0].properties.labels.angle = ValueRef(value=50)
rotor_diam.axes[0].properties.labels.align = ValueRef(value=‘left‘)
rotor_diam.axes[0].properties.title.dy = ValueRef(value=115)
rotor_diam.scales[2].range = [‘#b48ead‘]
rotor_diam
 
# Total Turbine Count
turbine_ct = mf_grouped.count().dropna().sort(‘Unique ID‘, ascending=False)[‘Unique ID‘]
num_turbines = (vincent.Bar(turbine_ct[:25])
                       .axis_titles(x=‘Turbine Manufacturer‘, 
                                    y=‘Number of Turbines in the US‘)
                       .colors(range_=[‘#6a9fb5‘]))
# Shortcuts!
def lighten_axes(vis, x_offset=50):
    (vis.common_axis_properties(color=axis_color, title_size=20)
        .x_axis_properties(label_angle=50, label_align=‘left‘, 
                           title_offset=x_offset)
        .y_axis_properties(title_offset=-40))
# If Area Chart
# num_turbines.scales[0].type = ‘ordinal‘
lighten_axes(num_turbines)
num_turbines
 
# Turbine Count vs. Date
grouped_date = df.groupby([‘Online Year‘, ‘Turbine Manufacturer‘])
by_year = grouped_date.count()[‘Unique ID‘].reset_index()
by_year[‘Online Year‘] = pd.to_datetime(by_year[‘Online Year‘], coerce=True)
by_year = by_year.rename(columns={‘Unique ID‘: ‘Turbine Count‘}).dropna()
by_year = by_year.pivot(index=‘Online Year‘, columns=‘Turbine Manufacturer‘, values=‘Turbine Count‘)
by_year = by_year[turbine_ct[:10].index.tolist()]

online_by_year = (vincent.StackedArea(by_year)
                         .axis_titles(x=‘Date‘, y=‘Turbine Count‘)
                         .legend(title=‘Turbine Manufacturer‘, text_color=axis_color)
                         .colors(range_=[‘#ac4142‘, ‘#d28445‘, ‘#f4bf75‘, ‘#90a959‘, 
                                         ‘#75b5aa‘, ‘#6a9fb5‘, ‘#aa759f‘, ‘#8f5536‘]))
lighten_axes(online_by_year, x_offset=30)
online_by_year
 
height_diam = (vincent.GroupedBar(mean_grouped[[‘Tower/Hub Height‘, ‘Rotor Diameter‘]]
                                  .sort([‘Rotor Diameter‘, ‘Tower/Hub Height‘], ascending=False))
                      .axis_titles(x=‘Turbine Manufacturer‘, y=‘Meters‘)
                      .legend(title=‘Parameters‘, text_color=axis_color)
                      .colors(range_=[‘#f4bf75‘, ‘#75b5aa‘]))
lighten_axes(height_diam, 100)
height_diam
 
november_2011 = ws[‘2011-11-15‘:‘2011-12-01‘]
ws_line = (vincent.Line(november_2011[‘WS1_50mMean‘])
                  .axis_titles(x=‘Date‘, y=‘Wind Speed (m/s)‘)
                  .colors(range_=[‘#d28445‘]))
lighten_axes(ws_line, x_offset=30)
ws_line
 
# Rotor Diameter vs. Power
min_heights = df[df[‘Rotor Diameter‘] > 10]
diameter_vs_mw = (vincent.Scatter(min_heights[[‘Turbine MW‘, ‘Rotor Diameter‘]], iter_idx=‘Turbine MW‘)
                         .axis_titles(x=‘Power (MW)‘, y=‘Rotor Diameter (m)‘)
                         .colors(range_=[‘#75b5aa‘]))
lighten_axes(diameter_vs_mw, x_offset=30)
diameter_vs_mw
 
(ggplot(gg.aes(x=‘Turbine MW‘, y=‘Rotor Swept Area‘), data=min_heights[:500])
    + gg.geom_point(color=‘#75b5aa‘, size=75)
    + gg.ggtitle("Rotor Swept Area vs. Power")
    + gg.xlab("Power (MW)")
    + gg.ylab("Rotor Swept Area (m^2)"))
-0.50.00.51.01.52.02.53.03.5-1,00001,0002,0003,0004,0005,0006,0007,0008,000Power (MW)Rotor Swept Area (m^2)Rotor Swept Area vs. Power
技术分享技术分享
<ggplot: (291491889)>
(ggplot(gg.aes(x=‘WS1_50mMean‘), data=ws)
 + gg.geom_histogram(fill=‘#ac4142‘) 
 + gg.ggtitle("Wind Speed Distribution") 
 + gg.labs("Wind Speed (m)", "Freq"))
binwidth defaulted to range/30. Use ‘binwidth = x‘ to adjust this.
05101520253001,0002,0003,0004,0005,0006,000Wind Speed (m)FreqWind Speed Distribution
技术分享技术分享
<ggplot: (286771433)>
molten = pd.melt(ws, value_vars=[‘WS1_50mMean‘, ‘WS2_50mMean‘, ‘WS3_30mMean‘, ‘WS4_40mMean‘])
(ggplot(gg.aes(x=‘value‘), data=molten)
 + gg.geom_histogram(fill=‘#aa759f‘)
 + gg.facet_wrap(‘variable‘)
 + gg.ggtitle("Wind Speed Distribution") 
 + gg.labs("Wind Speed (m)", "Freq"))
binwidth defaulted to range/30. Use ‘binwidth = x‘ to adjust this.
0816243202,0004,0006,000Wind Speed (m)Freq0816243202,0004,0006,000Wind Speed (m)Freq0816243202,0004,0006,000Wind Speed (m)Freq0816243202,0004,0006,000Wind Speed (m)Freq
技术分享技术分享
<ggplot: (289419253)>
november_2011[‘Date‘] = november_2011.index
@interact
def plot_gg_ws(column=november_2011.columns.tolist()):
    molten = pd.melt(november_2011, 
                     value_vars=[column], 
                     id_vars=[‘Date‘])
    gg_wind_data = (ggplot(gg.aes(x=‘Date‘, y=‘value‘), data=molten)
                   + gg.geom_line(color="steelblue")
                   + gg.stat_smooth(colour="red"))
    gg_wind_data.draw()
Tue 15Thu 17Sat 19Mon 21Wed 23Fri 25Nov 27Tue 29December051015202530Datevalue
技术分享技术分享
output_notebook()
import bokeh
subset = min_heights[:1000]
bp.figure(plot_width=800, background_fill= ‘#e5e5e5‘)
plot_tools = "pan, wheel_zoom, box_zoom, reset, hover"
bp.scatter(subset[‘Total Height‘], subset[‘Rotor Swept Area‘], 
           tools=plot_tools, size=10, color=‘purple‘, alpha=1.0)
bp.curplot().title = "Total Height (m) vs. Rotor Swept Area (m^2)"
bp.xaxis().axis_label = "Rotor Swept Area (m^2)"
bp.yaxis().axis_label = "Following"
hover = [t for t in bp.curplot().tools if isinstance(t, bokeh.objects.HoverTool)][0]
hover.tooltips = {"(Rotor Swept Area, Total Height)": "(@x, @y)"}
bp.show()

Configuring embedded BokehJS mode.

Plots
by_year.dropna()
built_by_year = bearcart.Chart(by_year, palette=‘spectrum2000‘,
                               height=500, width=750, plt_type=‘bar‘)
built_by_year
1K2K3K4K5K
1K2K3K4K5K
 
1990
2000
2010
  • ?
     
    Bonus
  • ?
     
    NEG Micon
  • ?
     
    Suzlon
  • ?
     
    Gamesa
  • ?
     
    Mitsubishi
  • ?
     
    Siemens
  • ?
     
    Kenetech
  • ?
     
    unknown
  • ?
     
    Vestas
  • ?
     
    GE
 
all_ws = bearcart.Chart(november_2011[[‘WS1_50mMean‘, ‘WS2_50mMean‘, ‘WS3_30mMean‘, ‘WS4_40mMean‘]], 
                        height=500, width=750, plt_type=‘area‘)
all_ws
20406080
20406080
Mon, 21 Nov 2011 17:20:00 GMT
WS4_40mMean: 3.09
 
Nov 17
Nov 24
Dec 1
  • ?
     
    WS4_40mMean
  • ?
     
    WS3_30mMean
  • ?
     
    WS2_50mMean
  • ?
     
    WS1_50mMean
 
all_wd = bearcart.Chart(november_2011[[‘WD1_49mMean‘, ‘WD2_38mMean‘]], 
                        height=500, width=750, plt_type=‘bar‘,
                        colors={‘WD1_49mMean‘: ‘#75b5aa‘, ‘WD2_38mMean‘: ‘#aa759f‘})
all_wd
100200300400500600700
100200300400500600700
 
Nov 17
Nov 24
Dec 1
  • ?
     
    WD2_38mMean
  • ?
     
    WD1_49mMean
 
import seaborn as sb
sb.set(rc={‘text.color‘: axis_color, ‘axes.labelcolor‘: axis_color})
rvm = df[[‘Rotor Diameter‘, ‘Turbine MW‘, ‘Turbine Manufacturer‘]].dropna()
row_mask = rvm[‘Turbine Manufacturer‘].isin([‘Vestas‘, ‘Siemens‘, ‘GE‘])
rvm = rvm[row_mask]
rvm[‘Rotor Diameter‘] = rvm[‘Rotor Diameter‘].apply(lambda x: int(5 * round(float(x)/5)))
grouped_rvm = rvm.groupby([‘Rotor Diameter‘, ‘Turbine Manufacturer‘])
rvm = grouped_rvm.mean().reset_index()
sb.lmplot("Rotor Diameter", "Turbine MW", col="Turbine Manufacturer", 
          hue="Turbine Manufacturer", data=rvm)
<seaborn.axisgrid.FacetGrid at 0x11151c410>
020406080100120-2-101234Rotor DiameterTurbine MWTurbine Manufacturer = GE020406080100120Rotor DiameterTurbine Manufacturer = Siemens020406080100120Rotor DiameterTurbine Manufacturer = Vestas
技术分享技术分享
sb.set_palette(‘husl‘)
nov_cols = november_2011.columns.tolist()
@interact
def sb_jointplot(x=nov_cols, y=nov_cols):
    sb.jointplot(x, y, data=november_2011, size=9)
-5051015202530-5051015202530WS1_50mMeanWS1_50mMeanpearsonr = 1; p = 0
技术分享技术分享
sb.jointplot(‘WD1_49mMean‘, ‘WD2_38mMean‘, data=november_2011, size=9, kind=‘kde‘)
<seaborn.axisgrid.JointGrid at 0x115fcffd0>
050100150200250300350050100150200250300350WD2_38mMeanWD1_49mMeanpearsonr = 0.73; p = 0
技术分享技术分享
import scipy.stats as stats
dists = [‘Rayleigh‘, ‘Weibull‘, ‘Normal‘]
@interact
def plot_sb_dist(column=ws.columns.tolist(), dist=dists):
    plt.figure(figsize=(10, 8))
    dist_map = {
        ‘Rayleigh‘: stats.rayleigh,
        ‘Weibull‘: stats.exponweib,
        ‘Normal‘: stats.norm,
    }
    sb.distplot(ws[column], fit=dist_map[dist])
-5051015202530350.000.020.040.060.080.100.12WS1_50mMean
技术分享技术分享
state_geo = r‘us-states.json‘
state_data = df.groupby(‘State‘).count().drop(‘State‘, axis=1).reset_index()
state_map = folium.Map(location=[48, -102], zoom_start=3)
state_map.geo_json(geo_path=state_geo, data=state_data,
                   columns=[‘State‘, ‘Unique ID‘],
                   key_on=‘feature.id‘,
                   fill_color=‘BuPu‘, fill_opacity=0.7, line_opacity=0.2,
                   legend_name=‘Turbine Count Per State‘)
state_map.create_map(‘turbines_per_state.html‘)
state_map
Solano = df[df[‘Site Name‘].isin([‘Solano Wind 1‘, ‘Solano Wind 3‘])]
solano_map = folium.Map(location=[38.1067, -121.7735], zoom_start=12, 
                        tiles=‘Stamen Terrain‘)
solano_map.lat_lng_popover()
for row in Solano.iterrows():
    wtg_id = str(row[1][‘Unique ID‘])
    lat = row[1][‘Latitude-Decimal Degrees‘]
    lng = row[1][‘Longitude-Decimal Degrees‘]
    solano_map.polygon_marker(location=[lat, lng], fill_color=‘#f4bf75‘, 
                              radius=12, popup=‘WTG ID: ‘ + wtg_id)
solano_map.create_map(‘solano_map.html‘)
solano_map.render_iframe = True
solano_map
# Let‘s do a little more mapping
# Source: http://wind.nrel.gov/Web_nrel/
wind_map = folium.Map(location=[45.48, -121.7], zoom_start=10,
                      tiles=‘Stamen Terrain‘)
locations = {
    ‘df_26512‘: (45.38, -121.52),
    ‘df_26795‘: (45.48, -121.72),
    ‘df_26798‘: (45.48, -121.62)
}
data = {
    ‘df_26512‘: pd.read_csv(‘26512.csv‘),
    ‘df_26795‘: pd.read_csv(‘26795.csv‘),
    ‘df_26798‘: pd.read_csv(‘26798.csv‘)
}
charts = {
    ‘df_26512‘: None, 
    ‘df_26795‘: None, 
    ‘df_26798‘: None
}
for k, df in data.items():
    df[‘Date‘] = pd.to_datetime(df[‘Date(YYYY-MM-DD hh:mm:ss)‘])
    data[k] = df.drop(‘Date(YYYY-MM-DD hh:mm:ss)‘, axis=1).set_index(‘Date‘)
    data[k] = data[k][‘2006-12-01‘: ‘2006-12-31‘]
for k, chart in charts.items():
    charts[k] = (vincent.Line(data[k][‘100m wind speed (m/s)‘],
                              width=400, height=200)
                        .colors(range_=[‘#6a9fb5‘]))
    charts[k].axes[0].ticks = 4
    charts[k].axis_titles(x=‘Date‘, y=‘100m wind speed (m/s)‘)
    path = ‘‘.join([k, ‘.json‘])
    charts[k].to_json(path)
    wind_map.polygon_marker(location=locations[k], fill_color=‘#6a9fb5‘,
                            radius=12, popup=(charts[k], path))
wind_map.create_map(‘wind_map.html‘)
wind_map.render_iframe = True
wind_map
from IPython.core.display import HTML
styles = open("custom_dark.css", "r").read()
HTML(styles)

 

 

Up and Down the Python Data and Web Visualization Stack

原文:http://www.cnblogs.com/sunshinekingsmile/p/5097822.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!