USGS dataset listing every wind turbine in the United States:
from collections import OrderedDict
import bearcart
import bokeh
import bokeh.plotting as bp
from bokeh.plotting import output_notebook
import folium
import ggplot as gg
from ggplot import ggplot
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import vincent
%matplotlib inline
mpld3.enable_notebook()
bearcart.initialize_notebook()
vincent.initialize_notebook()
folium.initialize_notebook()
# axis_color = ‘black‘
axis_color = ‘#d0d0d0‘
df = pd.read_csv(‘USGS_WindTurbine_201307_cleaned.csv‘)
df.head()
ws = pd.read_table(‘CO_WS_2011_2012.txt‘)
ws = ws.set_index(‘Date & Time Stamp‘)
ws.index = ws.index.to_datetime()
ws.head()
# Rotor Diameter vs. Turbine Manufacturer
mf_grouped = df.groupby(‘Turbine Manufacturer‘)
mean_grouped = mf_grouped.mean().dropna()
mean_rd = mean_grouped.sort(‘Rotor Diameter‘)[‘Rotor Diameter‘]
rotor_diam = vincent.Bar(mean_rd)
rotor_diam.axis_titles(x=‘Turbine Manufacturer‘, y=‘Rotor Diameter‘)
# The Hard Way
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef
for axis in rotor_diam.axes:
axis.properties = AxisProperties()
for prop in [‘ticks‘, ‘axis‘, ‘major_ticks‘, ‘minor_ticks‘]:
setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=axis_color)))
axis.properties.title = PropertySet(font_size=ValueRef(value=20),
fill=ValueRef(value=axis_color))
axis.properties.labels = PropertySet(fill=ValueRef(value=axis_color))
rotor_diam.axes[0].properties.labels.angle = ValueRef(value=50)
rotor_diam.axes[0].properties.labels.align = ValueRef(value=‘left‘)
rotor_diam.axes[0].properties.title.dy = ValueRef(value=115)
rotor_diam.scales[2].range = [‘#b48ead‘]
rotor_diam
# Total Turbine Count
turbine_ct = mf_grouped.count().dropna().sort(‘Unique ID‘, ascending=False)[‘Unique ID‘]
num_turbines = (vincent.Bar(turbine_ct[:25])
.axis_titles(x=‘Turbine Manufacturer‘,
y=‘Number of Turbines in the US‘)
.colors(range_=[‘#6a9fb5‘]))
# Shortcuts!
def lighten_axes(vis, x_offset=50):
(vis.common_axis_properties(color=axis_color, title_size=20)
.x_axis_properties(label_angle=50, label_align=‘left‘,
title_offset=x_offset)
.y_axis_properties(title_offset=-40))
# If Area Chart
# num_turbines.scales[0].type = ‘ordinal‘
lighten_axes(num_turbines)
num_turbines
# Turbine Count vs. Date
grouped_date = df.groupby([‘Online Year‘, ‘Turbine Manufacturer‘])
by_year = grouped_date.count()[‘Unique ID‘].reset_index()
by_year[‘Online Year‘] = pd.to_datetime(by_year[‘Online Year‘], coerce=True)
by_year = by_year.rename(columns={‘Unique ID‘: ‘Turbine Count‘}).dropna()
by_year = by_year.pivot(index=‘Online Year‘, columns=‘Turbine Manufacturer‘, values=‘Turbine Count‘)
by_year = by_year[turbine_ct[:10].index.tolist()]
online_by_year = (vincent.StackedArea(by_year)
.axis_titles(x=‘Date‘, y=‘Turbine Count‘)
.legend(title=‘Turbine Manufacturer‘, text_color=axis_color)
.colors(range_=[‘#ac4142‘, ‘#d28445‘, ‘#f4bf75‘, ‘#90a959‘,
‘#75b5aa‘, ‘#6a9fb5‘, ‘#aa759f‘, ‘#8f5536‘]))
lighten_axes(online_by_year, x_offset=30)
online_by_year
height_diam = (vincent.GroupedBar(mean_grouped[[‘Tower/Hub Height‘, ‘Rotor Diameter‘]]
.sort([‘Rotor Diameter‘, ‘Tower/Hub Height‘], ascending=False))
.axis_titles(x=‘Turbine Manufacturer‘, y=‘Meters‘)
.legend(title=‘Parameters‘, text_color=axis_color)
.colors(range_=[‘#f4bf75‘, ‘#75b5aa‘]))
lighten_axes(height_diam, 100)
height_diam
november_2011 = ws[‘2011-11-15‘:‘2011-12-01‘]
ws_line = (vincent.Line(november_2011[‘WS1_50mMean‘])
.axis_titles(x=‘Date‘, y=‘Wind Speed (m/s)‘)
.colors(range_=[‘#d28445‘]))
lighten_axes(ws_line, x_offset=30)
ws_line
# Rotor Diameter vs. Power
min_heights = df[df[‘Rotor Diameter‘] > 10]
diameter_vs_mw = (vincent.Scatter(min_heights[[‘Turbine MW‘, ‘Rotor Diameter‘]], iter_idx=‘Turbine MW‘)
.axis_titles(x=‘Power (MW)‘, y=‘Rotor Diameter (m)‘)
.colors(range_=[‘#75b5aa‘]))
lighten_axes(diameter_vs_mw, x_offset=30)
diameter_vs_mw
(ggplot(gg.aes(x=‘Turbine MW‘, y=‘Rotor Swept Area‘), data=min_heights[:500])
+ gg.geom_point(color=‘#75b5aa‘, size=75)
+ gg.ggtitle("Rotor Swept Area vs. Power")
+ gg.xlab("Power (MW)")
+ gg.ylab("Rotor Swept Area (m^2)"))
(ggplot(gg.aes(x=‘WS1_50mMean‘), data=ws)
+ gg.geom_histogram(fill=‘#ac4142‘)
+ gg.ggtitle("Wind Speed Distribution")
+ gg.labs("Wind Speed (m)", "Freq"))
molten = pd.melt(ws, value_vars=[‘WS1_50mMean‘, ‘WS2_50mMean‘, ‘WS3_30mMean‘, ‘WS4_40mMean‘])
(ggplot(gg.aes(x=‘value‘), data=molten)
+ gg.geom_histogram(fill=‘#aa759f‘)
+ gg.facet_wrap(‘variable‘)
+ gg.ggtitle("Wind Speed Distribution")
+ gg.labs("Wind Speed (m)", "Freq"))
november_2011[‘Date‘] = november_2011.index
@interact
def plot_gg_ws(column=november_2011.columns.tolist()):
molten = pd.melt(november_2011,
value_vars=[column],
id_vars=[‘Date‘])
gg_wind_data = (ggplot(gg.aes(x=‘Date‘, y=‘value‘), data=molten)
+ gg.geom_line(color="steelblue")
+ gg.stat_smooth(colour="red"))
gg_wind_data.draw()
output_notebook()
import bokeh
subset = min_heights[:1000]
bp.figure(plot_width=800, background_fill= ‘#e5e5e5‘)
plot_tools = "pan, wheel_zoom, box_zoom, reset, hover"
bp.scatter(subset[‘Total Height‘], subset[‘Rotor Swept Area‘],
tools=plot_tools, size=10, color=‘purple‘, alpha=1.0)
bp.curplot().title = "Total Height (m) vs. Rotor Swept Area (m^2)"
bp.xaxis().axis_label = "Rotor Swept Area (m^2)"
bp.yaxis().axis_label = "Following"
hover = [t for t in bp.curplot().tools if isinstance(t, bokeh.objects.HoverTool)][0]
hover.tooltips = {"(Rotor Swept Area, Total Height)": "(@x, @y)"}
bp.show()
by_year.dropna()
built_by_year = bearcart.Chart(by_year, palette=‘spectrum2000‘,
height=500, width=750, plt_type=‘bar‘)
built_by_year
all_ws = bearcart.Chart(november_2011[[‘WS1_50mMean‘, ‘WS2_50mMean‘, ‘WS3_30mMean‘, ‘WS4_40mMean‘]],
height=500, width=750, plt_type=‘area‘)
all_ws
all_wd = bearcart.Chart(november_2011[[‘WD1_49mMean‘, ‘WD2_38mMean‘]],
height=500, width=750, plt_type=‘bar‘,
colors={‘WD1_49mMean‘: ‘#75b5aa‘, ‘WD2_38mMean‘: ‘#aa759f‘})
all_wd
import seaborn as sb
sb.set(rc={‘text.color‘: axis_color, ‘axes.labelcolor‘: axis_color})
rvm = df[[‘Rotor Diameter‘, ‘Turbine MW‘, ‘Turbine Manufacturer‘]].dropna()
row_mask = rvm[‘Turbine Manufacturer‘].isin([‘Vestas‘, ‘Siemens‘, ‘GE‘])
rvm = rvm[row_mask]
rvm[‘Rotor Diameter‘] = rvm[‘Rotor Diameter‘].apply(lambda x: int(5 * round(float(x)/5)))
grouped_rvm = rvm.groupby([‘Rotor Diameter‘, ‘Turbine Manufacturer‘])
rvm = grouped_rvm.mean().reset_index()
sb.lmplot("Rotor Diameter", "Turbine MW", col="Turbine Manufacturer",
hue="Turbine Manufacturer", data=rvm)
sb.set_palette(‘husl‘)
nov_cols = november_2011.columns.tolist()
@interact
def sb_jointplot(x=nov_cols, y=nov_cols):
sb.jointplot(x, y, data=november_2011, size=9)
sb.jointplot(‘WD1_49mMean‘, ‘WD2_38mMean‘, data=november_2011, size=9, kind=‘kde‘)
import scipy.stats as stats
dists = [‘Rayleigh‘, ‘Weibull‘, ‘Normal‘]
@interact
def plot_sb_dist(column=ws.columns.tolist(), dist=dists):
plt.figure(figsize=(10, 8))
dist_map = {
‘Rayleigh‘: stats.rayleigh,
‘Weibull‘: stats.exponweib,
‘Normal‘: stats.norm,
}
sb.distplot(ws[column], fit=dist_map[dist])
state_geo = r‘us-states.json‘
state_data = df.groupby(‘State‘).count().drop(‘State‘, axis=1).reset_index()
state_map = folium.Map(location=[48, -102], zoom_start=3)
state_map.geo_json(geo_path=state_geo, data=state_data,
columns=[‘State‘, ‘Unique ID‘],
key_on=‘feature.id‘,
fill_color=‘BuPu‘, fill_opacity=0.7, line_opacity=0.2,
legend_name=‘Turbine Count Per State‘)
state_map.create_map(‘turbines_per_state.html‘)
state_map
Solano = df[df[‘Site Name‘].isin([‘Solano Wind 1‘, ‘Solano Wind 3‘])]
solano_map = folium.Map(location=[38.1067, -121.7735], zoom_start=12,
tiles=‘Stamen Terrain‘)
solano_map.lat_lng_popover()
for row in Solano.iterrows():
wtg_id = str(row[1][‘Unique ID‘])
lat = row[1][‘Latitude-Decimal Degrees‘]
lng = row[1][‘Longitude-Decimal Degrees‘]
solano_map.polygon_marker(location=[lat, lng], fill_color=‘#f4bf75‘,
radius=12, popup=‘WTG ID: ‘ + wtg_id)
solano_map.create_map(‘solano_map.html‘)
solano_map.render_iframe = True
solano_map
# Let‘s do a little more mapping
# Source: http://wind.nrel.gov/Web_nrel/
wind_map = folium.Map(location=[45.48, -121.7], zoom_start=10,
tiles=‘Stamen Terrain‘)
locations = {
‘df_26512‘: (45.38, -121.52),
‘df_26795‘: (45.48, -121.72),
‘df_26798‘: (45.48, -121.62)
}
data = {
‘df_26512‘: pd.read_csv(‘26512.csv‘),
‘df_26795‘: pd.read_csv(‘26795.csv‘),
‘df_26798‘: pd.read_csv(‘26798.csv‘)
}
charts = {
‘df_26512‘: None,
‘df_26795‘: None,
‘df_26798‘: None
}
for k, df in data.items():
df[‘Date‘] = pd.to_datetime(df[‘Date(YYYY-MM-DD hh:mm:ss)‘])
data[k] = df.drop(‘Date(YYYY-MM-DD hh:mm:ss)‘, axis=1).set_index(‘Date‘)
data[k] = data[k][‘2006-12-01‘: ‘2006-12-31‘]
for k, chart in charts.items():
charts[k] = (vincent.Line(data[k][‘100m wind speed (m/s)‘],
width=400, height=200)
.colors(range_=[‘#6a9fb5‘]))
charts[k].axes[0].ticks = 4
charts[k].axis_titles(x=‘Date‘, y=‘100m wind speed (m/s)‘)
path = ‘‘.join([k, ‘.json‘])
charts[k].to_json(path)
wind_map.polygon_marker(location=