diff --git a/Makefile b/Makefile deleted file mode 100644 index 7cf1029..0000000 --- a/Makefile +++ /dev/null @@ -1,124 +0,0 @@ -PY?=python -PELICAN?=pelican -PELICANOPTS= - -BASEDIR=$(CURDIR) -INPUTDIR=$(BASEDIR)/content -OUTPUTDIR=$(BASEDIR)/output -CONFFILE=$(BASEDIR)/pelicanconf.py -PUBLISHCONF=$(BASEDIR)/publishconf.py - -FTP_HOST=localhost -FTP_USER=anonymous -FTP_TARGET_DIR=/ - -SSH_HOST=localhost -SSH_PORT=22 -SSH_USER=root -SSH_TARGET_DIR=/var/www - -S3_BUCKET=my_s3_bucket - -CLOUDFILES_USERNAME=my_rackspace_username -CLOUDFILES_API_KEY=my_rackspace_api_key -CLOUDFILES_CONTAINER=my_cloudfiles_container - -DROPBOX_DIR=~/Dropbox/Public/ - -GITHUB_PAGES_BRANCH=master - -DEBUG ?= 0 -ifeq ($(DEBUG), 1) - PELICANOPTS += -D -endif - -RELATIVE ?= 0 -ifeq ($(RELATIVE), 1) - PELICANOPTS += --relative-urls -endif - -help: - @echo 'Makefile for a pelican Web site ' - @echo ' ' - @echo 'Usage: ' - @echo ' make html (re)generate the web site ' - @echo ' make clean remove the generated files ' - @echo ' make regenerate regenerate files upon modification ' - @echo ' make publish generate using production settings ' - @echo ' make serve [PORT=8000] serve site at http://localhost:8000' - @echo ' make serve-global [SERVER=0.0.0.0] serve (as root) to $(SERVER):80 ' - @echo ' make devserver [PORT=8000] start/restart develop_server.sh ' - @echo ' make stopserver stop local server ' - @echo ' make ssh_upload upload the web site via SSH ' - @echo ' make rsync_upload upload the web site via rsync+ssh ' - @echo ' make dropbox_upload upload the web site via Dropbox ' - @echo ' make ftp_upload upload the web site via FTP ' - @echo ' make s3_upload upload the web site via S3 ' - @echo ' make cf_upload upload the web site via Cloud Files' - @echo ' make github upload the web site via gh-pages ' - @echo ' ' - @echo 'Set the DEBUG variable to 1 to enable debugging, e.g. 
make DEBUG=1 html ' - @echo 'Set the RELATIVE variable to 1 to enable relative urls ' - @echo ' ' - -html: - $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) - -clean: - [ ! -d $(OUTPUTDIR) ] || rm -rf $(OUTPUTDIR) - -regenerate: - $(PELICAN) -r $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) - -serve: -ifdef PORT - cd $(OUTPUTDIR) && $(PY) -m pelican.server $(PORT) -else - cd $(OUTPUTDIR) && $(PY) -m pelican.server -endif - -serve-global: -ifdef SERVER - cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 $(SERVER) -else - cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 0.0.0.0 -endif - - -devserver: -ifdef PORT - $(BASEDIR)/develop_server.sh restart $(PORT) -else - $(BASEDIR)/develop_server.sh restart -endif - -stopserver: - $(BASEDIR)/develop_server.sh stop - @echo 'Stopped Pelican and SimpleHTTPServer processes running in background.' - -publish: - $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(PUBLISHCONF) $(PELICANOPTS) - -ssh_upload: publish - scp -P $(SSH_PORT) -r $(OUTPUTDIR)/* $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) - -rsync_upload: publish - rsync -e "ssh -p $(SSH_PORT)" -P -rvzc --delete $(OUTPUTDIR)/ $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) --cvs-exclude - -dropbox_upload: publish - cp -r $(OUTPUTDIR)/* $(DROPBOX_DIR) - -ftp_upload: publish - lftp ftp://$(FTP_USER)@$(FTP_HOST) -e "mirror -R $(OUTPUTDIR) $(FTP_TARGET_DIR) ; quit" - -s3_upload: publish - s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed --guess-mime-type - -cf_upload: publish - cd $(OUTPUTDIR) && swift -v -A https://auth.api.rackspacecloud.com/v1.0 -U $(CLOUDFILES_USERNAME) -K $(CLOUDFILES_API_KEY) upload -c $(CLOUDFILES_CONTAINER) . 
- -github: publish - ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) $(OUTPUTDIR) - git push origin $(GITHUB_PAGES_BRANCH) - -.PHONY: html help clean regenerate serve serve-global devserver publish ssh_upload rsync_upload dropbox_upload ftp_upload s3_upload cf_upload github diff --git a/_nb_header.html b/_nb_header.html deleted file mode 100644 index defd29d..0000000 --- a/_nb_header.html +++ /dev/null @@ -1,199 +0,0 @@ - - - - - - - - - - - - - - diff --git a/archives.html b/archives.html new file mode 100644 index 0000000..71b8c93 --- /dev/null +++ b/archives.html @@ -0,0 +1,122 @@ + + +
+ + + + + + + +Bradlee Speice, Fri 27 November 2015, Blog
+ + + +My only non-core class this semester has been in Structure Products. We've been surveying a wide variety of products, and the final project was to pick one to report on. +Because these are all very similar, we decided to demonstrate all 3 products at once.
+What follows below is a notebook demonstrating the usage of Julia for Monte-Carlo simulation of some exotic products.
++
using Gadfly
+In order to price the autocallable bonds, we need to simulate the underlying assets. Let's go ahead and set up the simulation first, as this lays the foundation for what we're trying to do. We're going to use JNJ as the basis for our simulation. This implies the following parameters:
+We additionally define some parameters for simulation:
+T: The number of years to simulatem: The number of paths to simulaten: The number of steps to simulate in a yearS0 = 102.2
+nominal = 100
+q = 2.84 / 100
+σ = 15.37 / 100
+term = [0, .49, .9, 1.21, 1.45, 1.69] / 100 + 1
+
+###
+# Potential: Based on PEP
+# S0 = 100.6
+# σ = 14.86
+# q = 2.7
+###
+
+# Simulation parameters
+T = 5 # Using years as the unit of time
+n = 250 # simulations per year
+m = 100000 # paths
+num_simulations = 5; # simulation rounds per price
+To make things simpler, we simulate a single year at a time. This allows us to easily add in a dividend policy without too much difficulty, and update the simulation every year to match the term structure. The underlying uses GBM for simulation between years.
+ +simulate_gbm = function(S0, μ, σ, T, n)
+ # Set the initial state
+ m = length(S0)
+ t = T / n
+ motion = zeros(m, n)
+ motion[:,1] = S0
+
+ # Build out all states
+ for i=1:(n-1)
+ motion[:,i+1] = motion[:,i] .* exp((μ - σ^2/2)*t) .* exp(sqrt(t) * σ .* randn(m))
+ end
+
+ return motion
+end
+
+function display_motion(motion, T)
+ # Given a matrix of paths, display the motion
+ n = length(motion[1,:])
+ m = length(motion[:,1])
+ x = repmat(1:n, m)
+
+ # Calculate the ticks we're going to use. We'd like to
+ # have an xtick every month, so calculate where those
+ # ticks will actually be at.
+ if (T > 3)
+ num_ticks = T
+ xlabel = "Years"
+ else
+ num_ticks = T * 12
+ xlabel = "Months"
+ end
+ tick_width = n / num_ticks
+ x_ticks = []
+ for i=1:round(num_ticks)
+ x_ticks = vcat(x_ticks, i*tick_width)
+ end
+
+ # Use one color for each path. I'm not sure if there's
+ # a better way to do this without going through DataFrames
+ colors = []
+ for i = 1:m
+ colors = vcat(colors, ones(n)*i)
+ end
+
+ plot(x=x, y=motion', color=colors, Geom.line,
+ Guide.xticks(ticks=x_ticks, label=false),
+ Guide.xlabel(xlabel),
+ Guide.ylabel("Value"))
+end;
+Let's go ahead and run a sample simulation to see what the functions got us!
+ +initial = ones(5) * S0
+# Using μ=0, T=.25 for now, we'll use the proper values later
+motion = simulate_gbm(initial, 0, σ, .25, 200)
+
+display_motion(motion, .25)
+Now that we've got the basic motion set up, let's start making things a bit more sophisticated for the model. We're going to assume that the drift of the stock is the difference between the implied forward rate and the quarterly dividend rate.
+We're given the yearly term structure, and need to calculate the quarterly forward rate to match this structure. The term structure is assumed to follow:
+$d(0, t) = d(0,t-1)\cdot f_{i-1, i}$
+Where $f_{i-1, i}$ is the quarterly forward rate.
+ +forward_term = function(yearly_term)
+ # It is assumed that a yearly term structure is passed in, starting at year 0
+ # This implies a nominal rate above 0 for the first year!
+ years = length(term)-1 # because we start at 0
+ structure = [(term[i+1] / term[i]) for i=1:years]
+end;
+Now that we've got our term structure, let's validate that we're getting the correct results! If we've done this correctly, then:
+ +term[2] == term[1] * structure[1]
+
+# Example term structure taken from:
+# http://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield
+# Linear interpolation was used for the years in-between periods, assuming real-dollar
+# interest rates
+forward_yield = forward_term(term)
+calculated_term2 = term[1] * forward_yield[1]
+
+println("Actual term[2]: $(term[2]); Calculated term[2]: $(calculated_term2)")
+Now that we have the term structure set up, we can actually start doing some real simulation! Let's construct some paths through the full 5-year time frame. In order to do this, we will simulate 1 year at a time, and use the forward rates at those times to compute the drift. Thus, there will be 5 total simulations batched together.
+ +full_motion = ones(5) * S0
+full_term = vcat(term[1], forward_yield)
+for i=1:T
+ μ = (full_term[i] - 1 - q)
+ year_motion = simulate_gbm(full_motion[:,end], μ, σ, 1, n)
+ full_motion = hcat(full_motion, year_motion)
+end
+
+display_motion(full_motion, T)
+We're now going to actually build out the full motion that we'll use for computing the pricing of our autocallable products. It will be largely the same, but we will use far more sample paths for the simulation.
+ +full_simulation = function(S0, T, n, m, term)
+ forward = vcat(term[1], forward_term(term))
+
+ # And an S0 to kick things off.
+ final_motion = ones(m) * S0
+ for i=1:T
+ μ = (forward[i] - 1 - q)
+ year_motion = simulate_gbm(final_motion[:,end], μ, σ, 1, n)
+ final_motion = hcat(final_motion, year_motion)
+ end
+ return final_motion
+end
+
+tic()
+full_simulation(S0, T, n, m, term)
+time = toq()
+@printf("Time to run simulation: %.2fs", time)
+Now that we've defined our underlying simulation, let's actually try and price an Athena note. Athena has the following characteristics:
+call_barrier = S0
+strike = S0
+protection_barrier = S0 * .6
+coupon = nominal * .07
+
+price_athena = function(initial_price, year_prices, call_barrier,
+ protection_barrier, coupon, forward_structure)
+
+ total_coupons = 0
+
+ t = length(year_prices)
+
+ for i=1:t
+ price = year_prices[i]
+ if price ≥ call_barrier
+ return (nominal + coupon*i) * exp((prod(forward_structure[i:end])-1)*(t-i))
+ end
+ end
+
+ # We've reached maturity, time to check capital protection
+ if year_prices[end] > protection_barrier
+ return nominal
+ else
+ put = (strike - year_prices[end]) / strike
+ return nominal*(1-put)
+ end
+end
+
+forward_structure = forward_term(term)
+price_function = (year_prices) -> price_athena(S0, year_prices,
+ call_barrier, protection_barrier, coupon, forward_structure)
+
+athena = function()
+ year_indexes = [n*i for i=1:T]
+ motion = full_simulation(S0, T, n, m, term)
+ payoffs = [price_function(motion[i, year_indexes]) for i=1:m]
+ return mean(payoffs)
+end
+
+mean_payoffs = zeros(num_simulations)
+for i=1:num_simulations
+ tic()
+ mean_payoffs[i] = athena()
+ time = toq()
+ @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time)
+end
+
+final_mean = mean(mean_payoffs)
+println("Mean over $num_simulations simulations: $(mean(mean_payoffs))")
+pv = final_mean * (exp(-(prod(forward_structure)-1)*T))
+@printf("Present value of Athena note: \$%.2f, notional: \$%.2f", pv, nominal)
+Let's move into pricing a Phoenix without memory. It's very similar to the Athena production, with the exception that we introduce a coupon barrier so coupons are paid even when the underlying is below the initial price.
+The Phoenix product has the following characteristics (example here):
+Some example paths (all assume that a call barrier of the current price, and coupon barrier some level below that):
+We're going to re-use the same simulation, with the following parameters:
+call_barrier = S0
+coupon_barrier = S0 * .8
+protection_barrier = S0 * .6
+coupon = nominal * .06
+
+price_phoenix_no_memory = function(initial_price, year_prices, call_barrier, coupon_barrier,
+ protection_barrier, coupon, forward_structure)
+
+ total_coupons = 0
+ t = length(year_prices)
+
+ for i=1:t
+ price = year_prices[i]
+ if price ≥ call_barrier
+ return (nominal + coupon + total_coupons)*exp((prod(forward_structure[i:end])-1)*(t-i))
+ elseif price ≥ coupon_barrier
+ total_coupons = total_coupons * exp(forward_structure[i]-1) + coupon
+ else
+ total_coupons *= exp(forward_structure[i]-1)
+ end
+ end
+
+ # We've reached maturity, time to check capital protection
+ if year_prices[end] > protection_barrier
+ return nominal + total_coupons
+ else
+ put = (strike - year_prices[end]) / strike
+ return nominal*(1-put)
+ end
+end
+
+forward_structure = forward_term(term)
+price_function = (year_prices) -> price_phoenix_no_memory(S0, year_prices,
+ call_barrier, coupon_barrier, protection_barrier, coupon, forward_structure)
+
+phoenix_no_memory = function()
+ year_indexes = [n*i for i=1:T]
+ motion = full_simulation(S0, T, n, m, term)
+ payoffs = [price_function(motion[i, year_indexes]) for i=1:m]
+ return mean(payoffs)
+end
+
+mean_payoffs = zeros(num_simulations)
+for i=1:num_simulations
+ tic()
+ mean_payoffs[i] = phoenix_no_memory()
+ time = toq()
+ @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time)
+end
+
+final_mean = mean(mean_payoffs)
+println("Mean over $num_simulations simulations: $(mean(mean_payoffs))")
+pv = final_mean * exp(-(prod(forward_structure)-1)*(T))
+@printf("Present value of Phoenix without memory note: \$%.2f", pv)
+The Phoenix with Memory structure is very similar to the Phoenix, but as the name implies, has a special "memory" property: It remembers any coupons that haven't been paid at prior observation times, and pays them all if the underlying crosses the coupon barrier. For example:
+You can also find an example here.
+Let's go ahead and set up the simulation! The parameters will be the same, but we can expect that the value will go up because of the memory attribute
+ +call_barrier = S0
+coupon_barrier = S0 * .8
+protection_barrier = S0 * .6
+coupon = nominal * .07
+
+price_phoenix_with_memory = function(initial_price, year_prices, call_barrier,
+ coupon_barrier, protection_barrier, coupon, forward_structure)
+
+ last_coupon = 0
+ total_coupons = 0
+
+ t = length(year_prices)
+
+ for i=1:t
+ price = year_prices[i]
+ if price > call_barrier
+ return (nominal + coupon + total_coupons)*exp((prod(forward_structure[i:end])-1)*(t-i))
+ elseif price > coupon_barrier
+ ####################################################################
+ # The only difference between with/without memory is the below lines
+ memory_coupons = (i - last_coupon) * coupon
+ last_coupon = i
+ total_coupons = total_coupons * exp(forward_structure[i]-1) + memory_coupons
+ ####################################################################
+ else
+ total_coupons *= exp(forward_structure[i]-1)
+ end
+ end
+
+ # We've reached maturity, time to check capital protection
+ if year_prices[end] > protection_barrier
+ return nominal + total_coupons
+ else
+ put = (strike - year_prices[end]) / strike
+ return nominal*(1-put)
+ end
+end
+
+forward_structure = forward_term(term)
+price_function = (year_prices) -> price_phoenix_with_memory(S0, year_prices,
+ call_barrier, coupon_barrier, protection_barrier, coupon, forward_structure)
+
+phoenix_with_memory = function()
+ year_indexes = [n*i for i=1:T]
+ motion = full_simulation(S0, T, n, m, term)
+ payoffs = [price_function(motion[i, year_indexes]) for i=1:m]
+ return mean(payoffs)
+end
+
+mean_payoffs = zeros(num_simulations)
+for i=1:num_simulations
+ tic()
+ mean_payoffs[i] = phoenix_with_memory()
+ time = toq()
+ @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n",
+ i, mean_payoffs[i], time)
+end
+
+final_mean = mean(mean_payoffs)
+println("Mean over $num_simulations simulations: $(mean(mean_payoffs))")
+pv = final_mean * exp(-(prod(forward_structure)-1)*(T))
+@printf("Present value of Phoenix with memory note: \$%.2f", pv)
+"Blog"
+Bradlee Speice, Sat 23 January 2016, Blog
+ + + ++
import pickle
+import pandas as pd
+import numpy as np
+from bokeh.plotting import output_notebook, figure, show
+from bokeh.palettes import RdBu4 as Palette
+from datetime import datetime
+import warnings
+
+output_notebook()
+After taking some time to explore how the weather in North Carolina stacked up over the past years, I was interested in doing the same analysis for other cities. Growing up with family from Binghamton, NY I was always told it was very cloudy there. And Seattle has a nasty reputation for being very depressing and cloudy. All said, the cities I want to examine are:
+I'd be interested to try this analysis worldwide at some point - comparing London and Seattle might be an interesting analysis. For now though, we'll stick with trying out the US data.
+There will be plenty of charts. I want to know: How has average cloud cover and precipitation chance changed over the years for each city mentioned? This will hopefully tell us whether Seattle has actually earned its reputation for being a depressing city.
+ +city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
+forecasts_df = pd.DataFrame.from_dict(city_forecasts)
+cities = ['binghamton', 'cary', 'nyc', 'seattle']
+city_colors = {cities[i]: Palette[i] for i in range(0, 4)}
+
+def safe_cover(frame):
+ if frame and 'cloudCover' in frame:
+ return frame['cloudCover']
+ else:
+ return np.NaN
+
+def monthly_avg_cloudcover(city, year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ cloud_cover_vals = list(map(lambda x: safe_cover(forecasts_df[city][x]['currently']), dates))
+ cloud_cover_samples = len(list(filter(lambda x: x is not np.NaN, cloud_cover_vals)))
+ # Ignore an issue with nanmean having all NaN values. We'll discuss the data issues below.
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ return np.nanmean(cloud_cover_vals), cloud_cover_samples
+years = range(1990, 2016)
+def city_avg_cc(city, month):
+ return [monthly_avg_cloudcover(city, y, month) for y in years]
+
+months = [
+ ('July', 7),
+ ('August', 8),
+ ('September', 9),
+ ('October', 10),
+ ('November', 11)
+]
+
+for month, month_id in months:
+ month_averages = {city: city_avg_cc(city, month_id) for city in cities}
+ f = figure(title="{} Average Cloud Cover".format(month),
+ x_axis_label='Year',
+ y_axis_label='Cloud Cover Percentage')
+ for city in cities:
+ f.line(years, [x[0] for x in month_averages[city]],
+ legend=city, color=city_colors[city])
+ show(f)
+Well, as it so happens it looks like there are some data issues. July's data is a bit sporadic, and 2013 seems to be missing from most months as well. I think really only two things can really be confirmed here:
+Let's now move from cloud cover data to looking at average rainfall chance.
+ +def safe_precip(frame):
+ if frame and 'precipProbability' in frame:
+ return frame['precipProbability']
+ else:
+ return np.NaN
+
+def monthly_avg_precip(city, year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ precip_vals = list(map(lambda x: safe_precip(forecasts_df[city][x]['currently']), dates))
+ precip_samples = len(list(filter(lambda x: x is not np.NaN, precip_vals)))
+ # Ignore an issue with nanmean having all NaN values. We'll discuss the data issues below.
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ return np.nanmean(precip_vals), precip_samples
+
+def city_avg_precip(city, month):
+ return [monthly_avg_precip(city, y, month) for y in years]
+
+for month, month_id in months:
+ month_averages = {city: city_avg_cc(city, month_id) for city in cities}
+ f = figure(title="{} Average Precipitation Chance".format(month),
+ x_axis_label='Year',
+ y_axis_label='Precipitation Chance Percentage')
+ for city in cities:
+ f.line(years, [x[0] for x in month_averages[city]],
+ legend=city, color=city_colors[city])
+ show(f)
+The same data issue caveats apply here: 2013 seems to be missing some data, and July has some issues as well. However, this seems to confirm the trends we saw with cloud cover:
+I have to admit I was a bit surprised after doing this analysis. Seattle showed a higher average cloud cover and average precipitation chance than did the other cities surveyed. Maybe Seattle is actually an objectively more depressing city to live in.
+Well that's all for weather data at the moment. It's been a great experiment, but I think this is about as far as I'll be able to get with weather data without some domain knowledge. Talk again soon!
+ +Bradlee Speice, Fri 01 January 2016, Blog
+ + + ++
from bokeh.plotting import figure, output_notebook, show
+from bokeh.palettes import PuBuGn9 as Palette
+import pandas as pd
+import numpy as np
+from datetime import datetime
+import pickle
+
+output_notebook()
+I'm originally from North Carolina, and I've been hearing a lot of people talking about how often it's been raining recently. They're excited for any day that has sun.
+So I got a bit curious: Has North Carolina over the past few months actually had more cloudy and rainy days recently than in previous years? This shouldn't be a particularly challenging task, but I'm interested to know if people's perceptions actually reflect reality.
+The data we'll use comes from forecast.io, since they can give us a cloud cover percentage. I've gone ahead and retrieved the data to a pickle file, and included the code that was used to generate it. First up: What was the average cloud cover in North Carolina during August - November, and how many days were cloudy? We're going to assume that a "cloudy" day is defined as any day in which the cloud cover is above 50%.
+ +city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
+forecast_df = pd.DataFrame.from_dict(city_forecasts)
+cary_forecast = forecast_df['cary']
+years = range(1990, 2016)
+months = range(7, 12)
+months_str = ['July', 'August', 'September', 'October', 'November']
+
+def safe_cover(frame):
+ if frame and 'cloudCover' in frame:
+ return frame['cloudCover']
+ else:
+ return np.NaN
+
+def monthly_avg_cloudcover(year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ cloud_cover_vals = list(map(lambda x: safe_cover(cary_forecast[x]['currently']), dates))
+ cloud_cover_samples = len(list(filter(lambda x: x is not np.NaN, cloud_cover_vals)))
+ return np.nanmean(cloud_cover_vals), cloud_cover_samples
+
+
+monthly_cover_vals = [[monthly_avg_cloudcover(y, m)[0] for y in years] for m in months]
+
+f = figure(title='Monthly Average Cloud Cover',
+ x_range=(1990, 2015),
+ x_axis_label='Year')
+for x in range(0, len(months)):
+ f.line(years, monthly_cover_vals[x], legend=months_str[x], color=Palette[x])
+show(f)
+As we can see from the chart above, on the whole the monthly average cloud cover has been generally trending down over time. The average cloud cover is also lower than it was last year - it seems people are mostly just complaining. There are some data issues that start in 2012 that we need to be aware of - the cloud cover percentage doesn't exist for all days. Even so, the data that we have seems to reflect the wider trend, so we'll assume for now that the missing data doesn't skew our results.
+There's one more metric we want to check though - how many cloudy days were there? This is probably a better gauge of sentiment than the average monthly cover.
+ +def monthly_cloudy_days(year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ cloud_cover_vals = list(map(lambda x: safe_cover(cary_forecast[x]['currently']), dates))
+ cloud_cover_samples = len(list(filter(lambda x: x is not np.NaN, cloud_cover_vals)))
+ cloudy_days = [cover > .5 for cover in cloud_cover_vals]
+ return np.count_nonzero(cloudy_days), cloud_cover_samples
+
+monthly_days_vals = [[monthly_cloudy_days(y, m)[0] for y in years] for m in months]
+monthly_cover_samples = [[monthly_cloudy_days(y, m)[1] for y in years] for m in months]
+
+f = figure(title='Monthly Cloudy Days',
+ x_range=(1990, 2015),
+ x_axis_label='Year')
+for x in range(0, len(months)):
+ f.line(years, monthly_days_vals[x], legend=months_str[x], color=Palette[x])
+show(f)
+
+f = figure(title='Monthly Cloud Cover Samples',
+ x_range=(1990, 2015),
+ x_axis_label='Year',
+ height=300)
+for x in range(0, len(months)):
+ f.line(years, monthly_cover_samples[x], legend=months_str[x], color=Palette[x])
+show(f)
+On the whole, the number of cloudy days seems to reflect the trend with average cloud cover - it's actually becoming more sunny as time progresses. That said, we need to be careful in how we view this number - because there weren't as many samples in 2015 as previous years, the number of days can get thrown off. In context though, even if most days not recorded were in fact cloudy, the overall count for 2015 would still be lower than previous years.
+In addition to checking cloud cover, I wanted to check precipitation data as well - what is the average precipitation chance over a month, and how many days during a month is rain likely? The thinking is that days with a high-precipitation chance will also be days in which it is cloudy or depressing.
+ +def safe_precip(frame):
+ if frame and 'precipProbability' in frame:
+ return frame['precipProbability']
+ else:
+ return np.NaN
+
+def monthly_avg_precip(year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ precip_vals = list(map(lambda x: safe_precip(cary_forecast[x]['currently']), dates))
+ precip_samples = len(list(filter(lambda x: x is not np.NaN, precip_vals)))
+ return np.nanmean(precip_vals), precip_samples
+
+monthly_avg_precip_vals = [[monthly_avg_precip(y, m)[0] for y in years] for m in months]
+
+f = figure(title='Monthly Average Precipitation Chance',
+ x_range=(1990, 2015),
+ x_axis_label='Year')
+for x in range(0, len(months)):
+ f.line(years, monthly_avg_precip_vals[x], legend=months_str[x], color=Palette[x])
+show(f)
+As we can see from the chart, the average chance of precipitation over a month more or less stays within a band of 0 - .1 for all months over all years. This is further evidence that the past few months are no more cloudy or rainy than previous years. Like the cloud cover though, we still want to get a count of all the rainy days, in addition to the average chance. We'll define a "rainy day" as any day in which the chance of rain is greater than 25%.
+ +def monthly_rainy_days(year, month):
+ dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
+ end=datetime(year, month + 1, 1, 12),
+ freq='D', closed='left')
+ precip_prob_vals = list(map(lambda x: safe_precip(cary_forecast[x]['currently']), dates))
+ precip_prob_samples = len(list(filter(lambda x: x is not np.NaN, precip_prob_vals)))
+ precip_days = [prob > .25 for prob in precip_prob_vals]
+ return np.count_nonzero(precip_days), precip_prob_samples
+
+monthly_precip_days_vals = [[monthly_rainy_days(y, m)[0] for y in years] for m in months]
+monthly_precip_samples = [[monthly_rainy_days(y, m)[1] for y in years] for m in months]
+
+f = figure(title='Monthly Rainy Days',
+ x_range=(1990, 2015),
+ x_axis_label='Year')
+for x in range(0, len(months)):
+ f.line(years, monthly_precip_days_vals[x], legend=months_str[x], color=Palette[x])
+show(f)
+
+f = figure(title='Monthly Rainy Days Samples',
+ x_range=(1990, 2015),
+ x_axis_label='Year',
+ height=300)
+for x in range(0, len(months)):
+ f.line(years, monthly_precip_samples[x], legend=months_str[x], color=Palette[x])
+show(f)
+After trying to find the number of days that are rainy, we can see that November hit its max value for rainy days in 2015. However, that value is 6, as compared to a previous maximum of 5. While it is a new record, the value isn't actually all that different. And for other months, the values are mostly in-line with the averages.
+After having looked at forecast data for Cary, it appears that the months of July - November this year in terms of weather were at worst on par with prior years, if not slightly more sunny. This seems to be a case of confirmation bias: someone complains about a string of cloudy or rainy days, and suddenly you start noticing them more.
+While this analysis doesn't take into account other areas of North Carolina, my initial guess would be to assume that other areas also will show similar results: nothing interesting is happening. Maybe that will be for another blog post later!
+Coming soon: I'll compare rain/cloud conditions in North Carolina to some other places in the U.S.!
+ +The following code was generates the file that was used throughout the blog post. Please note that I'm retrieving data for other cities to use in a future blog post, only Cary data was used for this post.
+import pandas as pd
+from functools import reduce
+import requests
+from datetime import datetime
+
+# Coordinate data from http://itouchmap.com/latlong.html
+cary_loc = (35.79154,-78.781117)
+nyc_loc = (40.78306,-73.971249)
+seattle_loc = (47.60621,-122.332071)
+binghamton_loc = (42.098687,-75.917974)
+cities = {
+ 'cary': cary_loc,
+ 'nyc': nyc_loc,
+ 'seattle': seattle_loc,
+ 'binghamton': binghamton_loc
+}
+
+apikey = '' # My super-secret API Key
+
+def get_forecast(lat, long, date=None):
+ forecast_base = "https://api.forecast.io/forecast/"
+ if date is None:
+ url = forecast_base + apikey + '/{},{}'.format(lat, long)
+ else:
+ epoch = int(date.timestamp())
+ url = forecast_base + apikey + '/{},{},{}'.format(lat, long, epoch)
+
+ return requests.get(url).json()
+
+years = range(1990,2016)
+# For datetimes, the 12 is for getting the weather at noon.
+# We're doing this over midnight because we're more concerned
+# with what people see, and people don't typically see the weather
+# at midnight.
+dt_indices = [pd.date_range(start=datetime(year, 7, 1, 12),
+ end=datetime(year, 11, 30, 12))
+ for year in years]
+dt_merge = reduce(lambda x, y: x.union(y), dt_indices)
+
+# Because we have to pay a little bit to use the API, we use for loops here
+# instead of a comprehension - if something breaks, we want to preserve the
+# data already retrieved
+city_forecasts = {}
+for city, loc in cities.items():
+ print("Retrieving data for {} starting at {}".format(city,
+ datetime.now().strftime("%I:%M:%S %p")))
+ for dt in dt_merge:
+ try:
+ city_forecasts[(city, dt)] = get_forecast(*loc, dt)
+ except Exception as e:
+ print(e)
+ city_forecasts[(city, dt)] = None
+print("End forecast retrieval: {}".format(datetime.now().strftime("%I:%M:%S %p")))
+
+import pickle
+pickle.dump(city_forecasts, open('city_forecasts.p', 'wb'))
+
+### Output:
+# Retrieving data for binghamton starting at 05:13:42 PM
+# Retrieving data for seattle starting at 05:30:51 PM
+# Retrieving data for nyc starting at 05:48:30 PM
+# Retrieving data for cary starting at 06:08:32 PM
+# End forecast retrieval: 06:25:21 PM
+