Commit 4864ddad authored by Sean Fitzgibbon's avatar Sean Fitzgibbon Committed by Paul McCarthy
Browse files

Initial commit of bokeh examples

parent 21ccc198
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# `bokeh`\n",
"\n",
"[`bokeh`](https://docs.bokeh.org/en/latest/index.html) is a Python library for creating interactive visualizations for modern web browsers. `bokeh` allows you to create these interactive web-based plots without having to code in javascript.\n",
"\n",
"`bokeh` has excellent documentation: https://docs.bokeh.org/en/latest/index.html\n",
"\n",
"This notebook is not intended to instruct you how to use `bokeh`. Instead it pulls together interesting examples from the `bokeh` documentation into a single notebook to give you a taster of what can be done with `bokeh`.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bokeh.plotting import figure, output_file, show\n",
"from bokeh.io import output_notebook\n",
"output_notebook()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bokeh import sampledata\n",
"sampledata.download()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Scatter Plots\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/iris.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bokeh.sampledata.iris import flowers\n",
"\n",
"colormap = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}\n",
"colors = [colormap[x] for x in flowers['species']]\n",
"\n",
"p = figure(title = \"Iris Morphology\")\n",
"p.xaxis.axis_label = 'Petal Length'\n",
"p.yaxis.axis_label = 'Petal Width'\n",
"\n",
"p.circle(flowers[\"petal_length\"], flowers[\"petal_width\"],\n",
" color=colors, fill_alpha=0.2, size=10)\n",
"\n",
"# output_file(\"iris.html\", title=\"iris.py example\")\n",
"\n",
"show(p)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Line Plots\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/box_annotation.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bokeh.models import BoxAnnotation\n",
"from bokeh.sampledata.glucose import data\n",
"\n",
"TOOLS = \"pan,wheel_zoom,box_zoom,reset,save\"\n",
"\n",
"data = data.loc['2010-10-04':'2010-10-04']\n",
"\n",
"p = figure(x_axis_type=\"datetime\", tools=TOOLS, title=\"Glocose Readings, Oct 4th (Red = Outside Range)\")\n",
"p.background_fill_color = \"#efefef\"\n",
"p.xgrid.grid_line_color=None\n",
"p.xaxis.axis_label = 'Time'\n",
"p.yaxis.axis_label = 'Value'\n",
"\n",
"p.line(data.index, data.glucose, line_color='grey')\n",
"p.circle(data.index, data.glucose, color='grey', size=1)\n",
"\n",
"p.add_layout(BoxAnnotation(top=80, fill_alpha=0.1, fill_color='red', line_color='red'))\n",
"p.add_layout(BoxAnnotation(bottom=180, fill_alpha=0.1, fill_color='red', line_color='red'))\n",
"\n",
"show(p)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Bar Charts\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/bar_stacked.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bokeh.palettes import Spectral5\n",
"from bokeh.sampledata.autompg import autompg_clean as df\n",
"from bokeh.transform import factor_cmap\n",
"\n",
"df.cyl = df.cyl.astype(str)\n",
"df.yr = df.yr.astype(str)\n",
"\n",
"group = df.groupby(['cyl', 'mfr'])\n",
"\n",
"index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)\n",
"\n",
"p = figure(plot_width=800, plot_height=500, title=\"Mean MPG by # Cylinders and Manufacturer\",\n",
" x_range=group, toolbar_location=None, tooltips=[(\"MPG\", \"@mpg_mean\"), (\"Cyl, Mfr\", \"@cyl_mfr\")])\n",
"\n",
"p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=group,\n",
" line_color=\"white\", fill_color=index_cmap, )\n",
"\n",
"p.y_range.start = 0\n",
"p.x_range.range_padding = 0.05\n",
"p.xgrid.grid_line_color = None\n",
"p.xaxis.axis_label = \"Manufacturer grouped by # Cylinders\"\n",
"p.xaxis.major_label_orientation = 1.2\n",
"p.outline_line_color = None\n",
"\n",
"show(p)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Distribution Plots\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/histogram.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import scipy.special\n",
"\n",
"from bokeh.layouts import gridplot\n",
"\n",
"\n",
"def make_plot(title, hist, edges, x, pdf, cdf):\n",
" p = figure(title=title, tools='', background_fill_color=\"#fafafa\")\n",
" p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],\n",
" fill_color=\"navy\", line_color=\"white\", alpha=0.5)\n",
" p.line(x, pdf, line_color=\"#ff8888\", line_width=4, alpha=0.7, legend_label=\"PDF\")\n",
" p.line(x, cdf, line_color=\"orange\", line_width=2, alpha=0.7, legend_label=\"CDF\")\n",
"\n",
" p.y_range.start = 0\n",
" p.legend.location = \"center_right\"\n",
" p.legend.background_fill_color = \"#fefefe\"\n",
" p.xaxis.axis_label = 'x'\n",
" p.yaxis.axis_label = 'Pr(x)'\n",
" p.grid.grid_line_color=\"white\"\n",
" return p\n",
"\n",
"# Normal Distribution\n",
"\n",
"mu, sigma = 0, 0.5\n",
"\n",
"measured = np.random.normal(mu, sigma, 1000)\n",
"hist, edges = np.histogram(measured, density=True, bins=50)\n",
"\n",
"x = np.linspace(-2, 2, 1000)\n",
"pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))\n",
"cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2\n",
"\n",
"p1 = make_plot(\"Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n",
"\n",
"# Log-Normal Distribution\n",
"\n",
"mu, sigma = 0, 0.5\n",
"\n",
"measured = np.random.lognormal(mu, sigma, 1000)\n",
"hist, edges = np.histogram(measured, density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 8.0, 1000)\n",
"pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))\n",
"cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2\n",
"\n",
"p2 = make_plot(\"Log Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n",
"\n",
"# Gamma Distribution\n",
"\n",
"k, theta = 7.5, 1.0\n",
"\n",
"measured = np.random.gamma(k, theta, 1000)\n",
"hist, edges = np.histogram(measured, density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 20.0, 1000)\n",
"pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))\n",
"cdf = scipy.special.gammainc(k, x/theta)\n",
"\n",
"p3 = make_plot(\"Gamma Distribution (k=7.5, θ=1)\", hist, edges, x, pdf, cdf)\n",
"\n",
"# Weibull Distribution\n",
"\n",
"lam, k = 1, 1.25\n",
"measured = lam*(-np.log(np.random.uniform(0, 1, 1000)))**(1/k)\n",
"hist, edges = np.histogram(measured, density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 8, 1000)\n",
"pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)\n",
"cdf = 1 - np.exp(-(x/lam)**k)\n",
"\n",
"p4 = make_plot(\"Weibull Distribution (λ=1, k=1.25)\", hist, edges, x, pdf, cdf)\n",
"\n",
"show(gridplot([p1,p2,p3,p4], ncols=2, plot_width=400, plot_height=400, toolbar_location=None))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Boxplot\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/boxplot.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# generate some synthetic time series for six different categories\n",
"cats = list(\"abcdef\")\n",
"yy = np.random.randn(2000)\n",
"g = np.random.choice(cats, 2000)\n",
"for i, l in enumerate(cats):\n",
" yy[g == l] += i // 2\n",
"df = pd.DataFrame(dict(score=yy, group=g))\n",
"\n",
"# find the quartiles and IQR for each category\n",
"groups = df.groupby('group')\n",
"q1 = groups.quantile(q=0.25)\n",
"q2 = groups.quantile(q=0.5)\n",
"q3 = groups.quantile(q=0.75)\n",
"iqr = q3 - q1\n",
"upper = q3 + 1.5*iqr\n",
"lower = q1 - 1.5*iqr\n",
"\n",
"# find the outliers for each category\n",
"def outliers(group):\n",
" cat = group.name\n",
" return group[(group.score > upper.loc[cat]['score']) | (group.score < lower.loc[cat]['score'])]['score']\n",
"out = groups.apply(outliers).dropna()\n",
"\n",
"# prepare outlier data for plotting, we need coordinates for every outlier.\n",
"if not out.empty:\n",
" outx = list(out.index.get_level_values(0))\n",
" outy = list(out.values)\n",
"\n",
"p = figure(tools=\"\", background_fill_color=\"#efefef\", x_range=cats, toolbar_location=None)\n",
"\n",
"# if no outliers, shrink lengths of stems to be no longer than the minimums or maximums\n",
"qmin = groups.quantile(q=0.00)\n",
"qmax = groups.quantile(q=1.00)\n",
"upper.score = [min([x,y]) for (x,y) in zip(list(qmax.loc[:,'score']),upper.score)]\n",
"lower.score = [max([x,y]) for (x,y) in zip(list(qmin.loc[:,'score']),lower.score)]\n",
"\n",
"# stems\n",
"p.segment(cats, upper.score, cats, q3.score, line_color=\"black\")\n",
"p.segment(cats, lower.score, cats, q1.score, line_color=\"black\")\n",
"\n",
"# boxes\n",
"p.vbar(cats, 0.7, q2.score, q3.score, fill_color=\"#E08E79\", line_color=\"black\")\n",
"p.vbar(cats, 0.7, q1.score, q2.score, fill_color=\"#3B8686\", line_color=\"black\")\n",
"\n",
"# whiskers (almost-0 height rects simpler than segments)\n",
"p.rect(cats, lower.score, 0.2, 0.01, line_color=\"black\")\n",
"p.rect(cats, upper.score, 0.2, 0.01, line_color=\"black\")\n",
"\n",
"# outliers\n",
"if not out.empty:\n",
" p.circle(outx, outy, size=6, color=\"#F38630\", fill_alpha=0.6)\n",
"\n",
"p.xgrid.grid_line_color = None\n",
"p.ygrid.grid_line_color = \"white\"\n",
"p.grid.grid_line_width = 2\n",
"p.xaxis.major_label_text_font_size=\"16px\"\n",
"\n",
"show(p)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Connectivity Matrix\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/les_mis.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from bokeh.sampledata.les_mis import data\n",
"\n",
"nodes = data['nodes']\n",
"names = [node['name'] for node in sorted(data['nodes'], key=lambda x: x['group'])]\n",
"\n",
"N = len(nodes)\n",
"counts = np.zeros((N, N))\n",
"for link in data['links']:\n",
" counts[link['source'], link['target']] = link['value']\n",
" counts[link['target'], link['source']] = link['value']\n",
"\n",
"colormap = [\"#444444\", \"#a6cee3\", \"#1f78b4\", \"#b2df8a\", \"#33a02c\", \"#fb9a99\",\n",
" \"#e31a1c\", \"#fdbf6f\", \"#ff7f00\", \"#cab2d6\", \"#6a3d9a\"]\n",
"\n",
"xname = []\n",
"yname = []\n",
"color = []\n",
"alpha = []\n",
"for i, node1 in enumerate(nodes):\n",
" for j, node2 in enumerate(nodes):\n",
" xname.append(node1['name'])\n",
" yname.append(node2['name'])\n",
"\n",
" alpha.append(min(counts[i,j]/4.0, 0.9) + 0.1)\n",
"\n",
" if node1['group'] == node2['group']:\n",
" color.append(colormap[node1['group']])\n",
" else:\n",
" color.append('lightgrey')\n",
"\n",
"data=dict(\n",
" xname=xname,\n",
" yname=yname,\n",
" colors=color,\n",
" alphas=alpha,\n",
" count=counts.flatten(),\n",
")\n",
"\n",
"p = figure(title=\"Les Mis Occurrences\",\n",
" x_axis_location=\"above\", tools=\"hover,save\",\n",
" x_range=list(reversed(names)), y_range=names,\n",
" tooltips = [('names', '@yname, @xname'), ('count', '@count')])\n",
"\n",
"p.plot_width = 800\n",
"p.plot_height = 800\n",
"p.grid.grid_line_color = None\n",
"p.axis.axis_line_color = None\n",
"p.axis.major_tick_line_color = None\n",
"p.axis.major_label_text_font_size = \"7px\"\n",
"p.axis.major_label_standoff = 0\n",
"p.xaxis.major_label_orientation = np.pi/3\n",
"\n",
"p.rect('xname', 'yname', 0.9, 0.9, source=data,\n",
" color='colors', alpha='alphas', line_color=None,\n",
" hover_line_color='black', hover_color='colors')\n",
"\n",
"show(p) # show the plot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sliders\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/slider.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"from bokeh.layouts import column, row\n",
"from bokeh.models import CustomJS, Slider\n",
"from bokeh.plotting import ColumnDataSource\n",
"\n",
"x = np.linspace(0, 10, 500)\n",
"y = np.sin(x)\n",
"\n",
"source = ColumnDataSource(data=dict(x=x, y=y))\n",
"\n",
"plot = figure(y_range=(-10, 10), plot_width=400, plot_height=400)\n",
"\n",
"plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)\n",
"\n",
"amp_slider = Slider(start=0.1, end=10, value=1, step=.1, title=\"Amplitude\")\n",
"freq_slider = Slider(start=0.1, end=10, value=1, step=.1, title=\"Frequency\")\n",
"phase_slider = Slider(start=0, end=6.4, value=0, step=.1, title=\"Phase\")\n",
"offset_slider = Slider(start=-5, end=5, value=0, step=.1, title=\"Offset\")\n",
"\n",
"callback = CustomJS(args=dict(source=source, amp=amp_slider, freq=freq_slider, phase=phase_slider, offset=offset_slider),\n",
" code=\"\"\"\n",
" const data = source.data;\n",
" const A = amp.value;\n",
" const k = freq.value;\n",
" const phi = phase.value;\n",
" const B = offset.value;\n",
" const x = data['x']\n",
" const y = data['y']\n",
" for (var i = 0; i < x.length; i++) {\n",
" y[i] = B + A*Math.sin(k*x[i]+phi);\n",
" }\n",
" source.change.emit();\n",
"\"\"\")\n",
"\n",
"amp_slider.js_on_change('value', callback)\n",
"freq_slider.js_on_change('value', callback)\n",
"phase_slider.js_on_change('value', callback)\n",
"offset_slider.js_on_change('value', callback)\n",
"\n",
"layout = row(\n",
" plot,\n",
" column(amp_slider, freq_slider, phase_slider, offset_slider),\n",
")\n",
"\n",
"show(layout)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:markdown id: tags:
# `bokeh`
[`bokeh`](https://docs.bokeh.org/en/latest/index.html) is a Python library for creating interactive visualizations for modern web browsers. `bokeh` allows you to create these interactive web-based plots without having to code in JavaScript.
`bokeh` has excellent documentation: https://docs.bokeh.org/en/latest/index.html
This notebook is not intended to instruct you how to use `bokeh`. Instead it pulls together interesting examples from the `bokeh` documentation into a single notebook to give you a taster of what can be done with `bokeh`.
%% Cell type:code id: tags:
``` python
# `figure` and `show` are used by the plotting cells that follow; `output_file`
# is only referenced by a commented-out line in the scatter-plot cell.
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
# Configure bokeh to emit its output into the notebook, so that each later
# `show(...)` call displays the plot under the cell that produced it.
output_notebook()
```
%% Cell type:code id: tags:
``` python
# Download bokeh's sample datasets. Later cells import from `bokeh.sampledata.*`
# (iris, glucose, autompg, les_mis); presumably at least some of those need this
# step to have run once - TODO confirm which. Requires network access.
from bokeh import sampledata
sampledata.download()
```
%% Cell type:markdown id: tags:
## Scatter Plots
https://docs.bokeh.org/en/latest/docs/gallery/iris.html
%% Cell type:code id: tags:
``` python
from bokeh.sampledata.iris import flowers

# One fixed colour per species; every row of `flowers` is coloured by looking up
# its 'species' value here.
colormap = dict(setosa='red', versicolor='green', virginica='blue')
colors = [colormap[species] for species in flowers['species']]

p = figure(title="Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Petal Width'

# One semi-transparent marker per flower: petal length on x, petal width on y.
p.circle(
    flowers["petal_length"],
    flowers["petal_width"],
    color=colors,
    fill_alpha=0.2,
    size=10,
)

# Left disabled on purpose: the notebook renders inline rather than to a file.
# output_file("iris.html", title="iris.py example")

show(p)
```
%% Cell type:markdown id: tags:
## Line Plots
https://docs.bokeh.org/en/latest/docs/gallery/box_annotation.html
%% Cell type:code id: tags:
``` python
from bokeh.models import BoxAnnotation
from bokeh.sampledata.glucose import data

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

# Restrict the readings to a single day. Presumably `data` has a datetime index,
# so slicing with the same date string at both ends keeps every timestamp that
# falls on that day - TODO confirm against the sample data.
data = data.loc['2010-10-04':'2010-10-04']

p = figure(x_axis_type="datetime", tools=TOOLS, title="Glucose Readings, Oct 4th (Red = Outside Range)")
p.background_fill_color = "#efefef"
p.xgrid.grid_line_color = None
p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Value'

# Draw the series twice: a connecting line plus a small marker at each reading.
p.line(data.index, data.glucose, line_color='grey')
p.circle(data.index, data.glucose, color='grey', size=1)

# Shade the two "outside range" bands in red: one bounded above at 80 and one
# bounded below at 180. Only one edge is given for each box.
p.add_layout(BoxAnnotation(top=80, fill_alpha=0.1, fill_color='red', line_color='red'))
p.add_layout(BoxAnnotation(bottom=180, fill_alpha=0.1, fill_color='red', line_color='red'))

show(p)
```
%% Cell type:markdown id: tags:
## Bar Charts
https://docs.bokeh.org/en/latest/docs/gallery/bar_stacked.html
%% Cell type:code id: tags:
``` python
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

# Convert cylinder count and model year to strings. The cylinder strings are
# used below as grouping keys and colour-map factors; `yr` is converted too
# although this cell does not plot it.
df["cyl"] = df["cyl"].astype(str)
df["yr"] = df["yr"].astype(str)

# Group by (cylinders, manufacturer). The groupby object is handed straight to
# bokeh as both the x range and the data source; the column names used below
# ('cyl_mfr', 'mpg_mean') refer to fields bokeh derives from it.
group = df.groupby(['cyl', 'mfr'])

# Colour bars by the cylinder part of the nested ('cyl', 'mfr') factor.
cyl_factors = sorted(df["cyl"].unique())
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=cyl_factors, end=1)

hover_tooltips = [("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]
p = figure(
    plot_width=800,
    plot_height=500,
    title="Mean MPG by # Cylinders and Manufacturer",
    x_range=group,
    toolbar_location=None,
    tooltips=hover_tooltips,
)

p.vbar(
    x='cyl_mfr',
    top='mpg_mean',
    width=1,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

# Cosmetics: bars start at zero, no vertical grid lines or outline, and the
# category labels are rotated (1.2 rad).
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.outline_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

show(p)
```
%% Cell type:markdown id: tags:
## Distribution Plots
https://docs.bokeh.org/en/latest/docs/gallery/histogram.html
%% Cell type:code id: tags:
``` python
import numpy as np
import scipy.special
from bokeh.layouts import gridplot
def make_plot(title, hist, edges, x, pdf, cdf):
    """Draw a histogram with PDF and CDF curves overlaid and return the figure.

    Args:
        title: Text used as the figure title.
        hist: Bar heights, one per bin.
        edges: Bin boundaries; ``edges[:-1]`` and ``edges[1:]`` give the left
            and right side of each bar (presumably ``len(hist) + 1`` values, as
            returned by ``np.histogram`` - confirm at the call site).
        x: Horizontal coordinates at which ``pdf`` and ``cdf`` are sampled.
        pdf: Values drawn as the line labelled "PDF".
        cdf: Values drawn as the line labelled "CDF".

    Returns:
        The configured figure; the caller is responsible for showing it.
    """
    plot = figure(title=title, tools='', background_fill_color="#fafafa")

    # Histogram: one quad per bin, standing on y = 0.
    bin_left, bin_right = edges[:-1], edges[1:]
    plot.quad(top=hist, bottom=0, left=bin_left, right=bin_right,
              fill_color="navy", line_color="white", alpha=0.5)

    # Overlay curves as (values, colour, line width, legend entry). PDF comes
    # first so the draw order and legend order stay PDF, then CDF.
    overlays = (
        (pdf, "#ff8888", 4, "PDF"),
        (cdf, "orange", 2, "CDF"),
    )
    for values, colour, width, label in overlays:
        plot.line(x, values, line_color=colour, line_width=width, alpha=0.7, legend_label=label)

    plot.y_range.start = 0
    plot.grid.grid_line_color = "white"
    plot.xaxis.axis_label = 'x'
    plot.yaxis.axis_label = 'Pr(x)'
    plot.legend.location = "center_right"
    plot.legend.background_fill_color = "#fefefe"
    return plot
# Normal Distribution
mu, sigma = 0, 0.5
measured = np.random.normal(mu, sigma, 1000)
hist, edges = np.histogram(measured, density=True, bins=50)
x = np.linspace(-2, 2, 1000)
pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2
p1 = make_plot("Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)
# Log-Normal Distribution
mu, sigma = 0, 0.5
measured = np.random.lognormal(mu, sigma, 1000)
hist, edges = np.histogram(measured, density=True, bins=50)
x = np.linspace(0.0001, 8.0, 1000)
pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2
p2 = make_plot("Log Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)
# Gamma Distribution
k, theta = 7.5, 1.0