Commit 94de38d9 authored by Tom Marshall's avatar Tom Marshall

added more plot functions and a second tutorial for using them

parent 81ee4801
% download socio-economic status dataset from kaggle...
% https://mwww.kaggle.com/sdorius/countryses
% insert your path to the plotting scripts here
addpath('/Users/marshall/Documents/MATLAB/plotting/');
% insert your path to the data here
datadir = '/Users/marshall/Desktop';
fn = 'GLOBCSES.Final20170714.csv';
format_spec = '%f%C%C%f%f%C%f%f%C%C';
d = readtable(fullfile(datadir, fn), 'Delimiter', ',' , ...
'Format', format_spec);
%% get nice colours from color brewer
% (https://uk.mathworks.com/matlabcentral/fileexchange/34087-cbrewer---colorbrewer-schemes-for-matlab)
[cb] = cbrewer('qual', 'Set1', 10, 'pchip');
%% pull relevant variables out of table
% column vectors from the table columns
yr = table2array(d(:,4));
ses = table2array(d(:,5));
gdp_ppc = table2array(d(:,7));
yrs_educ = table2array(d(:,8));
region = table2array(d(:,10));
% logicals to determine region
is_eur = (region == 'East Europe' | region == 'West Europe' | ...
region == 'South Europe' | region == 'North Europe' );
is_afr = (region == 'East Africa' | region == 'West Africa' | ...
region == 'South Africa' | region == 'North Africa' | ...
region == 'Middle Africa');
is_amr = (region == 'South America' | region == 'Central America' | ...
region == 'Caribbean');
is_asi = (region == 'East Asia' | region == 'West Asia' | ...
region == 'South Asia' | region == 'Southeast Asia');
%% example of a two-d raincloud
yrs_to_select = [1950 2010];
nyrs = length(yrs_to_select);
data = [];
for i = 1:nyrs
target_yr = yrs_to_select(i);
amrdata = is_amr & yr == target_yr;
data{i,1}(2,:) = ses(amrdata);
data{i,1}(1,:) = log10(gdp_ppc(amrdata));
end
clz = cb([2 8],:);
% plot
figure('units', 'normalized', 'outerposition', [0 0 1 1])
[h, ax] = twod_raincloud(data, clz, 0);
h.s{1}.SizeData = 180;
h.s{2}.SizeData = 180;
h.s{3}.SizeData = 180;
h.s{4}.SizeData = 180;
xlabel(ax(1), 'log10 per-capita GDP ($)');
ylabel(ax(1), 'socio-economic status');
legend(ax(1), {'1950';'2010'}, 'Location', 'SouthEast');
set(ax(1), 'FontSize', 14);
%% example of n-rainclouds
clear h
yrs_to_select = [1950 1980 2010];
nyrs = length(yrs_to_select);
clz = cb([3 5],:);
figure('units', 'normalized', 'outerposition', [0.05 0.05 0.3 0.6])
for i = 1:nyrs
target_yr = yrs_to_select(i);
data = [];
data{1} = yrs_educ(is_eur & yr == target_yr);
data{2} = yrs_educ(is_asi & yr == target_yr);
subplot(1, nyrs, i)
h(i) = n_rainclouds(data, clz);
set(gca, 'XLim', [0 14]);
% flip plot so 'x' axis is vertical and 'y' is horizontal
view([-90 -90]);
set(gca, 'Xdir', 'reverse');
% labels
title(num2str(target_yr));
if i == 1
xlabel('education (years)');
end
end
legend({'europe';'asia'}, 'Location', 'SouthEast');
%% another example of nrainclouds
yrs_to_select = [1970 1990 2010];
nyrs = length(yrs_to_select);
data = [];
for i = 1:nyrs
target_yr = yrs_to_select(i);
data{i} = yrs_educ(yr == target_yr);
end
clz = cbrewer('qual', 'Set1', 3);
figure('units', 'normalized', 'outerposition', [0.55 0.05 0.6 0.4])
n_rainclouds(data, clz);
set(gca, 'XLim', [-1 14]);
legend({'1970';'1990';'2010'});
xlabel('education (years)');
\ No newline at end of file
%% simultaneous plot of N rainclouds -
% plots density plot, boxplot, and 1-d scatter concurrently for several
% data vectors
% Use as h = n_rainclouds(X, cl),
% where X is an array of N cells and cl is an N x 3 array of RGB values.
% h is a cell array of handles for the various figure parts.
function h = n_rainclouds(X, cl)
% clouds
for i = 1:length(X)
[f{i}, xi{i}] = ksdensity(X{i});
% density plot
h.dx{i} = area(xi{i}, f{i}); hold on
set(h.dx{i}, 'FaceColor', cl(i,:));
set(h.dx{i}, 'EdgeColor', 'none');
set(h.dx{i}, 'FaceAlpha', 0.5);
end
% make some space under the clouds for the drops
yl = get(gca, 'YLim');
set(gca, 'YLim', [-yl(2) yl(2)]);
% width of boxplot / drops
wdth = yl(2)*0.5;
% raindrops
for i = 1:length(X)
% jitter for raindrops
jit{i} = (rand(size(X{i})) - 0.5) * wdth;
% raindrops
h.dr{i} = scatter(X{i}, jit{i} - yl(2)/2);
h.dr{i}.SizeData = 32;
h.dr{i}.MarkerFaceColor = cl(i,:);
h.dr{i}.MarkerEdgeColor = 'none';
set(h.dr{i},'MarkerFaceAlpha', 0.5);
end
% mean and quantiles
for i = 1:length(X)
Y{i} = quantile(X{i}, [0.25 0.75 0.5 0.02 0.98]);
offset(i) = (wdth * 0.1) * (i-1) * length(X);
h.bx{i,1} = line([Y{i}(1) Y{i}(2)], wdth - [offset(i) offset(i)], 'col', 'k', 'LineWidth', 2);
h.bx{i,2} = scatter(mean(X{i}),wdth - offset(i), 120, cl(i,:));
h.bx{i,2}.MarkerFaceColor = cl(i,:);
h.bx{i,2}.MarkerEdgeColor = [0 0 0];
h.bx{i,2}.LineWidth = 2;
end
%% two-d raincloud plot -
% plots kernel densities, 2-d scatterplot of data and prediction intervals
% use like:
% h = twod_raincloud(X, cl)
% where X is a vector of n cells with the data values (2 rows)
% and cl is a 3 x n matrix of RGB values
% 'plot_lsline' adds the linear fit line to the data (default = 0);
% set 'color_face' to 0 to remove the shading on the prediction interval
% plots (default = 1);
function [h, ax] = twod_raincloud(X, cl, plot_lsline, color_face)
% defaults
if ~exist('color_face','var')
color_face = 1;
end
if ~exist('plot_lsline','var')
plot_lsline = 0;
end
% scatterplot of the two variables
ax(1) = subplot(4, 4, [2 3 4 6 7 8 10 11 12]); hold on
for i = 1:length(X)
h.s{i} = scatter(X{i}(1,:), X{i}(2,:));
h.s{i}.SizeData = 32;
h.s{i}.MarkerFaceColor = cl(i,:);
h.s{i}.MarkerFaceAlpha = 0.5;
h.s{i}.MarkerEdgeColor = 'none';
end
% least squares lines
if plot_lsline
h.lsl = lsline;
for i = 1:length(X)
h.lsl(end + 1 - i).LineWidth = 2;
h.lsl(end + 1 - i).Color = [cl(i,:) 0.2];
h.lsl(end + 1 - i).LineStyle = '--';
end
end
% get limits so we can set density plot to same limits
xl = get(gca, 'XLim');
yl = get(gca, 'YLim');
% plot the prediction intervals
for i=1:length(X)
xvals = linspace(xl(1), xl(2), 10);
ft = fit(X{i}(1, :)', X{i}(2, :)', 'poly1');
predi = predint(ft, xvals, 0.95, 'functional', 'off');
h.l{i} = plot(ax(1), xvals, predi, 'col', [cl(i, :) 0.2], 'LineStyle', '--','LineWidth',2);
x2 = [xvals fliplr(xvals)];
predi2 = [predi(:,1); flipud(predi(:,2))];
if color_face
h.f{i} = fill(x2,predi2,cl(i,:));
h.f{i}.FaceAlpha = 0.05;
h.f{i}.LineStyle = 'none';
end
end
set(gca, 'XLim', xl);
set(gca, 'YLim', yl);
hold off
% x-axis density plots
ax(2) = subplot(4, 4, [14 15 16]);
% calculate kernel densities
for i = 1:length(X)
[f{i}, xi{i}] = ksdensity(X{i}(1,:));
% density plots
h.dx{i} = area(xi{i}, f{i}); hold on
set(h.dx{i}, 'FaceColor', cl(i,:));
set(h.dx{i}, 'EdgeColor', 'none');
set(h.dx{i}, 'FaceAlpha', 0.5);
end
% flip upside down and set limits correctly
set(gca, 'Ydir', 'reverse');
set(gca, 'XLim', xl);
set(gca, 'Visible', 'off');
hold off
% y-axis density plots
ax(3) = subplot(4, 4, [1 5 9]);
% calculate kernel densities
for i = 1:length(X)
[f{i}, xi{i}] = ksdensity(X{i}(2,:));
% density plots
h.dy{i} = area(xi{i}, f{i}); hold on
set(h.dy{i}, 'FaceColor', cl(i,:));
set(h.dy{i}, 'EdgeColor', 'none');
set(h.dy{i}, 'FaceAlpha', 0.5);
end
% rotate this and set the limits correctly
set(gca, 'XLim', yl);
view([-90 -90])
set(gca, 'Xdir', 'reverse');
set(gca, 'Visible', 'off');
hold off
end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment