Commit 7fe60517 authored by Soojin Lee

Update ica_prac.m

parent e3e53e07
%% SHORT INTRO TO ICA
% ICA as a 'projection-pursuit'
%
% Understanding data often means finding 'interesting' ways to
% look at the data
%
% This means projecting the data into 'interesting' directions
%
% If X is the data matrix (d dimensions x n samples)
%
% then w'X is the projection of the data onto the direction w
%
% PCA finds the direction where var(w'X) is maximal
% ICA finds the direction where w'X is maximally non-Gaussian
% this makes sense because being Gaussian is 'boring' (i.e. it is what
% happens by default when many independent random effects are mixed),
% so deviations from Gaussianity are interesting
% quantifying non-Gaussianity in a way that is differentiable (so
% we can optimise it) is what FastICA does
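%
% For example, two signals with the same variance can differ a lot in how
% Gaussian they look, which is exactly what ICA exploits. A quick illustrative
% sketch (uncomment to try; 'ytoy'/'utoy' are not used anywhere below):
% ytoy = randn(1,1e4);                   % Gaussian, unit variance
% utoy = sqrt(12)*(rand(1,1e4)-0.5);     % uniform, also unit variance
% [var(ytoy) var(utoy)]                  % ~[1 1]   -> variance cannot tell them apart
% [kurtosis(ytoy)-3 kurtosis(utoy)-3]    % ~[0 -1.2] -> non-Gaussianity can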
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 1. Generate a point cloud in 2D
rng('default') % for reproducibility
n = 400; % number of samples
d = 2; % dimensionality
w1 = randn(d,1);
w1 = w1/norm(w1); % direction 1 is random
ang = 30; % rotation angle
rot = [cosd(ang) sind(ang); %rotation matrix
-sind(ang) cosd(ang)];
w2 = rot*w1; % direction 2 is rotated relative to direction 1
% Generate data along the two directions
% (multivariate Gaussians spread a little along w1 and w2)
x1 = mvnrnd([4 1],w1*w1',n/2) + .1*randn(n/2,2); % mvnrnd expects a 1-by-d mean
x2 = mvnrnd([4 1],w2*w2',n/2) + .1*randn(n/2,2);
% Exercise: try this again
% but with two point clouds aligned
% with each other but with different means
% x1 = mvnrnd(w2',w1*w1',n/2) + .1*randn(n/2,2);
% x2 = mvnrnd(-w2',w1*w1',n/2) + .1*randn(n/2,2);
% Concatenate the two clouds of data
x = [x1;x2]';
% optionally add an outlier
% x(:,1) = randn(d,1)*5;
% do a quick plot to visualise the data
% which directions do you find most interesting?
figure
plot(x(1,:),x(2,:),'o')
grid on
axis equal
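% Overlay the two planted directions on the scatter plot, for later comparison
% with the directions that PCA and ICA recover (a small addition, not part of
% the original practical):
hold on
quiver(mean(x(1,:)),mean(x(2,:)),w1(1),w1(2),'linewidth',2,'color','k')
quiver(mean(x(1,:)),mean(x(2,:)),w2(1),w2(2),'linewidth',2,'color','m')
hold off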
%% 2. Pre-processing
% Centering (mean(data)=0 along each dimension)
C = eye(n) - ones(n,1)*ones(1,n)/n;
x_c = x*C;
% % A more efficient alternative is to simply call 'mean'
% x_c = x - mean(x,2);
% Pre-whitening (such that the covariance becomes the identity, i.e.
% var(data)=1 along every direction)
% This means that PCA becomes irrelevant: all directions have the same
% variance
[V,D] = eig(cov(x'));
Q = V*diag(1./sqrt(diag(D)))*V';
x_w = Q*x_c;
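% Sanity check (a sketch, not part of the original practical): Q is the
% inverse matrix square root of the covariance (so-called ZCA whitening),
% and after whitening the covariance should be (close to) the identity
Q_alt = inv(sqrtm(cov(x_c')));   % same whitening matrix, built via sqrtm
disp(norm(Q - Q_alt))            % ~0 up to numerical error
disp(cov(x_w'))                  % ~eye(d)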
% quick plot
figure,
subplot(1,2,1),hold on
plot(x(1,:),x(2,:),'o')
plot(x_c(1,:),x_c(2,:),'o')
grid on
title('Centering')
legend({'before','after'})
axis equal
subplot(1,2,2),hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(x_w(1,:),x_w(2,:),'o')
grid on
title('Prewhitening')
legend({'before','after'})
axis equal
% check that mean = 0
mean(x_c,2)
%% quick check that the variance is 1 along all directions
addpath ./utils
G = @(x)(x.^2);
figure
subplot(1,2,1), plot_cf(x_c,G)
legend({'data','variance'}), title('Before whitening');
subplot(1,2,2), plot_cf(x_w,G)
legend({'data','variance'}), title('After whitening');
%% random projections
figure
% 10 random projections
% (press any key to see the next projection)
for i = 1:10
w = randn(d,1); w = w/norm(w)*5;
subplot(1,2,1)
plot(x_w(1,:),x_w(2,:),'o'); hold on
axis equal; grid on
quiver(0,0,w(1),w(2),'linewidth',2,'color','r'); hold off
title(sprintf('%d/10 random projection',i))
subplot(1,2,2)
histogram(w'*x_w)
pause
end
%% quick PCA
% You should be able to see that PCA picks up directions of high variance
% which do not in this case coincide with the interesting directions
% in this data
[V,~] = eig(cov(x'));
figure
% plot data
plot(x(1,:),x(2,:),'o'); hold on
% plot 1st PC direction in RED (pos and neg); V(:,2) is the eigenvector with
% the larger eigenvalue, since eig returns eigenvalues in ascending order here
quiver(mean(x(1,:)),mean(x(2,:)),V(1,2),V(2,2),'linewidth',3,'color','r')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,2),-V(2,2),'linewidth',3,'color','r')
% plot 2nd PC direction in GREEN (pos and neg)
quiver(mean(x(1,:)),mean(x(2,:)),V(1,1),V(2,1),'linewidth',3,'color','g')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,1),-V(2,1),'linewidth',3,'color','g')
axis equal
grid on
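% Cross-check with the Statistics Toolbox 'pca' function (a sketch, not part
% of the original practical): its coefficient columns come sorted by
% decreasing variance, so coeff(:,1) should match the red direction up to sign
coeff = pca(x');
disp(coeff)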
%% ICA
% contrast functions
% these are functions which, when applied to the data, 'highlight'
% non-Gaussianities
% they are also meant to approximate negentropy (a measure of distance
% from Gaussianity)
G0 = @(x)(x.^2); % quadratic (do not use this!)
G1 = @(x)(1-exp(-x.^2)); % Gaussian --> integral(x*exp(-x^2/2))
G2 = @(x)(log(cosh(x))); % --> integral(tanh)
G3 = @(x)(x.^3); % skewness
G4 = @(x)(x.^4); % kurtosis (tailedness)
G5 = @(x)(sqrt(G1(x)));
% Let's plot these functions
Gall = {G0, G1, G2, G3, G4};
Gnames = {'x^2','Gaussian','tanh','x^3','x^4'};
xx = linspace(-15,15,1000);
figure
for i = 1:5
subplot(2,3,i)
g = Gall{i};
plot(xx, g(xx), 'linewidth',2)
legend(Gnames{i})
end
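% FastICA's negentropy approximation is J(y) ~ (E[G(y)] - E[G(nu)])^2, where
% nu is a standard Gaussian variable. A quick sketch (not part of the original
% practical) of estimating the Gaussian baseline E[G(nu)] by simulation:
nu = randn(1,1e6);         % samples from a standard Gaussian
disp(mean(G2(nu)))         % baseline for the log-cosh contrast (a fixed constant)
% Note that the simpler measure mean(G(w'*x))^2 used in the next cell omits
% this baseline; that only matters for even contrasts such as x^2 or x^4
% (for odd contrasts like x^3 the Gaussian baseline is zero).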
%% Calculate non-Gaussianity along
% different directions and plot it
% alongside the variance
angles = linspace(0,2*pi,1000); % angles to calculate cost function along
I = zeros(size(angles)); % Interestingness
E = zeros(size(angles)); % Extent (variance)
% put your own non-linearity in here!
G = G4;
for i = 1:length(angles)
% get vector w along angle
[w_x, w_y] = pol2cart(angles(i),1);
w = [w_x;w_y];
% calculate non-Gaussianity along w
I(i) = mean(G(w'*x_w)).^2;
% calculate variance along w
E(i) = var(w'*x_w);
end
% quick plot of the cost functions
figure
plot(x_w(1,:),x_w(2,:),'o'); hold on
[xx,yy] = pol2cart(angles,I/norm(I)*40);
plot(xx,yy,'linewidth',2,'color','r')
[xx,yy] = pol2cart(angles,E/norm(E)*40);
plot(xx,yy,'linewidth',2,'color','g','linestyle','--')
axis equal
grid on
legend({'data','non-gaussianity','variance'})
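% Quick read-out of the most 'interesting' direction found by the sweep above
% (a sketch, not part of the original practical):
[~,imax] = max(I);
[wbest_x, wbest_y] = pol2cart(angles(imax),1);
wbest = [wbest_x; wbest_y];   % unit vector maximising the contrast in whitened space
disp(wbest')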
%% Plot in de-whitened space
% regenerate the points of the non-Gaussianity curve
[xx,yy] = pol2cart(angles,I/norm(I)*40);
% un-whiten with the whitening matrix Q
ww = Q\[xx;yy];
figure,hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(ww(1,:),ww(2,:),'linewidth',3,'color','r')
axis equal
grid on
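% The same un-whitening maps the single best direction from the sketch above
% back into the centred data space (again not part of the original practical):
wbest_orig = Q\wbest;
disp(wbest_orig')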
%% Compare with the FastICA toolbox :)
% Can be downloaded from https://research.ics.aalto.fi/ica/fastica/
addpath ./utils/FastICA_25
% check the help to see how to change things like the contrast function g
% note that we are passing the centred data x_c (fastica also removes the mean
% and pre-whitens internally)
% S = estimated source signals, A = estimated mixing matrix, W = unmixing matrix
[S, A, W] = fastica(x_c,'approach', 'symm','g','skew');
mixedS = A*S; % re-mix the estimated sources (should reconstruct x_c)
% plot the results
figure
subplot(1,3,1)
plot(S(1,:),S(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-5,5]); ylim([-6,6])
title('Estimated Source')
subplot(1,3,2)
plot(mixedS(1,:),mixedS(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-4,4]); ylim([-5,5])
title('Mixed Source')
subplot(1,3,3)
plot(x_c(1,:),x_c(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-4,4]); ylim([-5,5])
title('Original centered data')
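% Quick consistency checks on the toolbox output (a sketch, not part of the
% original practical): W should invert A, and re-mixing the estimated sources
% should reproduce the centred data up to numerical error
disp(W*A)                     % ~eye(2)
disp(norm(A*S - x_c,'fro'))   % ~0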
%% Quick implementation of the FastICA algorithm
d = size(x,1);
W = orth(randn(d,d)); % Random initial weights
delta = inf; % initialise the convergence measure (change between successive W estimates)
k = 0; % counting iteration
% maxiter = 1e4; % stop iteration if k = maxiter
tolerance = 1e-7; % stop iteration if delta < tolerance
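% The loop below implements the symmetric (all-units-at-once) FastICA
% fixed-point update W <- E[g(W*x)*x'] - diag(E[g'(W*x)])*W, followed by
% symmetric decorrelation W <- (W*W')^(-1/2)*W computed via an SVD of W.
% (Note: the SVD step re-uses the variable name S, overwriting the fastica
% sources from the previous cell.)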
while delta > tolerance
% while delta > tolerance && k < maxiter
k = k + 1; % counting iteration
% Update weights
Wlast = W; % Save last weights
Sk = W * x_w;
% non-linearity g(s) = 2*s^3 (cubic, i.e. the kurtosis-type 'pow3' rule rather than 'skew')
G = 2 * Sk.^3;
Gp = 6 * Sk.^2; % derivative of g
W = (G * x_w') / n - bsxfun(@times,mean(Gp,2),W);
W = W ./ sqrt(sum(W.^2,2));
% Decorrelate W's
[U, S, ~] = svd(W,'econ');