%% SHORT INTRO TO ICA
% ICA as a 'projection-pursuit'
%
% Understanding data often means finding 'interesting' ways to
% look at the data
%
% This means projecting the data into 'interesting' directions
%
% If X is the data matrix (d dimensions x n samples)
%
% then w'X is the projection of the data along direction w
%
% PCA finds the direction where var(w'X) is maximal
% ICA finds the direction where w'X is maximally non-Gaussian
% this makes sense because being Gaussian is 'boring' (i.e. it is
% what happens by default when you mix random stuff; see the small
% sketch below)
% so deviations from Gaussianity are interesting
% quantifying non-Gaussianity in a way that is differentiable (so
% we can optimise it) is what FastICA does
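%
% A minimal sketch of that intuition: averaging many independent
% non-Gaussian variables gives something close to Gaussian, which we can
% see with the excess kurtosis E[x^4]/E[x^2]^2 - 3 (0 for a Gaussian):
%
%   exkurt = @(x) mean(x.^4)/mean(x.^2).^2 - 3;
%   u = rand(1e4,1) - 0.5;              % one uniform variable: exkurt ~ -1.2
%   s = mean(rand(1e4,50) - 0.5, 2);    % average of 50 of them: exkurt ~ 0
%   [exkurt(u) exkurt(s)]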
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 1. Generate a points cloud in 2d
rng('default') % for replication
n = 400; % number of samples
d = 2; % dimensionality
w1 = randn(d,1);
w1 = w1/norm(w1); % direction 1 is random
ang = 30; % rotation angle
rot = [cosd(ang) sind(ang);    % rotation matrix
       -sind(ang) cosd(ang)];
w2 = rot*w1; % direction 2 is direction 1 rotated by ang degrees
% Generate data along the two directions
% (multivariate Gaussians spread a little along w1 and w2)
x1 = mvnrnd([4;1],w1*w1',n/2) + .1*randn(n/2,2);
x2 = mvnrnd([4;1],w2*w2',n/2) + .1*randn(n/2,2);
% Exercise: try this again
% but with two point clouds aligned
% with each other but with different means
% x1 = mvnrnd(w2,w1*w1',n/2) + .1*randn(n/2,2);
% x2 = mvnrnd(-w2,w1*w1',n/2) + .1*randn(n/2,2);
% Concatenate the two clouds of data
x = [x1;x2]';
% optionally add an outlier
% x(:,1) = randn(d,1)*5;
% do a quick plot to visualise the data
% which directions do you find most interesting?
figure
plot(x(1,:),x(2,:),'o')
grid on
axis equal
%% 2. Pre-processing
% Centering (mean(data)=0 along each dimension)
C = eye(n) - ones(n,1)*ones(1,n)/n;
x_c = x*C;
% % optionally, we can simply call 'mean'
% x_c = x - mean(x,2);
% Pre-whitening (var(data)=1 along each dimension)
% This means that PCA becomes irrelevant: all directions have the same
% variance
[V,D] = eig(cov(x'));
Q = V*diag(1./sqrt(diag(D)))*V';
x_w = Q*x_c;
% quick plot
figure,
subplot(1,2,1),hold on
plot(x(1,:),x(2,:),'o')
plot(x_c(1,:),x_c(2,:),'o')
grid on
title('Centering')
legend({'before','after'})
axis equal
subplot(1,2,2),hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(x_w(1,:),x_w(2,:),'o')
grid on
title('Prewhitening')
legend({'before','after'})
axis equal
% check that mean = 0
mean(x_c,2)
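% and check that the whitened data has (approximately) identity covariance
cov(x_w')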
%%
% plot the cost function for a quadratic contrast along different directions
% (plot_cf is a helper function, not included in this file; note that after
% whitening a quadratic cost is the same along every direction)
G = @(x)(x.^2);
plot_cf(x_w,G)
%% random projection
figure
% 10 random projections
% (press any key to see the next projection)
for i = 1:10
    w = randn(d,1); w = w/norm(w)*5;
    subplot(1,2,1)
    plot(x_w(1,:),x_w(2,:),'o'); hold on
    axis equal
    grid on
    quiver(0,0,w(1),w(2),'linewidth',2,'color','r'); hold off
    subplot(1,2,2)
    histogram(w'*x_w)
    pause
end
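% A quick way to quantify how 'Gaussian-looking' a projection is (a sketch):
% the scale-invariant excess kurtosis of y = w'*x_w is ~0 for a Gaussian,
% so large deviations from 0 flag an interesting direction
exkurt = @(y) mean(y.^4)/mean(y.^2)^2 - 3;
fprintf('excess kurtosis of the last random projection: %.2f\n', exkurt(w'*x_w))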
%% ???
t = linspace(0,10*pi,400);
s1 = sin(t) + .1*randn(size(t));        % source 1: noisy sine wave
s2 = (mod(4*t,pi)) + .1*randn(size(t)); % source 2: noisy sawtooth
A = [1 .1;-0.3 1];                      % mixing matrix
X = A*[s1;s2];                          % observed (mixed) signals
[X_w,~] = whiten(X); % whiten the mixed signals (helper function, not included in this file)
W = plot_cost(X_w);  % plot the cost along directions and return the best directions (helper function, not included in this file)
quiver(0,0,3*W(1,1),3*W(1,2),'linewidth',3,'color','k')
quiver(0,0,3*W(2,1),3*W(2,2),'linewidth',3,'color','k')
figure,
subplot(1,2,1)
plot(X_w')
subplot(1,2,2)
plot(((W*X_w))')
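% Optional check (a sketch): each recovered component should correlate
% strongly with one of the true sources (sign and order are arbitrary in ICA)
Y = W*X_w;
abs(corr([s1;s2]', Y'))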
%% quick PCA
[V,~] = eig(cov(x'));
figure
% plot data
plot(x(1,:),x(2,:),'o'); hold on
% plot 1st PC direction (pos and neg)
quiver(mean(x(1,:)),mean(x(2,:)),V(1,2),V(2,2),'linewidth',3,'color','r')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,2),-V(2,2),'linewidth',3,'color','r')
% plot 2nd PC direction (pos and neg)
quiver(mean(x(1,:)),mean(x(2,:)),V(1,1),V(2,1),'linewidth',3,'color','g')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,1),-V(2,1),'linewidth',3,'color','g')
axis equal
grid on
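% The eigenvalues of the covariance give the variance along each PC (a quick
% look at 'variance explained'); for a symmetric matrix eig returns them in
% ascending order, which is why column 2 of V above is the 1st PC
lam = eig(cov(x'));
flipud(lam)/sum(lam)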
%% ICA
% contrast functions
% these are functions which, when applied to the data, 'highlight' the
% non-Gaussianities
% they are also meant to approximate neg-entropy
G0 = @(x)(x.^2); % quadratic (do not use!)
G1 = @(x)(1-exp(-x.^2)); % Gaussian --> integral(x*exp(-x^2/2))
G2 = @(x)(log(cosh(x))); % --> integral(tanh)
G3 = @(x)(x.^3); % skewness
G4 = @(x)(x.^4); % kurtosis (tailedness)
G5 = @(x)(sqrt(G1(x)));
% put your own non-linearity in here!
G = G3;
% Let's plot these functions
Gall = {G0, G1, G2, G3, G4};
Gnames = {'x^2','Gaussian','tanh','x^3','x^4'};
xx = linspace(-15,15,1000);
figure
for i = 1:5
    subplot(2,3,i)
    g = Gall{i};
    plot(xx, g(xx), 'linewidth',2)
    legend(Gnames{i})
end
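% For reference (a quick sketch): for a standard Gaussian, E[x^3] = 0 and
% E[x^4] = 3, so deviations of these moments from 0 and 3 signal non-Gaussianity
nu = randn(1,1e5);
fprintf('E[nu^3] = %.2f (Gaussian: 0), E[nu^4] = %.2f (Gaussian: 3)\n', mean(nu.^3), mean(nu.^4))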
%% Calculate non-Gaussianity along
% different directions and plot it
% alongside the variance
angles = linspace(0,2*pi,1000); % angles to calculate cost function along
I = zeros(size(angles)); % Interestingness
E = zeros(size(angles)); % Extent (variance)
G = G4; % select contrast function
for i = 1:length(angles)
    % get vector w along angle
    [w_x, w_y] = pol2cart(angles(i),1);
    w = [w_x;w_y];
    % calculate non-Gaussianity along w
    I(i) = mean(G(w'*x_w)).^2;
    % calculate variance along w
    E(i) = var(w'*x_w);
end
% quick plot of the cost functions
figure
plot(x_w(1,:),x_w(2,:),'o'); hold on
[xx,yy] = pol2cart(angles,I/norm(I)*40);
plot(xx,yy,'linewidth',2,'color','r')
[xx,yy] = pol2cart(angles,E/norm(E)*40);
plot(xx,yy,'linewidth',2,'color','g','linestyle','--')
axis equal
grid on
legend({'data','non-gaussianity','variance'})
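% Report the most 'interesting' direction (a quick sketch)
[~,imax] = max(I);
fprintf('non-Gaussianity is maximal at an angle of %.1f degrees\n', angles(imax)*180/pi)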
%% Plot in de-whitened space
[xx,yy] = pol2cart(angles,I/norm(I)*40);
ww=Q\[xx;yy];
figure,hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(ww(1,:),ww(2,:),'linewidth',3,'color','r')
quiver(0,0,V(1,2),V(2,2),'linewidth',3,'color','k')
quiver(0,0,-V(1,2),-V(2,2),'linewidth',3,'color','k')
axis equal
%% Compare to FastICA toolbox :)
addpath ~/matlab/FastICA_25/
[W,~] = fastica(x,'approach', 'symm','g','skew');
W = W';
% plot the results
figure
plot(x_c(1,:),x_c(2,:),'o'); hold on
% plot(w(1,:),w(2,:),'linewidth',3,'color','r')
quiver(0,0,W(1,1),W(1,2),'linewidth',3,'color','k')
quiver(0,0,-W(1,1),-W(1,2),'linewidth',3,'color','k')
quiver(0,0,W(2,1),W(2,2),'linewidth',3,'color','c')
quiver(0,0,-W(2,1),-W(2,2),'linewidth',3,'color','c')
axis equal
%% Quick implementation of the FastICA algorithm
d = size(x,1);
W = orth(randn(d,d)) % Random initial weights
delta = inf; % initialize delta (w_{i+1} - w_{i})
k = 0; % counting iteration
% maxiter = 1e4; % stop iteration if k = maxiter
tolerance = 1e-7; % stop iteration if delta < tolerance
while delta > tolerance
% while delta > tolerance && k < maxiter
    k = k + 1; % counting iteration
    % Update weights
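    % (FastICA fixed-point update: W <- E[g(W*X)*X'] - diag(E[g'(W*X)])*W,
    %  where g is the derivative of the contrast function; each row is then
    %  renormalised and the rows are symmetrically decorrelated)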
    Wlast = W; % Save last weights
    Sk = W * x_w;
    % cubic nonlinearity g(u) = 2u^3 (cf. 'pow3' in the FastICA toolbox)
    G = 2 * Sk.^3;
    Gp = 6 * Sk.^2; % derivative of G
    W = (G * x_w') / n - bsxfun(@times,mean(Gp,2),W);
    W = bsxfun(@rdivide, W, sqrt(sum(W.^2,2))); % normalise each row (in place of the undefined normRows helper)
    % Decorrelate weights (symmetric decorrelation: W <- (W*W')^(-1/2) * W)
    [U, S, ~] = svd(W,'econ');
    W = U * diag(1 ./ diag(S)) * U' * W;
    % Update convergence criteria
    delta = max(1 - abs(dot(W, Wlast, 2)));
end
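% report how many fixed-point iterations were needed
fprintf('converged after %d iterations (delta = %.2g)\n', k, delta)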
% plot the results
figure
subplot(1,2,1)
W1 = W';
plot(x_w(1,:),x_w(2,:),'o'); hold on
quiver(0,0,W1(1,1),W1(1,2),'linewidth',3,'color','k')
quiver(0,0,-W1(1,1),-W1(1,2),'linewidth',3,'color','k')
quiver(0,0,W1(2,1),W1(2,2),'linewidth',3,'color','c')
quiver(0,0,-W1(2,1),-W1(2,2),'linewidth',3,'color','c')
axis equal
title('Whitened space')
subplot(1,2,2)
W1 = W'*pinv(Q);
plot(x_c(1,:),x_c(2,:),'o'); hold on
quiver(0,0,W1(1,1),W1(1,2),'linewidth',3,'color','k')
quiver(0,0,-W1(1,1),-W1(1,2),'linewidth',3,'color','k')
quiver(0,0,W1(2,1),W1(2,2),'linewidth',3,'color','c')
quiver(0,0,-W1(2,1),-W1(2,2),'linewidth',3,'color','c'); hold off
axis equal
title('De-whitened space')
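%% Final check (a sketch): the unmixed components should look clearly non-Gaussian
% Histograms of the rows of W*x_w: each recovered component should reflect the
% structure of the original point clouds rather than a Gaussian blob
S_est = W * x_w;
figure
subplot(1,2,1), histogram(S_est(1,:)), title('component 1')
subplot(1,2,2), histogram(S_est(2,:)), title('component 2')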