Commit efa90b68 authored by Saad Jbabdi

Updates to the prac

parent e3e53e07
%% SHORT INTRO TO ICA
% ICA as a 'projection-pursuit'
%
% Understanding data often means finding 'interesting' ways to
% look at the data
%
% This means projecting the data onto 'interesting' directions
%
% If X is the data matrix (d dimensions x n samples)
%
% then w'X is the projection along direction w
%
% PCA finds the direction w where var(w'X) is maximal
% ICA finds the direction w where w'X is maximally non-Gaussian
% this makes sense because being Gaussian is 'boring' (i.e. it is what
% happens by default when you add up lots of independent random things),
% so deviations from Gaussianity are interesting
% quantifying non-Gaussianity in a way that is differentiable (so
% we can optimise it) is what FastICA does
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
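% (Optional aside, a minimal sketch of the 'Gaussian by default' point:
% the average of many independent non-Gaussian variables is already close
% to Gaussian, which is why mixtures of independent sources tend to look
% Gaussian and why non-Gaussianity points back towards the sources.)
% figure
% subplot(1,2,1), histogram(rand(1,1e4)),          title('1 uniform sample')
% subplot(1,2,2), histogram(mean(rand(20,1e4),1)), title('mean of 20 uniform samples')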
%% 1. Generate a points cloud in 2d
rng('default') % for replication
n = 400;       % number of samples
d = 2;         % dimensionality
w1 = randn(d,1);
w1 = w1/norm(w1); % direction 1 is random
ang = 30;         % rotation angle
rot = [cosd(ang) sind(ang);  % rotation matrix
      -sind(ang) cosd(ang)];
w2 = rot*w1;      % direction 2 is direction 1 rotated by 30 degrees
% Generate data along the two directions
% (multivariate Gaussians spread a little along w1 and w2)
x1 = mvnrnd([4;1],w1*w1',n/2) + .1*randn(n/2,2);
x2 = mvnrnd([4;1],w2*w2',n/2) + .1*randn(n/2,2);
% Exercise: try this again, but with two point clouds that are aligned
% with each other but have different means
% x1 = mvnrnd(w2,w1*w1',n/2) + .1*randn(n/2,2);
% x2 = mvnrnd(-w2,w1*w1',n/2) + .1*randn(n/2,2);
% Concatenate the two clouds of data
x = [x1;x2]';
% optionally add an outlier
% x(:,1) = randn(d,1)*5;
% do a quick plot to visualise the data
% which directions do you find most interesting?
figure
plot(x(1,:),x(2,:),'o')
grid on
axis equal
%% 2. Pre-processing
% Centering (mean(data)=0 along each dimension)
C = eye(n) - ones(n,1)*ones(1,n)/n;
x_c = x*C;
% % A more efficient alternative is to simply call 'mean'
% x_c = x - mean(x,2);
% Pre-whitening (such that var(data)=1 along every direction)
% This means that PCA becomes irrelevant: all directions have the same
% variance
[V,D] = eig(cov(x'));
Q = V*diag(1./sqrt(diag(D)))*V';
x_w = Q*x_c;
% quick plot
figure,
subplot(1,2,1),hold on
plot(x(1,:),x(2,:),'o')
plot(x_c(1,:),x_c(2,:),'o')
grid on
title('Centering')
legend({'before','after'})
axis equal
subplot(1,2,2),hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(x_w(1,:),x_w(2,:),'o')
grid on
title('Prewhitening')
legend({'before','after'})
axis equal
% check that mean = 0
mean(x_c,2)
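% extra quick check: the covariance of the whitened data should be
% (numerically) close to the identity matrix
cov(x_w')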
%% quick check that the variance is 1 along all directions
addpath ./utils
G = @(x)(x.^2);
figure
subplot(1,2,1), plot_cf(x_c,G)
legend({'data','variance'}), title('Before whitening');
subplot(1,2,2), plot_cf(x_w,G)
legend({'data','variance'}), title('After whitening');
%% random projections
figure
% 10 random projections
% (press any key to see the next projection)
for i = 1:10
    w = randn(d,1); w = w/norm(w)*5;
    subplot(1,2,1)
    plot(x_w(1,:),x_w(2,:),'o'); hold on
    axis equal; grid on
    quiver(0,0,w(1),w(2),'linewidth',2,'color','r'); hold off
    title(sprintf('%d/10 random projection',i))
    subplot(1,2,2)
    histogram(w'*x_w)
    pause
end
%% quick PCA
% You should be able to see that PCA picks up directions of high variance
% which do not in this case coincide with the interesting directions
% in this data
% (for a symmetric matrix, eig returns eigenvalues in ascending order,
%  so the last column of V is the 1st principal component)
[V,~] = eig(cov(x'));
figure
% plot data
plot(x(1,:),x(2,:),'o'); hold on
% plot 1st PC direction in RED (pos and neg)
quiver(mean(x(1,:)),mean(x(2,:)),V(1,2),V(2,2),'linewidth',3,'color','r')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,2),-V(2,2),'linewidth',3,'color','r')
% plot 2nd PC direction in GREEN (pos and neg)
quiver(mean(x(1,:)),mean(x(2,:)),V(1,1),V(2,1),'linewidth',3,'color','g')
quiver(mean(x(1,:)),mean(x(2,:)),-V(1,1),-V(2,1),'linewidth',3,'color','g')
axis equal
grid on
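% (Optional numerical check, a minimal sketch: by construction the variance
% along the 1st PC is at least as large as the variance along either of the
% generating directions w1/w2, even though the PCs are not the directions
% used to build the data.)
% [var(V(:,2)'*x_c)  var(w1'*x_c)  var(w2'*x_c)]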
%% ICA
% contrast functions
% these are functions which, when applied to the data, 'highlight' the
% non-Gaussianities
% they are also supposed to approximate neg-entropy
G0 = @(x)(x.^2);         % quadratic (do not use this!)
G1 = @(x)(1-exp(-x.^2)); % Gaussian --> integral(x*exp(-x^2/2))
G2 = @(x)(log(cosh(x))); % --> integral(tanh)
G3 = @(x)(x.^3);         % skewness
G4 = @(x)(x.^4);         % tailedness (kurtosis)
G5 = @(x)(sqrt(G1(x)));
% Let's plot these functions
Gall = {G0, G1, G2, G3, G4};
Gnames = {'x^2','Gaussian','tanh','x^3','x^4'};
xx = linspace(-15,15,1000);

figure
for i = 1:5
    subplot(2,3,i)
    g = Gall{i};
    plot(xx, g(xx), 'linewidth',2)
    legend(Gnames{i})
end
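% (Optional aside, a minimal sketch: the mean of a contrast function differs
% between a Gaussian sample and a non-Gaussian sample of the same variance,
% which is what the non-Gaussianity measure in the next section exploits.)
% g_samp = randn(1,1e5);               % Gaussian, variance 1
% u_samp = (rand(1,1e5)-0.5)*sqrt(12); % uniform,  variance 1
% [mean(G2(g_samp)) mean(G2(u_samp))]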
%% Calculate non-Gaussianity along
% different directions and plot it
% alongside the variance

angles = linspace(0,2*pi,1000); % angles to calculate cost function along
I = zeros(size(angles)); % Interestingness
E = zeros(size(angles)); % Extent (variance)

% put your own non-linearity in here!
G = G4;

for i = 1:length(angles)
    % get vector w along angle
    [w_x, w_y] = pol2cart(angles(i),1);
    w = [w_x;w_y];

    % calculate non-Gaussianity along w
    I(i) = mean(G(w'*x_w)).^2;

    % calculate variance along w
    E(i) = var(w'*x_w);
end
% quick plot of the cost functions
figure
plot(x_w(1,:),x_w(2,:),'o'); hold on

[xx,yy] = pol2cart(angles,I/norm(I)*40);
plot(xx,yy,'linewidth',2,'color','r')

[xx,yy] = pol2cart(angles,E/norm(E)*40);
plot(xx,yy,'linewidth',2,'color','g','linestyle','--')

axis equal
grid on

legend({'data','non-gaussianity','variance'})
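% (Optional extra, a minimal sketch: pick out the angle that maximises the
% non-gaussianity measure I and overlay that direction on the current plot.)
% [~,imax] = max(I);
% [wx,wy] = pol2cart(angles(imax),1);
% quiver(0,0,3*wx,3*wy,'linewidth',2,'color','k')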
%% Plot in de-whitened space

% generate a bunch of points
[xx,yy] = pol2cart(angles,I/norm(I)*40);
% un-whiten with the whitening matrix Q
ww = Q\[xx;yy];

figure,hold on
plot(x_c(1,:),x_c(2,:),'o')
plot(ww(1,:),ww(2,:),'linewidth',3,'color','r')
axis equal
grid on
%% Compare with the FastICA toolbox :)

% Can be downloaded from https://research.ics.aalto.fi/ica/fastica/
addpath ./utils/FastICA_25

% check the help to see how to change things like the contrast function g
% note that we pass x_c (the centered data); fastica does pre-whitening internally

[S, A, W] = fastica(x_c,'approach', 'symm','g','skew');
% S = source signals, A = mixing matrix, W = unmixing matrix
mixedS = A*S; % mixture of source signals

% plot the results
figure
subplot(1,3,1)
plot(S(1,:),S(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-5,5]); ylim([-6,6])
title('Estimated Source')

subplot(1,3,2)
plot(mixedS(1,:),mixedS(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-4,4]); ylim([-5,5])
title('Mixed Source')

subplot(1,3,3)
plot(x_c(1,:),x_c(2,:),'o'); hold on
quiver(0,0,A(1,1),A(2,1),'linewidth',3,'color','k')
quiver(0,0,-A(1,1),-A(2,1),'linewidth',3,'color','k')
quiver(0,0,A(1,2),A(2,2),'linewidth',3,'color','c')
quiver(0,0,-A(1,2),-A(2,2),'linewidth',3,'color','c')
xlim([-4,4]); ylim([-5,5])
title('Original centered data')
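% (Optional check, a minimal sketch: fastica returns A as the mixing matrix
% and W as the unmixing matrix, so W*A should be close to the identity.)
% W*A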
%% Quick implementation of the FastICA algorithm
d = size(x,1);
W = orth(randn(d,d)); % Random initial weights
delta = inf;          % initialize delta (w_{i+1} - w_{i})
k = 0;                % counting iterations
% maxiter = 1e4;      % stop iterating if k = maxiter
tolerance = 1e-7;     % stop iterating if delta < tolerance

while delta > tolerance
% while delta > tolerance && k < maxiter
    k = k + 1; % counting iterations

    % Update weights
    Wlast = W; % Save last weights
    Sk = W * x_w;

    % skew non-linearity
    G  = 2 * Sk.^3;
    Gp = 6 * Sk.^2; % derivative of G

    W = (G * x_w') / n - bsxfun(@times,mean(Gp,2),W);
    W = W ./ sqrt(sum(W.^2,2));

    % Decorrelate W's (symmetric orthogonalisation)
    [U, S, ~] = svd(W,'econ');
    W = U * diag(1 ./ diag(S)) * U' * W;

    % Update convergence criterion
    delta = max(1 - abs(dot(W, Wlast, 2)));
end

% plot the results
figure
subplot(1,2,1)
W1 = W';
plot(x_w(1,:),x_w(2,:),'o'); hold on
quiver(0,0,W1(1,1),W1(1,2),'linewidth',3,'color','k')
quiver(0,0,-W1(1,1),-W1(1,2),'linewidth',3,'color','k')
quiver(0,0,W1(2,1),W1(2,2),'linewidth',3,'color','c')
quiver(0,0,-W1(2,1),-W1(2,2),'linewidth',3,'color','c')
axis equal
title('Whitened space')

subplot(1,2,2)
W1 = W'*pinv(Q);
plot(x_c(1,:),x_c(2,:),'o'); hold on
quiver(0,0,W1(1,1),W1(1,2),'linewidth',3,'color','k')
quiver(0,0,-W1(1,1),-W1(1,2),'linewidth',3,'color','k')
quiver(0,0,W1(2,1),W1(2,2),'linewidth',3,'color','c')
quiver(0,0,-W1(2,1),-W1(2,2),'linewidth',3,'color','c'); hold off
axis equal
title('De-whitened space')