Commit 6364a69b authored by Fidel Alfaro-Almagro (WIN UK Biobank)
Browse files

Adding QSM sub-pipeline

parent 39458b34
#!/bin/env python
# Fit a 1-D mixture model to the QSM values inside a CSF ROI and write the
# mean of the central Gaussian component (back in original units) to
# <subject>/QSM_CSF.txt.
import numpy as np
import nibabel as nib
import os
import sys
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-id', dest="ID", type=str, nargs=1)
parser.add_argument('-codedir', dest="path", type=str, nargs=1)
argsa = parser.parse_args()

subID = ' '.join(str(v) for v in argsa.ID)
codepath = ' '.join(str(v) for v in argsa.path)

# Make the one-dimensional mixture-model toolbox importable.
toolbox_path = codepath + '/One_Dim_Mix_Mod'
sys.path.append(os.path.join(os.path.abspath(toolbox_path)))

# Load the ROI image, keep the non-zero voxels and standardise them
# (zero mean, unit variance) so the fixed init_params below make sense.
roi_img = nib.load(subID + '/ROI_QSM_CSF.nii.gz')
voxels = np.array(roi_img.dataobj).ravel()
nonzero = voxels[voxels != 0]
data_vector = (nonzero - np.mean(nonzero)) / np.std(nonzero)

# Mixture-model options: a central Gaussian plus positive and negative
# inverse-gamma tails, fitted by the method of moments.
# Each component can be Gauss, Gamma, InvGamma, -Gamma, -InvGamma.
opts = {
    'Inference': 'Method of moments',
    'Number_of_Components': 3,
    'Components_Model': ['Gauss', 'InvGamma', '-InvGamma'],
    'init_params': [0, 1, 5, 2, -5, 2],
    'maxits': 300,
    'tol': 0.00000001,
    'init_pi': np.divide(np.ones(3), 3),
}

# CALL TO FIT MIXTURE MODEL
from Mixture_Model_1Dim import Mixture_Model_1Dim
Model = Mixture_Model_1Dim(data_vector, opts)

# Undo the standardisation so the reported mean is in original units.
Means = Model['mu1'][0] * np.std(nonzero) + np.mean(nonzero)
with open(subID + "/QSM_CSF.txt", "w") as f:
    f.write(str(Means))
import numpy as np
import sys
import os
import scipy
# Plot the resulting fit on a histogram of the data
from alb_MM_functions import invgam
from alb_MM_functions import gam
from scipy.stats import norm
def GaussGammas_Connectome_thresholding_pFDR(input_file, toolbox_path):
    """Two-tailed thresholding of a connectivity matrix via mixture modelling.

    Fits a 3-component mixture (central Gauss, positive InvGamma, negative
    -InvGamma) to the demeaned upper-diagonal entries of the matrix, then
    walks down each tail of the sorted data until the positive FDR first
    exceeds 0.001.

    Parameters
    ----------
    input_file : str
        Path to a comma-separated connectivity matrix.
    toolbox_path : str
        Directory holding the Mixture_Model_1Dim toolbox.

    Returns
    -------
    threshold1 : float
        Positive-tail threshold, mapped back to the original data units.
    threshold2 : float
        Negative-tail threshold, mapped back to the original data units.
    Model : dict
        Parameters of the fitted mixture model.
    """
    # Add the toolbox to path.
    sys.path.append(os.path.join(os.path.abspath(toolbox_path)))
    from Mixture_Model_1Dim import Mixture_Model_1Dim

    # Load the input connectivity matrix; keep finite upper-diagonal terms.
    connectivity_matrix = np.genfromtxt(input_file, delimiter=',')
    updiag_idx = np.triu_indices_from(connectivity_matrix, k=1)
    orig_data_vector = connectivity_matrix[updiag_idx]
    orig_data_vector = orig_data_vector[~np.isnan(orig_data_vector)]

    # Demean (scaling deliberately kept at 1) to ease fit initialization.
    mean_factor = np.mean(orig_data_vector)
    scaling_factor = 1.  # np.std(orig_data_vector)
    data_vector = np.divide(orig_data_vector - mean_factor, scaling_factor)

    # Options for the mixture model fit.
    # 'Method of moments' OR 'Variational Bayes' ('Maximum Likelihood' not included yet).
    Inference = 'Variational Bayes'
    Number_of_Components = 3
    # Each component can be Gauss, Gamma, InvGamma, -Gamma, -InvGamma.
    Components_Model = ['Gauss', 'InvGamma', '-InvGamma']
    maxits = 500
    tol = 0.00001
    # Initialize the tail components at the empirical 99th/1st percentiles.
    init_params = [0, 1, np.percentile(data_vector, 99), 2,
                   np.percentile(data_vector, 1), 2]
    opts = {'Inference': Inference,
            'Number_of_Components': Number_of_Components,
            'Components_Model': Components_Model,
            'init_params': init_params, 'maxits': maxits, 'tol': tol}

    # CALL TO FIT MIXTURE MODEL
    Model = Mixture_Model_1Dim(data_vector, opts)

    # Plot the resulting fit on a histogram of the data.
    visualize_model_fit = 1
    if visualize_model_fit == 1:
        my_range = np.linspace(-10, 10, 10000)
        plt0 = np.multiply(Model['Mixing Prop.'][0],
                           norm.pdf(my_range, Model['mu1'][0],
                                    np.sqrt(np.divide(1, Model['taus1'][0]))))
        if Components_Model[1] == 'InvGamma':
            plt1 = np.multiply(Model['Mixing Prop.'][1],
                               invgam(my_range, Model['shapes'][1], Model['scales'][1]))
        elif Components_Model[1] == 'Gamma':
            plt1 = np.multiply(Model['Mixing Prop.'][1],
                               gam(my_range, Model['shapes'][1],
                                   np.divide(1, Model['rates'][1])))
        # Positive component has no support below zero.
        plt1[my_range < 0] = 0
        if Components_Model[2] == '-InvGamma':
            plt2 = np.multiply(Model['Mixing Prop.'][2],
                               invgam(-my_range, Model['shapes'][2], Model['scales'][2]))
        elif Components_Model[2] == '-Gamma':
            plt2 = np.multiply(Model['Mixing Prop.'][2],
                               gam(-my_range, Model['shapes'][2],
                                   np.divide(1, Model['rates'][2])))
        # Negative component has no support above zero.
        plt2[my_range > 0] = 0
        import matplotlib.pyplot as plt
        fig = plt.figure()
        plt.hist(data_vector, bins=50, density=True, alpha=1, color='g')
        plt.plot(my_range, plt0, 'k', linewidth=2)
        plt.plot(my_range, plt1, 'k', linewidth=2)
        plt.plot(my_range, plt2, 'k', linewidth=2)
        plt.plot(my_range, plt0 + plt1 + plt2, 'r', linewidth=2)
        fig.savefig(os.path.expanduser('~/Desktop/temp.png'), dpi=fig.dpi)

    # Compute local (positive) FDR at the positive and negative tail.
    p0 = Model['Mixing Prop.'][0]
    rho = data_vector.shape[0]

    # FDR at positive side.
    sorted_data_vector = -np.sort(-data_vector)  # descending
    all_localFDR = np.ones(rho)
    flag = 0
    k = -1
    while flag == 0:
        k = k + 1
        point = sorted_data_vector[k]
        cdf = norm.cdf(point, Model['mu1'][0],
                       np.sqrt(np.divide(1, Model['taus1'][0])))
        numerator = np.multiply(float(p0), 1 - cdf)
        denominator = np.divide(float(k + 1), float(rho))
        all_localFDR[k] = np.divide(numerator, denominator)
        pFDR = all_localFDR[k]
        if pFDR > 0.001:
            threshold1 = sorted_data_vector[k] if k == 0 else sorted_data_vector[k - 1]
            flag = 1
        elif k == rho - 1:
            # BUGFIX: the original loop ran past the end of the array when
            # pFDR never exceeded 0.001; keep every element in that case.
            threshold1 = sorted_data_vector[k]
            flag = 1

    # FDR at negative side (sign-flipped walk down the other tail).
    sorted_data_vector = -np.sort(data_vector)
    all_localFDR = np.ones(rho)
    flag = 0
    k = -1
    while flag == 0:
        k = k + 1
        point = sorted_data_vector[k]
        cdf = norm.cdf(-point, Model['mu1'][0],
                       np.sqrt(np.divide(1, Model['taus1'][0])))
        numerator = np.multiply(float(p0), 1 - cdf)
        denominator = np.divide(float(k + 1), float(rho))
        all_localFDR[k] = np.divide(numerator, denominator)
        pFDR = all_localFDR[k]
        if pFDR > 0.001:
            threshold2 = -sorted_data_vector[k] if k == 0 else -sorted_data_vector[k - 1]
            flag = 1
        elif k == rho - 1:
            # Same guard as the positive tail: avoid indexing past the end.
            threshold2 = -sorted_data_vector[k]
            flag = 1

    # Rescale the thresholds using the data mean and std.
    threshold1 = np.multiply(threshold1, scaling_factor) + mean_factor
    threshold2 = np.multiply(threshold2, scaling_factor) + mean_factor
    print(threshold1)
    print(threshold2)
    return threshold1, threshold2, Model
def GammaGamma_Connectome_thresholding_pFDR(input_file, toolbox_path):
    """Positive-tail thresholding of a connectivity matrix via a Gamma mixture.

    Fits a 2-component mixture (Gamma bulk + InvGamma tail) to the positive
    upper-diagonal entries (> 0.05) of the matrix, rescaled by their mean,
    then walks down the sorted data until the positive FDR exceeds 0.05.

    Parameters
    ----------
    input_file : str
        Path to a comma-separated connectivity matrix.
    toolbox_path : str
        Directory holding the Mixture_Model_1Dim toolbox.

    Returns
    -------
    threshold : float
        Threshold in the original data units.
    Model : dict
        Parameters of the fitted mixture model.
    """
    # Add the toolbox to path.
    sys.path.append(os.path.join(os.path.abspath(toolbox_path)))
    from Mixture_Model_1Dim import Mixture_Model_1Dim

    # Load the input connectivity matrix; keep finite upper-diagonal terms.
    connectivity_matrix = np.genfromtxt(input_file, delimiter=',')
    updiag_idx = np.triu_indices_from(connectivity_matrix, k=1)
    orig_data_vector = connectivity_matrix[updiag_idx]
    orig_data_vector = orig_data_vector[~np.isnan(orig_data_vector)]

    # Keep only clearly-positive entries and rescale by their mean.
    data_vector = orig_data_vector[orig_data_vector > 0.05]
    scaling_factor = np.mean(data_vector)
    data_vector = np.divide(data_vector, scaling_factor)

    # Options for the mixture model fit.
    # 'Method of moments' OR 'Variational Bayes' ('Maximum Likelihood' not included yet).
    Inference = 'Variational Bayes'
    Number_of_Components = 2
    # Each component can be Gauss, Gamma, InvGamma, -Gamma, -InvGamma.
    Components_Model = ['Gamma', 'InvGamma']
    maxits = 500
    tol = 0.00001

    # Initialize the tail component at decreasing percentiles until a fit
    # is accepted (currently every fit is accepted on the first pass).
    good_model = 0
    percentiles = np.array([99, 98.5, 98, 97.5, 97, 96.5, 96, 95.5, 95])
    percentile_idx = -1
    while good_model == 0:
        percentile_idx = percentile_idx + 1
        tail = np.percentile(data_vector, percentiles[percentile_idx])
        init_params = [1, 2, tail, 2]
        opts = {'Inference': Inference,
                'Number_of_Components': Number_of_Components,
                'Components_Model': Components_Model,
                'init_params': init_params, 'maxits': maxits, 'tol': tol}
        # CALL TO FIT MIXTURE MODEL
        Model = Mixture_Model_1Dim(data_vector, opts)
        # if Model['Mixing Prop.'][0] < .95:
        good_model = 1

    # Plot the resulting fit on a histogram of the data.
    if 1:
        from alb_MM_functions import gam
        my_range = np.linspace(0.01, np.max(data_vector), 10000)
        plt1 = np.multiply(Model['Mixing Prop.'][0],
                           gam(my_range, Model['shapes'][0],
                               np.divide(1, Model['rates'][0])))
        plt2 = np.multiply(Model['Mixing Prop.'][1],
                           gam(my_range, Model['shapes'][1],
                               np.divide(1, Model['rates'][1])))
        import matplotlib.pyplot as plt
        plt.hist(data_vector, bins=50, density=True, alpha=1, color='g')
        plt.plot(my_range, plt1, 'k', linewidth=2)
        plt.plot(my_range, plt2, 'k', linewidth=2)
        plt.plot(my_range, plt1 + plt2, 'r', linewidth=2)
        plt.show()

    # Compute local (positive) FDR on the upper tail.
    p0 = Model['Mixing Prop.'][0]
    rho = data_vector.shape[0]
    sorted_data_vector = -np.sort(-data_vector)  # descending
    all_localFDR = np.ones(rho)
    flag = 0
    k = -1
    while flag == 0:
        k = k + 1
        point = sorted_data_vector[k]
        cdf = scipy.stats.gamma.cdf(point, Model['shapes'][0], 0,
                                    np.divide(1., Model['rates'][0]))
        numerator = np.multiply(float(p0), 1 - cdf)
        denominator = np.divide(float(k + 1), float(rho))
        all_localFDR[k] = np.divide(numerator, denominator)
        pFDR = all_localFDR[k]
        if pFDR > 0.05:
            # BUGFIX: at k == 0 the original indexed sorted_data_vector[-1]
            # (the smallest value); use the current element instead, matching
            # GaussGammas_Connectome_thresholding_pFDR.
            idx = k if k == 0 else k - 1
            threshold = np.multiply(sorted_data_vector[idx], scaling_factor)
            flag = 1
        elif k == rho - 1:
            # BUGFIX: the original loop ran past the end of the array when
            # pFDR never exceeded 0.05; keep every element in that case.
            threshold = np.multiply(sorted_data_vector[k], scaling_factor)
            flag = 1
    print(threshold)
    return threshold, Model
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 7 12:30:57 2018
@author: alblle
"""
from alb_MM_functions import Mix_Mod_MethodOfMoments
from SIN_VB_MixMod import Mix_Mod_VB
import numpy as np
def Mixture_Model_1Dim(data_vector, opts=None):
    """Fit a one-dimensional mixture model to a data vector.

    Parameters
    ----------
    data_vector : array-like
        One-dimensional data to fit.
    opts : dict, optional
        Fit options with keys 'Inference', 'Number_of_Components',
        'Components_Model', 'init_params', 'maxits', 'tol', 'init_pi'.
        Defaults to a 3-component Gauss/Gamma/-Gamma fit by the method
        of moments.

    Returns
    -------
    Model : dict or int
        Dictionary of fitted parameters ('means', 'variances',
        'Mixing Prop.', 'Likelihood', 'its', 'Final responsibilities', ...),
        or 0 when 'Maximum Likelihood' is requested (not implemented).

    Raises
    ------
    ValueError
        If opts['Inference'] is not a recognised method.
    """
    # BUGFIX: build the default options inside the function instead of using
    # a mutable dict default argument evaluated once at definition time.
    if opts is None:
        opts = {'Inference': 'Method of moments',
                'Number_of_Components': 3,
                'Components_Model': ['Gauss', 'Gamma', '-Gamma'],
                'init_params': [0, 1, 3, 1, -3, 1],
                'maxits': 100,
                'tol': 0.00001,
                'init_pi': np.true_divide(np.ones(3), 3)}
    if opts['Inference'] == 'Method of moments':
        # Model is a dictionary {'means','variances','Mixing Prop.',
        # 'Likelihood','its','Final responsibilities'}.
        Model = Mix_Mod_MethodOfMoments(data_vector, opts)
    elif opts['Inference'] == 'Maximum Likelihood':
        print("not implemented yet, very easy... do? Method of moments is a good approximation")
        Model = 0
    elif opts['Inference'] == 'Variational Bayes':
        Model = Mix_Mod_VB(data_vector, opts)
    else:
        # BUGFIX: the original fell through to an UnboundLocalError on
        # 'return Model' for unrecognised methods.
        raise ValueError("Unknown inference method: %r" % (opts['Inference'],))
    return Model
function [src]=SIN_VB_MixMod(x,mix1)
%Vatiational Bayes Mixture model: Gauss/Gamma/Inverse Gamma
%INPUT: x is a data vector;
% mix1 is output of [mix1]=SIN_init_VB_MM(data,opts)
%OUTPUT: src is a structure with the parameters of the fitted model
%---SET DEFAULT OPTIONS
if ~isfield(mix1.opts,'K'), K = 3;
else K = mix1.opts.K; end
if ~isfield(mix1.opts,'comp'), comp = [1 1 1]; %componets [0 + -] active or not active
else comp = mix1.opts.comp; end
if ~isfield(mix1.opts,'MM'), MM='GIM';
else MM = mix1.opts.MM; end
if ~isfield(mix1.opts,'tol1'), tol1=10^-5;% tol for conv.: relative change of NFE
else tol1 = mix1.opts.tol1; end
if ~isfield(mix1.opts,'tol2'), tol2=10^-5;% tol for conv.: relative change of NFE
else tol2 = mix1.opts.tol2; end
if ~isfield(mix1.opts,'MaxNumIt'), numit=200;% tol for conv.: relative change of NFE
else numit = mix1.opts.MaxNumIt; end
if ~isfield(mix1.opts,'MaxNumIt2'), numit2=200;% tol for conv. of subloop
else numit2 = mix1.opts.MaxNumIt2; end
%---SET DEFAULT OPTIONS
src=mix1;
%---SET PRIORS
% mixture prior
lambda_0=src.prior.lambda_0;
% component 1 precision priors
b0=src.prior.b0;%scale
c0=src.prior.c0;%shape
% component 1 mean priors
m_0=src.prior.m_0;
tau_0=src.prior.tau_0;
% component 2 and 3 rate priors
d_0=src.prior.d_0; %shape
e_0=src.prior.e_0; %rate
% component 2 and 3 shape priors
loga_0=src.prior.loga_0;
b_0=src.prior.b_0;
c_0=src.prior.c_0;
%---SET PRIORS
pos=find(x>0);neg=find(x<0);
xpos=x;xpos(neg)=0;
xneg=x;xneg(pos)=0;
xneg=-xneg;
xx=[xpos ;xneg]; %I made all positive!!
% --Initialise usefull vector values
ftot=0;
%Fgauss = 0;
FEs=zeros(1,numit);
FEs(1:2)=[-10^40 -10^39];
likelihood=zeros(1,numit);
kls=zeros(1,numit);
ents=zeros(1,numit);
flag=0;
it=0;
%--Initialise usefull vector values
% -------- ITERATE POSTERIORS
while flag==0
it=it+1;
%==================================E_Step================================
% compute ´responsibilitites´
[gammas] = my_gammas(src,x,MM,comp);
%==================================E_Step================================
%==================================M_Step================================
gamma_sum = sum(gammas');
[~, dum]=max(gammas);
src.q=dum;
%--------------------Update lambda etc.--------------------
lambda = lambda_0+gamma_sum;
% store for E-step
src.post.lambda=lambda;
% contribution to energy (copied from choud)
lambda_p=lambda_0*ones(1,3);
dir1 = sum(gammaln(lambda+eps) - gammaln(lambda_p+eps));
dir2 = gammaln(sum(lambda+eps)) - gammaln(sum(lambda_p+eps));
%Fdir = dir1-dir2;
ent_gam=-sum(sum(gammas.*log(gammas+eps)));
%alb-->
dir3=sum( (lambda-lambda_0) .* (psi(lambda)-psi(sum(lambda))));
KLPI=dir2-dir1+dir3;
%--------------------Update lambda etc.--------------------
%--------------------Update component 1 ---------------------
pdf_fact=1/2;
%--------------------Update precision tau1---------------------
mu1=src.mu1;
tau1=src.tau1;
tau = tau_0+(tau1.*gamma_sum(1));
mean_xsq = sum(gammas(1,:).*(x.^2));
mean_x = sum(gammas(1,:).*x);
%mu_sq = (gamma_sum(1).*(mu1.^2+(1./tau1)))';
mu_sq = (gamma_sum(1).*(mu1.^2+(1./tau)))';%!!!!!!!
data_bit = mean_xsq-2*mu1'.*mean_x+mu_sq;
b = 1./( (1/b0) + (pdf_fact*data_bit'));
c = c0+(pdf_fact*gamma_sum(1));
mean_tau1 =b*c;
% store for E-step
src.post.b0=b;
src.post.c0=c;
% contribution to energy using KL of Gamma, check b=scale and c=shape
bp=b0;cp=c0;
bq=b;cq=c;
KLTAU1= (cq*((bq/bp)-1)) + ((cq-cp)*(psi(cq) + log(bq))) - gammaln(cq) - ( cq*log( bq)) + gammaln(cp) + (cp*log( bp));
%--------------------Update precision---------------------
%-----------------------Update mean-----------------------
tau = tau_0+(mean_tau1.*gamma_sum(1));
%mm = 1./tau.*(m_0+mean_tau1.*mean_x'); typo in choud code??
mm = 1./tau.*( (tau_0*m_0) + (mean_tau1.*mean_x'));
mean_mu1=mm;
mean_mu12=mm.^2+(1/tau);
% store for E-step
src.post.m0=mm;
src.post.tau0=tau;
% contribution to energy using KL of Gauss
bp=tau_0;mp=m_0;
bq=mean_tau1;mq=mean_mu1;
KLMU1=1/2 *( ((bp/bq)-1)-log(bp/bq)+ (bp*((mq-mp)^2) ));
%-----------------------Update means-----------------------
%--------------------Update components 2 and 3 ---------------------
if it==1
if strcmp(MM,'GGM')
MAP_shape=src.shapes;
elseif strcmp(MM,'GIM')
MAP_shape=src.shapes;
end
end
subflag=0; %for convergence of Gamma/Inverse Gamma parameters iteration
its2=0;
while subflag==0 %for its2=1:subloop
its2=its2+1;
%--------------------Update rates if GGM or scales if GIM---------------------
if strcmp(MM,'GGM')
mean_x =sum(gammas(2:3,:).*xx,2);
e=e_0 + mean_x';
d=d_0+(MAP_shape.*gamma_sum(2:3) );
mean_rates=d./e;
mean_logrates=psi(d)-log(e);
elseif strcmp(MM,'GIM')%MM =='GIM'%conj prior on rate of inv gamma is gamma
tmp=gammas(2:3,:)./xx;
mean_x(1)=sum(tmp(1,pos));
mean_x(2)=sum(tmp(2,neg));
mean_x=mean_x';
e=e_0 + mean_x';
d=d_0+(MAP_shape.*gamma_sum(2:3) );
clear mean_x
mean_scales=d./e;
mean_logscales=psi(d)-log(e);
end
%--------------------Update rates if GGM or scales if GIM---------------------
%--------------------Update shapes---------------------
B= b_0+ gamma_sum(2:3);
C= c_0+ gamma_sum(2:3);
xresp=xx.^gammas(2:3,:);%.*xx;
idx= xresp==0;
xresp(idx)=1;
logA=loga_0 +sum(log(xresp'));
if strcmp(MM,'GGM')
MAP_shape=invpsi((logA+ (C .* mean_logrates)) ./ B);
elseif strcmp(MM,'GIM')
MAP_shape=invpsi((-logA+ (C .* mean_logscales)) ./ B) ;
end
%--------------------Update shapes---------------------
%-----check convergence of subloop
if its2>1
if strcmp(MM,'GGM')
new=[mean_rates MAP_shape];%dummm variable for testing convergence
elseif strcmp(MM,'GIM')
new=[mean_scales MAP_shape];%dummm variable for testing convergence
end
mean_rel_change=mean(abs(old-new)./old);
if (its2>numit2) || mean_rel_change<tol2
subflag=1;
end
end
if strcmp(MM,'GGM')
old=[mean_rates MAP_shape];%dummm variable for testing convergence
elseif strcmp(MM,'GIM')
old=[mean_scales MAP_shape];%dummm variable for testing convergence
end
%-----check convergence of subloop
clear tmp
end
% store for E-step
src.post.d_0=d;
src.post.e_0=e;
src.post.loga_0=logA;
src.post.b_0=B;
src.post.c_0=C;
%contribution to energy
f2=@(b,alpha) (b .* psi(1,alpha));% from Laplace approx
tmpidx=find(comp(2:3));
if strcmp(MM,'GGM')
%using KL of Gamma, check b=scale and c=shape?
bp=1./e_0;cp=d_0;
bq=1./e;cq=d;
KLrates= (cq.*((bq./bp)-1)) + ((cq-cp).*(psi(cq) + log(bq))) - gammaln(cq) - ( cq.*log( bq)) + gammaln(cp) + (cp.*log( bp));
KLrates=KLrates(tmpidx);
KLrate=sum(KLrates