From 27be8bbc59be7025f3438684e286932c79439773 Mon Sep 17 00:00:00 2001 From: Moises Fernandez <moisesf@fmrib.ox.ac.uk> Date: Thu, 11 Apr 2013 16:26:53 +0000 Subject: [PATCH] Changed to give the chance to indicate the number of jobs to submit --- CUDA/bedpostx_multigpu_LSF | 194 ++++++++++--------------------------- 1 file changed, 51 insertions(+), 143 deletions(-) diff --git a/CUDA/bedpostx_multigpu_LSF b/CUDA/bedpostx_multigpu_LSF index 5ec32c8..d8792dd 100755 --- a/CUDA/bedpostx_multigpu_LSF +++ b/CUDA/bedpostx_multigpu_LSF @@ -4,7 +4,8 @@ # # SHCOPYRIGHT -beforet=`date +%s` +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${FSLDIR}/src/fdt/CUDA + if [ "x$SGE_ROOT" = "x" ] ; then if [ -f /usr/local/share/sge/default/common/settings.sh ] ; then . /usr/local/share/sge/default/common/settings.sh @@ -28,6 +29,7 @@ Usage() { echo "-s (sample every, default 25)" echo "-model (1 for monoexponential, 2 for multiexponential, default 1)" echo "-g (consider gradient nonlinearities, default off)" + echo "-NGPU (number of jobs to queue, the data is divided in NGPU parts, default 40)" echo "" echo "" echo "ALTERNATIVELY: you can pass on xfibres options onto directly bedpostx" @@ -60,6 +62,7 @@ subjdir=`echo $subjdir | sed 's/\/$/$/g'` echo subjectdir is $subjdir #parse option arguments +njobs=40 nfibres=2 fudge=1 burnin=1000 @@ -67,32 +70,26 @@ njumps=1250 sampleevery=25 model=1 gflag=0 -f0=0 -rician=0 shift while [ ! -z "$1" ] do case "$1" in + -NGPU) njobs=$2;shift;; -n) nfibres=$2;shift;; - --nf=?) nfibres=`echo $1 | sed s/"--nf="//g`;; - --model=?) model=`echo $1 | sed s/"--model="//g`;; -w) fudge=$2;shift;; -b) burnin=$2;shift;; -j) njumps=$2;shift;; -s) sampleevery=$2;shift;; - --f0) f0=1 others="$others $1";; - --rician) rician=1 others="$others $1";; -model) model=$2;shift;; -g) gflag=1;; - *) others="$others $1";; + *) break;; esac shift done - opts="--nf=$nfibres --fudge=$fudge --bi=$burnin --nj=$njumps --se=$sampleevery --model=$model" defopts="--cnonlinear" -opts="$opts $defopts $others" +opts="$opts $defopts $*" #check that all required files exist @@ -131,7 +128,7 @@ fi echo Making bedpostx directory structure mkdir -p ${subjdir}.bedpostX/ -mkdir -p ${subjdir}.bedpostX/diff_slices +mkdir -p ${subjdir}.bedpostX/diff_parts mkdir -p ${subjdir}.bedpostX/logs mkdir -p ${subjdir}.bedpostX/logs/pid_${$} mkdir -p ${subjdir}.bedpostX/xfms @@ -142,162 +139,73 @@ else echo "bedpostx_multigpu_LSF "${subjdir} $opts >> ${subjdir}.bedpostX/commands.txt fi -echo "..............Compiling CUDA-gpu code .................." - -export LD_LIBRARY_PATH=${CUDA}/lib64:${subjdir}.bedpostX:${CUDA}/lib:$LD_LIBRARY_PATH -export DYLD_LIBRARY_PATH=${CUDA}/lib64:${subjdir}.bedpostX:${CUDA}/lib:$LD_LIBRARY_PATH -export PATH=${FSLDIR}/bin:$PATH - -volumes=`${FSLDIR}/bin/fslval ${subjdir}/data dim4` -echo "DIRECTIONS NUMBER: $volumes" - -filename="${subjdir}.bedpostX/options.h" - -if [ $volumes -le 64 ]; then - threadsBlock=$volumes -else - threadsBlock=64 -fi - -params=$((2+3*$nfibres)) - -if [ $f0 -eq 1 ]; then - params=$(($params+1)) -fi - -if [ $model -eq 2 ]; then - params=$(($params+1)) -fi - -if [ $rician -eq 1 ]; then - params=$(($params+1)) -fi +mailto=`whoami`@fmrib.ox.ac.uk -rem=$(($volumes%$threadsBlock)) -maxndirs=$(($volumes/$threadsBlock)) - -if [ $rem -ne 0 ]; then - maxndirs=$(($maxndirs+1)) +echo Copying files to bedpost directory +cp ${subjdir}/bvecs ${subjdir}/bvals ${subjdir}.bedpostX +${FSLDIR}/bin/imcp ${subjdir}/nodif_brain_mask ${subjdir}.bedpostX +if [ `${FSLDIR}/bin/imtest ${subjdir}/nodif` = 1 ] ; then + ${FSLDIR}/bin/fslmaths ${subjdir}/nodif -mas ${subjdir}/nodif_brain_mask ${subjdir}.bedpostX/nodif_brain fi -dstd=$(($model-1)); - -params_rep=$(($params-1)) -params_step0_m2_rep=$(($params_step0_m2-1)) - -echo "#define NDIRECTIONS $volumes -#define NFIBRES $nfibres -#define NPARAMS $params -#define MAXNDIRS_PER_THREAD $maxndirs -#define THREADS_BLOCK $threadsBlock">$filename - -before=`date +%s` - -${CUDA}/bin/nvcc --shared --compiler-options '-fPIC' -o ${subjdir}.bedpostX/libbedpostx_cuda.so ${FSLDIR}/src/fdt/CUDA/init_gpu.cu ${FSLDIR}/src/fdt/CUDA/samples.cu ${FSLDIR}/src/fdt/CUDA/diffmodels.cu ${FSLDIR}/src/fdt/CUDA/runmcmc.cu ${FSLDIR}/src/fdt/CUDA/xfibres_gpu.cu -O3 -arch sm_20 -lcudart -lcuda -lcurand -L${CUDA}/lib64 -L${CUDA}/lib -I${subjdir}.bedpostX -I${FSLDIR}/extras/include/newmat -I${FSLDIR}/extras/include/boost -I${FSLDIR}/include -I${CUDA}/include/thrust -I${FSLDIR}/src/fdt - -file=${subjdir}.bedpostX/libbedpostx_cuda.so - -if [ -f $file ]; -then -after=`date +%s` -diff=$(($after-$before)) -echo "Compilation Time: $diff seconds" >> ${subjdir}.bedpostX/times - -echo "CUDA CODE compiled successfully" - -before=`date +%s` -echo Preprocessing stages -${FSLDIR}/bin/bedpostx_preproc.sh ${subjdir} ${gflag} - -after=`date +%s` -diff=$(($after-$before)) -echo "Preprocessing Time: $diff seconds" >> ${subjdir}.bedpostX/times - -before=`date +%s` -nslices=`${FSLDIR}/bin/fslval ${subjdir}/data dim3` - -slice=0 - -post_string="bsub -o ${subjdir}'.bedpostX/logs/output.log -e ${subjdir}.bedpostX/logs/error.log -W 0:50" - +part=0 +post_string="bsub -o ${subjdir}.bedpostX/logs/log_post_proc.log -e ${subjdir}.bedpostX/logs/error_post_proc.log -W 0:50" post_string=$post_string" -w " -echo Submitting slices to GPUs +echo "Submitting parts (jobs) to GPUs" first_job=1 -while [ $slice -lt $nslices ];do - slicezp=`$FSLDIR/bin/zeropad $slice 4` - if [ `$FSLDIR/bin/imtest ${subjdir}.bedpostX/diff_slices/data_slice_$slicezp/dyads1` -eq 1 ];then - echo "Slice $slice has already been processed" +while [ $part -lt $njobs ];do + partzp=`$FSLDIR/bin/zeropad $part 4` + + if [ ${gflag} -eq 1 ]; then + gopts="$opts --gradnonlin=${subjdir}/grad_dev" + else + gopts=$opts + fi + + string=$(bsub -o ${subjdir}.bedpostX/logs/log$partzp -e ${subjdir}.bedpostX/logs/error$partzp -W 2:00 -n 1 ${FSLDIR}/bin/xfibres_gpu --data=${subjdir}/data --mask=$subjdir.bedpostX/nodif_brain_mask -b ${subjdir}.bedpostX/bvals -r ${subjdir}.bedpostX/bvecs --forcedir --logdir=$subjdir.bedpostX/diff_parts/data_part_$partzp $gopts $part $njobs) + array=(`echo $string | tr "<" ' '`) + aux=${array[1]} + array=(`echo $aux | tr ">" ' '`) + job=${array[0]} + #echo $job + + if [ $first_job -eq 1 ];then + post_string=$post_string"done("${job}")" + first_job=0 else - string=$(bsub -o ${subjdir}'.bedpostX/logs/output.log' -e ${subjdir}'.bedpostX/logs/error.log' -W 0:30 -n 1 ${FSLDIR}'/bin/splitter_multigpu' $subjdir $gflag $nfibres $slice $opts) - array=(`echo $string | tr "<" ' '`) - aux=${array[1]} - array=(`echo $aux | tr ">" ' '`) - job=${array[0]} - #echo $job - - if [ $first_job -eq 1 ];then - post_string=$post_string"done("${job}")" - first_job=0 - else - post_string=$post_string"&&done("${job}")" - fi + post_string=$post_string"&&done("${job}")" fi - slice=$(($slice + 1)) + + part=$(($part + 1)) done -post_string=$post_string" ${FSLDIR}/bin/bedpostx_postproc.sh $subjdir" -#echo $post_string +nvox=`${FSLDIR}/bin/fslstats $subjdir.bedpostX/nodif_brain_mask -V | cut -d ' ' -f1 ` +post_string=$post_string" ${FSLDIR}/bin/bedpostx_postproc_gpu.sh --data=${subjdir}/data --mask=$subjdir.bedpostX/nodif_brain_mask -b ${subjdir}.bedpostX/bvals -r ${subjdir}.bedpostX/bvecs --forcedir --logdir=$subjdir.bedpostX/diff_parts $gopts $nvox $njobs ${subjdir}" + $post_string -echo "All jobs submitted" +echo "All parts (jobs) submitted" finished=0 logdir=${subjdir}.bedpostX/logs -tim=0 while [ $finished -eq 0 ] ; do nfin=0 - pslice=0 - while [ $pslice -lt $nslices ];do - slicezp=`${FSLDIR}/bin/zeropad $pslice 4` - isimg=`${FSLDIR}/bin/imtest ${subjdir}.bedpostX/diff_slices/data_slice_$slicezp/dyads1` - if [ $isimg -eq 1 ];then + part=0 + while [ $part -lt $njobs ];do + partzp=`${FSLDIR}/bin/zeropad $part 4` + if [ -f ${subjdir}.bedpostX/diff_parts/data_part_$partzp/mean_S0samplesJ ];then nfin=$(($nfin + 1)) fi - pslice=$(($pslice + 1)) + part=$(($part + 1)) done - echo $nfin "slices processed" - - if [ $tim -eq 0 ] ; then - if [ $nfin -eq $nslices ] ; then - after=`date +%s` - diff=$(($after-$before)) - echo "Execution Time: $diff seconds" >> ${subjdir}.bedpostX/times - tim=1 - fi - fi - - + echo $nfin "parts processed of "$njobs if [ -f ${subjdir}.bedpostX/xfms/eye.mat ] ; then finished=1 - echo "All slices processed" + echo "All parts processed" fi - sleep 15; + sleep 60; done - - -aftert=`date +%s` -diff=$(($aftert-$beforet)) -echo "TOTAL Time: $diff seconds" >> ${subjdir}.bedpostX/times - -else - echo - echo - echo "CUDA COMPILER ERROR. Please be sure that you have set correctly CUDALIBDIR in first line in file bin/bedpostx_cuda" - echo -fi - - -- GitLab