Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Andrei-Claudiu Roibu
BrainMapper
Commits
9f6e9cb1
Commit
9f6e9cb1
authored
Jul 10, 2020
by
Andrei Roibu
Browse files
script for computing BBwide statistics
parent
97a4ebaf
Changes
1
Show whitespace changes
Inline
Side-by-side
utils/DSbiobank.py
0 → 100644
View file @
9f6e9cb1
"""Biobank Data Stats Calculator
Description:
This file contains the relevant scripts for producing a database containing relevant statistics about the imaging data from the UK Biobank.
This is a standalone script, intended to be used only once during the project. Hence, it is not integrated into the larger utils packages.
Usage:
To use content from this file, import the functions and call them as needed:
from utils.DSbiobank import function_name
"""
import
numpy
as
np
from
fsl.data.image
import
Image
from
fsl.utils.image.resample
import
resampleToPixdims
import
matplotlib.pyplot
as
plt
from
data_utils
import
directory_reader
,
regression_weight_calculator
from
tempfile
import
TemporaryFile
from
datetime
import
datetime
import
pandas
as
pd
import
os
def stats_calc(array):
    """ Statistics calculator

    Function calculating all the required statistics for every array.
    All statistics are computed over every element of the input (no axis
    is given, so the data is treated as flattened).

    Args:
        array (np.array): Array of subject data (array-like, numeric)

    Returns:
        tuple: 35 values, in this fixed order:
            min, max, mean, median, std,
            percentiles 1, 25, 75, 99,
            percentiles 0.1, 0.2, ..., 0.9,
            percentiles 99.1, 99.2, ..., 99.9,
            percentiles 2, 3, 4, 5,
            percentiles 95, 96, 97, 98
    """

    # The order of this tuple defines the order of the returned percentiles;
    # callers rely on it positionally, so do not sort or reorder it.
    percentiles = (
        1, 25, 75, 99,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9,
        2, 3, 4, 5,
        95, 96, 97, 98,
    )

    summary = (
        np.min(array),
        np.max(array),
        np.mean(array),
        np.median(array),
        np.std(array),
    )

    # One vectorised call computes every requested percentile, instead of
    # re-processing the full volume once per percentile.
    percentile_values = np.percentile(array, percentiles)

    return summary + tuple(percentile_values)
def database_generator(data_directory, train_inputs, train_targets, rsfMRI_mean_mask_path, dMRI_mean_mask_path):
    """ Database generator

    For every subject returned by directory_reader, loads the dMRI and rsfMRI
    volumes, subtracts the weighted mean volume from each, computes summary
    statistics with stats_calc, and finally stores the results as two pickled
    pandas DataFrames (one row per subject).

    Args:
        data_directory (str): Directory containing the subject sub-directories. It is joined onto the user's home directory; os.path.join keeps an absolute path unchanged.
        train_inputs (str): Path of the dMRI volume, relative to each subject directory
        train_targets (str): Path of the rsfMRI volume, relative to each subject directory
        rsfMRI_mean_mask_path (str): Path to the mean rsfMRI volume; only index 0 along the 4th axis is used
        dMRI_mean_mask_path (str): Path to the mean dMRI volume

    Returns:
        None. Writes 'dmri_stats.pkl' and 'rsfmri_stats.pkl' relative to the current working directory.
    """

    data_root = os.path.join(os.path.expanduser("~"), data_directory)

    subDirectoryList, number_of_subjects = directory_reader(folder_location=data_root,
                                                            subject_number=None,
                                                            write_txt=False)

    # Column labels: the regression weight followed by the 35 values returned
    # by stats_calc, in the same order. Every row built below has 36 entries,
    # so this list must have exactly 36 labels.
    dictionary_labels = [
        'w_reg', 'min', 'max', 'mean', 'med', 'std',
        '1p', '25p', '75p', '99p',
        '0.1p', '0.2p', '0.3p', '0.4p', '0.5p', '0.6p', '0.7p', '0.8p', '0.9p',
        '99.1p', '99.2p', '99.3p', '99.4p', '99.5p', '99.6p', '99.7p', '99.8p', '99.9p',
        '2p', '3p', '4p', '5p',
        '95p', '96p', '97p', '98p',
    ]

    dmri_mean_volume = Image(dMRI_mean_mask_path).data
    rsfmri_mean_volume = Image(rsfMRI_mean_mask_path).data[:, :, :, 0]

    dmri_imaging_dictionary = {}
    rsfmri_imaging_dictionary = {}

    for index, subject in enumerate(subDirectoryList, start=1):
        subject_t0 = datetime.now()
        print("Processing subject {}/{}".format(index, number_of_subjects))

        # NOTE(review): regression_weight_calculator lives in data_utils;
        # presumably it returns one scalar weight per modality - confirm.
        w_dMRI, w_rsfMRI = regression_weight_calculator(data_directory, subject, train_inputs, train_targets,
                                                        rsfMRI_mean_mask_path, dMRI_mean_mask_path)

        # ------------------

        dmri_path = os.path.join(data_root, subject, train_inputs)
        # The dMRI image is resampled to (2, 2, 2) pixdims before use; the
        # second return value of resampleToPixdims is not needed here.
        dmri_volume, _ = resampleToPixdims(Image(dmri_path), (2, 2, 2))
        dmri_volume = np.subtract(dmri_volume, np.multiply(w_dMRI, dmri_mean_volume))
        dmri_imaging_dictionary[subject] = [w_dMRI] + list(stats_calc(dmri_volume))
        # Drop the volume before loading the next one to limit peak memory.
        del dmri_volume

        # ------------------

        rsfmri_path = os.path.join(data_root, subject, train_targets)
        rsfmri_volume = Image(rsfmri_path).data[:, :, :, 0]
        rsfmri_volume = np.subtract(rsfmri_volume, np.multiply(w_rsfMRI, rsfmri_mean_volume))
        rsfmri_imaging_dictionary[subject] = [w_rsfMRI] + list(stats_calc(rsfmri_volume))
        del rsfmri_volume

        # ------------------

        print("Processed subject {}/{} | Total Duration: {}".format(index, number_of_subjects,
                                                                     datetime.now() - subject_t0))

    dmri_imaging_df = pd.DataFrame.from_dict(dmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    dmri_imaging_df.to_pickle('dmri_stats.pkl')
    del dmri_imaging_df

    rsfmri_imaging_df = pd.DataFrame.from_dict(rsfmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    rsfmri_imaging_df.to_pickle('rsfmri_stats.pkl')
    del rsfmri_imaging_df
if __name__ == '__main__':

    print('---> Start!')

    # Location of the subject data and the per-subject relative file paths.
    data_directory = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/"
    train_inputs = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
    train_targets = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"

    # Subject list file; defined here but not passed to database_generator.
    train_list = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/subj_22k.txt"

    # Mean volumes used by database_generator.
    dmri_mean_mask_path = "utils/mean_tractsNormSummed_downsampled.nii.gz"
    rsfmri_mean_mask_path = "utils/mean_dr_stage2.nii.gz"

    database_generator(
        data_directory=data_directory,
        train_inputs=train_inputs,
        train_targets=train_targets,
        rsfMRI_mean_mask_path=rsfmri_mean_mask_path,
        dMRI_mean_mask_path=dmri_mean_mask_path,
    )

    print('---> Finished!')
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment