Commit 9f6e9cb1 authored by Andrei Roibu's avatar Andrei Roibu
Browse files

script for computing BBwide statistics

parent 97a4ebaf
"""Biobank Data Stats Calculator
Description:
This file contains the relevant scripts for producing a database containing relevant statistics about the imaging data from the UK Biobank.
This is a standalone script, intended to be used only once during the project. Hence, it is not integrated into the larger utils packages.
Usage:
To use content from this folder, import the functions and instantiate them as you wish to use them:
from utils.DSbiobank import function_name
"""
import numpy as np
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
import matplotlib.pyplot as plt
from data_utils import directory_reader, regression_weight_calculator
from tempfile import TemporaryFile
from datetime import datetime
import pandas as pd
import os
def stats_calc(array):
    """ Statistics calculator

    Function calculating all the required statistics for every array.
    All percentiles are evaluated in a single ``np.percentile`` call so the
    volume is processed once instead of once per requested percentile.

    Args:
        array (np.array): 3D array of subject data

    Returns:
        tuple: 35 values, always in this order:
            min, max, mean, median, standard deviation,
            percentiles 1, 25, 75, 99,
            percentiles 0.1, 0.2, ..., 0.9,
            percentiles 99.1, 99.2, ..., 99.9,
            percentiles 2, 3, 4, 5, 95, 96, 97, 98
    """
    # The order of this list defines the order of the returned percentiles;
    # callers unpack the result positionally, so it must not be reordered.
    percentile_ranks = [
        1, 25, 75, 99,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9,
        2, 3, 4, 5,
        95, 96, 97, 98,
    ]
    percentiles = np.percentile(array, percentile_ranks)

    return (
        np.min(array),
        np.max(array),
        np.mean(array),
        np.median(array),
        np.std(array),
        *percentiles,
    )
def database_generator(data_directory, train_inputs, train_targets, rsfMRI_mean_mask_path, dMRI_mean_mask_path):
    """ Database generator

    For every subject returned by ``directory_reader``, obtains the regression
    weights from ``regression_weight_calculator``, subtracts the weighted mean
    volume from the subject's dMRI and rsfMRI volumes, and records the weight
    followed by the output of ``stats_calc``. The two resulting tables
    (one row per subject) are pickled to 'dmri_stats.pkl' and
    'rsfmri_stats.pkl' in the current working directory.

    Args:
        data_directory (str): Directory containing one sub-directory per subject.
            It is joined onto the user's home directory with ``os.path.join``
            (an absolute path therefore replaces the home prefix).
        train_inputs (str): Path of the dMRI volume inside a subject directory
        train_targets (str): Path of the rsfMRI volume inside a subject directory
        rsfMRI_mean_mask_path (str): Path of the mean rsfMRI volume; only index 0
            along its 4th axis is used
        dMRI_mean_mask_path (str): Path of the mean dMRI volume

    Raises:
        ValueError: If a subject row does not have one value per column label.
    """
    home_directory = os.path.expanduser("~")
    subDirectoryList, number_of_subjects = directory_reader(folder_location=os.path.join(home_directory, data_directory), subject_number=None, write_txt=False)

    # One label per stored value: the regression weight followed by the 35
    # statistics in the order returned by stats_calc.
    # NOTE: the comma after '99.9p' was previously missing, which silently
    # merged it with '2p' and left the table one column short.
    dictionary_labels = ['w_reg', 'min', 'max', 'mean', 'med', 'std', '1p', '25p', '75p', '99p',
                         '0.1p', '0.2p', '0.3p', '0.4p', '0.5p', '0.6p', '0.7p', '0.8p', '0.9p',
                         '99.1p', '99.2p', '99.3p', '99.4p', '99.5p', '99.6p', '99.7p', '99.8p', '99.9p',
                         '2p', '3p', '4p', '5p', '95p', '96p', '97p', '98p']

    def _stats_row(weight, volume):
        # Build one table row and fail on the first subject (rather than after
        # the whole cohort has been processed) if labels and values disagree.
        row = [weight, *stats_calc(volume)]
        if len(row) != len(dictionary_labels):
            raise ValueError("Expected {} values per subject but got {}".format(len(dictionary_labels), len(row)))
        return row

    dmri_mean_volume = Image(dMRI_mean_mask_path).data
    rsfmri_mean_volume = Image(rsfMRI_mean_mask_path).data[:, :, :, 0]

    dmri_imaging_dictionary = {}
    rsfmri_imaging_dictionary = {}

    for index, subject in enumerate(subDirectoryList, start=1):
        subject_t0 = datetime.now()
        print("Processing subject {}/{}".format(index, number_of_subjects))

        w_dMRI, w_rsfMRI = regression_weight_calculator(data_directory, subject, train_inputs, train_targets, rsfMRI_mean_mask_path, dMRI_mean_mask_path)

        # ------------------
        # dMRI: resample to pixdims (2, 2, 2), then remove the weighted mean volume.
        dmri_path = os.path.join(home_directory, data_directory, subject, train_inputs)
        dmri_volume, _ = resampleToPixdims(Image(dmri_path), (2, 2, 2))
        dmri_volume = np.subtract(dmri_volume, np.multiply(w_dMRI, dmri_mean_volume))
        dmri_imaging_dictionary[subject] = _stats_row(w_dMRI, dmri_volume)
        # Release the volume before the next one is loaded.
        del dmri_volume

        # ------------------
        # rsfMRI: take index 0 along the 4th axis, then remove the weighted mean volume.
        rsfmri_path = os.path.join(home_directory, data_directory, subject, train_targets)
        rsfmri_volume = Image(rsfmri_path).data[:, :, :, 0]
        rsfmri_volume = np.subtract(rsfmri_volume, np.multiply(w_rsfMRI, rsfmri_mean_volume))
        rsfmri_imaging_dictionary[subject] = _stats_row(w_rsfMRI, rsfmri_volume)
        del rsfmri_volume

        # ------------------
        print("Processed subject {}/{} | Total Duration: {}".format(index, number_of_subjects, datetime.now() - subject_t0))

    dmri_imaging_df = pd.DataFrame.from_dict(dmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    dmri_imaging_df.to_pickle('dmri_stats.pkl')
    del dmri_imaging_df

    rsfmri_imaging_df = pd.DataFrame.from_dict(rsfmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    rsfmri_imaging_df.to_pickle('rsfmri_stats.pkl')
    del rsfmri_imaging_df
if __name__ == '__main__':
    # One-off run over the Biobank subject directory with fixed paths.
    print('---> Start!')

    # Location of the per-subject data and the files read for each subject.
    data_directory = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/"
    train_inputs = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
    train_targets = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"

    # Subject list file; assigned here but not passed to database_generator.
    train_list = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/subj_22k.txt"

    # Mean volumes subtracted (after weighting) from each subject volume.
    rsfmri_mean_mask_path = "utils/mean_dr_stage2.nii.gz"
    dmri_mean_mask_path = "utils/mean_tractsNormSummed_downsampled.nii.gz"

    database_generator(
        data_directory=data_directory,
        train_inputs=train_inputs,
        train_targets=train_targets,
        rsfMRI_mean_mask_path=rsfmri_mean_mask_path,
        dMRI_mean_mask_path=dmri_mean_mask_path,
    )

    print('---> Finished!')
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment