"""Biobank Data Stats Calculator

Description:

    This file contains the relevant scripts for producing a database containing
    relevant statistics about the imaging data from the UK Biobank.
    This is a standalone script, intended to be used only once during the
    project. Hence, it is not integrated into the larger utils packages.

Usage:

    To use content from this folder, import the functions and instantiate them
    as you wish to use them:

        from utils.DSbiobank import function_name

"""

import os
from datetime import datetime
from tempfile import TemporaryFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims

from data_utils import directory_reader, regression_weight_calculator

# Percentiles reported for every volume, listed in output-column order.
# The order is part of the output format: it fixes both the order of the values
# returned by stats_calc and the order of the columns in the pickled tables.
_PERCENTILES = (
    1, 25, 75, 99,
    0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
    99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9,
    2, 3, 4, 5, 95, 96, 97, 98,
)

# Labels of the non-percentile statistics, in the order stats_calc returns them.
_BASIC_STAT_LABELS = ('min', 'max', 'mean', 'med', 'std')

# Column names of the generated tables: the regression weight first, then the
# basic statistics, then one '<q>p' column per percentile (e.g. '0.1p', '99p').
_STAT_LABELS = (
    ['w_reg']
    + list(_BASIC_STAT_LABELS)
    + ['{:g}p'.format(q) for q in _PERCENTILES]
)


def stats_calc(array):
    """Statistics calculator

    Function calculating all the required statistics for an array.

    Args:
        array (np.array): Array of subject data. The statistics are computed
            over all of its elements (no axis is given to the numpy calls).

    Returns:
        tuple: 35 values, in this order: minimum, maximum, mean, median and
            standard deviation, followed by one value per entry of
            ``_PERCENTILES`` (1, 25, 75, 99, 0.1 ... 0.9, 99.1 ... 99.9,
            2, 3, 4, 5, 95, 96, 97, 98).
    """
    basic_stats = (
        np.min(array),
        np.max(array),
        np.mean(array),
        np.median(array),
        np.std(array),
    )
    # A single vectorised call evaluates every percentile in one pass over the
    # data instead of re-processing the whole volume once per percentile.
    percentile_stats = tuple(np.percentile(array, _PERCENTILES))
    return basic_stats + percentile_stats


def _stats_row(volume, weight, mean_volume):
    """Build one table row: the weight, then the stats of the residual volume.

    The residual is ``volume - weight * mean_volume``.
    """
    residual = np.subtract(volume, np.multiply(weight, mean_volume))
    return [weight] + list(stats_calc(residual))


def database_generator(data_directory, train_inputs, train_targets,
                       rsfMRI_mean_mask_path, dMRI_mean_mask_path,
                       output_directory='utils'):
    """Statistics database generator

    For every subject found in the data directory, subtracts the weighted mean
    volume from the subject's dMRI and rsfMRI volumes and stores the statistics
    of the result. Two tables (one row per subject) are written as pickled
    pandas DataFrames: ``dmri_stats.pkl`` and ``rsfmri_stats.pkl``.

    Args:
        data_directory (str): Directory containing one sub-directory per
            subject. It is joined onto the user's home directory; an absolute
            path is used unchanged, because os.path.join discards the
            components that precede an absolute one.
        train_inputs (str): Path of the dMRI volume, relative to a subject's
            directory.
        train_targets (str): Path of the rsfMRI volume, relative to a subject's
            directory.
        rsfMRI_mean_mask_path (str): Path of the mean rsfMRI volume.
        dMRI_mean_mask_path (str): Path of the mean dMRI volume.
        output_directory (str): Directory in which the two pickle files are
            written. It is created if it does not exist. Defaults to 'utils'.

    Returns:
        None
    """
    # Loop-invariant root of all the subject directories.
    data_root = os.path.join(os.path.expanduser("~"), data_directory)

    subDirectoryList, _ = directory_reader(
        folder_location=data_root, subject_number=None, write_txt=False)

    dmri_mean_volume = Image(dMRI_mean_mask_path).data
    # Only the first entry along the 4th axis of the rsfMRI data is used, both
    # for the mean volume here and for every subject below.
    rsfmri_mean_volume = Image(rsfMRI_mean_mask_path).data[:, :, :, 0]

    dmri_imaging_dictionary = {}
    rsfmri_imaging_dictionary = {}

    for subject in subDirectoryList:
        w_dMRI, w_rsfMRI = regression_weight_calculator(
            data_directory, subject, train_inputs, train_targets,
            rsfMRI_mean_mask_path, dMRI_mean_mask_path)

        # ------------------ dMRI

        dmri_path = os.path.join(data_root, subject, train_inputs)
        # The dMRI volume is resampled to (2, 2, 2) pixdims before the mean
        # volume is subtracted.
        # NOTE(review): presumably this matches the grid of the downsampled
        # mean volume - confirm.
        dmri_volume, _ = resampleToPixdims(Image(dmri_path), (2, 2, 2))
        dmri_imaging_dictionary[subject] = _stats_row(
            dmri_volume, w_dMRI, dmri_mean_volume)
        # Release the volume before the next one is loaded.
        del dmri_volume

        # ------------------ rsfMRI

        rsfmri_path = os.path.join(data_root, subject, train_targets)
        rsfmri_volume = Image(rsfmri_path).data[:, :, :, 0]
        rsfmri_imaging_dictionary[subject] = _stats_row(
            rsfmri_volume, w_rsfMRI, rsfmri_mean_volume)
        del rsfmri_volume

    # ------------------ Output

    # to_pickle does not create missing directories; create the target here so
    # that a long run cannot fail on the very last step.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    dmri_imaging_df = pd.DataFrame.from_dict(
        dmri_imaging_dictionary, orient="index", columns=_STAT_LABELS)
    dmri_imaging_df.to_pickle(
        os.path.join(output_directory, 'dmri_stats.pkl'))

    rsfmri_imaging_df = pd.DataFrame.from_dict(
        rsfmri_imaging_dictionary, orient="index", columns=_STAT_LABELS)
    rsfmri_imaging_df.to_pickle(
        os.path.join(output_directory, 'rsfmri_stats.pkl'))


if __name__ == '__main__':

    print('---> Start!')

    rsfmri_mean_mask_path = "utils/mean_dr_stage2.nii.gz"
    dmri_mean_mask_path = "utils/mean_tractsNormSummed_downsampled.nii.gz"
    data_directory = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/"
    train_inputs = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
    train_targets = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"

    database_generator(data_directory, train_inputs, train_targets,
                       rsfmri_mean_mask_path, dmri_mean_mask_path)

    print('---> Finished!')