DSbiobank.py 7.56 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
"""Biobank Data Stats Calculator

Description:

    This file contains the scripts for producing a database of summary statistics about the imaging data from the UK Biobank.
    This is a standalone script, intended to be used only once during the project. Hence, it is not integrated into the larger utils packages.

Usage:

    To use content from this file, import the functions and call them as needed:

        from utils.DSbiobank import function_name

"""


import numpy as np
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
import matplotlib.pyplot as plt
from data_utils import directory_reader, regression_weight_calculator
from tempfile import TemporaryFile
from datetime import datetime
import pandas as pd
import os

27

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def stats_calc(array):
    """ Statistics calculator

    Function calculating all the required statistics for an array. The
    statistics are taken over every element of the array (no axis is given to
    any of the numpy reductions), so the input shape does not matter.

    Args:
        array (np.array): array of subject data (the callers in this file pass 3D volumes)

    Returns:
        tuple: 35 values, in this fixed order:
            min, max, mean, median, standard deviation,
            percentiles 1, 25, 75, 99,
            percentiles 0.1, 0.2, ..., 0.9,
            percentiles 99.1, 99.2, ..., 99.9,
            percentiles 2, 3, 4, 5,
            percentiles 95, 96, 97, 98
    """

    # The order of the ranks defines the order of the returned values, which
    # callers rely on when pairing them with column labels - do not reorder.
    percentile_ranks = [
        1, 25, 75, 99,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9,
        2, 3, 4, 5,
        95, 96, 97, 98,
    ]

    # A single vectorised call evaluates every rank at once instead of
    # re-processing the whole volume once per percentile.
    percentile_values = np.percentile(array, percentile_ranks)

    return (
        np.min(array),
        np.max(array),
        np.mean(array),
        np.median(array),
        np.std(array),
        *percentile_values,
    )


def database_generator(data_directory, train_inputs, train_targets, rsfMRI_mean_mask_path, dMRI_mean_mask_path):
    """ Statistics database generator

    For every subject returned by directory_reader, loads the dMRI and rsfMRI
    volumes, subtracts the corresponding mean volume scaled by the subject's
    regression weight, computes summary statistics with stats_calc, and writes
    one pandas DataFrame per modality to disk as a pickle.

    Args:
        data_directory (str): Directory containing the subject folders; joined onto the user's home directory
        train_inputs (str): Path of the dMRI file, relative to each subject folder
        train_targets (str): Path of the rsfMRI file, relative to each subject folder
        rsfMRI_mean_mask_path (str): Path to the rsfMRI mean volume (only index 0 of its 4th axis is used)
        dMRI_mean_mask_path (str): Path to the dMRI mean volume

    Returns:
        None. Side effects: writes 'utils/dmri_stats.pkl' and
        'utils/rsfmri_stats.pkl' (paths relative to the current working
        directory). Each DataFrame is indexed by subject, with the regression
        weight in column 'w_reg' followed by the statistics from stats_calc.
    """

    subDirectoryList, _ = directory_reader(folder_location=os.path.join(
        os.path.expanduser("~"), data_directory), subject_number=None, write_txt=False)

    # Column order must match [weight, *stats_calc(...)] built for each subject below.
    dictionary_labels = ['w_reg', 'min', 'max', 'mean', 'med', 'std', '1p', '25p', '75p', '99p',
                         '0.1p', '0.2p', '0.3p', '0.4p', '0.5p', '0.6p', '0.7p', '0.8p', '0.9p',
                         '99.1p', '99.2p', '99.3p', '99.4p', '99.5p', '99.6p', '99.7p', '99.8p', '99.9p',
                         '2p', '3p', '4p', '5p', '95p', '96p', '97p', '98p']

    # The mean volumes are loop-invariant, so they are loaded once up front.
    dmri_mean_volume = Image(dMRI_mean_mask_path).data
    rsfmri_mean_volume = Image(rsfMRI_mean_mask_path).data[:, :, :, 0]

    dmri_imaging_dictionary = {}
    rsfmri_imaging_dictionary = {}

    for subject in subDirectoryList:

        w_dMRI, w_rsfMRI = regression_weight_calculator(
            data_directory, subject, train_inputs, train_targets, rsfMRI_mean_mask_path, dMRI_mean_mask_path)

        subject_directory = os.path.join(
            os.path.expanduser("~"), data_directory, subject)

        # ------------------ dMRI

        # The dMRI image is resampled to pixdims (2, 2, 2) before the mean is
        # removed; presumably this matches the grid of the dMRI mean volume -
        # TODO confirm.
        dmri_volume, _ = resampleToPixdims(
            Image(os.path.join(subject_directory, train_inputs)), (2, 2, 2))
        dmri_volume = np.subtract(
            dmri_volume, np.multiply(w_dMRI, dmri_mean_volume))

        dmri_imaging_dictionary[subject] = [w_dMRI, *stats_calc(dmri_volume)]

        # Release the volume before the next one is loaded to keep peak memory down.
        del dmri_volume

        # ------------------ rsfMRI

        # Only index 0 of the 4th axis is used, mirroring the mean volume above.
        rsfmri_volume = Image(os.path.join(
            subject_directory, train_targets)).data[:, :, :, 0]
        rsfmri_volume = np.subtract(
            rsfmri_volume, np.multiply(w_rsfMRI, rsfmri_mean_volume))

        rsfmri_imaging_dictionary[subject] = [
            w_rsfMRI, *stats_calc(rsfmri_volume)]

        del rsfmri_volume

    dmri_imaging_df = pd.DataFrame.from_dict(
        dmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    dmri_imaging_df.to_pickle('utils/dmri_stats.pkl')

    rsfmri_imaging_df = pd.DataFrame.from_dict(
        rsfmri_imaging_dictionary, orient="index", columns=dictionary_labels)
    rsfmri_imaging_df.to_pickle('utils/rsfmri_stats.pkl')
144
145
146
147
148
149
150
151
152
153
154
155
156


if __name__ == '__main__':
    # Script entry point: builds the dMRI and rsfMRI statistics databases for
    # every subject found under data_directory.

    print('---> Start!')

    # Mean volumes subtracted (after scaling) from each subject's data.
    rsfmri_mean_mask_path = "utils/mean_dr_stage2.nii.gz"
    dmri_mean_mask_path = "utils/mean_tractsNormSummed_downsampled.nii.gz"

    # NOTE(review): train_list is defined but never passed to
    # database_generator, which reads every subject found in data_directory -
    # confirm whether the run was meant to be restricted to this list.
    train_list = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/subj_22k.txt"
    data_directory = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/"

    # Per-subject file locations, relative to each subject's folder.
    train_inputs = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
    train_targets = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"

    database_generator(data_directory, train_inputs, train_targets,
                       rsfmri_mean_mask_path, dmri_mean_mask_path)

    print('---> Finished!')