data_evaluation_utils.py 19.2 KB
Newer Older
1
2
3
4
"""Data Evaluation Functions

Description:

5
    This folder contains several functions which, either on their own or included in larger pieces of software, perform data evaluation tasks.
6

7
8
9
10
11
Usage:

    To use content from this folder, import the functions and instantiate them as you wish to use them:

        from utils.data_evaluation_utils import function_name
12

13
14
TODO: Might be worth adding some information on uncertaintiy estimation, later down the line

15
16
17
"""

import os
18
import pickle
19
20
import numpy as np
import torch
21
22
23
import logging
import utils.data_utils as data_utils
import nibabel as nib
24
import matplotlib.pyplot as plt
Andrei Roibu's avatar
Andrei Roibu committed
25
import pandas as pd
26
from fsl.data.image import Image
27
28

log = logging.getLogger(__name__)
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
def evaluate_correlation(trained_model_path,
                     data_directory,
                     mapping_data_file,
                     target_data_file,
                     data_list,
                     prediction_output_path,
                     brain_mask_path,
                     mean_mask_path,
                     mean_reduction,
                     scaling_factors,
                     device=0,
                     mode='evaluate',
                     exit_on_error=False):
    """Model Evaluator
44

45
    This function generates correlation map between the output and target rsfMRI maps
46
47
48
49

    Args:
        trained_model_path (str): Path to the location of the trained model
        data_directory (str): Path to input data directory
50
51
        mapping_data_file (str): Path to the input file
        target_data_file (str): Path to the target files
52
53
        data_list (str): Path to a .txt file containing the input files for consideration
        prediction_output_path (str): Output prediction path
54
55
56
57
        brain_mask_path (str): Path to the MNI brain mask file
        mean_mask_path (str): Path to the dualreg subject mean mask
        mean_reduction (bool): Flag indicating if the targets should be de-meaned using the mean_mask_path
        scaling_factors (str): Path to the scaling factor file
58
59
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        mode (str): Current run mode or phase
60
        exit_on_error (bool): Flag that triggers the raising of an exception
61

62
63
64
    Raises:
        FileNotFoundError: Error in reading the provided file!
        Exception: Error code execution!
65
66
    """

67
68
    log.info(
        "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)")
69
70
71
72
73
74
75
76
77
78
79
80
81

    with open(data_list) as data_list_file:
        volumes_to_be_used = data_list_file.read().splitlines()

    # Test if cuda is available and attempt to run on GPU

    cuda_available = torch.cuda.is_available()
    if type(device) == int:
        if cuda_available:
            model = torch.load(trained_model_path)
            torch.cuda.empty_cache()
            model.cuda(device)
        else:
82
83
            log.warning(
                "CUDA not available. Switching to CPU. Investigate behaviour!")
84
85
86
            device = 'cpu'

    if (type(device) == str) or not cuda_available:
87
88
        model = torch.load(trained_model_path,
                           map_location=torch.device(device))
89
90
91
92
93
94
95
96
97

    model.eval()

    # Create the prediction path folder if this is not available

    data_utils.create_folder(prediction_output_path)

    # Initiate the evaluation

98
    log.info("rsfMRI Correlation Started")
99
    file_paths = data_utils.load_file_paths(
100
        data_directory, data_list, mapping_data_file)
101

102
103
    target_paths = data_utils.load_file_paths(
        data_directory, data_list, target_data_file)
104

105
106
    correlation_matrix_complete = np.empty(len(file_paths), len(target_paths))
    correlation_matrix_demeaned = np.empty(len(file_paths), len(target_paths))
107

108
    with torch.no_grad():
109

110
111
112
        for volume_index, file_path in enumerate(file_paths):
            try:
                print("Correlating Volume {}/{}".format(volume_index+1, len(file_paths)))
113

114
115
                predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map(
                    file_path, model, device, cuda_available, brain_mask_path, mean_mask_path, scaling_factors, mean_reduction)
116

117
118
                for target_index, target_path in enumerate(target_paths):
                    target, target_demeaned = data_utils.load_and_preprocess_targets(target_path, mean_mask_path)
119

120
121
                    r_predicted_complete = _pearson_correlation(predicted_complete_volume, target)
                    r_predicted_demeaned = _pearson_correlation(predicted_volume, target_demeaned)
122

123
124
                    correlation_matrix_complete[volume_index, target_index] = r_predicted_complete
                    correlation_matrix_demeaned[volume_index, target_index] = r_predicted_demeaned
125

126
127
                log.info("Correlated: " + volumes_to_be_used[volume_index] + " " + str(
                    volume_index + 1) + " out of " + str(len(volumes_to_be_used)))
128

129
130
131
132
133
            except FileNotFoundError as exception_expression:
                log.error("Error in reading the provided file!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)
134

135
136
137
138
139
            except Exception as exception_expression:
                log.error("Error code execution!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)
140

141
    log.info("rsfMRI Correlation Complete")
142

143
    # HERE WE NEED TO CALL THE PRINT FUNCTION TWICE
144

145
146
    _generate_correlation_matrix(correlation_matrix_complete, title='Complete Volume Correlation', prediction_output_path=prediction_output_path, normalize=False)
    _generate_correlation_matrix(correlation_matrix_demeaned, title='Particularity Volume Correlation', prediction_output_path=prediction_output_path, normalize=False)
147

148

149
def evaluate_mapping(trained_model_path,
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
150
151
152
153
                     data_directory,
                     mapping_data_file,
                     data_list,
                     prediction_output_path,
154
                     brain_mask_path,
Andrei Roibu's avatar
Andrei Roibu committed
155
156
                     dmri_mean_mask_path,
                     rsfmri_mean_mask_path,
157
                     mean_reduction,
158
                     scaling_factors,
Andrei Roibu's avatar
Andrei Roibu committed
159
                     regression_factors,
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
160
161
162
                     device=0,
                     mode='evaluate',
                     exit_on_error=False):
163
164
165
166
167
168
169
170
171
172
    """Model Evaluator

    This function generates the rsfMRI map for an input running on on a single axis or path

    Args:
        trained_model_path (str): Path to the location of the trained model
        data_directory (str): Path to input data directory
        mapping_data_file (str): Path to the input file
        data_list (str): Path to a .txt file containing the input files for consideration
        prediction_output_path (str): Output prediction path
173
        brain_mask_path (str): Path to the MNI brain mask file
Andrei Roibu's avatar
Andrei Roibu committed
174
175
        dmri_mean_mask_path (str): Path to the dualreg subject mean mask
        rsfmri_mean_mask_path (str): Path to the summed tract mean mask
176
        mean_reduction (bool): Flag indicating if the targets should be de-meaned using the mean_mask_path
177
        scaling_factors (str): Path to the scaling factor file
Andrei Roibu's avatar
Andrei Roibu committed
178
        regression_factors (str): Path to the linear regression weights file
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        mode (str): Current run mode or phase
        exit_on_error (bool): Flag that triggers the raising of an exception

    Raises:
        FileNotFoundError: Error in reading the provided file!
        Exception: Error code execution!
    """

    log.info(
        "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)")

    with open(data_list) as data_list_file:
        volumes_to_be_used = data_list_file.read().splitlines()

    # Test if cuda is available and attempt to run on GPU

    cuda_available = torch.cuda.is_available()
    if type(device) == int:
        if cuda_available:
            model = torch.load(trained_model_path)
            torch.cuda.empty_cache()
            model.cuda(device)
        else:
            log.warning(
                "CUDA not available. Switching to CPU. Investigate behaviour!")
            device = 'cpu'

    if (type(device) == str) or not cuda_available:
        model = torch.load(trained_model_path,
                           map_location=torch.device(device))

    model.eval()

    # Create the prediction path folder if this is not available

    data_utils.create_folder(prediction_output_path)

    # Initiate the evaluation

    log.info("rsfMRI Generation Started")
Andrei Roibu's avatar
Andrei Roibu committed
220
    file_paths, volumes_to_be_used = data_utils.load_file_paths(
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
221
        data_directory, data_list, mapping_data_file)
222
223
224
225
226

    with torch.no_grad():

        for volume_index, file_path in enumerate(file_paths):
            try:
227
                print("Mapping Volume {}/{}".format(volume_index+1, len(file_paths)))
228
                # Generate volume & header
Andrei Roibu's avatar
Andrei Roibu committed
229
230
231

                subject = volumes_to_be_used[volume_index]

232
                predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map(
Andrei Roibu's avatar
Andrei Roibu committed
233
234
235
236
                    file_path, subject, model, 
                    device, cuda_available, brain_mask_path, 
                    dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors, 
                    regression_factors, mean_reduction)
237
238
239
240
241
242

                # Generate New Header Affine

                header_affines = np.array(
                    [header['srow_x'], header['srow_y'], header['srow_z'], [0, 0, 0, 1]])

Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
243
244
                output_nifti_image = Image(
                    predicted_volume, header=header, xform=xform)
245
246
247
248
249
250
251
252
253

                output_nifti_path = os.path.join(
                    prediction_output_path, volumes_to_be_used[volume_index])

                if '.nii' not in output_nifti_path:
                    output_nifti_path += '.nii.gz'

                output_nifti_image.save(output_nifti_path)

254
255
256
257
                if mean_reduction == True:
                    output_complete_nifti_image = Image(
                        predicted_complete_volume, header=header, xform=xform)

258
259
                    output_complete_nifti_path = os.path.join(
                        prediction_output_path, volumes_to_be_used[volume_index]) + '_complete'
260
261
262
263

                    if '.nii' not in output_complete_nifti_path:
                        output_complete_nifti_path += '.nii.gz'

264
265
                    output_complete_nifti_image.save(
                        output_complete_nifti_path)
266

267
268
269
                log.info("Processed: " + volumes_to_be_used[volume_index] + " " + str(
                    volume_index + 1) + " out of " + str(len(volumes_to_be_used)))

270
271
                print("Mapped Volumes saved in: ", prediction_output_path)

272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
            except FileNotFoundError as exception_expression:
                log.error("Error in reading the provided file!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)

            except Exception as exception_expression:
                log.error("Error code execution!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)

    log.info("rsfMRI Generation Complete")


Andrei Roibu's avatar
Andrei Roibu committed
287
288
289
290
291
292
293
294
295
296
297
def _generate_volume_map(file_path, 
                        subject, 
                        model, 
                        device, 
                        cuda_available, 
                        brain_mask_path, 
                        dmri_mean_mask_path, 
                        rsfmri_mean_mask_path, 
                        scaling_factors, 
                        regression_factors,
                        mean_reduction=False):
298
299
300
301
302
303
    """rsfMRI Volume Generator

    This function uses the trained model to generate a new volume

    Args:
        file_path (str): Path to the desired file
Andrei Roibu's avatar
Andrei Roibu committed
304
        subject (str): Subject ID of the subject volume to be regressed
305
306
307
        model (class): BrainMapper model class
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        cuda_available (bool): Flag indicating if a cuda-enabled GPU is present
308
        brain_mask_path (str): Path to the MNI brain mask file
Andrei Roibu's avatar
Andrei Roibu committed
309
310
        dmri_mean_mask_path (str): Path to the group mean volume
        rsfmri_mean_mask_path (str): Path to the dualreg subject mean mask
311
        scaling_factors (str): Path to the scaling factor file
Andrei Roibu's avatar
Andrei Roibu committed
312
        regression_factors (str): Path to the linear regression weights file
313
        mean_reduction (bool): Flag indicating if the targets should be de-meaned using the mean_mask_path
314
315
316
317
318
319

    Returns
        predicted_volume (np.array): Array containing the information regarding the generated volume
        header (class): 'nibabel.nifti1.Nifti1Header' class object, containing volume metadata
    """

Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
320
321
    volume, header, xform = data_utils.load_and_preprocess_evaluation(
        file_path)
322

Andrei Roibu's avatar
Andrei Roibu committed
323
324
    volume = _regress_input(volume, subject, dmri_mean_mask_path, regression_factors)

325
326
327
328
    print('volume range:', np.min(volume), np.max(volume))

    volume = _scale_input(volume, scaling_factors)
    
329
    if len(volume.shape) == 5:
330
331
332
333
334
335
        volume = volume
    else:
        volume = volume[np.newaxis, np.newaxis, :, :, :]

    volume = torch.tensor(volume).type(torch.FloatTensor)

336
337
    if cuda_available and (type(device) == int):
        volume = volume.cuda(device)
338

339
    output = model(volume)
340
341
    output = (output.cpu().numpy()).astype('float32')
    output = np.squeeze(output)
342
343
344
    
    print('output range:', np.min(output), np.max(output))

345
346
    output = _rescale_output(output, scaling_factors)

347
348
    print('output rescaled:', np.min(output), np.max(output))

349
    MNI152_T1_2mm_brain_mask = Image(brain_mask_path).data
350

351
    if mean_reduction == True:
Andrei Roibu's avatar
Andrei Roibu committed
352
353
354
        mean_mask = Image(rsfmri_mean_mask_path).data[:, :, :, 0]
        weight = pd.read_pickle(regression_factors).loc[subject]['w_rsfMRI']
        predicted_complete_volume = np.add(output, np.multiply(weight, mean_mask))
355
356
357
358
359
360
361

        print('predicted_complete_volume', np.min(predicted_complete_volume), np.max(predicted_complete_volume))

        predicted_complete_volume = np.multiply(predicted_complete_volume, MNI152_T1_2mm_brain_mask)

        print('predicted_complete_volume masked:', np.min(predicted_complete_volume), np.max(predicted_complete_volume))

362
363
    else:
        predicted_complete_volume = None
364

365
    predicted_volume = np.multiply(output, MNI152_T1_2mm_brain_mask)
366

367
368
    print('predicted_volume masked:', np.min(predicted_volume), np.max(predicted_volume))

369
    return predicted_complete_volume, predicted_volume, header, xform
370
371


372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def _scale_input(volume, scaling_factors):
    """Input Scaling

    This function reads the scaling factors from the saved file and then scales the data.

    Args:
        volume (np.array): Unscalled volume
        scaling_factors (str): Path to the scaling factor file

    Returns:
        scaled_volume (np.array): Scaled volume
    """

    with open(scaling_factors, 'rb') as input_file:
        min_value, max_value, _, _ = pickle.load(input_file)

388
389
390
391
392
393
394
    # Steve Scaling
        min_value, max_value, _, _ = [0.0, 0.2, 0.0, 10.0]

    # Eliminating outliers
    volume[volume > max_value] = max_value
    volume[volume < min_value] = min_value

395
396
397
    # Normalization to [0, 1]
    # scaled_volume = np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value))
    # Scaling between [-1, 1]
398
    scaled_volume = np.add(-1.0, np.multiply(2.0, np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value))))
399
400
401
402

    return scaled_volume


Andrei Roibu's avatar
Andrei Roibu committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
def _regress_input(volume, subject, dmri_mean_mask_path, regression_factors):
    """ Inputn Regression

    This function regresse the group mean from the input volume using the saved regression weights.

    TODO: This function repressents only a temporary solution. For deployment, a NN needs to be trained which predicts the relevant scaling factors.

    Args:
        volume (np.array): Unregressed volume
        subject (str): Subject ID of the subject volume to be regressed
        dmri_mean_mask_path (str): Path to the group mean volume
        regression_factors (str): Path to the linear regression weights file

    Returns:
        regressed_volume (np.array): Linear regressed volume

    """

    weight = pd.read_pickle(regression_factors).loc[subject]['w_dMRI']
    group_mean = Image(dmri_mean_mask_path).data
    regressed_volume = np.subtract(volume, np.multiply(weight, group_mean))

    return regressed_volume

427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
def _rescale_output(volume, scaling_factors):
    """Output Rescaling

    This function reads the scaling factors from the saved file and then scales the data.

    Args:
        volume (np.array): Unscalled volume
        scaling_factors (str): Path to the scaling factor file

    Returns:
        rescaled_volume (np.array): Rescaled volume
    """

    with open(scaling_factors, 'rb') as input_file:
        _, _, min_value, max_value = pickle.load(input_file)

443
444
445
    # Steve Scaling
        _, _, min_value, max_value = [0.0, 0.2, 0.0, 10.0]

446
447
448
    # Normalization to [0, 1]
    # rescaled_volume = np.add(np.multiply(volume, np.subtract(max_value, min_value)), min_value)
    # Scaling between [-1, 1]
449
    rescaled_volume = np.add(np.multiply(np.divide(np.add(volume, 1), 2), np.subtract(max_value, min_value)), min_value)
450
451
452
453

    return rescaled_volume


454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
def _pearson_correlation(volume, target):
    """Calculate Pearson Correlation Coefficient

    This function calculates the pearson correlation coefficient between a predicted volume and the target volume

    Args:
        volume (np.array): The predicted volume
        target (np.array): The target volume

    Returns:
        r (np.float32): The Pearson Correlation Coefficient
    """

    r =  np.sum(np.multiply(np.subtract(volume, volume.mean()), np.subtract(target, target.mean()))) / np.sqrt(np.multiply(np.sum(np.power(np.subtract(volume, volume.mean()),2)), np.sum(np.power(np.subtract(target, target.mean()),2))))
    
    return r


def _generate_correlation_matrix(correlation_matrix, title, prediction_output_path, normalize=False):
    """Visual correlation matrix generator

    This function generates a visual representation of the correlation matrix

    Args:
        correaltion_matrix (np.array): Array containing the correlation matrix
        title (str): Title of the correlation matrix figure (also the experiment name)
        normalize (bool): Flag indicating if the values of the correlation matrix should be normalized
        prediction_output_path (str): Output prediction path
    """

484
485
    # THIS FUNCTION NEEDS TO BE VERIFIED!

486
487
488
489
490
491
    if normalize:
        correlation_matrix = correlation_matrix.astype('float') / correlation_matrix.sum(axis=1)[:, np.newaxis]

    plt.imshow(correlation_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
492
493
494
    tick_marks = np.arange(len(correlation_matrix))
    plt.xticks(tick_marks, correlation_matrix)
    plt.yticks(tick_marks, correlation_matrix)
495
    threshold = correlation_matrix.max() / 2.0
496
    for i, j in itertools.product(range(correlation_matrix.shape[0]), range(correlation_matrix.shape[1])):
497
498
499
500
501
502
503
        plt.text(j, i, format(correlation_matrix[i,j], '.4f'), horizontalalignment='center', color="white" if correlation_matrix[i,j] > threshold else "black")

    plt.tight_layout()
    plt.ylabel('Predicted')
    plt.xlabel('Targets')

    plt.savefig(prediction_output_path + '/' + title+ '.png')