data_evaluation_utils.py 20.7 KB
Newer Older
1
2
3
4
"""Data Evaluation Functions

Description:

5
    This folder contains several functions which, either on their own or included in larger pieces of software, perform data evaluation tasks.
6

7
8
9
10
11
Usage:

    To use content from this folder, import the functions and instantiate them as you wish to use them:

        from utils.data_evaluation_utils import function_name
12

13
14
TODO: Might be worth adding some information on uncertaintiy estimation, later down the line

15
16
17
"""

import os
18
import pickle
19
20
import numpy as np
import torch
21
22
import logging
import utils.data_utils as data_utils
23
import matplotlib.pyplot as plt
Andrei Roibu's avatar
Andrei Roibu committed
24
import pandas as pd
25
from fsl.data.image import Image
Andrei Roibu's avatar
Andrei Roibu committed
26
import itertools
27
28

log = logging.getLogger(__name__)
29

Andrei Roibu's avatar
Andrei Roibu committed
30

31
def evaluate_correlation(trained_model_path,
Andrei Roibu's avatar
Andrei Roibu committed
32
33
34
35
36
37
38
39
                         data_directory,
                         mapping_data_file,
                         target_data_file,
                         data_list,
                         prediction_output_path,
                         brain_mask_path,
                         rsfmri_mean_mask_path,
                         dmri_mean_mask_path,
40
                         mean_regression,
Andrei Roibu's avatar
Andrei Roibu committed
41
42
43
44
45
                         scaling_factors,
                         regression_factors,
                         device=0,
                         mode='evaluate',
                         exit_on_error=False):
46
    """Model Evaluator
47

48
    This function generates correlation map between the output and target rsfMRI maps
49
50
51
52

    Args:
        trained_model_path (str): Path to the location of the trained model
        data_directory (str): Path to input data directory
53
54
        mapping_data_file (str): Path to the input file
        target_data_file (str): Path to the target files
55
56
        data_list (str): Path to a .txt file containing the input files for consideration
        prediction_output_path (str): Output prediction path
57
        brain_mask_path (str): Path to the MNI brain mask file
Andrei Roibu's avatar
Andrei Roibu committed
58
59
        rsfmri_mean_mask_path (str): Path to the dualreg subject mean mask
        dmri_mean_mask_path (str): Path to the dualreg subject mean mask
60
        mean_regression (bool): Flag indicating if the targets should be de-meaned using the mean_mask_path
61
        scaling_factors (str): Path to the scaling factor file
Andrei Roibu's avatar
Andrei Roibu committed
62
        regression_factors (str): Path to the linear regression weights file
63
64
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        mode (str): Current run mode or phase
65
        exit_on_error (bool): Flag that triggers the raising of an exception
66

67
68
69
    Raises:
        FileNotFoundError: Error in reading the provided file!
        Exception: Error code execution!
70
71
    """

72
73
    log.info(
        "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)")
74
75
76
77
78
79
80
81
82
83
84
85
86

    with open(data_list) as data_list_file:
        volumes_to_be_used = data_list_file.read().splitlines()

    # Test if cuda is available and attempt to run on GPU

    cuda_available = torch.cuda.is_available()
    if type(device) == int:
        if cuda_available:
            model = torch.load(trained_model_path)
            torch.cuda.empty_cache()
            model.cuda(device)
        else:
87
88
            log.warning(
                "CUDA not available. Switching to CPU. Investigate behaviour!")
89
90
91
            device = 'cpu'

    if (type(device) == str) or not cuda_available:
92
93
        model = torch.load(trained_model_path,
                           map_location=torch.device(device))
94
95
96
97
98
99
100
101
102

    model.eval()

    # Create the prediction path folder if this is not available

    data_utils.create_folder(prediction_output_path)

    # Initiate the evaluation

103
    log.info("rsfMRI Correlation Started")
104
    file_paths = data_utils.load_file_paths(
105
        data_directory, data_list, mapping_data_file)
106

107
108
    target_paths = data_utils.load_file_paths(
        data_directory, data_list, target_data_file)
109

110
111
    correlation_matrix_complete = np.empty(len(file_paths), len(target_paths))
    correlation_matrix_demeaned = np.empty(len(file_paths), len(target_paths))
112

113
    with torch.no_grad():
114

115
116
117
        for volume_index, file_path in enumerate(file_paths):
            try:
                print("Correlating Volume {}/{}".format(volume_index+1, len(file_paths)))
118

Andrei Roibu's avatar
Andrei Roibu committed
119
120
                subject = volumes_to_be_used[volume_index]

121
                predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map(
Andrei Roibu's avatar
Andrei Roibu committed
122
123
124
                    file_path, subject, model,
                    device, cuda_available, brain_mask_path,
                    dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors,
125
                    regression_factors, mean_regression)
126

127
                for target_index, target_path in enumerate(target_paths):
Andrei Roibu's avatar
Andrei Roibu committed
128
129
                    target, target_demeaned = data_utils.load_and_preprocess_targets(
                        target_path, rsfmri_mean_mask_path)
130

Andrei Roibu's avatar
Andrei Roibu committed
131
132
133
134
                    r_predicted_complete = _pearson_correlation(
                        predicted_complete_volume, target)
                    r_predicted_demeaned = _pearson_correlation(
                        predicted_volume, target_demeaned)
135

Andrei Roibu's avatar
Andrei Roibu committed
136
137
138
139
                    correlation_matrix_complete[volume_index,
                                                target_index] = r_predicted_complete
                    correlation_matrix_demeaned[volume_index,
                                                target_index] = r_predicted_demeaned
140

141
142
                log.info("Correlated: " + volumes_to_be_used[volume_index] + " " + str(
                    volume_index + 1) + " out of " + str(len(volumes_to_be_used)))
143

144
145
146
147
148
            except FileNotFoundError as exception_expression:
                log.error("Error in reading the provided file!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)
149

150
151
152
153
154
            except Exception as exception_expression:
                log.error("Error code execution!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)
155

156
    log.info("rsfMRI Correlation Complete")
157

158
    # HERE WE NEED TO CALL THE PRINT FUNCTION TWICE
159

Andrei Roibu's avatar
Andrei Roibu committed
160
161
162
163
    _generate_correlation_matrix(correlation_matrix_complete, title='Complete Volume Correlation',
                                 prediction_output_path=prediction_output_path, normalize=False)
    _generate_correlation_matrix(correlation_matrix_demeaned, title='Particularity Volume Correlation',
                                 prediction_output_path=prediction_output_path, normalize=False)
164

165

166
def evaluate_mapping(trained_model_path,
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
167
168
169
170
                     data_directory,
                     mapping_data_file,
                     data_list,
                     prediction_output_path,
171
                     brain_mask_path,
Andrei Roibu's avatar
Andrei Roibu committed
172
173
                     dmri_mean_mask_path,
                     rsfmri_mean_mask_path,
174
175
                     mean_regression,
                     mean_subtraction,
176
                     scaling_factors,
Andrei Roibu's avatar
Andrei Roibu committed
177
                     regression_factors,
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
178
179
180
                     device=0,
                     mode='evaluate',
                     exit_on_error=False):
181
182
183
184
185
186
187
188
189
190
    """Model Evaluator

    This function generates the rsfMRI map for an input running on on a single axis or path

    Args:
        trained_model_path (str): Path to the location of the trained model
        data_directory (str): Path to input data directory
        mapping_data_file (str): Path to the input file
        data_list (str): Path to a .txt file containing the input files for consideration
        prediction_output_path (str): Output prediction path
191
        brain_mask_path (str): Path to the MNI brain mask file
Andrei Roibu's avatar
Andrei Roibu committed
192
193
        dmri_mean_mask_path (str): Path to the dualreg subject mean mask
        rsfmri_mean_mask_path (str): Path to the summed tract mean mask
194
195
        mean_regression (bool): Flag indicating if the targets should be de-meaned by regression using the mean_mask_path
        mean_subtraction (bool): Flag indicating if the targets should be de-meaned by subtraction using the mean_mask_path
196
        scaling_factors (str): Path to the scaling factor file
Andrei Roibu's avatar
Andrei Roibu committed
197
        regression_factors (str): Path to the linear regression weights file
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        mode (str): Current run mode or phase
        exit_on_error (bool): Flag that triggers the raising of an exception

    Raises:
        FileNotFoundError: Error in reading the provided file!
        Exception: Error code execution!
    """

    log.info(
        "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)")

    with open(data_list) as data_list_file:
        volumes_to_be_used = data_list_file.read().splitlines()

    # Test if cuda is available and attempt to run on GPU

    cuda_available = torch.cuda.is_available()
    if type(device) == int:
        if cuda_available:
            model = torch.load(trained_model_path)
            torch.cuda.empty_cache()
            model.cuda(device)
        else:
            log.warning(
                "CUDA not available. Switching to CPU. Investigate behaviour!")
            device = 'cpu'

    if (type(device) == str) or not cuda_available:
        model = torch.load(trained_model_path,
                           map_location=torch.device(device))

    model.eval()

    # Create the prediction path folder if this is not available

    data_utils.create_folder(prediction_output_path)

    # Initiate the evaluation

    log.info("rsfMRI Generation Started")
Andrei Roibu's avatar
Andrei Roibu committed
239
    file_paths, volumes_to_be_used = data_utils.load_file_paths(
Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
240
        data_directory, data_list, mapping_data_file)
241
242
243
244
245

    with torch.no_grad():

        for volume_index, file_path in enumerate(file_paths):
            try:
246
                print("Mapping Volume {}/{}".format(volume_index+1, len(file_paths)))
247
                # Generate volume & header
Andrei Roibu's avatar
Andrei Roibu committed
248
249
250

                subject = volumes_to_be_used[volume_index]

251
                predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map(
Andrei Roibu's avatar
Andrei Roibu committed
252
253
254
                    file_path, subject, model,
                    device, cuda_available, brain_mask_path,
                    dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors,
255
                    regression_factors, mean_regression, mean_subtraction)
256
257
258
259
260
261

                # Generate New Header Affine

                header_affines = np.array(
                    [header['srow_x'], header['srow_y'], header['srow_z'], [0, 0, 0, 1]])

Andrei-Claudiu Roibu's avatar
Andrei-Claudiu Roibu committed
262
263
                output_nifti_image = Image(
                    predicted_volume, header=header, xform=xform)
264
265
266
267
268
269
270
271
272

                output_nifti_path = os.path.join(
                    prediction_output_path, volumes_to_be_used[volume_index])

                if '.nii' not in output_nifti_path:
                    output_nifti_path += '.nii.gz'

                output_nifti_image.save(output_nifti_path)

273
                if mean_regression == True:
274
275
276
                    output_complete_nifti_image = Image(
                        predicted_complete_volume, header=header, xform=xform)

277
278
                    output_complete_nifti_path = os.path.join(
                        prediction_output_path, volumes_to_be_used[volume_index]) + '_complete'
279
280
281
282

                    if '.nii' not in output_complete_nifti_path:
                        output_complete_nifti_path += '.nii.gz'

283
284
                    output_complete_nifti_image.save(
                        output_complete_nifti_path)
285

286
287
288
                log.info("Processed: " + volumes_to_be_used[volume_index] + " " + str(
                    volume_index + 1) + " out of " + str(len(volumes_to_be_used)))

289
290
                print("Mapped Volumes saved in: ", prediction_output_path)

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
            except FileNotFoundError as exception_expression:
                log.error("Error in reading the provided file!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)

            except Exception as exception_expression:
                log.error("Error code execution!")
                log.exception(exception_expression)
                if exit_on_error:
                    raise(exception_expression)

    log.info("rsfMRI Generation Complete")


Andrei Roibu's avatar
Andrei Roibu committed
306
307
308
309
310
311
312
313
314
315
def _generate_volume_map(file_path,
                         subject,
                         model,
                         device,
                         cuda_available,
                         brain_mask_path,
                         dmri_mean_mask_path,
                         rsfmri_mean_mask_path,
                         scaling_factors,
                         regression_factors,
316
317
                         mean_regression=False,
                         mean_subtraction=False):
318
319
320
321
322
323
    """rsfMRI Volume Generator

    This function uses the trained model to generate a new volume

    Args:
        file_path (str): Path to the desired file
Andrei Roibu's avatar
Andrei Roibu committed
324
        subject (str): Subject ID of the subject volume to be regressed
325
326
327
        model (class): BrainMapper model class
        device (str/int): Device type used for training (int - GPU id, str- CPU)
        cuda_available (bool): Flag indicating if a cuda-enabled GPU is present
328
        brain_mask_path (str): Path to the MNI brain mask file
Andrei Roibu's avatar
Andrei Roibu committed
329
330
        dmri_mean_mask_path (str): Path to the group mean volume
        rsfmri_mean_mask_path (str): Path to the dualreg subject mean mask
331
        scaling_factors (str): Path to the scaling factor file
Andrei Roibu's avatar
Andrei Roibu committed
332
        regression_factors (str): Path to the linear regression weights file
333
334
        mean_regression (bool): Flag indicating if the targets should be de-meaned by regression using the mean_mask_path
        mean_subtraction (bool): Flag indicating if the targets should be de-meaned by subtraction using the mean_mask_path
335
336
337
338
339
340

    Returns
        predicted_volume (np.array): Array containing the information regarding the generated volume
        header (class): 'nibabel.nifti1.Nifti1Header' class object, containing volume metadata
    """

341
    volume, header, xform = data_utils.load_and_preprocess_evaluation(file_path)
342

343
344
    if mean_regression == True:
        volume = _regress_input(volume, subject, dmri_mean_mask_path, regression_factors)
Andrei Roibu's avatar
Andrei Roibu committed
345

346
347
348
    print('volume range:', np.min(volume), np.max(volume))

    volume = _scale_input(volume, scaling_factors)
Andrei Roibu's avatar
Andrei Roibu committed
349

350
    if len(volume.shape) == 5:
351
352
353
354
355
356
        volume = volume
    else:
        volume = volume[np.newaxis, np.newaxis, :, :, :]

    volume = torch.tensor(volume).type(torch.FloatTensor)

357
358
    if cuda_available and (type(device) == int):
        volume = volume.cuda(device)
359

360
    output = model(volume)
361
362
    output = (output.cpu().numpy()).astype('float32')
    output = np.squeeze(output)
Andrei Roibu's avatar
Andrei Roibu committed
363

364
365
    print('output range:', np.min(output), np.max(output))

366
367
    output = _rescale_output(output, scaling_factors)

368
369
    print('output rescaled:', np.min(output), np.max(output))

370
    MNI152_T1_2mm_brain_mask = Image(brain_mask_path).data
371

372
    if mean_regression == True or mean_subtraction == True:
Andrei Roibu's avatar
Andrei Roibu committed
373
        mean_mask = Image(rsfmri_mean_mask_path).data[:, :, :, 0]
374
375
376
377
378
        if mean_regression == True:
            weight = pd.read_pickle(regression_factors).loc[subject]['w_rsfMRI']
            predicted_complete_volume = np.add(output, np.multiply(weight, mean_mask))
        if mean_subtraction == True:
            predicted_complete_volume = np.add(output, mean_mask)
379

Andrei Roibu's avatar
Andrei Roibu committed
380
381
        print('predicted_complete_volume', np.min(
            predicted_complete_volume), np.max(predicted_complete_volume))
382

Andrei Roibu's avatar
Andrei Roibu committed
383
384
        predicted_complete_volume = np.multiply(
            predicted_complete_volume, MNI152_T1_2mm_brain_mask)
385

Andrei Roibu's avatar
Andrei Roibu committed
386
387
        print('predicted_complete_volume masked:', np.min(
            predicted_complete_volume), np.max(predicted_complete_volume))
388

389
390
    else:
        predicted_complete_volume = None
391

392
    predicted_volume = np.multiply(output, MNI152_T1_2mm_brain_mask)
393

Andrei Roibu's avatar
Andrei Roibu committed
394
395
    print('predicted_volume masked:', np.min(
        predicted_volume), np.max(predicted_volume))
396

397
    return predicted_complete_volume, predicted_volume, header, xform
398
399


400
401
402
403
404
405
406
407
408
409
410
411
412
def _scale_input(volume, scaling_factors):
    """Input Scaling

    This function reads the scaling factors from the saved file and then scales the data.

    Args:
        volume (np.array): Unscalled volume
        scaling_factors (str): Path to the scaling factor file

    Returns:
        scaled_volume (np.array): Scaled volume
    """

413
414
    # with open(scaling_factors, 'rb') as input_file:
    #     min_value, max_value, _, _ = pickle.load(input_file)
415

416
    # Steve Scaling
417
    min_value, max_value, _, _ = [0.0, 0.2, 0.0, 10.0]
418
419

    # Andrei Scaling
420
    # min_value, max_value, _, _ = [-0.0539, 0.0969, -12.094, 14.6319]
421
422
423
424
425

    # Eliminating outliers
    volume[volume > max_value] = max_value
    volume[volume < min_value] = min_value

426
    # Normalization to [0, 1]
427
    scaled_volume = np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value))
428
    # Scaling between [-1, 1]
429
    # scaled_volume = np.add(-1.0, np.multiply(2.0, np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value))))
430
431
432
433

    return scaled_volume


Andrei Roibu's avatar
Andrei Roibu committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
def _regress_input(volume, subject, dmri_mean_mask_path, regression_factors):
    """ Inputn Regression

    This function regresse the group mean from the input volume using the saved regression weights.

    TODO: This function repressents only a temporary solution. For deployment, a NN needs to be trained which predicts the relevant scaling factors.

    Args:
        volume (np.array): Unregressed volume
        subject (str): Subject ID of the subject volume to be regressed
        dmri_mean_mask_path (str): Path to the group mean volume
        regression_factors (str): Path to the linear regression weights file

    Returns:
        regressed_volume (np.array): Linear regressed volume

    """

    weight = pd.read_pickle(regression_factors).loc[subject]['w_dMRI']
    group_mean = Image(dmri_mean_mask_path).data
    regressed_volume = np.subtract(volume, np.multiply(weight, group_mean))

    return regressed_volume

Andrei Roibu's avatar
Andrei Roibu committed
458

459
460
461
462
463
464
465
466
467
468
469
470
471
def _rescale_output(volume, scaling_factors):
    """Output Rescaling

    This function reads the scaling factors from the saved file and then scales the data.

    Args:
        volume (np.array): Unscalled volume
        scaling_factors (str): Path to the scaling factor file

    Returns:
        rescaled_volume (np.array): Rescaled volume
    """

472
473
    # with open(scaling_factors, 'rb') as input_file:
    #     _, _, min_value, max_value = pickle.load(input_file)
474

475
    # Steve Scaling
476
    _, _, min_value, max_value = [0.0, 0.2, 0.0, 10.0]
477
478

    # Andrei Scaling
479
    # _, _, min_value, max_value = [-0.0539, 0.0969, -12.094, 14.6319]
480

481
    # Normalization to [0, 1]
482
    rescaled_volume = np.add(np.multiply(volume, np.subtract(max_value, min_value)), min_value)
483
    # Scaling between [-1, 1]
484
    # rescaled_volume = np.add(np.multiply(np.divide(np.add(volume, 1), 2), np.subtract(max_value, min_value)), min_value)
485
486
487
488

    return rescaled_volume


489
490
491
492
493
494
495
496
497
498
499
500
501
def _pearson_correlation(volume, target):
    """Calculate Pearson Correlation Coefficient

    This function calculates the pearson correlation coefficient between a predicted volume and the target volume

    Args:
        volume (np.array): The predicted volume
        target (np.array): The target volume

    Returns:
        r (np.float32): The Pearson Correlation Coefficient
    """

Andrei Roibu's avatar
Andrei Roibu committed
502
503
504
    r = np.sum(np.multiply(np.subtract(volume, volume.mean()), np.subtract(target, target.mean()))) / np.sqrt(np.multiply(
        np.sum(np.power(np.subtract(volume, volume.mean()), 2)), np.sum(np.power(np.subtract(target, target.mean()), 2))))

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
    return r


def _generate_correlation_matrix(correlation_matrix, title, prediction_output_path, normalize=False):
    """Visual correlation matrix generator

    This function generates a visual representation of the correlation matrix

    Args:
        correaltion_matrix (np.array): Array containing the correlation matrix
        title (str): Title of the correlation matrix figure (also the experiment name)
        normalize (bool): Flag indicating if the values of the correlation matrix should be normalized
        prediction_output_path (str): Output prediction path
    """

520
521
    # THIS FUNCTION NEEDS TO BE VERIFIED!

522
    if normalize:
Andrei Roibu's avatar
Andrei Roibu committed
523
524
        correlation_matrix = correlation_matrix.astype(
            'float') / correlation_matrix.sum(axis=1)[:, np.newaxis]
525
526
527
528

    plt.imshow(correlation_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
529
530
531
    tick_marks = np.arange(len(correlation_matrix))
    plt.xticks(tick_marks, correlation_matrix)
    plt.yticks(tick_marks, correlation_matrix)
532
    threshold = correlation_matrix.max() / 2.0
533
    for i, j in itertools.product(range(correlation_matrix.shape[0]), range(correlation_matrix.shape[1])):
Andrei Roibu's avatar
Andrei Roibu committed
534
535
        plt.text(j, i, format(correlation_matrix[i, j], '.4f'), horizontalalignment='center',
                 color="white" if correlation_matrix[i, j] > threshold else "black")
536
537
538
539
540

    plt.tight_layout()
    plt.ylabel('Predicted')
    plt.xlabel('Targets')

Andrei Roibu's avatar
Andrei Roibu committed
541
    plt.savefig(prediction_output_path + '/' + title + '.png')