"""Data Evaluation Functions Description: This folder contains several functions which, either on their own or included in larger pieces of software, perform data evaluation tasks. Usage: To use content from this folder, import the functions and instantiate them as you wish to use them: from utils.data_evaluation_utils import function_name TODO: Might be worth adding some information on uncertaintiy estimation, later down the line """ import os import pickle import numpy as np import torch import logging import utils.data_utils as data_utils import matplotlib.pyplot as plt import pandas as pd from fsl.data.image import Image import itertools log = logging.getLogger(__name__) def evaluate_correlation(trained_model_path, data_directory, mapping_data_file, target_data_file, data_list, prediction_output_path, brain_mask_path, rsfmri_mean_mask_path, dmri_mean_mask_path, mean_regression, scaling_factors, regression_factors, device=0, mode='evaluate', exit_on_error=False): """Model Evaluator This function generates correlation map between the output and target rsfMRI maps Args: trained_model_path (str): Path to the location of the trained model data_directory (str): Path to input data directory mapping_data_file (str): Path to the input file target_data_file (str): Path to the target files data_list (str): Path to a .txt file containing the input files for consideration prediction_output_path (str): Output prediction path brain_mask_path (str): Path to the MNI brain mask file rsfmri_mean_mask_path (str): Path to the dualreg subject mean mask dmri_mean_mask_path (str): Path to the dualreg subject mean mask mean_regression (bool): Flag indicating if the targets should be de-meaned using the mean_mask_path scaling_factors (str): Path to the scaling factor file regression_factors (str): Path to the linear regression weights file device (str/int): Device type used for training (int - GPU id, str- CPU) mode (str): Current run mode or phase exit_on_error (bool): Flag that triggers the raising of an exception Raises: FileNotFoundError: Error in reading the provided file! Exception: Error code execution! """ log.info( "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)") with open(data_list) as data_list_file: volumes_to_be_used = data_list_file.read().splitlines() # Test if cuda is available and attempt to run on GPU cuda_available = torch.cuda.is_available() if type(device) == int: if cuda_available: model = torch.load(trained_model_path) torch.cuda.empty_cache() model.cuda(device) else: log.warning( "CUDA not available. Switching to CPU. Investigate behaviour!") device = 'cpu' if (type(device) == str) or not cuda_available: model = torch.load(trained_model_path, map_location=torch.device(device)) model.eval() # Create the prediction path folder if this is not available data_utils.create_folder(prediction_output_path) # Initiate the evaluation log.info("rsfMRI Correlation Started") file_paths = data_utils.load_file_paths( data_directory, data_list, mapping_data_file) target_paths = data_utils.load_file_paths( data_directory, data_list, target_data_file) correlation_matrix_complete = np.empty(len(file_paths), len(target_paths)) correlation_matrix_demeaned = np.empty(len(file_paths), len(target_paths)) with torch.no_grad(): for volume_index, file_path in enumerate(file_paths): try: print("Correlating Volume {}/{}".format(volume_index+1, len(file_paths))) subject = volumes_to_be_used[volume_index] predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map( file_path, subject, model, device, cuda_available, brain_mask_path, dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors, regression_factors, mean_regression) for target_index, target_path in enumerate(target_paths): target, target_demeaned = data_utils.load_and_preprocess_targets( target_path, rsfmri_mean_mask_path) r_predicted_complete = _pearson_correlation( predicted_complete_volume, target) r_predicted_demeaned = _pearson_correlation( predicted_volume, target_demeaned) correlation_matrix_complete[volume_index, target_index] = r_predicted_complete correlation_matrix_demeaned[volume_index, target_index] = r_predicted_demeaned log.info("Correlated: " + volumes_to_be_used[volume_index] + " " + str( volume_index + 1) + " out of " + str(len(volumes_to_be_used))) except FileNotFoundError as exception_expression: log.error("Error in reading the provided file!") log.exception(exception_expression) if exit_on_error: raise(exception_expression) except Exception as exception_expression: log.error("Error code execution!") log.exception(exception_expression) if exit_on_error: raise(exception_expression) log.info("rsfMRI Correlation Complete") # HERE WE NEED TO CALL THE PRINT FUNCTION TWICE _generate_correlation_matrix(correlation_matrix_complete, title='Complete Volume Correlation', prediction_output_path=prediction_output_path, normalize=False) _generate_correlation_matrix(correlation_matrix_demeaned, title='Particularity Volume Correlation', prediction_output_path=prediction_output_path, normalize=False) def evaluate_mapping(trained_model_path, data_directory, mapping_data_file, data_list, prediction_output_path, brain_mask_path, dmri_mean_mask_path, rsfmri_mean_mask_path, mean_regression, mean_subtraction, scaling_factors, regression_factors, device=0, mode='evaluate', exit_on_error=False): """Model Evaluator This function generates the rsfMRI map for an input running on on a single axis or path Args: trained_model_path (str): Path to the location of the trained model data_directory (str): Path to input data directory mapping_data_file (str): Path to the input file data_list (str): Path to a .txt file containing the input files for consideration prediction_output_path (str): Output prediction path brain_mask_path (str): Path to the MNI brain mask file dmri_mean_mask_path (str): Path to the dualreg subject mean mask rsfmri_mean_mask_path (str): Path to the summed tract mean mask mean_regression (bool): Flag indicating if the targets should be de-meaned by regression using the mean_mask_path mean_subtraction (bool): Flag indicating if the targets should be de-meaned by subtraction using the mean_mask_path scaling_factors (str): Path to the scaling factor file regression_factors (str): Path to the linear regression weights file device (str/int): Device type used for training (int - GPU id, str- CPU) mode (str): Current run mode or phase exit_on_error (bool): Flag that triggers the raising of an exception Raises: FileNotFoundError: Error in reading the provided file! Exception: Error code execution! """ log.info( "Started Evaluation. Check tensorboard for plots (if a LogWriter is provided)") with open(data_list) as data_list_file: volumes_to_be_used = data_list_file.read().splitlines() # Test if cuda is available and attempt to run on GPU cuda_available = torch.cuda.is_available() if type(device) == int: if cuda_available: model = torch.load(trained_model_path) torch.cuda.empty_cache() model.cuda(device) else: log.warning( "CUDA not available. Switching to CPU. Investigate behaviour!") device = 'cpu' if (type(device) == str) or not cuda_available: model = torch.load(trained_model_path, map_location=torch.device(device)) model.eval() # Create the prediction path folder if this is not available data_utils.create_folder(prediction_output_path) # Initiate the evaluation log.info("rsfMRI Generation Started") file_paths, volumes_to_be_used = data_utils.load_file_paths( data_directory, data_list, mapping_data_file) with torch.no_grad(): for volume_index, file_path in enumerate(file_paths): try: print("Mapping Volume {}/{}".format(volume_index+1, len(file_paths))) # Generate volume & header subject = volumes_to_be_used[volume_index] predicted_complete_volume, predicted_volume, header, xform = _generate_volume_map( file_path, subject, model, device, cuda_available, brain_mask_path, dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors, regression_factors, mean_regression, mean_subtraction) # Generate New Header Affine header_affines = np.array( [header['srow_x'], header['srow_y'], header['srow_z'], [0, 0, 0, 1]]) output_nifti_image = Image( predicted_volume, header=header, xform=xform) output_nifti_path = os.path.join( prediction_output_path, volumes_to_be_used[volume_index]) if '.nii' not in output_nifti_path: output_nifti_path += '.nii.gz' output_nifti_image.save(output_nifti_path) if mean_regression == True: output_complete_nifti_image = Image( predicted_complete_volume, header=header, xform=xform) output_complete_nifti_path = os.path.join( prediction_output_path, volumes_to_be_used[volume_index]) + '_complete' if '.nii' not in output_complete_nifti_path: output_complete_nifti_path += '.nii.gz' output_complete_nifti_image.save( output_complete_nifti_path) log.info("Processed: " + volumes_to_be_used[volume_index] + " " + str( volume_index + 1) + " out of " + str(len(volumes_to_be_used))) print("Mapped Volumes saved in: ", prediction_output_path) except FileNotFoundError as exception_expression: log.error("Error in reading the provided file!") log.exception(exception_expression) if exit_on_error: raise(exception_expression) except Exception as exception_expression: log.error("Error code execution!") log.exception(exception_expression) if exit_on_error: raise(exception_expression) log.info("rsfMRI Generation Complete") def _generate_volume_map(file_path, subject, model, device, cuda_available, brain_mask_path, dmri_mean_mask_path, rsfmri_mean_mask_path, scaling_factors, regression_factors, mean_regression=False, mean_subtraction=False): """rsfMRI Volume Generator This function uses the trained model to generate a new volume Args: file_path (str): Path to the desired file subject (str): Subject ID of the subject volume to be regressed model (class): BrainMapper model class device (str/int): Device type used for training (int - GPU id, str- CPU) cuda_available (bool): Flag indicating if a cuda-enabled GPU is present brain_mask_path (str): Path to the MNI brain mask file dmri_mean_mask_path (str): Path to the group mean volume rsfmri_mean_mask_path (str): Path to the dualreg subject mean mask scaling_factors (str): Path to the scaling factor file regression_factors (str): Path to the linear regression weights file mean_regression (bool): Flag indicating if the targets should be de-meaned by regression using the mean_mask_path mean_subtraction (bool): Flag indicating if the targets should be de-meaned by subtraction using the mean_mask_path Returns predicted_volume (np.array): Array containing the information regarding the generated volume header (class): 'nibabel.nifti1.Nifti1Header' class object, containing volume metadata """ volume, header, xform = data_utils.load_and_preprocess_evaluation(file_path) if mean_regression == True: volume = _regress_input(volume, subject, dmri_mean_mask_path, regression_factors) print('volume range:', np.min(volume), np.max(volume)) volume = _scale_input(volume, scaling_factors) if len(volume.shape) == 5: volume = volume else: volume = volume[np.newaxis, np.newaxis, :, :, :] volume = torch.tensor(volume).type(torch.FloatTensor) if cuda_available and (type(device) == int): volume = volume.cuda(device) output = model(volume) output = (output.cpu().numpy()).astype('float32') output = np.squeeze(output) print('output range:', np.min(output), np.max(output)) output = _rescale_output(output, scaling_factors) print('output rescaled:', np.min(output), np.max(output)) MNI152_T1_2mm_brain_mask = Image(brain_mask_path).data if mean_regression == True or mean_subtraction == True: mean_mask = Image(rsfmri_mean_mask_path).data[:, :, :, 0] if mean_regression == True: weight = pd.read_pickle(regression_factors).loc[subject]['w_rsfMRI'] predicted_complete_volume = np.add(output, np.multiply(weight, mean_mask)) if mean_subtraction == True: predicted_complete_volume = np.add(output, mean_mask) print('predicted_complete_volume', np.min( predicted_complete_volume), np.max(predicted_complete_volume)) predicted_complete_volume = np.multiply( predicted_complete_volume, MNI152_T1_2mm_brain_mask) print('predicted_complete_volume masked:', np.min( predicted_complete_volume), np.max(predicted_complete_volume)) else: predicted_complete_volume = None predicted_volume = np.multiply(output, MNI152_T1_2mm_brain_mask) print('predicted_volume masked:', np.min( predicted_volume), np.max(predicted_volume)) return predicted_complete_volume, predicted_volume, header, xform def _scale_input(volume, scaling_factors): """Input Scaling This function reads the scaling factors from the saved file and then scales the data. Args: volume (np.array): Unscalled volume scaling_factors (str): Path to the scaling factor file Returns: scaled_volume (np.array): Scaled volume """ # with open(scaling_factors, 'rb') as input_file: # min_value, max_value, _, _ = pickle.load(input_file) # Steve Scaling min_value, max_value, _, _ = [0.0, 0.2, 0.0, 10.0] # Andrei Scaling # min_value, max_value, _, _ = [-0.0539, 0.0969, -12.094, 14.6319] # Eliminating outliers volume[volume > max_value] = max_value volume[volume < min_value] = min_value # Normalization to [0, 1] scaled_volume = np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value)) # Scaling between [-1, 1] # scaled_volume = np.add(-1.0, np.multiply(2.0, np.divide(np.subtract(volume, min_value), np.subtract(max_value, min_value)))) return scaled_volume def _regress_input(volume, subject, dmri_mean_mask_path, regression_factors): """ Inputn Regression This function regresse the group mean from the input volume using the saved regression weights. TODO: This function repressents only a temporary solution. For deployment, a NN needs to be trained which predicts the relevant scaling factors. Args: volume (np.array): Unregressed volume subject (str): Subject ID of the subject volume to be regressed dmri_mean_mask_path (str): Path to the group mean volume regression_factors (str): Path to the linear regression weights file Returns: regressed_volume (np.array): Linear regressed volume """ weight = pd.read_pickle(regression_factors).loc[subject]['w_dMRI'] group_mean = Image(dmri_mean_mask_path).data regressed_volume = np.subtract(volume, np.multiply(weight, group_mean)) return regressed_volume def _rescale_output(volume, scaling_factors): """Output Rescaling This function reads the scaling factors from the saved file and then scales the data. Args: volume (np.array): Unscalled volume scaling_factors (str): Path to the scaling factor file Returns: rescaled_volume (np.array): Rescaled volume """ # with open(scaling_factors, 'rb') as input_file: # _, _, min_value, max_value = pickle.load(input_file) # Steve Scaling _, _, min_value, max_value = [0.0, 0.2, 0.0, 10.0] # Andrei Scaling # _, _, min_value, max_value = [-0.0539, 0.0969, -12.094, 14.6319] # Normalization to [0, 1] rescaled_volume = np.add(np.multiply(volume, np.subtract(max_value, min_value)), min_value) # Scaling between [-1, 1] # rescaled_volume = np.add(np.multiply(np.divide(np.add(volume, 1), 2), np.subtract(max_value, min_value)), min_value) return rescaled_volume def _pearson_correlation(volume, target): """Calculate Pearson Correlation Coefficient This function calculates the pearson correlation coefficient between a predicted volume and the target volume Args: volume (np.array): The predicted volume target (np.array): The target volume Returns: r (np.float32): The Pearson Correlation Coefficient """ r = np.sum(np.multiply(np.subtract(volume, volume.mean()), np.subtract(target, target.mean()))) / np.sqrt(np.multiply( np.sum(np.power(np.subtract(volume, volume.mean()), 2)), np.sum(np.power(np.subtract(target, target.mean()), 2)))) return r def _generate_correlation_matrix(correlation_matrix, title, prediction_output_path, normalize=False): """Visual correlation matrix generator This function generates a visual representation of the correlation matrix Args: correaltion_matrix (np.array): Array containing the correlation matrix title (str): Title of the correlation matrix figure (also the experiment name) normalize (bool): Flag indicating if the values of the correlation matrix should be normalized prediction_output_path (str): Output prediction path """ # THIS FUNCTION NEEDS TO BE VERIFIED! if normalize: correlation_matrix = correlation_matrix.astype( 'float') / correlation_matrix.sum(axis=1)[:, np.newaxis] plt.imshow(correlation_matrix, interpolation='nearest', cmap=plt.cm.Blues) plt.title(title) plt.colorbar() tick_marks = np.arange(len(correlation_matrix)) plt.xticks(tick_marks, correlation_matrix) plt.yticks(tick_marks, correlation_matrix) threshold = correlation_matrix.max() / 2.0 for i, j in itertools.product(range(correlation_matrix.shape[0]), range(correlation_matrix.shape[1])): plt.text(j, i, format(correlation_matrix[i, j], '.4f'), horizontalalignment='center', color="white" if correlation_matrix[i, j] > threshold else "black") plt.tight_layout() plt.ylabel('Predicted') plt.xlabel('Targets') plt.savefig(prediction_output_path + '/' + title + '.png')