"""Data Processing Functions

Description:

    This file contains the functions required for reading and loading the data
    into the network and preparing the various data files.

Usage:

    To use content from this folder, import the functions and instantiate them
    as you wish to use them:

        from utils.data_utils import function_name

"""

import os

import numpy as np
import torch
import torch.utils.data as data
import h5py
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
from fsl.utils.image.roi import roi

# Voxel size (per spatial axis) that evaluation volumes are resampled to.
_RESAMPLED_PIXDIMS = (2, 2, 2)

# (low, high) bounds per spatial axis used to crop a resampled volume:
# 81-9 = 72, 100-10 = 90, 77-0 = 77, i.e. the 72x90x77 crop described in
# load_and_preprocess_evaluation.
_CROP_BOUNDS = ((9, 81), (10, 100), (0, 77))


class DataMapper(data.Dataset):
    """Data Mapper Class

    This class represents a generic parent class for mapping between keys and
    data samples. It is a subclass of data.Dataset, inheriting its
    functionality. It is composed of an __init__ constructor, a __getitem__()
    supporting fetching a data sample for a given key, and __len__(), which
    returns the size of the dataset.

    Args:
        X: Indexable collection of input preprocessed volumes. Each item must
            be a NumPy array, because it is handed to torch.from_numpy.
        y: Indexable collection of target preprocessed volumes, with the same
            requirement as X.

    Returns:
        X_volume (torch.tensor): Tensor representation of the input data
        y_volume (torch.tensor): Tensor representation of the output data
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, index):
        # torch.from_numpy shares memory with the source array (no copy).
        X_volume = torch.from_numpy(self.X[index])
        y_volume = torch.from_numpy(self.y[index])
        return X_volume, y_volume

    def __len__(self):
        # The dataset size is defined by the number of targets.
        return len(self.y)


def _read_h5_dataset(file_path, dataset_name):
    """Read one whole dataset from an HDF5 file into memory, then close the file."""
    with h5py.File(file_path, 'r') as database:
        # Indexing with [()] loads the full dataset as an in-memory array, so
        # the result stays valid after the file handle is closed.
        return database[dataset_name][()]


def get_datasets(data_parameters):
    """Data Loader Function.

    This function loads the various data files and returns the relevant mapped
    datasets. Every HDF5 file is fully read into memory and closed again before
    the function returns.

    Args:
        data_parameters (dict): Dictionary containing relevant information for
            the datafiles, e.g.

            data_parameters = {
                "data_folder_name": "datasets",
                "input_data_train": "input_data_train.h5",
                "target_data_train": "target_data_train.h5",
                "input_data_validation": "input_data_validation.h5",
                "target_data_validation": "target_data_validation.h5",
            }

            The input files must contain a dataset named 'input' and the
            target files a dataset named 'target'.

    Returns:
        tuple: the relevant train and validation datasets, as two DataMapper
            instances (train first, validation second).
    """
    data_folder = data_parameters["data_folder_name"]

    def load(file_key, dataset_name):
        file_path = os.path.join(data_folder, data_parameters[file_key])
        return _read_h5_dataset(file_path, dataset_name)

    train_dataset = DataMapper(
        load("input_data_train", 'input'),
        load("target_data_train", 'target'),
    )
    validation_dataset = DataMapper(
        load("input_data_validation", 'input'),
        load("target_data_validation", 'target'),
    )
    return train_dataset, validation_dataset


def load_file_paths(data_directory, data_list, mapping_data_file,
                    targets_directory=None, target_file=None):
    """File Loader

    This function returns a list of combined file paths for the input and
    output data.

    Args:
        data_directory (str): Path to input data directory
        data_list (str): Path to a .txt file containing the input files for
            consideration
        mapping_data_file (str): Name of the input file inside each volume's
            directory
        targets_directory (str): Path to labelled data (Y-equivalent); None if
            during evaluation.
        target_file (str): Only checked for presence. Target paths are returned
            only when both targets_directory and target_file are given; the
            value itself is not part of the returned paths.

    Returns:
        file_paths (list): One entry per volume. Each entry is a list holding
            the input path and, when targets are requested, the target path
            (targets_directory joined with the volume name).
        volumes_to_be_used (list): List containing the volumes that will be used
    """
    volumes_to_be_used = load_subjects_from_path(data_directory, data_list)

    if targets_directory is None or target_file is None:
        file_paths = [
            [os.path.join(data_directory, volume, mapping_data_file)]
            for volume in volumes_to_be_used
        ]
    else:
        # NOTE(review): target_file is not joined into the target path here;
        # confirm whether callers expect targets_directory/volume/target_file.
        file_paths = [
            [
                os.path.join(data_directory, volume, mapping_data_file),
                os.path.join(targets_directory, volume),
            ]
            for volume in volumes_to_be_used
        ]

    return file_paths, volumes_to_be_used


def load_subjects_from_path(data_directory, data_list):
    """Text File Reader

    This function returns the list of volumes (subjects) to be used, read
    either from a text file or from the contents of the data directory.

    Args:
        data_directory (str): Path to input data directory; its entries are
            listed when no data_list is given.
        data_list (str): Path to a .txt file containing the input files for
            consideration, one per line. If empty or None, data_directory is
            listed instead.

    Returns:
        volumes_to_be_used (list): List containing the volumes that will be used
    """
    if data_list:
        with open(data_list) as data_list_file:
            volumes_to_be_used = data_list_file.read().splitlines()
    else:
        volumes_to_be_used = os.listdir(data_directory)

    return volumes_to_be_used


def load_and_preprocess_evaluation(file_path, crop_flag):
    """Load & Preprocessing before evaluation

    This function loads a nifti file, resamples it to 2x2x2 voxels and returns
    its volume and header information.

    Args:
        file_path (list): Sequence whose first element is the path to the
            desired file (the per-volume entries produced by load_file_paths
            have this form).
        crop_flag (bool): Flag indicating if the volumes should be cropped from
            91x109x91 to 72x90x77 to reduce storage space and speed-up
            training. Any truthy value enables cropping.

    Returns:
        volume (np.array): Array of image data (cropped if crop_flag is set).
        header (class): 'nibabel.nifti1.Nifti1Header' class object, containing
            image metadata
        xform (np.array): Array of shape (4, 4), containing the adjusted
            voxel-to-world transformation for the spatial dimensions of the
            resampled data
    """
    original_image = Image(file_path[0])

    # Resampling is needed regardless of cropping, so it is done once up front.
    resampled_data, xform = resampleToPixdims(original_image, _RESAMPLED_PIXDIMS)
    resampled_image = Image(
        resampled_data, header=original_image.header, xform=xform)

    if crop_flag:
        cropped = roi(resampled_image, _CROP_BOUNDS)
        volume = cropped.data
        header = cropped.header
        # NOTE(review): xform is the transform returned by the resampling step
        # and is not recomputed after cropping; the cropped header comes from
        # roi(). Confirm callers rely on the header for the cropped volume.
    else:
        volume = resampled_data
        header = resampled_image.header

    return volume, header, xform


def load_and_preprocess_targets(target_path, mean_mask_path):
    """Load & Preprocessing targets before evaluation

    This function loads a nifti file and returns its first volume together
    with a de-meaned version of it.

    Args:
        target_path (list): Sequence whose first element is the path to the
            desired target file
        mean_mask_path (str): Path to the dualreg subject mean mask

    Returns:
        target (np.array): First volume (index 0 along the fourth axis) of the
            target image.
        target_demeaned (np.array): The target from which the first volume of
            the mean mask image has been subtracted
    """
    # Both images are indexed as 4D; only the first volume of each is used.
    target = Image(target_path[0]).data[:, :, :, 0]
    group_mean = Image(mean_mask_path).data[:, :, :, 0]
    target_demeaned = np.subtract(target, group_mean)

    return target, target_demeaned