Commit 2b63a9dc authored by Andrei-Claudiu Roibu's avatar Andrei-Claudiu Roibu 🖥

rewrote DataMapper for NIfTI files, deprecated several functions

parent 5af37db9
@@ -21,6 +21,8 @@ import torch
import torch.utils.data as data
import random
import configparser
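# fslpy utilities (newly imported in this commit): Image wraps a NIfTI volume on
# disk, and resampleToPixdims resamples an Image to a requested voxel size; both
# are used by the rewritten DataMapper below.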
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
def directory_reader(folder_location, write_txt=False):
    """Produces a list of data-tags which are accessible
@@ -90,98 +92,115 @@ def update_shuffling_flag(file_name):
    with open(file_name, 'w') as configfile:
        config.write(configfile)
def tract_sum_generator(folder_path):
    """Sums the tracts of different dMRI files

    TODO: CHANGE FUNCTION TO GENERATE DOWNSAMPLED DATA!
    THIS FUNCTION IS NOT DEPRECATED: SummedTractMaps are generated remotely.

    When performing subject-specific probabilistic diffusion tractography using standard-space protocols, 27 tracts are created.
    This function loops through all the tracts of each subject, sums them, and saves the summed tract map as a NIfTI (.nii.gz) file.

    Args:
        folder_path (str): A string containing the address of the required directory.
    """
    tractMapName = 'tracts/tractsNorm.nii.gz'

    subDirectoryList = directory_reader(folder_path)
    viableSubDirectories = len(subDirectoryList)
    counter = 0

    if not os.path.exists('/well/win/users/hsv459/functionmapper/datasets/dMRI'):
        if not os.path.exists('/well/win/users/hsv459/functionmapper/datasets'):
            os.mkdir('/well/win/users/hsv459/functionmapper/datasets')
        os.mkdir('/well/win/users/hsv459/functionmapper/datasets/dMRI')

    for subDirectory in subDirectoryList:
        tractedMapsPath = os.path.join(folder_path, str(subDirectory), 'dMRI/autoptx_preproc/tracts/')
        sum_flag = False  # Flag showing whether this is the first tract to be summed

        print("Summing the tracts of subject: {}/{}".format(counter + 1, viableSubDirectories))

        for tract in os.listdir(tractedMapsPath):
            if os.path.isdir(os.path.join(tractedMapsPath, tract)):
                tractedMapPath = os.path.join(tractedMapsPath, tract, tractMapName)

                tractedMapImg = nib.load(tractedMapPath)
                tractedMap = tractedMapImg.get_fdata()
                # The affine array stores the relationship between voxel coordinates
                # in the image data array and coordinates in the reference space.
                tractedMapAffine = tractedMapImg.affine

                if not sum_flag:
                    tractedMapSum = np.copy(tractedMap)
                    sum_flag = True
                else:
                    tractedMapSum = np.add(tractedMapSum, tractedMap)

        tractedMapSumPath = '/well/win/users/hsv459/functionmapper/datasets/dMRI'
        tractsSumName = str(subDirectory) + ".nii.gz"
        tractedMapSumImg = nib.Nifti1Image(tractedMapSum, tractedMapAffine)
        nib.save(tractedMapSumImg, os.path.join(tractedMapSumPath, tractsSumName))

        counter += 1

    return None
class DataMapper(data.Dataset):
    """Data Mapper Class

    This class represents a generic parent class for mapping between keys and data samples.
    The class is a subclass/child class of data.Dataset, inheriting its functionality.
    The class is composed of an __init__ constructor, a __getitem__(), supporting fetching a data sample for a given key, and a __len__(), which returns the size of the dataset.
    The class also has several other helper functions.

    Args:
        filename (str): Path to the file containing the relevant volume indicator numbers
        data_directory (str): Directory where the various subjects are stored.
        train_data_file (str): Internal path for each subject to the relevant normalized summed dMRI tracts
        train_output_targets (str): Internal path for each subject to the relevant rsfMRI data

    Returns:
        X_volume (torch.tensor): Tensor representation of the input data
        y_volume (torch.tensor): Tensor representation of the output data
        int: length of the dataset
    """

    def __init__(self, filename, data_directory, train_data_file, train_output_targets):
        # Initialize everything, and only store the text data file in memory.
        # Memory usage is limited by only storing the string information, not the actual volumes.
        # TODO: Currently, the timepoint in the fMRI data (y_volume) is hardcoded, only loading in the RSN. This needs to be updated in later iterations.
        self.filename = filename
        self.data_directory = data_directory
        self.train_data_file = train_data_file
        self.train_output_targets = train_output_targets
        self.sample_pairs = []
        self._get_datasets()

    def __len__(self):
        return len(self.sample_pairs)

    def __getitem__(self, index):
        X_path, y_path = self.sample_pairs[index]
        X_volume = torch.from_numpy(self.resample(X_path))
        y_volume = torch.from_numpy(self.convert_to_numpy(y_path)[:, :, :, 0])
        return X_volume, y_volume
    def _get_datasets(self):
        """File path generator

        Helper function which reads all the various strings and generates the required paths.
        """
        # Read the subject strings from file, then assemble the full paths.
        with open(self.filename) as files:
            lines = files.read().split('\n')

        for line in lines:
            if line == '':
                continue
            X_path = os.path.join(self.data_directory, line, self.train_data_file)
            y_path = os.path.join(self.data_directory, line, self.train_output_targets)
            self.sample_pairs.append((X_path, y_path))
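    # Note: the subject list file read above is assumed to contain one bare
    # subject identifier per line (the IDs below are hypothetical), e.g.:
    #
    #   100307
    #   100408
    #   100610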
    def resample(self, path):
        """dMRI Resample

        Helper function downsampling the dMRI data from 1mm to 2mm.
        This is due to GPU memory / RAM limitations during training.
        resampleToPixdims also carries out data smoothing.

        Args:
            path (str): Path to the relevant volume

        Returns:
            volume_resampled (np.array): Resampled volume
        """
        volume_resampled, _ = resampleToPixdims(self.read_data_files(path), (2, 2, 2))
        return volume_resampled
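    # A minimal sketch of the resampling step in isolation (the file name is
    # hypothetical): resampleToPixdims returns the resampled data array and its
    # new affine, and smooths the data when downsampling, as noted above.
    #
    #   img = Image('tracts_sum.nii.gz')
    #   data_2mm, affine_2mm = resampleToPixdims(img, (2, 2, 2))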
    def read_data_files(self, path):
        """Volume Reader

        Helper function reading the relevant volume.

        Args:
            path (str): Path to the relevant volume

        Returns:
            volume_image (class): fsl.data.image.Image class
        """
        volume_image = Image(path)
        return volume_image
    def convert_to_numpy(self, path):
        """Numpy wrapper

        Helper function wrapping the conversion of a volume to numpy.

        Args:
            path (str): Path to the relevant volume.

        Returns:
            volume_numpy (np.array): Numpy array representation of the volume data.
        """
        volume_numpy = self.read_data_files(path).data
        return volume_numpy
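# A minimal usage sketch for DataMapper (all paths and file names below are
# hypothetical, for illustration only):
#
#   dataset = DataMapper(
#       filename='train_subjects.txt',
#       data_directory='/path/to/datasets',
#       train_data_file='dMRI/tracts_sum.nii.gz',
#       train_output_targets='fMRI/rsfMRI.nii.gz',
#   )
#   loader = data.DataLoader(dataset, batch_size=1, shuffle=True)
#   X_volume, y_volume = dataset[0]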
def get_datasets(data_parameters):
    """Data Loader Function.
@@ -396,6 +415,99 @@ def load_and_preprocess_evaluation(file_path, orientation, min_max=True):
    return volume, header
# Deprecated Functions & Classes & Methods:
def tract_sum_generator(folder_path):
    """Sums the tracts of different dMRI files

    THIS FUNCTION IS NOT DEPRECATED: SummedTractMaps are generated remotely.

    When performing subject-specific probabilistic diffusion tractography using standard-space protocols, 27 tracts are created.
    This function loops through all the tracts of each subject, sums them, and saves the summed tract map as a NIfTI (.nii.gz) file.

    Args:
        folder_path (str): A string containing the address of the required directory.
    """
    tractMapName = 'tracts/tractsNorm.nii.gz'

    subDirectoryList = directory_reader(folder_path)
    viableSubDirectories = len(subDirectoryList)
    counter = 0

    if not os.path.exists('/well/win/users/hsv459/functionmapper/datasets/dMRI'):
        if not os.path.exists('/well/win/users/hsv459/functionmapper/datasets'):
            os.mkdir('/well/win/users/hsv459/functionmapper/datasets')
        os.mkdir('/well/win/users/hsv459/functionmapper/datasets/dMRI')

    for subDirectory in subDirectoryList:
        tractedMapsPath = os.path.join(folder_path, str(subDirectory), 'dMRI/autoptx_preproc/tracts/')
        sum_flag = False  # Flag showing whether this is the first tract to be summed

        print("Summing the tracts of subject: {}/{}".format(counter + 1, viableSubDirectories))

        for tract in os.listdir(tractedMapsPath):
            if os.path.isdir(os.path.join(tractedMapsPath, tract)):
                tractedMapPath = os.path.join(tractedMapsPath, tract, tractMapName)

                tractedMapImg = nib.load(tractedMapPath)
                tractedMap = tractedMapImg.get_fdata()
                # The affine array stores the relationship between voxel coordinates
                # in the image data array and coordinates in the reference space.
                tractedMapAffine = tractedMapImg.affine

                if not sum_flag:
                    tractedMapSum = np.copy(tractedMap)
                    sum_flag = True
                else:
                    tractedMapSum = np.add(tractedMapSum, tractedMap)

        tractedMapSumPath = '/well/win/users/hsv459/functionmapper/datasets/dMRI'
        tractsSumName = str(subDirectory) + ".nii.gz"
        tractedMapSumImg = nib.Nifti1Image(tractedMapSum, tractedMapAffine)
        nib.save(tractedMapSumImg, os.path.join(tractedMapSumPath, tractsSumName))

        counter += 1

    return None
class DataMapperHDF5(data.Dataset):
    """Data Mapper Class.

    THIS CLASS IS NOT DEPRECATED!

    This class represents a generic parent class for mapping between keys and data samples.
    The class is a subclass/child class of data.Dataset, inheriting its functionality.
    The class is composed of an __init__ constructor, a __getitem__(), supporting fetching a data sample for a given key, and a __len__(), which returns the size of the dataset.

    Args:
        X (HDF5 datafile): hierarchically organized input data
        y (HDF5 datafile): hierarchically organized output data

    Returns:
        input_data (torch.tensor): Tensor representation of the input data
        label_data (torch.tensor): Tensor representation of the output data
        int: length of the output
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, index):
        input_data = torch.from_numpy(self.X[index])
        label_data = torch.from_numpy(self.y[index])
        return input_data, label_data

    def __len__(self):
        return len(self.y)
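# A minimal usage sketch for DataMapperHDF5 (the file name and dataset keys are
# hypothetical): indexing goes straight into the HDF5 datasets, so the file
# must remain open while the mapper is in use.
#
#   import h5py
#   hdf5_file = h5py.File('training_data.h5', 'r')
#   dataset = DataMapperHDF5(hdf5_file['input'], hdf5_file['target'])
#   input_data, label_data = dataset[0]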
@@ -403,3 +515,4 @@ if __name__ == "__main__":
    # data_test_train_validation_split(folder_location, 90, 5)
    subDirectoryList = directory_reader(folder_location, write_txt=True)
    print(subDirectoryList)
    tract_sum_generator(folder_location)