Commit 3e3c9426 authored by Andrei-Claudiu Roibu's avatar Andrei-Claudiu Roibu 🖥
Browse files

added variable for selecting smaller datasets

parent c199ec38
......@@ -352,12 +352,12 @@ if __name__ == '__main__':
misc_parameters = settings['MISC']
evaluation_parameters = settings['EVALUATION']
data_shuffling_flag = data_parameters['data_split_flag']
data_split_flag = data_parameters['data_split_flag']
if data_shuffling_flag == True:
if data_split_flag == True:
# Here we shuffle the data!
data_test_train_validation_split(
data_parameters['data_directory'], data_parameters['train_percentage'], data_parameters['validation_percentage'])
data_parameters['data_directory'], data_parameters['train_percentage'], data_parameters['validation_percentage'], data_parameters['subject_number'])
update_shuffling_flag('settings.ini')
# TODO: This might also be a very good point to add cross-validation later
......
[DATA]
data_directory = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
data_split_flag = True
data_split_flag = False
train_percentage = 90
validation_percentage = 5
subject_number = None
train_list = "train.txt"
validation_list = "validation.txt"
test_list = "test.txt"
......
......@@ -25,7 +25,7 @@ from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
def directory_reader(folder_location, write_txt=False):
def directory_reader(folder_location, subject_number=None, write_txt=False):
"""Produces a list of data-tags which are accessible
This function looks in a large data directory, and returns a list of sub-directories which are accessible.
......@@ -34,6 +34,7 @@ def directory_reader(folder_location, write_txt=False):
Args:
folder_location (str): A string containing the address of the required directory.
write_txt (bool): Flag indicating if a .txt file should be created.
subject_number (int): Number of subjects to be considered for a job. If None, every accessible sub-directory in the folder is considered. Useful when wanting to train on dataset sizes smaller than the total number of datapoints available in a data folder.
Returns:
A list of strings containing the available sub-directories. When write_txt is True, the same names are also written, one per line, to files.txt
"""
......@@ -41,20 +42,29 @@ def directory_reader(folder_location, write_txt=False):
out_file = open("files.txt", 'w')
subDirectoryList = []
number_of_subjects = 0
if subject_number is None:
subject_number = len(os.listdir(folder_location))
for directory in os.listdir(folder_location):
if os.path.isdir(os.path.join(folder_location, directory)):
filename = folder_location+directory
if os.access(filename, os.R_OK):
string = directory
if write_txt == True:
out_file.write(string)
out_file.write("\n")
subDirectoryList.append(directory)
if number_of_subjects < subject_number:
if os.path.isdir(os.path.join(folder_location, directory)):
filename = folder_location+directory
if os.access(filename, os.R_OK):
string = directory
if write_txt == True:
out_file.write(string)
out_file.write("\n")
subDirectoryList.append(directory)
number_of_subjects += 1
else:
break
return subDirectoryList
def data_test_train_validation_split(folder_location, train_percentage, validation_percentage):
def data_test_train_validation_split(folder_location, train_percentage, validation_percentage, subject_number):
"""Produces lists of train, test and validation data
This function looks at the list of all available directories and returns three lists of sub-directories.
......@@ -64,10 +74,11 @@ def data_test_train_validation_split(folder_location, train_percentage, validati
folder_location (str): A string containing the address of the required directory.
train_percentage (int): Percentage of data to be used for training
validation_percentage (int): Percentage of data to be used for validation
subject_number (int): Number of subjects to be considered for a job. Useful when wanting to train on dataset sizes smaller than the total number of datapoints available in a data folder.
"""
subDirectoryList = directory_reader(folder_location)
subDirectoryList = directory_reader(folder_location, subject_number)
random.shuffle(subDirectoryList)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment