Commit 73483fde authored by Andrei-Claudiu Roibu

fixed several bugs for cross-validation

parent b22c8428
@@ -155,10 +155,10 @@ def train(data_parameters, training_parameters, network_parameters, misc_paramet
 number_of_classes=network_parameters['number_of_classes'],
 experiment_name=training_parameters['experiment_name'],
 optimizer_arguments={'lr': training_parameters['learning_rate'],
 'betas': training_parameters['optimizer_beta'],
 'eps': training_parameters['optimizer_epsilon'],
 'weight_decay': training_parameters['optimizer_weigth_decay']
 },
 model_name=misc_parameters['model_name'],
 number_epochs=training_parameters['number_of_epochs'],
 loss_log_period=training_parameters['loss_log_period'],
@@ -193,22 +193,28 @@ def train(data_parameters, training_parameters, network_parameters, misc_paramet
 network_parameters, misc_parameters)
 else:
-for k in range of data_parameters['k_fold']:
+print("Training initiated using K-fold Cross Validation!")
+for k in range(data_parameters['k_fold']):
+print("K-fold Number: {}".format(k+1))
 k_fold_losses = []
 data_parameters['train_list'] = os.path.join(
-data_folder_name, 'train' + str(k+1)+'.txt')
+data_parameters['data_folder_name'], 'train' + str(k+1)+'.txt')
 data_parameters['validation_list'] = os.path.join(
-data_folder_name, 'validation' + str(k+1)+'.txt')
+data_parameters['data_folder_name'], 'validation' + str(k+1)+'.txt')
-training_parameters['final_model_output_file'])=final_model_output_file.replace(".pth.tar", str(k+1)+".pth.tar")
+training_parameters['final_model_output_file'] = training_parameters['final_model_output_file'].replace(
+".pth.tar", str(k+1)+".pth.tar")
-validation_loss=_train_runner(
+validation_loss = _train_runner(
 data_parameters, training_parameters, network_parameters, misc_parameters)
 k_fold_losses.append(validation_loss)
-mean_k_fold_loss=k_fold_losses.mean()
+for k in range(data_parameters['k_fold']):
+print("K-fold Number: {} Loss: {}".format(k+1, k_fold_losses[k]))
+print("K-fold Cross Validation Avearge Loss: {}".format(np.mean(k_fold_losses)))

 def evaluate_score(training_parameters, network_parameters, misc_parameters, evaluation_parameters):
 """Mapping Score Evaluator
@@ -247,17 +253,17 @@ def evaluate_score(training_parameters, network_parameters, misc_parameters, eva
 # TODO - NEED TO UPDATE THE DATA FUNCTIONS!
-logWriter=LogWriter(number_of_classes = network_parameters['number_of_classes'],
-logs_directory = misc_parameters['logs_directory'],
-experiment_name = training_parameters['experiment_name']
+logWriter = LogWriter(number_of_classes=network_parameters['number_of_classes'],
+logs_directory=misc_parameters['logs_directory'],
+experiment_name=training_parameters['experiment_name']
 )
-prediction_output_path=os.path.join(misc_parameters['experiments_directory'],
+prediction_output_path = os.path.join(misc_parameters['experiments_directory'],
 training_parameters['experiment_name'],
 evaluation_parameters['saved_predictions_directory']
 )
-_=evaluations.evaluate_dice_score(trained_model_path = evaluation_parameters['trained_model_path'],
+_ = evaluations.evaluate_dice_score(trained_model_path=evaluation_parameters['trained_model_path'],
 number_of_classes=network_parameters['number_of_classes'],
 data_directory=evaluation_parameters['data_directory'],
 targets_directory=evaluation_parameters[
@@ -398,31 +404,30 @@ if __name__ == '__main__':
 if data_parameters['data_split_flag'] == True:
 if data_parameters['use_data_file'] == True:
 data_test_train_validation_split(data_parameters['data_folder_name'],
 data_parameters['test_percentage'],
 data_parameters['subject_number'],
-data_file= data_parameters['data_file'],
-K_fold= data_parameters['k_fold']
+data_file=data_parameters['data_file'],
+K_fold=data_parameters['k_fold']
 )
 else:
 data_test_train_validation_split(data_parameters['data_folder_name'],
 data_parameters['test_percentage'],
 data_parameters['subject_number'],
-data_directory= data_parameters['data_directory'],
-K_fold= data_parameters['k_fold']
+data_directory=data_parameters['data_directory'],
+K_fold=data_parameters['k_fold']
 )
 update_shuffling_flag('settings.ini')
 if arguments.mode == 'train':
 train(data_parameters, training_parameters,
 network_parameters, misc_parameters)
 # NOTE: THE EVAL FUNCTIONS HAVE NOT YET BEEN DEBUGGED (16/04/20)
 elif arguments.mode == 'evaluate-score':
 evaluate_score(training_parameters,
 network_parameters, misc_parameters, evaluation_parameters)
 elif arguments.mode == 'evaluate-mapping':
 logging.basicConfig(filename='evaluate-mapping-error.log')
 if arguments.settings_path is not None:
...
@@ -16,8 +16,8 @@ validation_data_file = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
 validation_target_file = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"
 [TRAINING]
-training_batch_size = 2
-validation_batch_size = 2
+training_batch_size = 4
+validation_batch_size = 4
 use_pre_trained = False
 pre_trained_path = "saved_models/finetuned_alldata.pth.tar"
 experiment_name = "experiment_name"
@@ -50,6 +50,7 @@ number_of_classes = 1
 save_model_directory = "saved_models"
 model_name = "BrainMapper"
 logs_directory = "logs"
+checkpoint_directory = "checkpoints"
 device = 0
 experiments_directory = "experiments"
...
@@ -114,8 +114,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
 subDirectoryList, test_size=test_percentage/100, random_state=42, shuffle=True)
 np.savetxt(os.path.join(data_folder_name, 'test.txt'), test, fmt='%s')
-print("Test={}".format(test))
 if K_fold is None:
 train, validation = train_test_split(
 train_data, test_size=int(len(test)), random_state=42, shuffle=True)
@@ -124,7 +122,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
 'train.txt'), train, fmt='%s')
 np.savetxt(os.path.join(data_folder_name, 'validation.txt'),
 validation, fmt='%s')
-print("Train={}, Validation={}".format(train, validation))
 else:
 k_fold = KFold(n_splits=K_fold)
 k = 0
@@ -136,8 +133,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
 np.savetxt(os.path.join(data_folder_name, 'validation' +
 str(k+1)+'.txt'), validation, fmt='%s')
-print("K={}, Train={}, Validation={}".format(k, train, validation))
 k += 1
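For context, a minimal sketch of the split logic these hunks touch, with the debug prints now removed: an initial held-out test set is written, then sklearn's KFold writes one train/validation list pair per fold. It assumes subjects is a 1-D NumPy array of subject IDs; the helper name write_k_fold_lists is illustrative.

    # Illustrative sketch only; assumes `subjects` is a 1-D NumPy array of subject IDs.
    import os
    import numpy as np
    from sklearn.model_selection import KFold, train_test_split

    def write_k_fold_lists(subjects, data_folder_name, test_percentage, K_fold):
        # Hold out the test set first, then split the remainder into K folds.
        train_data, test = train_test_split(
            subjects, test_size=test_percentage / 100, random_state=42, shuffle=True)
        np.savetxt(os.path.join(data_folder_name, 'test.txt'), test, fmt='%s')
        for k, (train_index, validation_index) in enumerate(KFold(n_splits=K_fold).split(train_data)):
            train, validation = train_data[train_index], train_data[validation_index]
            np.savetxt(os.path.join(data_folder_name,
                                    'train' + str(k + 1) + '.txt'), train, fmt='%s')
            np.savetxt(os.path.join(data_folder_name,
                                    'validation' + str(k + 1) + '.txt'), validation, fmt='%s')

With K_fold=5 this yields train1.txt through train5.txt and matching validation files, which the cross-validation loop above consumes.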
@@ -620,58 +615,3 @@ def get_datasetsHDF5(data_parameters):
 training_labels['label'][()]),
 DataMapperHDF5(testing_data['data'][()], testing_labels['label'][()])
 )
-if __name__ == '__main__':
-# data_file_path = 'train.txt'
-# subDirectoryList = data_file_reader(data_file_path)
-# print(subDirectoryList)
-# print(type(subDirectoryList))
-# folder_location = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
-# subDirectoryList2 = directory_reader(folder_location)
-# print(subDirectoryList2)
-# print(type(subDirectoryList2))
-data_folder_name = "datasets"
-train_percentage = 90
-test_percentage = 5
-K_fold = None
-subject_number = None
-data_directory = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
-data_test_train_validation_split(
-data_folder_name, test_percentage, subject_number, data_directory=data_directory, K_fold=K_fold)
-# data_test_train_validation_split_Kfold_cross_validation(data_folder_name, K_fold, subject_number, data_directory = data_directory)
-# data = np.arange(23)
-# K = 10
-# test_size = int(len(data)/K)
-# for k in range(K):
-# if k == 0:
-# test_slice, remainder = np.split(data.copy(), [test_size], axis=0)
-# print("k= {}, test_slice={}, remainder={}".format(k, test_slice, remainder))
-# else:
-# remainder[(k-1) * test_size: k *test_size], test_slice = test_slice, remainder[(k-1) * test_size: k * test_size].copy()
-# print("k= {}, test_slice={}, remainder={}".format(k, test_slice, remainder))
-# print('SKLEARN TIME!')
-# from sklearn.model_selection import KFold, train_test_split
-# kf = KFold(n_splits=K)
-# k = 0
-# train_data, test_data = train_test_split(data, test_size= 0.1)
-# print('train_data= {}, test_data={}'.format(train_data, test_data))
-# for train_index, test_index in kf.split(train_data):
-# train, test = train_data[train_index], train_data[test_index]
-# print("k= {}, val_slice={}, train={}".format(k, test, train))
-# k+=1