Commit 73483fde authored by Andrei-Claudiu Roibu's avatar Andrei-Claudiu Roibu 🖥
Browse files

fixed several bugs for cross-validation

parent b22c8428
......@@ -193,22 +193,28 @@ def train(data_parameters, training_parameters, network_parameters, misc_paramet
network_parameters, misc_parameters)
else:
for k in range of data_parameters['k_fold']:
print("Training initiated using K-fold Cross Validation!")
for k in range(data_parameters['k_fold']):
print("K-fold Number: {}".format(k+1))
k_fold_losses = []
data_parameters['train_list'] = os.path.join(
data_folder_name, 'train' + str(k+1)+'.txt')
data_parameters['data_folder_name'], 'train' + str(k+1)+'.txt')
data_parameters['validation_list'] = os.path.join(
data_folder_name, 'validation' + str(k+1)+'.txt')
training_parameters['final_model_output_file'])=final_model_output_file.replace(".pth.tar", str(k+1)+".pth.tar")
data_parameters['data_folder_name'], 'validation' + str(k+1)+'.txt')
training_parameters['final_model_output_file'] = training_parameters['final_model_output_file'].replace(
".pth.tar", str(k+1)+".pth.tar")
validation_loss=_train_runner(
validation_loss = _train_runner(
data_parameters, training_parameters, network_parameters, misc_parameters)
k_fold_losses.append(validation_loss)
mean_k_fold_loss=k_fold_losses.mean()
for k in range(data_parameters['k_fold']):
print("K-fold Number: {} Loss: {}".format(k+1, k_fold_losses[k]))
print("K-fold Cross Validation Avearge Loss: {}".format(np.mean(k_fold_losses)))
def evaluate_score(training_parameters, network_parameters, misc_parameters, evaluation_parameters):
"""Mapping Score Evaluator
......@@ -247,17 +253,17 @@ def evaluate_score(training_parameters, network_parameters, misc_parameters, eva
# TODO - NEED TO UPDATE THE DATA FUNCTIONS!
logWriter=LogWriter(number_of_classes = network_parameters['number_of_classes'],
logs_directory = misc_parameters['logs_directory'],
experiment_name = training_parameters['experiment_name']
logWriter = LogWriter(number_of_classes=network_parameters['number_of_classes'],
logs_directory=misc_parameters['logs_directory'],
experiment_name=training_parameters['experiment_name']
)
prediction_output_path=os.path.join(misc_parameters['experiments_directory'],
prediction_output_path = os.path.join(misc_parameters['experiments_directory'],
training_parameters['experiment_name'],
evaluation_parameters['saved_predictions_directory']
)
_=evaluations.evaluate_dice_score(trained_model_path = evaluation_parameters['trained_model_path'],
_ = evaluations.evaluate_dice_score(trained_model_path=evaluation_parameters['trained_model_path'],
number_of_classes=network_parameters['number_of_classes'],
data_directory=evaluation_parameters['data_directory'],
targets_directory=evaluation_parameters[
......@@ -401,19 +407,18 @@ if __name__ == '__main__':
data_test_train_validation_split(data_parameters['data_folder_name'],
data_parameters['test_percentage'],
data_parameters['subject_number'],
data_file= data_parameters['data_file'],
K_fold= data_parameters['k_fold']
data_file=data_parameters['data_file'],
K_fold=data_parameters['k_fold']
)
else:
data_test_train_validation_split(data_parameters['data_folder_name'],
data_parameters['test_percentage'],
data_parameters['subject_number'],
data_directory= data_parameters['data_directory'],
K_fold= data_parameters['k_fold']
data_directory=data_parameters['data_directory'],
K_fold=data_parameters['k_fold']
)
update_shuffling_flag('settings.ini')
if arguments.mode == 'train':
train(data_parameters, training_parameters,
network_parameters, misc_parameters)
......
......@@ -16,8 +16,8 @@ validation_data_file = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
validation_target_file = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"
[TRAINING]
training_batch_size = 2
validation_batch_size = 2
training_batch_size = 4
validation_batch_size = 4
use_pre_trained = False
pre_trained_path = "saved_models/finetuned_alldata.pth.tar"
experiment_name = "experiment_name"
......@@ -50,6 +50,7 @@ number_of_classes = 1
save_model_directory = "saved_models"
model_name = "BrainMapper"
logs_directory = "logs"
checkpoint_directory = "checkpoints"
device = 0
experiments_directory = "experiments"
......
......@@ -114,8 +114,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
subDirectoryList, test_size=test_percentage/100, random_state=42, shuffle=True)
np.savetxt(os.path.join(data_folder_name, 'test.txt'), test, fmt='%s')
print("Test={}".format(test))
if K_fold is None:
train, validation = train_test_split(
train_data, test_size=int(len(test)), random_state=42, shuffle=True)
......@@ -124,7 +122,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
'train.txt'), train, fmt='%s')
np.savetxt(os.path.join(data_folder_name, 'validation.txt'),
validation, fmt='%s')
print("Train={}, Validation={}".format(train, validation))
else:
k_fold = KFold(n_splits=K_fold)
k = 0
......@@ -136,8 +133,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_
np.savetxt(os.path.join(data_folder_name, 'validation' +
str(k+1)+'.txt'), validation, fmt='%s')
print("K={}, Train={}, Validation={}".format(k, train, validation))
k += 1
......@@ -620,58 +615,3 @@ def get_datasetsHDF5(data_parameters):
training_labels['label'][()]),
DataMapperHDF5(testing_data['data'][()], testing_labels['label'][()])
)
if __name__ == '__main__':
    # Ad-hoc driver: generate the train/validation/test split files for the
    # UK Biobank subject directories by calling
    # data_test_train_validation_split (defined earlier in this module).
    #
    # NOTE(review): the original block's leading indentation was lost in
    # extraction and it carried ~30 lines of commented-out experimentation
    # code (manual K-fold slicing, sklearn KFold trials); that dead code has
    # been removed and the structure restored.
    data_folder_name = "datasets"
    test_percentage = 5  # percent of subjects reserved for the held-out test set
    K_fold = None  # None -> single train/validation split; int -> K-fold CV files
    subject_number = None  # None -> presumably use every subject found; TODO confirm
    data_directory = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
    data_test_train_validation_split(
        data_folder_name, test_percentage, subject_number,
        data_directory=data_directory, K_fold=K_fold)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment