Andrei-Claudiu Roibu / BrainMapper / Commits

Commit 73483fde, authored Apr 22, 2020 by Andrei-Claudiu Roibu

    fixed several bugs for cross-validation

parent b22c8428
Changes: 3 files
run.py
...
...
@@ -155,10 +155,10 @@ def train(data_parameters, training_parameters, network_parameters, misc_parameters):
                     number_of_classes=network_parameters['number_of_classes'],
                     experiment_name=training_parameters['experiment_name'],
                     optimizer_arguments={'lr': training_parameters['learning_rate'],
-                                         'betas': training_parameters['optimizer_beta'],
-                                         'eps': training_parameters['optimizer_epsilon'],
-                                         'weight_decay': training_parameters['optimizer_weigth_decay']
-                                         },
+                                         'betas': training_parameters['optimizer_beta'],
+                                         'eps': training_parameters['optimizer_epsilon'],
+                                         'weight_decay': training_parameters['optimizer_weigth_decay']
+                                         },
                     model_name=misc_parameters['model_name'],
                     number_epochs=training_parameters['number_of_epochs'],
                     loss_log_period=training_parameters['loss_log_period'],
...
...
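For context, the optimizer_arguments dictionary built above uses keys that match the keyword arguments of a torch.optim.Adam-style optimizer. Below is a minimal sketch of how such a dictionary can be unpacked; the placeholder model, the parameter values, and the assumption that the project's Solver forwards these to Adam are all illustrative, not taken from this diff.

import torch
import torch.nn as nn

# Hypothetical values standing in for the settings.ini-driven training_parameters.
training_parameters = {'learning_rate': 1e-5,
                       'optimizer_beta': (0.9, 0.999),
                       'optimizer_epsilon': 1e-8,
                       'optimizer_weigth_decay': 1e-5}

optimizer_arguments = {'lr': training_parameters['learning_rate'],
                       'betas': training_parameters['optimizer_beta'],
                       'eps': training_parameters['optimizer_epsilon'],
                       'weight_decay': training_parameters['optimizer_weigth_decay']}

model = nn.Linear(10, 1)  # placeholder network instead of the BrainMapper U-Net

# The keys line up with torch.optim.Adam's keyword arguments, so the dictionary
# can be unpacked directly when the optimizer is constructed.
optimizer = torch.optim.Adam(model.parameters(), **optimizer_arguments)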
@@ -193,22 +193,28 @@ def train(data_parameters, training_parameters, network_parameters, misc_parameters):
                       network_parameters,
                       misc_parameters)
     else:
-        for k in range of data_parameters['k_fold']:
+        print("Training initiated using K-fold Cross Validation!")
+        for k in range(data_parameters['k_fold']):
             print("K-fold Number: {}".format(k+1))
             k_fold_losses = []
-            data_parameters['train_list'] = os.path.join(data_folder_name, 'train' + str(k+1) + '.txt')
-            data_parameters['validation_list'] = os.path.join(data_folder_name, 'validation' + str(k+1) + '.txt')
-            training_parameters['final_model_output_file']) = final_model_output_file.replace(".pth.tar", str(k+1) + ".pth.tar")
+            data_parameters['train_list'] = os.path.join(
+                data_parameters['data_folder_name'], 'train' + str(k+1) + '.txt')
+            data_parameters['validation_list'] = os.path.join(
+                data_parameters['data_folder_name'], 'validation' + str(k+1) + '.txt')
+            training_parameters['final_model_output_file'] = training_parameters['final_model_output_file'].replace(".pth.tar", str(k+1) + ".pth.tar")
             validation_loss = _train_runner(data_parameters,
                                             training_parameters,
                                             network_parameters,
                                             misc_parameters)
             k_fold_losses.append(validation_loss)
-        mean_k_fold_loss = k_fold_losses.mean()
+        for k in range(data_parameters['k_fold']):
+            print("K-fold Number: {} Loss: {}".format(k+1, k_fold_losses[k]))
+        print("K-fold Cross Validation Avearge Loss: {}".format(np.mean(k_fold_losses)))

 def evaluate_score(training_parameters, network_parameters, misc_parameters, evaluation_parameters):
     """Mapping Score Evaluator
...
...
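The K-fold branch above appears to fix three cross-validation bugs: the loop header used "range of" instead of range(...), the per-fold file paths referenced an undefined data_folder_name rather than data_parameters['data_folder_name'], and the average loss called .mean() on a plain Python list instead of np.mean(). A stripped-down sketch of the same driver pattern follows; the dummy _train_runner and the parameter values are invented for illustration, and k_fold_losses is deliberately initialised once, outside the loop, so that losses from every fold are averaged.

import os
import numpy as np

def _train_runner(data_parameters):
    """Dummy stand-in for the real training routine; returns a fake validation loss."""
    return np.random.rand()

data_parameters = {'k_fold': 5, 'data_folder_name': 'datasets'}
k_fold_losses = []

print("Training initiated using K-fold Cross Validation!")
for k in range(data_parameters['k_fold']):
    print("K-fold Number: {}".format(k + 1))
    # Point each fold at its own pre-generated subject lists, e.g. datasets/train1.txt.
    data_parameters['train_list'] = os.path.join(
        data_parameters['data_folder_name'], 'train' + str(k + 1) + '.txt')
    data_parameters['validation_list'] = os.path.join(
        data_parameters['data_folder_name'], 'validation' + str(k + 1) + '.txt')

    validation_loss = _train_runner(data_parameters)
    k_fold_losses.append(validation_loss)

# A Python list has no .mean(); np.mean() is the fix used in the commit.
print("K-fold Cross Validation Average Loss: {}".format(np.mean(k_fold_losses)))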
@@ -247,17 +253,17 @@ def evaluate_score(training_parameters, network_parameters, misc_parameters, evaluation_parameters):
     # TODO - NEED TO UPDATE THE DATA FUNCTIONS!
-    logWriter = LogWriter(number_of_classes=network_parameters['number_of_classes'],
-                          logs_directory=misc_parameters['logs_directory'],
-                          experiment_name=training_parameters['experiment_name']
+    logWriter = LogWriter(number_of_classes=network_parameters['number_of_classes'],
+                          logs_directory=misc_parameters['logs_directory'],
+                          experiment_name=training_parameters['experiment_name']
                           )
-    prediction_output_path = os.path.join(misc_parameters['experiments_directory'],
+    prediction_output_path = os.path.join(misc_parameters['experiments_directory'],
                                           training_parameters['experiment_name'],
                                           evaluation_parameters['saved_predictions_directory']
                                           )
-    _ = evaluations.evaluate_dice_score(trained_model_path=evaluation_parameters['trained_model_path'],
+    _ = evaluations.evaluate_dice_score(trained_model_path=evaluation_parameters['trained_model_path'],
                                         number_of_classes=network_parameters['number_of_classes'],
                                         data_directory=evaluation_parameters['data_directory'],
                                         targets_directory=evaluation_parameters[
...
...
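evaluate_score delegates to evaluations.evaluate_dice_score, whose arguments are visible above but whose body is not part of this diff. As a reminder of the metric itself, a minimal Dice coefficient for binarised volumes can be computed as in the generic sketch below; this is not the repository's implementation.

import numpy as np

def dice_score(prediction, target, smooth=1e-7):
    """Dice coefficient 2|A n B| / (|A| + |B|) for binary arrays."""
    prediction = np.asarray(prediction).astype(bool)
    target = np.asarray(target).astype(bool)
    intersection = np.logical_and(prediction, target).sum()
    return (2.0 * intersection + smooth) / (prediction.sum() + target.sum() + smooth)

# Toy example: two overlapping 1D masks.
print(dice_score([1, 1, 0, 0], [1, 0, 0, 0]))  # 2*1 / (2+1) ~ 0.667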
@@ -398,31 +404,30 @@ if __name__ == '__main__':
     if data_parameters['data_split_flag'] == True:
         if data_parameters['use_data_file'] == True:
-            data_test_train_validation_split(data_parameters['data_folder_name'],
-                                              data_parameters['test_percentage'],
-                                              data_parameters['subject_number'],
-                                              data_file=data_parameters['data_file'],
-                                              K_fold=data_parameters['k_fold']
-                                              )
+            data_test_train_validation_split(data_parameters['data_folder_name'],
+                                              data_parameters['test_percentage'],
+                                              data_parameters['subject_number'],
+                                              data_file=data_parameters['data_file'],
+                                              K_fold=data_parameters['k_fold']
+                                              )
         else:
-            data_test_train_validation_split(data_parameters['data_folder_name'],
-                                              data_parameters['test_percentage'],
-                                              data_parameters['subject_number'],
-                                              data_directory=data_parameters['data_directory'],
-                                              K_fold=data_parameters['k_fold']
-                                              )
+            data_test_train_validation_split(data_parameters['data_folder_name'],
+                                              data_parameters['test_percentage'],
+                                              data_parameters['subject_number'],
+                                              data_directory=data_parameters['data_directory'],
+                                              K_fold=data_parameters['k_fold']
+                                              )
         update_shuffling_flag('settings.ini')

     if arguments.mode == 'train':
-        train(data_parameters, training_parameters, network_parameters, misc_parameters)
+        train(data_parameters, training_parameters,
+              network_parameters, misc_parameters)

     # NOTE: THE EVAL FUNCTIONS HAVE NOT YET BEEN DEBUGGED (16/04/20)
     elif arguments.mode == 'evaluate-score':
-        evaluate_score(training_parameters, network_parameters, misc_parameters, evaluation_parameters)
+        evaluate_score(training_parameters,
+                       network_parameters, misc_parameters, evaluation_parameters)
     elif arguments.mode == 'evaluate-mapping':
         logging.basicConfig(filename='evaluate-mapping-error.log')
         if arguments.settings_path is not None:
...
...
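The __main__ block dispatches on arguments.mode and arguments.settings_path, which are parsed outside the hunks shown in this commit. A plausible, minimal parser producing those attributes might look like the following; the flag names and help strings are assumptions based only on the attribute names visible in the diff, not the repository's actual CLI.

import argparse

parser = argparse.ArgumentParser(description="BrainMapper training / evaluation entry point")
parser.add_argument('--mode', required=True,
                    choices=['train', 'evaluate-score', 'evaluate-mapping'],
                    help="Which pipeline stage to run.")
parser.add_argument('--settings_path', default=None,
                    help="Optional path to an alternative settings.ini file.")
arguments = parser.parse_args()

# arguments.mode and arguments.settings_path are then used exactly as in the
# dispatch code above, e.g. `if arguments.mode == 'train': ...`.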
settings.ini
...
...
@@ -16,8 +16,8 @@ validation_data_file = "dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
 validation_target_file = "fMRI/rfMRI_25.dr/dr_stage2.nii.gz"

 [TRAINING]
-training_batch_size = 2
-validation_batch_size = 2
+training_batch_size = 4
+validation_batch_size = 4
 use_pre_trained = False
 pre_trained_path = "saved_models/finetuned_alldata.pth.tar"
 experiment_name = "experiment_name"
...
...
@@ -50,6 +50,7 @@ number_of_classes = 1
 save_model_directory = "saved_models"
 model_name = "BrainMapper"
 logs_directory = "logs"
 checkpoint_directory = "checkpoints"
 device = 0
 experiments_directory = "experiments"
...
...
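The first settings.ini hunk doubles the training and validation batch sizes from 2 to 4, and the second adds one line to the block of directory and model settings. How settings.ini is turned into the *_parameters dictionaries used in run.py is not shown in this commit; a minimal sketch using Python's standard configparser (the repository may well use its own settings helper instead) would be:

import ast
import configparser

config = configparser.ConfigParser()
config.read('settings.ini')

# Values in this settings.ini are written as Python literals ("saved_models", 4, False),
# so ast.literal_eval is one safe way to recover their types.
training_parameters = {key: ast.literal_eval(value)
                       for key, value in config['TRAINING'].items()}

print(training_parameters['training_batch_size'])   # 4 after this commit
print(training_parameters['use_pre_trained'])        # False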
utils/data_utils.py
...
...
@@ -114,8 +114,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_number, ...):
                                          subDirectoryList, test_size=test_percentage/100, random_state=42, shuffle=True)

     np.savetxt(os.path.join(data_folder_name, 'test.txt'), test, fmt='%s')
-    print("Test={}".format(test))

     if K_fold is None:
         train, validation = train_test_split(train_data, test_size=int(len(test)), random_state=42, shuffle=True)
...
...
@@ -124,7 +122,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_number, ...):
                                 'train.txt'), train, fmt='%s')
         np.savetxt(os.path.join(data_folder_name, 'validation.txt'), validation, fmt='%s')
-        print("Train={}, Validation={}".format(train, validation))
     else:
         k_fold = KFold(n_splits=K_fold)
         k = 0
...
...
@@ -136,8 +133,6 @@ def data_test_train_validation_split(data_folder_name, test_percentage, subject_number, ...):
             np.savetxt(os.path.join(data_folder_name, 'validation' + str(k+1) + '.txt'), validation, fmt='%s')
-            print("K={}, Train={}, Validation={}".format(k, train, validation))
             k += 1
...
...
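The print statements deleted above appear to be debug output from data_test_train_validation_split, which writes one trainK.txt / validationK.txt pair per fold. A condensed sketch of that per-fold file generation, using the same sklearn KFold and np.savetxt pattern visible in the diff, is shown below; the subject identifiers are made up for illustration.

import os
import numpy as np
from sklearn.model_selection import KFold

data_folder_name = 'datasets'
os.makedirs(data_folder_name, exist_ok=True)

# Hypothetical subject identifiers standing in for the real subject list.
train_data = np.array(['subj{:02d}'.format(i) for i in range(10)])

K_fold = 5
k_fold = KFold(n_splits=K_fold)
k = 0
for train_index, validation_index in k_fold.split(train_data):
    train, validation = train_data[train_index], train_data[validation_index]
    # One pair of list files per fold: train1.txt/validation1.txt, train2.txt/..., etc.
    np.savetxt(os.path.join(data_folder_name, 'train' + str(k+1) + '.txt'), train, fmt='%s')
    np.savetxt(os.path.join(data_folder_name, 'validation' + str(k+1) + '.txt'), validation, fmt='%s')
    k += 1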
@@ -620,58 +615,3 @@ def get_datasetsHDF5(data_parameters):
                            training_labels['label'][()]),
             DataMapperHDF5(testing_data['data'][()], testing_labels['label'][()])
             )
-
-if __name__ == '__main__':
-
-    # data_file_path = 'train.txt'
-    # subDirectoryList = data_file_reader(data_file_path)
-    # print(subDirectoryList)
-    # print(type(subDirectoryList))
-    # folder_location = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
-    # subDirectoryList2 = directory_reader(folder_location)
-    # print(subDirectoryList2)
-    # print(type(subDirectoryList2))
-
-    data_folder_name = "datasets"
-    train_percentage = 90
-    test_percentage = 5
-    K_fold = None
-    subject_number = None
-    data_directory = "../well/win-biobank/projects/imaging/data/data3/subjectsAll/"
-
-    data_test_train_validation_split(data_folder_name, test_percentage, subject_number, data_directory=data_directory, K_fold=K_fold)
-
-    # data_test_train_validation_split_Kfold_cross_validation(data_folder_name, K_fold, subject_number, data_directory = data_directory)
-
-    # data = np.arange(23)
-    # K = 10
-    # test_size = int(len(data)/K)
-    # for k in range(K):
-    # if k == 0:
-    # test_slice, remainder = np.split(data.copy(), [test_size], axis=0)
-    # print("k= {}, test_slice={}, remainder={}".format(k, test_slice, remainder))
-    # else:
-    # remainder[(k-1) * test_size: k *test_size], test_slice = test_slice, remainder[(k-1) * test_size: k * test_size].copy()
-    # print("k= {}, test_slice={}, remainder={}".format(k, test_slice, remainder))
-
-    # print('SKLEARN TIME!')
-    # from sklearn.model_selection import KFold, train_test_split
-    # kf = KFold(n_splits=K)
-    # k = 0
-    # train_data, test_data = train_test_split(data, test_size= 0.1)
-    # print('train_data= {}, test_data={}'.format(train_data, test_data))
-    # for train_index, test_index in kf.split(train_data):
-    # train, test = train_data[train_index], train_data[test_index]
-    # print("k= {}, val_slice={}, train={}".format(k, test, train))
-    # k+=1