Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Andrei-Claudiu Roibu
BrainMapper
Commits
214a3932
Commit
214a3932
authored
Jul 29, 2020
by
Andrei Roibu
Browse files
separated data preprocessing from main run call, added storing as hdf5 database
parent
29d9668e
Changes
2
Hide whitespace changes
Inline
Side-by-side
utils/hdf5_generator.py
0 → 100644
View file @
214a3932
""" Database Generator
Description:
This file contains the required functions for generating the databases required for training the network.
This file is designed to be a standalone package, intended to be run separately from the main network.
Usage:
To use content from this folder, import the functions and instantiate them as you wish to use them:
from utils.hdf5_generator import function_name
"""
import
os
import
h5py
import
numpy
as
np
import
utils.data_utils
as
data_utils
import
utils.preprocessor
as
preprocessor
from
utils.settings
import
Settings
from
utils.common_utils
import
create_folder
def convert_hdf5(data_parameters, file_information):
    """ HDF5 Converter

    Function which obtains the train and validation subjects, loads and preprocesses
    their data and stores the results as HDF5 databases.

    The subjects are obtained in one of two ways:
        - if 'data_split_flag' is True, a new split is produced by preprocessor.data_preparation
          (the 'data_file' setting is only forwarded when 'use_data_file' is True);
        - else, if 'train_test_file_read_flag' is True, the subjects are read from the
          'train_list' and 'validation_list' files.

    The training split is loaded and written before the validation split is loaded.

    Args:
        data_parameters (dict): Dictionary-like object holding the data settings, indexed by the keys used below
        file_information (dict): Dictionary containing the output paths for the various databases (forwarded to write_hdf5)

    Raises:
        ValueError: If neither 'data_split_flag' nor 'train_test_file_read_flag' is True
    """

    # First, we split the data.
    # NOTE: the explicit '== True' comparisons are kept on purpose, so that truthy
    # non-boolean settings values (e.g. non-empty strings) are not treated as enabled flags.
    if data_parameters['data_split_flag'] == True:  # noqa: E712
        print('Data is shuffling... This could take a few minutes!')

        # Keyword arguments shared by both variants of the data_preparation call
        split_kwargs = {
            'data_directory': data_parameters['data_directory'],
            'train_inputs': data_parameters['train_data_file'],
            'train_targets': data_parameters['train_output_targets'],
            'rsfMRI_mean_mask_path': data_parameters['rsfmri_mean_mask_path'],
            'dMRI_mean_mask_path': data_parameters['dmri_mean_mask_path'],
        }

        # The subject data file is only passed on when explicitly requested
        if data_parameters['use_data_file'] == True:  # noqa: E712
            split_kwargs['data_file'] = data_parameters['data_file']

        train_subjects, validation_subjects = preprocessor.data_preparation(
            data_parameters['data_folder_name'],
            data_parameters['test_percentage'],
            data_parameters['subject_number'],
            **split_kwargs
        )

        # Presumably records in the settings file that the split has been done - confirm in utils.preprocessor
        preprocessor.update_shuffling_flag('utils/hdf5_settings.ini')

        print('Data shuffling... Complete!')

    elif data_parameters['train_test_file_read_flag'] == True:  # noqa: E712
        # Read the subjects from the files!
        train_subjects = data_utils.load_subjects_from_path(
            data_directory=data_parameters['data_directory'],
            data_list=data_parameters['train_list']
        )
        validation_subjects = data_utils.load_subjects_from_path(
            data_directory=data_parameters['data_directory'],
            data_list=data_parameters['validation_list']
        )

    else:
        raise ValueError(
            'Either a split flag, or a read-from-file flag must be provided as True')

    # Then, we have to read the various test and train data, process them and write them to H5
    # First, let's do this for the training data
    _process_and_write(train_subjects, data_parameters, file_information,
                       mode='train', message='-> Processing training data:')

    # Then, we'll do it for the validation data
    _process_and_write(validation_subjects, data_parameters, file_information,
                       mode='validation', message='-> Processing validation data:')


def _process_and_write(subjects, data_parameters, file_information, mode, message):
    """ Split Processor

    Private helper which loads and preprocesses the data of one group of subjects and writes it to the HDF5 databases of the given mode.

    Args:
        subjects: Subjects to be processed, as returned by the split / read step of convert_hdf5
        data_parameters (dict): Dictionary-like object holding the data settings
        file_information (dict): Dictionary containing the output paths for the various databases
        mode (str): Key of file_information selecting the databases to be written ('train' or 'validation')
        message (str): Progress message printed before the processing starts
    """

    print(message)

    # Both splits use the same input / target file names and the same preprocessing flags
    dMRI_volumes, rsfMRI_volumes = preprocessor.load_datasets(
        subjects=subjects,
        data_directory=data_parameters['data_directory'],
        input_file=data_parameters['train_data_file'],
        output_target=data_parameters['train_output_targets'],
        mean_regression_flag=data_parameters['mean_regression_flag'],
        mean_regression_all_flag=data_parameters['mean_regression_all_flag'],
        regression_weights_path=data_parameters['regression_weights_path'],
        dMRI_mean_mask_path=data_parameters['dmri_mean_mask_path'],
        rsfMRI_mean_mask_path=data_parameters['rsfmri_mean_mask_path'],
        mean_subtraction_flag=data_parameters['mean_subtraction_flag'],
        scale_volumes_flag=data_parameters['scale_volumes_flag'],
        normalize_flag=data_parameters['normalize_flag'],
        negative_flag=data_parameters['negative_flag'],
        outlier_flag=data_parameters['outlier_flag'],
        shrinkage_flag=data_parameters['shrinkage_flag'],
        hard_shrinkage_flag=data_parameters['hard_shrinkage_flag']
    )

    # dMRI volumes are stored as the inputs, rsfMRI volumes as the targets
    write_hdf5(dMRI_volumes, rsfMRI_volumes, file_information, mode=mode)
def write_hdf5(input_volumes, target_volumes, file_information, mode):
    """ HDF5 Writer

    Function which stores the input and the target volumes of one data split in two separate HDF5 files.
    The input file is written first, holding a single dataset named 'input'; the target file follows, holding a single dataset named 'target'.
    Both files are opened in 'w' mode.

    Args:
        input_volumes (list): List of all the input volumes.
        target_volumes (list): List of all the target volumes.
        file_information (dict): Dictionary containing the output paths for the various databases, indexed as file_information[mode]['input'] and file_information[mode]['target']
        mode (str): String indicating the type of data observed; used as a key into file_information
    """

    output_paths = file_information[mode]

    # The dataset name doubles as the key under which the output path is stored
    databases = (
        ('input', input_volumes),
        ('target', target_volumes),
    )

    for dataset_name, volumes in databases:
        with h5py.File(output_paths[dataset_name], 'w') as database:
            database.create_dataset(dataset_name, data=volumes)
if __name__ == "__main__":
    # Script entry point: reads the settings, builds the output paths and runs the conversion

    print('Started Data Generation!')

    settings = Settings('utils/hdf5_settings.ini')
    data_parameters = settings['DATA']

    create_folder(data_parameters['data_folder_name'])

    # Settings keys holding the database file names, grouped by data split and by role
    database_keys = {
        'train': {
            "input": 'input_data_train',
            "target": 'target_data_train',
        },
        'validation': {
            "input": 'input_data_validation',
            "target": 'target_data_validation',
        },
    }

    # Every database path is the configured file name joined onto 'data_directory'
    # NOTE(review): the outputs go to 'data_directory', not to the 'data_folder_name' folder created above - confirm this is intended
    file_information = {
        split: {
            role: os.path.join(data_parameters['data_directory'], data_parameters[key])
            for role, key in roles.items()
        }
        for split, roles in database_keys.items()
    }

    convert_hdf5(data_parameters, file_information)

    print('Completed Data Generation!')
utils/hdf5_settings.ini
0 → 100644
View file @
214a3932
[DATA]
data_folder_name
=
"datasets"
use_data_file
=
False
data_directory
=
"/well/win-biobank/projects/imaging/data/data3/subjectsAll/"
data_file
=
"/well/win-biobank/projects/imaging/data/data3/subjectsAll/subj_22k.txt"
data_split_flag
=
False
train_test_file_read_flag
=
True
test_percentage
=
5
subject_number
=
12000
train_list
=
"datasets/train.txt"
validation_list
=
"datasets/validation.txt"
regression_weights_path
=
"datasets/regression_weights.pkl"
train_data_file
=
"dMRI/autoptx_preproc/tractsNormSummed.nii.gz"
train_output_targets
=
"fMRI/rfMRI_25.dr/dr_stage2.nii.gz"
rsfmri_mean_mask_path
=
"utils/mean_dr_stage2.nii.gz"
dmri_mean_mask_path
=
"utils/mean_tractsNormSummed_downsampled.nii.gz"
mean_regression_flag
=
False
mean_regression_all_flag
=
False
mean_subtraction_flag
=
True
scale_volumes_flag
=
True
normalize_flag
=
True
negative_flag
=
True
outlier_flag
=
True
shrinkage_flag
=
False
hard_shrinkage_flag
=
False
input_data_train
=
"input_data_train.h5"
target_data_train
=
"target_data_train.h5"
input_data_validation
=
"input_data_validation.h5"
target_data_validation
=
"target_data_validation.h5"
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment