Commit 09a5b52d authored by Saad Jbabdi's avatar Saad Jbabdi
Browse files

Delete train_model.py~

parent 15378346
#!/bin/ipython
#coding: utf8
# Train convnet for cell recognition
# Oiwi, 07/2018
# Saad, 09/2018
# import modules
import numpy as np
# ------------------------------ DATA ------------------------------ #
# Create DB with equal numbers of cell/no cells images
def get_balanced_data(data, labels, split=0.0, rng=None):
    """Build a class-balanced train/test split.

    Every class is randomly down-sampled (without replacement) to the
    size of the rarest class, so the returned sets hold equal numbers
    of examples per class.

    Parameters
    ----------
    data : np.ndarray
        Samples; the first axis indexes examples.
    labels : np.ndarray
        Per-example class labels, same length as data's first axis.
    split : float, optional
        Fraction of each class held out for the test set
        (default 0.0, i.e. everything goes to the training set).
    rng : np.random.RandomState or np.random.Generator, optional
        Source of randomness for reproducible splits; defaults to the
        global np.random module, preserving the original behaviour.

    Returns
    -------
    (data_train, labels_train, data_test, labels_test)
        data_* are cast to float; labels_* keep their original dtype.
    """
    if rng is None:
        rng = np.random
    classes, counts = np.unique(labels, return_counts=True)
    n_per_class = counts.min()  # down-sample every class to the rarest one
    idx_train = []
    idx_test = []
    for cl in classes:
        cl_idx = np.where(labels == cl)[0]
        # choice() without replacement also shuffles, so the head/tail
        # split below is a random split within the class.
        cl_idx = rng.choice(cl_idx, n_per_class, replace=False)
        n_train = int((1.0 - split) * len(cl_idx))
        idx_train.extend(cl_idx[:n_train])
        idx_test.extend(cl_idx[n_train:])
    data_train = data[idx_train, ...].astype(float)
    labels_train = labels[idx_train]
    data_test = data[idx_test, ...].astype(float)
    labels_test = labels[idx_test]
    return data_train, labels_train, data_test, labels_test
# LOAD DATA
# Two pre-built cell databases (.npz archives containing 'images' and
# 'counts' arrays) are concatenated into a single dataset.
datafile1 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_001.npz'
celldb1 = np.load(datafile1)
datafile2 = '/vols/Data/sj/Haber_Digitisation/Images/celldb_002.npz'
celldb2 = np.load(datafile2)
images = np.concatenate((celldb1['images'],celldb2['images']))
counts = np.concatenate((celldb1['counts'],celldb2['counts']))
# Equal number of examplars per class
# Split train/test
# counts>0 binarises the per-image cell counts into a two-class
# cell / no-cell label; 10% of each class is held out for validation.
img_train,lab_train,img_val,lab_val = get_balanced_data(images,
                                                        counts>0,.1)
# NORMALISE
# Per-pixel standardisation using training-set statistics only; the same
# mean/std are applied to the validation images to avoid leakage.
# NOTE(review): pixels with zero std across the training set would yield
# a divide-by-zero here — presumably none exist in this data; confirm.
img_avg = img_train.mean(axis=0)
img_std = img_train.std(axis=0)
img_train = (img_train - img_avg) / img_std
img_val = (img_val - img_avg) / img_std
# One-hot labels
# Convert boolean labels to one-hot rows for the softmax output below.
from keras.utils import np_utils
n_classes = len(np.unique(lab_train))
lab_train = np_utils.to_categorical(lab_train, n_classes)
lab_val = np_utils.to_categorical(lab_val, n_classes)
# ------------------------------ MODEL ------------------------------ #
import numpy as np  # NOTE(review): duplicate of the import at the top of the file; harmless
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization  # NOTE(review): Dense and Dropout are imported but never used
from keras import backend as K
from keras import optimizers
# The below swaps image dimensions / why is this needed?
#if K.backend()=='tensorflow':
# K.set_image_dim_ordering('th')
# matconvnet model
# Small LeNet-style stack: three conv/pool/BN/ReLU stages followed by two
# 1x1-ish conv layers acting as a classifier head, flattened into a
# 2-way softmax.  NOTE(review): input is assumed channels-last
# (H, W, C) since image_dim_ordering is not changed — confirm.
model = Sequential()
model.add(Convolution2D(20, (5, 5), strides=(1, 1), padding='valid', input_shape=img_train.shape[1:]))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(50, (5, 5), strides=(1, 1), padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=4, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(500, (5, 5), strides=(2, 2), padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(2, (2, 2), strides=1, padding='valid'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(2, (1, 1), strides=(1, 1), padding='valid'))
model.add(Flatten())
# NOTE(review): BatchNormalization directly before the softmax is unusual
# and rescales the logits — confirm this is intended.
model.add(BatchNormalization())
model.add(Activation('softmax'))
model.summary()
# compile model
# NOTE(review): with a 2-unit softmax output, 'categorical_crossentropy'
# is the conventional loss; 'binary_crossentropy' here applies the loss
# element-wise to both one-hot columns — verify this is what is wanted.
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
# log settings
# CSVLogger appends per-epoch metrics; ModelCheckpoint keeps only the
# weights of the best epoch (lowest validation loss) seen so far.
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint
OutputDir = '/vols/Data/sj/Haber_Digitisation/Images/results'
csv_logger = CSVLogger(OutputDir + '/haber1_loss.log')
checkpointer = ModelCheckpoint(filepath=OutputDir + '/haber1_weights.hdf5', verbose=1, save_best_only=True)
# train model
import time
start = time.time()  # wall-clock timer for the whole training run
# train model
import os

# Manual switch between GPU and CPU execution.
# NOTE(review): hiding CUDA devices via the environment variable here may
# be too late if the TensorFlow session was already created by an earlier
# keras call — confirm this executes before any graph/session init.
gpu = True
if gpu:
    # Private keras helper: queries (and logs) the GPUs visible to TF.
    K.tensorflow_backend._get_available_gpus()
    print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    print('* Running forward pass on CPU')
def accuracy(test_x, test_y, model):
    """Return the model's classification accuracy (in percent) on a test set.

    Parameters
    ----------
    test_x : np.ndarray
        Test inputs, passed straight to model.predict.
    test_y : np.ndarray
        One-hot encoded ground-truth labels, one row per example.
    model : object
        Anything with a predict(x) method returning per-class scores,
        one row per example.

    Returns
    -------
    float
        Percentage (0-100) of examples whose argmax prediction matches
        the argmax of the one-hot label.
    """
    result = model.predict(test_x)
    # BUG FIX: the original took argmax of test_y for BOTH predicted and
    # true classes, so it compared the labels with themselves and always
    # reported 100%.  Predictions must come from `result`.
    predicted_class = np.argmax(result, axis=1)
    true_class = np.argmax(test_y, axis=1)
    num_correct = np.sum(predicted_class == true_class)
    return float(num_correct) / result.shape[0] * 100
# data augmentation
DataAugment = True  # set False to train on the raw images without augmentation
if(DataAugment):
    from keras.preprocessing.image import ImageDataGenerator
    # Random geometric augmentation — rotations, shifts and flips are
    # label-preserving for cell images where orientation is arbitrary.
    datagen = ImageDataGenerator(
        rotation_range=90, # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=.25, # randomly shift images horizontally (fraction of total width)
        height_shift_range=.25, # randomly shift images vertically (fraction of total height)
        horizontal_flip=True, # randomly flip images
        vertical_flip=True) # randomly flip images
    # fit() computes internal data statistics; only required for the
    # featurewise_* options (not enabled here), so it is a no-op in effect.
    datagen.fit(img_train)
    model_info = model.fit_generator(datagen.flow(img_train, lab_train, batch_size=32),
                                     epochs = 200, verbose=1,
                                     shuffle=True, validation_data = (img_val, lab_val),
                                     callbacks=[csv_logger, checkpointer])
else:
    model_info = model.fit(img_train,lab_train, batch_size = 32,
                           epochs = 100, verbose=1,
                           shuffle=True, validation_data=(img_val,lab_val),
                           callbacks=[csv_logger, checkpointer])
end = time.time()
print("Model took %0.2f seconds to train" %(end - start))
# NOTE(review): despite the message, this evaluates on the VALIDATION
# split (img_val/lab_val), which was also used for checkpoint selection —
# it is not an independent test set.
print("Accuracy on test data is: %0.2f" %accuracy(img_val, lab_val, model))
# SAVE MODEL AND FITTING HISTORY
import pandas as pd

# Persist the trained network (architecture + weights) ...
model_path = OutputDir + '/model_db1+2_augment.h5'
model.save(model_path)

# ... and the per-epoch training metrics as a CSV for later plotting.
history_path = OutputDir + '/model_db1+2_augment_hist.csv'
pd.DataFrame(model_info.history).to_csv(history_path)

# done
print("Done")
# import matplotlib.pyplot as plt
# df = pd.read_csv(OutputDir + '/tmp/tmp_model_hist.csv')
# plt.plot(df['loss'])
# plt.plot(df['val_loss'])
# plt.show()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment