Commit a27e8595 authored by Saad Jbabdi

cleanup

parent d82d6b81
*.npz
test
*~
__pycache__
.DS_Store
*.ipynb
build
\ No newline at end of file
@@ -9,13 +9,36 @@
from keras.models import Sequential, Model
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization, Input, Concatenate
from keras import optimizers
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
import time
import os
# Model names
# convnet_1 : our first stab at the problem
def create_from_name(shape, arch='convnet_1'):
"""
Basic deep CNN
Parameters
----------
shape : tuple.
Typically NxNx3 for 3 channels
arch : str
'convnet_1' is a succession of conv2D,maxPool,Batchnorm with softmax at the end
Returns
-------
keras Model object
"""
    if arch == 'convnet_1':
        print('BUILDING A CONVNET')
        model = Sequential()
@@ -71,3 +94,66 @@ def create_from_name(shape,arch='convnet_1'):
    return model
def build_model(shape, arch='convnet_1'):
    """
    Create and compile a model
    """
    model = create_from_name(shape, arch)
    # compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    return model
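# Usage sketch (hypothetical helper, not part of the original module): build
# and inspect a convnet_1 model for 64x64 RGB patches.
def _demo_build(shape=(64, 64, 3)):
    model = build_model(shape, arch='convnet_1')
    model.summary()   # print the layer-by-layer architecture
    return model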
def train_model(model,
                data,
                gpu=False,
                augment=False,
                verbose=False,
                epochs=100,
                batch_size=32):
    """
    Prespecified training regime
    """
    # train model
    X_train, y_train, X_test, y_test = data
    start = time.time()

    if gpu:
        K.tensorflow_backend._get_available_gpus()
        print('* Running forward pass on GPU (CUDA_VISIBLE_DEVICES)')
    else:
        # note: this only takes effect if set before TensorFlow initialises its GPU context
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        print('* Running forward pass on CPU')

    if augment:
        print('* Using data augmentation')
        datagen = ImageDataGenerator(
            rotation_range=90,       # randomly rotate images by up to 90 degrees
            width_shift_range=.25,   # randomly shift images horizontally (fraction of total width)
            height_shift_range=.25,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,    # randomly flip images horizontally
            vertical_flip=True)      # randomly flip images vertically
        datagen.fit(X_train)
        info = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                   epochs=epochs, verbose=1,
                                   shuffle=True, validation_data=(X_test, y_test))
    else:
        print('* NOT using data augmentation')
        info = model.fit(X_train, y_train, batch_size=batch_size,
                         epochs=epochs, verbose=1,
                         shuffle=True, validation_data=(X_test, y_test))

    end = time.time()
    print('Model took {:.2f} seconds to train'.format(end - start))
    return info
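# Smoke-test sketch (hypothetical, random data): exercise build_model and
# train_model end to end with one-hot labels over 2 classes, matching the
# softmax + binary_crossentropy setup above.
def _demo_train(n=64, shape=(64, 64, 3)):
    import numpy as np
    X = np.random.rand(n, *shape).astype('float32')
    y = np.zeros((n, 2), dtype='float32')
    y[np.arange(n), np.random.randint(0, 2, n)] = 1   # random one-hot labels
    model = build_model(shape)
    k = int(0.75 * n)                                 # 75/25 train/test split
    return train_model(model, (X[:k], y[:k], X[k:], y[k:]),
                       gpu=False, augment=False, epochs=1, batch_size=16)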
@@ -7,24 +7,27 @@
# import modules
# General
import numpy as np
import time, os, sys
import argparse
# DL stuff
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K
from keras import optimizers
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
# Other
import pandas as pd
from CellCounting.models import model_utils as mod
from CellCounting.utils import db
# ------------------------------ DATA ------------------------------ #
def prepare_data(celldb, args):
    """
    Split data in celldb into train and test sets.
    Normalise the data, saving the training mean and std
    so they can be applied to the test data.
    Return X_train, y_train, X_test, y_test
    """
    X_train, y_train, X_test, y_test = celldb.split_train_test(split=args.split)
    # Normalise images
@@ -54,18 +57,12 @@ def prepare_data(celldb, args):
# ------------------------------ MODEL ------------------------------ #
def accuracy(test_x, test_y, model):
    """
    Precision and recall
    """
    pred = model.predict(test_x)
    TP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] > test_y[:, 0])).sum()
    FP = ((pred[:, 1] > pred[:, 0]) & (test_y[:, 1] < test_y[:, 0])).sum()
@@ -77,132 +74,95 @@ def accuracy(test_x, test_y, model):
    return prec, recall
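# The elided lines above presumably compute FN and the two ratios. A worked
# sketch with made-up scores (column 1 = "cell"): rows predict +,-,+ against
# truth +,+,-, giving TP=1, FN=1, FP=1, i.e. precision = recall = 0.5.
def _demo_accuracy():
    pred  = np.array([[0.2, 0.8], [0.9, 0.1], [0.4, 0.6]])
    truth = np.array([[0, 1],     [0, 1],     [1, 0]])
    TP = ((pred[:, 1] > pred[:, 0]) & (truth[:, 1] > truth[:, 0])).sum()  # 1
    FP = ((pred[:, 1] > pred[:, 0]) & (truth[:, 1] < truth[:, 0])).sum()  # 1
    FN = ((pred[:, 1] < pred[:, 0]) & (truth[:, 1] > truth[:, 0])).sum()  # 1
    return TP / (TP + FP), TP / (TP + FN)   # (precision, recall) = (0.5, 0.5)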
# SAVE MODEL AND FITTING HISTORY
def save_results(model, info, args):
if not os.path.exists(args.out):
os.makedirs(args.out)
outfile = os.path.join(args.out,'model.h5')
model.save(outfile)
outhist = os.path.join(args.out,'model_hist.csv')
df = pd.DataFrame(info.history)
df.to_csv(outhist)
return
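# Usage sketch (hypothetical): reload the artifacts written by save_results.
def _demo_reload(outdir):
    from keras.models import load_model
    model = load_model(os.path.join(outdir, 'model.h5'))         # trained model
    hist = pd.read_csv(os.path.join(outdir, 'model_hist.csv'))   # fitting history
    return model, hist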
# Input
# - List of DBs
# - Basename folder for output
# - options for the fitting
# - GPU/Augmentation/ModelType?/train-test split/etc.
# Output
# - model.h5
# - model history
# - normalisation sub-folder
def main():
    p = argparse.ArgumentParser(description='Train model on some data')

    required = p.add_argument_group('Required arguments')
    optional = p.add_argument_group('Optional arguments')

    # Required arguments
    required.add_argument('-d', '--data', required=True, type=str, nargs='+', metavar='<str>.npz',
                          help='input databases')
    required.add_argument('-o', '--out', required=True, type=str, metavar='<str>',
                          help='output basename')

    # Optional arguments
    optional.add_argument('--use_gpu', action="store_true",
                          help='try to use GPU')
    optional.add_argument('--epochs', default=100, type=int, metavar='<int>',
                          help='number of training epochs (default=100)')
    optional.add_argument('--batch_size', default=32, type=int, metavar='<int>',
                          help='batch size (default=32)')
    optional.add_argument('--split', default=0.1, type=float, metavar='<float>',
                          help='train/test split (default=0.1)')
    optional.add_argument('--model', default='convnet_1', type=str, metavar='<str>',
                          help='choose model amongst [convnet_1,...] (default=convnet_1)')
    optional.add_argument('--load_model', default=None, type=str, metavar='<str>.h5',
                          help='load pretrained model')
    optional.add_argument('--augment', action="store_true",
                          help='use data augmentation (default=False)')
    optional.add_argument('--verbose', action="store_true",
                          help='spit out verbose info')

    # ---------- Parse arguments ----------- #
    args = p.parse_args()

    # ---------- Prepare the data ----------- #
    if args.verbose:
        print('* Preparing image database')
    celldb = db.CellDB()
    celldb.load_from_files(args.data)
    celldb.equalise_classes()
    celldb.summary()

    if args.verbose:
        print('* Preparing data')
    X_train, y_train, X_test, y_test = prepare_data(celldb, args)

    # ---------- Begin training ----------- #
    if args.verbose:
        print('* Preparing and training model')
    shape = celldb.images.shape[1:]
    if args.load_model is not None:
        if args.verbose:
            print('** Loading pretrained model')
        from keras.models import load_model
        model = load_model(args.load_model)
    else:
        model = mod.build_model(shape, args.model)

    TrainingArgs = {'gpu': args.use_gpu, 'augment': args.augment, 'verbose': args.verbose,
                    'epochs': args.epochs, 'batch_size': args.batch_size}
    info = mod.train_model(model=model,
                           data=[X_train, y_train, X_test, y_test],
                           **TrainingArgs)

    prec, recall = accuracy(X_test, y_test, model)
    if args.verbose:
        print('Accuracy on test data: precision = {:.2f}, recall = {:.2f}'.format(prec, recall))

    if args.verbose:
        print('* Saving results')
    save_results(model, info, args)

    if args.verbose:
        print('Done')
if __name__ == '__main__':
main()
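# Example invocation (hypothetical script name and paths):
#   python train.py -d db1.npz db2.npz -o results/run1 \
#       --use_gpu --augment --epochs 50 --batch_size 64 --verbose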
#!/usr/bin/env python
import tifffile as tif
import numpy as np
import matplotlib.pyplot as plt
import os
import os.path as op
import re
import glob
import argparse
import shutil
import random
class CellPicker(object):
    def __init__(self, figure, axis):
        self.points = []
        self.figure = figure
        self.axis = axis
        self.figure.canvas.mpl_connect('button_press_event', self.on_press)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_undo)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_quit)
        self.figure.canvas.mpl_connect('key_press_event', self.on_key_dunno)

    def on_press(self, ev):
        if ev.xdata is None:   # click landed outside the axes
            return
        self.points.append((ev.xdata, ev.ydata))
        points_array = np.array(self.points)
        self.axis.plot(points_array[:, 0],
                       points_array[:, 1],
                       marker='o', linestyle='None', markersize=5, color="red")
        self.figure.canvas.draw()

    def on_key_quit(self, event):
        if event.key == 'q':
            plt.close()
            exit()

    def on_key_undo(self, event):
        if event.key == 'u' and self.points:   # nothing to undo on an empty list
            self.points.pop(-1)
            self.axis.clear()
            if self.points:
                points_array = np.array(self.points)
                self.axis.plot(points_array[:, 0],
                               points_array[:, 1], marker='o',
                               linestyle='None', markersize=5, color="red")
            self.figure.canvas.draw()

    def on_key_dunno(self, event):
        if event.key == 'n':
            plt.close()
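# Usage sketch (hypothetical, synthetic image): attach a CellPicker to a
# figure and collect the clicked coordinates.
def _demo_picker():
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.imshow(np.random.rand(64, 64, 3))   # stand-in for a .npy sub-image
    picker = CellPicker(fig, ax)
    plt.show()                             # click cells; 'u' = undo, 'q' = quit
    return picker.points                   # list of (x, y) tuples in data coords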
def main():
# Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Click on cells and save to txt file"
    )
parser.add_argument("-i","--input_folder",
required=True,
help="Input folder (_splitted).")
parser.add_argument("-o","--output_cell_coordinates",
required=True,
help="Output file name.")
parser.add_argument("--shuffle", action='store_true', default=False,
dest='shuffle',help="Load sub-images in random order.")
parser.add_argument("--append", action='store_true', default=False,
dest='append',help="Append results to output file.")
parser.add_argument("--empty_zone", action='store_true', default=False,
dest="empty_zone",help="Entire zone is empty")
args = parser.parse_args()
# Find Numpy array files
infolder = args.input_folder
outfile = args.output_cell_coordinates
files = glob.glob(op.join(infolder,'*_w_*_h_*.npy'))
if args.shuffle == True:
random.shuffle(files)
create_header = True
if op.exists(outfile):
if args.append == True:
create_header = False
else:
print("File {} exists. Overwrite/Append/Exit?[O,A,E]".format(outfile))
response = input()
if response.upper() == "O":
os.remove(outfile)
elif response.upper() == "E":
print("Exiting without doing anything")
exit()
elif response.upper() == "A":
create_header = False
if create_header == True:
with open(outfile,'w') as f:
f.write('Sub-Image-File\tXcoord\tYcoord\n')
counter = 0
for file in files:
#First, get h and w values corresponding to sub-image from filename.
#This is in order to export x,y coordinates relative to original brain_slice image.
        res = re.findall(r"w_(\d+)\.(\d+)_h_(\d+)\.(\d+)", file)[0]
w = round(float(res[0]+"."+res[1]))
h = round(float(res[2]+"."+res[3]))
if args.empty_zone == True:
with open(outfile,'a') as f:
f.write('%s\tNaN\tNaN\n' %file)
else:
#prepare figure and load .npy files in as an image, ready to show
fig = plt.figure()
ax = fig.add_subplot(111)
im = ax.imshow(np.load(file))
ax.set_xlim(ax.get_xlim())
ax.set_ylim(ax.get_ylim())
#Instantiate CellPicker class
p = CellPicker(fig,ax)
counter += 1
ax.set_title("Click on cells. \n"
"'q'=quit, 'u'=undo, 'n'=dunno \n"
"Close if you can't see any cells \n"
"Processed {}/{}".format(counter,len(files)))
#Once past this we move to the next image...
plt.show()
# Write x,y coordinates to the output file,
# adding h and w values in order to translate into coordinates
# of original image.
with open(outfile,'a') as f:
if not p.points:
f.write('%s\tNaN\tNaN\n' %file)
else:
for point in p.points:
f.write('%20s \t %12f \t %12f \n' % (os.path.abspath(file), point[1] + w, point[0] + h))
if __name__ == '__main__':
main()
#!/usr/bin/env python3
import argparse
import numpy as np
import pandas as pd
import re
DB_IMAGE_RES = 64
def check_imshape(shape):
sx, sy, _ = shape
if sx != sy:
return False
if (sx % DB_IMAGE_RES != 0) or (sy % DB_IMAGE_RES !=0):
return False
return True
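# Quick check (illustrative values): a 128x128x3 image passes, a non-square
# 128x96x3 image fails.
def _demo_check():
    assert check_imshape((128, 128, 3))      # square, sides a multiple of 64
    assert not check_imshape((128, 96, 3))   # not square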
def append_file_content(fname,image_list,count_list):
df = pd.read_table(fname)
udf = df.groupby('Sub-Image-File').count()
for f in udf.index:
# Load Numpy array
im = np.load(f.strip())
        if not check_imshape(im.shape):
            print("Error: bad image dimensions {} in {}. Must be square with sides a multiple of {}".format(im.shape, f, DB_IMAGE_RES))
            continue   # skip this image rather than carry on with a bad shape
sizx, sizy, _ = im.shape
# Split into sub-zones
size_ratio = sizx//DB_IMAGE_RES
im2 = im.reshape(size_ratio,sizx//size_ratio,size_ratio,sizy//size_ratio,3)
im3 = im2.transpose(0,2,1,3,4).reshape(size_ratio**2,sizx//size_ratio,sizy//size_ratio,3)
image_list.append(im3)
        res = re.findall(r"w_(\d+)\.(\d+)_h_(\d+)\.(\d+)", f)[0]
W = round(float(res[0]+"."+res[1]))
H = round(float(res[2]+"."+res[3]))
count_cells = np.zeros((size_ratio, size_ratio),dtype=int)
for indiv_cells in df.values[df['Sub-Image-File']==f]:
if np.isnan(indiv_cells[1]):
pass
else:
w = float(indiv_cells[1])-W
h = float(indiv_cells[2])-H
count_cells[int(w//(sizx/size_ratio)),int(h//(sizy/size_ratio))] += 1
count_list.append(count_cells.flatten())
return len(udf.index)
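# Standalone sketch (hypothetical helper) of the reshape/transpose tiling used
# above: split a square N x N x 3 image into (N/64)^2 tiles of 64 x 64 x 3.
def split_into_tiles(im, res=DB_IMAGE_RES):
    n = im.shape[0] // res                 # tiles per side
    t = im.reshape(n, res, n, res, 3)      # expose block row/column axes
    return t.transpose(0, 2, 1, 3, 4).reshape(n * n, res, res, 3)
# e.g. a 128x128x3 sub-image yields 4 tiles of shape (64, 64, 3).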
def create_db(file_list,outfile):
image_list = []
count_list = []
total = 0
for f in file_list:
total += append_file_content(f,image_list,count_list)
shape = image_list[0].shape[1:]
count_list = np.array(count_list).flatten()