diff --git a/resnet-1470729826.pkl b/resnet-1470729826.pkl
new file mode 100644
index 0000000..a39d212
Binary files /dev/null and b/resnet-1470729826.pkl differ
diff --git a/resnet.py b/resnet.py
new file mode 100755
index 0000000..bd90b32
--- /dev/null
+++ b/resnet.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+
+import keras.backend as K
+assert K.image_dim_ordering() == 'th'
+
+import pickle, time
+import sys
+import numpy as np
+from keras.callbacks import LearningRateScheduler
+from keras.datasets import mnist
+from keras.layers import BatchNormalization
+from keras.layers import Convolution2D, MaxPooling2D
+from keras.layers import Flatten, Reshape
+from keras.layers import Input, merge, Dense, Activation
+from keras.models import Model
+from keras.utils.np_utils import to_categorical
+
+nb_classes = 10
+width = 28
+height = 28
+loss = 'categorical_crossentropy'
+
+name = 'resnet-{:.0f}'.format(time.time())
+
+# map argv indices to values so a missing argument simply reads as None
+args = dict(enumerate(sys.argv))
+restore_fn = args.get(1)
+if restore_fn == '.':  # TODO: accept any directory
+    # just use the most recent resnet-*.pkl file in the directory
+    # (a lexical sort works because the epoch timestamps have a fixed width)
+    import os
+    is_valid = lambda fn: fn.startswith('resnet-') and fn.endswith('.pkl')
+    files = sorted([fn for fn in os.listdir(restore_fn) if is_valid(fn)])
+    if len(files) == 0:
+        raise Exception("couldn't find any appropriate .pkl files in the CWD")
+    restore_fn = files[-1]
+
+dont_train = False
+verbose_summary = False
+
+reslayers = 4
+size = 8
+
+batch_size = 128
+epochs = 24
+convolutional = True
+resnet_enabled = True
+original_resnet = False
+LR = 1e-2
+LRprod = 0.1**(1/20.)  # will use a tenth of the learning rate after 20 epochs
+
+use_image_generator = True
+
+def prepare(X, y):
+    # scale pixels to [0, 1] and add the single channel axis ('th' ordering)
+    X = X.reshape(X.shape[0], 1, width, height).astype('float32') / 255
+    # convert class vectors to binary class matrices
+    Y = to_categorical(y, nb_classes)
+    return X, Y
+
+# the data, shuffled and split between train and test sets
+(X_train, y_train), (X_test, y_test) = mnist.load_data()
+X_train, Y_train = prepare(X_train, y_train)
+X_test, Y_test = prepare(X_test, y_test)
+
+if use_image_generator:
+    from keras.preprocessing.image import ImageDataGenerator
+    idg = ImageDataGenerator(rotation_range=5.,
+                             width_shift_range=.10,
+                             height_shift_range=.10,
+                             shear_range=5 / 180 * np.pi,
+                             zoom_range=0.1,
+                             fill_mode='constant',
+                             cval=0.)
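+    # (note: in this Keras 1 API the shift ranges above are fractions of the
+    # image size and shear_range is an angle in radians, i.e. 5 degrees here;
+    # fill_mode='constant' with cval=0. pads shifted digits with black)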
+
+# ReLU activations are supposed to work best with he_normal initialization
+if convolutional:
+    layer = lambda x: Convolution2D(x, 3, 3, init='he_normal', border_mode='same')
+else:
+    layer = lambda x: Dense(x, init='he_normal')
+
+# start constructing the model
+x = Input(shape=(1, width, height))
+y = x
+
+if convolutional:
+    # it might be worth trying other sizes here
+    y = Convolution2D(size, 7, 7, subsample=(2, 2), border_mode='same')(y)
+    y = MaxPooling2D()(y)
+else:
+    y = Flatten()(y)
+    y = Dense(size)(y)
+
+for i in range(reslayers):
+    skip = y
+    if original_resnet:
+        # original ordering: conv -> BN -> ReLU, merging before the final ReLU
+        y = layer(size)(y)
+        y = BatchNormalization(axis=1)(y)
+        y = Activation('relu')(y)
+        y = layer(size)(y)
+        y = BatchNormalization(axis=1)(y)
+        if resnet_enabled: y = merge([skip, y], mode='sum')
+        y = Activation('relu')(y)
+    else:
+        # pre-activation ordering: BN -> ReLU -> conv, merging after the
+        # second conv
+        y = BatchNormalization(axis=1)(y)
+        y = Activation('relu')(y)
+        y = layer(size)(y)
+        y = BatchNormalization(axis=1)(y)
+        y = Activation('relu')(y)
+        y = layer(size)(y)
+        if resnet_enabled: y = merge([skip, y], mode='sum')
+
+if convolutional:
+    from keras.layers import AveragePooling1D
+    # reshape the (size, 7, 7) feature maps to (size, 49) and average-pool
+    # across the channel axis, leaving 49 features for the classifier
+    y = Reshape((size, int(width * height / 2**2 / 2**2)))(y)
+    y = AveragePooling1D(size)(y)
+    y = Flatten()(y)
+
+y = Dense(nb_classes)(y)
+y = Activation('softmax')(y)
+
+model = Model(input=x, output=y)
+
+if verbose_summary:
+    model.summary()
+else:
+    total_params = sum(lyr.count_params() for lyr in model.layers)
+    print("Total params: {}".format(total_params))
+
+if restore_fn:
+    with open(restore_fn, 'rb') as f:
+        W = pickle.loads(f.read())
+    if not dont_train:
+        # sparsify an existing model: zero the half of each 3x3 conv kernel
+        # whose magnitudes fall below the median
+        for i, w in enumerate(W):
+            if w.shape == (size, size, 3, 3):
+                middle = np.median(np.abs(w.flat))
+                where = np.abs(w) < middle
+                total = np.prod(w.shape)
+                fmt = 'W[{}]: zeroing {} params of {}'
+                print(fmt.format(i, int(np.count_nonzero(where)), int(total)))
+                W[i] = np.where(where, 0, w)
+    model.set_weights(W)
+    LR /= 10  # fine-tune restored weights at a tenth of the base rate
+
+model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
+
+if not dont_train:
+    callbacks = [LearningRateScheduler(lambda e: LR * LRprod**e)]
+
+    kwargs = dict(
+        nb_epoch=epochs,
+        validation_data=(X_test, Y_test),
+        callbacks=callbacks,
+        verbose=1
+    )
+
+    if use_image_generator:
+        history = model.fit_generator(idg.flow(X_train, Y_train, batch_size=batch_size),
+                                      samples_per_epoch=len(X_train), **kwargs)
+    else:
+        history = model.fit(X_train, Y_train, batch_size=batch_size,
+                            **kwargs)
+
+def evaluate(X, Y):
+    scores = model.evaluate(X, Y, verbose=0)
+    for metric, value in zip(model.metrics_names, scores):
+        if metric == "acc":
+            print("{:7} {:6.2f}%".format(metric, value * 100))
+        else:
+            print("{:7} {:7.5f}".format(metric, value))
+
+print('TRAIN')
+evaluate(X_train, Y_train)
+
+print('TEST')
+evaluate(X_test, Y_test)
+
+print('ALL')
+evaluate(np.vstack((X_train, X_test)), np.vstack((Y_train, Y_test)))
+
+if not dont_train:
+    with open(name + '.json', 'w') as f:
+        f.write(model.to_json())
+    with open(name + '.pkl', 'wb') as f:
+        f.write(pickle.dumps(model.get_weights()))
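
Usage follows from the argument handling above: sys.argv[1] optionally names a weight pickle to restore, and "." means the newest resnet-*.pkl in the current directory. A sketch of the three invocations (the checkpoint name is the one committed alongside the script):

    ./resnet.py                          # train from scratch; writes resnet-<time>.json/.pkl
    ./resnet.py resnet-1470729826.pkl    # restore, sparsify the 3x3 kernels, fine-tune
    ./resnet.py .                        # same, using the newest resnet-*.pkl in the CWD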