from scipy import io
import numpy as np
from sklearn import cross_validation as scv  # renamed to sklearn.model_selection in 0.18+
# Do "python setup.py develop" or add nnet to "sys.path"
from nnet import NeuralNetBuilder
import nnet.preprocessing as pre
data_path = "../digit-dataset/train.mat"
data = io.loadmat(data_path)
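# train_images is a 28x28xN array (one digit per slice along the third axis)
# and train_labels an Nx1 column vector; flatten each image into a
# 784-element row vector, one row per sample.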
data_size = data['train_images'].shape[2]
images = np.array([data['train_images'][:,:,i].flatten() for i in xrange(data_size)])
labels = np.array([data['train_labels'][i][0] for i in xrange(data_size)])
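# test_size=0.0 routes every sample into the training split (the validation
# set is empty), presumably for the final Kaggle run; note that newer
# scikit-learn versions reject a zero test_size.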
train_images, validation_images, train_labels, validation_labels = \
    scv.train_test_split(images, labels, test_size=0.0, random_state=1117)
train_set_size = len(train_images)
validation_set_size = len(validation_images)
print "Train set size: {0} | Validation set size {1}" \
.format(train_set_size, validation_set_size)
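# Fit the normalizer on the training data only and reuse the same transform
# on the validation and test sets. Note: this custom Normalize.fit appears
# to return the normalized data directly, unlike sklearn's convention where
# fit returns the estimator itself.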
reg = pre.Normalize()
train_images = reg.fit(train_images)
validation_images = reg.transform(validation_images)
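# cudamat needs an initialized CUBLAS context before the network allocates
# any GPU matrices; the matching cm.cublas_shutdown() is called at the end.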
import cudamat as cm
cm.cublas_init()
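# Network configuration: a 784-unit 'Dummy' input layer that applies dropout
# (p=0.8) to the raw pixels, three 1024-unit ReLU hidden layers (default
# dropout, 0.5 per the figure titles), and a 10-way sigmoid output trained
# on cross-entropy error (CEE). All layers use max-norm weight clipping
# (c=2) and momentum 0.95; training runs mini-batches of 200 with a dynamic
# step-size learning-rate schedule, stopping after at most 3500 epochs or
# when improvement stalls.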
net_builder = NeuralNetBuilder()
net_builder.add_fully_connected_layer(784, 'Dummy', use_dropout=True, dropout_p=0.8,
use_max_norm=True, max_norm_c=2,
use_momentum=True, momentum=0.95)\
.add_fully_connected_layer(1024, 'ReLU', use_dropout=True,
use_max_norm=True, max_norm_c=2,
use_momentum=True, momentum=0.95)\
.add_fully_connected_layer(1024, 'ReLU', use_dropout=True,
use_max_norm=True, max_norm_c=2,
use_momentum=True, momentum=0.95)\
.add_fully_connected_layer(1024, 'ReLU', use_dropout=True,
use_max_norm=True, max_norm_c=2,
use_momentum=True, momentum=0.95)\
.add_output_layer(10, 'Sigmoid', 'CEE')\
.add_batch_size(200)\
.add_dynamic_step_size_lr_func(1e-2, 1e-1, 4, 1e-5)\
.add_max_epoch_stopping_criteria(3500)\
.add_min_improve_stopping_criteria(6, 1e-8)\
.add_status_period(5000)
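# Build and train. train() returns a 2D array of statistics; row 0 appears
# to hold the final epoch, accuracy, loss, and elapsed seconds (columns
# 0, 2, 3, 4), and rows 1: the per-epoch history plotted below.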
net = net_builder.build()
losses = net.train(train_images, train_labels)
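# With a non-zero test_size above, held-out accuracy could be checked with
# something like the following sketch (assuming predict returns class labels):
# val_predictions = net.predict(validation_images)
# print "Validation accuracy: {0}".format(np.mean(val_predictions == validation_labels))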
%matplotlib inline
import matplotlib.pyplot as plt
import os
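# Plot four training curves: error rate and loss, each against both the
# epoch count and the wall-clock training time.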
fig_directory = '../figures'
# Create the output directory for the figures if it does not exist yet
if not os.path.exists(fig_directory):
    os.makedirs(fig_directory)
fig_size = (6, 4.5)
fig_dpi = 200
# Hyperparameter summary shared by all four figure titles below
params_desc = 'Mini-batch: 200; Max-norm: 2\n'
params_desc += 'Layer size: 784, 1024, 1024, 1024\n'
params_desc += 'Momentum: 0.95, 0.95, 0.95, 0.95\n'
params_desc += 'Dropout rate: 0.8, 0.5, 0.5, 0.5\n'
params_desc += 'Dynamic Step Size LR: (1e-2, 1e-1, 4, 1e-5)\n'
params_desc += 'Sigma: sqrt(2/n)\n'

title = 'Training Error Rate Plot of Cross Entropy Error\n' + params_desc
title += 'Training Error rate is {0} at epoch {1}'.format(round(1.0 - losses[0, 2], 2),
                                                          int(losses[0, 0]))
fig = plt.figure(figsize=fig_size)
plt.plot(losses[1:,0], 1.0 - losses[1:,2])
plt.title(title)
plt.xlabel('Number of epochs')
plt.ylabel('Training Error rate')
plt.grid()
plt.show()
fig.savefig('{0}/k5_epoch_vs_error_rate.png'.format(fig_directory),
dpi=fig_dpi,
bbox_inches='tight')
title = 'Training Loss of Cross Entropy Error\n' + params_desc
title += 'Training Loss is {0} at epoch {1}'.format(round(losses[0, 3], 2),
                                                    int(losses[0, 0]))
fig = plt.figure(figsize=fig_size)
plt.plot(losses[1:,0], losses[1:,3])
plt.title(title)
plt.xlabel('Number of epochs')
plt.ylabel('Training Loss')
plt.grid()
plt.show()
fig.savefig('{0}/k5_epoch_vs_loss.png'.format(fig_directory),
dpi=fig_dpi,
bbox_inches='tight')
title = 'Training Error Rate Plot of Cross Entropy Error\n' + params_desc
title += 'Training Error rate is {0} at {1} seconds'.format(round(1.0 - losses[0, 2], 2),
                                                            round(losses[0, 4], 2))
fig = plt.figure(figsize=fig_size)
plt.plot(losses[1:,4], 1.0 - losses[1:,2])
plt.title(title)
plt.xlabel('Training time (seconds)')
plt.ylabel('Training Error rate')
plt.grid()
plt.show()
fig.savefig('{0}/k5_time_vs_error_rate.png'.format(fig_directory),
dpi=fig_dpi,
bbox_inches='tight')
title = 'Training Loss of Cross Entropy Error\n' + params_desc
title += 'Training Loss is {0} at {1} seconds'.format(round(losses[0, 3], 2),
                                                      round(losses[0, 4], 2))
fig = plt.figure(figsize=fig_size)
plt.plot(losses[1:,4], losses[1:,3])
plt.title(title)
plt.xlabel('Training time (seconds)')
plt.ylabel('Training Loss')
plt.grid()
plt.show()
fig.savefig('{0}/k5_time_vs_loss.png'.format(fig_directory),
dpi=fig_dpi,
bbox_inches='tight')
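# Load the Kaggle test set, normalize it with the statistics fitted on the
# training data, and predict a digit class for each image.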
test_data_path = "../digit-dataset/test.mat"
test_data = io.loadmat(test_data_path)
test_data_size = test_data['test_images'].shape[2]
test_images = reg.transform(np.array([test_data['test_images'][:, :, i].flatten()
                                      for i in xrange(test_data_size)]))
predictions = net.predict(test_images)
import csv
kaggle_result_path = './kaggle5_predictions.csv'
# Write the Kaggle submission: 1-indexed Id, predicted digit Category
with open(kaggle_result_path, 'wb') as fp:  # Python 2 csv expects binary mode
    writer = csv.writer(fp, delimiter=',')
    writer.writerow(['Id', 'Category'])
    for i in xrange(len(predictions)):
        writer.writerow([i + 1, int(predictions[i])])
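# Persist the trained network together with the fitted normalizer (so the
# same preprocessing can presumably be reapplied at prediction time), then
# release the CUBLAS context.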
net.dump('kaggle5.dump', reg)
cm.cublas_shutdown()