The Shimomaruko Connectionist

Posts on topics such as machine learning and artificial intelligence.

A Recommendation for Non-Abstracted Chainer Development

Research should be done simply, with black boxes eliminated wherever possible.
Chainer, which I have been using since the v1 series, became heavily abstracted from v2 onward, and I don't think using it exactly the way the published examples do is well suited to that goal.
That said, clinging to v1 has started to cause real problems of its own.
So, having now upgraded my personal Chainer setup all the way from v1 to v4, I rewrote the MNIST sample in a v1-series style.

Since it uses neither Trainer nor Iterator, the flow of the data should be relatively easy to follow.
It is also admirable how much care Chainer takes over backward compatibility.
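
For contrast, the abstracted style that the official v2+ examples encourage looks roughly like the sketch below. This is a from-memory outline, not the exact example code, and it borrows the MLP class defined in the full script that follows.

# The Trainer-based style of the official examples, shown only for contrast.
import chainer
import chainer.links as L
from chainer import training
from chainer.training import extensions

train, test = chainer.datasets.get_mnist()
train_iter = chainer.iterators.SerialIterator(train, 100)
model = L.Classifier(MLP(1000, 10))  # MLP as defined in the script below
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (10, 'epoch'))
trainer.extend(extensions.LogReport())
trainer.run()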

Note that optimizer hook functions are invoked from optimizer.update(), so they can be used in this style just as usual.
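
For example, weight decay can be attached like this (a minimal sketch; 1e-4 is just an illustrative coefficient):

# Hook functions registered on the optimizer run inside optimizer.update().
optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))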

#!/usr/bin/env python
"""Chainer example: train a multi-layer perceptron on MNIST

This is a minimal example of writing a feed-forward net. It requires
scikit-learn to load the MNIST dataset.

"""
from __future__ import print_function

import argparse
import os

import numpy as np
from sklearn.datasets import fetch_mldata

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import cuda, Variable, optimizers, configuration

# Prepare multi-layer perceptron model
class MLP(chainer.Chain):
    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # the size of the inputs to each layer will be inferred
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)  # n_units -> n_out

    def __call__(self, x):
        h = F.relu(self.l1(x))
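        # F.dropout reads chainer.config.train: it is active here during
        # training and disabled in the evaluation loop below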
        h = F.dropout(F.relu(self.l2(h)))
        y = self.l3(h)
        return y

parser = argparse.ArgumentParser(description='Chainer example: MNIST')
parser.add_argument('--gpu', '-g', default=0, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--save_dir', '-o', default='/home/work/chainer', type=str,
                    help='save directory')
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume training from a previously saved model')
parser.add_argument('--load_dir', '-i', default='/home/work/chainer', type=str,
                    help='load directory')
args = parser.parse_args()

batchsize = 100
n_epoch   = 10
n_units   = 1000

# Prepare dataset
print('fetch MNIST dataset')
mnist = fetch_mldata('MNIST original')
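# scale pixels to [0, 1]; softmax_cross_entropy expects int32 labels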
mnist.data   = mnist.data.astype(np.float32)
mnist.data  /= 255
mnist.target = mnist.target.astype(np.int32)

N = 60000
x_train, x_test = np.split(mnist.data,   [N])
t_train, t_test = np.split(mnist.target, [N])
N_test = t_test.size

# Set up a neural network to train.
# Loss and accuracy are computed explicitly in the training loop below,
# so no Classifier wrapper or reporting extensions are needed.
model = MLP(n_units, 10)

if args.resume:
    chainer.serializers.load_npz(os.path.join(args.load_dir, 'mnist_mlp.npz'), model)

if args.gpu >= 0:
    # Make a specified GPU current
    chainer.backends.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

# Setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

# Learning loop
for epoch in range(1, n_epoch + 1):
    print('epoch', epoch)

    # training
    perm = np.random.permutation(N)
    sum_accuracy = 0
    sum_loss = 0
    for i in range(0, N, batchsize):
        x = x_train[perm[i:i+batchsize]]
        t = t_train[perm[i:i+batchsize]]
        if args.gpu >= 0:
            x = cuda.to_gpu(x)
            t = cuda.to_gpu(t)
        x = Variable(x)
        t = Variable(t)
        y = model(x)
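        # clear gradients accumulated in the previous iteration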
        model.cleargrads()
        loss, acc = F.softmax_cross_entropy(y, t), F.accuracy(y, t)
        loss.backward()
        optimizer.update()
        sum_loss     += float(cuda.to_cpu(loss.data)) * batchsize
        sum_accuracy += float(cuda.to_cpu(acc.data)) * batchsize
        del loss

    print('train mean loss={}, accuracy={}'.format(
        sum_loss / N, sum_accuracy / N))

    # evaluation
    # disable train mode (turns off dropout) and skip graph construction
    with configuration.using_config('train', False), chainer.no_backprop_mode():
        sum_accuracy = 0
        sum_loss     = 0
        for i in range(0, N_test, batchsize):
            x = x_test[i:i+batchsize]
            t = t_test[i:i+batchsize]
            if args.gpu >= 0:
                x = cuda.to_gpu(x)
                t = cuda.to_gpu(t)
            y = model(x)
            loss, acc = F.softmax_cross_entropy(y, t), F.accuracy(y, t)
            sum_loss     += float(cuda.to_cpu(loss.data)) * batchsize
            sum_accuracy += float(cuda.to_cpu(acc.data)) * batchsize
    
        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))

if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)
chainer.serializers.save_npz(os.path.join(args.save_dir, 'mnist_mlp.npz'), model)
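
To reuse the trained weights later, loading works symmetrically. The following is a minimal sketch; it reuses the names defined in the script above and assumes a CPU run.

# Reload the trained weights and classify a few test images.
model = MLP(n_units, 10)
chainer.serializers.load_npz(os.path.join(args.save_dir, 'mnist_mlp.npz'), model)
with configuration.using_config('train', False), chainer.no_backprop_mode():
    y = model(x_test[:10])
print('predicted:', y.data.argmax(axis=1))
print('labels   :', t_test[:10])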