Add readme, model download script, simple generate script
yusuketomoto committed Apr 8, 2016
1 parent 6ece522 commit 161fb43
Showing 5 changed files with 78 additions and 1 deletion.
32 changes: 32 additions & 0 deletions README.md
@@ -0,0 +1,32 @@
# Chainer implementation of "Perceptual Losses for Real-Time Style Transfer and Super-Resolution"

### Note:
This repository is a work in progress.

## Requirements
- [Chainer](https://github.com/pfnet/chainer)
```
$ pip install chainer
```

## Prerequisite
Download the VGG16 model and convert it into a smaller file that keeps only the convolution layers, which are about 10% of the entire model.
```
sh setup_model.sh
```
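`setup_model.sh` (added in this commit, see below) downloads the Caffe weights and then calls `create_chainer_model.py`, which is not part of this diff. As a rough, hypothetical sketch only, a conversion step like that could look something like the following, assuming `net.py` defines a `VGG` chain whose convolution links use the Caffe layer names (`conv1_1` … `conv5_3`) and assuming `vgg16.model` as the output file name:
```
# Hypothetical sketch -- the real create_chainer_model.py is not shown in this commit.
from chainer import serializers
from chainer.links import caffe
from net import VGG  # assumed: conv links named like the Caffe layers (conv1_1 ... conv5_3)

# Load the full Caffe VGG16 model, copy only the convolution weights into the
# Chainer VGG chain, and save it as a much smaller npz file.
caffe_model = caffe.CaffeFunction('VGG_ILSVRC_16_layers.caffemodel')
vgg = VGG()
for name, link in vgg.namedlinks(skipself=True):
    layer = getattr(caffe_model, name.lstrip('/'), None)
    if layer is None:
        continue  # no counterpart in the Caffe model (e.g. non-conv layers)
    link.W.data[...] = layer.W.data
    link.b.data[...] = layer.b.data
serializers.save_npz('vgg16.model', vgg)  # output file name is a placeholder
```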

## Train
You need to train one image transformation network per style target.
According to the paper, the models are trained on the [Microsoft COCO dataset](http://mscoco.org/dataset/#download).
```
python train.py -s <style_image_path> -d <training_dataset_path> -g 0
```
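For example, with a style image and an unpacked COCO training set at placeholder paths:
```
python train.py -s style_images/starry_night.jpg -d /data/mscoco/train2014 -g 0
```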

## Generate your own image
```
python generate.py <input_image_path> -m <model_path> -o <output_image_path>
```
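If `-m` or `-o` is omitted, `generate.py` falls back to its defaults, `models/style.model` and `out.jpg` (see the argparse defaults in the diff below). For example, with placeholder file names:
```
python generate.py sample.jpg -m models/style.model -o stylized.jpg
```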

## Reference
- [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155)
- [chainer-gogh](https://github.com/mattya/chainer-gogh.git): a Chainer implementation of neural style, which I referenced heavily and found very helpful.
40 changes: 40 additions & 0 deletions generate.py
@@ -0,0 +1,40 @@
import numpy as np
import argparse
from PIL import Image
import time

import chainer
from chainer import cuda, Variable, serializers
from net import *

parser = argparse.ArgumentParser(description='Real-time style transfer image generator')
parser.add_argument('input')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--model', '-m', default='models/style.model', type=str)
parser.add_argument('--out', '-o', default='out.jpg', type=str)
args = parser.parse_args()

# load the trained image transformation network
model = FastStyleNet()
serializers.load_npz(args.model, model)
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
# use CuPy arrays on GPU, NumPy arrays on CPU
xp = np if args.gpu < 0 else cuda.cupy

# load the input as float32, HWC -> CHW, add a batch dimension,
# and shift pixel values by -120 (shifted back after generation)
image = xp.asarray(Image.open(args.input).convert('RGB'), dtype=xp.float32).transpose(2, 0, 1)
image = image.reshape((1,) + image.shape)
image -= 120
x = Variable(image)

# run the transformation network and time it
start = time.time()
y = model(x)
result = cuda.to_cpu(y.data)

# NCHW -> HWC: drop the batch dimension, undo the -120 shift, cast to uint8
result = result.transpose(0, 2, 3, 1)
result = result.reshape((result.shape[1:]))
result += 120
result = np.uint8(result)
print time.time() - start, 'sec'

Image.fromarray(result).save(args.out)
Empty file added models/.gitkeep
Empty file.
5 changes: 5 additions & 0 deletions setup_model.sh
@@ -0,0 +1,5 @@
if [ ! -f VGG_ILSVRC_16_layers.caffemodel ]; then
    wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel
fi

python create_chainer_model.py
2 changes: 1 addition & 1 deletion train.py
@@ -47,7 +47,7 @@ def gram_matrix(y):
n_data = len(imagepaths)
print 'num training images:', n_data
n_iter = n_data / args.batchsize
-print n_iter, 'iterations', n_epoch
+print n_iter, 'iterations,', n_epoch, 'epochs'

model = FastStyleNet()
vgg = VGG()
