Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Classify a number of images at once, optionally using the selective
search window proposal method.
Do windowed detection by classifying a number of images/crops at once,
optionally using the selective search window proposal method.

This implementation follows
Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
Expand All @@ -12,7 +12,9 @@
https://github.com/sergeyk/selective_search_ijcv_with_python

TODO:
- [ ] batch up image filenames as well: don't want to load all of them into memory
- batch up image filenames as well: don't want to load all of them into memory
- refactor into class (without globals)
- update demo notebook with new options
"""
import numpy as np
import os
Expand All @@ -25,19 +27,18 @@
import selective_search_ijcv_with_python as selective_search
import caffe

IMAGE_DIM = 256
CROPPED_DIM = 227
IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2)
NET = None

CROP_MODES = ['center_only', 'corners', 'selective_search']
IMAGE_DIM = None
CROPPED_DIM = None
IMAGE_CENTER = None

IMAGE_MEAN = None
CROPPED_IMAGE_MEAN = None

# Load the imagenet mean file
IMAGENET_MEAN = np.load(
os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
CROPPED_IMAGENET_MEAN = IMAGENET_MEAN[
IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, :]
NUM_OUTPUT = None

CROP_MODES = ['center_only', 'corners', 'selective_search']

def load_image(filename):
"""
Expand Down Expand Up @@ -70,20 +71,16 @@ def format_image(image, window=None, cropped_size=False):
"""
# Crop a subimage if window is provided.
if window is not None:
image = image[
window[0]:window[2],
window[1]:window[3]
]
image = image[window[0]:window[2], window[1]:window[3]]


# Resize to ImageNet size, convert to BGR, subtract mean.
# Resize to input size, convert to BGR, subtract mean.
image = image[:, :, ::-1]
if cropped_size:
image = skimage.transform.resize(image, (CROPPED_DIM, CROPPED_DIM)) * 255
image -= CROPPED_IMAGENET_MEAN
image -= CROPPED_IMAGE_MEAN
else:
image = skimage.transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255
image -= IMAGENET_MEAN
image -= IMAGE_MEAN

image = image.swapaxes(1, 2).swapaxes(0, 1)
return image
Expand Down Expand Up @@ -239,19 +236,14 @@ def assemble_batches(image_fnames, crop_mode='center_only', batch_size=10):
return df_batches


def compute_feats(images_df, layer='imagenet'):
if layer == 'imagenet':
num_output = 1000
else:
raise ValueError("Unknown layer requested: {}".format(layer))

def compute_feats(images_df):
num = images_df.shape[0]
input_blobs = [np.ascontiguousarray(
np.concatenate(images_df['image'].values), dtype='float32')]
output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
output_blobs = [np.empty((num, NUM_OUTPUT, 1, 1), dtype=np.float32)]
print(input_blobs[0].shape, output_blobs[0].shape)

caffenet.Forward(input_blobs, output_blobs)
NET.Forward(input_blobs, output_blobs)
feats = [output_blobs[0][i].flatten() for i in range(len(output_blobs[0]))]

# Add the features and delete the images.
Expand All @@ -260,55 +252,84 @@ def compute_feats(images_df, layer='imagenet'):
return images_df


def config(model_def, pretrained_model, gpu, image_dim, image_mean_file):
    """
    Load the network and set up the module-level input/output configuration.

    Populates the globals NET, IMAGE_DIM, CROPPED_DIM, IMAGE_CENTER,
    IMAGE_MEAN, CROPPED_IMAGE_MEAN and NUM_OUTPUT.

    Input:
        model_def: model definition file handed to caffe.CaffeNet.
        pretrained_model: pretrained weights file handed to caffe.CaffeNet.
        gpu: if true, the network is switched to GPU mode.
        image_dim: side length of the (square) images; an int or a numeric
            string (the command-line flag is declared as a string flag).
        image_mean_file: path of the mean array, read with np.load.
    """
    global IMAGE_DIM, CROPPED_DIM, IMAGE_CENTER, IMAGE_MEAN, CROPPED_IMAGE_MEAN
    global NET, NUM_OUTPUT

    # Initialize network by loading model definition and weights.
    t = time.time()
    print("Loading Caffe model.")
    NET = caffe.CaffeNet(model_def, pretrained_model)
    NET.set_phase_test()
    if gpu:
        NET.set_mode_gpu()
    print("Caffe model loaded in {:.3f} s".format(time.time() - t))

    # Configure for input/output data.
    # A value given on the command line arrives as text because the flag is
    # a string flag; coerce it before doing arithmetic with it.
    IMAGE_DIM = int(image_dim)
    CROPPED_DIM = NET.blobs()[0].width
    IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2)

    # Load the data set mean file and cut out its central
    # CROPPED_DIM x CROPPED_DIM region along the first two axes.
    IMAGE_MEAN = np.load(image_mean_file)
    CROPPED_IMAGE_MEAN = IMAGE_MEAN[IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
                                    IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
                                    :]
    NUM_OUTPUT = NET.blobs()[-1].channels  # number of output classes


if __name__ == "__main__":
## Parse cmdline options
# Parse cmdline options
gflags.DEFINE_string(
"model_def", "", "The model definition file.")
"model_def", "", "Model definition file.")
gflags.DEFINE_string(
"pretrained_model", "", "The pretrained model.")
"pretrained_model", "", "Pretrained model weights file.")
gflags.DEFINE_boolean(
"gpu", False, "use gpu for computation")
"gpu", False, "Switch for gpu computation.")
gflags.DEFINE_string(
"crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
gflags.DEFINE_string(
"images_file", "", "File that contains image filenames.")
"images_file", "", "Image filenames file.")
gflags.DEFINE_string(
"batch_size", 10, "Number of image crops to let through in one go")
gflags.DEFINE_string(
"output", "", "The output DataFrame HDF5 filename.")
"output_file", "", "Output DataFrame HDF5 filename.")
gflags.DEFINE_string(
"images_dim", 256, "Canonical dimension of (square) images.")
gflags.DEFINE_string(
"layer", "imagenet", "Layer to output.")
"images_mean_file",
os.path.join(os.path.dirname(__file__), '../imagenet/ilsvrc_2012_mean.npy'),
"Data set image mean (numpy array).")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

## Load list of image filenames and assemble into batches.

# Configure network, input, output
config(FLAGS.model_def, FLAGS.pretrained_model, FLAGS.gpu, FLAGS.images_dim,
FLAGS.images_mean_file)

# Load list of image filenames and assemble into batches.
t = time.time()
print('Assembling batches...')
with open(FLAGS.images_file) as f:
image_fnames = [_.strip() for _ in f.readlines()]
image_batches = assemble_batches(
image_fnames, FLAGS.crop_mode, FLAGS.batch_size)
print('{} batches assembled in {:.3f} s'.format(
len(image_batches), time.time() - t))

# Initialize network by loading model definition and weights.
t = time.time()
print("Loading Caffe model.")
caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
caffenet.set_phase_test()
if FLAGS.gpu:
caffenet.set_mode_gpu()
print("Caffe model loaded in {:.3f} s".format(time.time() - t))
image_batches = assemble_batches(image_fnames, FLAGS.crop_mode,
FLAGS.batch_size)
print('{} batches assembled in {:.3f} s'.format(len(image_batches),
time.time() - t))

# Process the batches.
t = time.time()
print 'Processing {} files in {} batches'.format(
len(image_fnames), len(image_batches))
print 'Processing {} files in {} batches'.format(len(image_fnames),
len(image_batches))
dfs_with_feats = []
for i in range(len(image_batches)):
if i % 10 == 0:
print('Batch {}/{}, elapsed time: {:.3f} s'.format(
i, len(image_batches), time.time() - t))
print('Batch {}/{}, elapsed time: {:.3f} s'.format(i,
len(image_batches),
time.time() - t))
dfs_with_feats.append(compute_feats(image_batches[i]))

# Concatenate, dropping the padding rows.
Expand All @@ -317,8 +338,8 @@ def compute_feats(images_df, layer='imagenet'):

# Write out the results.
t = time.time()
df.to_hdf(FLAGS.output, 'df', mode='w')
df.to_hdf(FLAGS.output_file, 'df', mode='w')
print("Done. Saving to {} took {:.3f} s.".format(
FLAGS.output, time.time() - t))
FLAGS.output_file, time.time() - t))

sys.exit()
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@
"\n",
" wget http://farm1.static.flickr.com/220/512450093_7717fb8ce8.jpg\n",
" echo `pwd`/\"512450093_7717fb8ce8.jpg\" > image_cat.txt\n",
" python power_wrapper.py --images_file=image_cat.txt --crop_mode=selective_search --model_def=<path to imagenet_deploy.prototxt> --pretrained_model=<path to alexnet_train_iter_470000> --output=selective_cat.h5\n",
" python detector.py --images_file=image_cat.txt --crop_mode=selective_search --model_def=<path to imagenet_deploy.prototxt> --pretrained_model=<path to alexnet_train_iter_470000> --output=selective_cat.h5\n",
" \n",
" \n",
"Running this outputs an HDF5 file with the filenames, selected windows, and their ImageNet scores.\n",
"Of course, we only ran on one image, so the filenames will all be the same.\n",
"\n",
"In general, `power_wrapper` is most efficient when running on a lot of images: it first extracts window proposals for all of them, then batches the windows for efficient GPU processing, and then outputs the results.\n",
"Simply list an image per line in the `images_file`, and `power_wrapper` will process all of them."
"In general, `detector` is most efficient when running on a lot of images: it first extracts window proposals for all of them, then batches the windows for efficient GPU processing, and then outputs the results.\n",
"Simply list an image per line in the `images_file`, and `detector` will process all of them."
]
},
{
Expand Down
34 changes: 19 additions & 15 deletions python/caffe/imagenet/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@
from disk, using the imagenet classifier.
"""

from google.protobuf import text_format
import gzip
import numpy as np
import os
from skimage import io
from skimage import transform

import caffe
from caffe.proto import caffe_pb2

IMAGE_DIM = 256
CROPPED_DIM = 227
Expand All @@ -20,8 +17,9 @@
os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))


def oversample(image, center_only = False):
"""Oversamples an image. Currently the indices are hard coded to the
def oversample(image, center_only=False):
"""
Oversamples an image. Currently the indices are hard coded to the
4 corners and the center of the image, as well as their flipped ones,
a total of 10 images.

Expand All @@ -31,7 +29,7 @@ def oversample(image, center_only = False):
Output:
images: the output of size (10 x 3 x 227 x 227)
"""
image = image.swapaxes(1,2).swapaxes(0,1)
image = image.swapaxes(1, 2).swapaxes(0, 1)
indices = [0, IMAGE_DIM - CROPPED_DIM]
center = int(indices[1] / 2)
if center_only:
Expand All @@ -46,19 +44,19 @@ def oversample(image, center_only = False):
for j in indices:
images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM]
curr += 1
images[4] = image[
:, center:center + CROPPED_DIM,center:center + CROPPED_DIM]
images[4] = image[:, center:center + CROPPED_DIM,
center:center + CROPPED_DIM]
# flipped version
images[5:] = images[:5, :, :, ::-1]
return images


def prepare_image(filename, center_only = False):
def prepare_image(filename, center_only=False):
img = io.imread(filename)
if img.ndim == 2:
img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
elif img.shape[2] == 4:
img = img[:,:,:3]
img = img[:, :, :3]
# Resize and convert to BGR
img_reshape = (transform.resize(img, (IMAGE_DIM,IMAGE_DIM)) * 255)[:, :, ::-1]
# subtract mean
Expand All @@ -67,11 +65,12 @@ def prepare_image(filename, center_only = False):


class ImageNetClassifier(object):
"""The ImageNetClassifier is a wrapper class to perform easier deployment
"""
The ImageNetClassifier is a wrapper class to perform easier deployment
of models trained on imagenet.
"""
def __init__(self, model_def_file, pretrained_model, center_only = False,
num_output=1000):
def __init__(self, model_def_file, pretrained_model, center_only=False,
num_output=1000):
if center_only:
num = 1
else:
Expand All @@ -87,7 +86,9 @@ def predict(self, filename):


def main(argv):
"""The main function will carry out classification."""
"""
The main function will carry out classification.
"""
import gflags
import glob
import time
Expand All @@ -99,15 +100,18 @@ def main(argv):
gflags.DEFINE_boolean("gpu", True, "use gpu for computation")
FLAGS = gflags.FLAGS
FLAGS(argv)

net = ImageNetClassifier(FLAGS.model_def, FLAGS.pretrained_model)

if FLAGS.gpu:
print 'Use gpu.'
net.caffenet.set_mode_gpu()

files = glob.glob(os.path.join(FLAGS.root, "*." + FLAGS.ext))
files.sort()
print 'A total of %d files' % len(files)
output = np.empty((len(files), net._output_blobs[0].shape[1]),
dtype=np.float32)
dtype=np.float32)
start = time.time()
for i, f in enumerate(files):
output[i] = net.predict(f)
Expand Down