Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions include/caffe/util/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ inline bool ReadImageToDatum(const string& filename, const int label,
}

template <typename Dtype>
void load_2d_dataset(
void hd5_load_nd_dataset(
hid_t file_id, const char* dataset_name_,
boost::scoped_ptr<Dtype>* array, hsize_t* dims);
int min_dim,//inclusive
int max_dim,//inclusive
//output:
boost::scoped_ptr<Dtype>* array,
std::vector<hsize_t>& dims
);

} // namespace caffe

Expand Down
10 changes: 5 additions & 5 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ class HDF5DataLayer : public Layer<Dtype> {
virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);

boost::scoped_ptr<Dtype> data;
boost::scoped_ptr<Dtype> label;
hsize_t data_dims[2];
hsize_t label_dims[2];
hsize_t current_row;
boost::scoped_ptr<Dtype> data_;
boost::scoped_ptr<Dtype> label_;
std::vector<hsize_t> data_dims_;
std::vector<hsize_t> label_dims_;
hsize_t current_row_;
};


Expand Down
56 changes: 41 additions & 15 deletions src/caffe/layers/hdf5_data_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,36 @@ void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
const char* hdf_filename = this->layer_param_.source().c_str();
LOG(INFO) << "Loading HDF5 file" << hdf_filename;
hid_t file_id = H5Fopen(hdf_filename, H5F_ACC_RDONLY, H5P_DEFAULT);
load_2d_dataset(file_id, "data", &data, data_dims);
load_2d_dataset(file_id, "label", &label, label_dims);
if (file_id < 0) {
LOG(ERROR) << "Failed opening HDF5 file" << hdf_filename;
return;
}
const int MAX_DATA_DIM = 4;
const int MAX_LABEL_DIM = 2;
const int MIN_DIM = 2;
hd5_load_nd_dataset(file_id, "data", MIN_DIM, MAX_DATA_DIM,
&data_, data_dims_);
hd5_load_nd_dataset(file_id, "label", MIN_DIM, MAX_LABEL_DIM,
&label_, label_dims_);

while(data_dims_.size() < MAX_DATA_DIM) {
data_dims_.push_back(1);
}

//add missing dimensions:
label_dims_.push_back(1);
label_dims_.push_back(1);

herr_t status = H5Fclose(file_id);
assert(data_dims[0] == label_dims[0]);
current_row = 0;
CHECK_EQ(data_dims_[0], label_dims_[0]);
LOG(INFO) << "Successully loaded " << data_dims_[0] << " rows";
current_row_ = 0;

// Reshape blobs.
(*top)[0]->Reshape(this->layer_param_.batchsize(), data_dims[1], 1, 1);
(*top)[1]->Reshape(this->layer_param_.batchsize(), label_dims[1], 1, 1);
(*top)[0]->Reshape(this->layer_param_.batchsize(),
data_dims_[1], data_dims_[2], data_dims_[3]);
(*top)[1]->Reshape(this->layer_param_.batchsize(),
label_dims_[1], label_dims_[2], label_dims_[3]);
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
Expand All @@ -50,18 +71,23 @@ template <typename Dtype>
// Fills top[0] (data) and top[1] (label) with the next `batchsize` rows of the
// buffers loaded from the HDF5 file. The read position current_row_ persists
// across calls and wraps back to row 0 once it reaches data_dims_[0].
// `bottom` is not used.
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  const int batchsize = this->layer_param_.batchsize();
  // Elements per row, derived from the shape the top blobs were given.
  const int data_count = (*top)[0]->count() / (*top)[0]->num();
  const int label_data_count = (*top)[1]->count() / (*top)[1]->num();

  // Loop-invariant pointers: look them up once instead of once per row.
  Dtype* const top_data = (*top)[0]->mutable_cpu_data();
  Dtype* const top_label = (*top)[1]->mutable_cpu_data();
  const Dtype* const source_data = data_.get();
  const Dtype* const source_label = label_.get();

  // TODO: consolidate into a single memcpy call

  for (int i = 0; i < batchsize; ++i, ++current_row_) {
    if (current_row_ == data_dims_[0]) {
      current_row_ = 0;  // ran past the last row: start over
    }

    memcpy(top_data + static_cast<size_t>(i) * data_count,
           source_data + current_row_ * data_count,
           sizeof(Dtype) * data_count);

    memcpy(top_label + static_cast<size_t>(i) * label_data_count,
           source_label + current_row_ * label_data_count,
           sizeof(Dtype) * label_data_count);
  }
}

Expand Down
21 changes: 12 additions & 9 deletions src/caffe/layers/hdf5_data_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,24 @@ template <typename Dtype>
// GPU counterpart of Forward_cpu: uploads the next `batchsize` rows of the
// host-side data/label buffers into the GPU memory of top[0] and top[1]
// (cudaMemcpyHostToDevice). The read position current_row_ persists across
// calls and wraps back to row 0 once it reaches data_dims_[0].
// `bottom` is not used.
void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  const int batchsize = this->layer_param_.batchsize();
  // Elements per row, derived from the shape the top blobs were given.
  const int data_count = (*top)[0]->count() / (*top)[0]->num();
  const int label_data_count = (*top)[1]->count() / (*top)[1]->num();

  // Loop-invariant pointers: look them up once instead of once per row.
  Dtype* const top_data = (*top)[0]->mutable_gpu_data();
  Dtype* const top_label = (*top)[1]->mutable_gpu_data();
  const Dtype* const source_data = data_.get();
  const Dtype* const source_label = label_.get();

  for (int i = 0; i < batchsize; ++i, ++current_row_) {
    if (current_row_ == data_dims_[0]) {
      current_row_ = 0;  // ran past the last row: start over
    }

    CUDA_CHECK(cudaMemcpy(
        top_data + static_cast<size_t>(i) * data_count,
        source_data + current_row_ * data_count,
        sizeof(Dtype) * data_count,
        cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaMemcpy(
        top_label + static_cast<size_t>(i) * label_data_count,
        source_label + current_row_ * label_data_count,
        sizeof(Dtype) * label_data_count,
        cudaMemcpyHostToDevice));
  }
}
Expand Down
4 changes: 3 additions & 1 deletion src/caffe/test/test_data/generate_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

num_cols = 8
num_rows = 10
data = np.arange(num_cols * num_rows).reshape(num_rows, num_cols)
height = 5
width = 5
data = np.arange(num_cols * num_rows * height * width).reshape(num_rows, num_cols, height, width)
label = np.arange(num_rows)[:, np.newaxis]
print data
print label
Expand Down
Binary file modified src/caffe/test/test_data/sample_data.h5
Binary file not shown.
38 changes: 26 additions & 12 deletions src/caffe/test/test_hdf5data_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,32 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) {
param.set_source(*(this->filename));
int num_rows = 10;
int num_cols = 8;
int height = 5;
int width = 5;
HDF5DataLayer<TypeParam> layer(param);

// Test that the layer setup got the correct parameters.
layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
EXPECT_EQ(this->blob_top_data_->num(), batchsize);
EXPECT_EQ(this->blob_top_data_->channels(), num_cols);
EXPECT_EQ(this->blob_top_data_->height(), 1);
EXPECT_EQ(this->blob_top_data_->width(), 1);
EXPECT_EQ(this->blob_top_data_->height(), height);
EXPECT_EQ(this->blob_top_data_->width(), width);

EXPECT_EQ(this->blob_top_label_->num(), batchsize);
EXPECT_EQ(this->blob_top_label_->channels(), 1);
EXPECT_EQ(this->blob_top_label_->height(), 1);
EXPECT_EQ(this->blob_top_label_->width(), 1);

const int data_size = num_cols * height * width;

// Go through the data 100 times.
for (int iter = 0; iter < 100; ++iter) {
layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

// On even iterations, we're reading the first half of the data.
// On odd iterations, we're reading the second half of the data.
int label_offset = (iter % 2 == 0) ? 0 : batchsize;
int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
int data_offset = (iter % 2 == 0) ? 0 : batchsize * data_size;

for (int i = 0; i < batchsize; ++i) {
EXPECT_EQ(
Expand All @@ -94,10 +98,15 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) {
}
for (int i = 0; i < batchsize; ++i) {
for (int j = 0; j < num_cols; ++j) {
EXPECT_EQ(
data_offset + i * num_cols + j,
this->blob_top_data_->cpu_data()[i * num_cols + j])
<< "debug: i " << i << " j " << j;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
int idx = i * num_cols * height * width + j * height * width + h * width + w;
EXPECT_EQ(
data_offset + idx,
this->blob_top_data_->cpu_data()[idx])
<< "debug: i " << i << " j " << j;
}
}
}
}
}
Expand All @@ -111,7 +120,7 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) {
// On even iterations, we're reading the first half of the data.
// On odd iterations, we're reading the second half of the data.
int label_offset = (iter % 2 == 0) ? 0 : batchsize;
int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
int data_offset = (iter % 2 == 0) ? 0 : batchsize * data_size;

for (int i = 0; i < batchsize; ++i) {
EXPECT_EQ(
Expand All @@ -120,10 +129,15 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) {
}
for (int i = 0; i < batchsize; ++i) {
for (int j = 0; j < num_cols; ++j) {
EXPECT_EQ(
data_offset + i * num_cols + j,
this->blob_top_data_->cpu_data()[i * num_cols + j])
<< "debug: i " << i << " j " << j;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
int idx = i * num_cols * height * width + j * height * width + h * width + w;
EXPECT_EQ(
data_offset + idx,
this->blob_top_data_->cpu_data()[idx])
<< "debug: i " << i << " j " << j;
}
}
}
}
}
Expand Down
45 changes: 33 additions & 12 deletions src/caffe/util/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <algorithm>
#include <string>
#include <vector>
#include <fstream> // NOLINT(readability/streams)

#include "caffe/common.hpp"
Expand Down Expand Up @@ -100,39 +101,59 @@ bool ReadImageToDatum(const string& filename, const int label,
}

// Reads the whole dataset `dataset_name_` from the open HDF5 file `file_id`
// into a freshly allocated float buffer.
//
//   min_dim / max_dim  inclusive bounds on the accepted dataset rank.
//   array              [out] takes ownership of the new buffer.
//   out_dims           [out] overwritten with the extent of each dimension.
//
// Any HDF5 failure, an out-of-range rank, or a non floating-point dataset
// class aborts through the CHECK_* macros.
//
// NOTE(review): `array` is a boost::scoped_ptr that is handed a new[] buffer,
// so it will be released with delete rather than delete[]. Fixing that needs
// an interface change (e.g. boost::scoped_array) in the header and callers.
template <>
void hd5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
        int min_dim, int max_dim,
        boost::scoped_ptr<float>* array, std::vector<hsize_t>& out_dims) {
  herr_t status;

  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  // HDF5 reports failure with a negative status; without this check ndims
  // would be read uninitialized.
  CHECK_GE(status, 0) << "Failed to get rank of dataset " << dataset_name_;
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);

  // A vector (not scoped_ptr<hsize_t> around new[]) so the storage is freed
  // correctly. Keep at least one slot so &dims[0] is valid for rank 0.
  std::vector<hsize_t> dims(std::max(ndims, 1));

  H5T_class_t class_;
  status = H5LTget_dataset_info(
      file_id, dataset_name_, &dims[0], &class_, NULL);
  CHECK_GE(status, 0) << "Failed to get info of dataset " << dataset_name_;
  CHECK_EQ(class_, H5T_FLOAT) << "Expected float data";

  // Overwrite rather than append, so a reused vector cannot accumulate
  // dimensions from an earlier load.
  out_dims.assign(dims.begin(), dims.begin() + ndims);

  // size_t accumulator: an int product overflows for large datasets.
  size_t array_size = 1;
  for (int i = 0; i < ndims; ++i) {
    array_size *= static_cast<size_t>(dims[i]);
  }

  array->reset(new float[array_size]);
  status = H5LTread_dataset_float(
      file_id, dataset_name_, array->get());
  CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name_;
}

// Reads the whole dataset `dataset_name_` from the open HDF5 file `file_id`
// into a freshly allocated double buffer.
//
//   min_dim / max_dim  inclusive bounds on the accepted dataset rank.
//   array              [out] takes ownership of the new buffer.
//   out_dims           [out] overwritten with the extent of each dimension.
//
// Any HDF5 failure, an out-of-range rank, or a non floating-point dataset
// class aborts through the CHECK_* macros.
//
// NOTE(review): `array` is a boost::scoped_ptr that is handed a new[] buffer,
// so it will be released with delete rather than delete[]. Fixing that needs
// an interface change (e.g. boost::scoped_array) in the header and callers.
template <>
void hd5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
        int min_dim, int max_dim,
        boost::scoped_ptr<double>* array, std::vector<hsize_t>& out_dims) {
  herr_t status;

  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  // HDF5 reports failure with a negative status; without this check ndims
  // would be read uninitialized.
  CHECK_GE(status, 0) << "Failed to get rank of dataset " << dataset_name_;
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);

  // A vector (not scoped_ptr<hsize_t> around new[]) so the storage is freed
  // correctly. Keep at least one slot so &dims[0] is valid for rank 0.
  std::vector<hsize_t> dims(std::max(ndims, 1));

  H5T_class_t class_;
  status = H5LTget_dataset_info(
      file_id, dataset_name_, &dims[0], &class_, NULL);
  CHECK_GE(status, 0) << "Failed to get info of dataset " << dataset_name_;
  // H5T_FLOAT is the HDF5 class tested for both specializations.
  CHECK_EQ(class_, H5T_FLOAT) << "Expected float data";

  // Overwrite rather than append, so a reused vector cannot accumulate
  // dimensions from an earlier load.
  out_dims.assign(dims.begin(), dims.begin() + ndims);

  // size_t accumulator: an int product overflows for large datasets.
  size_t array_size = 1;
  for (int i = 0; i < ndims; ++i) {
    array_size *= static_cast<size_t>(dims[i]);
  }

  array->reset(new double[array_size]);
  status = H5LTread_dataset_double(
      file_id, dataset_name_, array->get());
  CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name_;
}
Expand Down