mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
move flags to c10 (#12144)
Summary: still in flux. Pull Request resolved: https://github.com/pytorch/pytorch/pull/12144 Reviewed By: smessmer Differential Revision: D10140176 Pulled By: Yangqing fbshipit-source-id: 1a313abed022039333e3925d19f8b3ef2d95306c
This commit is contained in:
committed by
Facebook Github Bot
parent
c9f7d7b506
commit
38f3d1fc40
@ -251,5 +251,4 @@ template<class T> inline std::string to_string(T value) {
|
||||
return detail::to_string_<T>::call(value);
|
||||
}
|
||||
|
||||
|
||||
}}
|
||||
|
@ -11,9 +11,10 @@
|
||||
#include <ATen/core/context_base.h>
|
||||
#include <ATen/core/optional.h>
|
||||
|
||||
#include "c10/util/Flags.h"
|
||||
|
||||
#include "caffe2/core/allocator.h"
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/flags.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
// A global boolean variable to control whether we free memory when a Tensor
|
||||
@ -23,14 +24,13 @@
|
||||
// This parameter is respected by "upper-case" methods which call Resize()
|
||||
// (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_
|
||||
// or ShrinkTo, both of which guarantee never to free memory.
|
||||
CAFFE2_DECLARE_bool(caffe2_keep_on_shrink);
|
||||
C10_DECLARE_bool(caffe2_keep_on_shrink);
|
||||
|
||||
// Since we can have high variance in blob memory allocated across different
|
||||
// inputs in the same run, we will shrink the blob only if the memory gain
|
||||
// is larger than this flag in bytes. This only applies to functions which
|
||||
// respect caffe2_keep_on_shrink.
|
||||
CAFFE2_DECLARE_int64(caffe2_max_keep_on_shrink_memory);
|
||||
|
||||
C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -604,10 +604,13 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
|
||||
// is smaller than new size
|
||||
reset_tensor = storage_.capacity() < (storage_offset_ + numel_) * storage_.itemsize();
|
||||
} else {
|
||||
reset_tensor = storage_.capacity() < (storage_offset_ + numel_) * storage_.itemsize() ||
|
||||
!caffe2::FLAGS_caffe2_keep_on_shrink ||
|
||||
storage_.capacity() - (storage_offset_ + numel_) * storage_.itemsize() >
|
||||
static_cast<size_t>(caffe2::FLAGS_caffe2_max_keep_on_shrink_memory);
|
||||
reset_tensor = storage_.capacity() <
|
||||
(storage_offset_ + numel_) * storage_.itemsize() ||
|
||||
!c10::FLAGS_caffe2_keep_on_shrink ||
|
||||
storage_.capacity() -
|
||||
(storage_offset_ + numel_) * storage_.itemsize() >
|
||||
static_cast<size_t>(
|
||||
c10::FLAGS_caffe2_max_keep_on_shrink_memory);
|
||||
}
|
||||
|
||||
if (reset_tensor && !is_init) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
All files living in this directory are written with the assumption that MKL is available,
|
||||
which means that this code is not guarded by `#if AT_MKL_ENABLED()`. Therefore, whenever
|
||||
you need to use definitions from here, please guard the `#include<ATen/mkl/*.h>` and
|
||||
definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp).
|
||||
definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp).
|
||||
|
@ -1 +1 @@
|
||||
void convolve_5x5(float* output, float* input, float* kernel, int64_t outRows, int64_t outCols, int64_t inCols);
|
||||
void convolve_5x5(float* output, float* input, float* kernel, int64_t outRows, int64_t outCols, int64_t inCols);
|
||||
|
@ -211,4 +211,4 @@ void convolve_5x5_avx(float* output, float* input, float* kernel, int64_t outRow
|
||||
CLEAR_AVX();
|
||||
convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -58,4 +58,4 @@ void THNN_(SpatialFullConvolution_accGradParameters)(
|
||||
kW, kH, dW, dH, padW, padH, 1, 1, adjW, adjH, scale_);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@ -9,22 +9,19 @@ using std::map;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
backend,
|
||||
"builtin",
|
||||
"The backend to use when running the model. The allowed "
|
||||
"backend choices are: builtin, default, nnpack, eigen, mkl, cuda");
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
init_net,
|
||||
"",
|
||||
"The given net to initialize any parameters.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(init_net, "", "The given net to initialize any parameters.");
|
||||
C10_DEFINE_string(
|
||||
input,
|
||||
"",
|
||||
"Input that is needed for running the network. If "
|
||||
"multiple input needed, use comma separated string.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
input_dims,
|
||||
"",
|
||||
"Alternate to input_files, if all inputs are simple "
|
||||
@ -32,46 +29,46 @@ CAFFE2_DEFINE_string(
|
||||
"separated numbers. If multiple input needed, use "
|
||||
"semicolon to separate the dimension of different "
|
||||
"tensors.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
input_file,
|
||||
"",
|
||||
"Input file that contain the serialized protobuf for "
|
||||
"the input blobs. If multiple input needed, use comma "
|
||||
"separated string. Must have the same number of items "
|
||||
"as input does.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
input_type,
|
||||
"float",
|
||||
"Input type when specifying the input dimension."
|
||||
"The supported types are float, uint8_t.");
|
||||
CAFFE2_DEFINE_int(iter, 10, "The number of iterations to run.");
|
||||
CAFFE2_DEFINE_string(net, "", "The given net to benchmark.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_int(iter, 10, "The number of iterations to run.");
|
||||
C10_DEFINE_string(net, "", "The given net to benchmark.");
|
||||
C10_DEFINE_string(
|
||||
output,
|
||||
"",
|
||||
"Output that should be dumped after the execution "
|
||||
"finishes. If multiple outputs are needed, use comma "
|
||||
"separated string. If you want to dump everything, pass "
|
||||
"'*' as the output value.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
output_folder,
|
||||
"",
|
||||
"The folder that the output should be written to. This "
|
||||
"folder must already exist in the file system.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
run_individual,
|
||||
false,
|
||||
"Whether to benchmark individual operators.");
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
sleep_before_run,
|
||||
0,
|
||||
"The seconds to sleep before starting the benchmarking.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
text_output,
|
||||
false,
|
||||
"Whether to write out output in text format for regression purpose.");
|
||||
CAFFE2_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
|
||||
C10_DEFINE_bool(
|
||||
wipe_cache,
|
||||
false,
|
||||
"Whether to evict the cache before running network.");
|
||||
@ -81,19 +78,19 @@ int main(int argc, char** argv) {
|
||||
benchmark(
|
||||
argc,
|
||||
argv,
|
||||
caffe2::FLAGS_backend,
|
||||
caffe2::FLAGS_init_net,
|
||||
caffe2::FLAGS_input,
|
||||
caffe2::FLAGS_input_dims,
|
||||
caffe2::FLAGS_input_file,
|
||||
caffe2::FLAGS_input_type,
|
||||
caffe2::FLAGS_iter,
|
||||
caffe2::FLAGS_net,
|
||||
caffe2::FLAGS_output,
|
||||
caffe2::FLAGS_output_folder,
|
||||
caffe2::FLAGS_run_individual,
|
||||
caffe2::FLAGS_sleep_before_run,
|
||||
caffe2::FLAGS_text_output,
|
||||
caffe2::FLAGS_warmup,
|
||||
caffe2::FLAGS_wipe_cache);
|
||||
c10::FLAGS_backend,
|
||||
c10::FLAGS_init_net,
|
||||
c10::FLAGS_input,
|
||||
c10::FLAGS_input_dims,
|
||||
c10::FLAGS_input_file,
|
||||
c10::FLAGS_input_type,
|
||||
c10::FLAGS_iter,
|
||||
c10::FLAGS_net,
|
||||
c10::FLAGS_output,
|
||||
c10::FLAGS_output_folder,
|
||||
c10::FLAGS_run_individual,
|
||||
c10::FLAGS_sleep_before_run,
|
||||
c10::FLAGS_text_output,
|
||||
c10::FLAGS_warmup,
|
||||
c10::FLAGS_wipe_cache);
|
||||
}
|
||||
|
@ -20,11 +20,11 @@
|
||||
#include "caffe2/proto/caffe2_legacy.pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_db, "", "The input db.");
|
||||
CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
CAFFE2_DEFINE_string(output_db, "", "The output db.");
|
||||
CAFFE2_DEFINE_string(output_db_type, "", "The output db type.");
|
||||
CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
C10_DEFINE_string(input_db, "", "The input db.");
|
||||
C10_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
C10_DEFINE_string(output_db, "", "The output db.");
|
||||
C10_DEFINE_string(output_db_type, "", "The output db type.");
|
||||
C10_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
|
||||
using caffe2::db::Cursor;
|
||||
using caffe2::db::DB;
|
||||
@ -37,9 +37,9 @@ int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
|
||||
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
|
||||
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
|
||||
std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_output_db_type, caffe2::FLAGS_output_db, caffe2::db::NEW));
|
||||
c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
|
||||
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
|
||||
std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
|
||||
int count = 0;
|
||||
@ -80,7 +80,7 @@ int main(int argc, char** argv) {
|
||||
data->set_byte_data(buffer, datum.data().size());
|
||||
}
|
||||
transaction->Put(cursor->key(), protos.SerializeAsString());
|
||||
if (++count % caffe2::FLAGS_batch_size == 0) {
|
||||
if (++count % c10::FLAGS_batch_size == 0) {
|
||||
transaction->Commit();
|
||||
LOG(INFO) << "Converted " << count << " items so far.";
|
||||
}
|
||||
|
@ -19,11 +19,11 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_db, "", "The input db.");
|
||||
CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
CAFFE2_DEFINE_string(output_db, "", "The output db.");
|
||||
CAFFE2_DEFINE_string(output_db_type, "", "The output db type.");
|
||||
CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
C10_DEFINE_string(input_db, "", "The input db.");
|
||||
C10_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
C10_DEFINE_string(output_db, "", "The output db.");
|
||||
C10_DEFINE_string(output_db_type, "", "The output db type.");
|
||||
C10_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
|
||||
using caffe2::db::Cursor;
|
||||
using caffe2::db::DB;
|
||||
@ -33,15 +33,15 @@ int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
|
||||
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
|
||||
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
|
||||
std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_output_db_type, caffe2::FLAGS_output_db, caffe2::db::NEW));
|
||||
c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
|
||||
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
|
||||
std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
|
||||
int count = 0;
|
||||
for (; cursor->Valid(); cursor->Next()) {
|
||||
transaction->Put(cursor->key(), cursor->value());
|
||||
if (++count % caffe2::FLAGS_batch_size == 0) {
|
||||
if (++count % c10::FLAGS_batch_size == 0) {
|
||||
transaction->Commit();
|
||||
LOG(INFO) << "Converted " << count << " items so far.";
|
||||
}
|
||||
|
@ -16,9 +16,9 @@
|
||||
|
||||
// This script converts an image dataset to leveldb.
|
||||
//
|
||||
// caffe2::FLAGS_input_folder is the root folder that holds all the images, and
|
||||
// caffe2::FLAGS_list_file should be a list of files as well as their labels, in the
|
||||
// format as
|
||||
// c10::FLAGS_input_folder is the root folder that holds all the images, and
|
||||
// c10::FLAGS_list_file should be a list of files as well as their labels, in
|
||||
// the format as
|
||||
// subfolder1/file1.JPEG 7
|
||||
// ....
|
||||
|
||||
@ -35,14 +35,15 @@
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_db_name, "", "The input image file name.");
|
||||
CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
CAFFE2_DEFINE_int(scale, 256,
|
||||
"If caffe2::FLAGS_raw is set, scale all the images' shorter edge to the given "
|
||||
C10_DEFINE_string(input_db_name, "", "The input image file name.");
|
||||
C10_DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
C10_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
C10_DEFINE_int(
|
||||
scale,
|
||||
256,
|
||||
"If c10::FLAGS_raw is set, scale all the images' shorter edge to the given "
|
||||
"value.");
|
||||
CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
|
||||
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -92,7 +93,7 @@ void ConvertToRawDataset(
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
data->add_dims(0);
|
||||
data->add_dims(0);
|
||||
if (caffe2::FLAGS_color) {
|
||||
if (c10::FLAGS_color) {
|
||||
data->add_dims(3);
|
||||
}
|
||||
string value;
|
||||
@ -107,28 +108,30 @@ void ConvertToRawDataset(
|
||||
const string& encoded_image = input_protos.protos(0).string_data(0);
|
||||
int encoded_size = encoded_image.size();
|
||||
cv::Mat img = cv::imdecode(
|
||||
cv::Mat(1, &encoded_size, CV_8UC1,
|
||||
const_cast<char*>(encoded_image.data())),
|
||||
caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
cv::Mat(
|
||||
1, &encoded_size, CV_8UC1, const_cast<char*>(encoded_image.data())),
|
||||
c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
cv::Mat resized_img;
|
||||
int scaled_width, scaled_height;
|
||||
if (caffe2::FLAGS_warp) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
if (c10::FLAGS_warp) {
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
} else if (img.rows > img.cols) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_height = static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height =
|
||||
static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
|
||||
} else {
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
scaled_width = static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
|
||||
}
|
||||
cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
|
||||
cv::INTER_LINEAR);
|
||||
data->set_dims(0, scaled_height);
|
||||
data->set_dims(1, scaled_width);
|
||||
DCHECK(resized_img.isContinuous());
|
||||
data->set_byte_data(resized_img.ptr(),
|
||||
scaled_height * scaled_width * (caffe2::FLAGS_color ? 3 : 1));
|
||||
data->set_byte_data(
|
||||
resized_img.ptr(),
|
||||
scaled_height * scaled_width * (c10::FLAGS_color ? 3 : 1));
|
||||
output_protos.SerializeToString(&value);
|
||||
// Put in db
|
||||
batch->Put(iter->key(), value);
|
||||
@ -151,6 +154,6 @@ void ConvertToRawDataset(
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
caffe2::ConvertToRawDataset(
|
||||
caffe2::FLAGS_input_db_name, caffe2::FLAGS_output_db_name);
|
||||
c10::FLAGS_input_db_name, c10::FLAGS_output_db_name);
|
||||
return 0;
|
||||
}
|
||||
|
@ -25,14 +25,14 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
CAFFE2_DEFINE_string(input_images, "", "Comma separated images");
|
||||
CAFFE2_DEFINE_string(input_image_file, "", "The file containing imput images");
|
||||
CAFFE2_DEFINE_string(output_tensor, "", "The output tensor file in NCHW");
|
||||
CAFFE2_DEFINE_int(scale, 256, "Scale the shorter edge to the given value.");
|
||||
CAFFE2_DEFINE_bool(text_output, false, "Write the output in text format.");
|
||||
CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
C10_DEFINE_string(input_images, "", "Comma separated images");
|
||||
C10_DEFINE_string(input_image_file, "", "The file containing imput images");
|
||||
C10_DEFINE_string(output_tensor, "", "The output tensor file in NCHW");
|
||||
C10_DEFINE_int(scale, 256, "Scale the shorter edge to the given value.");
|
||||
C10_DEFINE_bool(text_output, false, "Write the output in text format.");
|
||||
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
C10_DEFINE_string(
|
||||
preprocess,
|
||||
"",
|
||||
"Options to specify the preprocess routines. The available options are "
|
||||
@ -44,17 +44,15 @@ namespace caffe2 {
|
||||
cv::Mat resizeImage(cv::Mat& img) {
|
||||
cv::Mat resized_img;
|
||||
int scaled_width, scaled_height;
|
||||
if (caffe2::FLAGS_warp) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
if (c10::FLAGS_warp) {
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
} else if (img.rows > img.cols) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_height =
|
||||
static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height = static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
|
||||
} else {
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
scaled_width =
|
||||
static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
|
||||
}
|
||||
cv::resize(
|
||||
img,
|
||||
@ -89,9 +87,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
|
||||
std::vector<float> mean(3, 0);
|
||||
std::vector<float> std(3, 1);
|
||||
bool bgrtorgb = false;
|
||||
assert(img.cols == caffe2::FLAGS_scale);
|
||||
assert(img.rows == caffe2::FLAGS_scale);
|
||||
vector<string> steps = caffe2::split(',', caffe2::FLAGS_preprocess);
|
||||
assert(img.cols == c10::FLAGS_scale);
|
||||
assert(img.rows == c10::FLAGS_scale);
|
||||
vector<string> steps = caffe2::split(',', c10::FLAGS_preprocess);
|
||||
for (int i = 0; i < steps.size(); i++) {
|
||||
auto step = steps[i];
|
||||
if (step == "subtract128") {
|
||||
@ -114,8 +112,8 @@ std::vector<float> convertToVector(cv::Mat& img) {
|
||||
}
|
||||
}
|
||||
|
||||
int C = caffe2::FLAGS_color ? 3 : 1;
|
||||
int total_size = C * caffe2::FLAGS_scale * caffe2::FLAGS_scale;
|
||||
int C = c10::FLAGS_color ? 3 : 1;
|
||||
int total_size = C * c10::FLAGS_scale * c10::FLAGS_scale;
|
||||
std::vector<float> values(total_size);
|
||||
if (C == 1) {
|
||||
cv::MatIterator_<uchar> it, end;
|
||||
@ -132,9 +130,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
|
||||
for (it = img.begin<cv::Vec3b>(), end = img.end<cv::Vec3b>(); it != end;
|
||||
++it, i++) {
|
||||
values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]);
|
||||
int offset = caffe2::FLAGS_scale * caffe2::FLAGS_scale + i;
|
||||
int offset = c10::FLAGS_scale * c10::FLAGS_scale + i;
|
||||
values[offset] = (((*it)[g] / normalize[1] - mean[1]) / std[1]);
|
||||
offset = caffe2::FLAGS_scale * caffe2::FLAGS_scale + offset;
|
||||
offset = c10::FLAGS_scale * c10::FLAGS_scale + offset;
|
||||
values[offset] = (((*it)[r] / normalize[2] - mean[2]) / std[2]);
|
||||
}
|
||||
}
|
||||
@ -148,7 +146,7 @@ std::vector<float> convertOneImage(std::string& filename) {
|
||||
// Load image
|
||||
cv::Mat img = cv::imread(
|
||||
filename,
|
||||
caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
|
||||
cv::Mat crop = cropToSquare(img);
|
||||
|
||||
@ -157,17 +155,17 @@ std::vector<float> convertOneImage(std::string& filename) {
|
||||
// Assert we don't have to deal with alignment
|
||||
DCHECK(resized_img.isContinuous());
|
||||
assert(resized_img.rows == resized_img.cols);
|
||||
assert(resized_img.rows == caffe2::FLAGS_scale);
|
||||
assert(resized_img.rows == c10::FLAGS_scale);
|
||||
std::vector<float> one_image_values = convertToVector(resized_img);
|
||||
return one_image_values;
|
||||
}
|
||||
|
||||
void convertImages() {
|
||||
vector<string> file_names;
|
||||
if (caffe2::FLAGS_input_images != "") {
|
||||
file_names = caffe2::split(',', caffe2::FLAGS_input_images);
|
||||
} else if (caffe2::FLAGS_input_image_file != "") {
|
||||
std::ifstream infile(caffe2::FLAGS_input_image_file);
|
||||
if (c10::FLAGS_input_images != "") {
|
||||
file_names = caffe2::split(',', c10::FLAGS_input_images);
|
||||
} else if (c10::FLAGS_input_image_file != "") {
|
||||
std::ifstream infile(c10::FLAGS_input_image_file);
|
||||
std::string line;
|
||||
while (std::getline(infile, line)) {
|
||||
vector<string> file_name = caffe2::split(',', line);
|
||||
@ -183,7 +181,7 @@ void convertImages() {
|
||||
assert(false);
|
||||
}
|
||||
std::vector<std::vector<float>> values;
|
||||
int C = caffe2::FLAGS_color ? 3 : 1;
|
||||
int C = c10::FLAGS_color ? 3 : 1;
|
||||
for (int i = 0; i < file_names.size(); i++) {
|
||||
std::vector<float> one_image_values = convertOneImage(file_names[i]);
|
||||
values.push_back(one_image_values);
|
||||
@ -195,19 +193,19 @@ void convertImages() {
|
||||
data->set_data_type(TensorProto::FLOAT);
|
||||
data->add_dims(values.size());
|
||||
data->add_dims(C);
|
||||
data->add_dims(caffe2::FLAGS_scale);
|
||||
data->add_dims(caffe2::FLAGS_scale);
|
||||
data->add_dims(c10::FLAGS_scale);
|
||||
data->add_dims(c10::FLAGS_scale);
|
||||
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
assert(values[i].size() == C * caffe2::FLAGS_scale * caffe2::FLAGS_scale);
|
||||
assert(values[i].size() == C * c10::FLAGS_scale * c10::FLAGS_scale);
|
||||
for (int j = 0; j < values[i].size(); j++) {
|
||||
data->add_float_data(values[i][j]);
|
||||
}
|
||||
}
|
||||
if (caffe2::FLAGS_text_output) {
|
||||
caffe2::WriteProtoToTextFile(protos, caffe2::FLAGS_output_tensor);
|
||||
if (c10::FLAGS_text_output) {
|
||||
caffe2::WriteProtoToTextFile(protos, c10::FLAGS_output_tensor);
|
||||
} else {
|
||||
caffe2::WriteProtoToBinaryFile(protos, caffe2::FLAGS_output_tensor);
|
||||
caffe2::WriteProtoToBinaryFile(protos, c10::FLAGS_output_tensor);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,13 +23,15 @@
|
||||
#include "caffe2/core/timer.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_db, "", "The input db.");
|
||||
CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
CAFFE2_DEFINE_int(report_interval, 1000, "The report interval.");
|
||||
CAFFE2_DEFINE_int(repeat, 10, "The number to repeat the throughput test.");
|
||||
CAFFE2_DEFINE_bool(use_reader, false, "If true, use the reader interface.");
|
||||
CAFFE2_DEFINE_int(num_read_threads, 1,
|
||||
"The number of concurrent reading threads.");
|
||||
C10_DEFINE_string(input_db, "", "The input db.");
|
||||
C10_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
C10_DEFINE_int(report_interval, 1000, "The report interval.");
|
||||
C10_DEFINE_int(repeat, 10, "The number to repeat the throughput test.");
|
||||
C10_DEFINE_bool(use_reader, false, "If true, use the reader interface.");
|
||||
C10_DEFINE_int(
|
||||
num_read_threads,
|
||||
1,
|
||||
"The number of concurrent reading threads.");
|
||||
|
||||
using caffe2::db::Cursor;
|
||||
using caffe2::db::DB;
|
||||
@ -38,11 +40,11 @@ using caffe2::string;
|
||||
|
||||
void TestThroughputWithDB() {
|
||||
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
|
||||
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
|
||||
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
|
||||
for (int iter_id = 0; iter_id < caffe2::FLAGS_repeat; ++iter_id) {
|
||||
for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
|
||||
caffe2::Timer timer;
|
||||
for (int i = 0; i < caffe2::FLAGS_report_interval; ++i) {
|
||||
for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
|
||||
string key = cursor->key();
|
||||
string value = cursor->value();
|
||||
//VLOG(1) << "Key " << key;
|
||||
@ -52,32 +54,36 @@ void TestThroughputWithDB() {
|
||||
}
|
||||
}
|
||||
double elapsed_seconds = timer.Seconds();
|
||||
printf("Iteration %03d, took %4.5f seconds, throughput %f items/sec.\n",
|
||||
iter_id, elapsed_seconds,
|
||||
caffe2::FLAGS_report_interval / elapsed_seconds);
|
||||
printf(
|
||||
"Iteration %03d, took %4.5f seconds, throughput %f items/sec.\n",
|
||||
iter_id,
|
||||
elapsed_seconds,
|
||||
c10::FLAGS_report_interval / elapsed_seconds);
|
||||
}
|
||||
}
|
||||
|
||||
void TestThroughputWithReaderWorker(const DBReader* reader, int thread_id) {
|
||||
string key, value;
|
||||
for (int iter_id = 0; iter_id < caffe2::FLAGS_repeat; ++iter_id) {
|
||||
for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
|
||||
caffe2::Timer timer;
|
||||
for (int i = 0; i < caffe2::FLAGS_report_interval; ++i) {
|
||||
for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
|
||||
reader->Read(&key, &value);
|
||||
}
|
||||
double elapsed_seconds = timer.Seconds();
|
||||
printf("Thread %03d iteration %03d, took %4.5f seconds, "
|
||||
"throughput %f items/sec.\n",
|
||||
thread_id, iter_id, elapsed_seconds,
|
||||
caffe2::FLAGS_report_interval / elapsed_seconds);
|
||||
printf(
|
||||
"Thread %03d iteration %03d, took %4.5f seconds, "
|
||||
"throughput %f items/sec.\n",
|
||||
thread_id,
|
||||
iter_id,
|
||||
elapsed_seconds,
|
||||
c10::FLAGS_report_interval / elapsed_seconds);
|
||||
}
|
||||
}
|
||||
|
||||
void TestThroughputWithReader() {
|
||||
caffe2::db::DBReader reader(
|
||||
caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db);
|
||||
caffe2::db::DBReader reader(c10::FLAGS_input_db_type, c10::FLAGS_input_db);
|
||||
std::vector<std::unique_ptr<std::thread>> reading_threads(
|
||||
caffe2::FLAGS_num_read_threads);
|
||||
c10::FLAGS_num_read_threads);
|
||||
for (int i = 0; i < reading_threads.size(); ++i) {
|
||||
reading_threads[i].reset(new std::thread(
|
||||
TestThroughputWithReaderWorker, &reader, i));
|
||||
@ -89,7 +95,7 @@ void TestThroughputWithReader() {
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
if (caffe2::FLAGS_use_reader) {
|
||||
if (c10::FLAGS_use_reader) {
|
||||
TestThroughputWithReader();
|
||||
} else {
|
||||
TestThroughputWithDB();
|
||||
|
@ -19,17 +19,18 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "c10/util/Flags.h"
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "caffe2/core/init.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_log_level);
|
||||
C10_DECLARE_int(caffe2_log_level);
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
caffe2::SetUsageMessage(
|
||||
c10::SetUsageMessage(
|
||||
"Inspects the GPUs on the current machine and prints out their details "
|
||||
"provided by cuda.");
|
||||
|
||||
|
@ -33,14 +33,14 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_folder, "", "The input folder name.");
|
||||
CAFFE2_DEFINE_string(output_train_db_name,
|
||||
"", "The output training db name.");
|
||||
CAFFE2_DEFINE_string(output_test_db_name,
|
||||
"", "The output testing db name.");
|
||||
CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
|
||||
CAFFE2_DEFINE_bool(is_cifar100, false,
|
||||
"If set, convert cifar100. Otherwise do cifar10.");
|
||||
C10_DEFINE_string(input_folder, "", "The input folder name.");
|
||||
C10_DEFINE_string(output_train_db_name, "", "The output training db name.");
|
||||
C10_DEFINE_string(output_test_db_name, "", "The output testing db name.");
|
||||
C10_DEFINE_string(db, "leveldb", "The db type.");
|
||||
C10_DEFINE_bool(
|
||||
is_cifar100,
|
||||
false,
|
||||
"If set, convert cifar100. Otherwise do cifar10.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -57,7 +57,7 @@ const int kCIFAR100TestDataSize = 10000;
|
||||
|
||||
void ReadImage(std::ifstream* file, int* label, char* buffer) {
|
||||
char label_char;
|
||||
if (caffe2::FLAGS_is_cifar100) {
|
||||
if (c10::FLAGS_is_cifar100) {
|
||||
// Skip the coarse label.
|
||||
file->read(&label_char, 1);
|
||||
}
|
||||
@ -110,31 +110,29 @@ void WriteToDB(const string& filename, const int num_items,
|
||||
|
||||
void ConvertCIFAR() {
|
||||
std::unique_ptr<db::DB> train_db(
|
||||
db::CreateDB(caffe2::FLAGS_db, caffe2::FLAGS_output_train_db_name,
|
||||
db::NEW));
|
||||
db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_train_db_name, db::NEW));
|
||||
std::unique_ptr<db::DB> test_db(
|
||||
db::CreateDB(caffe2::FLAGS_db, caffe2::FLAGS_output_test_db_name,
|
||||
db::NEW));
|
||||
db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_test_db_name, db::NEW));
|
||||
|
||||
if (!caffe2::FLAGS_is_cifar100) {
|
||||
if (!c10::FLAGS_is_cifar100) {
|
||||
// This is cifar 10.
|
||||
for (int fileid = 0; fileid < kCIFAR10TrainBatches; ++fileid) {
|
||||
stringstream train_file;
|
||||
train_file << caffe2::FLAGS_input_folder << "/data_batch_" << fileid + 1
|
||||
train_file << c10::FLAGS_input_folder << "/data_batch_" << fileid + 1
|
||||
<< ".bin";
|
||||
WriteToDB(train_file.str(), kCIFAR10BatchSize,
|
||||
fileid * kCIFAR10BatchSize, train_db.get());
|
||||
}
|
||||
stringstream test_file;
|
||||
test_file << caffe2::FLAGS_input_folder << "/test_batch.bin";
|
||||
test_file << c10::FLAGS_input_folder << "/test_batch.bin";
|
||||
WriteToDB(test_file.str(), kCIFAR10TestDataSize, 0, test_db.get());
|
||||
} else {
|
||||
// This is cifar 100.
|
||||
stringstream train_file;
|
||||
train_file << caffe2::FLAGS_input_folder << "/train.bin";
|
||||
train_file << c10::FLAGS_input_folder << "/train.bin";
|
||||
WriteToDB(train_file.str(), kCIFAR100TrainDataSize, 0, train_db.get());
|
||||
stringstream test_file;
|
||||
test_file << caffe2::FLAGS_input_folder << "/test.bin";
|
||||
test_file << c10::FLAGS_input_folder << "/test.bin";
|
||||
WriteToDB(test_file.str(), kCIFAR100TestDataSize, 0, test_db.get());
|
||||
}
|
||||
}
|
||||
|
@ -16,9 +16,9 @@
|
||||
|
||||
// This script converts an image dataset to a database.
|
||||
//
|
||||
// caffe2::FLAGS_input_folder is the root folder that holds all the images
|
||||
// c10::FLAGS_input_folder is the root folder that holds all the images
|
||||
//
|
||||
// caffe2::FLAGS_list_file is the path to a file containing a list of files
|
||||
// c10::FLAGS_list_file is the path to a file containing a list of files
|
||||
// and their labels, as follows:
|
||||
//
|
||||
// subfolder1/file1.JPEG 7
|
||||
@ -42,24 +42,28 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(shuffle, false,
|
||||
C10_DEFINE_bool(
|
||||
shuffle,
|
||||
false,
|
||||
"Randomly shuffle the order of images and their labels");
|
||||
CAFFE2_DEFINE_string(input_folder, "", "The input image file name.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(input_folder, "", "The input image file name.");
|
||||
C10_DEFINE_string(
|
||||
list_file,
|
||||
"",
|
||||
"The text file containing the list of images.");
|
||||
CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
|
||||
CAFFE2_DEFINE_bool(raw, false,
|
||||
C10_DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
C10_DEFINE_string(db, "leveldb", "The db type.");
|
||||
C10_DEFINE_bool(
|
||||
raw,
|
||||
false,
|
||||
"If set, we pre-read the images and store the raw buffer.");
|
||||
CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_bool(color, true, "If set, load images in color.");
|
||||
C10_DEFINE_int(
|
||||
scale,
|
||||
256,
|
||||
"If caffe2::FLAGS_raw is set, scale the shorter edge to the given value.");
|
||||
CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
CAFFE2_DEFINE_int(
|
||||
"If c10::FLAGS_raw is set, scale the shorter edge to the given value.");
|
||||
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
C10_DEFINE_int(
|
||||
num_threads,
|
||||
-1,
|
||||
"Number of image parsing and conversion threads.");
|
||||
@ -71,11 +75,11 @@ class Converter {
|
||||
explicit Converter() {
|
||||
data_ = protos_.add_protos();
|
||||
label_ = protos_.add_protos();
|
||||
if (caffe2::FLAGS_raw) {
|
||||
if (c10::FLAGS_raw) {
|
||||
data_->set_data_type(TensorProto::BYTE);
|
||||
data_->add_dims(0);
|
||||
data_->add_dims(0);
|
||||
if (caffe2::FLAGS_color) {
|
||||
if (c10::FLAGS_color) {
|
||||
data_->add_dims(3);
|
||||
}
|
||||
} else {
|
||||
@ -115,7 +119,7 @@ class Converter {
|
||||
}
|
||||
|
||||
void run() {
|
||||
const auto& input_folder = caffe2::FLAGS_input_folder;
|
||||
const auto& input_folder = c10::FLAGS_input_folder;
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
std::string value;
|
||||
while (!in_.empty()) {
|
||||
@ -126,7 +130,7 @@ class Converter {
|
||||
label_->set_int32_data(0, pair.second);
|
||||
|
||||
// Add raw file contents to DB if !raw
|
||||
if (!caffe2::FLAGS_raw) {
|
||||
if (!c10::FLAGS_raw) {
|
||||
std::ifstream image_file_stream(input_folder + pair.first);
|
||||
if (!image_file_stream) {
|
||||
LOG(ERROR) << "Cannot open " << input_folder << pair.first
|
||||
@ -140,23 +144,22 @@ class Converter {
|
||||
// Load image
|
||||
cv::Mat img = cv::imread(
|
||||
input_folder + pair.first,
|
||||
caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR
|
||||
: CV_LOAD_IMAGE_GRAYSCALE);
|
||||
c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
|
||||
// Resize image
|
||||
cv::Mat resized_img;
|
||||
int scaled_width, scaled_height;
|
||||
if (caffe2::FLAGS_warp) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
if (c10::FLAGS_warp) {
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
} else if (img.rows > img.cols) {
|
||||
scaled_width = caffe2::FLAGS_scale;
|
||||
scaled_width = c10::FLAGS_scale;
|
||||
scaled_height =
|
||||
static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
|
||||
static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
|
||||
} else {
|
||||
scaled_height = caffe2::FLAGS_scale;
|
||||
scaled_height = c10::FLAGS_scale;
|
||||
scaled_width =
|
||||
static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
|
||||
static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
|
||||
}
|
||||
cv::resize(
|
||||
img,
|
||||
@ -211,12 +214,12 @@ void ConvertImageDataset(
|
||||
lines.push_back(std::make_pair(filename, file_label));
|
||||
}
|
||||
|
||||
if (caffe2::FLAGS_shuffle) {
|
||||
if (c10::FLAGS_shuffle) {
|
||||
LOG(INFO) << "Shuffling data";
|
||||
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(1701));
|
||||
}
|
||||
|
||||
auto num_threads = caffe2::FLAGS_num_threads;
|
||||
auto num_threads = c10::FLAGS_num_threads;
|
||||
if (num_threads < 1) {
|
||||
num_threads = std::thread::hardware_concurrency();
|
||||
}
|
||||
@ -224,7 +227,7 @@ void ConvertImageDataset(
|
||||
LOG(INFO) << "Processing " << lines.size() << " images...";
|
||||
LOG(INFO) << "Opening DB " << output_db_name;
|
||||
|
||||
auto db = db::CreateDB(caffe2::FLAGS_db, output_db_name, db::NEW);
|
||||
auto db = db::CreateDB(c10::FLAGS_db, output_db_name, db::NEW);
|
||||
auto transaction = db->NewTransaction();
|
||||
|
||||
LOG(INFO) << "Using " << num_threads << " processing threads...";
|
||||
@ -274,7 +277,9 @@ void ConvertImageDataset(
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
caffe2::ConvertImageDataset(
|
||||
caffe2::FLAGS_input_folder, caffe2::FLAGS_list_file,
|
||||
caffe2::FLAGS_output_db_name, caffe2::FLAGS_shuffle);
|
||||
c10::FLAGS_input_folder,
|
||||
c10::FLAGS_list_file,
|
||||
c10::FLAGS_output_db_name,
|
||||
c10::FLAGS_shuffle);
|
||||
return 0;
|
||||
}
|
||||
|
@ -27,15 +27,19 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(image_file, "", "The input image file name.");
|
||||
CAFFE2_DEFINE_string(label_file, "", "The label file name.");
|
||||
CAFFE2_DEFINE_string(output_file, "", "The output db name.");
|
||||
CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
|
||||
CAFFE2_DEFINE_int(data_limit, -1,
|
||||
"If set, only output this number of data points.");
|
||||
CAFFE2_DEFINE_bool(channel_first, false,
|
||||
"If set, write the data as channel-first (CHW order) as the old "
|
||||
"Caffe does.");
|
||||
C10_DEFINE_string(image_file, "", "The input image file name.");
|
||||
C10_DEFINE_string(label_file, "", "The label file name.");
|
||||
C10_DEFINE_string(output_file, "", "The output db name.");
|
||||
C10_DEFINE_string(db, "leveldb", "The db type.");
|
||||
C10_DEFINE_int(
|
||||
data_limit,
|
||||
-1,
|
||||
"If set, only output this number of data points.");
|
||||
C10_DEFINE_bool(
|
||||
channel_first,
|
||||
false,
|
||||
"If set, write the data as channel-first (CHW order) as the old "
|
||||
"Caffe does.");
|
||||
|
||||
namespace caffe2 {
|
||||
uint32_t swap_endian(uint32_t val) {
|
||||
@ -79,7 +83,8 @@ void convert_dataset(const char* image_filename, const char* label_filename,
|
||||
cols = swap_endian(cols);
|
||||
|
||||
// leveldb
|
||||
std::unique_ptr<db::DB> mnist_db(db::CreateDB(caffe2::FLAGS_db, db_path, db::NEW));
|
||||
std::unique_ptr<db::DB> mnist_db(
|
||||
db::CreateDB(c10::FLAGS_db, db_path, db::NEW));
|
||||
std::unique_ptr<db::Transaction> transaction(mnist_db->NewTransaction());
|
||||
// Storing to db
|
||||
char label_value;
|
||||
@ -93,7 +98,7 @@ void convert_dataset(const char* image_filename, const char* label_filename,
|
||||
TensorProto* data = protos.add_protos();
|
||||
TensorProto* label = protos.add_protos();
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
if (caffe2::FLAGS_channel_first) {
|
||||
if (c10::FLAGS_channel_first) {
|
||||
data->add_dims(1);
|
||||
data->add_dims(rows);
|
||||
data->add_dims(cols);
|
||||
@ -133,7 +138,10 @@ void convert_dataset(const char* image_filename, const char* label_filename,
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
caffe2::convert_dataset(caffe2::FLAGS_image_file.c_str(), caffe2::FLAGS_label_file.c_str(),
|
||||
caffe2::FLAGS_output_file.c_str(), caffe2::FLAGS_data_limit);
|
||||
caffe2::convert_dataset(
|
||||
c10::FLAGS_image_file.c_str(),
|
||||
c10::FLAGS_label_file.c_str(),
|
||||
c10::FLAGS_output_file.c_str(),
|
||||
c10::FLAGS_data_limit);
|
||||
return 0;
|
||||
}
|
||||
|
@ -19,8 +19,8 @@
|
||||
#include "caffe2/predictor/predictor.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(init_net, "", "The given path to the init protobuffer.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(init_net, "", "The given path to the init protobuffer.");
|
||||
C10_DEFINE_string(
|
||||
predict_net,
|
||||
"",
|
||||
"The given path to the predict protobuffer.");
|
||||
@ -28,15 +28,15 @@ CAFFE2_DEFINE_string(
|
||||
namespace caffe2 {
|
||||
|
||||
void run() {
|
||||
if (FLAGS_init_net.empty()) {
|
||||
if (c10::FLAGS_init_net.empty()) {
|
||||
LOG(FATAL) << "No init net specified. Use --init_net=/path/to/net.";
|
||||
}
|
||||
if (FLAGS_predict_net.empty()) {
|
||||
if (c10::FLAGS_predict_net.empty()) {
|
||||
LOG(FATAL) << "No predict net specified. Use --predict_net=/path/to/net.";
|
||||
}
|
||||
caffe2::NetDef init_net, predict_net;
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_predict_net, &predict_net));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &init_net));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_predict_net, &predict_net));
|
||||
// Can be large due to constant fills
|
||||
VLOG(1) << "Init net: " << ProtoDebugString(init_net);
|
||||
LOG(INFO) << "Predict net: " << ProtoDebugString(predict_net);
|
||||
|
@ -21,8 +21,7 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/operator_schema.h"
|
||||
|
||||
CAFFE2_DEFINE_string(schema, "",
|
||||
"Print doc and schema of a particular operator");
|
||||
C10_DEFINE_string(schema, "", "Print doc and schema of a particular operator");
|
||||
|
||||
static bool HasSchema(const std::string& str) {
|
||||
return caffe2::OpSchemaRegistry::Schema(str);
|
||||
@ -36,15 +35,14 @@ static bool HasDoc(const std::string& str) {
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
|
||||
if (!caffe2::FLAGS_schema.empty()) {
|
||||
const auto* schema = caffe2::OpSchemaRegistry::Schema(
|
||||
caffe2::FLAGS_schema);
|
||||
if (!c10::FLAGS_schema.empty()) {
|
||||
const auto* schema = caffe2::OpSchemaRegistry::Schema(c10::FLAGS_schema);
|
||||
if (!schema) {
|
||||
std::cerr << "Operator " << caffe2::FLAGS_schema
|
||||
<< " doesn't have a schema" << std::endl;
|
||||
std::cerr << "Operator " << c10::FLAGS_schema << " doesn't have a schema"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "Operator " << caffe2::FLAGS_schema << ": " << std::endl
|
||||
std::cout << "Operator " << c10::FLAGS_schema << ": " << std::endl
|
||||
<< *schema;
|
||||
return 0;
|
||||
}
|
||||
|
@ -20,17 +20,17 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
C10_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
if (caffe2::FLAGS_plan.size() == 0) {
|
||||
if (c10::FLAGS_plan.size() == 0) {
|
||||
LOG(ERROR) << "No plan specified. Use --plan=/path/to/plan.";
|
||||
return 0;
|
||||
}
|
||||
LOG(INFO) << "Loading plan: " << caffe2::FLAGS_plan;
|
||||
LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
|
||||
caffe2::PlanDef plan_def;
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_plan, &plan_def));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
|
||||
std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
|
||||
workspace->RunPlan(plan_def);
|
||||
|
||||
|
@ -16,16 +16,17 @@
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include "c10/util/Flags.h"
|
||||
#include "caffe2/core/init.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
C10_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::SetUsageMessage("Runs a caffe2 plan that has MPI operators in it.");
|
||||
c10::SetUsageMessage("Runs a caffe2 plan that has MPI operators in it.");
|
||||
int mpi_ret;
|
||||
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_ret);
|
||||
if (mpi_ret != MPI_THREAD_MULTIPLE &&
|
||||
@ -35,9 +36,9 @@ int main(int argc, char** argv) {
|
||||
return 1;
|
||||
}
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
LOG(INFO) << "Loading plan: " << caffe2::FLAGS_plan;
|
||||
LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
|
||||
caffe2::PlanDef plan_def;
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_plan, &plan_def));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
|
||||
std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
|
||||
workspace->RunPlan(plan_def);
|
||||
|
||||
|
@ -28,24 +28,21 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(net, "", "The given net to benchmark.");
|
||||
CAFFE2_DEFINE_string(
|
||||
init_net,
|
||||
"",
|
||||
"The given net to initialize any parameters.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(net, "", "The given net to benchmark.");
|
||||
C10_DEFINE_string(init_net, "", "The given net to initialize any parameters.");
|
||||
C10_DEFINE_string(
|
||||
input,
|
||||
"",
|
||||
"Input that is needed for running the network. If "
|
||||
"multiple input needed, use comma separated string.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
input_file,
|
||||
"",
|
||||
"Input file that contain the serialized protobuf for "
|
||||
"the input blobs. If multiple input needed, use comma "
|
||||
"separated string. Must have the same number of items "
|
||||
"as input does.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
input_dims,
|
||||
"",
|
||||
"Alternate to input_files, if all inputs are simple "
|
||||
@ -53,31 +50,31 @@ CAFFE2_DEFINE_string(
|
||||
"separated numbers. If multiple input needed, use "
|
||||
"semicolon to separate the dimension of different "
|
||||
"tensors.");
|
||||
CAFFE2_DEFINE_string(input_type, "", "Input type (uint8_t/float)");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(input_type, "", "Input type (uint8_t/float)");
|
||||
C10_DEFINE_string(
|
||||
output,
|
||||
"",
|
||||
"Output that should be dumped after the execution "
|
||||
"finishes. If multiple outputs are needed, use comma "
|
||||
"separated string. If you want to dump everything, pass "
|
||||
"'*' as the output value.");
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
output_folder,
|
||||
"",
|
||||
"The folder that the output should be written to. This "
|
||||
"folder must already exist in the file system.");
|
||||
CAFFE2_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
|
||||
CAFFE2_DEFINE_int(iter, 10, "The number of iterations to run.");
|
||||
CAFFE2_DEFINE_int(opt, 0, "The level of optimization to run automatically.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
|
||||
C10_DEFINE_int(iter, 10, "The number of iterations to run.");
|
||||
C10_DEFINE_int(opt, 0, "The level of optimization to run automatically.");
|
||||
C10_DEFINE_bool(
|
||||
run_individual,
|
||||
false,
|
||||
"Whether to benchmark individual operators.");
|
||||
|
||||
CAFFE2_DEFINE_bool(force_engine, false, "Force engine field for all operators");
|
||||
CAFFE2_DEFINE_string(engine, "", "Forced engine field value");
|
||||
CAFFE2_DEFINE_bool(force_algo, false, "Force algo arg for all operators");
|
||||
CAFFE2_DEFINE_string(algo, "", "Forced algo arg value");
|
||||
C10_DEFINE_bool(force_engine, false, "Force engine field for all operators");
|
||||
C10_DEFINE_string(engine, "", "Forced engine field value");
|
||||
C10_DEFINE_bool(force_algo, false, "Force algo arg for all operators");
|
||||
C10_DEFINE_string(algo, "", "Forced algo arg value");
|
||||
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
@ -89,14 +86,14 @@ int main(int argc, char** argv) {
|
||||
|
||||
// Run initialization network.
|
||||
caffe2::NetDef net_def;
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_init_net, &net_def));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &net_def));
|
||||
CAFFE_ENFORCE(workspace->RunNetOnce(net_def));
|
||||
|
||||
// Load input.
|
||||
if (caffe2::FLAGS_input.size()) {
|
||||
vector<string> input_names = caffe2::split(',', caffe2::FLAGS_input);
|
||||
if (caffe2::FLAGS_input_file.size()) {
|
||||
vector<string> input_files = caffe2::split(',', caffe2::FLAGS_input_file);
|
||||
if (c10::FLAGS_input.size()) {
|
||||
vector<string> input_names = caffe2::split(',', c10::FLAGS_input);
|
||||
if (c10::FLAGS_input_file.size()) {
|
||||
vector<string> input_files = caffe2::split(',', c10::FLAGS_input_file);
|
||||
CAFFE_ENFORCE_EQ(
|
||||
input_names.size(),
|
||||
input_files.size(),
|
||||
@ -106,24 +103,24 @@ int main(int argc, char** argv) {
|
||||
CAFFE_ENFORCE(caffe2::ReadProtoFromFile(input_files[i], &blob_proto));
|
||||
DeserializeBlob(blob_proto, workspace->CreateBlob(input_names[i]));
|
||||
}
|
||||
} else if (caffe2::FLAGS_input_dims.size() || caffe2::FLAGS_input_type.size()) {
|
||||
} else if (c10::FLAGS_input_dims.size() || c10::FLAGS_input_type.size()) {
|
||||
CAFFE_ENFORCE_GE(
|
||||
caffe2::FLAGS_input_dims.size(),
|
||||
c10::FLAGS_input_dims.size(),
|
||||
0,
|
||||
"Input dims must be specified when input tensors are used.");
|
||||
CAFFE_ENFORCE_GE(
|
||||
caffe2::FLAGS_input_type.size(),
|
||||
c10::FLAGS_input_type.size(),
|
||||
0,
|
||||
"Input type must be specified when input tensors are used.");
|
||||
|
||||
vector<string> input_dims_list =
|
||||
caffe2::split(';', caffe2::FLAGS_input_dims);
|
||||
caffe2::split(';', c10::FLAGS_input_dims);
|
||||
CAFFE_ENFORCE_EQ(
|
||||
input_names.size(),
|
||||
input_dims_list.size(),
|
||||
"Input name and dims should have the same number of items.");
|
||||
vector<string> input_type_list =
|
||||
caffe2::split(';', caffe2::FLAGS_input_type);
|
||||
caffe2::split(';', c10::FLAGS_input_type);
|
||||
CAFFE_ENFORCE_EQ(
|
||||
input_names.size(),
|
||||
input_type_list.size(),
|
||||
@ -161,28 +158,28 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
// Run main network.
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_net, &net_def));
|
||||
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_net, &net_def));
|
||||
if (!net_def.has_name()) {
|
||||
net_def.set_name("benchmark");
|
||||
}
|
||||
// force changing engine and algo
|
||||
if (caffe2::FLAGS_force_engine) {
|
||||
LOG(INFO) << "force engine be: " << caffe2::FLAGS_engine;
|
||||
if (c10::FLAGS_force_engine) {
|
||||
LOG(INFO) << "force engine be: " << c10::FLAGS_engine;
|
||||
for (const auto& op : net_def.op()) {
|
||||
const_cast<caffe2::OperatorDef*>(&op)->set_engine(caffe2::FLAGS_engine);
|
||||
const_cast<caffe2::OperatorDef*>(&op)->set_engine(c10::FLAGS_engine);
|
||||
}
|
||||
}
|
||||
if (caffe2::FLAGS_force_algo) {
|
||||
LOG(INFO) << "force algo be: " << caffe2::FLAGS_algo;
|
||||
if (c10::FLAGS_force_algo) {
|
||||
LOG(INFO) << "force algo be: " << c10::FLAGS_algo;
|
||||
for (const auto& op : net_def.op()) {
|
||||
caffe2::GetMutableArgument(
|
||||
"algo", true, const_cast<caffe2::OperatorDef*>(&op))
|
||||
->set_s(caffe2::FLAGS_algo);
|
||||
->set_s(c10::FLAGS_algo);
|
||||
}
|
||||
}
|
||||
if (caffe2::FLAGS_opt) {
|
||||
if (c10::FLAGS_opt) {
|
||||
#ifdef CAFFE2_OPTIMIZER
|
||||
net_def = caffe2::opt::optimize(net_def, workspace.get(), caffe2::FLAGS_opt);
|
||||
net_def = caffe2::opt::optimize(net_def, workspace.get(), c10::FLAGS_opt);
|
||||
#else
|
||||
LOG(WARNING) << "Caffe2 not compiled with optimization passes.";
|
||||
#endif
|
||||
@ -192,14 +189,13 @@ int main(int argc, char** argv) {
|
||||
CHECK_NOTNULL(net);
|
||||
CAFFE_ENFORCE(net->Run());
|
||||
net->TEST_Benchmark(
|
||||
caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
|
||||
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
|
||||
|
||||
string output_prefix = caffe2::FLAGS_output_folder.size()
|
||||
? caffe2::FLAGS_output_folder + "/"
|
||||
: "";
|
||||
if (caffe2::FLAGS_output.size()) {
|
||||
vector<string> output_names = caffe2::split(',', caffe2::FLAGS_output);
|
||||
if (caffe2::FLAGS_output == "*") {
|
||||
string output_prefix =
|
||||
c10::FLAGS_output_folder.size() ? c10::FLAGS_output_folder + "/" : "";
|
||||
if (c10::FLAGS_output.size()) {
|
||||
vector<string> output_names = caffe2::split(',', c10::FLAGS_output);
|
||||
if (c10::FLAGS_output == "*") {
|
||||
output_names = workspace->Blobs();
|
||||
}
|
||||
for (const string& name : output_names) {
|
||||
|
@ -22,33 +22,41 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_string(input_db, "", "The input db.");
|
||||
CAFFE2_DEFINE_int(splits, 0, "The number of splits.");
|
||||
CAFFE2_DEFINE_string(db_type, "", "The db type.");
|
||||
CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
C10_DEFINE_string(input_db, "", "The input db.");
|
||||
C10_DEFINE_int(splits, 0, "The number of splits.");
|
||||
C10_DEFINE_string(db_type, "", "The db type.");
|
||||
C10_DEFINE_int(batch_size, 1000, "The write batch size.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
static int Split(int argc, char** argv) {
|
||||
GlobalInit(&argc, &argv);
|
||||
|
||||
CAFFE_ENFORCE(FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
|
||||
CAFFE_ENFORCE(FLAGS_splits > 0, "Must specify a nonnegative split number.");
|
||||
CAFFE_ENFORCE(FLAGS_db_type.size(), "Must specify --db_type=[a db type].");
|
||||
CAFFE_ENFORCE(
|
||||
c10::FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
|
||||
CAFFE_ENFORCE(
|
||||
c10::FLAGS_splits > 0, "Must specify a nonnegative split number.");
|
||||
CAFFE_ENFORCE(
|
||||
c10::FLAGS_db_type.size(), "Must specify --db_type=[a db type].");
|
||||
|
||||
unique_ptr<db::DB> in_db(
|
||||
db::CreateDB(FLAGS_db_type, FLAGS_input_db, db::READ));
|
||||
CAFFE_ENFORCE(in_db != nullptr, "Cannot open input db: ", FLAGS_input_db);
|
||||
db::CreateDB(c10::FLAGS_db_type, c10::FLAGS_input_db, db::READ));
|
||||
CAFFE_ENFORCE(
|
||||
in_db != nullptr, "Cannot open input db: ", c10::FLAGS_input_db);
|
||||
unique_ptr<db::Cursor> cursor(in_db->NewCursor());
|
||||
// This usually won't happen, but FWIW.
|
||||
CAFFE_ENFORCE(
|
||||
cursor != nullptr, "Cannot obtain cursor for input db: ", FLAGS_input_db);
|
||||
cursor != nullptr,
|
||||
"Cannot obtain cursor for input db: ",
|
||||
c10::FLAGS_input_db);
|
||||
|
||||
vector<unique_ptr<db::DB>> out_dbs;
|
||||
vector<unique_ptr<db::Transaction>> transactions;
|
||||
for (int i = 0; i < FLAGS_splits; ++i) {
|
||||
for (int i = 0; i < c10::FLAGS_splits; ++i) {
|
||||
out_dbs.push_back(unique_ptr<db::DB>(db::CreateDB(
|
||||
FLAGS_db_type, FLAGS_input_db + "_split_" + to_string(i), db::NEW)));
|
||||
c10::FLAGS_db_type,
|
||||
c10::FLAGS_input_db + "_split_" + to_string(i),
|
||||
db::NEW)));
|
||||
CAFFE_ENFORCE(out_dbs.back().get(), "Cannot create output db #", i);
|
||||
transactions.push_back(
|
||||
unique_ptr<db::Transaction>(out_dbs[i]->NewTransaction()));
|
||||
@ -58,9 +66,10 @@ static int Split(int argc, char** argv) {
|
||||
|
||||
int count = 0;
|
||||
for (; cursor->Valid(); cursor->Next()) {
|
||||
transactions[count % FLAGS_splits]->Put(cursor->key(), cursor->value());
|
||||
if (++count % FLAGS_batch_size == 0) {
|
||||
for (int i = 0; i < FLAGS_splits; ++i) {
|
||||
transactions[count % c10::FLAGS_splits]->Put(
|
||||
cursor->key(), cursor->value());
|
||||
if (++count % c10::FLAGS_batch_size == 0) {
|
||||
for (int i = 0; i < c10::FLAGS_splits; ++i) {
|
||||
transactions[i]->Commit();
|
||||
}
|
||||
LOG(INFO) << "Split " << count << " items so far.";
|
||||
|
@ -24,13 +24,13 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(f_in, "", "The input data file name.");
|
||||
CAFFE2_DEFINE_string(f_out, "", "The output data file name.");
|
||||
C10_DEFINE_string(f_in, "", "The input data file name.");
|
||||
C10_DEFINE_string(f_out, "", "The output data file name.");
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
std::ifstream f_in(caffe2::FLAGS_f_in);
|
||||
std::ofstream f_out(caffe2::FLAGS_f_out);
|
||||
std::ifstream f_in(c10::FLAGS_f_in);
|
||||
std::ofstream f_out(c10::FLAGS_f_out);
|
||||
std::string line;
|
||||
caffe2::TensorProtos tensor_protos;
|
||||
while (std::getline(f_in, line)) {
|
||||
|
@ -23,9 +23,9 @@
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/utils/zmq_helper.h"
|
||||
|
||||
CAFFE2_DEFINE_string(server, "tcp://*:5555", "The server address.");
|
||||
CAFFE2_DEFINE_string(input_db, "", "The input db.");
|
||||
CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
C10_DEFINE_string(server, "tcp://*:5555", "The server address.");
|
||||
C10_DEFINE_string(input_db, "", "The input db.");
|
||||
C10_DEFINE_string(input_db_type, "", "The input db type.");
|
||||
|
||||
using caffe2::db::DB;
|
||||
using caffe2::db::Cursor;
|
||||
@ -36,11 +36,11 @@ int main(int argc, char** argv) {
|
||||
|
||||
LOG(INFO) << "Opening DB...";
|
||||
auto in_db = caffe2::db::CreateDB(
|
||||
caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ);
|
||||
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ);
|
||||
CAFFE_ENFORCE(
|
||||
in_db,
|
||||
"Cannot load input db " + caffe2::FLAGS_input_db + " of expected type " +
|
||||
caffe2::FLAGS_input_db_type);
|
||||
"Cannot load input db " + c10::FLAGS_input_db + " of expected type " +
|
||||
c10::FLAGS_input_db_type);
|
||||
auto cursor = in_db->NewCursor();
|
||||
LOG(INFO) << "DB opened.";
|
||||
|
||||
@ -48,8 +48,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
// Socket to talk to clients
|
||||
caffe2::ZmqSocket sender(ZMQ_PUSH);
|
||||
sender.Bind(caffe2::FLAGS_server);
|
||||
LOG(INFO) << "Server created at " << caffe2::FLAGS_server;
|
||||
sender.Bind(c10::FLAGS_server);
|
||||
LOG(INFO) << "Server created at " << c10::FLAGS_server;
|
||||
|
||||
while (1) {
|
||||
VLOG(1) << "Sending " << cursor->key();
|
||||
|
@ -7,6 +7,8 @@
|
||||
# one to link against a specific protobuf version.
|
||||
|
||||
# ---[ Configure macro file.
|
||||
set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
|
||||
set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
|
||||
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
|
||||
@ -28,6 +30,15 @@ if (${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
|
||||
target_compile_options(c10 PRIVATE "-fvisibility=hidden")
|
||||
endif()
|
||||
|
||||
# ---[ Dependency of c10
|
||||
if (${USE_GFLAGS})
|
||||
target_link_libraries(c10 PUBLIC gflags)
|
||||
endif()
|
||||
|
||||
if (${USE_GLOG})
|
||||
target_link_libraries(c10 PUBLIC glog::glog)
|
||||
endif()
|
||||
|
||||
target_include_directories(
|
||||
c10 PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
|
||||
|
@ -5,5 +5,7 @@
|
||||
// Do not include this file directly. Instead, include c10/macros/Macros.h.
|
||||
|
||||
#cmakedefine C10_BUILD_SHARED_LIBS
|
||||
#cmakedefine C10_USE_GLOG
|
||||
#cmakedefine C10_USE_GFLAGS
|
||||
|
||||
#endif // C10_MACROS_CMAKE_MACROS_H_
|
||||
|
29
c10/test/flags_test.cpp
Normal file
29
c10/test/flags_test.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "c10/util/Flags.h"
|
||||
|
||||
C10_DEFINE_bool(c10_flags_test_only_flag, true, "Only used in test.");
|
||||
|
||||
namespace c10 {
|
||||
|
||||
TEST(FlagsTest, TestGflagsCorrectness) {
|
||||
#ifdef C10_USE_GFLAGS
|
||||
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
|
||||
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
|
||||
// Change the c10 namespace and check global
|
||||
FLAGS_c10_flags_test_only_flag = false;
|
||||
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, false);
|
||||
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, false);
|
||||
// Change global and check c10 namespace
|
||||
::FLAGS_c10_flags_test_only_flag = true;
|
||||
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
|
||||
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
|
||||
#else // C10_USE_GFLAGS
|
||||
std::cout << "Caffe2 is not built with gflags. Nothing to test here."
|
||||
<< std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace c10
|
212
c10/util/Flags.h
Normal file
212
c10/util/Flags.h
Normal file
@ -0,0 +1,212 @@
|
||||
#pragma once
|
||||
|
||||
/* Commandline flags support for C10.
|
||||
*
|
||||
* This is a portable commandline flags tool for c10, so we can optionally
|
||||
* choose to use gflags or a lightweight custom implementation if gflags is
|
||||
* not possible on a certain platform. If you have gflags installed, setting the
|
||||
* macro C10_USE_GFLAGS will seamlessly route everything to gflags.
|
||||
*
|
||||
* To define a flag foo of type bool default to true, do the following in the
|
||||
* *global* namespace:
|
||||
* C10_DEFINE_bool(foo, true, "An example.");
|
||||
*
|
||||
* To use it in another .cc file, you can use C10_DECLARE_* as follows:
|
||||
* C10_DECLARE_bool(foo);
|
||||
*
|
||||
* In both cases, you can then access the flag via c10::FLAGS_foo.
|
||||
*
|
||||
* It is recommended that you build with gflags. To learn more about the flags
|
||||
* usage, refer to the gflags page here:
|
||||
*
|
||||
* https://gflags.github.io/gflags/
|
||||
*
|
||||
* Note about Python users / devs: gflags is initiated from a C++ function
|
||||
* ParseCommandLineFlags, and is usually done in native binaries in the main
|
||||
* function. As Python does not have a modifiable main function, it is usually
|
||||
* difficult to change the flags after Python starts. Hence, it is recommended
|
||||
* that one sets the default value of the flags to one that's acceptable in
|
||||
* general - that will allow Python to run without wrong flags.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "c10/macros/Macros.h"
|
||||
#include "c10/util/Registry.h"
|
||||
|
||||
namespace c10 {
|
||||
/**
|
||||
* Sets the usage message when a commandline tool is called with "--help".
|
||||
*/
|
||||
C10_API void SetUsageMessage(const std::string& str);
|
||||
|
||||
/**
|
||||
* Returns the usage message for the commandline tool set by SetUsageMessage.
|
||||
*/
|
||||
C10_API const char* UsageMessage();
|
||||
|
||||
/**
|
||||
* Parses the commandline flags.
|
||||
*
|
||||
* This command parses all the commandline arguments passed in via pargc
|
||||
* and pargv. Once it is finished, pargc and pargv will contain the remaining
|
||||
* commandline args that c10 does not deal with. Note that following
|
||||
* convention, argv[0] contains the binary name and is not parsed.
|
||||
*/
|
||||
C10_API bool ParseCommandLineFlags(int* pargc, char*** pargv);
|
||||
|
||||
/**
|
||||
* Checks if the commandline flags have already been parsed.
|
||||
*/
|
||||
C10_API bool CommandLineFlagsHasBeenParsed();
|
||||
|
||||
} // namespace c10
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Below are gflags and non-gflags specific implementations.
|
||||
// In general, they define the following macros for one to declare (use
|
||||
// C10_DECLARE) or define (use C10_DEFINE) flags:
|
||||
// C10_{DECLARE,DEFINE}_{int,int64,double,bool,string}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef C10_USE_GFLAGS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Begin gflags section: most functions are basically rerouted to gflags.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags.
|
||||
// Using GFLAGS_GFLAGS_H_ to capture this change.
|
||||
#ifndef GFLAGS_GFLAGS_H_
|
||||
namespace gflags = google;
|
||||
#endif // GFLAGS_GFLAGS_H_
|
||||
|
||||
// Motivation about the gflags wrapper:
|
||||
// (1) We would need to make sure that the gflags version and the non-gflags
|
||||
// version of C10 are going to expose the same flags abstraction. One should
|
||||
// explicitly use c10::FLAGS_flag_name to access the flags.
|
||||
// (2) For flag names, it is recommended to start with c10_ to distinguish it
|
||||
// from regular gflags flags. For example, do
|
||||
// C10_DEFINE_bool(c10_my_flag, true, "An example");
|
||||
// to allow one to use c10::FLAGS_c10_my_flag.
|
||||
// (3) Gflags has a design issue that does not properly expose the global flags,
|
||||
// if one builds the library with -fvisibility=hidden. The current gflags (as of
|
||||
// Aug 2018) only deals with the Windows case using dllexport, and not the Linux
|
||||
// counterparts. As a result, we will explicitly use C10_EXPORT to export the
|
||||
// flags defined in C10. This is done via a global reference, so the flag
|
||||
// itself is not duplicated - under the hood it is the same global gflags flag.
|
||||
#define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \
|
||||
DEFINE_##type(name, default_value, help_str); \
|
||||
namespace c10 { \
|
||||
C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \
|
||||
}
|
||||
|
||||
#define C10_DEFINE_int(name, default_value, help_str) \
|
||||
C10_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
|
||||
#define C10_DEFINE_int64(name, default_value, help_str) \
|
||||
C10_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
|
||||
#define C10_DEFINE_double(name, default_value, help_str) \
|
||||
C10_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str)
|
||||
#define C10_DEFINE_bool(name, default_value, help_str) \
|
||||
C10_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str)
|
||||
#define C10_DEFINE_string(name, default_value, help_str) \
|
||||
C10_GFLAGS_DEF_WRAPPER(string, ::fLS::clstring, name, default_value, help_str)
|
||||
|
||||
// DECLARE_typed_var should be used in header files and in the global namespace.
|
||||
#define C10_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \
|
||||
DECLARE_##type(name); \
|
||||
namespace c10 { \
|
||||
C10_IMPORT extern real_type& FLAGS_##name; \
|
||||
} // namespace c10
|
||||
|
||||
#define C10_DECLARE_int(name) \
|
||||
C10_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name)
|
||||
#define C10_DECLARE_int64(name) \
|
||||
C10_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name)
|
||||
#define C10_DECLARE_double(name) \
|
||||
C10_GFLAGS_DECLARE_WRAPPER(double, double, name)
|
||||
#define C10_DECLARE_bool(name) C10_GFLAGS_DECLARE_WRAPPER(bool, bool, name)
|
||||
#define C10_DECLARE_string(name) \
|
||||
C10_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// End gflags section.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#else // C10_USE_GFLAGS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Begin non-gflags section: providing equivalent functionality.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace c10 {
|
||||
|
||||
class C10_API C10FlagParser {
|
||||
public:
|
||||
C10FlagParser() {}
|
||||
bool success() {
|
||||
return success_;
|
||||
}
|
||||
|
||||
protected:
|
||||
template <typename T>
|
||||
bool Parse(const std::string& content, T* value);
|
||||
bool success_;
|
||||
};
|
||||
|
||||
C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&);
|
||||
|
||||
} // namespace c10
|
||||
|
||||
// The macros are defined outside the c10 namespace. In your code, you should
|
||||
// write the C10_DEFINE_* and C10_DECLARE_* macros outside any namespace
|
||||
// as well.
|
||||
|
||||
#define C10_DEFINE_typed_var(type, name, default_value, help_str) \
|
||||
namespace c10 { \
|
||||
C10_EXPORT type FLAGS_##name = default_value; \
|
||||
namespace { \
|
||||
class C10FlagParser_##name : public C10FlagParser { \
|
||||
public: \
|
||||
explicit C10FlagParser_##name(const std::string& content) { \
|
||||
success_ = C10FlagParser::Parse<type>(content, &FLAGS_##name); \
|
||||
} \
|
||||
}; \
|
||||
} \
|
||||
RegistererC10FlagsRegistry g_C10FlagsRegistry_##name( \
|
||||
#name, \
|
||||
C10FlagsRegistry(), \
|
||||
RegistererC10FlagsRegistry::DefaultCreator<C10FlagParser_##name>, \
|
||||
"(" #type ", default " #default_value ") " help_str); \
|
||||
}
|
||||
|
||||
#define C10_DEFINE_int(name, default_value, help_str) \
|
||||
C10_DEFINE_typed_var(int, name, default_value, help_str)
|
||||
#define C10_DEFINE_int64(name, default_value, help_str) \
|
||||
C10_DEFINE_typed_var(int64_t, name, default_value, help_str)
|
||||
#define C10_DEFINE_double(name, default_value, help_str) \
|
||||
C10_DEFINE_typed_var(double, name, default_value, help_str)
|
||||
#define C10_DEFINE_bool(name, default_value, help_str) \
|
||||
C10_DEFINE_typed_var(bool, name, default_value, help_str)
|
||||
#define C10_DEFINE_string(name, default_value, help_str) \
|
||||
C10_DEFINE_typed_var(std::string, name, default_value, help_str)
|
||||
|
||||
// DECLARE_typed_var should be used in header files and in the global namespace.
|
||||
#define C10_DECLARE_typed_var(type, name) \
|
||||
namespace c10 { \
|
||||
C10_IMPORT extern type FLAGS_##name; \
|
||||
} // namespace c10
|
||||
|
||||
#define C10_DECLARE_int(name) C10_DECLARE_typed_var(int, name)
|
||||
#define C10_DECLARE_int64(name) C10_DECLARE_typed_var(int64_t, name)
|
||||
#define C10_DECLARE_double(name) C10_DECLARE_typed_var(double, name)
|
||||
#define C10_DECLARE_bool(name) C10_DECLARE_typed_var(bool, name)
|
||||
#define C10_DECLARE_string(name) C10_DECLARE_typed_var(std::string, name)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// End non-gflags section.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // C10_USE_GFLAGS
|
38
c10/util/flags_use_gflags.cpp
Normal file
38
c10/util/flags_use_gflags.cpp
Normal file
@ -0,0 +1,38 @@
|
||||
#include "c10/util/Flags.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "c10/macros/Macros.h"
|
||||
|
||||
#ifdef C10_USE_GFLAGS
|
||||
|
||||
namespace c10 {
|
||||
|
||||
using std::string;
|
||||
|
||||
C10_EXPORT void SetUsageMessage(const string& str) {
|
||||
if (UsageMessage() != nullptr) {
|
||||
// Usage message has already been set, so we will simply return.
|
||||
return;
|
||||
}
|
||||
gflags::SetUsageMessage(str);
|
||||
}
|
||||
|
||||
C10_EXPORT const char* UsageMessage() {
|
||||
return gflags::ProgramUsage();
|
||||
}
|
||||
|
||||
C10_EXPORT bool ParseCommandLineFlags(int* pargc, char*** pargv) {
|
||||
// In case there is no commandline flags to parse, simply return.
|
||||
if (*pargc == 0)
|
||||
return true;
|
||||
return gflags::ParseCommandLineFlags(pargc, pargv, true);
|
||||
}
|
||||
|
||||
C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
|
||||
// There is no way we query gflags right now, so we will simply return true.
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace c10
|
||||
#endif // C10_USE_GFLAGS
|
@ -1,66 +1,46 @@
|
||||
#include "caffe2/core/flags.h"
|
||||
#include "c10/util/Flags.h"
|
||||
#include "c10/macros/Macros.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "caffe2/core/logging.h"
|
||||
#ifndef C10_USE_GFLAGS
|
||||
|
||||
namespace caffe2 {
|
||||
namespace c10 {
|
||||
|
||||
#ifdef CAFFE2_USE_GFLAGS
|
||||
using std::string;
|
||||
|
||||
C10_EXPORT void SetUsageMessage(const string& str) {
|
||||
if (UsageMessage() != nullptr) {
|
||||
// Usage message has already been set, so we will simply return.
|
||||
return;
|
||||
}
|
||||
gflags::SetUsageMessage(str);
|
||||
}
|
||||
|
||||
C10_EXPORT const char* UsageMessage() {
|
||||
return gflags::ProgramUsage();
|
||||
}
|
||||
|
||||
C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
if (*pargc == 0) return true;
|
||||
return gflags::ParseCommandLineFlags(pargc, pargv, true);
|
||||
}
|
||||
|
||||
C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
|
||||
// There is no way we query gflags right now, so we will simply return true.
|
||||
return true;
|
||||
}
|
||||
|
||||
#else // CAFFE2_USE_GFLAGS
|
||||
|
||||
C10_DEFINE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&);
|
||||
C10_DEFINE_REGISTRY(C10FlagsRegistry, C10FlagParser, const string&);
|
||||
|
||||
namespace {
|
||||
static bool gCommandLineFlagsParsed = false;
|
||||
// Since caffe flags is going to be loaded before caffe logging, we would
|
||||
// Since flags is going to be loaded before logging, we would
|
||||
// need to have a stringstream to hold the messages instead of directly
|
||||
// using caffe logging.
|
||||
std::stringstream& GlobalInitStream() {
|
||||
static std::stringstream ss;
|
||||
return ss;
|
||||
}
|
||||
static string gUsageMessage = "(Usage message not set.)";
|
||||
}
|
||||
static const char* gUsageMessage = "(Usage message not set.)";
|
||||
} // namespace
|
||||
|
||||
C10_EXPORT void SetUsageMessage(const string& str) {
|
||||
gUsageMessage = str;
|
||||
}
|
||||
C10_EXPORT const char* UsageMessage() {
|
||||
return gUsageMessage.c_str();
|
||||
static string usage_message_safe_copy = str;
|
||||
gUsageMessage = usage_message_safe_copy.c_str();
|
||||
}
|
||||
|
||||
C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
if (*pargc == 0) return true;
|
||||
C10_EXPORT const char* UsageMessage() {
|
||||
return gUsageMessage;
|
||||
}
|
||||
|
||||
C10_EXPORT bool ParseCommandLineFlags(int* pargc, char*** pargv) {
|
||||
if (*pargc == 0)
|
||||
return true;
|
||||
char** argv = *pargv;
|
||||
bool success = true;
|
||||
GlobalInitStream() << "Parsing commandline arguments for caffe2."
|
||||
<< std::endl;
|
||||
GlobalInitStream() << "Parsing commandline arguments for c10." << std::endl;
|
||||
// write_head is the location we write the unused arguments to.
|
||||
int write_head = 1;
|
||||
for (int i = 1; i < *pargc; ++i) {
|
||||
@ -70,7 +50,7 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
// Print the help message, and quit.
|
||||
std::cout << UsageMessage() << std::endl;
|
||||
std::cout << "Arguments: " << std::endl;
|
||||
for (const auto& help_msg : Caffe2FlagsRegistry()->HelpMessage()) {
|
||||
for (const auto& help_msg : C10FlagsRegistry()->HelpMessage()) {
|
||||
std::cout << " " << help_msg.first << ": " << help_msg.second
|
||||
<< std::endl;
|
||||
}
|
||||
@ -79,7 +59,7 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
// If the arg does not start with "--", we will ignore it.
|
||||
if (arg[0] != '-' || arg[1] != '-') {
|
||||
GlobalInitStream()
|
||||
<< "Caffe2 flag: commandline argument does not match --name=var "
|
||||
<< "C10 flag: commandline argument does not match --name=var "
|
||||
"or --name format: "
|
||||
<< arg << ". Ignoring this argument." << std::endl;
|
||||
argv[write_head++] = argv[i];
|
||||
@ -96,8 +76,9 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
++i;
|
||||
if (i == *pargc) {
|
||||
GlobalInitStream()
|
||||
<< "Caffe2 flag: reached the last commandline argument, but "
|
||||
"I am expecting a value for " << arg;
|
||||
<< "C10 flag: reached the last commandline argument, but "
|
||||
"I am expecting a value for "
|
||||
<< arg;
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
@ -109,17 +90,16 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
|
||||
value = arg.substr(prefix_idx + 1, string::npos);
|
||||
}
|
||||
// If the flag is not registered, we will ignore it.
|
||||
if (!Caffe2FlagsRegistry()->Has(key)) {
|
||||
GlobalInitStream() << "Caffe2 flag: unrecognized commandline argument: "
|
||||
if (!C10FlagsRegistry()->Has(key)) {
|
||||
GlobalInitStream() << "C10 flag: unrecognized commandline argument: "
|
||||
<< arg << std::endl;
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
std::unique_ptr<Caffe2FlagParser> parser(
|
||||
Caffe2FlagsRegistry()->Create(key, value));
|
||||
std::unique_ptr<C10FlagParser> parser(
|
||||
C10FlagsRegistry()->Create(key, value));
|
||||
if (!parser->success()) {
|
||||
GlobalInitStream() << "Caffe2 flag: illegal argument: "
|
||||
<< arg << std::endl;
|
||||
GlobalInitStream() << "C10 flag: illegal argument: " << arg << std::endl;
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
@ -144,7 +124,7 @@ C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
|
||||
}
|
||||
|
||||
template <>
|
||||
C10_EXPORT bool Caffe2FlagParser::Parse<string>(
|
||||
C10_EXPORT bool C10FlagParser::Parse<string>(
|
||||
const string& content,
|
||||
string* value) {
|
||||
*value = content;
|
||||
@ -152,21 +132,19 @@ C10_EXPORT bool Caffe2FlagParser::Parse<string>(
|
||||
}
|
||||
|
||||
template <>
|
||||
C10_EXPORT bool Caffe2FlagParser::Parse<int>(
|
||||
const string& content,
|
||||
int* value) {
|
||||
C10_EXPORT bool C10FlagParser::Parse<int>(const string& content, int* value) {
|
||||
try {
|
||||
*value = std::atoi(content.c_str());
|
||||
return true;
|
||||
} catch(...) {
|
||||
GlobalInitStream() << "Caffe2 flag error: Cannot convert argument to int: "
|
||||
} catch (...) {
|
||||
GlobalInitStream() << "C10 flag error: Cannot convert argument to int: "
|
||||
<< content << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
C10_EXPORT bool Caffe2FlagParser::Parse<int64_t>(
|
||||
C10_EXPORT bool C10FlagParser::Parse<int64_t>(
|
||||
const string& content,
|
||||
int64_t* value) {
|
||||
try {
|
||||
@ -179,43 +157,41 @@ C10_EXPORT bool Caffe2FlagParser::Parse<int64_t>(
|
||||
#endif
|
||||
return true;
|
||||
} catch (...) {
|
||||
GlobalInitStream() << "Caffe2 flag error: Cannot convert argument to int: "
|
||||
GlobalInitStream() << "C10 flag error: Cannot convert argument to int: "
|
||||
<< content << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
C10_EXPORT bool Caffe2FlagParser::Parse<double>(
|
||||
C10_EXPORT bool C10FlagParser::Parse<double>(
|
||||
const string& content,
|
||||
double* value) {
|
||||
try {
|
||||
*value = std::atof(content.c_str());
|
||||
return true;
|
||||
} catch(...) {
|
||||
GlobalInitStream()
|
||||
<< "Caffe2 flag error: Cannot convert argument to double: "
|
||||
<< content << std::endl;
|
||||
} catch (...) {
|
||||
GlobalInitStream() << "C10 flag error: Cannot convert argument to double: "
|
||||
<< content << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
C10_EXPORT bool Caffe2FlagParser::Parse<bool>(
|
||||
const string& content,
|
||||
bool* value) {
|
||||
C10_EXPORT bool C10FlagParser::Parse<bool>(const string& content, bool* value) {
|
||||
if (content == "false" || content == "False" || content == "FALSE" ||
|
||||
content == "0") {
|
||||
*value = false;
|
||||
return true;
|
||||
} else if (content == "true" || content == "True" || content == "TRUE" ||
|
||||
} else if (
|
||||
content == "true" || content == "True" || content == "TRUE" ||
|
||||
content == "1") {
|
||||
*value = true;
|
||||
return true;
|
||||
} else {
|
||||
GlobalInitStream()
|
||||
<< "Caffe2 flag error: Cannot convert argument to bool: "
|
||||
<< content << std::endl
|
||||
<< "C10 flag error: Cannot convert argument to bool: " << content
|
||||
<< std::endl
|
||||
<< "Note that if you are passing in a bool flag, you need to "
|
||||
"explicitly specify it, like --arg=True or --arg True. Otherwise, "
|
||||
"the next argument may be inadvertently used as the argument, "
|
||||
@ -225,6 +201,6 @@ C10_EXPORT bool Caffe2FlagParser::Parse<bool>(
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CAFFE2_USE_GFLAGS
|
||||
} // namespace c10
|
||||
|
||||
} // namespace caffe2
|
||||
#endif // C10_USE_GFLAGS
|
@ -1 +1 @@
|
||||
0.8.2
|
||||
0.8.2
|
||||
|
@ -132,4 +132,4 @@ void DecoderThread::crop(int64 i, int64 src_width, int64 src_height, bool flip,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -10,4 +10,5 @@
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# limitations under the License.
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
@ -10,4 +10,5 @@
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# limitations under the License.
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
@ -9,7 +9,7 @@
|
||||
/**
|
||||
* A flag that specifies the nervana cubin path.
|
||||
*/
|
||||
CAFFE2_DECLARE_string(nervana_cubin_path);
|
||||
C10_DECLARE_string(nervana_cubin_path);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
CAFFE2_DECLARE_string(caffe_test_root);
|
||||
C10_DECLARE_string(caffe_test_root);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -3,11 +3,11 @@
|
||||
|
||||
#include "nervana_c_api.h"
|
||||
|
||||
|
||||
CAFFE2_DEFINE_string(nervana_cubin_path,
|
||||
"/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/cubin/",
|
||||
"The cubin path for nervana kernels. Currently defaulted "
|
||||
"to the internal fb deployment path.");
|
||||
C10_DEFINE_string(
|
||||
nervana_cubin_path,
|
||||
"/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/cubin/",
|
||||
"The cubin path for nervana kernels. Currently defaulted "
|
||||
"to the internal fb deployment path.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -13,11 +13,13 @@
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "nnpack.h"
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
caffe2_nnpack_num_threads, 1,
|
||||
C10_DEFINE_int(
|
||||
caffe2_nnpack_num_threads,
|
||||
1,
|
||||
"The number of nnpack pthreadpool threads.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
caffe2_nnpack_use_mkl_num_threads, true,
|
||||
C10_DEFINE_bool(
|
||||
caffe2_nnpack_use_mkl_num_threads,
|
||||
true,
|
||||
"If MKL is built, this sets nnpack to use the same number of threads as "
|
||||
"MKL does. This overrides caffe2_nnpack_num_threads if set.");
|
||||
|
||||
@ -74,8 +76,8 @@ pthreadpool_t nnpack_threadpool() {
|
||||
enum nnp_status nnpack_status = nnp_initialize();
|
||||
CAFFE_ENFORCE(
|
||||
nnpack_status == nnp_status_success, "NNPack is not supported here!");
|
||||
int num_threads = FLAGS_caffe2_nnpack_num_threads;
|
||||
if (FLAGS_caffe2_nnpack_use_mkl_num_threads) {
|
||||
int num_threads = c10::FLAGS_caffe2_nnpack_num_threads;
|
||||
if (c10::FLAGS_caffe2_nnpack_use_mkl_num_threads) {
|
||||
#ifdef CAFFE2_USE_MKL
|
||||
num_threads = mkl_get_max_threads();
|
||||
#else
|
||||
|
@ -4,10 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
caffe2_htrace_span_log_path,
|
||||
"",
|
||||
"Span log path for htrace");
|
||||
C10_DEFINE_string(caffe2_htrace_span_log_path, "", "Span log path for htrace");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -35,12 +32,12 @@ const string defaultHTraceConf(const string& net_name) {
|
||||
stream << HTRACE_SPAN_RECEIVER_KEY << "=local.file;";
|
||||
stream << HTRACE_SAMPLER_KEY << "=always;";
|
||||
|
||||
if (FLAGS_caffe2_htrace_span_log_path.empty()) {
|
||||
if (c10::FLAGS_caffe2_htrace_span_log_path.empty()) {
|
||||
stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "=/tmp/htrace_" << net_name_copy
|
||||
<< "_span_log_" << datetime << ";";
|
||||
} else {
|
||||
stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "="
|
||||
<< FLAGS_caffe2_htrace_span_log_path << ";";
|
||||
<< c10::FLAGS_caffe2_htrace_span_log_path << ";";
|
||||
}
|
||||
|
||||
return stream.str();
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
#include "caffe2/core/flags.h"
|
||||
|
||||
CAFFE2_DECLARE_string(caffe2_htrace_span_log_path);
|
||||
C10_DECLARE_string(caffe2_htrace_span_log_path);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
const string defaultHTraceConf(const string& net_name);
|
||||
const std::string defaultHTraceConf(const std::string& net_name);
|
||||
|
||||
} // namespace caffe2
|
||||
|
@ -2,12 +2,12 @@
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/core/typeid.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_report_cpu_memory_usage,
|
||||
false,
|
||||
"If set, print out detailed memory usage");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_zero_fill,
|
||||
true,
|
||||
"If set, do memory zerofilling when allocating on CPU");
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/core/numa.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||
CAFFE2_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
|
||||
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||
C10_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -58,10 +58,10 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
|
||||
CAFFE_ENFORCE(data);
|
||||
// move data to a thread's NUMA node
|
||||
NUMAMove(data, nbytes, GetCurrentNUMANode());
|
||||
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
||||
if (c10::FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
||||
memset(data, 0, nbytes);
|
||||
}
|
||||
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
||||
if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
|
||||
reporter_.New(data, nbytes);
|
||||
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
|
||||
}
|
||||
@ -84,7 +84,7 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
|
||||
}
|
||||
|
||||
at::DeleterFnPtr raw_deleter() const override {
|
||||
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
||||
if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
|
||||
return &ReportAndDelete;
|
||||
}
|
||||
return &Delete;
|
||||
|
@ -6,17 +6,17 @@
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_tensor_chunk_size,
|
||||
1000000,
|
||||
"Chunk size to split tensor data into");
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_max_tensor_serializer_threads,
|
||||
16,
|
||||
"Maximal number of threads that can be used for tensor serialization");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_serialize_fp16_as_bytes,
|
||||
false,
|
||||
"Serialize FLOAT16 tensors using byte_data field");
|
||||
@ -102,7 +102,7 @@ void TensorSerializer::SerializeWithChunkSize(
|
||||
if (chunk_size == kNoChunking) {
|
||||
chunk_size = tensor.size() + 1; // to account for empty tensors
|
||||
} else if (chunk_size == kDefaultChunkSize) {
|
||||
chunk_size = FLAGS_caffe2_tensor_chunk_size;
|
||||
chunk_size = c10::FLAGS_caffe2_tensor_chunk_size;
|
||||
}
|
||||
|
||||
auto processChunk = [&](int64_t chunkStart) {
|
||||
@ -129,7 +129,7 @@ void TensorSerializer::SerializeWithChunkSize(
|
||||
}
|
||||
};
|
||||
if (tensor.size() > chunk_size) {
|
||||
for (int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
|
||||
for (int i = 0; i < c10::FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
|
||||
futures.emplace_back(std::async(std::launch::async, task));
|
||||
}
|
||||
}
|
||||
@ -268,7 +268,7 @@ void TensorSerializer::Serialize(
|
||||
uniq_ptr.get());
|
||||
break;
|
||||
case TensorProto_DataType_FLOAT16: {
|
||||
if (FLAGS_caffe2_serialize_fp16_as_bytes) {
|
||||
if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
|
||||
const int kValue = 1;
|
||||
CAFFE_ENFORCE_EQ(
|
||||
reinterpret_cast<const char*>(&kValue)[0],
|
||||
|
@ -13,9 +13,9 @@
|
||||
#include "caffe2/core/types.h"
|
||||
#include "caffe2/utils/simple_queue.h"
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_tensor_chunk_size);
|
||||
CAFFE2_DECLARE_int(caffe2_max_tensor_serializer_threads);
|
||||
CAFFE2_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
|
||||
C10_DECLARE_int(caffe2_tensor_chunk_size);
|
||||
C10_DECLARE_int(caffe2_max_tensor_serializer_threads);
|
||||
C10_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -18,9 +18,9 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_int64(caffe2_test_big_tensor_size, 100000000, "");
|
||||
CAFFE2_DECLARE_int(caffe2_tensor_chunk_size);
|
||||
CAFFE2_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
|
||||
C10_DEFINE_int64(caffe2_test_big_tensor_size, 100000000, "");
|
||||
C10_DECLARE_int(caffe2_tensor_chunk_size);
|
||||
C10_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
|
||||
|
||||
namespace caffe2 {
|
||||
using namespace ::caffe2::db;
|
||||
@ -455,8 +455,8 @@ TYPED_TEST(TensorCPUTest, NoLongerSharesAfterFreeMemory) {
|
||||
|
||||
TYPED_TEST(TensorCPUTest, KeepOnShrink) {
|
||||
// Set flags (defaults)
|
||||
FLAGS_caffe2_keep_on_shrink = true;
|
||||
FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
|
||||
c10::FLAGS_caffe2_keep_on_shrink = true;
|
||||
c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
|
||||
|
||||
vector<int> dims{2, 3, 5};
|
||||
Tensor tensor(dims, CPU);
|
||||
@ -486,8 +486,8 @@ TYPED_TEST(TensorCPUTest, KeepOnShrink) {
|
||||
|
||||
TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
|
||||
// Set flags
|
||||
FLAGS_caffe2_keep_on_shrink = true;
|
||||
FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);
|
||||
c10::FLAGS_caffe2_keep_on_shrink = true;
|
||||
c10::FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);
|
||||
|
||||
vector<int> dims{1, 8, 8};
|
||||
Tensor tensor(dims, CPU);
|
||||
@ -507,7 +507,7 @@ TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
|
||||
//EXPECT_NE(ptr, new_ptr);
|
||||
|
||||
// Restore default flags
|
||||
FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
|
||||
c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
|
||||
}
|
||||
|
||||
TYPED_TEST(TensorCPUDeathTest, CannotAccessRawDataWhenEmpty) {
|
||||
@ -710,7 +710,7 @@ TEST(TensorTest, Half) {
|
||||
const TensorProto& tensor_proto = proto.tensor();
|
||||
EXPECT_EQ(
|
||||
tensor_proto.data_type(), TypeMetaToDataType(TypeMeta::Make<at::Half>()));
|
||||
if (FLAGS_caffe2_serialize_fp16_as_bytes) {
|
||||
if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
|
||||
EXPECT_EQ(tensor_proto.byte_data().size(), 2 * kSize);
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
auto value = tensor->mutable_data<at::Half>()[i].x;
|
||||
@ -850,8 +850,8 @@ TYPED_TEST_CASE(TypedTensorTest, TensorDataTypes);
|
||||
|
||||
TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
|
||||
int64_t d1 = 2;
|
||||
int64_t d2 = FLAGS_caffe2_test_big_tensor_size
|
||||
? FLAGS_caffe2_test_big_tensor_size / d1
|
||||
int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
|
||||
? c10::FLAGS_caffe2_test_big_tensor_size / d1
|
||||
: static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
|
||||
int64_t size = d1 * d2;
|
||||
string db_source = (string)std::tmpnam(nullptr);
|
||||
@ -1024,8 +1024,8 @@ TEST(ContentChunks, Serialization) {
|
||||
|
||||
TEST(CustomChunkSize, BigTensorSerialization) {
|
||||
int64_t d1 = 2;
|
||||
int64_t d2 = FLAGS_caffe2_test_big_tensor_size
|
||||
? FLAGS_caffe2_test_big_tensor_size / d1
|
||||
int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
|
||||
? c10::FLAGS_caffe2_test_big_tensor_size / d1
|
||||
: static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
|
||||
int64_t size = d1 * d2;
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "caffe2/core/init.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cuda_full_device_control,
|
||||
false,
|
||||
"If true, assume all the cudaSetDevice and cudaGetDevice calls will be "
|
||||
@ -89,7 +89,7 @@ int NumCudaDevices() {
|
||||
|
||||
namespace {
|
||||
int gDefaultGPUID = 0;
|
||||
// Only used when FLAGS_caffe2_cuda_full_device_control is set true.
|
||||
// Only used when c10::FLAGS_caffe2_cuda_full_device_control is set true.
|
||||
thread_local int gCurrentDevice = -1;
|
||||
} // namespace
|
||||
|
||||
@ -108,7 +108,7 @@ void SetDefaultGPUID(const int deviceid) {
|
||||
int GetDefaultGPUID() { return gDefaultGPUID; }
|
||||
|
||||
int CaffeCudaGetDevice() {
|
||||
if (FLAGS_caffe2_cuda_full_device_control) {
|
||||
if (c10::FLAGS_caffe2_cuda_full_device_control) {
|
||||
if (gCurrentDevice < 0) {
|
||||
CUDA_ENFORCE(cudaGetDevice(&gCurrentDevice));
|
||||
}
|
||||
@ -121,7 +121,7 @@ int CaffeCudaGetDevice() {
|
||||
}
|
||||
|
||||
void CaffeCudaSetDevice(const int id) {
|
||||
if (FLAGS_caffe2_cuda_full_device_control) {
|
||||
if (c10::FLAGS_caffe2_cuda_full_device_control) {
|
||||
if (gCurrentDevice != id) {
|
||||
CUDA_ENFORCE(cudaSetDevice(id));
|
||||
}
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include <ATen/core/ATenCoreTest.h>
|
||||
#include <ATen/core/ArrayRef.h>
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "caffe2/core/tensor.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
caffe2_cuda_memory_pool,
|
||||
"",
|
||||
"Sets the memory pool used by caffe2. Possible values are "
|
||||
@ -29,30 +29,38 @@ CAFFE2_DEFINE_string(
|
||||
|
||||
// For description of CUB caching allocator configuration, see
|
||||
// https://nvlabs.github.io/cub/structcub_1_1_caching_device_allocator.html
|
||||
CAFFE2_DEFINE_int(caffe2_cub_bin_growth, 8,
|
||||
"If using cub as the memory allocator, sets the growth of bins "
|
||||
"used by the cub pool.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_min_bin, 3,
|
||||
"If using cub as the memory allocator, sets the min number of "
|
||||
"bins.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_max_bin, 10,
|
||||
"If using cub as the memory allocator, sets the max number of "
|
||||
"bins.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_max_managed_mb, 10 * 1024,
|
||||
"If using cub as the memory allocators, sets the maximum amount "
|
||||
"of memory managed in gigabytes");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_bin_growth,
|
||||
8,
|
||||
"If using cub as the memory allocator, sets the growth of bins "
|
||||
"used by the cub pool.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_min_bin,
|
||||
3,
|
||||
"If using cub as the memory allocator, sets the min number of "
|
||||
"bins.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_max_bin,
|
||||
10,
|
||||
"If using cub as the memory allocator, sets the max number of "
|
||||
"bins.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_max_managed_mb,
|
||||
10 * 1024,
|
||||
"If using cub as the memory allocators, sets the maximum amount "
|
||||
"of memory managed in gigabytes");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cub_print_allocation_events,
|
||||
false,
|
||||
"If true CachingDeviceAllocator will print allocation and deallocation "
|
||||
"events to stdout.");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_gpu_memory_tracking,
|
||||
false,
|
||||
"If set, logs changes in GPU memory allocations");
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_gpu_memory_report_interval_mb,
|
||||
128,
|
||||
"The threshold in MB on how frequently to report memory changes");
|
||||
@ -168,12 +176,12 @@ static void SetUpCub() {
|
||||
// Sets up the cub memory pool
|
||||
try {
|
||||
g_cub_allocator.reset(new cub::CachingDeviceAllocator(
|
||||
FLAGS_caffe2_cub_bin_growth,
|
||||
FLAGS_caffe2_cub_min_bin,
|
||||
FLAGS_caffe2_cub_max_bin,
|
||||
size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
|
||||
c10::FLAGS_caffe2_cub_bin_growth,
|
||||
c10::FLAGS_caffe2_cub_min_bin,
|
||||
c10::FLAGS_caffe2_cub_max_bin,
|
||||
size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
|
||||
false,
|
||||
FLAGS_caffe2_cub_print_allocation_events));
|
||||
c10::FLAGS_caffe2_cub_print_allocation_events));
|
||||
} catch (...) {
|
||||
CAFFE_THROW("Some error happened at cub initialization.");
|
||||
}
|
||||
@ -181,22 +189,23 @@ static void SetUpCub() {
|
||||
}
|
||||
|
||||
static void Caffe2SetCUDAMemoryPool() {
|
||||
if (FLAGS_caffe2_cuda_memory_pool == "" ||
|
||||
FLAGS_caffe2_cuda_memory_pool == "none") {
|
||||
if (c10::FLAGS_caffe2_cuda_memory_pool == "" ||
|
||||
c10::FLAGS_caffe2_cuda_memory_pool == "none") {
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
|
||||
} else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
|
||||
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "cnmem") {
|
||||
CAFFE_THROW("CNMEM is no longer used by Caffe2. Use cub instead. "
|
||||
"This error message may go away in the future.");
|
||||
} else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
|
||||
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "cub") {
|
||||
// Sets up cub.
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
|
||||
SetUpCub();
|
||||
} else if (FLAGS_caffe2_cuda_memory_pool == "thc") {
|
||||
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "thc") {
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::THC;
|
||||
g_thc_allocator.reset(new THCCachingAllocator());
|
||||
} else {
|
||||
CAFFE_THROW("Unrecognized cuda memory pool type: ",
|
||||
FLAGS_caffe2_cuda_memory_pool);
|
||||
CAFFE_THROW(
|
||||
"Unrecognized cuda memory pool type: ",
|
||||
c10::FLAGS_caffe2_cuda_memory_pool);
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,7 +283,7 @@ std::mutex& CUDAContext::mutex() {
|
||||
std::vector<long> CUDAContext::TotalMemoryByGpu() {
|
||||
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
||||
CAFFE_ENFORCE(
|
||||
FLAGS_caffe2_gpu_memory_tracking,
|
||||
c10::FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
return g_total_by_gpu_map;
|
||||
}
|
||||
@ -282,7 +291,7 @@ std::vector<long> CUDAContext::TotalMemoryByGpu() {
|
||||
std::vector<long> CUDAContext::MaxMemoryByGpu() {
|
||||
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
||||
CAFFE_ENFORCE(
|
||||
FLAGS_caffe2_gpu_memory_tracking,
|
||||
c10::FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
return g_max_by_gpu_map;
|
||||
}
|
||||
@ -295,7 +304,7 @@ void TrackMemoryAlloc(size_t nbytes) {
|
||||
max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
|
||||
g_total_mem += nbytes;
|
||||
if (g_total_mem - g_last_rep >
|
||||
FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
|
||||
c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
|
||||
for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
|
||||
long t = g_total_by_gpu_map[gpu];
|
||||
long max_t = g_max_by_gpu_map[gpu];
|
||||
@ -329,13 +338,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
|
||||
static Caffe2CudaInitializerHelper g_cuda_initializer_;
|
||||
void* ptr = nullptr;
|
||||
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
TrackMemoryAlloc(nbytes);
|
||||
}
|
||||
switch (g_cuda_memory_pool_type) {
|
||||
case CudaMemoryPoolType::NONE:
|
||||
CUDA_ENFORCE(cudaMalloc(&ptr, nbytes));
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||
}
|
||||
@ -345,13 +354,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
|
||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||
VLOG(2) << "CUB allocating pointer " << ptr << " on device "
|
||||
<< CaffeCudaGetDevice();
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
}
|
||||
return {ptr, ptr, &Delete, at::Device(CUDA)};
|
||||
case CudaMemoryPoolType::THC:
|
||||
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||
}
|
||||
@ -368,7 +377,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
|
||||
static void Delete(void* ptr) {
|
||||
// lock the mutex
|
||||
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
auto sz_it = g_size_map.find(ptr);
|
||||
DCHECK(sz_it != g_size_map.end());
|
||||
auto aff_it = g_cuda_device_affiliation.find(ptr);
|
||||
@ -393,7 +402,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
|
||||
<< cudaGetErrorString(error);
|
||||
}
|
||||
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
|
||||
}
|
||||
|
||||
@ -409,7 +418,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
|
||||
}
|
||||
case CudaMemoryPoolType::THC: {
|
||||
CUDA_ENFORCE(g_thc_allocator->Free(ptr));
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
|
||||
}
|
||||
break;
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_cuda_full_device_control);
|
||||
C10_DECLARE_bool(caffe2_cuda_full_device_control);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -37,7 +37,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) {
|
||||
|
||||
TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
|
||||
// For a while, set full device control to be true.
|
||||
FLAGS_caffe2_cuda_full_device_control = true;
|
||||
c10::FLAGS_caffe2_cuda_full_device_control = true;
|
||||
for (int i = 0; i < NumCudaDevices(); ++i) {
|
||||
CaffeCudaSetDevice(i);
|
||||
EXPECT_EQ(CaffeCudaGetDevice(), i);
|
||||
@ -46,7 +46,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
|
||||
CaffeCudaSetDevice(i);
|
||||
EXPECT_EQ(CaffeCudaGetDevice(), i);
|
||||
}
|
||||
FLAGS_caffe2_cuda_full_device_control = false;
|
||||
c10::FLAGS_caffe2_cuda_full_device_control = false;
|
||||
}
|
||||
|
||||
TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
|
||||
|
@ -1,201 +1,4 @@
|
||||
/**
|
||||
* @file flags.h
|
||||
* @brief Commandline flags support for Caffe2.
|
||||
*
|
||||
* This is a portable commandline flags tool for caffe2, so we can optionally
|
||||
* choose to use gflags or a lightweight custom implementation if gflags is
|
||||
* not possible on a certain platform. If you have gflags installed, setting the
|
||||
* macro CAFFE2_USE_GFLAGS will seamlessly route everything to gflags.
|
||||
*
|
||||
* To define a flag foo of type bool default to true, do the following in the
|
||||
* *global* namespace:
|
||||
* CAFFE2_DEFINE_bool(foo, true, "An example.");
|
||||
*
|
||||
* To use it in another .cc file, you can use CAFFE2_DECLARE_* as follows:
|
||||
* CAFFE2_DECLARE_bool(foo);
|
||||
*
|
||||
* In both cases, you can then access the flag via caffe2::FLAGS_foo.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifndef CAFFE2_CORE_FLAGS_H_
|
||||
#define CAFFE2_CORE_FLAGS_H_
|
||||
|
||||
#include "c10/util/Registry.h"
|
||||
#include "c10/util/Flags.h"
|
||||
#include "caffe2/core/common.h"
|
||||
|
||||
namespace caffe2 {
|
||||
/**
|
||||
* Sets the usage message when a commandline tool is called with "--help".
|
||||
*/
|
||||
CAFFE2_API void SetUsageMessage(const string& str);
|
||||
|
||||
/**
|
||||
* Returns the usage message for the commandline tool set by SetUsageMessage.
|
||||
*/
|
||||
CAFFE2_API const char* UsageMessage();
|
||||
|
||||
/**
|
||||
* Parses the commandline flags.
|
||||
*
|
||||
* This command parses all the commandline arguments passed in via pargc
|
||||
* and pargv. Once it is finished, pargc and pargv will contain the remaining
|
||||
* commandline args that caffe2 does not deal with. Note that following
|
||||
* convention, argv[0] contains the binary name and is not parsed.
|
||||
*/
|
||||
CAFFE2_API bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv);
|
||||
/**
|
||||
* Checks if the commandline flags have already been parsed.
|
||||
*/
|
||||
CAFFE2_API bool CommandLineFlagsHasBeenParsed();
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Below are gflags and non-gflags specific implementations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef CAFFE2_USE_GFLAGS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Begin gflags section: most functions are basically rerouted to gflags.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags.
|
||||
// Using GFLAGS_GFLAGS_H_ to capture this change.
|
||||
#ifndef GFLAGS_GFLAGS_H_
|
||||
namespace gflags = google;
|
||||
#endif // GFLAGS_GFLAGS_H_
|
||||
|
||||
// Motivation about the gflags wrapper:
|
||||
// (1) We would need to make sure that the gflags version and the non-gflags
|
||||
// version of Caffe2 are going to expose the same flags abstraction. One should
|
||||
// explicitly use caffe2::FLAGS_flag_name to access the flags.
|
||||
// (2) For flag names, it is recommended to start with caffe2_ to distinguish it
|
||||
// from regular gflags flags. For example, do
|
||||
// CAFFE2_DEFINE_bool(caffe2_my_flag, true, "An example");
|
||||
// to allow one to use caffe2::FLAGS_caffe2_my_flag.
|
||||
// (3) Gflags has a design issue that does not properly expose the global flags,
|
||||
// if one builds the library with -fvisibility=hidden. The current gflags (as of
|
||||
// Aug 2018) only deals with the Windows case using dllexport, and not the Linux
|
||||
// counterparts. As a result, we will explicitly use C10_EXPORT to export the
|
||||
// flags defined in Caffe2. This is done via a global reference, so the flag
|
||||
// itself is not duplicated - under the hood it is the same global gflags flag.
|
||||
#define CAFFE2_GFLAGS_DEF_WRAPPER( \
|
||||
type, real_type, name, default_value, help_str) \
|
||||
DEFINE_##type(name, default_value, help_str); \
|
||||
namespace caffe2 { \
|
||||
C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \
|
||||
}
|
||||
|
||||
#define CAFFE2_DEFINE_int(name, default_value, help_str) \
|
||||
CAFFE2_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_int64(name, default_value, help_str) \
|
||||
CAFFE2_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_double(name, default_value, help_str) \
|
||||
CAFFE2_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_bool(name, default_value, help_str) \
|
||||
CAFFE2_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_string(name, default_value, help_str) \
|
||||
CAFFE2_GFLAGS_DEF_WRAPPER( \
|
||||
string, ::fLS::clstring, name, default_value, help_str)
|
||||
|
||||
// DECLARE_typed_var should be used in header files and in the global namespace.
|
||||
#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \
|
||||
DECLARE_##type(name); \
|
||||
namespace caffe2 { \
|
||||
C10_IMPORT extern real_type& FLAGS_##name; \
|
||||
} // namespace caffe2
|
||||
|
||||
#define CAFFE2_DECLARE_int(name) \
|
||||
CAFFE2_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name)
|
||||
#define CAFFE2_DECLARE_int64(name) \
|
||||
CAFFE2_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name)
|
||||
#define CAFFE2_DECLARE_double(name) \
|
||||
CAFFE2_GFLAGS_DECLARE_WRAPPER(double, double, name)
|
||||
#define CAFFE2_DECLARE_bool(name) \
|
||||
CAFFE2_GFLAGS_DECLARE_WRAPPER(bool, bool, name)
|
||||
#define CAFFE2_DECLARE_string(name) \
|
||||
CAFFE2_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// End gflags section.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#else // CAFFE2_USE_GFLAGS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Begin non-gflags section: providing equivalent functionality.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
class CAFFE2_API Caffe2FlagParser {
|
||||
public:
|
||||
Caffe2FlagParser() {}
|
||||
bool success() { return success_; }
|
||||
|
||||
protected:
|
||||
template <typename T>
|
||||
bool Parse(const string& content, T* value);
|
||||
bool success_;
|
||||
};
|
||||
|
||||
C10_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&);
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
// The macros are defined outside the caffe2 namespace. In your code, you should
|
||||
// write the CAFFE2_DEFINE_* and CAFFE2_DECLARE_* macros outside any namespace
|
||||
// as well.
|
||||
|
||||
#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str) \
|
||||
namespace caffe2 { \
|
||||
C10_EXPORT type FLAGS_##name = default_value; \
|
||||
namespace { \
|
||||
class Caffe2FlagParser_##name : public Caffe2FlagParser { \
|
||||
public: \
|
||||
explicit Caffe2FlagParser_##name(const string& content) { \
|
||||
success_ = Caffe2FlagParser::Parse<type>(content, &FLAGS_##name); \
|
||||
} \
|
||||
}; \
|
||||
} \
|
||||
RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name( \
|
||||
#name, \
|
||||
Caffe2FlagsRegistry(), \
|
||||
RegistererCaffe2FlagsRegistry::DefaultCreator<Caffe2FlagParser_##name>, \
|
||||
"(" #type ", default " #default_value ") " help_str); \
|
||||
}
|
||||
|
||||
#define CAFFE2_DEFINE_int(name, default_value, help_str) \
|
||||
CAFFE2_DEFINE_typed_var(int, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_int64(name, default_value, help_str) \
|
||||
CAFFE2_DEFINE_typed_var(int64_t, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_double(name, default_value, help_str) \
|
||||
CAFFE2_DEFINE_typed_var(double, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_bool(name, default_value, help_str) \
|
||||
CAFFE2_DEFINE_typed_var(bool, name, default_value, help_str)
|
||||
#define CAFFE2_DEFINE_string(name, default_value, help_str) \
|
||||
CAFFE2_DEFINE_typed_var(string, name, default_value, help_str)
|
||||
|
||||
// DECLARE_typed_var should be used in header files and in the global namespace.
|
||||
#define CAFFE2_DECLARE_typed_var(type, name) \
|
||||
namespace caffe2 { \
|
||||
C10_IMPORT extern type FLAGS_##name; \
|
||||
} // namespace caffe2
|
||||
|
||||
#define CAFFE2_DECLARE_int(name) CAFFE2_DECLARE_typed_var(int, name)
|
||||
#define CAFFE2_DECLARE_int64(name) CAFFE2_DECLARE_typed_var(int64_t, name)
|
||||
#define CAFFE2_DECLARE_double(name) CAFFE2_DECLARE_typed_var(double, name)
|
||||
#define CAFFE2_DECLARE_bool(name) CAFFE2_DECLARE_typed_var(bool, name)
|
||||
#define CAFFE2_DECLARE_string(name) CAFFE2_DECLARE_typed_var(string, name)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// End non-gflags section.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // CAFFE2_USE_GFLAGS
|
||||
|
||||
#endif // CAFFE2_CORE_FLAGS_H_
|
||||
|
@ -1,27 +0,0 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "caffe2/core/macros.h"
|
||||
#include "caffe2/core/flags.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_flags_test_only_flag, true, "Only used in test.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
TEST(FlagsTest, TestGflagsCorrectness) {
|
||||
#ifdef CAFFE2_USE_GFLAGS
|
||||
EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true);
|
||||
EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true);
|
||||
// Change the caffe2 namespace and check global
|
||||
FLAGS_caffe2_flags_test_only_flag = false;
|
||||
EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, false);
|
||||
EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, false);
|
||||
// Change global and check caffe2 namespace
|
||||
::FLAGS_caffe2_flags_test_only_flag = true;
|
||||
EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true);
|
||||
EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true);
|
||||
#else // CAFFE2_USE_GFLAGS
|
||||
LOG(INFO) << "Caffe2 is not built with gflags. Nothing to test here.";
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
@ -8,16 +8,17 @@
|
||||
#include "caffe2/core/init.h"
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_hip_full_device_control,
|
||||
false,
|
||||
"If true, assume all the hipSetDevice and hipGetDevice calls will be "
|
||||
"controlled by Caffe2, and non-Caffe2 code will ensure that the entry and "
|
||||
"exit point has the same cuda device. Under the hood, Caffe2 will use "
|
||||
"thread local variables to cache the device, in order to speed up set and "
|
||||
"get device calls. This is an experimental feature that may have non "
|
||||
"trivial side effects, so use it with care and only enable it if you are "
|
||||
"absolutely sure. Also, this flag should not be changed after the program "
|
||||
"initializes.");
|
||||
C10_DEFINE_bool(
|
||||
caffe2_hip_full_device_control,
|
||||
false,
|
||||
"If true, assume all the hipSetDevice and hipGetDevice calls will be "
|
||||
"controlled by Caffe2, and non-Caffe2 code will ensure that the entry and "
|
||||
"exit point has the same cuda device. Under the hood, Caffe2 will use "
|
||||
"thread local variables to cache the device, in order to speed up set and "
|
||||
"get device calls. This is an experimental feature that may have non "
|
||||
"trivial side effects, so use it with care and only enable it if you are "
|
||||
"absolutely sure. Also, this flag should not be changed after the program "
|
||||
"initializes.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -88,7 +89,7 @@ int NumHipDevices()
|
||||
|
||||
namespace {
|
||||
int gDefaultGPUID = 0;
|
||||
// Only used when FLAGS_caffe2_hip_full_device_control is set true.
|
||||
// Only used when c10::FLAGS_caffe2_hip_full_device_control is set true.
|
||||
thread_local int gCurrentDevice = -1;
|
||||
} // namespace
|
||||
|
||||
@ -108,36 +109,28 @@ int GetDefaultGPUID() { return gDefaultGPUID; }
|
||||
|
||||
int CaffeHipGetDevice()
|
||||
{
|
||||
if(FLAGS_caffe2_hip_full_device_control)
|
||||
{
|
||||
if(gCurrentDevice < 0)
|
||||
{
|
||||
HIP_ENFORCE(hipGetDevice(&gCurrentDevice));
|
||||
}
|
||||
return gCurrentDevice;
|
||||
}
|
||||
else
|
||||
{
|
||||
int gpu_id = 0;
|
||||
HIP_ENFORCE(hipGetDevice(&gpu_id));
|
||||
return gpu_id;
|
||||
if (c10::FLAGS_caffe2_hip_full_device_control) {
|
||||
if (gCurrentDevice < 0) {
|
||||
HIP_ENFORCE(hipGetDevice(&gCurrentDevice));
|
||||
}
|
||||
return gCurrentDevice;
|
||||
} else {
|
||||
int gpu_id = 0;
|
||||
HIP_ENFORCE(hipGetDevice(&gpu_id));
|
||||
return gpu_id;
|
||||
}
|
||||
}
|
||||
|
||||
void CaffeHipSetDevice(const int id)
|
||||
{
|
||||
if(FLAGS_caffe2_hip_full_device_control)
|
||||
{
|
||||
if(gCurrentDevice != id)
|
||||
{
|
||||
HIP_ENFORCE(hipSetDevice(id));
|
||||
}
|
||||
gCurrentDevice = id;
|
||||
}
|
||||
else
|
||||
{
|
||||
HIP_ENFORCE(hipSetDevice(id));
|
||||
if (c10::FLAGS_caffe2_hip_full_device_control) {
|
||||
if (gCurrentDevice != id) {
|
||||
HIP_ENFORCE(hipSetDevice(id));
|
||||
}
|
||||
gCurrentDevice = id;
|
||||
} else {
|
||||
HIP_ENFORCE(hipSetDevice(id));
|
||||
}
|
||||
}
|
||||
|
||||
int GetGPUIDForPointer(const void* ptr)
|
||||
|
@ -15,40 +15,48 @@
|
||||
#include "caffe2/core/tensor.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(caffe2_hip_memory_pool,
|
||||
"",
|
||||
"Sets the memory pool used by caffe2. Possible values are "
|
||||
"none, cnmen and cub.");
|
||||
C10_DEFINE_string(
|
||||
caffe2_hip_memory_pool,
|
||||
"",
|
||||
"Sets the memory pool used by caffe2. Possible values are "
|
||||
"none, cnmen and cub.");
|
||||
|
||||
// For description of CUB caching allocator configuration, see
|
||||
// https://nvlabs.github.io/cub/structcub_1_1_caching_device_allocator.html
|
||||
CAFFE2_DEFINE_int(caffe2_cub_bin_growth,
|
||||
8,
|
||||
"If using cub as the memory allocator, sets the growth of bins "
|
||||
"used by the cub pool.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_min_bin,
|
||||
3,
|
||||
"If using cub as the memory allocator, sets the min number of "
|
||||
"bins.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_max_bin,
|
||||
10,
|
||||
"If using cub as the memory allocator, sets the max number of "
|
||||
"bins.");
|
||||
CAFFE2_DEFINE_int(caffe2_cub_max_managed_mb,
|
||||
10 * 1024,
|
||||
"If using cub as the memory allocators, sets the maximum amount "
|
||||
"of memory managed in gigabytes");
|
||||
CAFFE2_DEFINE_bool(caffe2_cub_print_allocation_events,
|
||||
false,
|
||||
"If true CachingDeviceAllocator will print allocation and deallocation "
|
||||
"events to stdout.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_bin_growth,
|
||||
8,
|
||||
"If using cub as the memory allocator, sets the growth of bins "
|
||||
"used by the cub pool.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_min_bin,
|
||||
3,
|
||||
"If using cub as the memory allocator, sets the min number of "
|
||||
"bins.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_cub_max_bin,
|
||||
10,
|
||||
"If using cub as the memory allocator, sets the max number of "
|
||||
"bins.");
|
||||
// Upper bound on the memory managed by the cub caching allocator. The value
// is expressed in megabytes: SetUpCub() multiplies it by 1024 * 1024 to obtain
// the byte count handed to cub::CachingDeviceAllocator, so the default of
// 10 * 1024 corresponds to 10 GB.
C10_DEFINE_int(
    caffe2_cub_max_managed_mb,
    10 * 1024,
    "If using cub as the memory allocators, sets the maximum amount "
    "of memory managed in megabytes");
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cub_print_allocation_events,
|
||||
false,
|
||||
"If true CachingDeviceAllocator will print allocation and deallocation "
|
||||
"events to stdout.");
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_gpu_memory_tracking,
|
||||
false,
|
||||
"If set, logs changes in GPU memory allocations");
|
||||
CAFFE2_DEFINE_int(caffe2_gpu_memory_report_interval_mb,
|
||||
128,
|
||||
"The threshold in MB on how frequently to report memory changes");
|
||||
C10_DEFINE_bool(
|
||||
caffe2_gpu_memory_tracking,
|
||||
false,
|
||||
"If set, logs changes in GPU memory allocations");
|
||||
C10_DEFINE_int(
|
||||
caffe2_gpu_memory_report_interval_mb,
|
||||
128,
|
||||
"The threshold in MB on how frequently to report memory changes");
|
||||
|
||||
namespace at {
|
||||
|
||||
@ -157,13 +165,13 @@ static void SetUpCub()
|
||||
// Sets up the cub memory pool
|
||||
try
|
||||
{
|
||||
g_cub_allocator.reset(
|
||||
new cub::CachingDeviceAllocator(FLAGS_caffe2_cub_bin_growth,
|
||||
FLAGS_caffe2_cub_min_bin,
|
||||
FLAGS_caffe2_cub_max_bin,
|
||||
size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
|
||||
false,
|
||||
FLAGS_caffe2_cub_print_allocation_events));
|
||||
g_cub_allocator.reset(new cub::CachingDeviceAllocator(
|
||||
c10::FLAGS_caffe2_cub_bin_growth,
|
||||
c10::FLAGS_caffe2_cub_min_bin,
|
||||
c10::FLAGS_caffe2_cub_max_bin,
|
||||
size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
|
||||
false,
|
||||
c10::FLAGS_caffe2_cub_print_allocation_events));
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
@ -174,30 +182,25 @@ static void SetUpCub()
|
||||
|
||||
static void Caffe2SetHIPMemoryPool()
|
||||
{
|
||||
if(FLAGS_caffe2_hip_memory_pool == "" || FLAGS_caffe2_hip_memory_pool == "none")
|
||||
{
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::NONE;
|
||||
}
|
||||
else if(FLAGS_caffe2_hip_memory_pool == "cnmem")
|
||||
{
|
||||
CAFFE_THROW("CNMEM is no longer used by Caffe2. Use cub instead. "
|
||||
"This error message may go away in the future.");
|
||||
}
|
||||
else if(FLAGS_caffe2_hip_memory_pool == "cub")
|
||||
{
|
||||
// Sets up cub.
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::CUB;
|
||||
SetUpCub();
|
||||
}
|
||||
else if(FLAGS_caffe2_hip_memory_pool == "thc")
|
||||
{
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::THC;
|
||||
g_thc_allocator.reset(new THCCachingAllocator());
|
||||
}
|
||||
else
|
||||
{
|
||||
CAFFE_THROW("Unrecognized HIP memory pool type: ", FLAGS_caffe2_hip_memory_pool);
|
||||
}
|
||||
if (c10::FLAGS_caffe2_hip_memory_pool == "" ||
|
||||
c10::FLAGS_caffe2_hip_memory_pool == "none") {
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::NONE;
|
||||
} else if (c10::FLAGS_caffe2_hip_memory_pool == "cnmem") {
|
||||
CAFFE_THROW(
|
||||
"CNMEM is no longer used by Caffe2. Use cub instead. "
|
||||
"This error message may go away in the future.");
|
||||
} else if (c10::FLAGS_caffe2_hip_memory_pool == "cub") {
|
||||
// Sets up cub.
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::CUB;
|
||||
SetUpCub();
|
||||
} else if (c10::FLAGS_caffe2_hip_memory_pool == "thc") {
|
||||
g_hip_memory_pool_type = HipMemoryPoolType::THC;
|
||||
g_thc_allocator.reset(new THCCachingAllocator());
|
||||
} else {
|
||||
CAFFE_THROW(
|
||||
"Unrecognized HIP memory pool type: ",
|
||||
c10::FLAGS_caffe2_hip_memory_pool);
|
||||
}
|
||||
}
|
||||
|
||||
// An initialization function that sets the CPU side to use pinned cpu
|
||||
@ -281,16 +284,18 @@ std::mutex& HIPContext::mutex()
|
||||
std::vector<long> HIPContext::TotalMemoryByGpu()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
||||
CAFFE_ENFORCE(FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
CAFFE_ENFORCE(
|
||||
c10::FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
return g_total_by_gpu_map;
|
||||
}
|
||||
|
||||
std::vector<long> HIPContext::MaxMemoryByGpu()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
||||
CAFFE_ENFORCE(FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
CAFFE_ENFORCE(
|
||||
c10::FLAGS_caffe2_gpu_memory_tracking,
|
||||
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
|
||||
return g_max_by_gpu_map;
|
||||
}
|
||||
|
||||
@ -301,27 +306,22 @@ void TrackMemoryAlloc(size_t nbytes)
|
||||
g_total_by_gpu_map[this_gpu] += nbytes;
|
||||
g_max_by_gpu_map[this_gpu] = std::max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
|
||||
g_total_mem += nbytes;
|
||||
if(g_total_mem - g_last_rep > FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024)
|
||||
{
|
||||
for(int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++)
|
||||
{
|
||||
long t = g_total_by_gpu_map[gpu];
|
||||
long max_t = g_max_by_gpu_map[gpu];
|
||||
if(max_t > 0)
|
||||
{
|
||||
if(max_t != t)
|
||||
{
|
||||
LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB"
|
||||
<< " (max: " << max_t / 1024 / 1024 << " MB)";
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB";
|
||||
}
|
||||
}
|
||||
if (g_total_mem - g_last_rep >
|
||||
c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
|
||||
for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
|
||||
long t = g_total_by_gpu_map[gpu];
|
||||
long max_t = g_max_by_gpu_map[gpu];
|
||||
if (max_t > 0) {
|
||||
if (max_t != t) {
|
||||
LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB"
|
||||
<< " (max: " << max_t / 1024 / 1024 << " MB)";
|
||||
} else {
|
||||
LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB";
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "Total: " << g_total_mem / 1024 / 1024 << " MB";
|
||||
g_last_rep = g_total_mem;
|
||||
}
|
||||
LOG(INFO) << "Total: " << g_total_mem / 1024 / 1024 << " MB";
|
||||
g_last_rep = g_total_mem;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -340,14 +340,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
|
||||
static Caffe2HipInitializerHelper g_hip_initializer_;
|
||||
void* ptr = nullptr;
|
||||
|
||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
TrackMemoryAlloc(nbytes);
|
||||
}
|
||||
switch (g_hip_memory_pool_type) {
|
||||
case HipMemoryPoolType::NONE:
|
||||
HIP_ENFORCE(hipMalloc(&ptr, nbytes));
|
||||
if(FLAGS_caffe2_gpu_memory_tracking)
|
||||
{
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||
}
|
||||
@ -356,15 +355,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
|
||||
HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||
VLOG(2) << "CUB allocating pointer " << ptr << " on device " << CaffeHipGetDevice();
|
||||
if(FLAGS_caffe2_gpu_memory_tracking)
|
||||
{
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
}
|
||||
return {ptr, ptr, &Delete, at::Device(HIP)};
|
||||
case HipMemoryPoolType::THC:
|
||||
HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
||||
if (FLAGS_caffe2_gpu_memory_tracking)
|
||||
{
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_size_map[ptr] = nbytes;
|
||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||
}
|
||||
@ -403,8 +400,7 @@ struct DefaultHIPAllocator final : public at::Allocator {
|
||||
<< hipGetErrorString(error);
|
||||
}
|
||||
|
||||
if(FLAGS_caffe2_gpu_memory_tracking)
|
||||
{
|
||||
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
|
||||
g_hip_device_affiliation.erase(g_hip_device_affiliation.find(ptr));
|
||||
}
|
||||
|
||||
|
@ -29,17 +29,20 @@
|
||||
|
||||
#include "caffe2/core/hip/context_hip.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
|
||||
C10_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_async_dag_use_multiple_streams, false, "Use multiple streams per thread");
|
||||
C10_DEFINE_bool(
|
||||
caffe2_async_dag_use_multiple_streams,
|
||||
false,
|
||||
"Use multiple streams per thread");
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
C10_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
C10_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
|
||||
C10_DECLARE_int(caffe2_streams_per_gpu);
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
C10_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -97,8 +100,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option)
|
||||
}
|
||||
do {
|
||||
stream_id = stream_counters_[gpu_id]++;
|
||||
stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
|
||||
} while (FLAGS_caffe2_net_async_check_stream_status &&
|
||||
stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
|
||||
} while (c10::FLAGS_caffe2_net_async_check_stream_status &&
|
||||
!HIPContext::IsStreamFree(device_option, stream_id));
|
||||
}
|
||||
return stream_id;
|
||||
@ -117,9 +120,9 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
|
||||
"None of the parent is recorded for an event.");
|
||||
|
||||
int stream_id = 0;
|
||||
if(FLAGS_caffe2_async_dag_use_multiple_streams)
|
||||
{
|
||||
stream_id = stream(operator_nodes_[source_idx].operator_->event().GetDeviceOption());
|
||||
if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
|
||||
stream_id = stream(
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption());
|
||||
}
|
||||
|
||||
std::vector<const Event*> parent_events;
|
||||
@ -133,13 +136,13 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
|
||||
operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
|
||||
}
|
||||
|
||||
if(FLAGS_caffe2_dag_net_collect_stats)
|
||||
{
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(stats_[device_option.device_type()],
|
||||
task_wait_time_us,
|
||||
task_timers_[chain_id]->MicroSeconds());
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
stats_[device_option.device_type()],
|
||||
task_wait_time_us,
|
||||
task_timers_[chain_id]->MicroSeconds());
|
||||
}
|
||||
|
||||
// We've waited on all our parent indices.
|
||||
@ -160,20 +163,19 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
|
||||
}
|
||||
|
||||
const auto& sink_idx = chain.back();
|
||||
if(success && FLAGS_caffe2_net_async_finish_chain)
|
||||
{
|
||||
operator_nodes_[sink_idx].operator_->event().Finish();
|
||||
if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
|
||||
operator_nodes_[sink_idx].operator_->event().Finish();
|
||||
}
|
||||
CAFFE_ENFORCE(!eventRecorded_[sink_idx], "An event for ", sink_idx, " should not be recorded.");
|
||||
eventRecorded_[sink_idx] = 1;
|
||||
|
||||
if(FLAGS_caffe2_dag_net_collect_stats)
|
||||
{
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(stats_[device_option.device_type()],
|
||||
task_time_to_scheduled_us,
|
||||
task_timers_[chain_id]->MicroSeconds());
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
stats_[device_option.device_type()],
|
||||
task_time_to_scheduled_us,
|
||||
task_timers_[chain_id]->MicroSeconds());
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
#include "caffe2/core/net_async_base.h"
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_threads_per_hip_gpu,
|
||||
1,
|
||||
"Number of CPU threads per AMD HIP GPU");
|
||||
@ -26,17 +26,19 @@ namespace caffe2 {
|
||||
std::shared_ptr<TaskThreadPool>
|
||||
GetAsyncNetHIPThreadPool(int hip_gpu_id, int pool_size, bool create_new) {
|
||||
// For GPU, use per device thread pools of predefined constant size
|
||||
if (pool_size != FLAGS_caffe2_threads_per_hip_gpu) {
|
||||
if (pool_size != c10::FLAGS_caffe2_threads_per_hip_gpu) {
|
||||
LOG(INFO) << "Overriding AMD HIP GPU pool size: using "
|
||||
<< FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
|
||||
<< c10::FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
|
||||
}
|
||||
static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
|
||||
static std::mutex pool_mutex;
|
||||
|
||||
if (create_new) {
|
||||
LOG(INFO) << "Created new AMD HIP GPU pool, size: " << FLAGS_caffe2_threads_per_hip_gpu
|
||||
LOG(INFO) << "Created new AMD HIP GPU pool, size: "
|
||||
<< c10::FLAGS_caffe2_threads_per_hip_gpu
|
||||
<< "; GPU id: " << hip_gpu_id;
|
||||
return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
|
||||
return std::make_shared<TaskThreadPool>(
|
||||
c10::FLAGS_caffe2_threads_per_hip_gpu);
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lock(pool_mutex);
|
||||
|
||||
@ -46,9 +48,10 @@ GetAsyncNetHIPThreadPool(int hip_gpu_id, int pool_size, bool create_new) {
|
||||
}
|
||||
if (!shared_pool) {
|
||||
LOG(INFO) << "Created shared AMD HIP GPU pool, size: "
|
||||
<< FLAGS_caffe2_threads_per_hip_gpu << "; GPU id: " << hip_gpu_id;
|
||||
shared_pool =
|
||||
std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
|
||||
<< c10::FLAGS_caffe2_threads_per_hip_gpu
|
||||
<< "; GPU id: " << hip_gpu_id;
|
||||
shared_pool = std::make_shared<TaskThreadPool>(
|
||||
c10::FLAGS_caffe2_threads_per_hip_gpu);
|
||||
pools[hip_gpu_id] = shared_pool;
|
||||
}
|
||||
return shared_pool;
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_version,
|
||||
false,
|
||||
"Print Caffe2 version and build options on startup");
|
||||
@ -53,7 +53,7 @@ bool GlobalInit(int* pargc, char*** pargv) {
|
||||
if (init_state == internal::State::Initialized) {
|
||||
VLOG(1) << "GlobalInit has already been called: re-parsing gflags only.";
|
||||
// Reparse command line flags
|
||||
success &= ParseCaffeCommandLineFlags(pargc, pargv);
|
||||
success &= c10::ParseCommandLineFlags(pargc, pargv);
|
||||
UpdateLoggingLevelsFromFlags();
|
||||
} else if (init_state == internal::State::Uninitialized) {
|
||||
init_state = internal::State::Initializing;
|
||||
@ -68,10 +68,10 @@ bool GlobalInit(int* pargc, char*** pargv) {
|
||||
->RunRegisteredEarlyInitFunctions(pargc, pargv);
|
||||
CAFFE_ENFORCE(
|
||||
success, "Failed to run some early init functions for caffe2.");
|
||||
success &= ParseCaffeCommandLineFlags(pargc, pargv);
|
||||
success &= c10::ParseCommandLineFlags(pargc, pargv);
|
||||
success &= InitCaffeLogging(pargc, *pargv);
|
||||
// Print out the current build version. Using cerr as LOG(INFO) might be off
|
||||
if (FLAGS_caffe2_version) {
|
||||
if (c10::FLAGS_caffe2_version) {
|
||||
std::cerr << "Caffe2 build configuration: " << std::endl;
|
||||
for (const auto& it : GetBuildOptions()) {
|
||||
std::cerr << " " << std::setw(25) << std::left << it.first << " : "
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/utils/cpuid.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_quit_on_unsupported_cpu_feature,
|
||||
false,
|
||||
"If set, when Caffe2 is built with a CPU feature (like avx2) but the "
|
||||
@ -23,7 +23,7 @@ static void QuitIfFeatureUnsupported(
|
||||
"on your machine, such as SIGILL 'illegal instructions' on Linux. "
|
||||
"As a result Caffe2 will preemptively quit. Please install or "
|
||||
"build a Caffe2 binary with the feature turned off.";
|
||||
if (FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
|
||||
if (c10::FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
|
||||
LOG(FATAL) << err_string;
|
||||
} else {
|
||||
LOG(ERROR) << err_string;
|
||||
|
@ -12,11 +12,12 @@
|
||||
|
||||
#include "caffe2/core/init.h"
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
caffe2_omp_num_threads, 0,
|
||||
C10_DEFINE_int(
|
||||
caffe2_omp_num_threads,
|
||||
0,
|
||||
"The number of openmp threads. 0 to use default value. "
|
||||
"Does not have effect if OpenMP is disabled.");
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_mkl_num_threads,
|
||||
0,
|
||||
"The number of mkl threads. 0 to use default value. If set, "
|
||||
@ -34,9 +35,10 @@ bool Caffe2SetOpenMPThreads(int*, char***) {
|
||||
omp_set_num_threads(1);
|
||||
}
|
||||
|
||||
if (FLAGS_caffe2_omp_num_threads > 0) {
|
||||
VLOG(1) << "Setting omp_num_threads to " << FLAGS_caffe2_omp_num_threads;
|
||||
omp_set_num_threads(FLAGS_caffe2_omp_num_threads);
|
||||
if (c10::FLAGS_caffe2_omp_num_threads > 0) {
|
||||
VLOG(1) << "Setting omp_num_threads to "
|
||||
<< c10::FLAGS_caffe2_omp_num_threads;
|
||||
omp_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
|
||||
}
|
||||
VLOG(1) << "Caffe2 running with " << omp_get_max_threads() << " OMP threads";
|
||||
return true;
|
||||
@ -54,16 +56,18 @@ bool Caffe2SetMKLThreads(int*, char***) {
|
||||
}
|
||||
|
||||
// If caffe2_omp_num_threads is set, we use that for MKL as well.
|
||||
if (FLAGS_caffe2_omp_num_threads > 0) {
|
||||
VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_omp_num_threads
|
||||
if (c10::FLAGS_caffe2_omp_num_threads > 0) {
|
||||
VLOG(1) << "Setting mkl_num_threads to "
|
||||
<< c10::FLAGS_caffe2_omp_num_threads
|
||||
<< " as inherited from omp_num_threads.";
|
||||
mkl_set_num_threads(FLAGS_caffe2_omp_num_threads);
|
||||
mkl_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
|
||||
}
|
||||
|
||||
// Override omp_num_threads if mkl_num_threads is set.
|
||||
if (FLAGS_caffe2_mkl_num_threads > 0) {
|
||||
VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_mkl_num_threads;
|
||||
mkl_set_num_threads(FLAGS_caffe2_mkl_num_threads);
|
||||
if (c10::FLAGS_caffe2_mkl_num_threads > 0) {
|
||||
VLOG(1) << "Setting mkl_num_threads to "
|
||||
<< c10::FLAGS_caffe2_mkl_num_threads;
|
||||
mkl_set_num_threads(c10::FLAGS_caffe2_mkl_num_threads);
|
||||
}
|
||||
VLOG(1) << "Caffe2 running with " << mkl_get_max_threads() << " MKL threads";
|
||||
return true;
|
||||
|
@ -8,9 +8,11 @@
|
||||
|
||||
// Common code that we use regardless of whether we use glog or not.
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_use_fatal_for_enforce, false,
|
||||
"If set true, when CAFFE_ENFORCE is not met, abort instead "
|
||||
"of throwing an exception.");
|
||||
C10_DEFINE_bool(
|
||||
caffe2_use_fatal_for_enforce,
|
||||
false,
|
||||
"If set true, when CAFFE_ENFORCE is not met, abort instead "
|
||||
"of throwing an exception.");
|
||||
|
||||
namespace caffe2 {
|
||||
namespace enforce_detail {
|
||||
@ -52,7 +54,7 @@ void ThrowEnforceNotMet(
|
||||
const std::string& msg,
|
||||
const void* caller) {
|
||||
at::Error e(file, line, condition, msg, (*GetFetchStackTrace())(), caller);
|
||||
if (FLAGS_caffe2_use_fatal_for_enforce) {
|
||||
if (c10::FLAGS_caffe2_use_fatal_for_enforce) {
|
||||
LOG(FATAL) << e.msg_stack()[0];
|
||||
}
|
||||
throw e;
|
||||
@ -60,8 +62,7 @@ void ThrowEnforceNotMet(
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
#ifdef CAFFE2_USE_GFLAGS
|
||||
#ifdef C10_USE_GFLAGS
|
||||
// When GLOG depends on GFLAGS, these variables are being defined in GLOG
|
||||
// directly via the GFLAGS definition, so we will use DECLARE_* to declare
|
||||
// them, and use them in Caffe2.
|
||||
@ -74,11 +75,10 @@ DECLARE_bool(logtostderr);
|
||||
#elif !CAFFE2_MOBILE && !__APPLE__ && !defined(_WIN32)
|
||||
// Declare our own versions of the above flags so we don't error out
|
||||
// when they are passed into Caffe2.
|
||||
CAFFE2_DEFINE_int(minloglevel, 0, "Equivalent to glog minloglevel");
|
||||
CAFFE2_DEFINE_int(v, 0, "Equivalent to glog verbose");
|
||||
CAFFE2_DEFINE_bool(logtostderr, false, "Equivalent to glog logtostderr");
|
||||
#endif // CAFFE2_USE_GFLAGS
|
||||
|
||||
C10_DEFINE_int(minloglevel, 0, "Equivalent to glog minloglevel");
|
||||
C10_DEFINE_int(v, 0, "Equivalent to glog verbose");
|
||||
C10_DEFINE_bool(logtostderr, false, "Equivalent to glog logtostderr");
|
||||
#endif // C10_USE_GFLAGS
|
||||
|
||||
#ifdef CAFFE2_USE_GOOGLE_GLOG
|
||||
|
||||
@ -92,9 +92,10 @@ using fLI::FLAGS_v;
|
||||
using fLB::FLAGS_logtostderr;
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
CAFFE2_DEFINE_int(caffe2_log_level, google::GLOG_ERROR,
|
||||
"The minimum log level that caffe2 will output.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_log_level,
|
||||
google::GLOG_ERROR,
|
||||
"The minimum log level that caffe2 will output.");
|
||||
|
||||
// Google glog's api does not have an external function that allows one to check
|
||||
// if glog is initialized or not. It does have an internal function - so we are
|
||||
@ -128,14 +129,14 @@ bool InitCaffeLogging(int* argc, char** argv) {
|
||||
void UpdateLoggingLevelsFromFlags() {
|
||||
// If caffe2_log_level is set and is lower than the min log level by glog,
|
||||
// we will transfer the caffe2_log_level setting to glog to override that.
|
||||
FLAGS_minloglevel = std::min(FLAGS_caffe2_log_level, FLAGS_minloglevel);
|
||||
FLAGS_minloglevel = std::min(c10::FLAGS_caffe2_log_level, FLAGS_minloglevel);
|
||||
// If caffe2_log_level is explicitly set, let's also turn on logtostderr.
|
||||
if (FLAGS_caffe2_log_level < google::GLOG_ERROR) {
|
||||
if (c10::FLAGS_caffe2_log_level < google::GLOG_ERROR) {
|
||||
FLAGS_logtostderr = 1;
|
||||
}
|
||||
// Also, transfer the caffe2_log_level verbose setting to glog.
|
||||
if (FLAGS_caffe2_log_level < 0) {
|
||||
FLAGS_v = std::min(FLAGS_v, -FLAGS_caffe2_log_level);
|
||||
if (c10::FLAGS_caffe2_log_level < 0) {
|
||||
FLAGS_v = std::min(FLAGS_v, -c10::FLAGS_caffe2_log_level);
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,24 +152,27 @@ void ShowLogInfoToStderr() {
|
||||
#include <android/log.h>
|
||||
#endif // ANDROID
|
||||
|
||||
CAFFE2_DEFINE_int(caffe2_log_level, ERROR,
|
||||
"The minimum log level that caffe2 will output.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_log_level,
|
||||
ERROR,
|
||||
"The minimum log level that caffe2 will output.");
|
||||
|
||||
namespace caffe2 {
|
||||
bool InitCaffeLogging(int* argc, char** argv) {
|
||||
// When doing InitCaffeLogging, we will assume that caffe's flag paser has
|
||||
// already finished.
|
||||
if (*argc == 0) return true;
|
||||
if (!CommandLineFlagsHasBeenParsed()) {
|
||||
if (!c10::CommandLineFlagsHasBeenParsed()) {
|
||||
std::cerr << "InitCaffeLogging() has to be called after "
|
||||
"ParseCaffeCommandLineFlags. Modify your program to make sure "
|
||||
"of this." << std::endl;
|
||||
"c10::ParseCommandLineFlags. Modify your program to make sure "
|
||||
"of this."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
if (FLAGS_caffe2_log_level > FATAL) {
|
||||
if (c10::FLAGS_caffe2_log_level > FATAL) {
|
||||
std::cerr << "The log level of Caffe2 has to be no larger than FATAL("
|
||||
<< FATAL << "). Capping it to FATAL." << std::endl;
|
||||
FLAGS_caffe2_log_level = FATAL;
|
||||
c10::FLAGS_caffe2_log_level = FATAL;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -177,12 +181,12 @@ void UpdateLoggingLevelsFromFlags() {
|
||||
}
|
||||
|
||||
void ShowLogInfoToStderr() {
|
||||
FLAGS_caffe2_log_level = INFO;
|
||||
c10::FLAGS_caffe2_log_level = INFO;
|
||||
}
|
||||
|
||||
MessageLogger::MessageLogger(const char *file, int line, int severity)
|
||||
: severity_(severity) {
|
||||
if (severity_ < FLAGS_caffe2_log_level) {
|
||||
if (severity_ < c10::FLAGS_caffe2_log_level) {
|
||||
// Nothing needs to be logged.
|
||||
return;
|
||||
}
|
||||
@ -212,7 +216,7 @@ MessageLogger::MessageLogger(const char *file, int line, int severity)
|
||||
|
||||
// Output the contents of the stream to the proper channel on destruction.
|
||||
MessageLogger::~MessageLogger() {
|
||||
if (severity_ < FLAGS_caffe2_log_level) {
|
||||
if (severity_ < c10::FLAGS_caffe2_log_level) {
|
||||
// Nothing needs to be logged.
|
||||
return;
|
||||
}
|
||||
@ -235,7 +239,7 @@ MessageLogger::~MessageLogger() {
|
||||
__android_log_print(ANDROID_LOG_FATAL, tag_, "terminating.\n");
|
||||
}
|
||||
#else // !ANDROID
|
||||
if (severity_ >= FLAGS_caffe2_log_level) {
|
||||
if (severity_ >= c10::FLAGS_caffe2_log_level) {
|
||||
// If not building on Android, log all output to std::cerr.
|
||||
std::cerr << stream_.str();
|
||||
// Simulating the glog default behavior: if the severity is above INFO,
|
||||
|
@ -27,8 +27,8 @@
|
||||
#include "caffe2/core/logging_is_not_google_glog.h"
|
||||
#endif // CAFFE2_USE_GOOGLE_GLOG
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_log_level);
|
||||
CAFFE2_DECLARE_bool(caffe2_use_fatal_for_enforce);
|
||||
C10_DECLARE_int(caffe2_log_level);
|
||||
C10_DECLARE_bool(caffe2_use_fatal_for_enforce);
|
||||
|
||||
namespace caffe2 {
|
||||
// Functions that we use for initialization.
|
||||
|
@ -12,14 +12,14 @@ TEST(LoggingTest, TestEnforceTrue) {
|
||||
|
||||
TEST(LoggingTest, TestEnforceFalse) {
|
||||
bool kFalse = false;
|
||||
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
|
||||
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
|
||||
try {
|
||||
CAFFE_ENFORCE(false, "This throws.");
|
||||
// This should never be triggered.
|
||||
ADD_FAILURE();
|
||||
} catch (const EnforceNotMet&) {
|
||||
}
|
||||
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
|
||||
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
|
||||
}
|
||||
|
||||
TEST(LoggingTest, TestEnforceEquals) {
|
||||
@ -76,9 +76,9 @@ TEST(LoggingTest, Join) {
|
||||
#if GTEST_HAS_DEATH_TEST
|
||||
TEST(LoggingDeathTest, TestEnforceUsingFatal) {
|
||||
bool kTrue = true;
|
||||
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
|
||||
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
|
||||
EXPECT_DEATH(CAFFE_ENFORCE(false, "This goes fatal."), "");
|
||||
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
|
||||
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -38,7 +38,6 @@ static_assert(
|
||||
#cmakedefine CAFFE2_USE_CUDNN
|
||||
#cmakedefine CAFFE2_USE_EIGEN_FOR_BLAS
|
||||
#cmakedefine CAFFE2_USE_FBCODE
|
||||
#cmakedefine CAFFE2_USE_GFLAGS
|
||||
#cmakedefine CAFFE2_USE_GOOGLE_GLOG
|
||||
#cmakedefine CAFFE2_USE_LITE_PROTO
|
||||
#cmakedefine CAFFE2_USE_MKL
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
caffe2_override_executor,
|
||||
"",
|
||||
"Comma-separated list of executor overrides");
|
||||
@ -115,7 +115,7 @@ const std::unordered_map<std::string, std::string>& defaultOverrides() {
|
||||
}
|
||||
|
||||
void checkExecutorOverride(std::string& net_type) {
|
||||
auto executors = caffe2::split(',', FLAGS_caffe2_override_executor);
|
||||
auto executors = caffe2::split(',', c10::FLAGS_caffe2_override_executor);
|
||||
CAFFE_ENFORCE(
|
||||
executors.size() % 2 == 0, "Invalid override executors flag value");
|
||||
std::unordered_map<std::string, std::string> overrides;
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "caffe2/utils/simple_queue.h"
|
||||
#include "caffe2/utils/thread_pool.h"
|
||||
|
||||
CAFFE2_DECLARE_string(caffe2_override_executor);
|
||||
C10_DECLARE_string(caffe2_override_executor);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -5,50 +5,50 @@
|
||||
#include "caffe2/core/timer.h"
|
||||
|
||||
// experimental support for multiple streams per worker per GPU
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_streams_per_gpu,
|
||||
1,
|
||||
"Number of streams per worker per GPU"
|
||||
" to use in GPU thread pool (experimental)");
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
C10_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_finish_chain,
|
||||
false,
|
||||
"Wait for chain to finish");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_always_schedule_child,
|
||||
false,
|
||||
"Always schedule child chains from parent chain");
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_net_async_max_gpus,
|
||||
16,
|
||||
"Max number of GPUs allowed in net async executor");
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_net_async_max_numa_nodes,
|
||||
8,
|
||||
"Max number of NUMA nodes allowed in net async executor");
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_net_async_cpu_pool_size,
|
||||
0,
|
||||
"Number of threads in CPU pool by default");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_check_stream_status,
|
||||
false,
|
||||
"Select next non-busy stream");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_use_single_pool,
|
||||
false,
|
||||
"Use single thread pool for all devices");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_use_per_net_pools,
|
||||
false,
|
||||
"Use per net thread pools");
|
||||
@ -152,14 +152,14 @@ TaskThreadPool* AsyncNetBase::pool(const DeviceOption& device_option) {
|
||||
}
|
||||
CAFFE_ENFORCE_LT(
|
||||
numa_node_id,
|
||||
FLAGS_caffe2_net_async_max_numa_nodes,
|
||||
c10::FLAGS_caffe2_net_async_max_numa_nodes,
|
||||
"Invalid NUMA node id: ",
|
||||
numa_node_id);
|
||||
return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_);
|
||||
} else if (device_option.device_type() == PROTO_CUDA) {
|
||||
auto gpu_id = device_option.cuda_gpu_id();
|
||||
CAFFE_ENFORCE(
|
||||
gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
|
||||
gpu_id >= 0 && gpu_id < c10::FLAGS_caffe2_net_async_max_gpus,
|
||||
"Invalid GPU id: " + caffe2::to_string(gpu_id));
|
||||
return poolGetter(gpu_pools_, PROTO_CUDA, gpu_id, num_workers_);
|
||||
} else {
|
||||
@ -432,8 +432,8 @@ GetAsyncNetCPUThreadPool(int numa_node_id, int pool_size, bool create_new) {
|
||||
static std::mutex pool_mutex;
|
||||
|
||||
if (pool_size <= 0) {
|
||||
if (FLAGS_caffe2_net_async_cpu_pool_size > 0) {
|
||||
pool_size = FLAGS_caffe2_net_async_cpu_pool_size;
|
||||
if (c10::FLAGS_caffe2_net_async_cpu_pool_size > 0) {
|
||||
pool_size = c10::FLAGS_caffe2_net_async_cpu_pool_size;
|
||||
LOG(INFO) << "Using default CPU pool size: " << pool_size
|
||||
<< "; NUMA node id: " << numa_node_id;
|
||||
} else {
|
||||
@ -495,12 +495,12 @@ void AsyncNetBase::computeExecutionModeFlags() {
|
||||
use_per_net_pools_ = true;
|
||||
is_blocking_ = true;
|
||||
} else {
|
||||
streams_per_gpu_ = FLAGS_caffe2_streams_per_gpu;
|
||||
finish_chain_ = FLAGS_caffe2_net_async_finish_chain;
|
||||
always_schedule_child_ = FLAGS_caffe2_net_async_always_schedule_child;
|
||||
check_stream_status_ = FLAGS_caffe2_net_async_check_stream_status;
|
||||
use_single_pool_ = FLAGS_caffe2_net_async_use_single_pool;
|
||||
use_per_net_pools_ = FLAGS_caffe2_net_async_use_per_net_pools;
|
||||
streams_per_gpu_ = c10::FLAGS_caffe2_streams_per_gpu;
|
||||
finish_chain_ = c10::FLAGS_caffe2_net_async_finish_chain;
|
||||
always_schedule_child_ = c10::FLAGS_caffe2_net_async_always_schedule_child;
|
||||
check_stream_status_ = c10::FLAGS_caffe2_net_async_check_stream_status;
|
||||
use_single_pool_ = c10::FLAGS_caffe2_net_async_use_single_pool;
|
||||
use_per_net_pools_ = c10::FLAGS_caffe2_net_async_use_per_net_pools;
|
||||
is_blocking_ = false;
|
||||
}
|
||||
}
|
||||
|
@ -13,15 +13,15 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/thread_pool.h"
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_always_schedule_child);
|
||||
CAFFE2_DECLARE_int(caffe2_net_async_max_gpus);
|
||||
CAFFE2_DECLARE_int(caffe2_net_async_max_numa_nodes);
|
||||
CAFFE2_DECLARE_int(caffe2_net_async_cpu_pool_size);
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_use_single_pool);
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_use_per_net_pools);
|
||||
C10_DECLARE_int(caffe2_streams_per_gpu);
|
||||
C10_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
C10_DECLARE_bool(caffe2_net_async_always_schedule_child);
|
||||
C10_DECLARE_int(caffe2_net_async_max_gpus);
|
||||
C10_DECLARE_int(caffe2_net_async_max_numa_nodes);
|
||||
C10_DECLARE_int(caffe2_net_async_cpu_pool_size);
|
||||
C10_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
C10_DECLARE_bool(caffe2_net_async_use_single_pool);
|
||||
C10_DECLARE_bool(caffe2_net_async_use_per_net_pools);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -17,20 +17,20 @@
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
|
||||
C10_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_async_dag_use_multiple_streams,
|
||||
false,
|
||||
"Use multiple streams per thread");
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
C10_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
C10_DECLARE_bool(caffe2_net_async_finish_chain);
|
||||
|
||||
CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
|
||||
C10_DECLARE_int(caffe2_streams_per_gpu);
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
C10_DECLARE_bool(caffe2_net_async_check_stream_status);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -48,7 +48,7 @@ constexpr Color kWaitColor = 0x0066FF33; // green
|
||||
class ProfiledRange {
|
||||
public:
|
||||
ProfiledRange(const OperatorDef& def, Color color) {
|
||||
if (!FLAGS_caffe2_use_nvtx) {
|
||||
if (!c10::FLAGS_caffe2_use_nvtx) {
|
||||
return;
|
||||
}
|
||||
nvtxEventAttributes_t eventAttrib = {0};
|
||||
@ -63,7 +63,7 @@ class ProfiledRange {
|
||||
}
|
||||
|
||||
~ProfiledRange() {
|
||||
if (!FLAGS_caffe2_use_nvtx) {
|
||||
if (!c10::FLAGS_caffe2_use_nvtx) {
|
||||
return;
|
||||
}
|
||||
nvtxRangeEnd(range_);
|
||||
@ -119,8 +119,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option) {
|
||||
}
|
||||
do {
|
||||
stream_id = stream_counters_[gpu_id]++;
|
||||
stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
|
||||
} while (FLAGS_caffe2_net_async_check_stream_status &&
|
||||
stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
|
||||
} while (c10::FLAGS_caffe2_net_async_check_stream_status &&
|
||||
!CUDAContext::IsStreamFree(device_option, stream_id));
|
||||
}
|
||||
return stream_id;
|
||||
@ -141,7 +141,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
|
||||
"None of the parent is recorded for an event.");
|
||||
|
||||
int stream_id = 0;
|
||||
if (FLAGS_caffe2_async_dag_use_multiple_streams) {
|
||||
if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
|
||||
stream_id = stream(
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption());
|
||||
}
|
||||
@ -158,7 +158,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
|
||||
operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
|
||||
}
|
||||
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
@ -184,7 +184,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
|
||||
}
|
||||
|
||||
const auto& sink_idx = chain.back();
|
||||
if (success && FLAGS_caffe2_net_async_finish_chain) {
|
||||
if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
|
||||
operator_nodes_[sink_idx].operator_->event().Finish();
|
||||
}
|
||||
CAFFE_ENFORCE(
|
||||
@ -194,7 +194,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
|
||||
" should not be recorded.");
|
||||
eventRecorded_[sink_idx] = 1;
|
||||
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
const auto& device_option =
|
||||
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
|
||||
CAFFE2_DEFINE_int(caffe2_threads_per_gpu, 1, "Number of CPU threads per GPU");
|
||||
C10_DEFINE_int(caffe2_threads_per_gpu, 1, "Number of CPU threads per GPU");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -11,17 +11,17 @@ C10_REGISTER_CREATOR(ThreadPoolRegistry, CUDA, GetAsyncNetGPUThreadPool);
|
||||
std::shared_ptr<TaskThreadPool>
|
||||
GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
|
||||
// For GPU, use per device thread pools of predefined constant size
|
||||
if (pool_size != FLAGS_caffe2_threads_per_gpu) {
|
||||
if (pool_size != c10::FLAGS_caffe2_threads_per_gpu) {
|
||||
LOG(INFO) << "Overriding GPU pool size: using "
|
||||
<< FLAGS_caffe2_threads_per_gpu << " threads per GPU";
|
||||
<< c10::FLAGS_caffe2_threads_per_gpu << " threads per GPU";
|
||||
}
|
||||
static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
|
||||
static std::mutex pool_mutex;
|
||||
|
||||
if (create_new) {
|
||||
LOG(INFO) << "Created new GPU pool, size: " << FLAGS_caffe2_threads_per_gpu
|
||||
<< "; GPU id: " << gpu_id;
|
||||
return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
|
||||
LOG(INFO) << "Created new GPU pool, size: "
|
||||
<< c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
|
||||
return std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lock(pool_mutex);
|
||||
|
||||
@ -31,9 +31,9 @@ GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
|
||||
}
|
||||
if (!shared_pool) {
|
||||
LOG(INFO) << "Created shared GPU pool, size: "
|
||||
<< FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
|
||||
<< c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
|
||||
shared_pool =
|
||||
std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
|
||||
std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
|
||||
pools[gpu_id] = shared_pool;
|
||||
}
|
||||
return shared_pool;
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/timer.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
C10_DECLARE_bool(caffe2_dag_net_collect_stats);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -36,7 +36,7 @@ bool AsyncPollingNet::DoRunAsync() {
|
||||
|
||||
Timer timer;
|
||||
bool success = pollAndSchedule();
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
CAFFE_EVENT(stats_[PROTO_CPU], poll_time_ms, timer.MilliSeconds());
|
||||
}
|
||||
if (!success) {
|
||||
@ -49,14 +49,14 @@ bool AsyncPollingNet::DoRunAsync() {
|
||||
}
|
||||
|
||||
void AsyncPollingNet::schedule(int task_id) {
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
task_timers_[task_id]->Start();
|
||||
}
|
||||
const auto& device_option = event(task_id).GetDeviceOption();
|
||||
pool(device_option)->run([this, task_id, device_option]() {
|
||||
int stream_id = stream(task_id);
|
||||
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
CAFFE_EVENT(
|
||||
stats_[device_option.device_type()],
|
||||
task_pool_wait_time_us,
|
||||
@ -64,7 +64,7 @@ void AsyncPollingNet::schedule(int task_id) {
|
||||
}
|
||||
|
||||
try {
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
Timer run_time;
|
||||
run(task_id, stream_id);
|
||||
CAFFE_EVENT(
|
||||
@ -104,7 +104,7 @@ bool AsyncPollingNet::pollAndSchedule() {
|
||||
std::unordered_set<int> next_tasks;
|
||||
updated_tasks.reserve(current_tasks.size());
|
||||
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
timer.Start();
|
||||
}
|
||||
if (has_chain_failed_) {
|
||||
@ -121,7 +121,7 @@ bool AsyncPollingNet::pollAndSchedule() {
|
||||
|
||||
if (prev_status != status_[task_id]) {
|
||||
updated_tasks.insert(task_id);
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
updateTaskStats(task_id);
|
||||
}
|
||||
}
|
||||
@ -130,7 +130,7 @@ bool AsyncPollingNet::pollAndSchedule() {
|
||||
next_tasks.insert(task_id);
|
||||
}
|
||||
}
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
CAFFE_EVENT(
|
||||
stats_[PROTO_CPU], poll_status_update_time_us, timer.MicroSeconds());
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include "caffe2/core/net_async_tracing.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_net_async_optimize_polling,
|
||||
true,
|
||||
"Use event callbacks whenever possible instead of polling");
|
||||
@ -93,7 +93,7 @@ void AsyncSchedulingNet::schedule(int task_id, bool run_inline) {
|
||||
if (!canSchedule(parent_id, child_id)) {
|
||||
// we can't schedule a child because of this parent,
|
||||
// check if parent supports callback
|
||||
if (FLAGS_caffe2_net_async_optimize_polling &&
|
||||
if (c10::FLAGS_caffe2_net_async_optimize_polling &&
|
||||
parent_event.SupportsCallback()) {
|
||||
parents_with_callback.push_back(parent_id);
|
||||
} else {
|
||||
|
@ -19,21 +19,21 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/string_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
caffe2_net_async_tracing_filepath,
|
||||
"/tmp",
|
||||
"Path to save tracing information");
|
||||
|
||||
CAFFE2_DEFINE_string(
|
||||
C10_DEFINE_string(
|
||||
caffe2_net_async_names_to_trace,
|
||||
"",
|
||||
"Comma-separated list of net names to trace");
|
||||
|
||||
CAFFE2_DEFINE_int(caffe2_net_async_tracing_nth, 100, "Trace every Nth batch");
|
||||
C10_DEFINE_int(caffe2_net_async_tracing_nth, 100, "Trace every Nth batch");
|
||||
|
||||
// For every Nth iterations, we will dump the tracing results to a json file
|
||||
// The file is appended with the iteration number.
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_net_async_tracing_dumping_nth,
|
||||
10000,
|
||||
"Dump profiling result file every Nth batch");
|
||||
@ -56,7 +56,7 @@ int getCounterForNetName(const std::string& net_name) {
|
||||
Tracer::Tracer(const NetBase* net, const std::string& net_name)
|
||||
: net_(net), filename_(net_name), iter_(0) {
|
||||
std::replace(filename_.begin(), filename_.end(), '/', '_');
|
||||
filename_ = FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
|
||||
filename_ = c10::FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
|
||||
+"_id_" + caffe2::to_string(getCounterForNetName(net_name));
|
||||
timer_.Start();
|
||||
}
|
||||
@ -375,7 +375,8 @@ int getUniqueShardId(const OperatorDef& op_def) {
|
||||
}
|
||||
|
||||
bool isTraceableNetName(const std::string& net_name) {
|
||||
auto tracing_nets = caffe2::split(',', FLAGS_caffe2_net_async_names_to_trace);
|
||||
auto tracing_nets =
|
||||
caffe2::split(',', c10::FLAGS_caffe2_net_async_names_to_trace);
|
||||
return !net_name.empty() &&
|
||||
std::find(tracing_nets.begin(), tracing_nets.end(), net_name) !=
|
||||
tracing_nets.end();
|
||||
@ -403,10 +404,10 @@ bool startIter(const std::shared_ptr<Tracer>& tracer) {
|
||||
return false;
|
||||
}
|
||||
auto iter = tracer->bumpIter();
|
||||
auto is_enabled = iter % FLAGS_caffe2_net_async_tracing_nth == 0;
|
||||
auto is_enabled = iter % c10::FLAGS_caffe2_net_async_tracing_nth == 0;
|
||||
tracer->setEnabled(is_enabled);
|
||||
if (iter % FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
|
||||
int dumping_iter = iter / FLAGS_caffe2_net_async_tracing_dumping_nth;
|
||||
if (iter % c10::FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
|
||||
int dumping_iter = iter / c10::FLAGS_caffe2_net_async_tracing_dumping_nth;
|
||||
tracer->dumpTracingResultAndClearEvents(caffe2::to_string(dumping_iter));
|
||||
}
|
||||
return is_enabled;
|
||||
|
@ -22,9 +22,9 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/timer.h"
|
||||
|
||||
CAFFE2_DECLARE_string(caffe2_net_async_tracing_filepath);
|
||||
CAFFE2_DECLARE_string(caffe2_net_async_names_to_trace);
|
||||
CAFFE2_DECLARE_int(caffe2_net_async_tracing_nth);
|
||||
C10_DECLARE_string(caffe2_net_async_tracing_filepath);
|
||||
C10_DECLARE_string(caffe2_net_async_names_to_trace);
|
||||
C10_DECLARE_int(caffe2_net_async_tracing_nth);
|
||||
|
||||
namespace caffe2 {
|
||||
namespace tracing {
|
||||
|
@ -13,12 +13,12 @@
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/utils/thread_name.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_disable_chaining,
|
||||
false,
|
||||
"Disable chaining logic (some latent multi-device issues).");
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_dag_net_collect_stats,
|
||||
false,
|
||||
"Collect time stats in DAG net");
|
||||
@ -35,7 +35,7 @@ DAGNetBase::DAGNetBase(
|
||||
operator_nodes_ = dag_utils::prepareOperatorNodes(net_def, ws);
|
||||
|
||||
execution_chains_ =
|
||||
(FLAGS_caffe2_disable_chaining
|
||||
(c10::FLAGS_caffe2_disable_chaining
|
||||
? dag_utils::singleChains(operator_nodes_)
|
||||
: dag_utils::computeChains(operator_nodes_));
|
||||
|
||||
@ -127,7 +127,7 @@ bool DAGNetBase::DoRunAsync() {
|
||||
}
|
||||
// Kickstart the job queue.
|
||||
for (auto& value : initial_frontier_) {
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
task_timers_[value]->Start();
|
||||
}
|
||||
job_queue_->Push(value);
|
||||
@ -213,7 +213,7 @@ void DAGNetBase::WorkerFunction() {
|
||||
if (!job_queue_->Pop(&idx)) {
|
||||
return;
|
||||
}
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
auto device_option =
|
||||
operator_nodes_[idx].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
@ -295,7 +295,7 @@ void DAGNetBase::WorkerFunction() {
|
||||
// Can't do this inline because it can race with another thread
|
||||
// calling NoMoreJobs(). So the lock needs to be held on push.
|
||||
for (const auto idx : chains_to_queue) {
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
task_timers_[idx]->Start();
|
||||
}
|
||||
job_queue_->Push(idx);
|
||||
@ -329,7 +329,7 @@ bool DAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (FLAGS_caffe2_dag_net_collect_stats) {
|
||||
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
|
||||
auto device_option =
|
||||
operator_nodes_[chain_id].operator_->event().GetDeviceOption();
|
||||
CAFFE_EVENT(
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/scope_guard.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_disable_chaining);
|
||||
C10_DECLARE_bool(caffe2_disable_chaining);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -79,9 +79,9 @@ void checkChainingAndRun(
|
||||
CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
|
||||
{
|
||||
net_def.set_num_workers(4);
|
||||
auto old = FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
|
||||
FLAGS_caffe2_disable_chaining = false;
|
||||
auto old = c10::FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
|
||||
c10::FLAGS_caffe2_disable_chaining = false;
|
||||
|
||||
std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
|
||||
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_simple_net_benchmark_run_whole_net,
|
||||
true,
|
||||
"If false, whole net passes won't be performed");
|
||||
@ -108,7 +108,7 @@ vector<float> SimpleNet::TEST_Benchmark(
|
||||
".");
|
||||
Timer timer;
|
||||
auto millis = timer.MilliSeconds();
|
||||
if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
|
||||
if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
|
||||
for (int i = 0; i < main_runs; ++i) {
|
||||
CAFFE_ENFORCE(Run(), "Main run ", i, " has failed.");
|
||||
}
|
||||
@ -270,7 +270,7 @@ vector<float> SimpleNet::TEST_Benchmark(
|
||||
for (size_t i = 0; i < time_per_op.size(); ++i) {
|
||||
time_per_op[i] /= main_runs;
|
||||
}
|
||||
if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
|
||||
if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
|
||||
time_per_op.insert(time_per_op.begin(), millis / main_runs);
|
||||
}
|
||||
return time_per_op;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_disable_chaining);
|
||||
C10_DECLARE_bool(caffe2_disable_chaining);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -150,9 +150,9 @@ void checkChainingAndRun(
|
||||
::google::protobuf::TextFormat::ParseFromString(spec, &net_def));
|
||||
{
|
||||
net_def.set_num_workers(4);
|
||||
auto old = FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
|
||||
FLAGS_caffe2_disable_chaining = false;
|
||||
auto old = c10::FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
|
||||
c10::FLAGS_caffe2_disable_chaining = false;
|
||||
|
||||
std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
|
||||
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
|
||||
@ -177,9 +177,9 @@ void checkNumChainsAndRun(const char* spec, const int expected_num_chains) {
|
||||
}
|
||||
|
||||
{
|
||||
auto old = FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
|
||||
FLAGS_caffe2_disable_chaining = false;
|
||||
auto old = c10::FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
|
||||
c10::FLAGS_caffe2_disable_chaining = false;
|
||||
|
||||
std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
|
||||
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
|
||||
@ -572,9 +572,9 @@ TEST(NetTest, DISABLED_FailingOperator) {
|
||||
|
||||
{
|
||||
net_def.set_num_workers(4);
|
||||
auto old = FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
|
||||
FLAGS_caffe2_disable_chaining = false;
|
||||
auto old = c10::FLAGS_caffe2_disable_chaining;
|
||||
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
|
||||
c10::FLAGS_caffe2_disable_chaining = false;
|
||||
|
||||
std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
@ -684,9 +684,9 @@ TEST(NetTest, ExecutorOverride) {
|
||||
|
||||
{
|
||||
Workspace ws;
|
||||
auto old = FLAGS_caffe2_override_executor;
|
||||
auto g = MakeGuard([&]() { FLAGS_caffe2_override_executor = old; });
|
||||
FLAGS_caffe2_override_executor = "dag,async_scheduling";
|
||||
auto old = c10::FLAGS_caffe2_override_executor;
|
||||
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_override_executor = old; });
|
||||
c10::FLAGS_caffe2_override_executor = "dag,async_scheduling";
|
||||
|
||||
std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
|
||||
auto async_net =
|
||||
|
@ -1,9 +1,6 @@
|
||||
#include "caffe2/core/numa.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
caffe2_cpu_numa_enabled,
|
||||
false,
|
||||
"Use NUMA whenever possible.");
|
||||
C10_DEFINE_bool(caffe2_cpu_numa_enabled, false, "Use NUMA whenever possible.");
|
||||
|
||||
#if defined(__linux__) && !defined(CAFFE2_DISABLE_NUMA) && CAFFE2_MOBILE == 0
|
||||
#include <numa.h>
|
||||
@ -15,7 +12,7 @@ namespace caffe2 {
|
||||
|
||||
#ifdef CAFFE2_NUMA_ENABLED
|
||||
bool IsNUMAEnabled() {
|
||||
return FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
|
||||
return c10::FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
|
||||
}
|
||||
|
||||
void NUMABind(int numa_node_id) {
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "caffe2/core/logging.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_cpu_numa_enabled);
|
||||
C10_DECLARE_bool(caffe2_cpu_numa_enabled);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -16,11 +16,11 @@
|
||||
|
||||
#include "caffe2/core/operator_c10wrapper.h"
|
||||
|
||||
CAFFE2_DEFINE_int(
|
||||
C10_DEFINE_int(
|
||||
caffe2_operator_max_engine_name_length,
|
||||
10,
|
||||
"Maximum engine name length to be stored");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_disable_implicit_engine_preference,
|
||||
false,
|
||||
"If set, disable implicit engine preferences. This is useful for unit "
|
||||
@ -151,7 +151,7 @@ unique_ptr<OperatorBase> _CreateOperator(
|
||||
const auto op_def_engines = split(',', operator_def.engine());
|
||||
engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
|
||||
}
|
||||
if (!FLAGS_caffe2_disable_implicit_engine_preference &&
|
||||
if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
|
||||
g_per_op_engine_pref().count(device_type) &&
|
||||
g_per_op_engine_pref()[device_type].count(op_type)) {
|
||||
const auto& preferred_engines =
|
||||
@ -160,7 +160,7 @@ unique_ptr<OperatorBase> _CreateOperator(
|
||||
engines.insert(
|
||||
engines.end(), preferred_engines.begin(), preferred_engines.end());
|
||||
}
|
||||
if (!FLAGS_caffe2_disable_implicit_engine_preference &&
|
||||
if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
|
||||
g_global_engine_pref().count(device_type)) {
|
||||
const auto& preferred_engines = g_global_engine_pref()[device_type];
|
||||
VLOG(2) << "Inserting global engine preference: " << preferred_engines;
|
||||
@ -173,11 +173,12 @@ unique_ptr<OperatorBase> _CreateOperator(
|
||||
<< engine;
|
||||
auto op = TryCreateOperator(key, operator_def, ws);
|
||||
if (op) {
|
||||
if (engine.size() <= (unsigned)FLAGS_caffe2_operator_max_engine_name_length) {
|
||||
if (engine.size() <=
|
||||
(unsigned)c10::FLAGS_caffe2_operator_max_engine_name_length) {
|
||||
op->annotate_engine(engine);
|
||||
} else {
|
||||
op->annotate_engine(
|
||||
engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
|
||||
op->annotate_engine(engine.substr(
|
||||
0, c10::FLAGS_caffe2_operator_max_engine_name_length));
|
||||
}
|
||||
return op;
|
||||
} else {
|
||||
|
@ -705,7 +705,7 @@ struct DispatchHelper<FixedValues<>, ExtraArgs...> {
|
||||
}
|
||||
};
|
||||
|
||||
#define CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER( \
|
||||
#define C10_DEFINE_TENSOR_TYPES_DISPATCHER( \
|
||||
TensorTypes, DoRunWithType, DoRunWithOtherType) \
|
||||
template <typename FirstType, typename... Types, typename... ExtraArgs> \
|
||||
struct DispatchHelper<TensorTypes<FirstType, Types...>, ExtraArgs...> { \
|
||||
@ -763,15 +763,15 @@ struct DispatchHelper<FixedValues<>, ExtraArgs...> {
|
||||
return call<Op>(op, blob.meta()); \
|
||||
} \
|
||||
};
|
||||
CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER(
|
||||
C10_DEFINE_TENSOR_TYPES_DISPATCHER(
|
||||
TensorTypes,
|
||||
DoRunWithType,
|
||||
DoRunWithOtherType)
|
||||
CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER(
|
||||
C10_DEFINE_TENSOR_TYPES_DISPATCHER(
|
||||
TensorTypes2,
|
||||
DoRunWithType2,
|
||||
DoRunWithOtherType2)
|
||||
#undef CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER
|
||||
#undef C10_DEFINE_TENSOR_TYPES_DISPATCHER
|
||||
|
||||
// The device type registry. This works in two phases:
|
||||
// (1) gDeviceTypeRegistry() maps the device types values to the actual operator
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_handle_executor_threads_exceptions,
|
||||
false,
|
||||
"If used we will handle exceptions in executor threads. "
|
||||
@ -422,7 +422,7 @@ bool ExecuteStepRecursive(ExecutionStepWrapper& stepWrapper) {
|
||||
LOG(ERROR) << "Parallel worker exception:\n" << first_exception;
|
||||
}
|
||||
compiledStep->gotFailure = true;
|
||||
if (!FLAGS_caffe2_handle_executor_threads_exceptions) {
|
||||
if (!c10::FLAGS_caffe2_handle_executor_threads_exceptions) {
|
||||
// In complex plans other threads might get stuck if another
|
||||
// one fails. So we let exception to go out of thread which
|
||||
// causes SIGABRT. In local setup one might use this flag
|
||||
|
@ -1,12 +1,12 @@
|
||||
#include "caffe2/core/tensor_impl.h"
|
||||
#include "caffe2/core/flags.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_keep_on_shrink,
|
||||
true,
|
||||
"If set, keeps memory when a tensor is shrinking its size.");
|
||||
|
||||
CAFFE2_DEFINE_int64(
|
||||
C10_DEFINE_int64(
|
||||
caffe2_max_keep_on_shrink_memory,
|
||||
LLONG_MAX,
|
||||
"The maximum memory in bytes to keep on shrink, if the difference between "
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include "caffe2/core/tensor.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_print_blob_sizes_at_exit,
|
||||
false,
|
||||
"If true, workspace destructor will print all blob shapes");
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include "caffe2/utils/signal_handler.h"
|
||||
#include "caffe2/utils/threadpool/ThreadPool.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_print_blob_sizes_at_exit);
|
||||
C10_DECLARE_bool(caffe2_print_blob_sizes_at_exit);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
@ -105,7 +105,7 @@ class CAFFE2_API Workspace {
|
||||
}
|
||||
|
||||
~Workspace() {
|
||||
if (FLAGS_caffe2_print_blob_sizes_at_exit) {
|
||||
if (c10::FLAGS_caffe2_print_blob_sizes_at_exit) {
|
||||
PrintBlobSizes();
|
||||
}
|
||||
// This is why we have a bookkeeper_ shared_ptr instead of a naked static! A
|
||||
|
@ -4,8 +4,10 @@
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
CAFFE2_DEFINE_int(caffe2_leveldb_block_size, 65536,
|
||||
"The caffe2 leveldb block size when writing a leveldb.");
|
||||
C10_DEFINE_int(
|
||||
caffe2_leveldb_block_size,
|
||||
65536,
|
||||
"The caffe2 leveldb block size when writing a leveldb.");
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
@ -58,7 +60,7 @@ class LevelDB : public DB {
|
||||
public:
|
||||
LevelDB(const string& source, Mode mode) : DB(source, mode) {
|
||||
leveldb::Options options;
|
||||
options.block_size = FLAGS_caffe2_leveldb_block_size;
|
||||
options.block_size = c10::FLAGS_caffe2_leveldb_block_size;
|
||||
options.write_buffer_size = 268435456;
|
||||
options.max_open_files = 100;
|
||||
options.error_if_exists = mode == NEW;
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_bool(
|
||||
caffe2_mkl_memonger_in_use,
|
||||
false,
|
||||
"Turn on if memonger is used to force reallocate intermediate "
|
||||
|
@ -48,7 +48,7 @@ class MKLConcatOp final : public MKLOperator<T> {
|
||||
dims_changed = (input_size_cache_[i] != Input(i).dims());
|
||||
}
|
||||
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
input_size_cache_.resize(nInputs);
|
||||
int output_channels = 0;
|
||||
int canonical_axis = canonical_axis_index_(axis_, nDims);
|
||||
@ -88,7 +88,7 @@ class MKLConcatOp final : public MKLOperator<T> {
|
||||
resources_[dnnResourceDst] = buffer_.buffer();
|
||||
ExecutePrimitive();
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -51,7 +51,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_FILTER_DIMS(X, filter, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
CAFFE_ENFORCE(
|
||||
C == filter.dim32(1) * group_,
|
||||
"Convolution op: input channels does not match: # of input channels ",
|
||||
@ -152,7 +152,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
|
||||
|
||||
MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
// buffer_ is not shared with Y. Free memory since it'll
|
||||
// be re-allocated in the next run anyway due to memonger in use.
|
||||
buffer_.Reset();
|
||||
|
@ -27,7 +27,7 @@ class MKLSumOp final : public MKLOperator<T> {
|
||||
MKLMemory<T>* Y = Output(0);
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X0, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
primitive_.Reset(
|
||||
dnnSumCreate<T>,
|
||||
nullptr,
|
||||
@ -56,7 +56,7 @@ class MKLSumOp final : public MKLOperator<T> {
|
||||
resources_[dnnResourceDst] = buffer_.buffer();
|
||||
MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -26,7 +26,7 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_FILTER_DIMS(X, filter, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
const int N = filter.dim32(0);
|
||||
CAFFE_ENFORCE(N == bias.dim32(0));
|
||||
|
||||
@ -80,7 +80,7 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {
|
||||
|
||||
MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -34,7 +34,7 @@ bool MKLLRNOp<float>::RunOnDeviceWithOrderNCHW() {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
size_t dim = X.ndim();
|
||||
CAFFE_ENFORCE(4 == dim);
|
||||
|
||||
@ -65,7 +65,7 @@ bool MKLLRNOp<float>::RunOnDeviceWithOrderNCHW() {
|
||||
resources_[dnnResourceWorkspace] = workspace_buffer_->buffer();
|
||||
MKLDNN_SAFE_CALL(mkl::dnnExecute<float>(primitive_, resources_));
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -58,7 +58,7 @@ bool MKLPoolOp<float>::RunOnDeviceWithOrderNCHW() {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
// We will utilize the SetOutputSize() function in the base class
|
||||
// with dummy TensorCPU input and output to calculate the sizes.
|
||||
Tensor dummy_input(X.dims(), CPU);
|
||||
@ -101,7 +101,7 @@ bool MKLPoolOp<float>::RunOnDeviceWithOrderNCHW() {
|
||||
resources_[dnnResourceWorkspace] = workspace_buffer_->buffer();
|
||||
MKLDNN_SAFE_CALL(mkl::dnnExecute<float>(primitive_, resources_));
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -19,7 +19,7 @@ class MKLReluOp : public MKLOperator<T> {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
// First run or changed input size, will need to recreate environment
|
||||
primitive_.Reset(dnnReLUCreateForward<T>, nullptr, X.layout(), 0.f);
|
||||
if (&X != Y) {
|
||||
@ -36,7 +36,7 @@ class MKLReluOp : public MKLOperator<T> {
|
||||
resources_[dnnResourceDst] = buffer_.buffer();
|
||||
ExecutePrimitive();
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -56,7 +56,7 @@ class MKLBNOp final : public Operator<MKLContext> {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
// Create main primitive.
|
||||
if (is_test_) {
|
||||
primitive_.Reset(
|
||||
@ -133,7 +133,7 @@ class MKLBNOp final : public Operator<MKLContext> {
|
||||
}
|
||||
}
|
||||
buffer_.CopyTo(Y, primitive_, dnnResourceDst);
|
||||
if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
|
||||
buffer_.Reset();
|
||||
}
|
||||
return true;
|
||||
|
@ -39,7 +39,7 @@ class MKLSqueezeOp final : public MKLOperator<T> {
|
||||
|
||||
bool dims_changed;
|
||||
CHECK_INPUT_DIMS(X, dims_changed);
|
||||
if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
|
||||
// Temp buffer mainly to convert the input to plain layout before
|
||||
// Reshape() if the input has a custom layout.
|
||||
buffer_.Reset(X.dims());
|
||||
|
@ -5,13 +5,13 @@
|
||||
|
||||
#ifdef CAFFE2_HAS_MKL_DNN
|
||||
|
||||
CAFFE2_DEFINE_bool(
|
||||
caffe2_mkl_implicit_layout_change, false,
|
||||
C10_DEFINE_bool(
|
||||
caffe2_mkl_implicit_layout_change,
|
||||
false,
|
||||
"Controls the behavior when we call View() on an MKLMemory: if it is set "
|
||||
"true, then the View() function will actually change the underlying "
|
||||
"storage. If it is set false, an implicit copy is triggered but the "
|
||||
"original storage is not affected."
|
||||
);
|
||||
"original storage is not affected.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
// an MKLMemory: if it is set true, then the View() function will actually
|
||||
// change the underlying storage. If it is set false, an implicit copy is
|
||||
// triggered but the original storage is not affected.
|
||||
CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change);
|
||||
C10_DECLARE_bool(caffe2_mkl_implicit_layout_change);
|
||||
|
||||
namespace caffe2 {
|
||||
namespace mkl {
|
||||
@ -511,7 +511,7 @@ class C10_EXPORT MKLMemory {
|
||||
dnnConversionCreate<T>, layout_, layout_wanted);
|
||||
MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
|
||||
convert, buffer_.get(), temp_buffer));
|
||||
if (primitive && FLAGS_caffe2_mkl_implicit_layout_change) {
|
||||
if (primitive && c10::FLAGS_caffe2_mkl_implicit_layout_change) {
|
||||
VLOG(2) << "Implicit layout change set. "
|
||||
"Changing the underlying storage.";
|
||||
// We will need to call Reset to set up all the member variables.
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "caffe2/mkl/utils/mkl_memory.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_mkl_memonger_in_use);
|
||||
C10_DECLARE_bool(caffe2_mkl_memonger_in_use);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
|
@ -7,14 +7,13 @@
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include <unordered_set>
|
||||
|
||||
CAFFE2_DEFINE_int(warmup, 3, "The number of iterations to warm up.");
|
||||
CAFFE2_DEFINE_int(iter, 100, "The number of iterations to run.");
|
||||
CAFFE2_DEFINE_bool(
|
||||
C10_DEFINE_int(warmup, 3, "The number of iterations to warm up.");
|
||||
C10_DEFINE_int(iter, 100, "The number of iterations to run.");
|
||||
C10_DEFINE_bool(
|
||||
run_individual,
|
||||
true,
|
||||
"Whether to benchmark individual operators.");
|
||||
|
||||
|
||||
constexpr float tol = 0.03;
|
||||
namespace caffe2 {
|
||||
void benchmarkModel(std::string init_net_pb, std::string predict_net_pb, std::string input_name, std::vector<int> input_dims, std::string net_name="benchmark_net", std::unordered_set<std::string> cpu_ops = std::unordered_set<std::string>({})) {
|
||||
@ -46,7 +45,8 @@ namespace caffe2 {
|
||||
LOG(ERROR) << "[C2DEBUG] after compareNetResult4D";
|
||||
NetBase* net = ws->CreateNet(predict_net_def_gpu);
|
||||
LOG(ERROR) << "[C2DEBUG] Benchmarking OpenGL Net";
|
||||
net->TEST_Benchmark(caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
|
||||
net->TEST_Benchmark(
|
||||
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
|
||||
// Test CPU
|
||||
for (auto i = 0; i < predict_net_def.op().size(); ++i) {
|
||||
auto op = predict_net_def.mutable_op(i);
|
||||
@ -58,7 +58,7 @@ namespace caffe2 {
|
||||
predict_net_def.set_name("cpu_net");
|
||||
net = ws->CreateNet(predict_net_def);
|
||||
LOG(INFO) << "[C2DEBUG] Benchmarking CPU Net";
|
||||
net->TEST_Benchmark(caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
|
||||
|
||||
net->TEST_Benchmark(
|
||||
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
|
||||
}
|
||||
} // namespace caffe2
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "caffe2/mobile/contrib/ios/mpscnn/mpscnn.h"
|
||||
#endif
|
||||
|
||||
CAFFE2_DECLARE_bool(caffe2_force_shared_col_buffer);
|
||||
C10_DECLARE_bool(caffe2_force_shared_col_buffer);
|
||||
|
||||
Caffe2IOSPredictor* Caffe2IOSPredictor::NewCaffe2IOSPredictor(const caffe2::NetDef& init_net,
|
||||
const caffe2::NetDef& predict_net,
|
||||
@ -49,7 +49,7 @@ Caffe2IOSPredictor::Caffe2IOSPredictor(const caffe2::NetDef& init_net,
|
||||
}
|
||||
|
||||
void Caffe2IOSPredictor::run(const Tensor& inData, Tensor& outData, std::string& errorMessage) {
|
||||
caffe2::FLAGS_caffe2_force_shared_col_buffer = true;
|
||||
c10::FLAGS_caffe2_force_shared_col_buffer = true;
|
||||
caffe2::Tensor input(caffe2::CPU);
|
||||
input.Resize(inData.dims);
|
||||
input.ShareExternalPointer(inData.data);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user