Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 12:54:11 +08:00)
Using c10 namespace across caffe2. (#12714)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12714 This is a short change to enable the c10 namespace in caffe2. We did not enable it before because of gflags global-variable confusion, but that has now mostly been cleaned up. The plan on record is that namespace caffe2 and namespace aten will be full supersets of namespace c10. Most of the diff is a codemod; the only two non-codemod changes are in caffe2/core/common.h, where ``` using namespace c10; ``` is added, and in Flags.h, where instead of creating aliasing variables in the c10 namespace we put the flags directly in the global namespace to match gflags (with the same behavior when building without gflags). Reviewed By: dzhulgakov Differential Revision: D10390486 fbshipit-source-id: 5e2df730e28e29a052f513bddc558d9f78a23b9b
Committed by: Facebook Github Bot
Parent: 348867c10b
Commit: 7d5f7ed270
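The diff below is almost entirely a mechanical rename from `c10::FLAGS_*` to the global `FLAGS_*`. As a rough illustration of what flag-using code looks like after this change, here is a minimal sketch; the flag name and include paths are illustrative assumptions, while `C10_DEFINE_bool`, `caffe2::GlobalInit`, and the global `FLAGS_` access come from the hunks below.

```cpp
// Minimal sketch, not part of the commit. Include paths are assumptions.
#include "c10/util/Flags.h"
#include "caffe2/core/init.h"

// Hypothetical flag, defined once in a .cc file via the C10 macro.
C10_DEFINE_bool(example_verbose, false, "Illustrative flag for this sketch.");

int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  // Before this PR callers wrote c10::FLAGS_example_verbose; after it, the
  // flag lives in the global namespace, matching plain gflags behavior.
  if (FLAGS_example_verbose) {
    // ... verbose path ...
  }
  return 0;
}
```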
@@ -594,11 +594,10 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
} else {
reset_tensor = storage_.capacity() <
(storage_offset_ + numel_) * storage_.itemsize() ||
!c10::FLAGS_caffe2_keep_on_shrink ||
!FLAGS_caffe2_keep_on_shrink ||
storage_.capacity() -
(storage_offset_ + numel_) * storage_.itemsize() >
static_cast<size_t>(
c10::FLAGS_caffe2_max_keep_on_shrink_memory);
static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
}

if (reset_tensor && !is_init) {
@@ -78,19 +78,19 @@ int main(int argc, char** argv) {
benchmark(
argc,
argv,
c10::FLAGS_backend,
c10::FLAGS_init_net,
c10::FLAGS_input,
c10::FLAGS_input_dims,
c10::FLAGS_input_file,
c10::FLAGS_input_type,
c10::FLAGS_iter,
c10::FLAGS_net,
c10::FLAGS_output,
c10::FLAGS_output_folder,
c10::FLAGS_run_individual,
c10::FLAGS_sleep_before_run,
c10::FLAGS_text_output,
c10::FLAGS_warmup,
c10::FLAGS_wipe_cache);
FLAGS_backend,
FLAGS_init_net,
FLAGS_input,
FLAGS_input_dims,
FLAGS_input_file,
FLAGS_input_type,
FLAGS_iter,
FLAGS_net,
FLAGS_output,
FLAGS_output_folder,
FLAGS_run_individual,
FLAGS_sleep_before_run,
FLAGS_text_output,
FLAGS_warmup,
FLAGS_wipe_cache);
}
@@ -37,9 +37,9 @@ int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);

std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
FLAGS_input_db_type, FLAGS_input_db, caffe2::db::READ));
std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
FLAGS_output_db_type, FLAGS_output_db, caffe2::db::NEW));
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
int count = 0;

@@ -80,7 +80,7 @@ int main(int argc, char** argv) {
data->set_byte_data(buffer, datum.data().size());
}
transaction->Put(cursor->key(), protos.SerializeAsString());
if (++count % c10::FLAGS_batch_size == 0) {
if (++count % FLAGS_batch_size == 0) {
transaction->Commit();
LOG(INFO) << "Converted " << count << " items so far.";
}
@@ -33,15 +33,15 @@ int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);

std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
FLAGS_input_db_type, FLAGS_input_db, caffe2::db::READ));
std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
FLAGS_output_db_type, FLAGS_output_db, caffe2::db::NEW));
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
int count = 0;
for (; cursor->Valid(); cursor->Next()) {
transaction->Put(cursor->key(), cursor->value());
if (++count % c10::FLAGS_batch_size == 0) {
if (++count % FLAGS_batch_size == 0) {
transaction->Commit();
LOG(INFO) << "Converted " << count << " items so far.";
}
@@ -16,8 +16,8 @@

// This script converts an image dataset to leveldb.
//
// c10::FLAGS_input_folder is the root folder that holds all the images, and
// c10::FLAGS_list_file should be a list of files as well as their labels, in
// FLAGS_input_folder is the root folder that holds all the images, and
// FLAGS_list_file should be a list of files as well as their labels, in
// the format as
// subfolder1/file1.JPEG 7
// ....

@@ -41,7 +41,7 @@ C10_DEFINE_bool(color, true, "If set, load images in color.");
C10_DEFINE_int(
scale,
256,
"If c10::FLAGS_raw is set, scale all the images' shorter edge to the given "
"If FLAGS_raw is set, scale all the images' shorter edge to the given "
"value.");
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
@@ -93,7 +93,7 @@ void ConvertToRawDataset(
data->set_data_type(TensorProto::BYTE);
data->add_dims(0);
data->add_dims(0);
if (c10::FLAGS_color) {
if (FLAGS_color) {
data->add_dims(3);
}
string value;

@@ -108,21 +108,20 @@ void ConvertToRawDataset(
const string& encoded_image = input_protos.protos(0).string_data(0);
int encoded_size = encoded_image.size();
cv::Mat img = cv::imdecode(
cv::Mat(1, &encoded_size, CV_8UC1,
const_cast<char*>(encoded_image.data())),
c10::FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
cv::Mat(
1, &encoded_size, CV_8UC1, const_cast<char*>(encoded_image.data())),
FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
cv::Mat resized_img;
int scaled_width, scaled_height;
if (c10::FLAGS_warp) {
scaled_width = c10::FLAGS_scale;
scaled_height = c10::FLAGS_scale;
if (FLAGS_warp) {
scaled_width = FLAGS_scale;
scaled_height = FLAGS_scale;
} else if (img.rows > img.cols) {
scaled_width = c10::FLAGS_scale;
scaled_height =
static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
scaled_width = FLAGS_scale;
scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
} else {
scaled_height = c10::FLAGS_scale;
scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
scaled_height = FLAGS_scale;
scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
}
cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
cv::INTER_LINEAR);

@@ -131,7 +130,7 @@ void ConvertToRawDataset(
DCHECK(resized_img.isContinuous());
data->set_byte_data(
resized_img.ptr(),
scaled_height * scaled_width * (c10::FLAGS_color ? 3 : 1));
scaled_height * scaled_width * (FLAGS_color ? 3 : 1));
output_protos.SerializeToString(&value);
// Put in db
batch->Put(iter->key(), value);

@@ -153,7 +152,6 @@ void ConvertToRawDataset(

int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
caffe2::ConvertToRawDataset(
c10::FLAGS_input_db_name, c10::FLAGS_output_db_name);
caffe2::ConvertToRawDataset(FLAGS_input_db_name, FLAGS_output_db_name);
return 0;
}
@@ -44,15 +44,15 @@ namespace caffe2 {
cv::Mat resizeImage(cv::Mat& img) {
cv::Mat resized_img;
int scaled_width, scaled_height;
if (c10::FLAGS_warp) {
scaled_width = c10::FLAGS_scale;
scaled_height = c10::FLAGS_scale;
if (FLAGS_warp) {
scaled_width = FLAGS_scale;
scaled_height = FLAGS_scale;
} else if (img.rows > img.cols) {
scaled_width = c10::FLAGS_scale;
scaled_height = static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
scaled_width = FLAGS_scale;
scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
} else {
scaled_height = c10::FLAGS_scale;
scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
scaled_height = FLAGS_scale;
scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
}
cv::resize(
img,

@@ -87,9 +87,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
std::vector<float> mean(3, 0);
std::vector<float> std(3, 1);
bool bgrtorgb = false;
assert(img.cols == c10::FLAGS_scale);
assert(img.rows == c10::FLAGS_scale);
vector<string> steps = caffe2::split(',', c10::FLAGS_preprocess);
assert(img.cols == FLAGS_scale);
assert(img.rows == FLAGS_scale);
vector<string> steps = caffe2::split(',', FLAGS_preprocess);
for (int i = 0; i < steps.size(); i++) {
auto step = steps[i];
if (step == "subtract128") {

@@ -112,8 +112,8 @@ std::vector<float> convertToVector(cv::Mat& img) {
}
}

int C = c10::FLAGS_color ? 3 : 1;
int total_size = C * c10::FLAGS_scale * c10::FLAGS_scale;
int C = FLAGS_color ? 3 : 1;
int total_size = C * FLAGS_scale * FLAGS_scale;
std::vector<float> values(total_size);
if (C == 1) {
cv::MatIterator_<uchar> it, end;

@@ -130,9 +130,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
for (it = img.begin<cv::Vec3b>(), end = img.end<cv::Vec3b>(); it != end;
++it, i++) {
values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]);
int offset = c10::FLAGS_scale * c10::FLAGS_scale + i;
int offset = FLAGS_scale * FLAGS_scale + i;
values[offset] = (((*it)[g] / normalize[1] - mean[1]) / std[1]);
offset = c10::FLAGS_scale * c10::FLAGS_scale + offset;
offset = FLAGS_scale * FLAGS_scale + offset;
values[offset] = (((*it)[r] / normalize[2] - mean[2]) / std[2]);
}
}

@@ -145,8 +145,7 @@ std::vector<float> convertOneImage(std::string& filename) {
std::cout << "Converting " << filename << std::endl;
// Load image
cv::Mat img = cv::imread(
filename,
c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);

cv::Mat crop = cropToSquare(img);

@@ -155,17 +154,17 @@ std::vector<float> convertOneImage(std::string& filename) {
// Assert we don't have to deal with alignment
DCHECK(resized_img.isContinuous());
assert(resized_img.rows == resized_img.cols);
assert(resized_img.rows == c10::FLAGS_scale);
assert(resized_img.rows == FLAGS_scale);
std::vector<float> one_image_values = convertToVector(resized_img);
return one_image_values;
}

void convertImages() {
vector<string> file_names;
if (c10::FLAGS_input_images != "") {
file_names = caffe2::split(',', c10::FLAGS_input_images);
} else if (c10::FLAGS_input_image_file != "") {
std::ifstream infile(c10::FLAGS_input_image_file);
if (FLAGS_input_images != "") {
file_names = caffe2::split(',', FLAGS_input_images);
} else if (FLAGS_input_image_file != "") {
std::ifstream infile(FLAGS_input_image_file);
std::string line;
while (std::getline(infile, line)) {
vector<string> file_name = caffe2::split(',', line);

@@ -181,7 +180,7 @@ void convertImages() {
assert(false);
}
std::vector<std::vector<float>> values;
int C = c10::FLAGS_color ? 3 : 1;
int C = FLAGS_color ? 3 : 1;
for (int i = 0; i < file_names.size(); i++) {
std::vector<float> one_image_values = convertOneImage(file_names[i]);
values.push_back(one_image_values);

@@ -193,19 +192,19 @@ void convertImages() {
data->set_data_type(TensorProto::FLOAT);
data->add_dims(values.size());
data->add_dims(C);
data->add_dims(c10::FLAGS_scale);
data->add_dims(c10::FLAGS_scale);
data->add_dims(FLAGS_scale);
data->add_dims(FLAGS_scale);

for (int i = 0; i < values.size(); i++) {
assert(values[i].size() == C * c10::FLAGS_scale * c10::FLAGS_scale);
assert(values[i].size() == C * FLAGS_scale * FLAGS_scale);
for (int j = 0; j < values[i].size(); j++) {
data->add_float_data(values[i][j]);
}
}
if (c10::FLAGS_text_output) {
caffe2::WriteProtoToTextFile(protos, c10::FLAGS_output_tensor);
if (FLAGS_text_output) {
caffe2::WriteProtoToTextFile(protos, FLAGS_output_tensor);
} else {
caffe2::WriteProtoToBinaryFile(protos, c10::FLAGS_output_tensor);
caffe2::WriteProtoToBinaryFile(protos, FLAGS_output_tensor);
}
}
@@ -40,11 +40,11 @@ using caffe2::string;

void TestThroughputWithDB() {
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
FLAGS_input_db_type, FLAGS_input_db, caffe2::db::READ));
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
for (int iter_id = 0; iter_id < FLAGS_repeat; ++iter_id) {
caffe2::Timer timer;
for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
for (int i = 0; i < FLAGS_report_interval; ++i) {
string key = cursor->key();
string value = cursor->value();
//VLOG(1) << "Key " << key;

@@ -58,15 +58,15 @@ void TestThroughputWithDB() {
"Iteration %03d, took %4.5f seconds, throughput %f items/sec.\n",
iter_id,
elapsed_seconds,
c10::FLAGS_report_interval / elapsed_seconds);
FLAGS_report_interval / elapsed_seconds);
}
}

void TestThroughputWithReaderWorker(const DBReader* reader, int thread_id) {
string key, value;
for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
for (int iter_id = 0; iter_id < FLAGS_repeat; ++iter_id) {
caffe2::Timer timer;
for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
for (int i = 0; i < FLAGS_report_interval; ++i) {
reader->Read(&key, &value);
}
double elapsed_seconds = timer.Seconds();

@@ -76,14 +76,14 @@ void TestThroughputWithReaderWorker(const DBReader* reader, int thread_id) {
thread_id,
iter_id,
elapsed_seconds,
c10::FLAGS_report_interval / elapsed_seconds);
FLAGS_report_interval / elapsed_seconds);
}
}

void TestThroughputWithReader() {
caffe2::db::DBReader reader(c10::FLAGS_input_db_type, c10::FLAGS_input_db);
caffe2::db::DBReader reader(FLAGS_input_db_type, FLAGS_input_db);
std::vector<std::unique_ptr<std::thread>> reading_threads(
c10::FLAGS_num_read_threads);
FLAGS_num_read_threads);
for (int i = 0; i < reading_threads.size(); ++i) {
reading_threads[i].reset(new std::thread(
TestThroughputWithReaderWorker, &reader, i));

@@ -95,7 +95,7 @@ void TestThroughputWithReader() {

int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
if (c10::FLAGS_use_reader) {
if (FLAGS_use_reader) {
TestThroughputWithReader();
} else {
TestThroughputWithDB();
@@ -57,7 +57,7 @@ const int kCIFAR100TestDataSize = 10000;

void ReadImage(std::ifstream* file, int* label, char* buffer) {
char label_char;
if (c10::FLAGS_is_cifar100) {
if (FLAGS_is_cifar100) {
// Skip the coarse label.
file->read(&label_char, 1);
}

@@ -110,29 +110,29 @@ void WriteToDB(const string& filename, const int num_items,

void ConvertCIFAR() {
std::unique_ptr<db::DB> train_db(
db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_train_db_name, db::NEW));
db::CreateDB(FLAGS_db, FLAGS_output_train_db_name, db::NEW));
std::unique_ptr<db::DB> test_db(
db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_test_db_name, db::NEW));
db::CreateDB(FLAGS_db, FLAGS_output_test_db_name, db::NEW));

if (!c10::FLAGS_is_cifar100) {
if (!FLAGS_is_cifar100) {
// This is cifar 10.
for (int fileid = 0; fileid < kCIFAR10TrainBatches; ++fileid) {
stringstream train_file;
train_file << c10::FLAGS_input_folder << "/data_batch_" << fileid + 1
train_file << FLAGS_input_folder << "/data_batch_" << fileid + 1
<< ".bin";
WriteToDB(train_file.str(), kCIFAR10BatchSize,
fileid * kCIFAR10BatchSize, train_db.get());
}
stringstream test_file;
test_file << c10::FLAGS_input_folder << "/test_batch.bin";
test_file << FLAGS_input_folder << "/test_batch.bin";
WriteToDB(test_file.str(), kCIFAR10TestDataSize, 0, test_db.get());
} else {
// This is cifar 100.
stringstream train_file;
train_file << c10::FLAGS_input_folder << "/train.bin";
train_file << FLAGS_input_folder << "/train.bin";
WriteToDB(train_file.str(), kCIFAR100TrainDataSize, 0, train_db.get());
stringstream test_file;
test_file << c10::FLAGS_input_folder << "/test.bin";
test_file << FLAGS_input_folder << "/test.bin";
WriteToDB(test_file.str(), kCIFAR100TestDataSize, 0, test_db.get());
}
}
@@ -16,9 +16,9 @@

// This script converts an image dataset to a database.
//
// c10::FLAGS_input_folder is the root folder that holds all the images
// FLAGS_input_folder is the root folder that holds all the images
//
// c10::FLAGS_list_file is the path to a file containing a list of files
// FLAGS_list_file is the path to a file containing a list of files
// and their labels, as follows:
//
// subfolder1/file1.JPEG 7

@@ -61,7 +61,7 @@ C10_DEFINE_bool(color, true, "If set, load images in color.");
C10_DEFINE_int(
scale,
256,
"If c10::FLAGS_raw is set, scale the shorter edge to the given value.");
"If FLAGS_raw is set, scale the shorter edge to the given value.");
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
C10_DEFINE_int(
num_threads,

@@ -75,11 +75,11 @@ class Converter {
explicit Converter() {
data_ = protos_.add_protos();
label_ = protos_.add_protos();
if (c10::FLAGS_raw) {
if (FLAGS_raw) {
data_->set_data_type(TensorProto::BYTE);
data_->add_dims(0);
data_->add_dims(0);
if (c10::FLAGS_color) {
if (FLAGS_color) {
data_->add_dims(3);
}
} else {

@@ -119,7 +119,7 @@ class Converter {
}

void run() {
const auto& input_folder = c10::FLAGS_input_folder;
const auto& input_folder = FLAGS_input_folder;
std::unique_lock<std::mutex> lock(mutex_);
std::string value;
while (!in_.empty()) {

@@ -130,7 +130,7 @@ class Converter {
label_->set_int32_data(0, pair.second);

// Add raw file contents to DB if !raw
if (!c10::FLAGS_raw) {
if (!FLAGS_raw) {
std::ifstream image_file_stream(input_folder + pair.first);
if (!image_file_stream) {
LOG(ERROR) << "Cannot open " << input_folder << pair.first

@@ -144,23 +144,20 @@ class Converter {
// Load image
cv::Mat img = cv::imread(
input_folder + pair.first,
c10::FLAGS_color ? cv::IMREAD_COLOR
: cv::IMREAD_GRAYSCALE);
FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);

// Resize image
cv::Mat resized_img;
int scaled_width, scaled_height;
if (c10::FLAGS_warp) {
scaled_width = c10::FLAGS_scale;
scaled_height = c10::FLAGS_scale;
if (FLAGS_warp) {
scaled_width = FLAGS_scale;
scaled_height = FLAGS_scale;
} else if (img.rows > img.cols) {
scaled_width = c10::FLAGS_scale;
scaled_height =
static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
scaled_width = FLAGS_scale;
scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
} else {
scaled_height = c10::FLAGS_scale;
scaled_width =
static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
scaled_height = FLAGS_scale;
scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
}
cv::resize(
img,

@@ -215,12 +212,12 @@ void ConvertImageDataset(
lines.push_back(std::make_pair(filename, file_label));
}

if (c10::FLAGS_shuffle) {
if (FLAGS_shuffle) {
LOG(INFO) << "Shuffling data";
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(1701));
}

auto num_threads = c10::FLAGS_num_threads;
auto num_threads = FLAGS_num_threads;
if (num_threads < 1) {
num_threads = std::thread::hardware_concurrency();
}

@@ -228,7 +225,7 @@ void ConvertImageDataset(
LOG(INFO) << "Processing " << lines.size() << " images...";
LOG(INFO) << "Opening DB " << output_db_name;

auto db = db::CreateDB(c10::FLAGS_db, output_db_name, db::NEW);
auto db = db::CreateDB(FLAGS_db, output_db_name, db::NEW);
auto transaction = db->NewTransaction();

LOG(INFO) << "Using " << num_threads << " processing threads...";

@@ -278,9 +275,6 @@ void ConvertImageDataset(
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
caffe2::ConvertImageDataset(
c10::FLAGS_input_folder,
c10::FLAGS_list_file,
c10::FLAGS_output_db_name,
c10::FLAGS_shuffle);
FLAGS_input_folder, FLAGS_list_file, FLAGS_output_db_name, FLAGS_shuffle);
return 0;
}
@@ -83,8 +83,7 @@ void convert_dataset(const char* image_filename, const char* label_filename,
cols = swap_endian(cols);

// leveldb
std::unique_ptr<db::DB> mnist_db(
db::CreateDB(c10::FLAGS_db, db_path, db::NEW));
std::unique_ptr<db::DB> mnist_db(db::CreateDB(FLAGS_db, db_path, db::NEW));
std::unique_ptr<db::Transaction> transaction(mnist_db->NewTransaction());
// Storing to db
char label_value;

@@ -98,7 +97,7 @@ void convert_dataset(const char* image_filename, const char* label_filename,
TensorProto* data = protos.add_protos();
TensorProto* label = protos.add_protos();
data->set_data_type(TensorProto::BYTE);
if (c10::FLAGS_channel_first) {
if (FLAGS_channel_first) {
data->add_dims(1);
data->add_dims(rows);
data->add_dims(cols);

@@ -139,9 +138,9 @@ void convert_dataset(const char* image_filename, const char* label_filename,
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
caffe2::convert_dataset(
c10::FLAGS_image_file.c_str(),
c10::FLAGS_label_file.c_str(),
c10::FLAGS_output_file.c_str(),
c10::FLAGS_data_limit);
FLAGS_image_file.c_str(),
FLAGS_label_file.c_str(),
FLAGS_output_file.c_str(),
FLAGS_data_limit);
return 0;
}
@@ -28,15 +28,15 @@ C10_DEFINE_string(
namespace caffe2 {

void run() {
if (c10::FLAGS_init_net.empty()) {
if (FLAGS_init_net.empty()) {
LOG(FATAL) << "No init net specified. Use --init_net=/path/to/net.";
}
if (c10::FLAGS_predict_net.empty()) {
if (FLAGS_predict_net.empty()) {
LOG(FATAL) << "No predict net specified. Use --predict_net=/path/to/net.";
}
caffe2::NetDef init_net, predict_net;
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &init_net));
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_predict_net, &predict_net));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_predict_net, &predict_net));
// Can be large due to constant fills
VLOG(1) << "Init net: " << ProtoDebugString(init_net);
LOG(INFO) << "Predict net: " << ProtoDebugString(predict_net);
@@ -35,15 +35,14 @@ static bool HasDoc(const std::string& str) {
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);

if (!c10::FLAGS_schema.empty()) {
const auto* schema = caffe2::OpSchemaRegistry::Schema(c10::FLAGS_schema);
if (!FLAGS_schema.empty()) {
const auto* schema = caffe2::OpSchemaRegistry::Schema(FLAGS_schema);
if (!schema) {
std::cerr << "Operator " << c10::FLAGS_schema << " doesn't have a schema"
std::cerr << "Operator " << FLAGS_schema << " doesn't have a schema"
<< std::endl;
return 1;
}
std::cout << "Operator " << c10::FLAGS_schema << ": " << std::endl
<< *schema;
std::cout << "Operator " << FLAGS_schema << ": " << std::endl << *schema;
return 0;
}
@@ -24,13 +24,13 @@ C10_DEFINE_string(plan, "", "The given path to the plan protobuffer.");

int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
if (c10::FLAGS_plan.size() == 0) {
if (FLAGS_plan.size() == 0) {
LOG(ERROR) << "No plan specified. Use --plan=/path/to/plan.";
return 0;
}
LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
LOG(INFO) << "Loading plan: " << FLAGS_plan;
caffe2::PlanDef plan_def;
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_plan, &plan_def));
std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
workspace->RunPlan(plan_def);

@@ -36,9 +36,9 @@ int main(int argc, char** argv) {
return 1;
}
caffe2::GlobalInit(&argc, &argv);
LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
LOG(INFO) << "Loading plan: " << FLAGS_plan;
caffe2::PlanDef plan_def;
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_plan, &plan_def));
std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
workspace->RunPlan(plan_def);
@@ -86,14 +86,14 @@ int main(int argc, char** argv) {

// Run initialization network.
caffe2::NetDef net_def;
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &net_def));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &net_def));
CAFFE_ENFORCE(workspace->RunNetOnce(net_def));

// Load input.
if (c10::FLAGS_input.size()) {
vector<string> input_names = caffe2::split(',', c10::FLAGS_input);
if (c10::FLAGS_input_file.size()) {
vector<string> input_files = caffe2::split(',', c10::FLAGS_input_file);
if (FLAGS_input.size()) {
vector<string> input_names = caffe2::split(',', FLAGS_input);
if (FLAGS_input_file.size()) {
vector<string> input_files = caffe2::split(',', FLAGS_input_file);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_files.size(),

@@ -103,24 +103,22 @@ int main(int argc, char** argv) {
CAFFE_ENFORCE(caffe2::ReadProtoFromFile(input_files[i], &blob_proto));
DeserializeBlob(blob_proto, workspace->CreateBlob(input_names[i]));
}
} else if (c10::FLAGS_input_dims.size() || c10::FLAGS_input_type.size()) {
} else if (FLAGS_input_dims.size() || FLAGS_input_type.size()) {
CAFFE_ENFORCE_GE(
c10::FLAGS_input_dims.size(),
FLAGS_input_dims.size(),
0,
"Input dims must be specified when input tensors are used.");
CAFFE_ENFORCE_GE(
c10::FLAGS_input_type.size(),
FLAGS_input_type.size(),
0,
"Input type must be specified when input tensors are used.");

vector<string> input_dims_list =
caffe2::split(';', c10::FLAGS_input_dims);
vector<string> input_dims_list = caffe2::split(';', FLAGS_input_dims);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_dims_list.size(),
"Input name and dims should have the same number of items.");
vector<string> input_type_list =
caffe2::split(';', c10::FLAGS_input_type);
vector<string> input_type_list = caffe2::split(';', FLAGS_input_type);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_type_list.size(),

@@ -158,28 +156,28 @@ int main(int argc, char** argv) {
}

// Run main network.
CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_net, &net_def));
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
if (!net_def.has_name()) {
net_def.set_name("benchmark");
}
// force changing engine and algo
if (c10::FLAGS_force_engine) {
LOG(INFO) << "force engine be: " << c10::FLAGS_engine;
if (FLAGS_force_engine) {
LOG(INFO) << "force engine be: " << FLAGS_engine;
for (const auto& op : net_def.op()) {
const_cast<caffe2::OperatorDef*>(&op)->set_engine(c10::FLAGS_engine);
const_cast<caffe2::OperatorDef*>(&op)->set_engine(FLAGS_engine);
}
}
if (c10::FLAGS_force_algo) {
LOG(INFO) << "force algo be: " << c10::FLAGS_algo;
if (FLAGS_force_algo) {
LOG(INFO) << "force algo be: " << FLAGS_algo;
for (const auto& op : net_def.op()) {
caffe2::GetMutableArgument(
"algo", true, const_cast<caffe2::OperatorDef*>(&op))
->set_s(c10::FLAGS_algo);
->set_s(FLAGS_algo);
}
}
if (c10::FLAGS_opt) {
if (FLAGS_opt) {
#ifdef CAFFE2_OPTIMIZER
net_def = caffe2::opt::optimize(net_def, workspace.get(), c10::FLAGS_opt);
net_def = caffe2::opt::optimize(net_def, workspace.get(), FLAGS_opt);
#else
LOG(WARNING) << "Caffe2 not compiled with optimization passes.";
#endif

@@ -188,14 +186,13 @@ int main(int argc, char** argv) {
caffe2::NetBase* net = workspace->CreateNet(net_def);
CHECK_NOTNULL(net);
CAFFE_ENFORCE(net->Run());
net->TEST_Benchmark(
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
net->TEST_Benchmark(FLAGS_warmup, FLAGS_iter, FLAGS_run_individual);

string output_prefix =
c10::FLAGS_output_folder.size() ? c10::FLAGS_output_folder + "/" : "";
if (c10::FLAGS_output.size()) {
vector<string> output_names = caffe2::split(',', c10::FLAGS_output);
if (c10::FLAGS_output == "*") {
FLAGS_output_folder.size() ? FLAGS_output_folder + "/" : "";
if (FLAGS_output.size()) {
vector<string> output_names = caffe2::split(',', FLAGS_output);
if (FLAGS_output == "*") {
output_names = workspace->Blobs();
}
for (const string& name : output_names) {
@@ -32,31 +32,23 @@ namespace caffe2 {
static int Split(int argc, char** argv) {
GlobalInit(&argc, &argv);

CAFFE_ENFORCE(
c10::FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
CAFFE_ENFORCE(
c10::FLAGS_splits > 0, "Must specify a nonnegative split number.");
CAFFE_ENFORCE(
c10::FLAGS_db_type.size(), "Must specify --db_type=[a db type].");
CAFFE_ENFORCE(FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
CAFFE_ENFORCE(FLAGS_splits > 0, "Must specify a nonnegative split number.");
CAFFE_ENFORCE(FLAGS_db_type.size(), "Must specify --db_type=[a db type].");

unique_ptr<db::DB> in_db(
db::CreateDB(c10::FLAGS_db_type, c10::FLAGS_input_db, db::READ));
CAFFE_ENFORCE(
in_db != nullptr, "Cannot open input db: ", c10::FLAGS_input_db);
db::CreateDB(FLAGS_db_type, FLAGS_input_db, db::READ));
CAFFE_ENFORCE(in_db != nullptr, "Cannot open input db: ", FLAGS_input_db);
unique_ptr<db::Cursor> cursor(in_db->NewCursor());
// This usually won't happen, but FWIW.
CAFFE_ENFORCE(
cursor != nullptr,
"Cannot obtain cursor for input db: ",
c10::FLAGS_input_db);
cursor != nullptr, "Cannot obtain cursor for input db: ", FLAGS_input_db);

vector<unique_ptr<db::DB>> out_dbs;
vector<unique_ptr<db::Transaction>> transactions;
for (int i = 0; i < c10::FLAGS_splits; ++i) {
for (int i = 0; i < FLAGS_splits; ++i) {
out_dbs.push_back(unique_ptr<db::DB>(db::CreateDB(
c10::FLAGS_db_type,
c10::FLAGS_input_db + "_split_" + to_string(i),
db::NEW)));
FLAGS_db_type, FLAGS_input_db + "_split_" + to_string(i), db::NEW)));
CAFFE_ENFORCE(out_dbs.back().get(), "Cannot create output db #", i);
transactions.push_back(
unique_ptr<db::Transaction>(out_dbs[i]->NewTransaction()));

@@ -66,10 +58,9 @@ static int Split(int argc, char** argv) {

int count = 0;
for (; cursor->Valid(); cursor->Next()) {
transactions[count % c10::FLAGS_splits]->Put(
cursor->key(), cursor->value());
if (++count % c10::FLAGS_batch_size == 0) {
for (int i = 0; i < c10::FLAGS_splits; ++i) {
transactions[count % FLAGS_splits]->Put(cursor->key(), cursor->value());
if (++count % FLAGS_batch_size == 0) {
for (int i = 0; i < FLAGS_splits; ++i) {
transactions[i]->Commit();
}
LOG(INFO) << "Split " << count << " items so far.";
@@ -29,8 +29,8 @@ C10_DEFINE_string(f_out, "", "The output data file name.");

int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
std::ifstream f_in(c10::FLAGS_f_in);
std::ofstream f_out(c10::FLAGS_f_out);
std::ifstream f_in(FLAGS_f_in);
std::ofstream f_out(FLAGS_f_out);
std::string line;
caffe2::TensorProtos tensor_protos;
while (std::getline(f_in, line)) {
@@ -36,11 +36,11 @@ int main(int argc, char** argv) {

LOG(INFO) << "Opening DB...";
auto in_db = caffe2::db::CreateDB(
c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ);
FLAGS_input_db_type, FLAGS_input_db, caffe2::db::READ);
CAFFE_ENFORCE(
in_db,
"Cannot load input db " + c10::FLAGS_input_db + " of expected type " +
c10::FLAGS_input_db_type);
"Cannot load input db " + FLAGS_input_db + " of expected type " +
FLAGS_input_db_type);
auto cursor = in_db->NewCursor();
LOG(INFO) << "DB opened.";

@@ -48,8 +48,8 @@ int main(int argc, char** argv) {

// Socket to talk to clients
caffe2::ZmqSocket sender(ZMQ_PUSH);
sender.Bind(c10::FLAGS_server);
LOG(INFO) << "Server created at " << c10::FLAGS_server;
sender.Bind(FLAGS_server);
LOG(INFO) << "Server created at " << FLAGS_server;

while (1) {
VLOG(1) << "Sending " << cursor->key();
@@ -48,4 +48,8 @@
#endif
#endif

// Simply define the namespace, in case a dependent library want to refer to
// the c10 namespace but not any nontrivial files.
namespace c10 {} // namespace c10

#endif // C10_MACROS_MACROS_H_
@@ -6,24 +6,18 @@

C10_DEFINE_bool(c10_flags_test_only_flag, true, "Only used in test.");

namespace c10 {
namespace c10_test {

TEST(FlagsTest, TestGflagsCorrectness) {
#ifdef C10_USE_GFLAGS
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
// Change the c10 namespace and check global
FLAGS_c10_flags_test_only_flag = false;
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, false);
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, false);
// Change global and check c10 namespace
::FLAGS_c10_flags_test_only_flag = true;
FLAGS_c10_flags_test_only_flag = true;
EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
#else // C10_USE_GFLAGS
std::cout << "Caffe2 is not built with gflags. Nothing to test here."
<< std::endl;
#endif
}

} // namespace c10
} // namespace c10_test
@@ -15,7 +15,7 @@
* To use it in another .cc file, you can use C10_DECLARE_* as follows:
* C10_DECLARE_bool(foo);
*
* In both cases, you can then access the flag via c10::FLAGS_foo.
* In both cases, you can then access the flag via FLAGS_foo.
*
* It is recommended that you build with gflags. To learn more about the flags
* usage, refer to the gflags page here:

@@ -75,9 +75,25 @@ C10_API bool CommandLineFlagsHasBeenParsed();
////////////////////////////////////////////////////////////////////////////////
// Begin gflags section: most functions are basically rerouted to gflags.
////////////////////////////////////////////////////////////////////////////////

#include <gflags/gflags.h>

// C10 uses hidden visibility by default. However, in gflags, it only uses
// export on Windows platform (with dllexport) but not on linux/mac (with
// default visibility). As a result, to ensure that we are always exporting
// global variables, we will redefine the GFLAGS_DLL_DEFINE_FLAG macro if we
// are building C10 as a shared libray.
// This has to be done after the inclusion of gflags, because some early
// versions of gflags.h (e.g. 2.0 on ubuntu 14.04) directly defines the
// macros, so we need to do definition after gflags is done.
#ifdef GFLAGS_DLL_DEFINE_FLAG
#undef GFLAGS_DLL_DEFINE_FLAG
#endif // GFLAGS_DLL_DEFINE_FLAG
#ifdef GFLAGS_DLL_DECLARE_FLAG
#undef GFLAGS_DLL_DECLARE_FLAG
#endif // GFLAGS_DLL_DECLARE_FLAG
#define GFLAGS_DLL_DEFINE_FLAG C10_EXPORT
#define GFLAGS_DLL_DECLARE_FLAG C10_IMPORT

// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags.
// Using GFLAGS_GFLAGS_H_ to capture this change.
#ifndef GFLAGS_GFLAGS_H_

@@ -87,11 +103,11 @@ namespace gflags = google;
// Motivation about the gflags wrapper:
// (1) We would need to make sure that the gflags version and the non-gflags
// version of C10 are going to expose the same flags abstraction. One should
// explicitly use c10::FLAGS_flag_name to access the flags.
// explicitly use FLAGS_flag_name to access the flags.
// (2) For flag names, it is recommended to start with c10_ to distinguish it
// from regular gflags flags. For example, do
// C10_DEFINE_BOOL(c10_my_flag, true, "An example");
// to allow one to use c10::FLAGS_c10_my_flag.
// to allow one to use FLAGS_c10_my_flag.
// (3) Gflags has a design issue that does not properly expose the global flags,
// if one builds the library with -fvisibility=hidden. The current gflags (as of
// Aug 2018) only deals with the Windows case using dllexport, and not the Linux

@@ -100,12 +116,11 @@ namespace gflags = google;
// itself is not duplicated - under the hood it is the same global gflags flag.
#define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \
DEFINE_##type(name, default_value, help_str); \
namespace c10 { \
C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \
}

#define C10_DEFINE_int(name, default_value, help_str) \
C10_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
#define C10_DEFINE_int32(name, default_value, help_str) \
C10_DEFINE_int(name, default_value, help_str)
#define C10_DEFINE_int64(name, default_value, help_str) \
C10_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
#define C10_DEFINE_double(name, default_value, help_str) \

@@ -118,12 +133,10 @@ namespace gflags = google;
// DECLARE_typed_var should be used in header files and in the global namespace.
#define C10_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \
DECLARE_##type(name); \
namespace c10 { \
C10_IMPORT extern real_type& FLAGS_##name; \
} // namespace c10

#define C10_DECLARE_int(name) \
C10_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name)
#define C10_DECLARE_int32(name) C10_DECLARE_int(name)
#define C10_DECLARE_int64(name) \
C10_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name)
#define C10_DECLARE_double(name) \

@@ -166,8 +179,8 @@ C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&);
// as well.

#define C10_DEFINE_typed_var(type, name, default_value, help_str) \
namespace c10 { \
C10_EXPORT type FLAGS_##name = default_value; \
namespace c10 { \
namespace { \
class C10FlagParser_##name : public C10FlagParser { \
public: \

@@ -185,6 +198,8 @@ C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&);

#define C10_DEFINE_int(name, default_value, help_str) \
C10_DEFINE_typed_var(int, name, default_value, help_str)
#define C10_DEFINE_int32(name, default_value, help_str) \
C10_DEFINE_int(name, default_value, help_str)
#define C10_DEFINE_int64(name, default_value, help_str) \
C10_DEFINE_typed_var(int64_t, name, default_value, help_str)
#define C10_DEFINE_double(name, default_value, help_str) \

@@ -195,12 +210,10 @@ C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&);
C10_DEFINE_typed_var(std::string, name, default_value, help_str)

// DECLARE_typed_var should be used in header files and in the global namespace.
#define C10_DECLARE_typed_var(type, name) \
namespace c10 { \
C10_IMPORT extern type FLAGS_##name; \
} // namespace c10
#define C10_DECLARE_typed_var(type, name) C10_IMPORT extern type FLAGS_##name

#define C10_DECLARE_int(name) C10_DECLARE_typed_var(int, name)
#define C10_DECLARE_int32(name) C10_DECLARE_int(name)
#define C10_DECLARE_int64(name) C10_DECLARE_typed_var(int64_t, name)
#define C10_DECLARE_double(name) C10_DECLARE_typed_var(double, name)
#define C10_DECLARE_bool(name) C10_DECLARE_typed_var(bool, name)
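To make the Flags.h macro changes above concrete, here is a small hedged sketch of the intended define/declare split after this patch. The file names are hypothetical; `C10_DEFINE_int`, `C10_DECLARE_int`, and the global `FLAGS_` access are taken from the header hunks above.

```cpp
// my_tool_flags.h (hypothetical): declare the flag for other translation units.
// C10_DECLARE_int(c10_example_level);

// my_tool_flags.cc (hypothetical): define it exactly once.
C10_DEFINE_int(c10_example_level, 3, "Illustrative flag, not part of the commit.");

// Any .cc that includes the declaration reads or writes the global directly;
// the namespace-c10 alias that the old macros emitted is gone, so this works
// the same whether C10 is built with or without gflags.
int effective_level() {
  return FLAGS_c10_example_level > 0 ? FLAGS_c10_example_level : 1;
}
```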
@@ -76,8 +76,8 @@ pthreadpool_t nnpack_threadpool() {
enum nnp_status nnpack_status = nnp_initialize();
CAFFE_ENFORCE(
nnpack_status == nnp_status_success, "NNPack is not supported here!");
int num_threads = c10::FLAGS_caffe2_nnpack_num_threads;
if (c10::FLAGS_caffe2_nnpack_use_mkl_num_threads) {
int num_threads = FLAGS_caffe2_nnpack_num_threads;
if (FLAGS_caffe2_nnpack_use_mkl_num_threads) {
#ifdef CAFFE2_USE_MKL
num_threads = mkl_get_max_threads();
#else
@@ -32,12 +32,12 @@ const string defaultHTraceConf(const string& net_name) {
stream << HTRACE_SPAN_RECEIVER_KEY << "=local.file;";
stream << HTRACE_SAMPLER_KEY << "=always;";

if (c10::FLAGS_caffe2_htrace_span_log_path.empty()) {
if (FLAGS_caffe2_htrace_span_log_path.empty()) {
stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "=/tmp/htrace_" << net_name_copy
<< "_span_log_" << datetime << ";";
} else {
stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "="
<< c10::FLAGS_caffe2_htrace_span_log_path << ";";
<< FLAGS_caffe2_htrace_span_log_path << ";";
}

return stream.str();
@@ -58,10 +58,10 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
CAFFE_ENFORCE(data);
// move data to a thread's NUMA node
NUMAMove(data, nbytes, GetCurrentNUMANode());
if (c10::FLAGS_caffe2_cpu_allocator_do_zero_fill) {
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
memset(data, 0, nbytes);
}
if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
if (FLAGS_caffe2_report_cpu_memory_usage) {
reporter_.New(data, nbytes);
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
}

@@ -84,7 +84,7 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
}

at::DeleterFnPtr raw_deleter() const override {
if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
if (FLAGS_caffe2_report_cpu_memory_usage) {
return &ReportAndDelete;
}
return &Delete;
@@ -121,7 +121,7 @@ void TensorSerializer::SerializeWithChunkSize(
if (chunk_size == kNoChunking) {
chunk_size = tensor.size() + 1; // to account for empty tensors
} else if (chunk_size == kDefaultChunkSize) {
chunk_size = c10::FLAGS_caffe2_tensor_chunk_size;
chunk_size = FLAGS_caffe2_tensor_chunk_size;
}

auto processChunk = [&](int64_t chunkStart) {

@@ -148,7 +148,7 @@ void TensorSerializer::SerializeWithChunkSize(
}
};
if (tensor.size() > chunk_size) {
for (int i = 0; i < c10::FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
for (int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
futures.emplace_back(std::async(std::launch::async, task));
}
}

@@ -287,7 +287,7 @@ void TensorSerializer::Serialize(
uniq_ptr.get());
break;
case TensorProto_DataType_FLOAT16: {
if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
if (FLAGS_caffe2_serialize_fp16_as_bytes) {
const int kValue = 1;
CAFFE_ENFORCE_EQ(
reinterpret_cast<const char*>(&kValue)[0],
@@ -457,8 +457,8 @@ TYPED_TEST(TensorCPUTest, NoLongerSharesAfterFreeMemory) {

TYPED_TEST(TensorCPUTest, KeepOnShrink) {
// Set flags (defaults)
c10::FLAGS_caffe2_keep_on_shrink = true;
c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
FLAGS_caffe2_keep_on_shrink = true;
FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;

vector<int> dims{2, 3, 5};
Tensor tensor(dims, CPU);

@@ -488,8 +488,8 @@ TYPED_TEST(TensorCPUTest, KeepOnShrink) {

TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
// Set flags
c10::FLAGS_caffe2_keep_on_shrink = true;
c10::FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);
FLAGS_caffe2_keep_on_shrink = true;
FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);

vector<int> dims{1, 8, 8};
Tensor tensor(dims, CPU);

@@ -509,7 +509,7 @@ TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
//EXPECT_NE(ptr, new_ptr);

// Restore default flags
c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
}

TYPED_TEST(TensorCPUDeathTest, CannotAccessRawDataWhenEmpty) {

@@ -712,7 +712,7 @@ TEST(TensorTest, Half) {
const TensorProto& tensor_proto = proto.tensor();
EXPECT_EQ(
tensor_proto.data_type(), TypeMetaToDataType(TypeMeta::Make<at::Half>()));
if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
if (FLAGS_caffe2_serialize_fp16_as_bytes) {
EXPECT_EQ(tensor_proto.byte_data().size(), 2 * kSize);
for (int i = 0; i < kSize; ++i) {
auto value = tensor->mutable_data<at::Half>()[i].x;

@@ -852,8 +852,8 @@ TYPED_TEST_CASE(TypedTensorTest, TensorDataTypes);

TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
int64_t d1 = 2;
int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
? c10::FLAGS_caffe2_test_big_tensor_size / d1
int64_t d2 = FLAGS_caffe2_test_big_tensor_size
? FLAGS_caffe2_test_big_tensor_size / d1
: static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
int64_t size = d1 * d2;
string db_source = (string)std::tmpnam(nullptr);

@@ -1027,8 +1027,8 @@ TEST(ContentChunks, Serialization) {

TEST(CustomChunkSize, BigTensorSerialization) {
int64_t d1 = 2;
int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
? c10::FLAGS_caffe2_test_big_tensor_size / d1
int64_t d2 = FLAGS_caffe2_test_big_tensor_size
? FLAGS_caffe2_test_big_tensor_size / d1
: static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
int64_t size = d1 * d2;
@@ -31,6 +31,10 @@

namespace caffe2 {

// Since C10 is the core library for caffe2 (and aten), we will simply reroute
// all abstractions defined in c10 to be available in caffe2 as well.
using namespace c10;

// Note(Yangqing): NVCC does not play well with unordered_map on some platforms,
// forcing us to use std::map instead of unordered_map. This may affect speed
// in some cases, but in most of the computation code we do not access map very
@@ -89,7 +89,7 @@ int NumCudaDevices() {

namespace {
int gDefaultGPUID = 0;
// Only used when c10::FLAGS_caffe2_cuda_full_device_control is set true.
// Only used when FLAGS_caffe2_cuda_full_device_control is set true.
thread_local int gCurrentDevice = -1;
} // namespace

@@ -108,7 +108,7 @@ void SetDefaultGPUID(const int deviceid) {
int GetDefaultGPUID() { return gDefaultGPUID; }

int CaffeCudaGetDevice() {
if (c10::FLAGS_caffe2_cuda_full_device_control) {
if (FLAGS_caffe2_cuda_full_device_control) {
if (gCurrentDevice < 0) {
CUDA_ENFORCE(cudaGetDevice(&gCurrentDevice));
}

@@ -121,7 +121,7 @@ int CaffeCudaGetDevice() {
}

void CaffeCudaSetDevice(const int id) {
if (c10::FLAGS_caffe2_cuda_full_device_control) {
if (FLAGS_caffe2_cuda_full_device_control) {
if (gCurrentDevice != id) {
CUDA_ENFORCE(cudaSetDevice(id));
}

@@ -176,12 +176,12 @@ static void SetUpCub() {
// Sets up the cub memory pool
try {
g_cub_allocator.reset(new cub::CachingDeviceAllocator(
c10::FLAGS_caffe2_cub_bin_growth,
c10::FLAGS_caffe2_cub_min_bin,
c10::FLAGS_caffe2_cub_max_bin,
size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
FLAGS_caffe2_cub_bin_growth,
FLAGS_caffe2_cub_min_bin,
FLAGS_caffe2_cub_max_bin,
size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
false,
c10::FLAGS_caffe2_cub_print_allocation_events));
FLAGS_caffe2_cub_print_allocation_events));
} catch (...) {
CAFFE_THROW("Some error happened at cub initialization.");
}

@@ -189,23 +189,22 @@ static void SetUpCub() {
}

static void Caffe2SetCUDAMemoryPool() {
if (c10::FLAGS_caffe2_cuda_memory_pool == "" ||
c10::FLAGS_caffe2_cuda_memory_pool == "none") {
if (FLAGS_caffe2_cuda_memory_pool == "" ||
FLAGS_caffe2_cuda_memory_pool == "none") {
g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "cnmem") {
} else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
CAFFE_THROW("CNMEM is no longer used by Caffe2. Use cub instead. "
"This error message may go away in the future.");
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "cub") {
} else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
// Sets up cub.
g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
SetUpCub();
} else if (c10::FLAGS_caffe2_cuda_memory_pool == "thc") {
} else if (FLAGS_caffe2_cuda_memory_pool == "thc") {
g_cuda_memory_pool_type = CudaMemoryPoolType::THC;
g_thc_allocator.reset(new THCCachingAllocator());
} else {
CAFFE_THROW(
"Unrecognized cuda memory pool type: ",
c10::FLAGS_caffe2_cuda_memory_pool);
"Unrecognized cuda memory pool type: ", FLAGS_caffe2_cuda_memory_pool);
}
}
@@ -285,7 +284,7 @@ std::mutex& CUDAContext::mutex() {
std::vector<long> CUDAContext::TotalMemoryByGpu() {
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
CAFFE_ENFORCE(
c10::FLAGS_caffe2_gpu_memory_tracking,
FLAGS_caffe2_gpu_memory_tracking,
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
return g_total_by_gpu_map;
}

@@ -293,7 +292,7 @@ std::vector<long> CUDAContext::TotalMemoryByGpu() {
std::vector<long> CUDAContext::MaxMemoryByGpu() {
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
CAFFE_ENFORCE(
c10::FLAGS_caffe2_gpu_memory_tracking,
FLAGS_caffe2_gpu_memory_tracking,
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
return g_max_by_gpu_map;
}

@@ -306,7 +305,7 @@ void TrackMemoryAlloc(size_t nbytes) {
max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
g_total_mem += nbytes;
if (g_total_mem - g_last_rep >
c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
long t = g_total_by_gpu_map[gpu];
long max_t = g_max_by_gpu_map[gpu];

@@ -335,13 +334,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
static Caffe2CudaInitializerHelper g_cuda_initializer_;
void* ptr = nullptr;

if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
TrackMemoryAlloc(nbytes);
}
switch (g_cuda_memory_pool_type) {
case CudaMemoryPoolType::NONE:
CUDA_ENFORCE(cudaMalloc(&ptr, nbytes));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
}

@@ -351,13 +350,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
VLOG(2) << "CUB allocating pointer " << ptr << " on device "
<< CaffeCudaGetDevice();
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
}
return {ptr, ptr, &Delete, at::Device(CUDA, CaffeCudaGetDevice())};
case CudaMemoryPoolType::THC:
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
}

@@ -374,7 +373,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
static void Delete(void* ptr) {
// lock the mutex
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
auto sz_it = g_size_map.find(ptr);
DCHECK(sz_it != g_size_map.end());
auto aff_it = g_cuda_device_affiliation.find(ptr);

@@ -399,7 +398,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
<< cudaGetErrorString(error);
}

if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
}

@@ -415,7 +414,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
}
case CudaMemoryPoolType::THC: {
CUDA_ENFORCE(g_thc_allocator->Free(ptr));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
}
break;
@@ -37,7 +37,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) {

TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
// For a while, set full device control to be true.
c10::FLAGS_caffe2_cuda_full_device_control = true;
FLAGS_caffe2_cuda_full_device_control = true;
for (int i = 0; i < NumCudaDevices(); ++i) {
CaffeCudaSetDevice(i);
EXPECT_EQ(CaffeCudaGetDevice(), i);

@@ -46,7 +46,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
CaffeCudaSetDevice(i);
EXPECT_EQ(CaffeCudaGetDevice(), i);
}
c10::FLAGS_caffe2_cuda_full_device_control = false;
FLAGS_caffe2_cuda_full_device_control = false;
}

TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
@ -89,7 +89,7 @@ int NumHipDevices()

namespace {
int gDefaultGPUID = 0;
// Only used when c10::FLAGS_caffe2_hip_full_device_control is set true.
// Only used when FLAGS_caffe2_hip_full_device_control is set true.
thread_local int gCurrentDevice = -1;
} // namespace

@ -109,7 +109,7 @@ int GetDefaultGPUID() { return gDefaultGPUID; }

int CaffeHipGetDevice()
{
if (c10::FLAGS_caffe2_hip_full_device_control) {
if (FLAGS_caffe2_hip_full_device_control) {
if (gCurrentDevice < 0) {
HIP_ENFORCE(hipGetDevice(&gCurrentDevice));
}
@ -123,7 +123,7 @@ int CaffeHipGetDevice()

void CaffeHipSetDevice(const int id)
{
if (c10::FLAGS_caffe2_hip_full_device_control) {
if (FLAGS_caffe2_hip_full_device_control) {
if (gCurrentDevice != id) {
HIP_ENFORCE(hipSetDevice(id));
}

@ -166,12 +166,12 @@ static void SetUpCub()
try
{
g_cub_allocator.reset(new cub::CachingDeviceAllocator(
c10::FLAGS_caffe2_cub_bin_growth,
c10::FLAGS_caffe2_cub_min_bin,
c10::FLAGS_caffe2_cub_max_bin,
size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
FLAGS_caffe2_cub_bin_growth,
FLAGS_caffe2_cub_min_bin,
FLAGS_caffe2_cub_max_bin,
size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
false,
c10::FLAGS_caffe2_cub_print_allocation_events));
FLAGS_caffe2_cub_print_allocation_events));
}
catch(...)
{
@ -182,24 +182,23 @@ static void SetUpCub()

static void Caffe2SetHIPMemoryPool()
{
if (c10::FLAGS_caffe2_hip_memory_pool == "" ||
c10::FLAGS_caffe2_hip_memory_pool == "none") {
if (FLAGS_caffe2_hip_memory_pool == "" ||
FLAGS_caffe2_hip_memory_pool == "none") {
g_hip_memory_pool_type = HipMemoryPoolType::NONE;
} else if (c10::FLAGS_caffe2_hip_memory_pool == "cnmem") {
} else if (FLAGS_caffe2_hip_memory_pool == "cnmem") {
CAFFE_THROW(
"CNMEM is no longer used by Caffe2. Use cub instead. "
"This error message may go away in the future.");
} else if (c10::FLAGS_caffe2_hip_memory_pool == "cub") {
} else if (FLAGS_caffe2_hip_memory_pool == "cub") {
// Sets up cub.
g_hip_memory_pool_type = HipMemoryPoolType::CUB;
SetUpCub();
} else if (c10::FLAGS_caffe2_hip_memory_pool == "thc") {
} else if (FLAGS_caffe2_hip_memory_pool == "thc") {
g_hip_memory_pool_type = HipMemoryPoolType::THC;
g_thc_allocator.reset(new THCCachingAllocator());
} else {
CAFFE_THROW(
"Unrecognized HIP memory pool type: ",
c10::FLAGS_caffe2_hip_memory_pool);
"Unrecognized HIP memory pool type: ", FLAGS_caffe2_hip_memory_pool);
}
}

@ -287,7 +286,7 @@ std::vector<long> HIPContext::TotalMemoryByGpu()
{
std::lock_guard<std::mutex> lock(HIPContext::mutex());
CAFFE_ENFORCE(
c10::FLAGS_caffe2_gpu_memory_tracking,
FLAGS_caffe2_gpu_memory_tracking,
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
return g_total_by_gpu_map;
}
@ -296,7 +295,7 @@ std::vector<long> HIPContext::MaxMemoryByGpu()
{
std::lock_guard<std::mutex> lock(HIPContext::mutex());
CAFFE_ENFORCE(
c10::FLAGS_caffe2_gpu_memory_tracking,
FLAGS_caffe2_gpu_memory_tracking,
"Pass --caffe2_gpu_memory_tracking to enable memory stats");
return g_max_by_gpu_map;
}
@ -309,7 +308,7 @@ void TrackMemoryAlloc(size_t nbytes)
g_max_by_gpu_map[this_gpu] = std::max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
g_total_mem += nbytes;
if (g_total_mem - g_last_rep >
c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
long t = g_total_by_gpu_map[gpu];
long max_t = g_max_by_gpu_map[gpu];
@ -338,13 +337,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
static Caffe2HipInitializerHelper g_hip_initializer_;
void* ptr = nullptr;

if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
TrackMemoryAlloc(nbytes);
}
switch (g_hip_memory_pool_type) {
case HipMemoryPoolType::NONE:
HIP_ENFORCE(hipMalloc(&ptr, nbytes));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
}
@ -353,13 +352,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
VLOG(2) << "CUB allocating pointer " << ptr << " on device " << CaffeHipGetDevice();
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
}
return {ptr, ptr, &Delete, at::Device(HIP, CaffeHipGetDevice())};
case HipMemoryPoolType::THC:
HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
}
@ -372,7 +371,7 @@ struct DefaultHIPAllocator final : public at::Allocator {
// lock the mutex
std::lock_guard<std::mutex> lock(HIPContext::mutex());

if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
auto sz_it = g_size_map.find(ptr);
DCHECK(sz_it != g_size_map.end());
auto aff_it = g_hip_device_affiliation.find(ptr);
@ -398,7 +397,7 @@ struct DefaultHIPAllocator final : public at::Allocator {
<< hipGetErrorString(error);
}

if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_hip_device_affiliation.erase(g_hip_device_affiliation.find(ptr));
}

@ -414,7 +413,7 @@ struct DefaultHIPAllocator final : public at::Allocator {
}
case HipMemoryPoolType::THC: {
HIP_ENFORCE(g_thc_allocator->Free(ptr));
if (c10::FLAGS_caffe2_gpu_memory_tracking) {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_hip_device_affiliation.erase(g_hip_device_affiliation.find(ptr));
}
break;

@ -100,8 +100,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option)
}
do {
stream_id = stream_counters_[gpu_id]++;
stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
} while (c10::FLAGS_caffe2_net_async_check_stream_status &&
stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
} while (FLAGS_caffe2_net_async_check_stream_status &&
!HIPContext::IsStreamFree(device_option, stream_id));
}
return stream_id;
@ -120,7 +120,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
"None of the parent is recorded for an event.");

int stream_id = 0;
if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
if (FLAGS_caffe2_async_dag_use_multiple_streams) {
stream_id = stream(
operator_nodes_[source_idx].operator_->event().GetDeviceOption());
}
@ -136,7 +136,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
}

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
const auto& device_option =
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
CAFFE_EVENT(
@ -163,13 +163,13 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
}

const auto& sink_idx = chain.back();
if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
if (success && FLAGS_caffe2_net_async_finish_chain) {
operator_nodes_[sink_idx].operator_->event().Finish();
}
CAFFE_ENFORCE(!eventRecorded_[sink_idx], "An event for ", sink_idx, " should not be recorded.");
eventRecorded_[sink_idx] = 1;

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
const auto& device_option =
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
CAFFE_EVENT(

@ -26,19 +26,17 @@ namespace caffe2 {
std::shared_ptr<TaskThreadPoolBase>
GetAsyncNetHIPThreadPool(int device_id, int pool_size, bool create_new) {
// For GPU, use per device thread pools of predefined constant size
if (pool_size != c10::FLAGS_caffe2_threads_per_hip_gpu) {
if (pool_size != FLAGS_caffe2_threads_per_hip_gpu) {
LOG(INFO) << "Overriding AMD HIP GPU pool size: using "
<< c10::FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
<< FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
}
static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
static std::mutex pool_mutex;

if (create_new) {
LOG(INFO) << "Created new AMD HIP GPU pool, size: "
<< c10::FLAGS_caffe2_threads_per_hip_gpu
<< "; GPU id: " << device_id;
return std::make_shared<TaskThreadPool>(
c10::FLAGS_caffe2_threads_per_hip_gpu);
<< FLAGS_caffe2_threads_per_hip_gpu << "; GPU id: " << device_id;
return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
} else {
std::lock_guard<std::mutex> lock(pool_mutex);

@ -48,10 +46,10 @@ GetAsyncNetHIPThreadPool(int device_id, int pool_size, bool create_new) {
}
if (!shared_pool) {
LOG(INFO) << "Created shared AMD HIP GPU pool, size: "
<< c10::FLAGS_caffe2_threads_per_hip_gpu
<< FLAGS_caffe2_threads_per_hip_gpu
<< "; GPU id: " << device_id;
shared_pool = std::make_shared<TaskThreadPool>(
c10::FLAGS_caffe2_threads_per_hip_gpu);
shared_pool =
std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
pools[device_id] = shared_pool;
}
return shared_pool;

@ -71,7 +71,7 @@ bool GlobalInit(int* pargc, char*** pargv) {
success &= c10::ParseCommandLineFlags(pargc, pargv);
success &= InitCaffeLogging(pargc, *pargv);
// Print out the current build version. Using cerr as LOG(INFO) might be off
if (c10::FLAGS_caffe2_version) {
if (FLAGS_caffe2_version) {
std::cerr << "Caffe2 build configuration: " << std::endl;
for (const auto& it : GetBuildOptions()) {
std::cerr << " " << std::setw(25) << std::left << it.first << " : "

@ -23,7 +23,7 @@ static void QuitIfFeatureUnsupported(
"on your machine, such as SIGILL 'illegal instructions' on Linux. "
"As a result Caffe2 will preemptively quit. Please install or "
"build a Caffe2 binary with the feature turned off.";
if (c10::FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
if (FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
LOG(FATAL) << err_string;
} else {
LOG(ERROR) << err_string;

@ -35,10 +35,9 @@ bool Caffe2SetOpenMPThreads(int*, char***) {
omp_set_num_threads(1);
}

if (c10::FLAGS_caffe2_omp_num_threads > 0) {
VLOG(1) << "Setting omp_num_threads to "
<< c10::FLAGS_caffe2_omp_num_threads;
omp_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
if (FLAGS_caffe2_omp_num_threads > 0) {
VLOG(1) << "Setting omp_num_threads to " << FLAGS_caffe2_omp_num_threads;
omp_set_num_threads(FLAGS_caffe2_omp_num_threads);
}
VLOG(1) << "Caffe2 running with " << omp_get_max_threads() << " OMP threads";
return true;
@ -56,18 +55,16 @@ bool Caffe2SetMKLThreads(int*, char***) {
}

// If caffe2_omp_num_threads is set, we use that for MKL as well.
if (c10::FLAGS_caffe2_omp_num_threads > 0) {
VLOG(1) << "Setting mkl_num_threads to "
<< c10::FLAGS_caffe2_omp_num_threads
if (FLAGS_caffe2_omp_num_threads > 0) {
VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_omp_num_threads
<< " as inherited from omp_num_threads.";
mkl_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
mkl_set_num_threads(FLAGS_caffe2_omp_num_threads);
}

// Override omp_num_threads if mkl_num_threads is set.
if (c10::FLAGS_caffe2_mkl_num_threads > 0) {
VLOG(1) << "Setting mkl_num_threads to "
<< c10::FLAGS_caffe2_mkl_num_threads;
mkl_set_num_threads(c10::FLAGS_caffe2_mkl_num_threads);
if (FLAGS_caffe2_mkl_num_threads > 0) {
VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_mkl_num_threads;
mkl_set_num_threads(FLAGS_caffe2_mkl_num_threads);
}
VLOG(1) << "Caffe2 running with " << mkl_get_max_threads() << " MKL threads";
return true;

@ -39,7 +39,7 @@ void ThrowEnforceNotMet(
const std::string& msg,
const void* caller) {
c10::Error e(file, line, condition, msg, (*GetFetchStackTrace())(), caller);
if (c10::FLAGS_caffe2_use_fatal_for_enforce) {
if (FLAGS_caffe2_use_fatal_for_enforce) {
LOG(FATAL) << e.msg_stack()[0];
}
throw e;
@ -114,14 +114,14 @@ bool InitCaffeLogging(int* argc, char** argv) {
void UpdateLoggingLevelsFromFlags() {
// If caffe2_log_level is set and is lower than the min log level by glog,
// we will transfer the caffe2_log_level setting to glog to override that.
FLAGS_minloglevel = std::min(c10::FLAGS_caffe2_log_level, FLAGS_minloglevel);
FLAGS_minloglevel = std::min(FLAGS_caffe2_log_level, FLAGS_minloglevel);
// If caffe2_log_level is explicitly set, let's also turn on logtostderr.
if (c10::FLAGS_caffe2_log_level < google::GLOG_ERROR) {
if (FLAGS_caffe2_log_level < google::GLOG_ERROR) {
FLAGS_logtostderr = 1;
}
// Also, transfer the caffe2_log_level verbose setting to glog.
if (c10::FLAGS_caffe2_log_level < 0) {
FLAGS_v = std::min(FLAGS_v, -c10::FLAGS_caffe2_log_level);
if (FLAGS_caffe2_log_level < 0) {
FLAGS_v = std::min(FLAGS_v, -FLAGS_caffe2_log_level);
}
}

@ -154,10 +154,10 @@ bool InitCaffeLogging(int* argc, char** argv) {
<< std::endl;
return false;
}
if (c10::FLAGS_caffe2_log_level > FATAL) {
if (FLAGS_caffe2_log_level > FATAL) {
std::cerr << "The log level of Caffe2 has to be no larger than FATAL("
<< FATAL << "). Capping it to FATAL." << std::endl;
c10::FLAGS_caffe2_log_level = FATAL;
FLAGS_caffe2_log_level = FATAL;
}
return true;
}
@ -166,12 +166,12 @@ void UpdateLoggingLevelsFromFlags() {
}

void ShowLogInfoToStderr() {
c10::FLAGS_caffe2_log_level = INFO;
FLAGS_caffe2_log_level = INFO;
}

MessageLogger::MessageLogger(const char *file, int line, int severity)
: severity_(severity) {
if (severity_ < c10::FLAGS_caffe2_log_level) {
if (severity_ < FLAGS_caffe2_log_level) {
// Nothing needs to be logged.
return;
}
@ -203,7 +203,7 @@ MessageLogger::MessageLogger(const char *file, int line, int severity)

// Output the contents of the stream to the proper channel on destruction.
MessageLogger::~MessageLogger() {
if (severity_ < c10::FLAGS_caffe2_log_level) {
if (severity_ < FLAGS_caffe2_log_level) {
// Nothing needs to be logged.
return;
}
@ -226,7 +226,7 @@ MessageLogger::~MessageLogger() {
__android_log_print(ANDROID_LOG_FATAL, tag_, "terminating.\n");
}
#else // !ANDROID
if (severity_ >= c10::FLAGS_caffe2_log_level) {
if (severity_ >= FLAGS_caffe2_log_level) {
// If not building on Android, log all output to std::cerr.
std::cerr << stream_.str();
// Simulating the glog default behavior: if the severity is above INFO,

@ -12,14 +12,14 @@ TEST(LoggingTest, TestEnforceTrue) {

TEST(LoggingTest, TestEnforceFalse) {
bool kFalse = false;
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
try {
CAFFE_ENFORCE(false, "This throws.");
// This should never be triggered.
ADD_FAILURE();
} catch (const EnforceNotMet&) {
}
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
}

TEST(LoggingTest, TestEnforceEquals) {
@ -76,9 +76,9 @@ TEST(LoggingTest, Join) {
#if GTEST_HAS_DEATH_TEST
TEST(LoggingDeathTest, TestEnforceUsingFatal) {
bool kTrue = true;
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
EXPECT_DEATH(CAFFE_ENFORCE(false, "This goes fatal."), "");
std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
}
#endif

@ -116,7 +116,7 @@ const std::unordered_map<std::string, std::string>& defaultOverrides() {
}

void checkExecutorOverride(std::string& net_type) {
auto executors = caffe2::split(',', c10::FLAGS_caffe2_override_executor);
auto executors = caffe2::split(',', FLAGS_caffe2_override_executor);
CAFFE_ENFORCE(
executors.size() % 2 == 0, "Invalid override executors flag value");
std::unordered_map<std::string, std::string> overrides;

@ -80,7 +80,7 @@ AsyncNetBase::AsyncNetBase(
operators_.push_back(op_ptr);
}

if (c10::FLAGS_caffe2_net_async_inference_mode) {
if (FLAGS_caffe2_net_async_inference_mode) {
execution_chains_ = dag_utils::computeGroups(operator_nodes_);
} else {
execution_chains_ = dag_utils::computeChains(operator_nodes_);
@ -164,14 +164,14 @@ TaskThreadPoolBase* AsyncNetBase::pool(const DeviceOption& device_option) {
}
CAFFE_ENFORCE_LT(
numa_node_id,
c10::FLAGS_caffe2_net_async_max_numa_nodes,
FLAGS_caffe2_net_async_max_numa_nodes,
"Invalid NUMA node id: ",
numa_node_id);
return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_);
} else if (device_option.device_type() == PROTO_CUDA) {
auto gpu_id = device_option.device_id();
CAFFE_ENFORCE(
gpu_id >= 0 && gpu_id < c10::FLAGS_caffe2_net_async_max_gpus,
gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
"Invalid GPU id: " + caffe2::to_string(gpu_id));
return poolGetter(gpu_pools_, PROTO_CUDA, gpu_id, num_workers_);
} else {
@ -509,12 +509,12 @@ void AsyncNetBase::computeExecutionModeFlags() {
is_blocking_ = true;
report_stats_ = false;
} else {
streams_per_gpu_ = c10::FLAGS_caffe2_streams_per_gpu;
finish_chain_ = c10::FLAGS_caffe2_net_async_finish_chain;
always_schedule_child_ = c10::FLAGS_caffe2_net_async_always_schedule_child;
check_stream_status_ = c10::FLAGS_caffe2_net_async_check_stream_status;
use_single_pool_ = c10::FLAGS_caffe2_net_async_use_single_pool;
use_per_net_pools_ = c10::FLAGS_caffe2_net_async_use_per_net_pools;
streams_per_gpu_ = FLAGS_caffe2_streams_per_gpu;
finish_chain_ = FLAGS_caffe2_net_async_finish_chain;
always_schedule_child_ = FLAGS_caffe2_net_async_always_schedule_child;
check_stream_status_ = FLAGS_caffe2_net_async_check_stream_status;
use_single_pool_ = FLAGS_caffe2_net_async_use_single_pool;
use_per_net_pools_ = FLAGS_caffe2_net_async_use_per_net_pools;
is_blocking_ = false;
report_stats_ = false;
}

@ -183,8 +183,8 @@ GetAsyncNetCPUThreadPool(int numa_node_id, int pool_size, bool create_new) {
static std::mutex pool_mutex;

if (pool_size <= 0) {
if (c10::FLAGS_caffe2_net_async_cpu_pool_size > 0) {
pool_size = c10::FLAGS_caffe2_net_async_cpu_pool_size;
if (FLAGS_caffe2_net_async_cpu_pool_size > 0) {
pool_size = FLAGS_caffe2_net_async_cpu_pool_size;
LOG(INFO) << "Using default CPU pool size: " << pool_size
<< "; NUMA node id: " << numa_node_id;
} else {

@ -48,7 +48,7 @@ constexpr Color kWaitColor = 0x0066FF33; // green
class ProfiledRange {
public:
ProfiledRange(const OperatorDef& def, Color color) {
if (!c10::FLAGS_caffe2_use_nvtx) {
if (!FLAGS_caffe2_use_nvtx) {
return;
}
nvtxEventAttributes_t eventAttrib = {0};
@ -63,7 +63,7 @@ class ProfiledRange {
}

~ProfiledRange() {
if (!c10::FLAGS_caffe2_use_nvtx) {
if (!FLAGS_caffe2_use_nvtx) {
return;
}
nvtxRangeEnd(range_);
@ -119,8 +119,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option) {
}
do {
stream_id = stream_counters_[gpu_id]++;
stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
} while (c10::FLAGS_caffe2_net_async_check_stream_status &&
stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
} while (FLAGS_caffe2_net_async_check_stream_status &&
!CUDAContext::IsStreamFree(device_option, stream_id));
}
return stream_id;
@ -141,7 +141,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
"None of the parent is recorded for an event.");

int stream_id = 0;
if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
if (FLAGS_caffe2_async_dag_use_multiple_streams) {
stream_id = stream(
operator_nodes_[source_idx].operator_->event().GetDeviceOption());
}
@ -158,7 +158,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
}

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
const auto& device_option =
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
CAFFE_EVENT(
@ -184,7 +184,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
}

const auto& sink_idx = chain.back();
if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
if (success && FLAGS_caffe2_net_async_finish_chain) {
operator_nodes_[sink_idx].operator_->event().Finish();
}
CAFFE_ENFORCE(
@ -194,7 +194,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
" should not be recorded.");
eventRecorded_[sink_idx] = 1;

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
const auto& device_option =
operator_nodes_[source_idx].operator_->event().GetDeviceOption();
CAFFE_EVENT(

@ -11,17 +11,17 @@ C10_REGISTER_CREATOR(ThreadPoolRegistry, CUDA, GetAsyncNetGPUThreadPool);
std::shared_ptr<TaskThreadPoolBase>
GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
// For GPU, use per device thread pools of predefined constant size
if (pool_size != c10::FLAGS_caffe2_threads_per_gpu) {
if (pool_size != FLAGS_caffe2_threads_per_gpu) {
LOG(INFO) << "Overriding GPU pool size: using "
<< c10::FLAGS_caffe2_threads_per_gpu << " threads per GPU";
<< FLAGS_caffe2_threads_per_gpu << " threads per GPU";
}
static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
static std::mutex pool_mutex;

if (create_new) {
LOG(INFO) << "Created new GPU pool, size: "
<< c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
return std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
LOG(INFO) << "Created new GPU pool, size: " << FLAGS_caffe2_threads_per_gpu
<< "; GPU id: " << gpu_id;
return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
} else {
std::lock_guard<std::mutex> lock(pool_mutex);

@ -31,9 +31,9 @@ GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
}
if (!shared_pool) {
LOG(INFO) << "Created shared GPU pool, size: "
<< c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
<< FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
shared_pool =
std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
pools[gpu_id] = shared_pool;
}
return shared_pool;

@ -36,7 +36,7 @@ bool AsyncPollingNet::DoRunAsync() {

Timer timer;
bool success = pollAndSchedule();
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
CAFFE_EVENT(stats_[PROTO_CPU], poll_time_ms, timer.MilliSeconds());
}
if (!success) {
@ -49,14 +49,14 @@ bool AsyncPollingNet::DoRunAsync() {
}

void AsyncPollingNet::schedule(int task_id) {
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
task_timers_[task_id]->Start();
}
const auto& device_option = event(task_id).GetDeviceOption();
pool(device_option)->run([this, task_id, device_option]() {
int stream_id = stream(task_id);

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
CAFFE_EVENT(
stats_[device_option.device_type()],
task_pool_wait_time_us,
@ -64,7 +64,7 @@ void AsyncPollingNet::schedule(int task_id) {
}

try {
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
Timer run_time;
run(task_id, stream_id);
CAFFE_EVENT(
@ -104,7 +104,7 @@ bool AsyncPollingNet::pollAndSchedule() {
std::unordered_set<int> next_tasks;
updated_tasks.reserve(current_tasks.size());

if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
timer.Start();
}
if (has_chain_failed_) {
@ -121,7 +121,7 @@ bool AsyncPollingNet::pollAndSchedule() {

if (prev_status != status_[task_id]) {
updated_tasks.insert(task_id);
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
updateTaskStats(task_id);
}
}
@ -130,7 +130,7 @@ bool AsyncPollingNet::pollAndSchedule() {
next_tasks.insert(task_id);
}
}
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
CAFFE_EVENT(
stats_[PROTO_CPU], poll_status_update_time_us, timer.MicroSeconds());
}

@ -107,7 +107,7 @@ void AsyncSchedulingNet::schedule(int task_id, bool run_inline) {
if (!canSchedule(parent_id, child_id)) {
// we can't schedule a child because of this parent,
// check if parent supports callback
if (c10::FLAGS_caffe2_net_async_optimize_polling &&
if (FLAGS_caffe2_net_async_optimize_polling &&
parent_event.SupportsCallback()) {
parents_with_callback.push_back(parent_id);
} else {
@ -252,7 +252,7 @@ bool AsyncSchedulingNet::RunAsync() {

for (auto task_id = 0; task_id < tasksNum(); ++task_id) {
if (parents(task_id).empty()) {
schedule(task_id, c10::FLAGS_caffe2_net_async_run_root_tasks_inline);
schedule(task_id, FLAGS_caffe2_net_async_run_root_tasks_inline);
}
}
} catch (const std::exception& e) {

@ -56,7 +56,7 @@ int getCounterForNetName(const std::string& net_name) {
Tracer::Tracer(const NetBase* net, const std::string& net_name)
: net_(net), filename_(net_name), iter_(0) {
std::replace(filename_.begin(), filename_.end(), '/', '_');
filename_ = c10::FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
filename_ = FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
+"_id_" + caffe2::to_string(getCounterForNetName(net_name));
timer_.Start();
}
@ -375,8 +375,7 @@ int getUniqueShardId(const OperatorDef& op_def) {
}

bool isTraceableNetName(const std::string& net_name) {
auto tracing_nets =
caffe2::split(',', c10::FLAGS_caffe2_net_async_names_to_trace);
auto tracing_nets = caffe2::split(',', FLAGS_caffe2_net_async_names_to_trace);
return !net_name.empty() &&
std::find(tracing_nets.begin(), tracing_nets.end(), net_name) !=
tracing_nets.end();
@ -404,10 +403,10 @@ bool startIter(const std::shared_ptr<Tracer>& tracer) {
return false;
}
auto iter = tracer->bumpIter();
auto is_enabled = iter % c10::FLAGS_caffe2_net_async_tracing_nth == 0;
auto is_enabled = iter % FLAGS_caffe2_net_async_tracing_nth == 0;
tracer->setEnabled(is_enabled);
if (iter % c10::FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
int dumping_iter = iter / c10::FLAGS_caffe2_net_async_tracing_dumping_nth;
if (iter % FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
int dumping_iter = iter / FLAGS_caffe2_net_async_tracing_dumping_nth;
tracer->dumpTracingResultAndClearEvents(caffe2::to_string(dumping_iter));
}
return is_enabled;

@ -35,7 +35,7 @@ DAGNetBase::DAGNetBase(
operator_nodes_ = dag_utils::prepareOperatorNodes(net_def, ws);

execution_chains_ =
(c10::FLAGS_caffe2_disable_chaining
(FLAGS_caffe2_disable_chaining
? dag_utils::singleChains(operator_nodes_)
: dag_utils::computeChains(operator_nodes_));

@ -127,7 +127,7 @@ bool DAGNetBase::DoRunAsync() {
}
// Kickstart the job queue.
for (auto& value : initial_frontier_) {
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
task_timers_[value]->Start();
}
job_queue_->Push(value);
@ -213,7 +213,7 @@ void DAGNetBase::WorkerFunction() {
if (!job_queue_->Pop(&idx)) {
return;
}
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
auto device_option =
operator_nodes_[idx].operator_->event().GetDeviceOption();
CAFFE_EVENT(
@ -295,7 +295,7 @@ void DAGNetBase::WorkerFunction() {
// Can't do this inline because it can race with another thread
// calling NoMoreJobs(). So the lock needs to be held on push.
for (const auto idx : chains_to_queue) {
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
task_timers_[idx]->Start();
}
job_queue_->Push(idx);
@ -329,7 +329,7 @@ bool DAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
return false;
}
}
if (c10::FLAGS_caffe2_dag_net_collect_stats) {
if (FLAGS_caffe2_dag_net_collect_stats) {
auto device_option =
operator_nodes_[chain_id].operator_->event().GetDeviceOption();
CAFFE_EVENT(

@ -79,9 +79,9 @@ void checkChainingAndRun(
CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
{
net_def.set_num_workers(4);
auto old = c10::FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
c10::FLAGS_caffe2_disable_chaining = false;
auto old = FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
FLAGS_caffe2_disable_chaining = false;

std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());

@ -108,7 +108,7 @@ vector<float> SimpleNet::TEST_Benchmark(
".");
Timer timer;
auto millis = timer.MilliSeconds();
if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
for (int i = 0; i < main_runs; ++i) {
CAFFE_ENFORCE(Run(), "Main run ", i, " has failed.");
}
@ -270,7 +270,7 @@ vector<float> SimpleNet::TEST_Benchmark(
for (size_t i = 0; i < time_per_op.size(); ++i) {
time_per_op[i] /= main_runs;
}
if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
time_per_op.insert(time_per_op.begin(), millis / main_runs);
}
return time_per_op;

@ -150,9 +150,9 @@ void checkChainingAndRun(
TextFormat::ParseFromString(spec, &net_def));
{
net_def.set_num_workers(4);
auto old = c10::FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
c10::FLAGS_caffe2_disable_chaining = false;
auto old = FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
FLAGS_caffe2_disable_chaining = false;

std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
@ -177,9 +177,9 @@ void checkNumChainsAndRun(const char* spec, const int expected_num_chains) {
}

{
auto old = c10::FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
c10::FLAGS_caffe2_disable_chaining = false;
auto old = FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
FLAGS_caffe2_disable_chaining = false;

std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
@ -572,9 +572,9 @@ TEST(NetTest, DISABLED_FailingOperator) {

{
net_def.set_num_workers(4);
auto old = c10::FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
c10::FLAGS_caffe2_disable_chaining = false;
auto old = FLAGS_caffe2_disable_chaining;
auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
FLAGS_caffe2_disable_chaining = false;

std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
for (int i = 0; i < 10; i++) {
@ -684,9 +684,9 @@ TEST(NetTest, ExecutorOverride) {

{
Workspace ws;
auto old = c10::FLAGS_caffe2_override_executor;
auto g = MakeGuard([&]() { c10::FLAGS_caffe2_override_executor = old; });
c10::FLAGS_caffe2_override_executor = "dag,async_scheduling";
auto old = FLAGS_caffe2_override_executor;
auto g = MakeGuard([&]() { FLAGS_caffe2_override_executor = old; });
FLAGS_caffe2_override_executor = "dag,async_scheduling";

std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
auto async_net =

@ -12,7 +12,7 @@ namespace caffe2 {

#ifdef CAFFE2_NUMA_ENABLED
bool IsNUMAEnabled() {
return c10::FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
return FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
}

void NUMABind(int numa_node_id) {

@ -151,7 +151,7 @@ unique_ptr<OperatorBase> _CreateOperator(
const auto op_def_engines = split(',', operator_def.engine());
engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
}
if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
if (!FLAGS_caffe2_disable_implicit_engine_preference &&
g_per_op_engine_pref().count(device_type) &&
g_per_op_engine_pref()[device_type].count(op_type)) {
const auto& preferred_engines =
@ -160,7 +160,7 @@ unique_ptr<OperatorBase> _CreateOperator(
engines.insert(
engines.end(), preferred_engines.begin(), preferred_engines.end());
}
if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
if (!FLAGS_caffe2_disable_implicit_engine_preference &&
g_global_engine_pref().count(device_type)) {
const auto& preferred_engines = g_global_engine_pref()[device_type];
VLOG(2) << "Inserting global engine preference: " << preferred_engines;
@ -174,11 +174,11 @@ unique_ptr<OperatorBase> _CreateOperator(
auto op = TryCreateOperator(key, operator_def, ws);
if (op) {
if (engine.size() <=
(unsigned)c10::FLAGS_caffe2_operator_max_engine_name_length) {
(unsigned)FLAGS_caffe2_operator_max_engine_name_length) {
op->annotate_engine(engine);
} else {
op->annotate_engine(engine.substr(
0, c10::FLAGS_caffe2_operator_max_engine_name_length));
op->annotate_engine(
engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
}
return op;
} else {

@ -422,7 +422,7 @@ bool ExecuteStepRecursive(ExecutionStepWrapper& stepWrapper) {
LOG(ERROR) << "Parallel worker exception:\n" << first_exception;
}
compiledStep->gotFailure = true;
if (!c10::FLAGS_caffe2_handle_executor_threads_exceptions) {
if (!FLAGS_caffe2_handle_executor_threads_exceptions) {
// In complex plans other threads might get stuck if another
// one fails. So we let exception to go out of thread which
// causes SIGABRT. In local setup one might use this flag

@ -105,7 +105,7 @@ class CAFFE2_API Workspace {
}

~Workspace() {
if (c10::FLAGS_caffe2_print_blob_sizes_at_exit) {
if (FLAGS_caffe2_print_blob_sizes_at_exit) {
PrintBlobSizes();
}
// This is why we have a bookkeeper_ shared_ptr instead of a naked static! A

@ -60,7 +60,7 @@ class LevelDB : public DB {
public:
LevelDB(const string& source, Mode mode) : DB(source, mode) {
leveldb::Options options;
options.block_size = c10::FLAGS_caffe2_leveldb_block_size;
options.block_size = FLAGS_caffe2_leveldb_block_size;
options.write_buffer_size = 268435456;
options.max_open_files = 100;
options.error_if_exists = mode == NEW;

@ -45,8 +45,7 @@ namespace caffe2 {
LOG(ERROR) << "[C2DEBUG] after compareNetResult4D";
NetBase* net = ws->CreateNet(predict_net_def_gpu);
LOG(ERROR) << "[C2DEBUG] Benchmarking OpenGL Net";
net->TEST_Benchmark(
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
net->TEST_Benchmark(FLAGS_warmup, FLAGS_iter, FLAGS_run_individual);
// Test CPU
for (auto i = 0; i < predict_net_def.op().size(); ++i) {
auto op = predict_net_def.mutable_op(i);
@ -58,7 +57,6 @@ namespace caffe2 {
predict_net_def.set_name("cpu_net");
net = ws->CreateNet(predict_net_def);
LOG(INFO) << "[C2DEBUG] Benchmarking CPU Net";
net->TEST_Benchmark(
c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
net->TEST_Benchmark(FLAGS_warmup, FLAGS_iter, FLAGS_run_individual);
}
} // namespace caffe2

@ -49,7 +49,7 @@ Caffe2IOSPredictor::Caffe2IOSPredictor(const caffe2::NetDef& init_net,
}

void Caffe2IOSPredictor::run(const Tensor& inData, Tensor& outData, std::string& errorMessage) {
c10::FLAGS_caffe2_force_shared_col_buffer = true;
FLAGS_caffe2_force_shared_col_buffer = true;
caffe2::Tensor input(caffe2::CPU);
input.Resize(inData.dims);
input.ShareExternalPointer(inData.data);

@ -25,7 +25,7 @@ class ConvOp final : public ConvPoolOpBase<Context> {

// Create shared buffer mutex in the constructor
// to avoid race-condition in DAGNet.
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
createSharedBuffer<Context>(ws_);
}
}

@ -173,7 +173,7 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
Y_data += Y_stride;
}
};
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, func);
} else {
func(&col_buffer_);
@ -299,7 +299,7 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
Y_data += output_offset;
}
};
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, f);
} else {
f(&col_buffer_);

@ -129,7 +129,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
Ydata += Y->size() / Y->dim32(0);
}
};
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, f);
} else {
f(&col_buffer_);
@ -237,7 +237,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
Ydata += Y->size() / Y->dim32(0);
}
};
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, f);
} else {
f(&col_buffer_);

@ -679,7 +679,7 @@ bool ConvTransposeMobileOp<T, Context>::RunOnDeviceWithOrderNCHW() {
Ydata += Y->size() / Y->dim32(0);
}
};
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, f);
} else {
f(&threadBuffer_);

@ -126,7 +126,7 @@ class ConvTransposeUnpoolBase : public Operator<Context> {

// Create shared buffer mutex in the constructor
// to avoid race-condition in DAGNet.
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
createSharedBuffer<Context>(ws_);
}
}

@ -33,7 +33,7 @@ class CAFFE2_API WorkspaceStack {
Workspace* parent_ws,
const std::unordered_map<std::string, std::string>& blob_bindings) {
checkStack();
if (c10::FLAGS_caffe2_workspace_stack_debug) {
if (FLAGS_caffe2_workspace_stack_debug) {
if (parent_ws_) {
CAFFE_ENFORCE_EQ(parent_ws_, parent_ws, "Parent workspace mismatch");
} else {
@ -75,7 +75,7 @@ class CAFFE2_API WorkspaceStack {
Workspace* parent_ws,
const std::unordered_map<std::string, std::string>& grad_blob_bindings) {
checkStack();
if (c10::FLAGS_caffe2_workspace_stack_debug) {
if (FLAGS_caffe2_workspace_stack_debug) {
if (parent_ws_) {
CAFFE_ENFORCE_EQ(parent_ws_, parent_ws, "Parent workspace mismatch");
} else {

@ -61,7 +61,7 @@ class DeformConvOp final : public DeformConvOpBase<T, Context> {
: DeformConvOpBase<T, Context>(operator_def, ws) {
// Create shared buffer mutex in the constructor
// to avoid race-condition in DAGNet.
if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
createSharedBuffer<Context>(ws_);
}
}

@ -179,7 +179,7 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
}
};

if (c10::FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
runWithSharedBuffer<Context>(ws_, f);
} else {
f(&col_buffer_);

@ -197,7 +197,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
detail::AddApplyLinkOps(
links_, timestep_, operator_def.device_option(), &stepNetDef_);

if (c10::FLAGS_caffe2_rnn_executor && enable_rnn_executor_) {
if (FLAGS_caffe2_rnn_executor && enable_rnn_executor_) {
VLOG(1) << "Use RecurrentNetworkExecutor";
auto recurrent_map = detail::GetRecurrentMapping(links_, false /* backward */);
rnnExecutor_ =
@ -433,7 +433,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
links_, timestep_, operator_def.device_option(), &stepNetDef_);
AddParamGradientAccumulationOps(operator_def);

if (c10::FLAGS_caffe2_rnn_executor && enable_rnn_executor_) {
if (FLAGS_caffe2_rnn_executor && enable_rnn_executor_) {
InitializeExecutor(operator_def);
}
}

@ -502,7 +502,7 @@ class Depthwise3x3ConvOp final : public ConvPoolOpBase<CPUContext> {
}
}
#endif
if (c10::FLAGS_caffe2_profile_depthwise) {
if (FLAGS_caffe2_profile_depthwise) {
char buffer[1024];
const double gmacs = double(
Y->dim32(2) * Y->dim32(3) * Y->dim32(1) *

@ -321,7 +321,7 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() {
activation_,
nullptr /* activation parameter */,
pool,
c10::FLAGS_caffe2_profile_nnpack ? &profile : nullptr);
FLAGS_caffe2_profile_nnpack ? &profile : nullptr);
if (status == nnp_status_insufficient_buffer) {
/* Query required workspace size, increase buffer, and try again */
status = nnp_convolution_inference(
@ -375,7 +375,7 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() {
activation_,
nullptr /* activation parameter */,
pool,
c10::FLAGS_caffe2_profile_nnpack ? &profile : nullptr);
FLAGS_caffe2_profile_nnpack ? &profile : nullptr);
}
}

@ -383,7 +383,7 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() {
CAFFE_ENFORCE(
nnp_status_success == status,
"NNPACK convolution computation returned error");
if (c10::FLAGS_caffe2_profile_nnpack) {
if (FLAGS_caffe2_profile_nnpack) {
char buffer[1024];
const double gmacs =
double(

@ -421,7 +421,7 @@ bool printStackTracesOnFatalSignal() {

namespace internal {
bool Caffe2InitFatalSignalHandler(int*, char***) {
if (c10::FLAGS_caffe2_print_stacktraces) {
if (FLAGS_caffe2_print_stacktraces) {
setPrintStackTracesOnFatalSignal(true);
}
return true;

@ -27,9 +27,9 @@ std::unique_ptr<ThreadPool> ThreadPool::defaultThreadPool() {

bool applyCap = false;
#if CAFFE2_ANDROID
applyCap = c10::FLAGS_caffe2_threadpool_android_cap;
applyCap = FLAGS_caffe2_threadpool_android_cap;
#elif CAFFE2_IOS
applyCap = c10::FLAGS_caffe2_threadpool_ios_cap;
applyCap = FLAGS_caffe2_threadpool_ios_cap;
#endif

if (applyCap) {
@ -101,7 +101,7 @@ void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
// If there are no worker threads, or if the range is too small (too
// little work), just run locally
const bool runLocally = range < minWorkSize_ ||
c10::FLAGS_caffe2_threadpool_force_inline || (numThreads_ == 0);
FLAGS_caffe2_threadpool_force_inline || (numThreads_ == 0);
if (runLocally) {
// Work is small enough to just run locally; multithread overhead
// is too high

@ -51,11 +51,11 @@ bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) {
caffe2::make_unique<caffe2::NetObserverReporterPrint>());

caffe2::ObserverConfig::initSampleRate(
c10::FLAGS_aiBench_netInitSampleRate,
c10::FLAGS_aiBench_netFollowupSampleRate,
c10::FLAGS_aiBench_netFollowupSampleCount,
c10::FLAGS_aiBench_operatorNetSampleRatio,
c10::FLAGS_aiBench_skipIters);
FLAGS_aiBench_netInitSampleRate,
FLAGS_aiBench_netFollowupSampleRate,
FLAGS_aiBench_netFollowupSampleCount,
FLAGS_aiBench_operatorNetSampleRatio,
FLAGS_aiBench_skipIters);
#endif

return true;

@ -76,7 +76,7 @@ class RocksDB : public DB {
public:
RocksDB(const string& source, Mode mode) : DB(source, mode) {
rocksdb::LevelDBOptions options;
options.block_size = c10::FLAGS_caffe2_rocksdb_block_size;
options.block_size = FLAGS_caffe2_rocksdb_block_size;
options.write_buffer_size = 268435456;
options.max_open_files = 100;
options.error_if_exists = mode == NEW;