move flags to c10 (#12144)

Summary: still in flux. Pull Request resolved: https://github.com/pytorch/pytorch/pull/12144 Reviewed By: smessmer Differential Revision: D10140176 Pulled By: Yangqing fbshipit-source-id: 1a313abed022039333e3925d19f8b3ef2d95306c
2025-10-20 21:14:14 +08:00 · 2018-10-04 02:07:11 -07:00
parent c9f7d7b506
commit 38f3d1fc40
180 changed files with 1342 additions and 1260 deletions
--- a/aten/src/ATen/core/C++17.h
+++ b/aten/src/ATen/core/C++17.h
@ -251,5 +251,4 @@ template<class T> inline std::string to_string(T value) {
    return detail::to_string_<T>::call(value);
 }

-
 }}
--- a/aten/src/ATen/core/TensorImpl.h
+++ b/aten/src/ATen/core/TensorImpl.h
@ -11,9 +11,10 @@
 #include <ATen/core/context_base.h>
 #include <ATen/core/optional.h>

+#include "c10/util/Flags.h"
+
 #include "caffe2/core/allocator.h"
 #include "caffe2/core/common.h"
-#include "caffe2/core/flags.h"
 #include "caffe2/core/logging.h"

 // A global boolean variable to control whether we free memory when a Tensor
@ -23,14 +24,13 @@
 // This parameter is respected "upper-case" methods which call Resize()
 // (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_
 // or ShrinkTo, both of which guarantee to never to free memory.
-CAFFE2_DECLARE_bool(caffe2_keep_on_shrink);
+C10_DECLARE_bool(caffe2_keep_on_shrink);

 // Since we can have high variance in blob memory allocated across different
 // inputs in the same run, we will shrink the blob only if the memory gain
 // is larger than this flag in bytes.  This only applies to functions which
 // respect caffe2_keep_on_shrink.
-CAFFE2_DECLARE_int64(caffe2_max_keep_on_shrink_memory);
-
+C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory);

 namespace caffe2 {

@ -604,10 +604,13 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
        // is smaller than new size
        reset_tensor = storage_.capacity() < (storage_offset_ + numel_) * storage_.itemsize();
      } else {
-        reset_tensor = storage_.capacity() < (storage_offset_ + numel_) * storage_.itemsize() ||
-            !caffe2::FLAGS_caffe2_keep_on_shrink ||
-            storage_.capacity() - (storage_offset_ + numel_) * storage_.itemsize() >
-                static_cast<size_t>(caffe2::FLAGS_caffe2_max_keep_on_shrink_memory);
+        reset_tensor = storage_.capacity() <
+                (storage_offset_ + numel_) * storage_.itemsize() ||
+            !c10::FLAGS_caffe2_keep_on_shrink ||
+            storage_.capacity() -
+                    (storage_offset_ + numel_) * storage_.itemsize() >
+                static_cast<size_t>(
+                    c10::FLAGS_caffe2_max_keep_on_shrink_memory);
      }

      if (reset_tensor && !is_init) {
--- a/aten/src/ATen/mkl/README.md
+++ b/aten/src/ATen/mkl/README.md
@ -1,4 +1,4 @@
 All files living in this directory are written with the assumption that MKL is available,
 which means that these code are not guarded by `#if AT_MKL_ENABLED()`. Therefore, whenever
 you need to use definitions from here, please guard the `#include<ATen/mkl/*.h>` and
-definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp).
+definition usages with `#if AT_MKL_ENABLED()` macro, e.g. [SpectralOps.cpp](native/mkl/SpectralOps.cpp).
--- a/aten/src/TH/generic/simd/convolve.h
+++ b/aten/src/TH/generic/simd/convolve.h
@ -1 +1 @@
-void convolve_5x5(float* output, float* input, float* kernel, int64_t outRows, int64_t outCols, int64_t inCols);
+void convolve_5x5(float* output, float* input, float* kernel, int64_t outRows, int64_t outCols, int64_t inCols);
--- a/aten/src/TH/generic/simd/convolve5x5_avx.cpp
+++ b/aten/src/TH/generic/simd/convolve5x5_avx.cpp
@ -211,4 +211,4 @@ void convolve_5x5_avx(float* output, float* input, float* kernel, int64_t outRow
    CLEAR_AVX();
    convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols);
  }
-}
+}
--- a/aten/src/THCUNN/generic/SpatialFullConvolution.cu
+++ b/aten/src/THCUNN/generic/SpatialFullConvolution.cu
@ -58,4 +58,4 @@ void THNN_(SpatialFullConvolution_accGradParameters)(
      kW, kH, dW, dH, padW, padH, 1, 1, adjW, adjH, scale_);
 }

-#endif
+#endif
--- a/binaries/caffe2_benchmark.cc
+++ b/binaries/caffe2_benchmark.cc
@ -9,22 +9,19 @@ using std::map;
 using std::string;
 using std::vector;

-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    backend,
    "builtin",
    "The backend to use when running the model. The allowed "
    "backend choices are: builtin, default, nnpack, eigen, mkl, cuda");

-CAFFE2_DEFINE_string(
-    init_net,
-    "",
-    "The given net to initialize any parameters.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(init_net, "", "The given net to initialize any parameters.");
+C10_DEFINE_string(
    input,
    "",
    "Input that is needed for running the network. If "
    "multiple input needed, use comma separated string.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    input_dims,
    "",
    "Alternate to input_files, if all inputs are simple "
@ -32,46 +29,46 @@ CAFFE2_DEFINE_string(
    "separated numbers. If multiple input needed, use "
    "semicolon to separate the dimension of different "
    "tensors.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    input_file,
    "",
    "Input file that contain the serialized protobuf for "
    "the input blobs. If multiple input needed, use comma "
    "separated string. Must have the same number of items "
    "as input does.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    input_type,
    "float",
    "Input type when specifying the input dimension."
    "The supported types are float, uint8_t.");
-CAFFE2_DEFINE_int(iter, 10, "The number of iterations to run.");
-CAFFE2_DEFINE_string(net, "", "The given net to benchmark.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_int(iter, 10, "The number of iterations to run.");
+C10_DEFINE_string(net, "", "The given net to benchmark.");
+C10_DEFINE_string(
    output,
    "",
    "Output that should be dumped after the execution "
    "finishes. If multiple outputs are needed, use comma "
    "separated string. If you want to dump everything, pass "
    "'*' as the output value.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    output_folder,
    "",
    "The folder that the output should be written to. This "
    "folder must already exist in the file system.");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    run_individual,
    false,
    "Whether to benchmark individual operators.");
-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    sleep_before_run,
    0,
    "The seconds to sleep before starting the benchmarking.");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    text_output,
    false,
    "Whether to write out output in text format for regression purpose.");
-CAFFE2_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
+C10_DEFINE_bool(
    wipe_cache,
    false,
    "Whether to evict the cache before running network.");
@ -81,19 +78,19 @@ int main(int argc, char** argv) {
  benchmark(
      argc,
      argv,
-      caffe2::FLAGS_backend,
-      caffe2::FLAGS_init_net,
-      caffe2::FLAGS_input,
-      caffe2::FLAGS_input_dims,
-      caffe2::FLAGS_input_file,
-      caffe2::FLAGS_input_type,
-      caffe2::FLAGS_iter,
-      caffe2::FLAGS_net,
-      caffe2::FLAGS_output,
-      caffe2::FLAGS_output_folder,
-      caffe2::FLAGS_run_individual,
-      caffe2::FLAGS_sleep_before_run,
-      caffe2::FLAGS_text_output,
-      caffe2::FLAGS_warmup,
-      caffe2::FLAGS_wipe_cache);
+      c10::FLAGS_backend,
+      c10::FLAGS_init_net,
+      c10::FLAGS_input,
+      c10::FLAGS_input_dims,
+      c10::FLAGS_input_file,
+      c10::FLAGS_input_type,
+      c10::FLAGS_iter,
+      c10::FLAGS_net,
+      c10::FLAGS_output,
+      c10::FLAGS_output_folder,
+      c10::FLAGS_run_individual,
+      c10::FLAGS_sleep_before_run,
+      c10::FLAGS_text_output,
+      c10::FLAGS_warmup,
+      c10::FLAGS_wipe_cache);
 }
--- a/binaries/convert_caffe_image_db.cc
+++ b/binaries/convert_caffe_image_db.cc
@ -20,11 +20,11 @@
 #include "caffe2/proto/caffe2_legacy.pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(input_db, "", "The input db.");
-CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
-CAFFE2_DEFINE_string(output_db, "", "The output db.");
-CAFFE2_DEFINE_string(output_db_type, "", "The output db type.");
-CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
+C10_DEFINE_string(input_db, "", "The input db.");
+C10_DEFINE_string(input_db_type, "", "The input db type.");
+C10_DEFINE_string(output_db, "", "The output db.");
+C10_DEFINE_string(output_db_type, "", "The output db type.");
+C10_DEFINE_int(batch_size, 1000, "The write batch size.");

 using caffe2::db::Cursor;
 using caffe2::db::DB;
@ -37,9 +37,9 @@ int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);

  std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
-      caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
+      c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
  std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
-      caffe2::FLAGS_output_db_type, caffe2::FLAGS_output_db, caffe2::db::NEW));
+      c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
  std::unique_ptr<Cursor> cursor(in_db->NewCursor());
  std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
  int count = 0;
@ -80,7 +80,7 @@ int main(int argc, char** argv) {
      data->set_byte_data(buffer, datum.data().size());
    }
    transaction->Put(cursor->key(), protos.SerializeAsString());
-    if (++count % caffe2::FLAGS_batch_size == 0) {
+    if (++count % c10::FLAGS_batch_size == 0) {
      transaction->Commit();
      LOG(INFO) << "Converted " << count << " items so far.";
    }
--- a/binaries/convert_db.cc
+++ b/binaries/convert_db.cc
@ -19,11 +19,11 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(input_db, "", "The input db.");
-CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
-CAFFE2_DEFINE_string(output_db, "", "The output db.");
-CAFFE2_DEFINE_string(output_db_type, "", "The output db type.");
-CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
+C10_DEFINE_string(input_db, "", "The input db.");
+C10_DEFINE_string(input_db_type, "", "The input db type.");
+C10_DEFINE_string(output_db, "", "The output db.");
+C10_DEFINE_string(output_db_type, "", "The output db type.");
+C10_DEFINE_int(batch_size, 1000, "The write batch size.");

 using caffe2::db::Cursor;
 using caffe2::db::DB;
@ -33,15 +33,15 @@ int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);

  std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
-      caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
+      c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
  std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
-      caffe2::FLAGS_output_db_type, caffe2::FLAGS_output_db, caffe2::db::NEW));
+      c10::FLAGS_output_db_type, c10::FLAGS_output_db, caffe2::db::NEW));
  std::unique_ptr<Cursor> cursor(in_db->NewCursor());
  std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
  int count = 0;
  for (; cursor->Valid(); cursor->Next()) {
    transaction->Put(cursor->key(), cursor->value());
-    if (++count % caffe2::FLAGS_batch_size == 0) {
+    if (++count % c10::FLAGS_batch_size == 0) {
      transaction->Commit();
      LOG(INFO) << "Converted " << count << " items so far.";
    }
--- a/binaries/convert_encoded_to_raw_leveldb.cc
+++ b/binaries/convert_encoded_to_raw_leveldb.cc
@ -16,9 +16,9 @@

 // This script converts an image dataset to leveldb.
 //
-// caffe2::FLAGS_input_folder is the root folder that holds all the images, and
-// caffe2::FLAGS_list_file should be a list of files as well as their labels, in the
-// format as
+// c10::FLAGS_input_folder is the root folder that holds all the images, and
+// c10::FLAGS_list_file should be a list of files as well as their labels, in
+// the format as
 //   subfolder1/file1.JPEG 7
 //   ....

@ -35,14 +35,15 @@
 #include "leveldb/db.h"
 #include "leveldb/write_batch.h"

-CAFFE2_DEFINE_string(input_db_name, "", "The input image file name.");
-CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
-CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
-CAFFE2_DEFINE_int(scale, 256,
-    "If caffe2::FLAGS_raw is set, scale all the images' shorter edge to the given "
+C10_DEFINE_string(input_db_name, "", "The input image file name.");
+C10_DEFINE_string(output_db_name, "", "The output training leveldb name.");
+C10_DEFINE_bool(color, true, "If set, load images in color.");
+C10_DEFINE_int(
+    scale,
+    256,
+    "If c10::FLAGS_raw is set, scale all the images' shorter edge to the given "
    "value.");
-CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
-
+C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");

 namespace caffe2 {

@ -92,7 +93,7 @@ void ConvertToRawDataset(
  data->set_data_type(TensorProto::BYTE);
  data->add_dims(0);
  data->add_dims(0);
-  if (caffe2::FLAGS_color) {
+  if (c10::FLAGS_color) {
    data->add_dims(3);
  }
  string value;
@ -107,28 +108,30 @@ void ConvertToRawDataset(
    const string& encoded_image = input_protos.protos(0).string_data(0);
    int encoded_size = encoded_image.size();
    cv::Mat img = cv::imdecode(
-        cv::Mat(1, &encoded_size, CV_8UC1,
-        const_cast<char*>(encoded_image.data())),
-        caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
+        cv::Mat(
+            1, &encoded_size, CV_8UC1, const_cast<char*>(encoded_image.data())),
+        c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
    cv::Mat resized_img;
    int scaled_width, scaled_height;
-    if (caffe2::FLAGS_warp) {
-      scaled_width = caffe2::FLAGS_scale;
-      scaled_height = caffe2::FLAGS_scale;
+    if (c10::FLAGS_warp) {
+      scaled_width = c10::FLAGS_scale;
+      scaled_height = c10::FLAGS_scale;
    } else if (img.rows > img.cols) {
-      scaled_width = caffe2::FLAGS_scale;
-      scaled_height = static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
+      scaled_width = c10::FLAGS_scale;
+      scaled_height =
+          static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
    } else {
-      scaled_height = caffe2::FLAGS_scale;
-      scaled_width = static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
+      scaled_height = c10::FLAGS_scale;
+      scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
    }
    cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
                 cv::INTER_LINEAR);
    data->set_dims(0, scaled_height);
    data->set_dims(1, scaled_width);
    DCHECK(resized_img.isContinuous());
-    data->set_byte_data(resized_img.ptr(),
-                        scaled_height * scaled_width * (caffe2::FLAGS_color ? 3 : 1));
+    data->set_byte_data(
+        resized_img.ptr(),
+        scaled_height * scaled_width * (c10::FLAGS_color ? 3 : 1));
    output_protos.SerializeToString(&value);
    // Put in db
    batch->Put(iter->key(), value);
@ -151,6 +154,6 @@ void ConvertToRawDataset(
 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  caffe2::ConvertToRawDataset(
-      caffe2::FLAGS_input_db_name, caffe2::FLAGS_output_db_name);
+      c10::FLAGS_input_db_name, c10::FLAGS_output_db_name);
  return 0;
 }
--- a/binaries/convert_image_to_tensor.cc
+++ b/binaries/convert_image_to_tensor.cc
@ -25,14 +25,14 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
-CAFFE2_DEFINE_string(input_images, "", "Comma separated images");
-CAFFE2_DEFINE_string(input_image_file, "", "The file containing imput images");
-CAFFE2_DEFINE_string(output_tensor, "", "The output tensor file in NCHW");
-CAFFE2_DEFINE_int(scale, 256, "Scale the shorter edge to the given value.");
-CAFFE2_DEFINE_bool(text_output, false, "Write the output in text format.");
-CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_bool(color, true, "If set, load images in color.");
+C10_DEFINE_string(input_images, "", "Comma separated images");
+C10_DEFINE_string(input_image_file, "", "The file containing imput images");
+C10_DEFINE_string(output_tensor, "", "The output tensor file in NCHW");
+C10_DEFINE_int(scale, 256, "Scale the shorter edge to the given value.");
+C10_DEFINE_bool(text_output, false, "Write the output in text format.");
+C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
+C10_DEFINE_string(
    preprocess,
    "",
    "Options to specify the preprocess routines. The available options are "
@ -44,17 +44,15 @@ namespace caffe2 {
 cv::Mat resizeImage(cv::Mat& img) {
  cv::Mat resized_img;
  int scaled_width, scaled_height;
-  if (caffe2::FLAGS_warp) {
-    scaled_width = caffe2::FLAGS_scale;
-    scaled_height = caffe2::FLAGS_scale;
+  if (c10::FLAGS_warp) {
+    scaled_width = c10::FLAGS_scale;
+    scaled_height = c10::FLAGS_scale;
  } else if (img.rows > img.cols) {
-    scaled_width = caffe2::FLAGS_scale;
-    scaled_height =
-        static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
+    scaled_width = c10::FLAGS_scale;
+    scaled_height = static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
  } else {
-    scaled_height = caffe2::FLAGS_scale;
-    scaled_width =
-        static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
+    scaled_height = c10::FLAGS_scale;
+    scaled_width = static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
  }
  cv::resize(
      img,
@ -89,9 +87,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
  std::vector<float> mean(3, 0);
  std::vector<float> std(3, 1);
  bool bgrtorgb = false;
-  assert(img.cols == caffe2::FLAGS_scale);
-  assert(img.rows == caffe2::FLAGS_scale);
-  vector<string> steps = caffe2::split(',', caffe2::FLAGS_preprocess);
+  assert(img.cols == c10::FLAGS_scale);
+  assert(img.rows == c10::FLAGS_scale);
+  vector<string> steps = caffe2::split(',', c10::FLAGS_preprocess);
  for (int i = 0; i < steps.size(); i++) {
    auto step = steps[i];
    if (step == "subtract128") {
@ -114,8 +112,8 @@ std::vector<float> convertToVector(cv::Mat& img) {
    }
  }

-  int C = caffe2::FLAGS_color ? 3 : 1;
-  int total_size = C * caffe2::FLAGS_scale * caffe2::FLAGS_scale;
+  int C = c10::FLAGS_color ? 3 : 1;
+  int total_size = C * c10::FLAGS_scale * c10::FLAGS_scale;
  std::vector<float> values(total_size);
  if (C == 1) {
    cv::MatIterator_<uchar> it, end;
@ -132,9 +130,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
    for (it = img.begin<cv::Vec3b>(), end = img.end<cv::Vec3b>(); it != end;
         ++it, i++) {
      values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]);
-      int offset = caffe2::FLAGS_scale * caffe2::FLAGS_scale + i;
+      int offset = c10::FLAGS_scale * c10::FLAGS_scale + i;
      values[offset] = (((*it)[g] / normalize[1] - mean[1]) / std[1]);
-      offset = caffe2::FLAGS_scale * caffe2::FLAGS_scale + offset;
+      offset = c10::FLAGS_scale * c10::FLAGS_scale + offset;
      values[offset] = (((*it)[r] / normalize[2] - mean[2]) / std[2]);
    }
  }
@ -148,7 +146,7 @@ std::vector<float> convertOneImage(std::string& filename) {
  // Load image
  cv::Mat img = cv::imread(
      filename,
-      caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
+      c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);

  cv::Mat crop = cropToSquare(img);

@ -157,17 +155,17 @@ std::vector<float> convertOneImage(std::string& filename) {
  // Assert we don't have to deal with alignment
  DCHECK(resized_img.isContinuous());
  assert(resized_img.rows == resized_img.cols);
-  assert(resized_img.rows == caffe2::FLAGS_scale);
+  assert(resized_img.rows == c10::FLAGS_scale);
  std::vector<float> one_image_values = convertToVector(resized_img);
  return one_image_values;
 }

 void convertImages() {
  vector<string> file_names;
-  if (caffe2::FLAGS_input_images != "") {
-    file_names = caffe2::split(',', caffe2::FLAGS_input_images);
-  } else if (caffe2::FLAGS_input_image_file != "") {
-    std::ifstream infile(caffe2::FLAGS_input_image_file);
+  if (c10::FLAGS_input_images != "") {
+    file_names = caffe2::split(',', c10::FLAGS_input_images);
+  } else if (c10::FLAGS_input_image_file != "") {
+    std::ifstream infile(c10::FLAGS_input_image_file);
    std::string line;
    while (std::getline(infile, line)) {
      vector<string> file_name = caffe2::split(',', line);
@ -183,7 +181,7 @@ void convertImages() {
    assert(false);
  }
  std::vector<std::vector<float>> values;
-  int C = caffe2::FLAGS_color ? 3 : 1;
+  int C = c10::FLAGS_color ? 3 : 1;
  for (int i = 0; i < file_names.size(); i++) {
    std::vector<float> one_image_values = convertOneImage(file_names[i]);
    values.push_back(one_image_values);
@ -195,19 +193,19 @@ void convertImages() {
  data->set_data_type(TensorProto::FLOAT);
  data->add_dims(values.size());
  data->add_dims(C);
-  data->add_dims(caffe2::FLAGS_scale);
-  data->add_dims(caffe2::FLAGS_scale);
+  data->add_dims(c10::FLAGS_scale);
+  data->add_dims(c10::FLAGS_scale);

  for (int i = 0; i < values.size(); i++) {
-    assert(values[i].size() == C * caffe2::FLAGS_scale * caffe2::FLAGS_scale);
+    assert(values[i].size() == C * c10::FLAGS_scale * c10::FLAGS_scale);
    for (int j = 0; j < values[i].size(); j++) {
      data->add_float_data(values[i][j]);
    }
  }
-  if (caffe2::FLAGS_text_output) {
-    caffe2::WriteProtoToTextFile(protos, caffe2::FLAGS_output_tensor);
+  if (c10::FLAGS_text_output) {
+    caffe2::WriteProtoToTextFile(protos, c10::FLAGS_output_tensor);
  } else {
-    caffe2::WriteProtoToBinaryFile(protos, caffe2::FLAGS_output_tensor);
+    caffe2::WriteProtoToBinaryFile(protos, c10::FLAGS_output_tensor);
  }
 }

--- a/binaries/db_throughput.cc
+++ b/binaries/db_throughput.cc
@ -23,13 +23,15 @@
 #include "caffe2/core/timer.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(input_db, "", "The input db.");
-CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
-CAFFE2_DEFINE_int(report_interval, 1000, "The report interval.");
-CAFFE2_DEFINE_int(repeat, 10, "The number to repeat the throughput test.");
-CAFFE2_DEFINE_bool(use_reader, false, "If true, use the reader interface.");
-CAFFE2_DEFINE_int(num_read_threads, 1,
-                   "The number of concurrent reading threads.");
+C10_DEFINE_string(input_db, "", "The input db.");
+C10_DEFINE_string(input_db_type, "", "The input db type.");
+C10_DEFINE_int(report_interval, 1000, "The report interval.");
+C10_DEFINE_int(repeat, 10, "The number to repeat the throughput test.");
+C10_DEFINE_bool(use_reader, false, "If true, use the reader interface.");
+C10_DEFINE_int(
+    num_read_threads,
+    1,
+    "The number of concurrent reading threads.");

 using caffe2::db::Cursor;
 using caffe2::db::DB;
@ -38,11 +40,11 @@ using caffe2::string;

 void TestThroughputWithDB() {
  std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
-      caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ));
+      c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ));
  std::unique_ptr<Cursor> cursor(in_db->NewCursor());
-  for (int iter_id = 0; iter_id < caffe2::FLAGS_repeat; ++iter_id) {
+  for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
    caffe2::Timer timer;
-    for (int i = 0; i < caffe2::FLAGS_report_interval; ++i) {
+    for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
      string key = cursor->key();
      string value = cursor->value();
      //VLOG(1) << "Key " << key;
@ -52,32 +54,36 @@ void TestThroughputWithDB() {
      }
    }
    double elapsed_seconds = timer.Seconds();
-    printf("Iteration %03d, took %4.5f seconds, throughput %f items/sec.\n",
-           iter_id, elapsed_seconds,
-           caffe2::FLAGS_report_interval / elapsed_seconds);
+    printf(
+        "Iteration %03d, took %4.5f seconds, throughput %f items/sec.\n",
+        iter_id,
+        elapsed_seconds,
+        c10::FLAGS_report_interval / elapsed_seconds);
  }
 }

 void TestThroughputWithReaderWorker(const DBReader* reader, int thread_id) {
  string key, value;
-  for (int iter_id = 0; iter_id < caffe2::FLAGS_repeat; ++iter_id) {
+  for (int iter_id = 0; iter_id < c10::FLAGS_repeat; ++iter_id) {
    caffe2::Timer timer;
-    for (int i = 0; i < caffe2::FLAGS_report_interval; ++i) {
+    for (int i = 0; i < c10::FLAGS_report_interval; ++i) {
      reader->Read(&key, &value);
    }
    double elapsed_seconds = timer.Seconds();
-    printf("Thread %03d iteration %03d, took %4.5f seconds, "
-           "throughput %f items/sec.\n",
-           thread_id, iter_id, elapsed_seconds,
-           caffe2::FLAGS_report_interval / elapsed_seconds);
+    printf(
+        "Thread %03d iteration %03d, took %4.5f seconds, "
+        "throughput %f items/sec.\n",
+        thread_id,
+        iter_id,
+        elapsed_seconds,
+        c10::FLAGS_report_interval / elapsed_seconds);
  }
 }

 void TestThroughputWithReader() {
-  caffe2::db::DBReader reader(
-      caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db);
+  caffe2::db::DBReader reader(c10::FLAGS_input_db_type, c10::FLAGS_input_db);
  std::vector<std::unique_ptr<std::thread>> reading_threads(
-      caffe2::FLAGS_num_read_threads);
+      c10::FLAGS_num_read_threads);
  for (int i = 0; i < reading_threads.size(); ++i) {
    reading_threads[i].reset(new std::thread(
        TestThroughputWithReaderWorker, &reader, i));
@ -89,7 +95,7 @@ void TestThroughputWithReader() {

 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
-  if (caffe2::FLAGS_use_reader) {
+  if (c10::FLAGS_use_reader) {
    TestThroughputWithReader();
  } else {
    TestThroughputWithDB();
--- a/binaries/inspect_gpu.cc
+++ b/binaries/inspect_gpu.cc
@ -19,17 +19,18 @@
 #include <sstream>
 #include <vector>

+#include "c10/util/Flags.h"
 #include "caffe2/core/common_gpu.h"
 #include "caffe2/core/init.h"
 #include "caffe2/core/logging.h"

 using std::vector;

-CAFFE2_DECLARE_int(caffe2_log_level);
+C10_DECLARE_int(caffe2_log_level);

 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
-  caffe2::SetUsageMessage(
+  c10::SetUsageMessage(
      "Inspects the GPUs on the current machine and prints out their details "
      "provided by cuda.");

--- a/binaries/make_cifar_db.cc
+++ b/binaries/make_cifar_db.cc
@ -33,14 +33,14 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(input_folder, "", "The input folder name.");
-CAFFE2_DEFINE_string(output_train_db_name,
-                     "", "The output training db name.");
-CAFFE2_DEFINE_string(output_test_db_name,
-                     "", "The output testing db name.");
-CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
-CAFFE2_DEFINE_bool(is_cifar100, false,
-            "If set, convert cifar100. Otherwise do cifar10.");
+C10_DEFINE_string(input_folder, "", "The input folder name.");
+C10_DEFINE_string(output_train_db_name, "", "The output training db name.");
+C10_DEFINE_string(output_test_db_name, "", "The output testing db name.");
+C10_DEFINE_string(db, "leveldb", "The db type.");
+C10_DEFINE_bool(
+    is_cifar100,
+    false,
+    "If set, convert cifar100. Otherwise do cifar10.");

 namespace caffe2 {

@ -57,7 +57,7 @@ const int kCIFAR100TestDataSize = 10000;

 void ReadImage(std::ifstream* file, int* label, char* buffer) {
  char label_char;
-  if (caffe2::FLAGS_is_cifar100) {
+  if (c10::FLAGS_is_cifar100) {
    // Skip the coarse label.
    file->read(&label_char, 1);
  }
@ -110,31 +110,29 @@ void WriteToDB(const string& filename, const int num_items,

 void ConvertCIFAR() {
  std::unique_ptr<db::DB> train_db(
-      db::CreateDB(caffe2::FLAGS_db, caffe2::FLAGS_output_train_db_name,
-                   db::NEW));
+      db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_train_db_name, db::NEW));
  std::unique_ptr<db::DB> test_db(
-      db::CreateDB(caffe2::FLAGS_db, caffe2::FLAGS_output_test_db_name,
-                   db::NEW));
+      db::CreateDB(c10::FLAGS_db, c10::FLAGS_output_test_db_name, db::NEW));

-  if (!caffe2::FLAGS_is_cifar100) {
+  if (!c10::FLAGS_is_cifar100) {
    // This is cifar 10.
    for (int fileid = 0; fileid < kCIFAR10TrainBatches; ++fileid) {
      stringstream train_file;
-      train_file << caffe2::FLAGS_input_folder << "/data_batch_" << fileid + 1
+      train_file << c10::FLAGS_input_folder << "/data_batch_" << fileid + 1
                 << ".bin";
      WriteToDB(train_file.str(), kCIFAR10BatchSize,
                fileid * kCIFAR10BatchSize, train_db.get());
    }
    stringstream test_file;
-    test_file << caffe2::FLAGS_input_folder << "/test_batch.bin";
+    test_file << c10::FLAGS_input_folder << "/test_batch.bin";
    WriteToDB(test_file.str(), kCIFAR10TestDataSize, 0, test_db.get());
  } else {
    // This is cifar 100.
    stringstream train_file;
-    train_file << caffe2::FLAGS_input_folder << "/train.bin";
+    train_file << c10::FLAGS_input_folder << "/train.bin";
    WriteToDB(train_file.str(), kCIFAR100TrainDataSize, 0, train_db.get());
    stringstream test_file;
-    test_file << caffe2::FLAGS_input_folder << "/test.bin";
+    test_file << c10::FLAGS_input_folder << "/test.bin";
    WriteToDB(test_file.str(), kCIFAR100TestDataSize, 0, test_db.get());
  }
 }
--- a/binaries/make_image_db.cc
+++ b/binaries/make_image_db.cc
@ -16,9 +16,9 @@

 // This script converts an image dataset to a database.
 //
-// caffe2::FLAGS_input_folder is the root folder that holds all the images
+// c10::FLAGS_input_folder is the root folder that holds all the images
 //
-// caffe2::FLAGS_list_file is the path to a file containing a list of files
+// c10::FLAGS_list_file is the path to a file containing a list of files
 // and their labels, as follows:
 //
 //   subfolder1/file1.JPEG 7
@ -42,24 +42,28 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_bool(shuffle, false,
+C10_DEFINE_bool(
+    shuffle,
+    false,
    "Randomly shuffle the order of images and their labels");
-CAFFE2_DEFINE_string(input_folder, "", "The input image file name.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(input_folder, "", "The input image file name.");
+C10_DEFINE_string(
    list_file,
    "",
    "The text file containing the list of images.");
-CAFFE2_DEFINE_string(output_db_name, "", "The output training leveldb name.");
-CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
-CAFFE2_DEFINE_bool(raw, false,
+C10_DEFINE_string(output_db_name, "", "The output training leveldb name.");
+C10_DEFINE_string(db, "leveldb", "The db type.");
+C10_DEFINE_bool(
+    raw,
+    false,
    "If set, we pre-read the images and store the raw buffer.");
-CAFFE2_DEFINE_bool(color, true, "If set, load images in color.");
-CAFFE2_DEFINE_int(
+C10_DEFINE_bool(color, true, "If set, load images in color.");
+C10_DEFINE_int(
    scale,
    256,
-    "If caffe2::FLAGS_raw is set, scale the shorter edge to the given value.");
-CAFFE2_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
-CAFFE2_DEFINE_int(
+    "If c10::FLAGS_raw is set, scale the shorter edge to the given value.");
+C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
+C10_DEFINE_int(
    num_threads,
    -1,
    "Number of image parsing and conversion threads.");
@ -71,11 +75,11 @@ class Converter {
  explicit Converter() {
    data_ = protos_.add_protos();
    label_ = protos_.add_protos();
-    if (caffe2::FLAGS_raw) {
+    if (c10::FLAGS_raw) {
      data_->set_data_type(TensorProto::BYTE);
      data_->add_dims(0);
      data_->add_dims(0);
-      if (caffe2::FLAGS_color) {
+      if (c10::FLAGS_color) {
        data_->add_dims(3);
      }
    } else {
@ -115,7 +119,7 @@ class Converter {
  }

  void run() {
-    const auto& input_folder = caffe2::FLAGS_input_folder;
+    const auto& input_folder = c10::FLAGS_input_folder;
    std::unique_lock<std::mutex> lock(mutex_);
    std::string value;
    while (!in_.empty()) {
@ -126,7 +130,7 @@ class Converter {
      label_->set_int32_data(0, pair.second);

      // Add raw file contents to DB if !raw
-      if (!caffe2::FLAGS_raw) {
+      if (!c10::FLAGS_raw) {
        std::ifstream image_file_stream(input_folder + pair.first);
        if (!image_file_stream) {
          LOG(ERROR) << "Cannot open " << input_folder << pair.first
@ -140,23 +144,22 @@ class Converter {
        // Load image
        cv::Mat img = cv::imread(
            input_folder + pair.first,
-            caffe2::FLAGS_color ? CV_LOAD_IMAGE_COLOR
-                                : CV_LOAD_IMAGE_GRAYSCALE);
+            c10::FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);

        // Resize image
        cv::Mat resized_img;
        int scaled_width, scaled_height;
-        if (caffe2::FLAGS_warp) {
-          scaled_width = caffe2::FLAGS_scale;
-          scaled_height = caffe2::FLAGS_scale;
+        if (c10::FLAGS_warp) {
+          scaled_width = c10::FLAGS_scale;
+          scaled_height = c10::FLAGS_scale;
        } else if (img.rows > img.cols) {
-          scaled_width = caffe2::FLAGS_scale;
+          scaled_width = c10::FLAGS_scale;
          scaled_height =
-              static_cast<float>(img.rows) * caffe2::FLAGS_scale / img.cols;
+              static_cast<float>(img.rows) * c10::FLAGS_scale / img.cols;
        } else {
-          scaled_height = caffe2::FLAGS_scale;
+          scaled_height = c10::FLAGS_scale;
          scaled_width =
-              static_cast<float>(img.cols) * caffe2::FLAGS_scale / img.rows;
+              static_cast<float>(img.cols) * c10::FLAGS_scale / img.rows;
        }
        cv::resize(
            img,
@ -211,12 +214,12 @@ void ConvertImageDataset(
    lines.push_back(std::make_pair(filename, file_label));
  }

-  if (caffe2::FLAGS_shuffle) {
+  if (c10::FLAGS_shuffle) {
    LOG(INFO) << "Shuffling data";
    std::shuffle(lines.begin(), lines.end(), std::default_random_engine(1701));
  }

-  auto num_threads = caffe2::FLAGS_num_threads;
+  auto num_threads = c10::FLAGS_num_threads;
  if (num_threads < 1) {
    num_threads = std::thread::hardware_concurrency();
  }
@ -224,7 +227,7 @@ void ConvertImageDataset(
  LOG(INFO) << "Processing " << lines.size() << " images...";
  LOG(INFO) << "Opening DB " << output_db_name;

-  auto db = db::CreateDB(caffe2::FLAGS_db, output_db_name, db::NEW);
+  auto db = db::CreateDB(c10::FLAGS_db, output_db_name, db::NEW);
  auto transaction = db->NewTransaction();

  LOG(INFO) << "Using " << num_threads << " processing threads...";
@ -274,7 +277,9 @@ void ConvertImageDataset(
 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  caffe2::ConvertImageDataset(
-      caffe2::FLAGS_input_folder, caffe2::FLAGS_list_file,
-      caffe2::FLAGS_output_db_name, caffe2::FLAGS_shuffle);
+      c10::FLAGS_input_folder,
+      c10::FLAGS_list_file,
+      c10::FLAGS_output_db_name,
+      c10::FLAGS_shuffle);
  return 0;
 }
--- a/binaries/make_mnist_db.cc
+++ b/binaries/make_mnist_db.cc
@ -27,15 +27,19 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(image_file, "", "The input image file name.");
-CAFFE2_DEFINE_string(label_file, "", "The label file name.");
-CAFFE2_DEFINE_string(output_file, "", "The output db name.");
-CAFFE2_DEFINE_string(db, "leveldb", "The db type.");
-CAFFE2_DEFINE_int(data_limit, -1,
-             "If set, only output this number of data points.");
-CAFFE2_DEFINE_bool(channel_first, false,
-            "If set, write the data as channel-first (CHW order) as the old "
-            "Caffe does.");
+C10_DEFINE_string(image_file, "", "The input image file name.");
+C10_DEFINE_string(label_file, "", "The label file name.");
+C10_DEFINE_string(output_file, "", "The output db name.");
+C10_DEFINE_string(db, "leveldb", "The db type.");
+C10_DEFINE_int(
+    data_limit,
+    -1,
+    "If set, only output this number of data points.");
+C10_DEFINE_bool(
+    channel_first,
+    false,
+    "If set, write the data as channel-first (CHW order) as the old "
+    "Caffe does.");

 namespace caffe2 {
 uint32_t swap_endian(uint32_t val) {
@ -79,7 +83,8 @@ void convert_dataset(const char* image_filename, const char* label_filename,
  cols = swap_endian(cols);

  // leveldb
-  std::unique_ptr<db::DB> mnist_db(db::CreateDB(caffe2::FLAGS_db, db_path, db::NEW));
+  std::unique_ptr<db::DB> mnist_db(
+      db::CreateDB(c10::FLAGS_db, db_path, db::NEW));
  std::unique_ptr<db::Transaction> transaction(mnist_db->NewTransaction());
  // Storing to db
  char label_value;
@ -93,7 +98,7 @@ void convert_dataset(const char* image_filename, const char* label_filename,
  TensorProto* data = protos.add_protos();
  TensorProto* label = protos.add_protos();
  data->set_data_type(TensorProto::BYTE);
-  if (caffe2::FLAGS_channel_first) {
+  if (c10::FLAGS_channel_first) {
    data->add_dims(1);
    data->add_dims(rows);
    data->add_dims(cols);
@ -133,7 +138,10 @@ void convert_dataset(const char* image_filename, const char* label_filename,

 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
-  caffe2::convert_dataset(caffe2::FLAGS_image_file.c_str(), caffe2::FLAGS_label_file.c_str(),
-                          caffe2::FLAGS_output_file.c_str(), caffe2::FLAGS_data_limit);
+  caffe2::convert_dataset(
+      c10::FLAGS_image_file.c_str(),
+      c10::FLAGS_label_file.c_str(),
+      c10::FLAGS_output_file.c_str(),
+      c10::FLAGS_data_limit);
  return 0;
 }
--- a/binaries/predictor_verifier.cc
+++ b/binaries/predictor_verifier.cc
@ -19,8 +19,8 @@
 #include "caffe2/predictor/predictor.h"
 #include "caffe2/utils/proto_utils.h"

-CAFFE2_DEFINE_string(init_net, "", "The given path to the init protobuffer.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(init_net, "", "The given path to the init protobuffer.");
+C10_DEFINE_string(
    predict_net,
    "",
    "The given path to the predict protobuffer.");
@ -28,15 +28,15 @@ CAFFE2_DEFINE_string(
 namespace caffe2 {

 void run() {
-  if (FLAGS_init_net.empty()) {
+  if (c10::FLAGS_init_net.empty()) {
    LOG(FATAL) << "No init net specified. Use --init_net=/path/to/net.";
  }
-  if (FLAGS_predict_net.empty()) {
+  if (c10::FLAGS_predict_net.empty()) {
    LOG(FATAL) << "No predict net specified. Use --predict_net=/path/to/net.";
  }
  caffe2::NetDef init_net, predict_net;
-  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net));
-  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_predict_net, &predict_net));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &init_net));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_predict_net, &predict_net));
  // Can be large due to constant fills
  VLOG(1) << "Init net: " << ProtoDebugString(init_net);
  LOG(INFO) << "Predict net: " << ProtoDebugString(predict_net);
--- a/binaries/print_registered_core_operators.cc
+++ b/binaries/print_registered_core_operators.cc
@ -21,8 +21,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/core/operator_schema.h"

-CAFFE2_DEFINE_string(schema, "",
-                     "Print doc and schema of a particular operator");
+C10_DEFINE_string(schema, "", "Print doc and schema of a particular operator");

 static bool HasSchema(const std::string& str) {
  return caffe2::OpSchemaRegistry::Schema(str);
@ -36,15 +35,14 @@ static bool HasDoc(const std::string& str) {
 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);

-  if (!caffe2::FLAGS_schema.empty()) {
-    const auto* schema = caffe2::OpSchemaRegistry::Schema(
-        caffe2::FLAGS_schema);
+  if (!c10::FLAGS_schema.empty()) {
+    const auto* schema = caffe2::OpSchemaRegistry::Schema(c10::FLAGS_schema);
    if (!schema) {
-      std::cerr << "Operator " << caffe2::FLAGS_schema
-                << " doesn't have a schema" << std::endl;
+      std::cerr << "Operator " << c10::FLAGS_schema << " doesn't have a schema"
+                << std::endl;
      return 1;
    }
-    std::cout << "Operator " << caffe2::FLAGS_schema << ": " << std::endl
+    std::cout << "Operator " << c10::FLAGS_schema << ": " << std::endl
              << *schema;
    return 0;
  }
--- a/binaries/run_plan.cc
+++ b/binaries/run_plan.cc
@ -20,17 +20,17 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
+C10_DEFINE_string(plan, "", "The given path to the plan protobuffer.");

 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
-  if (caffe2::FLAGS_plan.size() == 0) {
+  if (c10::FLAGS_plan.size() == 0) {
    LOG(ERROR) << "No plan specified. Use --plan=/path/to/plan.";
    return 0;
  }
-  LOG(INFO) << "Loading plan: " << caffe2::FLAGS_plan;
+  LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
  caffe2::PlanDef plan_def;
-  CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_plan, &plan_def));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
  std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
  workspace->RunPlan(plan_def);

--- a/binaries/run_plan_mpi.cc
+++ b/binaries/run_plan_mpi.cc
@ -16,16 +16,17 @@

 #include <mpi.h>

+#include "c10/util/Flags.h"
 #include "caffe2/core/init.h"
+#include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/utils/proto_utils.h"
-#include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(plan, "", "The given path to the plan protobuffer.");
+C10_DEFINE_string(plan, "", "The given path to the plan protobuffer.");

 int main(int argc, char** argv) {
-  caffe2::SetUsageMessage("Runs a caffe2 plan that has MPI operators in it.");
+  c10::SetUsageMessage("Runs a caffe2 plan that has MPI operators in it.");
  int mpi_ret;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_ret);
  if (mpi_ret != MPI_THREAD_MULTIPLE &&
@ -35,9 +36,9 @@ int main(int argc, char** argv) {
    return 1;
  }
  caffe2::GlobalInit(&argc, &argv);
-  LOG(INFO) << "Loading plan: " << caffe2::FLAGS_plan;
+  LOG(INFO) << "Loading plan: " << c10::FLAGS_plan;
  caffe2::PlanDef plan_def;
-  CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_plan, &plan_def));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_plan, &plan_def));
  std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
  workspace->RunPlan(plan_def);

--- a/binaries/speed_benchmark.cc
+++ b/binaries/speed_benchmark.cc
@ -28,24 +28,21 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_string(net, "", "The given net to benchmark.");
-CAFFE2_DEFINE_string(
-    init_net,
-    "",
-    "The given net to initialize any parameters.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(net, "", "The given net to benchmark.");
+C10_DEFINE_string(init_net, "", "The given net to initialize any parameters.");
+C10_DEFINE_string(
    input,
    "",
    "Input that is needed for running the network. If "
    "multiple input needed, use comma separated string.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    input_file,
    "",
    "Input file that contain the serialized protobuf for "
    "the input blobs. If multiple input needed, use comma "
    "separated string. Must have the same number of items "
    "as input does.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    input_dims,
    "",
    "Alternate to input_files, if all inputs are simple "
@ -53,31 +50,31 @@ CAFFE2_DEFINE_string(
    "separated numbers. If multiple input needed, use "
    "semicolon to separate the dimension of different "
    "tensors.");
-CAFFE2_DEFINE_string(input_type, "", "Input type (uint8_t/float)");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(input_type, "", "Input type (uint8_t/float)");
+C10_DEFINE_string(
    output,
    "",
    "Output that should be dumped after the execution "
    "finishes. If multiple outputs are needed, use comma "
    "separated string. If you want to dump everything, pass "
    "'*' as the output value.");
-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    output_folder,
    "",
    "The folder that the output should be written to. This "
    "folder must already exist in the file system.");
-CAFFE2_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
-CAFFE2_DEFINE_int(iter, 10, "The number of iterations to run.");
-CAFFE2_DEFINE_int(opt, 0, "The level of optimization to run automatically.");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
+C10_DEFINE_int(iter, 10, "The number of iterations to run.");
+C10_DEFINE_int(opt, 0, "The level of optimization to run automatically.");
+C10_DEFINE_bool(
    run_individual,
    false,
    "Whether to benchmark individual operators.");

-CAFFE2_DEFINE_bool(force_engine, false, "Force engine field for all operators");
-CAFFE2_DEFINE_string(engine, "", "Forced engine field value");
-CAFFE2_DEFINE_bool(force_algo, false, "Force algo arg for all operators");
-CAFFE2_DEFINE_string(algo, "", "Forced algo arg value");
+C10_DEFINE_bool(force_engine, false, "Force engine field for all operators");
+C10_DEFINE_string(engine, "", "Forced engine field value");
+C10_DEFINE_bool(force_algo, false, "Force algo arg for all operators");
+C10_DEFINE_string(algo, "", "Forced algo arg value");

 using std::string;
 using std::unique_ptr;
@ -89,14 +86,14 @@ int main(int argc, char** argv) {

  // Run initialization network.
  caffe2::NetDef net_def;
-  CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_init_net, &net_def));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_init_net, &net_def));
  CAFFE_ENFORCE(workspace->RunNetOnce(net_def));

  // Load input.
-  if (caffe2::FLAGS_input.size()) {
-    vector<string> input_names = caffe2::split(',', caffe2::FLAGS_input);
-    if (caffe2::FLAGS_input_file.size()) {
-      vector<string> input_files = caffe2::split(',', caffe2::FLAGS_input_file);
+  if (c10::FLAGS_input.size()) {
+    vector<string> input_names = caffe2::split(',', c10::FLAGS_input);
+    if (c10::FLAGS_input_file.size()) {
+      vector<string> input_files = caffe2::split(',', c10::FLAGS_input_file);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_files.size(),
@ -106,24 +103,24 @@ int main(int argc, char** argv) {
        CAFFE_ENFORCE(caffe2::ReadProtoFromFile(input_files[i], &blob_proto));
        DeserializeBlob(blob_proto, workspace->CreateBlob(input_names[i]));
      }
-    } else if (caffe2::FLAGS_input_dims.size() || caffe2::FLAGS_input_type.size()) {
+    } else if (c10::FLAGS_input_dims.size() || c10::FLAGS_input_type.size()) {
      CAFFE_ENFORCE_GE(
-          caffe2::FLAGS_input_dims.size(),
+          c10::FLAGS_input_dims.size(),
          0,
          "Input dims must be specified when input tensors are used.");
      CAFFE_ENFORCE_GE(
-          caffe2::FLAGS_input_type.size(),
+          c10::FLAGS_input_type.size(),
          0,
          "Input type must be specified when input tensors are used.");

      vector<string> input_dims_list =
-          caffe2::split(';', caffe2::FLAGS_input_dims);
+          caffe2::split(';', c10::FLAGS_input_dims);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_dims_list.size(),
          "Input name and dims should have the same number of items.");
      vector<string> input_type_list =
-          caffe2::split(';', caffe2::FLAGS_input_type);
+          caffe2::split(';', c10::FLAGS_input_type);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_type_list.size(),
@ -161,28 +158,28 @@ int main(int argc, char** argv) {
  }

  // Run main network.
-  CAFFE_ENFORCE(ReadProtoFromFile(caffe2::FLAGS_net, &net_def));
+  CAFFE_ENFORCE(ReadProtoFromFile(c10::FLAGS_net, &net_def));
  if (!net_def.has_name()) {
    net_def.set_name("benchmark");
  }
  // force changing engine and algo
-  if (caffe2::FLAGS_force_engine) {
-    LOG(INFO) << "force engine be: " << caffe2::FLAGS_engine;
+  if (c10::FLAGS_force_engine) {
+    LOG(INFO) << "force engine be: " << c10::FLAGS_engine;
    for (const auto& op : net_def.op()) {
-      const_cast<caffe2::OperatorDef*>(&op)->set_engine(caffe2::FLAGS_engine);
+      const_cast<caffe2::OperatorDef*>(&op)->set_engine(c10::FLAGS_engine);
    }
  }
-  if (caffe2::FLAGS_force_algo) {
-    LOG(INFO) << "force algo be: " << caffe2::FLAGS_algo;
+  if (c10::FLAGS_force_algo) {
+    LOG(INFO) << "force algo be: " << c10::FLAGS_algo;
    for (const auto& op : net_def.op()) {
      caffe2::GetMutableArgument(
          "algo", true, const_cast<caffe2::OperatorDef*>(&op))
-          ->set_s(caffe2::FLAGS_algo);
+          ->set_s(c10::FLAGS_algo);
    }
  }
-  if (caffe2::FLAGS_opt) {
+  if (c10::FLAGS_opt) {
 #ifdef CAFFE2_OPTIMIZER
-    net_def = caffe2::opt::optimize(net_def, workspace.get(), caffe2::FLAGS_opt);
+    net_def = caffe2::opt::optimize(net_def, workspace.get(), c10::FLAGS_opt);
 #else
    LOG(WARNING) << "Caffe2 not compiled with optimization passes.";
 #endif
@ -192,14 +189,13 @@ int main(int argc, char** argv) {
  CHECK_NOTNULL(net);
  CAFFE_ENFORCE(net->Run());
  net->TEST_Benchmark(
-      caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
+      c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);

-  string output_prefix = caffe2::FLAGS_output_folder.size()
-      ? caffe2::FLAGS_output_folder + "/"
-      : "";
-  if (caffe2::FLAGS_output.size()) {
-    vector<string> output_names = caffe2::split(',', caffe2::FLAGS_output);
-    if (caffe2::FLAGS_output == "*") {
+  string output_prefix =
+      c10::FLAGS_output_folder.size() ? c10::FLAGS_output_folder + "/" : "";
+  if (c10::FLAGS_output.size()) {
+    vector<string> output_names = caffe2::split(',', c10::FLAGS_output);
+    if (c10::FLAGS_output == "*") {
      output_names = workspace->Blobs();
    }
    for (const string& name : output_names) {
--- a/binaries/split_db.cc
+++ b/binaries/split_db.cc
@ -22,33 +22,41 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_string(input_db, "", "The input db.");
-CAFFE2_DEFINE_int(splits, 0, "The number of splits.");
-CAFFE2_DEFINE_string(db_type, "", "The db type.");
-CAFFE2_DEFINE_int(batch_size, 1000, "The write batch size.");
+C10_DEFINE_string(input_db, "", "The input db.");
+C10_DEFINE_int(splits, 0, "The number of splits.");
+C10_DEFINE_string(db_type, "", "The db type.");
+C10_DEFINE_int(batch_size, 1000, "The write batch size.");

 namespace caffe2 {

 static int Split(int argc, char** argv) {
  GlobalInit(&argc, &argv);

-  CAFFE_ENFORCE(FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
-  CAFFE_ENFORCE(FLAGS_splits > 0, "Must specify a nonnegative split number.");
-  CAFFE_ENFORCE(FLAGS_db_type.size(), "Must specify --db_type=[a db type].");
+  CAFFE_ENFORCE(
+      c10::FLAGS_input_db.size(), "Must specify --input_db=/path/to/db.");
+  CAFFE_ENFORCE(
+      c10::FLAGS_splits > 0, "Must specify a nonnegative split number.");
+  CAFFE_ENFORCE(
+      c10::FLAGS_db_type.size(), "Must specify --db_type=[a db type].");

  unique_ptr<db::DB> in_db(
-      db::CreateDB(FLAGS_db_type, FLAGS_input_db, db::READ));
-  CAFFE_ENFORCE(in_db != nullptr, "Cannot open input db: ", FLAGS_input_db);
+      db::CreateDB(c10::FLAGS_db_type, c10::FLAGS_input_db, db::READ));
+  CAFFE_ENFORCE(
+      in_db != nullptr, "Cannot open input db: ", c10::FLAGS_input_db);
  unique_ptr<db::Cursor> cursor(in_db->NewCursor());
  // This usually won't happen, but FWIW.
  CAFFE_ENFORCE(
-      cursor != nullptr, "Cannot obtain cursor for input db: ", FLAGS_input_db);
+      cursor != nullptr,
+      "Cannot obtain cursor for input db: ",
+      c10::FLAGS_input_db);

  vector<unique_ptr<db::DB>> out_dbs;
  vector<unique_ptr<db::Transaction>> transactions;
-  for (int i = 0; i < FLAGS_splits; ++i) {
+  for (int i = 0; i < c10::FLAGS_splits; ++i) {
    out_dbs.push_back(unique_ptr<db::DB>(db::CreateDB(
-        FLAGS_db_type, FLAGS_input_db + "_split_" + to_string(i), db::NEW)));
+        c10::FLAGS_db_type,
+        c10::FLAGS_input_db + "_split_" + to_string(i),
+        db::NEW)));
    CAFFE_ENFORCE(out_dbs.back().get(), "Cannot create output db #", i);
    transactions.push_back(
        unique_ptr<db::Transaction>(out_dbs[i]->NewTransaction()));
@ -58,9 +66,10 @@ static int Split(int argc, char** argv) {

  int count = 0;
  for (; cursor->Valid(); cursor->Next()) {
-    transactions[count % FLAGS_splits]->Put(cursor->key(), cursor->value());
-    if (++count % FLAGS_batch_size == 0) {
-      for (int i = 0; i < FLAGS_splits; ++i) {
+    transactions[count % c10::FLAGS_splits]->Put(
+        cursor->key(), cursor->value());
+    if (++count % c10::FLAGS_batch_size == 0) {
+      for (int i = 0; i < c10::FLAGS_splits; ++i) {
        transactions[i]->Commit();
      }
      LOG(INFO) << "Split " << count << " items so far.";
--- a/binaries/tsv_2_proto.cc
+++ b/binaries/tsv_2_proto.cc
@ -24,13 +24,13 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/utils/proto_utils.h"

-CAFFE2_DEFINE_string(f_in, "", "The input data file name.");
-CAFFE2_DEFINE_string(f_out, "", "The output data file name.");
+C10_DEFINE_string(f_in, "", "The input data file name.");
+C10_DEFINE_string(f_out, "", "The output data file name.");

 int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
-  std::ifstream f_in(caffe2::FLAGS_f_in);
-  std::ofstream f_out(caffe2::FLAGS_f_out);
+  std::ifstream f_in(c10::FLAGS_f_in);
+  std::ofstream f_out(c10::FLAGS_f_out);
  std::string line;
  caffe2::TensorProtos tensor_protos;
  while (std::getline(f_in, line)) {
--- a/binaries/zmq_feeder.cc
+++ b/binaries/zmq_feeder.cc
@ -23,9 +23,9 @@
 #include "caffe2/core/logging.h"
 #include "caffe2/utils/zmq_helper.h"

-CAFFE2_DEFINE_string(server, "tcp://*:5555", "The server address.");
-CAFFE2_DEFINE_string(input_db, "", "The input db.");
-CAFFE2_DEFINE_string(input_db_type, "", "The input db type.");
+C10_DEFINE_string(server, "tcp://*:5555", "The server address.");
+C10_DEFINE_string(input_db, "", "The input db.");
+C10_DEFINE_string(input_db_type, "", "The input db type.");

 using caffe2::db::DB;
 using caffe2::db::Cursor;
@ -36,11 +36,11 @@ int main(int argc, char** argv) {

  LOG(INFO) << "Opening DB...";
  auto in_db = caffe2::db::CreateDB(
-      caffe2::FLAGS_input_db_type, caffe2::FLAGS_input_db, caffe2::db::READ);
+      c10::FLAGS_input_db_type, c10::FLAGS_input_db, caffe2::db::READ);
  CAFFE_ENFORCE(
      in_db,
-      "Cannot load input db " + caffe2::FLAGS_input_db + " of expected type " +
-          caffe2::FLAGS_input_db_type);
+      "Cannot load input db " + c10::FLAGS_input_db + " of expected type " +
+          c10::FLAGS_input_db_type);
  auto cursor = in_db->NewCursor();
  LOG(INFO) << "DB opened.";

@ -48,8 +48,8 @@ int main(int argc, char** argv) {

  //  Socket to talk to clients
  caffe2::ZmqSocket sender(ZMQ_PUSH);
-  sender.Bind(caffe2::FLAGS_server);
-  LOG(INFO) << "Server created at " << caffe2::FLAGS_server;
+  sender.Bind(c10::FLAGS_server);
+  LOG(INFO) << "Server created at " << c10::FLAGS_server;

  while (1) {
    VLOG(1) << "Sending " << cursor->key();
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@ -7,6 +7,8 @@
 # one to link against a specific protobuf version.

 # ---[ Configure macro file.
+set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
+set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
 set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
 configure_file(
    ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
@ -28,6 +30,15 @@ if (${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
  target_compile_options(c10 PRIVATE "-fvisibility=hidden")
 endif()

+# ---[ Dependency of c10
+if (${USE_GFLAGS})
+   target_link_libraries(c10 PUBLIC gflags)
+endif()
+
+if (${USE_GLOG})
+    target_link_libraries(c10 PUBLIC glog::glog)
+endif()
+
 target_include_directories(
    c10 PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
--- a/c10/macros/cmake_macros.h.in
+++ b/c10/macros/cmake_macros.h.in
@ -5,5 +5,7 @@
 // Do not include this file directly. Instead, include c10/macros/Macros.h.

 #cmakedefine C10_BUILD_SHARED_LIBS
+#cmakedefine C10_USE_GLOG
+#cmakedefine C10_USE_GFLAGS

 #endif // C10_MACROS_CMAKE_MACROS_H_
--- a/c10/test/flags_test.cpp
+++ b/c10/test/flags_test.cpp
@ -0,0 +1,29 @@
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include "c10/util/Flags.h"
+
+C10_DEFINE_bool(c10_flags_test_only_flag, true, "Only used in test.");
+
+namespace c10 {
+
+TEST(FlagsTest, TestGflagsCorrectness) {
+#ifdef C10_USE_GFLAGS
+  EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
+  EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
+  // Change the c10 namespace and check global
+  FLAGS_c10_flags_test_only_flag = false;
+  EXPECT_EQ(FLAGS_c10_flags_test_only_flag, false);
+  EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, false);
+  // Change global and check c10 namespace
+  ::FLAGS_c10_flags_test_only_flag = true;
+  EXPECT_EQ(FLAGS_c10_flags_test_only_flag, true);
+  EXPECT_EQ(::FLAGS_c10_flags_test_only_flag, true);
+#else // C10_USE_GFLAGS
+  std::cout << "Caffe2 is not built with gflags. Nothing to test here."
+            << std::endl;
+#endif
+}
+
+} // namespace c10
--- a/c10/util/Flags.h
+++ b/c10/util/Flags.h
@ -0,0 +1,212 @@
+#pragma once
+
+/* Commandline flags support for C10.
+ *
+ * This is a portable commandline flags tool for c10, so we can optionally
+ * choose to use gflags or a lightweight custom implementation if gflags is
+ * not possible on a certain platform. If you have gflags installed, setting
+ * the macro C10_USE_GFLAGS will seamlessly route everything to gflags.
+ *
+ * To define a flag foo of type bool default to true, do the following in the
+ * *global* namespace:
+ *     C10_DEFINE_bool(foo, true, "An example.");
+ *
+ * To use it in another .cc file, you can use C10_DECLARE_* as follows:
+ *     C10_DECLARE_bool(foo);
+ *
+ * In both cases, you can then access the flag via c10::FLAGS_foo.
+ *
+ * It is recommended that you build with gflags. To learn more about the flags
+ * usage, refer to the gflags page here:
+ *
+ * https://gflags.github.io/gflags/
+ *
+ * Note about Python users / devs: gflags is initiated from a C++ function
+ * ParseCommandLineFlags, and is usually done in native binaries in the main
+ * function. As Python does not have a modifiable main function, it is usually
+ * difficult to change the flags after Python starts. Hence, it is recommended
+ * that one sets the default value of the flags to one that's acceptable in
+ * general - that will allow Python to run without wrong flags.
+ */
+
+#include <string>
+
+#include "c10/macros/Macros.h"
+#include "c10/util/Registry.h"
+
+namespace c10 {
+/**
+ * Sets the usage message when a commandline tool is called with "--help".
+ */
+C10_API void SetUsageMessage(const std::string& str);
+
+/**
+ * Returns the usage message for the commandline tool set by SetUsageMessage.
+ */
+C10_API const char* UsageMessage();
+
+/**
+ * Parses the commandline flags.
+ *
+ * This command parses all the commandline arguments passed in via pargc
+ * and pargv. Once it is finished, pargc and pargv will contain the remaining
+ * commandline args that c10 does not deal with. Note that following
+ * convention, argv[0] contains the binary name and is not parsed.
+ */
+C10_API bool ParseCommandLineFlags(int* pargc, char*** pargv);
+
+/**
+ * Checks if the commandline flags have already been parsed.
+ */
+C10_API bool CommandLineFlagsHasBeenParsed();
+
+} // namespace c10
+
+////////////////////////////////////////////////////////////////////////////////
+// Below are gflags and non-gflags specific implementations.
+// In general, they define the following macros for one to declare (use
+// C10_DECLARE) or define (use C10_DEFINE) flags:
+// C10_{DECLARE,DEFINE}_{int,int64,double,bool,string}
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef C10_USE_GFLAGS
+
+////////////////////////////////////////////////////////////////////////////////
+// Begin gflags section: most functions are basically rerouted to gflags.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <gflags/gflags.h>
+
+// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags.
+// Using GFLAGS_GFLAGS_H_ to capture this change.
+#ifndef GFLAGS_GFLAGS_H_
+namespace gflags = google;
+#endif // GFLAGS_GFLAGS_H_
+
+// Motivation about the gflags wrapper:
+// (1) We would need to make sure that the gflags version and the non-gflags
+// version of C10 are going to expose the same flags abstraction. One should
+// explicitly use c10::FLAGS_flag_name to access the flags.
+// (2) For flag names, it is recommended to start with c10_ to distinguish it
+// from regular gflags flags. For example, do
+//    C10_DEFINE_bool(c10_my_flag, true, "An example");
+// to allow one to use c10::FLAGS_c10_my_flag.
+// (3) Gflags has a design issue that does not properly expose the global flags,
+// if one builds the library with -fvisibility=hidden. The current gflags (as of
+// Aug 2018) only deals with the Windows case using dllexport, and not the Linux
+// counterparts. As a result, we will explicitly use C10_EXPORT to export the
+// flags defined in C10. This is done via a global reference, so the flag
+// itself is not duplicated - under the hood it is the same global gflags flag.
+#define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \
+  DEFINE_##type(name, default_value, help_str);                                \
+  namespace c10 {                                                              \
+  C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name;                         \
+  }
+
+#define C10_DEFINE_int(name, default_value, help_str) \
+  C10_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
+#define C10_DEFINE_int64(name, default_value, help_str) \
+  C10_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
+#define C10_DEFINE_double(name, default_value, help_str) \
+  C10_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str)
+#define C10_DEFINE_bool(name, default_value, help_str) \
+  C10_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str)
+#define C10_DEFINE_string(name, default_value, help_str) \
+  C10_GFLAGS_DEF_WRAPPER(string, ::fLS::clstring, name, default_value, help_str)
+
+// DECLARE_typed_var should be used in header files and in the global namespace.
+#define C10_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \
+  DECLARE_##type(name);                                   \
+  namespace c10 {                                         \
+  C10_IMPORT extern real_type& FLAGS_##name;              \
+  } // namespace c10
+
+#define C10_DECLARE_int(name) \
+  C10_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name)
+#define C10_DECLARE_int64(name) \
+  C10_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name)
+#define C10_DECLARE_double(name) \
+  C10_GFLAGS_DECLARE_WRAPPER(double, double, name)
+#define C10_DECLARE_bool(name) C10_GFLAGS_DECLARE_WRAPPER(bool, bool, name)
+#define C10_DECLARE_string(name) \
+  C10_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name)
+
+////////////////////////////////////////////////////////////////////////////////
+// End gflags section.
+////////////////////////////////////////////////////////////////////////////////
+
+#else // C10_USE_GFLAGS
+
+////////////////////////////////////////////////////////////////////////////////
+// Begin non-gflags section: providing equivalent functionality.
+////////////////////////////////////////////////////////////////////////////////
+
+namespace c10 {
+
+class C10_API C10FlagParser {
+ public:
+  C10FlagParser() {}
+  bool success() {
+    return success_;
+  }
+
+ protected:
+  template <typename T>
+  bool Parse(const std::string& content, T* value);
+  bool success_;
+};
+
+C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&);
+
+} // namespace c10
+
+// The macros are defined outside the c10 namespace. In your code, you should
+// write the C10_DEFINE_* and C10_DECLARE_* macros outside any namespace
+// as well.
+
+#define C10_DEFINE_typed_var(type, name, default_value, help_str)       \
+  namespace c10 {                                                       \
+  C10_EXPORT type FLAGS_##name = default_value;                         \
+  namespace {                                                           \
+  class C10FlagParser_##name : public C10FlagParser {                   \
+   public:                                                              \
+    explicit C10FlagParser_##name(const std::string& content) {         \
+      success_ = C10FlagParser::Parse<type>(content, &FLAGS_##name);    \
+    }                                                                   \
+  };                                                                    \
+  }                                                                     \
+  RegistererC10FlagsRegistry g_C10FlagsRegistry_##name(                 \
+      #name,                                                            \
+      C10FlagsRegistry(),                                               \
+      RegistererC10FlagsRegistry::DefaultCreator<C10FlagParser_##name>, \
+      "(" #type ", default " #default_value ") " help_str);             \
+  }
+
+#define C10_DEFINE_int(name, default_value, help_str) \
+  C10_DEFINE_typed_var(int, name, default_value, help_str)
+#define C10_DEFINE_int64(name, default_value, help_str) \
+  C10_DEFINE_typed_var(int64_t, name, default_value, help_str)
+#define C10_DEFINE_double(name, default_value, help_str) \
+  C10_DEFINE_typed_var(double, name, default_value, help_str)
+#define C10_DEFINE_bool(name, default_value, help_str) \
+  C10_DEFINE_typed_var(bool, name, default_value, help_str)
+#define C10_DEFINE_string(name, default_value, help_str) \
+  C10_DEFINE_typed_var(std::string, name, default_value, help_str)
+
+// DECLARE_typed_var should be used in header files and in the global namespace.
+#define C10_DECLARE_typed_var(type, name) \
+  namespace c10 {                         \
+  C10_IMPORT extern type FLAGS_##name;    \
+  } // namespace c10
+
+#define C10_DECLARE_int(name) C10_DECLARE_typed_var(int, name)
+#define C10_DECLARE_int64(name) C10_DECLARE_typed_var(int64_t, name)
+#define C10_DECLARE_double(name) C10_DECLARE_typed_var(double, name)
+#define C10_DECLARE_bool(name) C10_DECLARE_typed_var(bool, name)
+#define C10_DECLARE_string(name) C10_DECLARE_typed_var(std::string, name)
+
+////////////////////////////////////////////////////////////////////////////////
+// End non-gflags section.
+////////////////////////////////////////////////////////////////////////////////
+
+#endif // C10_USE_GFLAGS
--- a/c10/util/flags_use_gflags.cpp
+++ b/c10/util/flags_use_gflags.cpp
@ -0,0 +1,38 @@
+#include "c10/util/Flags.h"
+
+#include <string>
+
+#include "c10/macros/Macros.h"
+
+#ifdef C10_USE_GFLAGS
+
+namespace c10 {
+
+using std::string;
+
+// Forwards the usage message to gflags, unless UsageMessage() already returns
+// a non-null string, in which case the call is ignored.
+// NOTE(review): this guard only works if gflags::ProgramUsage() returns
+// nullptr while no message has been set. If gflags returns a placeholder
+// string instead, the message is never forwarded -- TODO confirm against the
+// gflags version in use.
+C10_EXPORT void SetUsageMessage(const string& str) {
+  if (UsageMessage() != nullptr) {
+    // Usage message has already been set, so we will simply return.
+    return;
+  }
+  gflags::SetUsageMessage(str);
+}
+
+// Returns whatever gflags::ProgramUsage() currently reports.
+C10_EXPORT const char* UsageMessage() {
+  return gflags::ProgramUsage();
+}
+
+// Parses the command line by delegating to gflags::ParseCommandLineFlags,
+// passing true as its third argument. Returns true right away when *pargc is 0.
+// NOTE(review): the value returned by the gflags call is implicitly converted
+// to bool here; confirm what gflags actually returns on a parse failure.
+C10_EXPORT bool ParseCommandLineFlags(int* pargc, char*** pargv) {
+  // In case there are no commandline flags to parse, simply return.
+  if (*pargc == 0)
+    return true;
+  return gflags::ParseCommandLineFlags(pargc, pargv, true);
+}
+
+// In the gflags build this unconditionally reports true; it does not track
+// whether ParseCommandLineFlags has actually been called.
+C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
+  // There is no way we query gflags right now, so we will simply return true.
+  return true;
+}
+
+} // namespace c10
+#endif // C10_USE_GFLAGS
--- a/c10/util/flags_use_no_gflags.cpp
+++ b/c10/util/flags_use_no_gflags.cpp
@ -1,66 +1,46 @@
-#include "caffe2/core/flags.h"
+#include "c10/util/Flags.h"
+#include "c10/macros/Macros.h"

 #include <cstdlib>
 #include <iostream>
 #include <sstream>
+#include <string>

-#include "caffe2/core/logging.h"
+#ifndef C10_USE_GFLAGS

-namespace caffe2 {
+namespace c10 {

-#ifdef CAFFE2_USE_GFLAGS
+using std::string;

-C10_EXPORT void SetUsageMessage(const string& str) {
-  if (UsageMessage() != nullptr) {
-    // Usage message has already been set, so we will simply return.
-    return;
-  }
-  gflags::SetUsageMessage(str);
-}
-
-C10_EXPORT const char* UsageMessage() {
-  return gflags::ProgramUsage();
-}
-
-C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
-  if (*pargc == 0) return true;
-  return gflags::ParseCommandLineFlags(pargc, pargv, true);
-}
-
-C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
-  // There is no way we query gflags right now, so we will simply return true.
-  return true;
-}
-
-#else  // CAFFE2_USE_GFLAGS
-
-C10_DEFINE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&);
+C10_DEFINE_REGISTRY(C10FlagsRegistry, C10FlagParser, const string&);

 namespace {
 static bool gCommandLineFlagsParsed = false;
-// Since caffe flags is going to be loaded before caffe logging, we would
+// Since flags are going to be loaded before logging, we would
 // need to have a stringstream to hold the messages instead of directly
 // using caffe logging.
 std::stringstream& GlobalInitStream() {
  static std::stringstream ss;
  return ss;
 }
-static string gUsageMessage = "(Usage message not set.)";
-}
+static const char* gUsageMessage = "(Usage message not set.)";
+} // namespace

 C10_EXPORT void SetUsageMessage(const string& str) {
-  gUsageMessage = str;
-}
-C10_EXPORT const char* UsageMessage() {
-  return gUsageMessage.c_str();
+  // The function-local static is initialized from str on the first call only,
+  // so later calls keep pointing gUsageMessage at that first message.
+  static string usage_message_safe_copy = str;
+  gUsageMessage = usage_message_safe_copy.c_str();
 }

-C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
-  if (*pargc == 0) return true;
+C10_EXPORT const char* UsageMessage() {
+  return gUsageMessage;
+}
+
+C10_EXPORT bool ParseCommandLineFlags(int* pargc, char*** pargv) {
+  if (*pargc == 0)
+    return true;
  char** argv = *pargv;
  bool success = true;
-  GlobalInitStream() << "Parsing commandline arguments for caffe2."
-                     << std::endl;
+  GlobalInitStream() << "Parsing commandline arguments for c10." << std::endl;
  // write_head is the location we write the unused arguments to.
  int write_head = 1;
  for (int i = 1; i < *pargc; ++i) {
@ -70,7 +50,7 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
      // Print the help message, and quit.
      std::cout << UsageMessage() << std::endl;
      std::cout << "Arguments: " << std::endl;
-      for (const auto& help_msg : Caffe2FlagsRegistry()->HelpMessage()) {
+      for (const auto& help_msg : C10FlagsRegistry()->HelpMessage()) {
        std::cout << "    " << help_msg.first << ": " << help_msg.second
                  << std::endl;
      }
@ -79,7 +59,7 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
    // If the arg does not start with "--", we will ignore it.
    if (arg[0] != '-' || arg[1] != '-') {
      GlobalInitStream()
-          << "Caffe2 flag: commandline argument does not match --name=var "
+          << "C10 flag: commandline argument does not match --name=var "
             "or --name format: "
          << arg << ". Ignoring this argument." << std::endl;
      argv[write_head++] = argv[i];
@ -96,8 +76,9 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
      ++i;
      if (i == *pargc) {
        GlobalInitStream()
-            << "Caffe2 flag: reached the last commandline argument, but "
-               "I am expecting a value for " << arg;
+            << "C10 flag: reached the last commandline argument, but "
+               "I am expecting a value for "
+            << arg;
        success = false;
        break;
      }
@ -109,17 +90,16 @@ C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) {
      value = arg.substr(prefix_idx + 1, string::npos);
    }
    // If the flag is not registered, we will ignore it.
-    if (!Caffe2FlagsRegistry()->Has(key)) {
-      GlobalInitStream() << "Caffe2 flag: unrecognized commandline argument: "
+    if (!C10FlagsRegistry()->Has(key)) {
+      GlobalInitStream() << "C10 flag: unrecognized commandline argument: "
                         << arg << std::endl;
      success = false;
      break;
    }
-    std::unique_ptr<Caffe2FlagParser> parser(
-        Caffe2FlagsRegistry()->Create(key, value));
+    std::unique_ptr<C10FlagParser> parser(
+        C10FlagsRegistry()->Create(key, value));
    if (!parser->success()) {
-      GlobalInitStream() << "Caffe2 flag: illegal argument: "
-                         << arg << std::endl;
+      GlobalInitStream() << "C10 flag: illegal argument: " << arg << std::endl;
      success = false;
      break;
    }
@ -144,7 +124,7 @@ C10_EXPORT bool CommandLineFlagsHasBeenParsed() {
 }

 template <>
-C10_EXPORT bool Caffe2FlagParser::Parse<string>(
+C10_EXPORT bool C10FlagParser::Parse<string>(
    const string& content,
    string* value) {
  *value = content;
@ -152,21 +132,19 @@ C10_EXPORT bool Caffe2FlagParser::Parse<string>(
 }

 template <>
-C10_EXPORT bool Caffe2FlagParser::Parse<int>(
-    const string& content,
-    int* value) {
+C10_EXPORT bool C10FlagParser::Parse<int>(const string& content, int* value) {
   try {
+    // NOTE(review): std::atoi reports failure by returning 0 rather than by
+    // throwing, so the catch branch below looks unreachable and non-numeric
+    // input is accepted as 0 -- confirm whether that is intended.
     *value = std::atoi(content.c_str());
     return true;
-  } catch(...) {
-    GlobalInitStream() << "Caffe2 flag error: Cannot convert argument to int: "
+  } catch (...) {
+    GlobalInitStream() << "C10 flag error: Cannot convert argument to int: "
                        << content << std::endl;
     return false;
   }
 }

 template <>
-C10_EXPORT bool Caffe2FlagParser::Parse<int64_t>(
+C10_EXPORT bool C10FlagParser::Parse<int64_t>(
    const string& content,
    int64_t* value) {
  try {
@ -179,43 +157,41 @@ C10_EXPORT bool Caffe2FlagParser::Parse<int64_t>(
 #endif
    return true;
  } catch (...) {
-    GlobalInitStream() << "Caffe2 flag error: Cannot convert argument to int: "
+    GlobalInitStream() << "C10 flag error: Cannot convert argument to int: "
                       << content << std::endl;
    return false;
  }
 }

 template <>
-C10_EXPORT bool Caffe2FlagParser::Parse<double>(
+C10_EXPORT bool C10FlagParser::Parse<double>(
     const string& content,
     double* value) {
   try {
+    // NOTE(review): std::atof reports failure by returning 0.0 rather than by
+    // throwing, so the catch branch below looks unreachable and non-numeric
+    // input is accepted as 0.0 -- confirm whether that is intended.
     *value = std::atof(content.c_str());
     return true;
-  } catch(...) {
-    GlobalInitStream()
-        << "Caffe2 flag error: Cannot convert argument to double: "
-        << content << std::endl;
+  } catch (...) {
+    GlobalInitStream() << "C10 flag error: Cannot convert argument to double: "
+                       << content << std::endl;
     return false;
   }
 }

 template <>
-C10_EXPORT bool Caffe2FlagParser::Parse<bool>(
-    const string& content,
-    bool* value) {
+C10_EXPORT bool C10FlagParser::Parse<bool>(const string& content, bool* value) {
  if (content == "false" || content == "False" || content == "FALSE" ||
      content == "0") {
    *value = false;
    return true;
-  } else if (content == "true" || content == "True" || content == "TRUE" ||
+  } else if (
+      content == "true" || content == "True" || content == "TRUE" ||
      content == "1") {
    *value = true;
    return true;
  } else {
    GlobalInitStream()
-        << "Caffe2 flag error: Cannot convert argument to bool: "
-        << content << std::endl
+        << "C10 flag error: Cannot convert argument to bool: " << content
+        << std::endl
        << "Note that if you are passing in a bool flag, you need to "
           "explicitly specify it, like --arg=True or --arg True. Otherwise, "
           "the next argument may be inadvertently used as the argument, "
@ -225,6 +201,6 @@ C10_EXPORT bool Caffe2FlagParser::Parse<bool>(
  }
 }

-#endif  // CAFFE2_USE_GFLAGS
+} // namespace c10

-}  // namespace caffe2
+#endif // C10_USE_GFLAGS
--- a/caffe2/VERSION_NUMBER
+++ b/caffe2/VERSION_NUMBER
@ -1 +1 @@
-0.8.2
+0.8.2
--- a/caffe2/contrib/cuda-convnet2/cudaconvnet/src/jpeg.cpp
+++ b/caffe2/contrib/cuda-convnet2/cudaconvnet/src/jpeg.cpp
@ -132,4 +132,4 @@ void DecoderThread::crop(int64 i, int64 src_width, int64 src_height, bool flip,
            }
        }
    }
-}
+}
--- a/caffe2/contrib/cuda-convnet2/make-data/pyext/init.py
+++ b/caffe2/contrib/cuda-convnet2/make-data/pyext/init.py
@ -10,4 +10,5 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
+# limitations under the License.
+from __future__ import absolute_import, division, print_function, unicode_literals
--- a/caffe2/contrib/cuda-convnet2/python_util/init.py
+++ b/caffe2/contrib/cuda-convnet2/python_util/init.py
@ -10,4 +10,5 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
+# limitations under the License.
+from __future__ import absolute_import, division, print_function, unicode_literals
--- a/caffe2/contrib/nervana/nervana.h
+++ b/caffe2/contrib/nervana/nervana.h
@ -9,7 +9,7 @@
 /**
 * A flag that specifies the nervana cubin path.
 */
-CAFFE2_DECLARE_string(nervana_cubin_path);
+C10_DECLARE_string(nervana_cubin_path);

 namespace caffe2 {

--- a/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc
+++ b/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc
@ -11,7 +11,7 @@

 #include <gtest/gtest.h>

-CAFFE2_DECLARE_string(caffe_test_root);
+C10_DECLARE_string(caffe_test_root);

 namespace caffe2 {

--- a/caffe2/contrib/nervana/nervana_init_gpu.cc
+++ b/caffe2/contrib/nervana/nervana_init_gpu.cc
@ -3,11 +3,11 @@

 #include "nervana_c_api.h"

-
-CAFFE2_DEFINE_string(nervana_cubin_path,
-                     "/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/cubin/",
-                     "The cubin path for nervana kernels. Currently defaulted "
-                     "to the internal fb deployment path.");
+C10_DEFINE_string(
+    nervana_cubin_path,
+    "/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/cubin/",
+    "The cubin path for nervana kernels. Currently defaulted "
+    "to the internal fb deployment path.");

 namespace caffe2 {

--- a/caffe2/contrib/nnpack/nnpack_ops.cc
+++ b/caffe2/contrib/nnpack/nnpack_ops.cc
@ -13,11 +13,13 @@
 #include "caffe2/utils/math.h"
 #include "nnpack.h"

-CAFFE2_DEFINE_int(
-    caffe2_nnpack_num_threads, 1,
+C10_DEFINE_int(
+    caffe2_nnpack_num_threads,
+    1,
    "The number of nnpack pthreadpool threads.");
-CAFFE2_DEFINE_bool(
-    caffe2_nnpack_use_mkl_num_threads, true,
+C10_DEFINE_bool(
+    caffe2_nnpack_use_mkl_num_threads,
+    true,
    "If MKL is built, this sets nnpack to use the same number of threads as "
    "MKL does. This overrides caffe2_nnpack_num_threads if set.");

@ -74,8 +76,8 @@ pthreadpool_t nnpack_threadpool() {
    enum nnp_status nnpack_status = nnp_initialize();
    CAFFE_ENFORCE(
        nnpack_status == nnp_status_success, "NNPack is not supported here!");
-    int num_threads = FLAGS_caffe2_nnpack_num_threads;
-    if (FLAGS_caffe2_nnpack_use_mkl_num_threads) {
+    int num_threads = c10::FLAGS_caffe2_nnpack_num_threads;
+    if (c10::FLAGS_caffe2_nnpack_use_mkl_num_threads) {
 #ifdef CAFFE2_USE_MKL
      num_threads = mkl_get_max_threads();
 #else
--- a/caffe2/contrib/prof/htrace_conf.cc
+++ b/caffe2/contrib/prof/htrace_conf.cc
@ -4,10 +4,7 @@
 #include <algorithm>
 #include <ctime>

-CAFFE2_DEFINE_string(
-    caffe2_htrace_span_log_path,
-    "",
-    "Span log path for htrace");
+C10_DEFINE_string(caffe2_htrace_span_log_path, "", "Span log path for htrace");

 namespace caffe2 {

@ -35,12 +32,12 @@ const string defaultHTraceConf(const string& net_name) {
  stream << HTRACE_SPAN_RECEIVER_KEY << "=local.file;";
  stream << HTRACE_SAMPLER_KEY << "=always;";

-  if (FLAGS_caffe2_htrace_span_log_path.empty()) {
+  if (c10::FLAGS_caffe2_htrace_span_log_path.empty()) {
    stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "=/tmp/htrace_" << net_name_copy
           << "_span_log_" << datetime << ";";
  } else {
    stream << HTRACE_LOCAL_FILE_RCV_PATH_KEY << "="
-           << FLAGS_caffe2_htrace_span_log_path << ";";
+           << c10::FLAGS_caffe2_htrace_span_log_path << ";";
  }

  return stream.str();
--- a/caffe2/contrib/prof/htrace_conf.h
+++ b/caffe2/contrib/prof/htrace_conf.h
@ -2,10 +2,10 @@

 #include "caffe2/core/flags.h"

-CAFFE2_DECLARE_string(caffe2_htrace_span_log_path);
+C10_DECLARE_string(caffe2_htrace_span_log_path);

 namespace caffe2 {

-const string defaultHTraceConf(const string& net_name);
+const std::string defaultHTraceConf(const std::string& net_name);

 } // namespace caffe2
--- a/caffe2/core/allocator.cc
+++ b/caffe2/core/allocator.cc
@ -2,12 +2,12 @@
 #include "caffe2/core/logging.h"
 #include "caffe2/core/typeid.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_report_cpu_memory_usage,
    false,
    "If set, print out detailed memory usage");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_cpu_allocator_do_zero_fill,
    true,
    "If set, do memory zerofilling when allocating on CPU");
--- a/caffe2/core/allocator.h
+++ b/caffe2/core/allocator.h
@ -8,8 +8,8 @@
 #include "caffe2/core/logging.h"
 #include "caffe2/core/numa.h"

-CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
-CAFFE2_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
+C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
+C10_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);

 namespace caffe2 {

@ -58,10 +58,10 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
    CAFFE_ENFORCE(data);
    // move data to a thread's NUMA node
    NUMAMove(data, nbytes, GetCurrentNUMANode());
-    if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
+    if (c10::FLAGS_caffe2_cpu_allocator_do_zero_fill) {
      memset(data, 0, nbytes);
    }
-    if (FLAGS_caffe2_report_cpu_memory_usage) {
+    if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
      reporter_.New(data, nbytes);
      return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
    }
@ -84,7 +84,7 @@ struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
  }

  at::DeleterFnPtr raw_deleter() const override {
-    if (FLAGS_caffe2_report_cpu_memory_usage) {
+    if (c10::FLAGS_caffe2_report_cpu_memory_usage) {
      return &ReportAndDelete;
    }
    return &Delete;
--- a/caffe2/core/blob_serialization.cc
+++ b/caffe2/core/blob_serialization.cc
@ -6,17 +6,17 @@
 #include "caffe2/core/blob.h"
 #include "caffe2/utils/proto_utils.h"

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_tensor_chunk_size,
    1000000,
    "Chunk size to split tensor data into");

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_max_tensor_serializer_threads,
    16,
    "Maximal number of threads that can be used for tensor serialization");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_serialize_fp16_as_bytes,
    false,
    "Serialize FLOAT16 tensors using byte_data field");
@ -102,7 +102,7 @@ void TensorSerializer::SerializeWithChunkSize(
  if (chunk_size == kNoChunking) {
    chunk_size = tensor.size() + 1; // to account for empty tensors
  } else if (chunk_size == kDefaultChunkSize) {
-    chunk_size = FLAGS_caffe2_tensor_chunk_size;
+    chunk_size = c10::FLAGS_caffe2_tensor_chunk_size;
  }

  auto processChunk = [&](int64_t chunkStart) {
@ -129,7 +129,7 @@ void TensorSerializer::SerializeWithChunkSize(
    }
  };
  if (tensor.size() > chunk_size) {
-    for (int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
+    for (int i = 0; i < c10::FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
      futures.emplace_back(std::async(std::launch::async, task));
    }
  }
@ -268,7 +268,7 @@ void TensorSerializer::Serialize(
          uniq_ptr.get());
      break;
    case TensorProto_DataType_FLOAT16: {
-      if (FLAGS_caffe2_serialize_fp16_as_bytes) {
+      if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
        const int kValue = 1;
        CAFFE_ENFORCE_EQ(
            reinterpret_cast<const char*>(&kValue)[0],
--- a/caffe2/core/blob_serialization.h
+++ b/caffe2/core/blob_serialization.h
@ -13,9 +13,9 @@
 #include "caffe2/core/types.h"
 #include "caffe2/utils/simple_queue.h"

-CAFFE2_DECLARE_int(caffe2_tensor_chunk_size);
-CAFFE2_DECLARE_int(caffe2_max_tensor_serializer_threads);
-CAFFE2_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
+C10_DECLARE_int(caffe2_tensor_chunk_size);
+C10_DECLARE_int(caffe2_max_tensor_serializer_threads);
+C10_DECLARE_bool(caffe2_serialize_fp16_as_bytes);

 namespace caffe2 {

--- a/caffe2/core/blob_test.cc
+++ b/caffe2/core/blob_test.cc
@ -18,9 +18,9 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/utils/proto_utils.h"

-CAFFE2_DEFINE_int64(caffe2_test_big_tensor_size, 100000000, "");
-CAFFE2_DECLARE_int(caffe2_tensor_chunk_size);
-CAFFE2_DECLARE_bool(caffe2_serialize_fp16_as_bytes);
+C10_DEFINE_int64(caffe2_test_big_tensor_size, 100000000, "");
+C10_DECLARE_int(caffe2_tensor_chunk_size);
+C10_DECLARE_bool(caffe2_serialize_fp16_as_bytes);

 namespace caffe2 {
 using namespace ::caffe2::db;
@ -455,8 +455,8 @@ TYPED_TEST(TensorCPUTest, NoLongerSharesAfterFreeMemory) {

 TYPED_TEST(TensorCPUTest, KeepOnShrink) {
  // Set flags (defaults)
-  FLAGS_caffe2_keep_on_shrink = true;
-  FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
+  c10::FLAGS_caffe2_keep_on_shrink = true;
+  c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;

  vector<int> dims{2, 3, 5};
  Tensor tensor(dims, CPU);
@ -486,8 +486,8 @@ TYPED_TEST(TensorCPUTest, KeepOnShrink) {

 TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
  // Set flags
-  FLAGS_caffe2_keep_on_shrink = true;
-  FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);
+  c10::FLAGS_caffe2_keep_on_shrink = true;
+  c10::FLAGS_caffe2_max_keep_on_shrink_memory = 8 * 4 * sizeof(TypeParam);

  vector<int> dims{1, 8, 8};
  Tensor tensor(dims, CPU);
@ -507,7 +507,7 @@ TYPED_TEST(TensorCPUTest, MaxKeepOnShrink) {
  //EXPECT_NE(ptr, new_ptr);

  // Restore default flags
-  FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
+  c10::FLAGS_caffe2_max_keep_on_shrink_memory = LLONG_MAX;
 }

 TYPED_TEST(TensorCPUDeathTest, CannotAccessRawDataWhenEmpty) {
@ -710,7 +710,7 @@ TEST(TensorTest, Half) {
  const TensorProto& tensor_proto = proto.tensor();
  EXPECT_EQ(
      tensor_proto.data_type(), TypeMetaToDataType(TypeMeta::Make<at::Half>()));
-  if (FLAGS_caffe2_serialize_fp16_as_bytes) {
+  if (c10::FLAGS_caffe2_serialize_fp16_as_bytes) {
    EXPECT_EQ(tensor_proto.byte_data().size(), 2 * kSize);
    for (int i = 0; i < kSize; ++i) {
      auto value = tensor->mutable_data<at::Half>()[i].x;
@ -850,8 +850,8 @@ TYPED_TEST_CASE(TypedTensorTest, TensorDataTypes);

 TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
  int64_t d1 = 2;
-  int64_t d2 = FLAGS_caffe2_test_big_tensor_size
-      ? FLAGS_caffe2_test_big_tensor_size / d1
+  int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
+      ? c10::FLAGS_caffe2_test_big_tensor_size / d1
      : static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
  int64_t size = d1 * d2;
  string db_source = (string)std::tmpnam(nullptr);
@ -1024,8 +1024,8 @@ TEST(ContentChunks, Serialization) {

 TEST(CustomChunkSize, BigTensorSerialization) {
  int64_t d1 = 2;
-  int64_t d2 = FLAGS_caffe2_test_big_tensor_size
-      ? FLAGS_caffe2_test_big_tensor_size / d1
+  int64_t d2 = c10::FLAGS_caffe2_test_big_tensor_size
+      ? c10::FLAGS_caffe2_test_big_tensor_size / d1
      : static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
  int64_t size = d1 * d2;

--- a/caffe2/core/common_gpu.cc
+++ b/caffe2/core/common_gpu.cc
@ -10,7 +10,7 @@
 #include "caffe2/core/init.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_cuda_full_device_control,
    false,
    "If true, assume all the cudaSetDevice and cudaGetDevice calls will be "
@ -89,7 +89,7 @@ int NumCudaDevices() {

 namespace {
 int gDefaultGPUID = 0;
-// Only used when FLAGS_caffe2_cuda_full_device_control is set true.
+// Only used when c10::FLAGS_caffe2_cuda_full_device_control is set true.
 thread_local int gCurrentDevice = -1;
 }  // namespace

@ -108,7 +108,7 @@ void SetDefaultGPUID(const int deviceid) {
 int GetDefaultGPUID() { return gDefaultGPUID; }

 int CaffeCudaGetDevice() {
-  if (FLAGS_caffe2_cuda_full_device_control) {
+  if (c10::FLAGS_caffe2_cuda_full_device_control) {
    if (gCurrentDevice < 0) {
      CUDA_ENFORCE(cudaGetDevice(&gCurrentDevice));
    }
@ -121,7 +121,7 @@ int CaffeCudaGetDevice() {
 }

 void CaffeCudaSetDevice(const int id) {
-  if (FLAGS_caffe2_cuda_full_device_control) {
+  if (c10::FLAGS_caffe2_cuda_full_device_control) {
    if (gCurrentDevice != id) {
      CUDA_ENFORCE(cudaSetDevice(id));
    }
--- a/caffe2/core/context.h
+++ b/caffe2/core/context.h
@ -16,7 +16,7 @@
 #include <ATen/core/ATenCoreTest.h>
 #include <ATen/core/ArrayRef.h>

-CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
+C10_DECLARE_bool(caffe2_report_cpu_memory_usage);

 namespace caffe2 {

--- a/caffe2/core/context_gpu.cu
+++ b/caffe2/core/context_gpu.cu
@ -21,7 +21,7 @@
 #include "caffe2/core/tensor.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    caffe2_cuda_memory_pool,
    "",
    "Sets the memory pool used by caffe2. Possible values are "
@ -29,30 +29,38 @@ CAFFE2_DEFINE_string(

 // For description of CUB caching allocator configuration, see
 // https://nvlabs.github.io/cub/structcub_1_1_caching_device_allocator.html
-CAFFE2_DEFINE_int(caffe2_cub_bin_growth, 8,
-             "If using cub as the memory allocator, sets the growth of bins "
-             "used by the cub pool.");
-CAFFE2_DEFINE_int(caffe2_cub_min_bin, 3,
-             "If using cub as the memory allocator, sets the min number of "
-             "bins.");
-CAFFE2_DEFINE_int(caffe2_cub_max_bin, 10,
-             "If using cub as the memory allocator, sets the max number of "
-             "bins.");
-CAFFE2_DEFINE_int(caffe2_cub_max_managed_mb, 10 * 1024,
-             "If using cub as the memory allocators, sets the maximum amount "
-             "of memory managed in gigabytes");
+C10_DEFINE_int(
+    caffe2_cub_bin_growth,
+    8,
+    "If using cub as the memory allocator, sets the growth of bins "
+    "used by the cub pool.");
+C10_DEFINE_int(
+    caffe2_cub_min_bin,
+    3,
+    "If using cub as the memory allocator, sets the min number of "
+    "bins.");
+C10_DEFINE_int(
+    caffe2_cub_max_bin,
+    10,
+    "If using cub as the memory allocator, sets the max number of "
+    "bins.");
+// Size limit for the cub pool, expressed in megabytes: the value is multiplied
+// by 1024 * 1024 where the cub allocator is constructed in this file.
+C10_DEFINE_int(
+    caffe2_cub_max_managed_mb,
+    10 * 1024,
+    "If using cub as the memory allocators, sets the maximum amount "
+    "of memory managed in megabytes");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_cub_print_allocation_events,
    false,
    "If true CachingDeviceAllocator will print allocation and deallocation "
    "events to stdout.");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_gpu_memory_tracking,
    false,
    "If set, logs changes in GPU memory allocations");
-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_gpu_memory_report_interval_mb,
    128,
    "The threshold in MB on how frequently to report memory changes");
@ -168,12 +176,12 @@ static void SetUpCub() {
  // Sets up the cub memory pool
  try {
    g_cub_allocator.reset(new cub::CachingDeviceAllocator(
-        FLAGS_caffe2_cub_bin_growth,
-        FLAGS_caffe2_cub_min_bin,
-        FLAGS_caffe2_cub_max_bin,
-        size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
+        c10::FLAGS_caffe2_cub_bin_growth,
+        c10::FLAGS_caffe2_cub_min_bin,
+        c10::FLAGS_caffe2_cub_max_bin,
+        size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
        false,
-        FLAGS_caffe2_cub_print_allocation_events));
+        c10::FLAGS_caffe2_cub_print_allocation_events));
  } catch (...) {
    CAFFE_THROW("Some error happened at cub initialization.");
  }
@ -181,22 +189,23 @@ static void SetUpCub() {
 }

 static void Caffe2SetCUDAMemoryPool() {
-  if (FLAGS_caffe2_cuda_memory_pool == "" ||
-      FLAGS_caffe2_cuda_memory_pool == "none") {
+  if (c10::FLAGS_caffe2_cuda_memory_pool == "" ||
+      c10::FLAGS_caffe2_cuda_memory_pool == "none") {
    g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
-  } else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
+  } else if (c10::FLAGS_caffe2_cuda_memory_pool == "cnmem") {
    CAFFE_THROW("CNMEM is no longer used by Caffe2. Use cub instead. "
                "This error message may go away in the future.");
-  } else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
+  } else if (c10::FLAGS_caffe2_cuda_memory_pool == "cub") {
    // Sets up cub.
    g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
    SetUpCub();
-  } else if (FLAGS_caffe2_cuda_memory_pool == "thc") {
+  } else if (c10::FLAGS_caffe2_cuda_memory_pool == "thc") {
    g_cuda_memory_pool_type = CudaMemoryPoolType::THC;
    g_thc_allocator.reset(new THCCachingAllocator());
  } else {
-    CAFFE_THROW("Unrecognized cuda memory pool type: ",
-                FLAGS_caffe2_cuda_memory_pool);
+    CAFFE_THROW(
+        "Unrecognized cuda memory pool type: ",
+        c10::FLAGS_caffe2_cuda_memory_pool);
  }
 }

@ -274,7 +283,7 @@ std::mutex& CUDAContext::mutex() {
 std::vector<long> CUDAContext::TotalMemoryByGpu() {
  std::lock_guard<std::mutex> lock(CUDAContext::mutex());
  CAFFE_ENFORCE(
-      FLAGS_caffe2_gpu_memory_tracking,
+      c10::FLAGS_caffe2_gpu_memory_tracking,
      "Pass --caffe2_gpu_memory_tracking to enable memory stats");
  return g_total_by_gpu_map;
 }
@ -282,7 +291,7 @@ std::vector<long> CUDAContext::TotalMemoryByGpu() {
 std::vector<long> CUDAContext::MaxMemoryByGpu() {
  std::lock_guard<std::mutex> lock(CUDAContext::mutex());
  CAFFE_ENFORCE(
-      FLAGS_caffe2_gpu_memory_tracking,
+      c10::FLAGS_caffe2_gpu_memory_tracking,
      "Pass --caffe2_gpu_memory_tracking to enable memory stats");
  return g_max_by_gpu_map;
 }
@ -295,7 +304,7 @@ void TrackMemoryAlloc(size_t nbytes) {
      max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
  g_total_mem += nbytes;
  if (g_total_mem - g_last_rep >
-      FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
+      c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
    for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
      long t = g_total_by_gpu_map[gpu];
      long max_t = g_max_by_gpu_map[gpu];
@ -329,13 +338,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
    static Caffe2CudaInitializerHelper g_cuda_initializer_;
    void* ptr = nullptr;

-    if (FLAGS_caffe2_gpu_memory_tracking) {
+    if (c10::FLAGS_caffe2_gpu_memory_tracking) {
      TrackMemoryAlloc(nbytes);
    }
    switch (g_cuda_memory_pool_type) {
      case CudaMemoryPoolType::NONE:
        CUDA_ENFORCE(cudaMalloc(&ptr, nbytes));
-        if (FLAGS_caffe2_gpu_memory_tracking) {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr] = nbytes;
          g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
        }
@ -345,13 +354,13 @@ struct DefaultCUDAAllocator final : public at::Allocator {
        g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
        VLOG(2) << "CUB allocating pointer " << ptr << " on device "
                << CaffeCudaGetDevice();
-        if (FLAGS_caffe2_gpu_memory_tracking) {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr] = nbytes;
        }
        return {ptr, ptr, &Delete, at::Device(CUDA)};
      case CudaMemoryPoolType::THC:
        CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
-        if (FLAGS_caffe2_gpu_memory_tracking) {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr] = nbytes;
          g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
        }
@ -368,7 +377,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
  static void Delete(void* ptr) {
    // lock the mutex
    std::lock_guard<std::mutex> lock(CUDAContext::mutex());
-    if (FLAGS_caffe2_gpu_memory_tracking) {
+    if (c10::FLAGS_caffe2_gpu_memory_tracking) {
      auto sz_it = g_size_map.find(ptr);
      DCHECK(sz_it != g_size_map.end());
      auto aff_it = g_cuda_device_affiliation.find(ptr);
@ -393,7 +402,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
                     << cudaGetErrorString(error);
        }

-        if (FLAGS_caffe2_gpu_memory_tracking) {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
        }

@ -409,7 +418,7 @@ struct DefaultCUDAAllocator final : public at::Allocator {
      }
      case CudaMemoryPoolType::THC: {
        CUDA_ENFORCE(g_thc_allocator->Free(ptr));
-        if (FLAGS_caffe2_gpu_memory_tracking) {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_cuda_device_affiliation.erase(g_cuda_device_affiliation.find(ptr));
        }
        break;
--- a/caffe2/core/context_gpu_test.cc
+++ b/caffe2/core/context_gpu_test.cc
@ -7,7 +7,7 @@
 #include "caffe2/core/context_gpu.h"
 #include <gtest/gtest.h>

-CAFFE2_DECLARE_bool(caffe2_cuda_full_device_control);
+C10_DECLARE_bool(caffe2_cuda_full_device_control);

 namespace caffe2 {

@ -37,7 +37,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) {

 TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
  // For a while, set full device control to be true.
-  FLAGS_caffe2_cuda_full_device_control = true;
+  c10::FLAGS_caffe2_cuda_full_device_control = true;
  for (int i = 0; i < NumCudaDevices(); ++i) {
    CaffeCudaSetDevice(i);
    EXPECT_EQ(CaffeCudaGetDevice(), i);
@ -46,7 +46,7 @@ TEST(CUDAContextTest, TestSetGetDeviceWithCaffeMode) {
    CaffeCudaSetDevice(i);
    EXPECT_EQ(CaffeCudaGetDevice(), i);
  }
-  FLAGS_caffe2_cuda_full_device_control = false;
+  c10::FLAGS_caffe2_cuda_full_device_control = false;
 }

 TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
--- a/caffe2/core/flags.h
+++ b/caffe2/core/flags.h
@ -1,201 +1,4 @@
-/**
- * @file flags.h
- * @brief Commandline flags support for Caffe2.
- *
- * This is a portable commandline flags tool for caffe2, so we can optionally
- * choose to use gflags or a lightweighted custom implementation if gflags is
- * not possible on a certain platform. If you have gflags installed, set the
- * macro CAFFE2_USE_GFLAGS will seamlessly route everything to gflags.
- *
- * To define a flag foo of type bool default to true, do the following in the
- * *global* namespace:
- *     CAFFE2_DEFINE_bool(foo, true, "An example.");
- *
- * To use it in another .cc file, you can use CAFFE2_DECLARE_* as follows:
- *     CAFFE2_DECLARE_bool(foo);
- *
- * In both cases, you can then access the flag via caffe2::FLAGS_foo.
- */
+#pragma once

-#ifndef CAFFE2_CORE_FLAGS_H_
-#define CAFFE2_CORE_FLAGS_H_
-
-#include "c10/util/Registry.h"
+#include "c10/util/Flags.h"
 #include "caffe2/core/common.h"
-
-namespace caffe2 {
-/**
- * Sets the usage message when a commandline tool is called with "--help".
- */
-CAFFE2_API void SetUsageMessage(const string& str);
-
-/**
- * Returns the usage message for the commandline tool set by SetUsageMessage.
- */
-CAFFE2_API const char* UsageMessage();
-
-/**
- * Parses the commandline flags.
- *
- * This command parses all the commandline arguments passed in via pargc
- * and argv. Once it is finished, partc and argv will contain the remaining
- * commandline args that caffe2 does not deal with. Note that following
- * convention, argv[0] contains the binary name and is not parsed.
- */
-CAFFE2_API bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv);
-/**
- * Checks if the commandline flags has already been passed.
- */
-CAFFE2_API bool CommandLineFlagsHasBeenParsed();
-
-}  // namespace caffe2
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Below are gflags and non-gflags specific implementations.
-////////////////////////////////////////////////////////////////////////////////
-
-#ifdef CAFFE2_USE_GFLAGS
-
-////////////////////////////////////////////////////////////////////////////////
-// Begin gflags section: most functions are basically rerouted to gflags.
-////////////////////////////////////////////////////////////////////////////////
-
-#include <gflags/gflags.h>
-
-// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags.
-// Using GFLAGS_GFLAGS_H_ to capture this change.
-#ifndef GFLAGS_GFLAGS_H_
-namespace gflags = google;
-#endif  // GFLAGS_GFLAGS_H_
-
-// Motivation about the gflags wrapper:
-// (1) We would need to make sure that the gflags version and the non-gflags
-// version of Caffe2 are going to expose the same flags abstraction. One should
-// explicitly use caffe2::FLAGS_flag_name to access the flags.
-// (2) For flag names, it is recommended to start with caffe2_ to distinguish it
-// from regular gflags flags. For example, do
-//    CAFFE2_DEFINE_BOOL(caffe2_my_flag, true, "An example");
-// to allow one to use caffe2::FLAGS_caffe2_my_flag.
-// (3) Gflags has a design issue that does not properly expose the global flags,
-// if one builds the library with -fvisibility=hidden. The current gflags (as of
-// Aug 2018) only deals with the Windows case using dllexport, and not the Linux
-// counterparts. As a result, we will explciitly use C10_EXPORT to export the
-// flags defined in Caffe2. This is done via a global reference, so the flag
-// itself is not duplicated - under the hood it is the same global gflags flag.
-#define CAFFE2_GFLAGS_DEF_WRAPPER(                     \
-    type, real_type, name, default_value, help_str)    \
-  DEFINE_##type(name, default_value, help_str);        \
-  namespace caffe2 {                                   \
-  C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \
-  }
-
-#define CAFFE2_DEFINE_int(name, default_value, help_str)                       \
-  CAFFE2_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str)
-#define CAFFE2_DEFINE_int64(name, default_value, help_str)                     \
-  CAFFE2_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str)
-#define CAFFE2_DEFINE_double(name, default_value, help_str)                    \
-  CAFFE2_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str)
-#define CAFFE2_DEFINE_bool(name, default_value, help_str)                      \
-  CAFFE2_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str)
-#define CAFFE2_DEFINE_string(name, default_value, help_str)                    \
-  CAFFE2_GFLAGS_DEF_WRAPPER(                                                   \
-      string, ::fLS::clstring, name, default_value, help_str)
-
-// DECLARE_typed_var should be used in header files and in the global namespace.
-#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \
-  DECLARE_##type(name);                                      \
-  namespace caffe2 {                                         \
-  C10_IMPORT extern real_type& FLAGS_##name;                 \
-  } // namespace caffe2
-
-#define CAFFE2_DECLARE_int(name)                                               \
-  CAFFE2_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name)
-#define CAFFE2_DECLARE_int64(name)                                             \
-  CAFFE2_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name)
-#define CAFFE2_DECLARE_double(name)                                            \
-  CAFFE2_GFLAGS_DECLARE_WRAPPER(double, double, name)
-#define CAFFE2_DECLARE_bool(name)                                              \
-  CAFFE2_GFLAGS_DECLARE_WRAPPER(bool, bool, name)
-#define CAFFE2_DECLARE_string(name)                                            \
-  CAFFE2_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name)
-
-////////////////////////////////////////////////////////////////////////////////
-// End gflags section.
-////////////////////////////////////////////////////////////////////////////////
-
-#else   // CAFFE2_USE_GFLAGS
-
-////////////////////////////////////////////////////////////////////////////////
-// Begin non-gflags section: providing equivalent functionality.
-////////////////////////////////////////////////////////////////////////////////
-
-namespace caffe2 {
-
-class CAFFE2_API Caffe2FlagParser {
- public:
-  Caffe2FlagParser() {}
-  bool success() { return success_; }
-
- protected:
-  template <typename T>
-  bool Parse(const string& content, T* value);
-  bool success_;
-};
-
-C10_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&);
-
-}  // namespace caffe2
-
-// The macros are defined outside the caffe2 namespace. In your code, you should
-// write the CAFFE2_DEFINE_* and CAFFE2_DECLARE_* macros outside any namespace
-// as well.
-
-#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str)          \
-  namespace caffe2 {                                                          \
-  C10_EXPORT type FLAGS_##name = default_value;                               \
-  namespace {                                                                 \
-  class Caffe2FlagParser_##name : public Caffe2FlagParser {                   \
-   public:                                                                    \
-    explicit Caffe2FlagParser_##name(const string& content) {                 \
-      success_ = Caffe2FlagParser::Parse<type>(content, &FLAGS_##name);       \
-    }                                                                         \
-  };                                                                          \
-  }                                                                           \
-  RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name(                 \
-      #name,                                                                  \
-      Caffe2FlagsRegistry(),                                                  \
-      RegistererCaffe2FlagsRegistry::DefaultCreator<Caffe2FlagParser_##name>, \
-      "(" #type ", default " #default_value ") " help_str);                   \
-  }
-
-#define CAFFE2_DEFINE_int(name, default_value, help_str)                       \
-  CAFFE2_DEFINE_typed_var(int, name, default_value, help_str)
-#define CAFFE2_DEFINE_int64(name, default_value, help_str)                     \
-  CAFFE2_DEFINE_typed_var(int64_t, name, default_value, help_str)
-#define CAFFE2_DEFINE_double(name, default_value, help_str)                    \
-  CAFFE2_DEFINE_typed_var(double, name, default_value, help_str)
-#define CAFFE2_DEFINE_bool(name, default_value, help_str)                      \
-  CAFFE2_DEFINE_typed_var(bool, name, default_value, help_str)
-#define CAFFE2_DEFINE_string(name, default_value, help_str)                    \
-  CAFFE2_DEFINE_typed_var(string, name, default_value, help_str)
-
-// DECLARE_typed_var should be used in header files and in the global namespace.
-#define CAFFE2_DECLARE_typed_var(type, name) \
-  namespace caffe2 {                         \
-  C10_IMPORT extern type FLAGS_##name;       \
-  } // namespace caffe2
-
-#define CAFFE2_DECLARE_int(name) CAFFE2_DECLARE_typed_var(int, name)
-#define CAFFE2_DECLARE_int64(name) CAFFE2_DECLARE_typed_var(int64_t, name)
-#define CAFFE2_DECLARE_double(name) CAFFE2_DECLARE_typed_var(double, name)
-#define CAFFE2_DECLARE_bool(name) CAFFE2_DECLARE_typed_var(bool, name)
-#define CAFFE2_DECLARE_string(name) CAFFE2_DECLARE_typed_var(string, name)
-
-////////////////////////////////////////////////////////////////////////////////
-// End non-gflags section.
-////////////////////////////////////////////////////////////////////////////////
-
-#endif  // CAFFE2_USE_GFLAGS
-
-#endif  // CAFFE2_CORE_FLAGS_H_
--- a/caffe2/core/flags_test.cc
+++ b/caffe2/core/flags_test.cc
@ -1,27 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/macros.h"
-#include "caffe2/core/flags.h"
-#include "caffe2/core/logging.h"
-
-CAFFE2_DEFINE_bool(caffe2_flags_test_only_flag, true, "Only used in test.");
-
-namespace caffe2 {
-
-TEST(FlagsTest, TestGflagsCorrectness) {
-#ifdef CAFFE2_USE_GFLAGS
-  EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true);
-  EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true);
-  // Change the caffe2 namespace and check global
-  FLAGS_caffe2_flags_test_only_flag = false;  
-  EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, false);
-  EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, false);
-  // Change global and check caffe2 namespace
-  ::FLAGS_caffe2_flags_test_only_flag = true;  
-  EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true);
-  EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true);
-#else  // CAFFE2_USE_GFLAGS
-  LOG(INFO) << "Caffe2 is not built with gflags. Nothing to test here.";
-#endif
-}
-
-} // namespace caffe2
--- a/caffe2/core/hip/common_hip.cc
+++ b/caffe2/core/hip/common_hip.cc
@ -8,16 +8,17 @@
 #include "caffe2/core/init.h"
 #include "caffe2/core/logging.h"

-CAFFE2_DEFINE_bool(caffe2_hip_full_device_control,
-                   false,
-                   "If true, assume all the hipSetDevice and hipGetDevice calls will be "
-                   "controlled by Caffe2, and non-Caffe2 code will ensure that the entry and "
-                   "exit point has the same cuda device. Under the hood, Caffe2 will use "
-                   "thread local variables to cache the device, in order to speed up set and "
-                   "get device calls. This is an experimental feature that may have non "
-                   "trivial side effects, so use it with care and only enable it if you are "
-                   "absolutely sure. Also, this flag should not be changed after the program "
-                   "initializes.");
+C10_DEFINE_bool(
+    caffe2_hip_full_device_control,
+    false,
+    "If true, assume all the hipSetDevice and hipGetDevice calls will be "
+    "controlled by Caffe2, and non-Caffe2 code will ensure that the entry and "
+    "exit point has the same cuda device. Under the hood, Caffe2 will use "
+    "thread local variables to cache the device, in order to speed up set and "
+    "get device calls. This is an experimental feature that may have non "
+    "trivial side effects, so use it with care and only enable it if you are "
+    "absolutely sure. Also, this flag should not be changed after the program "
+    "initializes.");

 namespace caffe2 {

@ -88,7 +89,7 @@ int NumHipDevices()

 namespace {
 int gDefaultGPUID = 0;
-// Only used when FLAGS_caffe2_hip_full_device_control is set true.
+// Only used when c10::FLAGS_caffe2_hip_full_device_control is set true.
 thread_local int gCurrentDevice = -1;
 } // namespace

@ -108,36 +109,28 @@ int GetDefaultGPUID() { return gDefaultGPUID; }

 int CaffeHipGetDevice()
 {
-    if(FLAGS_caffe2_hip_full_device_control)
-    {
-        if(gCurrentDevice < 0)
-        {
-            HIP_ENFORCE(hipGetDevice(&gCurrentDevice));
-        }
-        return gCurrentDevice;
-    }
-    else
-    {
-        int gpu_id = 0;
-        HIP_ENFORCE(hipGetDevice(&gpu_id));
-        return gpu_id;
+  if (c10::FLAGS_caffe2_hip_full_device_control) {
+    if (gCurrentDevice < 0) {
+      HIP_ENFORCE(hipGetDevice(&gCurrentDevice));
    }
+    return gCurrentDevice;
+  } else {
+    int gpu_id = 0;
+    HIP_ENFORCE(hipGetDevice(&gpu_id));
+    return gpu_id;
+  }
 }

 void CaffeHipSetDevice(const int id)
 {
-    if(FLAGS_caffe2_hip_full_device_control)
-    {
-        if(gCurrentDevice != id)
-        {
-            HIP_ENFORCE(hipSetDevice(id));
-        }
-        gCurrentDevice = id;
-    }
-    else
-    {
-        HIP_ENFORCE(hipSetDevice(id));
+  if (c10::FLAGS_caffe2_hip_full_device_control) {
+    if (gCurrentDevice != id) {
+      HIP_ENFORCE(hipSetDevice(id));
    }
+    gCurrentDevice = id;
+  } else {
+    HIP_ENFORCE(hipSetDevice(id));
+  }
 }

 int GetGPUIDForPointer(const void* ptr)
--- a/caffe2/core/hip/context_hip.cc
+++ b/caffe2/core/hip/context_hip.cc
@ -15,40 +15,48 @@
 #include "caffe2/core/tensor.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_string(caffe2_hip_memory_pool,
-                     "",
-                     "Sets the memory pool used by caffe2. Possible values are "
-                     "none, cnmen and cub.");
+C10_DEFINE_string(
+    caffe2_hip_memory_pool,
+    "",
+    "Sets the memory pool used by caffe2. Possible values are "
+    "none, cnmem and cub.");

 // For description of CUB caching allocator configuration, see
 // https://nvlabs.github.io/cub/structcub_1_1_caching_device_allocator.html
-CAFFE2_DEFINE_int(caffe2_cub_bin_growth,
-                  8,
-                  "If using cub as the memory allocator, sets the growth of bins "
-                  "used by the cub pool.");
-CAFFE2_DEFINE_int(caffe2_cub_min_bin,
-                  3,
-                  "If using cub as the memory allocator, sets the min number of "
-                  "bins.");
-CAFFE2_DEFINE_int(caffe2_cub_max_bin,
-                  10,
-                  "If using cub as the memory allocator, sets the max number of "
-                  "bins.");
-CAFFE2_DEFINE_int(caffe2_cub_max_managed_mb,
-                  10 * 1024,
-                  "If using cub as the memory allocators, sets the maximum amount "
-                  "of memory managed in gigabytes");
-CAFFE2_DEFINE_bool(caffe2_cub_print_allocation_events,
-                   false,
-                   "If true CachingDeviceAllocator will print allocation and deallocation "
-                   "events to stdout.");
+C10_DEFINE_int(
+    caffe2_cub_bin_growth,
+    8,
+    "If using cub as the memory allocator, sets the growth of bins "
+    "used by the cub pool.");
+C10_DEFINE_int(
+    caffe2_cub_min_bin,
+    3,
+    "If using cub as the memory allocator, sets the min number of "
+    "bins.");
+C10_DEFINE_int(
+    caffe2_cub_max_bin,
+    10,
+    "If using cub as the memory allocator, sets the max number of "
+    "bins.");
+C10_DEFINE_int(
+    caffe2_cub_max_managed_mb,
+    10 * 1024,
+    "If using cub as the memory allocator, sets the maximum amount "
+    "of memory managed in megabytes");
+C10_DEFINE_bool(
+    caffe2_cub_print_allocation_events,
+    false,
+    "If true CachingDeviceAllocator will print allocation and deallocation "
+    "events to stdout.");

-CAFFE2_DEFINE_bool(caffe2_gpu_memory_tracking,
-                   false,
-                   "If set, logs changes in GPU memory allocations");
-CAFFE2_DEFINE_int(caffe2_gpu_memory_report_interval_mb,
-                  128,
-                  "The threshold in MB on how frequently to report memory changes");
+C10_DEFINE_bool(
+    caffe2_gpu_memory_tracking,
+    false,
+    "If set, logs changes in GPU memory allocations");
+C10_DEFINE_int(
+    caffe2_gpu_memory_report_interval_mb,
+    128,
+    "The threshold in MB on how frequently to report memory changes");

 namespace at {

@ -157,13 +165,13 @@ static void SetUpCub()
    // Sets up the cub memory pool
    try
    {
-        g_cub_allocator.reset(
-            new cub::CachingDeviceAllocator(FLAGS_caffe2_cub_bin_growth,
-                                            FLAGS_caffe2_cub_min_bin,
-                                            FLAGS_caffe2_cub_max_bin,
-                                            size_t(FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
-                                            false,
-                                            FLAGS_caffe2_cub_print_allocation_events));
+      g_cub_allocator.reset(new cub::CachingDeviceAllocator(
+          c10::FLAGS_caffe2_cub_bin_growth,
+          c10::FLAGS_caffe2_cub_min_bin,
+          c10::FLAGS_caffe2_cub_max_bin,
+          size_t(c10::FLAGS_caffe2_cub_max_managed_mb) * 1024L * 1024L,
+          false,
+          c10::FLAGS_caffe2_cub_print_allocation_events));
    }
    catch(...)
    {
@ -174,30 +182,25 @@ static void SetUpCub()

 static void Caffe2SetHIPMemoryPool()
 {
-    if(FLAGS_caffe2_hip_memory_pool == "" || FLAGS_caffe2_hip_memory_pool == "none")
-    {
-        g_hip_memory_pool_type = HipMemoryPoolType::NONE;
-    }
-    else if(FLAGS_caffe2_hip_memory_pool == "cnmem")
-    {
-        CAFFE_THROW("CNMEM is no longer used by Caffe2. Use cub instead. "
-                    "This error message may go away in the future.");
-    }
-    else if(FLAGS_caffe2_hip_memory_pool == "cub")
-    {
-        // Sets up cub.
-        g_hip_memory_pool_type = HipMemoryPoolType::CUB;
-        SetUpCub();
-    }
-    else if(FLAGS_caffe2_hip_memory_pool == "thc")
-    {
-        g_hip_memory_pool_type = HipMemoryPoolType::THC;
-        g_thc_allocator.reset(new THCCachingAllocator());
-    }
-    else
-    {
-        CAFFE_THROW("Unrecognized HIP memory pool type: ", FLAGS_caffe2_hip_memory_pool);
-    }
+  if (c10::FLAGS_caffe2_hip_memory_pool == "" ||
+      c10::FLAGS_caffe2_hip_memory_pool == "none") {
+    g_hip_memory_pool_type = HipMemoryPoolType::NONE;
+  } else if (c10::FLAGS_caffe2_hip_memory_pool == "cnmem") {
+    CAFFE_THROW(
+        "CNMEM is no longer used by Caffe2. Use cub instead. "
+        "This error message may go away in the future.");
+  } else if (c10::FLAGS_caffe2_hip_memory_pool == "cub") {
+    // Sets up cub.
+    g_hip_memory_pool_type = HipMemoryPoolType::CUB;
+    SetUpCub();
+  } else if (c10::FLAGS_caffe2_hip_memory_pool == "thc") {
+    g_hip_memory_pool_type = HipMemoryPoolType::THC;
+    g_thc_allocator.reset(new THCCachingAllocator());
+  } else {
+    CAFFE_THROW(
+        "Unrecognized HIP memory pool type: ",
+        c10::FLAGS_caffe2_hip_memory_pool);
+  }
 }

 // An initialization function that sets the CPU side to use pinned cpu
@ -281,16 +284,18 @@ std::mutex& HIPContext::mutex()
 std::vector<long> HIPContext::TotalMemoryByGpu()
 {
    std::lock_guard<std::mutex> lock(HIPContext::mutex());
-    CAFFE_ENFORCE(FLAGS_caffe2_gpu_memory_tracking,
-                  "Pass --caffe2_gpu_memory_tracking to enable memory stats");
+    CAFFE_ENFORCE(
+        c10::FLAGS_caffe2_gpu_memory_tracking,
+        "Pass --caffe2_gpu_memory_tracking to enable memory stats");
    return g_total_by_gpu_map;
 }

 std::vector<long> HIPContext::MaxMemoryByGpu()
 {
    std::lock_guard<std::mutex> lock(HIPContext::mutex());
-    CAFFE_ENFORCE(FLAGS_caffe2_gpu_memory_tracking,
-                  "Pass --caffe2_gpu_memory_tracking to enable memory stats");
+    CAFFE_ENFORCE(
+        c10::FLAGS_caffe2_gpu_memory_tracking,
+        "Pass --caffe2_gpu_memory_tracking to enable memory stats");
    return g_max_by_gpu_map;
 }

@ -301,27 +306,22 @@ void TrackMemoryAlloc(size_t nbytes)
    g_total_by_gpu_map[this_gpu] += nbytes;
    g_max_by_gpu_map[this_gpu] = std::max(g_max_by_gpu_map[this_gpu], g_total_by_gpu_map[this_gpu]);
    g_total_mem += nbytes;
-    if(g_total_mem - g_last_rep > FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024)
-    {
-        for(int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++)
-        {
-            long t     = g_total_by_gpu_map[gpu];
-            long max_t = g_max_by_gpu_map[gpu];
-            if(max_t > 0)
-            {
-                if(max_t != t)
-                {
-                    LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB"
-                              << " (max: " << max_t / 1024 / 1024 << " MB)";
-                }
-                else
-                {
-                    LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB";
-                }
-            }
+    if (g_total_mem - g_last_rep >
+        c10::FLAGS_caffe2_gpu_memory_report_interval_mb * 1024 * 1024) {
+      for (int gpu = 0; gpu < g_total_by_gpu_map.size(); gpu++) {
+        long t = g_total_by_gpu_map[gpu];
+        long max_t = g_max_by_gpu_map[gpu];
+        if (max_t > 0) {
+          if (max_t != t) {
+            LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB"
+                      << " (max: " << max_t / 1024 / 1024 << " MB)";
+          } else {
+            LOG(INFO) << "GPU " << gpu << ": " << t / 1024 / 1024 << " MB";
+          }
        }
-        LOG(INFO) << "Total: " << g_total_mem / 1024 / 1024 << " MB";
-        g_last_rep = g_total_mem;
+      }
+      LOG(INFO) << "Total: " << g_total_mem / 1024 / 1024 << " MB";
+      g_last_rep = g_total_mem;
    }
 }
 }
@ -340,14 +340,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
    static Caffe2HipInitializerHelper g_hip_initializer_;
    void* ptr = nullptr;

-    if (FLAGS_caffe2_gpu_memory_tracking) {
+    if (c10::FLAGS_caffe2_gpu_memory_tracking) {
      TrackMemoryAlloc(nbytes);
    }
    switch (g_hip_memory_pool_type) {
      case HipMemoryPoolType::NONE:
        HIP_ENFORCE(hipMalloc(&ptr, nbytes));
-        if(FLAGS_caffe2_gpu_memory_tracking)
-        {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr] = nbytes;
          g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
        }
@ -356,15 +355,13 @@ struct DefaultHIPAllocator final : public at::Allocator {
        HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
        g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
        VLOG(2) << "CUB allocating pointer " << ptr << " on device " << CaffeHipGetDevice();
-        if(FLAGS_caffe2_gpu_memory_tracking)
-        {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr] = nbytes;
        }
        return {ptr, ptr, &Delete, at::Device(HIP)};
    case HipMemoryPoolType::THC:
        HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
-        if (FLAGS_caffe2_gpu_memory_tracking)
-        {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_size_map[ptr]                = nbytes;
          g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
        }
@ -403,8 +400,7 @@ struct DefaultHIPAllocator final : public at::Allocator {
                     << hipGetErrorString(error);
        }

-        if(FLAGS_caffe2_gpu_memory_tracking)
-        {
+        if (c10::FLAGS_caffe2_gpu_memory_tracking) {
          g_hip_device_affiliation.erase(g_hip_device_affiliation.find(ptr));
        }

--- a/caffe2/core/hip/net_async_dag_hip.cc
+++ b/caffe2/core/hip/net_async_dag_hip.cc
@ -29,17 +29,20 @@

 #include "caffe2/core/hip/context_hip.h"

-CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
+C10_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");

-CAFFE2_DEFINE_bool(caffe2_async_dag_use_multiple_streams, false, "Use multiple streams per thread");
+C10_DEFINE_bool(
+    caffe2_async_dag_use_multiple_streams,
+    false,
+    "Use multiple streams per thread");

-CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
+C10_DECLARE_bool(caffe2_dag_net_collect_stats);

-CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
+C10_DECLARE_bool(caffe2_net_async_finish_chain);

-CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
+C10_DECLARE_int(caffe2_streams_per_gpu);

-CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
+C10_DECLARE_bool(caffe2_net_async_check_stream_status);

 namespace caffe2 {

@ -97,8 +100,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option)
      }
      do {
        stream_id = stream_counters_[gpu_id]++;
-        stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
-      } while (FLAGS_caffe2_net_async_check_stream_status &&
+        stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
+      } while (c10::FLAGS_caffe2_net_async_check_stream_status &&
               !HIPContext::IsStreamFree(device_option, stream_id));
    }
    return stream_id;
@ -117,9 +120,9 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
                  "None of the parent is recorded for an event.");

    int stream_id = 0;
-    if(FLAGS_caffe2_async_dag_use_multiple_streams)
-    {
-        stream_id = stream(operator_nodes_[source_idx].operator_->event().GetDeviceOption());
+    if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
+      stream_id = stream(
+          operator_nodes_[source_idx].operator_->event().GetDeviceOption());
    }

    std::vector<const Event*> parent_events;
@ -133,13 +136,13 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
        operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
    }

-    if(FLAGS_caffe2_dag_net_collect_stats)
-    {
-        const auto& device_option =
-            operator_nodes_[source_idx].operator_->event().GetDeviceOption();
-        CAFFE_EVENT(stats_[device_option.device_type()],
-                    task_wait_time_us,
-                    task_timers_[chain_id]->MicroSeconds());
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
+      const auto& device_option =
+          operator_nodes_[source_idx].operator_->event().GetDeviceOption();
+      CAFFE_EVENT(
+          stats_[device_option.device_type()],
+          task_wait_time_us,
+          task_timers_[chain_id]->MicroSeconds());
    }

    // We've waited on all our parent indices.
@ -160,20 +163,19 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain)
    }

    const auto& sink_idx = chain.back();
-    if(success && FLAGS_caffe2_net_async_finish_chain)
-    {
-        operator_nodes_[sink_idx].operator_->event().Finish();
+    if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
+      operator_nodes_[sink_idx].operator_->event().Finish();
    }
    CAFFE_ENFORCE(!eventRecorded_[sink_idx], "An event for ", sink_idx, " should not be recorded.");
    eventRecorded_[sink_idx] = 1;

-    if(FLAGS_caffe2_dag_net_collect_stats)
-    {
-        const auto& device_option =
-            operator_nodes_[source_idx].operator_->event().GetDeviceOption();
-        CAFFE_EVENT(stats_[device_option.device_type()],
-                    task_time_to_scheduled_us,
-                    task_timers_[chain_id]->MicroSeconds());
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
+      const auto& device_option =
+          operator_nodes_[source_idx].operator_->event().GetDeviceOption();
+      CAFFE_EVENT(
+          stats_[device_option.device_type()],
+          task_time_to_scheduled_us,
+          task_timers_[chain_id]->MicroSeconds());
    }
    return success;
 }
--- a/caffe2/core/hip/net_async_hip_thread_pool_hip.cc
+++ b/caffe2/core/hip/net_async_hip_thread_pool_hip.cc
@ -16,7 +16,7 @@

 #include "caffe2/core/net_async_base.h"

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_threads_per_hip_gpu,
    1,
    "Number of CPU threads per AMD HIP GPU");
@ -26,17 +26,19 @@ namespace caffe2 {
 std::shared_ptr<TaskThreadPool>
 GetAsyncNetHIPThreadPool(int hip_gpu_id, int pool_size, bool create_new) {
  // For GPU, use per device thread pools of predefined constant size
-  if (pool_size != FLAGS_caffe2_threads_per_hip_gpu) {
+  if (pool_size != c10::FLAGS_caffe2_threads_per_hip_gpu) {
    LOG(INFO) << "Overriding AMD HIP GPU pool size: using "
-              << FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
+              << c10::FLAGS_caffe2_threads_per_hip_gpu << " threads per GPU";
  }
  static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
  static std::mutex pool_mutex;

  if (create_new) {
-    LOG(INFO) << "Created new AMD HIP GPU pool, size: " << FLAGS_caffe2_threads_per_hip_gpu
+    LOG(INFO) << "Created new AMD HIP GPU pool, size: "
+              << c10::FLAGS_caffe2_threads_per_hip_gpu
              << "; GPU id: " << hip_gpu_id;
-    return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
+    return std::make_shared<TaskThreadPool>(
+        c10::FLAGS_caffe2_threads_per_hip_gpu);
  } else {
    std::lock_guard<std::mutex> lock(pool_mutex);

@ -46,9 +48,10 @@ GetAsyncNetHIPThreadPool(int hip_gpu_id, int pool_size, bool create_new) {
    }
    if (!shared_pool) {
      LOG(INFO) << "Created shared AMD HIP GPU pool, size: "
-                << FLAGS_caffe2_threads_per_hip_gpu << "; GPU id: " << hip_gpu_id;
-      shared_pool =
-          std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_hip_gpu);
+                << c10::FLAGS_caffe2_threads_per_hip_gpu
+                << "; GPU id: " << hip_gpu_id;
+      shared_pool = std::make_shared<TaskThreadPool>(
+          c10::FLAGS_caffe2_threads_per_hip_gpu);
      pools[hip_gpu_id] = shared_pool;
    }
    return shared_pool;
--- a/caffe2/core/init.cc
+++ b/caffe2/core/init.cc
@ -5,7 +5,7 @@
 #include <iomanip>
 #include <mutex>

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_version,
    false,
    "Print Caffe2 version and build options on startup");
@ -53,7 +53,7 @@ bool GlobalInit(int* pargc, char*** pargv) {
  if (init_state == internal::State::Initialized) {
    VLOG(1) << "GlobalInit has already been called: re-parsing gflags only.";
    // Reparse command line flags
-    success &= ParseCaffeCommandLineFlags(pargc, pargv);
+    success &= c10::ParseCommandLineFlags(pargc, pargv);
    UpdateLoggingLevelsFromFlags();
  } else if (init_state == internal::State::Uninitialized) {
    init_state = internal::State::Initializing;
@ -68,10 +68,10 @@ bool GlobalInit(int* pargc, char*** pargv) {
                   ->RunRegisteredEarlyInitFunctions(pargc, pargv);
    CAFFE_ENFORCE(
        success, "Failed to run some early init functions for caffe2.");
-    success &= ParseCaffeCommandLineFlags(pargc, pargv);
+    success &= c10::ParseCommandLineFlags(pargc, pargv);
    success &= InitCaffeLogging(pargc, *pargv);
    // Print out the current build version. Using cerr as LOG(INFO) might be off
-    if (FLAGS_caffe2_version) {
+    if (c10::FLAGS_caffe2_version) {
      std::cerr << "Caffe2 build configuration: " << std::endl;
      for (const auto& it : GetBuildOptions()) {
        std::cerr << "  " << std::setw(25) << std::left << it.first << " : "
--- a/caffe2/core/init_intrinsics_check.cc
+++ b/caffe2/core/init_intrinsics_check.cc
@ -4,7 +4,7 @@
 #include "caffe2/core/logging.h"
 #include "caffe2/utils/cpuid.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_quit_on_unsupported_cpu_feature,
    false,
    "If set, when Caffe2 is built with a CPU feature (like avx2) but the "
@ -23,7 +23,7 @@ static void QuitIfFeatureUnsupported(
        "on your machine, such as SIGILL 'illegal instructions' on Linux. "
        "As a result Caffe2 will preemptively quit. Please install or "
        "build a Caffe2 binary with the feature turned off.";
-    if (FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
+    if (c10::FLAGS_caffe2_quit_on_unsupported_cpu_feature) {
      LOG(FATAL) << err_string;
    } else {
      LOG(ERROR) << err_string;
--- a/caffe2/core/init_omp.cc
+++ b/caffe2/core/init_omp.cc
@ -12,11 +12,12 @@

 #include "caffe2/core/init.h"

-CAFFE2_DEFINE_int(
-    caffe2_omp_num_threads, 0,
+C10_DEFINE_int(
+    caffe2_omp_num_threads,
+    0,
    "The number of openmp threads. 0 to use default value. "
    "Does not have effect if OpenMP is disabled.");
-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_mkl_num_threads,
    0,
    "The number of mkl threads. 0 to use default value. If set, "
@ -34,9 +35,10 @@ bool Caffe2SetOpenMPThreads(int*, char***) {
    omp_set_num_threads(1);
  }

-  if (FLAGS_caffe2_omp_num_threads > 0) {
-    VLOG(1) << "Setting omp_num_threads to " << FLAGS_caffe2_omp_num_threads;
-    omp_set_num_threads(FLAGS_caffe2_omp_num_threads);
+  if (c10::FLAGS_caffe2_omp_num_threads > 0) {
+    VLOG(1) << "Setting omp_num_threads to "
+            << c10::FLAGS_caffe2_omp_num_threads;
+    omp_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
  }
  VLOG(1) << "Caffe2 running with " << omp_get_max_threads() << " OMP threads";
  return true;
@ -54,16 +56,18 @@ bool Caffe2SetMKLThreads(int*, char***) {
  }

  // If caffe2_omp_num_threads is set, we use that for MKL as well.
-  if (FLAGS_caffe2_omp_num_threads > 0) {
-    VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_omp_num_threads
+  if (c10::FLAGS_caffe2_omp_num_threads > 0) {
+    VLOG(1) << "Setting mkl_num_threads to "
+            << c10::FLAGS_caffe2_omp_num_threads
            << " as inherited from omp_num_threads.";
-    mkl_set_num_threads(FLAGS_caffe2_omp_num_threads);
+    mkl_set_num_threads(c10::FLAGS_caffe2_omp_num_threads);
  }

  // Override omp_num_threads if mkl_num_threads is set.
-  if (FLAGS_caffe2_mkl_num_threads > 0) {
-    VLOG(1) << "Setting mkl_num_threads to " << FLAGS_caffe2_mkl_num_threads;
-    mkl_set_num_threads(FLAGS_caffe2_mkl_num_threads);
+  if (c10::FLAGS_caffe2_mkl_num_threads > 0) {
+    VLOG(1) << "Setting mkl_num_threads to "
+            << c10::FLAGS_caffe2_mkl_num_threads;
+    mkl_set_num_threads(c10::FLAGS_caffe2_mkl_num_threads);
  }
  VLOG(1) << "Caffe2 running with " << mkl_get_max_threads() << " MKL threads";
  return true;
--- a/caffe2/core/logging.cc
+++ b/caffe2/core/logging.cc
@ -8,9 +8,11 @@

 // Common code that we use regardless of whether we use glog or not.

-CAFFE2_DEFINE_bool(caffe2_use_fatal_for_enforce, false,
-                   "If set true, when CAFFE_ENFORCE is not met, abort instead "
-                   "of throwing an exception.");
+C10_DEFINE_bool(
+    caffe2_use_fatal_for_enforce,
+    false,
+    "If set true, when CAFFE_ENFORCE is not met, abort instead "
+    "of throwing an exception.");

 namespace caffe2 {
 namespace enforce_detail {
@ -52,7 +54,7 @@ void ThrowEnforceNotMet(
    const std::string& msg,
    const void* caller) {
  at::Error e(file, line, condition, msg, (*GetFetchStackTrace())(), caller);
-  if (FLAGS_caffe2_use_fatal_for_enforce) {
+  if (c10::FLAGS_caffe2_use_fatal_for_enforce) {
    LOG(FATAL) << e.msg_stack()[0];
  }
  throw e;
@ -60,8 +62,7 @@ void ThrowEnforceNotMet(

 }  // namespace caffe2

-
-#ifdef CAFFE2_USE_GFLAGS
+#ifdef C10_USE_GFLAGS
 // When GLOG depends on GFLAGS, these variables are being defined in GLOG
 // directly via the GFLAGS definition, so we will use DECLARE_* to declare
 // them, and use them in Caffe2.
@ -74,11 +75,10 @@ DECLARE_bool(logtostderr);
 #elif !CAFFE2_MOBILE && !__APPLE__ && !defined(_WIN32)
 // Declare our own versions of the above flags so we don't error out
 // when they are passed into Caffe2.
-CAFFE2_DEFINE_int(minloglevel, 0, "Equivalent to glog minloglevel");
-CAFFE2_DEFINE_int(v, 0, "Equivalent to glog verbose");
-CAFFE2_DEFINE_bool(logtostderr, false, "Equivalent to glog logtostderr");
-#endif // CAFFE2_USE_GFLAGS
-
+C10_DEFINE_int(minloglevel, 0, "Equivalent to glog minloglevel");
+C10_DEFINE_int(v, 0, "Equivalent to glog verbose");
+C10_DEFINE_bool(logtostderr, false, "Equivalent to glog logtostderr");
+#endif // C10_USE_GFLAGS

 #ifdef CAFFE2_USE_GOOGLE_GLOG

@ -92,9 +92,10 @@ using fLI::FLAGS_v;
 using fLB::FLAGS_logtostderr;
 }  // namespace caffe2

-
-CAFFE2_DEFINE_int(caffe2_log_level, google::GLOG_ERROR,
-                  "The minimum log level that caffe2 will output.");
+C10_DEFINE_int(
+    caffe2_log_level,
+    google::GLOG_ERROR,
+    "The minimum log level that caffe2 will output.");

 // Google glog's api does not have an external function that allows one to check
 // if glog is initialized or not. It does have an internal function - so we are
@ -128,14 +129,14 @@ bool InitCaffeLogging(int* argc, char** argv) {
 void UpdateLoggingLevelsFromFlags() {
  // If caffe2_log_level is set and is lower than the min log level by glog,
  // we will transfer the caffe2_log_level setting to glog to override that.
-  FLAGS_minloglevel = std::min(FLAGS_caffe2_log_level, FLAGS_minloglevel);
+  FLAGS_minloglevel = std::min(c10::FLAGS_caffe2_log_level, FLAGS_minloglevel);
  // If caffe2_log_level is explicitly set, let's also turn on logtostderr.
-  if (FLAGS_caffe2_log_level < google::GLOG_ERROR) {
+  if (c10::FLAGS_caffe2_log_level < google::GLOG_ERROR) {
    FLAGS_logtostderr = 1;
  }
  // Also, transfer the caffe2_log_level verbose setting to glog.
-  if (FLAGS_caffe2_log_level < 0) {
-    FLAGS_v = std::min(FLAGS_v, -FLAGS_caffe2_log_level);
+  if (c10::FLAGS_caffe2_log_level < 0) {
+    FLAGS_v = std::min(FLAGS_v, -c10::FLAGS_caffe2_log_level);
  }
 }

@ -151,24 +152,27 @@ void ShowLogInfoToStderr() {
 #include <android/log.h>
 #endif // ANDROID

-CAFFE2_DEFINE_int(caffe2_log_level, ERROR,
-                  "The minimum log level that caffe2 will output.");
+C10_DEFINE_int(
+    caffe2_log_level,
+    ERROR,
+    "The minimum log level that caffe2 will output.");

 namespace caffe2 {
 bool InitCaffeLogging(int* argc, char** argv) {
  // When doing InitCaffeLogging, we will assume that caffe's flag parser has
  // already finished.
  if (*argc == 0) return true;
-  if (!CommandLineFlagsHasBeenParsed()) {
+  if (!c10::CommandLineFlagsHasBeenParsed()) {
    std::cerr << "InitCaffeLogging() has to be called after "
-                 "ParseCaffeCommandLineFlags. Modify your program to make sure "
-                 "of this." << std::endl;
+                 "c10::ParseCommandLineFlags. Modify your program to make sure "
+                 "of this."
+              << std::endl;
    return false;
  }
-  if (FLAGS_caffe2_log_level > FATAL) {
+  if (c10::FLAGS_caffe2_log_level > FATAL) {
    std::cerr << "The log level of Caffe2 has to be no larger than FATAL("
              << FATAL << "). Capping it to FATAL." << std::endl;
-    FLAGS_caffe2_log_level = FATAL;
+    c10::FLAGS_caffe2_log_level = FATAL;
  }
  return true;
 }
@ -177,12 +181,12 @@ void UpdateLoggingLevelsFromFlags() {
 }

 void ShowLogInfoToStderr() {
-  FLAGS_caffe2_log_level = INFO;
+  c10::FLAGS_caffe2_log_level = INFO;
 }

 MessageLogger::MessageLogger(const char *file, int line, int severity)
  : severity_(severity) {
-  if (severity_ < FLAGS_caffe2_log_level) {
+  if (severity_ < c10::FLAGS_caffe2_log_level) {
    // Nothing needs to be logged.
    return;
  }
@ -212,7 +216,7 @@ MessageLogger::MessageLogger(const char *file, int line, int severity)

 // Output the contents of the stream to the proper channel on destruction.
 MessageLogger::~MessageLogger() {
-  if (severity_ < FLAGS_caffe2_log_level) {
+  if (severity_ < c10::FLAGS_caffe2_log_level) {
    // Nothing needs to be logged.
    return;
  }
@ -235,7 +239,7 @@ MessageLogger::~MessageLogger() {
    __android_log_print(ANDROID_LOG_FATAL, tag_, "terminating.\n");
  }
 #else  // !ANDROID
-  if (severity_ >= FLAGS_caffe2_log_level) {
+  if (severity_ >= c10::FLAGS_caffe2_log_level) {
    // If not building on Android, log all output to std::cerr.
    std::cerr << stream_.str();
    // Simulating the glog default behavior: if the severity is above INFO,
--- a/caffe2/core/logging.h
+++ b/caffe2/core/logging.h
@ -27,8 +27,8 @@
 #include "caffe2/core/logging_is_not_google_glog.h"
 #endif // CAFFE2_USE_GOOGLE_GLOG

-CAFFE2_DECLARE_int(caffe2_log_level);
-CAFFE2_DECLARE_bool(caffe2_use_fatal_for_enforce);
+C10_DECLARE_int(caffe2_log_level);
+C10_DECLARE_bool(caffe2_use_fatal_for_enforce);

 namespace caffe2 {
 // Functions that we use for initialization.
--- a/caffe2/core/logging_test.cc
+++ b/caffe2/core/logging_test.cc
@ -12,14 +12,14 @@ TEST(LoggingTest, TestEnforceTrue) {

 TEST(LoggingTest, TestEnforceFalse) {
  bool kFalse = false;
-  std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
+  std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
  try {
    CAFFE_ENFORCE(false, "This throws.");
    // This should never be triggered.
    ADD_FAILURE();
  } catch (const EnforceNotMet&) {
  }
-  std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse);
+  std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kFalse);
 }

 TEST(LoggingTest, TestEnforceEquals) {
@ -76,9 +76,9 @@ TEST(LoggingTest, Join) {
 #if GTEST_HAS_DEATH_TEST
 TEST(LoggingDeathTest, TestEnforceUsingFatal) {
  bool kTrue = true;
-  std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
+  std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
  EXPECT_DEATH(CAFFE_ENFORCE(false, "This goes fatal."), "");
-  std::swap(FLAGS_caffe2_use_fatal_for_enforce, kTrue);
+  std::swap(c10::FLAGS_caffe2_use_fatal_for_enforce, kTrue);
 }
 #endif

--- a/caffe2/core/macros.h.in
+++ b/caffe2/core/macros.h.in
@ -38,7 +38,6 @@ static_assert(
 #cmakedefine CAFFE2_USE_CUDNN
 #cmakedefine CAFFE2_USE_EIGEN_FOR_BLAS
 #cmakedefine CAFFE2_USE_FBCODE
-#cmakedefine CAFFE2_USE_GFLAGS
 #cmakedefine CAFFE2_USE_GOOGLE_GLOG
 #cmakedefine CAFFE2_USE_LITE_PROTO
 #cmakedefine CAFFE2_USE_MKL
--- a/caffe2/core/net.cc
+++ b/caffe2/core/net.cc
@ -12,7 +12,7 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    caffe2_override_executor,
    "",
    "Comma-separated list of executor overrides");
@ -115,7 +115,7 @@ const std::unordered_map<std::string, std::string>& defaultOverrides() {
 }

 void checkExecutorOverride(std::string& net_type) {
-  auto executors = caffe2::split(',', FLAGS_caffe2_override_executor);
+  auto executors = caffe2::split(',', c10::FLAGS_caffe2_override_executor);
  CAFFE_ENFORCE(
      executors.size() % 2 == 0, "Invalid override executors flag value");
  std::unordered_map<std::string, std::string> overrides;
--- a/caffe2/core/net.h
+++ b/caffe2/core/net.h
@ -21,7 +21,7 @@
 #include "caffe2/utils/simple_queue.h"
 #include "caffe2/utils/thread_pool.h"

-CAFFE2_DECLARE_string(caffe2_override_executor);
+C10_DECLARE_string(caffe2_override_executor);

 namespace caffe2 {

--- a/caffe2/core/net_async_base.cc
+++ b/caffe2/core/net_async_base.cc
@ -5,50 +5,50 @@
 #include "caffe2/core/timer.h"

 // experimental support for multiple streams per worker per GPU
-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_streams_per_gpu,
    1,
    "Number of streams per worker per GPU"
    " to use in GPU thread pool (experimental)");

-CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
+C10_DECLARE_bool(caffe2_dag_net_collect_stats);

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_finish_chain,
    false,
    "Wait for chain to finish");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_always_schedule_child,
    false,
    "Always schedule child chains from parent chain");

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_net_async_max_gpus,
    16,
    "Max number of GPUs allowed in net async executor");

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_net_async_max_numa_nodes,
    8,
    "Max number of NUMA nodes allowed in net async executor");

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_net_async_cpu_pool_size,
    0,
    "Number of threads in CPU pool by default");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_check_stream_status,
    false,
    "Select next non-busy stream");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_use_single_pool,
    false,
    "Use single thread pool for all devices");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_use_per_net_pools,
    false,
    "Use per net thread pools");
@ -152,14 +152,14 @@ TaskThreadPool* AsyncNetBase::pool(const DeviceOption& device_option) {
    }
    CAFFE_ENFORCE_LT(
        numa_node_id,
-        FLAGS_caffe2_net_async_max_numa_nodes,
+        c10::FLAGS_caffe2_net_async_max_numa_nodes,
        "Invalid NUMA node id: ",
        numa_node_id);
    return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_);
  } else if (device_option.device_type() == PROTO_CUDA) {
    auto gpu_id = device_option.cuda_gpu_id();
    CAFFE_ENFORCE(
-        gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
+        gpu_id >= 0 && gpu_id < c10::FLAGS_caffe2_net_async_max_gpus,
        "Invalid GPU id: " + caffe2::to_string(gpu_id));
    return poolGetter(gpu_pools_, PROTO_CUDA, gpu_id, num_workers_);
  } else {
@ -432,8 +432,8 @@ GetAsyncNetCPUThreadPool(int numa_node_id, int pool_size, bool create_new) {
  static std::mutex pool_mutex;

  if (pool_size <= 0) {
-    if (FLAGS_caffe2_net_async_cpu_pool_size > 0) {
-      pool_size = FLAGS_caffe2_net_async_cpu_pool_size;
+    if (c10::FLAGS_caffe2_net_async_cpu_pool_size > 0) {
+      pool_size = c10::FLAGS_caffe2_net_async_cpu_pool_size;
      LOG(INFO) << "Using default CPU pool size: " << pool_size
                << "; NUMA node id: " << numa_node_id;
    } else {
@ -495,12 +495,12 @@ void AsyncNetBase::computeExecutionModeFlags() {
    use_per_net_pools_ = true;
    is_blocking_ = true;
  } else {
-    streams_per_gpu_ = FLAGS_caffe2_streams_per_gpu;
-    finish_chain_ = FLAGS_caffe2_net_async_finish_chain;
-    always_schedule_child_ = FLAGS_caffe2_net_async_always_schedule_child;
-    check_stream_status_ = FLAGS_caffe2_net_async_check_stream_status;
-    use_single_pool_ = FLAGS_caffe2_net_async_use_single_pool;
-    use_per_net_pools_ = FLAGS_caffe2_net_async_use_per_net_pools;
+    streams_per_gpu_ = c10::FLAGS_caffe2_streams_per_gpu;
+    finish_chain_ = c10::FLAGS_caffe2_net_async_finish_chain;
+    always_schedule_child_ = c10::FLAGS_caffe2_net_async_always_schedule_child;
+    check_stream_status_ = c10::FLAGS_caffe2_net_async_check_stream_status;
+    use_single_pool_ = c10::FLAGS_caffe2_net_async_use_single_pool;
+    use_per_net_pools_ = c10::FLAGS_caffe2_net_async_use_per_net_pools;
    is_blocking_ = false;
  }
 }
--- a/caffe2/core/net_async_base.h
+++ b/caffe2/core/net_async_base.h
@ -13,15 +13,15 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/thread_pool.h"

-CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
-CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
-CAFFE2_DECLARE_bool(caffe2_net_async_always_schedule_child);
-CAFFE2_DECLARE_int(caffe2_net_async_max_gpus);
-CAFFE2_DECLARE_int(caffe2_net_async_max_numa_nodes);
-CAFFE2_DECLARE_int(caffe2_net_async_cpu_pool_size);
-CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
-CAFFE2_DECLARE_bool(caffe2_net_async_use_single_pool);
-CAFFE2_DECLARE_bool(caffe2_net_async_use_per_net_pools);
+C10_DECLARE_int(caffe2_streams_per_gpu);
+C10_DECLARE_bool(caffe2_net_async_finish_chain);
+C10_DECLARE_bool(caffe2_net_async_always_schedule_child);
+C10_DECLARE_int(caffe2_net_async_max_gpus);
+C10_DECLARE_int(caffe2_net_async_max_numa_nodes);
+C10_DECLARE_int(caffe2_net_async_cpu_pool_size);
+C10_DECLARE_bool(caffe2_net_async_check_stream_status);
+C10_DECLARE_bool(caffe2_net_async_use_single_pool);
+C10_DECLARE_bool(caffe2_net_async_use_per_net_pools);

 namespace caffe2 {

--- a/caffe2/core/net_async_dag_gpu.cc
+++ b/caffe2/core/net_async_dag_gpu.cc
@ -17,20 +17,20 @@
 #include <nvToolsExt.h>
 #endif

-CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
+C10_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_async_dag_use_multiple_streams,
    false,
    "Use multiple streams per thread");

-CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
+C10_DECLARE_bool(caffe2_dag_net_collect_stats);

-CAFFE2_DECLARE_bool(caffe2_net_async_finish_chain);
+C10_DECLARE_bool(caffe2_net_async_finish_chain);

-CAFFE2_DECLARE_int(caffe2_streams_per_gpu);
+C10_DECLARE_int(caffe2_streams_per_gpu);

-CAFFE2_DECLARE_bool(caffe2_net_async_check_stream_status);
+C10_DECLARE_bool(caffe2_net_async_check_stream_status);

 namespace caffe2 {

@ -48,7 +48,7 @@ constexpr Color kWaitColor = 0x0066FF33; // green
 class ProfiledRange {
 public:
  ProfiledRange(const OperatorDef& def, Color color) {
-    if (!FLAGS_caffe2_use_nvtx) {
+    if (!c10::FLAGS_caffe2_use_nvtx) {
      return;
    }
    nvtxEventAttributes_t eventAttrib = {0};
@ -63,7 +63,7 @@ class ProfiledRange {
  }

  ~ProfiledRange() {
-    if (!FLAGS_caffe2_use_nvtx) {
+    if (!c10::FLAGS_caffe2_use_nvtx) {
      return;
    }
    nvtxRangeEnd(range_);
@ -119,8 +119,8 @@ int AsyncDAGNet::stream(const DeviceOption& device_option) {
    }
    do {
      stream_id = stream_counters_[gpu_id]++;
-      stream_counters_[gpu_id] %= FLAGS_caffe2_streams_per_gpu;
-    } while (FLAGS_caffe2_net_async_check_stream_status &&
+      stream_counters_[gpu_id] %= c10::FLAGS_caffe2_streams_per_gpu;
+    } while (c10::FLAGS_caffe2_net_async_check_stream_status &&
             !CUDAContext::IsStreamFree(device_option, stream_id));
  }
  return stream_id;
@ -141,7 +141,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
      "None of the parent is recorded for an event.");

  int stream_id = 0;
-  if (FLAGS_caffe2_async_dag_use_multiple_streams) {
+  if (c10::FLAGS_caffe2_async_dag_use_multiple_streams) {
    stream_id = stream(
        operator_nodes_[source_idx].operator_->event().GetDeviceOption());
  }
@ -158,7 +158,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
    operator_nodes_[source_idx].operator_->WaitEvents(parent_events, stream_id);
  }

-  if (FLAGS_caffe2_dag_net_collect_stats) {
+  if (c10::FLAGS_caffe2_dag_net_collect_stats) {
    const auto& device_option =
        operator_nodes_[source_idx].operator_->event().GetDeviceOption();
    CAFFE_EVENT(
@ -184,7 +184,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
  }

  const auto& sink_idx = chain.back();
-  if (success && FLAGS_caffe2_net_async_finish_chain) {
+  if (success && c10::FLAGS_caffe2_net_async_finish_chain) {
    operator_nodes_[sink_idx].operator_->event().Finish();
  }
  CAFFE_ENFORCE(
@ -194,7 +194,7 @@ bool AsyncDAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
      " should not be recorded.");
  eventRecorded_[sink_idx] = 1;

-  if (FLAGS_caffe2_dag_net_collect_stats) {
+  if (c10::FLAGS_caffe2_dag_net_collect_stats) {
    const auto& device_option =
        operator_nodes_[source_idx].operator_->event().GetDeviceOption();
    CAFFE_EVENT(
--- a/caffe2/core/net_async_gpu_thread_pool_gpu.cc
+++ b/caffe2/core/net_async_gpu_thread_pool_gpu.cc
@ -2,7 +2,7 @@

 #include "caffe2/core/context_gpu.h"

-CAFFE2_DEFINE_int(caffe2_threads_per_gpu, 1, "Number of CPU threads per GPU");
+C10_DEFINE_int(caffe2_threads_per_gpu, 1, "Number of CPU threads per GPU");

 namespace caffe2 {

@ -11,17 +11,17 @@ C10_REGISTER_CREATOR(ThreadPoolRegistry, CUDA, GetAsyncNetGPUThreadPool);
 std::shared_ptr<TaskThreadPool>
 GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
  // For GPU, use per device thread pools of predefined constant size
-  if (pool_size != FLAGS_caffe2_threads_per_gpu) {
+  if (pool_size != c10::FLAGS_caffe2_threads_per_gpu) {
    LOG(INFO) << "Overriding GPU pool size: using "
-              << FLAGS_caffe2_threads_per_gpu << " threads per GPU";
+              << c10::FLAGS_caffe2_threads_per_gpu << " threads per GPU";
  }
  static std::unordered_map<int, std::weak_ptr<TaskThreadPool>> pools;
  static std::mutex pool_mutex;

  if (create_new) {
-    LOG(INFO) << "Created new GPU pool, size: " << FLAGS_caffe2_threads_per_gpu
-              << "; GPU id: " << gpu_id;
-    return std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
+    LOG(INFO) << "Created new GPU pool, size: "
+              << c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
+    return std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
  } else {
    std::lock_guard<std::mutex> lock(pool_mutex);

@ -31,9 +31,9 @@ GetAsyncNetGPUThreadPool(int gpu_id, int pool_size, bool create_new) {
    }
    if (!shared_pool) {
      LOG(INFO) << "Created shared GPU pool, size: "
-                << FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
+                << c10::FLAGS_caffe2_threads_per_gpu << "; GPU id: " << gpu_id;
      shared_pool =
-          std::make_shared<TaskThreadPool>(FLAGS_caffe2_threads_per_gpu);
+          std::make_shared<TaskThreadPool>(c10::FLAGS_caffe2_threads_per_gpu);
      pools[gpu_id] = shared_pool;
    }
    return shared_pool;
--- a/caffe2/core/net_async_polling.cc
+++ b/caffe2/core/net_async_polling.cc
@ -3,7 +3,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/core/timer.h"

-CAFFE2_DECLARE_bool(caffe2_dag_net_collect_stats);
+C10_DECLARE_bool(caffe2_dag_net_collect_stats);

 namespace caffe2 {

@ -36,7 +36,7 @@ bool AsyncPollingNet::DoRunAsync() {

  Timer timer;
  bool success = pollAndSchedule();
-  if (FLAGS_caffe2_dag_net_collect_stats) {
+  if (c10::FLAGS_caffe2_dag_net_collect_stats) {
    CAFFE_EVENT(stats_[PROTO_CPU], poll_time_ms, timer.MilliSeconds());
  }
  if (!success) {
@ -49,14 +49,14 @@ bool AsyncPollingNet::DoRunAsync() {
 }

 void AsyncPollingNet::schedule(int task_id) {
-  if (FLAGS_caffe2_dag_net_collect_stats) {
+  if (c10::FLAGS_caffe2_dag_net_collect_stats) {
    task_timers_[task_id]->Start();
  }
  const auto& device_option = event(task_id).GetDeviceOption();
  pool(device_option)->run([this, task_id, device_option]() {
    int stream_id = stream(task_id);

-    if (FLAGS_caffe2_dag_net_collect_stats) {
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
      CAFFE_EVENT(
          stats_[device_option.device_type()],
          task_pool_wait_time_us,
@ -64,7 +64,7 @@ void AsyncPollingNet::schedule(int task_id) {
    }

    try {
-      if (FLAGS_caffe2_dag_net_collect_stats) {
+      if (c10::FLAGS_caffe2_dag_net_collect_stats) {
        Timer run_time;
        run(task_id, stream_id);
        CAFFE_EVENT(
@ -104,7 +104,7 @@ bool AsyncPollingNet::pollAndSchedule() {
    std::unordered_set<int> next_tasks;
    updated_tasks.reserve(current_tasks.size());

-    if (FLAGS_caffe2_dag_net_collect_stats) {
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
      timer.Start();
    }
    if (has_chain_failed_) {
@ -121,7 +121,7 @@ bool AsyncPollingNet::pollAndSchedule() {

      if (prev_status != status_[task_id]) {
        updated_tasks.insert(task_id);
-        if (FLAGS_caffe2_dag_net_collect_stats) {
+        if (c10::FLAGS_caffe2_dag_net_collect_stats) {
          updateTaskStats(task_id);
        }
      }
@ -130,7 +130,7 @@ bool AsyncPollingNet::pollAndSchedule() {
        next_tasks.insert(task_id);
      }
    }
-    if (FLAGS_caffe2_dag_net_collect_stats) {
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
      CAFFE_EVENT(
          stats_[PROTO_CPU], poll_status_update_time_us, timer.MicroSeconds());
    }
--- a/caffe2/core/net_async_scheduling.cc
+++ b/caffe2/core/net_async_scheduling.cc
@ -2,7 +2,7 @@

 #include "caffe2/core/net_async_tracing.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_net_async_optimize_polling,
    true,
    "Use event callbacks whenever possible instead of polling");
@ -93,7 +93,7 @@ void AsyncSchedulingNet::schedule(int task_id, bool run_inline) {
              if (!canSchedule(parent_id, child_id)) {
                // we can't schedule a child because of this parent,
                // check if parent supports callback
-                if (FLAGS_caffe2_net_async_optimize_polling &&
+                if (c10::FLAGS_caffe2_net_async_optimize_polling &&
                    parent_event.SupportsCallback()) {
                  parents_with_callback.push_back(parent_id);
                } else {
--- a/caffe2/core/net_async_tracing.cc
+++ b/caffe2/core/net_async_tracing.cc
@ -19,21 +19,21 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/string_utils.h"

-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    caffe2_net_async_tracing_filepath,
    "/tmp",
    "Path to save tracing information");

-CAFFE2_DEFINE_string(
+C10_DEFINE_string(
    caffe2_net_async_names_to_trace,
    "",
    "Comma-separated list of net names to trace");

-CAFFE2_DEFINE_int(caffe2_net_async_tracing_nth, 100, "Trace every Nth batch");
+C10_DEFINE_int(caffe2_net_async_tracing_nth, 100, "Trace every Nth batch");

 // For every Nth iteration, we will dump the tracing results to a json file
 // The file is appended with the iteration number.
-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_net_async_tracing_dumping_nth,
    10000,
    "Dump profiling result file every Nth batch");
@ -56,7 +56,7 @@ int getCounterForNetName(const std::string& net_name) {
 Tracer::Tracer(const NetBase* net, const std::string& net_name)
    : net_(net), filename_(net_name), iter_(0) {
  std::replace(filename_.begin(), filename_.end(), '/', '_');
-  filename_ = FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
+  filename_ = c10::FLAGS_caffe2_net_async_tracing_filepath + "/" + filename_ +
      +"_id_" + caffe2::to_string(getCounterForNetName(net_name));
  timer_.Start();
 }
@ -375,7 +375,8 @@ int getUniqueShardId(const OperatorDef& op_def) {
 }

 bool isTraceableNetName(const std::string& net_name) {
-  auto tracing_nets = caffe2::split(',', FLAGS_caffe2_net_async_names_to_trace);
+  auto tracing_nets =
+      caffe2::split(',', c10::FLAGS_caffe2_net_async_names_to_trace);
  return !net_name.empty() &&
      std::find(tracing_nets.begin(), tracing_nets.end(), net_name) !=
      tracing_nets.end();
@ -403,10 +404,10 @@ bool startIter(const std::shared_ptr<Tracer>& tracer) {
    return false;
  }
  auto iter = tracer->bumpIter();
-  auto is_enabled = iter % FLAGS_caffe2_net_async_tracing_nth == 0;
+  auto is_enabled = iter % c10::FLAGS_caffe2_net_async_tracing_nth == 0;
  tracer->setEnabled(is_enabled);
-  if (iter % FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
-    int dumping_iter = iter / FLAGS_caffe2_net_async_tracing_dumping_nth;
+  if (iter % c10::FLAGS_caffe2_net_async_tracing_dumping_nth == 0) {
+    int dumping_iter = iter / c10::FLAGS_caffe2_net_async_tracing_dumping_nth;
    tracer->dumpTracingResultAndClearEvents(caffe2::to_string(dumping_iter));
  }
  return is_enabled;
--- a/caffe2/core/net_async_tracing.h
+++ b/caffe2/core/net_async_tracing.h
@ -22,9 +22,9 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/core/timer.h"

-CAFFE2_DECLARE_string(caffe2_net_async_tracing_filepath);
-CAFFE2_DECLARE_string(caffe2_net_async_names_to_trace);
-CAFFE2_DECLARE_int(caffe2_net_async_tracing_nth);
+C10_DECLARE_string(caffe2_net_async_tracing_filepath);
+C10_DECLARE_string(caffe2_net_async_names_to_trace);
+C10_DECLARE_int(caffe2_net_async_tracing_nth);

 namespace caffe2 {
 namespace tracing {
--- a/caffe2/core/net_dag.cc
+++ b/caffe2/core/net_dag.cc
@ -13,12 +13,12 @@
 #include "caffe2/utils/proto_utils.h"
 #include "caffe2/utils/thread_name.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_disable_chaining,
    false,
    "Disable chaining logic (some latent multi-device issues).");

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_dag_net_collect_stats,
    false,
    "Collect time stats in DAG net");
@ -35,7 +35,7 @@ DAGNetBase::DAGNetBase(
  operator_nodes_ = dag_utils::prepareOperatorNodes(net_def, ws);

  execution_chains_ =
-      (FLAGS_caffe2_disable_chaining
+      (c10::FLAGS_caffe2_disable_chaining
           ? dag_utils::singleChains(operator_nodes_)
           : dag_utils::computeChains(operator_nodes_));

@ -127,7 +127,7 @@ bool DAGNetBase::DoRunAsync() {
  }
  // Kickstart the job queue.
  for (auto& value : initial_frontier_) {
-    if (FLAGS_caffe2_dag_net_collect_stats) {
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
      task_timers_[value]->Start();
    }
    job_queue_->Push(value);
@ -213,7 +213,7 @@ void DAGNetBase::WorkerFunction() {
    if (!job_queue_->Pop(&idx)) {
      return;
    }
-    if (FLAGS_caffe2_dag_net_collect_stats) {
+    if (c10::FLAGS_caffe2_dag_net_collect_stats) {
      auto device_option =
          operator_nodes_[idx].operator_->event().GetDeviceOption();
      CAFFE_EVENT(
@ -295,7 +295,7 @@ void DAGNetBase::WorkerFunction() {
      // Can't do this inline because it can race with another thread
      // calling NoMoreJobs(). So the lock needs to be held on push.
      for (const auto idx : chains_to_queue) {
-        if (FLAGS_caffe2_dag_net_collect_stats) {
+        if (c10::FLAGS_caffe2_dag_net_collect_stats) {
          task_timers_[idx]->Start();
        }
        job_queue_->Push(idx);
@ -329,7 +329,7 @@ bool DAGNet::RunAt(int chain_id, const std::vector<int>& chain) {
      return false;
    }
  }
-  if (FLAGS_caffe2_dag_net_collect_stats) {
+  if (c10::FLAGS_caffe2_dag_net_collect_stats) {
    auto device_option =
        operator_nodes_[chain_id].operator_->event().GetDeviceOption();
    CAFFE_EVENT(
--- a/caffe2/core/net_gpu_test.cc
+++ b/caffe2/core/net_gpu_test.cc
@ -5,7 +5,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/core/scope_guard.h"

-CAFFE2_DECLARE_bool(caffe2_disable_chaining);
+C10_DECLARE_bool(caffe2_disable_chaining);

 namespace caffe2 {

@ -79,9 +79,9 @@ void checkChainingAndRun(
  CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
  {
    net_def.set_num_workers(4);
-    auto old = FLAGS_caffe2_disable_chaining;
-    auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
-    FLAGS_caffe2_disable_chaining = false;
+    auto old = c10::FLAGS_caffe2_disable_chaining;
+    auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
+    c10::FLAGS_caffe2_disable_chaining = false;

    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
--- a/caffe2/core/net_simple.cc
+++ b/caffe2/core/net_simple.cc
@ -12,7 +12,7 @@
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/utils/proto_utils.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_simple_net_benchmark_run_whole_net,
    true,
    "If false, whole net passes won't be performed");
@ -108,7 +108,7 @@ vector<float> SimpleNet::TEST_Benchmark(
      ".");
  Timer timer;
  auto millis = timer.MilliSeconds();
-  if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
+  if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
    for (int i = 0; i < main_runs; ++i) {
      CAFFE_ENFORCE(Run(), "Main run ", i, " has failed.");
    }
@ -270,7 +270,7 @@ vector<float> SimpleNet::TEST_Benchmark(
  for (size_t i = 0; i < time_per_op.size(); ++i) {
    time_per_op[i] /= main_runs;
  }
-  if (FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
+  if (c10::FLAGS_caffe2_simple_net_benchmark_run_whole_net) {
    time_per_op.insert(time_per_op.begin(), millis / main_runs);
  }
  return time_per_op;
--- a/caffe2/core/net_test.cc
+++ b/caffe2/core/net_test.cc
@ -7,7 +7,7 @@

 #include <google/protobuf/text_format.h>

-CAFFE2_DECLARE_bool(caffe2_disable_chaining);
+C10_DECLARE_bool(caffe2_disable_chaining);

 namespace caffe2 {

@ -150,9 +150,9 @@ void checkChainingAndRun(
      ::google::protobuf::TextFormat::ParseFromString(spec, &net_def));
  {
    net_def.set_num_workers(4);
-    auto old = FLAGS_caffe2_disable_chaining;
-    auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
-    FLAGS_caffe2_disable_chaining = false;
+    auto old = c10::FLAGS_caffe2_disable_chaining;
+    auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
+    c10::FLAGS_caffe2_disable_chaining = false;

    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
@ -177,9 +177,9 @@ void checkNumChainsAndRun(const char* spec, const int expected_num_chains) {
  }

  {
-    auto old = FLAGS_caffe2_disable_chaining;
-    auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
-    FLAGS_caffe2_disable_chaining = false;
+    auto old = c10::FLAGS_caffe2_disable_chaining;
+    auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
+    c10::FLAGS_caffe2_disable_chaining = false;

    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
@ -572,9 +572,9 @@ TEST(NetTest, DISABLED_FailingOperator) {

  {
    net_def.set_num_workers(4);
-    auto old = FLAGS_caffe2_disable_chaining;
-    auto g = MakeGuard([&]() { FLAGS_caffe2_disable_chaining = old; });
-    FLAGS_caffe2_disable_chaining = false;
+    auto old = c10::FLAGS_caffe2_disable_chaining;
+    auto g = MakeGuard([&]() { c10::FLAGS_caffe2_disable_chaining = old; });
+    c10::FLAGS_caffe2_disable_chaining = false;

    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    for (int i = 0; i < 10; i++) {
@ -684,9 +684,9 @@ TEST(NetTest, ExecutorOverride) {

  {
    Workspace ws;
-    auto old = FLAGS_caffe2_override_executor;
-    auto g = MakeGuard([&]() { FLAGS_caffe2_override_executor = old; });
-    FLAGS_caffe2_override_executor = "dag,async_scheduling";
+    auto old = c10::FLAGS_caffe2_override_executor;
+    auto g = MakeGuard([&]() { c10::FLAGS_caffe2_override_executor = old; });
+    c10::FLAGS_caffe2_override_executor = "dag,async_scheduling";

    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    auto async_net =
--- a/caffe2/core/numa.cc
+++ b/caffe2/core/numa.cc
@ -1,9 +1,6 @@
 #include "caffe2/core/numa.h"

-CAFFE2_DEFINE_bool(
-    caffe2_cpu_numa_enabled,
-    false,
-    "Use NUMA whenever possible.");
+C10_DEFINE_bool(caffe2_cpu_numa_enabled, false, "Use NUMA whenever possible.");

 #if defined(__linux__) && !defined(CAFFE2_DISABLE_NUMA) && CAFFE2_MOBILE == 0
 #include <numa.h>
@ -15,7 +12,7 @@ namespace caffe2 {

 #ifdef CAFFE2_NUMA_ENABLED
 bool IsNUMAEnabled() {
-  return FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
+  return c10::FLAGS_caffe2_cpu_numa_enabled && numa_available() >= 0;
 }

 void NUMABind(int numa_node_id) {
--- a/caffe2/core/numa.h
+++ b/caffe2/core/numa.h
@ -3,7 +3,7 @@

 #include "caffe2/core/logging.h"

-CAFFE2_DECLARE_bool(caffe2_cpu_numa_enabled);
+C10_DECLARE_bool(caffe2_cpu_numa_enabled);

 namespace caffe2 {

--- a/caffe2/core/operator.cc
+++ b/caffe2/core/operator.cc
@ -16,11 +16,11 @@

 #include "caffe2/core/operator_c10wrapper.h"

-CAFFE2_DEFINE_int(
+C10_DEFINE_int(
    caffe2_operator_max_engine_name_length,
    10,
    "Maximum engine name length to be stored");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_disable_implicit_engine_preference,
    false,
    "If set, disable implicit engine preferences. This is useful for unit "
@ -151,7 +151,7 @@ unique_ptr<OperatorBase> _CreateOperator(
    const auto op_def_engines = split(',', operator_def.engine());
    engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
  }
-  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
+  if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
      g_per_op_engine_pref().count(device_type) &&
      g_per_op_engine_pref()[device_type].count(op_type)) {
    const auto& preferred_engines =
@ -160,7 +160,7 @@ unique_ptr<OperatorBase> _CreateOperator(
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
-  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
+  if (!c10::FLAGS_caffe2_disable_implicit_engine_preference &&
      g_global_engine_pref().count(device_type)) {
    const auto& preferred_engines = g_global_engine_pref()[device_type];
    VLOG(2) << "Inserting global engine preference: " << preferred_engines;
@ -173,11 +173,12 @@ unique_ptr<OperatorBase> _CreateOperator(
            << engine;
    auto op = TryCreateOperator(key, operator_def, ws);
    if (op) {
-      if (engine.size() <= (unsigned)FLAGS_caffe2_operator_max_engine_name_length) {
+      if (engine.size() <=
+          (unsigned)c10::FLAGS_caffe2_operator_max_engine_name_length) {
        op->annotate_engine(engine);
      } else {
-        op->annotate_engine(
-            engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
+        op->annotate_engine(engine.substr(
+            0, c10::FLAGS_caffe2_operator_max_engine_name_length));
      }
      return op;
    } else {
--- a/caffe2/core/operator.h
+++ b/caffe2/core/operator.h
@ -705,7 +705,7 @@ struct DispatchHelper<FixedValues<>, ExtraArgs...> {
  }
 };

-#define CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER(                                 \
+#define C10_DEFINE_TENSOR_TYPES_DISPATCHER(                                    \
    TensorTypes, DoRunWithType, DoRunWithOtherType)                            \
  template <typename FirstType, typename... Types, typename... ExtraArgs>      \
  struct DispatchHelper<TensorTypes<FirstType, Types...>, ExtraArgs...> {      \
@ -763,15 +763,15 @@ struct DispatchHelper<FixedValues<>, ExtraArgs...> {
      return call<Op>(op, blob.meta());                                        \
    }                                                                          \
  };
-CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER(
+C10_DEFINE_TENSOR_TYPES_DISPATCHER(
    TensorTypes,
    DoRunWithType,
    DoRunWithOtherType)
-CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER(
+C10_DEFINE_TENSOR_TYPES_DISPATCHER(
    TensorTypes2,
    DoRunWithType2,
    DoRunWithOtherType2)
-#undef CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER
+#undef C10_DEFINE_TENSOR_TYPES_DISPATCHER

 // The device type registry. This works in two phases:
 // (1) gDeviceTypeRegistry() maps the device types values to the actual operator
--- a/caffe2/core/plan_executor.cc
+++ b/caffe2/core/plan_executor.cc
@ -11,7 +11,7 @@
 #include "caffe2/core/workspace.h"
 #include "caffe2/proto/caffe2_pb.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_handle_executor_threads_exceptions,
    false,
    "If used we will handle exceptions in executor threads. "
@ -422,7 +422,7 @@ bool ExecuteStepRecursive(ExecutionStepWrapper& stepWrapper) {
              LOG(ERROR) << "Parallel worker exception:\n" << first_exception;
            }
            compiledStep->gotFailure = true;
-            if (!FLAGS_caffe2_handle_executor_threads_exceptions) {
+            if (!c10::FLAGS_caffe2_handle_executor_threads_exceptions) {
              // In complex plans other threads might get stuck if another
              // one fails. So we let exception to go out of thread which
              // causes SIGABRT. In local setup one might use this flag
--- a/caffe2/core/tensor_impl.cc
+++ b/caffe2/core/tensor_impl.cc
@ -1,12 +1,12 @@
 #include "caffe2/core/tensor_impl.h"
 #include "caffe2/core/flags.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_keep_on_shrink,
    true,
    "If set, keeps memory when a tensor is shrinking its size.");

-CAFFE2_DEFINE_int64(
+C10_DEFINE_int64(
    caffe2_max_keep_on_shrink_memory,
    LLONG_MAX,
    "The maximum memory in bytes to keep on shrink, if the difference between "
--- a/caffe2/core/workspace.cc
+++ b/caffe2/core/workspace.cc
@ -11,7 +11,7 @@
 #include "caffe2/core/tensor.h"
 #include "caffe2/proto/caffe2_pb.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_print_blob_sizes_at_exit,
    false,
    "If true, workspace destructor will print all blob shapes");
--- a/caffe2/core/workspace.h
+++ b/caffe2/core/workspace.h
@ -18,7 +18,7 @@
 #include "caffe2/utils/signal_handler.h"
 #include "caffe2/utils/threadpool/ThreadPool.h"

-CAFFE2_DECLARE_bool(caffe2_print_blob_sizes_at_exit);
+C10_DECLARE_bool(caffe2_print_blob_sizes_at_exit);

 namespace caffe2 {

@ -105,7 +105,7 @@ class CAFFE2_API Workspace {
  }

  ~Workspace() {
-    if (FLAGS_caffe2_print_blob_sizes_at_exit) {
+    if (c10::FLAGS_caffe2_print_blob_sizes_at_exit) {
      PrintBlobSizes();
    }
    // This is why we have a bookkeeper_ shared_ptr instead of a naked static! A
--- a/caffe2/db/leveldb.cc
+++ b/caffe2/db/leveldb.cc
@ -4,8 +4,10 @@
 #include "leveldb/db.h"
 #include "leveldb/write_batch.h"

-CAFFE2_DEFINE_int(caffe2_leveldb_block_size, 65536,
-                  "The caffe2 leveldb block size when writing a leveldb.");
+C10_DEFINE_int(
+    caffe2_leveldb_block_size,
+    65536,
+    "The caffe2 leveldb block size when writing a leveldb.");

 namespace caffe2 {
 namespace db {
@ -58,7 +60,7 @@ class LevelDB : public DB {
 public:
  LevelDB(const string& source, Mode mode) : DB(source, mode) {
    leveldb::Options options;
-    options.block_size = FLAGS_caffe2_leveldb_block_size;
+    options.block_size = c10::FLAGS_caffe2_leveldb_block_size;
    options.write_buffer_size = 268435456;
    options.max_open_files = 100;
    options.error_if_exists = mode == NEW;
--- a/caffe2/mkl/mkl_operator.cc
+++ b/caffe2/mkl/mkl_operator.cc
@ -1,7 +1,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/proto/caffe2_pb.h"

-CAFFE2_DEFINE_bool(
+C10_DEFINE_bool(
    caffe2_mkl_memonger_in_use,
    false,
    "Turn on if memonger is used to force reallocate intermediate "
--- a/caffe2/mkl/operators/concat_op.cc
+++ b/caffe2/mkl/operators/concat_op.cc
@ -48,7 +48,7 @@ class MKLConcatOp final : public MKLOperator<T> {
      dims_changed = (input_size_cache_[i] != Input(i).dims());
    }

-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      input_size_cache_.resize(nInputs);
      int output_channels = 0;
      int canonical_axis = canonical_axis_index_(axis_, nDims);
@ -88,7 +88,7 @@ class MKLConcatOp final : public MKLOperator<T> {
    resources_[dnnResourceDst] = buffer_.buffer();
    ExecutePrimitive();
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      buffer_.Reset();
    }
    return true;
--- a/caffe2/mkl/operators/conv_op.cc
+++ b/caffe2/mkl/operators/conv_op.cc
@ -51,7 +51,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {

    bool dims_changed;
    CHECK_INPUT_FILTER_DIMS(X, filter, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      CAFFE_ENFORCE(
          C == filter.dim32(1) * group_,
          "Convolution op: input channels does not match: # of input channels ",
@ -152,7 +152,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {

    MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      // buffer_ is not shared with Y. Free memory since it'll
      // be re-allocated in the next run anyway due to memonger in use.
      buffer_.Reset();
--- a/caffe2/mkl/operators/elementwise_sum_op.cc
+++ b/caffe2/mkl/operators/elementwise_sum_op.cc
@ -27,7 +27,7 @@ class MKLSumOp final : public MKLOperator<T> {
    MKLMemory<T>* Y = Output(0);
    bool dims_changed;
    CHECK_INPUT_DIMS(X0, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      primitive_.Reset(
          dnnSumCreate<T>,
          nullptr,
@ -56,7 +56,7 @@ class MKLSumOp final : public MKLOperator<T> {
    resources_[dnnResourceDst] = buffer_.buffer();
    MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      buffer_.Reset();
    }
    return true;
--- a/caffe2/mkl/operators/fully_connected_op.cc
+++ b/caffe2/mkl/operators/fully_connected_op.cc
@ -26,7 +26,7 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {

    bool dims_changed;
    CHECK_INPUT_FILTER_DIMS(X, filter, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      const int N = filter.dim32(0);
      CAFFE_ENFORCE(N == bias.dim32(0));

@ -80,7 +80,7 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {

    MKLDNN_SAFE_CALL(mkl::dnnExecute<T>(primitive_, resources_));
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      buffer_.Reset();
    }
    return true;
--- a/caffe2/mkl/operators/local_response_normalization_op.cc
+++ b/caffe2/mkl/operators/local_response_normalization_op.cc
@ -34,7 +34,7 @@ bool MKLLRNOp<float>::RunOnDeviceWithOrderNCHW() {

  bool dims_changed;
  CHECK_INPUT_DIMS(X, dims_changed);
-  if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+  if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
    size_t dim = X.ndim();
    CAFFE_ENFORCE(4 == dim);

@ -65,7 +65,7 @@ bool MKLLRNOp<float>::RunOnDeviceWithOrderNCHW() {
  resources_[dnnResourceWorkspace] = workspace_buffer_->buffer();
  MKLDNN_SAFE_CALL(mkl::dnnExecute<float>(primitive_, resources_));
  buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-  if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+  if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
    buffer_.Reset();
  }
  return true;
--- a/caffe2/mkl/operators/pool_op.cc
+++ b/caffe2/mkl/operators/pool_op.cc
@ -58,7 +58,7 @@ bool MKLPoolOp<float>::RunOnDeviceWithOrderNCHW() {

  bool dims_changed;
  CHECK_INPUT_DIMS(X, dims_changed);
-  if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+  if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
    // We will utilize the SetOutputSize() function in the base class
    // with dummy TensorCPU input and output to calculate the sizes.
    Tensor dummy_input(X.dims(), CPU);
@ -101,7 +101,7 @@ bool MKLPoolOp<float>::RunOnDeviceWithOrderNCHW() {
  resources_[dnnResourceWorkspace] = workspace_buffer_->buffer();
  MKLDNN_SAFE_CALL(mkl::dnnExecute<float>(primitive_, resources_));
  buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-  if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+  if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
    buffer_.Reset();
  }
  return true;
--- a/caffe2/mkl/operators/relu_op.cc
+++ b/caffe2/mkl/operators/relu_op.cc
@ -19,7 +19,7 @@ class MKLReluOp : public MKLOperator<T> {

    bool dims_changed;
    CHECK_INPUT_DIMS(X, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      // First run or changed input size, will need to recreate environment
      primitive_.Reset(dnnReLUCreateForward<T>, nullptr, X.layout(), 0.f);
      if (&X != Y) {
@ -36,7 +36,7 @@ class MKLReluOp : public MKLOperator<T> {
    resources_[dnnResourceDst] = buffer_.buffer();
    ExecutePrimitive();
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      buffer_.Reset();
    }
    return true;
--- a/caffe2/mkl/operators/spatial_batch_norm_op.cc
+++ b/caffe2/mkl/operators/spatial_batch_norm_op.cc
@ -56,7 +56,7 @@ class MKLBNOp final : public Operator<MKLContext> {

    bool dims_changed;
    CHECK_INPUT_DIMS(X, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      // Create main primitive.
      if (is_test_) {
        primitive_.Reset(
@ -133,7 +133,7 @@ class MKLBNOp final : public Operator<MKLContext> {
      }
    }
    buffer_.CopyTo(Y, primitive_, dnnResourceDst);
-    if (FLAGS_caffe2_mkl_memonger_in_use && !shared) {
+    if (c10::FLAGS_caffe2_mkl_memonger_in_use && !shared) {
      buffer_.Reset();
    }
    return true;
--- a/caffe2/mkl/operators/squeeze_op.cc
+++ b/caffe2/mkl/operators/squeeze_op.cc
@ -39,7 +39,7 @@ class MKLSqueezeOp final : public MKLOperator<T> {

    bool dims_changed;
    CHECK_INPUT_DIMS(X, dims_changed);
-    if (dims_changed || FLAGS_caffe2_mkl_memonger_in_use) {
+    if (dims_changed || c10::FLAGS_caffe2_mkl_memonger_in_use) {
      // Temp buffer mainly to convert the input to plain layout before
      // Reshape() if the input has a custom layout.
      buffer_.Reset(X.dims());
--- a/caffe2/mkl/utils/mkl_memory.cc
+++ b/caffe2/mkl/utils/mkl_memory.cc
@ -5,13 +5,13 @@

 #ifdef CAFFE2_HAS_MKL_DNN

-CAFFE2_DEFINE_bool(
-    caffe2_mkl_implicit_layout_change, false,
+C10_DEFINE_bool(
+    caffe2_mkl_implicit_layout_change,
+    false,
    "Controls the behavior when we call View() on an MKLMemory: if it is set "
    "true, then the View() function will actually change the underlying "
    "storage. If it is set false, an implicit copy is triggered but the "
-    "original storage is not affected."
-    );
+    "original storage is not affected.");

 namespace caffe2 {

--- a/caffe2/mkl/utils/mkl_memory.h
+++ b/caffe2/mkl/utils/mkl_memory.h
@ -13,7 +13,7 @@
 // an MKLMemory: if it is set true, then the View() function will actually
 // change the underlying storage. If it is set false, an implicit copy is
 // triggered but the original storage is not affected.
-CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change);
+C10_DECLARE_bool(caffe2_mkl_implicit_layout_change);

 namespace caffe2 {
 namespace mkl {
@ -511,7 +511,7 @@ class C10_EXPORT MKLMemory {
          dnnConversionCreate<T>, layout_, layout_wanted);
      MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
          convert, buffer_.get(), temp_buffer));
-      if (primitive && FLAGS_caffe2_mkl_implicit_layout_change) {
+      if (primitive && c10::FLAGS_caffe2_mkl_implicit_layout_change) {
        VLOG(2) << "Implicit layout change set. "
                   "Changing the underlying storage.";
        // We will need to call Reset to set up all the member variables.
--- a/caffe2/mkl/utils/mkl_operator.h
+++ b/caffe2/mkl/utils/mkl_operator.h
@ -6,7 +6,7 @@
 #include "caffe2/mkl/utils/mkl_memory.h"
 #include "caffe2/proto/caffe2_pb.h"

-CAFFE2_DECLARE_bool(caffe2_mkl_memonger_in_use);
+C10_DECLARE_bool(caffe2_mkl_memonger_in_use);

 namespace caffe2 {

--- a/caffe2/mobile/contrib/arm-compute/test/gl_model_test.h
+++ b/caffe2/mobile/contrib/arm-compute/test/gl_model_test.h
@ -7,14 +7,13 @@
 #include "caffe2/core/workspace.h"
 #include <unordered_set>

-CAFFE2_DEFINE_int(warmup, 3, "The number of iterations to warm up.");
-CAFFE2_DEFINE_int(iter, 100, "The number of iterations to run.");
-CAFFE2_DEFINE_bool(
+C10_DEFINE_int(warmup, 3, "The number of iterations to warm up.");
+C10_DEFINE_int(iter, 100, "The number of iterations to run.");
+C10_DEFINE_bool(
    run_individual,
    true,
    "Whether to benchmark individual operators.");

-
 constexpr float tol = 0.03;
 namespace caffe2 {
  void benchmarkModel(std::string init_net_pb, std::string predict_net_pb, std::string input_name, std::vector<int> input_dims, std::string net_name="benchmark_net", std::unordered_set<std::string> cpu_ops = std::unordered_set<std::string>({})) {
@ -46,7 +45,8 @@ namespace caffe2 {
  LOG(ERROR) << "[C2DEBUG] after compareNetResult4D";
  NetBase* net = ws->CreateNet(predict_net_def_gpu);
  LOG(ERROR) << "[C2DEBUG] Benchmarking OpenGL Net";
-  net->TEST_Benchmark(caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
+  net->TEST_Benchmark(
+      c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
  // Test CPU
  for (auto i = 0; i < predict_net_def.op().size(); ++i) {
    auto op = predict_net_def.mutable_op(i);
@ -58,7 +58,7 @@ namespace caffe2 {
  predict_net_def.set_name("cpu_net");
  net = ws->CreateNet(predict_net_def);
  LOG(INFO) << "[C2DEBUG] Benchmarking CPU Net";
-  net->TEST_Benchmark(caffe2::FLAGS_warmup, caffe2::FLAGS_iter, caffe2::FLAGS_run_individual);
-
+  net->TEST_Benchmark(
+      c10::FLAGS_warmup, c10::FLAGS_iter, c10::FLAGS_run_individual);
  }
 } // namespace caffe2
--- a/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc
+++ b/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc
@ -6,7 +6,7 @@
 #include "caffe2/mobile/contrib/ios/mpscnn/mpscnn.h"
 #endif

-CAFFE2_DECLARE_bool(caffe2_force_shared_col_buffer);
+C10_DECLARE_bool(caffe2_force_shared_col_buffer);

 Caffe2IOSPredictor* Caffe2IOSPredictor::NewCaffe2IOSPredictor(const caffe2::NetDef& init_net,
                                                              const caffe2::NetDef& predict_net,
@ -49,7 +49,7 @@ Caffe2IOSPredictor::Caffe2IOSPredictor(const caffe2::NetDef& init_net,
 }

 void Caffe2IOSPredictor::run(const Tensor& inData, Tensor& outData, std::string& errorMessage) {
-  caffe2::FLAGS_caffe2_force_shared_col_buffer = true;
+  c10::FLAGS_caffe2_force_shared_col_buffer = true;
  caffe2::Tensor input(caffe2::CPU);
  input.Resize(inData.dims);
  input.ShareExternalPointer(inData.data);
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .8.2
 .8.2