From d3059b9c493827fe5825ea7485fc21bad83469ba Mon Sep 17 00:00:00 2001 From: Dmytro Dzhulgakov Date: Sun, 19 May 2019 23:01:09 -0700 Subject: [PATCH] Lightweight logging for once-only API usage --- aten/src/ATen/cuda/detail/CUDAHooks.cpp | 1 + c10/core/TensorImpl.cpp | 8 +++- c10/macros/Macros.h | 17 +++++++-- c10/util/Logging.cpp | 50 +++++++++++++++++++++---- c10/util/Logging.h | 26 +++++++++++++ c10/util/Registry.h | 12 ------ c10/util/typeid.h | 34 ++++++++--------- caffe2/core/context_gpu.cu | 1 + caffe2/core/init.cc | 1 + caffe2/core/net.cc | 1 + caffe2/core/operator.cc | 1 + caffe2/core/operator.h | 2 +- caffe2/python/pybind_state.cc | 2 + torch/csrc/Module.cpp | 17 ++++++++- torch/csrc/distributed/c10d/init.cpp | 1 + torch/csrc/jit/export.cpp | 1 + torch/csrc/jit/graph_executor.cpp | 3 +- torch/csrc/jit/import.cpp | 1 + torch/csrc/jit/python_tracer.cpp | 2 + torch/csrc/jit/script/init.cpp | 2 + torch/lib/c10d/ProcessGroup.cpp | 6 ++- torch/nn/modules/module.py | 1 + torch/optim/optimizer.py | 1 + torch/utils/data/dataloader.py | 1 + 24 files changed, 146 insertions(+), 46 deletions(-) diff --git a/aten/src/ATen/cuda/detail/CUDAHooks.cpp b/aten/src/ATen/cuda/detail/CUDAHooks.cpp index 5bef27a29f1a..a53f6d215165 100644 --- a/aten/src/ATen/cuda/detail/CUDAHooks.cpp +++ b/aten/src/ATen/cuda/detail/CUDAHooks.cpp @@ -39,6 +39,7 @@ namespace detail { // compilation unit (alt is to have another method in hooks, but // let's not if we don't need to!) std::unique_ptr CUDAHooks::initCUDA() const { + C10_LOG_API_USAGE_ONCE("aten.init.cuda"); THCState* thc_state = THCState_alloc(); THCudaInit(thc_state); diff --git a/c10/core/TensorImpl.cpp b/c10/core/TensorImpl.cpp index 84fcb466133b..6e5bf981967c 100644 --- a/c10/core/TensorImpl.cpp +++ b/c10/core/TensorImpl.cpp @@ -48,8 +48,12 @@ TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::Ty data_type_(data_type), device_opt_(device_opt), type_id_(type_id) { - AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() || - device_opt_.has_value()); + if (type_id != UndefinedTensorId()) { + AT_ASSERT(data_type.id() == caffe2::TypeIdentifier::uninitialized() || + device_opt_.has_value()); + // UndefinedTensorImpl is a singleton, so we skip logging it + C10_LOG_API_USAGE_ONCE("tensor.create"); + } // we would also like to check that non-cpu devices have an index, but some Caffe2 operators create // Storages with default devices. strides_.push_back(1); diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h index 09e4b46fcfee..fc3ad273a516 100644 --- a/c10/macros/Macros.h +++ b/c10/macros/Macros.h @@ -29,10 +29,21 @@ classname(const classname&) = delete; \ classname& operator=(const classname&) = delete -#define CONCAT_IMPL(x, y) x##y -#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y) +#define C10_CONCATENATE_IMPL(s1, s2) s1##s2 +#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2) + +#define C10_MACRO_EXPAND(args) args + +/** + * C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with + * str and ending with a number that varies with the line. + */ +#ifdef __COUNTER__ +#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__) +#else +#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__) +#endif -#define MACRO_EXPAND(args) args /// C10_NODISCARD - Warn if a type or return value is discarded. 
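The Macros.h hunk above centralizes C10_CONCATENATE and C10_ANONYMOUS_VARIABLE so that C10_LOG_API_USAGE_ONCE (added to Logging.h further down) can mint a unique static variable name at each expansion site. Below is a standalone sketch of how that expansion behaves; the macro definitions are copied from the hunk, while the main() driver is illustrative only and not part of the patch.

```cpp
// Standalone sketch (not part of the patch) of the anonymous-variable macro.
#include <iostream>

#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)

#ifdef __COUNTER__
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
#else
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
#endif

int main() {
  // With __COUNTER__ available these expand to two distinct identifiers
  // (e.g. logFlag0 and logFlag1), so both definitions can coexist in one
  // scope -- exactly what a once-per-call-site guard variable needs.
  [[maybe_unused]] int C10_ANONYMOUS_VARIABLE(logFlag) = 1;
  [[maybe_unused]] int C10_ANONYMOUS_VARIABLE(logFlag) = 2;
  std::cout << "two anonymous variables defined" << std::endl;
  return 0;
}
```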
diff --git a/c10/util/Logging.cpp b/c10/util/Logging.cpp index 25405769b6e3..33ef083dab9e 100644 --- a/c10/util/Logging.cpp +++ b/c10/util/Logging.cpp @@ -1,9 +1,10 @@ #include "c10/util/Logging.h" -#include "c10/util/Flags.h" #include "c10/util/Backtrace.h" +#include "c10/util/Flags.h" #include #include +#include #include #include @@ -24,7 +25,9 @@ namespace enforce_detail { namespace { std::function* GetFetchStackTrace() { - static std::function func = []() { return get_backtrace(/*frames_to_skip=*/ 1); }; + static std::function func = []() { + return get_backtrace(/*frames_to_skip=*/1); + }; return &func; }; } // namespace @@ -49,12 +52,43 @@ void ThrowEnforceNotMet( // PyTorch-style error message // (This must be defined here for access to GetFetchStackTrace) Error::Error(SourceLocation source_location, const std::string& msg) - : Error( - msg, - str(" (", - source_location, - ")\n", - (*GetFetchStackTrace())())) {} + : Error(msg, str(" (", source_location, ")\n", (*GetFetchStackTrace())())) { +} + +using APIUsageLoggerType = std::function; + +namespace { +bool IsAPIUsageDebugMode() { + return getenv("PYTORCH_API_USAGE_STDERR"); +} + +void APIUsageDebug(const string& event) { + // use stderr to avoid messing with glog + std::cerr << "PYTORCH_API_USAGE " << event << std::endl; +} + +APIUsageLoggerType* GetAPIUsageLogger() { + static APIUsageLoggerType func = + IsAPIUsageDebugMode() ? &APIUsageDebug : [](const string&) {}; + return &func; +}; +} // namespace + +void SetAPIUsageLogger(std::function logger) { + AT_ASSERT(logger); + *GetAPIUsageLogger() = logger; +} + +void LogAPIUsage(const std::string& event) { + (*GetAPIUsageLogger())(event); +} + +namespace detail { +bool LogAPIUsageFakeReturn(const std::string& event) { + (*GetAPIUsageLogger())(event); + return true; +} +} } // namespace c10 diff --git a/c10/util/Logging.h b/c10/util/Logging.h index 5833f355c87e..ce081e8ebc5b 100644 --- a/c10/util/Logging.h +++ b/c10/util/Logging.h @@ -257,6 +257,32 @@ BINARY_COMP_HELPER(LessEquals, <=) #define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \ CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \ Greater((x), (y)), #x " > " #y, __VA_ARGS__) + +/** + * Very lightweight logging for the first time API usage. It's beneficial for + * tracking of individual functionality usage in larger applications. + * + * In order to ensure light-weightness of logging, we utilize static variable + * trick - LogAPIUsage will be invoked only once and further invocations will + * just do an atomic check. + * + * Example: + * // Logs caller info with an arbitrary text event, if there is a usage. + * C10_LOG_API_USAGE_ONCE("my_api"); + */ +#define C10_LOG_API_USAGE_ONCE(...) \ + C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \ + ::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__); + +// API usage logging capabilities +C10_API void SetAPIUsageLogger(std::function logger); +C10_API void LogAPIUsage(const std::string& context); + +namespace detail { +// Return value is needed to do the static variable initialization trick +C10_API bool LogAPIUsageFakeReturn(const std::string& context); +} + } // namespace c10 #endif // C10_UTIL_LOGGING_H_ diff --git a/c10/util/Registry.h b/c10/util/Registry.h index 060f80d5fa0d..e9defd160285 100644 --- a/c10/util/Registry.h +++ b/c10/util/Registry.h @@ -185,18 +185,6 @@ class Registerer { } }; -/** - * C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with - * str and ending with a number that varies with the line. 
- */ -#define C10_CONCATENATE_IMPL(s1, s2) s1##s2 -#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2) -#ifdef __COUNTER__ -#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__) -#else -#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__) -#endif - /** * C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function * declaration, as well as creating a convenient typename for its corresponding diff --git a/c10/util/typeid.h b/c10/util/typeid.h index 36e39458ac0f..09bd806850e9 100644 --- a/c10/util/typeid.h +++ b/c10/util/typeid.h @@ -23,10 +23,10 @@ #include #include #include +#include +#include #include #include -#include -#include /* * TypeIdentifier is a small type containing an id. @@ -498,15 +498,15 @@ inline std::ostream& operator<<( #define EXPORT_IF_NOT_GCC #endif -#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \ - namespace detail { \ - const TypeMetaData MACRO_CONCAT(_typeMetaDataInstance_, Counter) = \ - _makeTypeMetaDataInstance(_typeName(#T)); \ - } \ - template <> \ - EXPORT_IF_NOT_GCC const detail::TypeMetaData* \ - TypeMeta::_typeMetaDataInstance() noexcept { \ - return &MACRO_CONCAT(detail::_typeMetaDataInstance_, Counter); \ +#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \ + namespace detail { \ + const TypeMetaData C10_CONCATENATE(_typeMetaDataInstance_, Counter) = \ + _makeTypeMetaDataInstance(_typeName(#T)); \ + } \ + template <> \ + EXPORT_IF_NOT_GCC const detail::TypeMetaData* \ + TypeMeta::_typeMetaDataInstance() noexcept { \ + return &C10_CONCATENATE(detail::_typeMetaDataInstance_, Counter); \ } #define CAFFE_KNOWN_TYPE(T) \ template <> \ @@ -529,20 +529,20 @@ inline std::ostream& operator<<( return TypeIdentifier(PreallocatedId); \ } \ namespace detail { \ - C10_API extern const TypeMetaData MACRO_CONCAT( \ + C10_API extern const TypeMetaData C10_CONCATENATE( \ _typeMetaDataInstance_preallocated_, \ PreallocatedId); \ } #define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \ namespace detail { \ - C10_EXPORT const TypeMetaData MACRO_CONCAT( \ + C10_EXPORT const TypeMetaData C10_CONCATENATE( \ _typeMetaDataInstance_preallocated_, \ PreallocatedId) = _makeTypeMetaDataInstance(_typeName(#T)); \ } \ template <> \ C10_EXPORT const detail::TypeMetaData* \ TypeMeta::_typeMetaDataInstance() noexcept { \ - return &MACRO_CONCAT( \ + return &C10_CONCATENATE( \ detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \ } #else // _MSC_VER @@ -552,19 +552,19 @@ inline std::ostream& operator<<( return TypeIdentifier(PreallocatedId); \ } \ namespace detail { \ - C10_EXPORT extern const TypeMetaData MACRO_CONCAT( \ + C10_EXPORT extern const TypeMetaData C10_CONCATENATE( \ _typeMetaDataInstance_preallocated_, \ PreallocatedId); \ } \ template <> \ inline const detail::TypeMetaData* \ TypeMeta::_typeMetaDataInstance() noexcept { \ - return &MACRO_CONCAT( \ + return &C10_CONCATENATE( \ detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \ } #define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \ namespace detail { \ - const TypeMetaData MACRO_CONCAT( \ + const TypeMetaData C10_CONCATENATE( \ _typeMetaDataInstance_preallocated_, \ PreallocatedId) = _makeTypeMetaDataInstance(_typeName(#T)); \ } diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu index a360a0ea64ac..98f6b12258d0 100644 --- a/caffe2/core/context_gpu.cu +++ b/caffe2/core/context_gpu.cu @@ -201,6 +201,7 @@ static void Caffe2InitializeCuda() { VLOG(1) << "No cuda gpu present. 
Skipping."; return; } + C10_LOG_API_USAGE_ONCE("caffe2.init.cuda"); // Check if the number of GPUs matches the expected compile-time max number // of GPUs. CAFFE_ENFORCE_LE( diff --git a/caffe2/core/init.cc b/caffe2/core/init.cc index 572f39933d17..5e8f746cfec7 100644 --- a/caffe2/core/init.cc +++ b/caffe2/core/init.cc @@ -42,6 +42,7 @@ bool GlobalInitAlreadyRun() { } bool GlobalInit(int* pargc, char*** pargv) { + C10_LOG_API_USAGE_ONCE("caffe2.global_init"); static std::recursive_mutex init_mutex; std::lock_guard guard(init_mutex); internal::State& init_state = internal::GlobalInitState(); diff --git a/caffe2/core/net.cc b/caffe2/core/net.cc index 41a314403489..ec679673c1b0 100644 --- a/caffe2/core/net.cc +++ b/caffe2/core/net.cc @@ -37,6 +37,7 @@ NetBase::NetBase( name_(def->name()), net_def_(def) { static GlobalInitIsCalledGuard guard; + C10_LOG_API_USAGE_ONCE("caffe2.net.create"); // Check that node_name is empty for all ops for (const OperatorDef& op : def->op()) { if (op.has_device_option()) { diff --git a/caffe2/core/operator.cc b/caffe2/core/operator.cc index 9087d3ac2fa7..fe5c278e3d4f 100644 --- a/caffe2/core/operator.cc +++ b/caffe2/core/operator.cc @@ -386,6 +386,7 @@ C10_DEFINE_REGISTRY( GradientOpsMeta GetGradientForOp( const OperatorDef& def, const vector& g_output) { + C10_LOG_API_USAGE_ONCE("caffe2.gradient_maker"); std::unique_ptr maker( GradientRegistry()->Create(def.type(), def, g_output)); CAFFE_ENFORCE(maker, diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h index 2e2353368e9b..70f888ba362e 100644 --- a/caffe2/core/operator.h +++ b/caffe2/core/operator.h @@ -1267,7 +1267,7 @@ C10_DECLARE_REGISTRY( #define REGISTER_CPU_GRADIENT_OPERATOR(...) /* No gradients. */ #else #define REGISTER_CPU_GRADIENT_OPERATOR(...) \ - MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__)) + C10_MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__)) #endif C10_DECLARE_REGISTRY( diff --git a/caffe2/python/pybind_state.cc b/caffe2/python/pybind_state.cc index 66434ff4ef15..c6f626196e15 100644 --- a/caffe2/python/pybind_state.cc +++ b/caffe2/python/pybind_state.cc @@ -1819,6 +1819,8 @@ void addGlobalMethods(py::module& m) { PYBIND11_MODULE(caffe2_pybind11_state, m) { m.doc() = "pybind11 stateful interface to Caffe2 workspaces"; + C10_LOG_API_USAGE_ONCE("caffe2.python.import"); + addGlobalMethods(m); addObjectMethods(m); for (const auto& addition : PybindAdditionRegistry()->Keys()) { diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 3675a146b189..1f80f86c9841 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -565,6 +566,16 @@ static void warning_handler( } } +// In Python we can't use the trick of C10_LOG_API_USAGE_ONCE +// Guaranteed to be invoked from Python under GIL, no locking on map needed +static void LogAPIUsageOnceFromPython(const std::string& event) { + static std::unordered_set seen; + if (!seen.count(event)) { + seen.insert(event); + c10::LogAPIUsage(event); + } +} + #ifdef _WIN32 __declspec(dllexport) @@ -573,6 +584,8 @@ PyObject* initModule() { HANDLE_TH_ERRORS at::init_num_threads(); + C10_LOG_API_USAGE_ONCE("torch.python.import"); + #define ASSERT_TRUE(cmd) if (!(cmd)) return nullptr THPUtils_addPyMethodDefs(methods, TorchMethods); @@ -675,7 +688,9 @@ PyObject* initModule() { // setting up TH Errors so that they throw C++ exceptions at::init(); - py::reinterpret_borrow(module).def("_demangle", &c10::demangle); + auto py_module = py::reinterpret_borrow(module); + 
py_module.def("_demangle", &c10::demangle); + py_module.def("_log_api_usage_once", &LogAPIUsageOnceFromPython); // Set ATen warnings to issue Python warnings ::c10::Warning::set_warning_handler(&warning_handler); diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index b8ce586ded42..018ee65057bb 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -36,6 +36,7 @@ template using shared_ptr_class_ = py::class_>; PyObject* c10d_init(PyObject* _unused) { + C10_LOG_API_USAGE_ONCE("c10d.python.import"); auto c10d_module = THPObjectPtr(PyImport_ImportModule("torch.distributed")); if (!c10d_module) { throw python_error(); diff --git a/torch/csrc/jit/export.cpp b/torch/csrc/jit/export.cpp index 0d64e0399a63..7542edadb3d8 100644 --- a/torch/csrc/jit/export.cpp +++ b/torch/csrc/jit/export.cpp @@ -550,6 +550,7 @@ ScriptModuleSerializer::ScriptModuleSerializer(std::ostream* ofs) void ScriptModuleSerializer::serialize( const script::Module& module, const script::ExtraFilesMap& extra_files) { + C10_LOG_API_USAGE_ONCE("torch.script.save"); torch::ModelDef model_def; convertModel(module, &model_def, extra_files); std::string output; diff --git a/torch/csrc/jit/graph_executor.cpp b/torch/csrc/jit/graph_executor.cpp index dd12a8220a56..0fc492410eb8 100644 --- a/torch/csrc/jit/graph_executor.cpp +++ b/torch/csrc/jit/graph_executor.cpp @@ -488,6 +488,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase { " inputs, but got only ", stack.size()); + C10_LOG_API_USAGE_ONCE("torch.graph_executor.run"); logging::getLogger()->addStatValue( logging::runtime_counters::GRAPH_EXECUTOR_INVOCATIONS, 1.0); @@ -579,7 +580,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase { for (Node* dnode : diff_nodes) { auto diff_graph = std::move(dnode->g(attr::Subgraph)); Gradient gradient = differentiate(diff_graph); - // Run post differentiation optimizations, Autodiff will replace some + // Run post differentiation optimizations, Autodiff will replace some // parts of graph with new graph, these new graphs usually consists of // control flows and miss shape information on nodes, so we run shape // prop and differentiable optimizations to ensure the graph is optimized diff --git a/torch/csrc/jit/import.cpp b/torch/csrc/jit/import.cpp index f6d35b190cd5..9a83f17d74ba 100644 --- a/torch/csrc/jit/import.cpp +++ b/torch/csrc/jit/import.cpp @@ -95,6 +95,7 @@ void ScriptModuleDeserializer::deserialize( script::ModuleLookup module_lookup, c10::optional device, script::ExtraFilesMap& extra_files) { + C10_LOG_API_USAGE_ONCE("torch.script.load"); torch::ModelDef model_def; at::DataPtr data_ptr; size_t data_size; diff --git a/torch/csrc/jit/python_tracer.cpp b/torch/csrc/jit/python_tracer.cpp index 478262a86cc1..bf53bb014f5b 100644 --- a/torch/csrc/jit/python_tracer.cpp +++ b/torch/csrc/jit/python_tracer.cpp @@ -42,6 +42,8 @@ std::shared_ptr createGraphByTracing( const py::function& var_name_lookup_fn, bool force_outplace, const std::shared_ptr& self) { + C10_LOG_API_USAGE_ONCE("torch.tracer"); + auto enter_info = tracer::enter(std::move(trace_inputs), self); auto graph = enter_info.first->graph; diff --git a/torch/csrc/jit/script/init.cpp b/torch/csrc/jit/script/init.cpp index a7dad0290a33..e88acae9fa41 100644 --- a/torch/csrc/jit/script/init.cpp +++ b/torch/csrc/jit/script/init.cpp @@ -531,6 +531,7 @@ void initJitScriptBindings(PyObject* module) { m.def( "_jit_script_compile", [](const Def& def, ResolutionCallback rcb, FunctionDefaults defaults) { 
+ C10_LOG_API_USAGE_ONCE("torch.script.compile"); CompilationUnit cu; cu.define({def}, {pythonResolver(rcb)}, nullptr); std::shared_ptr defined = cu.get_functions().at(0); @@ -561,6 +562,7 @@ void initJitScriptBindings(PyObject* module) { [](const std::string& qualifiedName, const ClassDef& classDef, ResolutionCallback rcb) { + C10_LOG_API_USAGE_ONCE("torch.script.class"); auto cu = std::make_shared(); auto classType = ClassType::create(c10::QualifiedName(qualifiedName), cu); diff --git a/torch/lib/c10d/ProcessGroup.cpp b/torch/lib/c10d/ProcessGroup.cpp index 353ef887e95a..99b2e2b6a3a1 100644 --- a/torch/lib/c10d/ProcessGroup.cpp +++ b/torch/lib/c10d/ProcessGroup.cpp @@ -1,5 +1,7 @@ #include +#include + namespace c10d { ProcessGroup::Work::~Work() {} @@ -44,7 +46,9 @@ void ProcessGroup::Work::finish(std::exception_ptr exception) { cv_.notify_all(); } -ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {} +ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) { + C10_LOG_API_USAGE_ONCE("c10d.process_group"); +} ProcessGroup::~ProcessGroup() {} diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index eb82b8363fa0..5047926b2793 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -63,6 +63,7 @@ class Module(object): _version = 1 def __init__(self): + torch._C._log_api_usage_once("python.nn_module") self._backend = thnn_backend self._parameters = OrderedDict() self._buffers = OrderedDict() diff --git a/torch/optim/optimizer.py b/torch/optim/optimizer.py index 1d657b141bb8..9420a8e9eb2b 100644 --- a/torch/optim/optimizer.py +++ b/torch/optim/optimizer.py @@ -30,6 +30,7 @@ class Optimizer(object): """ def __init__(self, params, defaults): + torch._C._log_api_usage_once("python.optimizer") self.defaults = defaults if isinstance(params, torch.Tensor): diff --git a/torch/utils/data/dataloader.py b/torch/utils/data/dataloader.py index b0b70f30b4c3..5368b42f1f76 100644 --- a/torch/utils/data/dataloader.py +++ b/torch/utils/data/dataloader.py @@ -142,6 +142,7 @@ class DataLoader(object): batch_sampler=None, num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None): + torch._C._log_api_usage_once("python.data_loader") self.dataset = dataset self.batch_size = batch_size self.num_workers = num_workers
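The C10_LOG_API_USAGE_ONCE macro added to Logging.h works by initializing a function-scope static bool from the return value of c10::detail::LogAPIUsageFakeReturn, so the logger runs once per expansion site and every later execution only hits the guard that C++11 thread-safe static initialization already provides (the "atomic check" the comro's doc comment refers to). Here is a minimal standalone sketch of that pattern; it is not the real c10 implementation, and the SKETCH_/sketch:: names are made up for illustration.

```cpp
// Minimal standalone sketch of the once-only trick behind C10_LOG_API_USAGE_ONCE.
#include <iostream>
#include <string>

namespace sketch {
bool LogAPIUsageFakeReturn(const std::string& event) {
  std::cerr << "API_USAGE " << event << std::endl;  // stand-in logger
  return true;  // value is irrelevant; it only feeds the static initializer
}
}  // namespace sketch

// The real macro uses C10_ANONYMOUS_VARIABLE and C10_UNUSED; this sketch
// inlines the same idea with its own helpers.
#define SKETCH_CONCAT_IMPL(a, b) a##b
#define SKETCH_CONCAT(a, b) SKETCH_CONCAT_IMPL(a, b)
#define SKETCH_LOG_API_USAGE_ONCE(event)                           \
  [[maybe_unused]] static bool SKETCH_CONCAT(logFlag, __LINE__) =  \
      sketch::LogAPIUsageFakeReturn(event)

void create_tensor() {
  SKETCH_LOG_API_USAGE_ONCE("tensor.create");
  // ... real work would go here ...
}

int main() {
  create_tensor();
  create_tensor();
  create_tensor();  // "API_USAGE tensor.create" is printed exactly once
  return 0;
}
```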
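On the Python side the static-variable trick is not available, so the patch adds LogAPIUsageOnceFromPython in torch/csrc/Module.cpp, exposes it as torch._C._log_api_usage_once, and calls it from Module.__init__, Optimizer.__init__, and DataLoader.__init__. Unlike the C++ macro, which guards per expansion site, it de-duplicates per event string. A standalone sketch of that shape follows; the real helper relies on the GIL for synchronization, and the mutex here is only an addition to keep the sketch self-contained and thread-safe.

```cpp
// Sketch of per-event de-duplication, mirroring LogAPIUsageOnceFromPython.
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_set>

namespace sketch {
void LogAPIUsage(const std::string& event) {  // stand-in for c10::LogAPIUsage
  std::cerr << "API_USAGE " << event << std::endl;
}

void log_api_usage_once(const std::string& event) {
  static std::unordered_set<std::string> seen;
  static std::mutex guard;
  std::lock_guard<std::mutex> lock(guard);
  if (seen.insert(event).second) {
    LogAPIUsage(event);  // forwarded only the first time this event is seen
  }
}
}  // namespace sketch

int main() {
  sketch::log_api_usage_once("python.nn_module");
  sketch::log_api_usage_once("python.nn_module");  // duplicate, ignored
  sketch::log_api_usage_once("python.optimizer");  // new event, logged
  return 0;
}
```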
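Consumers can pick the events up in two ways: exporting PYTORCH_API_USAGE_STDERR (any value) makes the default logger print "PYTORCH_API_USAGE <event>" to stderr, or an embedding application can install a process-wide callback through c10::SetAPIUsageLogger. The sketch below shows the callback route; it assumes the logger type is std::function<void(const std::string&)>, since the exact template arguments are not visible in this copy of the patch, and the include path simply follows the header the declarations are added to.

```cpp
// Sketch of a downstream consumer installing a custom API-usage logger.
#include <c10/util/Logging.h>

#include <iostream>
#include <string>

int main() {
  // Every C10_LOG_API_USAGE_ONCE site and every
  // torch._C._log_api_usage_once() call from Python funnels into this
  // callback, at most once per site (C++) or per event string (Python).
  c10::SetAPIUsageLogger([](const std::string& event) {
    std::cout << "pytorch api usage: " << event << std::endl;
  });

  // Events can also be emitted directly from C++ application code:
  c10::LogAPIUsage("my_app.startup");
  return 0;
}
```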