From a6630e25afb49642cf517ee948f5a0c06a9cba6d Mon Sep 17 00:00:00 2001
From: Christian Puhrsch
Date: Sat, 22 Sep 2018 18:07:38 -0700
Subject: [PATCH] Remove many caffe2::TIndex and replace them with int64_t
 (#11943)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11943

See title

Reviewed By: ezyang

Differential Revision: D9992645

fbshipit-source-id: e8f80d6ea762971513e5e8072975ceea53e1f11a
---
 binaries/core_overhead_benchmark_gpu.cc | 2 +-
 caffe2/contrib/aten/aten_op_template.h | 2 +-
 caffe2/contrib/gloo/allgather_ops.h | 2 +-
 caffe2/contrib/nccl/cuda_nccl_gpu.cc | 4 +-
 caffe2/contrib/tensorrt/tensorrt_op_trt.cc | 14 +-
 caffe2/contrib/tensorrt/tensorrt_op_trt.h | 4 +-
 caffe2/core/blob_serialization.cc | 6 +-
 caffe2/core/blob_test.cc | 10 +-
 caffe2/core/logging.h | 2 +-
 caffe2/core/operator.cc | 4 +-
 caffe2/core/operator.h | 6 +-
 caffe2/core/operator_schema.cc | 14 +-
 caffe2/core/operator_schema.h | 12 +-
 caffe2/core/qtensor.h | 8 +-
 caffe2/core/tensor.cc | 2 +-
 caffe2/core/tensor.h | 30 +--
 caffe2/core/tensor_impl.h | 70 +++---
 caffe2/cuda_rtc/pool_op_rtc_gpu.cc | 4 +-
 .../operators/fully_connected_op_prune.h | 12 +-
 .../operators/fully_connected_op_sparse.h | 8 +-
 caffe2/experiments/operators/funhash_op.h | 60 +++---
 .../experiments/operators/sparse_funhash_op.h | 70 +++---
 .../operators/sparse_matrix_reshape_op.h | 20 +-
 .../experiments/operators/tt_contraction_op.h | 54 ++---
 caffe2/experiments/operators/tt_pad_op.h | 12 +-
 caffe2/ideep/operators/concat_split_op.cc | 2 +-
 caffe2/ideep/operators/conv_pool_base_op.h | 2 +-
 caffe2/ideep/operators/squeeze_op.cc | 2 +-
 caffe2/image/image_input_op.h | 14 +-
 caffe2/mkl/mkl_utils_test.cc | 10 +-
 caffe2/mkl/mklmemory_serialization.cc | 4 +-
 caffe2/mkl/operators/concat_op.cc | 2 +-
 caffe2/mkl/operators/conv_op.cc | 16 +-
 caffe2/mkl/operators/conv_op_mkldnn.cc | 4 +-
 caffe2/mkl/operators/elementwise_sum_op.cc | 2 +-
 caffe2/mkl/operators/fully_connected_op.cc | 4 +-
 .../local_response_normalization_op.cc | 2 +-
 caffe2/mkl/operators/packed_fc_op.cc | 2 +-
 caffe2/mkl/operators/pool_op.cc | 4 +-
 caffe2/mkl/operators/relu_op.cc | 2 +-
 caffe2/mkl/operators/spatial_batch_norm_op.cc | 2 +-
 caffe2/mkl/operators/squeeze_op.cc | 2 +-
 caffe2/mkl/utils/mkl_memory.cc | 2 +-
 caffe2/mkl/utils/mkl_memory.h | 18 +-
 caffe2/mkl/utils/mkl_operator.h | 2 +-
 .../mobile/contrib/arm-compute/core/context.h | 18 +-
 .../operators/fully_connected_op.cc | 2 +-
 .../contrib/arm-compute/operators/pool_op.cc | 4 +-
 .../arm-compute/operators/resize_op.cc | 2 +-
 caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm | 8 +-
 .../mobile/contrib/ios/mpscnn/mpscnn_test.mm | 2 +-
 caffe2/mobile/contrib/ios/pool_test.cc | 4 +-
 caffe2/mobile/contrib/ios/resize_test.cc | 4 +-
 .../contrib/opengl/test/TestGLConvolution.cc | 18 +-
 .../mobile/contrib/snpe/snpe_op_benchmark.cc | 8 +-
 caffe2/mobile/contrib/ulp2/ulp_neon.cc | 2 +-
 caffe2/mobile/contrib/ulp2/ulp_test.cc | 8 +-
 caffe2/mpi/mpi_ops.h | 2 +-
 caffe2/operators/accuracy_op.cc | 2 +-
 caffe2/operators/accuracy_op.cu | 2 +-
 caffe2/operators/arg_ops.cc | 10 +-
 caffe2/operators/arg_ops.cu | 8 +-
 caffe2/operators/arg_ops.h | 6 +-
 caffe2/operators/assert_op.h | 2 +-
 caffe2/operators/atomic_ops.cc | 4 +-
 caffe2/operators/batch_box_cox_op.cc | 26 +--
 caffe2/operators/batch_box_cox_op.h | 8 +-
 caffe2/operators/batch_bucketize_op.cc | 12 +-
 caffe2/operators/batch_gather_ops.cu | 2 +-
 caffe2/operators/batch_gather_ops.h | 2 +-
 caffe2/operators/batch_matmul_op.cc | 10 +-
 caffe2/operators/batch_matmul_op.h | 2 +-
 caffe2/operators/batch_matmul_op_gpu_test.cc | 16 +-
 caffe2/operators/batch_matmul_op_test.cc | 16 +-
 caffe2/operators/batch_sparse_to_dense_op.cc | 36 ++--
 caffe2/operators/batch_sparse_to_dense_op.h | 6 +-
 caffe2/operators/bbox_transform_op.cc | 2 +-
 caffe2/operators/boolean_mask_ops.cc | 8 +-
 caffe2/operators/boolean_mask_ops.cu | 34 ++--
 caffe2/operators/boolean_unmask_ops_test.cc | 4 +-
 caffe2/operators/cast_op.cc | 2 +-
 caffe2/operators/cast_op.h | 2 +-
 caffe2/operators/concat_split_op.h | 8 +-
 caffe2/operators/conditional_op.cc | 2 +-
 caffe2/operators/conv_op_cache_cudnn.h | 10 +-
 caffe2/operators/conv_op_cache_cudnn_test.cc | 14 +-
 caffe2/operators/conv_op_cudnn.cc | 4 +-
 caffe2/operators/conv_op_eigen.cc | 16 +-
 caffe2/operators/conv_op_impl.h | 6 +-
 caffe2/operators/conv_pool_op_base.h | 6 +-
 caffe2/operators/conv_transpose_op_cudnn.cc | 4 +-
 caffe2/operators/conv_transpose_op_impl.h | 12 +-
 .../conv_transpose_op_mobile_test.cc | 10 +-
 caffe2/operators/cross_entropy_op.cc | 18 +-
 caffe2/operators/cross_entropy_op.cu | 10 +-
 .../operators/ctc_beam_search_decoder_op.cc | 4 +-
 caffe2/operators/ctc_greedy_decoder_op.cc | 4 +-
 caffe2/operators/dataset_ops.cc | 16 +-
 caffe2/operators/dataset_ops.h | 8 +-
 caffe2/operators/deform_conv_op.cu | 16 +-
 caffe2/operators/deform_conv_op.h | 12 +-
 caffe2/operators/deform_conv_op_impl.h | 8 +-
 caffe2/operators/distance_op.cc | 2 +-
 caffe2/operators/distance_op.cu | 4 +-
 caffe2/operators/dropout_op_cudnn.cc | 4 +-
 caffe2/operators/elementwise_op_test.h | 2 +-
 caffe2/operators/elementwise_ops_schema.cc | 2 +-
 caffe2/operators/expand_squeeze_dims_op.h | 2 +-
 .../experimental/c10/cpu/averaged_loss_cpu.cc | 3 +-
 .../experimental/c10/cpu/batch_gather_cpu.cc | 3 +-
 .../experimental/c10/cpu/batch_matmul_cpu.cc | 3 +-
 .../experimental/c10/cpu/cast_cpu.cc | 3 +-
 .../experimental/c10/cpu/concat_cpu.cc | 5 +-
 .../experimental/c10/cpu/filler_cpu.cc | 7 +-
 .../sigmoid_cross_entropy_with_logits_cpu.cc | 5 +-
 .../c10/cpu/sparse_lengths_sum_cpu.cc | 7 +-
 .../operators/experimental/c10/schemas/fc.h | 2 +-
 caffe2/operators/extend_tensor_op.cc | 2 +-
 caffe2/operators/filler_op.cc | 2 +-
 caffe2/operators/filler_op.cu | 4 +-
 caffe2/operators/filler_op.h | 24 +--
 caffe2/operators/flatten_op.cc | 4 +-
 caffe2/operators/flexible_top_k.cc | 46 ++---
 caffe2/operators/fully_connected_op.h | 2 +-
 .../fused_rowwise_8bit_conversion_ops.h | 4 +-
 .../fused_rowwise_random_quantization_ops.cc | 8 +-
 .../operators/gather_fused_8bit_rowwise_op.h | 2 +-
 caffe2/operators/gather_ranges_to_dense_op.h | 2 +-
 caffe2/operators/generate_proposals_op.cc | 6 +-
 .../operators/generate_proposals_op_test.cc | 50 ++---
 caffe2/operators/glu_op.h | 2 +-
 caffe2/operators/half_float_ops.h | 4 +-
 .../local_response_normalization_op_miopen.cc | 4 +-
 caffe2/operators/hip/relu_op_miopen.cc | 4 +-
 caffe2/operators/hip/softmax_op_miopen.cc | 4 +-
 .../hip/spatial_batch_norm_op_miopen.cc | 4 +-
 caffe2/operators/im2col_op.h | 4 +-
 caffe2/operators/index_hash_ops.cc | 2 +-
 caffe2/operators/index_ops.cc | 24 +--
 caffe2/operators/integral_image_op.cc | 2 +-
 caffe2/operators/integral_image_op.cu | 4 +-
 caffe2/operators/is_empty_op.h | 2 +-
 caffe2/operators/layer_norm_op.cc | 2 +-
 caffe2/operators/layer_norm_op.cu | 4 +-
 caffe2/operators/lengths_pad_op.h | 2 +-
 .../lengths_reducer_fused_8bit_rowwise_ops.h | 2 +-
 caffe2/operators/lengths_reducer_ops.h | 6 +-
 .../lengths_reducer_rowwise_8bit_ops.h | 14 +-
 caffe2/operators/lengths_tile_op.cc | 2 +-
 caffe2/operators/lengths_tile_op.cu | 2 +-
 caffe2/operators/lengths_top_k_op.cc | 12 +-
 .../local_response_normalization_op.cc | 10 +-
 .../local_response_normalization_op_cudnn.cc | 4 +-
 caffe2/operators/lpnorm_op.cc | 4 +-
 caffe2/operators/map_ops.h | 2 +-
 caffe2/operators/matmul_op.h | 2 +-
 caffe2/operators/numpy_tile_op.h | 2 +-
 caffe2/operators/one_hot_ops.cc | 42 ++--
 caffe2/operators/one_hot_ops.cu | 12 +-
 caffe2/operators/one_hot_ops.h | 12 +-
 caffe2/operators/onnx_while_op.h | 6 +-
 caffe2/operators/onnxifi_op.cc | 2 +-
 caffe2/operators/onnxifi_op.h | 6 +-
 .../operators/operator_fallback_gpu_test.cc | 4 +-
 caffe2/operators/order_switch_ops.cc | 4 +-
 caffe2/operators/order_switch_ops.cu | 4 +-
 caffe2/operators/pack_rnn_sequence_op.h | 4 +-
 caffe2/operators/pack_segments.cc | 14 +-
 caffe2/operators/pack_segments.h | 4 +-
 caffe2/operators/partition_ops.h | 20 +-
 caffe2/operators/perplexity_op.cc | 2 +-
 caffe2/operators/perplexity_op.cu | 2 +-
 .../piecewise_linear_transform_op.cu | 34 ++--
 .../operators/piecewise_linear_transform_op.h | 58 +++---
 caffe2/operators/pool_op_cudnn.cu | 4 +-
 caffe2/operators/reducer_functors.h | 80 ++++----
 caffe2/operators/reduction_front_back_ops.h | 12 +-
 caffe2/operators/reduction_ops.h | 6 +-
 caffe2/operators/replace_nan_op.cc | 4 +-
 caffe2/operators/replace_nan_op.cu | 4 +-
 caffe2/operators/replace_nan_op.h | 2 +-
 caffe2/operators/reshape_op_gpu_test.cc | 4 +-
 caffe2/operators/reverse_packed_segs_op.h | 4 +-
 .../operators/rnn/hip/recurrent_op_miopen.h | 2 +-
 .../rnn/recurrent_network_blob_fetcher_op.h | 2 +-
 caffe2/operators/rnn/recurrent_network_op.h | 2 +-
 caffe2/operators/rnn/recurrent_op_cudnn.h | 2 +-
 caffe2/operators/roi_align_op_gpu_test.cc | 16 +-
 caffe2/operators/segment_reduction_op.h | 190 +++++++++---------
 caffe2/operators/segment_reduction_op_gpu.cu | 26 +--
 caffe2/operators/sequence_ops.cc | 2 +-
 caffe2/operators/sequence_ops.cu | 2 +-
 caffe2/operators/sequence_ops.h | 10 +-
 caffe2/operators/shape_op.h | 10 +-
 caffe2/operators/slice_op.cu | 24 +--
 caffe2/operators/slice_op.h | 24 +--
 caffe2/operators/softmax_op_cudnn.cc | 4 +-
 caffe2/operators/softmax_ops.cu | 4 +-
 caffe2/operators/softmax_with_loss_op.cc | 2 +-
 caffe2/operators/sparse_to_dense_mask_op.h | 8 +-
 caffe2/operators/sparse_to_dense_op.cu | 2 +-
 .../operators/spatial_softmax_with_loss_op.cc | 2 +-
 caffe2/operators/text_file_reader.cc | 2 +-
 caffe2/operators/tile_op.h | 4 +-
 caffe2/operators/top_k.cc | 112 +++++------
 caffe2/operators/top_k.cu | 114 +++++------
 caffe2/operators/transpose_op.h | 2 +-
 caffe2/operators/utility_ops.cu | 38 ++--
 caffe2/operators/utility_ops.h | 34 ++--
 caffe2/operators/utility_ops_gpu_test.cc | 4 +-
 caffe2/operators/utility_ops_test.cc | 4 +-
 caffe2/opt/onnxifi_transformer.cc | 2 +-
 caffe2/perfkernels/embedding_lookup.cc | 28 +--
 caffe2/perfkernels/embedding_lookup.h | 8 +-
 caffe2/perfkernels/embedding_lookup_avx2.cc | 156 +++++++-------
 ...mbedding_lookup_fused_8bit_rowwise_avx2.cc | 156 +++++++-------
 .../fused_8bit_rowwise_embedding_lookup.cc | 30 +--
 .../fused_8bit_rowwise_embedding_lookup.h | 8 +-
 caffe2/perfkernels/hp_emblookup_codegen.py | 10 +-
 caffe2/predictor/predictor_test.cc | 2 +-
 caffe2/python/pybind_state.cc | 10 +-
 caffe2/python/pybind_state.h | 2 +-
 caffe2/python/pybind_state_dlpack.h | 2 +-
 caffe2/python/pybind_state_gpu.cc | 2 +-
 caffe2/python/pybind_state_hip.cc | 2 +-
 caffe2/python/pybind_state_mkl.cc | 2 +-
 caffe2/queue/rebatching_queue.cc | 2 +-
 caffe2/sgd/ftrl_op.cc | 12 +-
 caffe2/sgd/lars_op.h | 4 +-
 caffe2/sgd/learning_rate_op.h | 2 +-
.../depthwise/depthwise3x3_conv_op_test.cc | 8 +- caffe2/share/contrib/nnpack/nnpack_test.cc | 8 +- .../contrib/zstd/quant_decomp_zstd_op.cc | 2 +- caffe2/utils/filler.h | 10 +- caffe2/utils/hip/math_hip.cc | 12 +- caffe2/utils/math_cpu.cc | 6 +- caffe2/utils/math_gpu.cu | 14 +- caffe2/utils/math_gpu_test.cc | 6 +- caffe2/utils/math_test.cc | 6 +- caffe2/utils/smart_tensor_printer_test.cc | 2 +- caffe2/video/video_input_op.h | 8 +- modules/detectron/sample_as_op.cu | 2 +- modules/detectron/select_smooth_l1_loss_op.cu | 2 +- .../sigmoid_cross_entropy_loss_op.cu | 6 +- modules/detectron/sigmoid_focal_loss_op.cu | 2 +- modules/detectron/smooth_l1_loss_op.cu | 2 +- modules/detectron/softmax_focal_loss_op.cu | 2 +- modules/detectron/upsample_nearest_op.cu | 2 +- 248 files changed, 1446 insertions(+), 1454 deletions(-) diff --git a/binaries/core_overhead_benchmark_gpu.cc b/binaries/core_overhead_benchmark_gpu.cc index 018880432d4b..e024e4ddc9fa 100644 --- a/binaries/core_overhead_benchmark_gpu.cc +++ b/binaries/core_overhead_benchmark_gpu.cc @@ -139,7 +139,7 @@ BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize); static void BM_CudaPointerAffinity(benchmark::State& state) { CAFFE2_SKIP_IF_NO_GPU; - Tensor tensor(vector{1, 2, 3, 4}, CUDA); + Tensor tensor(vector{1, 2, 3, 4}, CUDA); float* ptr = tensor.mutable_data(); while (state.KeepRunning()) { volatile int id = GetGPUIDForPointer(ptr); diff --git a/caffe2/contrib/aten/aten_op_template.h b/caffe2/contrib/aten/aten_op_template.h index d01c1240aae6..b6d31268db0f 100644 --- a/caffe2/contrib/aten/aten_op_template.h +++ b/caffe2/contrib/aten/aten_op_template.h @@ -144,7 +144,7 @@ private: } template void assignToValue(Tensor* dst, T v) { - dst->Resize(std::vector()); + dst->Resize(std::vector()); math::Set(1, v, dst->template mutable_data(), &context_); } int findImplementation(const OperatorDef& operator_def) { diff --git a/caffe2/contrib/gloo/allgather_ops.h b/caffe2/contrib/gloo/allgather_ops.h index 044357cd06ae..1f55233a095c 100644 --- a/caffe2/contrib/gloo/allgather_ops.h +++ b/caffe2/contrib/gloo/allgather_ops.h @@ -75,7 +75,7 @@ class AllgatherOp final : public Operator { auto comm_size = OperatorBase::Input>(0)->size; const auto dims = - std::vector(1, (InputSize() - 1) * Input(1).size() * comm_size); + std::vector(1, (InputSize() - 1) * Input(1).size() * comm_size); Output(0)->Resize(dims); // Store which inputs/outputs this instance initialized with diff --git a/caffe2/contrib/nccl/cuda_nccl_gpu.cc b/caffe2/contrib/nccl/cuda_nccl_gpu.cc index 603281b30be8..b544445a2687 100644 --- a/caffe2/contrib/nccl/cuda_nccl_gpu.cc +++ b/caffe2/contrib/nccl/cuda_nccl_gpu.cc @@ -269,7 +269,7 @@ void NCCL::AllGather(const NCCLExecution& ex) { ex, [n](const NCCLElement& ctx) { CAFFE_ENFORCE_NE(ctx.src, ctx.dst); - std::vector dims; + std::vector dims; dims.reserve(ctx.src->ndim() + 1); dims.push_back(n); for (auto d : ctx.src->dims()) { @@ -307,7 +307,7 @@ void NCCL::ReduceScatter(const NCCLExecution& ex) { [](const NCCLElement& ctx) { CAFFE_ENFORCE_NE(ctx.src, ctx.dst); const auto& srcDims = ctx.src->dims(); - std::vector dstDims(srcDims.begin() + 1, srcDims.end()); + std::vector dstDims(srcDims.begin() + 1, srcDims.end()); ctx.dst->Resize(dstDims); ctx.dst->template mutable_data(); }, diff --git a/caffe2/contrib/tensorrt/tensorrt_op_trt.cc b/caffe2/contrib/tensorrt/tensorrt_op_trt.cc index 0d0ddc49b6cb..260d2efbe8cd 100644 --- a/caffe2/contrib/tensorrt/tensorrt_op_trt.cc +++ b/caffe2/contrib/tensorrt/tensorrt_op_trt.cc @@ -15,7 +15,7 @@ namespace { 
// Otherwise, return the product of CHW dimensions int64_t CheckDims( const nvinfer1::Dims& nv_dims, - const std::vector& c2_dims) { + const std::vector& c2_dims) { if (nv_dims.nbDims + 1 != c2_dims.size()) { CAFFE_THROW( "Mismatched dimensions between TRT input (", @@ -115,7 +115,7 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws) const std::string key = MakeString("output_size_hint_", output_idx); auto output_size_hint = OperatorBase::GetRepeatedArgument(key); if (!output_size_hint.empty()) { - std::vector dims; + std::vector dims; for (const auto v : output_size_hint) { dims.push_back(v); } @@ -130,17 +130,17 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws) void TensorRTOp::MaybeAdjustOutputShape( int output_idx, - std::vector* dims) { + std::vector* dims) { const auto it = output_size_hints_.find(output_idx); if (it != output_size_hints_.end()) { const auto& dims_hint = it->second; auto total_trt = std::accumulate( - dims->begin(), dims->end(), (TIndex)(1), std::multiplies()); + dims->begin(), dims->end(), (int64_t)(1), std::multiplies()); auto total_c2 = std::accumulate( dims_hint.begin(), dims_hint.end(), - (TIndex)(1), - std::multiplies()); + (int64_t)(1), + std::multiplies()); CAFFE_ENFORCE_EQ( total_trt, total_c2, @@ -204,7 +204,7 @@ bool TensorRTOp::RunOnDevice() { } else { // output, we need to allocate the output tensor at first batch run auto* output_tensor = Output(output_idx); - std::vector tensor_dims; + std::vector tensor_dims; tensor_dims.push_back(N); int64_t chw = 1; for (int i = 0; i < dims.nbDims; ++i) { diff --git a/caffe2/contrib/tensorrt/tensorrt_op_trt.h b/caffe2/contrib/tensorrt/tensorrt_op_trt.h index cd0700f96a26..a98b8a33a331 100644 --- a/caffe2/contrib/tensorrt/tensorrt_op_trt.h +++ b/caffe2/contrib/tensorrt/tensorrt_op_trt.h @@ -17,13 +17,13 @@ class TensorRTOp final : public Operator { virtual ~TensorRTOp() noexcept {} private: - void MaybeAdjustOutputShape(int output_idx, std::vector* dims); + void MaybeAdjustOutputShape(int output_idx, std::vector* dims); tensorrt::TrtLogger logger_; int max_batch_size_; std::vector nv_dims_; std::vector is_input_; - std::unordered_map> output_size_hints_; + std::unordered_map> output_size_hints_; std::shared_ptr trt_engine_{nullptr}; std::shared_ptr trt_executor_{nullptr}; bool batch_warning_issued_{false}; diff --git a/caffe2/core/blob_serialization.cc b/caffe2/core/blob_serialization.cc index 780897b1b532..7ff5a2b25eac 100644 --- a/caffe2/core/blob_serialization.cc +++ b/caffe2/core/blob_serialization.cc @@ -139,7 +139,7 @@ void TensorSerializer::SerializeWithChunkSize( // Serialize whole vector. 
If vector is empty, it's shape still needs to be // serialized in empty proto for (size_t chunkBegin = 0; - chunkBegin < std::max(tensor.size(), static_cast(1)); + chunkBegin < std::max(tensor.size(), static_cast(1)); chunkBegin += chunk_size) { VLOG(2) << "Starting a chunk at " << chunkBegin; #ifndef __ANDROID__ @@ -374,8 +374,8 @@ void TensorDeserializer::Deserialize(const TensorProto& proto, Tensor* tensor) { tensor->GetStaticContext()->CreateContext(proto.device_detail()); auto context = uniq_ptr.get(); context->SwitchToDevice(0); - vector dims; - for (const TIndex d : proto.dims()) { + vector dims; + for (const int64_t d : proto.dims()) { dims.push_back(d); } tensor->Resize(dims); diff --git a/caffe2/core/blob_test.cc b/caffe2/core/blob_test.cc index 628731d31bc8..24b2a2d0593d 100644 --- a/caffe2/core/blob_test.cc +++ b/caffe2/core/blob_test.cc @@ -557,9 +557,9 @@ TEST(TensorTest, TensorNonFundamentalTypeClone) { TEST(TensorTest, Tensor64BitDimension) { // Initialize a large tensor. - TIndex large_number = + int64_t large_number = static_cast(std::numeric_limits::max()) + 1; - Tensor tensor(vector{large_number}, CPU); + Tensor tensor(vector{large_number}, CPU); EXPECT_EQ(tensor.ndim(), 1); EXPECT_EQ(tensor.dim(0), large_number); EXPECT_EQ(tensor.size(), large_number); @@ -589,9 +589,9 @@ TEST(TensorTest, Tensor64BitDimension) { } TEST(TensorDeathTest, CannotCastDownLargeDims) { - TIndex large_number = + int64_t large_number = static_cast(std::numeric_limits::max()) + 1; - Tensor tensor(vector{large_number}, CPU); + Tensor tensor(vector{large_number}, CPU); EXPECT_EQ(tensor.ndim(), 1); EXPECT_EQ(tensor.dim(0), large_number); ASSERT_THROW(tensor.dim32(0), EnforceNotMet); @@ -694,7 +694,7 @@ TEST(TensorTest, TensorSerialization_CustomType) { } TEST(TensorTest, Half) { - const TIndex kSize = 3000000; + const int64_t kSize = 3000000; Blob blob; TensorCPU* tensor = blob.GetMutableTensor(CPU); tensor->Resize(kSize); diff --git a/caffe2/core/logging.h b/caffe2/core/logging.h index 67428df833dd..37fcd939c4d6 100644 --- a/caffe2/core/logging.h +++ b/caffe2/core/logging.h @@ -145,7 +145,7 @@ using EnforceNotMet = at::Error; * functions to caffe2::enforce_detail namespace. 
For example: * * namespace caffe2 { namespace enforce_detail { - * inline EnforceFailMessage IsVector(const vector& shape) { + * inline EnforceFailMessage IsVector(const vector& shape) { * if (shape.size() == 1) { return EnforceOK(); } * return MakeString("Shape ", shape, " is not a vector"); * } diff --git a/caffe2/core/operator.cc b/caffe2/core/operator.cc index 51f614546431..5f3f653b5a4b 100644 --- a/caffe2/core/operator.cc +++ b/caffe2/core/operator.cc @@ -581,7 +581,7 @@ TensorShapes InferBlobShapesAndTypesFromWorkspace( } TensorShapes InferBlobShapesAndTypesFromMap( - const CaffeMap>& blob_dimensions, + const CaffeMap>& blob_dimensions, const vector& nets) { CaffeMap blob_desc; // Populate shapes from known blobs @@ -597,7 +597,7 @@ TensorShapes InferBlobShapesAndTypesFromMap( } TensorShapes InferBlobShapesAndTypesFromMap( - const CaffeMap>& blob_dimensions, + const CaffeMap>& blob_dimensions, const CaffeMap& blob_types, const vector& nets) { CaffeMap blob_desc; diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h index 9b428f9003d9..25aa801d265d 100644 --- a/caffe2/core/operator.h +++ b/caffe2/core/operator.h @@ -700,7 +700,7 @@ struct DispatchHelper, ExtraArgs...> { template struct DispatchHelper, ExtraArgs...> { template - static bool call(Op* op, TIndex /*size*/) { + static bool call(Op* op, int64_t /*size*/) { return op->template DoRunWithValue(); } }; @@ -973,11 +973,11 @@ CAFFE2_API TensorShapes InferBlobShapesAndTypesFromWorkspace( const vector& nets); CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap( - const CaffeMap>& blob_dimensions, + const CaffeMap>& blob_dimensions, const vector& nets); CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap( - const CaffeMap>& blob_dimensions, + const CaffeMap>& blob_dimensions, const CaffeMap& blob_types, const vector& nets); diff --git a/caffe2/core/operator_schema.cc b/caffe2/core/operator_schema.cc index 5d9b640a4039..a76a0df9bd00 100644 --- a/caffe2/core/operator_schema.cc +++ b/caffe2/core/operator_schema.cc @@ -331,7 +331,7 @@ int OpSchema::CalculateOutput(int num_input) const { } static void SparseLengthsFillerHelper( - const std::vector>& shapes, + const std::vector>& shapes, size_t value_index, size_t length_index, std::vector* fillers) { @@ -341,7 +341,7 @@ static void SparseLengthsFillerHelper( } static void SparseSegmentsFillerHelper( - const std::vector>& shapes, + const std::vector>& shapes, size_t value_index, size_t segment_index, std::vector* fillers) { @@ -364,7 +364,7 @@ OpSchema& OpSchema::ValueKeyLengthInputFillers( size_t key_index, size_t length_index) { filler_supplier_ = [this, value_index, key_index, length_index]( - const std::vector>& shapes) { + const std::vector>& shapes) { auto fillers = SupplyDenseFillers(shapes); // fill in the length (value_index is used to get the correct shape) SparseLengthsFillerHelper(shapes, key_index, length_index, &fillers); @@ -383,7 +383,7 @@ OpSchema& OpSchema::ValueLengthInputFillers( size_t value_index, size_t length_index) { filler_supplier_ = [this, value_index, length_index]( - const std::vector>& shapes) { + const std::vector>& shapes) { auto fillers = SupplyDenseFillers(shapes); // fill in the length (value_index is used to get the correct shape) SparseLengthsFillerHelper(shapes, value_index, length_index, &fillers); @@ -394,7 +394,7 @@ OpSchema& OpSchema::ValueLengthInputFillers( OpSchema& OpSchema::DisallowInputFillers() { filler_supplier_ = - [this](const std::vector>& /* unused */) { + [this](const std::vector>& /* unused */) { throw 
std::invalid_argument(type_ + " does not have input fillers"); return std::vector(); }; @@ -402,12 +402,12 @@ OpSchema& OpSchema::DisallowInputFillers() { } std::vector OpSchema::InputFillers( - const std::vector>& shapes) const { + const std::vector>& shapes) const { return filler_supplier_(shapes); } std::vector OpSchema::SupplyDenseFillers( - const std::vector>& shapes) { + const std::vector>& shapes) { std::vector fillers; for (const auto& shape : shapes) { fillers.emplace_back(shape); diff --git a/caffe2/core/operator_schema.h b/caffe2/core/operator_schema.h index 0653de28c68b..e0b6495647eb 100644 --- a/caffe2/core/operator_schema.h +++ b/caffe2/core/operator_schema.h @@ -383,11 +383,11 @@ class CAFFE2_API OpSchema { OpSchema& DisallowInputFillers(); std::vector InputFillers( - const std::vector>& shapes) const; + const std::vector>& shapes) const; private: std::vector SupplyDenseFillers( - const std::vector>& shapes); + const std::vector>& shapes); private: string type_; @@ -438,9 +438,9 @@ class CAFFE2_API OpSchema { }; std::function( - const std::vector>&)> + const std::vector>&)> filler_supplier_ = - [this](const std::vector>& shapes) { + [this](const std::vector>& shapes) { return SupplyDenseFillers(shapes); }; }; @@ -508,8 +508,8 @@ inline TensorShape CreateTensorShape( } // Helper function -inline vector GetDimsVector(const TensorShape& shape) { - vector dims; +inline vector GetDimsVector(const TensorShape& shape) { + vector dims; for (auto d : shape.dims()) { dims.push_back(d); } diff --git a/caffe2/core/qtensor.h b/caffe2/core/qtensor.h index e4f373ab3722..f277ffdbdd0a 100644 --- a/caffe2/core/qtensor.h +++ b/caffe2/core/qtensor.h @@ -212,8 +212,8 @@ class CAFFE2_EXPORT QTensor { /** * Return product of all dimensions starting from K. */ - inline TIndex size_from_dim(int k) const { - TIndex r = 1; + inline int64_t size_from_dim(int k) const { + int64_t r = 1; for (int i = k; i < dims_.size(); ++i) { r *= dims_[i]; } @@ -223,9 +223,9 @@ class CAFFE2_EXPORT QTensor { /** * Product of all dims up to. */ - inline TIndex size_to_dim(int k) const { + inline int64_t size_to_dim(int k) const { CAFFE_ENFORCE(k < dims_.size()); - TIndex r = 1; + int64_t r = 1; for (int i = 0; i < k; ++i) { r *= dims_[i]; } diff --git a/caffe2/core/tensor.cc b/caffe2/core/tensor.cc index 58b4c4b75e91..e142e1a6b6a9 100644 --- a/caffe2/core/tensor.cc +++ b/caffe2/core/tensor.cc @@ -77,7 +77,7 @@ void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) { int GetGPUIDForPointer(const void* ptr); -vector GetTensorInfo( +vector GetTensorInfo( const void* c, size_t* capacity, DeviceOption* device) { diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h index 932cbc2587e2..286718d4268c 100644 --- a/caffe2/core/tensor.h +++ b/caffe2/core/tensor.h @@ -59,7 +59,7 @@ class CAFFE2_API Tensor final { * Note that the actual data allocation is not going to be carried out until * the first time mutable_data() is called. 
*/ - explicit Tensor(const vector& dims, DeviceType type) + explicit Tensor(const vector& dims, DeviceType type) : Tensor(Storage(type)) { // TODO: here, we create a Storage // and immediately discard it in Resize() since @@ -96,7 +96,7 @@ class CAFFE2_API Tensor final { */ template Tensor( - const vector& dims, + const vector& dims, const vector& values, BaseContext* context) : Tensor(Storage(context->device_type(), TypeMeta::Make())) { @@ -115,7 +115,7 @@ class CAFFE2_API Tensor final { typename = typename std::enable_if::value>::type> Tensor(const T& value, BaseContext* context) : Tensor(Storage(context->device_type(), TypeMeta::Make())) { - Resize(std::vector{}); + Resize(std::vector{}); context->CopyItemsFromCPU( storage().dtype(), size(), &value, mutable_data()); } @@ -142,15 +142,15 @@ class CAFFE2_API Tensor final { impl_.get()->CopyFrom(*src.impl_.get(), context); } - void ExtendTo(TIndex num, float growthPct, BaseContext* context) const { + void ExtendTo(int64_t num, float growthPct, BaseContext* context) const { impl_.get()->ExtendTo(num, growthPct, context); } - void Extend(TIndex num, float growthPct, BaseContext* context) const { + void Extend(int64_t num, float growthPct, BaseContext* context) const { impl_.get()->Extend(num, growthPct, context); } - void ShrinkTo(TIndex outer_dim) const { + void ShrinkTo(int64_t outer_dim) const { impl_.get()->ShrinkTo(outer_dim); } @@ -168,7 +168,7 @@ class CAFFE2_API Tensor final { impl_.get()->ResizeLike(*src_tensor.impl_.get()); } - inline void Reshape(const vector& dims) const { + inline void Reshape(const vector& dims) const { impl_.get()->Reshape(dims); } @@ -250,7 +250,7 @@ class CAFFE2_API Tensor final { return impl_.get()->ndim(); } - inline TIndex size() const { + inline int64_t size() const { return impl_.get()->size(); } @@ -266,19 +266,19 @@ class CAFFE2_API Tensor final { return impl_.get()->capacity_nbytes(); } - inline const vector& dims() const { + inline const vector& dims() const { return impl_.get()->dims(); } - inline TIndex size_from_dim(int k) const { + inline int64_t size_from_dim(int k) const { return impl_.get()->size_from_dim(k); } - inline TIndex size_to_dim(int k) const { + inline int64_t size_to_dim(int k) const { return impl_.get()->size_to_dim(k); } - inline TIndex size_between_dim(int k, int l) const { + inline int64_t size_between_dim(int k, int l) const { return impl_.get()->size_between_dim(k, l); } @@ -311,7 +311,7 @@ class CAFFE2_API Tensor final { return impl_.get()->dim32(i); } - inline TIndex dim(const int i) const { + inline int64_t dim(const int i) const { return impl_.get()->dim(i); } @@ -337,7 +337,7 @@ TypeCall GetTypeCallFunction(TypeIdentifier id); void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c); // Shape call registry -typedef vector (*TensorInfoCall)( +typedef vector (*TensorInfoCall)( const void*, size_t* capacity, DeviceOption* device); @@ -377,7 +377,7 @@ void TensorPrinter::Print(const Tensor& tensor) { std::stringstream values_stream; // One most likely doesn't want to print int64-number of items for visual // inspection, so we cast down to int here. 
- int total_count = static_cast(std::min(tensor.size(), TIndex(limit_))); + int total_count = static_cast(std::min(tensor.size(), int64_t(limit_))); const T* tensor_data = tensor.template data(); for (int i = 0; i < total_count - 1; ++i) { values_stream << tensor_data[i] << ","; diff --git a/caffe2/core/tensor_impl.h b/caffe2/core/tensor_impl.h index 5a2d2c821d2b..20c398f7e4c8 100644 --- a/caffe2/core/tensor_impl.h +++ b/caffe2/core/tensor_impl.h @@ -26,17 +26,17 @@ namespace caffe2 { class DeviceOption; /** - * A utility function to convert vector to vector. + * A utility function to convert vector to vector. */ -inline std::vector ToVectorTIndex(const std::vector& src) { - return std::vector(src.begin(), src.end()); +inline std::vector ToVectorint64_t(const std::vector& src) { + return std::vector(src.begin(), src.end()); } /** * Return product of all dimensions starting from k */ -inline TIndex size_from_dim_(int k, const std::vector& dims) { - TIndex r = 1; +inline int64_t size_from_dim_(int k, const std::vector& dims) { + int64_t r = 1; for (size_t i = k; i < dims.size(); ++i) { r *= dims[i]; } @@ -44,9 +44,9 @@ inline TIndex size_from_dim_(int k, const std::vector& dims) { } // Product of all dims up to k (not including dims[k]) -inline TIndex size_to_dim_(int k, const std::vector& dims) { +inline int64_t size_to_dim_(int k, const std::vector& dims) { CAFFE_ENFORCE((unsigned)k <= dims.size()); - TIndex r = 1; + int64_t r = 1; for (int i = 0; i < k; ++i) { r *= dims[i]; } @@ -54,9 +54,9 @@ inline TIndex size_to_dim_(int k, const std::vector& dims) { } // Product of all dims between k and l (not including dims[k] and dims[l]) -inline TIndex size_between_dim_(int k, int l, const std::vector& dims) { +inline int64_t size_between_dim_(int k, int l, const std::vector& dims) { CAFFE_ENFORCE((unsigned)l < dims.size()); - TIndex r = 1; + int64_t r = 1; if (k < l) { for (int i = k + 1; i < l; ++i) { r *= dims[i]; @@ -191,7 +191,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * @brief Extend the outer-most dimension of this tensor * to dimension of `num`. */ - void ExtendTo(TIndex num, float growthPct, at::BaseContext* context) { + void ExtendTo(int64_t num, float growthPct, at::BaseContext* context) { CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1); CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0); CAFFE_ENFORCE(context != nullptr, "Context must be provided."); @@ -207,7 +207,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * growthPct. This ensures that Extend runs on an amortized O(1) time * complexity. */ - void Extend(TIndex num, float growthPct, at::BaseContext* context) { + void Extend(int64_t num, float growthPct, at::BaseContext* context) { CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1); CAFFE_ENFORCE_GE_WITH_CALLER( num, 0, "`num` must be non-negative for Extend"); @@ -223,8 +223,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { auto newNumel = std::accumulate( newDims.begin(), newDims.end(), - static_cast(1), - std::multiplies()); + static_cast(1), + std::multiplies()); if (newNumel * storage_.itemsize() <= storage_.capacity()) { dims_ = newDims; numel_ = newNumel; @@ -253,7 +253,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * This method guarantees that no re-allocations are carried out, which means * that the extra capacity after the end of the shurnk tensor is maintained. 
*/ - void ShrinkTo(TIndex outer_dim) { + void ShrinkTo(int64_t outer_dim) { CAFFE_ENFORCE_WITH_CALLER( is_contiguous_, "Right now ShrinkTo is only supported on contiguous Tensor."); @@ -268,8 +268,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { numel_ = std::accumulate( dims_.begin(), dims_.end(), - static_cast(1), - std::multiplies()); + static_cast(1), + std::multiplies()); } /** @@ -292,8 +292,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { auto newNumel = std::accumulate( newCapacity.begin(), newCapacity.end(), - static_cast(1), - std::multiplies()); + static_cast(1), + std::multiplies()); if (newNumel * storage_.itemsize() <= storage_.capacity()) { return; } @@ -365,11 +365,11 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * Resizes the tensor without touching underlying storage. * This requires the total size of the tensor to remains constant. */ - inline void Reshape(const std::vector& dims) { + inline void Reshape(const std::vector& dims) { CAFFE_ENFORCE_WITH_CALLER( is_contiguous_, "Right now Reshape is only supported for contiguous Tensor."); - TIndex new_size = 1; + int64_t new_size = 1; for (auto d : dims) { CAFFE_ENFORCE_GE_WITH_CALLER(d, 0); new_size *= d; @@ -387,7 +387,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { } inline void Reshape(const std::vector& dims) { - Reshape(ToVectorTIndex(dims)); + Reshape(ToVectorint64_t(dims)); } /** @@ -674,7 +674,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { /** * Returns the size (i.e. the number of items) of the tensor. */ - inline TIndex size() const { + inline int64_t size() const { return numel_; } /** @@ -701,19 +701,19 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { /** * Returns the dimensions of the tensor as a vector. */ - inline const std::vector& dims() const { + inline const std::vector& dims() const { return dims_; } - inline TIndex size_from_dim(int k) const { + inline int64_t size_from_dim(int k) const { return size_from_dim_(k, dims_); } - inline TIndex size_to_dim(int k) const { + inline int64_t size_to_dim(int k) const { return size_to_dim_(k, dims_); } - inline TIndex size_between_dim(int k, int l) const { + inline int64_t size_between_dim(int k, int l) const { return size_between_dim_(k, l, dims_); } @@ -772,7 +772,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { /** * Returns the i-th dimension of the tensor in int. * - * This function returns an int value instead of TIndex, which depending on + * This function returns an int value instead of int64_t, which depending on * the typedef could be int64. If you want int64 dim values, make sure you * call dim() instead. */ @@ -790,7 +790,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * must be between 0 (inclusive) and the number of dimensions, otherwise * this function will produce a fatal message. 
*/ - inline TIndex dim(const int i) const { + inline int64_t dim(const int i) const { #ifndef NDEBUG CAFFE_ENFORCE_LT_WITH_CALLER(i, dims_.size(), "Exceeding ndim limit"); CAFFE_ENFORCE_GE_WITH_CALLER(i, 0, "Cannot have negative dimension index"); @@ -818,9 +818,9 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { protected: // TODO: change to DimVector - std::vector dims_; // sizes_ + std::vector dims_; // sizes_ at::DimVector strides_; - TIndex numel_ = -1; // numel_ + int64_t numel_ = -1; // numel_ bool is_contiguous_ = true; // we decide to keep reserved_ and it will // live in Tensor after the split @@ -838,7 +838,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { bool SetDims(const std::vector& src) { auto old_numel = numel_; dims_.resize(src.size()); - TIndex new_numel = 1; + int64_t new_numel = 1; for (size_t i = 0; i < src.size(); ++i) { new_numel *= src[i]; dims_[i] = src[i]; @@ -859,7 +859,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { // TODO(jiayq): maybe rewrite the following functions with initializer list. // NVCC does not play well with initializer lists last time, but worth // another shot. - bool SetDims(const TIndex d0) { + bool SetDims(const int64_t d0) { auto old_numel = numel_; dims_.resize(1); dims_[0] = d0; @@ -868,7 +868,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { return numel_ != old_numel; } - bool SetDims(const TIndex d0, const TIndex d1) { + bool SetDims(const int64_t d0, const int64_t d1) { auto old_numel = numel_; dims_.resize(2); dims_[0] = d0; @@ -878,7 +878,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { return numel_ != old_numel; } - bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) { + bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) { auto old_numel = numel_; dims_.resize(3); dims_[0] = d0; @@ -890,7 +890,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { } bool - SetDims(const TIndex d0, const TIndex d1, const TIndex d2, const TIndex d3) { + SetDims(const int64_t d0, const int64_t d1, const int64_t d2, const int64_t d3) { auto old_numel = numel_; dims_.resize(4); dims_[0] = d0; diff --git a/caffe2/cuda_rtc/pool_op_rtc_gpu.cc b/caffe2/cuda_rtc/pool_op_rtc_gpu.cc index 4dc8d598044b..0362829eaf3f 100644 --- a/caffe2/cuda_rtc/pool_op_rtc_gpu.cc +++ b/caffe2/cuda_rtc/pool_op_rtc_gpu.cc @@ -232,7 +232,7 @@ class MaxPoolRTCOp final : public ConvPoolOpBase { private: MaxPoolRTCFunction func_; - vector input_dims_; + vector input_dims_; }; class MaxPoolGradientRTCOp final : public ConvPoolOpBase { @@ -285,7 +285,7 @@ class MaxPoolGradientRTCOp final : public ConvPoolOpBase { private: MaxPoolGradientRTCFunction func_; - vector input_dims_; + vector input_dims_; }; namespace { diff --git a/caffe2/experiments/operators/fully_connected_op_prune.h b/caffe2/experiments/operators/fully_connected_op_prune.h index 05d5bc10d5a4..c1995845f49b 100644 --- a/caffe2/experiments/operators/fully_connected_op_prune.h +++ b/caffe2/experiments/operators/fully_connected_op_prune.h @@ -29,8 +29,8 @@ namespace caffe2 { using Shape = std::array; template - const std::vector& shape(Shape vs) { - static thread_local std::vector cache; + const std::vector& shape(Shape vs) { + static thread_local std::vector cache; cache.resize(vs.size()); for (auto i = 0; i < vs.size(); ++i) { cache[i] = vs[i]; @@ -38,11 +38,11 @@ namespace caffe2 { return cache; } - inline const std::vector& shape(int i) { + inline const std::vector& shape(int i) 
{ return shape<1>(Shape<1>({i})); } - inline const std::vector& shape(int i, int j) { + inline const std::vector& shape(int i, int j) { return shape<2>(Shape<2>({i, j})); } @@ -177,7 +177,7 @@ namespace caffe2 { Y->template mutable_data(), &context_); if (OutputSize() == 2){ auto* Comp_rate = Output(1); - Comp_rate->Resize(vector()); + Comp_rate->Resize(vector()); T* comp_data = Comp_rate->template mutable_data(); math::Sum( Mask.size(), Mask.template data(), comp_data, &context_); @@ -262,7 +262,7 @@ namespace caffe2 { 0, dW->template mutable_data(), &context_); - comp_r_buf_.Resize(vector()); + comp_r_buf_.Resize(vector()); T* comp_data = comp_r_buf_.template mutable_data(); math::Sum( Mask.size(), Mask.template data(), comp_data, &context_); diff --git a/caffe2/experiments/operators/fully_connected_op_sparse.h b/caffe2/experiments/operators/fully_connected_op_sparse.h index 6f19c1bacdc5..4c13e51dde8c 100644 --- a/caffe2/experiments/operators/fully_connected_op_sparse.h +++ b/caffe2/experiments/operators/fully_connected_op_sparse.h @@ -32,8 +32,8 @@ template using Shape = std::array; template -const std::vector& shape(Shape vs) { - static thread_local std::vector cache; +const std::vector& shape(Shape vs) { + static thread_local std::vector cache; cache.resize(vs.size()); for (auto i = 0; i < vs.size(); ++i) { cache[i] = vs[i]; @@ -41,11 +41,11 @@ const std::vector& shape(Shape vs) { return cache; } -inline const std::vector& shape(int i) { +inline const std::vector& shape(int i) { return shape<1>(Shape<1>({i})); } -inline const std::vector& shape(int i, int j) { +inline const std::vector& shape(int i, int j) { return shape<2>(Shape<2>({i, j})); } diff --git a/caffe2/experiments/operators/funhash_op.h b/caffe2/experiments/operators/funhash_op.h index 76bd37aee08e..98ffc83bb424 100644 --- a/caffe2/experiments/operators/funhash_op.h +++ b/caffe2/experiments/operators/funhash_op.h @@ -37,9 +37,9 @@ class FunHashOp : public Operator { FunHashOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), num_outputs_( - OperatorBase::GetSingleArgument("num_outputs", -1)), + OperatorBase::GetSingleArgument("num_outputs", -1)), num_segments_( - OperatorBase::GetSingleArgument("num_segments", -1)), + OperatorBase::GetSingleArgument("num_segments", -1)), seed_(OperatorBase::GetSingleArgument("seed", 0)) { CAFFE_ENFORCE( OperatorBase::HasArgument("num_outputs"), @@ -54,7 +54,7 @@ class FunHashOp : public Operator { const auto& seg = Input(2); const auto& weight = Input(3); - TIndex num_alpha = 1; + int64_t num_alpha = 1; if (adaptive_) { const auto& alpha = Input(4); num_alpha = alpha.dim(0); @@ -62,12 +62,12 @@ class FunHashOp : public Operator { const auto* seg_data = seg.template data(); - TIndex num_weight = weight.dim(0); - TIndex num_nz_ent = seg.dim(0); + int64_t num_weight = weight.dim(0); + int64_t num_nz_ent = seg.dim(0); - TIndex n_segments = num_segments_; + int64_t n_segments = num_segments_; if (num_segments_ == -1) { - for (TIndex i = 0; i < num_nz_ent; ++i) { + for (int64_t i = 0; i < num_nz_ent; ++i) { if (seg_data[i] > n_segments) { n_segments = seg_data[i]; } @@ -85,16 +85,16 @@ class FunHashOp : public Operator { const auto* weight_data = weight.template data(); const auto* alpha_data = adaptive_ ? 
Input(4).template data() : 0; const auto* val_data = val.template data(); - const auto* key_data = key.template data(); + const auto* key_data = key.template data(); - for (TIndex j = 0; j < num_nz_ent; ++j) { - TIndex cur_seg = seg_data[j]; - TIndex cur_key = key_data[j]; + for (int64_t j = 0; j < num_nz_ent; ++j) { + int64_t cur_seg = seg_data[j]; + int64_t cur_key = key_data[j]; T cur_val = val_data[j]; - TIndex output_stride = cur_seg * num_outputs_; - for (TIndex i = 0; i < num_outputs_; ++i) { + int64_t output_stride = cur_seg * num_outputs_; + for (int64_t i = 0; i < num_outputs_; ++i) { T sum = 0; - for (TIndex k = 0; k < num_alpha; ++k) { + for (int64_t k = 0; k < num_alpha; ++k) { uint64_t hash; // The hash function takes as input four integers: // 1. feature index @@ -108,7 +108,7 @@ class FunHashOp : public Operator { hash_data[3] = INDEX_MAGIC; hash = XXH64(hash_data.data(), hash_data.size(), seed_); - TIndex index = hash % num_weight; + int64_t index = hash % num_weight; T cur_weight = weight_data[index]; #ifdef USE_SIGN @@ -133,8 +133,8 @@ class FunHashOp : public Operator { } protected: - TIndex num_outputs_; - TIndex num_segments_; + int64_t num_outputs_; + int64_t num_segments_; uint64_t seed_; std::array hash_data; bool adaptive_; @@ -147,7 +147,7 @@ class FunHashGradientOp : public Operator { FunHashGradientOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), num_outputs_( - OperatorBase::GetSingleArgument("num_outputs", -1)), + OperatorBase::GetSingleArgument("num_outputs", -1)), seed_(OperatorBase::GetSingleArgument("seed", 0)) { adaptive_ = (InputSize() == 6); } @@ -159,7 +159,7 @@ class FunHashGradientOp : public Operator { const auto& seg = Input(3); const auto& weight = Input(4); - TIndex num_alpha = 1; + int64_t num_alpha = 1; T* grad_alpha_data = 0; if (adaptive_) { @@ -173,8 +173,8 @@ class FunHashGradientOp : public Operator { const auto* seg_data = seg.template data(); - TIndex num_weight = weight.dim(0); - TIndex num_nz_ent = seg.dim(0); + int64_t num_weight = weight.dim(0); + int64_t num_nz_ent = seg.dim(0); auto* grad_weight = Output(0); grad_weight->ResizeLike(weight); @@ -184,18 +184,18 @@ class FunHashGradientOp : public Operator { const auto* weight_data = weight.template data(); const auto* alpha_data = adaptive_ ? 
Input(5).template data() : 0; const auto* val_data = val.template data(); - const auto* key_data = key.template data(); + const auto* key_data = key.template data(); memset(grad_weight_data, 0, sizeof(T) * num_weight); - for (TIndex j = 0; j < num_nz_ent; ++j) { - TIndex cur_seg = seg_data[j]; - TIndex cur_key = key_data[j]; + for (int64_t j = 0; j < num_nz_ent; ++j) { + int64_t cur_seg = seg_data[j]; + int64_t cur_key = key_data[j]; T cur_val = val_data[j]; - TIndex grad_out_stride = cur_seg * num_outputs_; - for (TIndex i = 0; i < num_outputs_; ++i) { + int64_t grad_out_stride = cur_seg * num_outputs_; + for (int64_t i = 0; i < num_outputs_; ++i) { T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val; - for (TIndex k = 0; k < num_alpha; ++k) { + for (int64_t k = 0; k < num_alpha; ++k) { uint64_t hash; hash_data[0] = cur_key; hash_data[1] = i; @@ -203,7 +203,7 @@ class FunHashGradientOp : public Operator { hash_data[3] = INDEX_MAGIC; hash = XXH64(hash_data.data(), hash_data.size(), seed_); - TIndex index = hash % num_weight; + int64_t index = hash % num_weight; T cur_grad_out_scale = grad_out_scale; #ifdef USE_SIGN @@ -227,7 +227,7 @@ class FunHashGradientOp : public Operator { } protected: - TIndex num_outputs_; + int64_t num_outputs_; uint64_t seed_; std::array hash_data; bool adaptive_; diff --git a/caffe2/experiments/operators/sparse_funhash_op.h b/caffe2/experiments/operators/sparse_funhash_op.h index 04c2441f297b..d4febbc8fa6d 100644 --- a/caffe2/experiments/operators/sparse_funhash_op.h +++ b/caffe2/experiments/operators/sparse_funhash_op.h @@ -36,9 +36,9 @@ class SparseFunHashOp : public Operator { SparseFunHashOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), num_outputs_( - OperatorBase::GetSingleArgument("num_outputs", -1)), + OperatorBase::GetSingleArgument("num_outputs", -1)), num_segments_( - OperatorBase::GetSingleArgument("num_segments", -1)), + OperatorBase::GetSingleArgument("num_segments", -1)), seed_(OperatorBase::GetSingleArgument("seed", 0)) { CAFFE_ENFORCE( OperatorBase::HasArgument("num_outputs"), @@ -53,7 +53,7 @@ class SparseFunHashOp : public Operator { const auto& seg = Input(2); const auto& weight = Input(3); - TIndex num_alpha = 1; + int64_t num_alpha = 1; if (adaptive_) { const auto& alpha = Input(4); num_alpha = alpha.dim(0); @@ -61,12 +61,12 @@ class SparseFunHashOp : public Operator { const auto* seg_data = seg.template data(); - TIndex num_weight = weight.dim(0); - TIndex num_nz_ent = seg.dim(0); + int64_t num_weight = weight.dim(0); + int64_t num_nz_ent = seg.dim(0); - TIndex n_segments = num_segments_; + int64_t n_segments = num_segments_; if (num_segments_ == -1) { - for (TIndex i = 0; i < num_nz_ent; ++i) { + for (int64_t i = 0; i < num_nz_ent; ++i) { if (seg_data[i] > n_segments) { n_segments = seg_data[i]; } @@ -84,16 +84,16 @@ class SparseFunHashOp : public Operator { const auto* weight_data = weight.template data(); const auto* alpha_data = adaptive_ ? 
Input(4).template data() : 0; const auto* val_data = val.template data(); - const auto* key_data = key.template data(); + const auto* key_data = key.template data(); - for (TIndex j = 0; j < num_nz_ent; ++j) { - TIndex cur_seg = seg_data[j]; - TIndex cur_key = key_data[j]; + for (int64_t j = 0; j < num_nz_ent; ++j) { + int64_t cur_seg = seg_data[j]; + int64_t cur_key = key_data[j]; T cur_val = val_data[j]; - TIndex output_stride = cur_seg * num_outputs_; - for (TIndex i = 0; i < num_outputs_; ++i) { + int64_t output_stride = cur_seg * num_outputs_; + for (int64_t i = 0; i < num_outputs_; ++i) { T sum = 0; - for (TIndex k = 0; k < num_alpha; ++k) { + for (int64_t k = 0; k < num_alpha; ++k) { // The hash function takes as input three integers: // 1. feature index // 2. output index @@ -108,13 +108,13 @@ class SparseFunHashOp : public Operator { #ifdef USE_SIGN // Use the least significant bit for sign, the rest for weights. - TIndex index = (hash >> 1) % num_weight; + int64_t index = (hash >> 1) % num_weight; T cur_weight = weight_data[index]; if (hash & 1) { cur_weight = -cur_weight; } #else - TIndex index = hash % num_weight; + int64_t index = hash % num_weight; T cur_weight = weight_data[index]; #endif @@ -132,8 +132,8 @@ class SparseFunHashOp : public Operator { } protected: - TIndex num_outputs_; - TIndex num_segments_; + int64_t num_outputs_; + int64_t num_segments_; uint64_t seed_; std::array hash_data; bool adaptive_; @@ -146,7 +146,7 @@ class SparseFunHashGradientOp : public Operator { SparseFunHashGradientOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), num_outputs_( - OperatorBase::GetSingleArgument("num_outputs", -1)), + OperatorBase::GetSingleArgument("num_outputs", -1)), seed_(OperatorBase::GetSingleArgument("seed", 0)) { adaptive_ = (InputSize() == 6); } @@ -158,7 +158,7 @@ class SparseFunHashGradientOp : public Operator { const auto& seg = Input(3); const auto& weight = Input(4); - TIndex num_alpha = 1; + int64_t num_alpha = 1; T* grad_alpha_data = 0; if (adaptive_) { @@ -172,10 +172,10 @@ class SparseFunHashGradientOp : public Operator { const auto* seg_data = seg.template data(); - TIndex num_weight = weight.dim(0); - TIndex num_nz_ent = seg.dim(0); + int64_t num_weight = weight.dim(0); + int64_t num_nz_ent = seg.dim(0); - TIndex grad_weight_size = num_nz_ent * num_outputs_ * num_alpha; + int64_t grad_weight_size = num_nz_ent * num_outputs_ * num_alpha; auto* grad_weight_val = Output(0); grad_weight_val->Resize(grad_weight_size); T* grad_weight_val_data = grad_weight_val->template mutable_data(); @@ -183,23 +183,23 @@ class SparseFunHashGradientOp : public Operator { auto* grad_weight_ind = Output(1); grad_weight_ind->Resize(grad_weight_size); auto* grad_weight_ind_data = - grad_weight_ind->template mutable_data(); + grad_weight_ind->template mutable_data(); const auto* grad_out_data = grad_out.template data(); const auto* weight_data = weight.template data(); const auto* alpha_data = adaptive_ ? 
Input(5).template data() : 0; const auto* val_data = val.template data(); - const auto* key_data = key.template data(); + const auto* key_data = key.template data(); - TIndex w_ind = 0; - for (TIndex j = 0; j < num_nz_ent; ++j) { - TIndex cur_seg = seg_data[j]; - TIndex cur_key = key_data[j]; + int64_t w_ind = 0; + for (int64_t j = 0; j < num_nz_ent; ++j) { + int64_t cur_seg = seg_data[j]; + int64_t cur_key = key_data[j]; T cur_val = val_data[j]; - TIndex grad_out_stride = cur_seg * num_outputs_; - for (TIndex i = 0; i < num_outputs_; ++i) { + int64_t grad_out_stride = cur_seg * num_outputs_; + for (int64_t i = 0; i < num_outputs_; ++i) { T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val; - for (TIndex k = 0; k < num_alpha; ++k) { + for (int64_t k = 0; k < num_alpha; ++k) { hash_data[0] = cur_key; hash_data[1] = i; hash_data[2] = k; @@ -209,12 +209,12 @@ class SparseFunHashGradientOp : public Operator { T cur_grad_out_scale = grad_out_scale; #ifdef USE_SIGN - TIndex index = (hash >> 1) % num_weight; + int64_t index = (hash >> 1) % num_weight; if (hash & 1) { cur_grad_out_scale = -cur_grad_out_scale; } #else - TIndex index = hash % num_weight; + int64_t index = hash % num_weight; #endif if (adaptive_) { @@ -232,7 +232,7 @@ class SparseFunHashGradientOp : public Operator { } protected: - TIndex num_outputs_; + int64_t num_outputs_; uint64_t seed_; std::array hash_data; bool adaptive_; diff --git a/caffe2/experiments/operators/sparse_matrix_reshape_op.h b/caffe2/experiments/operators/sparse_matrix_reshape_op.h index b2026a866ff8..f35f0cbbe6df 100644 --- a/caffe2/experiments/operators/sparse_matrix_reshape_op.h +++ b/caffe2/experiments/operators/sparse_matrix_reshape_op.h @@ -36,10 +36,10 @@ class SparseMatrixReshapeOp : public Operator { OperatorBase::HasArgument("new_shape"), "Argument `new_shape` is missing."); - vector old_shape = - OperatorBase::GetRepeatedArgument("old_shape"); - vector new_shape = - OperatorBase::GetRepeatedArgument("new_shape"); + vector old_shape = + OperatorBase::GetRepeatedArgument("old_shape"); + vector new_shape = + OperatorBase::GetRepeatedArgument("new_shape"); CAFFE_ENFORCE( old_shape.size() == 2, @@ -63,7 +63,7 @@ class SparseMatrixReshapeOp : public Operator { old_shape[0] > 0, "The first dimension in `old_shape` must be positive."); - TIndex matrix_size = old_shape[0] * old_shape[1]; + int64_t matrix_size = old_shape[0] * old_shape[1]; if (new_shape[0] == -1) { CAFFE_ENFORCE( @@ -106,14 +106,14 @@ class SparseMatrixReshapeOp : public Operator { new_col->Resize(nnz); new_row->Resize(nnz); - const auto* old_col_data = old_col.template data(); + const auto* old_col_data = old_col.template data(); const auto* old_row_data = old_row.template data(); - auto* new_col_data = new_col->template mutable_data(); + auto* new_col_data = new_col->template mutable_data(); auto* new_row_data = new_row->template mutable_data(); for (int i = 0; i < nnz; ++i) { - TIndex offset = old_row_data[i] * old_stride_ + old_col_data[i]; + int64_t offset = old_row_data[i] * old_stride_ + old_col_data[i]; new_row_data[i] = offset / new_stride_; new_col_data[i] = offset % new_stride_; } @@ -122,8 +122,8 @@ class SparseMatrixReshapeOp : public Operator { } private: - TIndex old_stride_; - TIndex new_stride_; + int64_t old_stride_; + int64_t new_stride_; }; } // namespace caffe2 diff --git a/caffe2/experiments/operators/tt_contraction_op.h b/caffe2/experiments/operators/tt_contraction_op.h index 11ef35bd235a..7f42d1f68d0b 100644 --- 
a/caffe2/experiments/operators/tt_contraction_op.h +++ b/caffe2/experiments/operators/tt_contraction_op.h @@ -29,9 +29,9 @@ class TTContractionOp final : public Operator { USE_OPERATOR_CONTEXT_FUNCTIONS; TTContractionOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - K_(OperatorBase::GetSingleArgument("K", 0)), - M_(OperatorBase::GetSingleArgument("M", 0)), - N_(OperatorBase::GetSingleArgument("N", 0)) { + K_(OperatorBase::GetSingleArgument("K", 0)), + M_(OperatorBase::GetSingleArgument("M", 0)), + N_(OperatorBase::GetSingleArgument("N", 0)) { CAFFE_ENFORCE(OperatorBase::HasArgument("K"), "Argument `K` is missing."); CAFFE_ENFORCE(OperatorBase::HasArgument("M"), "Argument `M` is missing."); CAFFE_ENFORCE(OperatorBase::HasArgument("N"), "Argument `N` is missing."); @@ -44,8 +44,8 @@ class TTContractionOp final : public Operator { CAFFE_ENFORCE(A.ndim() == 2, A.ndim()); - TIndex A_size = A.size_from_dim(0); - TIndex B_size = B.size_from_dim(0); + int64_t A_size = A.size_from_dim(0); + int64_t B_size = B.size_from_dim(0); CAFFE_ENFORCE( K_ * M_ == A_size, @@ -55,19 +55,19 @@ class TTContractionOp final : public Operator { B_size % (K_ * N_) == 0, "Argument `K` and `N` do not agree with the size of B."); - TIndex D_ = B_size / (K_ * N_); + int64_t D_ = B_size / (K_ * N_); - TIndex C_size = D_ * M_ * N_; - C->Resize(vector{C_size}); + int64_t C_size = D_ * M_ * N_; + C->Resize(vector{C_size}); - TIndex B_stride = K_ * N_; - TIndex C_stride = M_ * N_; + int64_t B_stride = K_ * N_; + int64_t C_stride = M_ * N_; const T* A_data = A.template data(); const T* B_data = B.template data(); T* C_data = C->template mutable_data(); - for (TIndex B_index = 0; B_index < B_size; B_index += B_stride) { + for (int64_t B_index = 0; B_index < B_size; B_index += B_stride) { math::Gemm( CblasTrans, CblasNoTrans, @@ -84,9 +84,9 @@ class TTContractionOp final : public Operator { } protected: - TIndex K_; - TIndex M_; - TIndex N_; + int64_t K_; + int64_t M_; + int64_t N_; }; template @@ -95,9 +95,9 @@ class TTContractionGradientOp final : public Operator { USE_OPERATOR_CONTEXT_FUNCTIONS; TTContractionGradientOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - K_(OperatorBase::GetSingleArgument("K", 0)), - M_(OperatorBase::GetSingleArgument("M", 0)), - N_(OperatorBase::GetSingleArgument("N", 0)) {} + K_(OperatorBase::GetSingleArgument("K", 0)), + M_(OperatorBase::GetSingleArgument("M", 0)), + N_(OperatorBase::GetSingleArgument("N", 0)) {} bool RunOnDevice() override { const auto& G = Input(0); @@ -106,16 +106,16 @@ class TTContractionGradientOp final : public Operator { auto* dA = Output(0); auto* dB = Output(1); - TIndex G_size = G.size_from_dim(0); - TIndex D_ = G_size / (M_ * N_); + int64_t G_size = G.size_from_dim(0); + int64_t D_ = G_size / (M_ * N_); - TIndex dB_size = D_ * K_ * N_; + int64_t dB_size = D_ * K_ * N_; dA->Resize(A.dims()); dB->Resize(B.dims()); - TIndex B_stride = K_ * N_; - TIndex G_stride = M_ * N_; + int64_t B_stride = K_ * N_; + int64_t G_stride = M_ * N_; const T* G_data = G.template data(); const T* A_data = A.template data(); @@ -125,7 +125,7 @@ class TTContractionGradientOp final : public Operator { T* dB_data = dB->template mutable_data(); const T* G_ptr = G_data; - for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) { + for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) { math::Gemm( CblasNoTrans, CblasTrans, @@ -139,7 +139,7 @@ class TTContractionGradientOp final : public Operator { } G_ptr = 
G_data; - for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) { + for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) { math::Gemm( CblasNoTrans, CblasNoTrans, @@ -156,9 +156,9 @@ class TTContractionGradientOp final : public Operator { } protected: - TIndex K_; - TIndex M_; - TIndex N_; + int64_t K_; + int64_t M_; + int64_t N_; }; } // namespace caffe2 diff --git a/caffe2/experiments/operators/tt_pad_op.h b/caffe2/experiments/operators/tt_pad_op.h index 83d197782790..c78df9c5f29f 100644 --- a/caffe2/experiments/operators/tt_pad_op.h +++ b/caffe2/experiments/operators/tt_pad_op.h @@ -29,7 +29,7 @@ class TTPadOp final : public Operator { USE_OPERATOR_CONTEXT_FUNCTIONS; TTPadOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - scale_(OperatorBase::GetSingleArgument("scale", 0)) { + scale_(OperatorBase::GetSingleArgument("scale", 0)) { CAFFE_ENFORCE( OperatorBase::HasArgument("scale"), "Argument `scale` is missing."); } @@ -46,16 +46,16 @@ class TTPadOp final : public Operator { auto* X_orig_dim0 = Output(1); X_orig_dim0->Resize(1); - *X_orig_dim0->template mutable_data() = X_dim0; + *X_orig_dim0->template mutable_data() = X_dim0; if (X_dim0 % scale_ != 0) { - TIndex padded_dim0 = (X_dim0 / scale_ + 1) * scale_; + int64_t padded_dim0 = (X_dim0 / scale_ + 1) * scale_; auto dim0_diff = padded_dim0 - X_dim0; // set growthPct to the upper bound percentage: (100 * scale_ / X_dim0) X_pad->Extend(dim0_diff, 100 * scale_ / X_dim0, &context_); auto* X_pad_data = X_pad->template mutable_data(); - TIndex X_size = X_dim0 * X_dim1; + int64_t X_size = X_dim0 * X_dim1; memset(X_pad_data + X_size, 0, dim0_diff * X_dim1 * sizeof(T)); } @@ -63,7 +63,7 @@ class TTPadOp final : public Operator { } protected: - TIndex scale_; + int64_t scale_; }; template @@ -78,7 +78,7 @@ class TTPadGradientOp final : public Operator { auto* output = Output(0); CAFFE_ENFORCE(&G == output); - auto old_dim0 = *Input(1).template data(); + auto old_dim0 = *Input(1).template data(); auto new_dim0 = G.dim(0); auto dim1 = G.dim(1); diff --git a/caffe2/ideep/operators/concat_split_op.cc b/caffe2/ideep/operators/concat_split_op.cc index 311c6446184a..8d011cd3be8b 100644 --- a/caffe2/ideep/operators/concat_split_op.cc +++ b/caffe2/ideep/operators/concat_split_op.cc @@ -43,7 +43,7 @@ class IDEEPConcatOp final : public IDEEPOperator { } auto axis_vdata = ideep::concat::compute(inputs, axis_, add_axis_, *output); - axis_info->Resize(vector(1, InputSize())); + axis_info->Resize(vector(1, InputSize())); int* axis_data = axis_info->template mutable_data(); for (int i = 0; i < axis_vdata.size(); i++) { axis_data[i] = axis_vdata[i]; diff --git a/caffe2/ideep/operators/conv_pool_base_op.h b/caffe2/ideep/operators/conv_pool_base_op.h index 03da0792acb3..5f026efde9a3 100644 --- a/caffe2/ideep/operators/conv_pool_base_op.h +++ b/caffe2/ideep/operators/conv_pool_base_op.h @@ -39,7 +39,7 @@ class IDEEPConvPoolOpBase : public ConvPoolOpBase { ideep::tensor::dims output_dims; auto input_dims = input.get_dims(); - vector input_Tdims (input_dims.begin(), input_dims.end()); + vector input_Tdims (input_dims.begin(), input_dims.end()); InferOutputSize( input_Tdims, output_channel, diff --git a/caffe2/ideep/operators/squeeze_op.cc b/caffe2/ideep/operators/squeeze_op.cc index fe78e30a7d33..4cf73fe9c70d 100644 --- a/caffe2/ideep/operators/squeeze_op.cc +++ b/caffe2/ideep/operators/squeeze_op.cc @@ -35,7 +35,7 @@ class IDEEPSqueezeOp final : public IDEEPOperator { (dims_.back() + 1), " dimensions."); const 
auto& ideep_dims = X.get_dims(); - vector dims(ideep_dims.begin(), ideep_dims.end()); + vector dims(ideep_dims.begin(), ideep_dims.end()); const auto& new_dims = SqueezeOp::ComputeDims(dims, dims_); itensor::dims new_dims_ideep(new_dims.begin(), new_dims.end()); if (&X != Y) { diff --git a/caffe2/image/image_input_op.h b/caffe2/image/image_input_op.h index 8e9a81190017..2ce313758589 100644 --- a/caffe2/image/image_input_op.h +++ b/caffe2/image/image_input_op.h @@ -372,14 +372,14 @@ ImageInputOp::ImageInputOp( randgen_per_thread_.emplace_back(meta_randgen()); } prefetched_image_.Resize( - TIndex(batch_size_), - TIndex(crop_), - TIndex(crop_), - TIndex(color_ ? 3 : 1)); + int64_t(batch_size_), + int64_t(crop_), + int64_t(crop_), + int64_t(color_ ? 3 : 1)); if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) { - prefetched_label_.Resize(TIndex(batch_size_), TIndex(num_labels_)); + prefetched_label_.Resize(int64_t(batch_size_), int64_t(num_labels_)); } else { - prefetched_label_.Resize(vector(1, batch_size_)); + prefetched_label_.Resize(vector(1, batch_size_)); } for (int i = 0; i < additional_output_sizes.size(); ++i) { @@ -387,7 +387,7 @@ ImageInputOp::ImageInputOp( Context::GetDeviceType()); prefetched_additional_outputs_.emplace_back(CPU); prefetched_additional_outputs_[i].Resize( - TIndex(batch_size_), TIndex(additional_output_sizes[i])); + int64_t(batch_size_), int64_t(additional_output_sizes[i])); } } diff --git a/caffe2/mkl/mkl_utils_test.cc b/caffe2/mkl/mkl_utils_test.cc index a16224bbe270..622bbca6613c 100644 --- a/caffe2/mkl/mkl_utils_test.cc +++ b/caffe2/mkl/mkl_utils_test.cc @@ -23,10 +23,10 @@ TEST(MKLDNNTest, SimpleConvolutionTest) { int pads[2] = {0, 0}; // Creating Input and output tensors - Tensor X(vector{16, 8, 32, 32}, CPU); - Tensor W(vector{64, 8, 3, 3}, CPU); - Tensor b(vector{64}, CPU); - Tensor Y(vector{16, 64, 30, 30}, CPU); + Tensor X(vector{16, 8, 32, 32}, CPU); + Tensor W(vector{64, 8, 3, 3}, CPU); + Tensor b(vector{64}, CPU); + Tensor Y(vector{16, 64, 30, 30}, CPU); float* data = X.mutable_data(); for (int i = 0; i < X.size(); ++i) { @@ -91,7 +91,7 @@ TEST(MKLDNNTest, MKLMemoryCopyTest) { // the buffer size being empty for both - former in dnnAllocateBuffer and // the latter in dnnConversionExecute (likely due to some difference in // layout?). Test both cases. 
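// --- Editorial sketch, not part of the patch --------------------------------
// Why every "-"/"+" pair in this patch is behavior-preserving: caffe2::TIndex
// was only an alias for int64_t (declared in caffe2/core/tensor.h, per the
// "// for TIndex" include comments later in this patch), so the rename changes
// spelling, not arithmetic or ABI. The alias below is a stand-in to make that
// assumption checkable; the namespace name is illustrative only.
#include <cstdint>
#include <type_traits>
namespace caffe2_sketch {
using TIndex = int64_t;  // assumption: mirrors the historical caffe2 typedef
static_assert(std::is_same<TIndex, int64_t>::value,
              "TIndex and int64_t are interchangeable, so the rename is safe");
}  // namespace caffe2_sketch
// -----------------------------------------------------------------------------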
- vector> dims_list{{10, 3, 20, 20}, {0}, {0, 10}}; + vector> dims_list{{10, 3, 20, 20}, {0}, {0, 10}}; for (const auto& dims : dims_list) { auto X_cpu_in = caffe2::make_unique(dims, CPU); CPUContext ctx; diff --git a/caffe2/mkl/mklmemory_serialization.cc b/caffe2/mkl/mklmemory_serialization.cc index e59a9a15f422..a613623aa2f9 100644 --- a/caffe2/mkl/mklmemory_serialization.cc +++ b/caffe2/mkl/mklmemory_serialization.cc @@ -84,8 +84,8 @@ class MKLMemoryDeserializer : public BlobDeserializerBase { "MKLMemory only supports either float or double formats."); CAFFE_ENFORCE( !proto.has_segment(), "MKLMemory does not support segment right now."); - vector dims; - for (const TIndex d : proto.dims()) { + vector dims; + for (const int64_t d : proto.dims()) { dims.push_back(d); } // TODO: right now, every time we do a deserializer we create a new MKL diff --git a/caffe2/mkl/operators/concat_op.cc b/caffe2/mkl/operators/concat_op.cc index 204f1c1cda93..b8a1c18950a3 100644 --- a/caffe2/mkl/operators/concat_op.cc +++ b/caffe2/mkl/operators/concat_op.cc @@ -96,7 +96,7 @@ class MKLConcatOp final : public MKLOperator { private: int axis_; - vector cached_output_dims_; + vector cached_output_dims_; }; } // namespace mkl diff --git a/caffe2/mkl/operators/conv_op.cc b/caffe2/mkl/operators/conv_op.cc index 2678f4c37e17..87c8522f1a5a 100644 --- a/caffe2/mkl/operators/conv_op.cc +++ b/caffe2/mkl/operators/conv_op.cc @@ -37,7 +37,7 @@ class MKLConvOp final : public ConvPoolOpBase { math::Set( M, 0.0, cpu_zero_bias.template mutable_data(), &ctx); - zero_bias_.reset(new MKLMemory(std::vector{M})); + zero_bias_.reset(new MKLMemory(std::vector{M})); zero_bias_->CopyFrom(cpu_zero_bias); } const auto& bias = InputSize() == 2 @@ -130,11 +130,11 @@ class MKLConvOp final : public ConvPoolOpBase { if (group_ > 1) { // Explicitly reformat the buffer. 
MKLMemory group_filter( - std::vector{TIndex(group_), - TIndex(filter.dim32(0) / group_), - TIndex(filter.dim32(1)), - TIndex(filter.dim32(2)), - TIndex(filter.dim32(3))}, + std::vector{int64_t(group_), + int64_t(filter.dim32(0) / group_), + int64_t(filter.dim32(1)), + int64_t(filter.dim32(2)), + int64_t(filter.dim32(3))}, nullptr, dnnResourceFilter, /*share_memory_if_possible=*/true); @@ -168,8 +168,8 @@ class MKLConvOp final : public ConvPoolOpBase { // Input: X, W, b // Output: Y std::unique_ptr> zero_bias_; - vector cached_input_dims_; - vector cached_filter_dims_; + vector cached_input_dims_; + vector cached_filter_dims_; PrimitiveWrapper primitive_; LayoutWrapper input_layout_; LayoutWrapper filter_layout_; diff --git a/caffe2/mkl/operators/conv_op_mkldnn.cc b/caffe2/mkl/operators/conv_op_mkldnn.cc index 80edf1332d06..cb0fe8eae06d 100644 --- a/caffe2/mkl/operators/conv_op_mkldnn.cc +++ b/caffe2/mkl/operators/conv_op_mkldnn.cc @@ -106,8 +106,8 @@ class ConvMKLDNNOp final : public ConvPoolOpBase { private: // Input: X, W, b // Output: Y - vector cached_input_dims_; - vector cached_filter_dims_; + vector cached_input_dims_; + vector cached_filter_dims_; PrimitiveWrapper primitive_; unique_ptr> X_wrapper_ = nullptr; unique_ptr> filter_wrapper_ = nullptr; diff --git a/caffe2/mkl/operators/elementwise_sum_op.cc b/caffe2/mkl/operators/elementwise_sum_op.cc index 7827e874716e..bcd095df9716 100644 --- a/caffe2/mkl/operators/elementwise_sum_op.cc +++ b/caffe2/mkl/operators/elementwise_sum_op.cc @@ -64,7 +64,7 @@ class MKLSumOp final : public MKLOperator { private: std::vector coefficients_; - vector cached_input_dims_; + vector cached_input_dims_; vector> input_views_; }; diff --git a/caffe2/mkl/operators/fully_connected_op.cc b/caffe2/mkl/operators/fully_connected_op.cc index 404a67a6b7ac..5d21823f8646 100644 --- a/caffe2/mkl/operators/fully_connected_op.cc +++ b/caffe2/mkl/operators/fully_connected_op.cc @@ -90,8 +90,8 @@ class MKLFullyConnectedOp final : public MKLOperator { // Input: X, W, b // Output: Y size_t axis_{1}; - vector cached_input_dims_; - vector cached_filter_dims_; + vector cached_input_dims_; + vector cached_filter_dims_; PrimitiveWrapper primitive_; LayoutWrapper input_layout_; LayoutWrapper filter_layout_; diff --git a/caffe2/mkl/operators/local_response_normalization_op.cc b/caffe2/mkl/operators/local_response_normalization_op.cc index f57b4b48b7e7..a57398933f62 100644 --- a/caffe2/mkl/operators/local_response_normalization_op.cc +++ b/caffe2/mkl/operators/local_response_normalization_op.cc @@ -19,7 +19,7 @@ class MKLLRNOp final : public LRNOpBase { bool RunOnDeviceWithOrderNHWC() override; private: - vector cached_input_dims_; + vector cached_input_dims_; LayoutWrapper workspace_layout_; std::unique_ptr> workspace_buffer_; PrimitiveWrapper primitive_; diff --git a/caffe2/mkl/operators/packed_fc_op.cc b/caffe2/mkl/operators/packed_fc_op.cc index 0ed93cf06107..5e7b0931c390 100644 --- a/caffe2/mkl/operators/packed_fc_op.cc +++ b/caffe2/mkl/operators/packed_fc_op.cc @@ -141,7 +141,7 @@ class PackedFCOp final : public Operator { } size_t axis_{1}; uint32_t hash_{0}; - vector Y_shape_cache_; + vector Y_shape_cache_; Tensor bias_multiplier_{CPU}; std::unique_ptr local_packed_matrix_; }; diff --git a/caffe2/mkl/operators/pool_op.cc b/caffe2/mkl/operators/pool_op.cc index 284e7f80b8c3..281c9db22a2c 100644 --- a/caffe2/mkl/operators/pool_op.cc +++ b/caffe2/mkl/operators/pool_op.cc @@ -41,8 +41,8 @@ class MKLPoolOp final : public ConvPoolOpBase { // Input: X // Output: Y private: - 
vector cached_input_dims_; - // vector cached_avgpool_input_dims_; + vector cached_input_dims_; + // vector cached_avgpool_input_dims_; LayoutWrapper workspace_layout_; std::unique_ptr> workspace_buffer_; PrimitiveWrapper primitive_; diff --git a/caffe2/mkl/operators/relu_op.cc b/caffe2/mkl/operators/relu_op.cc index ef734eda72f8..98443d42f2e2 100644 --- a/caffe2/mkl/operators/relu_op.cc +++ b/caffe2/mkl/operators/relu_op.cc @@ -43,7 +43,7 @@ class MKLReluOp : public MKLOperator { } private: - vector cached_input_dims_; + vector cached_input_dims_; }; template diff --git a/caffe2/mkl/operators/spatial_batch_norm_op.cc b/caffe2/mkl/operators/spatial_batch_norm_op.cc index 7d9856f1adfd..13f83dcff7e4 100644 --- a/caffe2/mkl/operators/spatial_batch_norm_op.cc +++ b/caffe2/mkl/operators/spatial_batch_norm_op.cc @@ -146,7 +146,7 @@ class MKLBNOp final : public Operator { const StorageOrder order_; const int num_batches_; - vector cached_input_dims_; + vector cached_input_dims_; LayoutWrapper scale_bias_layout_; LayoutWrapper saved_mean_layout_; LayoutWrapper saved_var_layout_; diff --git a/caffe2/mkl/operators/squeeze_op.cc b/caffe2/mkl/operators/squeeze_op.cc index fb71be56f774..c89258773520 100644 --- a/caffe2/mkl/operators/squeeze_op.cc +++ b/caffe2/mkl/operators/squeeze_op.cc @@ -57,7 +57,7 @@ class MKLSqueezeOp final : public MKLOperator { private: vector dims_; - vector cached_input_dims_; + vector cached_input_dims_; }; } // namespace mkl diff --git a/caffe2/mkl/utils/mkl_memory.cc b/caffe2/mkl/utils/mkl_memory.cc index 26e423220e3f..3f05f9c5d24b 100644 --- a/caffe2/mkl/utils/mkl_memory.cc +++ b/caffe2/mkl/utils/mkl_memory.cc @@ -19,7 +19,7 @@ CAFFE_KNOWN_TYPE(mkl::MKLMemory); CAFFE_KNOWN_TYPE(mkl::MKLMemory); template -static vector GetMKLTensorInfo( +static vector GetMKLTensorInfo( const void* c, size_t* capacity, DeviceOption* device) { diff --git a/caffe2/mkl/utils/mkl_memory.h b/caffe2/mkl/utils/mkl_memory.h index 9d9e91a565eb..bd0ad4042207 100644 --- a/caffe2/mkl/utils/mkl_memory.h +++ b/caffe2/mkl/utils/mkl_memory.h @@ -5,8 +5,8 @@ #include #include -#include "caffe2/core/flags.h" // for TIndex -#include "caffe2/core/tensor.h" // for TIndex +#include "caffe2/core/flags.h" // for int64_t +#include "caffe2/core/tensor.h" // for int64_t #include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h" // A global boolean variable that controls the behavior when we call View() on @@ -270,7 +270,7 @@ class MKLMemory { "Reshape is not allowed for custom layouts. " "Convert to plain layout before invoking Reshape()."); - TIndex new_size = 1; + int64_t new_size = 1; for (auto i = 0; i < dims.size(); ++i) { CAFFE_ENFORCE_GE_WITH_CALLER(dims[i], 0); new_size *= dims[i]; @@ -279,7 +279,7 @@ class MKLMemory { new_size == size_, "New size and old size are not equal. Reshape is not possible."); - vector new_dims(dims.size()); + vector new_dims(dims.size()); vector size(dims.size()); vector strides(dims.size()); for (int i = 0; i < dims.size(); ++i) { @@ -456,7 +456,7 @@ class MKLMemory { return buffer_.get(); } - inline const vector& dims() const { + inline const vector& dims() const { return dims_; } @@ -470,7 +470,7 @@ class MKLMemory { /** * Returns the size (i.e., the number of items) in the buffer. */ - inline TIndex size() const { + inline int64_t size() const { return size_; } @@ -479,7 +479,7 @@ class MKLMemory { * must be between 0 (inclusive) and the number of dimensions, otherwise * this function will produce a fatal message. 
*/ - inline TIndex dim(const int i) const { + inline int64_t dim(const int i) const { return dims_.at(i); } @@ -545,9 +545,9 @@ class MKLMemory { mutable std::mutex buffer_lock_; // The dimensions in the same order as Caffe2 does. This is used to // interface with C2. - vector dims_; + vector dims_; // Number of items in the buffer. - TIndex size_ = -1; + int64_t size_ = -1; // The user dnn layout. LayoutWrapper user_layout_; // The internal dnn layout. diff --git a/caffe2/mkl/utils/mkl_operator.h b/caffe2/mkl/utils/mkl_operator.h index 1b91788c89c0..2236e9267af5 100644 --- a/caffe2/mkl/utils/mkl_operator.h +++ b/caffe2/mkl/utils/mkl_operator.h @@ -97,7 +97,7 @@ class MKLOperator : public OperatorBase { // The primitive used in the operator. PrimitiveWrapper primitive_; // Size cache for all the input sizes. - vector> input_size_cache_; + vector> input_size_cache_; // An internal MKLMemory buffer. This is usually handy when we have a // single output from the operator. If your operator has multiple outputs // then you should allocate your own buffer. diff --git a/caffe2/mobile/contrib/arm-compute/core/context.h b/caffe2/mobile/contrib/arm-compute/core/context.h index 4085e4983cc8..5ec668cb6d49 100644 --- a/caffe2/mobile/contrib/arm-compute/core/context.h +++ b/caffe2/mobile/contrib/arm-compute/core/context.h @@ -249,7 +249,7 @@ public: const int32_t ndim() const { return dims_.size(); } - vector dims() const { return dims_; } + vector dims() const { return dims_; } const int32_t dim32(const int index) const { return dims_.at(index); } @@ -283,7 +283,7 @@ private: bool SetDims(const vector &src) { auto old_size = size_; dims_.resize(src.size()); - TIndex new_size = 1; + int64_t new_size = 1; for (unsigned int i = 0; i < src.size(); ++i) { new_size *= src[i]; dims_[i] = src[i]; @@ -299,7 +299,7 @@ private: return size_ > old_size; } - bool SetDims(const TIndex d0) { + bool SetDims(const int64_t d0) { auto old_size = size_; dims_.resize(1); dims_[0] = d0; @@ -307,7 +307,7 @@ private: return size_ > old_size; } - bool SetDims(const TIndex d0, const TIndex d1) { + bool SetDims(const int64_t d0, const int64_t d1) { auto old_size = size_; dims_.resize(2); dims_[0] = d0; @@ -316,7 +316,7 @@ private: return size_ > old_size; } - bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) { + bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) { auto old_size = size_; dims_.resize(3); dims_[0] = d0; @@ -326,8 +326,8 @@ private: return size_ > old_size; } - bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2, - const TIndex d3) { + bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2, + const int64_t d3) { auto old_size = size_; dims_.resize(4); dims_[0] = d0; @@ -338,8 +338,8 @@ private: return size_ > old_size; } - vector dims_; - TIndex size_ = -1; + vector dims_; + int64_t size_ = -1; arm_compute::TensorShape shape_; unique_ptr tensor_; }; diff --git a/caffe2/mobile/contrib/arm-compute/operators/fully_connected_op.cc b/caffe2/mobile/contrib/arm-compute/operators/fully_connected_op.cc index ac36118054cc..448d84315cc9 100644 --- a/caffe2/mobile/contrib/arm-compute/operators/fully_connected_op.cc +++ b/caffe2/mobile/contrib/arm-compute/operators/fully_connected_op.cc @@ -40,7 +40,7 @@ bool GLFullyConnectedOp::RunOnDevice() { CAFFE_ENFORCE_EQ(1, B_->ndim()); CAFFE_ENFORCE_EQ(N, B_->dim32(0)); - vector output_dims = {M, N}; + vector output_dims = {M, N}; GLTensor *Y = OperatorBase::Outputs()[0]->template GetMutable>(); if (first_run_) { diff --git 
a/caffe2/mobile/contrib/arm-compute/operators/pool_op.cc b/caffe2/mobile/contrib/arm-compute/operators/pool_op.cc index 19aede792d01..cc9af9c7359e 100644 --- a/caffe2/mobile/contrib/arm-compute/operators/pool_op.cc +++ b/caffe2/mobile/contrib/arm-compute/operators/pool_op.cc @@ -53,7 +53,7 @@ bool GLAveragePoolOp::RunOnDeviceWithOrderNCHW() { int height = X_->dim32(2); int width = X_->dim32(3); - vector output_dims = {N, channels, 1, 1}; + vector output_dims = {N, channels, 1, 1}; if (!global_pooling_) { output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1; output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1; @@ -116,7 +116,7 @@ template <> bool GLMaxPoolOp::RunOnDeviceWithOrderNCHW() { int height = X_->dim32(2); int width = X_->dim32(3); - vector output_dims = {N, channels, 1, 1}; + vector output_dims = {N, channels, 1, 1}; if (!global_pooling_) { output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1; output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1; diff --git a/caffe2/mobile/contrib/arm-compute/operators/resize_op.cc b/caffe2/mobile/contrib/arm-compute/operators/resize_op.cc index ed9f672ce52b..e15663b674c4 100644 --- a/caffe2/mobile/contrib/arm-compute/operators/resize_op.cc +++ b/caffe2/mobile/contrib/arm-compute/operators/resize_op.cc @@ -45,7 +45,7 @@ bool GLResizeNearestOp::RunOnDevice() { GLTensor *Y = OperatorBase::Outputs()[0]->template GetMutable>(); - vector output_dims = {N, C, H * height_scale_, W * width_scale_}; + vector output_dims = {N, C, H * height_scale_, W * width_scale_}; if (first_run_) { Y->Resize(output_dims); diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm index 2238d7af08dd..52f746f63f31 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm @@ -329,7 +329,7 @@ class CopyToMPSCNNOp final : public Operator { for (auto i = 0; i < Inputs().size(); ++i) { const auto& X = Input(i); CAFFE_ENFORCE(X.ndim() > 0 && X.ndim() <= 4); - std::vector XDims = {1, 1, 1, 1}; + std::vector XDims = {1, 1, 1, 1}; XDims.assign(X.dims().begin(), X.dims().end()); caffe2::Timer t; @@ -2259,15 +2259,15 @@ class MPSCNNGenerateProposalsCPPOp final : public Operator { // bbox_deltas: (num_images, A * 4, H, W) CAFFE_ENFORCE_EQ( - bbox_deltas.dims(), (vector{num_images, 4 * A, height, width})); + bbox_deltas.dims(), (vector{num_images, 4 * A, height, width})); // im_info_tensor: (num_images, 3), format [height, width, scale; ...] 
- CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector{num_images, 3})); + CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector{num_images, 3})); CAFFE_ENFORCE( im_info_tensor.template IsType(), im_info_tensor.meta().name()); // anchors: (A, 4) - CAFFE_ENFORCE_EQ(anchors.dims(), (vector{A, 4})); + CAFFE_ENFORCE_EQ(anchors.dims(), (vector{A, 4})); CAFFE_ENFORCE(anchors.template IsType(), anchors.meta().name()); // Broadcast the anchors to all pixels auto all_anchors_vec = diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm index bcf588d8a384..7216b16611aa 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm @@ -640,7 +640,7 @@ void testMPSCNN() { CAFFE_ENFORCE_EQ(t1.ndim(), 2); CAFFE_ENFORCE(t2.dim32(2) == 1 && t2.dim32(3) == 1); const_cast(t2).Reshape( - std::vector{TIndex(batchSize), TIndex(COut)}); + std::vector{int64_t(batchSize), int64_t(COut)}); // Note dims do not match, as Metal leaves a 1x1 spatial // dimension. CAFFE_ENFORCE_EQ(t1.dims(), t2.dims()); diff --git a/caffe2/mobile/contrib/ios/pool_test.cc b/caffe2/mobile/contrib/ios/pool_test.cc index c4f6ff4d6a3a..47fd405eef01 100644 --- a/caffe2/mobile/contrib/ios/pool_test.cc +++ b/caffe2/mobile/contrib/ios/pool_test.cc @@ -12,7 +12,7 @@ namespace caffe2 { namespace { -void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { +void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); @@ -58,7 +58,7 @@ void compareMaxPooling(int N, def1.add_arg()->CopyFrom(MakeArgument("pad_b", padB)); def1.add_arg()->CopyFrom(MakeArgument("pad_r", padR)); - AddNoiseInput(vector{N, C, H, W}, "X", &ws); + AddNoiseInput(vector{N, C, H, W}, "X", &ws); unique_ptr op1(CreateOperator(def1, &ws)); EXPECT_NE(nullptr, op1.get()); diff --git a/caffe2/mobile/contrib/ios/resize_test.cc b/caffe2/mobile/contrib/ios/resize_test.cc index 90e672397b82..1c08df0f32a1 100644 --- a/caffe2/mobile/contrib/ios/resize_test.cc +++ b/caffe2/mobile/contrib/ios/resize_test.cc @@ -12,7 +12,7 @@ namespace caffe2 { namespace { -void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { +void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); @@ -44,7 +44,7 @@ void compareResizeNeareast(int N, def1.add_arg()->CopyFrom(MakeArgument("width_scale", wscale)); def1.add_arg()->CopyFrom(MakeArgument("height_scale", hscale)); - AddNoiseInput(vector{N, C, H, W}, "X", &ws); + AddNoiseInput(vector{N, C, H, W}, "X", &ws); unique_ptr op1(CreateOperator(def1, &ws)); EXPECT_NE(nullptr, op1.get()); diff --git a/caffe2/mobile/contrib/opengl/test/TestGLConvolution.cc b/caffe2/mobile/contrib/opengl/test/TestGLConvolution.cc index 13c5a72dcd8d..cb175c5b0537 100644 --- a/caffe2/mobile/contrib/opengl/test/TestGLConvolution.cc +++ b/caffe2/mobile/contrib/opengl/test/TestGLConvolution.cc @@ -12,7 +12,7 @@ #include -void AddNoiseInput(const std::vector& shape, +void AddNoiseInput(const std::vector& shape, const std::string& name, caffe2::Workspace* ws) { caffe2::CPUContext context; @@ -60,13 +60,13 @@ double BenchOp(const std::string& typ, def1.add_arg()->CopyFrom(caffe2::MakeArgument("pad_r", 0)); def1.add_arg()->CopyFrom(caffe2::MakeArgument("convolution_transform_strategy", std::string("PRECOMPUTE"))); - AddNoiseInput(std::vector{1, inputC, inH, inW}, 
"X", ws); + AddNoiseInput(std::vector{1, inputC, inH, inW}, "X", ws); if (transposed) { - AddNoiseInput(std::vector{inputC, outputC, kH, kW}, "W", ws); + AddNoiseInput(std::vector{inputC, outputC, kH, kW}, "W", ws); } else { - AddNoiseInput(std::vector{outputC, inputC, kH, kW}, "W", ws); + AddNoiseInput(std::vector{outputC, inputC, kH, kW}, "W", ws); } - AddNoiseInput(std::vector{outputC}, "B", ws); + AddNoiseInput(std::vector{outputC}, "B", ws); std::unique_ptr op1(CreateOperator(def1, ws)); @@ -131,19 +131,19 @@ static double BenchGLConvolution(int input_channels, } AddNoiseInput( - std::vector{1, input_channels, input_height, input_width}, "X_cpu", ws); + std::vector{1, input_channels, input_height, input_width}, "X_cpu", ws); if (transposed) { AddNoiseInput( - std::vector{input_channels, output_channels, kernel_height, kernel_width}, + std::vector{input_channels, output_channels, kernel_height, kernel_width}, "W", ws); } else { AddNoiseInput( - std::vector{output_channels, input_channels, kernel_height, kernel_width}, + std::vector{output_channels, input_channels, kernel_height, kernel_width}, "W", ws); } - AddNoiseInput(std::vector{output_channels}, "b", ws); + AddNoiseInput(std::vector{output_channels}, "b", ws); caffe2::NetDef netdef; { diff --git a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc index 1bbe303ef777..deced7196449 100644 --- a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc +++ b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc @@ -36,7 +36,7 @@ namespace caffe2 { -void AddConstInput(const vector& shape, +void AddConstInput(const vector& shape, const float value, const string& name, Workspace* ws) { @@ -50,7 +50,7 @@ void AddConstInput(const vector& shape, &context); } -void AddNoiseInput(const vector& shape, +void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { DeviceOption option; @@ -72,7 +72,7 @@ float snpe_run(int iters, Workspace& ws) { const int W = 227; const int C = 3; - POPULATE_DATA("X_snpe", (caffe2::vector{H, W, C}), hwc); + POPULATE_DATA("X_snpe", (caffe2::vector{H, W, C}), hwc); OperatorDef def; def.set_name("snpe_test"); @@ -108,7 +108,7 @@ float caffe2_run(int iters, Workspace& ws) { ReadProtoFromBinaryFile("/data/local/tmp/squeeze_init_net.pb", &init_net); ReadProtoFromBinaryFile("/data/local/tmp/squeeze_predict_net.pb", &predict_net); ws.RunNetOnce(init_net); - POPULATE_DATA("data", (caffe2::vector{N, C, H, W}), chw); + POPULATE_DATA("data", (caffe2::vector{N, C, H, W}), chw); predict_net.set_name("SqueezeNet"); ws.CreateNet(predict_net); diff --git a/caffe2/mobile/contrib/ulp2/ulp_neon.cc b/caffe2/mobile/contrib/ulp2/ulp_neon.cc index 4c8e668775e5..7d84662d20c5 100644 --- a/caffe2/mobile/contrib/ulp2/ulp_neon.cc +++ b/caffe2/mobile/contrib/ulp2/ulp_neon.cc @@ -538,7 +538,7 @@ void run2b1bConvIm2ColGEMM(QConvState* state, CAFFE_ENFORCE_EQ(Y->dim32(0), divRoundUp(X.dim32(0) * OH * OW, kGEMMTileSize) * kGEMMTileSize); CAFFE_ENFORCE_EQ(Y->dim32(1), OC); Y->ShrinkTo(X.dim32(0) * OH * OW); - Y->Reshape(std::vector{{TIndex(X.dim(0)), TIndex(OH), TIndex(OW), TIndex(OC)}}); + Y->Reshape(std::vector{{int64_t(X.dim(0)), int64_t(OH), int64_t(OW), int64_t(OC)}}); } } diff --git a/caffe2/mobile/contrib/ulp2/ulp_test.cc b/caffe2/mobile/contrib/ulp2/ulp_test.cc index f6705e638dda..a1c1af0f6dfb 100644 --- a/caffe2/mobile/contrib/ulp2/ulp_test.cc +++ b/caffe2/mobile/contrib/ulp2/ulp_test.cc @@ -62,7 +62,7 @@ int randInt(int a, int b) { return std::uniform_int_distribution(a, b)(gen); } 
-TensorCPU genTensor11(std::vector shape) { +TensorCPU genTensor11(std::vector shape) { Tensor r(CPU); r.Resize(shape); @@ -76,7 +76,7 @@ TensorCPU genTensor11(std::vector shape) { return r; } -TensorCPU genTensorUniform11(std::vector shape) { +TensorCPU genTensorUniform11(std::vector shape) { Tensor r(CPU); r.Resize(shape); @@ -90,7 +90,7 @@ TensorCPU genTensorUniform11(std::vector shape) { return r; } -TensorCPU genTensor0123(std::vector shape) { +TensorCPU genTensor0123(std::vector shape) { Tensor r(CPU); r.Resize(shape); @@ -171,7 +171,7 @@ inline void qgemmNT(int M, int N, int K, const uint8_t* A, const uint8_t* B, flo } } -void gemmTest(TIndex M, TIndex N, TIndex K) { +void gemmTest(int64_t M, int64_t N, int64_t K) { auto X = genTensor11({M, K}); auto W = genTensor11({N, K}); Tensor XQ(CPU), WQ(CPU), YQ(CPU), Y(CPU); diff --git a/caffe2/mpi/mpi_ops.h b/caffe2/mpi/mpi_ops.h index 8657c107ed0f..911b51b96039 100644 --- a/caffe2/mpi/mpi_ops.h +++ b/caffe2/mpi/mpi_ops.h @@ -98,7 +98,7 @@ class MPIAllgatherOp final : public Operator { MPI_Comm comm = OperatorBase::Input(0).comm(); auto& input = Input(1); auto* output = Output(0); - vector output_dims = input.dims(); + vector output_dims = input.dims(); output_dims[0] *= OperatorBase::Input(0).size(); output->Resize(output_dims); MPI_CHECK(MPI_Allgather( diff --git a/caffe2/operators/accuracy_op.cc b/caffe2/operators/accuracy_op.cc index 8c1273eca209..2ee730aa9b44 100644 --- a/caffe2/operators/accuracy_op.cc +++ b/caffe2/operators/accuracy_op.cc @@ -12,7 +12,7 @@ bool AccuracyOp::RunOnDevice() { int D = X.dim32(1); CAFFE_ENFORCE_EQ(label.ndim(), 1); CAFFE_ENFORCE_EQ(label.dim32(0), N); - Y->Resize(vector()); + Y->Resize(vector()); const auto* Xdata = X.data(); const auto* labelData = label.data(); const int top_k = top_k_; diff --git a/caffe2/operators/accuracy_op.cu b/caffe2/operators/accuracy_op.cu index 5d27707662c7..b1f6f137831e 100644 --- a/caffe2/operators/accuracy_op.cu +++ b/caffe2/operators/accuracy_op.cu @@ -53,7 +53,7 @@ bool AccuracyOp::RunOnDevice() { int D = X.dim32(1); CAFFE_ENFORCE_EQ(label.ndim(), 1); CAFFE_ENFORCE_EQ(label.dim32(0), N); - Y->Resize(vector()); + Y->Resize(vector()); float* Ydata = Y->template mutable_data(); math::Set(1, 0, Ydata, &context_); AccuracyKernel<<< diff --git a/caffe2/operators/arg_ops.cc b/caffe2/operators/arg_ops.cc index aeedbd5f1437..c381509bc8ba 100644 --- a/caffe2/operators/arg_ops.cc +++ b/caffe2/operators/arg_ops.cc @@ -15,14 +15,14 @@ void ComputeArgImpl( const int n, const Compare& comp, const T* X, - TIndex* Y, + int64_t* Y, Context* context) { - math::Set(prev_size * next_size, TIndex(0), Y, context); + math::Set(prev_size * next_size, int64_t(0), Y, context); for (int i = 0; i < prev_size; ++i) { const T* cur_X = X + i * n * next_size + next_size; for (int k = 1; k < n; ++k) { for (int j = 0; j < next_size; ++j) { - TIndex* cur_Y = Y + i * next_size + j; + int64_t* cur_Y = Y + i * next_size + j; if (comp(*cur_X, X[i * n * next_size + *cur_Y * next_size + j])) { *cur_Y = k; } @@ -41,7 +41,7 @@ bool ArgMaxReducer::operator()( const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, CPUContext* context) const { ComputeArgImpl(prev_size, next_size, n, std::greater(), X, Y, context); return true; @@ -54,7 +54,7 @@ bool ArgMinReducer::operator()( const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, CPUContext* context) const { ComputeArgImpl(prev_size, next_size, n, std::less(), X, Y, context); return true; diff --git a/caffe2/operators/arg_ops.cu 
b/caffe2/operators/arg_ops.cu index 1735e4268e3b..fdc6331dccc1 100644 --- a/caffe2/operators/arg_ops.cu +++ b/caffe2/operators/arg_ops.cu @@ -28,7 +28,7 @@ __global__ void ComputeArgCUDAKernel( const Reducer reducer, const T init, const T* X, - TIndex* Y) { + int64_t* Y) { __shared__ typename BlockReduce::TempStorage temp_storage; const int d = stride.d(); for (int idx = blockIdx.x; idx < outer_size; idx += gridDim.x) { @@ -41,7 +41,7 @@ __global__ void ComputeArgCUDAKernel( } kv = BlockReduce(temp_storage).Reduce(kv, reducer); if (threadIdx.x == 0) { - Y[idx] = static_cast(kv.key); + Y[idx] = static_cast(kv.key); } __syncthreads(); } @@ -56,7 +56,7 @@ bool ArgMaxReducer::operator()( const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, CUDAContext* context) const { const int outer_size = prev_size * next_size; const FixedDivisor stride(next_size); @@ -82,7 +82,7 @@ bool ArgMinReducer::operator()( const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, CUDAContext* context) const { const int outer_size = prev_size * next_size; const FixedDivisor stride(next_size); diff --git a/caffe2/operators/arg_ops.h b/caffe2/operators/arg_ops.h index 98917a350dcf..f29c0d5aa50a 100644 --- a/caffe2/operators/arg_ops.h +++ b/caffe2/operators/arg_ops.h @@ -60,7 +60,7 @@ class ArgOp final : public Operator { next_size, n, X.template data(), - Y->template mutable_data(), + Y->template mutable_data(), &context_); } @@ -78,7 +78,7 @@ struct ArgMaxReducer { const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, Context* context) const; }; @@ -90,7 +90,7 @@ struct ArgMinReducer { const int next_size, const int n, const T* X, - TIndex* Y, + int64_t* Y, Context* context) const; }; diff --git a/caffe2/operators/assert_op.h b/caffe2/operators/assert_op.h index 3e74c5afc647..796f8af257bd 100644 --- a/caffe2/operators/assert_op.h +++ b/caffe2/operators/assert_op.h @@ -22,7 +22,7 @@ class AssertOp final : public Operator { cmp_tensor_.CopyFrom(Input(0)); auto* cmp_data = cmp_tensor_.template data(); - for (TIndex i = 0; i < cmp_tensor_.size(); ++i) { + for (int64_t i = 0; i < cmp_tensor_.size(); ++i) { CAFFE_ENFORCE((bool)cmp_data[i], [&]() { std::stringstream ss; ss << "Assert failed for element " << i diff --git a/caffe2/operators/atomic_ops.cc b/caffe2/operators/atomic_ops.cc index 73c4196b6e9b..2ce97b0d58c5 100644 --- a/caffe2/operators/atomic_ops.cc +++ b/caffe2/operators/atomic_ops.cc @@ -29,8 +29,8 @@ class AtomicFetchAddOp final : public Operator { auto& b = Input(2); auto* c = Output(0); auto* d = Output(1); - c->Resize(std::vector()); - d->Resize(std::vector()); + c->Resize(std::vector()); + d->Resize(std::vector()); auto* aPtr = a.data(); auto* bPtr = b.data(); auto* cPtr = c->template mutable_data(); diff --git a/caffe2/operators/batch_box_cox_op.cc b/caffe2/operators/batch_box_cox_op.cc index e35c726d185a..aad1daf91f3e 100644 --- a/caffe2/operators/batch_box_cox_op.cc +++ b/caffe2/operators/batch_box_cox_op.cc @@ -105,7 +105,7 @@ bool BatchBoxCoxOp::DoRunWithType() { zeros_.clear(); nonzeros_.reserve(D); zeros_.reserve(D); - for (TIndex j = 0; j < D; j++) { + for (int64_t j = 0; j < D; j++) { if (lambda1_ptr[j] == 0) { zeros_.push_back(j); } else { @@ -121,7 +121,7 @@ bool BatchBoxCoxOp::DoRunWithType() { // rows by replicating the input parameters K times. Then finish row-by-row. 
TypedCachedBuffers& b = GetBuffers(); if (nonzeros_.size() == D) { - TIndex i = 0; + int64_t i = 0; if (K > 1) { TileArrayIntoVector(lambda1_ptr, D, K, &b.lambda1_); TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_); @@ -142,7 +142,7 @@ bool BatchBoxCoxOp::DoRunWithType() { D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr); } } else if (zeros_.size() == D) { - TIndex i = 0; + int64_t i = 0; if (K > 1) { TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_z_); DCHECK_EQ(K * D, b.lambda2_z_.size()); @@ -169,7 +169,7 @@ bool BatchBoxCoxOp::DoRunWithType() { PackV(nonzeros_.size(), lambda2_ptr, nonzeros_.data(), b.lambda2_.data()); PackV(zeros_.size(), lambda2_ptr, zeros_.data(), b.lambda2_z_.data()); - TIndex i = 0; + int64_t i = 0; b.accumulator_.resize(std::max(nonzeros_.size(), zeros_.size())); if (K > 1) { // Truncate to original size, and re-tile with offsets this time. @@ -219,15 +219,15 @@ bool BatchBoxCoxOp::DoRunWithType() { template <> template void BatchBoxCoxOp::BoxCoxNaive( - TIndex N, - TIndex D, + int64_t N, + int64_t D, const T* data_ptr, const T* lambda1_ptr, const T* lambda2_ptr, T k_eps, T* output_ptr) { - for (TIndex i = 0; i < N; i++) { - for (TIndex j = 0; j < D; j++, data_ptr++, output_ptr++) { + for (int64_t i = 0; i < N; i++) { + for (int64_t j = 0; j < D; j++, data_ptr++, output_ptr++) { T lambda1_v = lambda1_ptr[j]; T lambda2_v = lambda2_ptr[j]; T tmp = std::max(*data_ptr + lambda2_v, k_eps); @@ -245,18 +245,18 @@ void BatchBoxCoxOp::BoxCoxNaive( template <> template void BatchBoxCoxOp::BoxCoxNonzeroLambda( - TIndex D, + int64_t D, const T* data_ptr, const T* lambda1, const T* lambda2, T k_eps, T* out) { caffe2::math::Add(D, data_ptr, lambda2, out, &context_); - for (TIndex j = 0; j < D; j++) { + for (int64_t j = 0; j < D; j++) { out[j] = std::max(out[j], k_eps); } Pow(D, out, lambda1, out); - for (TIndex j = 0; j < D; j++) { + for (int64_t j = 0; j < D; j++) { out[j] -= 1.0; } caffe2::math::Div(D, out, lambda1, out, &context_); @@ -265,13 +265,13 @@ void BatchBoxCoxOp::BoxCoxNonzeroLambda( template <> template void BatchBoxCoxOp::BoxCoxZeroLambda( - TIndex D, + int64_t D, const T* data_ptr, const T* lambda2, T k_eps, T* output_ptr) { caffe2::math::Add(D, data_ptr, lambda2, output_ptr, &context_); - for (TIndex j = 0; j < D; j++) { + for (int64_t j = 0; j < D; j++) { output_ptr[j] = std::max(output_ptr[j], k_eps); } caffe2::math::Log(D, output_ptr, output_ptr, &context_); diff --git a/caffe2/operators/batch_box_cox_op.h b/caffe2/operators/batch_box_cox_op.h index 7f7b2dd8da03..6bdc3c759370 100644 --- a/caffe2/operators/batch_box_cox_op.h +++ b/caffe2/operators/batch_box_cox_op.h @@ -27,8 +27,8 @@ class BatchBoxCoxOp final : public Operator { protected: template void BoxCoxNaive( - TIndex N, - TIndex D, + int64_t N, + int64_t D, const T* data_ptr, const T* lambda1_ptr, const T* lambda2_ptr, @@ -38,7 +38,7 @@ class BatchBoxCoxOp final : public Operator { #ifdef CAFFE2_USE_MKL template void BoxCoxNonzeroLambda( - TIndex D, + int64_t D, const T* data_ptr, const T* lambda1, const T* lambda2, @@ -47,7 +47,7 @@ class BatchBoxCoxOp final : public Operator { template void BoxCoxZeroLambda( - TIndex D, + int64_t D, const T* data_ptr, const T* lambda2, T k_eps, diff --git a/caffe2/operators/batch_bucketize_op.cc b/caffe2/operators/batch_bucketize_op.cc index dbbd56d75f46..21f3029de4d4 100644 --- a/caffe2/operators/batch_bucketize_op.cc +++ b/caffe2/operators/batch_bucketize_op.cc @@ -26,21 +26,21 @@ bool BatchBucketizeOp::RunOnDevice() { auto feature_dim = 
feature.dim(1); auto output_dim = indices.size(); - TIndex length_sum = 0; - for (TIndex i = 0; i < lengths.size(); i++) { + int64_t length_sum = 0; + for (int64_t i = 0; i < lengths.size(); i++) { CAFFE_ENFORCE_GE(feature_dim, indices_data[i]); length_sum += lengths_data[i]; } CAFFE_ENFORCE_EQ(length_sum, boundaries.size()); - TIndex lower_bound = 0; + int64_t lower_bound = 0; output->Resize(batch_size, output_dim); auto* output_data = output->template mutable_data(); - for (TIndex i = 0; i < batch_size; i++) { + for (int64_t i = 0; i < batch_size; i++) { lower_bound = 0; - for (TIndex j = 0; j < output_dim; j++) { - for (TIndex k = 0; k <= lengths_data[j]; k++) { + for (int64_t j = 0; j < output_dim; j++) { + for (int64_t k = 0; k <= lengths_data[j]; k++) { if (k == lengths_data[j] || feature_data[i * feature_dim + indices_data[j]] <= boundaries_data[lower_bound + k]) { diff --git a/caffe2/operators/batch_gather_ops.cu b/caffe2/operators/batch_gather_ops.cu index 2d047660491b..d1559dc6d9a3 100644 --- a/caffe2/operators/batch_gather_ops.cu +++ b/caffe2/operators/batch_gather_ops.cu @@ -41,7 +41,7 @@ bool BatchGatherOp::DoRunWithType() { auto& indices = Input(INDICES); auto* output = Output(0); - vector shape; + vector shape; shape.push_back(data.dim(0)); shape.insert(shape.end(), indices.dims().begin(), indices.dims().end()); shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end()); diff --git a/caffe2/operators/batch_gather_ops.h b/caffe2/operators/batch_gather_ops.h index 9478150265dd..2b9e4d6d5e6e 100644 --- a/caffe2/operators/batch_gather_ops.h +++ b/caffe2/operators/batch_gather_ops.h @@ -26,7 +26,7 @@ class BatchGatherOp final : public Operator { CAFFE_ENFORCE_GE(data.ndim(), 2, "DATA should be at least 2-D"); - vector shape; + vector shape; shape.push_back(data.dim(0)); shape.insert(shape.end(), indices.dims().begin(), indices.dims().end()); shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end()); diff --git a/caffe2/operators/batch_matmul_op.cc b/caffe2/operators/batch_matmul_op.cc index eda519586e78..431e1f4a0a75 100644 --- a/caffe2/operators/batch_matmul_op.cc +++ b/caffe2/operators/batch_matmul_op.cc @@ -27,16 +27,16 @@ vector TensorInferenceForBatchMatMul( b_dim1 = in[1].dims(ndim - 1); } - auto output_dims = vector{in[0].dims().begin(), in[0].dims().end()}; + auto output_dims = vector{in[0].dims().begin(), in[0].dims().end()}; output_dims[ndim - 2] = a_dim0; output_dims[ndim - 1] = b_dim1; return vector{ - CreateTensorShape(vector{output_dims}, in[0].data_type())}; + CreateTensorShape(vector{output_dims}, in[0].data_type())}; } else { auto ndims_A = in[0].dims_size(); auto ndims_B = in[1].dims_size(); - std::vector dims_A(ndims_A), dims_B(ndims_B); + std::vector dims_A(ndims_A), dims_B(ndims_B); for (int i = 0; i < ndims_A; ++i) { dims_A[i] = in[0].dims(i); } @@ -66,7 +66,7 @@ vector TensorInferenceForBatchMatMul( N = dims_B[ndims_B - 1]; } - std::vector new_dims; + std::vector new_dims; if (ndims_A >= ndims_B) { new_dims.assign(dims_A.begin(), dims_A.end() - 2); } else { @@ -82,7 +82,7 @@ vector TensorInferenceForBatchMatMul( new_dims.push_back(1); } return vector{ - CreateTensorShape(vector{new_dims}, in[0].data_type())}; + CreateTensorShape(vector{new_dims}, in[0].data_type())}; } } diff --git a/caffe2/operators/batch_matmul_op.h b/caffe2/operators/batch_matmul_op.h index f4d32e5e3782..e4d5d01fada9 100644 --- a/caffe2/operators/batch_matmul_op.h +++ b/caffe2/operators/batch_matmul_op.h @@ -175,7 +175,7 @@ class BatchMatMulOp final : public 
Operator { // Calculate output tensor shapes [B..., (M), (N)] // Batch dimensions will be broadcasted out to those of the longer tensor // A or B. Either M or N are optional if A or B, respectively are 1-D. - std::vector new_dims; + std::vector new_dims; if (ndims_A >= ndims_B) { new_dims.assign(dims_A.begin(), dims_A.end() - 2); } else { diff --git a/caffe2/operators/batch_matmul_op_gpu_test.cc b/caffe2/operators/batch_matmul_op_gpu_test.cc index 57a09e3e60c7..804296307d6e 100644 --- a/caffe2/operators/batch_matmul_op_gpu_test.cc +++ b/caffe2/operators/batch_matmul_op_gpu_test.cc @@ -26,7 +26,7 @@ class BatchMatMulOpGPUTest : public testing::Test { } void AddConstInput( - const std::vector& dims, + const std::vector& dims, const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); @@ -39,7 +39,7 @@ class BatchMatMulOpGPUTest : public testing::Test { cuda_context_.get()); } - void VerifyOutput(const std::vector& dims, const float value) const { + void VerifyOutput(const std::vector& dims, const float value) const { const Blob* Y_blob = ws_.GetBlob("Y"); ASSERT_NE(nullptr, Y_blob); const auto& Y = Y_blob->Get(); @@ -64,12 +64,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUNormalTest) { if (!HasCudaGPU()) { return; } - AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); - AddConstInput(std::vector{3, 10, 6}, 1.0f, "B"); + AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); + AddConstInput(std::vector{3, 10, 6}, 1.0f, "B"); std::unique_ptr op(CreateOperator(def_, &ws_)); ASSERT_NE(nullptr, op); ASSERT_TRUE(op->Run()); - VerifyOutput(std::vector{3, 5, 6}, 10.0f); + VerifyOutput(std::vector{3, 5, 6}, 10.0f); } TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) { @@ -79,12 +79,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) { auto* arg = def_.add_arg(); arg->set_name("broadcast"); arg->set_i(1); - AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); - AddConstInput(std::vector{2, 3, 10, 6}, 1.0f, "B"); + AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); + AddConstInput(std::vector{2, 3, 10, 6}, 1.0f, "B"); std::unique_ptr op(CreateOperator(def_, &ws_)); ASSERT_NE(nullptr, op); ASSERT_TRUE(op->Run()); - VerifyOutput(std::vector{2, 3, 5, 6}, 10.0f); + VerifyOutput(std::vector{2, 3, 5, 6}, 10.0f); } } // namespace diff --git a/caffe2/operators/batch_matmul_op_test.cc b/caffe2/operators/batch_matmul_op_test.cc index 28fa8c1a9086..45db7dd5b848 100644 --- a/caffe2/operators/batch_matmul_op_test.cc +++ b/caffe2/operators/batch_matmul_op_test.cc @@ -20,7 +20,7 @@ class BatchMatMulOpTest : public testing::Test { } void AddConstInput( - const std::vector& dims, + const std::vector& dims, const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); @@ -33,7 +33,7 @@ class BatchMatMulOpTest : public testing::Test { cpu_context_.get()); } - void VerifyOutput(const std::vector& dims, const float value) const { + void VerifyOutput(const std::vector& dims, const float value) const { const Blob* Y_blob = ws_.GetBlob("Y"); ASSERT_NE(nullptr, Y_blob); const auto& Y = Y_blob->Get(); @@ -54,24 +54,24 @@ class BatchMatMulOpTest : public testing::Test { }; TEST_F(BatchMatMulOpTest, BatchMatMulOpNormalTest) { - AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); - AddConstInput(std::vector{3, 10, 6}, 1.0f, "B"); + AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); + AddConstInput(std::vector{3, 10, 6}, 1.0f, "B"); std::unique_ptr op(CreateOperator(def_, &ws_)); ASSERT_NE(nullptr, op); ASSERT_TRUE(op->Run()); - VerifyOutput(std::vector{3, 5, 6}, 10.0f); + 
VerifyOutput(std::vector{3, 5, 6}, 10.0f); } TEST_F(BatchMatMulOpTest, BatchMatMulOpBroadcastTest) { auto* arg = def_.add_arg(); arg->set_name("broadcast"); arg->set_i(1); - AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); - AddConstInput(std::vector{2, 3, 10, 6}, 1.0f, "B"); + AddConstInput(std::vector{3, 5, 10}, 1.0f, "A"); + AddConstInput(std::vector{2, 3, 10, 6}, 1.0f, "B"); std::unique_ptr op(CreateOperator(def_, &ws_)); ASSERT_NE(nullptr, op); ASSERT_TRUE(op->Run()); - VerifyOutput(std::vector{2, 3, 5, 6}, 10.0f); + VerifyOutput(std::vector{2, 3, 5, 6}, 10.0f); } } // namespace diff --git a/caffe2/operators/batch_sparse_to_dense_op.cc b/caffe2/operators/batch_sparse_to_dense_op.cc index b02e4992889e..8d191c63ce93 100644 --- a/caffe2/operators/batch_sparse_to_dense_op.cc +++ b/caffe2/operators/batch_sparse_to_dense_op.cc @@ -14,15 +14,15 @@ bool BatchSparseToDenseOp::RunOnDevice() { CAFFE_ENFORCE_EQ(lengths.ndim(), 1); CAFFE_ENFORCE_EQ(indices.ndim(), 1); - const TIndex* lengths_data = lengths.template data(); - const TIndex* indices_data = indices.template data(); + const int64_t* lengths_data = lengths.template data(); + const int64_t* indices_data = indices.template data(); const T* values_data = values.template data(); - TIndex batch_size = lengths.size(); - TIndex lengths_sum = 0; - math::Sum(batch_size, lengths_data, &lengths_sum, &context_); + int64_t batch_size = lengths.size(); + int64_t lengths_sum = 0; + math::Sum(batch_size, lengths_data, &lengths_sum, &context_); CAFFE_ENFORCE_EQ(lengths_sum, indices.size()); - vector output_shape = {batch_size}; + vector output_shape = {batch_size}; if (InputSize() == 4) { auto& shaper = Input(3); CAFFE_ENFORCE_EQ(shaper.ndim(), 2); @@ -42,9 +42,9 @@ bool BatchSparseToDenseOp::RunOnDevice() { math::Set( output->size(), static_cast(default_value_), output_data, &context_); - TIndex k = 0; - for (TIndex i = 0; i < batch_size; ++i) { - for (TIndex j = 0; j < lengths_data[i]; ++j) { + int64_t k = 0; + for (int64_t i = 0; i < batch_size; ++i) { + for (int64_t j = 0; j < lengths_data[i]; ++j) { CAFFE_ENFORCE( indices_data[k] < dense_last_dim_, "An indice (", @@ -69,24 +69,24 @@ bool BatchDenseToSparseOp::RunOnDevice() { CAFFE_ENFORCE_EQ(lengths.ndim(), 1); CAFFE_ENFORCE_EQ(indices.ndim(), 1); CAFFE_ENFORCE_EQ(dense.ndim(), 2); - const TIndex* lengths_data = lengths.template data(); - const TIndex* indices_data = indices.template data(); + const int64_t* lengths_data = lengths.template data(); + const int64_t* indices_data = indices.template data(); const T* dense_data = dense.template data(); - TIndex batch_size = lengths.size(); - TIndex lengths_sum = 0; - math::Sum(batch_size, lengths_data, &lengths_sum, &context_); + int64_t batch_size = lengths.size(); + int64_t lengths_sum = 0; + math::Sum(batch_size, lengths_data, &lengths_sum, &context_); CAFFE_ENFORCE_EQ(lengths_sum, indices.size()); CAFFE_ENFORCE_EQ(batch_size, dense.dim(0)); dense_last_dim_ = dense.dim(1); - vector output_shape = indices.dims(); + vector output_shape = indices.dims(); output->Resize(output_shape); T* output_data = output->template mutable_data(); - TIndex k = 0; - for (TIndex i = 0; i < batch_size; ++i) { - for (TIndex j = 0; j < lengths_data[i]; ++j) { + int64_t k = 0; + for (int64_t i = 0; i < batch_size; ++i) { + for (int64_t j = 0; j < lengths_data[i]; ++j) { CAFFE_ENFORCE( indices_data[k] < dense.dim(1), "An indice (", diff --git a/caffe2/operators/batch_sparse_to_dense_op.h b/caffe2/operators/batch_sparse_to_dense_op.h index de6c69b795d1..8dc9da3dcf70 100644 
--- a/caffe2/operators/batch_sparse_to_dense_op.h +++ b/caffe2/operators/batch_sparse_to_dense_op.h @@ -15,12 +15,12 @@ class BatchSparseToDenseOp : public Operator { USE_OPERATOR_CONTEXT_FUNCTIONS; BatchSparseToDenseOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - OP_SINGLE_ARG(TIndex, "dense_last_dim", dense_last_dim_, -1), + OP_SINGLE_ARG(int64_t, "dense_last_dim", dense_last_dim_, -1), OP_SINGLE_ARG(T, "default_value", default_value_, static_cast(0)) {} bool RunOnDevice() override; private: - TIndex dense_last_dim_; + int64_t dense_last_dim_; T default_value_; INPUT_TAGS(LENGTHS, INDICES, VALUES); }; @@ -34,7 +34,7 @@ class BatchDenseToSparseOp : public Operator { bool RunOnDevice() override; private: - TIndex dense_last_dim_; + int64_t dense_last_dim_; INPUT_TAGS(LENGTHS, INDICES, DENSE); }; diff --git a/caffe2/operators/bbox_transform_op.cc b/caffe2/operators/bbox_transform_op.cc index 79520face8c0..6dbea350960a 100644 --- a/caffe2/operators/bbox_transform_op.cc +++ b/caffe2/operators/bbox_transform_op.cc @@ -138,7 +138,7 @@ bool BBoxTransformOp::RunOnDevice() { } } - CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector{batch_size, 3})); + CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector{batch_size, 3})); Eigen::Map iminfo( iminfo_in.data(), iminfo_in.dim(0), iminfo_in.dim(1)); diff --git a/caffe2/operators/boolean_mask_ops.cc b/caffe2/operators/boolean_mask_ops.cc index 2d1deb0badc5..c2ab55891a48 100644 --- a/caffe2/operators/boolean_mask_ops.cc +++ b/caffe2/operators/boolean_mask_ops.cc @@ -62,7 +62,7 @@ bool BooleanMaskOp::RunOnDevice() { ++numOutputs; } } - std::vector outShape; + std::vector outShape; outShape.push_back(numOutputs); outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end()); dataOut->Resize(outShape); @@ -81,11 +81,11 @@ bool BooleanMaskOp::RunOnDevice() { const auto innerSize = data.size_from_dim(1); const auto innerSizeBytes = innerSize * data.meta().itemsize(); - TIndex lastStart = -1; + int64_t lastStart = -1; const auto* inPtr = (char*)data.raw_data(); - TIndex outStart = 0; + int64_t outStart = 0; - for (TIndex i = 0;; ++i) { + for (int64_t i = 0;; ++i) { // mask was true and either a) became false, or b) sequence finished if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) { const auto* src = inPtr + lastStart * innerSizeBytes; diff --git a/caffe2/operators/boolean_mask_ops.cu b/caffe2/operators/boolean_mask_ops.cu index 855cf6202bc9..2dcc28a064ca 100644 --- a/caffe2/operators/boolean_mask_ops.cu +++ b/caffe2/operators/boolean_mask_ops.cu @@ -7,15 +7,15 @@ namespace caffe2 { namespace { __global__ void BooleanMaskCopyKernel( - const TIndex numOfOutput, - const TIndex numBytes, - const TIndex* indices, + const int64_t numOfOutput, + const int64_t numBytes, + const int64_t* indices, const uint8_t* src, uint8_t* dest) { - for (TIndex i = blockIdx.x; i < numOfOutput; i += gridDim.x) { + for (int64_t i = blockIdx.x; i < numOfOutput; i += gridDim.x) { const auto srcBase = indices[i] * numBytes; const auto destBase = i * numBytes; - for (TIndex j = threadIdx.x; j < numBytes; j += blockDim.x) { + for (int64_t j = threadIdx.x; j < numBytes; j += blockDim.x) { dest[destBase + j] = src[srcBase + j]; } } @@ -40,7 +40,7 @@ class BooleanMaskOp final : public Operator { const auto* maskData = mask.data(); const auto outerSize = mask.dims()[0]; indices_.Resize(outerSize); - auto* indicesData = indices_.mutable_data(); + auto* indicesData = indices_.mutable_data(); size_t numBytes = 0; cub::CountingInputIterator itr(0); @@ -50,16 
+50,16 @@ class BooleanMaskOp final : public Operator { itr, maskData, indicesData, - static_cast(nullptr), + static_cast(nullptr), outerSize, context_.cuda_stream()); - auto numTIndex = - static_cast((numBytes + sizeof(TIndex) - 1) / sizeof(TIndex)); - // allocate one more TIndex at the end of scratch for storing numOfOutput - scratch_.Resize(numTIndex + 1); - auto* scratchData = scratch_.mutable_data(); - auto* numOfOutputData = scratchData + numTIndex; + auto numint64_t = + static_cast((numBytes + sizeof(int64_t) - 1) / sizeof(int64_t)); + // allocate one more int64_t at the end of scratch for storing numOfOutput + scratch_.Resize(numint64_t + 1); + auto* scratchData = scratch_.mutable_data(); + auto* numOfOutputData = scratchData + numint64_t; cub::DeviceSelect::Flagged( static_cast(scratchData), @@ -72,11 +72,11 @@ class BooleanMaskOp final : public Operator { context_.cuda_stream()); // Copy numOfOutput from gpu to cpu - TIndex numOfOutput; + int64_t numOfOutput; context_.CopyToCPU(1, numOfOutputData, &numOfOutput); indices_.Resize(numOfOutput); - std::vector dims = src.dims(); + std::vector dims = src.dims(); dims[0] = numOfOutput; dest->Resize(dims); auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta()); @@ -84,12 +84,12 @@ class BooleanMaskOp final : public Operator { if (OutputSize() == 2) { auto* indicesOut = Output(1); indicesOut->Resize(numOfOutput); - indicesOut->template mutable_data(); + indicesOut->template mutable_data(); } if (numOfOutput > 0) { BooleanMaskCopyKernel<<< - min(numOfOutput, static_cast(CAFFE_MAXIMUM_NUM_BLOCKS)), + min(numOfOutput, static_cast(CAFFE_MAXIMUM_NUM_BLOCKS)), CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>( diff --git a/caffe2/operators/boolean_unmask_ops_test.cc b/caffe2/operators/boolean_unmask_ops_test.cc index 2972cee49574..8814be17153d 100644 --- a/caffe2/operators/boolean_unmask_ops_test.cc +++ b/caffe2/operators/boolean_unmask_ops_test.cc @@ -18,10 +18,10 @@ static void AddScalarInput( Blob* blob = ws->CreateBlob(name); auto* tensor = blob->GetMutableTensor(CPU); if (!isEmpty) { - tensor->Resize(vector{1}); + tensor->Resize(vector{1}); *(tensor->template mutable_data()) = value; } else { - tensor->Resize(vector{0}); + tensor->Resize(vector{0}); tensor->template mutable_data(); } return; diff --git a/caffe2/operators/cast_op.cc b/caffe2/operators/cast_op.cc index de3345b832cc..eb7ba0d86857 100644 --- a/caffe2/operators/cast_op.cc +++ b/caffe2/operators/cast_op.cc @@ -11,7 +11,7 @@ bool CastOp::DoRunWithType() { const auto* data = input.template data(); auto* out = output->template mutable_data(); auto N = input.size(); - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { out[i] = static_cast(data[i]); } return true; diff --git a/caffe2/operators/cast_op.h b/caffe2/operators/cast_op.h index 491028c8105a..e880a10a5ff1 100644 --- a/caffe2/operators/cast_op.h +++ b/caffe2/operators/cast_op.h @@ -42,7 +42,7 @@ class CastOp : public Operator { const auto* data = input.template data(); auto* out = output->template mutable_data(); auto N = input.size(); - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { out[i] = static_cast(data[i]); } return true; diff --git a/caffe2/operators/concat_split_op.h b/caffe2/operators/concat_split_op.h index d62017152070..1b5a2430ff41 100644 --- a/caffe2/operators/concat_split_op.h +++ b/caffe2/operators/concat_split_op.h @@ -161,7 +161,7 @@ bool SplitOp::RunOnDevice() { input_channels, "Sum of split dimensions do not match: should be ", input_channels); - vector 
output_dims(input.dims()); + vector output_dims(input.dims()); int before = 1, after = 1; for (int i = 0; i < canonical_axis; ++i) { before *= input.dim32(i); @@ -215,7 +215,7 @@ bool SplitByLengthsOp::RunOnDevice() { input_channels, "Sum of split dimensions do not match: should be ", input_channels); - vector output_dims(input.dims()); + vector output_dims(input.dims()); int before = input.size_to_dim(canonical_axis); int after = input.size_from_dim(canonical_axis + 1); size_t input_offset = 0; @@ -245,7 +245,7 @@ template bool ConcatOp::RunOnDevice() { auto* output = Output(0); Tensor* split = this->template Output(1, CPU); - split->Resize(vector(1, InputSize())); + split->Resize(vector(1, InputSize())); int* axis_data = split->template mutable_data(); auto& input_zero = Input(0); int adj_size = input_zero.ndim() + (add_axis_ ? 1 : 0); @@ -263,7 +263,7 @@ bool ConcatOp::RunOnDevice() { } int before = 1, after = 1; - vector output_dims(input_zero.dims()); + vector output_dims(input_zero.dims()); for (int i = 0; i < input_zero.ndim(); ++i) { if (i == canonical_axis && !add_axis_) { continue; diff --git a/caffe2/operators/conditional_op.cc b/caffe2/operators/conditional_op.cc index e202ea2e9881..3cb301cc66f7 100644 --- a/caffe2/operators/conditional_op.cc +++ b/caffe2/operators/conditional_op.cc @@ -31,7 +31,7 @@ bool ConditionalOp::RunOnDevice() { // perform conditional op along first dimension const auto* ptrT = (char*)dataT.raw_data(); const auto* ptrF = (char*)dataF.raw_data(); - for (TIndex i = 0; i < condition.size(); i++) { + for (int64_t i = 0; i < condition.size(); i++) { auto* dst = outPtr + i * innerSizeBytes; if (condPtr[i]) { context_.CopyItemsSameDevice( diff --git a/caffe2/operators/conv_op_cache_cudnn.h b/caffe2/operators/conv_op_cache_cudnn.h index ee3bae2363bc..aefb1b61205f 100644 --- a/caffe2/operators/conv_op_cache_cudnn.h +++ b/caffe2/operators/conv_op_cache_cudnn.h @@ -16,8 +16,8 @@ class AlgorithmsCache { // combination of tensor dimensions & compute data type. // TAlgorithm getAlgorithm( - const std::vector& tensorDimensions1, - const std::vector& tensorDimensions2, + const std::vector& tensorDimensions1, + const std::vector& tensorDimensions2, int algorithmFlags, // Differentiate between algorithms with different // parameters in a generic way std::function generatingFunc); @@ -28,14 +28,14 @@ class AlgorithmsCache { template TAlgorithm AlgorithmsCache::getAlgorithm( - const std::vector& tensorDimensions1, - const std::vector& tensorDimensions2, + const std::vector& tensorDimensions1, + const std::vector& tensorDimensions2, int algorithmFlags, std::function generatingFunc) { int64_t seed = 0; // Hash all of the inputs, which we wiill then use to try and look up // a previously discovered algorithm, or fall back to generating a new one. - std::hash hashFn; + std::hash hashFn; for (const auto num : tensorDimensions1) { // Copied from boost::hash_combine. // Adding 1 to differentiate between first and second vector. 
diff --git a/caffe2/operators/conv_op_cache_cudnn_test.cc b/caffe2/operators/conv_op_cache_cudnn_test.cc
index 2d2da0d465f6..5867e487de75 100644
--- a/caffe2/operators/conv_op_cache_cudnn_test.cc
+++ b/caffe2/operators/conv_op_cache_cudnn_test.cc
@@ -12,11 +12,11 @@ namespace caffe2 {
 
 TEST(AlgorithmsCacheTest, CachesCorrectly) {
   AlgorithmsCache<int> cache;
   int result = cache.getAlgorithm(
-      std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 5; });
+      std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 5; });
   EXPECT_EQ(result, 5);
 
   int res2 = cache.getAlgorithm(
-      std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 10; });
+      std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 10; });
   EXPECT_EQ(res2, 5);
 }
 
@@ -24,11 +24,11 @@ TEST(AlgorithmsCacheTest, CachesCorrectly) {
 TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
   AlgorithmsCache<int> cache;
   int result = cache.getAlgorithm(
-      std::vector<TIndex>(1, 10), std::vector<TIndex>(), 0, []() { return 5; });
+      std::vector<int64_t>(1, 10), std::vector<int64_t>(), 0, []() { return 5; });
   EXPECT_EQ(result, 5);
 
   int res2 = cache.getAlgorithm(
-      std::vector<TIndex>(), std::vector<TIndex>(1, 10), 0, []() {
+      std::vector<int64_t>(), std::vector<int64_t>(1, 10), 0, []() {
         return 10;
       });
 
@@ -38,20 +38,20 @@ TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
 TEST(AlgorithmsCacheTest, KeysDifferIfFlagsAreDifferent) {
   AlgorithmsCache<int> cache;
   int result = cache.getAlgorithm(
-      std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 123, []() {
+      std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 123, []() {
         return 5;
       });
 
   EXPECT_EQ(result, 5);
 
   int res2 = cache.getAlgorithm(
-      std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
+      std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
         return 10;
       });
 
   EXPECT_EQ(res2, 10);
 
   int res3 = cache.getAlgorithm(
-      std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
+      std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
         return 15;
       });
diff --git a/caffe2/operators/conv_op_cudnn.cc b/caffe2/operators/conv_op_cudnn.cc
index 7a983bf4e629..fa52a5bd5a48 100644
--- a/caffe2/operators/conv_op_cudnn.cc
+++ b/caffe2/operators/conv_op_cudnn.cc
@@ -411,8 +411,8 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
     }
   }
 
-  vector<TIndex> cudnn_input_dims_;
-  vector<TIndex> cudnn_filter_dims_;
+  vector<int64_t> cudnn_input_dims_;
+  vector<int64_t> cudnn_filter_dims_;
 
   CuDNNWrapper cudnn_wrapper_;
   cudnnTensorDescriptor_t bottom_desc_;
diff --git a/caffe2/operators/conv_op_eigen.cc b/caffe2/operators/conv_op_eigen.cc
index b565b567ab29..a559a7c574db 100644
--- a/caffe2/operators/conv_op_eigen.cc
+++ b/caffe2/operators/conv_op_eigen.cc
@@ -42,10 +42,10 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
   CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
   CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
   ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
-  Eigen::array<TIndex, 4> kernel_shuffles
-      { {TIndex(2), TIndex(3), TIndex(1), TIndex(0)} };
-  Eigen::array<TIndex, 4> input_shuffles
-      { {TIndex(0), TIndex(2), TIndex(3), TIndex(1)} };
+  Eigen::array<int64_t, 4> kernel_shuffles
+      { {int64_t(2), int64_t(3), int64_t(1), int64_t(0)} };
+  Eigen::array<int64_t, 4> input_shuffles
+      { {int64_t(0), int64_t(2), int64_t(3), int64_t(1)} };
 
   Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
       Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
@@ -109,14 +109,14 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
     // It seems that the bias broadcast is still slower so let's do the
     // following for now.
     EigenArrayMap<T> Y_arr(
-        Y_tensor.data(), static_cast<TIndex>(M), Y->size() / M);
+        Y_tensor.data(), static_cast<int64_t>(M), Y->size() / M);
     ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
     Y_arr = Y_arr.colwise() + bias_arr;
   }
 
   // Do a last transpose.
- Eigen::array output_shuffles - { {TIndex(0), TIndex(3), TIndex(1), TIndex(2) } }; + Eigen::array output_shuffles + { {int64_t(0), int64_t(3), int64_t(1), int64_t(2) } }; Eigen::TensorMap>( Y->template mutable_data(), N, M, Y->dim32(2), Y->dim32(3)) = @@ -204,7 +204,7 @@ bool EigenConvOp::RunOnDeviceWithOrderNHWC() { // It seems that the bias broadcast is still slower so let's do the // following for now. EigenArrayMap Y_arr( - Y->template mutable_data(), static_cast(M), Y->size() / M); + Y->template mutable_data(), static_cast(M), Y->size() / M); ConstEigenVectorArrayMap bias_arr(bias.template data(), M); Y_arr = Y_arr.colwise() + bias_arr; } diff --git a/caffe2/operators/conv_op_impl.h b/caffe2/operators/conv_op_impl.h index 8fea61f7e627..7455bfc29cf0 100644 --- a/caffe2/operators/conv_op_impl.h +++ b/caffe2/operators/conv_op_impl.h @@ -240,7 +240,7 @@ bool ConvOp::RunOnDeviceWithOrderNHWC() { } auto f = [&](Tensor* col_buffer) { col_buffer->Resize( - vector{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C}); + vector{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C}); T* col_buffer_data = col_buffer->template mutable_data(); // Im2Col, followed by gemm. for (int image_id = 0; image_id < N; ++image_id) { @@ -504,7 +504,7 @@ bool ConvGradientOp::RunOnDeviceWithOrderNCHW() { dbias->Resize(M); if (bias_multiplier_.size() != output_image_size) { // If the helper bias multiplier is not M, reshape and fill it with one. - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); math::Set( output_image_size, static_cast(1), @@ -689,7 +689,7 @@ bool ConvGradientOp::RunOnDeviceWithOrderNHWC() { math::Set(dbias->size(), 0, dbias_data, &context_); if (bias_multiplier_.size() != output_image_size) { // If the helper bias multiplier is not M, reshape and fill it with one. - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); math::Set( output_image_size, static_cast(1), diff --git a/caffe2/operators/conv_pool_op_base.h b/caffe2/operators/conv_pool_op_base.h index 9b2fa02fa712..43b0bee665f1 100644 --- a/caffe2/operators/conv_pool_op_base.h +++ b/caffe2/operators/conv_pool_op_base.h @@ -246,7 +246,7 @@ class ConvPoolOpBase : public Operator { // Helper function that is also called from OperatorSchema. Modified // kernel parameters and output output_dims and channel_first. static inline void InferOutputSize( - vector input_dims, + vector input_dims, int /*output_channel*/, StorageOrder order, bool global_pooling, @@ -259,7 +259,7 @@ class ConvPoolOpBase : public Operator { vector& pads, bool& channel_first) { channel_first = false; // initialized to suppress compiler warning. - vector dims; + vector dims; switch (order) { case StorageOrder::NHWC: channel_first = false; @@ -358,7 +358,7 @@ class ConvPoolOpBase : public Operator { if (bias_multiplier_->size() != size) { // If the helper bias multiplier is not image size, reshape and fill it // with one. 
- bias_multiplier_->Resize(std::vector{size}); + bias_multiplier_->Resize(std::vector{size}); math::Set( size, static_cast(1), diff --git a/caffe2/operators/conv_transpose_op_cudnn.cc b/caffe2/operators/conv_transpose_op_cudnn.cc index 28435325f231..b02210f192e6 100644 --- a/caffe2/operators/conv_transpose_op_cudnn.cc +++ b/caffe2/operators/conv_transpose_op_cudnn.cc @@ -64,8 +64,8 @@ class CudnnConvTransposeOpBase : public ConvTransposeUnpoolBase { } protected: - vector cudnn_input_dims_; - vector cudnn_filter_dims_; + vector cudnn_input_dims_; + vector cudnn_filter_dims_; CuDNNWrapper cudnn_wrapper_; cudnnTensorDescriptor_t bottom_desc_; diff --git a/caffe2/operators/conv_transpose_op_impl.h b/caffe2/operators/conv_transpose_op_impl.h index 23def95ea9bd..a5f85303b2d9 100644 --- a/caffe2/operators/conv_transpose_op_impl.h +++ b/caffe2/operators/conv_transpose_op_impl.h @@ -45,7 +45,7 @@ bool ConvTransposeOp::RunOnDeviceWithOrderNCHW() { bias.dim32(0) == C, "bias dimension must be equal to output channel number"); if (bias_multiplier_.size() != output_image_size) { - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); T* bm_data = bias_multiplier_.template mutable_data(); math::Set( output_image_size, @@ -61,7 +61,7 @@ bool ConvTransposeOp::RunOnDeviceWithOrderNCHW() { auto f = [&](Tensor* col_buffer) { col_buffer->Resize( - vector{C, this->kernel_h(), this->kernel_w(), H, W}); + vector{C, this->kernel_h(), this->kernel_w(), H, W}); T* col_buffer_data = col_buffer->template mutable_data(); for (auto image_id = 0; image_id < N; ++image_id) { // Weight term @@ -167,7 +167,7 @@ bool ConvTransposeOp::RunOnDeviceWithOrderNHWC() { bias.dim32(0) == C, "bias dimension must be equal to output channel number"); if (bias_multiplier_.size() != output_image_size) { - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); T* bm_data = bias_multiplier_.template mutable_data(); math::Set( output_image_size, @@ -182,7 +182,7 @@ bool ConvTransposeOp::RunOnDeviceWithOrderNHWC() { auto f = [&](Tensor* /*col_buffer*/) { col_buffer_.Resize( - vector{H, W, this->kernel_h(), this->kernel_w(), C}); + vector{H, W, this->kernel_h(), this->kernel_w(), C}); T* col_buffer_data = col_buffer_.template mutable_data(); for (auto image_id = 0; image_id < N; ++image_id) { // Weight term @@ -270,7 +270,7 @@ bool ConvTransposeGradientOp::RunOnDeviceWithOrderNCHW() { const int output_image_size = dY.dim32(2) * dY.dim32(3); // The col buffer is stored in CHW order as well col_buffer_.Resize( - vector{C, this->kernel_h(), this->kernel_w(), H, W}); + vector{C, this->kernel_h(), this->kernel_w(), H, W}); if (!no_bias_) { auto* dbias = Output(BIAS_OR_INPUT_GRAD); dbias->Resize(C); @@ -422,7 +422,7 @@ bool ConvTransposeGradientOp::RunOnDeviceWithOrderNHWC() { const int output_image_size = dY.dim32(1) * dY.dim32(2); // The col buffer is stored in HWC order as well col_buffer_.Resize( - vector{H, W, this->kernel_h(), this->kernel_w(), C}); + vector{H, W, this->kernel_h(), this->kernel_w(), C}); if (!no_bias_) { auto* dbias = Output(BIAS_OR_INPUT_GRAD); dbias->Resize(C); diff --git a/caffe2/operators/conv_transpose_op_mobile_test.cc b/caffe2/operators/conv_transpose_op_mobile_test.cc index da443928a974..6eb45eb5f8d1 100644 --- a/caffe2/operators/conv_transpose_op_mobile_test.cc +++ b/caffe2/operators/conv_transpose_op_mobile_test.cc @@ -10,7 +10,7 @@ namespace caffe2 { -void AddConstInput(const vector& shape, +void 
AddConstInput(const vector& shape, const float value, const string& name, Workspace* ws) { @@ -23,7 +23,7 @@ void AddConstInput(const vector& shape, tensor->size(), value, tensor->template mutable_data(), &context); } -void AddNoiseInput(const vector& shape, +void AddNoiseInput(const vector& shape, const string& name, Workspace* ws) { DeviceOption option; @@ -81,9 +81,9 @@ void compare(int N, int inputC, int H, int W, def1.add_arg()->CopyFrom(MakeArgument("adj_h", adjH)); def1.add_arg()->CopyFrom(MakeArgument("adj_w", adjW)); - AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); - AddNoiseInput(vector{inputC, outputC, kernelH, kernelW}, "W", &ws); - AddNoiseInput(vector{outputC}, "B", &ws); + AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); + AddNoiseInput(vector{inputC, outputC, kernelH, kernelW}, "W", &ws); + AddNoiseInput(vector{outputC}, "B", &ws); unique_ptr op1(CreateOperator(def1, &ws)); EXPECT_NE(nullptr, op1.get()); diff --git a/caffe2/operators/cross_entropy_op.cc b/caffe2/operators/cross_entropy_op.cc index 0473e7d4e435..c635c355e371 100644 --- a/caffe2/operators/cross_entropy_op.cc +++ b/caffe2/operators/cross_entropy_op.cc @@ -80,9 +80,9 @@ bool SigmoidCrossEntropyWithLogitsOp::RunOnDevice() { auto* out = Output(0); if (logits.ndim() == 0) { - out->Resize(std::vector{}); + out->Resize(std::vector{}); } else { - std::vector dims(logits.dims().begin(), logits.dims().end() - 1); + std::vector dims(logits.dims().begin(), logits.dims().end() - 1); out->Resize(dims); } auto* out_ptr = out->template mutable_data(); @@ -162,9 +162,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp::RunOnDevice() { auto* out = Output(0); if (logits.ndim() == 0) { - out->Resize(std::vector{}); + out->Resize(std::vector{}); } else { - std::vector dims(logits.dims().begin(), logits.dims().end() - 1); + std::vector dims(logits.dims().begin(), logits.dims().end() - 1); out->Resize(dims); } auto* out_ptr = out->template mutable_data(); @@ -260,11 +260,11 @@ bool MakeTwoClassOp::RunOnDevice() { auto* Y = Output(0); auto shape = X.dims(); shape.push_back(2); - TIndex N = X.size(); + int64_t N = X.size(); Y->Resize(shape); const auto* Xdata = X.data(); auto* Ydata = Y->template mutable_data(); - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { DCHECK_GE(Xdata[i], 0.0); DCHECK_LE(Xdata[i], 1.0); Ydata[i * 2] = 1.0 - Xdata[i]; @@ -284,9 +284,9 @@ bool MakeTwoClassGradientOp::RunOnDevice() { dX->Resize(shape); const float* dYdata = dY.data(); float* dXdata = dX->template mutable_data(); - TIndex N = dX->size(); + int64_t N = dX->size(); // use eigen? 
- for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { dXdata[i] = dYdata[i * 2 + 1] - dYdata[i * 2]; } return true; @@ -308,7 +308,7 @@ bool CrossEntropyOp::RunOnDevice() { CAFFE_ENFORCE( (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == D)); CAFFE_ENFORCE_EQ(label.dim32(0), N); - Y->Resize(vector{N}); + Y->Resize(vector{N}); const float* Xdata = X.data(); const float* labelData = label.data(); auto* Ydata = Y->template mutable_data(); diff --git a/caffe2/operators/cross_entropy_op.cu b/caffe2/operators/cross_entropy_op.cu index df7a124d2971..b8fc2521971d 100644 --- a/caffe2/operators/cross_entropy_op.cu +++ b/caffe2/operators/cross_entropy_op.cu @@ -42,7 +42,7 @@ bool LabelCrossEntropyOp::RunOnDevice() { CAFFE_ENFORCE( (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1)); CAFFE_ENFORCE_EQ(label.dim32(0), N); - Y->Resize(vector(size_t(1), N)); + Y->Resize(vector(size_t(1), N)); LabelCrossEntropyKernel<<< CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS, @@ -250,9 +250,9 @@ bool SigmoidCrossEntropyWithLogitsOp::RunOnDevice() { auto* out = Output(0); if (logits.ndim() == 0) { - out->Resize(std::vector{}); + out->Resize(std::vector{}); } else { - std::vector dims(logits.dims().begin(), logits.dims().end() - 1); + std::vector dims(logits.dims().begin(), logits.dims().end() - 1); out->Resize(dims); } auto* out_ptr = out->template mutable_data(); @@ -372,9 +372,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp:: auto* out = Output(0); if (logits.ndim() == 0) { - out->Resize(std::vector{}); + out->Resize(std::vector{}); } else { - std::vector dims(logits.dims().begin(), logits.dims().end() - 1); + std::vector dims(logits.dims().begin(), logits.dims().end() - 1); out->Resize(dims); } auto* out_ptr = out->template mutable_data(); diff --git a/caffe2/operators/ctc_beam_search_decoder_op.cc b/caffe2/operators/ctc_beam_search_decoder_op.cc index e299950e9d94..c6a565df340f 100644 --- a/caffe2/operators/ctc_beam_search_decoder_op.cc +++ b/caffe2/operators/ctc_beam_search_decoder_op.cc @@ -32,7 +32,7 @@ bool CTCBeamSearchDecoderOp::RunOnDevice() { (InputSize() == 2) ? Input(SEQ_LEN).data() : nullptr; vector values_cache; - output_len->Resize(vector{batch_size}); + output_len->Resize(vector{batch_size}); int* output_len_data = output_len->mutable_data(); for (int32_t i = 0; i < batch_size; ++i) { @@ -121,7 +121,7 @@ bool CTCBeamSearchDecoderOp::RunOnDevice() { } int32_t cache_size = values_cache.size(); - values->Resize(vector{cache_size}); + values->Resize(vector{cache_size}); int* values_data = values->mutable_data(); for (int i = 0; i < values_cache.size(); ++i) { values_data[i] = values_cache.at(i); diff --git a/caffe2/operators/ctc_greedy_decoder_op.cc b/caffe2/operators/ctc_greedy_decoder_op.cc index 8a5e0932defd..d1b8621b03b3 100644 --- a/caffe2/operators/ctc_greedy_decoder_op.cc +++ b/caffe2/operators/ctc_greedy_decoder_op.cc @@ -32,7 +32,7 @@ bool CTCGreedyDecoderOp::RunOnDevice() { (InputSize() == 2) ? 
Input(SEQ_LEN).data() : nullptr; vector values_cach; - output_len->Resize(vector{batch_size}); + output_len->Resize(vector{batch_size}); int* output_len_data = output_len->template mutable_data(); for (int32_t i = 0; i < batch_size; ++i) { @@ -54,7 +54,7 @@ bool CTCGreedyDecoderOp::RunOnDevice() { } int32_t values_cach_size = values_cach.size(); - values->Resize(vector{values_cach_size}); + values->Resize(vector{values_cach_size}); int* values_data = values->mutable_data(); for (int i = 0; i < values_cach.size(); ++i) { values_data[i] = values_cach.at(i); diff --git a/caffe2/operators/dataset_ops.cc b/caffe2/operators/dataset_ops.cc index 92b7f80129ec..832942242808 100644 --- a/caffe2/operators/dataset_ops.cc +++ b/caffe2/operators/dataset_ops.cc @@ -155,7 +155,7 @@ void TreeWalker::advance() { cursor_.it.advance(lengths_, cursor_.offsets, sizes_, limits_, 1); } -std::vector TreeWalker::fieldDim(int fieldId) const { +std::vector TreeWalker::fieldDim(int fieldId) const { auto tensorDim = input(fieldId).dims(); tensorDim[0] = sizes_[lengthIdx(fieldId)]; return tensorDim; @@ -355,7 +355,7 @@ class UnPackRecordsOp : public Operator { auto numTensors = OutputSize(); // Precomputer the output sizes to avoid resizing - std::vector> outputDims(numTensors); + std::vector> outputDims(numTensors); std::vector metas(numTensors); CAFFE_ENFORCE( @@ -414,7 +414,7 @@ class UnPackRecordsOp : public Operator { private: void getShapeAndMetaFromInput( - std::vector>& outputDims, + std::vector>& outputDims, std::vector& metas) { const auto* inputs = Input(0).template data(); @@ -434,7 +434,7 @@ class UnPackRecordsOp : public Operator { } void getShapeAndMetaFromPrototypeBlobs( - std::vector>& outputDims, + std::vector>& outputDims, std::vector& metas) { const auto numTensors = fields_.size(); CAFFE_ENFORCE_EQ(numTensors, InputSize() - 1); @@ -501,7 +501,7 @@ class ReadNextBatchOp : public Operator { } } // gather data - std::vector outDim; + std::vector outDim; for (int i = 0; i < cursor->it.fields().size(); ++i) { auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1; auto size = sizes[lengthIdx]; @@ -676,7 +676,7 @@ class ReadRandomBatchOp : public Operator { auto idxvec = idxblob.template data(); auto& offsetdim = offsetsmat.dims(); // gather data - std::vector outDim; + std::vector outDim; int64_t idx; { std::lock_guard lock(cursor->mutex_); @@ -883,7 +883,7 @@ class ConcatTensorVectorOp final : public Operator { auto* tensor = Output(TENSOR); CAFFE_ENFORCE(!tensorVector->empty()); - vector outputDims(tensorVector->at(0).dims()); + vector outputDims(tensorVector->at(0).dims()); CAFFE_ENFORCE(outputDims.size() > 0); for (int i = 1; i < tensorVector->size(); i++) { // the tensor shapes are the same except for the first dimension @@ -895,7 +895,7 @@ class ConcatTensorVectorOp final : public Operator { } tensor->Resize(outputDims); - TIndex offset = 0; + int64_t offset = 0; auto* dst = (char*)tensor->raw_mutable_data(tensorVector->at(0).meta()); for (const auto& t : *tensorVector) { diff --git a/caffe2/operators/dataset_ops.h b/caffe2/operators/dataset_ops.h index 809e570ba3c0..47a5260c83c2 100644 --- a/caffe2/operators/dataset_ops.h +++ b/caffe2/operators/dataset_ops.h @@ -123,7 +123,7 @@ class TreeWalker { return prevOffsets_[lengthIdx(fieldId)]; } - std::vector fieldDim(int fieldId) const; + std::vector fieldDim(int fieldId) const; void* fieldPtr(int fieldId) const; @@ -134,12 +134,12 @@ class TreeWalker { Field(TreeWalker& walker, int fieldId) : walker_(walker), fieldId_(fieldId) {} - inline 
std::vector dim() const { + inline std::vector dim() const { return walker_.fieldDim(fieldId_); } - inline TIndex size() const { - TIndex size = 1; + inline int64_t size() const { + int64_t size = 1; for (const auto d : dim()) { size *= d; } diff --git a/caffe2/operators/deform_conv_op.cu b/caffe2/operators/deform_conv_op.cu index 29e5552612bf..63ba77eed20e 100644 --- a/caffe2/operators/deform_conv_op.cu +++ b/caffe2/operators/deform_conv_op.cu @@ -67,8 +67,8 @@ namespace caffe2 { -typedef TIndex index_t; -typedef std::vector TShape; +typedef int64_t index_t; +typedef std::vector TShape; template __device__ DType deformable_im2col_bilinear( @@ -304,8 +304,8 @@ template void DeformConvOpBase::DeformableIm2col( const DType* data_im, const DType* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, DType* data_col) { CHECK_LT(2, CAFFE_CUDA_NUM_THREADS); CAFFE_ENFORCE_EQ(pad_t(), pad_b()); @@ -430,8 +430,8 @@ template void DeformConvOpBase::DeformableCol2im( const DType* data_col, const DType* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, DType* grad_im) { CAFFE_ENFORCE_EQ(pad_t(), pad_b()); CAFFE_ENFORCE_EQ(pad_l(), pad_r()); @@ -577,8 +577,8 @@ void DeformConvOpBase::DeformableCol2imCoord( const DType* data_col, const DType* data_im, const DType* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, DType* grad_offset) { CAFFE_ENFORCE_EQ(pad_t(), pad_b()); CAFFE_ENFORCE_EQ(pad_l(), pad_r()); diff --git a/caffe2/operators/deform_conv_op.h b/caffe2/operators/deform_conv_op.h index cfe29d4d56be..fb75ec9b0b2c 100644 --- a/caffe2/operators/deform_conv_op.h +++ b/caffe2/operators/deform_conv_op.h @@ -24,21 +24,21 @@ class DeformConvOpBase : public ConvPoolOpBase { void DeformableIm2col( const T* data_im, const T* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, T* data_col); void DeformableCol2im( const T* data_col, const T* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, T* grad_im); void DeformableCol2imCoord( const T* data_col, const T* data_im, const T* data_offset, - const std::vector& im_shape, - const std::vector& col_shape, + const std::vector& im_shape, + const std::vector& col_shape, T* grad_offset); protected: diff --git a/caffe2/operators/deform_conv_op_impl.h b/caffe2/operators/deform_conv_op_impl.h index 5d84d5905fd9..96d555460e0b 100644 --- a/caffe2/operators/deform_conv_op_impl.h +++ b/caffe2/operators/deform_conv_op_impl.h @@ -119,7 +119,7 @@ bool DeformConvOp::RunOnDeviceWithOrderNCHW() { // If the helper bias multiplier is not image size, reshape and fill it // with // one. - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); math::Set( output_image_size, static_cast(1), @@ -274,9 +274,9 @@ bool DeformConvGradientOp::RunOnDeviceWithOrderNCHW() { // The col buffer is stored in CHW order as well - kernel_dim, and the // height and width. 
- vector img_shape; + vector img_shape; img_shape.assign(X.dims().begin() + 1, X.dims().end()); - vector col_buffer_shape; + vector col_buffer_shape; col_buffer_shape.push_back(C * kernel_dims_size); col_buffer_shape.insert( col_buffer_shape.end(), output_dims.begin(), output_dims.end()); @@ -301,7 +301,7 @@ bool DeformConvGradientOp::RunOnDeviceWithOrderNCHW() { dbias->Resize(M); if (bias_multiplier_.size() != output_image_size) { // If the helper bias multiplier is not M, reshape and fill it with one. - bias_multiplier_.Resize(vector(1, output_image_size)); + bias_multiplier_.Resize(vector(1, output_image_size)); math::Set( output_image_size, static_cast(1), diff --git a/caffe2/operators/distance_op.cc b/caffe2/operators/distance_op.cc index 9a38a4a77a00..6cb940c5ad58 100644 --- a/caffe2/operators/distance_op.cc +++ b/caffe2/operators/distance_op.cc @@ -237,7 +237,7 @@ vector TensorInferenceForDotProduct( const vector& in) { CAFFE_ENFORCE_GT(in.size(), 0); - vector dims(1); + vector dims(1); dims[0] = in[0].dims().size() > 0 ? in[0].dims(0) : 1; return vector{CreateTensorShape(dims, in[0].data_type())}; } diff --git a/caffe2/operators/distance_op.cu b/caffe2/operators/distance_op.cu index 82ff859cae67..bfafb1523d6e 100644 --- a/caffe2/operators/distance_op.cu +++ b/caffe2/operators/distance_op.cu @@ -49,7 +49,7 @@ bool SquaredL2DistanceOp::RunOnDevice() { } int N = X.ndim() > 0 ? X.dim32(0) : 1; int D = X.size() / N; - distance->Resize(vector(size_t(1), N)); + distance->Resize(vector(size_t(1), N)); SquaredL2DistanceKernel<<< std::min(N, CAFFE_MAXIMUM_NUM_BLOCKS), CAFFE_CUDA_NUM_THREADS, @@ -164,7 +164,7 @@ bool L1DistanceOp::RunOnDevice() { } const int N = X.ndim() > 0 ? X.dim32(0) : 1; const int D = N > 0 ? X.size() / N : 0; - distance->Resize(vector(size_t(1), N)); + distance->Resize(vector(size_t(1), N)); L1DistanceKernel<<< std::min(N, CAFFE_MAXIMUM_NUM_BLOCKS), CAFFE_CUDA_NUM_THREADS, diff --git a/caffe2/operators/dropout_op_cudnn.cc b/caffe2/operators/dropout_op_cudnn.cc index fa8c437463e5..a68a1263f6f4 100644 --- a/caffe2/operators/dropout_op_cudnn.cc +++ b/caffe2/operators/dropout_op_cudnn.cc @@ -55,7 +55,7 @@ class CuDNNDropoutOp final : public Operator { cudnnTensorDescriptor_t data_desc_; cudnnDropoutDescriptor_t dropout_desc_; - vector cudnn_input_dims_; + vector cudnn_input_dims_; float ratio_; bool is_test_; @@ -113,7 +113,7 @@ class CuDNNDropoutGradientOp final : public Operator { cudnnTensorDescriptor_t data_desc_; cudnnDropoutDescriptor_t dropout_desc_; - vector cudnn_input_dims_; + vector cudnn_input_dims_; Blob* scratch_blob_; diff --git a/caffe2/operators/elementwise_op_test.h b/caffe2/operators/elementwise_op_test.h index 9afb154d9bdd..bcd547e28f09 100644 --- a/caffe2/operators/elementwise_op_test.h +++ b/caffe2/operators/elementwise_op_test.h @@ -16,7 +16,7 @@ template void FillTensor( caffe2::Workspace* ws, const std::string& name, - const std::vector& shape, + const std::vector& shape, const std::vector& values) { auto* blob = ws->CreateBlob(name); auto* tensor = blob->GetMutableTensor(Context::GetDeviceType()); diff --git a/caffe2/operators/elementwise_ops_schema.cc b/caffe2/operators/elementwise_ops_schema.cc index 4cf970284675..98e2c9a9d786 100644 --- a/caffe2/operators/elementwise_ops_schema.cc +++ b/caffe2/operators/elementwise_ops_schema.cc @@ -636,7 +636,7 @@ Performs element-wise {desc} comparison **{name}** (with limited broadcast suppo } \ } \ auto output_dims = \ - std::vector(in[0].dims().begin(), in[0].dims().end()); \ + 
std::vector(in[0].dims().begin(), in[0].dims().end()); \ return vector{ \ CreateTensorShape(output_dims, TensorProto::BOOL)}; \ }) \ diff --git a/caffe2/operators/expand_squeeze_dims_op.h b/caffe2/operators/expand_squeeze_dims_op.h index 1493abebe36a..505b1ec7d690 100644 --- a/caffe2/operators/expand_squeeze_dims_op.h +++ b/caffe2/operators/expand_squeeze_dims_op.h @@ -85,7 +85,7 @@ class SqueezeOp : public Operator { } static std::vector ComputeDims( - std::vector inputDims, + std::vector inputDims, std::vector dims) { int j = 0; std::vector newDims; diff --git a/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc b/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc index dc0d727fc4cd..8e8f6411b4d2 100644 --- a/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc @@ -4,7 +4,6 @@ using caffe2::BaseContext; using caffe2::Tensor; -using caffe2::TIndex; using std::vector; namespace caffe2 { @@ -16,7 +15,7 @@ void averaged_loss_op_cpu_impl( Tensor* sum, caffe2::ops::AveragedLoss::State* state, BaseContext* context) { - sum->Resize(vector()); + sum->Resize(vector()); T* data = sum->template mutable_data(); diff --git a/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc b/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc index 62c0c790b3ee..6a03dfcd2a4d 100644 --- a/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc @@ -4,7 +4,6 @@ using caffe2::BaseContext; using caffe2::Tensor; -using caffe2::TIndex; using std::vector; namespace caffe2 { @@ -18,7 +17,7 @@ void batch_gather_op_cpu_impl( BaseContext* context) { CAFFE_ENFORCE_GE(data.ndim(), 2, "DATA should be at least 2-D"); - vector shape; + vector shape; shape.push_back(data.dim(0)); shape.insert(shape.end(), indices.dims().begin(), indices.dims().end()); shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end()); diff --git a/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc b/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc index 5e74f83656bc..72677f26b1b5 100644 --- a/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc @@ -4,7 +4,6 @@ using caffe2::BaseContext; using caffe2::Tensor; -using caffe2::TIndex; using std::vector; namespace math = caffe2::math; @@ -163,7 +162,7 @@ void batch_matmul_op_cpu_impl( // Calculate output tensor shapes [B..., (M), (N)] // Batch dimensions will be broadcasted out to those of the longer tensor // A or B. Either M or N are optional if A or B, respectively are 1-D. 
- std::vector new_dims; + std::vector new_dims; if (ndims_A >= ndims_B) { new_dims.assign(dims_A.begin(), dims_A.end() - 2); } else { diff --git a/caffe2/operators/experimental/c10/cpu/cast_cpu.cc b/caffe2/operators/experimental/c10/cpu/cast_cpu.cc index ec0cce711e60..178a977b4e32 100644 --- a/caffe2/operators/experimental/c10/cpu/cast_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/cast_cpu.cc @@ -5,7 +5,6 @@ using caffe2::CPUContext; using caffe2::Tensor; using caffe2::TensorProto_DataType; -using caffe2::TIndex; namespace caffe2 { namespace { @@ -16,7 +15,7 @@ void do_cast_(const Tensor& input, Tensor* output) { const auto* data = input.template data(); auto* out = output->template mutable_data(); auto N = input.size(); - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { out[i] = static_cast(data[i]); } } diff --git a/caffe2/operators/experimental/c10/cpu/concat_cpu.cc b/caffe2/operators/experimental/c10/cpu/concat_cpu.cc index 48df1dcbecd0..b118dabe463e 100644 --- a/caffe2/operators/experimental/c10/cpu/concat_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/concat_cpu.cc @@ -6,7 +6,6 @@ using caffe2::BaseContext; using caffe2::CPUContext; using caffe2::Tensor; using caffe2::TensorCPU; -using caffe2::TIndex; using std::vector; namespace caffe2 { @@ -19,7 +18,7 @@ void concat_op_cpu_impl( int axis, int add_axis, BaseContext* context) { - split->Resize(vector(1, inputs.size())); + split->Resize(vector(1, inputs.size())); int* axis_data = split->template mutable_data(); int adj_size = inputs[0]->ndim() + (add_axis ? 1 : 0); int canonical_axis = caffe2::canonical_axis_index_(axis, adj_size); @@ -36,7 +35,7 @@ void concat_op_cpu_impl( } int before = 1, after = 1; - vector output_dims(inputs[0]->dims()); + vector output_dims(inputs[0]->dims()); for (int i = 0; i < inputs[0]->ndim(); ++i) { if (i == canonical_axis && !add_axis) { continue; diff --git a/caffe2/operators/experimental/c10/cpu/filler_cpu.cc b/caffe2/operators/experimental/c10/cpu/filler_cpu.cc index a813616b6b94..848f9b2984e6 100644 --- a/caffe2/operators/experimental/c10/cpu/filler_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/filler_cpu.cc @@ -5,7 +5,6 @@ using caffe2::CPUContext; using caffe2::Tensor; using caffe2::TensorCPU; -using caffe2::TIndex; using std::vector; namespace caffe2 { @@ -17,7 +16,7 @@ void filler_init( const std::vector& extra_shape, bool input_as_shape) { if (inputs.size()) { - auto real_shape = vector{}; + auto real_shape = vector{}; if (input_as_shape) { // Shape input must be in CPU context auto& input = *inputs[0]; @@ -25,8 +24,8 @@ void filler_init( input.ndim(), 1, "When input_as_shape is true, the input must be a 1D tensor of " - "data type TIndex"); - auto* shape_data = input.template data(); + "data type int64_t"); + auto* shape_data = input.template data(); real_shape.insert( real_shape.end(), shape_data, shape_data + input.dim32(0)); } else { diff --git a/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc b/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc index 6912cb507606..64c01f2e9351 100644 --- a/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc @@ -3,7 +3,6 @@ #include "caffe2/utils/math.h" using caffe2::Tensor; -using caffe2::TIndex; namespace caffe2 { namespace { @@ -36,9 +35,9 @@ void sigmoid_cross_entropy_with_logits_op_cpu_impl( const auto outer_size = logits.size() / inner_size; if 
(logits.ndim() == 0) { - out->Resize(std::vector{}); + out->Resize(std::vector{}); } else { - std::vector dims(logits.dims().begin(), logits.dims().end() - 1); + std::vector dims(logits.dims().begin(), logits.dims().end() - 1); out->Resize(dims); } auto* out_ptr = out->mutable_data(); diff --git a/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc b/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc index dcdeab823027..7fe6e5b579c6 100644 --- a/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc @@ -4,7 +4,6 @@ #include "caffe2/utils/math.h" using caffe2::Tensor; -using caffe2::TIndex; namespace caffe2 { namespace { @@ -21,10 +20,10 @@ void sparse_lengths_sum_op_cpu_impl( CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector"); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex N = dataInput.dim(0); + const int64_t N = dataInput.dim(0); const int D = dataInput.size_from_dim(1); - const TIndex M = lengthsInput.dim(0); - const TIndex indices_size = indicesInput.size(); + const int64_t M = lengthsInput.dim(0); + const int64_t indices_size = indicesInput.size(); auto shape = dataInput.dims(); shape[0] = M; diff --git a/caffe2/operators/experimental/c10/schemas/fc.h b/caffe2/operators/experimental/c10/schemas/fc.h index 869adf05fd04..8fc15db02547 100644 --- a/caffe2/operators/experimental/c10/schemas/fc.h +++ b/caffe2/operators/experimental/c10/schemas/fc.h @@ -10,7 +10,7 @@ struct FullyConnected final { static constexpr const char* name = "FC"; struct Cache final { - vector Y_shape_cache_; + vector Y_shape_cache_; Tensor bias_multiplier_ = Tensor{CPU}; }; diff --git a/caffe2/operators/extend_tensor_op.cc b/caffe2/operators/extend_tensor_op.cc index 6ac1be087ae3..5f05901a8515 100644 --- a/caffe2/operators/extend_tensor_op.cc +++ b/caffe2/operators/extend_tensor_op.cc @@ -34,7 +34,7 @@ class ExtendTensorOp final : public Operator { indices.template data(), indices.template data() + indices.size())); - auto extendSize = (TIndex)maxElem - oldSize; + auto extendSize = (int64_t)maxElem - oldSize; if (extendSize > 0) { new_tensor->Extend(extendSize, growthPct_, &context_); if (!new_tensor->meta().ctor()) { diff --git a/caffe2/operators/filler_op.cc b/caffe2/operators/filler_op.cc index ab41072ca480..b1a486625b93 100644 --- a/caffe2/operators/filler_op.cc +++ b/caffe2/operators/filler_op.cc @@ -21,7 +21,7 @@ bool DiagonalFillOp::FillWithType(Tensor* output) { math::Set(output->size(), T(0), data, &context_); // then calculate step size for diagonal auto step = GetStepSize(output); - for (TIndex i = 0; i < output->size(); i += step) { + for (int64_t i = 0; i < output->size(); i += step) { math::Set(1, value, data, &context_); data += step; } diff --git a/caffe2/operators/filler_op.cu b/caffe2/operators/filler_op.cu index a754d361442e..7b6f2ce01664 100644 --- a/caffe2/operators/filler_op.cu +++ b/caffe2/operators/filler_op.cu @@ -15,7 +15,7 @@ __global__ void FillRangeKernel(const int n, float* data) { template __global__ void FillDiagonalKernel( const int num_diagonal_elements, - const TIndex step_size, + const int64_t step_size, const T value, T* data) { CUDA_1D_KERNEL_LOOP(index, num_diagonal_elements) { @@ -45,7 +45,7 @@ bool DiagonalFillOp::FillWithType(Tensor* output) { math::Set(size, T(0), data, &context_); T value = OperatorBase::GetSingleArgument("value", 0); - TIndex step_size = GetStepSize(output); + int64_t step_size = 
GetStepSize(output); int num_diagonal_elements = ceil((float)size / step_size); FillDiagonalKernel<<< diff --git a/caffe2/operators/filler_op.h b/caffe2/operators/filler_op.h index b09ae0843882..a757490e38ef 100644 --- a/caffe2/operators/filler_op.h +++ b/caffe2/operators/filler_op.h @@ -23,7 +23,7 @@ class FillerOp : public Operator { FillerOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), shape_(this->template GetRepeatedArgument("shape")), - extra_shape_(ToVectorTIndex( + extra_shape_(ToVectorint64_t( this->template GetRepeatedArgument("extra_shape"))), input_as_shape_( this->template GetSingleArgument("input_as_shape", false)) { @@ -53,7 +53,7 @@ class FillerOp : public Operator { bool RunOnDevice() override { auto* output = Operator::Output(0); if (InputSize()) { - auto shape = vector{}; + auto shape = vector{}; if (input_as_shape_) { // Shape input must be in CPU context auto& input = this->template Input(0, CPU); @@ -61,8 +61,8 @@ class FillerOp : public Operator { input.ndim(), 1, "When input_as_shape is true, the input must be a 1D tensor of " - "data type TIndex"); - auto* shape_data = input.template data(); + "data type int64_t"); + auto* shape_data = input.template data(); shape.insert(shape.end(), shape_data, shape_data + input.dim32(0)); } else { auto& input = Input(0); @@ -79,8 +79,8 @@ class FillerOp : public Operator { virtual bool Fill(Tensor* output) = 0; protected: - vector shape_; - vector extra_shape_; + vector shape_; + vector extra_shape_; bool input_as_shape_; }; @@ -367,27 +367,27 @@ class DiagonalFillOp final : public FillerOp { CAFFE_ENFORCE(output->ndim() >= 2, "Input shape must be >= 2D"); } - TIndex GetStepSize(Tensor* output) { - TIndex step; + int64_t GetStepSize(Tensor* output) { + int64_t step; if (output->ndim() == 2) { step = output->dim(1) + 1; } else { - TIndex prev_i = output->dim(0); + int64_t prev_i = output->dim(0); for (auto i : output->dims()) { if (i != prev_i) { CAFFE_THROW("All dimensions of input must be of equal length"); } } - vector cumprod(output->ndim()); + vector cumprod(output->ndim()); auto dims = output->dims(); std::partial_sum( dims.begin(), dims.end() - 1, cumprod.begin(), - std::multiplies()); + std::multiplies()); step = 1 + std::accumulate( - cumprod.begin(), cumprod.end(), static_cast(0)); + cumprod.begin(), cumprod.end(), static_cast(0)); VLOG(0) << step; } return step; diff --git a/caffe2/operators/flatten_op.cc b/caffe2/operators/flatten_op.cc index 342e5c839cbe..0b88b678212f 100644 --- a/caffe2/operators/flatten_op.cc +++ b/caffe2/operators/flatten_op.cc @@ -12,8 +12,8 @@ OPERATOR_SCHEMA(Flatten) ArgumentHelper helper(def); const int axis = helper.GetSingleArgument("axis", 1); vector out(1); - TIndex outer = 1; - TIndex inner = 1; + int64_t outer = 1; + int64_t inner = 1; std::size_t index = 0; for (auto d : in[0].dims()) { if (index < axis) { diff --git a/caffe2/operators/flexible_top_k.cc b/caffe2/operators/flexible_top_k.cc index aff0a27842ab..0f1133ea5591 100644 --- a/caffe2/operators/flexible_top_k.cc +++ b/caffe2/operators/flexible_top_k.cc @@ -9,8 +9,8 @@ namespace { template struct ValueCmp { bool operator()( - const std::pair& lhs, - const std::pair& rhs) { + const std::pair& lhs, + const std::pair& rhs) { return ( lhs.first > rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second)); @@ -27,20 +27,20 @@ bool FlexibleTopKOp::RunOnDevice() { auto* indices = Output(1); const T* input_data = input.template data(); - const TIndex* k_data = k.template data(); + const int64_t* 
k_data = k.template data(); // get flatten shape of input CAFFE_ENFORCE_GT(input.ndim(), 0); - vector input_dims = input.dims(); - vector linear_shape = { + vector input_dims = input.dims(); + vector linear_shape = { size_to_dim_(input_dims.size() - 1, input_dims), input_dims.back()}; CAFFE_ENFORCE_EQ( linear_shape[0], k.size(), "first n-1 dims of input data and K does not match."); - TIndex output_size = 0; - for (TIndex i = 0; i < linear_shape[0]; ++i) { + int64_t output_size = 0; + for (int64_t i = 0; i < linear_shape[0]; ++i) { CAFFE_ENFORCE( linear_shape[1] >= k_data[i], "k should not be greater than last dim, error at index ", @@ -58,21 +58,21 @@ bool FlexibleTopKOp::RunOnDevice() { values->Resize(output_size); indices->Resize(output_size); T* values_data = values->template mutable_data(); - TIndex* indices_data = indices->template mutable_data(); + int64_t* indices_data = indices->template mutable_data(); - TIndex output_offset = 0; + int64_t output_offset = 0; // Sort preserving indices - for (TIndex i = 0; i < linear_shape[0]; ++i) { + for (int64_t i = 0; i < linear_shape[0]; ++i) { // Build a min-heap, the heap element is pair of (value, idx) // the top of the heap is the smallest value std::priority_queue< - std::pair, - std::vector>, + std::pair, + std::vector>, ValueCmp> PQ; - TIndex k_ = k_data[i]; - for (TIndex j = 0; j < linear_shape[1]; ++j) { + int64_t k_ = k_data[i]; + for (int64_t j = 0; j < linear_shape[1]; ++j) { const T value = input_data[i * linear_shape[1] + j]; if (PQ.size() < k_ || value > PQ.top().first) { PQ.push(std::make_pair(value, j)); @@ -81,7 +81,7 @@ bool FlexibleTopKOp::RunOnDevice() { PQ.pop(); } } - for (TIndex j = 0; j < k_; ++j) { + for (int64_t j = 0; j < k_; ++j) { auto& pqElem = PQ.top(); values_data[output_offset + k_ - j - 1] = pqElem.first; indices_data[output_offset + k_ - j - 1] = pqElem.second; @@ -101,24 +101,24 @@ bool FlexibleTopKGradientOp::RunOnDevice() { auto& indices = Input(3); auto* output = Output(0); - const TIndex* k_data = k.template data(); + const int64_t* k_data = k.template data(); const T* values_data = values.template data(); - const TIndex* indices_data = indices.template data(); + const int64_t* indices_data = indices.template data(); // Resize output tensors to be as orignial_input size and initialized with 0 CAFFE_ENFORCE_GT(original_input.ndim(), 0); - vector original_dims = original_input.dims(); + vector original_dims = original_input.dims(); output->Resize(original_dims); T* output_data = output->template mutable_data(); math::Set( output->size(), static_cast(0), output_data, &context_); - TIndex index_offset = 0; - for (TIndex i = 0; i < k.size(); ++i) { + int64_t index_offset = 0; + for (int64_t i = 0; i < k.size(); ++i) { // offset of output_data - TIndex output_offset = i * original_dims.back(); - for (TIndex j = 0; j < k_data[i]; ++j) { - TIndex index = indices_data[index_offset + j]; + int64_t output_offset = i * original_dims.back(); + for (int64_t j = 0; j < k_data[i]; ++j) { + int64_t index = indices_data[index_offset + j]; T value = values_data[index_offset + j]; output_data[output_offset + index] = value; } diff --git a/caffe2/operators/fully_connected_op.h b/caffe2/operators/fully_connected_op.h index d2fce7d9751a..27b8da695559 100644 --- a/caffe2/operators/fully_connected_op.h +++ b/caffe2/operators/fully_connected_op.h @@ -143,7 +143,7 @@ class FullyConnectedOp final : public Operator { size_t axis_w_{1}; // A local vector to cache the output shape so we don't need to recreate // a vector object 
every time we run Run(). - vector Y_shape_cache_; + vector Y_shape_cache_; Tensor bias_multiplier_{Context::GetDeviceType()}; ; diff --git a/caffe2/operators/fused_rowwise_8bit_conversion_ops.h b/caffe2/operators/fused_rowwise_8bit_conversion_ops.h index ca5002078129..5eace583588e 100644 --- a/caffe2/operators/fused_rowwise_8bit_conversion_ops.h +++ b/caffe2/operators/fused_rowwise_8bit_conversion_ops.h @@ -41,7 +41,7 @@ class FloatToFused8BitRowwiseQuantizedOp : public Operator { // bytes of each row for scale (4 bytes) and bias (4 bytes). // | ... int8 data ... | scale | bias | // | number_of_columns | 4B | 4B | - const std::vector output_dimensions = {input_rows, + const std::vector output_dimensions = {input_rows, input_columns + 8}; output->Resize(output_dimensions); @@ -96,7 +96,7 @@ class Fused8BitRowwiseQuantizedToFloatOp : public Operator { // The last 8 bytes per row are the scale and the bias. The rest of // input_columns is the number of values in the original row. - const std::vector output_dimensions = {input_rows, + const std::vector output_dimensions = {input_rows, input_columns - 8}; output->Resize(output_dimensions); const auto output_columns = output->dim(1); diff --git a/caffe2/operators/fused_rowwise_random_quantization_ops.cc b/caffe2/operators/fused_rowwise_random_quantization_ops.cc index 1f498e225147..ca5d8f25d3a9 100644 --- a/caffe2/operators/fused_rowwise_random_quantization_ops.cc +++ b/caffe2/operators/fused_rowwise_random_quantization_ops.cc @@ -38,8 +38,8 @@ bool FloatToFusedRandRowwiseQuantizedOp::RunOnDevice() { size_t data_per_byte = 8 / bitwidth_; // How many bytes in the output size_t segment_size = (input_columns + data_per_byte - 1) / data_per_byte; - const std::vector output_dimensions = { - input_rows, 10 + static_cast(segment_size)}; + const std::vector output_dimensions = { + input_rows, 10 + static_cast(segment_size)}; output->Resize(output_dimensions); const auto* input_data = input.template data(); @@ -92,8 +92,8 @@ bool FusedRandRowwiseQuantizedToFloatOp::RunOnDevice() { "Unsupported bitwidth"); const size_t tail = input_data[1]; const size_t output_columns = (input_columns - 10) * (8 / bitwidth) - tail; - const std::vector output_dimensions = { - input_rows, static_cast(output_columns)}; + const std::vector output_dimensions = { + input_rows, static_cast(output_columns)}; output->Resize(output_dimensions); auto* output_data = output->template mutable_data(); for (size_t row = 0; row < input_rows; ++row) { diff --git a/caffe2/operators/gather_fused_8bit_rowwise_op.h b/caffe2/operators/gather_fused_8bit_rowwise_op.h index 78d9ec93feb6..0a125b3edd8d 100644 --- a/caffe2/operators/gather_fused_8bit_rowwise_op.h +++ b/caffe2/operators/gather_fused_8bit_rowwise_op.h @@ -28,7 +28,7 @@ class GatherFused8BitRowwiseOp : public Operator { CAFFE_ENFORCE_GT(data.dim(1), 8, "DATA must have more than 8 columns"); // Subtract 8 from the #columns of data for the 4 bytes for scale and 4 // bytes for bias that we use in the fused representation (per row). 
- const std::vector shape = {indices.dim(0), data.dim(1) - 8}; + const std::vector shape = {indices.dim(0), data.dim(1) - 8}; output->Resize(shape); int block_size = shape[1]; diff --git a/caffe2/operators/gather_ranges_to_dense_op.h b/caffe2/operators/gather_ranges_to_dense_op.h index 70d57fe184f1..7b171d0e2e09 100644 --- a/caffe2/operators/gather_ranges_to_dense_op.h +++ b/caffe2/operators/gather_ranges_to_dense_op.h @@ -62,7 +62,7 @@ class GatherRangesToDenseOp final : public Operator { auto itemsize = data.meta().itemsize(); auto batchSize = ranges.dim(0); - vector outputDims{batchSize, 0}; + vector outputDims{batchSize, 0}; vector outputRawData; for (int i = 0; i < OutputSize(); ++i) { auto* output = Output(i); diff --git a/caffe2/operators/generate_proposals_op.cc b/caffe2/operators/generate_proposals_op.cc index 2b1039b35a84..bfd641a1e246 100644 --- a/caffe2/operators/generate_proposals_op.cc +++ b/caffe2/operators/generate_proposals_op.cc @@ -241,15 +241,15 @@ bool GenerateProposalsOp::RunOnDevice() { // bbox_deltas: (num_images, A * box_dim, H, W) CAFFE_ENFORCE_EQ( bbox_deltas.dims(), - (vector{num_images, box_dim * A, height, width})); + (vector{num_images, box_dim * A, height, width})); // im_info_tensor: (num_images, 3), format [height, width, scale; ...] - CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector{num_images, 3})); + CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector{num_images, 3})); CAFFE_ENFORCE( im_info_tensor.template IsType(), im_info_tensor.meta().name()); // anchors: (A, box_dim) - CAFFE_ENFORCE_EQ(anchors.dims(), (vector{A, box_dim})); + CAFFE_ENFORCE_EQ(anchors.dims(), (vector{A, box_dim})); CAFFE_ENFORCE(anchors.template IsType(), anchors.meta().name()); // Broadcast the anchors to all pixels diff --git a/caffe2/operators/generate_proposals_op_test.cc b/caffe2/operators/generate_proposals_op_test.cc index fb4c54581396..2b3a033a665d 100644 --- a/caffe2/operators/generate_proposals_op_test.cc +++ b/caffe2/operators/generate_proposals_op_test.cc @@ -11,7 +11,7 @@ namespace caffe2 { static void AddConstInput( - const vector& shape, + const vector& shape, const float value, const string& name, Workspace* ws) { @@ -26,7 +26,7 @@ static void AddConstInput( } static void AddLinSpacedInput( - const vector& shape, + const vector& shape, const float min_val, const float max_val, const string& name, @@ -44,7 +44,7 @@ static void AddLinSpacedInput( } static void AddInput( - const vector& shape, + const vector& shape, const vector& values, const string& name, Workspace* ws) { @@ -79,7 +79,7 @@ TEST(GenerateProposalsTest, TestComputeAllAnchors) { 79, -68, 8, 115, 103, -160, -40, 207, 151, -6, 32, 85, 79, -52, 8, 131, 103, -144, -40, 223, 151; - Tensor anchors_tensor(vector{anchors.rows(), anchors.cols()}, CPU); + Tensor anchors_tensor(vector{anchors.rows(), anchors.cols()}, CPU); Eigen::Map( anchors_tensor.mutable_data(), anchors.rows(), anchors.cols()) = anchors; @@ -143,7 +143,7 @@ TEST(GenerateProposalsTest, TestComputeAllAnchorsRotated) { all_anchors_gt(i, 4) = angles[i % angles.size()]; } - Tensor anchors_tensor(vector{anchors.rows(), anchors.cols()}, CPU); + Tensor anchors_tensor(vector{anchors.rows(), anchors.cols()}, CPU); Eigen::Map( anchors_tensor.mutable_data(), anchors.rows(), anchors.cols()) = anchors; @@ -171,11 +171,11 @@ TEST(GenerateProposalsTest, TestEmpty) { const int A = 4; const int H = 10; const int W = 8; - AddConstInput(vector{img_count, A, H, W}, 1., "scores", &ws); + AddConstInput(vector{img_count, A, H, W}, 1., "scores", &ws); AddLinSpacedInput( - 
vector{img_count, 4 * A, H, W}, 0, 10, "bbox_deltas", &ws); - AddConstInput(vector{img_count, 3}, 0.1, "im_info", &ws); - AddConstInput(vector{A, 4}, 1.0, "anchors", &ws); + vector{img_count, 4 * A, H, W}, 0, 10, "bbox_deltas", &ws); + AddConstInput(vector{img_count, 3}, 0.1, "im_info", &ws); + AddConstInput(vector{A, 4}, 1.0, "anchors", &ws); def.add_arg()->CopyFrom(MakeArgument("spatial_scale", 2.0f)); @@ -280,10 +280,10 @@ TEST(GenerateProposalsTest, TestRealDownSampled) { 1.50015003e-05f, 8.91025957e-06f}; - AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); - AddInput(vector{img_count, 4 * A, H, W}, bbx, "bbox_deltas", &ws); - AddInput(vector{img_count, 3}, im_info, "im_info", &ws); - AddInput(vector{A, 4}, anchors, "anchors", &ws); + AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); + AddInput(vector{img_count, 4 * A, H, W}, bbx, "bbox_deltas", &ws); + AddInput(vector{img_count, 3}, im_info, "im_info", &ws); + AddInput(vector{A, 4}, anchors, "anchors", &ws); def.add_arg()->CopyFrom(MakeArgument("spatial_scale", 1.0f / 16.0f)); def.add_arg()->CopyFrom(MakeArgument("pre_nms_topN", 6000)); @@ -300,7 +300,7 @@ TEST(GenerateProposalsTest, TestRealDownSampled) { Blob* rois_blob = ws.GetBlob("rois"); EXPECT_NE(nullptr, rois_blob); auto& rois = rois_blob->Get(); - EXPECT_EQ(rois.dims(), (vector{rois_gt.rows(), rois_gt.cols()})); + EXPECT_EQ(rois.dims(), (vector{rois_gt.rows(), rois_gt.cols()})); auto rois_data = Eigen::Map(rois.data(), rois.dim(0), rois.dim(1)); EXPECT_NEAR((rois_data.matrix() - rois_gt).cwiseAbs().maxCoeff(), 0, 1e-4); @@ -309,7 +309,7 @@ TEST(GenerateProposalsTest, TestRealDownSampled) { Blob* rois_probs_blob = ws.GetBlob("rois_probs"); EXPECT_NE(nullptr, rois_probs_blob); auto& rois_probs = rois_probs_blob->Get(); - EXPECT_EQ(rois_probs.dims(), (vector{TIndex(rois_probs_gt.size())})); + EXPECT_EQ(rois_probs.dims(), (vector{int64_t(rois_probs_gt.size())})); auto rois_probs_data = ConstEigenVectorArrayMap(rois_probs.data(), rois.dim(0)); EXPECT_NEAR( @@ -445,14 +445,14 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotatedAngle0) { 1.50015003e-05f, 8.91025957e-06f}; - AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); + AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); AddInput( - vector{img_count, 5 * A, H, W}, + vector{img_count, 5 * A, H, W}, bbx_with_angle, "bbox_deltas", &ws); - AddInput(vector{img_count, 3}, im_info, "im_info", &ws); - AddInput(vector{A, 5}, anchors, "anchors", &ws); + AddInput(vector{img_count, 3}, im_info, "im_info", &ws); + AddInput(vector{A, 5}, anchors, "anchors", &ws); def.add_arg()->CopyFrom(MakeArgument("spatial_scale", 1.0f / 16.0f)); def.add_arg()->CopyFrom(MakeArgument("pre_nms_topN", 6000)); @@ -470,7 +470,7 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotatedAngle0) { Blob* rois_blob = ws.GetBlob("rois"); EXPECT_NE(nullptr, rois_blob); auto& rois = rois_blob->Get(); - EXPECT_EQ(rois.dims(), (vector{rois_gt.rows(), rois_gt.cols()})); + EXPECT_EQ(rois.dims(), (vector{rois_gt.rows(), rois_gt.cols()})); auto rois_data = Eigen::Map(rois.data(), rois.dim(0), rois.dim(1)); EXPECT_NEAR((rois_data.matrix() - rois_gt).cwiseAbs().maxCoeff(), 0, 1e-3); @@ -479,7 +479,7 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotatedAngle0) { Blob* rois_probs_blob = ws.GetBlob("rois_probs"); EXPECT_NE(nullptr, rois_probs_blob); auto& rois_probs = rois_probs_blob->Get(); - EXPECT_EQ(rois_probs.dims(), (vector{TIndex(rois_probs_gt.size())})); + EXPECT_EQ(rois_probs.dims(), 
(vector{int64_t(rois_probs_gt.size())})); auto rois_probs_data = ConstEigenVectorArrayMap(rois_probs.data(), rois.dim(0)); EXPECT_NEAR( @@ -587,14 +587,14 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotated) { // vector anchors{-38, -16, 53, 31, -120, -120, 135, 135}; vector anchors{8, 8, 92, 48, angle, 8, 8, 256, 256, angle}; - AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); + AddInput(vector{img_count, A, H, W}, scores, "scores", &ws); AddInput( - vector{img_count, 5 * A, H, W}, + vector{img_count, 5 * A, H, W}, bbx_with_angle, "bbox_deltas", &ws); - AddInput(vector{img_count, 3}, im_info, "im_info", &ws); - AddInput(vector{A, 5}, anchors, "anchors", &ws); + AddInput(vector{img_count, 3}, im_info, "im_info", &ws); + AddInput(vector{A, 5}, anchors, "anchors", &ws); def.add_arg()->CopyFrom(MakeArgument("spatial_scale", 1.0f / 16.0f)); def.add_arg()->CopyFrom(MakeArgument("pre_nms_topN", 6000)); diff --git a/caffe2/operators/glu_op.h b/caffe2/operators/glu_op.h index 6cbb9e8ff6c8..fc4999d1e987 100644 --- a/caffe2/operators/glu_op.h +++ b/caffe2/operators/glu_op.h @@ -17,7 +17,7 @@ class GluOp final : public Operator { bool RunOnDevice() { auto& X = Input(0); auto* Y = Output(0); - vector Yshape; + vector Yshape; Yshape.insert(Yshape.end(), X.dims().begin(), X.dims().end()); const int split_index = dim_ == -1 ? Yshape.size() - 1 : dim_; CAFFE_ENFORCE( diff --git a/caffe2/operators/half_float_ops.h b/caffe2/operators/half_float_ops.h index b8d5dacf6947..a7c0dbe1b471 100644 --- a/caffe2/operators/half_float_ops.h +++ b/caffe2/operators/half_float_ops.h @@ -36,7 +36,7 @@ class Float16ConstantFillOp : public Operator { bool RunOnDevice() override; private: - vector shape_; + vector shape_; }; class Float16UniformFillOp : public Operator { @@ -65,7 +65,7 @@ class Float16UniformFillOp : public Operator { bool RunOnDevice() override; private: - vector shape_; + vector shape_; float min_; float max_; }; diff --git a/caffe2/operators/hip/local_response_normalization_op_miopen.cc b/caffe2/operators/hip/local_response_normalization_op_miopen.cc index 26da9bf5b874..d5d5a9880f33 100644 --- a/caffe2/operators/hip/local_response_normalization_op_miopen.cc +++ b/caffe2/operators/hip/local_response_normalization_op_miopen.cc @@ -51,7 +51,7 @@ class MIOPEN_LRNOP final : public Operator { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenLRNDescriptor_t norm_desc_; - vector miopen_input_dims_; + vector miopen_input_dims_; const miopenLRNMode_t mode_; const int size_; const float alpha_; @@ -103,7 +103,7 @@ class MIOPENLRNGradientOp final : public Operator { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenLRNDescriptor_t norm_desc_; - vector miopen_input_dims_; + vector miopen_input_dims_; const miopenLRNMode_t mode_; const int size_; const float alpha_; diff --git a/caffe2/operators/hip/relu_op_miopen.cc b/caffe2/operators/hip/relu_op_miopen.cc index 5a8a147ff2a8..dcf74600504c 100644 --- a/caffe2/operators/hip/relu_op_miopen.cc +++ b/caffe2/operators/hip/relu_op_miopen.cc @@ -98,7 +98,7 @@ class MIOPENReluOp final : public Operator { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenActivationDescriptor_t activ_desc_; - vector miopen_input_dims_; + vector miopen_input_dims_; const float alpha_; const float beta_; const double power_; @@ -191,7 +191,7 @@ class MIOPENReluGradientOp final : public Operator { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenActivationDescriptor_t activ_desc_; - 
vector miopen_input_dims_; + vector miopen_input_dims_; const float alpha_; const float beta_; const double power_; diff --git a/caffe2/operators/hip/softmax_op_miopen.cc b/caffe2/operators/hip/softmax_op_miopen.cc index 6859dccc7f8a..1fa978e59a85 100644 --- a/caffe2/operators/hip/softmax_op_miopen.cc +++ b/caffe2/operators/hip/softmax_op_miopen.cc @@ -71,7 +71,7 @@ class MIOpenSoftmaxOp final : public Operator { protected: MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t desc_; - vector dims_; + vector dims_; const int axis_; const float alpha_; const float beta_; @@ -135,7 +135,7 @@ class MIOpenSoftmaxGradientOp final : public Operator { const float alpha_; const float beta_; miopenTensorDescriptor_t desc_; - vector dims_; + vector dims_; }; namespace { diff --git a/caffe2/operators/hip/spatial_batch_norm_op_miopen.cc b/caffe2/operators/hip/spatial_batch_norm_op_miopen.cc index 0a8b1d9b1cbf..7b5398296234 100644 --- a/caffe2/operators/hip/spatial_batch_norm_op_miopen.cc +++ b/caffe2/operators/hip/spatial_batch_norm_op_miopen.cc @@ -58,7 +58,7 @@ class MIOpenSpatialBNOp final : public SpatialBNOp { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenTensorDescriptor_t bn_param_desc_; - vector miopen_input_dims_; + vector miopen_input_dims_; float alpha_; float beta_; miopenBatchNormMode_t mode_; @@ -97,7 +97,7 @@ class MIOpenSpatialBNGradientOp final : public SpatialBNGradientOp { MIOPENWrapper miopen_wrapper_; miopenTensorDescriptor_t data_desc_; miopenTensorDescriptor_t bn_param_desc_; - vector miopen_input_dims_; + vector miopen_input_dims_; float alpha_; float beta_; miopenBatchNormMode_t mode_; diff --git a/caffe2/operators/im2col_op.h b/caffe2/operators/im2col_op.h index d5c4fec7f1c9..80c3a85a9689 100644 --- a/caffe2/operators/im2col_op.h +++ b/caffe2/operators/im2col_op.h @@ -77,7 +77,7 @@ class Im2ColOp final : public Operator { switch (order_) { case StorageOrder::NCHW: { Y->Resize( - std::vector{N, C * kernel_h_ * kernel_w_, out_h, out_w}); + std::vector{N, C * kernel_h_ * kernel_w_, out_h, out_w}); const size_t dx = X.size() / N; const size_t dy = Y->size() / N; @@ -105,7 +105,7 @@ class Im2ColOp final : public Operator { }; break; case StorageOrder::NHWC: { Y->Resize( - std::vector{N, out_h, out_w, kernel_h_ * kernel_w_ * C}); + std::vector{N, out_h, out_w, kernel_h_ * kernel_w_ * C}); const size_t dx = X.size() / N; const size_t dy = Y->size() / N; diff --git a/caffe2/operators/index_hash_ops.cc b/caffe2/operators/index_hash_ops.cc index 4f374895451c..b803f8a6c126 100644 --- a/caffe2/operators/index_hash_ops.cc +++ b/caffe2/operators/index_hash_ops.cc @@ -21,7 +21,7 @@ specified number. All input and output indices are enforced to be positive. 
.TensorInferenceFunction([](const OperatorDef& /* unused */, const vector& in) { std::vector out(1); - std::vector output_dims = GetDimsVector(in[0]); + std::vector output_dims = GetDimsVector(in[0]); out[0] = CreateTensorShape(output_dims, in[0].data_type()); return out; }); diff --git a/caffe2/operators/index_ops.cc b/caffe2/operators/index_ops.cc index b9a8b1b46e27..241b0ff97c60 100644 --- a/caffe2/operators/index_ops.cc +++ b/caffe2/operators/index_ops.cc @@ -11,12 +11,12 @@ namespace caffe2 { namespace { using IndexKeyTypes = TensorTypes; -using TIndexValue = int64_t; +using int64_tValue = int64_t; } // namespace struct IndexBase { public: - IndexBase(TIndexValue maxElements, const TypeMeta& type) + IndexBase(int64_tValue maxElements, const TypeMeta& type) : maxElements_{maxElements} , meta_(type) , frozen_{false} {} @@ -35,7 +35,7 @@ struct IndexBase { const TypeMeta& Type() const { return meta_; } - TIndexValue Size() { + int64_tValue Size() { std::lock_guard guard(dictMutex_); return nextId_; } @@ -43,17 +43,17 @@ struct IndexBase { protected: int64_t maxElements_; TypeMeta meta_; - TIndexValue nextId_{1}; // guarded by dictMutex_ + int64_tValue nextId_{1}; // guarded by dictMutex_ std::atomic frozen_{false}; std::mutex dictMutex_; }; template struct Index: IndexBase { - explicit Index(TIndexValue maxElements) + explicit Index(int64_tValue maxElements) : IndexBase(maxElements, TypeMeta::Make()) {} - void Get(const T* keys, TIndexValue* values, size_t numKeys) { + void Get(const T* keys, int64_tValue* values, size_t numKeys) { if (frozen_) { FrozenGet(keys, values, numKeys); return; @@ -104,14 +104,14 @@ struct Index: IndexBase { } private: - void FrozenGet(const T* keys, TIndexValue* values, size_t numKeys) { + void FrozenGet(const T* keys, int64_tValue* values, size_t numKeys) { for (int i = 0; i < numKeys; ++i) { auto it = dict_.find(keys[i]); values[i] = it != dict_.end() ? 
it->second : 0; } } - std::unordered_map<T, TIndexValue> dict_; + std::unordered_map<T, int64_tValue> dict_; }; // TODO(azzolini): support sizes larger than int32 @@ -131,7 +131,7 @@ class IndexCreateOp: public Operator { } private: - TIndexValue maxElements_; + int64_tValue maxElements_; }; class IndexGetOp: public Operator { @@ -152,7 +152,7 @@ class IndexGetOp: public Operator { values->ResizeLike(keys); dict->Get( keys.data(), - values->template mutable_data<TIndexValue>(), + values->template mutable_data<int64_tValue>(), keys.size()); return true; } @@ -227,8 +227,8 @@ class IndexSizeOp : public Operator { bool RunOnDevice() override { auto& base = OperatorBase::Input>(0); auto* out = Output(0); - out->Resize(std::vector<TIndex>{}); - *out->template mutable_data<TIndexValue>() = base->Size(); + out->Resize(std::vector<int64_t>{}); + *out->template mutable_data<int64_tValue>() = base->Size(); return true; } }; diff --git a/caffe2/operators/integral_image_op.cc b/caffe2/operators/integral_image_op.cc index 27356104bbd6..14baf484a232 100644 --- a/caffe2/operators/integral_image_op.cc +++ b/caffe2/operators/integral_image_op.cc @@ -19,7 +19,7 @@ bool IntegralImageOp::RunOnDevice() { auto* Y = Output(0); CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the momement"); - vector<TIndex> out_shape(X.dims()); + vector<int64_t> out_shape(X.dims()); out_shape[2] += 1; // H + 1 output size out_shape[3] += 1; // W + 1 output size Y->Resize(out_shape); diff --git a/caffe2/operators/integral_image_op.cu b/caffe2/operators/integral_image_op.cu index d8fa0b8f4dcc..d865e7a386c3 100644 --- a/caffe2/operators/integral_image_op.cu +++ b/caffe2/operators/integral_image_op.cu @@ -124,7 +124,7 @@ bool IntegralImageOp::RunOnDevice() { // Input is (N, C, H, W) // Output is (N, C, H + 1, W + 1) - vector<TIndex> out_shape(X.dims()); + vector<int64_t> out_shape(X.dims()); out_shape[2] += 1; // H + 1 output size out_shape[3] += 1; // W + 1 output size Y->Resize(out_shape); @@ -172,7 +172,7 @@ bool IntegralImageGradientOp::RunOnDevice() { // Row pass reduces shape of dY from (N, C, H + 1, W + 1) // to (N, C, H + 1, W) // Col pass reduces shape to (N, C, H, W) - vector<TIndex> row_pass_shape(dY.dims()); + vector<int64_t> row_pass_shape(dY.dims()); row_pass_shape[3] -= 1; row_pass_buffer_.Resize(row_pass_shape); const int chans = row_pass_buffer_.dim32(1); diff --git a/caffe2/operators/is_empty_op.h b/caffe2/operators/is_empty_op.h index c5c9402f342d..ef11bc06608f 100644 --- a/caffe2/operators/is_empty_op.h +++ b/caffe2/operators/is_empty_op.h @@ -15,7 +15,7 @@ class IsEmptyOp : public Operator { bool RunOnDevice() override { auto& input = Input(0); auto* output = Output(0); - output->Resize(std::vector<TIndex>{}); + output->Resize(std::vector<int64_t>{}); *output->template mutable_data() = (input.size() == 0); return true; } diff --git a/caffe2/operators/layer_norm_op.cc b/caffe2/operators/layer_norm_op.cc index 4b995fa49d8c..a461eecdcc14 100644 --- a/caffe2/operators/layer_norm_op.cc +++ b/caffe2/operators/layer_norm_op.cc @@ -28,7 +28,7 @@ bool LayerNormOp::DoRunWithType() { const int right = input.size_from_dim(canonical_axis); output->ResizeLike(input); - std::vector<TIndex> stats_dims( + std::vector<int64_t> stats_dims( input.dims().begin(), input.dims().begin() + canonical_axis); stats_dims.push_back(1); mean->Resize(stats_dims); diff --git a/caffe2/operators/layer_norm_op.cu b/caffe2/operators/layer_norm_op.cu index 1f78b9e50e09..3de32dee87e0 100644 --- a/caffe2/operators/layer_norm_op.cu +++ b/caffe2/operators/layer_norm_op.cu @@ -92,7 +92,7 @@ bool LayerNormOp::DoRunWithType() { const int right = input.size_from_dim(canonical_axis); output->ResizeLike(input); - std::vector
stats_dims( + std::vector stats_dims( input.dims().begin(), input.dims().begin() + canonical_axis); stats_dims.push_back(1); mean->Resize(stats_dims); @@ -256,7 +256,7 @@ bool LayerNormGradientOp::DoRunWithType() { const unsigned long right = norm_inputs.size_from_dim(canonical_axis); ginput->ResizeLike(norm_inputs); - std::vector stats_dims( + std::vector stats_dims( norm_inputs.dims().begin(), norm_inputs.dims().begin() + canonical_axis); stats_dims.push_back(1); dmean_.Resize(stats_dims); diff --git a/caffe2/operators/lengths_pad_op.h b/caffe2/operators/lengths_pad_op.h index 9f65c39a262a..a5d4b04850dc 100644 --- a/caffe2/operators/lengths_pad_op.h +++ b/caffe2/operators/lengths_pad_op.h @@ -56,7 +56,7 @@ class LengthsPadOp : public Operator { math::Set( output->size(), static_cast(padding_value_), out_data, &context_); - for (TIndex i = 0; i < lengths_size; ++i) { + for (int64_t i = 0; i < lengths_size; ++i) { auto length = lengths_data[i]; CAFFE_ENFORCE_GE(length, 0); CAFFE_ENFORCE_GE( diff --git a/caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h b/caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h index 22c7a3b7c0f6..5ce2c87e988c 100644 --- a/caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h +++ b/caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h @@ -50,7 +50,7 @@ class SparseLengthsFused8BitRowwiseOp : public Operator { CAFFE_ENFORCE_GT(data.dim(1), 8, "DATA must have more than 8 columns"); // Subtract 8 from the #columns of data for the 4 bytes for scale and 4 // bytes for bias that we use in the fused representation (per row). - const std::vector shape = {lengths.dim(0), data.dim(1) - 8}; + const std::vector shape = {lengths.dim(0), data.dim(1) - 8}; output->Resize(shape); Fused8BitRowwiseEmbeddingLookup( diff --git a/caffe2/operators/lengths_reducer_ops.h b/caffe2/operators/lengths_reducer_ops.h index 8153e088074a..c10b8eba981f 100644 --- a/caffe2/operators/lengths_reducer_ops.h +++ b/caffe2/operators/lengths_reducer_ops.h @@ -47,10 +47,10 @@ class CPUSparseLengthsReductionOp : public Operator { CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector"); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex N = dataInput.dim(0); + const int64_t N = dataInput.dim(0); const int D = dataInput.size_from_dim(1); - const TIndex M = lengthsInput.dim(0); - const TIndex indices_size = indicesInput.size(); + const int64_t M = lengthsInput.dim(0); + const int64_t indices_size = indicesInput.size(); auto* output = Output(0); auto shape = dataInput.dims(); diff --git a/caffe2/operators/lengths_reducer_rowwise_8bit_ops.h b/caffe2/operators/lengths_reducer_rowwise_8bit_ops.h index 58ebe6cb58e8..c912d1b0009b 100644 --- a/caffe2/operators/lengths_reducer_rowwise_8bit_ops.h +++ b/caffe2/operators/lengths_reducer_rowwise_8bit_ops.h @@ -39,7 +39,7 @@ class SparseLengths8BitsRowwiseOp : public Operator { auto* output = Output(0); auto* scale_bias = Input(SCALE_BIAS).template data(); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex outputSize = lengthsInput.dim(0); + const int64_t outputSize = lengthsInput.dim(0); auto& indicesInput = Input(INDICES); CAFFE_ENFORCE_EQ( @@ -54,23 +54,23 @@ class SparseLengths8BitsRowwiseOp : public Operator { "the second dim of scale_bias has to be equal to 2"); CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector"); const IndexType* indices = indicesInput.template data(); - TIndex dataToReduceSize = indicesInput.dim(0); + int64_t dataToReduceSize = 
indicesInput.dim(0); const int* lengths = lengthsInput.template data(); - vector shape = dataInput.dims(); + vector shape = dataInput.dims(); shape[0] = outputSize; output->Resize(shape); const float* w = nullptr; if (USE_WEIGHTS) { w = Input(WEIGHTS).template data(); } - TIndex in_block_size = dataInput.size_from_dim(1); + int64_t in_block_size = dataInput.size_from_dim(1); OutDataT* out = output->template mutable_data(); const uint8_t* input_data = dataInput.template data(); // delegate work to perfkernel that branches based on architecture - const TIndex indices_size = indicesInput.size(); - const TIndex N = dataInput.dim(0); + const int64_t indices_size = indicesInput.size(); + const int64_t N = dataInput.dim(0); EmbeddingLookup( in_block_size, outputSize, @@ -107,7 +107,7 @@ class FloatToRowwiseQuantized8BitsOp : public Operator { auto* scale_bias = Output(SCALE_BIAS); auto* input_data = input.template data(); output->ResizeLike(input); - vector scale_bias_dims = {input.dim(0), 2}; + vector scale_bias_dims = {input.dim(0), 2}; scale_bias->Resize(scale_bias_dims); auto* output_data = output->template mutable_data(); float* scale_bias_data = scale_bias->template mutable_data(); diff --git a/caffe2/operators/lengths_tile_op.cc b/caffe2/operators/lengths_tile_op.cc index d5af0a91bd65..f1b843108750 100644 --- a/caffe2/operators/lengths_tile_op.cc +++ b/caffe2/operators/lengths_tile_op.cc @@ -33,7 +33,7 @@ bool LengthsTileOp::RunOnDevice() { auto src = static_cast(data.raw_data()); auto out = static_cast(output->raw_mutable_data(data.meta())); - for (TIndex i = 0; i < lengths_size; ++i) { + for (int64_t i = 0; i < lengths_size; ++i) { auto length = lengths_data[i]; CAFFE_ENFORCE_GE(length, 0); for (int32_t j = 0; j < length; ++j) { diff --git a/caffe2/operators/lengths_tile_op.cu b/caffe2/operators/lengths_tile_op.cu index aebb33c1460a..b15350a7cf96 100644 --- a/caffe2/operators/lengths_tile_op.cu +++ b/caffe2/operators/lengths_tile_op.cu @@ -50,7 +50,7 @@ bool LengthsTileOp::RunOnDevice() { rowMappingDevice_.Resize(total_length); auto* rowOffsets = rowMappingHost_.mutable_data(); int32_t outputRow = 0; - for (TIndex i = 0; i < lengths_size; i++) { + for (int64_t i = 0; i < lengths_size; i++) { auto length = lengths_data[i]; for (int32_t j = 0; j < length; j++) { rowOffsets[outputRow++] = i * numElementsPerRow; diff --git a/caffe2/operators/lengths_top_k_op.cc b/caffe2/operators/lengths_top_k_op.cc index c871d53caf95..0f8f0708270f 100644 --- a/caffe2/operators/lengths_top_k_op.cc +++ b/caffe2/operators/lengths_top_k_op.cc @@ -21,25 +21,25 @@ bool LengthsTopKOp::RunOnDevice() { int* output_topk_indices_data = output_topk_indices->template mutable_data(); - auto cmp = [](std::pair& lhs, std::pair& rhs) { + auto cmp = [](std::pair& lhs, std::pair& rhs) { return lhs.first > rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second); }; // Sort preserving indices int next_index = 0; - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { // Build a min-heap, the heap element is pair of (value, idx) // the top of the heap is the smallest value std::priority_queue< - std::pair, - std::vector>, + std::pair, + std::vector>, decltype(cmp)> p_queue(cmp); // Maintain the size of heap to be less or equal to k_, so the // heap will hold the k_ largest values - for (TIndex j = 0; j < input_len[i]; ++j) { + for (int64_t j = 0; j < input_len[i]; ++j) { const auto value = X_data[next_index++]; if (p_queue.size() < k_ || value > p_queue.top().first) { p_queue.push(std::make_pair(value, 
j)); @@ -50,7 +50,7 @@ bool LengthsTopKOp::RunOnDevice() { } int last_index = p_queue.size(); - for (TIndex j = 0; j < k_; ++j) { + for (int64_t j = 0; j < k_; ++j) { if (p_queue.size() > 0) { auto& pqElem = p_queue.top(); output_topk_values_data[i * k_ + last_index - j - 1] = pqElem.first; diff --git a/caffe2/operators/local_response_normalization_op.cc b/caffe2/operators/local_response_normalization_op.cc index 81499b4a5d6a..bbbb68872cbf 100644 --- a/caffe2/operators/local_response_normalization_op.cc +++ b/caffe2/operators/local_response_normalization_op.cc @@ -27,7 +27,7 @@ bool LRNOp::RunOnDeviceWithOrderNCHW() { scale_->ResizeLike(X); float* scale_data = scale_->template mutable_data(); math::Set(X.size(), bias_, scale_data, &context_); - Tensor padded_square(vector{C + size_ - 1, H, W}, CPU); + Tensor padded_square(vector{C + size_ - 1, H, W}, CPU); float* padded_square_data = padded_square.template mutable_data(); math::Set(padded_square.size(), 0., padded_square_data, &context_); @@ -91,7 +91,7 @@ bool LRNOp::RunOnDeviceWithOrderNHWC() { scale_->ResizeLike(X); float* scale_data = scale_->template mutable_data(); - Tensor padded_square(vector(1, C + size_ - 1), CPU); + Tensor padded_square(vector(1, C + size_ - 1), CPU); float* padded_square_data = padded_square.template mutable_data(); math::Set(padded_square.size(), 0., padded_square_data, &context_); @@ -146,7 +146,7 @@ bool LRNGradientOp::RunOnDeviceWithOrderNCHW() { const float* dYdata = dY.data(); float* dXdata = dX->template mutable_data(); - Tensor padded_ratio(vector{C + size_ - 1, H, W}, CPU); + Tensor padded_ratio(vector{C + size_ - 1, H, W}, CPU); float* padded_ratio_data = padded_ratio.template mutable_data(); // Compute scale(copied from LRNOp) - reusing padded_ratio math::Set(X.size(), bias_, scale_data, &context_); @@ -183,7 +183,7 @@ bool LRNGradientOp::RunOnDeviceWithOrderNCHW() { math::Set(padded_ratio.size(), 0., padded_ratio_data, &context_); - Tensor accum_ratio(vector{H, W}, CPU); + Tensor accum_ratio(vector{H, W}, CPU); float* accum_ratio_data = accum_ratio.template mutable_data(); const float cache_ratio = 2. 
* alpha_ * beta_ / size_; @@ -243,7 +243,7 @@ bool LRNGradientOp::RunOnDeviceWithOrderNHWC() { scale_ = &local_scale_tensor_; } scale_->ResizeLike(X); - Tensor padded_ratio(vector(1, C + size_ - 1), CPU); + Tensor padded_ratio(vector(1, C + size_ - 1), CPU); float* padded_ratio_data = padded_ratio.template mutable_data(); float* scale_data = scale_->template mutable_data(); // Compute scale(copied from LRNOp) - reusing padded_ratio diff --git a/caffe2/operators/local_response_normalization_op_cudnn.cc b/caffe2/operators/local_response_normalization_op_cudnn.cc index e195b8b5ae44..c1e5683a1c28 100644 --- a/caffe2/operators/local_response_normalization_op_cudnn.cc +++ b/caffe2/operators/local_response_normalization_op_cudnn.cc @@ -38,7 +38,7 @@ class CuDNNLRNOp final : public Operator { cudnnTensorDescriptor_t data_desc_; cudnnLRNDescriptor_t norm_desc_; - vector cudnn_input_dims_; + vector cudnn_input_dims_; const int size_; const float alpha_; @@ -80,7 +80,7 @@ class CuDNNLRNGradientOp final : public Operator { cudnnTensorDescriptor_t data_desc_; cudnnLRNDescriptor_t norm_desc_; - vector cudnn_input_dims_; + vector cudnn_input_dims_; const int size_; const float alpha_; diff --git a/caffe2/operators/lpnorm_op.cc b/caffe2/operators/lpnorm_op.cc index 79c35cd83a21..d4b66ed548a3 100644 --- a/caffe2/operators/lpnorm_op.cc +++ b/caffe2/operators/lpnorm_op.cc @@ -132,10 +132,10 @@ Y: "*(type: bool; default: False)* Whether we calculate norm or averaged_norm.The Lp_averaged_norm(x) is defined as Lp_averaged_norm(x) = LpNorm(x) / size(x)") .TensorInferenceFunction([](const OperatorDef& /* unused */, const vector& in) { - std::vector output_dims(1); + std::vector output_dims(1); output_dims[0] = 1; // 1 return vector{ - CreateTensorShape(vector{output_dims}, in[0].data_type())}; + CreateTensorShape(vector{output_dims}, in[0].data_type())}; }); OPERATOR_SCHEMA(LpNormGradient) diff --git a/caffe2/operators/map_ops.h b/caffe2/operators/map_ops.h index 9d207815cc08..52cf8d1a8a1f 100644 --- a/caffe2/operators/map_ops.h +++ b/caffe2/operators/map_ops.h @@ -200,7 +200,7 @@ class MapSerializer : public BlobSerializerBase { BlobSerializerBase::SerializationAcceptor acceptor) override { CAFFE_ENFORCE(blob.IsType()); const MapType& map_data = blob.template Get(); - TIndex sz = map_data.size(); + int64_t sz = map_data.size(); Tensor key_tensor(CPU); key_tensor.Resize(sz); Tensor value_tensor(CPU); diff --git a/caffe2/operators/matmul_op.h b/caffe2/operators/matmul_op.h index ee5807b0348a..cea885c0c352 100644 --- a/caffe2/operators/matmul_op.h +++ b/caffe2/operators/matmul_op.h @@ -92,7 +92,7 @@ class MatMulOp final : public Operator { protected: // A local vector to cache the output shape so we don't need to recreate // a vector object every time we run Run(). - vector Y_shape_cache_{0, 0}; + vector Y_shape_cache_{0, 0}; int axis_a_{1}; int axis_b_{1}; bool trans_a_; diff --git a/caffe2/operators/numpy_tile_op.h b/caffe2/operators/numpy_tile_op.h index 2413652e3277..88dc3cb85164 100644 --- a/caffe2/operators/numpy_tile_op.h +++ b/caffe2/operators/numpy_tile_op.h @@ -39,7 +39,7 @@ class NumpyTileOp : public Operator { // output tensor. 
Tensor *src = &buffer, *dst = output; src->CopyFrom(input); - vector output_dims(input.dims()); + vector output_dims(input.dims()); for (size_t i = 0; i < repeats.size(); ++i) { if (repeats_data[i] == 1) { continue; diff --git a/caffe2/operators/one_hot_ops.cc b/caffe2/operators/one_hot_ops.cc index 35512680327b..3f7449c51535 100644 --- a/caffe2/operators/one_hot_ops.cc +++ b/caffe2/operators/one_hot_ops.cc @@ -17,9 +17,9 @@ bool BatchOneHotOp::DoRunWithType() { CAFFE_ENFORCE_EQ(lens.size(), D); const auto* lens_data = lens.template data(); - TIndex output_dim = 0; + int64_t output_dim = 0; valsOffsets_.resize(D + 1); - for (TIndex i = 0; i < D; i++) { + for (int64_t i = 0; i < D; i++) { CAFFE_ENFORCE_GE(lens_data[i], 0); valsOffsets_[i] = output_dim; output_dim += lens_data[i]; @@ -34,10 +34,10 @@ bool BatchOneHotOp::DoRunWithType() { const auto* vals_data = vals.template data(); auto* output_data = output->template mutable_data(); - for (TIndex i = 0; i < N; ++i) { - for (TIndex j = 0; j < D; j++) { + for (int64_t i = 0; i < N; ++i) { + for (int64_t j = 0; j < D; j++) { const auto input_val = input_data[i * D + j]; - for (TIndex k = valsOffsets_[j]; k < valsOffsets_[j + 1]; ++k) { + for (int64_t k = valsOffsets_[j]; k < valsOffsets_[j + 1]; ++k) { output_data[k] = vals_data[k] == input_val; } } @@ -50,21 +50,21 @@ bool BatchOneHotOp::DoRunWithType() { vector TensorInferenceForBatchOneHot( const OperatorDef& /* def */, const vector& in) { - std::vector output_dims(2); + std::vector output_dims(2); output_dims[0] = in[0].dims(0); // N output_dims[1] = in[2].dims(0); // vals.size() return vector{ - CreateTensorShape(vector{output_dims}, in[0].data_type())}; + CreateTensorShape(vector{output_dims}, in[0].data_type())}; } vector TensorInferenceForBucketBatchOneHot( const OperatorDef& /* def */, const vector& in) { - std::vector output_dims(2); + std::vector output_dims(2); output_dims[0] = in[0].dims(0); // N output_dims[1] = in[1].dims(0) + in[2].dims(0); // vals.size() + length.size() return vector{ - CreateTensorShape(vector{output_dims}, in[0].data_type())}; + CreateTensorShape(vector{output_dims}, in[0].data_type())}; } OpSchema::Cost CostInferenceForBatchOneHot( @@ -90,11 +90,11 @@ OpSchema::Cost CostInferenceForBatchOneHot( template <> void OneHotOp::DoOneHotOp( - TIndex batch_size, - TIndex index_size, + int64_t batch_size, + int64_t index_size, const Tensor& indices, Tensor* one_hots) { - const TIndex* indices_ptr = indices.template data(); + const int64_t* indices_ptr = indices.template data(); float* one_hots_ptr = one_hots->template mutable_data(); memset(one_hots_ptr, 0, one_hots->nbytes()); for (int i = 0; i < batch_size; ++i) { @@ -122,8 +122,8 @@ bool BatchBucketOneHotOp::RunOnDevice() { boundaries.size(), "The sum of length should be equal to the length of boundaries"); - TIndex output_dim = 0; - for (TIndex i = 0; i < D; i++) { + int64_t output_dim = 0; + for (int64_t i = 0; i < D; i++) { CAFFE_ENFORCE_GT(lens_data[i], 0); // Number of buckets is number of bucket edges + 1 output_dim += (lens_data[i] + 1); @@ -137,26 +137,26 @@ bool BatchBucketOneHotOp::RunOnDevice() { math::Set(output->size(), 0.f, output_data, &context_); - TIndex pos = 0; - for (TIndex i = 0; i < N; i++) { + int64_t pos = 0; + for (int64_t i = 0; i < N; i++) { auto* boundaries_offset = boundaries_data; - TIndex output_offset = 0; + int64_t output_offset = 0; - for (TIndex j = 0; j < D; j++) { + for (int64_t j = 0; j < D; j++) { // here we assume the boundary values for each feature are sorted - TIndex 
lower_bucket_idx = std::lower_bound( + int64_t lower_bucket_idx = std::lower_bound( boundaries_offset, boundaries_offset + lens_data[j], input_data[pos]) - boundaries_offset; - TIndex upper_bucket_idx = std::upper_bound( + int64_t upper_bucket_idx = std::upper_bound( boundaries_offset, boundaries_offset + lens_data[j], input_data[pos]) - boundaries_offset; - TIndex bucket_idx = (lower_bucket_idx + upper_bucket_idx) / 2; + int64_t bucket_idx = (lower_bucket_idx + upper_bucket_idx) / 2; output_data[i * output_dim + output_offset + bucket_idx] = 1.0; boundaries_offset += lens_data[j]; output_offset += (lens_data[j] + 1); diff --git a/caffe2/operators/one_hot_ops.cu b/caffe2/operators/one_hot_ops.cu index e1b6e18daf87..1528f9418823 100644 --- a/caffe2/operators/one_hot_ops.cu +++ b/caffe2/operators/one_hot_ops.cu @@ -6,9 +6,9 @@ namespace caffe2 { __global__ void OneHotOpKernel( - const TIndex batch_size, - const TIndex index_size, - const TIndex* indices, + const int64_t batch_size, + const int64_t index_size, + const int64_t* indices, float* output) { CUDA_1D_KERNEL_LOOP(i, batch_size) { output[i * index_size + indices[i]] = 1.; @@ -17,8 +17,8 @@ __global__ void OneHotOpKernel( template <> void OneHotOp::DoOneHotOp( - TIndex batch_size, - TIndex index_size, + int64_t batch_size, + int64_t index_size, const Tensor& indices, Tensor* output) { float* output_ptr = output->template mutable_data(); @@ -28,7 +28,7 @@ void OneHotOp::DoOneHotOp( CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>( - batch_size, index_size, indices.data(), output_ptr); + batch_size, index_size, indices.data(), output_ptr); } REGISTER_CUDA_OPERATOR(OneHot, OneHotOp); diff --git a/caffe2/operators/one_hot_ops.h b/caffe2/operators/one_hot_ops.h index 5b0e9a2a6656..826a7250f457 100644 --- a/caffe2/operators/one_hot_ops.h +++ b/caffe2/operators/one_hot_ops.h @@ -21,17 +21,17 @@ class OneHotOp final : public Operator { CAFFE_ENFORCE_EQ( indices.ndim(), 1, - "indices input must be 1D tensor of data type TIndex"); + "indices input must be 1D tensor of data type int64_t"); // Index size input must be in CPU context auto& index_size_tensor = this->template Input(1, CPU); CAFFE_ENFORCE_EQ( index_size_tensor.size(), 1, - "index_size_tensor input must be scalar of data type TIndex"); + "index_size_tensor input must be scalar of data type int64_t"); auto batch_size = indices.size(); - auto index_size = *index_size_tensor.template data(); + auto index_size = *index_size_tensor.template data(); auto one_hots = Output(0); one_hots->Resize(batch_size, index_size); auto output_size = one_hots->size(); @@ -45,8 +45,8 @@ class OneHotOp final : public Operator { protected: void DoOneHotOp( - TIndex batch_size, - TIndex index_size, + int64_t batch_size, + int64_t index_size, const Tensor& indices, Tensor* output); }; @@ -72,7 +72,7 @@ class BatchOneHotOp final : public Operator { private: // allows for fast random access to a given dict and is re-used across runs - std::vector valsOffsets_; + std::vector valsOffsets_; }; template diff --git a/caffe2/operators/onnx_while_op.h b/caffe2/operators/onnx_while_op.h index fc6cc2739ebe..dbd510395246 100644 --- a/caffe2/operators/onnx_while_op.h +++ b/caffe2/operators/onnx_while_op.h @@ -117,7 +117,7 @@ class ONNXWhileOp final : public Operator { // Use this to keep track of the sizes of the scan outputs and validate // they're the same across iterations. 
- std::vector> scan_outputs_sizes; + std::vector> scan_outputs_sizes; Workspace *cur_ws = nullptr; bool cur_output_condition = false; @@ -165,8 +165,8 @@ class ONNXWhileOp final : public Operator { dims.insert(dims.begin(), itr); scan_output_target->Extend(1, 2.0f, &context_); - TIndex timestep_size = 1; - for (const TIndex t : scan_outputs_sizes[i]) { + int64_t timestep_size = 1; + for (const int64_t t : scan_outputs_sizes[i]) { timestep_size *= t; } diff --git a/caffe2/operators/onnxifi_op.cc b/caffe2/operators/onnxifi_op.cc index aee7fff4bc33..d1b0824f1b31 100644 --- a/caffe2/operators/onnxifi_op.cc +++ b/caffe2/operators/onnxifi_op.cc @@ -90,7 +90,7 @@ bool OnnxifiOp::RunOnDevice() { for (unsigned i = 0U; i < OutputSize(); ++i) { auto* output_tensor = Output(i); - std::vector tensor_dims; + std::vector tensor_dims; SetOutputShape(i, &tensor_dims); output_tensor->Resize(tensor_dims); auto& tensor_descriptor = output_desc_.at(i); diff --git a/caffe2/operators/onnxifi_op.h b/caffe2/operators/onnxifi_op.h index f354069d4082..3ebf8be05179 100644 --- a/caffe2/operators/onnxifi_op.h +++ b/caffe2/operators/onnxifi_op.h @@ -38,7 +38,7 @@ class OnnxifiOp final : public Operator { const std::string key = MakeString("output_size_hint_", output_idx); auto output_size_hint = this->template GetRepeatedArgument(key); if (!output_size_hint.empty()) { - std::vector dims; + std::vector dims; for (const auto v : output_size_hint) { dims.push_back(v); } @@ -127,7 +127,7 @@ class OnnxifiOp final : public Operator { bool RunOnDevice() override; private: - void SetOutputShape(int output_idx, std::vector* dims) { + void SetOutputShape(int output_idx, std::vector* dims) { const auto it = output_size_hints_.find(output_idx); if (it != output_size_hints_.end()) { *dims = it->second; @@ -163,7 +163,7 @@ class OnnxifiOp final : public Operator { std::vector> output_shapes_; // output shape hints - std::unordered_map> output_size_hints_; + std::unordered_map> output_size_hints_; }; } // namespace caffe2 diff --git a/caffe2/operators/operator_fallback_gpu_test.cc b/caffe2/operators/operator_fallback_gpu_test.cc index 59d765d58604..964708bc1090 100644 --- a/caffe2/operators/operator_fallback_gpu_test.cc +++ b/caffe2/operators/operator_fallback_gpu_test.cc @@ -36,7 +36,7 @@ TEST(OperatorFallbackTest, IncrementByOneOp) { "IncrementByOne", "", vector{"X"}, vector{"X"}); Workspace ws; - Tensor source_tensor(vector{2, 3}, CPU); + Tensor source_tensor(vector{2, 3}, CPU); for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } @@ -60,7 +60,7 @@ TEST(OperatorFallbackTest, GPUIncrementByOneOp) { vector{"X"}); op_def.mutable_device_option()->set_device_type(PROTO_CUDA); Workspace ws; - Tensor source_tensor(vector{2, 3}, CPU); + Tensor source_tensor(vector{2, 3}, CPU); for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } diff --git a/caffe2/operators/order_switch_ops.cc b/caffe2/operators/order_switch_ops.cc index 502295cb00b9..3e0940f91a83 100644 --- a/caffe2/operators/order_switch_ops.cc +++ b/caffe2/operators/order_switch_ops.cc @@ -10,7 +10,7 @@ bool NHWC2NCHWOp::RunOnDevice() { auto ndim = X.ndim(); CAFFE_ENFORCE_GE(ndim, 3); const int N = X.dim32(0), C = X.dim32(ndim - 1); - vector Y_dims(ndim); + vector Y_dims(ndim); Y_dims[0] = N; Y_dims[1] = C; int image_size = 1; @@ -47,7 +47,7 @@ bool NCHW2NHWCOp::RunOnDevice() { auto ndim = X.ndim(); CAFFE_ENFORCE_GE(X.ndim(), 3); const int N = X.dim32(0), C = X.dim32(1); - vector Y_dims(ndim); + vector Y_dims(ndim); Y_dims[0] = N; int image_size = 1; for 
(auto i = 1; i < ndim - 1; ++i) { diff --git a/caffe2/operators/order_switch_ops.cu b/caffe2/operators/order_switch_ops.cu index f63a7d87fa88..d58f01e51863 100644 --- a/caffe2/operators/order_switch_ops.cu +++ b/caffe2/operators/order_switch_ops.cu @@ -98,7 +98,7 @@ bool NHWC2NCHWOp::RunOnDevice() { CAFFE_ENFORCE_GE(ndim, 3); const int N = X.dim32(0); const int C = X.dim32(ndim - 1); - vector Y_dims(ndim); + vector Y_dims(ndim); Y_dims[0] = N; Y_dims[1] = C; int HxW = 1; @@ -134,7 +134,7 @@ bool NCHW2NHWCOp::RunOnDevice() { CAFFE_ENFORCE_GE(X.ndim(), 3); const int N = X.dim32(0); const int C = X.dim32(1); - vector Y_dims(ndim); + vector Y_dims(ndim); Y_dims[0] = N; int HxW = 1; for (auto i = 1; i < ndim - 1; ++i) { diff --git a/caffe2/operators/pack_rnn_sequence_op.h b/caffe2/operators/pack_rnn_sequence_op.h index 74d40f6bfd47..534139afbcc4 100644 --- a/caffe2/operators/pack_rnn_sequence_op.h +++ b/caffe2/operators/pack_rnn_sequence_op.h @@ -30,7 +30,7 @@ class PackRNNSequenceOpBase : public Operator { CAFFE_ENFORCE_GT(values.ndim(), dim_offset); // block_size is the size for each individual feature - TIndex block_size = values.size_from_dim(dim_offset); + int64_t block_size = values.size_from_dim(dim_offset); auto values_vec = values.template data(); auto& lengths = Input(LENGTHS); @@ -47,7 +47,7 @@ class PackRNNSequenceOpBase : public Operator { math::Sum(cols, lengths_vec, &length_sum, &context_); } - vector shape; + vector shape; // the output shape is rows * cols for the pack, // or length_sum for the sequence if (Forward) { diff --git a/caffe2/operators/pack_segments.cc b/caffe2/operators/pack_segments.cc index ab831445e56e..9659d1eddd48 100644 --- a/caffe2/operators/pack_segments.cc +++ b/caffe2/operators/pack_segments.cc @@ -27,7 +27,7 @@ bool PackSegmentsOp::DoRunWithType2() { // Find the length of the longest sequence. 
const T* l = lengths.template data(); T max_length = 0; - TIndex total_length = 0; + int64_t total_length = 0; for (T i = 0; i < lengths.dim(0); ++i) { max_length = std::max(max_length, l[i]); total_length += l[i]; @@ -61,7 +61,7 @@ bool PackSegmentsOp::DoRunWithType2() { bool* presence_mask_data = nullptr; if (return_presence_mask_) { // Shape of presence is batch_size x max_len - std::vector presence_shape{lengths.size(), max_length}; + std::vector presence_shape{lengths.size(), max_length}; presence_mask->Resize(presence_shape); presence_mask_data = presence_mask->template mutable_data(); } @@ -86,8 +86,8 @@ bool PackSegmentsOp::DoRunWithType2() { auto block_size = data.size_from_dim(1); auto block_bytesize = data.itemsize() * block_size; const auto* d = static_cast(data.raw_data()); - TIndex start = 0; - for (TIndex i = 0; i < lengths.dim(0); ++i) { + int64_t start = 0; + for (int64_t i = 0; i < lengths.dim(0); ++i) { context_.CopyItemsSameDevice( data.meta(), l[i] * block_size, @@ -127,7 +127,7 @@ bool UnpackSegmentsOp::DoRunWithType2() { } const T* l = lengths.template data(); - TIndex total_l = std::accumulate(l, l + lengths.dim(0), (TIndex)0); + int64_t total_l = std::accumulate(l, l + lengths.dim(0), (int64_t)0); auto shape = data.dims(); CAFFE_ENFORCE_EQ( @@ -143,8 +143,8 @@ bool UnpackSegmentsOp::DoRunWithType2() { auto block_size = data.size_from_dim(2); auto block_bytesize = data.itemsize() * block_size; const auto* d = static_cast(data.raw_data()); - TIndex start = 0; - for (TIndex i = 0; i < lengths.dim(0); ++i) { + int64_t start = 0; + for (int64_t i = 0; i < lengths.dim(0); ++i) { context_.CopyItemsSameDevice( data.meta(), l[i] * block_size, diff --git a/caffe2/operators/pack_segments.h b/caffe2/operators/pack_segments.h index 8d16bb658c17..6315c5906512 100644 --- a/caffe2/operators/pack_segments.h +++ b/caffe2/operators/pack_segments.h @@ -45,7 +45,7 @@ class PackSegmentsOp final : public Operator { INPUT_TAGS(LENGTHS, DATA); private: - TIndex max_length_; + int64_t max_length_; bool pad_minf_; float padding_; bool return_presence_mask_; @@ -80,7 +80,7 @@ class UnpackSegmentsOp final : public Operator { INPUT_TAGS(LENGTHS, DATA); private: - TIndex max_length_; + int64_t max_length_; Tensor dev_buffer_{Context::GetDeviceType()}; Tensor dev_lengths_prefix_sum_{Context::GetDeviceType()}; Tensor dev_max_length_{Context::GetDeviceType()}; diff --git a/caffe2/operators/partition_ops.h b/caffe2/operators/partition_ops.h index 89762e387160..94bd1e6150ce 100644 --- a/caffe2/operators/partition_ops.h +++ b/caffe2/operators/partition_ops.h @@ -41,7 +41,7 @@ class GatherByKeyOp : public Operator { const auto& in0Shape = Input(1).dims(); CAFFE_ENFORCE_GE(in0Shape.size(), 1); - vector outShape(keysShape); + vector outShape(keysShape); outShape.insert(outShape.end(), in0Shape.begin() + 1, in0Shape.end()); CAFFE_ENFORCE_GE(outShape.size(), 1); @@ -122,10 +122,10 @@ class PartitionOpBase : public Operator { CAFFE_ENFORCE_GT(partitions, 0, "Invalid number of partitions"); auto& main_input = Input(mainInputIndex); - TIndex size = main_input.size(); + int64_t size = main_input.size(); const Index* data = main_input.template data(); counts_.assign(partitions, 0); - for (TIndex p = 0; p < size; p++) { + for (int64_t p = 0; p < size; p++) { int shard = moduloPartition(data[p], partitions); ++counts_[shard]; } @@ -158,7 +158,7 @@ class PartitionOpBase : public Operator { block_sizes_[i] = input.size_from_dim(main_input.ndim()); metas_[i] = input.meta(); // shape = partition_size + suffix of 
input dims - vector shape( + vector shape( input.dims().begin() + main_input.ndim() - 1, input.dims().end()); for (int j = 0; j < partitions; ++j) { int out_idx = i + j * inputSize; @@ -170,9 +170,9 @@ class PartitionOpBase : public Operator { } counts_.assign(partitions, 0); - for (TIndex p = 0; p < size; p++) { + for (int64_t p = 0; p < size; p++) { int shard = moduloPartition(data[p], partitions); - TIndex idx = counts_[shard]++; + int64_t idx = counts_[shard]++; // special case first input static_cast(out_datas_[shard * inputSize + mainInputIndex])[idx] = @@ -196,8 +196,8 @@ class PartitionOpBase : public Operator { bool pack_first_input_; // use member fields to reuse memory - vector counts_; - vector block_sizes_; + vector counts_; + vector block_sizes_; vector metas_; vector raw_datas_; vector out_datas_; @@ -268,11 +268,11 @@ class LengthsPartitionOp : public PartitionOpBase { // Compute lengths after sharding auto& main_input = Input(1); - TIndex size = main_input.size(); + int64_t size = main_input.size(); const Index* data = main_input.template data(); auto& length_input = Input(0); - TIndex elements = length_input.size(); + int64_t elements = length_input.size(); const int32_t* lengths_data = length_input.template data(); out_length_.resize(partitions); for (int i = 0; i < partitions; ++i) { diff --git a/caffe2/operators/perplexity_op.cc b/caffe2/operators/perplexity_op.cc index 028a6077cc86..24b4e4fed1b9 100644 --- a/caffe2/operators/perplexity_op.cc +++ b/caffe2/operators/perplexity_op.cc @@ -10,7 +10,7 @@ bool PerplexityOp::RunOnDevice() { DCHECK_EQ(X.ndim(), 1); int N = X.dim32(0); - Y->Resize(vector()); + Y->Resize(vector()); const auto* Xdata = X.data(); float perplexity = 1.0; diff --git a/caffe2/operators/perplexity_op.cu b/caffe2/operators/perplexity_op.cu index 230bdb1601cb..83261471259d 100644 --- a/caffe2/operators/perplexity_op.cu +++ b/caffe2/operators/perplexity_op.cu @@ -25,7 +25,7 @@ bool PerplexityOp::RunOnDevice() { DCHECK_EQ(X.ndim(), 1); int N = X.dim32(0); - Y->Resize(vector()); + Y->Resize(vector()); float* Ydata = Y->template mutable_data(); const float* Xdata = X.data(); diff --git a/caffe2/operators/piecewise_linear_transform_op.cu b/caffe2/operators/piecewise_linear_transform_op.cu index 8dc2d4e02285..1d3e8503efeb 100644 --- a/caffe2/operators/piecewise_linear_transform_op.cu +++ b/caffe2/operators/piecewise_linear_transform_op.cu @@ -103,14 +103,14 @@ __global__ void PieceWiseLinearTransformBinaryKernel2( template <> void PiecewiseLinearTransformOp::setUpTensors( - TIndex& num_func_per_group, - TIndex& num_group, - TIndex M) { + int64_t& num_func_per_group, + int64_t& num_group, + int64_t M) { if (transform_param_from_arg_) { if (!gpu_copied_) { - TIndex num_bounds; - TIndex num_slopes; - TIndex num_intercepts; + int64_t num_bounds; + int64_t num_slopes; + int64_t num_intercepts; CAFFE_ENFORCE_EQ(InputSize(), 1); @@ -162,9 +162,9 @@ void PiecewiseLinearTransformOp::setUpTensors( gpu_copied_ = true; } } else { - TIndex num_bounds; - TIndex num_slopes; - TIndex num_intercepts; + int64_t num_bounds; + int64_t num_slopes; + int64_t num_intercepts; CAFFE_ENFORCE_EQ(InputSize(), 4); auto& bounds_input = Input(BOUNDS); auto& slopes_input = Input(SLOPES); @@ -196,12 +196,12 @@ bool PiecewiseLinearTransformOp::TransformGeneral() { auto& X = Input(0); auto* Y = Output(0); CAFFE_ENFORCE_EQ(X.ndim(), 2); - TIndex N = X.dim32(0); - TIndex M = X.dim32(1); + int64_t N = X.dim32(0); + int64_t M = X.dim32(1); Y->ResizeLike(X); - TIndex num_func_per_group; - TIndex 
num_group; + int64_t num_func_per_group; + int64_t num_group; setUpTensors(num_func_per_group, num_group, M); @@ -228,15 +228,15 @@ bool PiecewiseLinearTransformOp::TransformBinary() { auto& X = Input(0); auto* Y = Output(0); CAFFE_ENFORCE(X.ndim() == 1 || X.ndim() == 2); - TIndex N = X.dim32(0); - TIndex M = X.ndim() == 2 ? X.dim32(1) : 1; + int64_t N = X.dim32(0); + int64_t M = X.ndim() == 2 ? X.dim32(1) : 1; CAFFE_ENFORCE( M == 1 || M == 2, "If binary is set to true, the input must be Nx2 or Nx1 tensor"); Y->ResizeLike(X); - TIndex num_func_per_group; - TIndex num_group; + int64_t num_func_per_group; + int64_t num_group; setUpTensors(num_func_per_group, num_group, M); diff --git a/caffe2/operators/piecewise_linear_transform_op.h b/caffe2/operators/piecewise_linear_transform_op.h index 1b552b0ae766..19edaba5ed9f 100644 --- a/caffe2/operators/piecewise_linear_transform_op.h +++ b/caffe2/operators/piecewise_linear_transform_op.h @@ -32,11 +32,11 @@ class PiecewiseLinearTransformOp final : public Operator { // num_group: The number of groups of linear functions. Each group is for // transforming one column of predictions. void InferNumFunctionsPerGroup( - const TIndex num_bounds, - const TIndex num_slopes, - const TIndex num_intercepts, - TIndex* num_func_per_group, - TIndex* num_group) { + const int64_t num_bounds, + const int64_t num_slopes, + const int64_t num_intercepts, + int64_t* num_func_per_group, + int64_t* num_group) { CAFFE_ENFORCE_EQ(num_slopes, num_intercepts); // This is based on the facts: @@ -54,10 +54,10 @@ class PiecewiseLinearTransformOp final : public Operator { bool CheckBoundsSorted( const T* bounds, - const TIndex num_bounds_per_group, - const TIndex num_group) { + const int64_t num_bounds_per_group, + const int64_t num_group) { const T* start = bounds; - for (TIndex i = 0; i < num_group; i++) { + for (int64_t i = 0; i < num_group; i++) { if (!std::is_sorted(start, start + num_bounds_per_group)) { return false; } @@ -77,8 +77,8 @@ class PiecewiseLinearTransformOp final : public Operator { good_param == 0 || good_param == 3, "bounds, slopes, intercepts must be all set or all not set"); if (good_param == 3) { - TIndex num_func_per_group; - TIndex num_group; + int64_t num_func_per_group; + int64_t num_group; InferNumFunctionsPerGroup( bounds_from_arg_.size(), slopes_from_arg_.size(), @@ -94,17 +94,17 @@ class PiecewiseLinearTransformOp final : public Operator { return good_param == 3; } - void setUpTensors(TIndex& num_func_per_group, TIndex& num_group, TIndex M); + void setUpTensors(int64_t& num_func_per_group, int64_t& num_group, int64_t M); void GetTransParamData( const T** bounds, const T** slopes, const T** intercepts, - TIndex* num_func_per_group, - TIndex* num_group) { - TIndex num_bounds; - TIndex num_slopes; - TIndex num_intercepts; + int64_t* num_func_per_group, + int64_t* num_group) { + int64_t num_bounds; + int64_t num_slopes; + int64_t num_intercepts; if (transform_param_from_arg_) { CAFFE_ENFORCE_EQ(InputSize(), 1); @@ -134,8 +134,8 @@ class PiecewiseLinearTransformOp final : public Operator { auto& X = Input(0); auto* Y = Output(0); CAFFE_ENFORCE_EQ(X.ndim(), 2); - TIndex N = X.dim32(0); - TIndex M = X.dim32(1); + int64_t N = X.dim32(0); + int64_t M = X.dim32(1); Y->ResizeLike(X); const auto* Xdata = X.template data(); T* Ydata = Y->template mutable_data(); @@ -143,17 +143,17 @@ class PiecewiseLinearTransformOp final : public Operator { const T* bounds; const T* slopes; const T* intercepts; - TIndex num_func_per_group; - TIndex num_group; + int64_t 
num_func_per_group; + int64_t num_group; GetTransParamData( &bounds, &slopes, &intercepts, &num_func_per_group, &num_group); CAFFE_ENFORCE_EQ(num_group, M); - for (TIndex j = 0; j < M; ++j) { + for (int64_t j = 0; j < M; ++j) { const T* bounds_group = bounds + j * (num_func_per_group + 1); const T* slopes_group = slopes + j * num_func_per_group; const T* intercepts_group = intercepts + j * num_func_per_group; - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { Ydata[i * M + j] = PiecewiseLinearTransform( Xdata[i * M + j], bounds_group, @@ -169,8 +169,8 @@ class PiecewiseLinearTransformOp final : public Operator { auto& X = Input(PREDICTIONS); auto* Y = Output(0); CAFFE_ENFORCE(X.ndim() == 1 || X.ndim() == 2); - TIndex N = X.dim32(0); - TIndex M = X.ndim() == 2 ? X.dim32(1) : 1; + int64_t N = X.dim32(0); + int64_t M = X.ndim() == 2 ? X.dim32(1) : 1; CAFFE_ENFORCE( M == 1 || M == 2, "If binary is set to true, the input must be Nx2 or Nx1 tensor"); @@ -181,19 +181,19 @@ class PiecewiseLinearTransformOp final : public Operator { const T* bounds; const T* slopes; const T* intercepts; - TIndex num_func_per_group; - TIndex num_group; + int64_t num_func_per_group; + int64_t num_group; GetTransParamData( &bounds, &slopes, &intercepts, &num_func_per_group, &num_group); CAFFE_ENFORCE_EQ(num_group, 1); if (M == 1) { - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { Ydata[i] = PiecewiseLinearTransform( Xdata[i], bounds, slopes, intercepts, num_func_per_group); } } else { - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { Ydata[i * M + 1] = PiecewiseLinearTransform( Xdata[i * M + 1], bounds, slopes, intercepts, num_func_per_group); Ydata[i * M] = 1.0f - Ydata[i * M + 1]; @@ -208,7 +208,7 @@ class PiecewiseLinearTransformOp final : public Operator { const T* bounds, const T* slopes, const T* intercepts, - const TIndex num_func_per_group) { + const int64_t num_func_per_group) { T y = 0; // deal with samples out of bounds // make it the same as the upper/lower bound value diff --git a/caffe2/operators/pool_op_cudnn.cu b/caffe2/operators/pool_op_cudnn.cu index eda02a220301..7552f5b29581 100644 --- a/caffe2/operators/pool_op_cudnn.cu +++ b/caffe2/operators/pool_op_cudnn.cu @@ -285,7 +285,7 @@ class CuDNNPoolOp : public ConvPoolOpBase { } protected: - vector cudnn_input_dims_; + vector cudnn_input_dims_; CuDNNWrapper cudnn_wrapper_; cudnnTensorDescriptor_t bottom_desc_; @@ -498,7 +498,7 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase { } protected: - vector cudnn_input_dims_; + vector cudnn_input_dims_; CuDNNWrapper cudnn_wrapper_; cudnnTensorDescriptor_t bottom_desc_; diff --git a/caffe2/operators/reducer_functors.h b/caffe2/operators/reducer_functors.h index 6d357e1b9f99..dd9a858e9c31 100644 --- a/caffe2/operators/reducer_functors.h +++ b/caffe2/operators/reducer_functors.h @@ -27,8 +27,8 @@ template class SumRangeReducer { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* in, T* out, CPUContext* /*context*/) { @@ -42,15 +42,15 @@ template class SumRangeReducerGradient { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* segment_grad, T* data_grad, const T* /*data_in*/, // unused const T* /*data_out*/, // unused Context* context) { // do we have some op that does it smartly with minimum number of memcpy? 
- for (TIndex i = 0; i < blocks; ++i) { + for (int64_t i = 0; i < blocks; ++i) { context->template CopySameDevice( block_size, segment_grad, data_grad + block_size * i); } @@ -78,8 +78,8 @@ template class LogSumExpRangeReducer { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* in, T* out, CPUContext* /*context*/) { @@ -102,8 +102,8 @@ template class LogSumExpRangeReducerGradient { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* segment_grad, // GO T* data_grad, // GI const T* data_in, // I @@ -140,8 +140,8 @@ template class LogMeanExpRangeReducer { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* in, T* out, CPUContext* /*context*/) { @@ -164,8 +164,8 @@ template class LogMeanExpRangeReducerGradient { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* segment_grad, // GO T* data_grad, // GI const T* data_in, // I @@ -202,8 +202,8 @@ template class MeanRangeReducer { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* in, T* out, CPUContext* /*context*/) { @@ -221,8 +221,8 @@ template class MeanRangeReducerGradient { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* segment_grad, // GO T* data_grad, // GI const T* /*data_in*/, // I @@ -261,8 +261,8 @@ template class MaxRangeReducer { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* in, T* out, CPUContext* /*context*/) { @@ -280,8 +280,8 @@ template class MaxRangeReducerGradient { public: void operator()( - const TIndex block_size, - const TIndex blocks, + const int64_t block_size, + const int64_t blocks, const T* segment_grad, // GO T* data_grad, // GI const T* data_in, // I @@ -329,13 +329,13 @@ class BaseReducer { static constexpr int kInputCount = 1; struct Meta { - TIndex block_size; - vector block_shape; + int64_t block_size; + vector block_shape; bool first_dim; explicit Meta(bool first = true) : first_dim(first) {} - void computeMeta(const std::vector& dims, int skip_dims) { + void computeMeta(const std::vector& dims, int skip_dims) { first_dim ? block_shape.assign(dims.begin() + skip_dims, dims.end()) : block_shape.assign(dims.begin(), dims.end() - skip_dims); block_size = first_dim ? 
size_from_dim_(skip_dims, dims) @@ -348,13 +348,13 @@ class BaseReducer { computeMeta(dims, skip_dims); } - void appendOutputShape(vector* output_shape) { + void appendOutputShape(vector* output_shape) { output_shape->insert( output_shape->end(), block_shape.begin(), block_shape.end()); } - vector getOutputShape(const TensorShape& in, int skip_dims) { - vector dims(in.dims().begin(), in.dims().end()); + vector getOutputShape(const TensorShape& in, int skip_dims) { + vector dims(in.dims().begin(), in.dims().end()); computeMeta(dims, skip_dims); return block_shape; } @@ -389,8 +389,8 @@ class BaseReducerGradient { } struct Meta { - TIndex block_size; - vector block_shape; + int64_t block_size; + vector block_shape; bool first_dim; Meta(const Tensor& out_grad, int skip_dims, bool first_dim = true) @@ -409,7 +409,7 @@ class BaseReducerGradient { Tensor* /*input_grad*/, // optional grad to populate int /*skip_dims*/) {} - void appendGradShape(vector* output_shape) { + void appendGradShape(vector* output_shape) { output_shape->insert( output_shape->end(), block_shape.begin(), block_shape.end()); } @@ -438,7 +438,7 @@ class SumReducer : public BaseReducer { void process( const Meta& meta, const T* in, - TIndex /*offset*/, + int64_t /*offset*/, CPUContext* context) { if (meta.first_dim) { math::AxpyFixedSize( @@ -469,7 +469,7 @@ class SumReducerGradient : public BaseReducerGradient { void fillGrad( const Meta& meta, T* data_grad, - TIndex offset, + int64_t offset, Context* context, const int length) { if (FixedSize == 1) { // static if @@ -535,7 +535,7 @@ class WeightedSumReducer : public BaseReducer { } template void - process(const Meta& meta, const T* in, TIndex offset, CPUContext* context) { + process(const Meta& meta, const T* in, int64_t offset, CPUContext* context) { CAFFE_ENFORCE( meta.first_dim, "WeightedSumReducer implemented only for " @@ -596,7 +596,7 @@ class WeightedSumReducerGradient : public BaseReducerGradient { void fillGrad( const Meta& meta, T* data_grad, - TIndex offset, + int64_t offset, Context* context, const int /*length*/) { math::ScaleFixedSize( @@ -610,7 +610,7 @@ class WeightedSumReducerGradient : public BaseReducerGradient { const Meta& meta, const T* data, T* data_grad, - TIndex offset, + int64_t offset, Context* context, const int /*length*/) { math::ScaleFixedSize( @@ -667,7 +667,7 @@ class MeanReducer : public BaseReducer { void process( const Meta& meta, const T* in, - TIndex /*offset*/, + int64_t /*offset*/, CPUContext* context) { if (meta.first_dim) { math::AxpyFixedSize( @@ -716,7 +716,7 @@ class MeanReducerGradient : public BaseReducerGradient { void fillGrad( const Meta& meta, T* data_grad, - TIndex offset, + int64_t offset, Context* context, const int length) { CAFFE_ENFORCE_GT(length, 0, "Segment length must be > 0"); @@ -765,7 +765,7 @@ class MaxReducer : public BaseReducer { void process( const Meta& meta, const T* in, - TIndex /*offset*/, + int64_t /*offset*/, CPUContext* context) { CAFFE_ENFORCE( meta.first_dim, @@ -810,10 +810,10 @@ class MaxReducerGradient : public BaseReducerGradient { const T* data, T* data_grad, const T* forward_output, - TIndex /*offset*/, + int64_t /*offset*/, Context* /*context*/, const int /*length*/) { - for (TIndex i = 0; i < meta.block_size; ++i) { + for (int64_t i = 0; i < meta.block_size; ++i) { data_grad[i] = data[i] == forward_output[i] ? 
s_grad_[i] : 0; } } diff --git a/caffe2/operators/reduction_front_back_ops.h b/caffe2/operators/reduction_front_back_ops.h index 3c66fa4fe202..98fa71c1831a 100644 --- a/caffe2/operators/reduction_front_back_ops.h +++ b/caffe2/operators/reduction_front_back_ops.h @@ -32,7 +32,7 @@ class SumReduceDimsOp final : public Operator { num_reduce_dims_ >= 0 && num_reduce_dims_ <= X.dims().size(), "For N-dim input tensor, support num_reduce_dims in range [0, N]."); - vector output_shape; + vector output_shape; int start_index = FIRSTDIMS ? num_reduce_dims_ : 0; int end_index = FIRSTDIMS ? X.dims().size() : X.dims().size() - num_reduce_dims_; @@ -109,13 +109,13 @@ class SumReduceDimsGradientOp final : public Operator { // the shape of the input to the data tensor. This made the backward // computation incompatible with old models. To fix this, we check // the dimension and type of Input(1). - if (input_1.ndim() == 1 && input_1.template IsType()) { + if (input_1.ndim() == 1 && input_1.template IsType()) { // Input(1) is the shape of the input shape_.CopyFrom(input_1); // Copy first dims - vector output_shape( - shape_.template data(), - shape_.template data() + shape_.size()); + vector output_shape( + shape_.template data(), + shape_.template data() + shape_.size()); dX->Resize(output_shape); } else { // Input(1) is data tensor X @@ -183,7 +183,7 @@ class MaxReduceDimsOp final : public Operator { const int cols = FIRSTDIMS ? X.size_from_dim(num_reduce_dims_) : X.size_from_dim(X.ndim() - num_reduce_dims_); - vector output_shape; + vector output_shape; int start_index = FIRSTDIMS ? num_reduce_dims_ : 0; int end_index = FIRSTDIMS ? X.dims().size() : X.dims().size() - num_reduce_dims_; diff --git a/caffe2/operators/reduction_ops.h b/caffe2/operators/reduction_ops.h index 90ac549934c2..6c867ba6c3e8 100644 --- a/caffe2/operators/reduction_ops.h +++ b/caffe2/operators/reduction_ops.h @@ -24,7 +24,7 @@ class SumElementsOp : public Operator { bool RunOnDevice() override { auto& X = Input(0); auto* sum = Output(0); - sum->Resize(vector()); + sum->Resize(vector()); T* data = sum->template mutable_data(); @@ -58,7 +58,7 @@ class SumElementsIntOp : public Operator { bool RunOnDevice() override { auto& X = Input(0); auto* sum = Output(0); - sum->Resize(vector()); + sum->Resize(vector()); T* data = sum->template mutable_data(); math::Sum( X.size(), X.template data(), data, &context_, &scratch_); @@ -105,7 +105,7 @@ class SumSqrElementsOp : public Operator { bool average = this->template GetSingleArgument("average", false); auto& X = Input(0); auto* sum = Output(0); - sum->Resize(vector()); + sum->Resize(vector()); math::SumSqr( X.size(), X.template data(), diff --git a/caffe2/operators/replace_nan_op.cc b/caffe2/operators/replace_nan_op.cc index a0c7b271567f..0e8193a0d2ed 100644 --- a/caffe2/operators/replace_nan_op.cc +++ b/caffe2/operators/replace_nan_op.cc @@ -6,10 +6,10 @@ template <> template void ReplaceNaNOp::ReplaceNaN( const T& value, - const TIndex size, + const int64_t size, const T* X, T* Y) { - for (TIndex i = 0; i < size; i++) { + for (int64_t i = 0; i < size; i++) { if (std::isnan(X[i])) { Y[i] = value; } else { diff --git a/caffe2/operators/replace_nan_op.cu b/caffe2/operators/replace_nan_op.cu index e84fb3e62cf9..f6cdbed5b35d 100644 --- a/caffe2/operators/replace_nan_op.cu +++ b/caffe2/operators/replace_nan_op.cu @@ -6,7 +6,7 @@ namespace caffe2 { namespace { template __global__ void -replace_nan_kernel(const T value, const TIndex size, const T* X, T* Y) { +replace_nan_kernel(const T value, const 
int64_t size, const T* X, T* Y) { CUDA_1D_KERNEL_LOOP(i, size) { if (isnan(X[i])) { Y[i] = value; @@ -21,7 +21,7 @@ template <> template void ReplaceNaNOp::ReplaceNaN( const T& value, - const TIndex size, + const int64_t size, const T* X, T* Y) { replace_nan_kernel<<< diff --git a/caffe2/operators/replace_nan_op.h b/caffe2/operators/replace_nan_op.h index 6256aef807a9..ee62c45a6d28 100644 --- a/caffe2/operators/replace_nan_op.h +++ b/caffe2/operators/replace_nan_op.h @@ -20,7 +20,7 @@ class ReplaceNaNOp final : public Operator { } template - void ReplaceNaN(const T& value, const TIndex size, const T* X, T* Y); + void ReplaceNaN(const T& value, const int64_t size, const T* X, T* Y); template bool DoRunWithType() { diff --git a/caffe2/operators/reshape_op_gpu_test.cc b/caffe2/operators/reshape_op_gpu_test.cc index 3786e0a4245f..3537ab69d058 100644 --- a/caffe2/operators/reshape_op_gpu_test.cc +++ b/caffe2/operators/reshape_op_gpu_test.cc @@ -12,7 +12,7 @@ CAFFE2_DECLARE_string(caffe_test_root); namespace caffe2 { static void AddConstInput( - const vector& shape, + const vector& shape, const float value, const string& name, Workspace* ws) { @@ -39,7 +39,7 @@ TEST(ReshapeOpGPUTest, testReshapeWithScalar) { def.add_output("OldShape"); def.add_arg()->CopyFrom(MakeArgument("shape", vector{1})); def.mutable_device_option()->set_device_type(PROTO_CUDA); - AddConstInput(vector(), 3.14, "X", &ws); + AddConstInput(vector(), 3.14, "X", &ws); // execute the op unique_ptr op(CreateOperator(def, &ws)); EXPECT_TRUE(op->Run()); diff --git a/caffe2/operators/reverse_packed_segs_op.h b/caffe2/operators/reverse_packed_segs_op.h index f0bdbcf48211..6c79f1755863 100644 --- a/caffe2/operators/reverse_packed_segs_op.h +++ b/caffe2/operators/reverse_packed_segs_op.h @@ -63,10 +63,10 @@ class ReversePackedSegsOp final : public Operator { context_.FinishDeviceComputation(); T* rev_data_ptr = output->template mutable_data(); - for (TIndex i = 0; i < batch_size; i++) { + for (int64_t i = 0; i < batch_size; i++) { const auto& seg_length = lengths_host[i]; CAFFE_ENFORCE_LE(seg_length, max_length); - TIndex j = 0; + int64_t j = 0; for (; j < seg_length; j++) { const T* data_block_ptr = data_ptr + (j * batch_size + i) * block_size; T* rev_data_block_ptr = diff --git a/caffe2/operators/rnn/hip/recurrent_op_miopen.h b/caffe2/operators/rnn/hip/recurrent_op_miopen.h index 13dc3abff1d5..8a861d38def5 100644 --- a/caffe2/operators/rnn/hip/recurrent_op_miopen.h +++ b/caffe2/operators/rnn/hip/recurrent_op_miopen.h @@ -56,7 +56,7 @@ class RecurrentBaseOp : public Operator { std::unique_ptr> xDesc_; std::unique_ptr> yDesc_; - std::vector cachedInputDims_; + std::vector cachedInputDims_; size_t reserveNbytes_; size_t miopenWsNbytes_; diff --git a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h index 86b7a69fe0d1..98675cea858d 100644 --- a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h +++ b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h @@ -31,7 +31,7 @@ class RecurrentNetworkBlobFetcherOp final : public Operator { std::vector blob_names_vector = {}; - for (TIndex i = 0; i < stepWorkspaces.size(); i++) { + for (int64_t i = 0; i < stepWorkspaces.size(); i++) { Workspace* currentStepWorkspace = stepWorkspaces[i].get(); std::vector blob_names = currentStepWorkspace->LocalBlobs(); diff --git a/caffe2/operators/rnn/recurrent_network_op.h b/caffe2/operators/rnn/recurrent_network_op.h index 00595198b6db..2421bc44263a 100644 --- 
a/caffe2/operators/rnn/recurrent_network_op.h +++ b/caffe2/operators/rnn/recurrent_network_op.h @@ -900,7 +900,7 @@ class RNNApplyLinkOp : public Operator { auto* external_out = Output(1); CAFFE_ENFORCE_GT(external.size(), 0); - const TIndex externalTimestepSize = external.size() / external.dim(0); + const int64_t externalTimestepSize = external.size() / external.dim(0); auto* externalData = external_out->template mutable_data() + (t + offset_) * externalTimestepSize; auto internalDims = external_out->dims(); diff --git a/caffe2/operators/rnn/recurrent_op_cudnn.h b/caffe2/operators/rnn/recurrent_op_cudnn.h index 5c70b5262029..f3947901b619 100644 --- a/caffe2/operators/rnn/recurrent_op_cudnn.h +++ b/caffe2/operators/rnn/recurrent_op_cudnn.h @@ -56,7 +56,7 @@ class RecurrentBaseOp : public Operator { std::unique_ptr> xDesc_; std::unique_ptr> yDesc_; - std::vector cachedInputDims_; + std::vector cachedInputDims_; size_t reserveNbytes_; size_t cudnnWsNbytes_; diff --git a/caffe2/operators/roi_align_op_gpu_test.cc b/caffe2/operators/roi_align_op_gpu_test.cc index eca60a6006a2..2647a97d6f0b 100644 --- a/caffe2/operators/roi_align_op_gpu_test.cc +++ b/caffe2/operators/roi_align_op_gpu_test.cc @@ -12,7 +12,7 @@ namespace { template void AddConstInput( - const vector& shape, + const vector& shape, const float value, const string& name, Context* context, @@ -27,14 +27,14 @@ void AddConstInput( template void AddInput( - const vector& shape, + const vector& shape, const vector& values, const string& name, Workspace* ws); template <> void AddInput( - const vector& shape, + const vector& shape, const vector& values, const string& name, Workspace* ws) { @@ -48,7 +48,7 @@ void AddInput( template <> void AddInput( - const vector& shape, + const vector& shape, const vector& values, const string& name, Workspace* ws) { @@ -102,10 +102,10 @@ void CreateAndRun( vector features(N * C * H * W); std::iota(features.begin(), features.end(), 0); // utils::AsEArrXt(features) /= features.size(); - AddInput(vector{N, C, H, W}, features, "X", &ws); + AddInput(vector{N, C, H, W}, features, "X", &ws); const int n_rois = test_params.n_rois; const vector& rois = test_params.rois_array; - AddInput(vector{n_rois, 5}, rois, "R", &ws); + AddInput(vector{n_rois, 5}, rois, "R", &ws); } else { const int N = 2; const int C = 3; @@ -114,7 +114,7 @@ void CreateAndRun( vector features(N * C * H * W); std::iota(features.begin(), features.end(), 0); // utils::AsEArrXt(features) /= features.size(); - AddInput(vector{N, C, H, W}, features, "X", &ws); + AddInput(vector{N, C, H, W}, features, "X", &ws); vector rois{0, 0, 0, 79, 59, 0, 0, 5.0005703f, 52.63237f, 43.69501495f, 0, 24.13628387f, 7.51243401f, 79, 46.06628418f, @@ -124,7 +124,7 @@ void CreateAndRun( 0, 23.57396317f, 29.98791885f, 79, 59, 0, 0, 41.90219116f, 79, 59, 0, 0, 23.30098343f, 79, 59}; - AddInput(vector{9, 5}, rois, "R", &ws); + AddInput(vector{9, 5}, rois, "R", &ws); } std::vector> ops; diff --git a/caffe2/operators/segment_reduction_op.h b/caffe2/operators/segment_reduction_op.h index fa0f64f1eca2..9e7ab6d60401 100644 --- a/caffe2/operators/segment_reduction_op.h +++ b/caffe2/operators/segment_reduction_op.h @@ -19,7 +19,7 @@ class BaseInputAccessor { } inline const TData* - getBlockPtr(TIndex in_block_size, TIndex idx, TIndex /* blocks */ = 1) { + getBlockPtr(int64_t in_block_size, int64_t idx, int64_t /* blocks */ = 1) { return static_cast(data_) + in_block_size * idx; } @@ -82,12 +82,12 @@ class AbstractSortedSegmentRangeOp : public Operator { return true; } - TIndex 
block_size = dataInput.size() / N; + int64_t block_size = dataInput.size() / N; // Assume the segments are sorted and there are no gaps CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps"); - for (TIndex i = 0; i < N;) { - TIndex start = i; + for (int64_t i = 0; i < N;) { + int64_t start = i; for (++i; i < N && s_ids[start] == s_ids[i]; ++i) ; @@ -135,7 +135,7 @@ class AbstractSortedSegmentRangeGradientOp : public Operator { auto* data_grads = Output(0); CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex N = segment_ids.dim(0); + int64_t N = segment_ids.dim(0); const SIndex* s_ids = segment_ids.template data(); const T* s_grads = segment_grads.template data(); @@ -153,15 +153,15 @@ class AbstractSortedSegmentRangeGradientOp : public Operator { return true; } - TIndex block_size = segment_grads.size_from_dim(1); + int64_t block_size = segment_grads.size_from_dim(1); // Assume the segments are sorted and there are no gaps CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps"); // repeat the check from forward op CAFFE_ENFORCE_EQ( K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps"); - for (TIndex i = 0; i < N;) { - TIndex start = i; + for (int64_t i = 0; i < N;) { + int64_t start = i; for (++i; i < N && s_ids[start] == s_ids[i]; ++i) ; @@ -292,7 +292,7 @@ class AbstractReduceFrontOrBackOp : public Operator { auto& data = Input(0); // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex in_block_size = FirstDim + int64_t in_block_size = FirstDim ? data.size_from_dim(num_reduce_dims_) : data.size_to_dim(data.ndim() - num_reduce_dims_); return DispatchHelper::call( @@ -319,7 +319,7 @@ class AbstractReduceFrontOrBackOp : public Operator { data.meta().name(), "."); - vector shape; + vector shape; ctx.appendOutputShape(&shape); output->Resize(shape); @@ -332,7 +332,7 @@ class AbstractReduceFrontOrBackOp : public Operator { const int num_blocks = block_size > 0 ? data.size() / block_size : 0; Reducer r(ctx, out, &context_); - for (TIndex i = 0; i < num_blocks; ++i) { + for (int64_t i = 0; i < num_blocks; ++i) { r.template process( ctx, inputAccessor_.getBlockPtr(block_size, i), i, &context_); } @@ -365,7 +365,7 @@ class AbstractReduceFrontOrBackGradientOp : public Operator { bool RunOnDevice() override { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex grad_block_size = Input(REDUCTION_GRAD).size(); + int64_t grad_block_size = Input(REDUCTION_GRAD).size(); return DispatchHelper::call( this, grad_block_size); } @@ -391,21 +391,21 @@ class AbstractReduceFrontOrBackGradientOp : public Operator { CAFFE_ENFORCE_LE(num_reduce_dims_, source_shape.size()); - vector shape( - source_shape.template data(), - source_shape.template data() + source_shape.size()); + vector shape( + source_shape.template data(), + source_shape.template data() + source_shape.size()); data_grads->Resize(shape); - TIndex block_size = FirstDim + int64_t block_size = FirstDim ? data_grads->size_from_dim(num_reduce_dims_) : data_grads->size_from_dim(data_grads->ndim() - num_reduce_dims_); - TIndex block_num = block_size > 0 ? data_grads->size() / block_size : 0; + int64_t block_num = block_size > 0 ? 
data_grads->size() / block_size : 0; T* out = data_grads->template mutable_data(); ReducerGradient r(ctx, r_grad, &context_); - for (TIndex i = 0; i < block_num; ++i) { + for (int64_t i = 0; i < block_num; ++i) { r.template fillGrad( ctx, out + block_size * i, @@ -447,7 +447,7 @@ UnsortedSegment{op} but as if all input slices belong to a single segment. ArgumentHelper helper(def); int num_reduce_dims = helper.GetSingleArgument("num_reduce_dim", 1); typename ReducerDef::template Reducer::Meta ctx(true); - vector out_dims = ctx.getOutputShape(in[0], num_reduce_dims); + vector out_dims = ctx.getOutputShape(in[0], num_reduce_dims); return vector{ CreateTensorShape(out_dims, in[0].data_type())}; }); @@ -514,7 +514,7 @@ UnsortedSegment{op} but as if all input slices belong to a single segment. ArgumentHelper helper(def); int num_reduce_dims = helper.GetSingleArgument("num_reduce_dim", 1); typename ReducerDef::template Reducer::Meta ctx(false); - vector out_dims = ctx.getOutputShape(in[0], num_reduce_dims); + vector out_dims = ctx.getOutputShape(in[0], num_reduce_dims); return vector{ CreateTensorShape(out_dims, in[0].data_type())}; }); @@ -601,7 +601,7 @@ class AbstractSortedSegmentOp : public Operator { this, Input(INDICES)); } else { // type doesn't matter - return DoRunWithType(); + return DoRunWithType(); } } @@ -609,7 +609,7 @@ class AbstractSortedSegmentOp : public Operator { bool DoRunWithType() { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex in_block_size = Input(0).size_from_dim(1); + int64_t in_block_size = Input(0).size_from_dim(1); return DispatchHelper::call( this, in_block_size); } @@ -621,8 +621,8 @@ class AbstractSortedSegmentOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex N = segment_ids.dim(0); - const TIndex M = dataInput.dim(0); + int64_t N = segment_ids.dim(0); + const int64_t M = dataInput.dim(0); const IndexType* idxs; if (SparseFused) { // static if @@ -662,7 +662,7 @@ class AbstractSortedSegmentOp : public Operator { const SIndex* s_ids = segment_ids.template data(); const SIndex K = N > 0 ? 
s_ids[N - 1] + 1 : 0; - vector shape; + vector shape; shape.push_back(K); ctx.appendOutputShape(&shape); output->Resize(shape); @@ -671,13 +671,13 @@ class AbstractSortedSegmentOp : public Operator { if (N == 0) { return true; } - TIndex in_block_size = dataInput.size_from_dim(1); - TIndex out_block_size = output->size_from_dim(1); + int64_t in_block_size = dataInput.size_from_dim(1); + int64_t out_block_size = output->size_from_dim(1); // Assume the segments are sorted and there are no gaps CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps"); - for (TIndex i = 0; i < N;) { - TIndex start = i; + for (int64_t i = 0; i < N;) { + int64_t start = i; Reducer r(ctx, out + out_block_size * s_ids[start], &context_); for (; i < N && s_ids[start] == s_ids[i]; ++i) { @@ -730,7 +730,7 @@ class AbstractSortedSegmentGradientOp : public Operator { bool RunOnDevice() override { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1); + int64_t grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1); return DispatchHelper::call( this, grad_block_size); } @@ -742,7 +742,7 @@ class AbstractSortedSegmentGradientOp : public Operator { auto* data_grads = Output(0); CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex N = segment_ids.dim(0); + int64_t N = segment_ids.dim(0); typename ReducerGradient::Meta ctx(segment_grads, 1); for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) { @@ -760,14 +760,14 @@ class AbstractSortedSegmentGradientOp : public Operator { const SIndex* s_ids = segment_ids.template data(); const T* s_grads = segment_grads.template data(); - vector shape; + vector shape; shape.push_back(N); ctx.appendGradShape(&shape); data_grads->Resize(shape); - TIndex d_block_size = data_grads->size_from_dim(1); + int64_t d_block_size = data_grads->size_from_dim(1); const SIndex K = segment_grads.dim(0); - TIndex s_block_size = segment_grads.size_from_dim(1); + int64_t s_block_size = segment_grads.size_from_dim(1); T* out = data_grads->template mutable_data(); if (N == 0) { @@ -779,9 +779,9 @@ class AbstractSortedSegmentGradientOp : public Operator { // repeat the check from forward op CAFFE_ENFORCE_EQ( K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps"); - for (TIndex i = 0; i < N;) { - TIndex start = i; - TIndex end = start; + for (int64_t i = 0; i < N;) { + int64_t start = i; + int64_t end = start; if (ReducerGradient::computeLength()) { for (; end < N && s_ids[start] == s_ids[end]; ++end) { @@ -1005,7 +1005,7 @@ class AbstractUnsortedSegmentOp : public Operator { this, Input(INDICES)); } else { // type doesn't matter - return DoRunWithType(); + return DoRunWithType(); } } @@ -1013,7 +1013,7 @@ class AbstractUnsortedSegmentOp : public Operator { bool DoRunWithType() { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex in_block_size = Input(0).size_from_dim(1); + int64_t in_block_size = Input(0).size_from_dim(1); return DispatchHelper::call( this, in_block_size); } @@ -1025,8 +1025,8 @@ class AbstractUnsortedSegmentOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex N = segment_ids.dim(0); - const TIndex M = data.dim(0); + int64_t N = segment_ids.dim(0); + const int64_t M = data.dim(0); const IndexType* idxs; if (SparseFused) { // static if @@ -1070,27 +1070,27 @@ class 
AbstractUnsortedSegmentOp : public Operator { K = num_segments_; } else { K = 0; - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { K = std::max(K, s_ids[i] + 1); } } - vector shape; + vector shape; shape.push_back(K); ctx.appendOutputShape(&shape); output->Resize(shape); - TIndex in_block_size = data.size_from_dim(1); - TIndex out_block_size = output->size_from_dim(1); + int64_t in_block_size = data.size_from_dim(1); + int64_t out_block_size = output->size_from_dim(1); T* out = output->template mutable_data(); reducers_.clear(); reducers_.reserve(K); - for (TIndex i = 0; i < K; ++i) { + for (int64_t i = 0; i < K; ++i) { reducers_.emplace_back(ctx, out + out_block_size * i, &context_); } - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { auto s_id = s_ids[i]; CAFFE_ENFORCE( 0 <= s_id && s_id < K, @@ -1114,7 +1114,7 @@ class AbstractUnsortedSegmentOp : public Operator { ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_); } - for (TIndex i = 0; i < K; ++i) { + for (int64_t i = 0; i < K; ++i) { reducers_[i].template finish(ctx, &context_); } // call reducers destructors (if there is any) @@ -1130,7 +1130,7 @@ class AbstractUnsortedSegmentOp : public Operator { static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs; private: - TIndex num_segments_; + int64_t num_segments_; // member field to reuse memory vector reducers_; InputAccessor inputAccessor_; @@ -1146,7 +1146,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator { bool RunOnDevice() override { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1); + int64_t grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1); return DispatchHelper::call( this, grad_block_size); } @@ -1158,7 +1158,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator { auto* data_grads = Output(0); CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex N = segment_ids.dim(0); + int64_t N = segment_ids.dim(0); typename ReducerGradient::Meta ctx(segment_grads, 1); for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) { @@ -1176,14 +1176,14 @@ class AbstractUnsortedSegmentGradientOp : public Operator { const SIndex* s_ids = segment_ids.template data(); const T* s_grads = segment_grads.template data(); - vector shape; + vector shape; shape.push_back(N); ctx.appendGradShape(&shape); data_grads->Resize(shape); - TIndex d_block_size = data_grads->size_from_dim(1); + int64_t d_block_size = data_grads->size_from_dim(1); const SIndex K = segment_grads.dim(0); - TIndex s_block_size = segment_grads.size_from_dim(1); + int64_t s_block_size = segment_grads.size_from_dim(1); T* out = data_grads->template mutable_data(); if (ReducerGradient::computeLength()) { @@ -1206,7 +1206,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator { reducers_.emplace_back(ctx, s_grads + s_block_size * i, &context_); } - for (TIndex i = 0; i < N; ++i) { + for (int64_t i = 0; i < N; ++i) { auto s_id = s_ids[i]; if (ReducerGradient::computeLength()) { reducers_[s_id].template fillGrad( @@ -1399,7 +1399,7 @@ class AbstractLengthsOp : public Operator { this, Input(INDICES)); } else { // type doesn't matter - return DoRunWithType(); + return DoRunWithType(); } } @@ -1407,7 +1407,7 @@ class AbstractLengthsOp : public Operator { bool DoRunWithType() { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex 
in_block_size = Input(0).size_from_dim(1); + int64_t in_block_size = Input(0).size_from_dim(1); return DispatchHelper::call( this, in_block_size); } @@ -1419,10 +1419,10 @@ class AbstractLengthsOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex dataSize = dataInput.dim(0); + const int64_t dataSize = dataInput.dim(0); // Either first dim the data or how much we pull in indexies from it - TIndex dataToReduceSize; - const TIndex outputSize = lengthsInput.dim(0); + int64_t dataToReduceSize; + const int64_t outputSize = lengthsInput.dim(0); const IndexType* indices; if (SparseFused) { // static if @@ -1454,18 +1454,18 @@ class AbstractLengthsOp : public Operator { dataInput.meta().name(), "."); - vector shape{outputSize}; + vector shape{outputSize}; ctx.appendOutputShape(&shape); output->Resize(shape); - TIndex in_block_size = dataInput.size_from_dim(1); - TIndex out_block_size = output->size_from_dim(1); + int64_t in_block_size = dataInput.size_from_dim(1); + int64_t out_block_size = output->size_from_dim(1); TData* out = output->template mutable_data(); - TIndex dataIndex = 0; - for (TIndex rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) { + int64_t dataIndex = 0; + for (int64_t rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) { Reducer reducer(ctx, out + out_block_size * rangeIndex, &context_); - for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex]; + for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex]; ++dataIndex) { IndexType idx; if (SparseFused) { // static if @@ -1536,7 +1536,7 @@ class AbstractLengthsGradientOp : public Operator { bool RunOnDevice() override { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex gradBlockSize = Input(SEGMENT_GRADS).size_from_dim(1); + int64_t gradBlockSize = Input(SEGMENT_GRADS).size_from_dim(1); return DispatchHelper::call( this, gradBlockSize); } @@ -1548,12 +1548,12 @@ class AbstractLengthsGradientOp : public Operator { auto* dataGradsOutput = Output(0); CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector"); - TIndex reducedDataSize = 0; - TIndex numSegments = lengthsInput.dim(0); + int64_t reducedDataSize = 0; + int64_t numSegments = lengthsInput.dim(0); CAFFE_ENFORCE(segmentGradsInput.ndim() > 0); CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0)); const TLengths* lengths = lengthsInput.template data(); - for (TIndex i = 0; i < numSegments; ++i) { + for (int64_t i = 0; i < numSegments; ++i) { reducedDataSize += lengths[i]; } @@ -1572,20 +1572,20 @@ class AbstractLengthsGradientOp : public Operator { const T* segmentGrads = segmentGradsInput.template data(); - vector shape; + vector shape; shape.push_back(reducedDataSize); ctx.appendGradShape(&shape); dataGradsOutput->Resize(shape); - TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1); - TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1); + int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1); + int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1); T* dataGrads = dataGradsOutput->template mutable_data(); - TIndex dataIndex = 0; - for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { + int64_t dataIndex = 0; + for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { ReducerGradient reducer( ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_); - for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex]; + 
for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex]; ++dataIndex) { reducer.template fillGrad( ctx, @@ -1633,7 +1633,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator { this, Input(INDICES)); } else { // type doesn't matter - return DoRunWithType(); + return DoRunWithType(); } } @@ -1641,7 +1641,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator { bool DoRunWithType() { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class - TIndex in_block_size = Input(SEGMENT_GRADS).size_from_dim(1); + int64_t in_block_size = Input(SEGMENT_GRADS).size_from_dim(1); return DispatchHelper:: call(this, in_block_size); } @@ -1654,7 +1654,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator { auto* dataGradsOutput = Output(0); CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector"); - TIndex numSegments = lengthsInput.dim(0); + int64_t numSegments = lengthsInput.dim(0); CAFFE_ENFORCE(segmentGradsInput.ndim() > 0); CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0)); const TLengths* lengths = lengthsInput.template data(); @@ -1668,7 +1668,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator { } // Either first dim the data or how much we pull in indexies from it - TIndex dataToReduceSize; + int64_t dataToReduceSize; const IndexType* indices = nullptr; if (SparseFused) { // static if auto& indicesInput = Input(INDICES); @@ -1680,22 +1680,22 @@ class AbstractLengthsWithMainInputGradientOp : public Operator { const T* segmentGrads = segmentGradsInput.template data(); - vector shape; + vector shape; shape.push_back(dataToReduceSize); ctx.appendGradShape(&shape); dataGradsOutput->Resize(shape); - TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1); - TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1); + int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1); + int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1); T* dataGrads = dataGradsOutput->template mutable_data(); const T* data = dataInput.template data(); - TIndex dataIndex = 0; - for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { + int64_t dataIndex = 0; + for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { ReducerGradient reducer( ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_); - for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex]; + for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex]; ++dataIndex) { IndexType data_pos; // No range checking, should've been verified in forward pass @@ -1743,7 +1743,7 @@ class AbstractLengthsWithMainInputAndForwardOutputGradientOp bool RunOnDevice() override { // If more complicated fixed size logic becomes necessary, it can be moved // to the reducer class. 
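// Illustrative aside, not part of this change: the AbstractLengths* ops in
// these hunks all walk their data with the same two-cursor pattern that the
// patch retypes from TIndex to int64_t -- an outer cursor over segments
// (rangeIndex) and an inner cursor over the rows each segment owns
// (dataIndex). A minimal standalone version of that walk, with hypothetical
// names and a plain sum in place of the Reducer:
//
//   #include <algorithm>
//   #include <cstdint>
//
//   // Sums lengths[r] consecutive rows of `data` (each `block_size` floats
//   // wide) into row r of `out`; all size arithmetic stays in int64_t.
//   void LengthsSumSketch(const float* data, const int32_t* lengths,
//                         int64_t num_segments, int64_t block_size,
//                         float* out) {
//     int64_t dataIndex = 0;  // global row cursor into `data`
//     for (int64_t rangeIndex = 0; rangeIndex < num_segments; ++rangeIndex) {
//       float* o = out + rangeIndex * block_size;
//       std::fill(o, o + block_size, 0.0f);
//       for (int64_t start = dataIndex;
//            dataIndex < start + lengths[rangeIndex]; ++dataIndex) {
//         const float* row = data + dataIndex * block_size;
//         for (int64_t k = 0; k < block_size; ++k) {
//           o[k] += row[k];
//         }
//       }
//     }
//   }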
- TIndex in_block_size = Input(SEGMENT_GRADS).size_from_dim(1); + int64_t in_block_size = Input(SEGMENT_GRADS).size_from_dim(1); return DispatchHelper::call( this, in_block_size); } @@ -1757,7 +1757,7 @@ class AbstractLengthsWithMainInputAndForwardOutputGradientOp auto* dataGradsOutput = Output(0); CAFFE_ENFORCE(lengthsInput.ndim() == 1, "LENGTHS must be a vector"); - TIndex numSegments = lengthsInput.dim(0); + int64_t numSegments = lengthsInput.dim(0); CAFFE_ENFORCE(segmentGradsInput.ndim() > 0); CAFFE_ENFORCE(numSegments == segmentGradsInput.dim(0)); const TLengths* lengths = lengthsInput.template data(); @@ -1774,26 +1774,26 @@ class AbstractLengthsWithMainInputAndForwardOutputGradientOp CAFFE_ENFORCE(numSegments == forwardOutputInput.dim(0)); const T* forwardOutput = forwardOutputInput.template data(); - TIndex dataToReduceSize = dataInput.dim(0); + int64_t dataToReduceSize = dataInput.dim(0); const T* segmentGrads = segmentGradsInput.template data(); - vector shape; + vector shape; shape.push_back(dataToReduceSize); ctx.appendGradShape(&shape); dataGradsOutput->Resize(shape); - TIndex dataGradsBlockSize = dataGradsOutput->size_from_dim(1); - TIndex segmentBlockSize = segmentGradsInput.size_from_dim(1); + int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1); + int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1); T* dataGrads = dataGradsOutput->template mutable_data(); const T* data = dataInput.template data(); - TIndex dataIndex = 0; - for (TIndex rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { + int64_t dataIndex = 0; + for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) { ReducerGradient reducer( ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_); - for (TIndex start = dataIndex; dataIndex < start + lengths[rangeIndex]; + for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex]; ++dataIndex) { // No range checking, should've been verified in forward pass reducer.template fillGradWithMainInputAndForwardOutput( diff --git a/caffe2/operators/segment_reduction_op_gpu.cu b/caffe2/operators/segment_reduction_op_gpu.cu index 6eec2deba9ce..377ca8fb5a03 100644 --- a/caffe2/operators/segment_reduction_op_gpu.cu +++ b/caffe2/operators/segment_reduction_op_gpu.cu @@ -433,10 +433,10 @@ class CUDASparseLengthsSumOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex dataSize = dataInput.dim(0); + const int64_t dataSize = dataInput.dim(0); // Either first dim the data or how much we pull in indexies from it - TIndex dataToReduceSize; - const TIndex outputSize = lengthsInput.dim(0); + int64_t dataToReduceSize; + const int64_t outputSize = lengthsInput.dim(0); const int len_length = outputSize; auto shape = dataInput.dims(); @@ -554,10 +554,10 @@ class CUDASparseLengthsMeanOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex dataSize = dataInput.dim(0); + const int64_t dataSize = dataInput.dim(0); // Either first dim the data or how much we pull in indexies from it - TIndex dataToReduceSize; - const TIndex outputSize = lengthsInput.dim(0); + int64_t dataToReduceSize; + const int64_t outputSize = lengthsInput.dim(0); const int len_length = outputSize; auto shape = dataInput.dims(); @@ -676,10 +676,10 @@ class CUDASparseLengthsMaxOp : public Operator { auto* output = Output(0); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex dataSize = 
dataInput.dim(0); + const int64_t dataSize = dataInput.dim(0); // Either first dim the data or how much we pull in indexies from it - TIndex dataToReduceSize; - const TIndex outputSize = lengthsInput.dim(0); + int64_t dataToReduceSize; + const int64_t outputSize = lengthsInput.dim(0); int len_length = outputSize; auto shape = dataInput.dims(); @@ -810,10 +810,10 @@ class CUDASparseLengthsWeightedSumOp : public Operator { CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector"); CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector"); - const TIndex dataSize = dataInput.dim(0); + const int64_t dataSize = dataInput.dim(0); // Either first dim the data or how much we pull in indexies from it - const TIndex dataToReduceSize = indicesInput.dim(0); - const TIndex outputSize = lengthsInput.dim(0); + const int64_t dataToReduceSize = indicesInput.dim(0); + const int64_t outputSize = lengthsInput.dim(0); const int len_length = outputSize; auto shape = dataInput.dims(); @@ -954,7 +954,7 @@ class CUDAUnsortedSegmentSumOp : public Operator { } CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector"); - TIndex slize_sz = data.size_from_dim(1); + int64_t slize_sz = data.size_from_dim(1); K_tensor_.Resize(1); // Get maximum segment id so we can size the output. diff --git a/caffe2/operators/sequence_ops.cc b/caffe2/operators/sequence_ops.cc index 2b7b82095686..ec43ba3fbc8c 100644 --- a/caffe2/operators/sequence_ops.cc +++ b/caffe2/operators/sequence_ops.cc @@ -54,7 +54,7 @@ bool RemovePaddingOp::DoRunWithType() { CAFFE_ENFORCE_GE(in.ndim(), 1); const int32_t outer_size = in.dims()[0]; const auto block_size = std::accumulate( - in.dims().begin() + 1, in.dims().end(), 1, std::multiplies()); + in.dims().begin() + 1, in.dims().end(), 1, std::multiplies()); const auto pad_width = startPaddingWidth_ + endPaddingWidth_; // if no lengths is provided, assume it is a single full-span entry diff --git a/caffe2/operators/sequence_ops.cu b/caffe2/operators/sequence_ops.cu index 95ad9ece32d4..fa10b6cbfe94 100644 --- a/caffe2/operators/sequence_ops.cu +++ b/caffe2/operators/sequence_ops.cu @@ -237,7 +237,7 @@ bool RemovePaddingOp::DoRunWithType() { CAFFE_ENFORCE_GE(in.ndim(), 1); const int32_t outer_size = in.dims()[0]; const auto block_size = std::accumulate( - in.dims().begin() + 1, in.dims().end(), 1, std::multiplies()); + in.dims().begin() + 1, in.dims().end(), 1, std::multiplies()); // if no lengths is provided, assume it is a single full-span entry const int32_t* lengths_ptr = nullptr; diff --git a/caffe2/operators/sequence_ops.h b/caffe2/operators/sequence_ops.h index 9e0f8eb5b1b3..2b59e839fd31 100644 --- a/caffe2/operators/sequence_ops.h +++ b/caffe2/operators/sequence_ops.h @@ -25,11 +25,11 @@ class GatherPaddingOp final : public Operator { bool RunOnDevice() override { if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { - Output(0)->Resize(std::vector(0)); - Output(0)->template mutable_data(); + Output(0)->Resize(std::vector(0)); + Output(0)->template mutable_data(); if (OutputSize() == 2) { - Output(1)->Resize(std::vector(0)); - Output(1)->template mutable_data(); + Output(1)->Resize(std::vector(0)); + Output(1)->template mutable_data(); } return true; } @@ -53,7 +53,7 @@ class GatherPaddingOp final : public Operator { lengths_ptr = lengths.template data(); lengths_size = lengths.size(); } - std::vector padShape(in.dims().begin() + 1, in.dims().end()); + std::vector padShape(in.dims().begin() + 1, in.dims().end()); // output will contain accumulator over paddings 
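// Illustrative aside, not part of this change: two idioms in this file (and in
// the reduction/softmax hunks above) rely on a tensor's element count being
// the product of its dims, where the empty product is 1. Resizing with an
// empty shape gives a 0-dim tensor that holds exactly one value (the "scalar
// output" idiom), and the per-row block size is the product of everything past
// the outer dim, via std::accumulate with std::multiplies<int64_t>. A
// self-contained sketch of that arithmetic (hypothetical helper name):
//
//   #include <cstdint>
//   #include <functional>
//   #include <numeric>
//   #include <vector>
//
//   int64_t NumElements(const std::vector<int64_t>& dims) {
//     return std::accumulate(dims.begin(), dims.end(), int64_t(1),
//                            std::multiplies<int64_t>());
//   }
//   // NumElements({})        == 1   -> 0-dim "scalar" output
//   // NumElements({4, 3, 2}) == 24  -> ordinary dense tensor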
Output(0)->Resize(padShape); T* padding_start_ptr = Output(0)->template mutable_data(); diff --git a/caffe2/operators/shape_op.h b/caffe2/operators/shape_op.h index 05ea7a2f7c5f..4d4c080702bf 100644 --- a/caffe2/operators/shape_op.h +++ b/caffe2/operators/shape_op.h @@ -24,22 +24,22 @@ class ShapeOp : public Operator { int numAxes = axes_.size(); if (numAxes == 0) { output->Resize(numDims); - TIndex* output_data = output->template mutable_data(); + int64_t* output_data = output->template mutable_data(); context_.CopyBytesSameDevice( - numDims * sizeof(TIndex), data.dims().data(), output_data); + numDims * sizeof(int64_t), data.dims().data(), output_data); return true; } output->Resize(numAxes); auto src = reinterpret_cast(data.dims().data()); - auto out = reinterpret_cast(output->template mutable_data()); + auto out = reinterpret_cast(output->template mutable_data()); for (int i = 0; i < numAxes; i++) { auto axis = axes_[i]; CAFFE_ENFORCE_LT(axis, numDims, "Axis out of range"); CAFFE_ENFORCE_GE(axis, 0, "Each axis should be non-negative"); context_.CopyBytesSameDevice( - sizeof(TIndex), src + axis * sizeof(TIndex), out); - out += sizeof(TIndex); + sizeof(int64_t), src + axis * sizeof(int64_t), out); + out += sizeof(int64_t); } return true; } diff --git a/caffe2/operators/slice_op.cu b/caffe2/operators/slice_op.cu index 5de302814ba2..475d8329c924 100644 --- a/caffe2/operators/slice_op.cu +++ b/caffe2/operators/slice_op.cu @@ -237,15 +237,15 @@ class SliceOp : public Operator { USE_OPERATOR_FUNCTIONS(CUDAContext); SliceOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - starts_(this->template GetRepeatedArgument("starts")), - ends_(this->template GetRepeatedArgument("ends")), + starts_(this->template GetRepeatedArgument("starts")), + ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} bool RunOnDevice() override { if (InputSize() > 1) { - return DispatchHelper>::call(this, Input(1)); + return DispatchHelper>::call(this, Input(1)); } else { - return DoRunWithType(); + return DoRunWithType(); } } @@ -282,8 +282,8 @@ class SliceOp : public Operator { output, data, starts_host_, ends_host_, &context_); } private: - std::vector starts_; - std::vector ends_; + std::vector starts_; + std::vector ends_; bool statically_inited_; Tensor starts_host_{CPU}; Tensor ends_host_{CPU}; @@ -298,17 +298,17 @@ class SliceGradientOp : public Operator { USE_OPERATOR_FUNCTIONS(CUDAContext); SliceGradientOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - starts_(this->template GetRepeatedArgument("starts")), - ends_(this->template GetRepeatedArgument("ends")), + starts_(this->template GetRepeatedArgument("starts")), + ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} AT_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); bool RunOnDevice() override { if (InputSize() == 4) { - return DispatchHelper>::call(this, Input(1)); + return DispatchHelper>::call(this, Input(1)); } else { - return DoRunWithType(); + return DoRunWithType(); } } @@ -353,8 +353,8 @@ class SliceGradientOp : public Operator { } private: - std::vector starts_; - std::vector ends_; + std::vector starts_; + std::vector ends_; bool statically_inited_; Tensor starts_host_{CPU}; Tensor ends_host_{CPU}; diff --git a/caffe2/operators/slice_op.h b/caffe2/operators/slice_op.h index aa8d4e50f0f9..e7f8919bb81c 100644 --- a/caffe2/operators/slice_op.h +++ b/caffe2/operators/slice_op.h @@ -204,15 +204,15 @@ class SliceOp : public Operator { 
USE_OPERATOR_CONTEXT_FUNCTIONS; SliceOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - starts_(this->template GetRepeatedArgument("starts")), - ends_(this->template GetRepeatedArgument("ends")), + starts_(this->template GetRepeatedArgument("starts")), + ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} bool RunOnDevice() override { if (InputSize() > 1) { - return DispatchHelper>::call(this, Input(1)); + return DispatchHelper>::call(this, Input(1)); } else { - return DoRunWithType(); + return DoRunWithType(); } } @@ -252,8 +252,8 @@ class SliceOp : public Operator { AT_DISABLE_COPY_AND_ASSIGN(SliceOp); protected: - std::vector starts_; - std::vector ends_; + std::vector starts_; + std::vector ends_; bool statically_inited_; Tensor starts_host_{CPU}; Tensor ends_host_{CPU}; @@ -265,17 +265,17 @@ class SliceGradientOp : public Operator { USE_OPERATOR_CONTEXT_FUNCTIONS; SliceGradientOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - starts_(this->template GetRepeatedArgument("starts")), - ends_(this->template GetRepeatedArgument("ends")), + starts_(this->template GetRepeatedArgument("starts")), + ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} AT_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); bool RunOnDevice() override { if (InputSize() == 4) { - return DispatchHelper>::call(this, Input(1)); + return DispatchHelper>::call(this, Input(1)); } else { - return DoRunWithType(); + return DoRunWithType(); } } @@ -321,8 +321,8 @@ class SliceGradientOp : public Operator { private: - std::vector starts_; - std::vector ends_; + std::vector starts_; + std::vector ends_; bool statically_inited_; Tensor starts_host_{CPU}; Tensor ends_host_{CPU}; diff --git a/caffe2/operators/softmax_op_cudnn.cc b/caffe2/operators/softmax_op_cudnn.cc index 16224bef4d0e..6019024e73f3 100644 --- a/caffe2/operators/softmax_op_cudnn.cc +++ b/caffe2/operators/softmax_op_cudnn.cc @@ -71,7 +71,7 @@ class CuDNNSoftmaxOp final : public Operator { CuDNNWrapper cudnn_wrapper_; int axis_; cudnnTensorDescriptor_t desc_; - vector dims_; + vector dims_; }; @@ -137,7 +137,7 @@ class CuDNNSoftmaxGradientOp final : public Operator { CuDNNWrapper cudnn_wrapper_; int axis_; cudnnTensorDescriptor_t desc_; - vector dims_; + vector dims_; }; namespace { diff --git a/caffe2/operators/softmax_ops.cu b/caffe2/operators/softmax_ops.cu index aa01ad278144..1945b59c7b26 100644 --- a/caffe2/operators/softmax_ops.cu +++ b/caffe2/operators/softmax_ops.cu @@ -308,7 +308,7 @@ bool SoftmaxWithLossOp::RunOnDevice() { } } - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); if (losses_.size() != N) { losses_.Resize(N); } @@ -423,7 +423,7 @@ bool SpatialSoftmaxWithLossOp::RunOnDevice() { context_.cuda_stream()>>>(N, D, W, H, Xdata, Pdata); // Cross entropy - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); float* avg_loss_data = avg_loss->template mutable_data(); math::Set(1, 0.0f, avg_loss_data, &context_); diff --git a/caffe2/operators/softmax_with_loss_op.cc b/caffe2/operators/softmax_with_loss_op.cc index 5390b6c40c11..732821042735 100644 --- a/caffe2/operators/softmax_with_loss_op.cc +++ b/caffe2/operators/softmax_with_loss_op.cc @@ -252,7 +252,7 @@ bool SoftmaxWithLossOp::RunOnDevice() { } } - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); float* avg_loss_data = avg_loss->template mutable_data(); if (weight_sum != 0.0) { avg_loss_data[0] = loss_sum * scale_ / weight_sum; diff --git 
a/caffe2/operators/sparse_to_dense_mask_op.h b/caffe2/operators/sparse_to_dense_mask_op.h index 6b71ff4f4e7b..7bfa5eb1435d 100644 --- a/caffe2/operators/sparse_to_dense_mask_op.h +++ b/caffe2/operators/sparse_to_dense_mask_op.h @@ -93,7 +93,7 @@ class SparseToDenseMaskOp : public SparseToDenseMaskBase { static_cast(sparse_values.raw_data()); const void* default_val = default_value.raw_data(); - TIndex block_size = default_value.size(); + int64_t block_size = default_value.size(); size_t block_nbytes = default_value.nbytes(); const int cols = this->featuresCount_; @@ -105,7 +105,7 @@ class SparseToDenseMaskOp : public SparseToDenseMaskBase { if (returnPresenceMask_) { presence_mask = Output(PRESENCEMASK); } - vector shape; + vector shape; if (InputSize() == 4) { auto& lengths = Input(LENGTHS); CAFFE_ENFORCE_EQ(lengths.ndim(), 1); @@ -204,7 +204,7 @@ class SparseToDenseMaskGradientOp : public SparseToDenseMaskBase { CAFFE_ENFORCE_EQ(sparse_indices.ndim(), 1); auto& gradient_output = Input(GOUTPUT); - TIndex block_size = gradient_output.size_from_dim(1); + int64_t block_size = gradient_output.size_from_dim(1); size_t block_nbytes = gradient_output.itemsize() * block_size; const int cols = this->featuresCount_; @@ -213,7 +213,7 @@ class SparseToDenseMaskGradientOp : public SparseToDenseMaskBase { int32_t default_length = sparse_indices.dim32(0); const int32_t* lengths_vec = nullptr; auto* output = Output(GVALUES); - vector shape; + vector shape; if (InputSize() > LENGTHS) { // if the LENGTHS is set, the gradient_output has dim: // lengths * mask.size() * feature_dim diff --git a/caffe2/operators/sparse_to_dense_op.cu b/caffe2/operators/sparse_to_dense_op.cu index c62718a8ece1..74957f980131 100644 --- a/caffe2/operators/sparse_to_dense_op.cu +++ b/caffe2/operators/sparse_to_dense_op.cu @@ -7,7 +7,7 @@ namespace caffe2 { template __global__ void SparseToDenseKernel( - size_t N, TIndex block_nitems, const TInd* indices, const TData* vals, TData* dst) { + size_t N, int64_t block_nitems, const TInd* indices, const TData* vals, TData* dst) { CUDA_1D_KERNEL_LOOP(i, N) { int idx = indices[i / block_nitems]; int dst_idx = block_nitems * idx + i % block_nitems; diff --git a/caffe2/operators/spatial_softmax_with_loss_op.cc b/caffe2/operators/spatial_softmax_with_loss_op.cc index 0e1750015ef7..8a271d17b5ee 100644 --- a/caffe2/operators/spatial_softmax_with_loss_op.cc +++ b/caffe2/operators/spatial_softmax_with_loss_op.cc @@ -119,7 +119,7 @@ bool SpatialSoftmaxWithLossOp::RunOnDevice() { } // Compute the avg cross-entropy loss - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); float* avg_loss_data = avg_loss->template mutable_data(); const int* label_data = T.data(); diff --git a/caffe2/operators/text_file_reader.cc b/caffe2/operators/text_file_reader.cc index 4938d3f5ca15..3888dc2a0e76 100644 --- a/caffe2/operators/text_file_reader.cc +++ b/caffe2/operators/text_file_reader.cc @@ -156,7 +156,7 @@ class TextFileReaderReadOp : public Operator { } private: - TIndex batchSize_; + int64_t batchSize_; }; CAFFE_KNOWN_TYPE(std::unique_ptr); diff --git a/caffe2/operators/tile_op.h b/caffe2/operators/tile_op.h index 3a5dcdfd5c78..f5a3109ffd61 100644 --- a/caffe2/operators/tile_op.h +++ b/caffe2/operators/tile_op.h @@ -72,7 +72,7 @@ class TileOp : public Operator { const auto axis = input.canonical_axis_index(axis_); // reshape output to be input tiled along the axis - vector output_dims(input.dims()); + vector output_dims(input.dims()); output_dims[axis_] = output_dims[axis_] * tiles_; 
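// Illustrative aside, not part of this change: the Tile hunks only switch the
// dtype of the shape vector; the shape arithmetic itself touches a single
// entry -- the tiled axis is multiplied by `tiles` on the forward pass and
// divided on the gradient pass. A standalone sketch with hypothetical names:
//
//   #include <cstdint>
//   #include <vector>
//
//   std::vector<int64_t> TiledShape(std::vector<int64_t> dims, int axis,
//                                   int64_t tiles) {
//     dims[axis] *= tiles;  // forward: `tiles` copies laid out along `axis`
//     return dims;
//   }
//   std::vector<int64_t> UntiledShape(std::vector<int64_t> dims, int axis,
//                                     int64_t tiles) {
//     dims[axis] /= tiles;  // gradient: collapse the copies back
//     return dims;
//   }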
output->Resize(output_dims); @@ -187,7 +187,7 @@ class TileGradientOp : public Operator { const auto axis = input.canonical_axis_index(axis_); // reshape output to be input "untiled" along the axis - vector output_dims(input.dims()); + vector output_dims(input.dims()); output_dims[axis_] = output_dims[axis_] / tiles_; output->Resize(output_dims); diff --git a/caffe2/operators/top_k.cc b/caffe2/operators/top_k.cc index 7ebe9966a504..83b4787103be 100644 --- a/caffe2/operators/top_k.cc +++ b/caffe2/operators/top_k.cc @@ -16,8 +16,8 @@ namespace { template struct ValueComp { bool operator()( - const std::pair& lhs, - const std::pair& rhs) const { + const std::pair& lhs, + const std::pair& rhs) const { return lhs.first > rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second); } @@ -26,34 +26,34 @@ struct ValueComp { template void GetTopK( const T* input, - const TIndex n, - const TIndex k, - const TIndex src_offset, - const TIndex dst_offset, - const TIndex stride, + const int64_t n, + const int64_t k, + const int64_t src_offset, + const int64_t dst_offset, + const int64_t stride, T* values, - TIndex* indices, - TIndex* flatten_indices) { + int64_t* indices, + int64_t* flatten_indices) { const T* src_ptr = input + src_offset; - std::vector> heap_data; + std::vector> heap_data; heap_data.reserve(k); - for (TIndex i = 0; i < k && i < n; ++i) { + for (int64_t i = 0; i < k && i < n; ++i) { heap_data.emplace_back(*src_ptr, i); src_ptr += stride; } std::priority_queue< - std::pair, - std::vector>, + std::pair, + std::vector>, ValueComp> pq(ValueComp(), std::move(heap_data)); - for (TIndex i = k; i < n; ++i) { + for (int64_t i = k; i < n; ++i) { if (pq.top().first < *src_ptr) { pq.pop(); pq.emplace(*src_ptr, i); } src_ptr += stride; } - TIndex dst_pos = dst_offset + (std::min(k, n) - 1) * stride; + int64_t dst_pos = dst_offset + (std::min(k, n) - 1) * stride; while (!pq.empty()) { const auto& item = pq.top(); values[dst_pos] = item.first; @@ -69,13 +69,13 @@ void GetTopK( template void SetTopKGradient( const T* values, - const TIndex* indices, + const int64_t* indices, const int k, - const TIndex src_offset, - const TIndex dst_offset, - const TIndex stride, + const int64_t src_offset, + const int64_t dst_offset, + const int64_t stride, T* gradient) { - TIndex src_pos = src_offset; + int64_t src_pos = src_offset; for (int i = 0; i < k; ++i) { if (indices[src_pos] < 0) { continue; @@ -94,14 +94,14 @@ bool TopKOp::RunOnDevice() { auto* indices = Output(1); auto* flatten_indices = OutputSize() > 2 ? Output(2) : nullptr; - const std::vector& input_dims = input.dims(); + const std::vector& input_dims = input.dims(); if (axis_ == -1) { axis_ = input_dims.size() - 1; } CAFFE_ENFORCE_GE(axis_, 0); CAFFE_ENFORCE_LT(axis_, input_dims.size()); - std::vector output_dims = input_dims; + std::vector output_dims = input_dims; output_dims[axis_] = k_; values->Resize(output_dims); indices->Resize(output_dims); @@ -110,35 +110,35 @@ bool TopKOp::RunOnDevice() { } const T* input_data = input.template data(); T* values_data = values->template mutable_data(); - TIndex* indices_data = indices->template mutable_data(); - TIndex* flatten_indices_data = flatten_indices == nullptr + int64_t* indices_data = indices->template mutable_data(); + int64_t* flatten_indices_data = flatten_indices == nullptr ? 
nullptr - : flatten_indices->template mutable_data(); + : flatten_indices->template mutable_data(); // init values as the default value math::Set(values->size(), T(0), values_data, &context_); - math::Set( - indices->size(), TIndex(-1), indices_data, &context_); + math::Set( + indices->size(), int64_t(-1), indices_data, &context_); if (flatten_indices_data != nullptr) { - math::Set( - flatten_indices->size(), TIndex(-1), flatten_indices_data, &context_); + math::Set( + flatten_indices->size(), int64_t(-1), flatten_indices_data, &context_); } - const TIndex prev_size = std::accumulate( + const int64_t prev_size = std::accumulate( input_dims.cbegin(), input_dims.cbegin() + axis_, - TIndex(1), - std::multiplies()); - const TIndex next_size = std::accumulate( + int64_t(1), + std::multiplies()); + const int64_t next_size = std::accumulate( input_dims.cbegin() + axis_ + 1, input_dims.cend(), - TIndex(1), - std::multiplies()); - const TIndex src_offset_stride = input_dims[axis_] * next_size; - const TIndex dst_offset_stride = k_ * next_size; - TIndex src_offset = 0; - TIndex dst_offset = 0; - for (TIndex i = 0; i < prev_size; ++i) { - for (TIndex j = 0; j < next_size; ++j) { + int64_t(1), + std::multiplies()); + const int64_t src_offset_stride = input_dims[axis_] * next_size; + const int64_t dst_offset_stride = k_ * next_size; + int64_t src_offset = 0; + int64_t dst_offset = 0; + for (int64_t i = 0; i < prev_size; ++i) { + for (int64_t j = 0; j < next_size; ++j) { GetTopK( input_data, input_dims[axis_], @@ -162,34 +162,34 @@ bool TopKGradientOp::RunOnDevice() { const auto& indices = Input(1); const auto& original_input = Input(2); auto* output = Output(0); - const std::vector& values_dims = values.dims(); - const std::vector& origin_dims = original_input.dims(); + const std::vector& values_dims = values.dims(); + const std::vector& origin_dims = original_input.dims(); CAFFE_ENFORCE_EQ(values_dims.size(), origin_dims.size()); output->Resize(origin_dims); const T* values_data = values.template data(); - const TIndex* indices_data = indices.template data(); + const int64_t* indices_data = indices.template data(); T* output_data = output->template mutable_data(); if (axis_ == -1) { axis_ = values_dims.size() - 1; } const int k = values_dims[axis_]; math::Set(output->size(), T(0), output_data, &context_); - const TIndex prev_size = std::accumulate( + const int64_t prev_size = std::accumulate( values_dims.cbegin(), values_dims.cbegin() + axis_, - TIndex(1), - std::multiplies()); - const TIndex next_size = std::accumulate( + int64_t(1), + std::multiplies()); + const int64_t next_size = std::accumulate( values_dims.cbegin() + axis_ + 1, values_dims.cend(), - TIndex(1), - std::multiplies()); - const TIndex src_offset_stride = k * next_size; - const TIndex dst_offset_stride = origin_dims[axis_] * next_size; - TIndex src_offset = 0; - TIndex dst_offset = 0; - for (TIndex i = 0; i < prev_size; ++i) { - for (TIndex j = 0; j < next_size; ++j) { + int64_t(1), + std::multiplies()); + const int64_t src_offset_stride = k * next_size; + const int64_t dst_offset_stride = origin_dims[axis_] * next_size; + int64_t src_offset = 0; + int64_t dst_offset = 0; + for (int64_t i = 0; i < prev_size; ++i) { + for (int64_t j = 0; j < next_size; ++j) { SetTopKGradient( values_data, indices_data, diff --git a/caffe2/operators/top_k.cu b/caffe2/operators/top_k.cu index 6562b7fa5030..5d294236befb 100644 --- a/caffe2/operators/top_k.cu +++ b/caffe2/operators/top_k.cu @@ -23,24 +23,24 @@ namespace { template void 
RunHeapSelectionImpl( const T* input, - const TIndex outer_size, - const TIndex inner_size, + const int64_t outer_size, + const int64_t inner_size, const int k, T* values, - TIndex* indices, + int64_t* indices, CUDAContext* context) { constexpr int kBlockSize = 256; constexpr int kNumWarps = kBlockSize / kWarpSize; - constexpr int smem = kNumWarps * kHeapSize * (sizeof(T) + sizeof(TIndex)); + constexpr int smem = kNumWarps * kHeapSize * (sizeof(T) + sizeof(int64_t)); constexpr T kInitVal = kSelectMax ? std::numeric_limits::lowest() : std::numeric_limits::max(); - selectRowsViaHeap + selectRowsViaHeap <<cuda_stream()>>>( input, values, indices, kInitVal, - std::numeric_limits::max(), + std::numeric_limits::max(), outer_size, inner_size, k); @@ -49,16 +49,16 @@ void RunHeapSelectionImpl( template void RunRadixSelectionImpl( const T* input, - const TIndex outer_size, - const TIndex inner_size, + const int64_t outer_size, + const int64_t inner_size, const int k, T* values, - TIndex* indices, + int64_t* indices, CUDAContext* context) { const int block = std::min( math::roundUp(static_cast(inner_size), kWarpSize), CAFFE_CUDA_NUM_THREADS); - gatherTopK + gatherTopK <<cuda_stream()>>>( input, inner_size, k, outer_size, values, indices); // Unfortunately the output is not currently sorted, and there is no batch @@ -77,11 +77,11 @@ void RunRadixSelectionImpl( template void RunTopKOnLastDimCUDAImpl( const T* input, - const TIndex outer_size, - const TIndex inner_size, + const int64_t outer_size, + const int64_t inner_size, const int k, T* values, - TIndex* indices, + int64_t* indices, CUDAContext* context) { // If k is small, uses heap selection, otherwise uses radix selection. if (k < 32) { @@ -100,18 +100,18 @@ void RunTopKOnLastDimCUDAImpl( } __global__ void FlattenIndicesCUDAKernel( - const TIndex* src, - const TIndex size, - const TIndex stride, - const TIndex n, + const int64_t* src, + const int64_t size, + const int64_t stride, + const int64_t n, const int k, - TIndex* dst) { + int64_t* dst) { CUDA_1D_KERNEL_LOOP(i, size) { if (src[i] < 0) { continue; } - const TIndex x = i / stride / k; - const TIndex y = i % stride; + const int64_t x = i / stride / k; + const int64_t y = i % stride; #if __CUDA_ARCH__ >= 350 dst[i] = __ldg(src + i) * stride + x * n * stride + y; #else @@ -123,18 +123,18 @@ __global__ void FlattenIndicesCUDAKernel( template __global__ void SetTopKGradientCUDAKernel( const T* values, - const TIndex* indices, - const TIndex size, - const TIndex stride, - const TIndex n, + const int64_t* indices, + const int64_t size, + const int64_t stride, + const int64_t n, const int k, T* dst) { CUDA_1D_KERNEL_LOOP(i, size) { if (indices[i] < 0) { continue; } - const TIndex x = i / stride / k; - const TIndex y = i % stride; + const int64_t x = i / stride / k; + const int64_t y = i % stride; #if __CUDA_ARCH__ >= 350 dst[__ldg(indices + i) * stride + x * n * stride + y] = __ldg(values + i); #else @@ -187,7 +187,7 @@ bool TopKCudaOp::RunOnDevice() { auto* indices = Output(1); auto* flatten_indices = OutputSize() > 2 ? 
Output(2) : nullptr; - const std::vector& input_dims = input.dims(); + const std::vector& input_dims = input.dims(); if (axis_ == -1) { axis_ = input_dims.size() - 1; } @@ -195,20 +195,20 @@ bool TopKCudaOp::RunOnDevice() { CAFFE_ENFORCE_LT(axis_, input_dims.size()); const bool need_transpose = axis_ < input_dims.size() - 1; - std::vector output_dims = input_dims; + std::vector output_dims = input_dims; output_dims[axis_] = k_; - const TIndex prev_size = std::accumulate( + const int64_t prev_size = std::accumulate( input_dims.cbegin(), input_dims.cbegin() + axis_, - TIndex(1), - std::multiplies()); - const TIndex next_size = std::accumulate( + int64_t(1), + std::multiplies()); + const int64_t next_size = std::accumulate( input_dims.cbegin() + axis_ + 1, input_dims.cend(), - TIndex(1), - std::multiplies()); - const TIndex outer_size = input.size() / input_dims[axis_]; - const TIndex inner_size = input_dims[axis_]; + int64_t(1), + std::multiplies()); + const int64_t outer_size = input.size() / input_dims[axis_]; + const int64_t inner_size = input_dims[axis_]; values->Resize(output_dims); indices->Resize(output_dims); @@ -217,10 +217,10 @@ bool TopKCudaOp::RunOnDevice() { } const T* input_data = input.template data(); T* values_data = values->template mutable_data(); - TIndex* indices_data = indices->template mutable_data(); - TIndex* flatten_indices_data = flatten_indices == nullptr + int64_t* indices_data = indices->template mutable_data(); + int64_t* flatten_indices_data = flatten_indices == nullptr ? nullptr - : flatten_indices->template mutable_data(); + : flatten_indices->template mutable_data(); if (need_transpose) { const std::array dims = {static_cast(prev_size), @@ -228,9 +228,9 @@ bool TopKCudaOp::RunOnDevice() { static_cast(next_size)}; const std::array axes = {0, 2, 1}; input_transposed_buffer_.Resize( - std::vector{outer_size, inner_size}); - values_transposed_buffer_.Resize(std::vector{outer_size, k_}); - indices_transposed_buffer_.Resize(std::vector{outer_size, k_}); + std::vector{outer_size, inner_size}); + values_transposed_buffer_.Resize(std::vector{outer_size, k_}); + indices_transposed_buffer_.Resize(std::vector{outer_size, k_}); math::Transpose( 3, dims.data(), @@ -240,16 +240,16 @@ bool TopKCudaOp::RunOnDevice() { &context_); input_data = input_transposed_buffer_.template data(); values_data = values_transposed_buffer_.template mutable_data(); - indices_data = indices_transposed_buffer_.template mutable_data(); + indices_data = indices_transposed_buffer_.template mutable_data(); } // init values as the default value math::Set(values->size(), T(0), values_data, &context_); - math::Set( - indices->size(), TIndex(-1), indices_data, &context_); + math::Set( + indices->size(), int64_t(-1), indices_data, &context_); if (flatten_indices_data != nullptr) { - math::Set( - flatten_indices->size(), TIndex(-1), flatten_indices_data, &context_); + math::Set( + flatten_indices->size(), int64_t(-1), flatten_indices_data, &context_); } RunTopKOnLastDimCUDAImpl( @@ -275,8 +275,8 @@ bool TopKCudaOp::RunOnDevice() { 3, dims.data(), axes.data(), - indices_transposed_buffer_.template data(), - indices->template mutable_data(), + indices_transposed_buffer_.template data(), + indices->template mutable_data(), &context_); } @@ -287,12 +287,12 @@ bool TopKCudaOp::RunOnDevice() { CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>( - indices->template data(), + indices->template data(), indices->size(), next_size, inner_size, k_, - flatten_indices->template mutable_data()); + 
flatten_indices->template mutable_data()); } return true; } @@ -322,8 +322,8 @@ bool TopKGradientCudaOp::RunOnDevice() { const auto& indices = Input(1); const auto& original_input = Input(2); auto* output = Output(0); - const std::vector& values_dims = values.dims(); - const std::vector& origin_dims = original_input.dims(); + const std::vector& values_dims = values.dims(); + const std::vector& origin_dims = original_input.dims(); CAFFE_ENFORCE_EQ(values_dims.size(), origin_dims.size()); output->Resize(origin_dims); T* output_data = output->template mutable_data(); @@ -332,18 +332,18 @@ bool TopKGradientCudaOp::RunOnDevice() { } const int k = values_dims[axis_]; math::Set(output->size(), T(0), output_data, &context_); - const TIndex stride = std::accumulate( + const int64_t stride = std::accumulate( values_dims.cbegin() + axis_ + 1, values_dims.cend(), - TIndex(1), - std::multiplies()); + int64_t(1), + std::multiplies()); SetTopKGradientCUDAKernel<<< CAFFE_GET_BLOCKS(indices.size()), CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>( values.template data(), - indices.template data(), + indices.template data(), values.size(), stride, origin_dims[axis_], diff --git a/caffe2/operators/transpose_op.h b/caffe2/operators/transpose_op.h index 2de956e83aa7..fdfed0e3254f 100644 --- a/caffe2/operators/transpose_op.h +++ b/caffe2/operators/transpose_op.h @@ -33,7 +33,7 @@ class TransposeOp final : public Operator { bool RunOnDevice() override { // Do the actual transpose, which is implemented in DoRunWithType(). - return DispatchHelper>::call( + return DispatchHelper>::call( this, Input(0)); } diff --git a/caffe2/operators/utility_ops.cu b/caffe2/operators/utility_ops.cu index 368325908359..f489ea723d59 100644 --- a/caffe2/operators/utility_ops.cu +++ b/caffe2/operators/utility_ops.cu @@ -246,14 +246,14 @@ bool SelectGradientOpBase::RunOnDevice() { template __global__ void AxpySliceKernel( const float* weight0, - const TIndex N, - const TIndex B, - const TIndex slice_size, + const int64_t N, + const int64_t B, + const int64_t slice_size, const float** alpha, const float** X, const T_INDEX* Indices, float* Y, - const TIndex M) { + const int64_t M) { // This implementation requires that the first weight is 1.0 CUDA_KERNEL_ASSERT(weight0[0] == 1.0); for (int i = blockIdx.x; i < N; i += gridDim.x) { @@ -288,17 +288,17 @@ bool ScatterWeightedSumOp::DoRunWithType() { CAFFE_ENFORCE_GT(X0.ndim(), 0, "X0 has to be at least the vector"); CAFFE_ENFORCE_EQ(weight0.size(), 1); - TIndex M = X0.size(); - TIndex N = X0.dim(0); - TIndex K = indices.size(); - TIndex block_size = M / N; + int64_t M = X0.size(); + int64_t N = X0.dim(0); + int64_t K = indices.size(); + int64_t block_size = M / N; float* data = output->template mutable_data(); // In order to have all device pointers of x_i (and weight_i similarly) // consecutively in device memory, copy pointers to a host vector and then // copy back into a device array. 
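// Illustrative aside, not part of this change: the comment above describes a
// pointer-staging step -- the kernel wants an array of device pointers it can
// index per extra input, so the op first collects the pointers in a host
// vector and then copies that vector into device memory. A hedged, minimal
// version of that step using only the plain CUDA runtime API (hypothetical
// name, error handling omitted):
//
//   #include <vector>
//   #include <cuda_runtime.h>
//
//   const float** StageDevicePointers(const std::vector<const float*>& host) {
//     const float** dev = nullptr;
//     cudaMalloc(reinterpret_cast<void**>(&dev),
//                host.size() * sizeof(const float*));
//     cudaMemcpy(dev, host.data(), host.size() * sizeof(const float*),
//                cudaMemcpyHostToDevice);
//     return dev;  // caller releases with cudaFree(dev)
//   }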
- const TIndex B = (InputSize() - 3) / 2; + const int64_t B = (InputSize() - 3) / 2; x_data_host_.Resize(B); weights_host_.Resize(B); x_data_device_.Resize(B); @@ -320,7 +320,7 @@ bool ScatterWeightedSumOp::DoRunWithType() { B, weights_host, weights_device); AxpySliceKernel<<< - std::min(K, CAFFE_MAXIMUM_NUM_BLOCKS), + std::min(K, CAFFE_MAXIMUM_NUM_BLOCKS), CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>( @@ -348,15 +348,15 @@ __global__ void scatter_assign_kernel( T* data, const Index* idxs, const T* slicesData, - TIndex N, - TIndex K, - TIndex block_size) { - for (TIndex i = blockIdx.x; i < K; i += gridDim.x) { + int64_t N, + int64_t K, + int64_t block_size) { + for (int64_t i = blockIdx.x; i < K; i += gridDim.x) { Index idx = idxs[i]; CUDA_KERNEL_ASSERT(0 <= idx && idx < N); const T* src = slicesData + block_size * i; T* dest = data + block_size * idx; - for (TIndex j = threadIdx.x; j < block_size; j += blockDim.x) { + for (int64_t j = threadIdx.x; j < block_size; j += blockDim.x) { dest[j] = src[j]; } } @@ -370,11 +370,11 @@ void ScatterAssignOp::DoScatterAssign( T* data, const Index* idxs, const T* slicesData, - TIndex N, - TIndex K, - TIndex block_size) { + int64_t N, + int64_t K, + int64_t block_size) { scatter_assign_kernel<<< - std::min(K, static_cast(CAFFE_MAXIMUM_NUM_BLOCKS)), + std::min(K, static_cast(CAFFE_MAXIMUM_NUM_BLOCKS)), CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>(data, idxs, slicesData, N, K, block_size); diff --git a/caffe2/operators/utility_ops.h b/caffe2/operators/utility_ops.h index 58b3f9c96ee9..dc170e4f6789 100644 --- a/caffe2/operators/utility_ops.h +++ b/caffe2/operators/utility_ops.h @@ -496,7 +496,7 @@ class ScatterWeightedSumOp : public Operator { private: template bool DoRunWithType() { - TIndex block_size = Input(0).size_from_dim(1); + int64_t block_size = Input(0).size_from_dim(1); return DispatchHelper, Index>::call(this, block_size); } @@ -512,10 +512,10 @@ class ScatterWeightedSumOp : public Operator { CAFFE_ENFORCE_GT(X0.size(), 0); CAFFE_ENFORCE_GT(X0.ndim(), 0, "X0 has to be at least the vector"); CAFFE_ENFORCE_EQ(weight0.size(), 1); - TIndex M = X0.size(); - TIndex N = X0.dim(0); - TIndex K = indices.size(); - TIndex block_size = M / N; + int64_t M = X0.size(); + int64_t N = X0.dim(0); + int64_t K = indices.size(); + int64_t block_size = M / N; T* data = output->template mutable_data(); const Index* idxs = indices.template data(); T w0 = *weight0.template data(); @@ -664,10 +664,10 @@ class ScatterAssignOp : public Operator { CAFFE_ENFORCE_EQ(&input, output, "In place operation is required"); CAFFE_ENFORCE_GT(input.ndim(), 0, "X0 has to be at least the vector"); - TIndex M = input.size(); - TIndex N = input.dim(0); - TIndex K = indices.size(); - TIndex block_size = M / N; + int64_t M = input.size(); + int64_t N = input.dim(0); + int64_t K = indices.size(); + int64_t block_size = M / N; CAFFE_ENFORCE_EQ(slices.size(), block_size * K); // TODO(dzhulgakov): it can be made to work with arbitrary data type by // using raw_mutable_data @@ -682,9 +682,9 @@ class ScatterAssignOp : public Operator { T* data, const Index* idxs, const T* slicesData, - TIndex N, - TIndex K, - TIndex block_size) { + int64_t N, + int64_t K, + int64_t block_size) { for (int i = 0; i < K; ++i) { Index idx = idxs[i]; // double-checking the indices, but it's fine as it's DCHECK only @@ -936,7 +936,7 @@ class HasElementsOp : public Operator { bool RunOnDevice() override { auto& input = Input(0); auto* output = Output(0); - output->Resize(std::vector{}); + 
output->Resize(std::vector{}); *output->template mutable_data() = input.size() > 0; return true; } @@ -953,7 +953,7 @@ class SizeOp : public Operator { auto& input = Input(0); auto* output = Output(0); - output->Resize(vector()); + output->Resize(vector()); auto* output_data = output->template mutable_data(); auto size = input.size(); @@ -1099,7 +1099,7 @@ class LengthsGatherOp : public Operator { const auto* lengths_data = lengths.template data(); const auto* indices_data = indices.template data(); - TIndex total_length = 0; + int64_t total_length = 0; for (size_t i = 0; i < indices.size(); ++i) { auto idx = indices_data[i]; CAFFE_ENFORCE_LT(idx, lengths.size()); @@ -1110,7 +1110,7 @@ class LengthsGatherOp : public Operator { output->Resize(shape); offsets_.clear(); - TIndex running_offset = 0; + int64_t running_offset = 0; offsets_.reserve(lengths.size()); for (size_t i = 0; i < lengths.size(); ++i) { offsets_.push_back(running_offset); @@ -1139,7 +1139,7 @@ class LengthsGatherOp : public Operator { return true; } - std::vector offsets_; + std::vector offsets_; INPUT_TAGS(ITEMS, LENGTHS, INDICES); }; diff --git a/caffe2/operators/utility_ops_gpu_test.cc b/caffe2/operators/utility_ops_gpu_test.cc index eb70a09aefb4..f500afaf9ed2 100644 --- a/caffe2/operators/utility_ops_gpu_test.cc +++ b/caffe2/operators/utility_ops_gpu_test.cc @@ -11,7 +11,7 @@ CAFFE2_DECLARE_string(caffe_test_root); namespace caffe2 { static void AddConstInput( - const vector& shape, + const vector& shape, const float value, const string& name, Workspace* ws) { @@ -38,7 +38,7 @@ TEST(UtilityOpGPUTest, testReshapeWithScalar) { def.add_output("OldShape"); def.add_arg()->CopyFrom(MakeArgument("shape", vector{1})); def.mutable_device_option()->set_device_type(PROTO_CUDA); - AddConstInput(vector(), 3.14, "X", &ws); + AddConstInput(vector(), 3.14, "X", &ws); // execute the op unique_ptr op(CreateOperator(def, &ws)); EXPECT_TRUE(op->Run()); diff --git a/caffe2/operators/utility_ops_test.cc b/caffe2/operators/utility_ops_test.cc index 7b4bcb3144f3..379dd52655c4 100644 --- a/caffe2/operators/utility_ops_test.cc +++ b/caffe2/operators/utility_ops_test.cc @@ -9,7 +9,7 @@ CAFFE2_DECLARE_string(caffe_test_root); namespace caffe2 { static void AddConstInput( - const vector& shape, + const vector& shape, const float value, const string& name, Workspace* ws) { @@ -32,7 +32,7 @@ TEST(UtilityOpTest, testReshapeWithScalar) { def.add_output("XNew"); def.add_output("OldShape"); def.add_arg()->CopyFrom(MakeArgument("shape", vector{1})); - AddConstInput(vector(), 3.14, "X", &ws); + AddConstInput(vector(), 3.14, "X", &ws); // execute the op unique_ptr op(CreateOperator(def, &ws)); EXPECT_TRUE(op->Run()); diff --git a/caffe2/opt/onnxifi_transformer.cc b/caffe2/opt/onnxifi_transformer.cc index 2f48ece4fd87..ce79df56ecb7 100644 --- a/caffe2/opt/onnxifi_transformer.cc +++ b/caffe2/opt/onnxifi_transformer.cc @@ -174,7 +174,7 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOp( // Feed into workspace as CPU Tensors auto* blob = ws->CreateBlob(t.name()); auto* cpu_tensor = blob->GetMutableTensor(CPU); - std::vector dims; + std::vector dims; for(const auto& d : t.dims()) { dims.push_back(d); } diff --git a/caffe2/perfkernels/embedding_lookup.cc b/caffe2/perfkernels/embedding_lookup.cc index 2b3b2fad3937..55dbeee5b9d4 100644 --- a/caffe2/perfkernels/embedding_lookup.cc +++ b/caffe2/perfkernels/embedding_lookup.cc @@ -16,10 +16,10 @@ template < typename OutType, bool IS_WEIGHT_POSITIONAL = false> static void EmbeddingLookupGenericSlow( - const TIndex 
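The LengthsGather hunk above precomputes where each gathered segment starts by running a prefix sum over the lengths, with the accumulator now an int64_t. A small free-standing sketch of that computation follows; the signature is assumed for illustration and does not match the operator's member layout.

#include <cstdint>
#include <vector>

// offsets[i] is the element offset at which segment i begins in the items
// tensor; the final running_offset equals the total number of items.
std::vector<int64_t> SegmentOffsets(const std::vector<int>& lengths) {
  std::vector<int64_t> offsets;
  offsets.reserve(lengths.size());
  int64_t running_offset = 0;
  for (size_t i = 0; i < lengths.size(); ++i) {
    offsets.push_back(running_offset);
    running_offset += lengths[i];
  }
  return offsets;
}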
block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const InType* input, const IndexType* indices, const int* lengths, @@ -27,13 +27,13 @@ static void EmbeddingLookupGenericSlow( const float* scale_bias, // optional scale & bias params for uint8 input bool normalize_by_lengths, OutType* out) { - TIndex current = 0; + int64_t current = 0; for (int m = 0; m < output_size; ++m) { memset(out, 0, sizeof(OutType) * block_size); EigenVectorArrayMap out_vector(out, block_size); for (int i = 0; i < lengths[m]; ++i) { CAFFE_ENFORCE_LT(current, index_size); - TIndex idx = indices[current]; + int64_t idx = indices[current]; CAFFE_ENFORCE( 0 <= idx && idx < data_size, "Index ", @@ -86,10 +86,10 @@ static void EmbeddingLookupGenericSlow( IndexTypeName, IndexType, InTypeName, InType, OutTypeName, OutType, IS_WEIGHT_POSITIONAL) \ void \ EmbeddingLookup_##IndexTypeName##_##InTypeName##_##OutTypeName##_##IS_WEIGHT_POSITIONAL##__base( \ - const TIndex block_size, \ - const TIndex output_size, \ - const TIndex index_size, \ - const TIndex data_size, \ + const int64_t block_size, \ + const int64_t output_size, \ + const int64_t index_size, \ + const int64_t data_size, \ const InType* input, \ const IndexType* indices, \ const int* lengths, \ @@ -116,10 +116,10 @@ static void EmbeddingLookupGenericSlow( } \ template <> \ void EmbeddingLookup( \ - const TIndex block_size, \ - const TIndex output_size, \ - const TIndex index_size, \ - const TIndex data_size, \ + const int64_t block_size, \ + const int64_t output_size, \ + const int64_t index_size, \ + const int64_t data_size, \ const InType* input, \ const IndexType* indices, \ const int* lengths, \ diff --git a/caffe2/perfkernels/embedding_lookup.h b/caffe2/perfkernels/embedding_lookup.h index c4c6ccfe5f36..d147708970b3 100644 --- a/caffe2/perfkernels/embedding_lookup.h +++ b/caffe2/perfkernels/embedding_lookup.h @@ -36,10 +36,10 @@ template < typename OutType, bool IS_WEIGHT_POSITIONAL = false> void EmbeddingLookup( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const InType* input, const IndexType* indices, const int* lengths, diff --git a/caffe2/perfkernels/embedding_lookup_avx2.cc b/caffe2/perfkernels/embedding_lookup_avx2.cc index b01a0a65daca..cd5cb7305eba 100644 --- a/caffe2/perfkernels/embedding_lookup_avx2.cc +++ b/caffe2/perfkernels/embedding_lookup_avx2.cc @@ -13,10 +13,10 @@ namespace caffe2 { template static void EmbeddingLookup_int32_t_float_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -318,7 +318,7 @@ static void EmbeddingLookup_int32_t_float_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -376,10 +376,10 @@ static void EmbeddingLookup_int32_t_float_float__avx2_fma( } } void EmbeddingLookup_int32_t_float_float_false__avx2_fma( - const TIndex 
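EmbeddingLookupGenericSlow, shown above, is the reference path the AVX2 kernels are checked against: it walks `lengths` and sums block_size-wide rows of `input` into each output row. A compact sketch of that reduction with the sizes as int64_t; the weight, scale_bias, and uint8 handling are dropped for brevity, and the early returns stand in for the CAFFE_ENFORCE checks.

#include <cstdint>
#include <cstring>

void EmbeddingLookupSlowSketch(int64_t block_size, int64_t output_size,
                               int64_t index_size, int64_t data_size,
                               const float* input, const int64_t* indices,
                               const int* lengths, bool normalize_by_lengths,
                               float* out) {
  int64_t current = 0;
  for (int64_t m = 0; m < output_size; ++m) {
    std::memset(out, 0, sizeof(float) * block_size);
    for (int i = 0; i < lengths[m]; ++i) {
      if (current >= index_size) return;
      const int64_t idx = indices[current++];
      if (idx < 0 || idx >= data_size) return;
      const float* row = input + block_size * idx;
      for (int64_t j = 0; j < block_size; ++j) {
        out[j] += row[j];
      }
    }
    if (normalize_by_lengths && lengths[m] > 0) {
      for (int64_t j = 0; j < block_size; ++j) {
        out[j] /= lengths[m];
      }
    }
    out += block_size;
  }
}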
block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -401,10 +401,10 @@ void EmbeddingLookup_int32_t_float_float_false__avx2_fma( out); } void EmbeddingLookup_int32_t_float_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -428,10 +428,10 @@ void EmbeddingLookup_int32_t_float_float_true__avx2_fma( template static void EmbeddingLookup_int64_t_float_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -733,7 +733,7 @@ static void EmbeddingLookup_int64_t_float_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -791,10 +791,10 @@ static void EmbeddingLookup_int64_t_float_float__avx2_fma( } } void EmbeddingLookup_int64_t_float_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -816,10 +816,10 @@ void EmbeddingLookup_int64_t_float_float_false__avx2_fma( out); } void EmbeddingLookup_int64_t_float_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -843,10 +843,10 @@ void EmbeddingLookup_int64_t_float_float_true__avx2_fma( template static void EmbeddingLookup_int32_t_half_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1268,7 +1268,7 @@ static void EmbeddingLookup_int32_t_half_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -1332,10 +1332,10 @@ static void EmbeddingLookup_int32_t_half_float__avx2_fma( } } void EmbeddingLookup_int32_t_half_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1357,10 +1357,10 @@ void 
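Each AVX2 kernel above zeroes its per-row accumulator eight floats at a time, with the loop counter j widened to int64_t so it matches block_size. A minimal sketch of that pattern, with a scalar tail added for block sizes that are not a multiple of eight; the tail is handled elsewhere in the generated kernels and appears here only to keep the sketch complete.

#include <immintrin.h>
#include <cstdint>

void ZeroRow(float* op, int64_t block_size) {
  int64_t j = 0;
  for (; j + 8 <= block_size; j += 8) {
    _mm256_storeu_ps(op + j, _mm256_setzero_ps());
  }
  for (; j < block_size; ++j) {
    op[j] = 0.0f;
  }
}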
EmbeddingLookup_int32_t_half_float_false__avx2_fma( out); } void EmbeddingLookup_int32_t_half_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1384,10 +1384,10 @@ void EmbeddingLookup_int32_t_half_float_true__avx2_fma( template static void EmbeddingLookup_int64_t_half_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1809,7 +1809,7 @@ static void EmbeddingLookup_int64_t_half_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -1873,10 +1873,10 @@ static void EmbeddingLookup_int64_t_half_float__avx2_fma( } } void EmbeddingLookup_int64_t_half_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1898,10 +1898,10 @@ void EmbeddingLookup_int64_t_half_float_false__avx2_fma( out); } void EmbeddingLookup_int64_t_half_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1925,10 +1925,10 @@ void EmbeddingLookup_int64_t_half_float_true__avx2_fma( template static void EmbeddingLookup_int32_t_uint8_t_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2366,7 +2366,7 @@ static void EmbeddingLookup_int32_t_uint8_t_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -2432,10 +2432,10 @@ static void EmbeddingLookup_int32_t_uint8_t_float__avx2_fma( } } void EmbeddingLookup_int32_t_uint8_t_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2457,10 +2457,10 @@ void EmbeddingLookup_int32_t_uint8_t_float_false__avx2_fma( out); } void EmbeddingLookup_int32_t_uint8_t_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t 
output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2484,10 +2484,10 @@ void EmbeddingLookup_int32_t_uint8_t_float_true__avx2_fma( template static void EmbeddingLookup_int64_t_uint8_t_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, @@ -2925,7 +2925,7 @@ static void EmbeddingLookup_int64_t_uint8_t_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -2991,10 +2991,10 @@ static void EmbeddingLookup_int64_t_uint8_t_float__avx2_fma( } } void EmbeddingLookup_int64_t_uint8_t_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, @@ -3016,10 +3016,10 @@ void EmbeddingLookup_int64_t_uint8_t_float_false__avx2_fma( out); } void EmbeddingLookup_int64_t_uint8_t_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, diff --git a/caffe2/perfkernels/embedding_lookup_fused_8bit_rowwise_avx2.cc b/caffe2/perfkernels/embedding_lookup_fused_8bit_rowwise_avx2.cc index b4e5c922f441..5eeb4ef3e760 100644 --- a/caffe2/perfkernels/embedding_lookup_fused_8bit_rowwise_avx2.cc +++ b/caffe2/perfkernels/embedding_lookup_fused_8bit_rowwise_avx2.cc @@ -13,10 +13,10 @@ namespace caffe2 { template static void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -316,7 +316,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -374,10 +374,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -397,10 +397,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - 
const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int32_t* indices, const int* lengths, @@ -422,10 +422,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_float_float_true__avx2_fma( template static void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -725,7 +725,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -783,10 +783,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -806,10 +806,10 @@ void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const float* input, const int64_t* indices, const int* lengths, @@ -831,10 +831,10 @@ void Fused8BitRowwiseEmbeddingLookup_int64_t_float_float_true__avx2_fma( template static void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1254,7 +1254,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -1318,10 +1318,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1341,10 +1341,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + 
const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int32_t* indices, const int* lengths, @@ -1366,10 +1366,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_half_float_true__avx2_fma( template static void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1789,7 +1789,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -1853,10 +1853,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1876,10 +1876,10 @@ void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const at::Half* input, const int64_t* indices, const int* lengths, @@ -1901,10 +1901,10 @@ void Fused8BitRowwiseEmbeddingLookup_int64_t_half_float_true__avx2_fma( template static void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2348,7 +2348,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float__avx2_fma( int32_t dataInd = 0; for (int32_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -2415,10 +2415,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2438,10 +2438,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t 
index_size, + const int64_t data_size, const uint8_t* input, const int32_t* indices, const int* lengths, @@ -2463,10 +2463,10 @@ void Fused8BitRowwiseEmbeddingLookup_int32_t_uint8_t_float_true__avx2_fma( template static void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, @@ -2910,7 +2910,7 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float__avx2_fma( int64_t dataInd = 0; for (int64_t rangeIndex = 0; rangeIndex < output_size; ++rangeIndex) { float* op = &out[rangeIndex * block_size]; - TIndex j = 0; + int64_t j = 0; for (; j + 8 <= block_size; j += 8) { _mm256_storeu_ps(op + j, _mm256_setzero_ps()); } @@ -2977,10 +2977,10 @@ static void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float__avx2_fma( } } void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float_false__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, @@ -3000,10 +3000,10 @@ void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float_false__avx2_fma( out); } void Fused8BitRowwiseEmbeddingLookup_int64_t_uint8_t_float_true__avx2_fma( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const uint8_t* input, const int64_t* indices, const int* lengths, diff --git a/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc b/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc index 34777eeab4f4..68c8c8709814 100644 --- a/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc +++ b/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc @@ -16,10 +16,10 @@ template < typename OutType, bool IS_WEIGHT_POSITIONAL = false> static void Fused8BitRowwiseEmbeddingLookupGenericSlow( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const InType* input, const IndexType* indices, const int* lengths, @@ -29,14 +29,14 @@ static void Fused8BitRowwiseEmbeddingLookupGenericSlow( // block_size is the number of elements and fused_block_size is the size of // an entire row, including scale and bias. 
const auto scale_bias_offset = 8 / sizeof(InType); - const TIndex fused_block_size = block_size + scale_bias_offset; - TIndex current = 0; + const int64_t fused_block_size = block_size + scale_bias_offset; + int64_t current = 0; for (int m = 0; m < output_size; ++m) { memset(out, 0, sizeof(OutType) * block_size); EigenVectorArrayMap out_vector(out, block_size); for (int i = 0; i < lengths[m]; ++i) { CAFFE_ENFORCE_LT(current, index_size); - TIndex idx = indices[current]; + int64_t idx = indices[current]; CAFFE_ENFORCE( 0 <= idx && idx < data_size, "Index ", @@ -89,10 +89,10 @@ static void Fused8BitRowwiseEmbeddingLookupGenericSlow( IndexType, InType, OutType) \ void \ Fused8BitRowwiseEmbeddingLookup_##IndexType##_##InType##_##OutType##_false__base( \ - const TIndex block_size, \ - const TIndex output_size, \ - const TIndex index_size, \ - const TIndex data_size, \ + const int64_t block_size, \ + const int64_t output_size, \ + const int64_t index_size, \ + const int64_t data_size, \ const InType* input, \ const IndexType* indices, \ const int* lengths, \ @@ -117,10 +117,10 @@ static void Fused8BitRowwiseEmbeddingLookupGenericSlow( } \ template <> \ void Fused8BitRowwiseEmbeddingLookup( \ - const TIndex block_size, \ - const TIndex output_size, \ - const TIndex index_size, \ - const TIndex data_size, \ + const int64_t block_size, \ + const int64_t output_size, \ + const int64_t index_size, \ + const int64_t data_size, \ const InType* input, \ const IndexType* indices, \ const int* lengths, \ diff --git a/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h b/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h index 9605fbb39c57..85363c6ddb63 100644 --- a/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h +++ b/caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.h @@ -42,10 +42,10 @@ template < typename OutType, bool IS_WEIGHT_POSITIONAL = false> void Fused8BitRowwiseEmbeddingLookup( - const TIndex block_size, - const TIndex output_size, - const TIndex index_size, - const TIndex data_size, + const int64_t block_size, + const int64_t output_size, + const int64_t index_size, + const int64_t data_size, const InType* input, const IndexType* indices, const int* lengths, diff --git a/caffe2/perfkernels/hp_emblookup_codegen.py b/caffe2/perfkernels/hp_emblookup_codegen.py index c69ddf5f14de..2db1cee9c9cd 100644 --- a/caffe2/perfkernels/hp_emblookup_codegen.py +++ b/caffe2/perfkernels/hp_emblookup_codegen.py @@ -166,7 +166,7 @@ def generic(IndexType, InType, OutType, use_weights, isa, fused): code.append(OutType + " *op = &out[rangeIndex * block_size];") # initialize to 0 - code.append("TIndex j = 0;") + code.append("int64_t j = 0;") code.append("for(; j + 8 <= block_size; j += 8) {") code.append("_mm256_storeu_ps(op + j, _mm256_setzero_ps());") code.append("}") @@ -312,10 +312,10 @@ for o in options: code.append(fn + "(") args = [] - args.append("const TIndex block_size,") - args.append("const TIndex output_size,") - args.append("const TIndex index_size,") - args.append("const TIndex data_size,") + args.append("const int64_t block_size,") + args.append("const int64_t output_size,") + args.append("const int64_t index_size,") + args.append("const int64_t data_size,") args.append("const " + InType + "* input,") args.append("const " + IndexType + "* indices,") args.append("const int* lengths,") diff --git a/caffe2/predictor/predictor_test.cc b/caffe2/predictor/predictor_test.cc index 326265fc66d0..ae4f73e9da0a 100644 --- a/caffe2/predictor/predictor_test.cc +++ 
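In the fused 8-bit rowwise variant above, every stored row carries its own quantization parameters, so the row stride becomes fused_block_size = block_size + 8 / sizeof(InType). A rough sketch of accumulating one such uint8 row follows; the layout is assumed here to be the quantized bytes followed by a float scale and a float bias, and the helper name is illustrative.

#include <cstdint>
#include <cstring>

// Dequantizes and accumulates one fused row into `out`. The 8 trailing bytes
// of the row are taken to hold the per-row scale and bias.
void AccumulateFusedRow(const uint8_t* row, int64_t block_size, float* out) {
  float scale;
  float bias;
  std::memcpy(&scale, row + block_size, sizeof(float));
  std::memcpy(&bias, row + block_size + sizeof(float), sizeof(float));
  for (int64_t j = 0; j < block_size; ++j) {
    out[j] += scale * static_cast<float>(row[j]) + bias;
  }
}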
b/caffe2/predictor/predictor_test.cc @@ -132,7 +132,7 @@ const char* metaSpec = R"DOC( )DOC"; std::unique_ptr randomTensor( - const std::vector& dims, + const std::vector& dims, CPUContext* ctx) { auto blob = make_unique(); auto* t = blob->GetMutableTensor(CPU); diff --git a/caffe2/python/pybind_state.cc b/caffe2/python/pybind_state.cc index 54d858cb8a98..81197047102f 100644 --- a/caffe2/python/pybind_state.cc +++ b/caffe2/python/pybind_state.cc @@ -392,7 +392,7 @@ void addObjectMethods(py::module& m) { }) .def( "_reshape", - [](DLPackWrapper* t, std::vector dims) { + [](DLPackWrapper* t, std::vector dims) { auto* tensor = t->tensor; tensor->Resize(dims); }); @@ -430,7 +430,7 @@ void addObjectMethods(py::module& m) { "Copy data from this tensor into a new numpy array.") .def( "init", - [](Tensor* t, std::vector dims, int caffe_type) { + [](Tensor* t, std::vector dims, int caffe_type) { const auto& meta = DataTypeToTypeMeta((TensorProto::DataType)caffe_type); CAFFE_ENFORCE( @@ -443,7 +443,7 @@ void addObjectMethods(py::module& m) { "Fail if the given data type cannot be accessed from python.") .def_property_readonly( "_shape", [](const TensorCPU& t) { return t.dims(); }) - .def("_reshape", [](TensorCPU* t, std::vector dims) { + .def("_reshape", [](TensorCPU* t, std::vector dims) { t->Resize(dims); }); @@ -1361,7 +1361,7 @@ void addGlobalMethods(py::module& m) { m.def( "infer_shapes_and_types_from_map", [](const std::vector& net_protos, - const std::map> blob_dimensions) { + const std::map> blob_dimensions) { // Parse protobuffers to NetDefs std::vector> nets; std::vector nets_ptr; @@ -1381,7 +1381,7 @@ void addGlobalMethods(py::module& m) { m.def( "infer_shapes_and_types_from_map", [](const std::vector& net_protos, - const std::map> blob_dimensions, + const std::map> blob_dimensions, const std::map int_blob_types) { // Parse protobuffers to NetDefs std::vector> nets; diff --git a/caffe2/python/pybind_state.h b/caffe2/python/pybind_state.h index 1271d67f48e7..59f39dd31303 100644 --- a/caffe2/python/pybind_state.h +++ b/caffe2/python/pybind_state.h @@ -178,7 +178,7 @@ class TensorFeeder : public BlobFeederBase { // numpy requires long int as its dims. 
int ndim = PyArray_NDIM(array); npy_intp* npy_dims = PyArray_DIMS(array); - std::vector dims; + std::vector dims; for (int i = 0; i < ndim; ++i) { dims.push_back(npy_dims[i]); } diff --git a/caffe2/python/pybind_state_dlpack.h b/caffe2/python/pybind_state_dlpack.h index e0122fdcc998..679152c78813 100644 --- a/caffe2/python/pybind_state_dlpack.h +++ b/caffe2/python/pybind_state_dlpack.h @@ -90,7 +90,7 @@ class DLPackWrapper { device_option.cuda_gpu_id(), "Expected same device id for DLPack and C2 tensors"); - std::vector dims; + std::vector dims; dims.reserve(dlTensor->ndim); for (int idx = 0; idx < dlTensor->ndim; ++idx) { dims.push_back(dlTensor->shape[idx]); diff --git a/caffe2/python/pybind_state_gpu.cc b/caffe2/python/pybind_state_gpu.cc index 8c547cf8eccc..3893be96ff98 100644 --- a/caffe2/python/pybind_state_gpu.cc +++ b/caffe2/python/pybind_state_gpu.cc @@ -146,7 +146,7 @@ void addCUDAObjectMethods(py::module& m) { [](const DLPackWrapper& t) { return t.tensor->dims(); }) .def( "_reshape", - [](DLPackWrapper* t, std::vector dims) { + [](DLPackWrapper* t, std::vector dims) { t->tensor->Resize(dims); }); } diff --git a/caffe2/python/pybind_state_hip.cc b/caffe2/python/pybind_state_hip.cc index a5d443cb9a7a..36e9f6a75d64 100644 --- a/caffe2/python/pybind_state_hip.cc +++ b/caffe2/python/pybind_state_hip.cc @@ -74,7 +74,7 @@ void addHIPObjectMethods(py::module& m) { [](const DLPackWrapper& t) { return t.tensor->dims(); }) .def( "_reshape", - [](DLPackWrapper* t, std::vector dims) { + [](DLPackWrapper* t, std::vector dims) { t->tensor->Resize(dims); }); } diff --git a/caffe2/python/pybind_state_mkl.cc b/caffe2/python/pybind_state_mkl.cc index dd192c325f7d..1fa5d806377d 100644 --- a/caffe2/python/pybind_state_mkl.cc +++ b/caffe2/python/pybind_state_mkl.cc @@ -70,7 +70,7 @@ class MKLMemoryFeeder : public BlobFeederBase { // numpy requires long int as its dims. 
int ndim = PyArray_NDIM(array); npy_intp* npy_dims = PyArray_DIMS(array); - std::vector dims; + std::vector dims; for (int i = 0; i < ndim; ++i) { dims.push_back(npy_dims[i]); } diff --git a/caffe2/queue/rebatching_queue.cc b/caffe2/queue/rebatching_queue.cc index cfb43a99f491..c768f1cfd0a1 100644 --- a/caffe2/queue/rebatching_queue.cc +++ b/caffe2/queue/rebatching_queue.cc @@ -17,7 +17,7 @@ void concat( const auto numRows = inputs.size(); // Precompute the output sizes to avoid resizing - std::vector> outputDims(numTensors); + std::vector> outputDims(numTensors); for (int i = 0; i < numTensors; ++i) { SmartTensorPrinter::PrintTensor(inputZero.at(i)); diff --git a/caffe2/sgd/ftrl_op.cc b/caffe2/sgd/ftrl_op.cc index 263406114670..a8ef4879d68e 100644 --- a/caffe2/sgd/ftrl_op.cc +++ b/caffe2/sgd/ftrl_op.cc @@ -88,10 +88,10 @@ void SparseFtrlOp::DoRun() { auto& grad = Input(GRAD); CAFFE_ENFORCE_EQ(&Input(VAR), var, "In place operation is required"); CAFFE_ENFORCE_EQ(&Input(N_Z), n_z, "In place operation is required"); - TIndex M = var->size(); - TIndex N = var->dim(0); - TIndex block_size = M / N; - TIndex K = indices.size(); + int64_t M = var->size(); + int64_t N = var->dim(0); + int64_t block_size = M / N; + int64_t K = indices.size(); DCHECK_EQ(M * 2, n_z->size()); DCHECK_EQ(grad.size(), K * block_size); T* w = var->template mutable_data(); @@ -101,7 +101,7 @@ void SparseFtrlOp::DoRun() { // TODO(cxj): use OMP when it is reliable // #pragma omp parallel for - for (TIndex i = 0; i < K; ++i) { + for (int64_t i = 0; i < K; ++i) { SIndex idx = idxs[i]; DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx << ", range 0 to " << N; @@ -116,7 +116,7 @@ void SparseFtrlOp::DoRun() { nz[idx * 2 + 1], params_); } else { - TIndex x = block_size * idx; + int64_t x = block_size * idx; ftrl_update( block_size, w + x, diff --git a/caffe2/sgd/lars_op.h b/caffe2/sgd/lars_op.h index 7b65598cdffe..a54c67ddc2fc 100644 --- a/caffe2/sgd/lars_op.h +++ b/caffe2/sgd/lars_op.h @@ -29,7 +29,7 @@ class LarsOp final : public Operator { auto& trust = Input(3); auto& lr_max = Input(4); auto* lr_rescaled = Output(0); - lr_rescaled->Resize(vector{1}); + lr_rescaled->Resize(vector{1}); X_norm_tensor_.Resize(1); T* X_norm_ = X_norm_tensor_.template mutable_data(); @@ -60,7 +60,7 @@ class LarsOp final : public Operator { private: // Compute the l2 norm of X_data and dX_data void ComputeNorms( - TIndex N, + int64_t N, const T* X_data, const T* dX_data, T* X_norm, diff --git a/caffe2/sgd/learning_rate_op.h b/caffe2/sgd/learning_rate_op.h index 4e798921e13e..5e4357d6b9ee 100644 --- a/caffe2/sgd/learning_rate_op.h +++ b/caffe2/sgd/learning_rate_op.h @@ -31,7 +31,7 @@ class LearningRateOp final : public Operator { T learning_rate = cur_base_lr_ * (*functor_)(iter); // Write to output. 
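The SparseFtrl hunk above is a driver loop: it walks K indices and hands one block_size-wide row of the parameter, plus the paired n/z state laid out twice as wide, to the dense update routine. A skeletal sketch of that shape, with a generic callable standing in for ftrl_update and a bounds check standing in for the DCHECK.

#include <cstdint>

// Sizes mirror the operator: M = var->size(), N = rows, block_size = M / N,
// K = number of indices. `update_row` receives the row of w and its n/z state.
template <typename T, typename SIndex, typename UpdateRow>
void SparseRowwiseUpdate(T* w, T* nz, const SIndex* idxs, int64_t N,
                         int64_t block_size, int64_t K, UpdateRow update_row) {
  for (int64_t i = 0; i < K; ++i) {
    const SIndex idx = idxs[i];
    if (idx < 0 || idx >= N) {
      continue;  // sketch only; the op checks with DCHECK
    }
    const int64_t x = block_size * idx;
    update_row(block_size, w + x, nz + x * 2);
  }
}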
auto* output = Output(0); - output->Resize(vector()); + output->Resize(vector()); context_.template CopyFromCPU( 1, &learning_rate, Output(0)->template mutable_data()); return true; diff --git a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc index 476930ce4f90..4ac3524d49d8 100644 --- a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc +++ b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc @@ -13,7 +13,7 @@ namespace caffe2 { namespace { void AddNoiseInput( - const vector& shape, + const vector& shape, const string& name, Workspace* ws) { DeviceOption option; @@ -78,10 +78,10 @@ void compare( depthwiseOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR)); depthwiseOpDef.add_arg()->CopyFrom(MakeArgument("group", group)); - AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); + AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); AddNoiseInput( - vector{outputC, inputC / group, kernelH, kernelW}, "W", &ws); - AddNoiseInput(vector{outputC}, "B", &ws); + vector{outputC, inputC / group, kernelH, kernelW}, "W", &ws); + AddNoiseInput(vector{outputC}, "B", &ws); unique_ptr depthwiseOp(CreateOperator(depthwiseOpDef, &ws)); EXPECT_NE(nullptr, depthwiseOp.get()); diff --git a/caffe2/share/contrib/nnpack/nnpack_test.cc b/caffe2/share/contrib/nnpack/nnpack_test.cc index ddc451264abc..2f892118982d 100644 --- a/caffe2/share/contrib/nnpack/nnpack_test.cc +++ b/caffe2/share/contrib/nnpack/nnpack_test.cc @@ -13,7 +13,7 @@ namespace caffe2 { namespace { void AddNoiseInput( - const vector& shape, + const vector& shape, const string& name, Workspace* ws) { DeviceOption option; @@ -91,10 +91,10 @@ void compare( nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR)); nnpackOpDef.add_arg()->CopyFrom(MakeArgument("group", group)); - AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); + AddNoiseInput(vector{N, inputC, H, W}, "X", &ws); AddNoiseInput( - vector{outputC, inputC / group, kernelH, kernelW}, "W", &ws); - AddNoiseInput(vector{outputC}, "B", &ws); + vector{outputC, inputC / group, kernelH, kernelW}, "W", &ws); + AddNoiseInput(vector{outputC}, "B", &ws); unique_ptr nnpackOp(CreateOperator(nnpackOpDef, &ws)); EXPECT_NE(nullptr, nnpackOp.get()); diff --git a/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc b/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc index 9bc5200750f3..d0dd70c48952 100644 --- a/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc +++ b/caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc @@ -65,7 +65,7 @@ TensorProtos GetTensorsProto(const TensorCPU& compressed) { // Decompress tensor stored in compressed format // It is compressed using mutils.compress_data_list() void Decompress(const TensorProto& compressed, TensorCPU* outDecomp) { - vector shape(compressed.dims().begin(), compressed.dims().end()); + vector shape(compressed.dims().begin(), compressed.dims().end()); // shape stores the dimensions of data before compression, // see _compress_data_single() in mutils.py outDecomp->Resize(shape); diff --git a/caffe2/utils/filler.h b/caffe2/utils/filler.h index 7016f09a3bab..9739ca26e580 100644 --- a/caffe2/utils/filler.h +++ b/caffe2/utils/filler.h @@ -90,19 +90,19 @@ class TensorFiller { return Min(0).Max(max_segment).Dist(FD_SYNTHETIC); } - TensorFiller& Shape(const std::vector& shape) { + TensorFiller& Shape(const std::vector& shape) { shape_ = shape; return *this; } template - TensorFiller(const std::vector& shape, Type fixed_sum) + TensorFiller(const std::vector& shape, Type fixed_sum) : 
shape_(shape), dist_(FD_FIXEDSUM), fixed_sum_((double)fixed_sum) {} - TensorFiller(const std::vector& shape) + TensorFiller(const std::vector& shape) : shape_(shape), dist_(FD_UNIFORM), fixed_sum_(0) {} - TensorFiller() : TensorFiller(std::vector()) {} + TensorFiller() : TensorFiller(std::vector()) {} std::string DebugString() const { std::stringstream stream; @@ -123,7 +123,7 @@ class TensorFiller { } private: - std::vector shape_; + std::vector shape_; // TODO: type is unknown until a user starts to fill data; // cast everything to double for now. double min_ = 0.0; diff --git a/caffe2/utils/hip/math_hip.cc b/caffe2/utils/hip/math_hip.cc index 89fbe0193e3e..0a285480388d 100644 --- a/caffe2/utils/hip/math_hip.cc +++ b/caffe2/utils/hip/math_hip.cc @@ -714,8 +714,8 @@ DEFINE_BROADCAST_HIP_BITWISE_BINARY_FUNCTION(BitwiseXor, thrust::bit_xor) cub::DeviceReduce::func( \ nullptr, memRequired, src, dst, N, context->hip_stream()); \ auto buffer_size = \ - static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); \ - scratch_ptr->Resize(std::vector{buffer_size}); \ + static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); \ + scratch_ptr->Resize(std::vector{buffer_size}); \ cub::DeviceReduce::func( \ static_cast(scratch_ptr->mutable_data()), \ memRequired, \ @@ -1485,13 +1485,13 @@ void SumGenericIter( cub::DeviceReduce::Sum( nullptr, memRequired, it, dest, N, context->hip_stream()); auto buffer_size = - static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); + static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); if (!dest) { // allocate one more T at the end of scratch for dest - scratch_ptr->Resize(std::vector{buffer_size + 1}); + scratch_ptr->Resize(std::vector{buffer_size + 1}); dest = scratch_ptr->template mutable_data() + buffer_size; } else { - scratch_ptr->Resize(std::vector{buffer_size}); + scratch_ptr->Resize(std::vector{buffer_size}); } cub::DeviceReduce::Sum( static_cast(scratch_ptr->template mutable_data()), @@ -3473,7 +3473,7 @@ void TransposeHIPImpl( CAFFE2_SPECIALIZED_HIP_TRANSPOSE(float) CAFFE2_SPECIALIZED_HIP_TRANSPOSE(double) CAFFE2_SPECIALIZED_HIP_TRANSPOSE(int) -CAFFE2_SPECIALIZED_HIP_TRANSPOSE(TIndex) +CAFFE2_SPECIALIZED_HIP_TRANSPOSE(int64_t) #undef CAFFE2_SPECIALIZED_HIP_TRANSPOSE namespace { diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc index c54226af68ed..18e20e4fa414 100644 --- a/caffe2/utils/math_cpu.cc +++ b/caffe2/utils/math_cpu.cc @@ -2747,7 +2747,7 @@ CAFFE2_SPECIALIZED_COPY_MATRIX(double) #endif // CAFFE2_USE_MKL CAFFE2_SPECIALIZED_COPY_MATRIX(int) -CAFFE2_SPECIALIZED_COPY_MATRIX(TIndex) +CAFFE2_SPECIALIZED_COPY_MATRIX(int64_t) #ifdef CAFFE2_UNIQUE_LONG_TYPEMETA CAFFE2_SPECIALIZED_COPY_MATRIX(long) #endif @@ -3522,7 +3522,7 @@ CAFFE2_SPECIALIZED_TRANSPOSE_2D(double) #endif // CAFFE2_USE_MKL CAFFE2_SPECIALIZED_TRANSPOSE_2D(int) -CAFFE2_SPECIALIZED_TRANSPOSE_2D(TIndex) +CAFFE2_SPECIALIZED_TRANSPOSE_2D(int64_t) #ifdef CAFFE2_UNIQUE_LONG_TYPEMETA CAFFE2_SPECIALIZED_TRANSPOSE_2D(long) #endif @@ -3645,7 +3645,7 @@ void TransposeCPUImpl( CAFFE2_SPECIALIZED_TRANSPOSE(float) CAFFE2_SPECIALIZED_TRANSPOSE(double) CAFFE2_SPECIALIZED_TRANSPOSE(int) -CAFFE2_SPECIALIZED_TRANSPOSE(TIndex) +CAFFE2_SPECIALIZED_TRANSPOSE(int64_t) #ifdef CAFFE2_UNIQUE_LONG_TYPEMETA CAFFE2_SPECIALIZED_TRANSPOSE(long) #endif diff --git a/caffe2/utils/math_gpu.cu b/caffe2/utils/math_gpu.cu index 33d798417b1e..f9b113980737 100644 --- a/caffe2/utils/math_gpu.cu +++ b/caffe2/utils/math_gpu.cu @@ -648,8 +648,8 @@ DEFINE_BROADCAST_CUDA_BITWISE_BINARY_FUNCTION(BitwiseXor, thrust::bit_xor) 
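The SumGenericIter and reduction macros above size their scratch tensor by rounding the byte count reported by cub up to whole elements of T, now kept as an int64_t, and reserve one extra element when the reduction result itself has to live in the scratch. A small sketch of that arithmetic; the helper is illustrative, not part of the math library.

#include <cstddef>
#include <cstdint>

// Elements of T needed to hold memRequired bytes, plus one slot for the
// result when no separate destination buffer is provided.
template <typename T>
int64_t ScratchElements(size_t memRequired, bool needs_dest_slot) {
  int64_t buffer_size =
      static_cast<int64_t>((memRequired + sizeof(T) - 1) / sizeof(T));
  return needs_dest_slot ? buffer_size + 1 : buffer_size;
}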
cub::DeviceReduce::func( \ nullptr, memRequired, src, dst, N, context->cuda_stream()); \ auto buffer_size = \ - static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); \ - scratch_ptr->Resize(std::vector{buffer_size}); \ + static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); \ + scratch_ptr->Resize(std::vector{buffer_size}); \ cub::DeviceReduce::func( \ static_cast(scratch_ptr->mutable_data()), \ memRequired, \ @@ -1770,13 +1770,13 @@ CAFFE2_CUDA_EXPORT void SumGenericIter( cub::DeviceReduce::Sum( nullptr, memRequired, it, dest, N, context->cuda_stream()); auto buffer_size = - static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); + static_cast((memRequired + sizeof(T) - 1) / sizeof(T)); if (!dest) { // allocate one more T at the end of scratch for dest - scratch_ptr->Resize(std::vector{buffer_size + 1}); + scratch_ptr->Resize(std::vector{buffer_size + 1}); dest = scratch_ptr->template mutable_data() + buffer_size; } else { - scratch_ptr->Resize(std::vector{buffer_size}); + scratch_ptr->Resize(std::vector{buffer_size}); } cub::DeviceReduce::Sum( static_cast(scratch_ptr->template mutable_data()), @@ -3078,7 +3078,7 @@ CAFFE2_CUDA_EXPORT void CopyMatrix( CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX(float) CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX(double) CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX(int) -CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX(TIndex) +CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX(int64_t) #undef CAFFE2_SPECIALIZED_CUDA_COPY_MATRIX template <> @@ -3905,7 +3905,7 @@ CAFFE2_CUDA_EXPORT void TransposeCUDAImpl( CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(float) CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(double) CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(int) -CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(TIndex) +CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(int64_t) #undef CAFFE2_SPECIALIZED_CUDA_TRANSPOSE namespace { diff --git a/caffe2/utils/math_gpu_test.cc b/caffe2/utils/math_gpu_test.cc index b9f09706f986..9be1c3db6c1d 100644 --- a/caffe2/utils/math_gpu_test.cc +++ b/caffe2/utils/math_gpu_test.cc @@ -261,9 +261,9 @@ class GemmBatchedGPUTest X_ = X_blob->GetMutableTensor(CUDA); W_ = W_blob->GetMutableTensor(CUDA); Y_ = Y_blob->GetMutableTensor(CUDA); - X_->Resize(std::vector{3, 5, 10}); - W_->Resize(std::vector{3, 6, 10}); - Y_->Resize(std::vector{3, 5, 6}); + X_->Resize(std::vector{3, 5, 10}); + W_->Resize(std::vector{3, 6, 10}); + Y_->Resize(std::vector{3, 5, 6}); math::Set( X_->size(), 1.0f, X_->mutable_data(), cuda_context_.get()); math::Set( diff --git a/caffe2/utils/math_test.cc b/caffe2/utils/math_test.cc index 7b210fd57337..241d19dec424 100644 --- a/caffe2/utils/math_test.cc +++ b/caffe2/utils/math_test.cc @@ -171,9 +171,9 @@ class GemmBatchedTest protected: void SetUp() override { cpu_context_ = make_unique(option_); - X_.Resize(std::vector{3, 5, 10}); - W_.Resize(std::vector{3, 6, 10}); - Y_.Resize(std::vector{3, 5, 6}); + X_.Resize(std::vector{3, 5, 10}); + W_.Resize(std::vector{3, 6, 10}); + Y_.Resize(std::vector{3, 5, 6}); math::Set( X_.size(), 1, X_.mutable_data(), cpu_context_.get()); math::Set( diff --git a/caffe2/utils/smart_tensor_printer_test.cc b/caffe2/utils/smart_tensor_printer_test.cc index e207f7c7b052..651b6ad78adc 100644 --- a/caffe2/utils/smart_tensor_printer_test.cc +++ b/caffe2/utils/smart_tensor_printer_test.cc @@ -31,7 +31,7 @@ void printTensorAndCheck(const std::vector& values) { CPUContext cpuContext; Tensor tensor( - std::vector{static_cast(values.size())}, + std::vector{static_cast(values.size())}, values, &cpuContext); diff --git a/caffe2/video/video_input_op.h b/caffe2/video/video_input_op.h index 
3034e1bd4adb..e58ece491e22 100644 --- a/caffe2/video/video_input_op.h +++ b/caffe2/video/video_input_op.h @@ -462,8 +462,8 @@ VideoInputOp::VideoInputOp( CAFFE_ENFORCE_GT( operator_def.input_size(), 0, "Need to have a DBReader blob input"); - vector data_shape(5); - vector label_shape(2); + vector data_shape(5); + vector label_shape(2); // for RGB data data_shape[0] = batch_size_ * clip_per_video_ * multi_crop_count_; @@ -486,11 +486,11 @@ VideoInputOp::VideoInputOp( prefetched_label_.Resize(label_shape); } else { prefetched_label_.Resize( - vector(1, batch_size_ * clip_per_video_ * multi_crop_count_)); + vector(1, batch_size_ * clip_per_video_ * multi_crop_count_)); } prefetched_video_id_.Resize( - vector(1, batch_size_ * clip_per_video_ * multi_crop_count_)); + vector(1, batch_size_ * clip_per_video_ * multi_crop_count_)); } template diff --git a/modules/detectron/sample_as_op.cu b/modules/detectron/sample_as_op.cu index 43ebaa27405c..910d1ba3eb5e 100644 --- a/modules/detectron/sample_as_op.cu +++ b/modules/detectron/sample_as_op.cu @@ -58,7 +58,7 @@ bool SampleAsOp::RunOnDevice() { assert(count > 0); // resize Y - vector out_shape(X.dims()); + vector out_shape(X.dims()); out_shape[0] = count; Y->Resize(out_shape); diff --git a/modules/detectron/select_smooth_l1_loss_op.cu b/modules/detectron/select_smooth_l1_loss_op.cu index 98dc0bc4fa87..259f89297f10 100644 --- a/modules/detectron/select_smooth_l1_loss_op.cu +++ b/modules/detectron/select_smooth_l1_loss_op.cu @@ -99,7 +99,7 @@ bool SelectSmoothL1LossOp::RunOnDevice() { auto& S = Input(3); auto* avg_loss = Output(0); - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); if (Y.size() == 0){ math::Set( 1, static_cast(0), avg_loss->mutable_data(), &context_); diff --git a/modules/detectron/sigmoid_cross_entropy_loss_op.cu b/modules/detectron/sigmoid_cross_entropy_loss_op.cu index eb3bd9718191..a8b639058ce1 100644 --- a/modules/detectron/sigmoid_cross_entropy_loss_op.cu +++ b/modules/detectron/sigmoid_cross_entropy_loss_op.cu @@ -79,10 +79,10 @@ bool SigmoidCrossEntropyLossOp::RunOnDevice() { " vs. 
", T.size(), ")"); - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); counts_.ResizeLike(X); losses_.ResizeLike(X); - normalizer_.Resize(vector()); + normalizer_.Resize(vector()); SigmoidCrossEntropyLossKernel<<< CAFFE_GET_BLOCKS(X.size()), CAFFE_CUDA_NUM_THREADS, @@ -124,7 +124,7 @@ bool SigmoidCrossEntropyLossGradientOp::RunOnDevice() { dX->ResizeLike(X); counts_.ResizeLike(X); - normalizer_.Resize(vector()); + normalizer_.Resize(vector()); SigmoidCrossEntropyLossGradientKernel<<< CAFFE_GET_BLOCKS(X.size()), CAFFE_CUDA_NUM_THREADS, diff --git a/modules/detectron/sigmoid_focal_loss_op.cu b/modules/detectron/sigmoid_focal_loss_op.cu index 0b7b4011ba07..2630cf37b10c 100644 --- a/modules/detectron/sigmoid_focal_loss_op.cu +++ b/modules/detectron/sigmoid_focal_loss_op.cu @@ -125,7 +125,7 @@ bool SigmoidFocalLossOp::RunOnDevice() { int H = X.dim32(2); int W = X.dim32(3); - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); losses_.ResizeLike(X); float* avg_loss_data = avg_loss->mutable_data(); diff --git a/modules/detectron/smooth_l1_loss_op.cu b/modules/detectron/smooth_l1_loss_op.cu index 7ded2802dd3a..30aadc5f4534 100644 --- a/modules/detectron/smooth_l1_loss_op.cu +++ b/modules/detectron/smooth_l1_loss_op.cu @@ -78,7 +78,7 @@ bool SmoothL1LossOp::RunOnDevice() { CAFFE_ENFORCE_EQ(Y_hat.size(), alpha_in.size()); CAFFE_ENFORCE_EQ(Y_hat.size(), alpha_out.size()); - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); buff_.ResizeLike(Y); // Difference diff --git a/modules/detectron/softmax_focal_loss_op.cu b/modules/detectron/softmax_focal_loss_op.cu index 8b6d1dd178f6..72b24aeb9f14 100644 --- a/modules/detectron/softmax_focal_loss_op.cu +++ b/modules/detectron/softmax_focal_loss_op.cu @@ -158,7 +158,7 @@ bool SoftmaxFocalLossOp::RunOnDevice() { losses_.Resize(N * A * H * W); P->Resize(N * D * H * W); - avg_loss->Resize(vector()); + avg_loss->Resize(vector()); math::Set( avg_loss->size(), 0.f, avg_loss->mutable_data(), &context_); math::Set( diff --git a/modules/detectron/upsample_nearest_op.cu b/modules/detectron/upsample_nearest_op.cu index 2afff9719fa8..870f0508dd9a 100644 --- a/modules/detectron/upsample_nearest_op.cu +++ b/modules/detectron/upsample_nearest_op.cu @@ -125,7 +125,7 @@ bool UpsampleNearestOp::RunOnDevice() { auto& X = Input(0); auto* Y = Output(0); - vector out_shape; + vector out_shape; for (int i = 0; i < X.ndim(); ++i) { out_shape.push_back(X.dim32(i)); }