mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: This PR moves glu to ATen (CPU). Test script: ``` import torch import torch.nn.functional as F import time torch.manual_seed(0) def _time(): if torch.cuda.is_available(): torch.cuda.synchronize() return time.time() device = "cpu" #warm up for n in [10, 100, 1000, 10000]: input = torch.randn(128, n, requires_grad=True, device=device) grad_output = torch.ones(128, n // 2, device=device) for i in range(1000): output = F.glu(input) output.backward(grad_output) for n in [10, 100, 1000, 10000]: fwd_t = 0 bwd_t = 0 input = torch.randn(128, n, requires_grad=True, device=device) grad_output = torch.ones(128, n // 2, device=device) for i in range(10000): t1 = _time() output = F.glu(input) t2 = _time() output.backward(grad_output) t3 = _time() fwd_t = fwd_t + (t2 -t1) bwd_t = bwd_t + (t3 - t2) fwd_avg = fwd_t / 10000 * 1000 bwd_avg = bwd_t / 10000 * 1000 print("input size(128, %d) forward time is %.2f (ms); backwad avg time is %.2f (ms)." % (n, fwd_avg, bwd_avg)) ``` Test device: **skx-8180.** Before: ``` input size(128, 10) forward time is 0.04 (ms); backwad avg time is 0.08 (ms). input size(128, 100) forward time is 0.06 (ms); backwad avg time is 0.14 (ms). input size(128, 1000) forward time is 0.11 (ms); backwad avg time is 0.31 (ms). input size(128, 10000) forward time is 1.52 (ms); backwad avg time is 2.04 (ms). ``` After: ``` input size(128, 10) forward time is 0.02 (ms); backwad avg time is 0.05 (ms). input size(128, 100) forward time is 0.04 (ms); backwad avg time is 0.09 (ms). input size(128, 1000) forward time is 0.07 (ms); backwad avg time is 0.17 (ms). input size(128, 10000) forward time is 0.13 (ms); backwad avg time is 1.03 (ms). ``` Fix https://github.com/pytorch/pytorch/issues/24707, https://github.com/pytorch/pytorch/issues/24708. Pull Request resolved: https://github.com/pytorch/pytorch/pull/33179 Differential Revision: D19839835 Pulled By: VitalyFedyunin fbshipit-source-id: e4d3438556a1068da2c4a7e573d6bbf8d2a6e2b9
252 lines
3.6 KiB
Plaintext
252 lines
3.6 KiB
Plaintext
# READ THIS BEFORE YOU REFACTOR ME
|
|
#
|
|
# setup.py uses the list of patterns in this file to decide
|
|
# what to delete, but it's not 100% sound. So, for example,
|
|
# if you delete aten/build/ because it's redundant with build/,
|
|
# aten/build/ will stop being cleaned. So be careful when
|
|
# refactoring this file!
|
|
|
|
## PyTorch
|
|
|
|
.coverage
|
|
.gradle
|
|
.hypothesis
|
|
.mypy_cache
|
|
*/*.pyc
|
|
*/*.so*
|
|
*/**/__pycache__
|
|
*/**/*.dylib*
|
|
*/**/*.pyc
|
|
*/**/*.pyd
|
|
*/**/*.so*
|
|
*/**/**/*.pyc
|
|
*/**/**/**/*.pyc
|
|
*/**/**/**/**/*.pyc
|
|
aten/build/
|
|
aten/src/ATen/Config.h
|
|
aten/src/ATen/cuda/CUDAConfig.h
|
|
caffe2/cpp_test/
|
|
dist/
|
|
docs/src/**/*
|
|
docs/cpp/build
|
|
docs/cpp/source/api
|
|
log
|
|
test/.coverage
|
|
test/.hypothesis/
|
|
test/cpp/api/mnist
|
|
test/custom_operator/model.pt
|
|
test/data/legacy_modules.t7
|
|
test/data/*.pt
|
|
test/backward_compatibility/new_schemas.txt
|
|
dropout_model.pt
|
|
test/generated_type_hints_smoketest.py
|
|
test/htmlcov
|
|
test/cpp_extensions/install/
|
|
test/test-reports/
|
|
third_party/build/
|
|
tools/shared/_utils_internal.py
|
|
torch.egg-info/
|
|
torch/__init__.pyi
|
|
torch/nn/functional.pyi
|
|
torch/nn/modules/*.pyi
|
|
torch/csrc/autograd/generated/*
|
|
torch/csrc/cudnn/cuDNN.cpp
|
|
torch/csrc/generated
|
|
torch/csrc/generic/TensorMethods.cpp
|
|
torch/csrc/jit/generated/*
|
|
torch/csrc/jit/fuser/config.h
|
|
torch/csrc/nn/THCUNN.cpp
|
|
torch/csrc/nn/THCUNN.cwrap
|
|
torch/bin/
|
|
torch/cmake/
|
|
torch/lib/*.a*
|
|
torch/lib/*.dll*
|
|
torch/lib/*.exe*
|
|
torch/lib/*.dylib*
|
|
torch/lib/*.h
|
|
torch/lib/*.lib
|
|
torch/lib/*.so*
|
|
torch/lib/protobuf*.pc
|
|
torch/lib/build
|
|
torch/lib/caffe2/
|
|
torch/lib/cmake
|
|
torch/lib/include
|
|
torch/lib/pkgconfig
|
|
torch/lib/protoc
|
|
torch/lib/protobuf/
|
|
torch/lib/tmp_install
|
|
torch/lib/torch_shm_manager
|
|
torch/lib/site-packages/
|
|
torch/lib/python*
|
|
torch/lib64
|
|
torch/include/
|
|
torch/share/
|
|
torch/test/
|
|
torch/version.py
|
|
# Root level file used in CI to specify certain env configs.
|
|
# E.g., see .circleci/config.yml
|
|
env
|
|
.circleci/scripts/COMMIT_MSG
|
|
|
|
# IPython notebook checkpoints
|
|
.ipynb_checkpoints
|
|
|
|
# Editor temporaries
|
|
*.swn
|
|
*.swo
|
|
*.swp
|
|
*.swm
|
|
*~
|
|
|
|
# macOS dir files
|
|
.DS_Store
|
|
|
|
# Symbolic files
|
|
tools/shared/cwrap_common.py
|
|
|
|
# Ninja files
|
|
.ninja_deps
|
|
.ninja_log
|
|
compile_commands.json
|
|
*.egg-info/
|
|
docs/source/scripts/activation_images/
|
|
|
|
## General
|
|
|
|
# Compiled Object files
|
|
*.slo
|
|
*.lo
|
|
*.o
|
|
*.cuo
|
|
*.obj
|
|
|
|
# Compiled Dynamic libraries
|
|
*.so
|
|
*.dylib
|
|
*.dll
|
|
|
|
# Compiled Static libraries
|
|
*.lai
|
|
*.la
|
|
*.a
|
|
*.lib
|
|
|
|
# Compiled protocol buffers
|
|
*.pb.h
|
|
*.pb.cc
|
|
*_pb2.py
|
|
|
|
# Compiled python
|
|
*.pyc
|
|
*.pyd
|
|
|
|
# Compiled MATLAB
|
|
*.mex*
|
|
|
|
# IPython notebook checkpoints
|
|
.ipynb_checkpoints
|
|
|
|
# Editor temporaries
|
|
*.swn
|
|
*.swo
|
|
*.swp
|
|
*~
|
|
|
|
# Sublime Text settings
|
|
*.sublime-workspace
|
|
*.sublime-project
|
|
|
|
# Eclipse Project settings
|
|
*.*project
|
|
.settings
|
|
|
|
# QtCreator files
|
|
*.user
|
|
|
|
# PyCharm files
|
|
.idea
|
|
|
|
# OSX dir files
|
|
.DS_Store
|
|
|
|
# GDB history
|
|
.gdb_history
|
|
|
|
## Caffe2
|
|
|
|
# build, distribute, and bins (+ python proto bindings)
|
|
build
|
|
build_host_protoc
|
|
build_android
|
|
build_ios
|
|
/build_*
|
|
.build_debug/*
|
|
.build_release/*
|
|
distribute/*
|
|
*.testbin
|
|
*.bin
|
|
cmake_build
|
|
.cmake_build
|
|
gen
|
|
.setuptools-cmake-build
|
|
.pytest_cache
|
|
aten/build/*
|
|
|
|
# Bram
|
|
plsdontbreak
|
|
|
|
# Generated documentation
|
|
docs/_site
|
|
docs/gathered
|
|
_site
|
|
doxygen
|
|
docs/dev
|
|
|
|
# LevelDB files
|
|
*.sst
|
|
*.ldb
|
|
LOCK
|
|
CURRENT
|
|
MANIFEST-*
|
|
|
|
# generated version file
|
|
caffe2/version.py
|
|
|
|
# setup.py intermediates
|
|
.eggs
|
|
caffe2.egg-info
|
|
|
|
# Atom/Watchman required file
|
|
.watchmanconfig
|
|
|
|
# Files generated by CLion
|
|
cmake-build-debug
|
|
|
|
# BEGIN NOT-CLEAN-FILES (setup.py handles this marker. Do not change.)
|
|
#
|
|
# Below files are not deleted by "setup.py clean".
|
|
|
|
# Visual Studio Code files
|
|
.vscode
|
|
.vs
|
|
|
|
# YouCompleteMe config file
|
|
.ycm_extra_conf.py
|
|
|
|
# Files generated when a patch is rejected
|
|
*.orig
|
|
*.rej
|
|
|
|
# Files generated by ctags
|
|
CTAGS
|
|
GTAGS
|
|
GRTAGS
|
|
GSYMS
|
|
GPATH
|
|
tags
|
|
TAGS
|
|
|
|
|
|
# ccls file
|
|
.ccls-cache/
|