mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
A clean init for Caffe2, removing my earlier hacky
commits.
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
.DS_Store
|
||||
*.pyc
|
||||
gen*/
|
30
LICENSE
Normal file
30
LICENSE
Normal file
@ -0,0 +1,30 @@
|
||||
Copyright (c) 2015 Yangqing Jia
|
||||
All Rights Reserved.
|
||||
|
||||
== LICENSE ==
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
== DECLARATION ==
|
||||
|
||||
Some parts of the caffe2 code is derived from the original Caffe code, which is
|
||||
created by Yangqing Jia and is now a BSD-licensed open-source project. The Caffe
|
||||
license is attached as LICENSE.caffe.
|
46
LICENSE.caffe
Normal file
46
LICENSE.caffe
Normal file
@ -0,0 +1,46 @@
|
||||
*** begin Caffe license ***
|
||||
COPYRIGHT
|
||||
|
||||
All contributions by the University of California:
|
||||
Copyright (c) 2014, The Regents of the University of California (Regents)
|
||||
All rights reserved.
|
||||
|
||||
All other contributions:
|
||||
Copyright (c) 2014, the respective contributors
|
||||
All rights reserved.
|
||||
|
||||
Caffe uses a shared copyright model: each contributor holds copyright over
|
||||
their contributions to Caffe. The project versioning records all such
|
||||
contribution and copyright details. If a contributor wants to further mark
|
||||
their specific copyright on a particular contribution, they should indicate
|
||||
their copyright solely in the commit message of the change when it is
|
||||
committed.
|
||||
|
||||
LICENSE
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
CONTRIBUTION AGREEMENT
|
||||
|
||||
By contributing to the BVLC/caffe repository through pull-request, comment,
|
||||
or otherwise, the contributor releases their content to the
|
||||
license and copyright terms herein.
|
||||
*** end Caffe license ***
|
21
Makefile
Normal file
21
Makefile
Normal file
@ -0,0 +1,21 @@
|
||||
# This makefile does nothing but delegating the actual compilation to build.py.
|
||||
|
||||
all:
|
||||
@python brewery.py build
|
||||
|
||||
clean:
|
||||
@python brewery.py clean
|
||||
|
||||
reallyclean:
|
||||
@python brewery.py reallyclean
|
||||
|
||||
test:
|
||||
@python brewery.py test
|
||||
|
||||
lint:
|
||||
@find caffe2 -type f -exec python cpplint.py {} \;
|
||||
|
||||
linecount:
|
||||
@cloc --read-lang-def=caffe.cloc caffe2 pycaffe2 || \
|
||||
echo "Cloc is not available on the machine. You can install cloc with " && \
|
||||
echo " sudo apt-get install cloc"
|
16
README.md
Normal file
16
README.md
Normal file
@ -0,0 +1,16 @@
|
||||
If you are not Yangqing and you don't know what this repository is, you may have
|
||||
stumbled upon it with some links or forked repositories in the wild. Please, let
|
||||
me know since I want to make the visibility of this library as small as possible
|
||||
for now.
|
||||
|
||||
Yangqing
|
||||
(me@daggerfs.com)
|
||||
|
||||
# Caffe2
|
||||
|
||||
Caffe2 is a deep learning framework made with expression, speed, and modularity in mind. It is an experimental refactoring of Caffe.
|
||||
|
||||
## License and Citation
|
||||
|
||||
Caffe2 is released under the [BSD 2-Clause license](https://github.com/Yangqing/caffe2/blob/master/LICENSE).
|
||||
|
661
brewery.py
Normal file
661
brewery.py
Normal file
@ -0,0 +1,661 @@
|
||||
|
||||
import cPickle as pickle
|
||||
from collections import defaultdict
|
||||
import multiprocessing
|
||||
import glob
|
||||
import hashlib
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
from build_env import Env
|
||||
|
||||
class Colors(object):
|
||||
HEADER = '\033[95m'
|
||||
OKBLUE = '\033[94m'
|
||||
OKGREEN = '\033[92m'
|
||||
WARNING = '\033[93m'
|
||||
FAIL = '\033[91m'
|
||||
ENDC = '\033[0m'
|
||||
|
||||
def BuildDebug(message, *args):
|
||||
# Note(Yangqing): if you want to know detailed message about the build,
|
||||
# uncomment the following line.
|
||||
print Colors.OKBLUE + 'DEBUG:', message % args, Colors.ENDC
|
||||
return
|
||||
|
||||
def BuildLog(message, *args):
|
||||
print Colors.OKGREEN + 'LOG:', message % args, Colors.ENDC
|
||||
|
||||
def BuildWarning(message, *args):
|
||||
print Colors.WARNING + 'WARNING:', message % args, Colors.ENDC
|
||||
|
||||
def BuildFatal(message, *args):
|
||||
print Colors.FAIL + 'FATAL:', message % args, Colors.ENDC
|
||||
print Colors.FAIL + 'Build exiting.' + Colors.ENDC
|
||||
Brewery.Finalize()
|
||||
sys.exit(1)
|
||||
|
||||
def BuildFatalIf(command, message, *args):
|
||||
if command:
|
||||
BuildFatal(message, *args)
|
||||
|
||||
_single_command_env = os.environ
|
||||
if 'PYTHONPATH' not in _single_command_env:
|
||||
_single_command_env['PYTHONPATH'] = ''
|
||||
_single_command_env['PYTHONPATH'] = (
|
||||
Env.GENDIR + ':' + _single_command_env['PYTHONPATH'])
|
||||
|
||||
def RunSingleCommand(command):
|
||||
BuildDebug(command)
|
||||
try:
|
||||
proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT, env=_single_command_env)
|
||||
stdout, _ = proc.communicate()
|
||||
if proc.returncode:
|
||||
print stdout
|
||||
return proc.returncode
|
||||
except: # all exceptions caught here.
|
||||
e = sys.exc_info()[0]
|
||||
return str(e)
|
||||
|
||||
def Glob(patterns):
|
||||
"""Globs all files with the given patterns, relative to the path of the BREW
|
||||
file."""
|
||||
files = []
|
||||
if type(patterns) is str:
|
||||
patterns = [patterns]
|
||||
for pattern in patterns:
|
||||
full_pattern = os.path.join(Brewery.CWD, pattern)
|
||||
files += glob.glob(full_pattern)
|
||||
prefix_len = len(Brewery.CWD) + 1
|
||||
return [f[prefix_len:] for f in files if os.path.isfile(f)]
|
||||
|
||||
def RectifyFileName(name):
|
||||
"""Rectifies a build file name to its absolute name."""
|
||||
if name.startswith("//"):
|
||||
# Simply replace the "//" with the root folder.
|
||||
out_name = name[2:]
|
||||
else:
|
||||
# Add the current working directory.
|
||||
out_name = os.path.join(Brewery.CWD, name)
|
||||
# check if the name exists.
|
||||
BuildFatalIf(not os.path.exists(out_name), 'Cannot find file %s' % out_name)
|
||||
return out_name
|
||||
|
||||
def RectifyFileNames(names):
|
||||
return [RectifyFileName(n) for n in sorted(names)]
|
||||
|
||||
def RectifyTarget(name):
|
||||
"""Rectifies a build target name."""
|
||||
if name.startswith("//"):
|
||||
return name
|
||||
elif name.startswith(":"):
|
||||
return Brewery.TARGET_PREFIX + name
|
||||
else:
|
||||
if Brewery.TARGET_PREFIX == '//':
|
||||
return Brewery.TARGET_PREFIX + name
|
||||
return Brewery.TARGET_PREFIX + ":" + name
|
||||
|
||||
def RectifyTargets(names):
|
||||
return [RectifyTarget(n) for n in sorted(names)]
|
||||
|
||||
def MakeGenDirs(rectified_srcs):
|
||||
for src in rectified_srcs:
|
||||
dst = os.path.join(Env.GENDIR, src)
|
||||
try:
|
||||
os.makedirs(os.path.dirname(dst))
|
||||
except OSError as e:
|
||||
pass
|
||||
|
||||
def CopyToGenDir(rectified_srcs):
|
||||
MakeGenDirs(rectified_srcs)
|
||||
for src in rectified_srcs:
|
||||
shutil.copyfile(src, GenFilename(src))
|
||||
|
||||
def GenFilename(name, new_ext=None, original_ext=None):
|
||||
if new_ext:
|
||||
if original_ext:
|
||||
new_name = name[:name.rfind(original_ext)] + new_ext
|
||||
else:
|
||||
new_name = name[:name.rfind('.') + 1] + new_ext
|
||||
else:
|
||||
new_name = name
|
||||
return os.path.join(Env.GENDIR, new_name)
|
||||
|
||||
def MergeOrderedObjs(dep_lists):
|
||||
added = set()
|
||||
output = []
|
||||
for dep_list in dep_lists:
|
||||
for item in dep_list[::-1]:
|
||||
if item not in added:
|
||||
added.add(item)
|
||||
output.insert(0, item)
|
||||
return output
|
||||
|
||||
class Brewery(object):
|
||||
# Targets store the dictionary from the target name to the build objects.
|
||||
_targets = dict()
|
||||
# Success stores whether a target is successfully built.
|
||||
_success = defaultdict(bool)
|
||||
# deps_map is a dictionary mapping each target to its dependents.
|
||||
_deps_map = dict()
|
||||
# signature_map is the map that stores the signatures for build targets.
|
||||
_signatures = defaultdict(str)
|
||||
_signature_filename = 'brewery.signature'
|
||||
# Pool is the compute pool that one can use to run a list of commands in
|
||||
# parallel.
|
||||
Pool = multiprocessing.Pool(Env.CPUS)
|
||||
#Pool = multiprocessing.Pool(1)
|
||||
CWD = ''
|
||||
TARGET_PREFIX = '//'
|
||||
TMPDIR = ''
|
||||
|
||||
def __init__(self):
|
||||
"""Brewery is a singleton and should not be instantiated."""
|
||||
raise NotImplementedError(
|
||||
'Build system error: there shall only be one brewery.')
|
||||
|
||||
@classmethod
|
||||
def InitBrewery(cls):
|
||||
"""Initializes the brewery, e.g. loads the signatures currently built."""
|
||||
try:
|
||||
os.makedirs(Env.GENDIR)
|
||||
except OSError as e:
|
||||
pass
|
||||
cls.TMPDIR = tempfile.mkdtemp()
|
||||
if os.path.exists(os.path.join(Env.GENDIR, cls._signature_filename)):
|
||||
BuildDebug('Loading the signature file.')
|
||||
cls._signatures = pickle.load(
|
||||
open(os.path.join(Env.GENDIR, cls._signature_filename)))
|
||||
cls.FindAndParseBuildFiles()
|
||||
|
||||
@classmethod
|
||||
def Finalize(cls):
|
||||
"""Finalizes the brew process."""
|
||||
if os.path.exists(Env.GENDIR):
|
||||
BuildDebug('Saving the signature file.')
|
||||
pickle.dump(cls._signatures,
|
||||
open(os.path.join(Env.GENDIR, cls._signature_filename), 'w'))
|
||||
else:
|
||||
BuildDebug('No gendir present. Exiting.')
|
||||
shutil.rmtree(cls.TMPDIR)
|
||||
|
||||
@classmethod
|
||||
def Get(cls, name):
|
||||
return cls._targets[name]
|
||||
|
||||
@classmethod
|
||||
def FindAndParseBuildFiles(cls):
|
||||
"""Find and parse all the BREW files in the subfolders."""
|
||||
build_files = [os.path.join(d[2:], f)
|
||||
for (d, _, files) in os.walk('.') if not d.startswith(Env.GENDIR)
|
||||
for f in files if f.endswith('BREW')]
|
||||
for build_file in build_files:
|
||||
# Set the current working directory of the environment, and parse the build
|
||||
# file.
|
||||
BuildDebug("Parsing %s" % build_file)
|
||||
cls.SetCwd(os.path.dirname(build_file))
|
||||
execfile(build_file)
|
||||
cls.SetCwd('')
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def SetCwd(cls, cwd):
|
||||
if cwd and not os.path.isdir(cwd):
|
||||
# cwd should either be empty, or is a directory.
|
||||
raise RuntimeError('Setting an invalid cwd: %s' % cwd)
|
||||
cls.CWD = cwd
|
||||
cls.TARGET_PREFIX = '//' + cwd
|
||||
|
||||
@classmethod
|
||||
def RunInParallel(cls, commands):
|
||||
if any(cls.Pool.map(RunSingleCommand, commands)):
|
||||
BuildWarning('Command failed.')
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def Register(cls, name, target):
|
||||
BuildFatalIf(name in cls._targets,
|
||||
"%s already in build target.", name)
|
||||
BuildDebug("Registered build target %s, deps %s", name, str(target.deps))
|
||||
cls._targets[name] = target
|
||||
cls._deps_map[name] = target.deps
|
||||
|
||||
@classmethod
|
||||
def _GetExecutionChain(cls, targets):
|
||||
"""Gets the execution chain."""
|
||||
# First, verify all dependencies.
|
||||
for t in cls._targets:
|
||||
for d in cls._deps_map[t]:
|
||||
BuildFatalIf(d not in cls._targets,
|
||||
"Dependency %s for target %s does not exist.", d, t)
|
||||
if len(targets) == 0:
|
||||
targets = cls._targets
|
||||
else:
|
||||
# Get all targets that we need to build.
|
||||
seen_targets = set(targets)
|
||||
idx = 0
|
||||
while idx < len(targets):
|
||||
for d in cls._deps_map[targets[idx]]:
|
||||
if d not in seen_targets:
|
||||
seen_targets.add(d)
|
||||
targets.append(d)
|
||||
idx += 1
|
||||
# Now, create a topological order.
|
||||
inverse_deps_map = defaultdict(list)
|
||||
# Get the graph of all targets
|
||||
for t in targets:
|
||||
for d in cls._deps_map[t]:
|
||||
inverse_deps_map[d].append(t)
|
||||
deps_count = dict((t, len(cls._deps_map[t])) for t in targets)
|
||||
#BuildDebug("deps count: %s", str(deps_count))
|
||||
frontier = set(t for t in deps_count if deps_count[t] == 0)
|
||||
build_order = []
|
||||
while frontier:
|
||||
current = frontier.pop()
|
||||
#BuildDebug("processing %s", current)
|
||||
build_order.append(current)
|
||||
for t in inverse_deps_map[current]:
|
||||
deps_count[t] -= 1
|
||||
if deps_count[t] == 0:
|
||||
#BuildDebug('Add to frontier: %s', t)
|
||||
frontier.add(t)
|
||||
# If this does not cover all targets, the graph is not a DAG.
|
||||
BuildFatalIf(len(build_order) != len(targets),
|
||||
"There are cycles in the dependency graph!")
|
||||
BuildDebug('Build order: %s', str(build_order))
|
||||
return build_order
|
||||
|
||||
@classmethod
|
||||
def Signature(cls, target):
|
||||
# Returns the builtsignature of the current target.
|
||||
return cls._signatures[target]
|
||||
|
||||
@classmethod
|
||||
def Success(cls, target):
|
||||
return cls._success[target]
|
||||
|
||||
@classmethod
|
||||
def ClearSignature(cls, including_third_party=False):
|
||||
if including_third_party:
|
||||
cls._signatures = defaultdict(str)
|
||||
else:
|
||||
keys = cls._signatures.keys()
|
||||
for k in keys:
|
||||
if not k.startswith('//third_party'):
|
||||
del cls._signatures[k]
|
||||
|
||||
@classmethod
|
||||
def Build(cls, targets):
|
||||
"""Build all the targets, using their topological order."""
|
||||
BuildDebug("Start building.")
|
||||
build_order = cls._GetExecutionChain(targets)
|
||||
for t in build_order:
|
||||
BuildLog("Building %s", t)
|
||||
cls._success[t], changed, new_signature = (
|
||||
cls._targets[t].SetUpAndBuild(cls._signatures[t]))
|
||||
if cls._success[t]:
|
||||
cls._signatures[t] = new_signature
|
||||
# Finally, print a summary of the build results.
|
||||
succeeded = [key for key in cls._success if cls._success[key]]
|
||||
BuildDebug("Successfully built %d targets." % len(succeeded))
|
||||
#for key in cls._success:
|
||||
# if cls._success[key]:
|
||||
# BuildDebug(key)
|
||||
failed = [key for key in cls._success if not cls._success[key]]
|
||||
if len(failed) > 0:
|
||||
BuildWarning("Failed to build:")
|
||||
for key in failed:
|
||||
BuildWarning(key)
|
||||
|
||||
@classmethod
|
||||
def Draw(cls):
|
||||
import pydot
|
||||
graph = pydot.Dot("brewery", rankdir="LR")
|
||||
nodes = {}
|
||||
node_style = {'shape': 'box', 'color': '#0F9D58', 'style': 'filled',
|
||||
'fontcolor': '#FFFFFF'}
|
||||
for target_name in cls._targets:
|
||||
nodes[target_name] = pydot.Node('"' + target_name + '"', **node_style)
|
||||
graph.add_node(nodes[target_name])
|
||||
for target_name in cls._deps_map:
|
||||
for dep_name in cls._deps_map[target_name]:
|
||||
graph.add_edge(pydot.Edge(nodes[dep_name], nodes[target_name]))
|
||||
graph.write(graph.get_name() + '.dot', format='raw')
|
||||
with open(graph.get_name() + '.pdf', 'w') as fid:
|
||||
subprocess.call(['dot', '-Tpdf', graph.get_name() + '.dot'], stdout=fid)
|
||||
|
||||
class BuildTarget(object):
|
||||
"""A build target that can be executed with the Build() function."""
|
||||
def __init__(self, name, srcs, other_files=[], deps=[]):
|
||||
self.name = RectifyTarget(name)
|
||||
self.srcs = RectifyFileNames(srcs)
|
||||
self.files = sorted(self.srcs + other_files)
|
||||
self.deps = sorted(RectifyTargets(deps))
|
||||
self.command_groups = []
|
||||
Brewery.Register(self.name, self)
|
||||
|
||||
def GetSignature(self):
|
||||
"""Generate the signature of the build object."""
|
||||
src_digest = ''.join([hashlib.sha256(open(f, 'rb').read()).hexdigest()
|
||||
for f in self.files])
|
||||
dep_digest = ''.join([Brewery.Signature(d) for d in self.deps])
|
||||
return hashlib.sha256(src_digest + dep_digest).hexdigest()
|
||||
|
||||
def SetUpAndBuild(self, built_signature):
|
||||
self.SetUp()
|
||||
signature = self.GetSignature()
|
||||
if not all(Brewery.Success(d) for d in self.deps):
|
||||
BuildWarning("Not all dependencies have succeeded. Skipping build.")
|
||||
return False, True, signature
|
||||
if signature != built_signature:
|
||||
success = self.Build()
|
||||
return success, True, signature
|
||||
return True, False, signature
|
||||
|
||||
def SetUp(self):
|
||||
"""Set up the build object's variables.
|
||||
|
||||
This will always run even if the target has already been built. Anything
|
||||
that further dependencies will need should be implemented here.
|
||||
|
||||
If your target just emits a set of shell commands, in SetUp() you can set
|
||||
self.command_groups and use the default Build function, which basically
|
||||
sends the command groups to a execution pool.
|
||||
"""
|
||||
BuildFatal('Not implemented.')
|
||||
|
||||
def Build(self):
|
||||
"""Builds the target."""
|
||||
success = True
|
||||
for command_group in self.command_groups:
|
||||
success &= Brewery.RunInParallel(command_group)
|
||||
if not success:
|
||||
return False
|
||||
return True
|
||||
|
||||
class proto_library(BuildTarget):
|
||||
"""Builds a protobuffer library.
|
||||
|
||||
A protobuffer library builds a set of protobuffer source files to its cc and
|
||||
python source files, as well as the static library named "libname.a".
|
||||
"""
|
||||
def __init__(self, name, srcs, deps=[]):
|
||||
BuildTarget.__init__(self, name, srcs, deps=deps)
|
||||
|
||||
def SetUp(self):
|
||||
MakeGenDirs(self.srcs)
|
||||
# proto_library depends on protoc, so it would need to add that to the
|
||||
# includes folder.
|
||||
pbcc_files = [GenFilename(filename, 'pb.cc') for filename in self.srcs]
|
||||
pbo_files = [GenFilename(filename, 'pb.o') for filename in self.srcs]
|
||||
proto_commands = [
|
||||
' '.join([Env.PROTOC_BINARY, '-I.', '--cpp_out', Env.GENDIR,
|
||||
'--python_out', Env.GENDIR, filename])
|
||||
for filename in self.srcs]
|
||||
cpp_commands = [
|
||||
' '.join([Env.CC, Env.CFLAGS, Env.INCLUDES, '-c', pbcc, '-o', pbo])
|
||||
for pbcc, pbo in zip(pbcc_files, pbo_files)]
|
||||
self.cc_obj_files = pbo_files
|
||||
self.cc_obj_files += MergeOrderedObjs(
|
||||
[Brewery.Get(dep).cc_obj_files for dep in self.deps])
|
||||
self.command_groups = [proto_commands, cpp_commands]
|
||||
|
||||
|
||||
class cc_target(BuildTarget):
|
||||
def __init__(self, name, srcs, hdrs=[], deps=[], cflags=[], external_libs=[],
|
||||
build_binary=False, is_test=False, whole_archive=False,
|
||||
shared=False):
|
||||
self.hdrs = RectifyFileNames(hdrs)
|
||||
self.cflags = cflags
|
||||
self.external_libs = [
|
||||
'-l' + s if not s.startswith('-') else s for s in external_libs]
|
||||
self.build_binary = build_binary
|
||||
self.is_test = is_test
|
||||
self.whole_archive = whole_archive
|
||||
self.shared = shared
|
||||
BuildTarget.__init__(self, name, srcs, self.hdrs, deps=deps)
|
||||
|
||||
def OutputName(self, is_library=False, is_shared=False):
|
||||
name_split = self.name.split(':')
|
||||
if is_library:
|
||||
if is_shared:
|
||||
return os.path.join(
|
||||
Env.GENDIR, name_split[0][2:],
|
||||
'lib' + name_split[1] + Env.SHARED_LIB_EXT)
|
||||
else:
|
||||
return os.path.join(
|
||||
Env.GENDIR, name_split[0][2:], 'lib' + name_split[1] + '.a')
|
||||
else:
|
||||
return os.path.join(Env.GENDIR, name_split[0][2:], name_split[1])
|
||||
|
||||
def SetUp(self):
|
||||
MakeGenDirs(self.srcs)
|
||||
CopyToGenDir(self.hdrs)
|
||||
obj_files = [GenFilename(src, 'o') for src in self.srcs]
|
||||
cpp_commands = [
|
||||
' '.join([Env.CC, Env.CFLAGS, Env.INCLUDES, ' '.join(self.cflags),
|
||||
'-c', src, '-o', obj])
|
||||
for src, obj in zip(self.srcs, obj_files)]
|
||||
archive_file = self.OutputName(is_library=True)
|
||||
# Create the archive
|
||||
link_commands = [
|
||||
' '.join([Env.LINK_STATIC, archive_file] + obj_files)]
|
||||
if self.whole_archive:
|
||||
archive_file = Env.WHOLE_ARCHIVE_TEMPLATE % archive_file
|
||||
self.cc_obj_files = MergeOrderedObjs(
|
||||
[Brewery.Get(dep).cc_obj_files for dep in self.deps] +
|
||||
[self.external_libs])
|
||||
self.cc_obj_files.insert(0, archive_file)
|
||||
if self.build_binary:
|
||||
link_binary_commands = [
|
||||
' '.join([Env.LINK_BINARY, self.OutputName()] + self.cc_obj_files +
|
||||
[Env.LINKFLAGS])]
|
||||
self.command_groups = [cpp_commands, link_commands, link_binary_commands]
|
||||
elif self.shared:
|
||||
link_shared_commands = [' '.join(
|
||||
[Env.LINK_SHARED, self.OutputName(is_library=True, is_shared=True)]
|
||||
+ obj_files + self.cc_obj_files[1:] + [Env.LINKFLAGS])]
|
||||
self.command_groups = [cpp_commands, link_commands, link_shared_commands]
|
||||
else:
|
||||
self.command_groups = [cpp_commands, link_commands]
|
||||
if self.is_test:
|
||||
# Add test command
|
||||
self.command_groups.append([
|
||||
' '.join([self.OutputName(), '--caffe_test_root',
|
||||
os.path.abspath(Env.GENDIR),
|
||||
'--gtest_filter=-*.LARGE_*'])])
|
||||
|
||||
|
||||
def cc_library(*args, **kwargs):
|
||||
return cc_target(*args, **kwargs)
|
||||
|
||||
def cc_binary(*args, **kwargs):
|
||||
return cc_target(*args, build_binary=True, **kwargs)
|
||||
|
||||
def cc_test(*args, **kwargs):
|
||||
if 'cflags' not in kwargs:
|
||||
kwargs['cflags'] = []
|
||||
kwargs['cflags'].append("-DGTEST_USE_OWN_TR1_TUPLE=1")
|
||||
return cc_target(
|
||||
*args, build_binary=True, is_test=True, whole_archive=True, **kwargs)
|
||||
|
||||
|
||||
class cuda_library(BuildTarget):
|
||||
def __init__(self, name, srcs, hdrs=[], deps=[], cflags=[],
|
||||
whole_archive=False):
|
||||
self.hdrs = RectifyFileNames(hdrs)
|
||||
self.cflags = cflags
|
||||
self.whole_archive = whole_archive
|
||||
BuildTarget.__init__(self, name, srcs, self.hdrs, deps=deps)
|
||||
|
||||
def OutputName(self, is_library=False):
|
||||
name_split = self.name.split(':')
|
||||
if is_library:
|
||||
return os.path.join(
|
||||
Env.GENDIR, name_split[0][2:], 'lib' + name_split[1] + '.a')
|
||||
else:
|
||||
return os.path.join(Env.GENDIR, name_split[0][2:], name_split[1])
|
||||
|
||||
def SetUp(self):
|
||||
MakeGenDirs(self.srcs)
|
||||
CopyToGenDir(self.hdrs)
|
||||
obj_files = [GenFilename(src, 'cuo') for src in self.srcs]
|
||||
cpp_commands = [
|
||||
' '.join([Env.NVCC, Env.NVCC_CFLAGS, Env.INCLUDES,
|
||||
' '.join(self.cflags), '-c', src, '-o', obj])
|
||||
for src, obj in zip(self.srcs, obj_files)]
|
||||
archive_file = self.OutputName(is_library=True)
|
||||
# Create the archive
|
||||
link_commands = [
|
||||
' '.join([Env.LINK_STATIC, archive_file]
|
||||
+ obj_files)]
|
||||
if self.whole_archive:
|
||||
archive_file = Env.WHOLE_ARCHIVE_TEMPLATE % archive_file
|
||||
self.cc_obj_files = MergeOrderedObjs(
|
||||
[Brewery.Get(dep).cc_obj_files for dep in self.deps])
|
||||
# We will need to add nvidia link targets as well
|
||||
self.cc_obj_files.append(Env.NVCC_LINKS)
|
||||
self.cc_obj_files.insert(0, archive_file)
|
||||
self.command_groups = [cpp_commands, link_commands]
|
||||
|
||||
|
||||
class filegroup(BuildTarget):
|
||||
def __init__(self, name, srcs, deps=[]):
|
||||
self.cc_obj_files = []
|
||||
BuildTarget.__init__(self, name, srcs, deps=deps)
|
||||
|
||||
def SetUp(self):
|
||||
CopyToGenDir(self.srcs)
|
||||
|
||||
def py_library(*args, **kwargs):
|
||||
return filegroup(*args, **kwargs)
|
||||
|
||||
def cc_headers(*args, **kwargs):
|
||||
return filegroup(*args, **kwargs)
|
||||
|
||||
class py_test(BuildTarget):
|
||||
def __init__(self, name, srcs, deps=[]):
|
||||
self.cc_obj_files = []
|
||||
BuildTarget.__init__(self, name, srcs, deps=deps)
|
||||
|
||||
def SetUp(self):
|
||||
CopyToGenDir(self.srcs)
|
||||
if len(self.srcs) > 1:
|
||||
raise RuntimeError('py_test should only take one python source file.')
|
||||
# Add test command
|
||||
self.command_groups = [
|
||||
['python %s' % GenFilename(self.srcs[0])]]
|
||||
|
||||
|
||||
class cc_thirdparty_target(BuildTarget):
|
||||
"""thirdparty_target should only be used in third_party to build things with
|
||||
a pre-defined script. Note that this will also set the following values:
|
||||
cc_includes: the include folder needed for compiling dependent targets.
|
||||
cc_obj_files: the object files produced by the target.
|
||||
|
||||
When building, this script will copy all stuff to a temporary directory, so
|
||||
that the original source tree is not affected.
|
||||
"""
|
||||
def __init__(self, name, srcs, commands, cc_obj_files, deps=[]):
|
||||
self.cwd = Brewery.CWD
|
||||
self.build_dir = os.path.join(Brewery.TMPDIR, Brewery.CWD)
|
||||
self.commands = [
|
||||
'SRCDIR=%s' % self.build_dir,
|
||||
'DSTDIR=%s' % os.path.join(os.path.abspath(Env.GENDIR), "third_party"),
|
||||
'CPUS=%d' % Env.CPUS,
|
||||
'cd %s' % self.build_dir,
|
||||
] + commands
|
||||
self.cc_obj_files = [
|
||||
os.path.join(Env.GENDIR, "third_party", f)
|
||||
for f in cc_obj_files if not f.startswith('-l')] + [
|
||||
f for f in cc_obj_files if f.startswith('-l')]
|
||||
BuildTarget.__init__(self, name, srcs, deps=deps)
|
||||
|
||||
def SetUp(self):
|
||||
self.cc_obj_files += MergeOrderedObjs(
|
||||
[Brewery.Get(dep).cc_obj_files for dep in self.deps])
|
||||
|
||||
def Build(self):
|
||||
# First, copy all things to the temp directory
|
||||
shutil.copytree(self.cwd, self.build_dir)
|
||||
BuildDebug("script: %s" % str(self.commands))
|
||||
|
||||
proc = subprocess.Popen(' && '.join(self.commands), stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT, shell=True)
|
||||
stdout, _ = proc.communicate()
|
||||
if proc.returncode:
|
||||
BuildWarning("Script failed.")
|
||||
print stdout
|
||||
return False
|
||||
return True
|
||||
|
||||
class shell_script(BuildTarget):
|
||||
"""Shell scripts are directly run to generate data files. It is run from the
|
||||
root of the gendir.
|
||||
"""
|
||||
def __init__(self, name, srcs, commands, deps=[]):
|
||||
self.cwd = Brewery.CWD
|
||||
self.commands = [
|
||||
'GENDIR=%s' % os.path.abspath(Env.GENDIR),
|
||||
'CWD=%s' % self.cwd,
|
||||
'cd %s' % os.path.abspath(Env.GENDIR),
|
||||
] + commands
|
||||
BuildTarget.__init__(self, name, srcs, deps=deps)
|
||||
|
||||
def SetUp(self):
|
||||
"""A shell script should produce no cc_obj_files. This is here just so that
|
||||
a cc object can use shell_script as a data dependency.
|
||||
"""
|
||||
CopyToGenDir(self.srcs)
|
||||
self.cc_obj_files = []
|
||||
|
||||
def Build(self):
|
||||
BuildDebug("script: %s" % str(self.commands))
|
||||
proc = subprocess.Popen(' && '.join(self.commands), stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT, shell=True)
|
||||
stdout, _ = proc.communicate()
|
||||
if proc.returncode:
|
||||
BuildWarning("Script failed.")
|
||||
print stdout
|
||||
return False
|
||||
return True
|
||||
|
||||
################################################################################
|
||||
# Below are functions during the main entry.
|
||||
################################################################################
|
||||
|
||||
def main(argv):
|
||||
"""The main entry of the build script."""
|
||||
BuildLog('Welcome to Caffe2. Running command: %s' % str(argv))
|
||||
Brewery.InitBrewery()
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] == 'clean':
|
||||
for folder in ['caffe2', 'pycaffe2']:
|
||||
os.system('rm -rf ' + os.path.join(Env.GENDIR, folder))
|
||||
Brewery.ClearSignature()
|
||||
elif sys.argv[1] == 'reallyclean':
|
||||
os.system('rm -rf ' + Env.GENDIR)
|
||||
BuildLog('Finished cleaning.')
|
||||
elif sys.argv[1] == 'build':
|
||||
# Build all targets.
|
||||
targets = sys.argv[2:]
|
||||
Brewery.Build(targets)
|
||||
elif sys.argv[1] == 'draw':
|
||||
# Draws the dependency graph.
|
||||
Brewery.Draw()
|
||||
else:
|
||||
BuildFatal('Unknown command: %s' % sys.argv[1])
|
||||
else:
|
||||
BuildLog('Finished parsing all build files without error.')
|
||||
Brewery.Finalize()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
156
build_env.py
Normal file
156
build_env.py
Normal file
@ -0,0 +1,156 @@
|
||||
""" build_env defines the general environment that we use to build.
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def _GetSubprocessOutput(commands):
|
||||
try:
|
||||
proc = subprocess.Popen(commands, stdout=subprocess.PIPE)
|
||||
out, err = proc.communicate()
|
||||
except OSError as err:
|
||||
print 'Cannot run command', commands, '. Return empty output.'
|
||||
return ''
|
||||
return out.strip()
|
||||
|
||||
def _GetCompilerType(CC):
|
||||
# determine compiler type.
|
||||
_COMPILER_VERSION_STR = _GetSubprocessOutput([CC, '--version'])
|
||||
if 'clang' in _COMPILER_VERSION_STR:
|
||||
return 'clang'
|
||||
elif ('g++' in _COMPILER_VERSION_STR or
|
||||
'Free Software Foundation' in _COMPILER_VERSION_STR):
|
||||
return 'g++'
|
||||
else:
|
||||
raise RuntimeError('Cannot determine C++ compiler type.')
|
||||
|
||||
|
||||
class Env(object):
|
||||
"""Env is the class that stores all the build variables."""
|
||||
# Define the compile binary commands.
|
||||
CC = 'c++'
|
||||
MPICC = 'mpic++'
|
||||
LINK_BINARY = CC + ' -o'
|
||||
LINK_SHARED = CC + ' -shared -o'
|
||||
LINK_STATIC = 'ar rcs'
|
||||
# Protobuf constants
|
||||
PROTOC_BINARY = "protoc"
|
||||
|
||||
if sys.platform == 'darwin':
|
||||
# For some reason, python on mac still recognizes the .so extensions...
|
||||
# So we will use .so here still.
|
||||
SHARED_LIB_EXT = '.so'
|
||||
elif sys.platform.startswith('linux'):
|
||||
SHARED_LIB_EXT = '.so'
|
||||
else:
|
||||
raise RuntimeError('Unknown system platform.')
|
||||
|
||||
COMPILER_TYPE = _GetCompilerType(CC)
|
||||
|
||||
#determine mpi include and mpi link flags.
|
||||
MPI_INCLUDES = _GetSubprocessOutput([MPICC, '--showme:incdirs']).split(' ')
|
||||
MPI_LIBDIRS = _GetSubprocessOutput([MPICC, '--showme:libdirs']).split(' ')
|
||||
MPI_LIBS = _GetSubprocessOutput([MPICC, '--showme:libs']).split(' ')
|
||||
if len(MPI_INCLUDES) == 1 and MPI_INCLUDES[0] == '':
|
||||
print ('MPI not found, so some libraries and binaries that use MPI will '
|
||||
'not compile correctly. If you would like to use those, you can '
|
||||
'install MPI on your machine. The easiest way to install on ubuntu '
|
||||
'is via apt-get, and on mac via homebrew.')
|
||||
# Set all values above to empty lists, so at least others will compile.
|
||||
MPI_INCLUDES = []
|
||||
MPI_LIBDIRS = []
|
||||
MPI_LIBS = []
|
||||
|
||||
# Determine the CUDA directory.
|
||||
if os.path.exists('/usr/local/cuda'):
|
||||
CUDA_DIR = '/usr/local/cuda'
|
||||
else:
|
||||
raise RuntimeError('Cannot find Cuda directory.')
|
||||
NVCC = os.path.join(CUDA_DIR, 'bin', 'nvcc')
|
||||
NVCC_INCLUDES = [os.path.join(CUDA_DIR, 'include')]
|
||||
|
||||
# Determine the NVCC link flags.
|
||||
if COMPILER_TYPE == 'clang':
|
||||
NVCC_LINKS = ('-rpath %s -L%s'
|
||||
% (os.path.join(CUDA_DIR, 'lib'), os.path.join(CUDA_DIR, 'lib')))
|
||||
elif COMPILER_TYPE == 'g++':
|
||||
NVCC_LINKS = ('-Wl,-rpath=%s -L%s'
|
||||
% (os.path.join(CUDA_DIR, 'lib64'), os.path.join(CUDA_DIR, 'lib64')))
|
||||
else:
|
||||
raise RuntimeError('Unknown compiler type to set nvcc link flags.')
|
||||
NVCC_LINKS += ' -l' + ' -l'.join([
|
||||
'cublas_static', 'curand_static', 'cuda', 'cudart_static', 'culibos'])
|
||||
if sys.platform.startswith('linux'):
|
||||
NVCC_LINKS += ' -l' + ' -l'.join(['rt', 'dl'])
|
||||
|
||||
# NVCC C flags.
|
||||
NVCC_CFLAGS = ' '.join([
|
||||
# add cflags here.
|
||||
'-Xcompiler -fPIC',
|
||||
'-O2',
|
||||
'-std=c++11',
|
||||
'-gencode=arch=compute_30,code=sm_30',
|
||||
])
|
||||
|
||||
# Determine how the compiler deals with whole archives.
|
||||
if COMPILER_TYPE == 'clang':
|
||||
WHOLE_ARCHIVE_TEMPLATE = '-Wl,-force_load,%s'
|
||||
elif COMPILER_TYPE == 'g++':
|
||||
WHOLE_ARCHIVE_TEMPLATE = '-Wl,--whole-archive %s -Wl,--no-whole-archive'
|
||||
else:
|
||||
raise RuntimeError('Unknown compiler type to set whole-archive template.')
|
||||
|
||||
# General cflags that should be added in all cc arguments.
|
||||
CFLAGS = ' '.join([
|
||||
# add cflags here.
|
||||
'-fPIC',
|
||||
'-DPIC',
|
||||
#'-O0',
|
||||
'-O2',
|
||||
#'-pg',
|
||||
'-DNDEBUG',
|
||||
'-msse',
|
||||
'-mavx',
|
||||
'-ffast-math',
|
||||
'-std=c++11',
|
||||
'-W',
|
||||
'-Wall',
|
||||
'-Wno-unused-parameter',
|
||||
'-Wno-sign-compare',
|
||||
#'-Wno-c++11-extensions',
|
||||
])
|
||||
|
||||
GENDIR = 'gen'
|
||||
# General include folders.
|
||||
INCLUDES = NVCC_INCLUDES + MPI_INCLUDES + [
|
||||
GENDIR,
|
||||
os.path.join(GENDIR, 'third_party'),
|
||||
os.path.join(GENDIR, 'third_party/include'),
|
||||
'/usr/local/include',
|
||||
]
|
||||
INCLUDES = ' '.join(['-I' + s for s in INCLUDES])
|
||||
# Python
|
||||
INCLUDES += ' ' + _GetSubprocessOutput(['python-config', '--includes'])
|
||||
# General lib folders.
|
||||
LIBDIRS = MPI_LIBDIRS + [
|
||||
'/usr/local/lib',
|
||||
]
|
||||
LIBDIRS = ' '.join(['-L' + s for s in LIBDIRS])
|
||||
# General link flags for binary targets
|
||||
LIBS = []
|
||||
LIBS = ' '.join(['-l' + s for s in LIBS])
|
||||
LINKFLAGS = ' '.join([
|
||||
# Add link flags here
|
||||
'-pthread',
|
||||
#'-pg',
|
||||
]) + ' ' + LIBDIRS + ' ' + LIBS
|
||||
PYTHON_LIBS = [_GetSubprocessOutput(['python-config', '--ldflags'])]
|
||||
|
||||
CPUS = multiprocessing.cpu_count()
|
||||
|
||||
def __init__(self):
|
||||
"""ENV is a singleton and should not be instantiated."""
|
||||
raise NotImplementedError(
|
||||
'Build system error: ENV should not be instantiated.')
|
53
caffe.cloc
Normal file
53
caffe.cloc
Normal file
@ -0,0 +1,53 @@
|
||||
Bourne Shell
|
||||
filter remove_matches ^\s*#
|
||||
filter remove_inline #.*$
|
||||
extension sh
|
||||
script_exe sh
|
||||
C
|
||||
filter remove_matches ^\s*//
|
||||
filter call_regexp_common C
|
||||
filter remove_inline //.*$
|
||||
extension c
|
||||
extension ec
|
||||
extension pgc
|
||||
C++
|
||||
filter remove_matches ^\s*//
|
||||
filter remove_inline //.*$
|
||||
filter call_regexp_common C
|
||||
extension C
|
||||
extension cc
|
||||
extension cpp
|
||||
extension cxx
|
||||
extension pcc
|
||||
C/C++ Header
|
||||
filter remove_matches ^\s*//
|
||||
filter call_regexp_common C
|
||||
filter remove_inline //.*$
|
||||
extension H
|
||||
extension h
|
||||
extension hh
|
||||
extension hpp
|
||||
CUDA
|
||||
filter remove_matches ^\s*//
|
||||
filter remove_inline //.*$
|
||||
filter call_regexp_common C
|
||||
extension cu
|
||||
Python
|
||||
filter remove_matches ^\s*#
|
||||
filter docstring_to_C
|
||||
filter call_regexp_common C
|
||||
filter remove_inline #.*$
|
||||
extension py
|
||||
make
|
||||
filter remove_matches ^\s*#
|
||||
filter remove_inline #.*$
|
||||
extension Gnumakefile
|
||||
extension Makefile
|
||||
extension am
|
||||
extension gnumakefile
|
||||
extension makefile
|
||||
filename Gnumakefile
|
||||
filename Makefile
|
||||
filename gnumakefile
|
||||
filename makefile
|
||||
script_exe make
|
4
caffe/BREW
Normal file
4
caffe/BREW
Normal file
@ -0,0 +1,4 @@
|
||||
filegroup(
|
||||
name = "caffe_python",
|
||||
srcs = ["__init__.py"],
|
||||
)
|
0
caffe/__init__.py
Normal file
0
caffe/__init__.py
Normal file
17
caffe/proto/BREW
Normal file
17
caffe/proto/BREW
Normal file
@ -0,0 +1,17 @@
|
||||
# Build file for the old caffe protocol buffers.
|
||||
|
||||
proto_library(
|
||||
name = 'caffe_proto',
|
||||
srcs = ['caffe.proto'],
|
||||
deps = [
|
||||
"//third_party/google:protobuf",
|
||||
]
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "caffe_proto_py",
|
||||
srcs = ["__init__.py"],
|
||||
deps = [
|
||||
"//caffe:caffe_python",
|
||||
]
|
||||
)
|
0
caffe/proto/__init__.py
Normal file
0
caffe/proto/__init__.py
Normal file
967
caffe/proto/caffe.proto
Normal file
967
caffe/proto/caffe.proto
Normal file
@ -0,0 +1,967 @@
|
||||
syntax = "proto2";
|
||||
|
||||
package caffe;
|
||||
|
||||
// Specifies the shape (dimensions) of a Blob.
|
||||
message BlobShape {
|
||||
repeated int64 dim = 1 [packed = true];
|
||||
}
|
||||
|
||||
message BlobProto {
|
||||
optional BlobShape shape = 7;
|
||||
repeated float data = 5 [packed = true];
|
||||
repeated float diff = 6 [packed = true];
|
||||
|
||||
// 4D dimensions -- deprecated. Use "shape" instead.
|
||||
optional int32 num = 1 [default = 0];
|
||||
optional int32 channels = 2 [default = 0];
|
||||
optional int32 height = 3 [default = 0];
|
||||
optional int32 width = 4 [default = 0];
|
||||
}
|
||||
|
||||
// The BlobProtoVector is simply a way to pass multiple blobproto instances
|
||||
// around.
|
||||
message BlobProtoVector {
|
||||
repeated BlobProto blobs = 1;
|
||||
}
|
||||
|
||||
message Datum {
|
||||
optional int32 channels = 1;
|
||||
optional int32 height = 2;
|
||||
optional int32 width = 3;
|
||||
// the actual image data, in bytes
|
||||
optional bytes data = 4;
|
||||
optional int32 label = 5;
|
||||
// Optionally, the datum could also hold float data.
|
||||
repeated float float_data = 6;
|
||||
// If true data contains an encoded image that need to be decoded
|
||||
optional bool encoded = 7 [default = false];
|
||||
}
|
||||
|
||||
message FillerParameter {
|
||||
// The filler type.
|
||||
optional string type = 1 [default = 'constant'];
|
||||
optional float value = 2 [default = 0]; // the value in constant filler
|
||||
optional float min = 3 [default = 0]; // the min value in uniform filler
|
||||
optional float max = 4 [default = 1]; // the max value in uniform filler
|
||||
optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
|
||||
optional float std = 6 [default = 1]; // the std value in Gaussian filler
|
||||
// The expected number of non-zero output weights for a given input in
|
||||
// Gaussian filler -- the default -1 means don't perform sparsification.
|
||||
optional int32 sparse = 7 [default = -1];
|
||||
}
|
||||
|
||||
message NetParameter {
|
||||
optional string name = 1; // consider giving the network a name
|
||||
// The input blobs to the network.
|
||||
repeated string input = 3;
|
||||
// The shape of the input blobs.
|
||||
repeated BlobShape input_shape = 8;
|
||||
|
||||
// 4D input dimensions -- deprecated. Use "shape" instead.
|
||||
// If specified, for each input blob there should be four
|
||||
// values specifying the num, channels, height and width of the input blob.
|
||||
// Thus, there should be a total of (4 * #input) numbers.
|
||||
repeated int32 input_dim = 4;
|
||||
|
||||
// Whether the network will force every layer to carry out backward operation.
|
||||
// If set False, then whether to carry out backward is determined
|
||||
// automatically according to the net structure and learning rates.
|
||||
optional bool force_backward = 5 [default = false];
|
||||
// The current "state" of the network, including the phase, level, and stage.
|
||||
// Some layers may be included/excluded depending on this state and the states
|
||||
// specified in the layers' include and exclude fields.
|
||||
optional NetState state = 6;
|
||||
|
||||
// Print debugging information about results while running Net::Forward,
|
||||
// Net::Backward, and Net::Update.
|
||||
optional bool debug_info = 7 [default = false];
|
||||
|
||||
// The layers that make up the net. Each of their configurations, including
|
||||
// connectivity and behavior, is specified as a LayerParameter.
|
||||
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
|
||||
|
||||
// DEPRECATED: use 'layer' instead.
|
||||
repeated V1LayerParameter layers = 2;
|
||||
}
|
||||
|
||||
// NOTE
|
||||
// Update the next available ID when you add a new SolverParameter field.
|
||||
//
|
||||
// SolverParameter next available ID: 36 (last added: clip_gradients)
|
||||
message SolverParameter {
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Specifying the train and test networks
|
||||
//
|
||||
// Exactly one train net must be specified using one of the following fields:
|
||||
// train_net_param, train_net, net_param, net
|
||||
// One or more test nets may be specified using any of the following fields:
|
||||
// test_net_param, test_net, net_param, net
|
||||
// If more than one test net field is specified (e.g., both net and
|
||||
// test_net are specified), they will be evaluated in the field order given
|
||||
// above: (1) test_net_param, (2) test_net, (3) net_param/net.
|
||||
// A test_iter must be specified for each test_net.
|
||||
// A test_level and/or a test_stage may also be specified for each test_net.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Proto filename for the train net, possibly combined with one or more
|
||||
// test nets.
|
||||
optional string net = 24;
|
||||
// Inline train net param, possibly combined with one or more test nets.
|
||||
optional NetParameter net_param = 25;
|
||||
|
||||
optional string train_net = 1; // Proto filename for the train net.
|
||||
repeated string test_net = 2; // Proto filenames for the test nets.
|
||||
optional NetParameter train_net_param = 21; // Inline train net params.
|
||||
repeated NetParameter test_net_param = 22; // Inline test net params.
|
||||
|
||||
// The states for the train/test nets. Must be unspecified or
|
||||
// specified once per net.
|
||||
//
|
||||
// By default, all states will have solver = true;
|
||||
// train_state will have phase = TRAIN,
|
||||
// and all test_state's will have phase = TEST.
|
||||
// Other defaults are set according to the NetState defaults.
|
||||
optional NetState train_state = 26;
|
||||
repeated NetState test_state = 27;
|
||||
|
||||
// The number of iterations for each test net.
|
||||
repeated int32 test_iter = 3;
|
||||
|
||||
// The number of iterations between two testing phases.
|
||||
optional int32 test_interval = 4 [default = 0];
|
||||
optional bool test_compute_loss = 19 [default = false];
|
||||
// If true, run an initial test pass before the first iteration,
|
||||
// ensuring memory availability and printing the starting value of the loss.
|
||||
optional bool test_initialization = 32 [default = true];
|
||||
optional float base_lr = 5; // The base learning rate
|
||||
// the number of iterations between displaying info. If display = 0, no info
|
||||
// will be displayed.
|
||||
optional int32 display = 6;
|
||||
// Display the loss averaged over the last average_loss iterations
|
||||
optional int32 average_loss = 33 [default = 1];
|
||||
optional int32 max_iter = 7; // the maximum number of iterations
|
||||
optional string lr_policy = 8; // The learning rate decay policy.
|
||||
optional float gamma = 9; // The parameter to compute the learning rate.
|
||||
optional float power = 10; // The parameter to compute the learning rate.
|
||||
optional float momentum = 11; // The momentum value.
|
||||
optional float weight_decay = 12; // The weight decay.
|
||||
// regularization types supported: L1 and L2
|
||||
// controlled by weight_decay
|
||||
optional string regularization_type = 29 [default = "L2"];
|
||||
// the stepsize for learning rate policy "step"
|
||||
optional int32 stepsize = 13;
|
||||
// the stepsize for learning rate policy "multistep"
|
||||
repeated int32 stepvalue = 34;
|
||||
|
||||
// Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
|
||||
// whenever their actual L2 norm is larger.
|
||||
optional float clip_gradients = 35 [default = -1];
|
||||
|
||||
optional int32 snapshot = 14 [default = 0]; // The snapshot interval
|
||||
optional string snapshot_prefix = 15; // The prefix for the snapshot.
|
||||
// whether to snapshot diff in the results or not. Snapshotting diff will help
|
||||
// debugging but the final protocol buffer size will be much larger.
|
||||
optional bool snapshot_diff = 16 [default = false];
|
||||
// the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
|
||||
enum SolverMode {
|
||||
CPU = 0;
|
||||
GPU = 1;
|
||||
}
|
||||
optional SolverMode solver_mode = 17 [default = GPU];
|
||||
// the device_id will that be used in GPU mode. Use device_id = 0 in default.
|
||||
optional int32 device_id = 18 [default = 0];
|
||||
// If non-negative, the seed with which the Solver will initialize the Caffe
|
||||
// random number generator -- useful for reproducible results. Otherwise,
|
||||
// (and by default) initialize using a seed derived from the system clock.
|
||||
optional int64 random_seed = 20 [default = -1];
|
||||
|
||||
// Solver type
|
||||
enum SolverType {
|
||||
SGD = 0;
|
||||
NESTEROV = 1;
|
||||
ADAGRAD = 2;
|
||||
}
|
||||
optional SolverType solver_type = 30 [default = SGD];
|
||||
// numerical stability for AdaGrad
|
||||
optional float delta = 31 [default = 1e-8];
|
||||
|
||||
// If true, print information about the state of the net that may help with
|
||||
// debugging learning problems.
|
||||
optional bool debug_info = 23 [default = false];
|
||||
|
||||
// If false, don't save a snapshot after training finishes.
|
||||
optional bool snapshot_after_train = 28 [default = true];
|
||||
}
|
||||
|
||||
// A message that stores the solver snapshots
|
||||
message SolverState {
|
||||
optional int32 iter = 1; // The current iteration
|
||||
optional string learned_net = 2; // The file that stores the learned net.
|
||||
repeated BlobProto history = 3; // The history for sgd solvers
|
||||
optional int32 current_step = 4 [default = 0]; // The current step for learning rate
|
||||
}
|
||||
|
||||
enum Phase {
|
||||
TRAIN = 0;
|
||||
TEST = 1;
|
||||
}
|
||||
|
||||
message NetState {
|
||||
optional Phase phase = 1 [default = TEST];
|
||||
optional int32 level = 2 [default = 0];
|
||||
repeated string stage = 3;
|
||||
}
|
||||
|
||||
message NetStateRule {
|
||||
// Set phase to require the NetState have a particular phase (TRAIN or TEST)
|
||||
// to meet this rule.
|
||||
optional Phase phase = 1;
|
||||
|
||||
// Set the minimum and/or maximum levels in which the layer should be used.
|
||||
// Leave undefined to meet the rule regardless of level.
|
||||
optional int32 min_level = 2;
|
||||
optional int32 max_level = 3;
|
||||
|
||||
// Customizable sets of stages to include or exclude.
|
||||
// The net must have ALL of the specified stages and NONE of the specified
|
||||
// "not_stage"s to meet the rule.
|
||||
// (Use multiple NetStateRules to specify conjunctions of stages.)
|
||||
repeated string stage = 4;
|
||||
repeated string not_stage = 5;
|
||||
}
|
||||
|
||||
// Specifies training parameters (multipliers on global learning constants,
|
||||
// and the name and other settings used for weight sharing).
|
||||
message ParamSpec {
|
||||
// The names of the parameter blobs -- useful for sharing parameters among
|
||||
// layers, but never required otherwise. To share a parameter between two
|
||||
// layers, give it a (non-empty) name.
|
||||
optional string name = 1;
|
||||
|
||||
// Whether to require shared weights to have the same shape, or just the same
|
||||
// count -- defaults to STRICT if unspecified.
|
||||
optional DimCheckMode share_mode = 2;
|
||||
enum DimCheckMode {
|
||||
// STRICT (default) requires that num, channels, height, width each match.
|
||||
STRICT = 0;
|
||||
// PERMISSIVE requires only the count (num*channels*height*width) to match.
|
||||
PERMISSIVE = 1;
|
||||
}
|
||||
|
||||
// The multiplier on the global learning rate for this parameter.
|
||||
optional float lr_mult = 3 [default = 1.0];
|
||||
|
||||
// The multiplier on the global weight decay for this parameter.
|
||||
optional float decay_mult = 4 [default = 1.0];
|
||||
}
|
||||
|
||||
// NOTE
|
||||
// Update the next available ID when you add a new LayerParameter field.
|
||||
//
|
||||
// LayerParameter next available layer-specific ID: 132 (last added: prelu_param)
|
||||
message LayerParameter {
|
||||
optional string name = 1; // the layer name
|
||||
optional string type = 2; // the layer type
|
||||
repeated string bottom = 3; // the name of each bottom blob
|
||||
repeated string top = 4; // the name of each top blob
|
||||
|
||||
// The train / test phase for computation.
|
||||
optional Phase phase = 10;
|
||||
|
||||
// The amount of weight to assign each top blob in the objective.
|
||||
// Each layer assigns a default value, usually of either 0 or 1,
|
||||
// to each top blob.
|
||||
repeated float loss_weight = 5;
|
||||
|
||||
// Specifies training parameters (multipliers on global learning constants,
|
||||
// and the name and other settings used for weight sharing).
|
||||
repeated ParamSpec param = 6;
|
||||
|
||||
// The blobs containing the numeric parameters of the layer.
|
||||
repeated BlobProto blobs = 7;
|
||||
|
||||
// Rules controlling whether and when a layer is included in the network,
|
||||
// based on the current NetState. You may specify a non-zero number of rules
|
||||
// to include OR exclude, but not both. If no include or exclude rules are
|
||||
// specified, the layer is always included. If the current NetState meets
|
||||
// ANY (i.e., one or more) of the specified rules, the layer is
|
||||
// included/excluded.
|
||||
repeated NetStateRule include = 8;
|
||||
repeated NetStateRule exclude = 9;
|
||||
|
||||
// Parameters for data pre-processing.
|
||||
optional TransformationParameter transform_param = 100;
|
||||
|
||||
// Parameters shared by loss layers.
|
||||
optional LossParameter loss_param = 101;
|
||||
|
||||
// Layer type-specific parameters.
|
||||
//
|
||||
// Note: certain layers may have more than one computational engine
|
||||
// for their implementation. These layers include an Engine type and
|
||||
// engine parameter for selecting the implementation.
|
||||
// The default for the engine is set by the ENGINE switch at compile-time.
|
||||
optional AccuracyParameter accuracy_param = 102;
|
||||
optional ArgMaxParameter argmax_param = 103;
|
||||
optional ConcatParameter concat_param = 104;
|
||||
optional ContrastiveLossParameter contrastive_loss_param = 105;
|
||||
optional ConvolutionParameter convolution_param = 106;
|
||||
optional DataParameter data_param = 107;
|
||||
optional DropoutParameter dropout_param = 108;
|
||||
optional DummyDataParameter dummy_data_param = 109;
|
||||
optional EltwiseParameter eltwise_param = 110;
|
||||
optional ExpParameter exp_param = 111;
|
||||
optional HDF5DataParameter hdf5_data_param = 112;
|
||||
optional HDF5OutputParameter hdf5_output_param = 113;
|
||||
optional HingeLossParameter hinge_loss_param = 114;
|
||||
optional ImageDataParameter image_data_param = 115;
|
||||
optional InfogainLossParameter infogain_loss_param = 116;
|
||||
optional InnerProductParameter inner_product_param = 117;
|
||||
optional LRNParameter lrn_param = 118;
|
||||
optional MemoryDataParameter memory_data_param = 119;
|
||||
optional MVNParameter mvn_param = 120;
|
||||
optional PoolingParameter pooling_param = 121;
|
||||
optional PowerParameter power_param = 122;
|
||||
optional PReLUParameter prelu_param = 131;
|
||||
optional PythonParameter python_param = 130;
|
||||
optional ReLUParameter relu_param = 123;
|
||||
optional SigmoidParameter sigmoid_param = 124;
|
||||
optional SoftmaxParameter softmax_param = 125;
|
||||
optional SliceParameter slice_param = 126;
|
||||
optional TanHParameter tanh_param = 127;
|
||||
optional ThresholdParameter threshold_param = 128;
|
||||
optional WindowDataParameter window_data_param = 129;
|
||||
}
|
||||
|
||||
// Message that stores parameters used to apply transformation
|
||||
// to the data layer's data
|
||||
message TransformationParameter {
|
||||
// For data pre-processing, we can do simple scaling and subtracting the
|
||||
// data mean, if provided. Note that the mean subtraction is always carried
|
||||
// out before scaling.
|
||||
optional float scale = 1 [default = 1];
|
||||
// Specify if we want to randomly mirror data.
|
||||
optional bool mirror = 2 [default = false];
|
||||
// Specify if we would like to randomly crop an image.
|
||||
optional uint32 crop_size = 3 [default = 0];
|
||||
// mean_file and mean_value cannot be specified at the same time
|
||||
optional string mean_file = 4;
|
||||
// if specified can be repeated once (would substract it from all the channels)
|
||||
// or can be repeated the same number of times as channels
|
||||
// (would subtract them from the corresponding channel)
|
||||
repeated float mean_value = 5;
|
||||
}
|
||||
|
||||
// Message that stores parameters shared by loss layers
|
||||
message LossParameter {
|
||||
// If specified, ignore instances with the given label.
|
||||
optional int32 ignore_label = 1;
|
||||
// If true, normalize each batch across all instances (including spatial
|
||||
// dimesions, but not ignored instances); else, divide by batch size only.
|
||||
optional bool normalize = 2 [default = true];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by AccuracyLayer
|
||||
message AccuracyParameter {
|
||||
// When computing accuracy, count as correct by comparing the true label to
|
||||
// the top k scoring classes. By default, only compare to the top scoring
|
||||
// class (i.e. argmax).
|
||||
optional uint32 top_k = 1 [default = 1];
|
||||
|
||||
// The "label" axis of the prediction blob, whose argmax corresponds to the
|
||||
// predicted label -- may be negative to index from the end (e.g., -1 for the
|
||||
// last axis). For example, if axis == 1 and the predictions are
|
||||
// (N x C x H x W), the label blob is expected to contain N*H*W ground truth
|
||||
// labels with integer values in {0, 1, ..., C-1}.
|
||||
optional int32 axis = 2 [default = 1];
|
||||
|
||||
// If specified, ignore instances with the given label.
|
||||
optional int32 ignore_label = 3;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ArgMaxLayer
|
||||
message ArgMaxParameter {
|
||||
// If true produce pairs (argmax, maxval)
|
||||
optional bool out_max_val = 1 [default = false];
|
||||
optional uint32 top_k = 2 [default = 1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ConcatLayer
|
||||
message ConcatParameter {
|
||||
// The axis along which to concatenate -- may be negative to index from the
|
||||
// end (e.g., -1 for the last axis). Other axes must have the
|
||||
// same dimension for all the bottom blobs.
|
||||
// By default, ConcatLayer concatenates blobs along the "channels" axis (1).
|
||||
optional int32 axis = 2 [default = 1];
|
||||
|
||||
// DEPRECATED: alias for "axis" -- does not support negative indexing.
|
||||
optional uint32 concat_dim = 1 [default = 1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ContrastiveLossLayer
|
||||
message ContrastiveLossParameter {
|
||||
//margin for dissimilar pair
|
||||
optional float margin = 1 [default = 1.0];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ConvolutionLayer
|
||||
message ConvolutionParameter {
|
||||
optional uint32 num_output = 1; // The number of outputs for the layer
|
||||
optional bool bias_term = 2 [default = true]; // whether to have bias terms
|
||||
// Pad, kernel size, and stride are all given as a single value for equal
|
||||
// dimensions in height and width or as Y, X pairs.
|
||||
optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X)
|
||||
optional uint32 pad_h = 9 [default = 0]; // The padding height
|
||||
optional uint32 pad_w = 10 [default = 0]; // The padding width
|
||||
optional uint32 kernel_size = 4; // The kernel size (square)
|
||||
optional uint32 kernel_h = 11; // The kernel height
|
||||
optional uint32 kernel_w = 12; // The kernel width
|
||||
optional uint32 group = 5 [default = 1]; // The group size for group conv
|
||||
optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X)
|
||||
optional uint32 stride_h = 13; // The stride height
|
||||
optional uint32 stride_w = 14; // The stride width
|
||||
optional FillerParameter weight_filler = 7; // The filler for the weight
|
||||
optional FillerParameter bias_filler = 8; // The filler for the bias
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 15 [default = DEFAULT];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by DataLayer
|
||||
message DataParameter {
|
||||
enum DB {
|
||||
LEVELDB = 0;
|
||||
LMDB = 1;
|
||||
}
|
||||
// Specify the data source.
|
||||
optional string source = 1;
|
||||
// Specify the batch size.
|
||||
optional uint32 batch_size = 4;
|
||||
// The rand_skip variable is for the data layer to skip a few data points
|
||||
// to avoid all asynchronous sgd clients to start at the same point. The skip
|
||||
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
|
||||
// be larger than the number of keys in the database.
|
||||
optional uint32 rand_skip = 7 [default = 0];
|
||||
optional DB backend = 8 [default = LEVELDB];
|
||||
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
|
||||
// simple scaling and subtracting the data mean, if provided. Note that the
|
||||
// mean subtraction is always carried out before scaling.
|
||||
optional float scale = 2 [default = 1];
|
||||
optional string mean_file = 3;
|
||||
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
|
||||
// crop an image.
|
||||
optional uint32 crop_size = 5 [default = 0];
|
||||
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
|
||||
// data.
|
||||
optional bool mirror = 6 [default = false];
|
||||
// Force the encoded image to have 3 color channels
|
||||
optional bool force_encoded_color = 9 [default = false];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by DropoutLayer
|
||||
message DropoutParameter {
|
||||
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
|
||||
}
|
||||
|
||||
// Message that stores parameters used by DummyDataLayer.
|
||||
// DummyDataLayer fills any number of arbitrarily shaped blobs with random
|
||||
// (or constant) data generated by "Fillers" (see "message FillerParameter").
|
||||
message DummyDataParameter {
|
||||
// This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N
|
||||
// shape fields, and 0, 1 or N data_fillers.
|
||||
//
|
||||
// If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
|
||||
// If 1 data_filler is specified, it is applied to all top blobs. If N are
|
||||
// specified, the ith is applied to the ith top blob.
|
||||
repeated FillerParameter data_filler = 1;
|
||||
repeated BlobShape shape = 6;
|
||||
|
||||
// 4D dimensions -- deprecated. Use "shape" instead.
|
||||
repeated uint32 num = 2;
|
||||
repeated uint32 channels = 3;
|
||||
repeated uint32 height = 4;
|
||||
repeated uint32 width = 5;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by EltwiseLayer
|
||||
message EltwiseParameter {
|
||||
enum EltwiseOp {
|
||||
PROD = 0;
|
||||
SUM = 1;
|
||||
MAX = 2;
|
||||
}
|
||||
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
|
||||
repeated float coeff = 2; // blob-wise coefficient for SUM operation
|
||||
|
||||
// Whether to use an asymptotically slower (for >2 inputs) but stabler method
|
||||
// of computing the gradient for the PROD operation. (No effect for SUM op.)
|
||||
optional bool stable_prod_grad = 3 [default = true];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ExpLayer
|
||||
message ExpParameter {
|
||||
// ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
|
||||
// Or if base is set to the default (-1), base is set to e,
|
||||
// so y = exp(shift + scale * x).
|
||||
optional float base = 1 [default = -1.0];
|
||||
optional float scale = 2 [default = 1.0];
|
||||
optional float shift = 3 [default = 0.0];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by HDF5DataLayer
|
||||
message HDF5DataParameter {
|
||||
// Specify the data source.
|
||||
optional string source = 1;
|
||||
// Specify the batch size.
|
||||
optional uint32 batch_size = 2;
|
||||
|
||||
// Specify whether to shuffle the data.
|
||||
// If shuffle == true, the ordering of the HDF5 files is shuffled,
|
||||
// and the ordering of data within any given HDF5 file is shuffled,
|
||||
// but data between different files are not interleaved; all of a file's
|
||||
// data are output (in a random order) before moving onto another file.
|
||||
optional bool shuffle = 3 [default = false];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by HDF5OutputLayer
|
||||
message HDF5OutputParameter {
|
||||
optional string file_name = 1;
|
||||
}
|
||||
|
||||
message HingeLossParameter {
|
||||
enum Norm {
|
||||
L1 = 1;
|
||||
L2 = 2;
|
||||
}
|
||||
// Specify the Norm to use L1 or L2
|
||||
optional Norm norm = 1 [default = L1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ImageDataLayer
|
||||
message ImageDataParameter {
|
||||
// Specify the data source.
|
||||
optional string source = 1;
|
||||
// Specify the batch size.
|
||||
optional uint32 batch_size = 4;
|
||||
// The rand_skip variable is for the data layer to skip a few data points
|
||||
// to avoid all asynchronous sgd clients to start at the same point. The skip
|
||||
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
|
||||
// be larger than the number of keys in the database.
|
||||
optional uint32 rand_skip = 7 [default = 0];
|
||||
// Whether or not ImageLayer should shuffle the list of files at every epoch.
|
||||
optional bool shuffle = 8 [default = false];
|
||||
// It will also resize images if new_height or new_width are not zero.
|
||||
optional uint32 new_height = 9 [default = 0];
|
||||
optional uint32 new_width = 10 [default = 0];
|
||||
// Specify if the images are color or gray
|
||||
optional bool is_color = 11 [default = true];
|
||||
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
|
||||
// simple scaling and subtracting the data mean, if provided. Note that the
|
||||
// mean subtraction is always carried out before scaling.
|
||||
optional float scale = 2 [default = 1];
|
||||
optional string mean_file = 3;
|
||||
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
|
||||
// crop an image.
|
||||
optional uint32 crop_size = 5 [default = 0];
|
||||
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
|
||||
// data.
|
||||
optional bool mirror = 6 [default = false];
|
||||
optional string root_folder = 12 [default = ""];
|
||||
}
|
||||
|
||||
// Message that stores parameters InfogainLossLayer
|
||||
message InfogainLossParameter {
|
||||
// Specify the infogain matrix source.
|
||||
optional string source = 1;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by InnerProductLayer
|
||||
message InnerProductParameter {
|
||||
optional uint32 num_output = 1; // The number of outputs for the layer
|
||||
optional bool bias_term = 2 [default = true]; // whether to have bias terms
|
||||
optional FillerParameter weight_filler = 3; // The filler for the weight
|
||||
optional FillerParameter bias_filler = 4; // The filler for the bias
|
||||
|
||||
// The first axis to be lumped into a single inner product computation;
|
||||
// all preceding axes are retained in the output.
|
||||
// May be negative to index from the end (e.g., -1 for the last axis).
|
||||
optional int32 axis = 5 [default = 1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by LRNLayer
|
||||
message LRNParameter {
|
||||
optional uint32 local_size = 1 [default = 5];
|
||||
optional float alpha = 2 [default = 1.];
|
||||
optional float beta = 3 [default = 0.75];
|
||||
enum NormRegion {
|
||||
ACROSS_CHANNELS = 0;
|
||||
WITHIN_CHANNEL = 1;
|
||||
}
|
||||
optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
|
||||
optional float k = 5 [default = 1.];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by MemoryDataLayer
|
||||
message MemoryDataParameter {
|
||||
optional uint32 batch_size = 1;
|
||||
optional uint32 channels = 2;
|
||||
optional uint32 height = 3;
|
||||
optional uint32 width = 4;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by MVNLayer
|
||||
message MVNParameter {
|
||||
// This parameter can be set to false to normalize mean only
|
||||
optional bool normalize_variance = 1 [default = true];
|
||||
|
||||
// This parameter can be set to true to perform DNN-like MVN
|
||||
optional bool across_channels = 2 [default = false];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by PoolingLayer
|
||||
message PoolingParameter {
|
||||
enum PoolMethod {
|
||||
MAX = 0;
|
||||
AVE = 1;
|
||||
STOCHASTIC = 2;
|
||||
}
|
||||
optional PoolMethod pool = 1 [default = MAX]; // The pooling method
|
||||
// Pad, kernel size, and stride are all given as a single value for equal
|
||||
// dimensions in height and width or as Y, X pairs.
|
||||
optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
|
||||
optional uint32 pad_h = 9 [default = 0]; // The padding height
|
||||
optional uint32 pad_w = 10 [default = 0]; // The padding width
|
||||
optional uint32 kernel_size = 2; // The kernel size (square)
|
||||
optional uint32 kernel_h = 5; // The kernel height
|
||||
optional uint32 kernel_w = 6; // The kernel width
|
||||
optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
|
||||
optional uint32 stride_h = 7; // The stride height
|
||||
optional uint32 stride_w = 8; // The stride width
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 11 [default = DEFAULT];
|
||||
// If global_pooling then it will pool over the size of the bottom by doing
|
||||
// kernel_h = bottom->height and kernel_w = bottom->width
|
||||
optional bool global_pooling = 12 [default = false];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by PowerLayer
|
||||
message PowerParameter {
|
||||
// PowerLayer computes outputs y = (shift + scale * x) ^ power.
|
||||
optional float power = 1 [default = 1.0];
|
||||
optional float scale = 2 [default = 1.0];
|
||||
optional float shift = 3 [default = 0.0];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by PythonLayer
|
||||
message PythonParameter {
|
||||
optional string module = 1;
|
||||
optional string layer = 2;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ReLULayer
|
||||
message ReLUParameter {
|
||||
// Allow non-zero slope for negative inputs to speed up optimization
|
||||
// Described in:
|
||||
// Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
|
||||
// improve neural network acoustic models. In ICML Workshop on Deep Learning
|
||||
// for Audio, Speech, and Language Processing.
|
||||
optional float negative_slope = 1 [default = 0];
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 2 [default = DEFAULT];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by SigmoidLayer
|
||||
message SigmoidParameter {
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by SliceLayer
|
||||
message SliceParameter {
|
||||
// The axis along which to slice -- may be negative to index from the end
|
||||
// (e.g., -1 for the last axis).
|
||||
// By default, SliceLayer concatenates blobs along the "channels" axis (1).
|
||||
optional int32 axis = 3 [default = 1];
|
||||
repeated uint32 slice_point = 2;
|
||||
|
||||
// DEPRECATED: alias for "axis" -- does not support negative indexing.
|
||||
optional uint32 slice_dim = 1 [default = 1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
|
||||
message SoftmaxParameter {
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
|
||||
// The axis along which to perform the softmax -- may be negative to index
|
||||
// from the end (e.g., -1 for the last axis).
|
||||
// Any other axes will be evaluated as independent softmaxes.
|
||||
optional int32 axis = 2 [default = 1];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by TanHLayer
|
||||
message TanHParameter {
|
||||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
}
|
||||
|
||||
// Message that stores parameters used by ThresholdLayer
|
||||
message ThresholdParameter {
|
||||
optional float threshold = 1 [default = 0]; // Strictly positive values
|
||||
}
|
||||
|
||||
// Message that stores parameters used by WindowDataLayer
|
||||
message WindowDataParameter {
|
||||
// Specify the data source.
|
||||
optional string source = 1;
|
||||
// For data pre-processing, we can do simple scaling and subtracting the
|
||||
// data mean, if provided. Note that the mean subtraction is always carried
|
||||
// out before scaling.
|
||||
optional float scale = 2 [default = 1];
|
||||
optional string mean_file = 3;
|
||||
// Specify the batch size.
|
||||
optional uint32 batch_size = 4;
|
||||
// Specify if we would like to randomly crop an image.
|
||||
optional uint32 crop_size = 5 [default = 0];
|
||||
// Specify if we want to randomly mirror data.
|
||||
optional bool mirror = 6 [default = false];
|
||||
// Foreground (object) overlap threshold
|
||||
optional float fg_threshold = 7 [default = 0.5];
|
||||
// Background (non-object) overlap threshold
|
||||
optional float bg_threshold = 8 [default = 0.5];
|
||||
// Fraction of batch that should be foreground objects
|
||||
optional float fg_fraction = 9 [default = 0.25];
|
||||
// Amount of contextual padding to add around a window
|
||||
// (used only by the window_data_layer)
|
||||
optional uint32 context_pad = 10 [default = 0];
|
||||
// Mode for cropping out a detection window
|
||||
// warp: cropped window is warped to a fixed size and aspect ratio
|
||||
// square: the tightest square around the window is cropped
|
||||
optional string crop_mode = 11 [default = "warp"];
|
||||
// cache_images: will load all images in memory for faster access
|
||||
optional bool cache_images = 12 [default = false];
|
||||
// append root_folder to locate images
|
||||
optional string root_folder = 13 [default = ""];
|
||||
}
|
||||
|
||||
// DEPRECATED: use LayerParameter.
|
||||
message V1LayerParameter {
|
||||
repeated string bottom = 2;
|
||||
repeated string top = 3;
|
||||
optional string name = 4;
|
||||
repeated NetStateRule include = 32;
|
||||
repeated NetStateRule exclude = 33;
|
||||
enum LayerType {
|
||||
NONE = 0;
|
||||
ABSVAL = 35;
|
||||
ACCURACY = 1;
|
||||
ARGMAX = 30;
|
||||
BNLL = 2;
|
||||
CONCAT = 3;
|
||||
CONTRASTIVE_LOSS = 37;
|
||||
CONVOLUTION = 4;
|
||||
DATA = 5;
|
||||
DECONVOLUTION = 39;
|
||||
DROPOUT = 6;
|
||||
DUMMY_DATA = 32;
|
||||
EUCLIDEAN_LOSS = 7;
|
||||
ELTWISE = 25;
|
||||
EXP = 38;
|
||||
FLATTEN = 8;
|
||||
HDF5_DATA = 9;
|
||||
HDF5_OUTPUT = 10;
|
||||
HINGE_LOSS = 28;
|
||||
IM2COL = 11;
|
||||
IMAGE_DATA = 12;
|
||||
INFOGAIN_LOSS = 13;
|
||||
INNER_PRODUCT = 14;
|
||||
LRN = 15;
|
||||
MEMORY_DATA = 29;
|
||||
MULTINOMIAL_LOGISTIC_LOSS = 16;
|
||||
MVN = 34;
|
||||
POOLING = 17;
|
||||
POWER = 26;
|
||||
RELU = 18;
|
||||
SIGMOID = 19;
|
||||
SIGMOID_CROSS_ENTROPY_LOSS = 27;
|
||||
SILENCE = 36;
|
||||
SOFTMAX = 20;
|
||||
SOFTMAX_LOSS = 21;
|
||||
SPLIT = 22;
|
||||
SLICE = 33;
|
||||
TANH = 23;
|
||||
WINDOW_DATA = 24;
|
||||
THRESHOLD = 31;
|
||||
}
|
||||
optional LayerType type = 5;
|
||||
repeated BlobProto blobs = 6;
|
||||
repeated string param = 1001;
|
||||
repeated DimCheckMode blob_share_mode = 1002;
|
||||
enum DimCheckMode {
|
||||
STRICT = 0;
|
||||
PERMISSIVE = 1;
|
||||
}
|
||||
repeated float blobs_lr = 7;
|
||||
repeated float weight_decay = 8;
|
||||
repeated float loss_weight = 35;
|
||||
optional AccuracyParameter accuracy_param = 27;
|
||||
optional ArgMaxParameter argmax_param = 23;
|
||||
optional ConcatParameter concat_param = 9;
|
||||
optional ContrastiveLossParameter contrastive_loss_param = 40;
|
||||
optional ConvolutionParameter convolution_param = 10;
|
||||
optional DataParameter data_param = 11;
|
||||
optional DropoutParameter dropout_param = 12;
|
||||
optional DummyDataParameter dummy_data_param = 26;
|
||||
optional EltwiseParameter eltwise_param = 24;
|
||||
optional ExpParameter exp_param = 41;
|
||||
optional HDF5DataParameter hdf5_data_param = 13;
|
||||
optional HDF5OutputParameter hdf5_output_param = 14;
|
||||
optional HingeLossParameter hinge_loss_param = 29;
|
||||
optional ImageDataParameter image_data_param = 15;
|
||||
optional InfogainLossParameter infogain_loss_param = 16;
|
||||
optional InnerProductParameter inner_product_param = 17;
|
||||
optional LRNParameter lrn_param = 18;
|
||||
optional MemoryDataParameter memory_data_param = 22;
|
||||
optional MVNParameter mvn_param = 34;
|
||||
optional PoolingParameter pooling_param = 19;
|
||||
optional PowerParameter power_param = 21;
|
||||
optional ReLUParameter relu_param = 30;
|
||||
optional SigmoidParameter sigmoid_param = 38;
|
||||
optional SoftmaxParameter softmax_param = 39;
|
||||
optional SliceParameter slice_param = 31;
|
||||
optional TanHParameter tanh_param = 37;
|
||||
optional ThresholdParameter threshold_param = 25;
|
||||
optional WindowDataParameter window_data_param = 20;
|
||||
optional TransformationParameter transform_param = 36;
|
||||
optional LossParameter loss_param = 42;
|
||||
optional V0LayerParameter layer = 1;
|
||||
}
|
||||
|
||||
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
|
||||
// in Caffe. We keep this message type around for legacy support.
|
||||
message V0LayerParameter {
|
||||
optional string name = 1; // the layer name
|
||||
optional string type = 2; // the string to specify the layer type
|
||||
|
||||
// Parameters to specify layers with inner products.
|
||||
optional uint32 num_output = 3; // The number of outputs for the layer
|
||||
optional bool biasterm = 4 [default = true]; // whether to have bias terms
|
||||
optional FillerParameter weight_filler = 5; // The filler for the weight
|
||||
optional FillerParameter bias_filler = 6; // The filler for the bias
|
||||
|
||||
optional uint32 pad = 7 [default = 0]; // The padding size
|
||||
optional uint32 kernelsize = 8; // The kernel size
|
||||
optional uint32 group = 9 [default = 1]; // The group size for group conv
|
||||
optional uint32 stride = 10 [default = 1]; // The stride
|
||||
enum PoolMethod {
|
||||
MAX = 0;
|
||||
AVE = 1;
|
||||
STOCHASTIC = 2;
|
||||
}
|
||||
optional PoolMethod pool = 11 [default = MAX]; // The pooling method
|
||||
optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
|
||||
|
||||
optional uint32 local_size = 13 [default = 5]; // for local response norm
|
||||
optional float alpha = 14 [default = 1.]; // for local response norm
|
||||
optional float beta = 15 [default = 0.75]; // for local response norm
|
||||
optional float k = 22 [default = 1.];
|
||||
|
||||
// For data layers, specify the data source
|
||||
optional string source = 16;
|
||||
// For data pre-processing, we can do simple scaling and subtracting the
|
||||
// data mean, if provided. Note that the mean subtraction is always carried
|
||||
// out before scaling.
|
||||
optional float scale = 17 [default = 1];
|
||||
optional string meanfile = 18;
|
||||
// For data layers, specify the batch size.
|
||||
optional uint32 batchsize = 19;
|
||||
// For data layers, specify if we would like to randomly crop an image.
|
||||
optional uint32 cropsize = 20 [default = 0];
|
||||
// For data layers, specify if we want to randomly mirror data.
|
||||
optional bool mirror = 21 [default = false];
|
||||
|
||||
// The blobs containing the numeric parameters of the layer
|
||||
repeated BlobProto blobs = 50;
|
||||
// The ratio that is multiplied on the global learning rate. If you want to
|
||||
// set the learning ratio for one blob, you need to set it for all blobs.
|
||||
repeated float blobs_lr = 51;
|
||||
// The weight decay that is multiplied on the global weight decay.
|
||||
repeated float weight_decay = 52;
|
||||
|
||||
// The rand_skip variable is for the data layer to skip a few data points
|
||||
// to avoid all asynchronous sgd clients to start at the same point. The skip
|
||||
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
|
||||
// be larger than the number of keys in the database.
|
||||
optional uint32 rand_skip = 53 [default = 0];
|
||||
|
||||
// Fields related to detection (det_*)
|
||||
// foreground (object) overlap threshold
|
||||
optional float det_fg_threshold = 54 [default = 0.5];
|
||||
// background (non-object) overlap threshold
|
||||
optional float det_bg_threshold = 55 [default = 0.5];
|
||||
// Fraction of batch that should be foreground objects
|
||||
optional float det_fg_fraction = 56 [default = 0.25];
|
||||
|
||||
// optional bool OBSOLETE_can_clobber = 57 [default = true];
|
||||
|
||||
// Amount of contextual padding to add around a window
|
||||
// (used only by the window_data_layer)
|
||||
optional uint32 det_context_pad = 58 [default = 0];
|
||||
|
||||
// Mode for cropping out a detection window
|
||||
// warp: cropped window is warped to a fixed size and aspect ratio
|
||||
// square: the tightest square around the window is cropped
|
||||
optional string det_crop_mode = 59 [default = "warp"];
|
||||
|
||||
// For ReshapeLayer, one needs to specify the new dimensions.
|
||||
optional int32 new_num = 60 [default = 0];
|
||||
optional int32 new_channels = 61 [default = 0];
|
||||
optional int32 new_height = 62 [default = 0];
|
||||
optional int32 new_width = 63 [default = 0];
|
||||
|
||||
// Whether or not ImageLayer should shuffle the list of files at every epoch.
|
||||
// It will also resize images if new_height or new_width are not zero.
|
||||
optional bool shuffle_images = 64 [default = false];
|
||||
|
||||
// For ConcatLayer, one needs to specify the dimension for concatenation, and
|
||||
// the other dimensions must be the same for all the bottom blobs.
|
||||
// By default it will concatenate blobs along the channels dimension.
|
||||
optional uint32 concat_dim = 65 [default = 1];
|
||||
|
||||
optional HDF5OutputParameter hdf5_output_param = 1001;
|
||||
}
|
||||
|
||||
// Message that stores parameters used by PReLULayer
|
||||
message PReLUParameter {
|
||||
// Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
|
||||
// Surpassing Human-Level Performance on ImageNet Classification, 2015.
|
||||
|
||||
// Initial value of a_i. Default is a_i=0.25 for all i.
|
||||
optional FillerParameter filler = 1;
|
||||
// Whether or not slope paramters are shared across channels.
|
||||
optional bool channel_shared = 2 [default = false];
|
||||
}
|
4
caffe2/BREW
Normal file
4
caffe2/BREW
Normal file
@ -0,0 +1,4 @@
|
||||
filegroup(
|
||||
name = "caffe2_python",
|
||||
srcs = ["__init__.py"],
|
||||
)
|
5
caffe2/__init__.py
Normal file
5
caffe2/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""
|
||||
Caffe2: A General Tool for Neural Networks.
|
||||
"""
|
||||
|
||||
__author__ = 'Yangqing Jia'
|
204
caffe2/binaries/BREW
Normal file
204
caffe2/binaries/BREW
Normal file
@ -0,0 +1,204 @@
|
||||
cc_binary(
|
||||
name = "convert_db",
|
||||
srcs = [
|
||||
"convert_db.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/db:db",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "make_cifar_db",
|
||||
srcs = [
|
||||
"make_cifar_db.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/proto:caffe2_proto",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "make_image_db",
|
||||
srcs = [
|
||||
"make_image_db.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/proto:caffe2_proto",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
external_libs = [
|
||||
"opencv_core",
|
||||
"opencv_highgui",
|
||||
"opencv_imgproc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "convert_encoded_to_raw_leveldb",
|
||||
srcs = [
|
||||
"convert_encoded_to_raw_leveldb.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/proto:caffe2_proto",
|
||||
"//third_party/leveldb:leveldb",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
external_libs = [
|
||||
"opencv_core",
|
||||
"opencv_highgui",
|
||||
"opencv_imgproc",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
cc_binary(
|
||||
name = "make_mnist_db",
|
||||
srcs = [
|
||||
"make_mnist_db.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/proto:caffe2_proto",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "print_registered_core_operators",
|
||||
srcs = [
|
||||
"print_registered_core_operators.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/image:image_ops",
|
||||
"//caffe2/image:image_ops_gpu",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "run_client",
|
||||
srcs = [
|
||||
"run_client.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/image:image_ops",
|
||||
"//caffe2/image:image_ops_gpu",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
# run_client_minimal is the binary that links in the operators that have no
|
||||
# external dependencies at all.
|
||||
cc_binary(
|
||||
name = "run_client_minimal",
|
||||
srcs = [
|
||||
"run_client.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
cc_binary(
|
||||
name = "run_plan",
|
||||
srcs = [
|
||||
"run_plan.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/image:image_ops",
|
||||
"//caffe2/image:image_ops_gpu",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
# run_plan_minimal is the binary that links in the operators that have no
|
||||
# external dependencies at all.
|
||||
cc_binary(
|
||||
name = "run_plan_minimal",
|
||||
srcs = [
|
||||
"run_plan.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
cc_binary(
|
||||
name = "run_plan_mpi",
|
||||
srcs = [
|
||||
"run_plan_mpi.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/image:image_ops",
|
||||
"//caffe2/image:image_ops_gpu",
|
||||
"//caffe2/mpi:mpi_ops",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "inspect_gpus",
|
||||
srcs = [
|
||||
"inspect_gpus.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core_gpu",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "split_db",
|
||||
srcs = [
|
||||
"split_db.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/db:db",
|
||||
"//third_party/gflags:gflags",
|
||||
"//third_party/glog:glog",
|
||||
],
|
||||
)
|
38
caffe2/binaries/convert_db.cc
Normal file
38
caffe2/binaries/convert_db.cc
Normal file
@ -0,0 +1,38 @@
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(input_db, "", "The input db.");
|
||||
DEFINE_string(input_db_type, "", "The input db type.");
|
||||
DEFINE_string(output_db, "", "The output db.");
|
||||
DEFINE_string(output_db_type, "", "The output db type.");
|
||||
DEFINE_int32(batch_size, 1000, "The write batch size.");
|
||||
|
||||
using caffe2::db::Cursor;
|
||||
using caffe2::db::DB;
|
||||
using caffe2::db::Transaction;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
google::SetUsageMessage(
|
||||
"This script converts databases between different formats.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
|
||||
FLAGS_input_db_type, FLAGS_input_db, caffe2::db::READ));
|
||||
std::unique_ptr<DB> out_db(caffe2::db::CreateDB(
|
||||
FLAGS_output_db_type, FLAGS_output_db, caffe2::db::NEW));
|
||||
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
|
||||
std::unique_ptr<Transaction> transaction(out_db->NewTransaction());
|
||||
int count = 0;
|
||||
for (; cursor->Valid(); cursor->Next()) {
|
||||
transaction->Put(cursor->key(), cursor->value());
|
||||
if (++count % FLAGS_batch_size == 0) {
|
||||
transaction->Commit();
|
||||
LOG(INFO) << "Converted " << count << " items so far.";
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "A total of " << count << " items processed.";
|
||||
return 0;
|
||||
}
|
139
caffe2/binaries/convert_encoded_to_raw_leveldb.cc
Normal file
139
caffe2/binaries/convert_encoded_to_raw_leveldb.cc
Normal file
@ -0,0 +1,139 @@
|
||||
// This script converts an image dataset to leveldb.
|
||||
//
|
||||
// FLAGS_input_folder is the root folder that holds all the images, and
|
||||
// FLAGS_list_file should be a list of files as well as their labels, in the
|
||||
// format as
|
||||
// subfolder1/file1.JPEG 7
|
||||
// ....
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream> // NOLINT(readability/streams)
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
DEFINE_string(input_db_name, "", "The input image file name.");
|
||||
DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
DEFINE_bool(color, true, "If set, load images in color.");
|
||||
DEFINE_int32(scale, 256,
|
||||
"If FLAGS_raw is set, scale all the images' shorter edge to the given "
|
||||
"value.");
|
||||
DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
|
||||
void ConvertToRawDataset(
|
||||
const string& input_db_name, const string& output_db_name) {
|
||||
// input leveldb
|
||||
std::unique_ptr<leveldb::DB> input_db;
|
||||
LOG(INFO) << "Opening input leveldb " << input_db_name;
|
||||
{
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = false;
|
||||
leveldb::DB* db_temp;
|
||||
leveldb::Status status = leveldb::DB::Open(
|
||||
options, input_db_name, &db_temp);
|
||||
CHECK(status.ok()) << "Failed to open leveldb " << input_db_name << ".";
|
||||
input_db.reset(db_temp);
|
||||
}
|
||||
|
||||
// output leveldb
|
||||
std::unique_ptr<leveldb::DB> output_db;
|
||||
std::unique_ptr<leveldb::WriteBatch> batch;
|
||||
LOG(INFO) << "Opening leveldb " << output_db_name;
|
||||
{
|
||||
leveldb::Options options;
|
||||
options.error_if_exists = true;
|
||||
options.create_if_missing = true;
|
||||
options.write_buffer_size = 268435456;
|
||||
leveldb::DB* db_temp;
|
||||
leveldb::Status status = leveldb::DB::Open(
|
||||
options, output_db_name, &db_temp);
|
||||
CHECK(status.ok()) << "Failed to open leveldb " << output_db_name
|
||||
<< ". Is it already existing?";
|
||||
output_db.reset(db_temp);
|
||||
}
|
||||
batch.reset(new leveldb::WriteBatch());
|
||||
|
||||
TensorProtos input_protos;
|
||||
TensorProtos output_protos;
|
||||
TensorProto* data = output_protos.add_protos();
|
||||
TensorProto* label = output_protos.add_protos();
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
data->add_dims(0);
|
||||
data->add_dims(0);
|
||||
if (FLAGS_color) {
|
||||
data->add_dims(3);
|
||||
}
|
||||
string value;
|
||||
|
||||
unique_ptr<leveldb::Iterator> iter;
|
||||
iter.reset(input_db->NewIterator(leveldb::ReadOptions()));
|
||||
iter->SeekToFirst();
|
||||
int count = 0;
|
||||
for (; iter->Valid(); iter->Next()) {
|
||||
CHECK(input_protos.ParseFromString(iter->value().ToString()));
|
||||
label->CopyFrom(input_protos.protos(1));
|
||||
const string& encoded_image = input_protos.protos(0).string_data(0);
|
||||
int encoded_size = encoded_image.size();
|
||||
cv::Mat img = cv::imdecode(
|
||||
cv::Mat(1, &encoded_size, CV_8UC1,
|
||||
const_cast<char*>(encoded_image.data())),
|
||||
FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
cv::Mat resized_img;
|
||||
int scaled_width, scaled_height;
|
||||
if (FLAGS_warp) {
|
||||
scaled_width = FLAGS_scale;
|
||||
scaled_height = FLAGS_scale;
|
||||
} else if (img.rows > img.cols) {
|
||||
scaled_width = FLAGS_scale;
|
||||
scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
|
||||
} else {
|
||||
scaled_height = FLAGS_scale;
|
||||
scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
|
||||
}
|
||||
cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
|
||||
cv::INTER_LINEAR);
|
||||
data->set_dims(0, scaled_height);
|
||||
data->set_dims(1, scaled_width);
|
||||
DCHECK(resized_img.isContinuous());
|
||||
data->set_byte_data(resized_img.ptr(),
|
||||
scaled_height * scaled_width * (FLAGS_color ? 3 : 1));
|
||||
output_protos.SerializeToString(&value);
|
||||
// Put in db
|
||||
batch->Put(iter->key(), value);
|
||||
if (++count % 1000 == 0) {
|
||||
output_db->Write(leveldb::WriteOptions(), batch.get());
|
||||
batch.reset(new leveldb::WriteBatch());
|
||||
LOG(INFO) << "Processed " << count << " files.";
|
||||
}
|
||||
}
|
||||
// write the last batch
|
||||
if (count % 1000 != 0) {
|
||||
output_db->Write(leveldb::WriteOptions(), batch.get());
|
||||
}
|
||||
LOG(INFO) << "Processed a total of " << count << " files.";
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
google::SetUsageMessage("Converts an image dataset to a leveldb.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
caffe2::ConvertToRawDataset(
|
||||
FLAGS_input_db_name, FLAGS_output_db_name);
|
||||
return 0;
|
||||
}
|
30
caffe2/binaries/inspect_gpus.cc
Normal file
30
caffe2/binaries/inspect_gpus.cc
Normal file
@ -0,0 +1,30 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
|
||||
int gpu_count;
|
||||
CUDA_CHECK(cudaGetDeviceCount(&gpu_count));
|
||||
for (int i = 0; i < gpu_count; ++i) {
|
||||
LOG(INFO) << "Querying device ID = " << i;
|
||||
caffe2::DeviceQuery(i);
|
||||
}
|
||||
|
||||
std::stringstream sstream;
|
||||
// Find topology
|
||||
int can_access;
|
||||
for (int i = 0; i < gpu_count; ++i) {
|
||||
for (int j = 0; j < gpu_count; ++j) {
|
||||
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access, i, j));
|
||||
sstream << ((i == j || can_access) ? "+" : "-") << " ";
|
||||
}
|
||||
sstream << std::endl;
|
||||
}
|
||||
LOG(INFO) << "Access pattern: " << std::endl << sstream.str();
|
||||
}
|
146
caffe2/binaries/make_cifar_db.cc
Normal file
146
caffe2/binaries/make_cifar_db.cc
Normal file
@ -0,0 +1,146 @@
|
||||
//
|
||||
// This script converts the CIFAR dataset to the leveldb format used
|
||||
// by caffe to perform classification.
|
||||
// Usage:
|
||||
// convert_cifar_data input_folder output_db_file
|
||||
// The CIFAR dataset could be downloaded at
|
||||
// http://www.cs.toronto.edu/~kriz/cifar.html
|
||||
|
||||
#include <fstream> // NOLINT(readability/streams)
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(input_folder, "", "The input image file name.");
|
||||
DEFINE_string(output_train_db_name, "", "The output training leveldb name.");
|
||||
DEFINE_string(output_test_db_name, "", "The output testing leveldb name.");
|
||||
DEFINE_string(db, "leveldb", "The db type.");
|
||||
DEFINE_bool(is_cifar100, false,
|
||||
"If set, convert cifar100. Otherwise do cifar10.");
|
||||
DEFINE_bool(channel_first, false,
|
||||
"If set, write the data as channel-first (CHW order) as the old "
|
||||
"Caffe does.");
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using std::stringstream;
|
||||
|
||||
const int kCIFARSize = 32;
|
||||
const int kCIFARImageNBytes = kCIFARSize * kCIFARSize * 3;
|
||||
const int kCIFAR10BatchSize = 10000;
|
||||
const int kCIFAR10TestDataSize = 10000;
|
||||
const int kCIFAR10TrainBatches = 5;
|
||||
|
||||
const int kCIFAR100TrainDataSize = 50000;
|
||||
const int kCIFAR100TestDataSize = 10000;
|
||||
|
||||
void ReadImage(std::ifstream* file, int* label, char* buffer) {
|
||||
char label_char;
|
||||
if (FLAGS_is_cifar100) {
|
||||
// Skip the coarse label.
|
||||
file->read(&label_char, 1);
|
||||
}
|
||||
file->read(&label_char, 1);
|
||||
*label = label_char;
|
||||
if (FLAGS_channel_first) {
|
||||
file->read(buffer, kCIFARImageNBytes);
|
||||
} else {
|
||||
// Yes, there are better ways to do it, like in-place swap... but I am too
|
||||
// lazy so let's just write it in a memory-wasteful way.
|
||||
static char channel_first_storage[kCIFARImageNBytes];
|
||||
file->read(channel_first_storage, kCIFARImageNBytes);
|
||||
for (int c = 0; c < 3; ++c) {
|
||||
for (int i = 0; i < kCIFARSize * kCIFARSize; ++i) {
|
||||
buffer[i * 3 + c] =
|
||||
channel_first_storage[c * kCIFARSize * kCIFARSize + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void WriteToDB(const string& filename, const int num_items,
|
||||
const int& offset, db::DB* db) {
|
||||
TensorProtos protos;
|
||||
TensorProto* data = protos.add_protos();
|
||||
TensorProto* label = protos.add_protos();
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
if (FLAGS_channel_first) {
|
||||
data->add_dims(1);
|
||||
data->add_dims(3);
|
||||
data->add_dims(kCIFARSize);
|
||||
data->add_dims(kCIFARSize);
|
||||
} else {
|
||||
data->add_dims(1);
|
||||
data->add_dims(kCIFARSize);
|
||||
data->add_dims(kCIFARSize);
|
||||
data->add_dims(3);
|
||||
}
|
||||
label->set_data_type(TensorProto::INT32);
|
||||
label->add_dims(1);
|
||||
label->add_int32_data(0);
|
||||
|
||||
LOG(INFO) << "Converting file " << filename;
|
||||
std::ifstream data_file(filename.c_str(),
|
||||
std::ios::in | std::ios::binary);
|
||||
CHECK(data_file) << "Unable to open file " << filename;
|
||||
char str_buffer[kCIFARImageNBytes];
|
||||
int label_value;
|
||||
string serialized_protos;
|
||||
std::unique_ptr<db::Transaction> transaction(db->NewTransaction());
|
||||
for (int itemid = 0; itemid < num_items; ++itemid) {
|
||||
ReadImage(&data_file, &label_value, str_buffer);
|
||||
data->set_byte_data(str_buffer, kCIFARImageNBytes);
|
||||
label->set_int32_data(0, label_value);
|
||||
protos.SerializeToString(&serialized_protos);
|
||||
snprintf(str_buffer, kCIFARImageNBytes, "%05d",
|
||||
offset + itemid);
|
||||
transaction->Put(string(str_buffer), serialized_protos);
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertCIFAR() {
|
||||
std::unique_ptr<db::DB> train_db(
|
||||
db::CreateDB(FLAGS_db, FLAGS_output_train_db_name, db::NEW));
|
||||
std::unique_ptr<db::DB> test_db(
|
||||
db::CreateDB(FLAGS_db, FLAGS_output_test_db_name, db::NEW));
|
||||
|
||||
if (!FLAGS_is_cifar100) {
|
||||
// This is cifar 10.
|
||||
for (int fileid = 0; fileid < kCIFAR10TrainBatches; ++fileid) {
|
||||
stringstream train_file;
|
||||
train_file << FLAGS_input_folder << "/data_batch_" << fileid + 1
|
||||
<< ".bin";
|
||||
WriteToDB(train_file.str(), kCIFAR10BatchSize,
|
||||
fileid * kCIFAR10BatchSize, train_db.get());
|
||||
}
|
||||
stringstream test_file;
|
||||
test_file << FLAGS_input_folder << "/test_batch.bin";
|
||||
WriteToDB(test_file.str(), kCIFAR10TestDataSize, 0, test_db.get());
|
||||
} else {
|
||||
// This is cifar 100.
|
||||
stringstream train_file;
|
||||
train_file << FLAGS_input_folder << "/train.bin";
|
||||
WriteToDB(train_file.str(), kCIFAR100TrainDataSize, 0, train_db.get());
|
||||
stringstream test_file;
|
||||
test_file << FLAGS_input_folder << "/test.bin";
|
||||
WriteToDB(test_file.str(), kCIFAR100TestDataSize, 0, test_db.get());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
// Entry point: initialize logging, parse flags, then run the CIFAR -> db
// conversion (see ConvertCIFAR above for the flag semantics).
int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  google::SetUsageMessage(
      "This script converts the CIFAR dataset to the db format used "
      "by caffe to perform classification.");
  google::ParseCommandLineFlags(&argc, &argv, true);
  caffe2::ConvertCIFAR();
  return 0;
}
|
146
caffe2/binaries/make_image_db.cc
Normal file
146
caffe2/binaries/make_image_db.cc
Normal file
@ -0,0 +1,146 @@
|
||||
// This script converts an image dataset to a database.
|
||||
//
|
||||
// FLAGS_input_folder is the root folder that holds all the images, and
|
||||
// FLAGS_list_file should be a list of files as well as their labels, in the
|
||||
// format as
|
||||
// subfolder1/file1.JPEG 7
|
||||
// ....
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream> // NOLINT(readability/streams)
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_bool(shuffle, false,
|
||||
"Randomly shuffle the order of images and their labels");
|
||||
DEFINE_string(input_folder, "", "The input image file name.");
|
||||
DEFINE_string(list_file, "", "The text file containing the list of images.");
|
||||
DEFINE_string(output_db_name, "", "The output training leveldb name.");
|
||||
DEFINE_string(db, "leveldb", "The db type.");
|
||||
DEFINE_bool(raw, false,
|
||||
"If set, we pre-read the images and store the raw buffer.");
|
||||
DEFINE_bool(color, true, "If set, load images in color.");
|
||||
DEFINE_int32(scale, 256,
|
||||
"If FLAGS_raw is set, scale all the images' shorter edge to the given "
|
||||
"value.");
|
||||
DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
|
||||
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
void ConvertImageDataset(
|
||||
const string& input_folder, const string& list_filename,
|
||||
const string& output_db_name, const bool shuffle) {
|
||||
std::ifstream list_file(list_filename);
|
||||
std::vector<std::pair<std::string, int> > lines;
|
||||
std::string filename;
|
||||
int file_label;
|
||||
while (list_file >> filename >> file_label) {
|
||||
lines.push_back(std::make_pair(filename, file_label));
|
||||
}
|
||||
if (FLAGS_shuffle) {
|
||||
// randomly shuffle data
|
||||
LOG(INFO) << "Shuffling data";
|
||||
std::shuffle(lines.begin(), lines.end(),
|
||||
std::default_random_engine(1701));
|
||||
}
|
||||
LOG(INFO) << "A total of " << lines.size() << " images.";
|
||||
|
||||
|
||||
LOG(INFO) << "Opening db " << output_db_name;
|
||||
std::unique_ptr<db::DB> db(db::CreateDB(FLAGS_db, output_db_name, db::NEW));
|
||||
std::unique_ptr<db::Transaction> transaction(db->NewTransaction());
|
||||
|
||||
TensorProtos protos;
|
||||
TensorProto* data = protos.add_protos();
|
||||
TensorProto* label = protos.add_protos();
|
||||
if (FLAGS_raw) {
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
data->add_dims(0);
|
||||
data->add_dims(0);
|
||||
if (FLAGS_color) {
|
||||
data->add_dims(3);
|
||||
}
|
||||
} else {
|
||||
data->set_data_type(TensorProto::STRING);
|
||||
data->add_dims(1);
|
||||
data->add_string_data("");
|
||||
}
|
||||
label->set_data_type(TensorProto::INT32);
|
||||
label->add_dims(1);
|
||||
label->add_int32_data(0);
|
||||
const int kMaxKeyLength = 256;
|
||||
char key_cstr[kMaxKeyLength];
|
||||
string value;
|
||||
int count = 0;
|
||||
|
||||
for (int item_id = 0; item_id < lines.size(); ++item_id) {
|
||||
// First, set label.
|
||||
label->set_int32_data(0, lines[item_id].second);
|
||||
if (!FLAGS_raw) {
|
||||
// Second, read images.
|
||||
std::ifstream image_file_stream(input_folder + lines[item_id].first);
|
||||
data->mutable_string_data(0)->assign(
|
||||
(std::istreambuf_iterator<char>(image_file_stream)),
|
||||
std::istreambuf_iterator<char>());
|
||||
} else {
|
||||
// Need to do some opencv magic.
|
||||
cv::Mat img = cv::imread(
|
||||
input_folder + lines[item_id].first,
|
||||
FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
|
||||
// Do resizing.
|
||||
cv::Mat resized_img;
|
||||
int scaled_width, scaled_height;
|
||||
if (FLAGS_warp) {
|
||||
scaled_width = FLAGS_scale;
|
||||
scaled_height = FLAGS_scale;
|
||||
} else if (img.rows > img.cols) {
|
||||
scaled_width = FLAGS_scale;
|
||||
scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
|
||||
} else {
|
||||
scaled_height = FLAGS_scale;
|
||||
scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
|
||||
}
|
||||
cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
|
||||
cv::INTER_LINEAR);
|
||||
data->set_dims(0, scaled_height);
|
||||
data->set_dims(1, scaled_width);
|
||||
DCHECK(resized_img.isContinuous());
|
||||
data->set_byte_data(
|
||||
resized_img.ptr(),
|
||||
scaled_height * scaled_width * (FLAGS_color ? 3 : 1));
|
||||
}
|
||||
snprintf(key_cstr, kMaxKeyLength, "%08d_%s", item_id,
|
||||
lines[item_id].first.c_str());
|
||||
protos.SerializeToString(&value);
|
||||
// Put in db
|
||||
transaction->Put(string(key_cstr), value);
|
||||
if (++count % 1000 == 0) {
|
||||
// Commit the current writes.
|
||||
transaction->Commit();
|
||||
LOG(INFO) << "Processed " << count << " files.";
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "Processed a total of " << count << " files.";
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
// Entry point: initialize logging, parse flags, then run the image-list ->
// db conversion with the flag-supplied paths.
int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  google::SetUsageMessage("Converts an image dataset to a db.");
  google::ParseCommandLineFlags(&argc, &argv, true);
  caffe2::ConvertImageDataset(
      FLAGS_input_folder, FLAGS_list_file,
      FLAGS_output_db_name, FLAGS_shuffle);
  return 0;
}
|
123
caffe2/binaries/make_mnist_db.cc
Normal file
123
caffe2/binaries/make_mnist_db.cc
Normal file
@ -0,0 +1,123 @@
|
||||
// This script converts the MNIST dataset to leveldb.
|
||||
// The MNIST dataset could be downloaded at
|
||||
// http://yann.lecun.com/exdb/mnist/
|
||||
|
||||
#include <fstream> // NOLINT(readability/streams)
|
||||
#include <string>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(image_file, "", "The input image file name.");
|
||||
DEFINE_string(label_file, "", "The label file name.");
|
||||
DEFINE_string(output_file, "", "The output db name.");
|
||||
DEFINE_string(db, "leveldb", "The db type.");
|
||||
DEFINE_int32(data_limit, -1,
|
||||
"If set, only output this number of data points.");
|
||||
DEFINE_bool(channel_first, false,
|
||||
"If set, write the data as channel-first (CHW order) as the old "
|
||||
"Caffe does.");
|
||||
|
||||
namespace caffe2 {
|
||||
// Reverses the byte order of a 32-bit value (big <-> little endian); the
// MNIST file headers are stored big-endian.
uint32_t swap_endian(uint32_t val) {
  // Swap adjacent bytes within each 16-bit half, then swap the halves.
  const uint32_t half_swapped =
      ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
  return (half_swapped << 16) | (half_swapped >> 16);
}
|
||||
|
||||
void convert_dataset(const char* image_filename, const char* label_filename,
|
||||
const char* db_path, const int data_limit) {
|
||||
// Open files
|
||||
std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
|
||||
std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
|
||||
CHECK(image_file) << "Unable to open file " << image_filename;
|
||||
CHECK(label_file) << "Unable to open file " << label_filename;
|
||||
// Read the magic and the meta data
|
||||
uint32_t magic;
|
||||
uint32_t num_items;
|
||||
uint32_t num_labels;
|
||||
uint32_t rows;
|
||||
uint32_t cols;
|
||||
|
||||
image_file.read(reinterpret_cast<char*>(&magic), 4);
|
||||
magic = swap_endian(magic);
|
||||
CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
|
||||
label_file.read(reinterpret_cast<char*>(&magic), 4);
|
||||
magic = swap_endian(magic);
|
||||
CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
|
||||
image_file.read(reinterpret_cast<char*>(&num_items), 4);
|
||||
num_items = swap_endian(num_items);
|
||||
label_file.read(reinterpret_cast<char*>(&num_labels), 4);
|
||||
num_labels = swap_endian(num_labels);
|
||||
CHECK_EQ(num_items, num_labels);
|
||||
image_file.read(reinterpret_cast<char*>(&rows), 4);
|
||||
rows = swap_endian(rows);
|
||||
image_file.read(reinterpret_cast<char*>(&cols), 4);
|
||||
cols = swap_endian(cols);
|
||||
|
||||
// leveldb
|
||||
std::unique_ptr<db::DB> mnist_db(db::CreateDB(FLAGS_db, db_path, db::NEW));
|
||||
std::unique_ptr<db::Transaction> transaction(mnist_db->NewTransaction());
|
||||
// Storing to db
|
||||
char label_value;
|
||||
std::vector<char> pixels(rows * cols);
|
||||
int count = 0;
|
||||
const int kMaxKeyLength = 10;
|
||||
char key_cstr[kMaxKeyLength];
|
||||
string value;
|
||||
|
||||
TensorProtos protos;
|
||||
TensorProto* data = protos.add_protos();
|
||||
TensorProto* label = protos.add_protos();
|
||||
data->set_data_type(TensorProto::BYTE);
|
||||
if (FLAGS_channel_first) {
|
||||
data->add_dims(1);
|
||||
data->add_dims(1);
|
||||
data->add_dims(rows);
|
||||
data->add_dims(cols);
|
||||
} else {
|
||||
data->add_dims(1);
|
||||
data->add_dims(rows);
|
||||
data->add_dims(cols);
|
||||
data->add_dims(1);
|
||||
}
|
||||
label->set_data_type(TensorProto::INT32);
|
||||
label->add_dims(1);
|
||||
label->add_int32_data(0);
|
||||
|
||||
LOG(INFO) << "A total of " << num_items << " items.";
|
||||
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
|
||||
for (int item_id = 0; item_id < num_items; ++item_id) {
|
||||
image_file.read(pixels.data(), rows * cols);
|
||||
label_file.read(&label_value, 1);
|
||||
for (int i = 0; i < rows * cols; ++i) {
|
||||
data->set_byte_data(pixels.data(), rows * cols);
|
||||
}
|
||||
label->set_int32_data(0, static_cast<int>(label_value));
|
||||
snprintf(key_cstr, kMaxKeyLength, "%08d", item_id);
|
||||
protos.SerializeToString(&value);
|
||||
string keystr(key_cstr);
|
||||
|
||||
// Put in db
|
||||
transaction->Put(keystr, value);
|
||||
if (++count % 1000 == 0) {
|
||||
transaction->Commit();
|
||||
}
|
||||
if (data_limit > 0 && count == data_limit) {
|
||||
LOG(INFO) << "Reached data limit of " << data_limit << ", stop.";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace caffe2
|
||||
|
||||
// Entry point: initialize logging, parse flags, then convert the MNIST
// image/label file pair into a db (see convert_dataset above).
int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  google::SetUsageMessage("Converts the raw mnist dataset to a leveldb.");
  google::ParseCommandLineFlags(&argc, &argv, true);
  caffe2::convert_dataset(FLAGS_image_file.c_str(), FLAGS_label_file.c_str(),
                          FLAGS_output_file.c_str(), FLAGS_data_limit);
  return 0;
}
|
11
caffe2/binaries/print_registered_core_operators.cc
Normal file
11
caffe2/binaries/print_registered_core_operators.cc
Normal file
@ -0,0 +1,11 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
|
||||
// Dumps the names of all operators registered in the CPU and CUDA operator
// registries to stdout. Useful for checking which ops a build actually links.
int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  std::cout << "CPU operator registry:" << std::endl;
  caffe2::CPUOperatorRegistry()->TEST_PrintRegisteredNames();
  std::cout << "CUDA operator registry:" << std::endl;
  caffe2::CUDAOperatorRegistry()->TEST_PrintRegisteredNames();
}
|
54
caffe2/binaries/run_client.cc
Normal file
54
caffe2/binaries/run_client.cc
Normal file
@ -0,0 +1,54 @@
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
|
||||
#include "caffe2/core/client.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(client_file, "", "The given path to the client protobuffer.");
|
||||
DEFINE_string(output_file, "", "The output file.");
|
||||
DEFINE_int32(input_size, 0, "The input size.");
|
||||
DEFINE_int32(iter, 0, "The number of iterations for timing.");
|
||||
DEFINE_string(input_file, "",
|
||||
"The input file containing a list of float numbers.");
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
google::SetUsageMessage("Runs a given client.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
LOG(INFO) << "Loading client file: " << FLAGS_client_file;
|
||||
caffe2::Client client(FLAGS_client_file);
|
||||
std::vector<float> input;
|
||||
if (FLAGS_input_file.size()) {
|
||||
std::ifstream infile;
|
||||
infile.open(FLAGS_input_file, std::ios::in);
|
||||
float value;
|
||||
while (infile >> value) {
|
||||
input.push_back(value);
|
||||
}
|
||||
} else {
|
||||
input.resize(FLAGS_input_size);
|
||||
}
|
||||
LOG(INFO) << "An input of " << input.size() << " values.";
|
||||
std::vector<float> output;
|
||||
CHECK(client.Run(input, &output));
|
||||
clock_t start = clock();
|
||||
for (int i = 0; i < FLAGS_iter; ++i) {
|
||||
CHECK(client.Run(input, &output));
|
||||
}
|
||||
LOG(INFO) << "Timing: "<< FLAGS_iter << " iters took "
|
||||
<< static_cast<float>(clock() - start) / CLOCKS_PER_SEC
|
||||
<< " seconds.";
|
||||
LOG(INFO) << "Output: " << output.size() << " dims.";
|
||||
if (FLAGS_output_file.size()) {
|
||||
std::ofstream outfile;
|
||||
outfile.open(FLAGS_output_file, std::ios::out | std::ios::trunc);
|
||||
for (int i = 0; i < output.size(); ++i) {
|
||||
outfile << output[i] << std::endl;
|
||||
}
|
||||
outfile.close();
|
||||
}
|
||||
// This is to allow us to use memory leak checks.
|
||||
google::ShutDownCommandLineFlags();
|
||||
return 0;
|
||||
}
|
23
caffe2/binaries/run_plan.cc
Normal file
23
caffe2/binaries/run_plan.cc
Normal file
@ -0,0 +1,23 @@
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
|
||||
// Loads a PlanDef protobuffer from FLAGS_plan and executes it in a freshly
// created Workspace. Dies if the plan file cannot be read/parsed.
int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  google::SetUsageMessage("Runs a given plan.");
  google::ParseCommandLineFlags(&argc, &argv, true);
  LOG(INFO) << "Loading plan: " << FLAGS_plan;
  caffe2::PlanDef plan_def;
  CHECK(ReadProtoFromFile(FLAGS_plan, &plan_def));
  std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
  workspace->RunPlan(plan_def);

  // This is to allow us to use memory leak checks.
  google::protobuf::ShutdownProtobufLibrary();
  google::ShutDownCommandLineFlags();
  return 0;
}
|
27
caffe2/binaries/run_plan_mpi.cc
Normal file
27
caffe2/binaries/run_plan_mpi.cc
Normal file
@ -0,0 +1,27 @@
|
||||
#include <mpi.h>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(plan, "", "The given path to the plan protobuffer.");
|
||||
|
||||
// MPI variant of run_plan: every rank initializes MPI, loads the same
// PlanDef from FLAGS_plan, and executes it in its own Workspace.
int main(int argc, char** argv) {
  // MPI_Init must run before flag/log setup so MPI can strip its own args.
  MPI_Init(&argc, &argv);
  google::InitGoogleLogging(argv[0]);
  google::SetUsageMessage("Runs a given plan.");
  google::ParseCommandLineFlags(&argc, &argv, true);
  LOG(INFO) << "Loading plan: " << FLAGS_plan;
  caffe2::PlanDef plan_def;
  CHECK(ReadProtoFromFile(FLAGS_plan, &plan_def));
  std::unique_ptr<caffe2::Workspace> workspace(new caffe2::Workspace());
  workspace->RunPlan(plan_def);

  // This is to allow us to use memory leak checks.
  google::protobuf::ShutdownProtobufLibrary();
  google::ShutDownCommandLineFlags();
  MPI_Finalize();
  return 0;
}
|
52
caffe2/binaries/split_db.cc
Normal file
52
caffe2/binaries/split_db.cc
Normal file
@ -0,0 +1,52 @@
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
DEFINE_string(input_db, "", "The input db.");
|
||||
DEFINE_int32(splits, 0, "The number of splits.");
|
||||
DEFINE_string(db_type, "", "The db type.");
|
||||
DEFINE_int32(batch_size, 1000, "The write batch size.");
|
||||
|
||||
using caffe2::db::Cursor;
|
||||
using caffe2::db::DB;
|
||||
using caffe2::db::Transaction;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
google::SetUsageMessage(
|
||||
"This script converts databases between different formats.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
std::unique_ptr<DB> in_db(caffe2::db::CreateDB(
|
||||
FLAGS_db_type, FLAGS_input_db, caffe2::db::READ));
|
||||
std::unique_ptr<Cursor> cursor(in_db->NewCursor());
|
||||
|
||||
CHECK_GT(FLAGS_splits, 0) << "Must specify the number of splits.";
|
||||
std::vector<std::unique_ptr<DB> > out_dbs;
|
||||
std::vector<std::unique_ptr<Transaction> > transactions;
|
||||
for (int i = 0; i < FLAGS_splits; ++i) {
|
||||
out_dbs.push_back(
|
||||
std::unique_ptr<DB>(caffe2::db::CreateDB(
|
||||
FLAGS_db_type, FLAGS_input_db + "_split_" + std::to_string(i),
|
||||
caffe2::db::NEW)));
|
||||
transactions.push_back(
|
||||
std::unique_ptr<Transaction>(out_dbs[i]->NewTransaction()));
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
for (; cursor->Valid(); cursor->Next()) {
|
||||
transactions[count % FLAGS_splits]->Put(cursor->key(), cursor->value());
|
||||
if (++count % FLAGS_batch_size == 0) {
|
||||
for (int i = 0; i < FLAGS_splits; ++i) {
|
||||
transactions[i]->Commit();
|
||||
}
|
||||
LOG(INFO) << "Splitted " << count << " items so far.";
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "A total of " << count << " items processed.";
|
||||
return 0;
|
||||
}
|
94
caffe2/core/BREW
Normal file
94
caffe2/core/BREW
Normal file
@ -0,0 +1,94 @@
|
||||
# Build rules for the caffe2 core library and its tests.

# Core CPU library: blob/workspace/net/operator/db machinery.
cc_library(
  name = "core",
  srcs = [
    "client.cc",
    "db.cc",
    "minidb.cc",
    "net.cc",
    "operator.cc",
    "typeid.cc",
    "workspace.cc",
  ],
  hdrs = [
    "blob.h",
    "client.h",
    "common.h",
    "context.h",
    "db.h",
    "net.h",
    "operator.h",
    "registry.h",
    "typeid.h",
    "types.h",
    "workspace.h"
  ],
  deps = [
    "//caffe2/proto:caffe2_proto",
    "//caffe2/utils:proto_utils",
    "//caffe2/utils:simple_queue",
    "//third_party/glog:glog",
  ],
  # NOTE(review): presumably kept as a whole archive so linker does not drop
  # registration static initializers -- confirm before changing.
  whole_archive = True,
)

# GPU additions to the core library.
cuda_library(
  name = "core_gpu",
  srcs = [
    "common_gpu.cc",
  ],
  hdrs = [
    "common_gpu.h",
    "context_gpu.h",
  ],
  deps = [
    ":core",
  ]
)

# Header-only target exposing the cuDNN wrapper header.
cc_headers(
  name = "core_cudnn",
  srcs = [
    "common_cudnn.h",
  ],
  deps = [
    "//third_party/cudnn:cudnn",
  ],
)

# CPU unit tests for the core library.
cc_test(
  name = "core_test",
  srcs = [
    "blob_test.cc",
    "context_test.cc",
    "operator_test.cc",
    "parallel_net_test.cc",
    "workspace_test.cc"
  ],
  deps = [
    ":core",
    "//gtest:gtest",
    "//gtest:gtest_main",
  ],
)

# GPU unit tests.
cc_test(
  name = "core_test_gpu",
  srcs = [
    "blob_test_gpu.cc",
  ],
  deps = [
    ":core_gpu",
    "//gtest:gtest",
    "//gtest:gtest_main",
  ],
)

# Registry machinery unit test.
cc_test(
  name = "registry_test",
  srcs = ["registry_test.cc"],
  deps = [
    ":core",
    "//gtest:gtest",
    "//gtest:gtest_main",
  ],
)
|
209
caffe2/core/blob.h
Normal file
209
caffe2/core/blob.h
Normal file
@ -0,0 +1,209 @@
|
||||
#ifndef CAFFE2_CORE_BLOB_H_
|
||||
#define CAFFE2_CORE_BLOB_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/typeid.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace internal {
// Deletes a heap object through a void*, casting it back to its concrete
// type T. Blob stores &Destroy<T> so it can later free its payload without
// knowing the static type.
template <class T>
void Destroy(void* pointer) {
  T* typed = static_cast<T*>(pointer);
  delete typed;
}
}  // namespace internal
|
||||
|
||||
// Blob is a general container that hosts a pointer as well as checking its
|
||||
// type, and takes charge of deleting it when the blob is deallocated. A blob
|
||||
// could contain ANYTHING, although the most common case is to contain a Tensor.
|
||||
class Blob {
|
||||
public:
|
||||
typedef void (*DestroyCall)(void *);
|
||||
|
||||
Blob() : id_(internal::gUnknownType), pointer_(nullptr) {}
|
||||
|
||||
~Blob() { Reset(); }
|
||||
|
||||
template <class T>
|
||||
inline bool IsType() const { return internal::IsTypeId<T>(id_); }
|
||||
inline string TypeName() const { return internal::TypeName(id_); }
|
||||
template <class T>
|
||||
const T& Get() const {
|
||||
CHECK(IsType<T>()) << "wrong type for the Blob instance. Expected "
|
||||
<< internal::TypeName<T>() << " got "
|
||||
<< internal::TypeName(id_);
|
||||
return *static_cast<const T*>(pointer_);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T* GetMutable() {
|
||||
if (!IsType<T>()) {
|
||||
VLOG(1) << "Create new mutable object " << internal::TypeName<T>();
|
||||
if (pointer_) destroy_(pointer_);
|
||||
// If we are not of the right type, create a new instance.
|
||||
pointer_ = static_cast<void*>(new T());
|
||||
destroy_ = &internal::Destroy<T>;
|
||||
}
|
||||
id_ = internal::GetTypeId<T>();
|
||||
return static_cast<T*>(pointer_);
|
||||
}
|
||||
|
||||
inline void Reset() {
|
||||
if (pointer_) {
|
||||
destroy_(pointer_);
|
||||
pointer_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
internal::TypeId id_;
|
||||
void* pointer_;
|
||||
DestroyCall destroy_;
|
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Blob);
|
||||
};
|
||||
|
||||
|
||||
template <typename dtype, class Context>
|
||||
class Tensor {
|
||||
public:
|
||||
Tensor() : ndim_(0), size_(0), data_(nullptr),
|
||||
own_data_(true), data_source_(nullptr) {}
|
||||
|
||||
// Creates a tensor. The actual data allocation is going to be carried out
|
||||
// till the first time mutable_data() is called, so there is no overhead of
|
||||
// creating multiple tensors just as placeholders (although I haven't got a
|
||||
// clear idea where such cases would happen).
|
||||
explicit Tensor(const vector<int>& dims)
|
||||
: data_(nullptr), own_data_(true), data_source_(nullptr) {
|
||||
Reshape(dims);
|
||||
}
|
||||
|
||||
template <class SrcContext>
|
||||
Tensor(const Tensor<dtype, SrcContext>& src, Context* context)
|
||||
: data_(nullptr), own_data_(true), data_source_(nullptr) {
|
||||
Reshape(src.dims());
|
||||
context->template Copy<dtype, Context, SrcContext>(
|
||||
mutable_data(), src.data(), src.size());
|
||||
}
|
||||
|
||||
// Creates a tensor, and fills its contents with the given values. We need to
|
||||
// have a context passed in as the copy function is device dependent.
|
||||
Tensor(const vector<int>& dims, vector<dtype> values, Context* context)
|
||||
: data_(nullptr), own_data_(true), data_source_(nullptr) {
|
||||
Reshape(dims);
|
||||
CHECK_EQ(values.size(), size_);
|
||||
context->template Copy<dtype, Context, CPUContext>(
|
||||
mutable_data(), values.data(), values.size());
|
||||
}
|
||||
|
||||
// Special case of above: create a tensor of shape 1, and the given value.
|
||||
Tensor(const dtype& value, Context* context)
|
||||
: data_(nullptr), own_data_(true), data_source_(nullptr) {
|
||||
Reshape(std::vector<int>(1, 1));
|
||||
context->template Copy<dtype, Context, CPUContext>(
|
||||
mutable_data(), &value, 1);
|
||||
}
|
||||
|
||||
virtual ~Tensor() {
|
||||
Free();
|
||||
}
|
||||
|
||||
void Reshape(const vector<int>& dims) {
|
||||
CHECK_GT(dims.size(), 0);
|
||||
dims_ = dims;
|
||||
ndim_ = dims_.size();
|
||||
// Calculate the size.
|
||||
int new_size = 1;
|
||||
for (int d : dims_) {
|
||||
CHECK_GT(d, 0);
|
||||
new_size *= d;
|
||||
}
|
||||
// If the size changes, we will call Free(). The next data() call will
|
||||
// re-allocate the memory.
|
||||
if (data_ && size_ != new_size) {
|
||||
Free();
|
||||
}
|
||||
size_ = new_size;
|
||||
}
|
||||
|
||||
template <typename other_type, class OtherContext>
|
||||
inline void ReshapeLike(const Tensor<other_type, OtherContext>& src_tensor) {
|
||||
Reshape(src_tensor.dims());
|
||||
}
|
||||
|
||||
void ShareData(const Tensor& src) {
|
||||
// To share data, the sizes must be equal.
|
||||
CHECK_EQ(src.size_, size_)
|
||||
<< "Size mismatch - did you call reshape before sharing the data?";
|
||||
if (data_) Free();
|
||||
own_data_ = false;
|
||||
data_source_ = &src;
|
||||
}
|
||||
|
||||
inline int ndim() const { return ndim_; }
|
||||
inline int size() const { return size_; }
|
||||
inline const vector<int>& dims() const { return dims_; }
|
||||
inline int dim(const int i) const {
|
||||
CHECK_LT(i, ndim_) << "Exceeding ndim limit " << ndim_;
|
||||
CHECK_GE(i, 0) << "Cannot have negative index";
|
||||
return dims_[i];
|
||||
}
|
||||
|
||||
const dtype* data() const {
|
||||
if (own_data_) {
|
||||
CHECK_NOTNULL(data_);
|
||||
return data_;
|
||||
} else {
|
||||
CHECK_NOTNULL(data_source_);
|
||||
CHECK_EQ(data_source_->size_, size_) << "Source data size has changed.";
|
||||
CHECK_NOTNULL(data_source_->data());
|
||||
return data_source_->data();
|
||||
}
|
||||
}
|
||||
|
||||
dtype* mutable_data() {
|
||||
CHECK(own_data_) << "Cannot call mutable_data() from a shared tensor.";
|
||||
CHECK_GT(size_, 0) << "Cannot call mutable_data on a size 0 tensor.";
|
||||
if (!data_) Allocate();
|
||||
CHECK_NOTNULL(data_);
|
||||
return data_;
|
||||
}
|
||||
|
||||
void Allocate() {
|
||||
CHECK(data_ == nullptr);
|
||||
CHECK_GT(size_, 0);
|
||||
data_ = static_cast<dtype*>(Context::New(size_ * sizeof(dtype)));
|
||||
}
|
||||
|
||||
void Free() {
|
||||
if (own_data_) {
|
||||
if (data_) {
|
||||
Context::Delete(data_);
|
||||
}
|
||||
}
|
||||
own_data_ = true;
|
||||
data_ = nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
int ndim_;
|
||||
vector<int> dims_;
|
||||
int size_;
|
||||
dtype* data_;
|
||||
bool own_data_;
|
||||
const Tensor* data_source_;
|
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Tensor);
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
#endif // CAFFE2_CORE_BLOB_H_
|
186
caffe2/core/blob_test.cc
Normal file
186
caffe2/core/blob_test.cc
Normal file
@ -0,0 +1,186 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using namespace internal; // NOLINT
|
||||
|
||||
// Two distinct empty marker types used to exercise the TypeId machinery.
class Foo {};
class Bar {};
|
||||
|
||||
// Exercises the TypeId registry: distinct types get distinct ids, and
// IsTypeId matches an id only against the type it was created from.
TEST(BlobTest, TypeId) {
  const TypeId int_id = GetTypeId<int>();
  const TypeId float_id = GetTypeId<float>();
  const TypeId foo_id = GetTypeId<Foo>();
  const TypeId bar_id = GetTypeId<Bar>();
  // Ids of different types differ.
  EXPECT_NE(int_id, float_id);
  EXPECT_NE(float_id, foo_id);
  EXPECT_NE(foo_id, bar_id);
  // Each id matches its own type...
  EXPECT_TRUE(IsTypeId<int>(int_id));
  EXPECT_TRUE(IsTypeId<float>(float_id));
  EXPECT_TRUE(IsTypeId<Foo>(foo_id));
  EXPECT_TRUE(IsTypeId<Bar>(bar_id));
  // ...and no other type's id.
  EXPECT_FALSE(IsTypeId<int>(float_id));
  EXPECT_FALSE(IsTypeId<int>(foo_id));
  EXPECT_FALSE(IsTypeId<Foo>(int_id));
  EXPECT_FALSE(IsTypeId<Foo>(bar_id));
}
|
||||
|
||||
// A Blob reports the type of whatever GetMutable<T>() last created in it,
// and stops reporting the previous type.
TEST(BlobTest, Blob) {
  Blob blob;

  int* int_unused UNUSED_VARIABLE = blob.GetMutable<int>();
  EXPECT_FALSE(blob.IsType<Foo>());
  EXPECT_TRUE(blob.IsType<int>());

  Foo* foo_unused UNUSED_VARIABLE = blob.GetMutable<Foo>();
  EXPECT_FALSE(blob.IsType<int>());
  EXPECT_TRUE(blob.IsType<Foo>());
}
|
||||
|
||||
// Reading a typed value out of a fresh (empty) Blob must die: nothing has
// been created in it yet, so the type check in Get<T>() fails.
TEST(BlobDeathTest, BlobUninitialized) {
  Blob blob;
  ASSERT_DEATH(blob.Get<int>(), ".*wrong type for the Blob instance.*");
}
|
||||
|
||||
// Get<T>() with a mismatched T must die even when the blob does hold a
// value of some other type.
TEST(BlobDeathTest, BlobWrongType) {
  Blob blob;
  Foo* foo_unused UNUSED_VARIABLE = blob.GetMutable<Foo>();
  EXPECT_TRUE(blob.IsType<Foo>());
  EXPECT_FALSE(blob.IsType<int>());
  // When not null, we should only call with the right type.
  EXPECT_NE(&blob.Get<Foo>(), nullptr);
  ASSERT_DEATH(blob.Get<int>(), ".*wrong type for the Blob instance.*");
}
|
||||
|
||||
// Typed-test fixtures: every TensorCPU(Death)Test below is instantiated once
// per element type in TensorTypes.
template <typename dtype> class TensorCPUTest : public ::testing::Test {};
template <typename dtype> class TensorCPUDeathTest : public ::testing::Test {};
typedef ::testing::Types<char, int, float> TensorTypes;
TYPED_TEST_CASE(TensorCPUTest, TensorTypes);
TYPED_TEST_CASE(TensorCPUDeathTest, TensorTypes);
|
||||
|
||||
// A default-constructed tensor has zero dimensions; a later Reshape gives it
// a shape and makes its storage accessible.
TYPED_TEST(TensorCPUTest, TensorInitializedEmpty) {
  Tensor<TypeParam, CPUContext> tensor;
  EXPECT_EQ(tensor.ndim(), 0);

  vector<int> dims{2, 3, 5};
  tensor.Reshape(dims);

  EXPECT_EQ(tensor.ndim(), 3);
  EXPECT_EQ(tensor.dim(0), 2);
  EXPECT_EQ(tensor.dim(1), 3);
  EXPECT_EQ(tensor.dim(2), 5);
  EXPECT_EQ(tensor.size(), 2 * 3 * 5);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
}
|
||||
|
||||
// A tensor constructed with an explicit shape is immediately usable, and a
// Reshape to a different rank/shape keeps it usable.
TYPED_TEST(TensorCPUTest, TensorInitializedNonEmpty) {
  vector<int> dims{2, 3, 5};
  Tensor<TypeParam, CPUContext> tensor(dims);
  EXPECT_EQ(tensor.ndim(), 3);
  EXPECT_EQ(tensor.dim(0), 2);
  EXPECT_EQ(tensor.dim(1), 3);
  EXPECT_EQ(tensor.dim(2), 5);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);

  // Grow to a 4-dimensional shape.
  dims = {7, 11, 13, 17};
  tensor.Reshape(dims);
  EXPECT_EQ(tensor.ndim(), 4);
  EXPECT_EQ(tensor.dim(0), 7);
  EXPECT_EQ(tensor.dim(1), 11);
  EXPECT_EQ(tensor.dim(2), 13);
  EXPECT_EQ(tensor.dim(3), 17);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
}
|
||||
|
||||
// ShareData aliases the source tensor's storage: both tensors expose the same
// pointer and writes through one are visible through the other.
TYPED_TEST(TensorCPUTest, TensorShareData) {
  vector<int> dims{2, 3, 5};
  Tensor<TypeParam, CPUContext> tensor(dims);
  Tensor<TypeParam, CPUContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
  EXPECT_TRUE(other_tensor.data() != nullptr);
  EXPECT_EQ(tensor.data(), other_tensor.data());
  // Set one value, check the other
  for (int idx = 0; idx < tensor.size(); ++idx) {
    tensor.mutable_data()[idx] = idx;
    EXPECT_EQ(other_tensor.data()[idx], idx);
  }
}
|
||||
|
||||
// Sharing only requires equal element counts, not equal shapes: a flat
// 1-D tensor may alias a 3-D tensor of the same size.
TYPED_TEST(TensorCPUTest, TensorShareDataCanUseDifferentShapes) {
  vector<int> dims{2, 3, 5};
  vector<int> alternate_dims{2 * 3 * 5};
  Tensor<TypeParam, CPUContext> tensor(dims);
  Tensor<TypeParam, CPUContext> other_tensor(alternate_dims);
  other_tensor.ShareData(tensor);
  EXPECT_EQ(other_tensor.ndim(), 1);
  EXPECT_EQ(other_tensor.dim(0), alternate_dims[0]);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
  EXPECT_TRUE(other_tensor.data() != nullptr);
  EXPECT_EQ(tensor.data(), other_tensor.data());
  // Set one value, check the other
  for (int idx = 0; idx < tensor.size(); ++idx) {
    tensor.mutable_data()[idx] = idx;
    EXPECT_EQ(other_tensor.data()[idx], idx);
  }
}
|
||||
|
||||
// A tensor that aliases another via ShareData does not own storage, so asking
// it to allocate via mutable_data() must abort.
TYPED_TEST(TensorCPUDeathTest, ShareDataCannotInitializeDataFromSharedTensor) {
  vector<int> dims(3);
  dims[0] = 2;
  dims[1] = 3;
  dims[2] = 5;
  Tensor<TypeParam, CPUContext> tensor(dims);
  Tensor<TypeParam, CPUContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  ASSERT_DEATH(other_tensor.mutable_data(), "");
}
|
||||
|
||||
// Reshaping the source tensor after sharing changes its size; the alias must
// then refuse to hand out stale data.
TYPED_TEST(TensorCPUDeathTest, CannotDoReshapewithAlias) {
  vector<int> dims(3);
  dims[0] = 2;
  dims[1] = 3;
  dims[2] = 5;
  Tensor<TypeParam, CPUContext> tensor(dims);
  Tensor<TypeParam, CPUContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  // Grow the source; this reallocates its buffer.
  dims[0] = 7;
  tensor.Reshape(dims);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  ASSERT_DEATH(other_tensor.data(), ".*Source data size has changed..*");
}
|
||||
|
||||
// data() on a never-shaped tensor must abort: there is no buffer to return.
TYPED_TEST(TensorCPUDeathTest, CannotAccessDataWhenEmpty) {
  Tensor<TypeParam, CPUContext> tensor;
  EXPECT_EQ(tensor.ndim(), 0);
  ASSERT_DEATH(tensor.data(), ".*Check failed: 'data_' Must be non NULL.*");
}
|
||||
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
109
caffe2/core/blob_test_gpu.cc
Normal file
109
caffe2/core/blob_test_gpu.cc
Normal file
@ -0,0 +1,109 @@
|
||||
#include <iostream> // NOLINT
|
||||
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Typed-test fixtures for the CUDA tensor: instantiated once per element type
// in TensorTypes.
template <typename dtype> class TensorGPUTest : public ::testing::Test {};
template <typename dtype> class TensorGPUDeathTest : public ::testing::Test {};
typedef ::testing::Types<char, int, float> TensorTypes;
TYPED_TEST_CASE(TensorGPUTest, TensorTypes);
TYPED_TEST_CASE(TensorGPUDeathTest, TensorTypes);
|
||||
|
||||
// A default-constructed CUDA tensor has zero dimensions; a later Reshape
// gives it a shape and allocates device storage.
TYPED_TEST(TensorGPUTest, TensorInitializedEmpty) {
  Tensor<TypeParam, CUDAContext> tensor;
  EXPECT_EQ(tensor.ndim(), 0);

  vector<int> dims{2, 3, 5};
  tensor.Reshape(dims);

  EXPECT_EQ(tensor.ndim(), 3);
  EXPECT_EQ(tensor.dim(0), 2);
  EXPECT_EQ(tensor.dim(1), 3);
  EXPECT_EQ(tensor.dim(2), 5);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
}
|
||||
|
||||
// A CUDA tensor constructed with an explicit shape is immediately usable, and
// a Reshape to a different rank/shape keeps it usable.
TYPED_TEST(TensorGPUTest, TensorInitializedNonEmpty) {
  vector<int> dims(3);
  dims[0] = 2;
  dims[1] = 3;
  dims[2] = 5;
  Tensor<TypeParam, CUDAContext> tensor(dims);
  EXPECT_EQ(tensor.ndim(), 3);
  EXPECT_EQ(tensor.dim(0), 2);
  EXPECT_EQ(tensor.dim(1), 3);
  EXPECT_EQ(tensor.dim(2), 5);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
  // Grow to a 4-dimensional shape.
  dims[0] = 7;
  dims[1] = 11;
  dims[2] = 13;
  dims.push_back(17);
  tensor.Reshape(dims);
  EXPECT_EQ(tensor.ndim(), 4);
  EXPECT_EQ(tensor.dim(0), 7);
  EXPECT_EQ(tensor.dim(1), 11);
  EXPECT_EQ(tensor.dim(2), 13);
  EXPECT_EQ(tensor.dim(3), 17);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
}
|
||||
|
||||
// ShareData aliases device storage: both CUDA tensors expose the same
// pointer. (No element-wise check here: the data lives on the device.)
TYPED_TEST(TensorGPUTest, TensorShareData) {
  vector<int> dims{2, 3, 5};
  Tensor<TypeParam, CUDAContext> tensor(dims);
  Tensor<TypeParam, CUDAContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  EXPECT_TRUE(tensor.data() != nullptr);
  EXPECT_TRUE(other_tensor.data() != nullptr);
  EXPECT_EQ(tensor.data(), other_tensor.data());
}
|
||||
|
||||
// An aliasing CUDA tensor does not own storage, so mutable_data() must abort.
TYPED_TEST(TensorGPUDeathTest, ShareDataCannotInitializeDataFromSharedTensor) {
  // Threadsafe style re-executes the test in a fresh process; presumably
  // needed because CUDA state does not survive fork - TODO confirm.
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  vector<int> dims(3);
  dims[0] = 2;
  dims[1] = 3;
  dims[2] = 5;
  Tensor<TypeParam, CUDAContext> tensor(dims);
  Tensor<TypeParam, CUDAContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  ASSERT_DEATH(other_tensor.mutable_data(), "");
}
|
||||
|
||||
// After the source tensor is reshaped (and its buffer reallocated), the alias
// must refuse to hand out stale device data.
TYPED_TEST(TensorGPUDeathTest, CannotDoReshapewithAlias) {
  // Threadsafe style re-executes the test in a fresh process; presumably
  // needed because CUDA state does not survive fork - TODO confirm.
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  vector<int> dims(3);
  dims[0] = 2;
  dims[1] = 3;
  dims[2] = 5;
  Tensor<TypeParam, CUDAContext> tensor(dims);
  Tensor<TypeParam, CUDAContext> other_tensor(dims);
  other_tensor.ShareData(tensor);
  // Grow the source; this reallocates its device buffer.
  dims[0] = 7;
  tensor.Reshape(dims);
  EXPECT_TRUE(tensor.mutable_data() != nullptr);
  ASSERT_DEATH(other_tensor.data(), "Source data size has changed.");
}
|
||||
|
||||
// data() on a never-shaped CUDA tensor must abort.
TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) {
  // Threadsafe style re-executes the test in a fresh process; presumably
  // needed because CUDA state does not survive fork - TODO confirm.
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  Tensor<TypeParam, CUDAContext> tensor;
  EXPECT_EQ(tensor.ndim(), 0);
  ASSERT_DEATH(tensor.data(), "Check failed: 'data_' Must be non NULL");
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
40
caffe2/core/client.cc
Normal file
40
caffe2/core/client.cc
Normal file
@ -0,0 +1,40 @@
|
||||
#include "caffe2/core/client.h"
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Loads a SimpleClientDef from the given file, runs its init net once to set
// up parameters, registers its main net under the fixed name "main", and
// caches the input/output blobs. Any failure is fatal (CHECK).
Client::Client(const string& client_def_name) : workspace_(new Workspace()) {
  SimpleClientDef client_def;
  CHECK(ReadProtoFromFile(client_def_name, &client_def));
  workspace_->RunNetOnce(client_def.init_net());
  // Force the net name so Run() can refer to it as "main".
  client_def.mutable_main_net()->set_name("main");
  CHECK(workspace_->CreateNet(client_def.main_net()));
  input_blob_ = workspace_->GetBlob(client_def.input());
  output_blob_ = workspace_->GetBlob(client_def.output());
  CHECK(input_blob_ != nullptr);
  CHECK(output_blob_ != nullptr);
}
|
||||
|
||||
// The workspace owns the blobs; input_blob_/output_blob_ are borrowed
// pointers into it and must not be deleted separately.
Client::~Client() {
  delete workspace_;
}
|
||||
|
||||
// Copies `input` into the fixed-shape input tensor, runs the "main" net, and
// copies the output tensor into `output` (resized to fit). The input size
// must match the tensor size exactly (CHECK_EQ aborts otherwise).
// Always returns true.
bool Client::Run(const vector<float>& input, vector<float>* output) {
  Tensor<float, CPUContext>* input_tensor =
      input_blob_->GetMutable<Tensor<float, CPUContext> >();
  CHECK_EQ(input_tensor->size(), input.size());
  memcpy(input_tensor->mutable_data(), input.data(),
         input.size() * sizeof(float));
  workspace_->RunNet("main");
  const Tensor<float, CPUContext>& output_tensor =
      output_blob_->Get<Tensor<float, CPUContext> >();
  output->resize(output_tensor.size());
  memcpy(output->data(), output_tensor.data(), output->size() * sizeof(float));
  return true;
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
41
caffe2/core/client.h
Normal file
41
caffe2/core/client.h
Normal file
@ -0,0 +1,41 @@
|
||||
// Client is a very thin wrapper over a Caffe2 interface, allowing us to do
|
||||
// a very primitive caffe network call without the need of revealing all
|
||||
// the header files inside Caffe2. Also, what we are going to deal with is
|
||||
// always float inputs and float outputs, and the input and output shapes
|
||||
// should be fixed. This is minimal and is only used by Yangqing to deal
|
||||
// with quick demo cases.
|
||||
|
||||
#ifndef CAFFE2_CORE_CLIENT_H_
|
||||
#define CAFFE2_CORE_CLIENT_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Forward declaration of a Caffe workspace.
|
||||
class Blob;
|
||||
class Workspace;
|
||||
|
||||
// Workspace is a class that holds all the blobs in this run and also runs
|
||||
// the operators.
|
||||
// A minimal float-in/float-out wrapper around a Caffe2 network. The network
// definition (a SimpleClientDef proto file) is loaded in the constructor;
// Run() then feeds a flat float vector and returns a flat float vector.
class Client {
 public:
  // Loads and initializes the network from the given proto file path.
  explicit Client(const std::string& client_def_name);
  ~Client();

  // TODO(Yangqing): Figure out how we can deal with different types of
  // inputs.
  // Runs one forward pass; `input` must match the network's fixed input size.
  bool Run(const std::vector<float>& input, std::vector<float>* output);

 private:
  // TODO(Yangqing): Are we really going to share workspaces? If not, let's
  // remove this unnecessity.
  // Owned; deleted in the destructor.
  Workspace* workspace_;
  // Borrowed pointers into workspace_; not owned.
  Blob* input_blob_;
  Blob* output_blob_;
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_CLIENT_H_
|
42
caffe2/core/common.h
Normal file
42
caffe2/core/common.h
Normal file
@ -0,0 +1,42 @@
|
||||
#ifndef CAFFE2_CORE_COMMON_H_
|
||||
#define CAFFE2_CORE_COMMON_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
// Note(Yangqing): NVCC does not play well with unordered_map on some platforms,
|
||||
// forcing us to use std::map instead of unordered_map. This may affect speed
|
||||
// in some cases, but in most of the computation code we do not access map very
|
||||
// often, so it should be fine for us. I am putting a CaffeMap alias so we can
|
||||
// change it more easily if things work out for unordered_map down the road.
|
||||
template <typename Key, typename Value>
|
||||
using CaffeMap = std::map<Key, Value>;
|
||||
// using CaffeMap = std::unordered_map;
|
||||
using std::vector;
|
||||
|
||||
// Just in order to mark things as not implemented. Do not use in final code.
|
||||
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented."
|
||||
|
||||
// suppress an unused variable.
|
||||
#define UNUSED_VARIABLE __attribute__((unused))
|
||||
|
||||
// Disable the copy and assignment operator for a class. Note that this will
|
||||
// disable the usage of the class in std containers.
|
||||
#define DISABLE_COPY_AND_ASSIGN(classname) \
|
||||
private: \
|
||||
classname(const classname&); \
|
||||
classname& operator=(const classname&)
|
||||
|
||||
|
||||
// Returns the canonical blob name that stores the gradient of `name`:
// the original name with a ".grad" suffix appended.
inline std::string GetGradientName(const std::string& name) {
  static const char kGradientSuffix[] = ".grad";
  return name + kGradientSuffix;
}
|
||||
|
||||
} // namespace caffe2
|
||||
#endif // CAFFE2_CORE_COMMON_H_
|
162
caffe2/core/common_cudnn.h
Normal file
162
caffe2/core/common_cudnn.h
Normal file
@ -0,0 +1,162 @@
|
||||
#ifndef CAFFE2_CORE_COMMON_CUDNN_H_
|
||||
#define CAFFE2_CORE_COMMON_CUDNN_H_
|
||||
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/core/types.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "cudnn.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace internal {
|
||||
// Maps a cudnnStatus_t to its symbolic name for error messages.
// Bug fix: the original had no return after the switch, so an unrecognized
// status value (e.g. one added in a newer cudnn release) fell off the end of
// a non-void function - undefined behavior. We now return a fixed fallback
// string for any unknown value.
inline const char* cudnnGetErrorString(cudnnStatus_t status) {
  switch (status) {
    case CUDNN_STATUS_SUCCESS:
      return "CUDNN_STATUS_SUCCESS";
    case CUDNN_STATUS_NOT_INITIALIZED:
      return "CUDNN_STATUS_NOT_INITIALIZED";
    case CUDNN_STATUS_ALLOC_FAILED:
      return "CUDNN_STATUS_ALLOC_FAILED";
    case CUDNN_STATUS_BAD_PARAM:
      return "CUDNN_STATUS_BAD_PARAM";
    case CUDNN_STATUS_INTERNAL_ERROR:
      return "CUDNN_STATUS_INTERNAL_ERROR";
    case CUDNN_STATUS_INVALID_VALUE:
      return "CUDNN_STATUS_INVALID_VALUE";
    case CUDNN_STATUS_ARCH_MISMATCH:
      return "CUDNN_STATUS_ARCH_MISMATCH";
    case CUDNN_STATUS_MAPPING_ERROR:
      return "CUDNN_STATUS_MAPPING_ERROR";
    case CUDNN_STATUS_EXECUTION_FAILED:
      return "CUDNN_STATUS_EXECUTION_FAILED";
    case CUDNN_STATUS_NOT_SUPPORTED:
      return "CUDNN_STATUS_NOT_SUPPORTED";
    case CUDNN_STATUS_LICENSE_ERROR:
      return "CUDNN_STATUS_LICENSE_ERROR";
  }
  // Unknown or future status values.
  return "Unknown cudnn status";
}
|
||||
} // namespace internal
|
||||
|
||||
#define CUDNN_CHECK(condition) \
|
||||
do { \
|
||||
cudnnStatus_t status = condition; \
|
||||
CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " " \
|
||||
<< "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
|
||||
<< ::caffe2::internal::cudnnGetErrorString(status); \
|
||||
} while (0)
|
||||
|
||||
|
||||
// cudnnTypeWrapper maps a C++ scalar type to its cudnnDataType_t enum value
// at compile time. Only float and double are supported; any other type is a
// compile error (the primary template is declared but not defined).
template <typename dtype> class cudnnTypeWrapper;
template<> class cudnnTypeWrapper<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};
template<> class cudnnTypeWrapper<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};
|
||||
|
||||
// Translates Caffe2's StorageOrder enum into the matching cudnn tensor
// format. Unknown orders are fatal.
inline cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order) {
  switch (order) {
    case StorageOrder::NHWC:
      return CUDNN_TENSOR_NHWC;
    case StorageOrder::NCHW:
      return CUDNN_TENSOR_NCHW;
    default:
      LOG(FATAL) << "Unknown cudnn equivalent for order: " << order;
  }
  // Just to suppress compiler warnings
  return CUDNN_TENSOR_NCHW;
}
|
||||
|
||||
// cudnnDescriptorMeta is the placeholder that wraps around a
|
||||
// cudnnTensorDescriptor_t, allowing us to do descriptor change as-needed.
|
||||
class cudnnDescriptorMeta {
|
||||
public:
|
||||
cudnnDescriptorMeta() {
|
||||
CUDNN_CHECK(cudnnCreateTensorDescriptor(&desc_));
|
||||
}
|
||||
cudnnDescriptorMeta(const cudnnDescriptorMeta& src) {
|
||||
CUDNN_CHECK(cudnnCreateTensorDescriptor(&desc_));
|
||||
CHECK_NOTNULL(Descriptor(src.format_, src.type_, src.dims_, nullptr));
|
||||
}
|
||||
~cudnnDescriptorMeta() {
|
||||
CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc_));
|
||||
}
|
||||
|
||||
inline cudnnTensorDescriptor_t Descriptor(
|
||||
const cudnnTensorFormat_t format, const cudnnDataType_t type,
|
||||
const vector<int>& dims, bool* changed) {
|
||||
if (type_ == type && format_ == format && dims_ == dims) {
|
||||
// if not changed, simply return the current descriptor.
|
||||
if (changed) *changed = false;
|
||||
return desc_;
|
||||
}
|
||||
CHECK_EQ(dims.size(), 4)
|
||||
<< "Currently only 4-dimensional descriptor supported.";
|
||||
format_ = format;
|
||||
type_ = type;
|
||||
dims_ = dims;
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
desc_, format, type, dims_[0],
|
||||
(format == CUDNN_TENSOR_NCHW? dims_[1] : dims_[3]),
|
||||
(format == CUDNN_TENSOR_NCHW? dims_[2] : dims_[1]),
|
||||
(format == CUDNN_TENSOR_NCHW? dims_[3] : dims_[2])));
|
||||
if (changed) *changed = true;
|
||||
return desc_;
|
||||
}
|
||||
|
||||
private:
|
||||
cudnnTensorDescriptor_t desc_;
|
||||
cudnnTensorFormat_t format_;
|
||||
cudnnDataType_t type_;
|
||||
vector<int> dims_;
|
||||
cudnnDescriptorMeta& operator=(const cudnnDescriptorMeta&);
|
||||
};
|
||||
|
||||
// CuDNNWrapper lazily owns a cudnnHandle_t bound to an external CUDAContext's
// stream, plus a pool of reusable tensor descriptors.
// NOTE(review): copy construction/assignment are not disabled; copying a
// wrapper would duplicate cudnn_handle_ and destroy it twice. Consider
// DISABLE_COPY_AND_ASSIGN - confirm no caller copies this type.
class CuDNNWrapper {
 public:
  // The default cuda context constructor.
  // Does not take ownership of `context`; the context must outlive this
  // wrapper, since the handle is bound to its stream.
  explicit CuDNNWrapper(CUDAContext* context)
      : cuda_context_(context), cudnn_handle_(nullptr) {}

  virtual ~CuDNNWrapper() {
    if (cudnn_handle_) {
      CUDNN_CHECK(cudnnDestroy(cudnn_handle_));
    }
  }

  // Lazily creates the cudnn handle on first use and binds it to the
  // context's cuda stream.
  cudnnHandle_t& cudnn_handle() {
    if (!cudnn_handle_) {
      CUDNN_CHECK(cudnnCreate(&cudnn_handle_));
      CUDNN_CHECK(cudnnSetStream(
          cudnn_handle_, cuda_context_->cuda_stream()));
    }
    return cudnn_handle_;
  }

  // Pre-sizes the descriptor pool; indices passed to cudnnGetTensor4dDesc
  // must be < n (at() throws otherwise).
  void cudnnSetNumTensorDescriptors(int n) {
    cudnn_tensor_descriptors_.resize(n);
  }

  // Returns the descriptor at `index`, (re)configured for the given format,
  // dtype and dims; `changed` reports whether it was reconfigured.
  template <typename dtype>
  inline cudnnTensorDescriptor_t cudnnGetTensor4dDesc(
      const int index, const cudnnTensorFormat_t cudnn_format,
      const vector<int>& dims, bool* changed) {
    return cudnn_tensor_descriptors_.at(index).Descriptor(
        cudnn_format, cudnnTypeWrapper<dtype>::type, dims, changed);
  }

 protected:
  // Pointer to an external cuda context that the cudnn wrapper will use.
  CUDAContext* cuda_context_;
  cudnnHandle_t cudnn_handle_;
  std::vector<cudnnDescriptorMeta> cudnn_tensor_descriptors_;
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_COMMON_CUDNN_H_
|
113
caffe2/core/common_gpu.cc
Normal file
113
caffe2/core/common_gpu.cc
Normal file
@ -0,0 +1,113 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace {
// Process-wide default GPU device id; 0 until SetDefaultGPUID is called.
int gDefaultGPUID = 0;
}  // namespace

// Overrides the default GPU used by operators that do not name a device.
void SetDefaultGPUID(const int deviceid) {
  gDefaultGPUID = deviceid;
}

// Returns the current default GPU id.
int GetDefaultGPUID() {
  return gDefaultGPUID;
}
|
||||
|
||||
// Logs a human-readable summary of the given CUDA device's properties
// (memory sizes, limits, clock, etc.) at INFO level. Fatal (via CUDA_CHECK)
// if the device id is invalid.
void DeviceQuery(const int device) {
  cudaDeviceProp prop;
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  std::stringstream ss;
  ss << std::endl;
  ss << "Device id:                     " << device << std::endl;
  ss << "Major revision number:         " << prop.major << std::endl;
  ss << "Minor revision number:         " << prop.minor << std::endl;
  ss << "Name:                          " << prop.name << std::endl;
  ss << "Total global memory:           " << prop.totalGlobalMem << std::endl;
  ss << "Total shared memory per block: " << prop.sharedMemPerBlock
     << std::endl;
  ss << "Total registers per block:     " << prop.regsPerBlock << std::endl;
  ss << "Warp size:                     " << prop.warpSize << std::endl;
  ss << "Maximum memory pitch:          " << prop.memPitch << std::endl;
  ss << "Maximum threads per block:     " << prop.maxThreadsPerBlock
     << std::endl;
  ss << "Maximum dimension of block:    "
     << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
     << prop.maxThreadsDim[2] << std::endl;
  ss << "Maximum dimension of grid:     "
     << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
     << prop.maxGridSize[2] << std::endl;
  ss << "Clock rate:                    " << prop.clockRate << std::endl;
  ss << "Total constant memory:         " << prop.totalConstMem << std::endl;
  ss << "Texture alignment:             " << prop.textureAlignment << std::endl;
  ss << "Concurrent copy and execution: "
     << (prop.deviceOverlap ? "Yes" : "No") << std::endl;
  ss << "Number of multiprocessors:     " << prop.multiProcessorCount
     << std::endl;
  ss << "Kernel execution timeout:      "
     << (prop.kernelExecTimeoutEnabled ? "Yes" : "No") << std::endl;
  LOG(INFO) << ss.str();
  return;
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Maps a cublasStatus_t to its symbolic name for error messages.
// Bug fix: the original had no return after the switch, so an unrecognized
// status (including statuses compiled out by the CUDA_VERSION guards below)
// fell off the end of a non-void function - undefined behavior. We now
// return a fixed fallback string for any unknown value.
const char* cublasGetErrorString(cublasStatus_t error) {
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:
      return "CUBLAS_STATUS_SUCCESS";
    case CUBLAS_STATUS_NOT_INITIALIZED:
      return "CUBLAS_STATUS_NOT_INITIALIZED";
    case CUBLAS_STATUS_ALLOC_FAILED:
      return "CUBLAS_STATUS_ALLOC_FAILED";
    case CUBLAS_STATUS_INVALID_VALUE:
      return "CUBLAS_STATUS_INVALID_VALUE";
    case CUBLAS_STATUS_ARCH_MISMATCH:
      return "CUBLAS_STATUS_ARCH_MISMATCH";
    case CUBLAS_STATUS_MAPPING_ERROR:
      return "CUBLAS_STATUS_MAPPING_ERROR";
    case CUBLAS_STATUS_EXECUTION_FAILED:
      return "CUBLAS_STATUS_EXECUTION_FAILED";
    case CUBLAS_STATUS_INTERNAL_ERROR:
      return "CUBLAS_STATUS_INTERNAL_ERROR";
#if CUDA_VERSION >= 6000
    case CUBLAS_STATUS_NOT_SUPPORTED:
      return "CUBLAS_STATUS_NOT_SUPPORTED";
#if CUDA_VERSION >= 6050
    case CUBLAS_STATUS_LICENSE_ERROR:
      return "CUBLAS_STATUS_LICENSE_ERROR";
#endif  // CUDA_VERSION >= 6050
#endif  // CUDA_VERSION >= 6000
  }
  // Unknown or future status values.
  return "Unknown cublas status";
}
|
||||
|
||||
// Maps a curandStatus_t to its symbolic name for error messages.
// Bug fix: the original had no return after the switch, so an unrecognized
// status value fell off the end of a non-void function - undefined behavior.
// We now return a fixed fallback string for any unknown value.
const char* curandGetErrorString(curandStatus_t error) {
  switch (error) {
    case CURAND_STATUS_SUCCESS:
      return "CURAND_STATUS_SUCCESS";
    case CURAND_STATUS_VERSION_MISMATCH:
      return "CURAND_STATUS_VERSION_MISMATCH";
    case CURAND_STATUS_NOT_INITIALIZED:
      return "CURAND_STATUS_NOT_INITIALIZED";
    case CURAND_STATUS_ALLOCATION_FAILED:
      return "CURAND_STATUS_ALLOCATION_FAILED";
    case CURAND_STATUS_TYPE_ERROR:
      return "CURAND_STATUS_TYPE_ERROR";
    case CURAND_STATUS_OUT_OF_RANGE:
      return "CURAND_STATUS_OUT_OF_RANGE";
    case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
      return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
    case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
      return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
    case CURAND_STATUS_LAUNCH_FAILURE:
      return "CURAND_STATUS_LAUNCH_FAILURE";
    case CURAND_STATUS_PREEXISTING_FAILURE:
      return "CURAND_STATUS_PREEXISTING_FAILURE";
    case CURAND_STATUS_INITIALIZATION_FAILED:
      return "CURAND_STATUS_INITIALIZATION_FAILED";
    case CURAND_STATUS_ARCH_MISMATCH:
      return "CURAND_STATUS_ARCH_MISMATCH";
    case CURAND_STATUS_INTERNAL_ERROR:
      return "CURAND_STATUS_INTERNAL_ERROR";
  }
  // Unknown or future status values.
  return "Unknown curand status";
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace caffe2
|
68
caffe2/core/common_gpu.h
Normal file
68
caffe2/core/common_gpu.h
Normal file
@ -0,0 +1,68 @@
|
||||
#ifndef CAFFE2_CORE_COMMON_GPU_H_
|
||||
#define CAFFE2_CORE_COMMON_GPU_H_
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand.h>
|
||||
#include <driver_types.h> // cuda driver types
|
||||
// #include <thrust/device_vector.h>
|
||||
// #include <thrust/functional.h>
|
||||
|
||||
#include "glog/logging.h"
|
||||
#include "caffe2/core/common.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Sets and gets the default GPU id. If the function is not called, we will use
|
||||
// GPU 0 ast he default gpu id. If there is an operator that says it runs on the
|
||||
// GPU but did not specify which GPU, this default gpuid is going to be used.
|
||||
void SetDefaultGPUID(const int deviceid);
|
||||
int GetDefaultGPUID();
|
||||
void DeviceQuery(const int deviceid);
|
||||
|
||||
namespace internal {
|
||||
const char* cublasGetErrorString(cublasStatus_t error);
|
||||
const char* curandGetErrorString(curandStatus_t error);
|
||||
} // namespace internal
|
||||
|
||||
// CUDA: various checks for different function calls.
|
||||
#define CUDA_CHECK(condition) \
|
||||
do { \
|
||||
cudaError_t error = condition; \
|
||||
CHECK_EQ(error, cudaSuccess) \
|
||||
<< "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
|
||||
<< cudaGetErrorString(error); \
|
||||
} while (0)
|
||||
|
||||
#define CUBLAS_CHECK(condition) \
|
||||
do { \
|
||||
cublasStatus_t status = condition; \
|
||||
CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) \
|
||||
<< "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
|
||||
<< ::caffe2::internal::cublasGetErrorString(status); \
|
||||
} while (0)
|
||||
|
||||
#define CURAND_CHECK(condition) \
|
||||
do { \
|
||||
curandStatus_t status = condition; \
|
||||
CHECK_EQ(status, CURAND_STATUS_SUCCESS) \
|
||||
<< "Error at: " << __FILE__ << ":" << __LINE__ << ": " \
|
||||
<< ::caffe2::internal::curandGetErrorString(status); \
|
||||
} while (0)
|
||||
|
||||
#define CUDA_1D_KERNEL_LOOP(i, n) \
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
|
||||
i < (n); \
|
||||
i += blockDim.x * gridDim.x)
|
||||
|
||||
// TODO(Yangqing): Yuck. Figure out a better way?
// Number of CUDA threads launched per block.
const int CAFFE_CUDA_NUM_THREADS = 1024;

// CUDA: number of blocks for threads.
// Computes ceil(N / CAFFE_CUDA_NUM_THREADS) for non-negative N.
inline int CAFFE_GET_BLOCKS(const int N) {
  const int rounded_up = N + CAFFE_CUDA_NUM_THREADS - 1;
  return rounded_up / CAFFE_CUDA_NUM_THREADS;
}
|
||||
|
||||
} // namespace caffe2
|
||||
#endif // CAFFE2_CORE_COMMON_GPU_H_
|
53
caffe2/core/context.h
Normal file
53
caffe2/core/context.h
Normal file
@ -0,0 +1,53 @@
|
||||
#ifndef CAFFE2_CORE_CONTEXT_H_
|
||||
#define CAFFE2_CORE_CONTEXT_H_
|
||||
|
||||
#include <random>
|
||||
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// CPUContext is the host-side device context: it provides allocation,
// cross-device copy hooks and a random generator for CPU operators.
class CPUContext {
 public:
  // Default ctor seeds the RNG with 0, so runs are deterministic by default.
  CPUContext() : random_generator_(0) {}
  explicit CPUContext(const DeviceOption& device_option)
      : random_generator_(device_option.random_seed()) {
    DCHECK_EQ(device_option.device_type(), CPU);
  }
  virtual ~CPUContext() {}
  // No-ops on CPU: there is no device to switch to, and all host computation
  // is synchronous.
  inline void SwitchToDevice() {}
  inline bool FinishDeviceComputation() { return true; }

  inline std::mt19937& RandGenerator() { return random_generator_; }

  // Allocates nbytes of zero-initialized host memory. Free with Delete().
  // NOTE(review): uses memset without a direct <cstring> include - relies on
  // a transitive include; confirm.
  static void* New(size_t nbytes) {
    void* data = new char[nbytes];
    memset(data, 0, nbytes);
    return data;
  }
  static void Delete(void* data) { delete[] static_cast<char*>(data); }

  // Two copy functions that deals with cross-device copies.
  // Memcpy is only declared here; each (Dst, Src) pair is defined as an
  // explicit specialization (CPU<-CPU below; CPU<-CUDA in context_gpu.h).
  template <class DstContext, class SrcContext>
  inline void Memcpy(void* dst, const void* src, size_t nbytes);
  // Typed convenience wrapper over Memcpy: copies n elements of T.
  template <typename T, class DstContext, class SrcContext>
  inline void Copy(T* dst, const T* src, int n) {
    Memcpy<DstContext, SrcContext>(static_cast<void*>(dst),
                                   static_cast<const void*>(src),
                                   n * sizeof(T));
  }

 protected:
  std::mt19937 random_generator_;
};
|
||||
|
||||
// Host-to-host copy: a plain memcpy, synchronous.
template<>
inline void CPUContext::Memcpy<CPUContext, CPUContext>(
    void* dst, const void* src, size_t nbytes) {
  memcpy(dst, src, nbytes);
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_CONTEXT_H_
|
143
caffe2/core/context_gpu.h
Normal file
143
caffe2/core/context_gpu.h
Normal file
@ -0,0 +1,143 @@
|
||||
#ifndef CAFFE2_CORE_CONTEXT_GPU_H_
|
||||
#define CAFFE2_CORE_CONTEXT_GPU_H_
|
||||
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/types.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
class CUDAContext {
|
||||
public:
|
||||
// The default cuda context constructor.
|
||||
CUDAContext()
|
||||
: cuda_stream_(nullptr), cublas_handle_(nullptr),
|
||||
random_seed_(1701), curand_generator_(nullptr) {
|
||||
cuda_gpu_id_ = GetDefaultGPUID();
|
||||
CUDA_CHECK(cudaSetDevice(cuda_gpu_id_));
|
||||
CUDA_CHECK(cudaStreamCreate(&cuda_stream_));
|
||||
}
|
||||
|
||||
explicit CUDAContext(const DeviceOption& option)
|
||||
: cuda_stream_(nullptr), cublas_handle_(nullptr),
|
||||
random_seed_(option.random_seed()), curand_generator_(nullptr) {
|
||||
DCHECK_EQ(option.device_type(), CUDA);
|
||||
cuda_gpu_id_ = option.has_cuda_gpu_id() ?
|
||||
option.cuda_gpu_id() : GetDefaultGPUID();
|
||||
CUDA_CHECK(cudaSetDevice(cuda_gpu_id_));
|
||||
CUDA_CHECK(cudaStreamCreate(&cuda_stream_));
|
||||
}
|
||||
|
||||
virtual ~CUDAContext() {
|
||||
if (curand_generator_) {
|
||||
CURAND_CHECK(curandDestroyGenerator(curand_generator_));
|
||||
}
|
||||
if (cublas_handle_) {
|
||||
CUBLAS_CHECK(cublasDestroy(cublas_handle_));
|
||||
}
|
||||
if (cuda_stream_) {
|
||||
CUDA_CHECK(cudaStreamDestroy(cuda_stream_));
|
||||
}
|
||||
}
|
||||
|
||||
inline void SwitchToDevice() {
|
||||
CUDA_CHECK(cudaSetDevice(cuda_gpu_id_));
|
||||
}
|
||||
|
||||
inline bool FinishDeviceComputation() {
|
||||
cudaError_t error = cudaStreamSynchronize(cuda_stream_);
|
||||
if (error != cudaSuccess) {
|
||||
LOG(ERROR) << cudaGetErrorString(error);
|
||||
return false;
|
||||
}
|
||||
error = cudaPeekAtLastError();
|
||||
if (error != cudaSuccess) {
|
||||
LOG(ERROR) << cudaGetErrorString(error);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int cuda_gpu_id() { return cuda_gpu_id_; }
|
||||
|
||||
inline cudaStream_t& cuda_stream() { return cuda_stream_; }
|
||||
|
||||
cublasHandle_t& cublas_handle() {
|
||||
if (!cublas_handle_) {
|
||||
CUBLAS_CHECK(cublasCreate(&cublas_handle_));
|
||||
CUBLAS_CHECK(cublasSetPointerMode(
|
||||
cublas_handle_, CUBLAS_POINTER_MODE_DEVICE));
|
||||
CUBLAS_CHECK(cublasSetStream(cublas_handle_, cuda_stream_));
|
||||
}
|
||||
return cublas_handle_;
|
||||
}
|
||||
|
||||
curandGenerator_t& curand_generator() {
|
||||
if (!curand_generator_) {
|
||||
CURAND_CHECK(curandCreateGenerator(
|
||||
&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
|
||||
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(
|
||||
curand_generator_, random_seed_));
|
||||
CURAND_CHECK(curandSetStream(curand_generator_, cuda_stream_));
|
||||
}
|
||||
return curand_generator_;
|
||||
}
|
||||
|
||||
static void* New(size_t nbytes) {
|
||||
void* dev_ptr;
|
||||
CUDA_CHECK(cudaMalloc(&dev_ptr, nbytes));
|
||||
CUDA_CHECK(cudaMemset(dev_ptr, 0, nbytes));
|
||||
return dev_ptr;
|
||||
}
|
||||
|
||||
// Frees device memory previously returned by New(). A cudaErrorCudartUnloading
// result is deliberately ignored (see below); any other failure is fatal.
static void Delete(void* data) {
  cudaError_t error = cudaFree(data);
  // For some reason, in Python runtime we sometimes delete a data pointer
  // after the cuda runtime exits - this is odd but is probably caused by
  // a static workspace that pycaffe2 uses, and the destruction got entangled
  // in some race condition. Anyway, since cuda runtime is exiting anyway, we
  // will not need to worry about memory leak, so we basically ignore it.
  // This is definitely not ideal but works for now.
  if (error != cudaSuccess && error != cudaErrorCudartUnloading) {
    LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": "
               << cudaGetErrorString(error);
  }
}
|
||||
|
||||
// Copies nbytes between any combination of host/device memory
// (cudaMemcpyDefault infers the direction from the pointers), then
// synchronizes the stream so the copy is complete when this returns.
template <class DstContext, class SrcContext>
inline void Copy(void* dst, const void* src, size_t nbytes) {
  CUDA_CHECK(cudaMemcpyAsync(
      dst, src, nbytes, cudaMemcpyDefault, cuda_stream_));
  // TODO(Yangqing): do we want to synchronize inside copy?
  CUDA_CHECK(cudaStreamSynchronize(cuda_stream_));
}
|
||||
|
||||
// Typed convenience wrapper: copies n elements of T via the untyped Copy().
template <typename T, class DstContext, class SrcContext>
inline void Copy(T* dst, const T* src, int n) {
  Copy<DstContext, SrcContext>(static_cast<void*>(dst),
                               static_cast<const void*>(src),
                               n * sizeof(T));
}
|
||||
|
||||
protected:
|
||||
int cuda_gpu_id_;
|
||||
cudaStream_t cuda_stream_;
|
||||
cublasHandle_t cublas_handle_;
|
||||
int random_seed_;
|
||||
curandGenerator_t curand_generator_;
|
||||
};
|
||||
|
||||
// For the CPU context, we also allow a (probably expensive) function
// to copy the data from a cuda context.
template<>
inline void CPUContext::Memcpy<CPUContext, CUDAContext>(
    void* dst, const void* src, size_t nbytes) {
  // A fresh CUDAContext (stream, etc.) is constructed per call, and the copy
  // inside synchronizes -- correct but slow; acceptable for occasional
  // device-to-host pulls.
  CUDAContext context;
  context.Copy<CPUContext, CUDAContext>(dst, src, nbytes);
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_CONTEXT_GPU_H_
|
45
caffe2/core/context_test.cc
Normal file
45
caffe2/core/context_test.cc
Normal file
@ -0,0 +1,45 @@
|
||||
#include <random>
|
||||
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// This is a test that make sure the random number generator works as expected,
// with a specific seed that generates specific responses. I think it should
// be the same number across platforms since we use mt19937 explicitly.
TEST(CPUContextTest, TestRandomNumberGenerator) {
  DeviceOption option;
  option.set_random_seed(1701);
  CPUContext context(option);
  std::uniform_int_distribution<int> dist(0, 100);
  // NOTE(review): the expected values are commented out, so this currently
  // only exercises seeded construction; re-enable once the numbers are
  // re-verified off-line.
  /*
  // These numbers are manually verified off-line.
  EXPECT_EQ(dist(context.RandGenerator()), 46);
  EXPECT_EQ(dist(context.RandGenerator()), 4);
  EXPECT_EQ(dist(context.RandGenerator()), 94);
  EXPECT_EQ(dist(context.RandGenerator()), 26);
  EXPECT_EQ(dist(context.RandGenerator()), 67);
  */
}
|
||||
|
||||
// Round-trips 10 floats through CPUContext: allocate two buffers, fill one,
// Copy() into the other, verify the contents, and free both.
TEST(CPUContextTest, TestAllocDealloc) {
  float* data = static_cast<float*>(CPUContext::New(10 * sizeof(float)));
  EXPECT_NE(data, nullptr);
  float* dst_data = static_cast<float*>(CPUContext::New(10 * sizeof(float)));
  EXPECT_NE(dst_data, nullptr);
  for (int i = 0; i < 10; ++i) {
    data[i] = i;
  }
  DeviceOption option;
  CPUContext context(option);
  context.Copy<float, CPUContext, CPUContext>(dst_data, data, 10);
  for (int i = 0; i < 10; ++i) {
    EXPECT_FLOAT_EQ(dst_data[i], i);
  }
  CPUContext::Delete(data);
  CPUContext::Delete(dst_data);
}
|
||||
|
||||
} // namespace caffe2
|
9
caffe2/core/db.cc
Normal file
9
caffe2/core/db.cc
Normal file
@ -0,0 +1,9 @@
|
||||
#include "caffe2/core/db.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
DEFINE_REGISTRY(Caffe2DBRegistry, DB, const string&, Mode);
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
62
caffe2/core/db.h
Normal file
62
caffe2/core/db.h
Normal file
@ -0,0 +1,62 @@
|
||||
#ifndef CAFFE2_CORE_DB_H_
|
||||
#define CAFFE2_CORE_DB_H_
|
||||
|
||||
#include "caffe2/core/registry.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
enum Mode { READ, WRITE, NEW };
|
||||
|
||||
// Abstract read cursor over a DB's key/value records. Typical use:
// SeekToFirst(); while (Valid()) { use key()/value(); Next(); }
class Cursor {
 public:
  Cursor() { }
  virtual ~Cursor() { }
  // Repositions the cursor at the first record.
  virtual void SeekToFirst() = 0;
  // Advances to the next record; Valid() afterwards reports whether one exists.
  virtual void Next() = 0;
  // Key of the current record; only meaningful while Valid() is true.
  virtual string key() = 0;
  // Value of the current record; only meaningful while Valid() is true.
  virtual string value() = 0;
  virtual bool Valid() = 0;

  DISABLE_COPY_AND_ASSIGN(Cursor);
};
|
||||
|
||||
// Abstract write handle for a DB: Put() key/value pairs, then Commit() to
// persist them.
class Transaction {
 public:
  Transaction() { }
  virtual ~Transaction() { }
  // Stages (or writes) one key/value pair.
  virtual void Put(const string& key, const string& value) = 0;
  // Flushes all Put()s to durable storage.
  virtual void Commit() = 0;

  DISABLE_COPY_AND_ASSIGN(Transaction);
};
|
||||
|
||||
// Abstract key-value database, opened from `source` in one of the Modes
// (READ / WRITE / NEW). Concrete backends register via REGISTER_CAFFE2_DB.
class DB {
 public:
  DB(const string& source, Mode mode) : mode_(mode) {
    // This constructor does nothing. The actual opening should be done in the
    // derived constructors.
  }
  virtual ~DB() { }
  // Closes the underlying storage.
  virtual void Close() = 0;
  // Returns a new cursor; caller owns the returned object.
  virtual Cursor* NewCursor() = 0;
  // Returns a new transaction for writing; caller owns the returned object.
  virtual Transaction* NewTransaction() = 0;

 protected:
  Mode mode_;  // mode the DB was opened with

  DISABLE_COPY_AND_ASSIGN(DB);
};
|
||||
|
||||
DECLARE_REGISTRY(Caffe2DBRegistry, DB, const string&, Mode);
|
||||
#define REGISTER_CAFFE2_DB(name, ...) \
|
||||
REGISTER_CLASS(Caffe2DBRegistry, name, __VA_ARGS__)
|
||||
|
||||
// Instantiates a registered DB backend by name (e.g. "minidb"). Caller owns
// the returned DB. Behavior for an unregistered db_type is whatever the
// registry's Create() does -- presumably nullptr; confirm in registry.h.
inline DB* CreateDB(const string& db_type, const string& source, Mode mode) {
  return Caffe2DBRegistry()->Create(db_type, source, mode);
}
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_DB_H_
|
134
caffe2/core/minidb.cc
Normal file
134
caffe2/core/minidb.cc
Normal file
@ -0,0 +1,134 @@
|
||||
#include <cstdio>
|
||||
#include <mutex>
|
||||
|
||||
#include "caffe2/core/db.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
// Cursor over a MiniDB file. Record layout on disk is
// [int key_len][int value_len][key bytes][value bytes], repeated.
// Holds the DB's file-access mutex for the cursor's entire lifetime, so only
// one cursor/transaction can touch the file at a time.
class MiniDBCursor : public Cursor {
 public:
  explicit MiniDBCursor(FILE* f, std::mutex* mutex)
    : file_(f), lock_(*mutex) {}
  ~MiniDBCursor() {}

  void SeekToFirst() override {
    fseek(file_, 0, SEEK_SET);
    CHECK(!feof(file_)) << "Hmm, empty file?";
    // Read the first item.
    valid_ = true;
    Next();
  }

  // Reads the next record into key_/value_. EOF at a record boundary clears
  // valid_; a truncated record mid-read CHECK-fails.
  void Next() override {
    if (fread(&key_len_, sizeof(int), 1, file_) == 0) {
      // Reaching EOF.
      valid_ = false;
      return;
    }
    CHECK_EQ(fread(&value_len_, sizeof(int), 1, file_), 1);
    CHECK_GT(key_len_, 0);
    CHECK_GT(value_len_, 0);
    // Buffers only ever grow; they are reused across records.
    if (key_len_ > key_.size()) {
      key_.resize(key_len_);
    }
    if (value_len_ > value_.size()) {
      value_.resize(value_len_);
    }
    CHECK_EQ(fread(key_.data(), sizeof(char), key_len_, file_), key_len_);
    CHECK_EQ(fread(value_.data(), sizeof(char), value_len_, file_), value_len_);
  }

  string key() override {
    CHECK(valid_) << "Invalid position!";
    return string(key_.data(), key_len_);
  }

  string value() override {
    CHECK(valid_) << "Invalid position!";
    return string(value_.data(), value_len_);
  }

  bool Valid() override { return valid_; }

 private:
  FILE* file_;               // not owned; owned by the MiniDB
  std::lock_guard<std::mutex> lock_;  // exclusive file access for lifetime
  bool valid_;
  int key_len_;
  vector<char> key_;         // scratch buffer, size >= key_len_
  int value_len_;
  vector<char> value_;       // scratch buffer, size >= value_len_
};
|
||||
|
||||
// Write handle for a MiniDB file. Appends records in the on-disk format
// [int key_len][int value_len][key bytes][value bytes]. Holds the DB's
// file-access mutex for its entire lifetime.
class MiniDBTransaction : public Transaction {
 public:
  explicit MiniDBTransaction(FILE* f, std::mutex* mutex)
    : file_(f), lock_(*mutex) {}
  // NOTE(review): Commit() CHECK-fails on fflush error, so this destructor
  // can abort the process -- confirm that is acceptable.
  ~MiniDBTransaction() { Commit(); }

  void Put(const string& key, const string& value) override {
    int key_len = key.size();
    int value_len = value.size();
    CHECK_EQ(fwrite(&key_len, sizeof(int), 1, file_), 1);
    CHECK_EQ(fwrite(&value_len, sizeof(int), 1, file_), 1);
    CHECK_EQ(fwrite(key.c_str(), sizeof(char), key_len, file_), key_len);
    CHECK_EQ(fwrite(value.c_str(), sizeof(char), value_len, file_), value_len);
  }

  // "Commit" here is only an fflush: data is written eagerly by Put().
  void Commit() override {
    CHECK_EQ(fflush(file_), 0);
  }

 private:
  FILE* file_;               // not owned; owned by the MiniDB
  std::lock_guard<std::mutex> lock_;

  DISABLE_COPY_AND_ASSIGN(MiniDBTransaction);
};
|
||||
|
||||
class MiniDB : public DB {
|
||||
public:
|
||||
MiniDB(const string& source, Mode mode) : DB(source, mode), file_(nullptr) {
|
||||
switch (mode) {
|
||||
case NEW:
|
||||
file_ = fopen(source.c_str(), "wb");
|
||||
break;
|
||||
case WRITE:
|
||||
file_ = fopen(source.c_str(), "ab");
|
||||
fseek(file_, 0, SEEK_END);
|
||||
break;
|
||||
case READ:
|
||||
file_ = fopen(source.c_str(), "rb");
|
||||
break;
|
||||
}
|
||||
CHECK(file_) << "Cannot open file: " << source;
|
||||
LOG(INFO) << "Opened MiniDB " << source;
|
||||
}
|
||||
~MiniDB() { Close(); }
|
||||
|
||||
void Close() override { fclose(file_); }
|
||||
|
||||
Cursor* NewCursor() override {
|
||||
CHECK_EQ(this->mode_, READ);
|
||||
return new MiniDBCursor(file_, &file_access_mutex_);
|
||||
}
|
||||
|
||||
Transaction* NewTransaction() override {
|
||||
CHECK(this->mode_ == NEW || this->mode_ == WRITE);
|
||||
return new MiniDBTransaction(file_, &file_access_mutex_);
|
||||
}
|
||||
|
||||
private:
|
||||
FILE* file_;
|
||||
// access mutex makes sure we don't have multiple cursors/transactions
|
||||
// reading the same file.
|
||||
std::mutex file_access_mutex_;
|
||||
};
|
||||
|
||||
REGISTER_CAFFE2_DB(MiniDB, MiniDB);
|
||||
REGISTER_CAFFE2_DB(minidb, MiniDB);
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
191
caffe2/core/net.cc
Normal file
191
caffe2/core/net.cc
Normal file
@ -0,0 +1,191 @@
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Factory for NetBase implementations keyed on NetDef.net_type(): "simple"
// (also the default when unset) or "parallel". Returns a heap-allocated net
// the caller owns, or nullptr (with a logged error) for an unknown type.
NetBase* CreateNet(const NetDef& net_def, Workspace* ws) {
  if (!net_def.has_net_type() || net_def.net_type() == "simple") {
    VLOG(1) << "Creating simple net.";
    return new SimpleNet(net_def, ws);
  } else if (net_def.net_type() == "parallel") {
    VLOG(1) << "Creating parallel net.";
    return new ParallelNet(net_def, ws);
  } else {
    LOG(ERROR) << "Unknown net type: " << net_def.net_type();
    return nullptr;
  }
  // Just to suppress compiler warning
  return nullptr;
}
|
||||
|
||||
SimpleNet::SimpleNet(const NetDef& net_def, Workspace* ws)
    : NetBase(net_def, ws) {
  // Initialize the operators
  for (const OperatorDef& operator_def : net_def.operators()) {
    VLOG(1) << "Creating operator " << operator_def.name()
            << ":" << operator_def.type();
    if (!operator_def.has_device_option()) {
      // Operator does not specify a device: fall back to the net-level
      // device option.
      operators_.emplace_back(
          CreateOperator(operator_def, net_def.device_option(), ws));
    } else {
      // Operator carries its own device option; presumably the two-argument
      // CreateOperator overload honors it (not visible here -- confirm).
      operators_.emplace_back(CreateOperator(operator_def, ws));
    }
  }
}
|
||||
|
||||
// Returns true iff every operator was created successfully and passes its own
// Verify(). A nullptr entry means CreateOperator failed for that def.
bool SimpleNet::Verify() {
  for (auto& op : operators_) {
    VLOG(1) << "Verifying operator " << op->def().name()
            << "(" << op->def().type() << ").";
    if (op.get() == nullptr || !op->Verify()) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Runs every operator in definition order; stops and returns false at the
// first failure.
bool SimpleNet::Run() {
  VLOG(1) << "Running net.";
  for (const auto& op : operators_) {
    VLOG(1) << "Running operator " << op->def().name()
            << "(" << op->def().type() << ").";
    // TODO(Yangqing): convert this sequential run to event-based.
    if (!op->Run()) return false;
  }
  return true;
}
|
||||
|
||||
// Builds a dependency DAG over the operators (an edge A->B when A produces a
// blob B consumes), records the parentless ops as the initial frontier, and
// spawns net_def.num_workers() worker threads that live for the net's lifetime.
ParallelNet::ParallelNet(const NetDef& net_def, Workspace* ws)
    : NetBase(net_def, ws), operator_nodes_(net_def.operators_size()) {
  // Blob creator allows us to track which operator created which blob.
  // Maps blob name -> index of the last op that produced it.
  std::map<string, int> blob_creator;
  // Initialize the operators
  for (int idx = 0; idx < net_def.operators_size(); ++idx) {
    const OperatorDef& op_def = net_def.operators(idx);
    VLOG(1) << "Creating operator #" << idx << ": "
            << op_def.name() << ":" << op_def.type();
    if (!op_def.has_device_option()) {
      // No per-op device: inherit the net-level device option.
      operator_nodes_[idx].operator_.reset(
          CreateOperator(op_def, net_def.device_option(), ws));
    } else {
      operator_nodes_[idx].operator_.reset(CreateOperator(op_def, ws));
    }
    // Check the inputs, and set up parents if necessary.
    for (const string& input : op_def.inputs()) {
      if (blob_creator.count(input) == 0) {
        VLOG(1) << "Input " << input << " not produced by this net. "
                << "Assuming it is pre-existing.";
      } else {
        int parent = blob_creator[input];
        VLOG(1) << "op dependency: " << parent << "->" << idx;
        operator_nodes_[idx].parents_.push_back(parent);
        operator_nodes_[parent].children_.push_back(idx);
      }
    }
    for (const string& output : op_def.outputs()) {
      if (blob_creator.count(output) != 0) {
        LOG(WARNING) << "Output " << output << " produced again. "
                     << "Such operation is not strictly tested. "
                     << "Use at your own risk.";
      }
      blob_creator[output] = idx;
    }
  }
  // Figure out the initial frontier - this is the one we will feed into the job
  // queue to start a run.
  for (int idx = 0; idx < operator_nodes_.size(); ++idx) {
    if (operator_nodes_[idx].parents_.size() == 0) {
      initial_frontier_.push_back(idx);
    }
  }
  // Finally, start the workers.
  CHECK_GT(net_def.num_workers(), 0) << "Must specify the number of workers.";
  for (int i = 0; i < net_def.num_workers(); ++i) {
    VLOG(1) << "Start worker #" << i;
    workers_.push_back(std::thread(&ParallelNet::WorkerFunction, this));
  }
}
|
||||
|
||||
// Signals the job queue to stop (workers return from Pop with false) and
// joins every worker thread before destruction completes.
ParallelNet::~ParallelNet() {
  // Safely join all the workers before exiting.
  job_queue_.NoMoreJobs();
  VLOG(1) << "Joining workers.";
  for (auto& worker : workers_) {
    worker.join();
  }
}
|
||||
|
||||
// Returns true iff every operator was created successfully and passes its own
// Verify(); mirrors SimpleNet::Verify over the DAG nodes.
bool ParallelNet::Verify() {
  for (auto& op_node : operator_nodes_) {
    auto& op = op_node.operator_;
    VLOG(1) << "Verifying operator " << op->def().name()
            << "(" << op->def().type() << ").";
    if (op.get() == nullptr || !op->Verify()) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Runs one pass of the DAG: resets per-run counters, seeds the job queue with
// the parentless ops, then blocks on cv_ until the workers have drained all
// operators. Returns false if any operator's Run() returned false.
bool ParallelNet::Run() {
  VLOG(1) << "Running parallel net.";
  // First, set up job queue.
  remaining_ops_ = operator_nodes_.size();
  success_ = true;
  // TODO(jiayq): Start all worker threads.
  // Initialize the runtime parent count.
  for (auto& node : operator_nodes_) {
    node.runtime_parent_count_ = node.parents_.size();
  }
  // Kickstart the job queue.
  for (auto& value : initial_frontier_) {
    job_queue_.Push(value);
  }
  // Workers decrement remaining_ops_ under this mutex and notify cv_; the
  // while loop guards against spurious wakeups.
  std::unique_lock<std::mutex> mutex_lock(remaining_ops_mutex_);
  while (remaining_ops_ > 0) {
    VLOG(2) << "Remaining ops to run: " << remaining_ops_;
    cv_.wait(mutex_lock);
  }
  VLOG(2) << "All ops finished running.";
  // If the above while loop finished, we know that the current run finished.
  return success_;
}
|
||||
|
||||
// Worker-thread body. Repeatedly pops a ready operator index from the job
// queue, runs it, decrements each child's runtime parent count (enqueueing
// children that become ready), and updates remaining_ops_/success_ under the
// mutex, notifying Run() via cv_. Exits when the queue reports no more jobs.
void ParallelNet::WorkerFunction() {
  // WorkerFunctions() is an infinite loop until there are no more jobs to run.
  while (true) {
    int idx;
    // If there is no more jobs - meaning that the ParallelNet is destructing -
    // we will exit safely.
    if (!job_queue_.Pop(&idx)) {
      return;
    }
    VLOG(1) << "Running operator #" << idx << " "
            << operator_nodes_[idx].operator_->def().name()
            << "(" << operator_nodes_[idx].operator_->def().type() << ").";
    bool this_success = operator_nodes_[idx].operator_->Run();
    for (int child : operator_nodes_[idx].children_) {
      // runtime_parent_count_ is atomic, so concurrent parents decrement
      // safely and exactly one of them observes zero.
      int count = --operator_nodes_[child].runtime_parent_count_;
      // The count should never be smaller than zero.
      DCHECK_GE(count, 0)
          << "Found runtime parent count smaller than zero for "
          << "operator node "
          << operator_nodes_[child].operator_->def().name()
          << "(" << operator_nodes_[child].operator_->def().type() << ").";
      if (count == 0) {
        VLOG(2) << "Pushing operator #" << child << " to queue.";
        job_queue_.Push(child);
      }
    }
    // Notify that the processed op is incremented by one.
    std::unique_lock<std::mutex> mutex_lock(remaining_ops_mutex_);
    --remaining_ops_;
    success_ &= this_success;
    DCHECK_GE(remaining_ops_, 0);
    cv_.notify_one();
    VLOG(2) << "Finished executing operator #" << idx;
  }
}
|
||||
|
||||
} // namespace caffe2
|
90
caffe2/core/net.h
Normal file
90
caffe2/core/net.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef CAFFE2_CORE_NET_H_
|
||||
#define CAFFE2_CORE_NET_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <thread> // NOLINT
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/registry.h"
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
#include "caffe2/utils/simple_queue.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
class OperatorBase;
|
||||
|
||||
// Net is a thin struct that owns all the operators together with the operator
// contexts.
class NetBase {
 public:
  // Neither argument is stored here; derived nets consume the NetDef and
  // Workspace themselves.
  NetBase(const NetDef& net_def, Workspace* ws) {}
  virtual ~NetBase() {}
  // Returns true if every operator in the net is set up correctly.
  virtual bool Verify() = 0;
  // Executes the net once; returns false on failure.
  virtual bool Run() = 0;

  DISABLE_COPY_AND_ASSIGN(NetBase);
};
|
||||
|
||||
// Essentially, we won't expect too many Net instances, so we will simply
|
||||
// have a function that produces different net implementations. If needed we can
|
||||
// switch to a registration pattern later.
|
||||
NetBase* CreateNet(const NetDef& net_def, Workspace* ws);
|
||||
|
||||
// This is the very basic structure you need to run a network - all it
// does is simply to run everything in sequence. If you want more fancy control
// such as a DAG-like execution, check out other better net implementations.
class SimpleNet final : public NetBase {
 public:
  SimpleNet(const NetDef& net_def, Workspace* ws);
  bool Verify() override;
  bool Run() override;

 protected:
  // Operators in definition order; Run() executes them sequentially.
  vector<unique_ptr<OperatorBase> > operators_;

  DISABLE_COPY_AND_ASSIGN(SimpleNet);
};
|
||||
|
||||
namespace internal {
// One node of ParallelNet's dependency DAG.
struct OperatorNode {
  unique_ptr<OperatorBase> operator_;
  vector<int> children_;  // indices of ops that consume this op's outputs
  vector<int> parents_;   // indices of ops that produce this op's inputs
  // Reset to parents_.size() at the start of each run and decremented as
  // parents finish; the op is ready to run when it reaches zero.
  std::atomic<int> runtime_parent_count_;
};
}  // namespace internal
|
||||
|
||||
// DAG-scheduled net: operators run on a pool of worker threads as soon as all
// of their producing parents have finished.
class ParallelNet final : public NetBase {
 public:
  ParallelNet(const NetDef& net_def, Workspace* ws);
  ~ParallelNet();
  bool Verify() override;
  bool Run() override;
  // WorkerFunction() is a function wrapper to allow us to run worker threads.
  // It checks out one ready-to-run operator from the job queue, runs it,
  // notifies all its children, and for any children that is ready, enqueues
  // it to the job queue.
  void WorkerFunction();

 protected:
  vector<internal::OperatorNode> operator_nodes_;  // DAG; index == op id
  vector<int> initial_frontier_;  // parentless ops; seeds each Run()
  SimpleQueue<int> job_queue_;    // ready-to-run op indices
  std::vector<std::thread> workers_;
  int remaining_ops_;             // guarded by remaining_ops_mutex_
  bool success_;                  // guarded by remaining_ops_mutex_
  std::mutex remaining_ops_mutex_;
  std::condition_variable cv_;    // signaled as each op finishes

  DISABLE_COPY_AND_ASSIGN(ParallelNet);
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_NET_H_
|
121
caffe2/core/operator.cc
Normal file
121
caffe2/core/operator.cc
Normal file
@ -0,0 +1,121 @@
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// TODO(Yangqing): move all the checks to a less fatal check mechanism.
|
||||
OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws)
|
||||
: operator_def_(operator_def) {
|
||||
for (auto& arg : operator_def.args()) {
|
||||
CHECK_GT(arg.name().size(), 0) << "Argument must have a name.";
|
||||
CHECK_EQ(arg_map_.count(arg.name()), 0) << "Duplicated argument name.";
|
||||
arg_map_[arg.name()] = &arg;
|
||||
}
|
||||
for (const string& input_str : operator_def_.inputs()) {
|
||||
inputs_.push_back(CHECK_NOTNULL(ws->GetBlob(input_str)));
|
||||
}
|
||||
for (const string& output_str : operator_def_.outputs()) {
|
||||
outputs_.push_back(CHECK_NOTNULL(ws->CreateBlob(output_str)));
|
||||
}
|
||||
}
|
||||
|
||||
// Parameter getters. You can use these to get the arguments that you want.
// We need to deal with the fact that we cannot really template into
// protocol buffers... yuck.
// Each instantiation specializes GetSingleArgument<dtype> to read the matching
// scalar proto field; a missing argument yields default_value, a present
// argument with the wrong field type CHECK-fails.
#define INSTANTIATE_GET_SINGLE_ARGUMENT(dtype, fieldname)                 \
template <>                                                               \
dtype OperatorBase::GetSingleArgument<dtype>(                             \
    const string& name, const dtype& default_value) {                     \
  if (arg_map_.count(name) == 0) {                                       \
    DVLOG(1) << "Using default parameter value " << default_value;       \
    return default_value;                                                 \
  }                                                                       \
  CHECK(arg_map_[name]->has_##fieldname())                                \
      << "Argument does not have the right field: expected "              \
      << #fieldname;                                                      \
  return arg_map_[name]->fieldname();                                     \
}

INSTANTIATE_GET_SINGLE_ARGUMENT(float, f)
INSTANTIATE_GET_SINGLE_ARGUMENT(int, i)
INSTANTIATE_GET_SINGLE_ARGUMENT(string, s)
// Undefine the argument just to be safe.
#undef INSTANTIATE_GET_SINGLE_ARGUMENT
|
||||
|
||||
// Each instantiation specializes GetRepeatedArgument<dtype> to read the
// matching repeated proto field; a missing argument yields an empty vector.
// NOTE(review): the CHECK below also fires when the argument exists but its
// list is empty, not only when the wrong field was used -- confirm intended.
#define INSTANTIATE_GET_REPEATED_ARGUMENT(dtype, fieldname)               \
template <>                                                               \
vector<dtype> OperatorBase::GetRepeatedArgument<dtype>(                   \
    const string& name) {                                                 \
  if (arg_map_.count(name) == 0) {                                       \
    return vector<dtype>();                                               \
  }                                                                       \
  vector<dtype> values;                                                   \
  CHECK(arg_map_[name]->fieldname##_size())                               \
      << "Argument does not have the right field: expected "              \
      << #fieldname;                                                      \
  for (const auto& v : arg_map_[name]->fieldname()) values.push_back(v); \
  return values;                                                          \
}

INSTANTIATE_GET_REPEATED_ARGUMENT(float, floats)
INSTANTIATE_GET_REPEATED_ARGUMENT(int, ints)
INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT
|
||||
|
||||
// Checks that the number of input and output blobs falls inside the
// [MinInput, MaxInput] / [MinOutput, MaxOutput] ranges declared by the
// subclass (via INPUT_OUTPUT_STATS). Logs and returns false on violation.
bool OperatorBase::Verify() {
  // Check Blob counts.
  if (operator_def_.inputs_size() < MinInput() ||
      operator_def_.inputs_size() > MaxInput()) {
    LOG(ERROR) << "Input size " << operator_def_.inputs_size()
               << " not in range [min=" << MinInput() << ", max="
               << MaxInput() << "].";
    LOG(ERROR) << "Error at operator " << operator_def_.name() << ":"
               << operator_def_.type();
    return false;
  }
  if (operator_def_.outputs_size() < MinOutput() ||
      operator_def_.outputs_size() > MaxOutput()) {
    LOG(ERROR) << "Output size " << operator_def_.outputs_size()
               << " not in range [min=" << MinOutput() << ", max="
               << MaxOutput() << "].";
    LOG(ERROR) << "Error at operator " << operator_def_.name() << ":"
               << operator_def_.type();
    return false;
  }
  return true;
}
|
||||
|
||||
// Creates an operator of type operator_def.type() for the device specified by
// device_option, consulting the per-device registries (CuDNN is preferred
// over plain CUDA when an implementation is registered). Returns a
// heap-allocated operator the caller owns, or nullptr on an unknown device.
OperatorBase* CreateOperator(const OperatorDef& operator_def,
                             const DeviceOption& device_option,
                             Workspace* ws) {
  const string& key = operator_def.type();
  // Dispatch on the device_option argument. The previous code switched on
  // operator_def.device_option(), which silently ignored this parameter --
  // callers such as SimpleNet pass the net-level device option here exactly
  // because the operator def carries none of its own, so the net-level
  // device was never honored.
  // NOTE(review): the created operator still sees operator_def's (absent)
  // device option in its own constructor; a fuller fix would copy the def
  // and set the option on it -- confirm downstream expectations.
  switch (device_option.device_type()) {
    case CPU:
      VLOG(1) << "Creating CPU operator " << key;
      return CPUOperatorRegistry()->Create(key, operator_def, ws);
    case CUDA:
      VLOG(1) << "Creating CUDA operator " << key;
      // In Cuda, if we have cudnn, we will prefer to use cudnn first.
      if (CUDNNOperatorRegistry()->Has(key)) {
        VLOG(1) << "Using CuDNN implementation.";
        return CUDNNOperatorRegistry()->Create(key, operator_def, ws);
      }
      return CUDAOperatorRegistry()->Create(key, operator_def, ws);
  }
  // Unknown device type (also suppresses the missing-return warning).
  LOG(ERROR) << "Unknown device type: " << device_option.device_type();
  return nullptr;
}
|
||||
|
||||
DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase,
|
||||
const OperatorDef&, Workspace*);
|
||||
DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase,
|
||||
const OperatorDef&, Workspace*);
|
||||
DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase,
|
||||
const OperatorDef&, Workspace*);
|
||||
|
||||
} // namespace caffe2
|
233
caffe2/core/operator.h
Normal file
233
caffe2/core/operator.h
Normal file
@ -0,0 +1,233 @@
|
||||
#ifndef CAFFE2_CORE_OPERATOR_H_
|
||||
#define CAFFE2_CORE_OPERATOR_H_
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/registry.h"
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Base class for all operators: owns a copy of the OperatorDef, an
// argument-name index, and non-owning pointers to the input/output Blobs
// resolved from the Workspace.
class OperatorBase {
 public:
  // The constructor of the operator. Note that you should not do any
  // custom initializations in the constructor; instead, do those in the
  // SetUp() function.
  explicit OperatorBase(const OperatorDef& operator_def, Workspace* ws);
  virtual ~OperatorBase() {}

  // Verify return true if an operator is set up correctly. This cannot be
  // implemented in the constructor, because there will be calls to overridden
  // functions.
  virtual bool Verify();

  // Parameter getters. You can use these to get the arguments that you want.
  bool HasArgument(const string& name) { return (arg_map_.count(name) > 0); }

  // Functions that deal with arguments. Basically, this allows us to map an
  // argument name to a specific type of argument that we are trying to access.
  template <typename T>
  T GetSingleArgument(const string& name, const T& default_value);
  template <typename T>
  vector<T> GetRepeatedArgument(const string& name);

  // Parses the "s" field of argument `name` as a serialized proto message.
  // CHECK-fails if the argument is missing or does not parse.
  template <typename MessageType>
  MessageType GetAnyMessageArgument(const string& name) {
    CHECK(arg_map_.count(name)) << "Cannot find parameter named " << name;
    MessageType message;
    CHECK(message.ParseFromString(arg_map_[name]->s()))
        << "Faild to parse content from the string";
    return message;
  }
  // Parses each entry of the "strings" field of argument `name` as a
  // serialized proto message; CHECK-fails on a missing argument or bad parse.
  template <typename MessageType>
  vector<MessageType> GetAnyRepeatedMessageArgument(const string& name) {
    CHECK(arg_map_.count(name)) << "Cannot find parameter named " << name;
    vector<MessageType> messages(arg_map_[name]->strings_size());
    for (int i = 0; i < messages.size(); ++i) {
      CHECK(messages[i].ParseFromString(arg_map_[name]->strings(i)))
          << "Faild to parse content from the string";
    }
    return messages;
  }

  // Get the inputs and outputs as specific types.
  template <typename T>
  inline const T& Input(int idx) {
    DCHECK_LT(idx, inputs_.size());
    return inputs_.at(idx)->template Get<T>();
  }
  template <typename T>
  inline T* Output(int idx) {
    DCHECK_LT(idx, outputs_.size());
    return outputs_.at(idx)->template GetMutable<T>();
  }
  template <typename T>
  inline bool InputIsType(int idx) {
    return inputs_.at(idx)->template IsType<T>();
  }
  inline int InputSize() { return inputs_.size(); }
  inline int OutputSize() { return outputs_.size(); }
  inline const vector<const Blob*>& Inputs() const { return inputs_; }
  inline const vector<Blob*>& Outputs() { return outputs_; }

  // Base Run() is a stub; device-aware subclasses (see Operator) override it.
  virtual bool Run() { NOT_IMPLEMENTED; return false; }

  inline const OperatorDef& def() { return operator_def_; }

 protected:
  // Do not manually override these functions. Instead, use INPUT_OUTPUT_STATS
  // macro below.
  virtual int MinInput() { return 0; }
  virtual int MaxInput() { return INT_MAX; }
  virtual int MinOutput() { return 0; }
  virtual int MaxOutput() { return INT_MAX; }

 private:
  // Argument name -> pointer to the Argument proto inside operator_def_.
  CaffeMap<string, const Argument*> arg_map_;
  OperatorDef operator_def_;
  // Non-owning pointers into the Workspace's blobs.
  vector<const Blob*> inputs_;
  vector<Blob*> outputs_;

  DISABLE_COPY_AND_ASSIGN(OperatorBase);
};
|
||||
|
||||
// If your operator does not need any specialized contructor or destructor,
|
||||
// you can simply use this to save two lines of code.
|
||||
#define USE_SIMPLE_BASE_CTOR_DTOR(name) \
|
||||
name(const OperatorDef& operator_def, Workspace* ws) \
|
||||
: OperatorBase(operator_def, ws) {} \
|
||||
virtual ~name() {}
|
||||
|
||||
// INPUT_OUTPUT_STATS gives the statistics of the input and output that are
|
||||
// legal. If the max input/output is not limited, you can specify INT_MAX.
|
||||
// TODO(Yangqing): If necessary, add ability to specify that n_input = n_output.
|
||||
#define INPUT_OUTPUT_STATS(min_input, max_input, min_output, max_output) \
|
||||
protected: \
|
||||
int MinInput() override { return min_input; } \
|
||||
int MaxInput() override { return max_input; } \
|
||||
int MinOutput() override { return min_output; } \
|
||||
int MaxOutput() override { return max_output; }
|
||||
|
||||
// INPUT_TAGS and OUTPUT_TAGS are optional features to name the indices of the
|
||||
// operator's inputs and outputs, in order to avoid confusion. For example, for
|
||||
// a fully convolution layer that has input, weight and bias, you can define its
|
||||
// input tags as:
|
||||
// INPUT_TAGS(INPUT, WEIGHT, BIAS);
|
||||
// And in the code, instead of doing
|
||||
// auto& weight = Input(1);
|
||||
// you can now do
|
||||
// auto& weight = Input(WEIGHT);
|
||||
// to make it more clear.
|
||||
#define INPUT_TAGS(first_input, ...) \
|
||||
enum _InputTags { first_input = 0, __VA_ARGS__ }
|
||||
#define OUTPUT_TAGS(first_input, ...) \
|
||||
enum _OutputTags { first_input = 0, __VA_ARGS__ }
|
||||
|
||||
|
||||
// Operator is the class that you usually want to derive, if your operator will
// run on different devices. You should then implement the RunOnDevice()
// function.
template <typename dtype, class DeviceContext>
class Operator : public OperatorBase {
 public:
  // The constructor of the operator. Note that you should not do any
  // custom initializations in the constructor; instead, do those in the
  // SetUp() function.
  explicit Operator(const OperatorDef& operator_def, Workspace* ws)
      : OperatorBase(operator_def, ws),
        device_context_(operator_def.device_option()) {
    // In the constructor, we switch to the device so that the child class
    // constructors will run on that device.
    device_context_.SwitchToDevice();
  }
  virtual ~Operator() {}

  // Typed accessors over OperatorBase's untyped Blob inputs/outputs.
  inline const Tensor<dtype, DeviceContext>& Input(int idx) {
    return OperatorBase::template Input<Tensor<dtype, DeviceContext> >(idx); }
  inline Tensor<dtype, DeviceContext>* Output(int idx) {
    return OperatorBase::template Output<Tensor<dtype, DeviceContext> >(idx);
  }

  // The run function of Operator switches to the device, and then carries out
  // the actual computation with RunOnDevice(). You should implement RunOnDevice
  // instead of Run().
  bool Run() final {
    device_context_.SwitchToDevice();
    bool result = RunOnDevice();
    // Drain the device even if RunOnDevice() failed, so device-side errors
    // are surfaced in the return value.
    result &= device_context_.FinishDeviceComputation();
    return result;
  }

  virtual bool RunOnDevice() = 0;

 protected:
  DeviceContext device_context_;
  DISABLE_COPY_AND_ASSIGN(Operator);
};
|
||||
|
||||
// Pulls the commonly used OperatorBase/Operator members into the scope of a
// derived operator class template; needed because the bases are dependent
// types, so their names are not found by unqualified lookup.
#define USE_OPERATOR_BASE_FUNCTIONS \
  using OperatorBase::GetSingleArgument; \
  using OperatorBase::GetRepeatedArgument; \
  using OperatorBase::def; \
  using OperatorBase::InputIsType; \
  using OperatorBase::InputSize; \
  using OperatorBase::OutputSize; \
  using Operator<dtype, DeviceContext>::device_context_; \
  using Operator<dtype, DeviceContext>::Input; \
  using Operator<dtype, DeviceContext>::Output

// Declares a pass-through constructor and trivial destructor for operators
// that need no construction logic of their own.
#define USE_SIMPLE_CTOR_DTOR(name) \
  name(const OperatorDef& operator_def, Workspace* ws) \
      : Operator<dtype, DeviceContext>(operator_def, ws) {} \
  virtual ~name() {}
|
||||
|
||||
// The operator registry. Since we are not expecting a great number of devices,
|
||||
// we will simply have an if-then type command and allocate the actual
|
||||
// generation to device-specific registerers.
|
||||
// Note that although we have CUDA and CUDNN here, the registerers themselves do
|
||||
// not depend on specific cuda or cudnn libraries. This means that we will be
|
||||
// able to compile it even when there is no cuda available - we simply do not
|
||||
// link any cuda or cudnn operators.
|
||||
// Per-device operator registries. Each maps an operator type name to a
// creator taking (const OperatorDef&, Workspace*).
DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase,
                 const OperatorDef&, Workspace*);
// Registers a CPU operator with an explicit creator function.
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
  REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
// Registers a CPU operator class using the registry's default creator.
#define REGISTER_CPU_OPERATOR(name, ...) \
  REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)

DECLARE_REGISTRY(CUDAOperatorRegistry, OperatorBase,
                 const OperatorDef&, Workspace*);
#define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) \
  REGISTER_CREATOR(CUDAOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CUDA_OPERATOR(name, ...) \
  REGISTER_CLASS(CUDAOperatorRegistry, name, __VA_ARGS__)

DECLARE_REGISTRY(CUDNNOperatorRegistry, OperatorBase,
                 const OperatorDef&, Workspace*);
#define REGISTER_CUDNN_OPERATOR_CREATOR(key, ...) \
  REGISTER_CREATOR(CUDNNOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CUDNN_OPERATOR(name, ...) \
  REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__)

// Creates an operator with the given operator definition and device option.
OperatorBase* CreateOperator(const OperatorDef& operator_def,
                             const DeviceOption& device_option,
                             Workspace* ws);

// Create an operator with the given operator definition, and the device
// option that is specified in the operator definition.
inline OperatorBase* CreateOperator(const OperatorDef& operator_def,
                                    Workspace* ws) {
  return CreateOperator(operator_def, operator_def.device_option(), ws);
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_OPERATOR_H_
|
213
caffe2/core/operator_test.cc
Normal file
213
caffe2/core/operator_test.cc
Normal file
@ -0,0 +1,213 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// A trivial operator used to exercise registration, verification and
// argument handling: it accepts at most one input and one output, and its
// Run() is a no-op that always succeeds.
class JustTest : public OperatorBase {
 public:
  explicit JustTest(const OperatorDef& op_def, Workspace* ws)
      : OperatorBase(op_def, ws) {}
  bool Run() override { return true; }
  INPUT_OUTPUT_STATS(0, 1, 0, 1);
};
REGISTER_CPU_OPERATOR(JustTest, JustTest);
REGISTER_CUDA_OPERATOR(JustTest, JustTest);
|
||||
|
||||
|
||||
// Creating an operator by type name should work through both the CPU and
// the CUDA registries.
TEST(OperatorTest, RegistryWorks) {
  OperatorDef op_def;
  Workspace ws;
  op_def.set_type("JustTest");
  EXPECT_NE(nullptr, CreateOperator(op_def, &ws));
  op_def.mutable_device_option()->set_device_type(CUDA);
  EXPECT_NE(nullptr, CreateOperator(op_def, &ws));

  CPUOperatorRegistry()->TEST_PrintRegisteredNames();
}

// Constructing an operator whose input blob has not been created in the
// workspace should trip a CHECK and abort.
TEST(OperatorDeathTest, CannotUseUninitializedBlob) {
  Workspace ws;
  OperatorDef op_def;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("output");
  EXPECT_DEATH(CreateOperator(op_def, &ws), "Check failed");
}
|
||||
|
||||
// All three argument kinds set on the def - a float, a repeated int, and a
// string - should round-trip through the typed accessors.
TEST(OperatorTest, TestParameterAccess) {
  OperatorDef op_def;
  Workspace ws;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("output");
  {
    Argument* arg = op_def.add_args();
    arg->set_name("arg0");
    arg->set_f(0.1);
  }
  {
    Argument* arg = op_def.add_args();
    arg->set_name("arg1");
    arg->add_ints(1);
    arg->add_ints(2);
  }
  {
    Argument* arg = op_def.add_args();
    arg->set_name("arg2");
    arg->set_s("argstring");
  }
  EXPECT_NE(ws.CreateBlob("input"), nullptr);
  OperatorBase op(op_def, &ws);
  EXPECT_TRUE(op.Verify());
  EXPECT_FLOAT_EQ(op.GetSingleArgument<float>("arg0", 0.0), 0.1);
  vector<int> i = op.GetRepeatedArgument<int>("arg1");
  EXPECT_EQ(i.size(), 2);
  EXPECT_EQ(i[0], 1);
  EXPECT_EQ(i[1], 2);
  EXPECT_EQ(op.GetSingleArgument<string>("arg2", "default"), "argstring");
}
|
||||
|
||||
|
||||
// Reading a float argument as an int should trip a fatal type check.
TEST(OperatorDeathTest, CannotAccessParameterWithWrongType) {
  OperatorDef op_def;
  Workspace ws;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("output");
  {
    Argument* arg = op_def.add_args();
    arg->set_name("arg0");
    arg->set_f(0.1);
  }
  EXPECT_NE(ws.CreateBlob("input"), nullptr);
  OperatorBase op(op_def, &ws);
  EXPECT_TRUE(op.Verify());
  EXPECT_FLOAT_EQ(op.GetSingleArgument<float>("arg0", 0.0), 0.1);
  EXPECT_DEATH(op.GetSingleArgument<int>("arg0", 0),
               "Argument does not have the right field: expected i");
}

// Same as above, for repeated arguments: reading repeated floats as repeated
// ints should trip a fatal type check.
TEST(OperatorDeathTest, CannotAccessRepeatedParameterWithWrongType) {
  OperatorDef op_def;
  Workspace ws;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("output");
  {
    Argument* arg = op_def.add_args();
    arg->set_name("arg0");
    arg->add_floats(0.1);
  }
  EXPECT_NE(ws.CreateBlob("input"), nullptr);
  OperatorBase op(op_def, &ws);
  EXPECT_TRUE(op.Verify());
  auto args = op.GetRepeatedArgument<float>("arg0");
  EXPECT_EQ(args.size(), 1);
  EXPECT_FLOAT_EQ(args[0], 0.1);
  EXPECT_DEATH(op.GetRepeatedArgument<int>("arg0"),
               "Argument does not have the right field: expected ints");
}
|
||||
|
||||
// A nonexistent argument should fall back to the supplied default value.
TEST(OperatorTest, TestDefaultValue) {
  OperatorDef op_def;
  Workspace ws;
  OperatorBase op(op_def, &ws);
  EXPECT_FLOAT_EQ(
      op.GetSingleArgument<float>("arg-nonexisting", 0.5), 0.5);
}

// Creating an operator should register its output blob in the workspace.
TEST(OperatorTest, TestSetUp) {
  Workspace ws;
  OperatorDef op_def;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("output");
  EXPECT_NE(nullptr, ws.CreateBlob("input"));
  unique_ptr<OperatorBase> op(CreateOperator(op_def, &ws));
  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(op->Verify());
  EXPECT_TRUE(ws.HasBlob("output"));
}
|
||||
|
||||
// Verify() should reject input/output counts outside JustTest's declared
// INPUT_OUTPUT_STATS range (at most one input and one output).
TEST(OperatorTest, TestSetUpInputOutputCount) {
  Workspace ws;
  OperatorDef op_def;
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_inputs("input2");
  op_def.add_outputs("output");
  EXPECT_NE(nullptr, ws.CreateBlob("input"));
  EXPECT_NE(nullptr, ws.CreateBlob("input2"));
  unique_ptr<OperatorBase> op(CreateOperator(op_def, &ws));
  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(ws.HasBlob("output"));
  // Because JustTest will only accept one single input, this will return false.
  EXPECT_FALSE(op->Verify());

  op_def.clear_inputs();
  op_def.add_inputs("input");
  op_def.add_outputs("output2");
  op.reset(CreateOperator(op_def, &ws));
  EXPECT_NE(nullptr, op.get());
  // Because JustTest will only produce one single output, this will return
  // false.
  EXPECT_FALSE(op->Verify());
}
|
||||
|
||||
// Builds a two-operator chain: input -> JustTest0 -> hidden -> JustTest1 ->
// output. The second operator reuses op_def with its input/output rewritten
// in place before being copied into the net.
NetDef GetNetDefForTest() {
  NetDef net_def;
  OperatorDef op_def;
  net_def.set_name("NetForTest");
  op_def.set_name("JustTest0");
  op_def.set_type("JustTest");
  op_def.add_inputs("input");
  op_def.add_outputs("hidden");
  net_def.add_operators()->CopyFrom(op_def);
  op_def.set_name("JustTest1");
  op_def.set_inputs(0, "hidden");
  op_def.set_outputs(0, "output");
  net_def.add_operators()->CopyFrom(op_def);
  return net_def;
}
|
||||
|
||||
// Instantiating the net should create all intermediate and output blobs in
// the workspace, and the net should verify and run (simple executor).
TEST(NetTest, TestScaffoldingSimpleNet) {
  NetDef net_def = GetNetDefForTest();
  net_def.set_net_type("simple");
  Workspace ws;
  EXPECT_NE(nullptr, ws.CreateBlob("input"));
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Verify());
  EXPECT_TRUE(ws.HasBlob("input"));
  EXPECT_TRUE(ws.HasBlob("hidden"));
  EXPECT_TRUE(ws.HasBlob("output"));
  EXPECT_TRUE(net->Run());
}

// Same scaffolding checks for the parallel executor with a single worker.
TEST(NetTest, TestScaffoldingParallelNet) {
  NetDef net_def = GetNetDefForTest();
  net_def.set_net_type("parallel");
  net_def.set_num_workers(1);
  Workspace ws;
  EXPECT_NE(nullptr, ws.CreateBlob("input"));
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Verify());
  EXPECT_TRUE(ws.HasBlob("input"));
  EXPECT_TRUE(ws.HasBlob("hidden"));
  EXPECT_TRUE(ws.HasBlob("output"));
  EXPECT_TRUE(net->Run());
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
134
caffe2/core/parallel_net_test.cc
Normal file
134
caffe2/core/parallel_net_test.cc
Normal file
@ -0,0 +1,134 @@
|
||||
#include <chrono> // NOLINT
|
||||
#include <ctime>
|
||||
#include <thread> // NOLINT
|
||||
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "google/protobuf/text_format.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using std::clock_t;
|
||||
using std::clock;
|
||||
|
||||
// SleepOp basically sleeps for a given number of seconds.
|
||||
// SleepOp basically sleeps for a given number of milliseconds (the "ms"
// argument, defaulting to 1000).
class SleepOp final : public OperatorBase {
 public:
  SleepOp(const OperatorDef& operator_def, Workspace* ws)
      : OperatorBase(operator_def, ws),
        ms_(OperatorBase::GetSingleArgument<int>("ms", 1000)) {
    DCHECK_GT(ms_, 0);
    DCHECK_LT(ms_, 3600 * 1000) << "Really? This long?";
  }

  bool Run() final {
    // NOTE(review): clock() measures CPU time, not wall time, so a sleeping
    // thread accrues almost none of it - confirm whether wall time was meant.
    clock_t start = clock();
    std::this_thread::sleep_for(std::chrono::milliseconds(ms_));
    clock_t end = clock();
    if (OperatorBase::OutputSize()) {
      vector<clock_t>* output = OperatorBase::Output<vector<clock_t> >(0);
      output->resize(2);
      (*output)[0] = start;
      (*output)[1] = end;
    }
    return true;
  }

 private:
  // Sleep duration in milliseconds.
  int ms_;
  // We allow arbitrary inputs and at most one output so that we can
  // test scaffolding of networks. If the output is 1, it will be filled with
  // vector<clock_t> with two elements: start time and end time.
  INPUT_OUTPUT_STATS(0, INT_MAX, 0, 1);
  DISABLE_COPY_AND_ASSIGN(SleepOp);
};

namespace {
REGISTER_CPU_OPERATOR(Sleep, SleepOp)
REGISTER_CUDA_OPERATOR(Sleep, SleepOp)
}  // namespace
|
||||
|
||||
// A text-format NetDef with three Sleep operators: sleep2 depends on sleep1
// (100 ms each), while sleep3 (150 ms) has no dependencies and can run
// concurrently with the sleep1 -> sleep2 chain under the parallel executor.
const char kSleepNetDefString[] =
    " name: \"sleepnet\""
    " net_type: \"parallel\""
    " num_workers: 2"
    " operators {"
    "   outputs: \"sleep1\""
    "   name: \"sleep1\""
    "   type: \"Sleep\""
    "   args {"
    "     name: \"ms\""
    "     i: 100"
    "   }"
    " }"
    " operators {"
    "   inputs: \"sleep1\""
    "   outputs: \"sleep2\""
    "   name: \"sleep2\""
    "   type: \"Sleep\""
    "   args {"
    "     name: \"ms\""
    "     i: 100"
    "   }"
    " }"
    " operators {"
    "   outputs: \"sleep3\""
    "   name: \"sleep3\""
    "   type: \"Sleep\""
    "   args {"
    "     name: \"ms\""
    "     i: 150"
    "   }"
    " }";
|
||||
|
||||
|
||||
TEST(ParallelNetTest, TestParallelNetTiming) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kSleepNetDefString), &net_def));
  // Below is the parallel version
  Workspace ws;
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Verify());
  auto start_time = std::chrono::system_clock::now();
  EXPECT_TRUE(net->Run());
  // Inspect the time - it should be around 200 milliseconds, since sleep3 can
  // run in parallel with sleep1 and sleep2 (100 + 100 ms chain vs 150 ms).
  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
      std::chrono::system_clock::now() - start_time);
  int milliseconds = duration.count();
  // We should be seeing 200 ms. This adds a little slack time.
  EXPECT_GT(milliseconds, 180);
  EXPECT_LT(milliseconds, 220);
}
|
||||
|
||||
// For sanity check, we also test the sequential time - it should take 0.35
|
||||
// seconds instead since everything has to be sequential.
|
||||
// For sanity check, we also test the sequential time - it should take 0.35
// seconds instead since everything has to be sequential.
TEST(SimpleNetTest, TestSimpleNetTiming) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kSleepNetDefString), &net_def));
  net_def.set_net_type("simple");
  Workspace ws;
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Verify());
  auto start_time = std::chrono::system_clock::now();
  EXPECT_TRUE(net->Run());
  // Inspect the time - it should be around 350 milliseconds, since the simple
  // executor runs sleep1, sleep2 and sleep3 one after another.
  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
      std::chrono::system_clock::now() - start_time);
  int milliseconds = duration.count();
  // We should be seeing 350 ms. This adds a little slack time.
  EXPECT_GT(milliseconds, 330);
  EXPECT_LT(milliseconds, 370);
}
|
||||
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
||||
|
112
caffe2/core/registry.h
Normal file
112
caffe2/core/registry.h
Normal file
@ -0,0 +1,112 @@
|
||||
#ifndef CAFFE2_CORE_REGISTRY_H_
|
||||
#define CAFFE2_CORE_REGISTRY_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Registry is a class that allows one to register classes by a specific
|
||||
// key, usually a string specifying the name. For each key type and object type,
|
||||
// there should be only one single registry responsible for it.
|
||||
|
||||
template <class ObjectType, class... Args>
|
||||
class Registry {
|
||||
public:
|
||||
typedef ObjectType* (*Creator)(Args ...);
|
||||
typedef CaffeMap<string, Creator> CreatorRegistry;
|
||||
|
||||
Registry() : registry_() {}
|
||||
|
||||
void Register(const string& key, Creator creator) {
|
||||
// The if statement below is essentially the same as the following line:
|
||||
// CHECK_EQ(registry_.count(key), 0) << "Key " << key
|
||||
// << " registered twice.";
|
||||
// However, CHECK_EQ depends on google logging, and since registration is
|
||||
// carried out at static initialization time, we do not want to have an
|
||||
// explicit dependency on glog's initialization function.
|
||||
if (registry_.count(key) != 0) {
|
||||
std::cerr << "Key " << key << " already registered." << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
registry_[key] = creator;
|
||||
}
|
||||
|
||||
inline bool Has(const string& key) { return (registry_.count(key) != 0); }
|
||||
|
||||
ObjectType* Create(const string& key, Args ... args) {
|
||||
if (registry_.count(key) == 0) {
|
||||
std::cerr << "Key " << key << " not found." << std::endl;
|
||||
std::cerr << "Available keys:" << std::endl;
|
||||
TEST_PrintRegisteredNames();
|
||||
std::cerr << "Returning null pointer.";
|
||||
return nullptr;
|
||||
}
|
||||
return registry_[key](args...);
|
||||
}
|
||||
|
||||
// This function should only used in test code to inspect registered names.
|
||||
// You should only call this function after google glog is initialized -
|
||||
// do NOT call it in static initializations.
|
||||
void TEST_PrintRegisteredNames() {
|
||||
std::vector<string> keys;
|
||||
for (const auto& it : registry_) {
|
||||
keys.push_back(it.first);
|
||||
}
|
||||
std::sort(keys.begin(), keys.end());
|
||||
for (const string& key : keys) {
|
||||
std::cout << "Registry key: " << key << std::endl;
|
||||
}
|
||||
std::cout << "A total of " << keys.size() << " registered keys."
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
CreatorRegistry registry_;
|
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Registry);
|
||||
};
|
||||
|
||||
// Registerer's constructor performs the actual registration; the REGISTER_*
// macros create a global Registerer object so registration happens during
// static initialization.
template <class ObjectType, class... Args>
class Registerer {
 public:
  Registerer(const string& key, Registry<ObjectType, Args...>* registry,
             typename Registry<ObjectType, Args...>::Creator creator) {
    registry->Register(key, creator);
  }

  // Default creator: heap-allocates a DerivedType constructed from args.
  template <class DerivedType>
  static ObjectType* DefaultCreator(Args ... args) {
    return new DerivedType(args...);
  }
};
|
||||
|
||||
|
||||
// Declares the accessor function for a registry plus a matching Registerer
// typedef used by the REGISTER_* macros below.
#define DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
  Registry<ObjectType, __VA_ARGS__>* RegistryName(); \
  typedef Registerer<ObjectType, __VA_ARGS__> Registerer##RegistryName;

// Defines the accessor; the registry is held as a function-local static so
// it is initialized on first use, independent of static init order.
#define DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
  Registry<ObjectType, __VA_ARGS__>* RegistryName() { \
    static Registry<ObjectType, __VA_ARGS__>* registry = \
        new Registry<ObjectType, __VA_ARGS__>(); \
    return registry; \
  }
// Note(Yangqing): The __VA_ARGS__ below allows one to specify a templated
// creator with comma in its templated arguments.
#define REGISTER_CREATOR(RegistryName, key, ...) \
  Registerer##RegistryName g_##RegistryName##_##key( \
      #key, RegistryName(), __VA_ARGS__);

// Note(Yangqing): The __VA_ARGS__ below allows one to specify a templated class
// with comma in its templated arguments.
#define REGISTER_CLASS(RegistryName, key, ...) \
  Registerer##RegistryName g_##RegistryName##_##key( \
      #key, RegistryName(), \
      Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
|
||||
|
||||
} // namespace caffe2
|
||||
#endif // CAFFE2_CORE_REGISTRY_H_
|
48
caffe2/core/registry_test.cc
Normal file
48
caffe2/core/registry_test.cc
Normal file
@ -0,0 +1,48 @@
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include "caffe2/core/registry.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// A minimal base class used to exercise the registry machinery.
class Foo {
 public:
  explicit Foo(int x) { LOG(INFO) << "Foo " << x; }
};

DECLARE_REGISTRY(FooRegistry, Foo, int);
DEFINE_REGISTRY(FooRegistry, Foo, int);
// Convenience wrapper: registers clsname in FooRegistry under its own name.
#define REGISTER_FOO(clsname) \
  REGISTER_CLASS(FooRegistry, clsname, clsname)
|
||||
|
||||
// Two registered subclasses; each logs from its constructor so creation is
// observable in the test output.
class Bar : public Foo {
 public:
  explicit Bar(int x) : Foo(x) { LOG(INFO) << "Bar " << x; }
};
REGISTER_FOO(Bar);

class AnotherBar : public Foo {
 public:
  explicit AnotherBar(int x) : Foo(x) {
    LOG(INFO) << "AnotherBar " << x;
  }
};
REGISTER_FOO(AnotherBar);
|
||||
|
||||
// Registered creators should produce objects for their keys.
TEST(RegistryTest, CanRunCreator) {
  unique_ptr<Foo> bar(FooRegistry()->Create("Bar", 1));
  EXPECT_TRUE(bar != nullptr) << "Cannot create bar.";
  unique_ptr<Foo> another_bar(FooRegistry()->Create("AnotherBar", 1));
  EXPECT_TRUE(another_bar != nullptr);
}

// An unregistered key should yield a null pointer, not a crash.
TEST(RegistryTest, ReturnNullOnNonExistingCreator) {
  EXPECT_EQ(
      FooRegistry()->Create("Non-existing bar", 1), nullptr);
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
11
caffe2/core/typeid.cc
Normal file
11
caffe2/core/typeid.cc
Normal file
@ -0,0 +1,11 @@
|
||||
#include "caffe2/core/typeid.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace caffe2 {
namespace internal {

// The single global map from a TypeId to the typeid-based name of the
// registered class; populated by TypeIdRegisterer instances (see typeid.h).
std::map<TypeId, string> g_caffe2_type_name_map;

}  // namespace internal
}  // namespace caffe2
|
63
caffe2/core/typeid.h
Normal file
63
caffe2/core/typeid.h
Normal file
@ -0,0 +1,63 @@
|
||||
#ifndef CAFFE2_CORE_TYPEID_H_
|
||||
#define CAFFE2_CORE_TYPEID_H_
|
||||
|
||||
#include <map>
|
||||
#include <typeinfo>
|
||||
|
||||
#include "caffe2/core/common.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace internal {
|
||||
|
||||
// A TypeId is an int64_t derived from a pointer value (see
// TypeIdRegisterer::id() below), hence this size check.
static_assert(sizeof(void*) <= sizeof(int64_t),
              "This does not happen often, but int64_t is not enough for "
              "pointers on this platform.");
typedef int64_t TypeId;
extern std::map<TypeId, string> g_caffe2_type_name_map;
// Reserved id meaning "no known type"; real ids are object addresses and so
// are never 0.
const TypeId gUnknownType = 0;

// One instance of this class exists per registered type (as a function-local
// static inside GetTypeId<T>), and its constructor records the type's
// typeid name in the global name map.
template <class T>
class TypeIdRegisterer {
 public:
  TypeIdRegisterer() {
    CHECK_EQ(g_caffe2_type_name_map.count(id()), 0)
        << "Registerer instantiated twice.";
    // typeid(T).name() is implementation-defined (often mangled); it is only
    // used for diagnostics via TypeName().
    g_caffe2_type_name_map[id()] = typeid(T).name();
  }
  // The id is the address of this object's member array, which is unique per
  // instantiated type because there is one static registerer per type.
  inline TypeId id() {
    return reinterpret_cast<TypeId>(type_id_bit);
  }

 private:
  // Never read; only its unique address matters.
  bool type_id_bit[1];
};
|
||||
|
||||
// id = TypeId<T>() gives a unique type id for the given class, which can be
|
||||
// verified by IsType<T>(id). This allows us to check the type of object
|
||||
// pointers during run-time.
|
||||
// id = GetTypeId<T>() gives a unique type id for the given class, which can be
// verified by IsTypeId<T>(id). This allows us to check the type of object
// pointers during run-time.
template <class T>
TypeId GetTypeId() {
  // The function-local static has a unique, stable address per T, which
  // serves as the id (see TypeIdRegisterer::id()).
  static TypeIdRegisterer<T> reg;
  return reg.id();
}

// Returns true if the given id is the id of type T.
template <class T>
inline bool IsTypeId(TypeId id) {
  return (id == GetTypeId<T>());
}
|
||||
|
||||
// Returns the registered (typeid-based) name for the given type id, or
// "UNKNOWN" when the id is gUnknownType or has never been registered.
// Uses find() instead of operator[] so that querying an unregistered id does
// not silently insert an empty entry into the global name map.
inline string TypeName(TypeId id) {
  if (id == gUnknownType) return "UNKNOWN";
  auto it = g_caffe2_type_name_map.find(id);
  return (it == g_caffe2_type_name_map.end()) ? string("UNKNOWN") : it->second;
}

// Convenience overload: the registered name of type T.
template <class T>
inline string TypeName() {
  return TypeName(GetTypeId<T>());
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_TYPEID_H_
|
27
caffe2/core/types.h
Normal file
27
caffe2/core/types.h
Normal file
@ -0,0 +1,27 @@
|
||||
#ifndef CAFFE2_CORE_TYPES_H_
|
||||
#define CAFFE2_CORE_TYPES_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Storage orders that are often used in the image applications.
|
||||
// Storage orders that are often used in the image applications.
enum StorageOrder {
  UNKNOWN = 0,
  NHWC = 1,
  NCHW = 2,
};

// Maps a storage-order name ("NHWC" or "NCHW") to its enum value; any other
// string yields StorageOrder::UNKNOWN.
inline StorageOrder StringToStorageOrder(const std::string& order_name) {
  if (order_name == "NHWC") return StorageOrder::NHWC;
  if (order_name == "NCHW") return StorageOrder::NCHW;
  return StorageOrder::UNKNOWN;
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_TYPES_H_
|
177
caffe2/core/workspace.cc
Normal file
177
caffe2/core/workspace.cc
Normal file
@ -0,0 +1,177 @@
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/net.h"
|
||||
#include "caffe2/core/workspace.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Returns the named blob, creating an empty one first if it does not exist.
// The workspace retains ownership of the returned pointer.
Blob* Workspace::CreateBlob(const string& name) {
  if (HasBlob(name)) {
    VLOG(1) << "Blob " << name << " already exists. Skipping.";
  } else {
    VLOG(1) << "Creating blob " << name;
    (*blob_map_)[name] = unique_ptr<Blob>(new Blob());
  }
  return (*blob_map_)[name].get();
}

// Returns the named blob, or nullptr (after logging all known blob names)
// when it does not exist. Never creates a blob.
const Blob* Workspace::GetBlob(const string& name) const {
  if (!HasBlob(name)) {
    LOG(WARNING) << "Blob " << name << " not in the workspace.";
    // TODO(Yangqing): do we want to always print out the list of blobs here?
    LOG(WARNING) << "Current blobs:";
    for (const auto& entry : *blob_map_) {
      LOG(WARNING) << entry.first;
    }
    return nullptr;
  } else {
    // operator[] cannot insert here, since HasBlob(name) is known true.
    return (*blob_map_)[name].get();
  }
}
|
||||
|
||||
// Creates (or replaces) a named network from net_def so it can later be run
// with RunNet(). Returns false if construction or verification fails; a
// failed construction is also removed from the map.
bool Workspace::CreateNet(const NetDef& net_def) {
  CHECK(net_def.has_name()) << "Net definition should have a name.";
  if (net_map_.count(net_def.name()) > 0) {
    LOG(WARNING) << "Overwriting existing network of the same name.";
    // Note(Yangqing): Why do we explicitly erase it here? Some components of
    // the old network, such as a opened LevelDB, may prevent us from creating a
    // new network before the old one is deleted. Thus we will need to first
    // erase the old one before the new one can be constructed.
    net_map_.erase(net_def.name());
  }
  // Create a new net with its name.
  LOG(INFO) << "Initializing network " << net_def.name();
  net_map_[net_def.name()] =
      unique_ptr<NetBase>(caffe2::CreateNet(net_def, this));
  if (net_map_[net_def.name()].get() == nullptr) {
    LOG(ERROR) << "Error when creating the network.";
    net_map_.erase(net_def.name());
    return false;
  }
  if (!net_map_[net_def.name()]->Verify()) {
    LOG(ERROR) << "Error when setting up network " << net_def.name();
    return false;
  }
  return true;
}

// Removes the named network from the workspace, if present.
void Workspace::DeleteNet(const string& name) {
  if (net_map_.count(name)) {
    net_map_.erase(name);
  }
}

// Runs a previously created network once; returns false when the network
// does not exist or its Run() fails.
bool Workspace::RunNet(const string& name) {
  if (!net_map_.count(name)) {
    LOG(ERROR) << "Network " << name << " does not exist yet.";
    return false;
  }
  return net_map_[name]->Run();
}
|
||||
|
||||
// Creates a temporary operator from op_def, verifies it, runs it once, and
// discards it. Returns false on verification or run failure.
bool Workspace::RunOperatorOnce(const OperatorDef& op_def) {
  std::unique_ptr<OperatorBase> op(CreateOperator(op_def, this));
  if (!op->Verify()) {
    LOG(ERROR) << "Error when setting up operator " << op_def.name();
    return false;
  }
  if (!op->Run()) {
    LOG(ERROR) << "Error when running operator " << op_def.name();
    return false;
  }
  return true;
}
// Creates a temporary network from net_def, verifies it, runs it once, and
// discards it. Unlike RunNet(), no net object persists across calls (see the
// note in workspace.h about databases and random number generators).
bool Workspace::RunNetOnce(const NetDef& net_def) {
  std::unique_ptr<NetBase> net(caffe2::CreateNet(net_def, this));
  if (!net->Verify()) {
    LOG(ERROR) << "Error when setting up network " << net_def.name();
    return false;
  }
  if (!net->Run()) {
    LOG(ERROR) << "Error when running network " << net_def.name();
    return false;
  }
  return true;
}
|
||||
|
||||
// Executes a full plan: first instantiates every network in the plan, then
// runs each top-level execution step (recursively) in order, logging the
// duration of each step. Returns false as soon as any part fails.
bool Workspace::RunPlan(const PlanDef& plan) {
  LOG(INFO) << "Started executing plan.";
  if (plan.networks_size() == 0 || plan.execution_steps_size() == 0) {
    LOG(WARNING) << "Nothing to run - did you define a correct plan?";
    // We will do nothing, but the plan is still legal so we will return true.
    return true;
  }
  LOG(INFO) << "Initializing networks.";

  for (const NetDef& net_def : plan.networks()) {
    if (!CreateNet(net_def)) {
      LOG(ERROR) << "Failed initializing the networks.";
      return false;
    }
  }
  // NOTE(review): clock() measures CPU time, not wall time, so these timings
  // undercount sleeps and concurrent work - confirm if wall time was meant.
  clock_t start_time = clock();
  for (const ExecutionStep& step : plan.execution_steps()) {
    clock_t step_start_time = clock();
    if (!ExecuteStepRecursive(step)) {
      LOG(ERROR) << "Failed initializing step " << step.name();
      return false;
    }
    LOG(INFO) << "Step " << step.name() << " took "
              << static_cast<float>(clock() - step_start_time) / CLOCKS_PER_SEC
              << " seconds.";
  }
  LOG(INFO) << "Total plan took "
            << static_cast<float>(clock() - start_time) / CLOCKS_PER_SEC
            << " seconds.";
  LOG(INFO) << "Plan executed successfully.";
  return true;
}
|
||||
|
||||
// Runs one ExecutionStep. A step must contain either substeps or networks,
// not both: with substeps, each iteration recursively runs every substep in
// order; with networks, each iteration runs every named (previously created)
// network in order. The iteration count defaults to 1 when unspecified.
bool Workspace::ExecuteStepRecursive(const ExecutionStep& step) {
  LOG(INFO) << "Running execution step " << step.name();
  if (!(step.substeps_size() == 0 || step.networks_size() == 0)) {
    LOG(ERROR) << "An ExecutionStep should either have substeps or networks "
               << "but not both.";
    return false;
  }

  if (step.substeps_size()) {
    int iterations = step.has_iterations() ? step.iterations() : 1;
    for (int i = 0; i < iterations; ++i) {
      for (const ExecutionStep& substep : step.substeps()) {
        if (!ExecuteStepRecursive(substep)) {
          return false;
        }
      }
    }
    return true;
  } else {
    // If this ExecutionStep just contains nets, we can directly run it.
    vector<NetBase*> networks;
    // Collect the networks to run; fail fast on any unknown name before
    // executing anything.
    for (const string& network_name : step.networks()) {
      if (!net_map_.count(network_name)) {
        LOG(ERROR) << "Network " << network_name << " not found.";
        return false;
      }
      VLOG(1) << "Going to execute network " << network_name;
      networks.push_back(net_map_[network_name].get());
    }
    int iterations = step.has_iterations() ? step.iterations() : 1;
    VLOG(1) << "Executing networks for " << iterations << " iterations.";
    for (int iter = 0; iter < iterations; ++iter) {
      VLOG(1) << "Executing network iteration " << iter;
      for (NetBase* network : networks) {
        if (!network->Run()) {
          return false;
        }
      }
    }
  }
  return true;
}
|
||||
|
||||
} // namespace caffe2
|
93
caffe2/core/workspace.h
Normal file
93
caffe2/core/workspace.h
Normal file
@ -0,0 +1,93 @@
|
||||
#ifndef CAFFE2_CORE_WORKSPACE_H_
|
||||
#define CAFFE2_CORE_WORKSPACE_H_
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
#include "caffe2/core/blob.h"
|
||||
#include "caffe2/core/common.h"
|
||||
#include "caffe2/core/registry.h"
|
||||
#include "caffe2/proto/caffe2.pb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
class NetBase;
|
||||
|
||||
// Workspace is a class that holds all the blobs in this run and also runs
// the operators. Blobs and nets are owned by the workspace via unique_ptr.
class Workspace {
 public:
  // Owning map types: destroying the map destroys the contents.
  typedef CaffeMap<string, unique_ptr<Blob> > BlobMap;
  typedef CaffeMap<string, unique_ptr<NetBase> > NetMap;
  // Initializes an empty workspace.
  Workspace() : blob_map_(new BlobMap()), root_folder_(".") {}
  // Initializes an empty workspace rooted at the given folder.
  explicit Workspace(const string& root_folder)
      : blob_map_(new BlobMap()), net_map_(), root_folder_(root_folder) {}
  ~Workspace() {}

  // Return a list of blob names. This may be a bit slow since it will involve
  // creation of multiple temp variables - if possible, use HasBlob() or
  // GetBlob() below with given names.
  vector<string> Blobs() {
    vector<string> names;
    for (auto& entry : *blob_map_) {
      names.push_back(entry.first);
    }
    return names;
  }
  // Return the root folder of the workspace.
  const string& RootFolder() { return root_folder_; }
  // Returns true if a blob with the given name exists.
  inline bool HasBlob(const string& name) const {
    return blob_map_->count(name);
  }
  // Creates (or returns the existing) blob of the given name.
  Blob* CreateBlob(const string& name);
  // Returns the blob of the given name, or nullptr if it does not exist.
  const Blob* GetBlob(const string& name) const;
  // Non-const overload, implemented in terms of the const version.
  inline Blob* GetBlob(const string& name) {
    return const_cast<Blob*>(
        static_cast<const Workspace*>(this)->GetBlob(name));
  }

  // CreateNet creates a network in the current workspace. It can then
  // be referred to by RunNet().
  bool CreateNet(const NetDef& net_def);
  void DeleteNet(const string& net_name);
  bool RunNet(const string& net_name);
  // Returns the names of all nets created in this workspace.
  vector<string> Nets() {
    vector<string> names;
    for (auto& entry : net_map_) {
      names.push_back(entry.first);
    }
    return names;
  }

  // RunPlan runs a plan that has multiple nets and execution steps.
  bool RunPlan(const PlanDef& plan_def);

  // RunOperatorOnce and RunNetOnce runs an operator or net once. The difference
  // between RunNet and RunNetOnce lies in the fact that RunNet allows you to
  // have a persistent net object, while RunNetOnce creates a net and discards
  // it on the fly - this may make things like database read and random number
  // generators repeat the same thing over multiple calls.
  bool RunOperatorOnce(const OperatorDef& op_def);
  bool RunNetOnce(const NetDef& net_def);


 protected:
  // Recursively executes a single ExecutionStep of a plan (called by
  // RunPlan).
  bool ExecuteStepRecursive(const ExecutionStep& execution);

 private:
  // If a workspace is shared with another one, the blob_map_ is going to be
  // shared, but net_map_ will not be.
  // TODO(Yangqing): Are we really going to share workspaces? If not, let's
  // remove this unnecessity.
  unique_ptr<BlobMap> blob_map_;
  NetMap net_map_;
  string root_folder_;
  DISABLE_COPY_AND_ASSIGN(Workspace);
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_CORE_WORKSPACE_H_
|
50
caffe2/core/workspace_test.cc
Normal file
50
caffe2/core/workspace_test.cc
Normal file
@ -0,0 +1,50 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Dummy type used only to verify Blob type checks against a non-matching
// type.
class Foo {};

// Exercises blob creation, lookup, and type checking on a Workspace.
TEST(WorkspaceTest, BlobAccess) {
  Workspace ws;

  EXPECT_FALSE(ws.HasBlob("nonexisting"));
  EXPECT_EQ(ws.GetBlob("nonexisting"), nullptr);

  // GetBlob() does not create the blob; CreateBlob() does.
  EXPECT_EQ(ws.GetBlob("newblob"), nullptr);
  EXPECT_NE(nullptr, ws.CreateBlob("newblob"));
  EXPECT_NE(nullptr, ws.GetBlob("newblob"));
  EXPECT_TRUE(ws.HasBlob("newblob"));

  // Blobs with different names should still not be created.
  EXPECT_FALSE(ws.HasBlob("nonexisting"));
  EXPECT_EQ(ws.GetBlob("nonexisting"), nullptr);

  // Check if the returned Blob is OK for all operations
  Blob* blob = ws.GetBlob("newblob");
  int* int_unused UNUSED_VARIABLE = blob->GetMutable<int>();
  EXPECT_TRUE(blob->IsType<int>());
  EXPECT_FALSE(blob->IsType<Foo>());
  EXPECT_NE(&blob->Get<int>(), nullptr);

  // Re-creating the blob does not change the content as long as it already
  // exists.
  EXPECT_NE(nullptr, ws.CreateBlob("newblob"));
  EXPECT_TRUE(blob->IsType<int>());
  EXPECT_FALSE(blob->IsType<Foo>());
  // When not null, we should only call with the right type.
  EXPECT_NE(&blob->Get<int>(), nullptr);
}

// An empty plan should run successfully and do nothing.
TEST(WorkspaceTest, RunEmptyPlan) {
  PlanDef plan_def;
  Workspace ws;
  EXPECT_TRUE(ws.RunPlan(plan_def));
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
|
33
caffe2/db/BREW
Normal file
33
caffe2/db/BREW
Normal file
@ -0,0 +1,33 @@
|
||||
# This folder contains database implementations that have third_party
# dependencies.
|
||||
|
||||
cc_library(
|
||||
name = "db",
|
||||
srcs = [
|
||||
"leveldb.cc",
|
||||
"lmdb.cc",
|
||||
],
|
||||
deps = [
|
||||
":zmqdb",
|
||||
"//caffe2/core:core",
|
||||
"//third_party/glog:glog",
|
||||
"//third_party/leveldb:leveldb",
|
||||
"//third_party/liblmdb:lmdb",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "zmqdb",
|
||||
srcs = [
|
||||
"zmqdb.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//third_party/glog:glog",
|
||||
"//third_party/leveldb:leveldb",
|
||||
"//third_party/liblmdb:lmdb",
|
||||
"//third_party/libzmq:libzmq",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
82
caffe2/db/leveldb.cc
Normal file
82
caffe2/db/leveldb.cc
Normal file
@ -0,0 +1,82 @@
|
||||
#include "caffe2/core/db.h"
|
||||
#include "glog/logging.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
// Cursor over a leveldb database. Takes ownership of the iterator and
// positions it at the first entry on construction.
class LevelDBCursor : public Cursor {
 public:
  explicit LevelDBCursor(leveldb::Iterator* iter)
      : iter_(iter) { SeekToFirst(); }
  ~LevelDBCursor() { delete iter_; }
  void SeekToFirst() override { iter_->SeekToFirst(); }
  void Next() override { iter_->Next(); }
  string key() override { return iter_->key().ToString(); }
  string value() override { return iter_->value().ToString(); }
  bool Valid() override { return iter_->Valid(); }

 private:
  leveldb::Iterator* iter_;  // owned; freed in the destructor.
};
|
||||
|
||||
// Transaction that batches Put() calls into a leveldb WriteBatch and writes
// them out on Commit(). The destructor commits any pending writes.
class LevelDBTransaction : public Transaction {
 public:
  explicit LevelDBTransaction(leveldb::DB* db) : db_(db) {
    CHECK_NOTNULL(db_);
    batch_.reset(new leveldb::WriteBatch());
  }
  ~LevelDBTransaction() { Commit(); }
  void Put(const string& key, const string& value) override {
    batch_->Put(key, value);
  }
  void Commit() override {
    leveldb::Status status = db_->Write(leveldb::WriteOptions(), batch_.get());
    // Start a fresh batch so the transaction can be reused after Commit().
    batch_.reset(new leveldb::WriteBatch());
    CHECK(status.ok()) << "Failed to write batch to leveldb "
                       << std::endl << status.ToString();
  }

 private:
  leveldb::DB* db_;  // not owned; the DB object outlives the transaction.
  std::unique_ptr<leveldb::WriteBatch> batch_;

  DISABLE_COPY_AND_ASSIGN(LevelDBTransaction);
};
|
||||
|
||||
// DB implementation backed by leveldb.
class LevelDB : public DB {
 public:
  LevelDB(const string& source, Mode mode) : DB(source, mode) {
    leveldb::Options options;
    options.block_size = 65536;
    options.write_buffer_size = 268435456;  // 256MB write buffer.
    options.max_open_files = 100;
    // NEW must create a fresh database; READ must not create one.
    options.error_if_exists = mode == NEW;
    options.create_if_missing = mode != READ;
    leveldb::DB* db_temp;
    leveldb::Status status = leveldb::DB::Open(options, source, &db_temp);
    CHECK(status.ok()) << "Failed to open leveldb " << source
                       << std::endl << status.ToString();
    db_.reset(db_temp);
    LOG(INFO) << "Opened leveldb " << source;
  }

  void Close() override { db_.reset(); }
  Cursor* NewCursor() override {
    return new LevelDBCursor(db_->NewIterator(leveldb::ReadOptions()));
  }
  Transaction* NewTransaction() override {
    return new LevelDBTransaction(db_.get());
  }

 private:
  std::unique_ptr<leveldb::DB> db_;
};

REGISTER_CAFFE2_DB(LevelDB, LevelDB);
// For lazy-minded, one can also call with lower-case name.
REGISTER_CAFFE2_DB(leveldb, LevelDB);
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
136
caffe2/db/lmdb.cc
Normal file
136
caffe2/db/lmdb.cc
Normal file
@ -0,0 +1,136 @@
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "caffe2/core/db.h"
|
||||
#include "glog/logging.h"
|
||||
#include "lmdb.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
constexpr size_t LMDB_MAP_SIZE = 1099511627776; // 1 TB
|
||||
|
||||
inline void MDB_CHECK(int mdb_status) {
|
||||
CHECK_EQ(mdb_status, MDB_SUCCESS) << mdb_strerror(mdb_status);
|
||||
}
|
||||
|
||||
// Read-only cursor over an lmdb database. Opens its own read-only
// transaction and cursor, and releases them in the destructor.
class LMDBCursor : public Cursor {
 public:
  explicit LMDBCursor(MDB_env* mdb_env)
      : mdb_env_(mdb_env), valid_(false) {
    MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_));
    MDB_CHECK(mdb_dbi_open(mdb_txn_, NULL, 0, &mdb_dbi_));
    MDB_CHECK(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_));
    SeekToFirst();
  }
  virtual ~LMDBCursor() {
    mdb_cursor_close(mdb_cursor_);
    mdb_dbi_close(mdb_env_, mdb_dbi_);
    mdb_txn_abort(mdb_txn_);
  }
  void SeekToFirst() override { Seek(MDB_FIRST); }
  void Next() override { Seek(MDB_NEXT); }
  string key() override {
    return string(static_cast<const char*>(mdb_key_.mv_data), mdb_key_.mv_size);
  }
  string value() override {
    return string(static_cast<const char*>(mdb_value_.mv_data),
                  mdb_value_.mv_size);
  }
  bool Valid() override { return valid_; }

 private:
  // Moves the cursor with the given op and refreshes mdb_key_/mdb_value_.
  // MDB_NOTFOUND marks the cursor invalid instead of failing.
  void Seek(MDB_cursor_op op) {
    int mdb_status = mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, op);
    if (mdb_status == MDB_NOTFOUND) {
      valid_ = false;
    } else {
      MDB_CHECK(mdb_status);
      valid_ = true;
    }
  }

  MDB_env* mdb_env_;  // not owned.
  MDB_txn* mdb_txn_;
  MDB_dbi mdb_dbi_;
  MDB_cursor* mdb_cursor_;
  MDB_val mdb_key_, mdb_value_;
  bool valid_;
};
|
||||
|
||||
class LMDBTransaction final : public Transaction {
|
||||
public:
|
||||
explicit LMDBTransaction(MDB_env* mdb_env)
|
||||
: mdb_env_(mdb_env) {
|
||||
MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn_));
|
||||
MDB_CHECK(mdb_dbi_open(mdb_txn_, NULL, 0, &mdb_dbi_));
|
||||
}
|
||||
~LMDBTransaction() {
|
||||
MDB_CHECK(mdb_txn_commit(mdb_txn_));
|
||||
mdb_dbi_close(mdb_env_, mdb_dbi_);
|
||||
mdb_txn_abort(mdb_txn_);
|
||||
}
|
||||
void Put(const string& key, const string& value) override;
|
||||
void Commit() override {
|
||||
MDB_CHECK(mdb_txn_commit(mdb_txn_));
|
||||
mdb_dbi_close(mdb_env_, mdb_dbi_);
|
||||
mdb_txn_abort(mdb_txn_);
|
||||
// Begin a new transaction.
|
||||
MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn_));
|
||||
MDB_CHECK(mdb_dbi_open(mdb_txn_, NULL, 0, &mdb_dbi_));
|
||||
}
|
||||
|
||||
private:
|
||||
MDB_env* mdb_env_;
|
||||
MDB_dbi mdb_dbi_;
|
||||
MDB_txn* mdb_txn_;
|
||||
|
||||
DISABLE_COPY_AND_ASSIGN(LMDBTransaction);
|
||||
};
|
||||
|
||||
// DB implementation backed by lmdb. The environment is opened in the
// constructor (defined below) and released via Close().
class LMDB : public DB {
 public:
  LMDB(const string& source, Mode mode);
  virtual ~LMDB() { Close(); }
  void Close() override {
    // Idempotent: a second Close() (e.g. from the destructor) is a no-op.
    if (mdb_env_ != NULL) {
      mdb_env_close(mdb_env_);
      mdb_env_ = NULL;
    }
  }
  Cursor* NewCursor() override { return new LMDBCursor(mdb_env_); }
  Transaction* NewTransaction() override {
    return new LMDBTransaction(mdb_env_);
  }

 private:
  MDB_env* mdb_env_;
};
|
||||
|
||||
// Opens (and, for NEW mode, first creates the directory for) the lmdb
// environment at `source`. READ mode opens read-only without thread-local
// slot restrictions (MDB_NOTLS).
LMDB::LMDB(const string& source, Mode mode) : DB(source, mode) {
  MDB_CHECK(mdb_env_create(&mdb_env_));
  MDB_CHECK(mdb_env_set_mapsize(mdb_env_, LMDB_MAP_SIZE));
  if (mode == NEW) {
    // Bug fix: the original message was "mkdir <source>failed" with no
    // separating space.
    CHECK_EQ(mkdir(source.c_str(), 0744), 0)
        << "mkdir " << source << " failed";
  }
  int flags = 0;
  if (mode == READ) {
    flags = MDB_RDONLY | MDB_NOTLS;
  }
  MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664));
  LOG(INFO) << "Opened lmdb " << source;
}
|
||||
|
||||
// Stages a key/value pair into the current write transaction.
void LMDBTransaction::Put(const string& key, const string& value) {
  MDB_val mdb_key, mdb_value;
  // MDB_val does not own its data; const_cast is needed because mv_data is
  // a non-const void* even for read-side arguments of mdb_put.
  mdb_key.mv_data = const_cast<char*>(key.data());
  mdb_key.mv_size = key.size();
  mdb_value.mv_data = const_cast<char*>(value.data());
  mdb_value.mv_size = value.size();
  MDB_CHECK(mdb_put(mdb_txn_, mdb_dbi_, &mdb_key, &mdb_value, 0));
}

REGISTER_CAFFE2_DB(LMDB, LMDB);
// Also register under the lower-case name.
REGISTER_CAFFE2_DB(lmdb, LMDB);
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
103
caffe2/db/zmqdb.cc
Normal file
103
caffe2/db/zmqdb.cc
Normal file
@ -0,0 +1,103 @@
|
||||
#include <errno.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "caffe2/core/db.h"
|
||||
#include "glog/logging.h"
|
||||
#include "zmq.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
typedef char ZmqCommand;
|
||||
typedef int ZmqMessageSize;
|
||||
const ZmqCommand kQueryMessageSize = 's';
|
||||
const ZmqCommand kGet = 'g';
|
||||
|
||||
class ZmqDBCursor : public Cursor {
|
||||
public:
|
||||
explicit ZmqDBCursor(void* requester)
|
||||
: requester_(requester), buffer_(nullptr), received_size_(0),
|
||||
buffer_size_(0) {
|
||||
// Figure out the buffer size.
|
||||
CHECK_EQ(
|
||||
zmq_send(requester_, &kQueryMessageSize, sizeof(ZmqCommand), 0),
|
||||
sizeof(ZmqCommand))
|
||||
<< "Incorrect zmq communication when querying message size.";
|
||||
CHECK_EQ(
|
||||
zmq_recv(requester_, &buffer_size_, sizeof(ZmqMessageSize), 0),
|
||||
sizeof(ZmqMessageSize))
|
||||
<< "Incorrect zmq communication when fetching message size.";
|
||||
CHECK_GT(buffer_size_, 0) << "Incorrect buffer size obtained.";
|
||||
buffer_.reset(new char[buffer_size_]);
|
||||
// obtain the first value.
|
||||
Next();
|
||||
}
|
||||
|
||||
~ZmqDBCursor() {}
|
||||
void SeekToFirst() override { /* do nothing */ }
|
||||
void Next() override {
|
||||
CHECK_EQ(
|
||||
zmq_send(requester_, &kGet, sizeof(ZmqCommand), 0), sizeof(ZmqCommand))
|
||||
<< "Incorrect zmq communication when sending request.";
|
||||
received_size_ = zmq_recv(requester_, buffer_.get(), buffer_size_, 0);
|
||||
CHECK_GT(received_size_, 0) << "Received no message.";
|
||||
}
|
||||
string key() override { return ""; }
|
||||
string value() override {
|
||||
return string(buffer_.get(), received_size_);
|
||||
}
|
||||
virtual bool Valid() { return true; }
|
||||
|
||||
private:
|
||||
void* requester_;
|
||||
unique_ptr<char[]> buffer_;
|
||||
int received_size_;
|
||||
ZmqMessageSize buffer_size_;
|
||||
};
|
||||
|
||||
|
||||
class ZmqDB : public DB {
|
||||
public:
|
||||
ZmqDB(const string& source, Mode mode)
|
||||
: DB(source, mode), context_(zmq_ctx_new()),
|
||||
requester_(zmq_socket(context_, ZMQ_REQ)) {
|
||||
CHECK_EQ(mode, READ) << "ZeroMQ DB only supports read mode.";
|
||||
VLOG(1) << "Connecting to ZeroMQ server: " << source;
|
||||
int ret = zmq_connect(requester_, source.c_str());
|
||||
CHECK_EQ(ret, 0) << "Error in connecting to zmq server. "
|
||||
<< "Error is: " << errno;
|
||||
VLOG(1) << "Opened ZeroMQ server: " << source;
|
||||
}
|
||||
|
||||
~ZmqDB() { Close(); }
|
||||
|
||||
void Close() override {
|
||||
if (!requester_) {
|
||||
zmq_close(requester_);
|
||||
requester_ = nullptr;
|
||||
zmq_ctx_destroy(context_);
|
||||
context_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
Cursor* NewCursor() override {
|
||||
return new ZmqDBCursor(requester_);
|
||||
}
|
||||
Transaction* NewTransaction() override {
|
||||
// TODO(Yangqing): Do I really need to just do log fatal?
|
||||
LOG(FATAL) << "ZeroMQ DB does not support writing with a transaction.";
|
||||
return nullptr; // dummy placeholder to suppress old compiler warnings.
|
||||
}
|
||||
|
||||
private:
|
||||
void* context_;
|
||||
void* requester_;
|
||||
};
|
||||
|
||||
REGISTER_CAFFE2_DB(ZmqDB, ZmqDB);
|
||||
// For lazy-minded, one can also call with lower-case name.
|
||||
REGISTER_CAFFE2_DB(zmqdb, ZmqDB);
|
||||
|
||||
} // namespace db
|
||||
} // namespace caffe2
|
17
caffe2/end_to_end_test/BREW
Normal file
17
caffe2/end_to_end_test/BREW
Normal file
@ -0,0 +1,17 @@
|
||||
cc_test(
|
||||
name = "end_to_end_tests",
|
||||
srcs = [
|
||||
"end_to_end_tests.cc",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/db:db",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/operators:core_ops_gpu",
|
||||
"//caffe2/operators:core_ops_cudnn",
|
||||
"//caffe2/utils:proto_utils",
|
||||
"//data/toy:toy_models",
|
||||
"//data/mnist:mnist_models",
|
||||
"//gtest:gtest_main",
|
||||
],
|
||||
)
|
189
caffe2/end_to_end_test/end_to_end_tests.cc
Normal file
189
caffe2/end_to_end_test/end_to_end_tests.cc
Normal file
@ -0,0 +1,189 @@
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/utils/proto_utils.h"
|
||||
#include "gflags/gflags.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
DECLARE_string(caffe_test_root);
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
const char kToyRegressionTestPlanPath[] = "/data/toy/toy_regression.pbtxt";
|
||||
const char kMNISTLinearClassificationPath[] =
|
||||
"/data/mnist/linear_classifier_plan.pbtxt";
|
||||
const char kMNISTTwoLayerReluClassificationPath[] =
|
||||
"/data/mnist/mnist_relu_network.pbtxt";
|
||||
const char kMNISTLeNetClassificationPath[] =
|
||||
"/data/mnist/mnist_lenet.pbtxt";
|
||||
const char kMNISTLeNetClassificationGPUPath[] =
|
||||
"/data/mnist/mnist_lenet_gpu.pbtxt";
|
||||
const char kMNISTLeNetNHWCClassificationPath[] =
|
||||
"/data/mnist/mnist_lenet_nhwc.pbtxt";
|
||||
const char kMNISTLeNetNHWCClassificationGPUPath[] =
|
||||
"/data/mnist/mnist_lenet_nhwc_gpu.pbtxt";
|
||||
const char kMNISTLeNetGroupConvClassificationPath[] =
|
||||
"/data/mnist/mnist_lenet_group_convolution.pbtxt";
|
||||
const char kMNISTLeNetGroupConvNHWCClassificationPath[] =
|
||||
"/data/mnist/mnist_lenet_group_convolution_nhwc.pbtxt";
|
||||
|
||||
|
||||
// Checks that blobs `name_a` and `name_b` in `ws` both hold a
// Tensor<dtype, DeviceContext> of the same size and that corresponding
// elements agree within `relative_error` (applied as an absolute
// EXPECT_NEAR bound per element).
template <typename dtype, class DeviceContext>
void ExpectTensorEquivalence(const Workspace& ws, const string& name_a,
                             const string& name_b,
                             const float relative_error) {
  const Blob* a = ws.GetBlob(name_a);
  EXPECT_TRUE(a != nullptr);
  EXPECT_TRUE((a->IsType<Tensor<dtype, DeviceContext> >()));
  int size = a->Get<Tensor<dtype, DeviceContext> >().size();
  const dtype* a_data = a->Get<Tensor<dtype, DeviceContext> >().data();
  const Blob* b = ws.GetBlob(name_b);
  EXPECT_TRUE(b != nullptr);
  EXPECT_TRUE((b->IsType<Tensor<dtype, DeviceContext> >()));
  EXPECT_EQ(size, (b->Get<Tensor<dtype, DeviceContext> >().size()));
  const dtype* b_data = b->Get<Tensor<dtype, DeviceContext> >().data();
  for (int i = 0; i < size; ++i) {
    EXPECT_NEAR(a_data[i], b_data[i], relative_error);
  }
}
|
||||
|
||||
// Runs the toy regression plan and checks the learned weights W against the
// ground truth W_gt stored in the workspace.
// NOTE(review): all tests below ignore RunPlan()'s return value - consider
// wrapping it in EXPECT_TRUE.
TEST(ToyRegressionTest, TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kToyRegressionTestPlanPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  ExpectTensorEquivalence<float, CPUContext>(workspace, "W", "W_gt", 0.005);
}

// Trains a linear MNIST classifier and checks its accuracy.
TEST(MNISTLinearClassificationTest, TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLinearClassificationPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 85%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.85);
}

// Trains a two-layer ReLU MNIST network and checks its accuracy.
TEST(MNISTTwoLayerReluClassificationTest, TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTTwoLayerReluClassificationPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}

// Trains LeNet on MNIST (CPU, NCHW) and checks its accuracy.
TEST(MNISTLeNetClassificationTest, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetClassificationPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}

// Trains LeNet on MNIST on the GPU; the accuracy tensor lives on the
// device, so it is copied to a CPU tensor before being inspected.
TEST(MNISTLeNetClassificationTestGPU, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetClassificationGPUPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CUDAContext> >()));
  CPUContext context;
  Tensor<float, CPUContext> accuracy_tensor(
      accuracy->Get<Tensor<float, CUDAContext> >(), &context);
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}


// Trains LeNet on MNIST with NHWC storage order (CPU).
TEST(MNISTLeNetNHWCClassificationTest, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetNHWCClassificationPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}

// Trains LeNet on MNIST with NHWC storage order on the GPU.
TEST(MNISTLeNetNHWCClassificationGPUTest, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetNHWCClassificationGPUPath, &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CUDAContext> >()));
  CPUContext context;
  Tensor<float, CPUContext> accuracy_tensor(
      accuracy->Get<Tensor<float, CUDAContext> >(), &context);
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}



// Trains LeNet with grouped convolutions on MNIST (CPU, NCHW).
TEST(MNISTLeNetGroupConvolutionClassificationTest, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetGroupConvClassificationPath,
      &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}

// Trains LeNet with grouped convolutions and NHWC storage order (CPU).
TEST(MNISTLeNetGroupConvolutionNHWCClassificationTest, LARGE_TestRunPlan) {
  PlanDef plan_def;
  CHECK(ReadProtoFromFile(
      FLAGS_caffe_test_root + kMNISTLeNetGroupConvNHWCClassificationPath,
      &plan_def));
  Workspace workspace;
  workspace.RunPlan(plan_def);
  const Blob* accuracy = workspace.GetBlob("accuracy");
  EXPECT_TRUE(accuracy != nullptr);
  EXPECT_TRUE((accuracy->IsType<Tensor<float, CPUContext> >()));
  auto& accuracy_tensor = accuracy->Get<Tensor<float, CPUContext> >();
  EXPECT_EQ(accuracy_tensor.size(), 1);
  // Accuracy should be above 90%.
  EXPECT_GT(accuracy_tensor.data()[0], 0.90);
}
|
||||
|
||||
} // namespace caffe2
|
32
caffe2/image/BREW
Normal file
32
caffe2/image/BREW
Normal file
@ -0,0 +1,32 @@
|
||||
cc_library(
|
||||
name = "image_ops",
|
||||
srcs = [
|
||||
"image_input_op.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"image_input_op.h",
|
||||
],
|
||||
deps = [
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/operators:core_ops",
|
||||
"//caffe2/utils:math",
|
||||
"//caffe2/utils:proto_utils",
|
||||
],
|
||||
external_libs = [
|
||||
"opencv_core",
|
||||
"opencv_highgui",
|
||||
"opencv_imgproc",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
||||
|
||||
cuda_library(
|
||||
name = "image_ops_gpu",
|
||||
srcs = Glob(["*_gpu.cc"]) + Glob(["*.cu"]),
|
||||
deps = [
|
||||
":image_ops",
|
||||
"//caffe2/core:core_gpu",
|
||||
"//caffe2/utils:math_gpu",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
7
caffe2/image/image_input_op.cc
Normal file
7
caffe2/image/image_input_op.cc
Normal file
@ -0,0 +1,7 @@
|
||||
#include "caffe2/image/image_input_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
REGISTER_CPU_OPERATOR(ImageInput, ImageInputOp<CPUContext>);
|
||||
|
||||
} // namespace caffe2
|
205
caffe2/image/image_input_op.h
Normal file
205
caffe2/image/image_input_op.h
Normal file
@ -0,0 +1,205 @@
|
||||
#ifndef CAFFE2_IMAGE_IMAGE_INPUT_OP_H_
|
||||
#define CAFFE2_IMAGE_IMAGE_INPUT_OP_H_
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/db.h"
|
||||
#include "caffe2/operators/prefetch_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Operator that reads images (encoded strings or raw bytes) plus integer
// labels from a DB, decodes/scales/crops them on a background prefetch
// thread, and produces a float image batch (output 0, laid out
// batch x crop x crop x channels) and an int label vector (output 1).
template <class DeviceContext>
class ImageInputOp final
    : public PrefetchOperator<DeviceContext> {
 public:
  using OperatorBase::OutputSize;
  using PrefetchOperator<DeviceContext>::prefetch_thread_;
  explicit ImageInputOp(const OperatorDef& operator_def,
                        Workspace* ws);
  ~ImageInputOp() {
    // Make sure the prefetch thread has finished before members it touches
    // are destroyed.
    if (prefetch_thread_.get() != nullptr) {
      prefetch_thread_->join();
    }
  }

  // Fills the prefetched_* buffers from the DB (runs on the prefetch
  // thread).
  bool Prefetch() override;
  // Copies the prefetched buffers into the operator outputs.
  bool CopyPrefetched() override;

 private:
  unique_ptr<db::DB> db_;
  unique_ptr<db::Cursor> cursor_;
  CPUContext cpu_context_;
  Tensor<float, CPUContext> prefetched_image_;
  Tensor<int, CPUContext> prefetched_label_;
  int batch_size_;
  string db_name_;
  string db_type_;
  float mean_;   // subtracted from every pixel value
  float std_;    // divisor applied after mean subtraction
  bool color_;   // 3-channel color vs. single-channel grayscale
  int scale_;    // target size for the shorter side before cropping
  bool warp_;    // if true, scale both sides to scale_ (ignores aspect ratio)
  int crop_;     // square crop side length
  bool mirror_;  // if true, mirror images horizontally at random
  INPUT_OUTPUT_STATS(0, 0, 2, 2);
  DISABLE_COPY_AND_ASSIGN(ImageInputOp);
};
|
||||
|
||||
template <class DeviceContext>
|
||||
ImageInputOp<DeviceContext>::ImageInputOp(
|
||||
const OperatorDef& operator_def, Workspace* ws)
|
||||
: PrefetchOperator<DeviceContext>(operator_def, ws),
|
||||
batch_size_(
|
||||
OperatorBase::template GetSingleArgument<int>("batch_size", 0)),
|
||||
db_name_(
|
||||
OperatorBase::template GetSingleArgument<string>("db", "")),
|
||||
db_type_(OperatorBase::template GetSingleArgument<string>(
|
||||
"db_type", "leveldb")),
|
||||
mean_(OperatorBase::template GetSingleArgument<float>("mean", 0.)),
|
||||
std_(OperatorBase::template GetSingleArgument<float>("std", 1.)),
|
||||
color_(OperatorBase::template GetSingleArgument<int>("color", 1)),
|
||||
scale_(OperatorBase::template GetSingleArgument<int>("scale", -1)),
|
||||
warp_(OperatorBase::template GetSingleArgument<int>("warp", 0)),
|
||||
crop_(OperatorBase::template GetSingleArgument<int>("crop", -1)),
|
||||
mirror_(OperatorBase::template GetSingleArgument<int>("mirror", 0)) {
|
||||
CHECK_GT(batch_size_, 0) << "Batch size should be nonnegative.";
|
||||
CHECK_GT(db_name_.size(), 0) << "Must provide a leveldb name.";
|
||||
CHECK_GT(scale_, 0) << "Must provide the scaling factor.";
|
||||
CHECK_GT(crop_, 0) << "Must provide the cropping value.";
|
||||
CHECK_GE(scale_, crop_)
|
||||
<< "The scale value must be no smaller than the crop value.";
|
||||
|
||||
DLOG(INFO) << "Creating an image input op with the following setting: ";
|
||||
DLOG(INFO) << " Outputting in batches of " << batch_size_ << " images;";
|
||||
DLOG(INFO) << " Treating input image as "
|
||||
<< (color_ ? "color " : "grayscale ") << "image;";
|
||||
DLOG(INFO) << " Scaling image to " << scale_
|
||||
<< (warp_ ? " with " : " without ") << "warping;";
|
||||
DLOG(INFO) << " Cropping image to " << crop_
|
||||
<< (mirror_ ? " with " : " without ") << "random mirroring;";
|
||||
DLOG(INFO) << " Subtract mean " << mean_ << " and divide by std " << std_
|
||||
<< ".";
|
||||
db_.reset(db::CreateDB(db_type_, db_name_, db::READ));
|
||||
cursor_.reset(db_->NewCursor());
|
||||
cursor_->SeekToFirst();
|
||||
prefetched_image_.Reshape(
|
||||
vector<int>{batch_size_, crop_, crop_, (color_ ? 3 : 1)});
|
||||
prefetched_label_.Reshape(vector<int>(1, batch_size_));
|
||||
}
|
||||
|
||||
// Fills prefetched_image_ and prefetched_label_ with the next batch_size_
// records from the DB, wrapping back to the first record when the cursor
// is exhausted. Runs on the prefetch thread.
template <class DeviceContext>
bool ImageInputOp<DeviceContext>::Prefetch() {
  std::bernoulli_distribution mirror_this_image(0.5);
  float* image_data = prefetched_image_.mutable_data();
  int channels = color_ ? 3 : 1;
  for (int item_id = 0; item_id < batch_size_; ++item_id) {
    // LOG(INFO) << "Prefetching item " << item_id;
    // process data
    TensorProtos protos;
    CHECK(protos.ParseFromString(cursor_->value())) << cursor_->value();
    const TensorProto& image = protos.protos(0);
    const TensorProto& label = protos.protos(1);
    cv::Mat final_img;
    if (image.data_type() == TensorProto::STRING) {
      // Do the image manipulation, and copy the content.
      DCHECK_EQ(image.string_data_size(), 1);

      const string& encoded_image = image.string_data(0);
      int encoded_size = encoded_image.size();
      cv::Mat img = cv::imdecode(
          cv::Mat(1, &encoded_size, CV_8UC1,
              const_cast<char*>(encoded_image.data())),
          color_ ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
      // Do resizing: unless warp_ is set (both sides become scale_), the
      // shorter side is scaled to scale_ and the aspect ratio is kept.
      int scaled_width, scaled_height;
      if (warp_) {
        scaled_width = scale_;
        scaled_height = scale_;
      } else if (img.rows > img.cols) {
        scaled_width = scale_;
        scaled_height = static_cast<float>(img.rows) * scale_ / img.cols;
      } else {
        scaled_height = scale_;
        scaled_width = static_cast<float>(img.cols) * scale_ / img.rows;
      }
      cv::resize(img, final_img, cv::Size(scaled_width, scaled_height), 0, 0,
                 cv::INTER_LINEAR);
    } else if (image.data_type() == TensorProto::BYTE) {
      // In this case, we will always just take the bytes as the raw image.
      CHECK_EQ(image.dims_size(), (color_ ? 3 : 2));
      CHECK_GE(image.dims(0), crop_)
          << "Image height must be bigger than crop.";
      CHECK_GE(image.dims(1), crop_) << "Image width must be bigger than crop.";
      CHECK(!color_ || image.dims(2) == 3);
      // Wraps the proto bytes without copying; final_img must not outlive
      // `protos` (it does not - it is consumed within this iteration).
      final_img = cv::Mat(
          image.dims(0), image.dims(1), color_ ? CV_8UC3 : CV_8UC1,
          const_cast<char*>(image.byte_data().data()));
    }
    // find the cropped region, and copy it to the destination matrix with
    // mean subtraction and scaling.
    int width_offset =
        std::uniform_int_distribution<>(0, final_img.cols - crop_)(
            cpu_context_.RandGenerator());
    int height_offset =
        std::uniform_int_distribution<>(0, final_img.rows - crop_)(
            cpu_context_.RandGenerator());
    // DVLOG(1) << "offset: " << height_offset << ", " << width_offset;
    // NOTE(review): both copy paths index with at<cv::Vec3b> even when the
    // image is single-channel (CV_8UC1) - verify the grayscale path.
    if (mirror_ && mirror_this_image(cpu_context_.RandGenerator())) {
      // Copy mirrored image: walk columns right-to-left so the written row
      // is horizontally flipped.
      for (int h = height_offset; h < height_offset + crop_; ++h) {
        for (int w = width_offset + crop_ - 1; w >= width_offset; --w) {
          const cv::Vec3b& cv_data = final_img.at<cv::Vec3b>(h, w);
          for (int c = 0; c < channels; ++c) {
            *(image_data++) =
                (static_cast<uint8_t>(cv_data[c]) - mean_) / std_;
          }
        }
      }
    } else {
      // Copy normally.
      for (int h = height_offset; h < height_offset + crop_; ++h) {
        for (int w = width_offset; w < width_offset + crop_; ++w) {
          const cv::Vec3b& cv_data = final_img.at<cv::Vec3b>(h, w);
          for (int c = 0; c < channels; ++c) {
            *(image_data++) =
                (static_cast<uint8_t>(cv_data[c]) - mean_) / std_;
          }
        }
      }
    }
    // Copy the label
    DCHECK_EQ(label.data_type(), TensorProto::INT32);
    DCHECK_EQ(label.int32_data_size(), 1);
    prefetched_label_.mutable_data()[item_id] = label.int32_data(0);
    // Advance to the next item.
    cursor_->Next();
    if (!cursor_->Valid()) {
      // Wrap around to the beginning of the database.
      cursor_->SeekToFirst();
    }
  }
  return true;
}
|
||||
|
||||
template <class DeviceContext>
|
||||
bool ImageInputOp<DeviceContext>::CopyPrefetched() {
|
||||
// The first output is the image data.
|
||||
auto* image_output = OperatorBase::Output<Tensor<float, DeviceContext> >(0);
|
||||
image_output->ReshapeLike(prefetched_image_);
|
||||
this->device_context_.template Copy<float, DeviceContext, CPUContext>(
|
||||
image_output->mutable_data(), prefetched_image_.data(),
|
||||
prefetched_image_.size());
|
||||
// The second output is the label.
|
||||
auto* label_output = OperatorBase::Output<Tensor<int, DeviceContext> >(1);
|
||||
label_output->ReshapeLike(prefetched_label_);
|
||||
this->device_context_.template Copy<int, DeviceContext, CPUContext>(
|
||||
label_output->mutable_data(), prefetched_label_.data(),
|
||||
prefetched_label_.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_IMAGE_IMAGE_INPUT_OP_H_
|
||||
|
9
caffe2/image/image_input_op_gpu.cc
Normal file
9
caffe2/image/image_input_op_gpu.cc
Normal file
@ -0,0 +1,9 @@
|
||||
#include "caffe2/core/common_gpu.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/image/image_input_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
REGISTER_CUDA_OPERATOR(ImageInput, ImageInputOp<CUDAContext>);
|
||||
|
||||
} // namespace caffe2
|
19
caffe2/mpi/BREW
Normal file
19
caffe2/mpi/BREW
Normal file
@ -0,0 +1,19 @@
|
||||
cc_headers(
|
||||
name = "mpi_common",
|
||||
srcs = [
|
||||
"mpi_common.h",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mpi_ops",
|
||||
srcs = [
|
||||
"allreduce_op.cc"
|
||||
],
|
||||
deps = [
|
||||
":mpi_common",
|
||||
"//caffe2/core:core",
|
||||
],
|
||||
external_libs = Env.MPI_LIBS,
|
||||
whole_archive = True,
|
||||
)
|
37
caffe2/mpi/allreduce_op.cc
Normal file
37
caffe2/mpi/allreduce_op.cc
Normal file
@ -0,0 +1,37 @@
|
||||
#include <mpi.h>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/mpi/mpi_common.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// AllreduceOp does Allreduce using MPI. Currently, only SUM is supported.
|
||||
template <typename dtype, class DeviceContext>
|
||||
class AllreduceOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
USE_SIMPLE_CTOR_DTOR(AllreduceOp);
|
||||
|
||||
bool RunOnDevice() {
|
||||
auto& input = Input(0);
|
||||
auto* output = Output(0);
|
||||
output->ReshapeLike(input);
|
||||
MPI_Allreduce(const_cast<dtype*>(input.data()),
|
||||
output->mutable_data(), input.size(),
|
||||
MPIDataTypeWrapper<dtype>::type(), MPI_SUM, MPI_COMM_WORLD);
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Input: X; Output: X_reduced.
|
||||
INPUT_OUTPUT_STATS(1, 1, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(AllreduceOp);
|
||||
};
|
||||
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(Allreduce, AllreduceOp<float, CPUContext>);
|
||||
// Note: Allreduce does not work on CUDA devices as of OpenMPI 1.8.4 yet. In the
|
||||
// future we can simply initialize it here.
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
26
caffe2/mpi/mpi_common.h
Normal file
26
caffe2/mpi/mpi_common.h
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef CAFFE2_MPI_MPI_COMMON_H_
|
||||
#define CAFFE2_MPI_MPI_COMMON_H_
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
inline void CheckInitializedMPI() {
|
||||
int flag;
|
||||
MPI_Initialized(&flag);
|
||||
CHECK(flag) << "MPI does not seem to have been initialized.";
|
||||
}
|
||||
|
||||
template <typename T> class MPIDataTypeWrapper;
|
||||
|
||||
#define MPI_DATATYPE_WRAPPER(c_type, mpi_type) \
|
||||
template<> class MPIDataTypeWrapper<c_type> { \
|
||||
public: \
|
||||
inline static MPI_Datatype type() { return mpi_type; } \
|
||||
};
|
||||
|
||||
MPI_DATATYPE_WRAPPER(float, MPI_FLOAT)
|
||||
MPI_DATATYPE_WRAPPER(double, MPI_DOUBLE)
|
||||
// Note(Yangqing): as necessary, add more specializations.
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_MPI_MPI_COMMON_H_
|
98
caffe2/operators/BREW
Normal file
98
caffe2/operators/BREW
Normal file
@ -0,0 +1,98 @@
|
||||
cc_headers(
|
||||
name = "operators_headers",
|
||||
srcs = Glob(["*.h"]),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "core_ops",
|
||||
srcs = [
|
||||
"accumulate_op.cc",
|
||||
"accuracy_op.cc",
|
||||
"averagepool_op.cc",
|
||||
"conv_op.cc",
|
||||
"cross_entropy_op.cc",
|
||||
"depth_split_op.cc",
|
||||
"dropout_op.cc",
|
||||
"elementwise_op.cc",
|
||||
"filler_op.cc",
|
||||
"fully_connected_op.cc",
|
||||
"l2_distance_op.cc",
|
||||
"load_save_op.cc",
|
||||
"local_response_normalization_op.cc",
|
||||
"loss_op.cc",
|
||||
"maxpool_op.cc",
|
||||
"order_switch_ops.cc",
|
||||
"relu_op.cc",
|
||||
"softmax_op.cc",
|
||||
"summarize_op.cc",
|
||||
"tensor_protos_db_input.cc",
|
||||
"utility_ops.cc",
|
||||
],
|
||||
deps = [
|
||||
":operators_headers",
|
||||
"//caffe2/core:core",
|
||||
"//caffe2/utils:math",
|
||||
"//caffe2/utils:proto_utils",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
||||
|
||||
cuda_library(
|
||||
name = "core_ops_gpu",
|
||||
srcs = [
|
||||
"accumulate_op.cu",
|
||||
"accuracy_op.cu",
|
||||
"averagepool_op.cu",
|
||||
"conv_op.cu",
|
||||
"cross_entropy_op.cu",
|
||||
"depth_split_op.cu",
|
||||
"dropout_op.cu",
|
||||
"elementwise_op_gpu.cc",
|
||||
"filler_op.cu",
|
||||
"fully_connected_op_gpu.cc",
|
||||
"l2_distance_op.cu",
|
||||
"load_save_op.cu",
|
||||
"local_response_normalization_op.cu",
|
||||
"loss_op_gpu.cc",
|
||||
"maxpool_op.cu",
|
||||
"order_switch_ops.cu",
|
||||
"relu_op.cu",
|
||||
"softmax_op.cu",
|
||||
"summarize_op.cu",
|
||||
"tensor_protos_db_input_gpu.cc",
|
||||
"utility_ops_gpu.cc",
|
||||
],
|
||||
deps = [
|
||||
":operators_headers",
|
||||
"//caffe2/core:core_gpu",
|
||||
"//caffe2/utils:math_gpu",
|
||||
"//caffe2/utils:proto_utils",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "core_ops_cudnn",
|
||||
srcs = [
|
||||
"softmax_op_cudnn.cc",
|
||||
],
|
||||
deps = [
|
||||
":operators_headers",
|
||||
"//caffe2/core:core_cudnn",
|
||||
"//caffe2/core:core_gpu",
|
||||
"//caffe2/utils:math_gpu",
|
||||
"//third_party/cudnn:cudnn",
|
||||
],
|
||||
whole_archive = True,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "core_ops_test",
|
||||
srcs = Glob(["*_test.cc"]),
|
||||
deps = [
|
||||
":core_ops",
|
||||
":core_ops_gpu",
|
||||
":core_ops_cudnn",
|
||||
"//gtest:gtest_main",
|
||||
]
|
||||
)
|
7
caffe2/operators/accumulate_op.cc
Normal file
7
caffe2/operators/accumulate_op.cc
Normal file
@ -0,0 +1,7 @@
|
||||
#include "caffe2/operators/accumulate_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(Accumulate, AccumulateOp<float, CPUContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
8
caffe2/operators/accumulate_op.cu
Normal file
8
caffe2/operators/accumulate_op.cu
Normal file
@ -0,0 +1,8 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/accumulate_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(Accumulate, AccumulateOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
50
caffe2/operators/accumulate_op.h
Normal file
50
caffe2/operators/accumulate_op.h
Normal file
@ -0,0 +1,50 @@
|
||||
#ifndef CAFFE2_OPERATORS_ACCUMULATE_OP_H_
|
||||
#define CAFFE2_OPERATORS_ACCUMULATE_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Accumulate operator accumulates the input tensor to the output tensor. If the
|
||||
// output tensor already has the right size, we add to it; otherwise, we first
|
||||
// initialize the output tensor to all zeros, and then do accumulation. Any
|
||||
// further calls to the operator, given that no one else fiddles with the output
|
||||
// in the interim, will do simple accumulations.
|
||||
template <typename dtype, class DeviceContext>
|
||||
class AccumulateOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
AccumulateOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
kOne(static_cast<dtype>(1), &device_context_),
|
||||
gamma_(static_cast<dtype>(
|
||||
OperatorBase::template GetSingleArgument<float>("gamma", 1.0)),
|
||||
&device_context_) {}
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
|
||||
bool RunOnDevice() override {
|
||||
auto& input = Input(0);
|
||||
auto* output = Output(0);
|
||||
if (output->dims() != input.dims()) {
|
||||
LOG(INFO) << "Reshaping and initializing output.";
|
||||
output->ReshapeLike(input);
|
||||
math::Set<dtype, DeviceContext>(
|
||||
output->size(), 0, output->mutable_data(), &device_context_);
|
||||
}
|
||||
math::Axpby<dtype, DeviceContext>(
|
||||
input.size(), kOne.data(), input.data(), gamma_.data(),
|
||||
output->mutable_data(), &device_context_);
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tensor<dtype, DeviceContext> kOne;
|
||||
Tensor<dtype, DeviceContext> gamma_;
|
||||
INPUT_OUTPUT_STATS(1, 1, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(AccumulateOp);
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_ACCUMULATE_OP_H_
|
40
caffe2/operators/accuracy_op.cc
Normal file
40
caffe2/operators/accuracy_op.cc
Normal file
@ -0,0 +1,40 @@
|
||||
#include "caffe2/operators/accuracy_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <>
|
||||
bool AccuracyOp<float, CPUContext>::RunOnDevice() {
|
||||
auto& X = Input(PREDICTION);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CPUContext> >(LABEL);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
Y->Reshape(std::vector<int>{1});
|
||||
const auto* Xdata = X.data();
|
||||
const auto* labeldata = label.data();
|
||||
int correct = 0;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
float maxval = std::numeric_limits<float>::lowest();
|
||||
int maxid = 0;
|
||||
for (int j = 0; j < D; ++j) {
|
||||
if (Xdata[i * D + j] > maxval) {
|
||||
maxval = Xdata[i * D + j];
|
||||
maxid = j;
|
||||
}
|
||||
}
|
||||
if (maxid == labeldata[i]) {
|
||||
++correct;
|
||||
}
|
||||
}
|
||||
DCHECK_LE(correct, N);
|
||||
Y->mutable_data()[0] = static_cast<float>(correct) / N;
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(Accuracy, AccuracyOp<float, CPUContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
56
caffe2/operators/accuracy_op.cu
Normal file
56
caffe2/operators/accuracy_op.cu
Normal file
@ -0,0 +1,56 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/accuracy_op.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace {
|
||||
__global__ void AccuracyKernel(const int N, const int D, const float* Xdata,
|
||||
const int* labeldata, float* accuracy) {
|
||||
int count = 0;
|
||||
CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
float maxval = Xdata[i * D];
|
||||
int maxid = 0;
|
||||
for (int j = 1; j < D; ++j) {
|
||||
if (Xdata[i * D + j] > maxval) {
|
||||
maxval = Xdata[i * D + j];
|
||||
maxid = j;
|
||||
}
|
||||
}
|
||||
if (maxid == labeldata[i]) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
atomicAdd(accuracy, static_cast<float>(count));
|
||||
}
|
||||
__global__ void AccuracyDivideKernel(const int N, float* accuracy) {
|
||||
*accuracy /= N;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
bool AccuracyOp<float, CUDAContext>::RunOnDevice() {
|
||||
auto& X = Input(PREDICTION);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CUDAContext> >(LABEL);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
Y->Reshape(std::vector<int>(1, 1));
|
||||
math::Set<float, CUDAContext>(1, 0, Y->mutable_data(), &device_context_);
|
||||
AccuracyKernel<<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
N, D, X.data(), label.data(), Y->mutable_data());
|
||||
// This is going to be executed only in one single kernel. Not very beautiful,
|
||||
// but probably we have to do this?
|
||||
AccuracyDivideKernel<<<1, 1, 0, device_context_.cuda_stream()>>>(
|
||||
N, Y->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(Accuracy, AccuracyOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
24
caffe2/operators/accuracy_op.h
Normal file
24
caffe2/operators/accuracy_op.h
Normal file
@ -0,0 +1,24 @@
|
||||
#ifndef CAFFE2_OPERATORS_ACCURACY_OP_H_
|
||||
#define CAFFE2_OPERATORS_ACCURACY_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class AccuracyOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_SIMPLE_CTOR_DTOR(AccuracyOp);
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
bool RunOnDevice() override;
|
||||
|
||||
protected:
|
||||
INPUT_OUTPUT_STATS(2, 2, 1, 1);
|
||||
INPUT_TAGS(PREDICTION, LABEL);
|
||||
DISABLE_COPY_AND_ASSIGN(AccuracyOp);
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_ACCURACY_OP_H_
|
194
caffe2/operators/averagepool_op.cc
Normal file
194
caffe2/operators/averagepool_op.cc
Normal file
@ -0,0 +1,194 @@
|
||||
#include "caffe2/operators/averagepool_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
using std::max;
|
||||
using std::min;
|
||||
|
||||
template <>
|
||||
bool AveragePoolOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
ConvPoolOpBase::SetOutputSize(X, Y, X.dim(1));
|
||||
|
||||
const float* Xdata = X.data();
|
||||
float* Ydata = Y->mutable_data();
|
||||
math::Set<float, CPUContext>(
|
||||
Y->size(), 0, Ydata, &device_context_);
|
||||
// The main loop
|
||||
int channels = X.dim(1);
|
||||
int height = X.dim(2);
|
||||
int width = X.dim(3);
|
||||
int pooled_height = Y->dim(2);
|
||||
int pooled_width = Y->dim(3);
|
||||
for (int n = 0; n < X.dim(0); ++n) {
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
int hstart = ph * stride_h_ - pad_t_;
|
||||
int wstart = pw * stride_w_ - pad_l_;
|
||||
int hend = min(hstart + kernel_h_, height);
|
||||
int wend = min(wstart + kernel_w_, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
const int pool_index = ph * pooled_width + pw;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
const int input_index = h * width + w;
|
||||
Ydata[pool_index] += Xdata[input_index];
|
||||
}
|
||||
}
|
||||
Ydata[pool_index] /= (hend - hstart) * (wend - wstart);
|
||||
}
|
||||
}
|
||||
// Do offset.
|
||||
Xdata += height * width;
|
||||
Ydata += pooled_height * pooled_width;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
int height = X.dim(1);
|
||||
int width = X.dim(2);
|
||||
int channels = X.dim(3);
|
||||
ConvPoolOpBase::SetOutputSize(X, Y, channels);
|
||||
const float* Xdata = X.data();
|
||||
float* Ydata = Y->mutable_data();
|
||||
math::Set<float, CPUContext>(Y->size(), 0, Ydata, &device_context_);
|
||||
// The main loop
|
||||
int pooled_height = Y->dim(1);
|
||||
int pooled_width = Y->dim(2);
|
||||
for (int n = 0; n < X.dim(0); ++n) {
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
int hstart = ph * stride_h_ - pad_t_;
|
||||
int wstart = pw * stride_w_ - pad_l_;
|
||||
int hend = min(hstart + kernel_h_, height);
|
||||
int wend = min(wstart + kernel_w_, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
const int pool_index = (ph * pooled_width + pw) * channels;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
const int input_index = (h * width + w) * channels;
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
Ydata[pool_index + c] += Xdata[input_index + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
float scale = 1. / (hend - hstart) / (wend - wstart);
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
Ydata[pool_index + c] *= scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Do offset.
|
||||
Xdata += X.size() / X.dim(0);
|
||||
Ydata += Y->size() / Y->dim(0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
|
||||
auto& X = Input(0);
|
||||
auto& dY = Input(1);
|
||||
auto* dX = Output(0);
|
||||
// TODO(Yangqing): Add shape checks.
|
||||
dX->ReshapeLike(X);
|
||||
math::Set<float, CPUContext>(
|
||||
X.size(), 0, dX->mutable_data(), &device_context_);
|
||||
const float* dYdata = dY.data();
|
||||
float* dXdata = dX->mutable_data();
|
||||
int channels = X.dim(1);
|
||||
CHECK_EQ(channels, dY.dim(1));
|
||||
int height = X.dim(2);
|
||||
int width = X.dim(3);
|
||||
ConvPoolOpBase<float, CPUContext>::ComputePads(height, width);
|
||||
int pooled_height = dY.dim(2);
|
||||
int pooled_width = dY.dim(3);
|
||||
// The main loop
|
||||
for (int n = 0; n < X.dim(0); ++n) {
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
int hstart = ph * stride_h_ - pad_t_;
|
||||
int wstart = pw * stride_w_ - pad_l_;
|
||||
int hend = min(hstart + kernel_h_, height);
|
||||
int wend = min(wstart + kernel_w_, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
float scale = 1. / (hend - hstart) / (wend - wstart);
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
dXdata[h * width + w] +=
|
||||
dYdata[ph * pooled_width + pw] * scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// offset
|
||||
dXdata += height * width;
|
||||
dYdata += pooled_height * pooled_width;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
|
||||
auto& X = Input(0);
|
||||
auto& dY = Input(1);
|
||||
CHECK_EQ(dY.ndim(), 4);
|
||||
auto* dX = Output(0);
|
||||
// TODO(Yangqing): Add shape checks.
|
||||
dX->ReshapeLike(X);
|
||||
math::Set<float, CPUContext>(
|
||||
X.size(), 0, dX->mutable_data(), &device_context_);
|
||||
const float* dYdata = dY.data();
|
||||
float* dXdata = dX->mutable_data();
|
||||
// The main loop
|
||||
int height = X.dim(1);
|
||||
int width = X.dim(2);
|
||||
ConvPoolOpBase<float, CPUContext>::ComputePads(height, width);
|
||||
int pooled_height = dY.dim(1);
|
||||
int pooled_width = dY.dim(2);
|
||||
int channels = X.dim(3);
|
||||
CHECK_EQ(channels, dY.dim(3));
|
||||
for (int n = 0; n < X.dim(0); ++n) {
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
int hstart = ph * stride_h_ - pad_t_;
|
||||
int wstart = pw * stride_w_ - pad_l_;
|
||||
int hend = min(hstart + kernel_h_, height);
|
||||
int wend = min(wstart + kernel_w_, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
float scale = 1. / (hend - hstart) / (wend - wstart);
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
dXdata[(h * width + w) * channels + c] +=
|
||||
dYdata[(ph * pooled_width + pw) * channels + c] * scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// offset
|
||||
dXdata += X.size() / X.dim(0);
|
||||
dYdata += dY.size() / dY.dim(0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(AveragePool, AveragePoolOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(AveragePoolGradient, AveragePoolGradientOp<float, CPUContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
218
caffe2/operators/averagepool_op.cu
Normal file
218
caffe2/operators/averagepool_op.cu
Normal file
@ -0,0 +1,218 @@
|
||||
#include <cfloat>
|
||||
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/averagepool_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace {
|
||||
template <typename dtype>
|
||||
__global__ void AveragePoolForwardNCHW(
|
||||
const int nthreads, const dtype* bottom_data,
|
||||
const int num, const int channels, const int height,
|
||||
const int width, const int pooled_height, const int pooled_width,
|
||||
const int kernel_h, const int kernel_w, const int stride_h,
|
||||
const int stride_w, const int pad_t, const int pad_l, dtype* top_data) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nthreads) {
|
||||
int pw = index % pooled_width;
|
||||
int ph = (index / pooled_width) % pooled_height;
|
||||
int c = (index / pooled_width / pooled_height) % channels;
|
||||
int n = index / pooled_width / pooled_height / channels;
|
||||
int hstart = ph * stride_h - pad_t;
|
||||
int wstart = pw * stride_w - pad_l;
|
||||
int hend = min(hstart + kernel_h, height);
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
dtype output = 0;
|
||||
bottom_data += n * channels * height * width;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
int idx = c * height * width + h * width + w;
|
||||
output += bottom_data[idx];
|
||||
}
|
||||
}
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
top_data[index] = output / pool_size;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename dtype>
|
||||
__global__ void AveragePoolForwardNHWC(
|
||||
const int nthreads, const dtype* bottom_data,
|
||||
const int num, const int height, const int width,
|
||||
const int channels, const int pooled_height, const int pooled_width,
|
||||
const int kernel_h, const int kernel_w, const int stride_h,
|
||||
const int stride_w, const int pad_t, const int pad_l, dtype* top_data) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nthreads) {
|
||||
int c = index % channels;
|
||||
int pw = (index / channels) % pooled_width;
|
||||
int ph = (index / channels / pooled_width) % pooled_height;
|
||||
int n = index / channels / pooled_width / pooled_height;
|
||||
int hstart = ph * stride_h - pad_t;
|
||||
int wstart = pw * stride_w - pad_l;
|
||||
int hend = min(hstart + kernel_h, height);
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
dtype output = 0;
|
||||
bottom_data += n * height * width * channels;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
for (int w = wstart; w < wend; ++w) {
|
||||
output += bottom_data[(h * width + w) * channels + c];
|
||||
}
|
||||
}
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
top_data[index] = output / pool_size;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename dtype>
|
||||
__global__ void AvePoolBackwardNCHW(const int nthreads,
|
||||
const dtype* const top_diff, const int num, const int channels,
|
||||
const int height, const int width, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t,
|
||||
const int pad_l, dtype* const bottom_diff) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nthreads) {
|
||||
// find out the local index
|
||||
// find out the local offset
|
||||
const int w = index % width + pad_l;
|
||||
const int h = (index / width) % height + pad_t;
|
||||
const int c = (index / width / height) % channels;
|
||||
const int n = index / width / height / channels;
|
||||
const int phstart = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
|
||||
const int phend = min(h / stride_h + 1, pooled_height);
|
||||
const int pwstart = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
|
||||
const int pwend = min(w / stride_w + 1, pooled_width);
|
||||
dtype gradient = 0;
|
||||
const dtype* const top_diff_slice =
|
||||
top_diff + (n * channels + c) * pooled_height * pooled_width;
|
||||
for (int ph = phstart; ph < phend; ++ph) {
|
||||
for (int pw = pwstart; pw < pwend; ++pw) {
|
||||
// figure out the pooling size
|
||||
int hstart = ph * stride_h - pad_t;
|
||||
int wstart = pw * stride_w - pad_l;
|
||||
int hend = min(hstart + kernel_h, height);
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
gradient += top_diff_slice[ph * pooled_width + pw] / pool_size;
|
||||
}
|
||||
}
|
||||
bottom_diff[index] = gradient;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename dtype>
|
||||
__global__ void AvePoolBackwardNHWC(const int nthreads,
|
||||
const dtype* const top_diff, const int num, const int height,
|
||||
const int width, const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t,
|
||||
const int pad_l, dtype* const bottom_diff) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nthreads) {
|
||||
// find out the local index
|
||||
// find out the local offset
|
||||
const int c = index % channels;
|
||||
const int w = index / channels % width + pad_l;
|
||||
const int h = (index / channels / width) % height + pad_t;
|
||||
const int n = index / channels / width / height;
|
||||
const int phstart = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
|
||||
const int phend = min(h / stride_h + 1, pooled_height);
|
||||
const int pwstart = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
|
||||
const int pwend = min(w / stride_w + 1, pooled_width);
|
||||
dtype gradient = 0;
|
||||
const dtype* const top_diff_slice =
|
||||
top_diff + n * pooled_height * pooled_width * channels + c;
|
||||
for (int ph = phstart; ph < phend; ++ph) {
|
||||
for (int pw = pwstart; pw < pwend; ++pw) {
|
||||
// figure out the pooling size
|
||||
int hstart = ph * stride_h - pad_t;
|
||||
int wstart = pw * stride_w - pad_l;
|
||||
int hend = min(hstart + kernel_h, height);
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
gradient +=
|
||||
top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
|
||||
}
|
||||
}
|
||||
bottom_diff[index] = gradient;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
bool AveragePoolOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
ConvPoolOpBase<float, CUDAContext>::SetOutputSize(X, Y, X.dim(1));
|
||||
int output_size = Y->size();
|
||||
AveragePoolForwardNCHW<float><<<CAFFE_GET_BLOCKS(output_size),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
output_size, X.data(), X.dim(0), X.dim(1), X.dim(2), X.dim(3),
|
||||
Y->dim(2), Y->dim(3), kernel_h_, kernel_w_, stride_h_, stride_w_,
|
||||
pad_t_, pad_l_, Y->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
ConvPoolOpBase<float, CUDAContext>::SetOutputSize(X, Y, X.dim(3));
|
||||
int output_size = Y->size();
|
||||
AveragePoolForwardNHWC<float><<<CAFFE_GET_BLOCKS(output_size),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
output_size, X.data(), X.dim(0), X.dim(1), X.dim(2), X.dim(3),
|
||||
Y->dim(1), Y->dim(2), kernel_h_, kernel_w_, stride_h_, stride_w_,
|
||||
pad_t_, pad_l_, Y->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
|
||||
auto& X = Input(0);
|
||||
auto& dY = Input(1);
|
||||
CHECK_EQ(dY.ndim(), 4);
|
||||
auto* dX = Output(0);
|
||||
dX->ReshapeLike(X);
|
||||
ConvPoolOpBase<float, CUDAContext>::ComputePads(X.dim(2), X.dim(3));
|
||||
AvePoolBackwardNCHW<float><<<CAFFE_GET_BLOCKS(X.size()),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
X.size(), dY.data(), X.dim(0), X.dim(1), X.dim(2), X.dim(3),
|
||||
dY.dim(2), dY.dim(3), kernel_h_, kernel_w_, stride_h_, stride_w_,
|
||||
pad_t_, pad_l_, dX->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool AveragePoolGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
|
||||
auto& X = Input(0);
|
||||
auto& dY = Input(1);
|
||||
CHECK_EQ(dY.ndim(), 4);
|
||||
auto* dX = Output(0);
|
||||
dX->ReshapeLike(X);
|
||||
ConvPoolOpBase<float, CUDAContext>::ComputePads(X.dim(1), X.dim(2));
|
||||
AvePoolBackwardNHWC<float><<<CAFFE_GET_BLOCKS(X.size()),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
X.size(), dY.data(), X.dim(0), X.dim(1), X.dim(2), X.dim(3),
|
||||
dY.dim(1), dY.dim(2), kernel_h_, kernel_w_, stride_h_, stride_w_,
|
||||
pad_t_, pad_l_, dX->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(AveragePool, AveragePoolOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(AveragePoolGradient, AveragePoolGradientOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
50
caffe2/operators/averagepool_op.h
Normal file
50
caffe2/operators/averagepool_op.h
Normal file
@ -0,0 +1,50 @@
|
||||
#ifndef CAFFE2_OPERATORS_AVERAGEPOOL_OP_H_
|
||||
#define CAFFE2_OPERATORS_AVERAGEPOOL_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/operators/conv_pool_op_base.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class AveragePoolOp final : public ConvPoolOpBase<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_CONV_POOL_BASE_FUNCTIONS;
|
||||
AveragePoolOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: ConvPoolOpBase<dtype, DeviceContext>(operator_def, ws) {}
|
||||
~AveragePoolOp() {}
|
||||
|
||||
bool RunOnDeviceWithOrderNCHW() override;
|
||||
bool RunOnDeviceWithOrderNHWC() override;
|
||||
|
||||
// Input: X
|
||||
// Output: Y
|
||||
INPUT_OUTPUT_STATS(1, 1, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(AveragePoolOp);
|
||||
};
|
||||
|
||||
// Gradient of average pooling. Takes the forward input X and the output
// gradient (Y_grad) and produces the input gradient (X_grad), shaped like X.
template <typename dtype, class DeviceContext>
class AveragePoolGradientOp final :
    public ConvPoolOpBase<dtype, DeviceContext> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS;
  AveragePoolGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<dtype, DeviceContext>(operator_def, ws) {}
  ~AveragePoolGradientOp() {}

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

  // Input: X, Y_grad
  // Output: X_grad
  INPUT_OUTPUT_STATS(2, 2, 1, 1);
  DISABLE_COPY_AND_ASSIGN(AveragePoolGradientOp);
};
|
||||
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_AVERAGEPOOL_OP_H_
|
10
caffe2/operators/conv_op.cc
Normal file
10
caffe2/operators/conv_op.cc
Normal file
@ -0,0 +1,10 @@
|
||||
#include "caffe2/operators/conv_op.h"
#include "caffe2/operators/conv_op_impl.h"

namespace caffe2 {
namespace {
// Register the CPU implementations of convolution and its gradient. The
// templated implementations are pulled in via conv_op_impl.h above.
REGISTER_CPU_OPERATOR(Conv, ConvOp<float, CPUContext>)
REGISTER_CPU_OPERATOR(ConvGradient, ConvGradientOp<float, CPUContext>)

} // namespace
} // namespace caffe2
|
10
caffe2/operators/conv_op.cu
Normal file
10
caffe2/operators/conv_op.cu
Normal file
@ -0,0 +1,10 @@
|
||||
#include "caffe2/operators/conv_op.h"
#include "caffe2/operators/conv_op_impl.h"
#include "caffe2/core/context_gpu.h"

namespace caffe2 {
namespace {
// Register the CUDA implementations of convolution and its gradient. The
// templated implementations are pulled in via conv_op_impl.h above.
REGISTER_CUDA_OPERATOR(Conv, ConvOp<float, CUDAContext>)
REGISTER_CUDA_OPERATOR(ConvGradient, ConvGradientOp<float, CUDAContext>)
} // namespace
} // namespace caffe2
|
61
caffe2/operators/conv_op.h
Normal file
61
caffe2/operators/conv_op.h
Normal file
@ -0,0 +1,61 @@
|
||||
#ifndef CAFFE2_OPERATORS_CONV_OP_H_
|
||||
#define CAFFE2_OPERATORS_CONV_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/operators/conv_pool_op_base.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Convolution operator, implemented via im2col followed by GEMM. The
// per-storage-order bodies live in conv_op_impl.h.
template <typename dtype, class DeviceContext>
class ConvOp final : public ConvPoolOpBase<dtype, DeviceContext> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS;
  ConvOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<dtype, DeviceContext>(operator_def, ws),
        kOne(1, &device_context_), kZero(0, &device_context_) {}
  ~ConvOp() {}

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
  // Scratch buffer holding the im2col expansion of one input image.
  Tensor<dtype, DeviceContext> col_buffer_;
  // Vector of ones used to broadcast the bias over spatial positions via GEMM.
  Tensor<dtype, DeviceContext> bias_multiplier_;
  // Single-element constants 1 and 0, used as GEMM alpha/beta scalars.
  Tensor<dtype, DeviceContext> kOne;
  Tensor<dtype, DeviceContext> kZero;
  // Input: X, W, b
  // Output: Y
  INPUT_TAGS(INPUT, FILTER, BIAS);
  INPUT_OUTPUT_STATS(3, 3, 1, 1);
  DISABLE_COPY_AND_ASSIGN(ConvOp);
};
|
||||
|
||||
// Gradient of convolution. Always produces filter and bias gradients; the
// input gradient is computed only when a third output is requested.
template <typename dtype, class DeviceContext>
class ConvGradientOp final : public ConvPoolOpBase<dtype, DeviceContext> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS;
  ConvGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<dtype, DeviceContext>(operator_def, ws),
        kOne(1, &device_context_), kZero(0, &device_context_) {}
  ~ConvGradientOp() {}

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
  // Scratch buffer holding the im2col expansion of one input image.
  Tensor<dtype, DeviceContext> col_buffer_;
  // Vector of ones used to reduce the output gradient over spatial positions.
  Tensor<dtype, DeviceContext> bias_multiplier_;
  // Single-element constants 1 and 0, used as GEMM/GEMV alpha/beta scalars.
  Tensor<dtype, DeviceContext> kOne;
  Tensor<dtype, DeviceContext> kZero;
  // input: X, W, b, dY
  // output: dW, db, and optionally dX
  INPUT_TAGS(INPUT, FILTER, BIAS, OUTPUT_GRAD);
  OUTPUT_TAGS(FILTER_GRAD, BIAS_GRAD, INPUT_GRAD);
  INPUT_OUTPUT_STATS(4, 4, 2, 3);
  DISABLE_COPY_AND_ASSIGN(ConvGradientOp);
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_CONV_OP_H_
|
63
caffe2/operators/conv_op_cudnn.cu.working
Normal file
63
caffe2/operators/conv_op_cudnn.cu.working
Normal file
@ -0,0 +1,63 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/conv_pool_op_base.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// NOTE(review): this lives in a ".working" scratch file and does not compile
// as-is; the inline notes below flag the unfinished pieces.
template <typename dtype>
class CudnnConvOp final : public ConvPoolOpBase<dtype, CUDAContext> {
 public:
  CudnnConvOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<dtype, CUDAContext>(operator_def, ws),
        // NOTE(review): kOne/kZero are initialized here but never declared as
        // members of this class -- presumably Tensor constants as in ConvOp.
        kOne(1, &device_context_), kZero(0, &device_context_) {}
  ~CudnnConvOp() {}

  // Creates and configures the cuDNN filter descriptor.
  // NOTE(review): incomplete -- "filter_desc" should presumably be
  // "filter_desc_", the cudnnSetFilter4dDescriptor call is missing its
  // dtype and dimension arguments, and no value is returned.
  bool ConfigureCudnnConvolution() {
    CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_));
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(
        filter_desc, GetCudnnTensorFormat(order_), ))
  }

  bool RunOnDevice() override {
    // TODO: Reshape

    // NOTE(review): unfinished loop; no body and no return statement yet.
    for (int i)
  }

 private:
  cudnnTensorDescriptor_t bottom_desc_;
  cudnnFilterDescriptor_t filter_desc_;
  cudnnTensorDescriptor_t bias_desc_;
  cudnnTensorDescriptor_t top_desc_;
  cudnnConvolutionDescriptor_t conv_desc_;
  // Input: X, W, b
  // Output: Y
  INPUT_OUTPUT_STATS(3, 3, 1, 1);
  // NOTE(review): should presumably name CudnnConvOp, not ConvOp.
  DISABLE_COPY_AND_ASSIGN(ConvOp);
};
|
||||
|
||||
/*
|
||||
template <typename dtype, class DeviceContext>
|
||||
class ConvGradientOp final : public ConvPoolOpBase<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_CONV_POOL_BASE_FUNCTIONS;
|
||||
ConvGradientOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: ConvPoolOpBase<dtype, DeviceContext>(operator_def, ws),
|
||||
kOne(1, &device_context_), kZero(0, &device_context_) {}
|
||||
~ConvGradientOp() {}
|
||||
|
||||
bool RunOnDeviceWithOrderNCHW() override;
|
||||
bool RunOnDeviceWithOrderNHWC() override;
|
||||
|
||||
private:
|
||||
Tensor<dtype, DeviceContext> col_buffer_;
|
||||
Tensor<dtype, DeviceContext> bias_multiplier_;
|
||||
Tensor<dtype, DeviceContext> kOne;
|
||||
Tensor<dtype, DeviceContext> kZero;
|
||||
// input: X, W, b, dY
|
||||
// output: dW, db, and optionally dX
|
||||
INPUT_OUTPUT_STATS(4, 4, 2, 3);
|
||||
DISABLE_COPY_AND_ASSIGN(ConvGradientOp);
|
||||
};
|
||||
*/
|
||||
|
||||
} // namespace caffe2
|
336
caffe2/operators/conv_op_impl.h
Normal file
336
caffe2/operators/conv_op_impl.h
Normal file
@ -0,0 +1,336 @@
|
||||
// conv_op_impl.h is the templated implementation of the conv_op.h file.
|
||||
#ifndef CAFFE2_OPERATORS_CONV_OP_IMPL_H_
|
||||
#define CAFFE2_OPERATORS_CONV_OP_IMPL_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/operators/conv_op.h"
|
||||
#include "caffe2/operators/conv_pool_op_base.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Forward convolution for NCHW input: for each image, expand input patches
// with im2col, compute Y = filter * col_buffer as a GEMM, then add the bias
// by multiplying it against a row vector of ones.
template <typename dtype, class DeviceContext>
bool ConvOp<dtype, DeviceContext>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& bias = Input(BIAS);
  auto* Y = Output(0);
  const int N = X.dim(0), C = X.dim(1), H = X.dim(2), W = X.dim(3);
  DCHECK_EQ(filter.ndim(), 4);
  const int M = filter.dim(0);
  DCHECK_EQ(filter.dim(1), C);
  DCHECK_EQ(filter.dim(2), kernel_h_);
  DCHECK_EQ(filter.dim(3), kernel_w_);
  DCHECK_EQ(bias.ndim(), 1);
  DCHECK_EQ(bias.dim(0), M);
  ConvPoolOpBase<dtype, DeviceContext>::SetOutputSize(X, Y, filter.dim(0));
  // The dimension of each kernel
  const int kernel_dim = C * kernel_h_ * kernel_w_;
  // The offset corresponding to a single input image, and a single output
  // image.
  const int input_offset = C * H * W;
  const int output_offset = Y->size() / Y->dim(0);
  // The output image size is the spatial size of the output.
  const int output_image_size = Y->dim(2) * Y->dim(3);
  // The col buffer is stored in CHW order as well - kernel_dim, and the height
  // and width.
  col_buffer_.Reshape(std::vector<int>{
      C, kernel_h_, kernel_w_, Y->dim(2), Y->dim(3)});
  if (bias_multiplier_.size() != output_image_size) {
    // If the helper bias multiplier is not M, reshape and fill it with one.
    bias_multiplier_.Reshape(std::vector<int>(1, output_image_size));
    math::Set<dtype, DeviceContext>(
        output_image_size, static_cast<dtype>(1),
        bias_multiplier_.mutable_data(), &device_context_);
  }
  const dtype* Xdata = X.data();
  dtype* col_buffer_data = col_buffer_.mutable_data();
  dtype* Ydata = Y->mutable_data();
  // Im2col, followed by gemm.
  for (int image_id = 0; image_id < N; ++image_id) {
    math::Im2col<dtype, DeviceContext, StorageOrder::NCHW>(
        Xdata, C, H, W, kernel_h_, kernel_w_,
        pad_t_, pad_l_, pad_b_, pad_r_, stride_h_, stride_w_, col_buffer_data,
        &device_context_);
    // Weight term: Y[M x out_hw] = filter[M x kernel_dim] * col[kernel_dim x out_hw].
    math::Gemm<dtype, DeviceContext>(
        CblasNoTrans, CblasNoTrans, M, output_image_size, kernel_dim,
        kOne.data(), filter.data(), col_buffer_data, kZero.data(), Ydata,
        &device_context_);
    // Bias term: broadcast bias[M x 1] over all spatial positions (beta = 1
    // accumulates into the weight term above).
    math::Gemm<dtype, DeviceContext>(
        CblasNoTrans, CblasNoTrans, M, output_image_size, 1, kOne.data(),
        bias.data(), bias_multiplier_.data(), kOne.data(), Ydata,
        &device_context_);
    // Advance to the next image in the batch.
    Xdata += input_offset;
    Ydata += output_offset;
  }
  return true;
}
|
||||
|
||||
// The implementations.
|
||||
template <typename dtype, class DeviceContext>
|
||||
bool ConvOp<dtype, DeviceContext>::RunOnDeviceWithOrderNHWC() {
|
||||
auto& X = Input(INPUT);
|
||||
auto& filter = Input(FILTER);
|
||||
auto& bias = Input(BIAS);
|
||||
auto* Y = Output(0);
|
||||
const int N = X.dim(0), H = X.dim(1), W = X.dim(2), C = X.dim(3);
|
||||
DCHECK_EQ(filter.ndim(), 4);
|
||||
const int M = filter.dim(0);
|
||||
DCHECK_EQ(filter.dim(1), kernel_h_);
|
||||
DCHECK_EQ(filter.dim(2), kernel_w_);
|
||||
DCHECK_EQ(filter.dim(3), C);
|
||||
DCHECK_EQ(bias.ndim(), 1);
|
||||
DCHECK_EQ(bias.dim(0), M);
|
||||
ConvPoolOpBase<dtype, DeviceContext>::SetOutputSize(X, Y, filter.dim(0));
|
||||
// The dimension of each kernel
|
||||
const int kernel_dim = kernel_h_ * kernel_w_ * C;
|
||||
// The offset corresponding to a single input image, and a single output
|
||||
// image.
|
||||
const int input_offset = H * W * C;
|
||||
const int output_offset = Y->size() / Y->dim(0);
|
||||
// The output image size is the spatial size of the output.
|
||||
const int output_image_size = Y->dim(1) * Y->dim(2);
|
||||
// The col buffer is stored in HWC order as well - kernel_dim, and the height
|
||||
// and width.
|
||||
const dtype* Xdata = X.data();
|
||||
dtype* Ydata = Y->mutable_data();
|
||||
if (bias_multiplier_.size() != output_image_size) {
|
||||
// If the helper bias multiplier is not M, reshape and fill it with one.
|
||||
bias_multiplier_.Reshape(std::vector<int>(1, output_image_size));
|
||||
math::Set<dtype, DeviceContext>(
|
||||
output_image_size, static_cast<dtype>(1),
|
||||
bias_multiplier_.mutable_data(), &device_context_);
|
||||
}
|
||||
// Specialized path for 1 by 1 convolution
|
||||
if (kernel_dim == C && Y->dim(1) == X.dim(1) && Y->dim(2) == X.dim(2)) {
|
||||
if (bias_multiplier_.size() != N * H * W) {
|
||||
// If the helper bias multiplier is not M, reshape and fill it with one.
|
||||
bias_multiplier_.Reshape(std::vector<int>(1, N * H * W));
|
||||
math::Set<dtype, DeviceContext>(
|
||||
N * H * W, static_cast<dtype>(1),
|
||||
bias_multiplier_.mutable_data(), &device_context_);
|
||||
}
|
||||
math::Gemm<dtype, DeviceContext>(
|
||||
CblasNoTrans, CblasTrans, N * H * W, M, C, kOne.data(), Xdata,
|
||||
filter.data(), kZero.data(), Ydata, &device_context_);
|
||||
math::Gemm<dtype, DeviceContext>(
|
||||
CblasNoTrans, CblasNoTrans, N * H * W, M, 1, kOne.data(),
|
||||
bias_multiplier_.data(), bias.data(), kOne.data(), Ydata,
|
||||
&device_context_);
|
||||
} else {
|
||||
if (bias_multiplier_.size() != output_image_size) {
|
||||
// If the helper bias multiplier is not M, reshape and fill it with one.
|
||||
bias_multiplier_.Reshape(std::vector<int>(1, output_image_size));
|
||||
math::Set<dtype, DeviceContext>(
|
||||
output_image_size, static_cast<dtype>(1),
|
||||
bias_multiplier_.mutable_data(), &device_context_);
|
||||
}
|
||||
col_buffer_.Reshape(std::vector<int>{
|
||||
Y->dim(1), Y->dim(2), kernel_h_, kernel_w_, C});
|
||||
dtype* col_buffer_data = col_buffer_.mutable_data();
|
||||
// Im2col, followed by gemm.
|
||||
for (int image_id = 0; image_id < N; ++image_id) {
|
||||
math::Im2col<dtype, DeviceContext, StorageOrder::NHWC>(
|
||||
Xdata, C, H, W, kernel_h_, kernel_w_,
|
||||
pad_t_, pad_l_, pad_b_, pad_r_, stride_h_, stride_w_, col_buffer_data,
|
||||
&device_context_);
|
||||
// Weight term
|
||||
// Wait, is this right....?
|
||||
math::Gemm<dtype, DeviceContext>(
|
||||
CblasNoTrans, CblasTrans, output_image_size, M, kernel_dim,
|
||||
kOne.data(), col_buffer_data, filter.data(), kZero.data(), Ydata,
|
||||
&device_context_);
|
||||
// Bias term
|
||||
math::Gemm<dtype, DeviceContext>(
|
||||
CblasNoTrans, CblasNoTrans, output_image_size, M, 1, kOne.data(),
|
||||
bias_multiplier_.data(), bias.data(), kOne.data(), Ydata,
|
||||
&device_context_);
|
||||
Xdata += input_offset;
|
||||
Ydata += output_offset;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convolution gradient for NCHW input. Accumulates the filter gradient
// (GEMM against the im2col buffer) and the bias gradient (GEMV reduction
// over spatial positions) per image; the input gradient is produced only
// when a third output is requested, via GEMM followed by col2im.
template <typename dtype, class DeviceContext>
bool ConvGradientOp<dtype, DeviceContext>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& bias = Input(BIAS);
  auto& dY = Input(OUTPUT_GRAD);
  auto* dfilter = Output(FILTER_GRAD);
  auto* dbias = Output(BIAS_GRAD);
  const int N = X.dim(0), C = X.dim(1), H = X.dim(2), W = X.dim(3);
  ConvPoolOpBase<dtype, DeviceContext>::ComputePads(H, W);
  DCHECK_EQ(filter.ndim(), 4);
  const int M = filter.dim(0);
  DCHECK_EQ(filter.dim(1), C);
  DCHECK_EQ(filter.dim(2), kernel_h_);
  DCHECK_EQ(filter.dim(3), kernel_w_);
  DCHECK_EQ(bias.ndim(), 1);
  DCHECK_EQ(bias.dim(0), M);
  dfilter->ReshapeLike(filter);
  dbias->ReshapeLike(bias);
  // The dimension of each kernel
  const int kernel_dim = C * kernel_h_ * kernel_w_;
  // The offset corresponding to a single input image, and a single output
  // image.
  const int input_offset = C * H * W;
  const int output_offset = dY.size() / dY.dim(0);
  // The output image size is the spatial size of the output.
  const int output_image_size = dY.dim(2) * dY.dim(3);
  // The col buffer is stored in CHW order as well - kernel_dim, and the height
  // and width.
  col_buffer_.Reshape(std::vector<int>{kernel_dim, output_image_size});
  if (bias_multiplier_.size() != output_image_size) {
    // If the helper bias multiplier is not M, reshape and fill it with one.
    bias_multiplier_.Reshape(std::vector<int>(1, output_image_size));
    math::Set<dtype, DeviceContext>(
        output_image_size, static_cast<dtype>(1),
        bias_multiplier_.mutable_data(), &device_context_);
  }
  const dtype* Xdata = X.data();
  const dtype* filter_data = filter.data();
  const dtype* dYdata = dY.data();
  dtype* col_buffer_data = col_buffer_.mutable_data();
  dtype* dfilter_data = dfilter->mutable_data();
  dtype* dbias_data = dbias->mutable_data();
  // Pre-setting the gradients to zero.
  math::Set<dtype, DeviceContext>(dfilter->size(), 0, dfilter_data,
                                  &device_context_);
  math::Set<dtype, DeviceContext>(dbias->size(), 0, dbias_data,
                                  &device_context_);
  for (int image_id = 0; image_id < N; ++image_id) {
    // When we compute the gradient with respect to the filters, we need to do
    // im2col to allow gemm-type computation.
    math::Im2col<dtype, DeviceContext, StorageOrder::NCHW>(
        Xdata, C, H, W, kernel_h_, kernel_w_,
        pad_t_, pad_l_, pad_b_, pad_r_, stride_h_, stride_w_, col_buffer_data,
        &device_context_);
    // Gradient with respect to filter: dW += dY[M x out_hw] * col^T
    // (beta = 1 accumulates over the batch).
    math::Gemm<dtype, DeviceContext>(
        CblasNoTrans, CblasTrans, M, kernel_dim, output_image_size,
        kOne.data(), dYdata + output_offset * image_id, col_buffer_data,
        kOne.data(), dfilter_data, &device_context_);
    // Gradient with respect to bias: db += dY * ones (row-sum over spatial
    // positions, accumulated over the batch).
    math::Gemv<dtype, DeviceContext>(
        CblasNoTrans, M, output_image_size, kOne.data(),
        dYdata + output_offset * image_id, bias_multiplier_.data(),
        kOne.data(), dbias_data, &device_context_);
    Xdata += input_offset;
  }
  if (OutputSize() == 3) {
    // Compute the gradient w.r.t. the input.
    auto *dX = Output(INPUT_GRAD);
    dX->ReshapeLike(X);
    dtype* dXdata = dX->mutable_data();
    for (int image_id = 0; image_id < N; ++image_id) {
      // Compute gradient into col_buffer: col = filter^T * dY.
      math::Gemm<dtype, DeviceContext>(
          CblasTrans, CblasNoTrans, kernel_dim, output_image_size, M,
          kOne.data(), filter_data, dYdata + output_offset * image_id,
          kZero.data(), col_buffer_data, &device_context_);
      // Fold overlapping patch gradients back into image layout.
      math::Col2im<dtype, DeviceContext, StorageOrder::NCHW>(
          col_buffer_data, C, H, W, kernel_h_, kernel_w_,
          pad_t_, pad_l_, pad_b_, pad_r_,
          stride_h_, stride_w_, dXdata, &device_context_);
      dXdata += input_offset;
    }
  }
  return true;
}
|
||||
|
||||
// Convolution gradient for NHWC input. Mirrors the NCHW path with the
// transposes adjusted for channels-last layout: the col buffer is
// [out_hw x kernel_dim] and dY is [out_hw x M].
template <typename dtype, class DeviceContext>
bool ConvGradientOp<dtype, DeviceContext>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& bias = Input(BIAS);
  auto& dY = Input(OUTPUT_GRAD);
  auto* dfilter = Output(FILTER_GRAD);
  auto* dbias = Output(BIAS_GRAD);
  const int N = X.dim(0), H = X.dim(1), W = X.dim(2), C = X.dim(3);
  ConvPoolOpBase<dtype, DeviceContext>::ComputePads(H, W);
  DCHECK_EQ(filter.ndim(), 4);
  const int M = filter.dim(0);
  DCHECK_EQ(filter.dim(1), kernel_h_);
  DCHECK_EQ(filter.dim(2), kernel_w_);
  DCHECK_EQ(filter.dim(3), C);
  DCHECK_EQ(bias.ndim(), 1);
  DCHECK_EQ(bias.dim(0), M);
  dfilter->ReshapeLike(filter);
  dbias->ReshapeLike(bias);
  // The dimension of each kernel
  const int kernel_dim = kernel_h_ * kernel_w_ * C;
  // The offset corresponding to a single input image, and a single output
  // image.
  const int input_offset = H * W * C;
  const int output_offset = dY.size() / dY.dim(0);
  // The output image size is the spatial size of the output.
  const int output_image_size = dY.dim(1) * dY.dim(2);
  // The col buffer is stored in CHW order as well - kernel_dim, and the height
  // and width.
  col_buffer_.Reshape(std::vector<int>{output_image_size, kernel_dim});
  if (bias_multiplier_.size() != output_image_size) {
    // If the helper bias multiplier is not M, reshape and fill it with one.
    bias_multiplier_.Reshape(std::vector<int>(1, output_image_size));
    math::Set<dtype, DeviceContext>(
        output_image_size, static_cast<dtype>(1),
        bias_multiplier_.mutable_data(), &device_context_);
  }
  const dtype* Xdata = X.data();
  const dtype* const filter_data = filter.data();
  const dtype* const dYdata = dY.data();
  dtype* col_buffer_data = col_buffer_.mutable_data();
  dtype* dfilter_data = dfilter->mutable_data();
  dtype* dbias_data = dbias->mutable_data();
  // Pre-setting the gradients to zero.
  math::Set<dtype, DeviceContext>(dfilter->size(), 0, dfilter_data,
                                  &device_context_);
  math::Set<dtype, DeviceContext>(dbias->size(), 0, dbias_data,
                                  &device_context_);
  for (int image_id = 0; image_id < N; ++image_id) {
    // When we compute the gradient with respect to the filters, we need to do
    // im2col to allow gemm-type computation.
    math::Im2col<dtype, DeviceContext, StorageOrder::NHWC>(
        Xdata, C, H, W, kernel_h_, kernel_w_,
        pad_t_, pad_l_, pad_b_, pad_r_, stride_h_, stride_w_, col_buffer_data,
        &device_context_);
    // Gradient with respect to filter: dW += dY^T[M x out_hw] * col
    // (beta = 1 accumulates over the batch).
    math::Gemm<dtype, DeviceContext>(
        CblasTrans, CblasNoTrans, M, kernel_dim, output_image_size,
        kOne.data(), dYdata + output_offset * image_id, col_buffer_data,
        kOne.data(), dfilter_data, &device_context_);
    // Gradient with respect to bias: db += dY^T * ones (column-sum over
    // spatial positions, accumulated over the batch).
    math::Gemv<dtype, DeviceContext>(
        CblasTrans, output_image_size, M, kOne.data(),
        dYdata + output_offset * image_id, bias_multiplier_.data(),
        kOne.data(), dbias_data, &device_context_);
    Xdata += input_offset;
  }
  if (OutputSize() == 3) {
    // Compute the gradient w.r.t. the input.
    auto *dX = Output(INPUT_GRAD);
    dX->ReshapeLike(X);
    dtype* dXdata = dX->mutable_data();
    for (int image_id = 0; image_id < N; ++image_id) {
      // Compute gradient into col_buffer: col = dY * filter.
      math::Gemm<dtype, DeviceContext>(
          CblasNoTrans, CblasNoTrans, output_image_size, kernel_dim, M,
          kOne.data(), dYdata + output_offset * image_id, filter_data,
          kZero.data(), col_buffer_data, &device_context_);
      // Fold overlapping patch gradients back into image layout.
      math::Col2im<dtype, DeviceContext, StorageOrder::NHWC>(
          col_buffer_data, C, H, W, kernel_h_, kernel_w_,
          pad_t_, pad_l_, pad_b_, pad_r_,
          stride_h_, stride_w_, dXdata, &device_context_);
      dXdata += input_offset;
    }
  }
  return true;
}
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_CONV_OP_IMPL_H_
|
222
caffe2/operators/conv_pool_op_base.h
Normal file
222
caffe2/operators/conv_pool_op_base.h
Normal file
@ -0,0 +1,222 @@
|
||||
#ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
|
||||
#define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/proto/caffe2_legacy.pb.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
// This macro is here just to allow us to experiment with padding values that
|
||||
// determines, when we have an odd number of pads, which side gets the one
|
||||
// additional pad value, the head side, or the tail side. Setting it to false
|
||||
// will enable the distbelief behavior, and setting it to true will enable
|
||||
// a behavior more consistent with Caffe and CuDNN.
|
||||
const bool PAD_HEAD_MORE = false;
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class ConvPoolOpBase : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
ConvPoolOpBase(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
legacy_pad_(static_cast<LegacyPadding>(
|
||||
OperatorBase::GetSingleArgument<int>(
|
||||
"legacy_pad", LegacyPadding::NOTSET))),
|
||||
pad_(OperatorBase::GetSingleArgument<int>("pad", 0)),
|
||||
pad_t_(OperatorBase::GetSingleArgument<int>("pad_t", 0)),
|
||||
pad_l_(OperatorBase::GetSingleArgument<int>("pad_l", 0)),
|
||||
pad_b_(OperatorBase::GetSingleArgument<int>("pad_b", 0)),
|
||||
pad_r_(OperatorBase::GetSingleArgument<int>("pad_r", 0)),
|
||||
kernel_h_(OperatorBase::GetSingleArgument<int>(
|
||||
"kernel_h", OperatorBase::GetSingleArgument<int>("kernel", 0))),
|
||||
kernel_w_(OperatorBase::GetSingleArgument<int>(
|
||||
"kernel_w", OperatorBase::GetSingleArgument<int>("kernel", 0))),
|
||||
stride_h_(OperatorBase::GetSingleArgument<int>(
|
||||
"stride_h", OperatorBase::GetSingleArgument<int>("stride", 1))),
|
||||
stride_w_(OperatorBase::GetSingleArgument<int>(
|
||||
"stride_w", OperatorBase::GetSingleArgument<int>("stride", 1))),
|
||||
order_(StringToStorageOrder(
|
||||
OperatorBase::GetSingleArgument<string>("order", "NHWC"))) {
|
||||
CHECK_GT(kernel_h_, 0);
|
||||
CHECK_GT(kernel_w_, 0);
|
||||
// For the padding, they should either be the legacy padding strategy
|
||||
// (VALID or SAME), or an explicit, non-negative value.
|
||||
if (legacy_pad_ != LegacyPadding::NOTSET) {
|
||||
CHECK(!OperatorBase::HasArgument("pad") &&
|
||||
!OperatorBase::HasArgument("pad_t") &&
|
||||
!OperatorBase::HasArgument("pad_l") &&
|
||||
!OperatorBase::HasArgument("pad_b") &&
|
||||
!OperatorBase::HasArgument("pad_r"))
|
||||
<< "If you use legacy padding, you should not specify any specific "
|
||||
"padding values.";
|
||||
} else if (OperatorBase::HasArgument("pad")) {
|
||||
// if pad is set, it overrides the individual values.
|
||||
pad_t_ = pad_;
|
||||
pad_l_ = pad_;
|
||||
pad_b_ = pad_;
|
||||
pad_t_ = pad_;
|
||||
}
|
||||
CHECK_GE(pad_, 0);
|
||||
CHECK_GE(pad_t_, 0);
|
||||
CHECK_GE(pad_l_, 0);
|
||||
CHECK_GE(pad_b_, 0);
|
||||
CHECK_GE(pad_r_, 0);
|
||||
CHECK_GT(stride_h_, 0);
|
||||
CHECK_GT(stride_w_, 0);
|
||||
}
|
||||
|
||||
// Sets the output size. The output channel is manually provided since
|
||||
// it may not be identical to the input channels.
|
||||
// This function can be used in the forward functions to obtain the output
|
||||
// sizes.
|
||||
void SetOutputSize(const Tensor<dtype, DeviceContext>& input,
|
||||
Tensor<dtype, DeviceContext>* output,
|
||||
int output_channel) {
|
||||
DCHECK_EQ(input.ndim(), 4);
|
||||
DCHECK_GT(input.size(), 0);
|
||||
int N = input.dim(0);
|
||||
bool channel_first;
|
||||
int C, H, W;
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
channel_first = false;
|
||||
H = input.dim(1);
|
||||
W = input.dim(2);
|
||||
C = input.dim(3);
|
||||
break;
|
||||
case StorageOrder::NCHW:
|
||||
// Old Caffe order.
|
||||
channel_first = true;
|
||||
C = input.dim(1);
|
||||
H = input.dim(2);
|
||||
W = input.dim(3);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown Storage order: " << order_;
|
||||
}
|
||||
CHECK_GE(H, kernel_h_);
|
||||
CHECK_GE(W, kernel_w_);
|
||||
int output_height, output_width;
|
||||
ComputeSizeAndPad(H, stride_h_, kernel_h_,
|
||||
&pad_t_, &pad_b_, &output_height);
|
||||
ComputeSizeAndPad(W, stride_w_, kernel_w_,
|
||||
&pad_l_, &pad_r_, &output_width);
|
||||
if (channel_first) {
|
||||
output->Reshape(
|
||||
std::vector<int>{N, output_channel, output_height, output_width});
|
||||
} else {
|
||||
output->Reshape(
|
||||
std::vector<int>{N, output_height, output_width, output_channel});
|
||||
}
|
||||
DVLOG(2) << "In: N " << N << " C " << C << " H " << H << " W " << W;
|
||||
DVLOG(2) << "Out: C " << output_channel << " H " << output_height
|
||||
<< " W " << output_width;
|
||||
}
|
||||
|
||||
// ComputePads could be used in backward functions to figure out the padding
|
||||
// values for the given input.
|
||||
void ComputePads(const int height, const int width) {
|
||||
if (legacy_pad_ != LegacyPadding::NOTSET) {
|
||||
int output_unused;
|
||||
ComputeSizeAndPad(height, stride_h_, kernel_h_,
|
||||
&pad_t_, &pad_b_, &output_unused);
|
||||
ComputeSizeAndPad(width, stride_w_, kernel_w_,
|
||||
&pad_l_, &pad_r_, &output_unused);
|
||||
}
|
||||
}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
DVLOG(2) << "Running NHWC";
|
||||
return RunOnDeviceWithOrderNHWC();
|
||||
case StorageOrder::NCHW:
|
||||
DVLOG(2) << "Running NCHW";
|
||||
return RunOnDeviceWithOrderNCHW();
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||
}
|
||||
// To suppress old compiler warnings
|
||||
return true;
|
||||
}
|
||||
|
||||
// The actual function that does the computation, if the different
|
||||
// storage order leads to different implementations.
|
||||
virtual bool RunOnDeviceWithOrderNHWC() { NOT_IMPLEMENTED; return false; }
|
||||
virtual bool RunOnDeviceWithOrderNCHW() { NOT_IMPLEMENTED; return false; }
|
||||
|
||||
virtual ~ConvPoolOpBase() {}
|
||||
|
||||
protected:
|
||||
int pad_t_;
|
||||
int pad_l_;
|
||||
int pad_b_;
|
||||
int pad_r_;
|
||||
int kernel_h_;
|
||||
int kernel_w_;
|
||||
int stride_h_;
|
||||
int stride_w_;
|
||||
StorageOrder order_;
|
||||
|
||||
inline void ComputeSizeAndPad(
|
||||
const int in_size, const int stride, const int kernel,
|
||||
int* pad_head, int* pad_tail, int* out_size) {
|
||||
if (legacy_pad_ == LegacyPadding::NOTSET) {
|
||||
// We will just use the direct padding head and tail values, but we
|
||||
// will verify that they are non-negative.
|
||||
CHECK_GE(*pad_head, 0);
|
||||
CHECK_GE(*pad_tail, 0);
|
||||
*out_size = static_cast<int>(
|
||||
static_cast<float>(in_size + *pad_head + *pad_tail - kernel) / stride
|
||||
+ 1);
|
||||
} else {
|
||||
int legacy_target_size;
|
||||
switch (legacy_pad_) {
|
||||
case LegacyPadding::VALID:
|
||||
legacy_target_size =
|
||||
std::ceil(static_cast<float>(in_size - kernel + 1) / stride);
|
||||
break;
|
||||
case LegacyPadding::SAME:
|
||||
legacy_target_size = std::ceil(static_cast<float>(in_size) / stride);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unsupported raw pad value.";
|
||||
}
|
||||
int pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
|
||||
// In legacy padding, if there is an odd padding value, we will need
|
||||
// to pad more on the tail side.
|
||||
if (PAD_HEAD_MORE) {
|
||||
*pad_head = (pad_needed + 1) / 2;
|
||||
} else {
|
||||
*pad_head = pad_needed / 2;
|
||||
}
|
||||
*pad_tail = pad_needed - *pad_head;
|
||||
*out_size = static_cast<int>(
|
||||
static_cast<float>(in_size + pad_needed - kernel) / stride + 1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
LegacyPadding legacy_pad_;
|
||||
int pad_;
|
||||
DISABLE_COPY_AND_ASSIGN(ConvPoolOpBase);
|
||||
};
|
||||
|
||||
// Convenience macro for ConvPoolOpBase subclasses: re-exports the operator
// base helpers and brings the protected pad/kernel/stride/order members into
// the derived template's scope so they can be used unqualified.
#define USE_CONV_POOL_BASE_FUNCTIONS \
  USE_OPERATOR_BASE_FUNCTIONS; \
  using ConvPoolOpBase<dtype, DeviceContext>::pad_t_; \
  using ConvPoolOpBase<dtype, DeviceContext>::pad_l_; \
  using ConvPoolOpBase<dtype, DeviceContext>::pad_b_; \
  using ConvPoolOpBase<dtype, DeviceContext>::pad_r_; \
  using ConvPoolOpBase<dtype, DeviceContext>::kernel_h_; \
  using ConvPoolOpBase<dtype, DeviceContext>::kernel_w_; \
  using ConvPoolOpBase<dtype, DeviceContext>::stride_h_; \
  using ConvPoolOpBase<dtype, DeviceContext>::stride_w_; \
  using ConvPoolOpBase<dtype, DeviceContext>::order_
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
|
58
caffe2/operators/cross_entropy_op.cc
Normal file
58
caffe2/operators/cross_entropy_op.cc
Normal file
@ -0,0 +1,58 @@
|
||||
#include "caffe2/operators/cross_entropy_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <>
|
||||
bool LabelCrossEntropyOp<float, CPUContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CPUContext> >(1);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
Y->Reshape(std::vector<int>{N});
|
||||
const auto* Xdata = X.data();
|
||||
const auto* labeldata = label.data();
|
||||
auto* Ydata = Y->mutable_data();
|
||||
for (int i = 0; i < N; ++i) {
|
||||
DCHECK_LT(labeldata[i], D);
|
||||
Ydata[i] = -log(std::max(Xdata[i * D + labeldata[i]], kLOG_THRESHOLD()));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool LabelCrossEntropyGradientOp<float, CPUContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CPUContext> >(1);
|
||||
auto& dY = Input(2);
|
||||
auto* dX = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
DCHECK_EQ(dY.ndim(), 1);
|
||||
DCHECK_EQ(dY.dim(0), N);
|
||||
dX->ReshapeLike(X);
|
||||
math::Set<float, CPUContext>(dX->size(), 0.f, dX->mutable_data(),
|
||||
&device_context_);
|
||||
const float* Xdata = X.data();
|
||||
const float* dYdata = dY.data();
|
||||
const int* labeldata = label.data();
|
||||
float* dXdata = dX->mutable_data();
|
||||
for (int i = 0; i < N; ++i) {
|
||||
DCHECK_LT(labeldata[i], D);
|
||||
dXdata[i * D + labeldata[i]] =
|
||||
- dYdata[i] / std::max(Xdata[i * D + labeldata[i]], kLOG_THRESHOLD());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
REGISTER_CPU_OPERATOR(LabelCrossEntropy,
|
||||
LabelCrossEntropyOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(LabelCrossEntropyGradient,
|
||||
LabelCrossEntropyGradientOp<float, CPUContext>)
|
||||
} // namespace caffe2
|
70
caffe2/operators/cross_entropy_op.cu
Normal file
70
caffe2/operators/cross_entropy_op.cu
Normal file
@ -0,0 +1,70 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/cross_entropy_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace {
|
||||
__global__ void LabelCrossEntropyKernel(
|
||||
const int N, const int D, const float* Xdata, const int* labeldata,
|
||||
const float log_threshold, float* Ydata) {
|
||||
CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
Ydata[i] = -logf(max(Xdata[i * D + labeldata[i]], log_threshold));
|
||||
}
|
||||
}
|
||||
__global__ void LabelCrossEntropyGradientKernel(
|
||||
const int N, const int D, const float* Xdata, const int* labeldata,
|
||||
const float* dYdata, const float log_threshold, float* dXdata) {
|
||||
CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
int idx = i * D + labeldata[i];
|
||||
dXdata[idx] = - dYdata[i] / max(Xdata[idx], log_threshold);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
bool LabelCrossEntropyOp<float, CUDAContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CUDAContext> >(1);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
Y->Reshape(std::vector<int>(1, N));
|
||||
LabelCrossEntropyKernel<<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
N, D, X.data(), label.data(), kLOG_THRESHOLD(), Y->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool LabelCrossEntropyGradientOp<float, CUDAContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto& label = OperatorBase::Input<Tensor<int, CUDAContext> >(1);
|
||||
auto& dY = Input(2);
|
||||
auto* dX = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim(0);
|
||||
int D = X.dim(1);
|
||||
DCHECK_EQ(label.ndim(), 1);
|
||||
DCHECK_EQ(label.dim(0), N);
|
||||
DCHECK_EQ(dY.ndim(), 1);
|
||||
DCHECK_EQ(dY.dim(0), N);
|
||||
dX->ReshapeLike(X);
|
||||
math::Set<float, CUDAContext>(
|
||||
dX->size(), 0.f, dX->mutable_data(), &device_context_);
|
||||
LabelCrossEntropyGradientKernel<<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
N, D, X.data(), label.data(), dY.data(), kLOG_THRESHOLD(),
|
||||
dX->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(LabelCrossEntropy,
|
||||
LabelCrossEntropyOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(LabelCrossEntropyGradient,
|
||||
LabelCrossEntropyGradientOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
44
caffe2/operators/cross_entropy_op.h
Normal file
44
caffe2/operators/cross_entropy_op.h
Normal file
@ -0,0 +1,44 @@
|
||||
#ifndef CAFFE2_OPERATORS_CROSS_ENTROPY_OP_H_
|
||||
#define CAFFE2_OPERATORS_CROSS_ENTROPY_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class LabelCrossEntropyOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_SIMPLE_CTOR_DTOR(LabelCrossEntropyOp);
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
bool RunOnDevice() override;
|
||||
|
||||
protected:
|
||||
static constexpr dtype kLOG_THRESHOLD() { return 1e-20; }
|
||||
// Input: X, label
|
||||
// Output: Y
|
||||
INPUT_OUTPUT_STATS(2, 2, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(LabelCrossEntropyOp);
|
||||
};
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class LabelCrossEntropyGradientOp final
|
||||
: public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_SIMPLE_CTOR_DTOR(LabelCrossEntropyGradientOp);
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
bool RunOnDevice() override;
|
||||
|
||||
protected:
|
||||
// Input: X, label, dY
|
||||
// Ouptut: dX. There is no gradient with respect to the label.
|
||||
static constexpr dtype kLOG_THRESHOLD() { return 1e-20; }
|
||||
INPUT_OUTPUT_STATS(3, 3, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(LabelCrossEntropyGradientOp);
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_CROSS_ENTROPY_OP_H_
|
9
caffe2/operators/db.cc
Normal file
9
caffe2/operators/db.cc
Normal file
@ -0,0 +1,9 @@
|
||||
#include "caffe2/operators/db.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace db {
|
||||
|
||||
DEFINE_REGISTRY(Caffe2DBRegistry, DB, const string&, Mode);
|
||||
|
||||
} // namespacd db
|
||||
} // namespace caffe2
|
9
caffe2/operators/depth_split_op.cc
Normal file
9
caffe2/operators/depth_split_op.cc
Normal file
@ -0,0 +1,9 @@
|
||||
#include "caffe2/operators/depth_split_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(DepthSplit, DepthSplitOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(DepthConcat, DepthConcatOp<float, CPUContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
||||
|
10
caffe2/operators/depth_split_op.cu
Normal file
10
caffe2/operators/depth_split_op.cu
Normal file
@ -0,0 +1,10 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/depth_split_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(DepthSplit, DepthSplitOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(DepthConcat, DepthConcatOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
||||
|
141
caffe2/operators/depth_split_op.h
Normal file
141
caffe2/operators/depth_split_op.h
Normal file
@ -0,0 +1,141 @@
|
||||
#ifndef CAFFE2_OPERATORS_DEPTH_SPLIT_OP_H_
|
||||
#define CAFFE2_OPERATORS_DEPTH_SPLIT_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/types.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class DepthSplitOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
DepthSplitOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
order_(StringToStorageOrder(
|
||||
OperatorBase::GetSingleArgument<string>("order", "NHWC"))) {}
|
||||
bool RunOnDevice() override;
|
||||
|
||||
protected:
|
||||
StorageOrder order_;
|
||||
// Input: X, dimensions
|
||||
// The dimensions are stored in CPU.
|
||||
INPUT_OUTPUT_STATS(2, 2, 1, INT_MAX);
|
||||
DISABLE_COPY_AND_ASSIGN(DepthSplitOp);
|
||||
};
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class DepthConcatOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
DepthConcatOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
order_(StringToStorageOrder(
|
||||
OperatorBase::GetSingleArgument<string>("order", "NHWC"))) {}
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
bool RunOnDevice() override;
|
||||
|
||||
protected:
|
||||
StorageOrder order_;
|
||||
// Input: a number of tensors. Output: Y, dimensions
|
||||
// The dimensions are stored in CPU.
|
||||
INPUT_OUTPUT_STATS(1, INT_MAX, 2, 2);
|
||||
DISABLE_COPY_AND_ASSIGN(DepthConcatOp);
|
||||
};
|
||||
|
||||
|
||||
// Implementations
|
||||
template <typename dtype, class DeviceContext>
|
||||
bool DepthSplitOp<dtype, DeviceContext>::RunOnDevice() {
|
||||
auto& input = Input(0);
|
||||
auto& dimensions =
|
||||
OperatorBase::Input<Tensor<int, CPUContext> >(1);
|
||||
const int* dim_data = dimensions.data();
|
||||
DCHECK_EQ(dimensions.size(), OutputSize());
|
||||
DCHECK_EQ(std::accumulate(dim_data, dim_data + OutputSize(), 0),
|
||||
(order_ == StorageOrder::NCHW ? input.dim(1) : input.dim(3)));
|
||||
int input_offset = 0;
|
||||
for (int i = 0; i < OutputSize(); ++i) {
|
||||
auto* output = Output(i);
|
||||
int M, N, lda;
|
||||
switch (order_) {
|
||||
case StorageOrder::NCHW:
|
||||
output->Reshape(vector<int>{
|
||||
input.dim(0), dim_data[i], input.dim(2), input.dim(3)});
|
||||
M = input.dim(0);
|
||||
N = dim_data[i] * input.dim(2) * input.dim(3);
|
||||
lda = input.size() / input.dim(0);
|
||||
break;
|
||||
case StorageOrder::NHWC:
|
||||
output->Reshape(vector<int>{
|
||||
input.dim(0), input.dim(1), input.dim(2), dim_data[i]});
|
||||
M = input.dim(0) * input.dim(1) * input.dim(2);
|
||||
N = dim_data[i];
|
||||
lda = input.dim(3);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unsupported storage order: " << order_;
|
||||
}
|
||||
math::CopyMatrix<dtype, DeviceContext>(
|
||||
M, N, input.data() + input_offset, lda, output->mutable_data(), N,
|
||||
&device_context_);
|
||||
input_offset += N;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
bool DepthConcatOp<dtype, DeviceContext>::RunOnDevice() {
|
||||
auto* output = Output(0);
|
||||
Tensor<int, CPUContext>* dimensions =
|
||||
OperatorBase::Output<Tensor<int, CPUContext> >(1);
|
||||
dimensions->Reshape(vector<int>(1, InputSize()));
|
||||
int* dim_data = dimensions->mutable_data();
|
||||
int output_channels = 0;
|
||||
for (int i = 0; i < InputSize(); ++i) {
|
||||
dim_data[i] =
|
||||
(order_ == StorageOrder::NCHW ? Input(i).dim(1) : Input(i).dim(3));
|
||||
output_channels += dim_data[i];
|
||||
}
|
||||
auto& input_zero = Input(0);
|
||||
output->Reshape(vector<int>{
|
||||
input_zero.dim(0),
|
||||
order_ == StorageOrder::NCHW ? output_channels : input_zero.dim(1),
|
||||
order_ == StorageOrder::NCHW ? input_zero.dim(2) : input_zero.dim(2),
|
||||
order_ == StorageOrder::NCHW ? input_zero.dim(3) : output_channels});
|
||||
int output_offset = 0;
|
||||
for (int i = 0; i < InputSize(); ++i) {
|
||||
auto& input = Input(i);
|
||||
int M, N, ldb;
|
||||
switch (order_) {
|
||||
case StorageOrder::NCHW:
|
||||
CHECK_EQ(input.dim(0), output->dim(0));
|
||||
CHECK_EQ(input.dim(2), output->dim(2));
|
||||
CHECK_EQ(input.dim(3), output->dim(3));
|
||||
M = input.dim(0);
|
||||
N = input.size() / M;
|
||||
ldb = output->size() / output->dim(0);
|
||||
break;
|
||||
case StorageOrder::NHWC:
|
||||
CHECK_EQ(input.dim(0), output->dim(0));
|
||||
CHECK_EQ(input.dim(1), output->dim(1));
|
||||
CHECK_EQ(input.dim(2), output->dim(2));
|
||||
M = input.dim(0) * input.dim(1) * input.dim(2);
|
||||
N = input.dim(3);
|
||||
ldb = output->dim(3);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unsupported storage order: " << order_;
|
||||
}
|
||||
math::CopyMatrix<dtype, DeviceContext>(
|
||||
M, N, input.data(), N, output->mutable_data() + output_offset, ldb,
|
||||
&device_context_);
|
||||
output_offset += N;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_DEPTH_SPLIT_OP_H_
|
52
caffe2/operators/dropout_op.cc
Normal file
52
caffe2/operators/dropout_op.cc
Normal file
@ -0,0 +1,52 @@
|
||||
#include "caffe2/operators/dropout_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <>
|
||||
bool DropoutOp<float, CPUContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
Tensor<bool, CPUContext>* mask =
|
||||
OperatorBase::Output<Tensor<bool, CPUContext> >(1);
|
||||
Y->Reshape(X.dims());
|
||||
mask->Reshape(X.dims());
|
||||
DCHECK_GT(X.size(), 0);
|
||||
float scale = 1. / (1. - ratio_);
|
||||
// mask=true means keep, and mask=false means not keep, so we will
|
||||
// generate probability depending on 1-ratio.
|
||||
std::bernoulli_distribution dist(1. - ratio_);
|
||||
const float* Xdata = X.data();
|
||||
float* Ydata = Y->mutable_data();
|
||||
bool* mask_data = mask->mutable_data();
|
||||
auto& gen = device_context_.RandGenerator();
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
mask_data[i] = dist(gen);
|
||||
Ydata[i] = Xdata[i] * scale * mask_data[i];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool DropoutGradientOp<float, CPUContext>::RunOnDevice() {
|
||||
auto& dY = Input(0);
|
||||
const Tensor<bool, CPUContext>& mask =
|
||||
OperatorBase::Input<Tensor<bool, CPUContext> >(1);
|
||||
auto* dX = Output(0);
|
||||
DCHECK_GT(dY.size(), 0);
|
||||
DCHECK_EQ(dY.size(), mask.size());
|
||||
dX->Reshape(dY.dims());
|
||||
const float* dYdata = dY.data();
|
||||
const bool* mask_data = mask.data();
|
||||
float* dXdata = dX->mutable_data();
|
||||
for (int i = 0; i < dY.size(); ++i) {
|
||||
dXdata[i] = dYdata[i] * mask_data[i];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
REGISTER_CPU_OPERATOR(Dropout, DropoutOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(DropoutGrad, DropoutGradientOp<float, CPUContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
68
caffe2/operators/dropout_op.cu
Normal file
68
caffe2/operators/dropout_op.cu
Normal file
@ -0,0 +1,68 @@
|
||||
#include "caffe2/operators/dropout_op.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
namespace {
|
||||
__global__ void DropoutKernel(const int N, const float ratio,
|
||||
const float* Xdata, float* Ydata,
|
||||
bool* maskdata) {
|
||||
const float scale = 1. / (1. - ratio);
|
||||
CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
maskdata[i] = (Ydata[i] > ratio);
|
||||
Ydata[i] = Xdata[i] * scale * maskdata[i];
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
bool DropoutOp<float, CUDAContext>::RunOnDevice() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
auto* mask = OperatorBase::Output<Tensor<bool, CUDAContext> >(1);
|
||||
Y->Reshape(X.dims());
|
||||
mask->Reshape(X.dims());
|
||||
DCHECK_GT(X.size(), 0);
|
||||
// We do a simple trick here: since curand cannot generate random
|
||||
// boolean numbers, we will generate into dY and write the result to
|
||||
// mask.
|
||||
float* Ydata = Y->mutable_data();
|
||||
CURAND_CHECK(curandGenerateUniform(
|
||||
device_context_.curand_generator(), Ydata, X.size()));
|
||||
DropoutKernel<<<CAFFE_GET_BLOCKS(X.size()), CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
X.size(), ratio_, X.data(), Ydata, mask->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
__global__ void DropoutGradientKernel(const int N, const float* dYdata,
|
||||
const bool* maskdata, float* dXdata) {
|
||||
CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
dXdata[i] = dYdata[i] * maskdata[i];
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
bool DropoutGradientOp<float, CUDAContext>::RunOnDevice() {
|
||||
auto& dY = Input(0);
|
||||
auto& mask =
|
||||
OperatorBase::Input<Tensor<bool, CUDAContext> >(1);
|
||||
auto* dX = Output(0);
|
||||
DCHECK_GT(dY.size(), 0);
|
||||
DCHECK_EQ(dY.size(), mask.size());
|
||||
dX->Reshape(dY.dims());
|
||||
DropoutGradientKernel<<<CAFFE_GET_BLOCKS(dY.size()),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0, device_context_.cuda_stream()>>>(
|
||||
dY.size(), dY.data(), mask.data(), dX->mutable_data());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
REGISTER_CUDA_OPERATOR(Dropout, DropoutOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(DropoutGrad, DropoutGradientOp<float, CUDAContext>)
|
||||
} // namespace
|
||||
} // namespace caffe2
|
53
caffe2/operators/dropout_op.h
Normal file
53
caffe2/operators/dropout_op.h
Normal file
@ -0,0 +1,53 @@
|
||||
#ifndef CAFFE2_OPERATORS_DROPOUT_OP_H_
|
||||
#define CAFFE2_OPERATORS_DROPOUT_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class DropoutOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
DropoutOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
ratio_(OperatorBase::GetSingleArgument<float>("ratio", 0.5)) {
|
||||
DCHECK_GT(ratio_, 0);
|
||||
DCHECK_LT(ratio_, 1);
|
||||
}
|
||||
|
||||
bool RunOnDevice();
|
||||
|
||||
protected:
|
||||
float ratio_;
|
||||
// Input: X; Output: Y, mask.
|
||||
INPUT_OUTPUT_STATS(1, 1, 2, 2);
|
||||
DISABLE_COPY_AND_ASSIGN(DropoutOp);
|
||||
};
|
||||
|
||||
template <typename dtype, class DeviceContext>
|
||||
class DropoutGradientOp final : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
DropoutGradientOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<dtype, DeviceContext>(operator_def, ws),
|
||||
ratio_(OperatorBase::GetSingleArgument<float>("ratio", 0.5)) {
|
||||
DCHECK_GT(ratio_, 0);
|
||||
DCHECK_LT(ratio_, 1);
|
||||
}
|
||||
|
||||
bool RunOnDevice();
|
||||
|
||||
protected:
|
||||
float ratio_;
|
||||
// Input: dY, mask; Output: dX
|
||||
INPUT_OUTPUT_STATS(2, 2, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(DropoutGradientOp);
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_DROPOUT_OP_H_
|
12
caffe2/operators/elementwise_op.cc
Normal file
12
caffe2/operators/elementwise_op.cc
Normal file
@ -0,0 +1,12 @@
|
||||
#include "caffe2/operators/elementwise_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
|
||||
REGISTER_CPU_OPERATOR(Add, AddOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(Sub, SubOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(Mul, MulOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(Div, DivOp<float, CPUContext>)
|
||||
|
||||
} // namespace
|
||||
} // namespace caffe2
|
54
caffe2/operators/elementwise_op.h
Normal file
54
caffe2/operators/elementwise_op.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
|
||||
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/utils/math.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <typename dtype, class DeviceContext, class Functor>
|
||||
class BinaryElementwiseOp : public Operator<dtype, DeviceContext> {
|
||||
public:
|
||||
USE_OPERATOR_BASE_FUNCTIONS;
|
||||
USE_SIMPLE_CTOR_DTOR(BinaryElementwiseOp);
|
||||
|
||||
bool RunOnDevice() final {
|
||||
auto& input0 = Input(0);
|
||||
auto& input1 = Input(1);
|
||||
auto* output = Output(0);
|
||||
CHECK_EQ(input0.size(), input1.size());
|
||||
output->ReshapeLike(input0);
|
||||
Functor()(input0.size(), input0.data(), input1.data(),
|
||||
output->mutable_data(), &device_context_);
|
||||
return true;
|
||||
}
|
||||
|
||||
INPUT_OUTPUT_STATS(2, 2, 1, 1);
|
||||
DISABLE_COPY_AND_ASSIGN(BinaryElementwiseOp);
|
||||
};
|
||||
|
||||
|
||||
#define CAFFE2_BINARY_FUNCTOR_WRAPPER(name) \
|
||||
template <typename dtype, class DeviceContext> \
|
||||
struct name##Functor { \
|
||||
inline void operator()(const int n, const dtype* x, const dtype* y, \
|
||||
dtype* output, DeviceContext* device_context) { \
|
||||
math::name<dtype, DeviceContext>(n, x, y, output, device_context); \
|
||||
} \
|
||||
}; \
|
||||
template <typename dtype, class DC> \
|
||||
using name##Op = \
|
||||
BinaryElementwiseOp<dtype, DC, name##Functor<dtype, DC> >
|
||||
|
||||
|
||||
CAFFE2_BINARY_FUNCTOR_WRAPPER(Add);
|
||||
CAFFE2_BINARY_FUNCTOR_WRAPPER(Sub);
|
||||
CAFFE2_BINARY_FUNCTOR_WRAPPER(Mul);
|
||||
CAFFE2_BINARY_FUNCTOR_WRAPPER(Div);
|
||||
#undef CAFFE2_BINARY_FUNCTOR_WRAPPER
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
#endif // CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
|
13
caffe2/operators/elementwise_op_gpu.cc
Normal file
13
caffe2/operators/elementwise_op_gpu.cc
Normal file
@ -0,0 +1,13 @@
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/elementwise_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
|
||||
REGISTER_CUDA_OPERATOR(Add, AddOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(Sub, SubOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(Mul, MulOp<float, CUDAContext>)
|
||||
REGISTER_CUDA_OPERATOR(Div, DivOp<float, CUDAContext>)
|
||||
|
||||
} // namespace
|
||||
} // namespace caffe2
|
25
caffe2/operators/filler_op.cc
Normal file
25
caffe2/operators/filler_op.cc
Normal file
@ -0,0 +1,25 @@
|
||||
#include "caffe2/operators/filler_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <>
|
||||
bool RangeFillOp<float, CPUContext>::Fill(
|
||||
Tensor<float, CPUContext>* output) {
|
||||
float* data = output->mutable_data();
|
||||
for (int i = 0; i < output->size(); ++i) {
|
||||
data[i] = i;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
REGISTER_CPU_OPERATOR(UniformFill, UniformFillOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(ConstantFill, ConstantFillOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(GivenTensorFill, GivenTensorFillOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(GaussianFill, GaussianFillOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(XavierFill, XavierFillOp<float, CPUContext>)
|
||||
REGISTER_CPU_OPERATOR(RangeFill, RangeFillOp<float, CPUContext>)
|
||||
|
||||
} // namespace
|
||||
} // namespace caffe2
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user