mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: Hello, This is a patch to fix `ResourceWarning: unclosed file`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/15746 Differential Revision: D13587286 Pulled By: soumith fbshipit-source-id: 08ac34c5b51d9334867f65a2927bff11511553f3
939 lines
34 KiB
Python
## @package caffe_translator
# Module caffe2.python.caffe_translator
#!/usr/bin/env python2

import argparse
import copy
import logging
import re
import numpy as np  # noqa

from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils, workspace
from google.protobuf import text_format

logging.basicConfig()
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)


def _StateMeetsRule(state, rule):
    """A function that reproduces Caffe's StateMeetsRule functionality."""
    if rule.HasField('phase') and rule.phase != state.phase:
        return False
    if rule.HasField('min_level') and state.level < rule.min_level:
        return False
    if rule.HasField('max_level') and state.level > rule.max_level:
        return False
    curr_stages = set(list(state.stage))
    # All stages listed in rule.stage must be present, otherwise it's not a
    # match.
    if len(rule.stage) and any([s not in curr_stages for s in rule.stage]):
        return False
    # None of the stages listed in rule.not_stage may be present, otherwise
    # it's not a match.
    if len(rule.not_stage) and any([s in curr_stages for s in rule.not_stage]):
        return False
    # If no mismatch was found, the rule is met.
    return True


def _ShouldInclude(net_state, layer):
    """A function that reproduces Caffe's inclusion and exclusion rule."""
    ret = (len(layer.include) == 0)
    # check exclude rules: if any exclusion is met, we shouldn't include.
    ret &= not any([_StateMeetsRule(net_state, rule) for rule in layer.exclude])
    if len(layer.include):
        # check include rules: if any inclusion is met, we should include.
        ret |= any([_StateMeetsRule(net_state, rule) for rule in layer.include])
    return ret


def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
    """Runs the net with legacy_pad still in place and records the output
    shape of every op listed in legacy_pad_ops."""
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
            .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Get dimensions with legacy pad
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        if i in legacy_pad_ops:
            output = op_def.output[0]
            blob_legacy = ws.fetch_blob(output)
            dim_map[i] = blob_legacy.shape
    return dim_map


def _GetLegacyPadArgs(op_def, arg_map):
    """Collects the current padding of an op as a {pad_l, pad_t, pad_r, pad_b}
    dict, expanding a single 'pad' argument to all four sides."""
    pads = {}
    keys = ['pad_l', 'pad_t', 'pad_r', 'pad_b']
    is_pad = 'pad' in arg_map
    if is_pad:
        for k in keys:
            pads[k] = arg_map['pad'].i
    else:
        pads = {x: arg_map[x].i for x in keys}
    return pads


def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
    """Compares the legacy (dim1) and non-legacy (dim2) output shapes and, if
    they differ by one pixel, grows the bottom/right padding to compensate."""
    n1, c1, h1, w1 = dim1
    n2, c2, h2, w2 = dim2
    assert(n1 == n2)
    assert(c1 == c2)
    is_pad = 'pad' in arg_map
    if h1 != h2 or w1 != w2:
        if h1 == h2 + 1:
            pads['pad_b'] += 1
        elif h1 != h2:
            raise Exception("Unexpected dimensions for height:", h1, h2)
        if w1 == w2 + 1:
            pads['pad_r'] += 1
        elif w1 != w2:
            raise Exception("Unexpected dimensions for width:", w1, w2)
        if is_pad:
            op_def.arg.remove(arg_map['pad'])
            args = []
            for name in pads.keys():
                arg = caffe2_pb2.Argument()
                arg.name = name
                arg.i = pads[name]
                args.append(arg)
            op_def.arg.extend(args)
        else:
            for name in pads.keys():
                arg_map[name].i = pads[name]


def _RemoveLegacyPad(net, net_params, input_dims):
    """Removes the legacy_pad argument from Conv/Pool ops, adjusting their pad
    arguments so that the output dimensions stay the same."""
    legacy_pad_ops = []
    for i in range(len(net.op)):
        op_def = net.op[i]
        if re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
                    op_def.type):
            for arg in op_def.arg:
                if arg.name == 'legacy_pad':
                    legacy_pad_ops.append(i)
                    break
    if legacy_pad_ops:
        n, c, h, w = input_dims
        dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
        dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)

        # Run again with the legacy pad argument removed, compare the
        # dimensions, and adjust the pad argument when necessary.
        ws = workspace.C.Workspace()

        external_input = net.op[0].input[0]
        ws.create_blob(external_input).feed_blob(dummy_input)
        for param in net_params.protos:
            ws.create_blob(param.name) \
                .feed_blob(utils.Caffe2TensorToNumpyArray(param))

        for i in range(len(net.op)):
            op_def = net.op[i]
            if i in legacy_pad_ops:
                arg_map = {}
                for arg in op_def.arg:
                    arg_map[arg.name] = arg
                pads = _GetLegacyPadArgs(op_def, arg_map)
                # remove the legacy_pad arg
                for j in range(len(op_def.arg)):
                    arg = op_def.arg[j]
                    if arg.name == 'legacy_pad':
                        del op_def.arg[j]
                        break
                output = op_def.output[0]
                # use a new name to avoid interference with in-place ops
                nonlegacy_output = output + '_nonlegacy'
                op_def.output[0] = nonlegacy_output
                ws._run_operator(op_def.SerializeToString())
                blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
                # reset the output name
                op_def.output[0] = output

                dim1 = dim_map[i]
                dim2 = blob_nonlegacy.shape
                _AdjustDims(op_def, arg_map, pads, dim1, dim2)

            ws._run_operator(op_def.SerializeToString())
    return net


def _GetBlobDimMap(net, net_params, dummy_input):
    """Runs the net on a dummy input and maps every output blob name to its
    shape."""
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
            .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Run the net and record the shape of every output blob.
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        for output in op_def.output:
            blob = ws.fetch_blob(output)
            dim_map[output] = blob.shape
    return dim_map


def _GetInputDims(caffe_net):
    """Reads the input blob dimensions from the Caffe NetParameter, checking
    input_dim, input_shape and the first layer's input_param in turn."""
    input_dims = []
    if caffe_net.input_dim:
        input_dims = caffe_net.input_dim
    elif caffe_net.input_shape:
        input_dims = caffe_net.input_shape[0].dim
    elif caffe_net.layer[0].input_param.shape:
        # getting input dimension from first layer
        input_dims = caffe_net.layer[0].input_param.shape[0].dim
    return input_dims


class TranslatorRegistry(object):
    registry_ = {}

    @classmethod
    def Register(cls, op_name):
        """A decorator for registering a translator for a Caffe layer type."""

        def Wrapper(func):
            cls.registry_[op_name] = func
            return func

        return Wrapper

    @classmethod
    def TranslateLayer(cls, layer, pretrained_blobs, is_test, **kwargs):
        try:
            caffe_ops, params = cls.registry_[layer.type](
                layer, pretrained_blobs, is_test, **kwargs)
        except KeyError:
            raise KeyError('No translator registered for layer: %s yet.' %
                           str(layer))
        if caffe_ops is None:
            caffe_ops = []
        if type(caffe_ops) is not list:
            caffe_ops = [caffe_ops]
        return caffe_ops, params

    @classmethod
    def TranslateModel(
        cls,
        caffe_net,
        pretrained_net,
        is_test=False,
        net_state=None,
        remove_legacy_pad=False,
        input_dims=None
    ):
        net_state = caffe_pb2.NetState() if net_state is None else net_state
        net = caffe2_pb2.NetDef()
        net.name = caffe_net.name
        net_params = caffe2_pb2.TensorProtos()
        if len(caffe_net.layers) > 0:
            raise ValueError(
                'This translation script only accepts new-style layers that '
                'are stored in the layer field; the deprecated layers field '
                'is not supported.'
            )
        if not input_dims:
            input_dims = _GetInputDims(caffe_net)
        for layer in caffe_net.layer:
            if not _ShouldInclude(net_state, layer):
                log.info('Current net state does not need layer {}'
                         .format(layer.name))
                continue
            log.info('Translate layer {}'.format(layer.name))
            # Get the matching pretrained layer, looking at both the new-style
            # 'layer' field and the old-style 'layers' field.
            pretrained_layers = (
                [l for l in pretrained_net.layer if l.name == layer.name] +
                [l for l in pretrained_net.layers if l.name == layer.name]
            )
            if len(pretrained_layers) > 1:
                raise ValueError(
                    'Found more than one pretrained layer with the same name.')
            elif len(pretrained_layers) == 1:
                pretrained_blobs = [
                    utils.CaffeBlobToNumpyArray(blob)
                    for blob in pretrained_layers[0].blobs
                ]
            else:
                # No pretrained layer for the given layer name. We'll just pass
                # no parameter blobs.
                # print 'No pretrained layer for layer', layer.name
                pretrained_blobs = []
            operators, params = cls.TranslateLayer(
                layer, pretrained_blobs, is_test, net=net,
                net_params=net_params, input_dims=input_dims)
            net.op.extend(operators)
            net_params.protos.extend(params)
        if remove_legacy_pad:
            assert input_dims, \
                'Please specify input_dims to remove legacy_pad'
            net = _RemoveLegacyPad(net, net_params, input_dims)
        return net, net_params


def TranslateModel(*args, **kwargs):
    return TranslatorRegistry.TranslateModel(*args, **kwargs)


def ConvertTensorProtosToInitNet(net_params, input_name):
    """Takes the net_params returned from TranslateModel, and wraps them into
    an init net that contains GivenTensorFill ops.

    This is a very simple feature that only works with float tensors, and is
    only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if len(tensor.float_data) == 0:
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name],
            arg=[
                utils.MakeArgument("shape", list(tensor.dims)),
                utils.MakeArgument("values", tensor.float_data)])
        init_net.op.extend([op])
    init_net.op.extend(
        [core.CreateOperator("ConstantFill", [], [input_name], shape=[1])])
    return init_net
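

# Illustrative sketch (not part of the original module): how TranslateModel and
# ConvertTensorProtosToInitNet are typically combined when calling the
# translator from Python instead of from the command line at the bottom of
# this file. The function name and its arguments are hypothetical placeholders.
def _ExampleTranslateCaffeModel(prototxt_path, caffemodel_path):
    caffe_proto = caffe_pb2.NetParameter()
    caffe_weights = caffe_pb2.NetParameter()
    with open(prototxt_path) as f:
        text_format.Merge(f.read(), caffe_proto)
    with open(caffemodel_path, 'rb') as f:
        caffe_weights.ParseFromString(f.read())
    predict_net, params = TranslateModel(
        caffe_proto, caffe_weights, is_test=True)
    # Wrap the learned parameters into an init net that fills the blobs.
    init_net = ConvertTensorProtosToInitNet(params, predict_net.op[0].input[0])
    return init_net, predict_net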


def BaseTranslate(layer, caffe2_type):
    """A simple translate interface that maps the layer input and output."""
    caffe2_op = caffe2_pb2.OperatorDef()
    caffe2_op.type = caffe2_type
    caffe2_op.input.extend(layer.bottom)
    caffe2_op.output.extend(layer.top)
    return caffe2_op


def AddArgument(op, key, value):
    """Makes an argument based on the value type."""
    op.arg.extend([utils.MakeArgument(key, value)])
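

# Illustrative sketch (not part of the original module): registering a
# translator for a hypothetical Caffe layer type. "MyCustomRelu" is a made-up
# layer name used purely to show how TranslatorRegistry.Register and
# BaseTranslate fit together; the real translators follow below.
@TranslatorRegistry.Register("MyCustomRelu")
def TranslateMyCustomRelu(layer, pretrained_blobs, is_test, **kwargs):
    # Map the layer's bottom/top blobs onto a single Caffe2 Relu op.
    caffe_op = BaseTranslate(layer, "Relu")
    return caffe_op, []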


################################################################################
# Common translators for layers.
################################################################################


@TranslatorRegistry.Register("Input")
def TranslateInput(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("VideoData")
def TranslateVideoData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("Data")
def TranslateData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


# A helper used by the convolution, pooling and deconvolution translators to
# handle the conv/pool-specific stride, pad and kernel parameters.
def _TranslateStridePadKernelHelper(param, caffe_op):
    try:
        if (len(param.stride) > 1 or len(param.kernel_size) > 1 or
                len(param.pad) > 1):
            raise NotImplementedError(
                "Translator currently does not support non-conventional "
                "pad/kernel/stride settings."
            )
        stride = param.stride[0] if len(param.stride) else 1
        pad = param.pad[0] if len(param.pad) else 0
        kernel = param.kernel_size[0] if len(param.kernel_size) else 0
    except TypeError:
        # This catches the case of a PoolingParameter, in which case pad,
        # stride and kernel are scalar fields rather than repeated fields.
        stride = param.stride
        pad = param.pad
        kernel = param.kernel_size
    # Get stride
    if param.HasField("stride_h") or param.HasField("stride_w"):
        AddArgument(caffe_op, "stride_h", param.stride_h)
        AddArgument(caffe_op, "stride_w", param.stride_w)
    else:
        AddArgument(caffe_op, "stride", stride)
    # Get pad
    if param.HasField("pad_h") or param.HasField("pad_w"):
        if param.pad_h == param.pad_w:
            AddArgument(caffe_op, "pad", param.pad_h)
        else:
            AddArgument(caffe_op, "pad_t", param.pad_h)
            AddArgument(caffe_op, "pad_b", param.pad_h)
            AddArgument(caffe_op, "pad_l", param.pad_w)
            AddArgument(caffe_op, "pad_r", param.pad_w)
    else:
        AddArgument(caffe_op, "pad", pad)
    # Get kernel
    if param.HasField("kernel_h") or param.HasField("kernel_w"):
        AddArgument(caffe_op, "kernel_h", param.kernel_h)
        AddArgument(caffe_op, "kernel_w", param.kernel_w)
    else:
        AddArgument(caffe_op, "kernel", kernel)


@TranslatorRegistry.Register("Convolution3D")
def TranslateConvNd(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution3d_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    caffe_op.input.append(output + '_w')

    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])
    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    temporal_pad = 0
    spatial_pad = 0
    if hasattr(param, 'temporal_pad'):
        temporal_pad = param.temporal_pad
    if hasattr(param, 'pad'):
        spatial_pad = param.pad
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)

    # weight
    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
    # bias
    if len(pretrained_blobs) == 2:
        caffe_op.input.append(output + '_b')
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), output + '_b'))
    return caffe_op, params


@TranslatorRegistry.Register("Convolution")
def TranslateConv(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    caffe_op.input.append(output + '_w')
    _TranslateStridePadKernelHelper(param, caffe_op)
    # weight
    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
    # bias
    if len(pretrained_blobs) == 2:
        caffe_op.input.append(output + '_b')
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), output + '_b'))
    # Group convolution option
    if param.group != 1:
        AddArgument(caffe_op, "group", param.group)
    # Get dilation - not tested. If you have a model and this checks out,
    # please provide a test and uncomment this.
    if len(param.dilation) > 0:
        if len(param.dilation) == 1:
            AddArgument(caffe_op, "dilation", param.dilation[0])
        elif len(param.dilation) == 2:
            AddArgument(caffe_op, "dilation_h", param.dilation[0])
            AddArgument(caffe_op, "dilation_w", param.dilation[1])
    return caffe_op, params


@TranslatorRegistry.Register("Deconvolution")
def TranslateDeconv(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution_param
    if param.group > 1:
        raise NotImplementedError(
            "Translator currently does not support group deconvolution."
        )
    caffe_op = BaseTranslate(layer, "ConvTranspose")
    output = caffe_op.output[0]
    _TranslateStridePadKernelHelper(param, caffe_op)
    caffe_op.input.extend([output + '_w'])
    AddArgument(caffe_op, "order", "NCHW")
    weight = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')
    if param.bias_term:
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[1].flatten(), output + '_b'
        )
        caffe_op.input.extend([output + '_b'])
        return caffe_op, [weight, bias]
    else:
        return caffe_op, [weight]


@TranslatorRegistry.Register("Crop")
def TranslateCrop(layer, pretrained_blobs, is_test, **kwargs):
    net, net_params, input_dims = kwargs['net'], kwargs['net_params'], kwargs['input_dims']
    n, c, h, w = input_dims
    dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
    dim_map = _GetBlobDimMap(net, net_params, dummy_input)
    param = layer.crop_param
    axis, offsets = param.axis, param.offset
    caffe_op = BaseTranslate(layer, "Slice")
    input_1 = caffe_op.input[1]
    input_1_dim = dim_map[input_1]
    starts, ends = [], []
    dims = len(dim_map[input_1])
    assert len(offsets) == 1, (
        'Caffe Translator for Crop only works for offset of 1 for now')
    for _ in range(axis):
        starts.append(0)
        ends.append(-1)
    end_offset = [int(offsets[0] + input_1_dim[i]) for i in range(axis, dims)]
    ends.extend(end_offset)
    starts.extend([offsets[0]] * len(end_offset))
    op = caffe2_pb2.OperatorDef()
    op.input.extend([caffe_op.input[0]])
    op.output.extend(caffe_op.output)
    op.arg.extend(caffe_op.arg)
    op.type = caffe_op.type
    AddArgument(op, "starts", starts)
    AddArgument(op, "ends", ends)
    return op, []


@TranslatorRegistry.Register("ReLU")
def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
    return BaseTranslate(layer, "Relu"), []


@TranslatorRegistry.Register("Pooling")
def TranslatePool(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling_param
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    _TranslateStridePadKernelHelper(param, caffe_op)
    AddArgument(caffe_op, "order", "NCHW")
    try:
        # In the Facebook port of Caffe, a torch_pooling field was added to
        # map the pooling computation of Torch. Essentially, it uses
        #   floor((height + 2 * padding - kernel) / stride) + 1
        # instead of
        #   ceil((height + 2 * padding - kernel) / stride) + 1
        # which is Caffe's version.
        # Torch pooling is actually the same as Caffe2 pooling, so we don't
        # need to do anything.
        is_torch_pooling = param.torch_pooling
    except AttributeError:
        is_torch_pooling = False
    if not is_torch_pooling:
        AddArgument(caffe_op, "legacy_pad",
                    caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
    if param.global_pooling:
        AddArgument(caffe_op, "global_pooling", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Pooling3D")
def TranslatePool3D(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling3d_param
    if param.pool == caffe_pb2.Pooling3DParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.Pooling3DParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    AddArgument(caffe_op, "order", "NCHW")
    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])
    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    temporal_pad = 0
    spatial_pad = 0
    if hasattr(param, 'temporal_pad'):
        temporal_pad = param.temporal_pad
    if hasattr(param, 'pad'):
        spatial_pad = param.pad
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
    return caffe_op, []


@TranslatorRegistry.Register("LRN")
def TranslateLRN(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "LRN")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_scale'])
    param = layer.lrn_param
    if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
        raise ValueError(
            "Does not support norm region other than across channels.")
    AddArgument(caffe_op, "size", int(param.local_size))
    AddArgument(caffe_op, "alpha", float(param.alpha))
    AddArgument(caffe_op, "beta", float(param.beta))
    AddArgument(caffe_op, "bias", float(param.k))
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("InnerProduct")
def TranslateInnerProduct(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.inner_product_param
    try:
        if param.axis != 1 or param.transpose:
            raise ValueError(
                "We don't have a test case for non-default axis and transpose "
                "cases yet, so we are disabling them for now. If you have a "
                "model with this, please do send us your model for us to "
                "update this support, and you are more than welcome to send a "
                "PR for this.")
    except AttributeError:
        # We might be using an historic Caffe protobuf that does not have axis
        # and transpose arguments, so we will silently pass.
        pass
    caffe_op = BaseTranslate(layer, "FC")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_w', output + '_b'])
    # To handle the old-style 4-dimensional blob (1, 1, dim_output, dim_input)
    # case, we always explicitly reshape the pretrained blob.
    if pretrained_blobs[0].ndim not in [2, 4]:
        raise ValueError("Unexpected weight ndim.")
    if (pretrained_blobs[0].ndim == 4 and
            list(pretrained_blobs[0].shape[:2]) != [1, 1]):
        raise ValueError(
            "If pretrained blob has 4 dims (old-style Caffe), the first two "
            "should be of value 1, but I got " + str(pretrained_blobs[0].shape))
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].reshape(-1, pretrained_blobs[0].shape[-1]),
        output + '_w'
    )
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b'
    )
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("Dropout")
def TranslateDropout(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Dropout")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_mask'])
    param = layer.dropout_param
    AddArgument(caffe_op, "ratio", param.dropout_ratio)
    if is_test:
        AddArgument(caffe_op, "is_test", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Softmax")
def TranslateSoftmax(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Softmax")
    return caffe_op, []


@TranslatorRegistry.Register("SoftmaxWithLoss")
def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test, **kwargs):
    softmax_op = core.CreateOperator(
        "Softmax", [layer.bottom[0]],
        layer.bottom[0] + "_translator_autogen_softmax")
    xent_op = core.CreateOperator(
        "LabelCrossEntropy",
        [softmax_op.output[0], layer.bottom[1]],
        layer.bottom[0] + "_translator_autogen_xent")
    loss_op = core.CreateOperator(
        "AveragedLoss",
        xent_op.output[0],
        layer.top[0])
    return [softmax_op, xent_op, loss_op], []


@TranslatorRegistry.Register("Accuracy")
def TranslateAccuracy(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Accuracy")
    if layer.accuracy_param.top_k != 1:
        AddArgument(caffe_op, "top_k", layer.accuracy_param.top_k)
    return caffe_op, []


@TranslatorRegistry.Register("Concat")
def TranslateConcat(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Concat")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_dims'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("TanH")
def TranslateTanH(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Tanh")
    return caffe_op, []


@TranslatorRegistry.Register("InstanceNorm")
def TranslateInstanceNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "InstanceNorm")
    output = caffe_op.output[0]
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].flatten(), output + '_w')
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b')
    caffe_op.input.extend([output + '_w', output + '_b'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("BatchNorm")
def TranslateBatchNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "SpatialBN")
    output = caffe_op.output[0]
    param = layer.batch_norm_param
    AddArgument(caffe_op, "is_test", is_test)
    AddArgument(caffe_op, "epsilon", param.eps)
    AddArgument(caffe_op, "order", "NCHW")

    caffe_op.input.extend(
        [output + "_scale",
         output + "_bias",
         output + "_mean",
         output + "_var"])
    if not is_test:
        caffe_op.output.extend(
            [output + "_mean",
             output + "_var",
             output + "_saved_mean",
             output + "_saved_var"])

    n_channels = pretrained_blobs[0].shape[0]
    if pretrained_blobs[2][0] != 0:
        mean = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[0],
            output + '_mean')
        var = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[1],
            output + '_var')
    else:
        raise RuntimeError(
            "The scale factor stored in the BatchNorm layer is zero.")
    if len(pretrained_blobs) > 3:
        # IntelCaffe and NVCaffe use fused BN+Scale: three blobs for BN and
        # two blobs for Scale, so the total number of blobs becomes five
        # (including scale and bias).
        scale = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[3].flatten(),
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[4].flatten(),
            output + '_bias')
    else:
        pretrained_blobs[2][0] = 1
        pretrained_blobs[2] = np.tile(pretrained_blobs[2], (n_channels, ))
        scale = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[2],
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            np.zeros_like(pretrained_blobs[2]),
            output + '_bias')

    return caffe_op, [scale, bias, mean, var]


@TranslatorRegistry.Register("Eltwise")
def TranslateElementWise(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.eltwise_param
    # TODO(jiayq): if we have a protobuf that uses this, lift this constraint
    # and verify that we can correctly translate.
    if len(param.coeff) or param.operation != 1:
        raise RuntimeError("This eltwise layer is not yet supported.")
    caffe_op = BaseTranslate(layer, "Sum")
    return caffe_op, []


@TranslatorRegistry.Register("Scale")
def TranslateScale(layer, pretrained_blobs, is_test, **kwargs):
    mul_op = BaseTranslate(layer, "Mul")
    scale_param = layer.scale_param
    AddArgument(mul_op, "axis", scale_param.axis)
    AddArgument(mul_op, "broadcast", True)
    if len(mul_op.input) == 1:
        # the scale parameter is in pretrained blobs
        if scale_param.num_axes != 1:
            raise RuntimeError("This path has not been verified yet.")

        output = mul_op.output[0]
        mul_op_param = output + 'scale_w'
        mul_op.input.append(mul_op_param)
        weights = []
        weights.append(utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[0].flatten(), mul_op_param))

        add_op = None
        if len(pretrained_blobs) == 1:
            # No bias-term in Scale layer
            pass
        elif len(pretrained_blobs) == 2:
            # Caffe Scale layer supports a bias term such that it computes
            # (scale_param * X + bias), whereas Caffe2 Mul op doesn't.
            # Include a separate Add op for the bias followed by Mul.
            add_op = copy.deepcopy(mul_op)
            add_op.type = "Add"
            add_op_param = output + 'scale_b'
            internal_blob = output + "_internal"
            del mul_op.output[:]
            mul_op.output.append(internal_blob)
            del add_op.input[:]
            add_op.input.append(internal_blob)
            add_op.input.append(add_op_param)
            weights.append(utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), add_op_param))
        else:
            raise RuntimeError("Unexpected number of pretrained blobs in Scale")

        caffe_ops = [mul_op]
        if add_op:
            caffe_ops.append(add_op)
        assert len(caffe_ops) == len(weights)
        return caffe_ops, weights
    elif len(mul_op.input) == 2:
        # TODO(jiayq): find a protobuf that uses this and verify.
        raise RuntimeError("This path has not been verified yet.")
    else:
        raise RuntimeError("Unexpected number of inputs.")


@TranslatorRegistry.Register("Reshape")
def TranslateReshape(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Reshape")
    caffe_op.output.append("_" + caffe_op.input[0] + "_dims")
    reshape_param = layer.reshape_param
    AddArgument(caffe_op, 'shape', reshape_param.shape.dim)
    return caffe_op, []


@TranslatorRegistry.Register("Flatten")
def TranslateFlatten(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.flatten_param
    if param.end_axis != -1:
        raise NotImplementedError("flatten_param.end_axis not supported yet.")

    if param.axis == 0:
        caffe_op = BaseTranslate(layer, "FlattenToVec")
    elif param.axis == 1:
        caffe_op = BaseTranslate(layer, "Flatten")
    else:
        # This could be a Reshape op, but dim size is not known here.
        raise NotImplementedError(
            "Not supported yet for flatten_param.axis {}.".format(param.axis))

    return caffe_op, []


@TranslatorRegistry.Register("Sigmoid")
def TranslateSigmoid(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Sigmoid")
    return caffe_op, []


@TranslatorRegistry.Register("ROIPooling")
def TranslateROIPooling(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "RoIPool")
    AddArgument(caffe_op, "order", "NCHW")

    if is_test:
        AddArgument(caffe_op, "is_test", is_test)
    else:
        # Only used for gradient computation
        caffe_op.output.append(caffe_op.output[0] + '_argmaxes')

    param = layer.roi_pooling_param
    if param.HasField('pooled_h'):
        AddArgument(caffe_op, 'pooled_h', param.pooled_h)
    if param.HasField('pooled_w'):
        AddArgument(caffe_op, 'pooled_w', param.pooled_w)
    if param.HasField('spatial_scale'):
        AddArgument(caffe_op, 'spatial_scale', param.spatial_scale)

    return caffe_op, []


@TranslatorRegistry.Register("PReLU")
def TranslatePRelu(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "PRelu")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_Slope'])
    slope = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_Slope')

    return caffe_op, [slope]


@TranslatorRegistry.Register("Reduction")
def TranslateReduction(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.reduction_param
    if param.operation == caffe_pb2.ReductionParameter.SUM:
        caffe_op = BaseTranslate(layer, "ReduceBackSum")
    elif param.operation == caffe_pb2.ReductionParameter.MEAN:
        caffe_op = BaseTranslate(layer, "ReduceBackMean")
    else:
        raise NotImplementedError("Not yet supported")

    if param.axis > 0:
        # We can't figure out the number of dims to reduce from positive axis
        # for back reduction since the shape info is not known here.
        raise NotImplementedError("Not yet supported")
    num_reduce_dim = -param.axis
    AddArgument(caffe_op, "num_reduce_dim", num_reduce_dim)

    return caffe_op, []
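

# Example command-line usage (illustrative; the .prototxt and .caffemodel file
# names below are placeholders for your own model files):
#
#   python -m caffe2.python.caffe_translator deploy.prototxt model.caffemodel \
#       --init_net init_net.pb --predict_net predict_net.pb
#
# The entry point below also accepts --remove_legacy_pad together with
# --input_dims N C H W for nets that carry Caffe's legacy pooling padding.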


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Utility to convert pretrained caffe models to Caffe2 models.")
    parser.add_argument("prototext", help="Caffe prototext.")
    parser.add_argument("caffemodel", help="Caffe trained model.")
    parser.add_argument("--init_net", help="Caffe2 initialization net.",
                        default="init_net.pb")
    parser.add_argument("--predict_net", help="Caffe2 prediction net.",
                        default="predict_net.pb")
    parser.add_argument("--remove_legacy_pad", help="Remove legacy pad \
                        (Only works for nets with one input blob)",
                        action="store_true",
                        default=False)
    parser.add_argument("--input_dims", help="Dimension of input blob", nargs='+',
                        type=int, default=[])
    args = parser.parse_args()

    caffenet = caffe_pb2.NetParameter()
    caffenet_pretrained = caffe_pb2.NetParameter()
    input_proto = args.prototext
    input_caffemodel = args.caffemodel
    output_init_net = args.init_net
    output_predict_net = args.predict_net

    with open(input_proto) as f:
        text_format.Merge(f.read(), caffenet)
    with open(input_caffemodel, 'rb') as f:
        caffenet_pretrained.ParseFromString(f.read())
    net, pretrained_params = TranslateModel(
        caffenet, caffenet_pretrained, is_test=True,
        remove_legacy_pad=args.remove_legacy_pad,
        input_dims=args.input_dims
    )

    # Assume there is one input and one output
    external_input = net.op[0].input[0]
    external_output = net.op[-1].output[0]

    net.external_input.extend([external_input])
    net.external_input.extend([param.name for param in pretrained_params.protos])
    net.external_output.extend([external_output])
    init_net = ConvertTensorProtosToInitNet(pretrained_params, external_input)

    with open(output_predict_net, 'wb') as f:
        f.write(net.SerializeToString())
    with open(output_predict_net + 'txt', 'w') as f:
        f.write(str(net))
    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())