Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13377

* Enable junk fill for the default CPU allocator. The first diff only enables this for the tests. A second diff will change the default of zero-fill to false.
* Fix tests to use the 64-bit counters that IterOp and LearningRateOp demand.
* Fix kernels that use uninitialized memory.

Reviewed By: salexspb

Differential Revision: D10866512

fbshipit-source-id: 17860e77e63a203edf46d0da0335608f77884821
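To illustrate why junk fill exposes kernels that read uninitialized memory while zero fill hides them, here is a minimal NumPy sketch of the idea (an illustration only, not the allocator change itself):

import numpy as np

# A zero-filled "uninitialized" buffer makes a kernel that forgot to write its
# output look correct: reducing over the untouched values still yields 0.
zero_filled = np.zeros(4, dtype=np.float32)
print(zero_filled.sum())   # 0.0 -- the bug stays hidden

# A junk-filled buffer (here NaN as the junk pattern) turns the same bug into
# an obviously wrong result, so the test fails loudly.
junk_filled = np.full(4, np.nan, dtype=np.float32)
print(junk_filled.sum())   # nan -- the uninitialized read is exposed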
65 lines
2.8 KiB
Python
import numpy as np
import unittest

from caffe2.python import core, workspace, test_util

class TestToyRegression(test_util.TestCase):
    def testToyRegression(self):
        """Tests a toy regression end to end.

        The test code carries out a simple toy regression in the form
            y = 2.0 x1 + 1.5 x2 + 0.5
        by randomly generating gaussian inputs and calculating the ground
        truth outputs in the net as well. It uses a standard SGD to then
        train the parameters.
        """
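        # Start from a clean workspace so that blobs left over from other
        # tests cannot interfere with this one.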
        workspace.ResetWorkspace()
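        # init_net creates the trainable parameters (W, B), the ground truth
        # parameters (W_gt, B_gt), and the scalar constants used by the SGD
        # update below (LR, ONE, ITER).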
        init_net = core.Net("init")
        W = init_net.UniformFill([], "W", shape=[1, 2], min=-1., max=1.)
        B = init_net.ConstantFill([], "B", shape=[1], value=0.0)
        W_gt = init_net.GivenTensorFill(
            [], "W_gt", shape=[1, 2], values=[2.0, 1.5])
        B_gt = init_net.GivenTensorFill([], "B_gt", shape=[1], values=[0.5])
        LR = init_net.ConstantFill([], "LR", shape=[1], value=-0.1)
        ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
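        # The iteration counter has to be a 64-bit integer blob; the Iter and
        # LearningRate operators used in train_net below require an INT64
        # counter.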
        ITER = init_net.ConstantFill([], "ITER", shape=[1], value=0,
                                     dtype=core.DataType.INT64)

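        # train_net draws a random Gaussian batch X, computes the ground truth
        # outputs with (W_gt, B_gt) and the predictions with (W, B), and
        # averages their squared L2 distance into a scalar loss.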
        train_net = core.Net("train")
        X = train_net.GaussianFill([], "X", shape=[64, 2], mean=0.0, std=1.0)
        Y_gt = X.FC([W_gt, B_gt], "Y_gt")
        Y_pred = X.FC([W, B], "Y_pred")
        dist = train_net.SquaredL2Distance([Y_gt, Y_pred], "dist")
        loss = dist.AveragedLoss([], ["loss"])
        # Get gradients for all the computations above. Note that in fact we
        # don't need the gradient of the Y_gt computation, but we'll just
        # leave it there. In many cases, one would load X and Y from disk, so
        # there is really no operator that will calculate the Y_gt input.
        input_to_grad = train_net.AddGradientOperators([loss], skip=2)
        # updates
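        # Iter increments the ITER counter, LearningRate recomputes LR from it
        # on a step-decay schedule, and each WeightedSum computes
        # p <- 1.0 * p + LR * grad(p); since LR is negative this is plain SGD.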
        train_net.Iter(ITER, ITER)
        train_net.LearningRate(ITER, "LR", base_lr=-0.1,
                               policy="step", stepsize=20, gamma=0.9)
        train_net.WeightedSum([W, ONE, input_to_grad[str(W)], LR], W)
        train_net.WeightedSum([B, ONE, input_to_grad[str(B)], LR], B)
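        # Print the loss and the current parameters on every iteration so the
        # training progress shows up in the test log.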
        for blob in [loss, W, B]:
            train_net.Print(blob, [])

        # The CPU part: assemble a plan that runs init_net once and then runs
        # train_net for 200 iterations.
        plan = core.Plan("toy_regression")
        plan.AddStep(core.ExecutionStep("init", init_net))
        plan.AddStep(core.ExecutionStep("train", train_net, 200))

        workspace.RunPlan(plan)
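        # After 200 training iterations the learned parameters should be close
        # to the ground truth values W_gt = [[2.0, 1.5]] and B_gt = [0.5].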
        W_result = workspace.FetchBlob("W")
        B_result = workspace.FetchBlob("B")
        np.testing.assert_array_almost_equal(W_result, [[2.0, 1.5]], decimal=2)
        np.testing.assert_array_almost_equal(B_result, [0.5], decimal=2)
        workspace.ResetWorkspace()

if __name__ == '__main__':
    unittest.main()