Summary: Sets the random seed at the start of the C++ tests so that everything is super deterministic. I made sure we only generate random values from torch instead of `std::`, so that this seed always applies, i.e. I do `torch::randint(2, {2}, at::kInt64)` instead of `std::rand() % 2`. Also got rid of the tests that test the random seeding, since they would interfere here; those tests were not useful anyway, since we just use ATen's seeding mechanism, which should work.

Fixes #7288 #7286 #7289

ebetica ezyang

Closes https://github.com/pytorch/pytorch/pull/8903

Differential Revision: D8667269

Pulled By: goldsborough

fbshipit-source-id: a833e86e156d5e68dae8c53a4b1c433cb0608b6c
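For reference, a minimal sketch of the pattern this change describes: seed once with `torch::manual_seed`, then draw all randomness through torch factory functions rather than `std::rand()`, so the seed governs every value. The test name and the `torch::kInt64` dtype alias are illustrative assumptions, not part of this file:

```cpp
#include <catch.hpp>

#include <torch/tensor.h>
#include <torch/utils.h>

// Hypothetical sketch (not in this file): seed once at the start of the test,
// then take all randomness from torch so the seed controls it.
TEST_CASE("seeding-pattern-sketch") {
  torch::manual_seed(0);
  // torch-controlled randomness instead of std::rand() % 2:
  auto bits = torch::randint(2, {2}, torch::kInt64);
  REQUIRE(bits.numel() == 2);
}
```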
#include <catch.hpp>

#include <torch/nn/modules/linear.h>
#include <torch/nn/modules/rnn.h>
#include <torch/optim/adam.h>
#include <torch/tensor.h>
#include <torch/utils.h>

#include <test/cpp/api/util.h>

using namespace torch::nn;

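// Trains a tiny Linear -> recurrent module -> Linear model, where the
// recurrent module is produced by `model_maker`, to predict the sum of a
// random binary sequence. Returns true if the smoothed loss drops below 1e-2
// within max_epoch iterations.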
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32;
  auto model = std::make_shared<torch::SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");

  torch::optim::Adam optimizer(model->parameters(), 1e-2);
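  // Forward pass: flatten the (T, B, 1) input, embed each scalar with l1 and
  // a tanh, run the recurrent module over the sequence, and predict from the
  // output at the last time step.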
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward(x).view({T, B, nhid}).tanh_();
    x = rnn->forward(x).output[T - 1];
    x = lo->forward(x);
    return x;
  };

  if (cuda) {
    model->cuda();
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
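  // Train on freshly sampled binary sequences until the smoothed loss falls
  // below 1e-2; give up after max_epoch iterations.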
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().toType(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);

    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

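    // Exponential moving average of the loss (99% previous, 1% current).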
    running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}

void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 hidden outputs and 3 layers, for an input with
  // 10 time steps and batch size 16 (i.e. 10 x 16 x n)

  REQUIRE(output.output.ndimension() == 3);
  REQUIRE(output.output.size(0) == 10);
  REQUIRE(output.output.size(1) == 16);
  REQUIRE(output.output.size(2) == 64);

  REQUIRE(output.state.ndimension() == 4);
  REQUIRE(output.state.size(0) == 2); // (hx, cx)
  REQUIRE(output.state.size(1) == 3); // layers
  REQUIRE(output.state.size(2) == 16); // batch size
  REQUIRE(output.state.size(3) == 64); // hidden dims

  // Something is in the hiddens
  REQUIRE(output.state.norm().toCFloat() > 0);
}

TEST_CASE("rnn") {
|
|
torch::manual_seed(0);
|
|
SECTION("sizes") {
|
|
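    // Push a (seq_len=10, batch=16, input=128) batch through a 3-layer LSTM
    // with hidden size 64, check the output/state shapes, then feed the
    // returned state back in for a second forward pass.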
    LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
    auto x = torch::randn({10, 16, 128}, torch::requires_grad());
    auto output = model->forward(x);
    auto y = x.mean();

    y.backward();
    check_lstm_sizes(output);

    auto next = model->forward(x, output.state);

    check_lstm_sizes(next);

    torch::Tensor diff = next.state - output.state;

    // Hiddens changed
    REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
  }

SECTION("outputs") {
|
|
// Make sure the outputs match pytorch outputs
|
|
LSTM model(2, 2);
|
|
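    // Overwrite every parameter with deterministic values i / numel so the
    // expected outputs below are reproducible.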
    for (auto& v : model->parameters()) {
      float size = v->numel();
      auto p = static_cast<float*>(v->data().storage()->data());
      for (size_t i = 0; i < size; i++) {
        p[i] = i / size;
      }
    }

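    // Fill the input the same way, with deterministic values (size - i) / size.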
    auto x = torch::empty({3, 4, 2}, torch::requires_grad());
    float size = x.data().numel();
    auto p = static_cast<float*>(x.data().storage()->data());
    for (size_t i = 0; i < size; i++) {
      p[i] = (size - i) / size;
    }

    auto out = model->forward(x);
    REQUIRE(out.output.ndimension() == 3);
    REQUIRE(out.output.size(0) == 3);
    REQUIRE(out.output.size(1) == 4);
    REQUIRE(out.output.size(2) == 2);

    auto flat = out.output.data().view(3 * 4 * 2);
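    // Reference values for these fixed weights and inputs (presumably
    // generated with the Python frontend); compare elementwise within 1e-3.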
    float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
                     0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
                     0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
                     0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
    for (size_t i = 0; i < 3 * 4 * 2; i++) {
      REQUIRE(std::abs(flat[i].toCFloat() - c_out[i]) < 1e-3);
    }

    REQUIRE(out.state.ndimension() == 4); // (hx, cx) x layers x B x 2
    REQUIRE(out.state.size(0) == 2);
    REQUIRE(out.state.size(1) == 1);
    REQUIRE(out.state.size(2) == 4);
    REQUIRE(out.state.size(3) == 2);
    flat = out.state.data().view(16);
    float h_out[] = {0.7889, 0.9003, 0.7769, 0.8905, 0.7635, 0.8794,
                     0.7484, 0.8666, 1.1647, 1.6106, 1.1425, 1.5726,
                     1.1187, 1.5329, 1.0931, 1.4911};
    for (size_t i = 0; i < 16; i++) {
      REQUIRE(std::abs(flat[i].toCFloat() - h_out[i]) < 1e-3);
    }
  }
}

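// End-to-end checks: each recurrent module type must learn the binary-sum
// task via test_RNN_xor.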
TEST_CASE("rnn/integration/LSTM") {
|
|
REQUIRE(test_RNN_xor<LSTM>(
|
|
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
|
|
}
|
|
|
|
TEST_CASE("rnn/integration/GRU") {
|
|
REQUIRE(
|
|
test_RNN_xor<GRU>([](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
|
|
}
|
|
|
|
TEST_CASE("rnn/integration/RNN") {
|
|
SECTION("relu") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
|
|
}
|
|
SECTION("tanh") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
|
|
}
|
|
}
|
|
|
|
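// Same shape and integration checks, but with the model and inputs on CUDA.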
TEST_CASE("rnn_cuda", "[cuda]") {
|
|
SECTION("sizes") {
|
|
torch::manual_seed(0);
|
|
LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
|
|
model->cuda();
|
|
auto x = torch::randn(
|
|
{10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
|
|
auto output = model->forward(x);
|
|
auto y = x.mean();
|
|
|
|
y.backward();
|
|
check_lstm_sizes(output);
|
|
|
|
auto next = model->forward(x, output.state);
|
|
|
|
check_lstm_sizes(next);
|
|
|
|
torch::Tensor diff = next.state - output.state;
|
|
|
|
// Hiddens changed
|
|
REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
|
|
}
|
|
|
|
SECTION("lstm") {
|
|
REQUIRE(test_RNN_xor<LSTM>(
|
|
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
|
|
}
|
|
|
|
SECTION("gru") {
|
|
REQUIRE(test_RNN_xor<GRU>(
|
|
[](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
|
|
}
|
|
|
|
SECTION("rnn") {
|
|
SECTION("relu") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
|
|
}
|
|
SECTION("tanh") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
|
|
}
|
|
}
|
|
}
|