// pytorch/test/cpp/api/rnn.cpp

#include <catch.hpp>

#include <torch/nn/modules/linear.h>
#include <torch/nn/modules/rnn.h>
#include <torch/optim/adam.h>
#include <torch/tensor.h>
#include <torch/utils.h>

#include <test/cpp/api/util.h>

using namespace torch::nn;

template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32;
  auto model = std::make_shared<torch::SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");

  torch::optim::Adam optimizer(model->parameters(), 1e-2);
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward(x).view({T, B, nhid}).tanh_();
    x = rnn->forward(x).output[T - 1];
    x = lo->forward(x);
    return x;
  };

  if (cuda) {
    model->cuda();
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().toType(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);

    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
};

// Asserts, via Catch REQUIRE, that an RNNOutput has the shapes the "sizes"
// test sections expect: a 3-D `output` of 10 x 16 x 64 and a 4-D `state` of
// 2 x 3 x 16 x 64 whose norm is non-zero.
void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 outputs and 3 layers, with an input of batch
  // 16 and 10 time steps (10 x 16 x n) -- i.e. the first dimension is
  // presumably time and the second is the batch, matching the "Batchsize"
  // check on the state below.

  REQUIRE(output.output.ndimension() == 3);
  REQUIRE(output.output.size(0) == 10);
  REQUIRE(output.output.size(1) == 16);
  REQUIRE(output.output.size(2) == 64);

  REQUIRE(output.state.ndimension() == 4);
  REQUIRE(output.state.size(0) == 2); // (hx, cx)
  REQUIRE(output.state.size(1) == 3); // layers
  REQUIRE(output.state.size(2) == 16); // Batchsize
  REQUIRE(output.state.size(3) == 64); // 64 hidden dims

  // Something is in the hiddens
  REQUIRE(output.state.norm().toCFloat() > 0);
}

// CPU checks for the LSTM module: result shapes across two forward passes,
// and exact output/state values for a tiny, deterministically filled model.
TEST_CASE("rnn") {
  torch::manual_seed(0);

  SECTION("sizes") {
    LSTM lstm(LSTMOptions(128, 64).layers(3).dropout(0.2));
    auto input = torch::randn({10, 16, 128}, torch::requires_grad());
    auto output = lstm->forward(input);

    // Backward is run on the mean of the input tensor.
    auto input_mean = input.mean();
    input_mean.backward();
    check_lstm_sizes(output);

    // Second forward pass, fed the state returned by the first one.
    auto next = lstm->forward(input, output.state);
    check_lstm_sizes(next);

    // Hiddens changed
    torch::Tensor diff = next.state - output.state;
    REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
  }

  SECTION("outputs") {
    // Make sure the outputs match pytorch outputs
    LSTM lstm(2, 2);

    // Overwrite every parameter's storage with the ramp k / numel.
    for (auto& param : lstm->parameters()) {
      const float count = param->numel();
      float* values = static_cast<float*>(param->data().storage()->data());
      for (size_t k = 0; k < count; ++k) {
        values[k] = k / count;
      }
    }

    // Fill the 3 x 4 x 2 input storage with the descending ramp
    // (numel - k) / numel.
    auto input = torch::empty({3, 4, 2}, torch::requires_grad());
    const float input_count = input.data().numel();
    float* input_values =
        static_cast<float*>(input.data().storage()->data());
    for (size_t k = 0; k < input_count; ++k) {
      input_values[k] = (input_count - k) / input_count;
    }

    auto out = lstm->forward(input);
    REQUIRE(out.output.ndimension() == 3);
    REQUIRE(out.output.size(0) == 3);
    REQUIRE(out.output.size(1) == 4);
    REQUIRE(out.output.size(2) == 2);

    // Reference values for the flattened output, compared to within 1e-3.
    auto flat = out.output.data().view(3 * 4 * 2);
    float c_out[] = {
        0.4391, 0.5402, 0.4330, 0.5324,
        0.4261, 0.5239, 0.4183, 0.5147,
        0.6822, 0.8064, 0.6726, 0.7968,
        0.6620, 0.7860, 0.6501, 0.7741,
        0.7889, 0.9003, 0.7769, 0.8905,
        0.7635, 0.8794, 0.7484, 0.8666};
    for (size_t i = 0; i < 3 * 4 * 2; ++i) {
      REQUIRE(std::abs(flat[i].toCFloat() - c_out[i]) < 1e-3);
    }

    REQUIRE(out.state.ndimension() == 4); // (hx, cx) x layers x B x 2
    REQUIRE(out.state.size(0) == 2);
    REQUIRE(out.state.size(1) == 1);
    REQUIRE(out.state.size(2) == 4);
    REQUIRE(out.state.size(3) == 2);

    // Reference values for the flattened state, compared to within 1e-3.
    flat = out.state.data().view(16);
    float h_out[] = {
        0.7889, 0.9003, 0.7769, 0.8905,
        0.7635, 0.8794, 0.7484, 0.8666,
        1.1647, 1.6106, 1.1425, 1.5726,
        1.1187, 1.5329, 1.0931, 1.4911};
    for (size_t i = 0; i < 16; ++i) {
      REQUIRE(std::abs(flat[i].toCFloat() - h_out[i]) < 1e-3);
    }
  }
}

// Runs the test_RNN_xor training check on CPU with a 2-layer LSTM; both
// LSTMOptions size arguments are the hidden size handed in by the helper.
TEST_CASE("rnn/integration/LSTM") {
  REQUIRE(test_RNN_xor<LSTM>(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
}

// Runs the test_RNN_xor training check on CPU with a 2-layer GRU; both
// GRUOptions size arguments are the hidden size handed in by the helper.
TEST_CASE("rnn/integration/GRU") {
  REQUIRE(
      test_RNN_xor<GRU>([](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
}

// Runs the test_RNN_xor training check on CPU with a 2-layer RNN, once per
// activation selected through RNNOptions (relu() and tanh()).
TEST_CASE("rnn/integration/RNN") {
  SECTION("relu") {
    REQUIRE(test_RNN_xor<RNN>(
        [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
  }
  SECTION("tanh") {
    REQUIRE(test_RNN_xor<RNN>(
        [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
  }
}

// CUDA variants of the checks above, tagged "[cuda]": the LSTM shape test
// with the model and input placed on torch::kCUDA, plus the test_RNN_xor
// training check for LSTM, GRU and RNN (relu/tanh) with cuda = true.
TEST_CASE("rnn_cuda", "[cuda]") {
  SECTION("sizes") {
    torch::manual_seed(0);
    LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
    model->cuda();
    auto x = torch::randn(
        {10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
    auto output = model->forward(x);
    // NOTE(review): backward runs on the mean of the input, not on the LSTM
    // output -- confirm this is intended.
    auto y = x.mean();

    y.backward();
    check_lstm_sizes(output);

    // Second forward pass, fed the state returned by the first one.
    auto next = model->forward(x, output.state);

    check_lstm_sizes(next);

    torch::Tensor diff = next.state - output.state;

    // Hiddens changed
    REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
  }

  SECTION("lstm") {
    REQUIRE(test_RNN_xor<LSTM>(
        [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
  }

  SECTION("gru") {
    REQUIRE(test_RNN_xor<GRU>(
        [](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
  }

  SECTION("rnn") {
    SECTION("relu") {
      REQUIRE(test_RNN_xor<RNN>(
          [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
    }
    SECTION("tanh") {
      REQUIRE(test_RNN_xor<RNN>(
          [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
    }
  }
}