From d0999451eb582519e4548bc5f50f6e0e6cb51d5b Mon Sep 17 00:00:00 2001 From: Vivek Panyam Date: Wed, 27 May 2020 00:15:30 -0400 Subject: [PATCH] [WIP][Torch] Seeds / Determinism --- .../backends/torchscript/torch_backend.cc | 9 +++++++++ source/neuropod/multiprocess/multiprocess.cc | 5 +++++ .../multiprocess/multiprocess_worker.cc | 7 ++++++- source/neuropod/multiprocess/ope_load_config.cc | 8 ++++++++ source/neuropod/multiprocess/ope_load_config.hh | 5 +++++ source/neuropod/options.hh | 17 +++++++++++++++++ 6 files changed, 50 insertions(+), 1 deletion(-) diff --git a/source/neuropod/backends/torchscript/torch_backend.cc b/source/neuropod/backends/torchscript/torch_backend.cc index da19d735..b21f2662 100644 --- a/source/neuropod/backends/torchscript/torch_backend.cc +++ b/source/neuropod/backends/torchscript/torch_backend.cc @@ -233,6 +233,9 @@ TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, con void TorchNeuropodBackend::load_model_internal() { + at::globalContext().setDeterministicCuDNN(options_.torch_cudnn_deterministic); + at::globalContext().setBenchmarkCuDNN(options_.torch_cudnn_benchmark); + // Get the model from the neuropod auto graph_stream = loader_->get_istream_for_file("0/data/model.pt"); @@ -297,6 +300,12 @@ std::unique_ptr TorchNeuropodBackend::infer_internal(const Neu { torch::NoGradGuard guard; + // Seed if we need to + if (options_.seed >= 0) + { + torch::manual_seed(options_.seed); + } + // Get inference schema const auto &method = model_->get_method("forward"); const auto &schema = SCHEMA(method); diff --git a/source/neuropod/multiprocess/multiprocess.cc b/source/neuropod/multiprocess/multiprocess.cc index b5f68107..f08631fa 100644 --- a/source/neuropod/multiprocess/multiprocess.cc +++ b/source/neuropod/multiprocess/multiprocess.cc @@ -190,6 +190,11 @@ class MultiprocessNeuropodBackend : public NeuropodBackendWithDefaultAllocator(config.neuropod_path, config.default_backend_overrides); + neuropod = 
stdx::make_unique(config.neuropod_path, config.default_backend_overrides, opts); allocator = neuropod->get_tensor_allocator(); inputs.clear(); control_channel.send_message(LOAD_SUCCESS); diff --git a/source/neuropod/multiprocess/ope_load_config.cc b/source/neuropod/multiprocess/ope_load_config.cc index 0c88078e..b1882966 100644 --- a/source/neuropod/multiprocess/ope_load_config.cc +++ b/source/neuropod/multiprocess/ope_load_config.cc @@ -40,6 +40,10 @@ void ipc_serialize(std::ostream &out, const ope_load_config &data) { ipc_serialize(out, data.neuropod_path); ipc_serialize(out, data.default_backend_overrides); + + ipc_serialize(out, data.seed); + ipc_serialize(out, data.torch_cudnn_deterministic); + ipc_serialize(out, data.torch_cudnn_benchmark); } template <> void ipc_deserialize(std::istream &in, ope_load_config &data) { ipc_deserialize(in, data.neuropod_path); ipc_deserialize(in, data.default_backend_overrides); + + ipc_deserialize(in, data.seed); + ipc_deserialize(in, data.torch_cudnn_deterministic); + ipc_deserialize(in, data.torch_cudnn_benchmark); } } // namespace neuropod diff --git a/source/neuropod/multiprocess/ope_load_config.hh b/source/neuropod/multiprocess/ope_load_config.hh index 71af4edb..b4121b89 100644 --- a/source/neuropod/multiprocess/ope_load_config.hh +++ b/source/neuropod/multiprocess/ope_load_config.hh @@ -29,6 +29,11 @@ struct ope_load_config // See the docs in `neuropod.hh` std::vector default_backend_overrides; + + // Fields from options.hh that are relevant to the worker process + // (defaults match RuntimeOptions so an unset config is well-defined) + int64_t seed = -1; + bool torch_cudnn_deterministic = false; + bool torch_cudnn_benchmark = false; }; // Serialization specializations for ope_load_config diff --git a/source/neuropod/options.hh b/source/neuropod/options.hh index 0d1342a0..5c36aa48 100644 --- a/source/neuropod/options.hh +++ b/source/neuropod/options.hh @@ -72,6 +72,23 @@ struct RuntimeOptions // immediately loading the model. 
If this is set to `false`, the model will // not be loaded until the `load_model` method is called on the Neuropod. bool load_model_at_construction = true; + + // EXPERIMENTAL + // A seed to use when running a graph + // Note: this currently only applies to TorchScript models + int64_t seed = -1; + + // EXPERIMENTAL + // Whether or not to run in deterministic mode. See https://pytorch.org/docs/stable/notes/randomness.html#cudnn + // Note: this currently only applies to TorchScript models and affects all torchscript models in the process. + // Should only be used with OPE to avoid affecting other models running in the same process. + bool torch_cudnn_deterministic = false; + + // EXPERIMENTAL + // Whether or not to enable cudnn benchmark. See https://pytorch.org/docs/stable/notes/randomness.html#cudnn + // Note: this currently only applies to TorchScript models and affects all torchscript models in the process. + // Should only be used with OPE to avoid affecting other models running in the same process. + bool torch_cudnn_benchmark = false; }; } // namespace neuropod