[WIP][Torch] Seeds / Determinism

uber · May 27, 2020 · d099945 · d099945
1 parent 40fb3b3
commit d099945
Show file tree

Hide file tree

Showing 6 changed files with 50 additions and 1 deletion.
diff --git a/source/neuropod/backends/torchscript/torch_backend.cc b/source/neuropod/backends/torchscript/torch_backend.cc
@@ -233,6 +233,9 @@ TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, con
 
 void TorchNeuropodBackend::load_model_internal()
 {
+    // Apply the cuDNN options as given (true enables). These live on torch's global context, so they are process-wide.
+    at::globalContext().setDeterministicCuDNN(options_.torch_cudnn_deterministic);
+    at::globalContext().setBenchmarkCuDNN(options_.torch_cudnn_benchmark);
     // Get the model from the neuropod
     auto graph_stream = loader_->get_istream_for_file("0/data/model.pt");
 
@@ -297,6 +300,12 @@ std::unique_ptr<NeuropodValueMap> TorchNeuropodBackend::infer_internal(const Neu
 {
     torch::NoGradGuard guard;
 
+    // Re-seed torch on every inference call when a seed is set; a negative seed (the default) disables seeding
+    if (options_.seed >= 0)
+    {
+        torch::manual_seed(options_.seed);
+    }
+
     // Get inference schema
     const auto &method    = model_->get_method("forward");
     const auto &schema    = SCHEMA(method);

diff --git a/source/neuropod/multiprocess/multiprocess.cc b/source/neuropod/multiprocess/multiprocess.cc
@@ -190,6 +190,11 @@ class MultiprocessNeuropodBackend : public NeuropodBackendWithDefaultAllocator<S
         load_config_.neuropod_path             = neuropod_path_;
         load_config_.default_backend_overrides = default_backend_overrides;
 
+        // Options we want to pass to the worker process
+        load_config_.seed                      = options_.seed;
+        load_config_.torch_cudnn_deterministic = options_.torch_cudnn_deterministic;
+        load_config_.torch_cudnn_benchmark     = options_.torch_cudnn_benchmark;
+
         if (options.load_model_at_construction)
         {
             load_model();

diff --git a/source/neuropod/multiprocess/multiprocess_worker.cc b/source/neuropod/multiprocess/multiprocess_worker.cc
@@ -57,8 +57,13 @@ void multiprocess_worker_loop(const std::string &control_queue_name)
                 ope_load_config config;
                 received.get(config);
 
+                RuntimeOptions opts;
+                opts.seed                      = config.seed;
+                opts.torch_cudnn_deterministic = config.torch_cudnn_deterministic;
+                opts.torch_cudnn_benchmark     = config.torch_cudnn_benchmark;
+
                 // Load a neuropod
-                neuropod  = stdx::make_unique<Neuropod>(config.neuropod_path, config.default_backend_overrides);
+                neuropod  = stdx::make_unique<Neuropod>(config.neuropod_path, config.default_backend_overrides, opts);
                 allocator = neuropod->get_tensor_allocator();
                 inputs.clear();
                 control_channel.send_message(LOAD_SUCCESS);

diff --git a/source/neuropod/multiprocess/ope_load_config.cc b/source/neuropod/multiprocess/ope_load_config.cc
@@ -40,13 +40,21 @@ void ipc_serialize(std::ostream &out, const ope_load_config &data)
 {
     ipc_serialize(out, data.neuropod_path);
     ipc_serialize(out, data.default_backend_overrides);
+    // Runtime options for the worker process; ipc_deserialize reads them back in this same order
     ipc_serialize(out, data.seed);
     ipc_serialize(out, data.torch_cudnn_deterministic);
     ipc_serialize(out, data.torch_cudnn_benchmark);
 }
 
 template <>
 void ipc_deserialize(std::istream &in, ope_load_config &data)
 {
     ipc_deserialize(in, data.neuropod_path);
     ipc_deserialize(in, data.default_backend_overrides);
+    // Runtime options for the worker process; read in the same order ipc_serialize writes them
     ipc_deserialize(in, data.seed);
     ipc_deserialize(in, data.torch_cudnn_deterministic);
     ipc_deserialize(in, data.torch_cudnn_benchmark);
 }
 
 } // namespace neuropod
diff --git a/source/neuropod/multiprocess/ope_load_config.hh b/source/neuropod/multiprocess/ope_load_config.hh
@@ -29,6 +29,11 @@ struct ope_load_config
 
     // See the docs in `neuropod.hh`
     std::vector<BackendLoadSpec> default_backend_overrides;
+
+    // Fields from options.hh that are relevant to the worker process (defaults mirror RuntimeOptions)
+    int64_t seed                      = -1;
+    bool    torch_cudnn_deterministic = false;
+    bool    torch_cudnn_benchmark     = false;
 };
 
 // Serialization specializations for ope_load_config

diff --git a/source/neuropod/options.hh b/source/neuropod/options.hh
@@ -72,6 +72,23 @@ struct RuntimeOptions
     // immediately loading the model. If this is set to `false`, the model will
     // not be loaded until the `load_model` method is called on the Neuropod.
     bool load_model_at_construction = true;
+
+    // EXPERIMENTAL
+    // A seed to use when running a graph. A negative value (the default) means "do not seed".
+    // Note: this currently only applies to TorchScript models
+    int64_t seed = -1;
+
+    // EXPERIMENTAL
+    // Whether or not to run in deterministic mode. See https://pytorch.org/docs/stable/notes/randomness.html#cudnn
+    // Note: this currently only applies to TorchScript models and affects all torchscript models in the process.
+    // Should only be used with OPE to avoid this issue.
+    bool torch_cudnn_deterministic = false;
+
+    // EXPERIMENTAL
+    // Whether or not to enable cudnn benchmark. See https://pytorch.org/docs/stable/notes/randomness.html#cudnn
+    // Note: this currently only applies to TorchScript models and affects all torchscript models in the process.
+    // Should only be used with OPE to avoid this issue.
+    bool torch_cudnn_benchmark = false;
 };
 
 } // namespace neuropod