@@ -28,7 +28,48 @@ struct C10_API PyObjectSlot {
      PyInterpreter* self_interpreter,
      PyObject* pyobj,
      PyInterpreterStatus status) {
-    pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed);
+    impl::PyInterpreter* expected = nullptr;
+    switch (status) {
+      case impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED:
+        // caller guarantees there is no multithreaded access; since there is
+        // no data race, it is OK to do a relaxed store
+        pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed);
+        break;
+      case impl::PyInterpreterStatus::TAGGED_BY_US:
+        // no tagging is necessary, the tag is already correct
+        break;
+      case impl::PyInterpreterStatus::MAYBE_UNINITIALIZED:
+        // attempt to claim this TensorImpl with the specified interpreter
+        // tag
+        if (pyobj_interpreter_.compare_exchange_strong(
+                expected, self_interpreter, std::memory_order_acq_rel)) {
+          break;
+        }
+        // test if, actually, it was already tagged by us!  This cannot be
+        // caused by a race, but it can happen when someone conservatively
+        // tagged the tensor as MAYBE_UNINITIALIZED (because they didn't
+        // pre-check the tag) when it was actually already owned by this
+        // interpreter
+        if (expected == self_interpreter) {
+          break;
+        }
+        // fallthrough: we lost the race.  We are guaranteed not to lose the
+        // race with ourselves, as calls to init_pyobj with the same
+        // interpreter ID must be sequentialized by the GIL
+        [[fallthrough]];
+      case impl::PyInterpreterStatus::TAGGED_BY_OTHER:
+        TORCH_CHECK(
+            false,
+            "cannot allocate PyObject for Tensor on interpreter ",
+            self_interpreter,
+            " that has already been used by another torch deploy interpreter ",
+            pyobj_interpreter_.load());
+    }
+
+    // we are the ONLY thread that can have gotten to this point.  It is not
+    // possible to conflict with another zero interpreter, as access is
+    // protected by the GIL
+    // NB: the owns_pyobj tag is initially false
    pyobj_ = pyobj;
  }
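The MAYBE_UNINITIALIZED branch above is a claim-by-compare-exchange pattern: the CAS only succeeds while the tag is still null, and on failure `expected` is overwritten with the current owner, which lets the caller recognize the benign "already tagged by us" case. Below is a minimal standalone sketch of the same pattern; `Interpreter`, `Slot`, and `try_claim` are hypothetical names for illustration only, not the c10 types.

```cpp
#include <atomic>
#include <cassert>

// Hypothetical stand-ins for illustration only (not the c10 types).
struct Interpreter {};

struct Slot {
  std::atomic<Interpreter*> owner{nullptr};

  // Try to claim the slot for `self`. Returns true if `self` ends up as the
  // owner (either we won the race or the slot was already tagged by us).
  bool try_claim(Interpreter* self) {
    Interpreter* expected = nullptr;
    // Succeeds only if `owner` is still nullptr; otherwise `expected` is
    // overwritten with the current owner.
    if (owner.compare_exchange_strong(
            expected, self, std::memory_order_acq_rel)) {
      return true;  // we won the race and tagged the slot
    }
    // Losing the CAS is fine if the slot was already tagged by us earlier
    // (e.g. the caller conservatively assumed MAYBE_UNINITIALIZED).
    return expected == self;
  }
};

int main() {
  Interpreter a, b;
  Slot slot;
  assert(slot.try_claim(&a));   // first claim succeeds
  assert(slot.try_claim(&a));   // re-claim by the same owner is a no-op
  assert(!slot.try_claim(&b));  // a different owner cannot steal the tag
}
```

Using the strong (rather than weak) compare-exchange avoids spurious failures, so a single attempt per call is enough to decide between the three outcomes.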
@@ -56,16 +97,30 @@ struct C10_API PyObjectSlot {
  std::optional<PyObject*> check_pyobj(
      PyInterpreter* self_interpreter,
      bool ignore_hermetic_tls = false) const {
+    // Note [Memory ordering on Python interpreter tag]
    impl::PyInterpreter* interpreter =
        pyobj_interpreter_.load(std::memory_order_acquire);
    if (interpreter == nullptr) {
+      // NB: This never returns DEFINITELY_UNINITIALIZED because there is
+      // always the possibility that another thread races to initialize
+      // after we query here.  The only time when we can conclude a tensor
+      // is definitely uninitialized is when we have just allocated it and
+      // it cannot have escaped to other threads yet
      return std::nullopt;
-    }
-
-    if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
-      return std::nullopt;
+    } else if (interpreter == self_interpreter) {
+      // NB: pyobj_ could still be null!
+      if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
+        return std::nullopt;
+      } else {
+        return _unchecked_untagged_pyobj();
+      }
    } else {
-      return _unchecked_untagged_pyobj();
+      TORCH_CHECK(
+          false,
+          "cannot access PyObject for Tensor on interpreter ",
+          (*self_interpreter)->name(),
+          " that has already been used by another torch deploy interpreter ",
+          (*pyobj_interpreter_.load())->name());
    }
  }
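The `memory_order_acquire` load in `check_pyobj` can pair with the release side of the tagging path above (the `acq_rel` compare-exchange), so a reader that observes the interpreter tag also observes writes made before the tag was published. The generic acquire/release publication sketch below illustrates that guarantee; it is not a restatement of the referenced Note, and all names are illustrative.

```cpp
#include <atomic>
#include <cassert>
#include <thread>

// Generic acquire/release publication: the writer fills in a payload and
// then release-stores a pointer; a reader that acquire-loads the pointer
// and sees it non-null is guaranteed to also see the payload.
int payload = 0;
std::atomic<int*> published{nullptr};

void writer() {
  payload = 42;                                          // plain write
  published.store(&payload, std::memory_order_release);  // publish
}

void reader() {
  int* p = published.load(std::memory_order_acquire);    // pairs with release
  if (p != nullptr) {
    assert(*p == 42);  // visible once the acquire load observes p
  }
}

int main() {
  std::thread t1(writer), t2(reader);
  t1.join();
  t2.join();
}
```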
@@ -75,6 +130,13 @@ struct C10_API PyObjectSlot {

  PyInterpreter& load_pyobj_interpreter() const;

+  // Check if the PyObjectSlot's interpreter is the same as the specified
+  // interpreter
+  bool check_interpreter(PyInterpreter* interpreter);
+
+  // Check if the PyObjectSlot is holding a PyObject, owned or non-owned
+  bool has_pyobj_nonhermetic();
+
  bool owns_pyobj();

  void set_owns_pyobj(bool b);
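`check_interpreter` and `has_pyobj_nonhermetic` are only declared in this hunk; their definitions are not part of the excerpt. As a rough, speculative illustration of the semantics the comments describe, here is a self-contained sketch using hypothetical stand-in types (`Slot`, `Interpreter`); it is not the actual c10 implementation.

```cpp
#include <atomic>
#include <cassert>

// Hypothetical stand-ins mirroring the commented semantics above; not the
// c10::impl types and not the definitions added by this commit.
struct Interpreter {};
using PyObjectPtr = void*;  // placeholder for PyObject*

struct Slot {
  std::atomic<Interpreter*> interpreter{nullptr};
  PyObjectPtr pyobj{nullptr};

  // "Is this slot's interpreter the same as the specified interpreter?"
  bool check_interpreter(Interpreter* candidate) const {
    return interpreter.load(std::memory_order_acquire) == candidate;
  }

  // "Is this slot holding a PyObject, owned or non-owned?"  Hermetic mode is
  // deliberately ignored here, hence the 'nonhermetic' suffix.
  bool has_pyobj_nonhermetic() const {
    return interpreter.load(std::memory_order_acquire) != nullptr &&
        pyobj != nullptr;
  }
};

int main() {
  Interpreter interp;
  Slot slot;
  int dummy_pyobj = 0;  // stands in for a real PyObject

  assert(!slot.check_interpreter(&interp));
  assert(!slot.has_pyobj_nonhermetic());

  slot.interpreter.store(&interp, std::memory_order_release);
  slot.pyobj = &dummy_pyobj;

  assert(slot.check_interpreter(&interp));
  assert(slot.has_pyobj_nonhermetic());
}
```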