pysal · martinfleis · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/libpysal/graph/_spatial_lag.py b/libpysal/graph/_spatial_lag.py
@@ -13,7 +13,8 @@ def _lag_spatial(graph, y, categorical=False, ties="raise"):
     graph : Graph
         libpysal.graph.Graph
     y : array
-        numpy array with dimensionality conforming to w
+        numpy array with dimensionality conforming to the graph. Can be 2D if
+        all columns are numerical.
     categorical : bool
         True if y is categorical, False if y is continuous.
     ties : {'raise', 'random', 'tryself'}, optional
@@ -83,49 +84,87 @@ def _lag_spatial(graph, y, categorical=False, ties="raise"):
     if isinstance(y, list):
         y = np.array(y)
 
-    if (
-        isinstance(y.dtype, pd.CategoricalDtype)
+    if y.ndim == 1 and (
+        categorical
+        or isinstance(y.dtype, pd.CategoricalDtype)
         or pd.api.types.is_object_dtype(y.dtype)
         or pd.api.types.is_bool_dtype(y.dtype)
         or pd.api.types.is_string_dtype(y.dtype)
     ):
-        categorical = True
-    if categorical:
-        if isinstance(y, np.ndarray):
-            y = pd.Series(y, index=graph.unique_ids)
-
-        df = pd.DataFrame(data=graph.adjacency)
-        df["neighbor_label"] = y.loc[graph.adjacency.index.get_level_values(1)].values
-        df["own_label"] = y.loc[graph.adjacency.index.get_level_values(0)].values
-        df["neighbor_idx"] = df.index.get_level_values(1)
-        df["focal_idx"] = df.index.get_level_values(0)
-        gb = df.groupby(["focal", "neighbor_label"]).count().groupby(level="focal")
-        n_ties = gb.apply(_check_ties).sum()
-        if n_ties and ties == "raise":
-            raise ValueError(
-                f"There are {n_ties} ties that must be broken "
-                f"to define the categorical "
-                "spatial lag for these observations. To address this "
-                "issue, consider setting `ties='tryself'` "
-                "or `ties='random'` or consult the documentation "
-                "about ties and the categorical spatial lag."
-            )
-        # either there are ties and random|tryself specified or
-        # there are no ties
-        gb = df.groupby(by=["focal"])
-        if ties == "random" or ties == "raise":
-            return gb.apply(_get_categorical_lag).values
-        elif ties == "tryself" or ties == "raise":
-            return gb.apply(_get_categorical_lag, ties="tryself").values
-        else:
-            raise ValueError(
-                f"Received option ties='{ties}', but only options "
-                "'raise','random','tryself' are supported."
-            )
+        return _categorical(graph, y, ties=ties)
 
     return sp @ y
 
 
+def _categorical(graph, y, ties):
+    """
+    Compute the categorical spatial lag for each observation in a graph.
+
+    Parameters
+    ----------
+    graph : Graph
+        libpysal.graph.Graph
+    y : array-like (numpy.ndarray or pandas.Series)
+        Categorical labels for each observation. A numpy array is wrapped in
+        a Series indexed by ``graph.unique_ids``.
+    ties : {'raise', 'random', 'tryself'}
+        Policy on how to break ties when a focal unit has multiple modes:
+          - 'raise' : raise a ValueError if any tie exists.
+          - 'random': delegate to ``_get_categorical_lag`` with its defaults.
+          - 'tryself': delegate to ``_get_categorical_lag(ties="tryself")``.
+
+    Returns
+    -------
+    numpy.ndarray
+        One lag value per focal unit, ordered as the ``focal`` groupby keys.
+
+    Raises
+    ------
+    ValueError
+        - If ties is not one of 'raise', 'random', or 'tryself'.
+        - If ties are present and ties == 'raise'.
+
+    Notes
+    -----
+    Tie detection and resolution are delegated to the helper functions
+    ``_check_ties`` and ``_get_categorical_lag``; the option is validated
+    before any grouping so an unsupported value fails immediately.
+    """
+    if ties not in ("raise", "random", "tryself"):
+        # fail fast, before any grouping work is done
+        raise ValueError(
+            f"Received option ties='{ties}', but only options "
+            "'raise','random','tryself' are supported."
+        )
+
+    if isinstance(y, np.ndarray):
+        y = pd.Series(y, index=graph.unique_ids)
+
+    df = pd.DataFrame(data=graph.adjacency)
+    df["neighbor_label"] = y.loc[graph.adjacency.index.get_level_values(1)].values
+    df["own_label"] = y.loc[graph.adjacency.index.get_level_values(0)].values
+    df["neighbor_idx"] = df.index.get_level_values(1)
+    df["focal_idx"] = df.index.get_level_values(0)
+    gb = df.groupby(["focal", "neighbor_label"]).count().groupby(level="focal")
+    n_ties = gb.apply(_check_ties).sum()
+    if n_ties and ties == "raise":
+        raise ValueError(
+            f"There are {n_ties} ties that must be broken "
+            f"to define the categorical "
+            "spatial lag for these observations. To address this "
+            "issue, consider setting `ties='tryself'` "
+            "or `ties='random'` or consult the documentation "
+            "about ties and the categorical spatial lag."
+        )
+    # either there are ties and random|tryself specified or
+    # there are no ties
+    gb = df.groupby(by=["focal"])
+    if ties == "tryself":
+        return gb.apply(_get_categorical_lag, ties="tryself").values
+    # 'random', or 'raise' once the absence of ties has been established
+    return gb.apply(_get_categorical_lag).values
+
+
 def _check_ties(focal):
     """Reduction to determine if a focal unit has multiple modes for neighbor labels.
 

diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py
@@ -2035,18 +2035,20 @@ def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=Fals
 
         return higher
 
-    def lag(self, y, categorical=False, ties="raise"):
+    def lag(self, y, categorical=None, ties="raise"):
         """Spatial lag operator
 
         Constructs spatial lag based on neighbor relations of the graph.
 
 
         Parameters
         ----------
-        y : array
-            numpy array with dimensionality conforming to w
+        y : array_like
+            Array-like aligned with the graph. Can be 2-dimensional if
+            all columns are numerical.
         categorical : bool
-            True if y is categorical, False if y is continuous.
+            True if y is categorical, False if y is continuous. If None, it is
+            derived from the dtype of ``y``.
         ties : {'raise', 'random', 'tryself'}, optional
             Policy on how to break ties when a focal unit has multiple
             modes for a categorical lag.

diff --git a/libpysal/graph/tests/test_spatial_lag.py b/libpysal/graph/tests/test_spatial_lag.py
@@ -71,3 +71,27 @@ def test_categorical_custom_index(self):
         np.testing.assert_array_equal(
             expected, self.g.lag(["foo", "bar", "foo", "foo"])
         )
+
+    def test_2d_array(self):
+        """Lag of a 2D numeric array equals the column-wise 1D lags."""
+        ys = np.arange(27).reshape(9, 3)
+        lag = self.gc.lag(ys)
+        expected = np.array(
+            [
+                [6.0, 7.0, 8.0],
+                [6.0, 7.0, 8.0],
+                [9.0, 10.0, 11.0],
+                [10.0, 11.0, 12.0],
+                [12.0, 13.0, 14.0],
+                [14.0, 15.0, 16.0],
+                [15.0, 16.0, 17.0],
+                [18.0, 19.0, 20.0],
+                [18.0, 19.0, 20.0],
+            ]
+        )
+
+        np.testing.assert_array_almost_equal(lag, expected)
+
+        # every column of the 2D lag must equal the lag of that column as 1D
+        for i in range(ys.shape[1]):
+            np.testing.assert_array_equal(self.gc.lag(ys[:, i]), lag[:, i])