Skip to content

Commit 5e1e809

Browse files
authored
feat: create session-scoped cut, DataFrame, MultiIndex, Index, Series, to_datetime, and to_timedelta methods (#2157)
* docs: remove `import bigframes.pandas as bpd` boilerplate from many samples. Also fixes several constructors that didn't take a session, for compatibility with multi-session applications.
* fix docs
* fix unit tests
* skip sklearn test
* fix snapshot
* plumb through session for from_tuples and from_arrays
* add from_frame
* make sure polars session isn't skipped on Kokoro
* fix apply doctest
* make doctest conftest available everywhere
* add python version flexibility for to_dict
* disambiguate explicit names
* disambiguate explicit name none versus no name
* fix for column name comparison in pandas bin op
* avoid setting column labels in special case of Series(block)
* revert doctest changes
* revert doctest changes
* revert df docstrings
* add polars series unit tests
* restore a test
* Revert "restore a test" (this reverts commit 765b678)
* skip null
* skip unsupported tests
* revert more docs changes
* revert more docs
* revert more docs
* fix unit tests python 3.13
* add test to reproduce name error
* add tests for session scoped methods
* fix mypy errors
1 parent 615a620 commit 5e1e809

File tree

11 files changed

+375
-38
lines changed

11 files changed

+375
-38
lines changed

bigframes/core/indexes/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,16 @@ def to_series(
383383

384384
name = self.name if name is None else name
385385
if index is None:
386-
return bigframes.series.Series(data=self, index=self, name=name)
386+
return bigframes.series.Series(
387+
data=self, index=self, name=name, session=self._session
388+
)
387389
else:
388-
return bigframes.series.Series(data=self, index=Index(index), name=name)
390+
return bigframes.series.Series(
391+
data=self,
392+
index=Index(index, session=self._session),
393+
name=name,
394+
session=self._session,
395+
)
389396

390397
def get_level_values(self, level) -> Index:
391398
level_n = level if isinstance(level, int) else self.names.index(level)

bigframes/core/indexes/multi.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import cast, Hashable, Iterable, Sequence
17+
from typing import cast, Hashable, Iterable, Optional, Sequence, TYPE_CHECKING
1818

1919
import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex
2020
import pandas
@@ -23,6 +23,9 @@
2323
from bigframes.core import expression as ex
2424
from bigframes.core.indexes.base import Index
2525

26+
if TYPE_CHECKING:
27+
import bigframes.session
28+
2629

2730
class MultiIndex(Index, vendored_pandas_multindex.MultiIndex):
2831
__doc__ = vendored_pandas_multindex.MultiIndex.__doc__
@@ -33,21 +36,25 @@ def from_tuples(
3336
tuples: Iterable[tuple[Hashable, ...]],
3437
sortorder: int | None = None,
3538
names: Sequence[Hashable] | Hashable | None = None,
39+
*,
40+
session: Optional[bigframes.session.Session] = None,
3641
) -> MultiIndex:
3742
pd_index = pandas.MultiIndex.from_tuples(tuples, sortorder, names)
3843
# Index.__new__ should detect multiple levels and properly create a multiindex
39-
return cast(MultiIndex, Index(pd_index))
44+
return cast(MultiIndex, Index(pd_index, session=session))
4045

4146
@classmethod
4247
def from_arrays(
4348
cls,
4449
arrays,
4550
sortorder: int | None = None,
4651
names=None,
52+
*,
53+
session: Optional[bigframes.session.Session] = None,
4754
) -> MultiIndex:
4855
pd_index = pandas.MultiIndex.from_arrays(arrays, sortorder, names)
4956
# Index.__new__ should detect multiple levels and properly create a multiindex
50-
return cast(MultiIndex, Index(pd_index))
57+
return cast(MultiIndex, Index(pd_index, session=session))
5158

5259
def __eq__(self, other) -> Index: # type: ignore
5360
import bigframes.operations as ops
@@ -71,3 +78,38 @@ def __eq__(self, other) -> Index: # type: ignore
7178
index_labels=[None],
7279
)
7380
)
81+
82+
83+
class MultiIndexAccessor:
    """Session-bound stand-in for the ``MultiIndex`` constructors.

    An instance remembers a single session and exposes ``MultiIndex(...)``
    together with its ``from_*`` alternate constructors, so callers do not
    have to pass ``session=`` on every call.
    """

    def __init__(self, session: bigframes.session.Session):
        # Kept so the constructors below that accept ``session=`` can
        # receive it without the caller supplying it.
        self._session = session

    def __call__(self, *args, **kwargs) -> MultiIndex:
        """Build a MultiIndex, supplying this accessor's session.

        All positional and keyword arguments are forwarded unchanged.

        See :class:`bigframes.pandas.MultiIndex`.
        """
        bound_session = self._session
        return MultiIndex(*args, session=bound_session, **kwargs)

    def from_arrays(self, *args, **kwargs) -> MultiIndex:
        """Build a MultiIndex from arrays, supplying this accessor's session.

        All positional and keyword arguments are forwarded unchanged.

        See :func:`bigframes.pandas.MultiIndex.from_arrays`.
        """
        bound_session = self._session
        return MultiIndex.from_arrays(*args, session=bound_session, **kwargs)

    def from_frame(self, *args, **kwargs) -> MultiIndex:
        """Build a MultiIndex from a frame.

        All positional and keyword arguments are forwarded unchanged.

        See :func:`bigframes.pandas.MultiIndex.from_frame`.
        """
        # NOTE(review): unlike the sibling constructors, ``self._session`` is
        # not forwarded here. Presumably the frame argument already carries
        # its own session -- confirm against ``MultiIndex.from_frame``.
        built = MultiIndex.from_frame(*args, **kwargs)
        # ``cast`` only narrows the static type; it does nothing at runtime.
        return cast(MultiIndex, built)

    def from_tuples(self, *args, **kwargs) -> MultiIndex:
        """Build a MultiIndex from tuples, supplying this accessor's session.

        All positional and keyword arguments are forwarded unchanged.

        See :func:`bigframes.pandas.MultiIndex.from_tuples`.
        """
        bound_session = self._session
        return MultiIndex.from_tuples(*args, session=bound_session, **kwargs)

bigframes/core/log_adapter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,9 @@ def method_logger(method=None, /, *, custom_base_name: Optional[str] = None):
155155
def outer_wrapper(method):
156156
@functools.wraps(method)
157157
def wrapper(*args, **kwargs):
158-
api_method_name = getattr(method, LOG_OVERRIDE_NAME, method.__name__)
158+
api_method_name = getattr(
159+
method, LOG_OVERRIDE_NAME, method.__name__
160+
).lower()
159161
if custom_base_name is None:
160162
qualname_parts = getattr(method, "__qualname__", method.__name__).split(
161163
"."

bigframes/core/reshape/tile.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import typing
18+
from typing import Optional, TYPE_CHECKING
1819

1920
import bigframes_vendored.constants as constants
2021
import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
@@ -31,6 +32,9 @@
3132
import bigframes.operations.aggregations as agg_ops
3233
import bigframes.series
3334

35+
if TYPE_CHECKING:
36+
import bigframes.session
37+
3438

3539
def cut(
3640
x,
@@ -42,6 +46,7 @@ def cut(
4246
*,
4347
right: typing.Optional[bool] = True,
4448
labels: typing.Union[typing.Iterable[str], bool, None] = None,
49+
session: Optional[bigframes.session.Session] = None,
4550
) -> bigframes.series.Series:
4651
if (
4752
labels is not None
@@ -65,7 +70,7 @@ def cut(
6570
raise ValueError("Cannot cut empty array.")
6671

6772
if not isinstance(x, bigframes.series.Series):
68-
x = bigframes.series.Series(x)
73+
x = bigframes.series.Series(x, session=session)
6974

7075
if isinstance(bins, int):
7176
if bins <= 0:

bigframes/core/tools/datetimes.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
from collections.abc import Mapping
1618
from datetime import date, datetime
17-
from typing import Optional, Union
19+
from typing import Optional, TYPE_CHECKING, Union
1820

1921
import bigframes_vendored.constants as constants
2022
import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes
@@ -25,6 +27,9 @@
2527
import bigframes.operations as ops
2628
import bigframes.series
2729

30+
if TYPE_CHECKING:
31+
import bigframes.session
32+
2833

2934
def to_datetime(
3035
arg: Union[
@@ -37,6 +42,7 @@ def to_datetime(
3742
utc: bool = False,
3843
format: Optional[str] = None,
3944
unit: Optional[str] = None,
45+
session: Optional[bigframes.session.Session] = None,
4046
) -> Union[pd.Timestamp, datetime, bigframes.series.Series]:
4147
if isinstance(arg, (int, float, str, datetime, date)):
4248
return pd.to_datetime(
@@ -52,7 +58,7 @@ def to_datetime(
5258
f"to datetime is not implemented. {constants.FEEDBACK_LINK}"
5359
)
5460

55-
arg = bigframes.series.Series(arg)
61+
arg = bigframes.series.Series(arg, session=session)
5662

5763
if format and unit and arg.dtype in (bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE): # type: ignore
5864
raise ValueError("cannot specify both format and unit")

bigframes/formatting_helpers.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,14 @@ def progress_callback(
105105
"""Displays a progress bar while the query is running"""
106106
global current_display, current_display_id, previous_display_html
107107

108-
import bigframes._config
109-
import bigframes.core.events
108+
try:
109+
import bigframes._config
110+
import bigframes.core.events
111+
except ImportError:
112+
# Since this gets called from __del__, skip if the import fails to avoid
113+
# ImportError: sys.meta_path is None, Python is likely shutting down.
114+
# This will allow cleanup to continue.
115+
return
110116

111117
progress_bar = bigframes._config.options.display.progress_bar
112118

bigframes/pandas/__init__.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
from __future__ import annotations
1818

19-
from collections import namedtuple
20-
from datetime import date, datetime
19+
import collections
20+
import datetime
2121
import inspect
2222
import sys
2323
import typing
@@ -198,18 +198,18 @@ def to_datetime(
198198

199199
@typing.overload
200200
def to_datetime(
201-
arg: Union[int, float, str, datetime, date],
201+
arg: Union[int, float, str, datetime.datetime, datetime.date],
202202
*,
203203
utc: bool = False,
204204
format: Optional[str] = None,
205205
unit: Optional[str] = None,
206-
) -> Union[pandas.Timestamp, datetime]:
206+
) -> Union[pandas.Timestamp, datetime.datetime]:
207207
...
208208

209209

210210
def to_datetime(
211211
arg: Union[
212-
Union[int, float, str, datetime, date],
212+
Union[int, float, str, datetime.datetime, datetime.date],
213213
vendored_pandas_datetimes.local_iterables,
214214
bigframes.series.Series,
215215
bigframes.dataframe.DataFrame,
@@ -218,8 +218,9 @@ def to_datetime(
218218
utc: bool = False,
219219
format: Optional[str] = None,
220220
unit: Optional[str] = None,
221-
) -> Union[pandas.Timestamp, datetime, bigframes.series.Series]:
222-
return bigframes.core.tools.to_datetime(
221+
) -> Union[pandas.Timestamp, datetime.datetime, bigframes.series.Series]:
222+
return global_session.with_default_session(
223+
bigframes.session.Session.to_datetime,
223224
arg,
224225
utc=utc,
225226
format=format,
@@ -322,7 +323,7 @@ def clean_up_by_session_id(
322323
__version__ = bigframes.version.__version__
323324

324325
# Other public pandas attributes
325-
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
326+
NamedAgg = collections.namedtuple("NamedAgg", ["column", "aggfunc"])
326327

327328
options = config.options
328329
"""Global :class:`~bigframes._config.Options` to configure BigQuery DataFrames."""

bigframes/pandas/core/tools/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def to_timedelta(
3535
return arg._apply_unary_op(ops.ToTimedeltaOp(canonical_unit))
3636

3737
if pdtypes.is_list_like(arg):
38-
return to_timedelta(series.Series(arg), unit, session=session)
38+
return to_timedelta(series.Series(arg, session=session), unit, session=session)
3939

4040
return pd.to_timedelta(arg, unit)
4141

0 commit comments

Comments
 (0)