Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: DataFrameGroupby.agg now works with unnamed tuples
  • Loading branch information
TrevorBergeron committed Sep 13, 2024
commit c8f191852294322cd5bc01741101ce2690378df6
10 changes: 4 additions & 6 deletions bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,12 +414,10 @@ def _agg_named(self, **kwargs) -> df.DataFrame:
raise NotImplementedError(
f"Only string aggregate names supported. {constants.FEEDBACK_LINK}"
)
if not hasattr(v, "column") or not hasattr(v, "aggfunc"):
import bigframes.pandas as bpd

raise TypeError(f"kwargs values must be {bpd.NamedAgg.__qualname__}")
col_id = self._resolve_label(v.column)
aggregations.append((col_id, agg_ops.lookup_agg_func(v.aggfunc)))
if not isinstance(v, tuple) or (len(v) != 2):
raise TypeError("kwargs values must be 2-tuples of column, aggfunc")
col_id = self._resolve_label(v[0])
aggregations.append((col_id, agg_ops.lookup_agg_func(v[1])))
column_labels.append(k)
agg_block, _ = self._block.aggregate(
by_column_ids=self._by_col_ids,
Expand Down
20 changes: 20 additions & 0 deletions tests/system/small/test_groupby.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could also add test cases for the following?

  • v is not a tuple
  • v is a tuple but its length is not 2

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,26 @@ def test_dataframe_groupby_agg_named(scalars_df_index, scalars_pandas_df_index):
pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False)


def test_dataframe_groupby_agg_kw_tuples(scalars_df_index, scalars_pandas_df_index):
    """Check groupby ``agg`` with plain ``(column, aggfunc)`` tuple kwargs.

    The same keyword aggregations are applied to the frame under test and to
    the pandas reference frame, and the two results are compared (dtypes are
    not compared).
    """
    selected = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"]
    # One shared spec so both frames receive exactly the same keyword arguments.
    named_aggs = {
        "agg1": ("int64_too", "sum"),
        "agg2": ("float64_col", "max"),
    }

    bf_grouped = scalars_df_index[selected].groupby("string_col")
    bf_agg = bf_grouped.agg(**named_aggs)

    pd_grouped = scalars_pandas_df_index[selected].groupby("string_col")
    expected = pd_grouped.agg(**named_aggs)

    # Convert the result under test to pandas before comparing.
    actual = bf_agg.to_pandas()

    pd.testing.assert_frame_equal(expected, actual, check_dtype=False)


@pytest.mark.parametrize(
("as_index"),
[
Expand Down