From 61f95ff344d9be751958bf0400d03e202a10e6dd Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Sat, 30 Nov 2024 22:17:01 -0800
Subject: [PATCH 1/4] BUG: Groupby sum for object type should be None instead
 of 0 for all nan values

---
 pandas/_libs/groupby.pyx                 | 8 ++++++--
 pandas/tests/groupby/test_categorical.py | 1 +
 pandas/tests/groupby/test_timegrouper.py | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d7e485f74e58b..7cca4bb793296 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -717,8 +717,12 @@ def group_sum(
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    # the below is equivalent to `np.zeros_like(out)` but faster
-    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    if sum_t is object:
+        # For object dtype, fill value should not be 0 (#60229)
+        sumx = np.empty((<object>out).shape, dtype=object)
+    else:
+        # the below is equivalent to `np.zeros_like(out)` but faster
+        sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
     compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 6d84dae1d25d8..809c960be20b6 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -361,6 +361,7 @@ def test_observed(request, using_infer_string, observed):
         expected = cartesian_product_for_groupers(
             expected, [cat1, cat2], list("AB"), fill_value=0
         )
+        expected.loc[expected.C == 0, "C"] = None
 
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index a7712d9dc6586..dd1680738a7c6 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
                 unit=df.index.unit,
             )
             expected = DataFrame(
-                {"Buyer": 0, "Quantity": 0},
+                {"Buyer": None, "Quantity": 0},
                 index=exp_dti,
             )
             # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"

From 15a762847e4dcbfed6ef53086a36e8b48b1006d3 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Mon, 2 Dec 2024 21:50:10 -0800
Subject: [PATCH 2/4] BUG: Add note to whatsnew/v3.0.0.rst

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e74bd2f745b94..0e8b9e2da058d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -735,6 +735,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
+- Bug in :meth:`DataFrameGroupBy.sum` and :math:`SeriesGroupBy.sum` where in case of all-nan values for object dtype the result is incorrectly set to 0 instead of ``None``. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
 - Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`)

From fba9f78df084bf857cb040478128fadac29e902c Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Sat, 15 Feb 2025 23:53:14 -0800
Subject: [PATCH 3/4] Groupby sum for all-nan object array should be nan
 instead of None

---
 pandas/_libs/groupby.pyx                 |  8 ++++++--
 pandas/tests/groupby/test_categorical.py |  2 +-
 pandas/tests/groupby/test_reductions.py  | 15 +++++++++++++++
 pandas/tests/groupby/test_timegrouper.py |  2 +-
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 03ac344b2fcca..e9935256a2430 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -727,7 +727,7 @@ def group_sum(
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
     if sum_t is object:
         # For object dtype, fill value should not be 0 (#60229)
-        sumx = np.empty((<object>out).shape, dtype=object)
+        sumx = np.full((<object>out).shape, NAN, dtype=object)
     else:
         # the below is equivalent to `np.zeros_like(out)` but faster
         sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
@@ -764,7 +764,10 @@ def group_sum(
                     if uses_mask:
                         isna_result = result_mask[lab, j]
                     else:
-                        isna_result = _treat_as_na(sumx[lab, j], is_datetimelike)
+                        isna_result = (
+                            _treat_as_na(sumx[lab, j], is_datetimelike) and
+                            nobs[lab, j] > 0
+                        )
 
                     if isna_result:
                         # If sum is already NA, don't add to it. This is important for
@@ -799,6 +802,7 @@ def group_sum(
                             compensation[lab, j] = 0
                         sumx[lab, j] = t
                 elif not skipna:
+                    nobs[lab, j] += 1
                     if uses_mask:
                         result_mask[lab, j] = True
                     else:
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e26c1574d0b4a..db63dff553c7c 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -358,7 +358,7 @@ def test_observed(request, using_infer_string, observed):
         expected = cartesian_product_for_groupers(
             expected, [cat1, cat2], list("AB"), fill_value=0
         )
-        expected.loc[expected.C == 0, "C"] = None
+        expected.loc[expected.C == 0, "C"] = np.nan
 
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index ea876cfdf4933..33306cc756fb1 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -514,6 +514,21 @@ def test_sum_skipna_object(skipna):
     tm.assert_series_equal(result, expected)
 
 
+def test_sum_allnan_object(skipna):
+    # GH#60229: groupby sum of all-NaN object values should be NaN, not 0
+    df = DataFrame(
+        {
+            "val": [np.nan] * 10,
+            "cat": ["A", "B"] * 5,
+        }
+    ).astype({"val": object})
+    expected = Series(
+        [np.nan, np.nan], index=pd.Index(["A", "B"], name="cat"), name="val"
+    ).astype(object)
+    result = df.groupby("cat")["val"].sum(skipna=skipna)
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "func, values, dtype, result_dtype",
     [
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index e1d7db3c0c04a..63d0ca0de9bb1 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
                 unit=df.index.unit,
             )
             expected = DataFrame(
-                {"Buyer": None, "Quantity": 0},
+                {"Buyer": np.nan, "Quantity": 0},
                 index=exp_dti,
             )
             # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"

From 45eb9af36383070f042cd8bfbefc8ffaae63ff73 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 18 Feb 2025 14:36:44 -0800
Subject: [PATCH 4/4] Fix behavior for DataFrame.sum and Series.sum

---
 doc/source/whatsnew/v3.0.0.rst         |  2 +-
 pandas/core/nanops.py                  |  7 +++++++
 pandas/tests/frame/test_arithmetic.py  |  2 +-
 pandas/tests/frame/test_reductions.py  | 15 +++++++++++++++
 pandas/tests/series/test_reductions.py | 14 ++++++++++++++
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a43ce386cb877..f97d0048287b5 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -750,13 +750,13 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
+- Bug in :meth:`DataFrame.sum`, :meth:`Series.sum`, :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where in case of all-nan values for object dtype the result is incorrectly set to 0 instead of ``nan``. (:issue:`60229`)
 - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
 - Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
-- Bug in :meth:`DataFrameGroupBy.sum` and :math:`SeriesGroupBy.sum` where in case of all-nan values for object dtype the result is incorrectly set to 0 instead of ``None``. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index d1dc0ff809497..e671106a82db9 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -638,6 +638,13 @@ def nansum(
     the_sum = values.sum(axis, dtype=dtype_sum)
     the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
 
+    if dtype.kind == "O" and skipna and min_count == 0:
+        # GH#60229 For object dtype, sum of all-NA array should be nan
+        if isinstance(the_sum, np.ndarray):
+            the_sum[mask.sum(axis=axis) == mask.shape[axis]] = np.nan
+        elif mask.all():
+            the_sum = np.nan
+
     return the_sum
 
 
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 8239de3f39c20..c792bb69f18d4 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -1186,7 +1186,7 @@ def test_frame_single_columns_object_sum_axis_1():
     }
     df = DataFrame(data)
     result = df.sum(axis=1)
-    expected = Series(["A", 1.2, 0])
+    expected = Series(["A", 1.2, np.nan])
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 64e686d25faa7..1eeea60d184fa 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -420,10 +420,25 @@ def test_stat_operators_attempt_obj_array(self, method, df, axis):
         assert df.values.dtype == np.object_
         result = getattr(df, method)(axis=axis)
         expected = getattr(df.astype("f8"), method)(axis=axis).astype(object)
+        if method == "sum":
+            # GH#60229 in case of all-NA object array, sum should be nan
+            expected[df.isna().all(axis=axis)] = np.nan
         if axis in [1, "columns"] and method in ["min", "max"]:
             expected[expected.isna()] = None
         tm.assert_series_equal(result, expected)
 
+    def test_object_sum_allna(self):
+        # GH#60229: all-NA object sum is NaN; skipna=False keeps the NA value
+        df = DataFrame({"a": [np.nan] * 5, "b": [pd.NA] * 5}, dtype=object)
+
+        result = df.sum(axis=0, skipna=True)
+        expected = Series([np.nan, np.nan], index=["a", "b"], dtype=object)
+        tm.assert_series_equal(result, expected)
+
+        result = df.sum(axis=0, skipna=False)
+        expected = Series([np.nan, pd.NA], index=["a", "b"], dtype=object)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
     def test_mixed_ops(self, op):
         # GH#16116
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index 86ce60b1fc12b..b6f2d03405749 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -111,6 +111,20 @@ def test_prod_numpy16_bug():
     assert not isinstance(result, Series)
 
 
+@pytest.mark.parametrize("nan_val", [np.nan, pd.NA])
+def test_object_sum_allna(nan_val):
+    # GH#60229: all-NA object sum is NaN; skipna=False returns the NA value
+    ser = Series([nan_val] * 5, dtype=object)
+
+    result = ser.sum(axis=0, skipna=True)
+    expected = np.nan
+    tm.assert_equal(result, expected)
+
+    result = ser.sum(axis=0, skipna=False)
+    expected = nan_val
+    tm.assert_equal(result, expected)
+
+
 @pytest.mark.parametrize("func", [np.any, np.all])
 @pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}])
 def test_validate_any_all_out_keepdims_raises(kwargs, func):