From d2f0c7c7ef31b691d7fe2185d5e9481937a27be3 Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Mon, 3 Mar 2025 14:54:21 +0800 Subject: [PATCH 1/9] make array 1D before calling maybe_infer_to_datetimelike --- pandas/core/construction.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ada492787a179..6a6ae544c5b29 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,7 +612,10 @@ def sanitize_array( if dtype is None: subarr = data if data.dtype == object and infer_object: - subarr = maybe_infer_to_datetimelike(data) + # GH#61026 + if data.ndim != 1: + subarr = subarr.ravel() + subarr = maybe_infer_to_datetimelike(subarr) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype From afc3c231d6c0feaf5308e07f7d1535c4ccceb5be Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Mon, 3 Mar 2025 15:11:25 +0800 Subject: [PATCH 2/9] add desc to whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c967b97cb2ef6..966bb043177d9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -696,6 +696,7 @@ Indexing - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) +- Bug in :meth:`DataFrame.__setitem__` throwing a ``ValueError`` when setting a column with a 2D object array (:issue:`61026`) Missing ^^^^^^^ From c967d6ba691ae438d29d2740c6afdcaec17e6ae5 Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Mon, 3 Mar 2025 15:30:55 +0800 Subject: [PATCH 3/9] update the order of whatsnew entries --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 966bb043177d9..7d8679e1ec7d8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -692,11 +692,11 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) +- Bug in :meth:`DataFrame.__setitem__` throwing a ``ValueError`` when setting a column with a 2D object array (:issue:`61026`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) -- Bug in :meth:`DataFrame.__setitem__` throwing a ``ValueError`` when setting a column with a 2D object array (:issue:`61026`) Missing ^^^^^^^ From fe1a1d1414afdcbd75e9af95636dbe13bbb81b27 Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 09:19:43 +0800 Subject: [PATCH 4/9] reshape the array back after maybe_infer_to_datetimelike --- pandas/core/construction.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 6a6ae544c5b29..e60c545e83b77 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,10 +612,8 @@ def sanitize_array( if dtype is None: subarr = data if data.dtype == object and infer_object: - # GH#61026 - if data.ndim != 1: - subarr = subarr.ravel() - subarr = maybe_infer_to_datetimelike(subarr) + fltarr = data.ravel() # GH#61026 + subarr = maybe_infer_to_datetimelike(fltarr).reshape(data.shape) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype From e6cdec8c5cccccb22cb1c45f8d6dfddf11aa5684 Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 09:20:33 +0800 Subject: [PATCH 5/9] add a test --- pandas/tests/frame/indexing/test_setitem.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 20dd7b0c4d3e7..db2c24e97893b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -815,6 +815,19 @@ def test_setitem_index_object_dtype_not_inferring(self): } ) tm.assert_frame_equal(df, expected) + + def test_setitem_2d_object_array(self): + # GH#61026 + df = DataFrame({ + "c1": [1, 2, 3, 4, 5], + }) + arr = np.array([["A"], ["B"], ["C"], ["D"], ["E"]], dtype=object) + df["c1"] = arr + + expected = DataFrame({ + "c1": ["A", "B", "C", "D", "E"], + }) + tm.assert_frame_equal(df, expected) class TestSetitemTZAwareValues: From 68dfd0ecc4c57e065f0a64a8a85f7113c3e4235f Mon Sep 17 00:00:00 2001 From: tonyyuyiding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 09:59:12 +0800 Subject: [PATCH 6/9] formatting by pre-commit --- pandas/core/construction.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e60c545e83b77..3da78a2972e77 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,7 +612,7 @@ def sanitize_array( if dtype is None: subarr = data if data.dtype == object and infer_object: - fltarr = data.ravel() # GH#61026 + fltarr = data.ravel() # GH#61026 subarr = maybe_infer_to_datetimelike(fltarr).reshape(data.shape) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index db2c24e97893b..e7e119b8bf32c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -815,18 +815,22 @@ def test_setitem_index_object_dtype_not_inferring(self): } ) tm.assert_frame_equal(df, expected) - + def test_setitem_2d_object_array(self): # GH#61026 - df = DataFrame({ - "c1": [1, 2, 3, 4, 5], - }) + df = DataFrame( + { + "c1": [1, 2, 3, 4, 5], + } + ) arr = np.array([["A"], ["B"], ["C"], ["D"], ["E"]], dtype=object) df["c1"] = arr - - expected = DataFrame({ - "c1": ["A", "B", "C", "D", "E"], - }) + + expected = DataFrame( + { + "c1": ["A", "B", "C", "D", "E"], + } + ) tm.assert_frame_equal(df, expected) From 537671a7a063c28cc1bb23a273d86c764fd0e628 Mon Sep 17 00:00:00 2001 From: Tony Ding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 10:31:20 +0800 Subject: [PATCH 7/9] remove reshape --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3da78a2972e77..3e08d6d59b56e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -613,7 +613,7 @@ def sanitize_array( subarr = data if data.dtype == object and infer_object: fltarr = data.ravel() # GH#61026 - subarr = maybe_infer_to_datetimelike(fltarr).reshape(data.shape) + subarr = maybe_infer_to_datetimelike(fltarr) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype From b7ab1559c4bea6a3474571c6410b608b869c38ed Mon Sep 17 00:00:00 2001 From: Tony Ding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 11:26:16 +0800 Subject: [PATCH 8/9] fix testcase issue --- pandas/core/construction.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3e08d6d59b56e..826692424c1cf 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -612,8 +612,10 @@ def sanitize_array( if dtype is None: subarr = data if data.dtype == object and infer_object: - fltarr = data.ravel() # GH#61026 - subarr = maybe_infer_to_datetimelike(fltarr) + if data.ndim != 1: + # GH#61026 + subarr = data.ravel() + subarr = maybe_infer_to_datetimelike(subarr) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype From fc8d55d00de58a45173524af1fe9ad446ca1083a Mon Sep 17 00:00:00 2001 From: Tony Ding <tonyyuyiding@gmail.com> Date: Tue, 4 Mar 2025 11:47:13 +0800 Subject: [PATCH 9/9] fix testcase issue (copy) --- pandas/core/construction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 826692424c1cf..23b292c34eca4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -615,6 +615,8 @@ def sanitize_array( if data.ndim != 1: # GH#61026 subarr = data.ravel() + if copy: + subarr = subarr.copy() subarr = maybe_infer_to_datetimelike(subarr) elif data.dtype.kind == "U" and using_string_dtype(): from pandas.core.arrays.string_ import StringDtype