From 835edf643162493918ac46b6dc2e712e427ccbcd Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 11:58:51 +0000
Subject: [PATCH 01/26] fix: pass dtypes to read_json with pyarrow engine

---
 doc/source/whatsnew/v3.0.0.rst      |  1 +
 pandas/io/json/_json.py             | 73 ++++++++++++++++++++---------
 pandas/tests/io/json/test_pandas.py | 22 +++++++++
 3 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 090be6dc250ba..a7c4ab67c73b7 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -732,6 +732,7 @@ I/O
 - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
+- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index e032e26d771d7..b28f29944eb34 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -32,6 +32,7 @@
 from pandas.core.dtypes.common import (
     ensure_str,
     is_string_dtype,
+    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
 
@@ -43,6 +44,7 @@
     isna,
     notna,
     to_datetime,
+    ArrowDtype,
 )
 from pandas.core.reshape.concat import concat
 from pandas.core.shared_docs import _shared_docs
@@ -942,29 +944,56 @@ def read(self) -> DataFrame | Series:
         obj: DataFrame | Series
         with self:
             if self.engine == "pyarrow":
-                pyarrow_json = import_optional_dependency("pyarrow.json")
-                pa_table = pyarrow_json.read_json(self.data)
-                return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
+                obj = self._read_pyarrow()
             elif self.engine == "ujson":
-                if self.lines:
-                    if self.chunksize:
-                        obj = concat(self)
-                    elif self.nrows:
-                        lines = list(islice(self.data, self.nrows))
-                        lines_json = self._combine_lines(lines)
-                        obj = self._get_object_parser(lines_json)
-                    else:
-                        data = ensure_str(self.data)
-                        data_lines = data.split("\n")
-                        obj = self._get_object_parser(self._combine_lines(data_lines))
-                else:
-                    obj = self._get_object_parser(self.data)
-                if self.dtype_backend is not lib.no_default:
-                    return obj.convert_dtypes(
-                        infer_objects=False, dtype_backend=self.dtype_backend
-                    )
-                else:
-                    return obj
+                obj = self._read_ujson()
+        
+        return obj
+
+    def _read_pyarrow(self) -> DataFrame:
+        """
+        Read JSON using the pyarrow engine.
+        """
+        pyarrow_json = import_optional_dependency("pyarrow.json")
+        options = None
+
+        if isinstance(self.dtype, dict):
+            pa = import_optional_dependency("pyarrow")
+            fields = [
+                (field, pandas_dtype(dtype).pyarrow_dtype)
+                for field, dtype in self.dtype.items()
+                if isinstance(pandas_dtype(dtype), ArrowDtype)
+            ]
+
+            schema = pa.schema(fields)
+            options = pyarrow_json.ParseOptions(explicit_schema=schema)
+            
+        pa_table = pyarrow_json.read_json(self.data, parse_options=options)
+        return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
+    
+    def _read_ujson(self) -> DataFrame | Series:
+        """
+        Read JSON using the ujson engine.
+        """
+        if self.lines:
+            if self.chunksize:
+                obj = concat(self)
+            elif self.nrows:
+                lines = list(islice(self.data, self.nrows))
+                lines_json = self._combine_lines(lines)
+                obj = self._get_object_parser(lines_json)
+            else:
+                data = ensure_str(self.data)
+                data_lines = data.split("\n")
+                obj = self._get_object_parser(self._combine_lines(data_lines))
+        else:
+            obj = self._get_object_parser(self.data)
+        if self.dtype_backend is not lib.no_default:
+            return obj.convert_dtypes(
+                infer_objects=False, dtype_backend=self.dtype_backend
+            )
+        else:
+            return obj
 
     def _get_object_parser(self, json: str) -> DataFrame | Series:
         """
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 144b36166261b..c03f203deba11 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2183,6 +2183,28 @@ def test_read_json_dtype_backend(
         # string_storage setting -> ignore that for checking the result
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
+    @td.skip_if_no("pyarrow") # type: ignore
+    def test_read_json_pyarrow_with_dtype(self, datapath):
+        dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
+
+        df = read_json(
+            datapath("io", "json", "data", "line_delimited.json"),
+            dtype=dtype,
+            lines=True,
+            engine="pyarrow",
+            dtype_backend="pyarrow",
+        )
+
+        result = df.dtypes
+        expected = Series(
+            [
+                pd.ArrowDtype.construct_from_string("int32[pyarrow]"),
+                pd.ArrowDtype.construct_from_string("int64[pyarrow]"),
+            ],
+            index=["a", "b"],
+        )
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("orient", ["split", "records", "index"])
     def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
         # GH#50750

From 074b3cb876ca23d25578a73b805965a715437415 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 12:14:37 +0000
Subject: [PATCH 02/26] fix: code checks

---
 doc/source/whatsnew/v3.0.0.rst      | 2 +-
 pandas/io/json/_json.py             | 8 ++++----
 pandas/tests/io/json/test_pandas.py | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a7c4ab67c73b7..f3f6bb9bf08d5 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -725,6 +725,7 @@ I/O
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
+- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
 - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
 - Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -732,7 +733,6 @@ I/O
 - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
-- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index b28f29944eb34..5256b91388198 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -37,6 +37,7 @@
 from pandas.core.dtypes.dtypes import PeriodDtype
 
 from pandas import (
+    ArrowDtype,
     DataFrame,
     Index,
     MultiIndex,
@@ -44,7 +45,6 @@
     isna,
     notna,
     to_datetime,
-    ArrowDtype,
 )
 from pandas.core.reshape.concat import concat
 from pandas.core.shared_docs import _shared_docs
@@ -947,7 +947,7 @@ def read(self) -> DataFrame | Series:
                 obj = self._read_pyarrow()
             elif self.engine == "ujson":
                 obj = self._read_ujson()
-        
+
         return obj
 
     def _read_pyarrow(self) -> DataFrame:
@@ -967,10 +967,10 @@ def _read_pyarrow(self) -> DataFrame:
 
             schema = pa.schema(fields)
             options = pyarrow_json.ParseOptions(explicit_schema=schema)
-            
+
         pa_table = pyarrow_json.read_json(self.data, parse_options=options)
         return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
-    
+
     def _read_ujson(self) -> DataFrame | Series:
         """
         Read JSON using the ujson engine.
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index c03f203deba11..0d2ef94be2d5c 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2183,7 +2183,7 @@ def test_read_json_dtype_backend(
         # string_storage setting -> ignore that for checking the result
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
-    @td.skip_if_no("pyarrow") # type: ignore
+    @td.skip_if_no("pyarrow")
     def test_read_json_pyarrow_with_dtype(self, datapath):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
 

From 28fa3322cb24856abc501977117cdd496dfdbe9c Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:57:41 +0000
Subject: [PATCH 03/26] fix: commit checks

---
 pandas/io/json/_json.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 5256b91388198..7934d625851d1 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -959,11 +959,11 @@ def _read_pyarrow(self) -> DataFrame:
 
         if isinstance(self.dtype, dict):
             pa = import_optional_dependency("pyarrow")
-            fields = [
-                (field, pandas_dtype(dtype).pyarrow_dtype)
-                for field, dtype in self.dtype.items()
-                if isinstance(pandas_dtype(dtype), ArrowDtype)
-            ]
+            fields = []
+            for field, dtype in self.dtype.items():
+                pd_dtype = pandas_dtype(dtype)
+                if isinstance(pd_dtype, ArrowDtype):
+                    fields.append((field, pd_dtype.pyarrow_dtype))
 
             schema = pa.schema(fields)
             options = pyarrow_json.ParseOptions(explicit_schema=schema)

From 5a8158b703b67c3ce5f0a4496871823a6b48457f Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:17:01 +0000
Subject: [PATCH 04/26] fix: commit checks

---
 pandas/io/json/_json.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 7934d625851d1..9069de4896b6b 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -981,13 +981,13 @@ def _read_ujson(self) -> DataFrame | Series:
             elif self.nrows:
                 lines = list(islice(self.data, self.nrows))
                 lines_json = self._combine_lines(lines)
-                obj = self._get_object_parser(lines_json)
+                obj: DataFrame | Series = self._get_object_parser(lines_json)
             else:
                 data = ensure_str(self.data)
                 data_lines = data.split("\n")
-                obj = self._get_object_parser(self._combine_lines(data_lines))
+                obj: DataFrame | Series = self._get_object_parser(self._combine_lines(data_lines))
         else:
-            obj = self._get_object_parser(self.data)
+            obj: DataFrame | Series = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend

From 73f18a4f888da76b436bdc91fca2c6dcd37b615a Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:27:14 +0000
Subject: [PATCH 05/26] fix: commit checks

---
 pandas/io/json/_json.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 9069de4896b6b..e9ec4d7881414 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -981,13 +981,19 @@ def _read_ujson(self) -> DataFrame | Series:
             elif self.nrows:
                 lines = list(islice(self.data, self.nrows))
                 lines_json = self._combine_lines(lines)
-                obj: DataFrame | Series = self._get_object_parser(lines_json)
+                obj: DataFrame | Series = self._get_object_parser(
+                    lines_json
+                    )
             else:
                 data = ensure_str(self.data)
                 data_lines = data.split("\n")
-                obj: DataFrame | Series = self._get_object_parser(self._combine_lines(data_lines))
+                obj: DataFrame | Series = self._get_object_parser(
+                    self._combine_lines(data_lines)
+                )
         else:
-            obj: DataFrame | Series = self._get_object_parser(self.data)
+            obj: DataFrame | Series = self._get_object_parser(
+                self.data
+            )
         if self.dtype_backend is not lib.no_default:
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend

From 72675c92644784cad5d8654b6e0a5a68260de519 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:34:55 +0000
Subject: [PATCH 06/26] fix: formatting

---
 pandas/io/json/_json.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index e9ec4d7881414..b9325c485b554 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -981,9 +981,7 @@ def _read_ujson(self) -> DataFrame | Series:
             elif self.nrows:
                 lines = list(islice(self.data, self.nrows))
                 lines_json = self._combine_lines(lines)
-                obj: DataFrame | Series = self._get_object_parser(
-                    lines_json
-                    )
+                obj: DataFrame | Series = self._get_object_parser(lines_json)
             else:
                 data = ensure_str(self.data)
                 data_lines = data.split("\n")
@@ -991,9 +989,7 @@ def _read_ujson(self) -> DataFrame | Series:
                     self._combine_lines(data_lines)
                 )
         else:
-            obj: DataFrame | Series = self._get_object_parser(
-                self.data
-            )
+            obj: DataFrame | Series = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend

From bf830f5ab70060e55a6506efd1dbda070b465cdd Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:59:46 +0000
Subject: [PATCH 07/26] fix: commit checks

---
 pandas/io/json/_json.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index b9325c485b554..4ad6d3a3a4b4a 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -975,21 +975,20 @@ def _read_ujson(self) -> DataFrame | Series:
         """
         Read JSON using the ujson engine.
         """
+        obj: DataFrame | Series
         if self.lines:
             if self.chunksize:
                 obj = concat(self)
             elif self.nrows:
                 lines = list(islice(self.data, self.nrows))
                 lines_json = self._combine_lines(lines)
-                obj: DataFrame | Series = self._get_object_parser(lines_json)
+                obj = self._get_object_parser(lines_json)
             else:
                 data = ensure_str(self.data)
                 data_lines = data.split("\n")
-                obj: DataFrame | Series = self._get_object_parser(
-                    self._combine_lines(data_lines)
-                )
+                obj = self._get_object_parser(self._combine_lines(data_lines))
         else:
-            obj: DataFrame | Series = self._get_object_parser(self.data)
+            obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend

From 46369f24834f5c1abfab323a22c00aa98b1bc6be Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 08:45:59 +0000
Subject: [PATCH 08/26] feat: change type conversion

---
 pandas/io/json/_json.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 4ad6d3a3a4b4a..055a572650eb7 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -957,19 +957,13 @@ def _read_pyarrow(self) -> DataFrame:
         pyarrow_json = import_optional_dependency("pyarrow.json")
         options = None
 
-        if isinstance(self.dtype, dict):
-            pa = import_optional_dependency("pyarrow")
-            fields = []
-            for field, dtype in self.dtype.items():
-                pd_dtype = pandas_dtype(dtype)
-                if isinstance(pd_dtype, ArrowDtype):
-                    fields.append((field, pd_dtype.pyarrow_dtype))
-
-            schema = pa.schema(fields)
-            options = pyarrow_json.ParseOptions(explicit_schema=schema)
-
-        pa_table = pyarrow_json.read_json(self.data, parse_options=options)
-        return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
+        pa_table = pyarrow_json.read_json(self.data)
+        df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
+
+        if self.dtype:
+            df = df.astype(self.dtype)
+
+        return df
 
     def _read_ujson(self) -> DataFrame | Series:
         """

From 025fb30d0ec4200fb6a68eb7ad7a93e35c99f962 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 08:50:27 +0000
Subject: [PATCH 09/26] Update _json.py

---
 pandas/io/json/_json.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 055a572650eb7..2162309f44a9b 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -955,7 +955,6 @@ def _read_pyarrow(self) -> DataFrame:
         Read JSON using the pyarrow engine.
         """
         pyarrow_json = import_optional_dependency("pyarrow.json")
-        options = None
 
         pa_table = pyarrow_json.read_json(self.data)
         df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)

From e1d202d1b16d2ba8713bd83d6ce9bd90757e20d3 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 09:13:45 +0000
Subject: [PATCH 10/26] Update _json.py

---
 pandas/io/json/_json.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 2162309f44a9b..eccb69ff71018 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -32,12 +32,10 @@
 from pandas.core.dtypes.common import (
     ensure_str,
     is_string_dtype,
-    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
 
 from pandas import (
-    ArrowDtype,
     DataFrame,
     Index,
     MultiIndex,

From 3954c842f1592bef6258050594629d023d06aaf2 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 17:29:58 +0000
Subject: [PATCH 11/26] Update _json.py

---
 pandas/io/json/_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index eccb69ff71018..8152cb0875ec6 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -957,7 +957,7 @@ def _read_pyarrow(self) -> DataFrame:
         pa_table = pyarrow_json.read_json(self.data)
         df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
 
-        if self.dtype:
+        if isinstance(dict, self.dtype):
             df = df.astype(self.dtype)
 
         return df

From 2572a3287c23e4e656374ef58ee526213405d8c1 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 18:25:52 +0000
Subject: [PATCH 12/26] Update _json.py

---
 pandas/io/json/_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 8152cb0875ec6..ec65b75cf6f4d 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -957,7 +957,7 @@ def _read_pyarrow(self) -> DataFrame:
         pa_table = pyarrow_json.read_json(self.data)
         df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
 
-        if isinstance(dict, self.dtype):
+        if isinstance(self.dtype, dict):
             df = df.astype(self.dtype)
 
         return df

From 0d85bfe9e35bbfaa405fdac3c96540131e460ae3 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 19:52:18 +0000
Subject: [PATCH 13/26] Update pandas/tests/io/json/test_pandas.py

Co-authored-by: William Ayd <william.ayd@icloud.com>
---
 pandas/tests/io/json/test_pandas.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 0d2ef94be2d5c..d9be640d1f2d2 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2196,7 +2196,8 @@ def test_read_json_pyarrow_with_dtype(self, datapath):
         )
 
         result = df.dtypes
-        expected = Series(
+        pa = pytest.importorskip("pyarrow")
+        expected = Series([pd.ArrowDtype(pa.int32()), pd.ArrowDtype(pa.int64())], ...)
             [
                 pd.ArrowDtype.construct_from_string("int32[pyarrow]"),
                 pd.ArrowDtype.construct_from_string("int64[pyarrow]"),

From 00f2085592a70837255791ee96e0ff31c70afba0 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 20:18:39 +0000
Subject: [PATCH 14/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 0d2ef94be2d5c..d2811968c991b 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2186,9 +2186,10 @@ def test_read_json_dtype_backend(
     @td.skip_if_no("pyarrow")
     def test_read_json_pyarrow_with_dtype(self, datapath):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
+        json = '{"a": 1, "b": 2}'
 
         df = read_json(
-            datapath("io", "json", "data", "line_delimited.json"),
+            StringIO(json),
             dtype=dtype,
             lines=True,
             engine="pyarrow",

From 18f69c56243fda86cfb85086496c9ba682bbae07 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Tue, 4 Mar 2025 20:38:13 +0000
Subject: [PATCH 15/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 60751692db676..5acccb9245aa1 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2198,7 +2198,8 @@ def test_read_json_pyarrow_with_dtype(self, datapath):
 
         result = df.dtypes
         pa = pytest.importorskip("pyarrow")
-        expected = Series([pd.ArrowDtype(pa.int32()), pd.ArrowDtype(pa.int64())], ...)
+        expected = Series(
+            [pd.ArrowDtype(pa.int32()), pd.ArrowDtype(pa.int64())],
             [
                 pd.ArrowDtype.construct_from_string("int32[pyarrow]"),
                 pd.ArrowDtype.construct_from_string("int64[pyarrow]"),

From de9726699b6bd10ca5d88291c62f5257b4b7f474 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 5 Mar 2025 07:44:17 +0000
Subject: [PATCH 16/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 5acccb9245aa1..6edd41aa4df4b 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2184,7 +2184,7 @@ def test_read_json_dtype_backend(
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
     @td.skip_if_no("pyarrow")
-    def test_read_json_pyarrow_with_dtype(self, datapath):
+    def test_read_json_pyarrow_with_dtype(self):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
         json = '{"a": 1, "b": 2}'
 

From e87097f59c119dd0f864e6409f470196980bfa4b Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 5 Mar 2025 08:16:05 +0000
Subject: [PATCH 17/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 6edd41aa4df4b..264339c102e3f 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1,6 +1,6 @@
 import datetime
 from datetime import timedelta
-from io import StringIO
+from io import StringIO, BytesIO
 import json
 import os
 import sys
@@ -2186,10 +2186,10 @@ def test_read_json_dtype_backend(
     @td.skip_if_no("pyarrow")
     def test_read_json_pyarrow_with_dtype(self):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
-        json = '{"a": 1, "b": 2}'
+        json = b'{"a": 1, "b": 2}\n'
 
         df = read_json(
-            StringIO(json),
+            BytesIO(json),
             dtype=dtype,
             lines=True,
             engine="pyarrow",
@@ -2199,8 +2199,7 @@ def test_read_json_pyarrow_with_dtype(self):
         result = df.dtypes
         pa = pytest.importorskip("pyarrow")
         expected = Series(
-            [pd.ArrowDtype(pa.int32()), pd.ArrowDtype(pa.int64())],
-            [
+            data=[
                 pd.ArrowDtype.construct_from_string("int32[pyarrow]"),
                 pd.ArrowDtype.construct_from_string("int64[pyarrow]"),
             ],

From a855a59895f64ff88c0490680daa89edd81c42de Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 5 Mar 2025 08:25:54 +0000
Subject: [PATCH 18/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 264339c102e3f..4fb75eea63316 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1,6 +1,9 @@
 import datetime
 from datetime import timedelta
-from io import StringIO, BytesIO
+from io import (
+    BytesIO,
+    StringIO,
+)
 import json
 import os
 import sys
@@ -2197,7 +2200,6 @@ def test_read_json_pyarrow_with_dtype(self):
         )
 
         result = df.dtypes
-        pa = pytest.importorskip("pyarrow")
         expected = Series(
             data=[
                 pd.ArrowDtype.construct_from_string("int32[pyarrow]"),

From a4b7f95e4dfbdf118ae5b3a4b2699c1ca1d7d680 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 5 Mar 2025 18:50:50 +0000
Subject: [PATCH 19/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 4fb75eea63316..1fb2f6a2cc8b6 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2187,6 +2187,7 @@ def test_read_json_dtype_backend(
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
     @td.skip_if_no("pyarrow")
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_read_json_pyarrow_with_dtype(self):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
         json = b'{"a": 1, "b": 2}\n'

From 6406840522458e74765b1126d81af4663d2c6c30 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Thu, 6 Mar 2025 07:41:31 +0000
Subject: [PATCH 20/26] Update _json.py

---
 pandas/io/json/_json.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index ec65b75cf6f4d..6b4f6c05c3123 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -32,10 +32,12 @@
 from pandas.core.dtypes.common import (
     ensure_str,
     is_string_dtype,
+    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
 
 from pandas import (
+    ArrowDtype,
     DataFrame,
     Index,
     MultiIndex,
@@ -953,12 +955,23 @@ def _read_pyarrow(self) -> DataFrame:
         Read JSON using the pyarrow engine.
         """
         pyarrow_json = import_optional_dependency("pyarrow.json")
-
-        pa_table = pyarrow_json.read_json(self.data)
-        df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
+        options = None
 
         if isinstance(self.dtype, dict):
-            df = df.astype(self.dtype)
+            pa = import_optional_dependency("pyarrow")
+            fields = []
+            for field, dtype in self.dtype.items():
+                pd_dtype = pandas_dtype(dtype)
+                if isinstance(pd_dtype, ArrowDtype):
+                    fields.append((field, pd_dtype.pyarrow_dtype))
+
+            schema = pa.schema(fields)
+            options = pyarrow_json.ParseOptions(
+                explicit_schema=schema, unexpected_field_behavior="infer"
+            )
+
+        pa_table = pyarrow_json.read_json(self.data, parse_options=options)
+        df = arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
 
         return df
 

From 4626ad743f4b4bc9c6949a051e3893ff8f6d2a84 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Fri, 7 Mar 2025 16:57:58 +0000
Subject: [PATCH 21/26] Update test_pandas.py

---
 pandas/tests/io/json/test_pandas.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 1fb2f6a2cc8b6..a023f8642ed83 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2188,7 +2188,13 @@ def test_read_json_dtype_backend(
 
     @td.skip_if_no("pyarrow")
     @pytest.mark.filterwarnings("ignore::DeprecationWarning")
-    def test_read_json_pyarrow_with_dtype(self):
+    def test_read_json_pyarrow_with_dtype(self, request):
+        pa = pytest.importorskip("pyarrow")
+        version_tuple = tuple(map(int, pa.__version__.split('.')))
+
+        if version_tuple[0] < 16:
+            request.applymarker(pytest.mark.filterwarnings("ignore::DeprecationWarning"))
+
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
         json = b'{"a": 1, "b": 2}\n'
 

From 7d7171b2acb7bfe946fb4d74efd9d39a8c20ebe2 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Fri, 7 Mar 2025 17:04:18 +0000
Subject: [PATCH 22/26] Update test_pandas.py: reformat pyarrow version check

---
 pandas/tests/io/json/test_pandas.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index a023f8642ed83..56fb4602acb57 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2190,10 +2190,12 @@ def test_read_json_dtype_backend(
     @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_read_json_pyarrow_with_dtype(self, request):
         pa = pytest.importorskip("pyarrow")
-        version_tuple = tuple(map(int, pa.__version__.split('.')))
+        version_tuple = tuple(map(int, pa.__version__.split(".")))
 
         if version_tuple[0] < 16:
-            request.applymarker(pytest.mark.filterwarnings("ignore::DeprecationWarning"))
+            request.applymarker(
+                pytest.mark.filterwarnings("ignore::DeprecationWarning")
+            )
 
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
         json = b'{"a": 1, "b": 2}\n'

From 883b84b62089bf7d8d4d77cd81ccf9516260920b Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Fri, 7 Mar 2025 17:44:22 +0000
Subject: [PATCH 23/26] Update test_pandas.py: compare pyarrow version via Version

---
 pandas/tests/io/json/test_pandas.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 56fb4602acb57..b2e603dbc0aa7 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -30,6 +30,7 @@
     read_json,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
 from pandas.io.json import ujson_dumps
 
@@ -2187,12 +2188,10 @@ def test_read_json_dtype_backend(
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
     @td.skip_if_no("pyarrow")
-    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_read_json_pyarrow_with_dtype(self, request):
         pa = pytest.importorskip("pyarrow")
-        version_tuple = tuple(map(int, pa.__version__.split(".")))
 
-        if version_tuple[0] < 16:
+        if Version(pa.__version__) < Version("16.0"):
             request.applymarker(
                 pytest.mark.filterwarnings("ignore::DeprecationWarning")
             )

From 80881ae28fc5675646e4ccc9b0692d54c8d278b6 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Fri, 7 Mar 2025 18:09:48 +0000
Subject: [PATCH 24/26] Update test_pandas.py: also apply warning filter on pyarrow 16.0

---
 pandas/tests/io/json/test_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index b2e603dbc0aa7..cc14cfabd2852 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2191,7 +2191,7 @@ def test_read_json_dtype_backend(
     def test_read_json_pyarrow_with_dtype(self, request):
         pa = pytest.importorskip("pyarrow")
 
-        if Version(pa.__version__) < Version("16.0"):
+        if Version(pa.__version__) <= Version("16.0"):
             request.applymarker(
                 pytest.mark.filterwarnings("ignore::DeprecationWarning")
             )

From 8df8914a56231f3ceff461256bc76717cfdf001f Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 19 Mar 2025 08:40:07 +0000
Subject: [PATCH 25/26] Update test_pandas.py: filter only the BlockManager DeprecationWarning

---
 pandas/tests/io/json/test_pandas.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index cc14cfabd2852..dd270b9c4639e 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2188,14 +2188,8 @@ def test_read_json_dtype_backend(
         tm.assert_frame_equal(result, expected, check_column_type=False)
 
     @td.skip_if_no("pyarrow")
-    def test_read_json_pyarrow_with_dtype(self, request):
-        pa = pytest.importorskip("pyarrow")
-
-        if Version(pa.__version__) <= Version("16.0"):
-            request.applymarker(
-                pytest.mark.filterwarnings("ignore::DeprecationWarning")
-            )
-
+    @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+    def test_read_json_pyarrow_with_dtype(self):
         dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
         json = b'{"a": 1, "b": 2}\n'
 

From 5c581fc885e32e520876f85f907e36d5232b9cf1 Mon Sep 17 00:00:00 2001
From: william larkin <56956489+will-larkin@users.noreply.github.com>
Date: Wed, 19 Mar 2025 08:43:08 +0000
Subject: [PATCH 26/26] Update test_pandas.py: remove Version import

---
 pandas/tests/io/json/test_pandas.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index dd270b9c4639e..fde9940ea78eb 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -30,7 +30,6 @@
     read_json,
 )
 import pandas._testing as tm
-from pandas.util.version import Version
 
 from pandas.io.json import ujson_dumps