diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index d8b97bb92c3b0b..c87143a959cf2f 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -367,6 +367,7 @@ I/O
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
+- Added a ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 or later of the ``pandas-gbq`` library, as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries (:issue:`26104`)
Plotting
^^^^^^^^
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index a6cec7ea8fb163..871bc4a8221c20 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1,7 +1,5 @@
""" Google BigQuery support """
-import warnings
-
def _try_import():
# since pandas is a dependency of pandas-gbq
@@ -26,7 +24,7 @@ def _try_import():
def read_gbq(query, project_id=None, index_col=None, col_order=None,
reauth=False, auth_local_webserver=False, dialect=None,
location=None, configuration=None, credentials=None,
- private_key=None, verbose=None):
+ use_bqstorage_api=None, private_key=None, verbose=None):
"""
Load data from Google BigQuery.
@@ -103,6 +101,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
*New in version 0.8.0 of pandas-gbq*.
.. versionadded:: 0.24.0
+ use_bqstorage_api : bool, default False
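+ Use the `BigQuery Storage API
+ <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
+ download query results quickly, but at an increased cost. To use this
+ API, first `enable it in the Cloud Console
+ <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
+ You must also have the `bigquery.readsessions.create
+ <https://cloud.google.com/bigquery/docs/access-control#roles>`__
+ permission on the project you are billing queries to.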
+
+ This feature requires version 0.10.0 or later of the ``pandas-gbq``
+ package. It also requires the ``google-cloud-bigquery-storage`` and
+ ``fastavro`` packages.
+
+ .. versionadded:: 0.25.0
private_key : str, deprecated
Deprecated in pandas-gbq version 0.8.0. Use the ``credentials``
parameter and
@@ -131,22 +144,27 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
"""
pandas_gbq = _try_import()
- if dialect is None:
- dialect = "legacy"
- warnings.warn(
- 'The default value for dialect is changing to "standard" in a '
- 'future version of pandas-gbq. Pass in dialect="legacy" to '
- "disable this warning.",
- FutureWarning,
- stacklevel=2,
- )
+ kwargs = {}
+
+ # START: new kwargs. Don't populate unless explicitly set.
+ if use_bqstorage_api is not None:
+ kwargs["use_bqstorage_api"] = use_bqstorage_api
+ # END: new kwargs
+
+ # START: deprecated kwargs. Don't populate unless explicitly set.
+ if verbose is not None:
+ kwargs["verbose"] = verbose
+
+ if private_key is not None:
+ kwargs["private_key"] = private_key
+ # END: deprecated kwargs
return pandas_gbq.read_gbq(
query, project_id=project_id, index_col=index_col,
col_order=col_order, reauth=reauth,
auth_local_webserver=auth_local_webserver, dialect=dialect,
location=location, configuration=configuration,
- credentials=credentials, verbose=verbose, private_key=private_key)
+ credentials=credentials, **kwargs)
def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
index 87ffc94f7d0469..21e0a63bf4ce79 100644
--- a/pandas/tests/io/test_gbq.py
+++ b/pandas/tests/io/test_gbq.py
@@ -8,7 +8,6 @@
import pandas as pd
from pandas import DataFrame
-import pandas.util.testing as tm
api_exceptions = pytest.importorskip("google.api_core.exceptions")
bigquery = pytest.importorskip("google.cloud.bigquery")
@@ -90,16 +89,59 @@ def make_mixed_dataframe_v2(test_size):
index=range(test_size))
-def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
- # Default dialect is changing to standard SQL. See:
- # https://github.com/pydata/pandas-gbq/issues/195
+def test_read_gbq_with_deprecated_kwargs(monkeypatch):
+ captured_kwargs = {}
- def mock_read_gbq(*args, **kwargs):
+ def mock_read_gbq(sql, **kwargs):
+ captured_kwargs.update(kwargs)
return DataFrame([[1.0]])
- monkeypatch.setattr(pandas_gbq, 'read_gbq', mock_read_gbq)
- with tm.assert_produces_warning(FutureWarning):
- pd.read_gbq("SELECT 1")
+ monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq)
+ private_key = object()
+ pd.read_gbq("SELECT 1", verbose=True, private_key=private_key)
+
+ assert captured_kwargs["verbose"]
+ assert captured_kwargs["private_key"] is private_key
+
+
+def test_read_gbq_without_deprecated_kwargs(monkeypatch):
+ captured_kwargs = {}
+
+ def mock_read_gbq(sql, **kwargs):
+ captured_kwargs.update(kwargs)
+ return DataFrame([[1.0]])
+
+ monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq)
+ pd.read_gbq("SELECT 1")
+
+ assert "verbose" not in captured_kwargs
+ assert "private_key" not in captured_kwargs
+
+
+def test_read_gbq_with_new_kwargs(monkeypatch):
+ captured_kwargs = {}
+
+ def mock_read_gbq(sql, **kwargs):
+ captured_kwargs.update(kwargs)
+ return DataFrame([[1.0]])
+
+ monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq)
+ pd.read_gbq("SELECT 1", use_bqstorage_api=True)
+
+ assert captured_kwargs["use_bqstorage_api"]
+
+
+def test_read_gbq_without_new_kwargs(monkeypatch):
+ captured_kwargs = {}
+
+ def mock_read_gbq(sql, **kwargs):
+ captured_kwargs.update(kwargs)
+ return DataFrame([[1.0]])
+
+ monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq)
+ pd.read_gbq("SELECT 1")
+
+ assert "use_bqstorage_api" not in captured_kwargs
@pytest.mark.single