Skip to content

Commit

Permalink
ENH: Add parameter to download BigQuery results with the BigQuery Storage API (pandas-dev#26104)
Browse files Browse the repository at this point in the history

Adds new `use_bqstorage_api` parameter to `read_gbq`. This can speed up
downloads of large data frames.
  • Loading branch information
tswast authored and yhaque1213 committed Apr 22, 2019
1 parent 35fcd72 commit 15b7533
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 21 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Expand Up @@ -367,6 +367,7 @@ I/O
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
- Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. (:issue:`26104`)

Plotting
^^^^^^^^
Expand Down
44 changes: 31 additions & 13 deletions pandas/io/gbq.py
@@ -1,7 +1,5 @@
""" Google BigQuery support """

import warnings


def _try_import():
# since pandas is a dependency of pandas-gbq
Expand All @@ -26,7 +24,7 @@ def _try_import():
def read_gbq(query, project_id=None, index_col=None, col_order=None,
reauth=False, auth_local_webserver=False, dialect=None,
location=None, configuration=None, credentials=None,
private_key=None, verbose=None):
use_bqstorage_api=None, private_key=None, verbose=None):
"""
Load data from Google BigQuery.
Expand Down Expand Up @@ -103,6 +101,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
*New in version 0.8.0 of pandas-gbq*.
.. versionadded:: 0.24.0
use_bqstorage_api : bool, default False
Use the `BigQuery Storage API
<https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
download query results quickly, but at an increased cost. To use this
API, first `enable it in the Cloud Console
<https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
You must also have the `bigquery.readsessions.create
<https://cloud.google.com/bigquery/docs/access-control#roles>`__
permission on the project you are billing queries to.
This feature requires version 0.10.0 or later of the ``pandas-gbq``
package. It also requires the ``google-cloud-bigquery-storage`` and
``fastavro`` packages.
.. versionadded:: 0.25.0
private_key : str, deprecated
Deprecated in pandas-gbq version 0.8.0. Use the ``credentials``
parameter and
Expand Down Expand Up @@ -131,22 +144,27 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
"""
pandas_gbq = _try_import()

if dialect is None:
dialect = "legacy"
warnings.warn(
'The default value for dialect is changing to "standard" in a '
'future version of pandas-gbq. Pass in dialect="legacy" to '
"disable this warning.",
FutureWarning,
stacklevel=2,
)
kwargs = {}

# START: new kwargs. Don't populate unless explicitly set.
if use_bqstorage_api is not None:
kwargs["use_bqstorage_api"] = use_bqstorage_api
# END: new kwargs

# START: deprecated kwargs. Don't populate unless explicitly set.
if verbose is not None:
kwargs["verbose"] = verbose

if private_key is not None:
kwargs["private_key"] = private_key
# END: deprecated kwargs

return pandas_gbq.read_gbq(
query, project_id=project_id, index_col=index_col,
col_order=col_order, reauth=reauth,
auth_local_webserver=auth_local_webserver, dialect=dialect,
location=location, configuration=configuration,
credentials=credentials, verbose=verbose, private_key=private_key)
credentials=credentials, **kwargs)


def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
Expand Down
58 changes: 50 additions & 8 deletions pandas/tests/io/test_gbq.py
Expand Up @@ -8,7 +8,6 @@

import pandas as pd
from pandas import DataFrame
import pandas.util.testing as tm

api_exceptions = pytest.importorskip("google.api_core.exceptions")
bigquery = pytest.importorskip("google.cloud.bigquery")
Expand Down Expand Up @@ -90,16 +89,59 @@ def make_mixed_dataframe_v2(test_size):
index=range(test_size))


def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
# Default dialect is changing to standard SQL. See:
# https://github.com/pydata/pandas-gbq/issues/195
def test_read_gbq_with_deprecated_kwargs(monkeypatch):
    """Deprecated kwargs (``verbose``, ``private_key``) are forwarded to
    pandas-gbq when the caller sets them explicitly."""
    forwarded = {}

    def fake_read_gbq(sql, **kwargs):
        # Record what pandas forwards instead of hitting BigQuery.
        forwarded.update(kwargs)
        return DataFrame([[1.0]])

    monkeypatch.setattr("pandas_gbq.read_gbq", fake_read_gbq)
    sentinel_key = object()
    pd.read_gbq("SELECT 1", verbose=True, private_key=sentinel_key)

    assert forwarded["verbose"]
    assert forwarded["private_key"] is sentinel_key


def test_read_gbq_without_deprecated_kwargs(monkeypatch):
    """Deprecated kwargs must NOT be forwarded to pandas-gbq when the
    caller leaves them unset (so pandas-gbq emits no deprecation noise)."""
    forwarded = {}

    def fake_read_gbq(sql, **kwargs):
        # Record what pandas forwards instead of hitting BigQuery.
        forwarded.update(kwargs)
        return DataFrame([[1.0]])

    monkeypatch.setattr("pandas_gbq.read_gbq", fake_read_gbq)
    pd.read_gbq("SELECT 1")

    for deprecated_name in ("verbose", "private_key"):
        assert deprecated_name not in forwarded


def test_read_gbq_with_new_kwargs(monkeypatch):
    """``use_bqstorage_api`` is forwarded to pandas-gbq when explicitly set."""
    forwarded = {}

    def fake_read_gbq(sql, **kwargs):
        # Record what pandas forwards instead of hitting BigQuery.
        forwarded.update(kwargs)
        return DataFrame([[1.0]])

    monkeypatch.setattr("pandas_gbq.read_gbq", fake_read_gbq)
    pd.read_gbq("SELECT 1", use_bqstorage_api=True)

    assert forwarded["use_bqstorage_api"]


def test_read_gbq_without_new_kwargs(monkeypatch):
    """``use_bqstorage_api`` is NOT forwarded when the caller leaves it
    unset, so older pandas-gbq versions keep working."""
    forwarded = {}

    def fake_read_gbq(sql, **kwargs):
        # Record what pandas forwards instead of hitting BigQuery.
        forwarded.update(kwargs)
        return DataFrame([[1.0]])

    monkeypatch.setattr("pandas_gbq.read_gbq", fake_read_gbq)
    pd.read_gbq("SELECT 1")

    assert "use_bqstorage_api" not in forwarded


@pytest.mark.single
Expand Down

0 comments on commit 15b7533

Please sign in to comment.