In [1]:
from google.cloud import bigquery

# Create the BigQuery client; every API request and query in the cells
# below is issued through this object
client = bigquery.Client()

In [2]:
# Construct a reference to the "openaq" dataset in the "bigquery-public-data"
# project. The reference is built directly with DatasetReference because
# Client.dataset() is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "openaq")

# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)

# Construct a reference to the "global_air_quality" table
table_ref = dataset_ref.table("global_air_quality")

# API request - fetch the table
table = client.get_table(table_ref)

# Preview the first five rows of the "global_air_quality" table
client.list_rows(table, max_results=5).to_dataframe()

Unnamed: 0,location,city,country,pollutant,value,timestamp,unit,source_name,latitude,longitude,averaged_over_in_hours,location_geom
0,"Płock, ul. Reja",Płock,PL,bc,0.3,2022-05-09 18:00:00+00:00,µg/m³,GIOS,1.0,52.550938,19.709791,POINT(52.550938 1)
1,"Legnica, al. Rzeczypospolitej",Legnica,PL,bc,0.67909,2022-05-16 05:00:00+00:00,µg/m³,GIOS,1.0,51.204503,16.180513,POINT(51.204503 1)
2,"Włocławek, ul. Okrzei",Włocławek,PL,bc,2.64,2022-04-29 06:00:00+00:00,µg/m³,GIOS,1.0,52.658467,19.059314,POINT(52.658467 1)
3,"Zielonka, Bory Tucholskie",Zielonka,PL,bc,0.41,2022-05-04 08:00:00+00:00,µg/m³,GIOS,1.0,53.662136,17.933986,POINT(53.662136 1)
4,"Kędzierzyn-Koźle, ul. Śmiałego",Kędzierzyn-Koźle,PL,bc,1.32639,2022-05-14 21:00:00+00:00,µg/m³,GIOS,1.0,50.349608,18.236575,POINT(50.349608 1)


In [5]:
# Query to select the country of every measurement recorded in units of "ppm"
# (no DISTINCT, so a country appears once per matching row)
first_query =   """
                SELECT country
                FROM `bigquery-public-data.openaq.global_air_quality`
                WHERE unit = 'ppm'
                """

# Set up the query with a safety cap: the job is cancelled instead of running
# if it would bill more than 10**9 bytes (1 GB) against your quota
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)  # 1 GB
first_query_job = client.query(first_query, job_config=safe_config)

# API request - run the query, and return a pandas DataFrame
# (this statement must sit at top-level indentation; an indented line here
# raises IndentationError and stops the cell)
first_results = first_query_job.to_dataframe()

# View top few rows of results
print(first_results.head())

  country
0      AR
1      IL
2      AR
3      IL
4      AR


In [6]:
# Display the whole result frame: one row per "ppm" measurement, so country
# codes repeat; the notebook renders only the first and last rows of it
first_results

Unnamed: 0,country
0,AR
1,IL
2,AR
3,IL
4,AR
...,...
1286104,ZA
1286105,ZA
1286106,ZA
1286107,ZA


In [11]:
# Query for the average measured value per country, restricted to
# measurements recorded in "ppm", keeping only countries whose average
# exceeds 0.2
query = """
        SELECT country, avg(value) as avg_value
        FROM `bigquery-public-data.openaq.global_air_quality`
        WHERE unit = 'ppm'
        GROUP BY country
        HAVING avg(value) > .2
        """

# Set up the query with a safety cap: the job is cancelled instead of running
# if it would bill more than 10**9 bytes (1 GB) against your quota
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9) # 10**9 bytes = 1 GB
query_job = client.query(query, job_config=safe_config)

# API request - run the query, and return a pandas DataFrame
results = query_job.to_dataframe()

# Display the full result (one row per country that passed the HAVING filter)
results

Unnamed: 0,country,avg_value
0,AR,0.210005
1,PE,0.207729
2,EC,17.168485
3,AU,12.318521
4,BR,0.492593
5,MX,0.988086
