In [1]:
from google.cloud import bigquery

# Create a "Client" object; the later cells in this notebook reuse it for
# every BigQuery API request (dataset/table lookups, row previews, queries).
# NOTE(review): constructed with no arguments, so the project and credentials
# presumably come from the runtime environment — confirm.
client = bigquery.Client()

In [2]:
# Construct a reference to the "world_bank_intl_education" dataset.
# The reference is built directly with DatasetReference(project, dataset_id)
# rather than client.dataset(), which google-cloud-bigquery has deprecated;
# both produce the same reference object.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "world_bank_intl_education")

# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)

# Construct a reference to the "international_education" table
table_ref = dataset_ref.table("international_education")

# API request - fetch the table
table = client.get_table(table_ref)

# Preview the first five lines of the "international_education" table
# (left as the cell's last expression so the notebook renders the DataFrame)
client.list_rows(table, max_results=5).to_dataframe()

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,value,year
0,Suriname,SUR,"Population, ages 0-14 (% of total)",SP.POP.0014.TO.ZS,26.69082,2016
1,Tajikistan,TJK,"Gross enrolment ratio, pre-primary, male (%)",SE.PRE.ENRR.MA,11.7556,2016
2,Tajikistan,TJK,Percentage of teachers in pre-primary educatio...,UIS.TRTP.0,100.0,2016
3,Tajikistan,TJK,Population of the official age for pre-primary...,SP.PRE.TOTL.FE.IN,397896.0,2016
4,Iraq,IRQ,Population growth (annual %),SP.POP.GROW,2.965164,2016


In [4]:
# Which countries spend the largest fraction of GDP on education?
# Averages the 'SE.XPD.TOTL.GD.ZS' indicator per country over 2010-2017
# (BETWEEN is inclusive on both ends) and sorts from highest to lowest.
country_spend_pct_query = """
                          SELECT country_name, avg(value) as avg_ed_spending_pct
                          FROM `bigquery-public-data.world_bank_intl_education.international_education`
                          WHERE indicator_code = 'SE.XPD.TOTL.GD.ZS'
                            AND year BETWEEN 2010 AND 2017
                          GROUP BY country_name
                          ORDER BY avg_ed_spending_pct DESC
                          """

# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB: 10**10 bytes)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
country_spend_pct_query_job = client.query(country_spend_pct_query, job_config=safe_config)

# API request - run the query, and return a pandas DataFrame
country_spending_results = country_spend_pct_query_job.to_dataframe()

# Print the full result set; pandas abbreviates long frames, so only the
# first and last rows appear in the output. The complete frame is kept
# because later cells look countries up in it.
print(country_spending_results)

                 country_name  avg_ed_spending_pct
0                        Cuba            12.837270
1       Micronesia, Fed. Sts.            12.467750
2             Solomon Islands            10.001080
3                     Moldova             8.372153
4                     Namibia             8.349610
..                        ...                  ...
152                  Cambodia             1.706404
153        West Bank and Gaza             1.503760
154               South Sudan             1.409726
155                    Monaco             1.409606
156  Central African Republic             1.214010

[157 rows x 2 columns]


In [9]:
# Pull the two result columns out as plain Python lists (row order is kept,
# so position i in both lists refers to the same row of the query result).
countries = country_spending_results["country_name"].tolist()
avg = country_spending_results["avg_ed_spending_pct"].tolist()

# Lookup table: country name -> its avg_ed_spending_pct value
dir_countries = {name: pct for name, pct in zip(countries, avg)}

In [18]:
# Report the United States' zero-based position in the `countries` list
# alongside its value from the `dir_countries` lookup.
us_position = countries.index('United States')
us_average = dir_countries['United States']
print(us_position, ' , ', us_average)

52  ,  5.2300701141357395


In [19]:
# Count the rows recorded for each indicator in 2016, keep only indicators
# with at least 175 rows, and list the most frequent ones first.
code_count_query = """
                   SELECT indicator_code, indicator_name, COUNT(1) AS num_rows
                   FROM `bigquery-public-data.world_bank_intl_education.international_education`
                   WHERE year = 2016
                   GROUP BY indicator_name, indicator_code
                   HAVING COUNT(1) >= 175
                   ORDER BY COUNT(1) DESC
                   """

# Configure the job with a cap of 10**10 billed bytes, then submit the query
safe_config = bigquery.QueryJobConfig(
    maximum_bytes_billed=10 ** 10,
)
code_count_query_job = client.query(
    code_count_query,
    job_config=safe_config,
)

# API request - run the query and load the result into a pandas DataFrame
code_count_results = code_count_query_job.to_dataframe()

# Show the first five rows of the result
print(code_count_results.head(n=5))

      indicator_code                      indicator_name  num_rows
0        SP.POP.GROW        Population growth (annual %)       232
1        SP.POP.TOTL                   Population, total       232
2     IT.NET.USER.P2     Internet users (per 100 people)       223
3  SP.POP.0014.TO.ZS  Population, ages 0-14 (% of total)       213
4  SP.POP.1564.MA.IN        Population, ages 15-64, male       213
