In [None]:
import os
import pandas as pd
from loguru import logger

from google.cloud import bigquery
from google.oauth2 import service_account

# BigQuery API. Работа с таблицами (создание, удаление, схемы)

In [None]:
# BigQuery connection settings
big_query_key_path = "XXXXXXX.json"  # service-account key file passed to the credentials loader
project_id = 'my_project'
dataset = "test1"
table_name = "table_test"
# Fully qualified table reference: <project>.<dataset>.<table>
table_id = ".".join([project_id, dataset, table_name])

def get_client_and_table(dataset):
    """Build an authenticated BigQuery client from the service-account key file.

    The key file location is taken from the module-level ``big_query_key_path``.
    The ``dataset`` argument is accepted but is not used by the body.

    Returns:
        A ``(client, credentials)`` tuple. The client is created for the
        project recorded in the loaded credentials.
    """
    cloud_platform_scope = "https://www.googleapis.com/auth/cloud-platform"
    sa_credentials = service_account.Credentials.from_service_account_file(
        big_query_key_path,
        scopes=[cloud_platform_scope],
    )
    bq_client = bigquery.Client(
        credentials=sa_credentials,
        project=sa_credentials.project_id,
    )
    return bq_client, sa_credentials

# Create the client (and keep the credentials) that the following cells use.
client, credentials = get_client_and_table(dataset)

In [None]:
# Print the ID of each table found in the configured dataset
dataset_path = f'{project_id}.{dataset}'
tables = client.list_tables(dataset_path)
for table_item in tables:
    print(table_item.table_id)

In [None]:
# Report how many rows the table holds according to its metadata
table = client.get_table(table_id)
print(f'{table.num_rows} rows in {table.table_id}')

In [None]:
# Fetch the table schema and print only the fields whose name contains 'count'.
# Each matching field is printed exactly once; non-matching fields are skipped.
table = client.get_table(table_id)
for field in table.schema:
    if 'count' in field.name:
        print(field)

In [None]:
# Create a new, empty table from an explicit schema with nested RECORD columns.
# References:
# https://medium.com/pipeline-a-data-engineering-resource/how-to-create-nested-schemas-in-python-using-the-google-bigquery-api-8d86b1602cbd
# https://cloud.google.com/bigquery/docs/schemas

# Sub-fields of the nested 'headers' RECORD column
headers_fields = [
    bigquery.SchemaField('tag', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('last_date', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('server', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('length', 'INTEGER', mode='NULLABLE'),
]

# Sub-fields of the nested 'json' RECORD column
json_fields = [
    bigquery.SchemaField('field1', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('field2', 'INTEGER', mode='NULLABLE'),
    bigquery.SchemaField('name', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('id', 'STRING', mode='NULLABLE'),
]

# Top-level columns, in the same order as they appear in the table
schema = [
    bigquery.SchemaField('headers', 'RECORD', mode='NULLABLE', fields=headers_fields),
    bigquery.SchemaField('group_id', 'INTEGER', mode='NULLABLE'),
    bigquery.SchemaField('json', 'RECORD', mode='NULLABLE', fields=json_fields),
    bigquery.SchemaField('event', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('date_upload', 'DATETIME', mode='NULLABLE'),
]

table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table)
print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")

In [None]:
# Drop the table; not_found_ok=True means a missing table is not treated as an error
client.delete_table(table_id, not_found_ok=True)
print(f"Deleted table '{table_id}'.")

In [None]:
# Read the table rows into a DataFrame and preview the first three
table = client.get_table(table_id)
row_iterator = client.list_rows(table)
df = row_iterator.to_dataframe()
df.head(3)

In [None]:
# Delete ALL rows from the BigQuery table, then report the remaining row count.
query = f'DELETE FROM {table_id} WHERE true'
query_job = client.query(query)
# client.query() only starts the job. Wait for the DELETE to finish so that any
# job error is raised here and the row count fetched below is read after the deletion.
query_job.result()
table = client.get_table(table_id)
print('{} rows in {}'.format(table.num_rows, table.table_id))

# BigQuery API. QUERY

In [None]:
def __load_df_from_bq(table_id, query=None):
    """Run a query through the shared BigQuery client and return a DataFrame.

    Args:
        table_id: Table reference used to build the default query and
            reported in the log message.
        query: SQL text to execute. When ``None``, ``SELECT * FROM {table_id}``
            is executed instead.

    Returns:
        The result of ``client.query(query).to_dataframe()``.
    """
    if query is None:
        query = f"SELECT * FROM {table_id}"
    result = client.query(query).to_dataframe()
    # Log the table that was actually requested instead of the notebook-level
    # `dataset` global, so the message stays correct for any table_id.
    logger.info("Finish load {}, {}".format(table_id, result.shape))
    return result

In [None]:
def get_none_query(table_id):
    """Load the table via ``__load_df_from_bq`` without supplying a custom query."""
    loaded = __load_df_from_bq(table_id=table_id)
    return loaded

In [None]:
def get_is_query(table_id):
    """Load up to 1000 rows whose ``date_upload`` is within the last 7 days.

    Args:
        table_id: Table reference to select from.

    Returns:
        The DataFrame returned by ``__load_df_from_bq`` for the filtered query.

    NOTE(review): the schema cell declares ``date_upload`` as DATETIME while
    ``current_date()`` is a DATE; confirm that ``date_diff`` accepts this mix.
    """
    # Build the query from the table_id argument instead of a hard-coded table
    # name, so the function honours the table it is asked about.
    query = (
        f"""
        SELECT *
        FROM {table_id}
        WHERE date_diff(current_date(),date_upload,DAY) <= 7
        LIMIT 1000
        """
    )
    # Use the loader defined in this notebook; `__load_df_from_bq_2` is not
    # defined anywhere in the notebook and raised NameError on a fresh kernel.
    return __load_df_from_bq(table_id=table_id, query=query)

In [None]:
# Load the table with the default query, then with the custom filtered query.
df_standart = get_none_query(table_id=table_id)
df_new_query = get_is_query(table_id=table_id)