In [None]:
import pandas as pd
import matplotlib
from google.cloud import bigquery

In [None]:
# Google Cloud project that the shared BigQuery client is bound to.
PROJECT = "yhcr-prd-phm-bia-core"
# Single client instance; run_sql_query() submits its jobs and table lookups
# through it.
CLIENT = bigquery.Client(project=PROJECT)

def run_sql_query(sql, destination=None):
    """Run a SQL statement on BigQuery and block until it has finished.

    The statement is submitted through the notebook-level ``CLIENT``. When
    ``destination`` is supplied, the results are written to that table id
    using the ``WRITE_TRUNCATE`` disposition, i.e. a table that already
    exists at that id is overwritten.

    Args:
        sql: string, the SQL statement to run.
        destination: string (default: None), table id that receives the
            results of the statement. Any falsy value means the results are
            not stored.

    Returns:
        The table object returned by ``CLIENT.get_table(destination)`` when a
        destination was supplied
        -- otherwise --
        the completed query job returned by ``CLIENT.query``.

    Example:
    ```python
    # queries example table and stores results in new table
    table = run_sql_query(
        sql = "SELECT * FROM `example.table.id`",
        destination = "destination.for.results"
    )

    # caps "value" column at 100
    query = run_sql_query(
        sql = "UPDATE example.table.id SET value = 100 WHERE value > 100"
    )
    ```
    """
    store_results = bool(destination)

    # Only build a job config when the results have to land in a table.
    job_config = None
    if store_results:
        job_config = bigquery.QueryJobConfig(
            destination=destination,
            write_disposition="WRITE_TRUNCATE",
        )

    query_job = CLIENT.query(sql, job_config=job_config)  # API request
    query_job.result()  # wait for completion

    if not store_results:
        return query_job
    return CLIENT.get_table(destination)


In [None]:
# Project id used when composing the destination table ids in this notebook.
project_id = "yhcr-prd-phm-bia-core"

# SNOMED codes that select the diagnosis cohort (ASD, going by the variable
# names). Kept as strings because they are rendered as quoted SQL literals.
asd_snomed_codes = [
    "35919005", "442314000", "23560001", "231536004", "718393002",
    "408856003", "373618009", "71961003", "702450004", "723332005",
    "712884004", "39951000119105", "870307006", "870308001", "870305003",
    "870306002", "870303005", "870304004", "870269009", "870270005",
    "870268001", "870266002", "870267006", "870264004", "870265003",
    "870262000", "870263005", "870260008", "870261007", "870280009",
    "870282001", "68618008", "432091002", "708037001", "719600006",
    "766824003", "722287002", "771512003", "733623005", "43614003",
    "702732007", "408857007", "783089006", "191692007", "191693002",
    "191690004", "771448004", "770790004", "191689008",
]

# Parenthesised, comma-separated list of double-quoted codes, ready to follow
# an SQL `IN`.
sql_asd_snomed_list = "({})".format(
    ", ".join('"{}"'.format(code) for code in asd_snomed_codes)
)

def return_yr_date_diff_sql(from_date, to_date, var_name):
    """Build an aliased SQL expression for the whole-year gap between two dates.

    The expression takes the day difference ``to_date - from_date``, divides
    it by 365.25 and floors the result, so the year count is an approximation
    based on an average year length rather than on calendar anniversaries.

    Args:
        from_date: string, SQL expression for the earlier date.
        to_date: string, SQL expression for the later date.
        var_name: string, alias given to the resulting column.

    Returns:
        string of the form
        ``FLOOR(DATE_DIFF(<to>, <from>, DAY) / 365.25) AS <alias>``.
    """
    template = "FLOOR(DATE_DIFF({end}, {start}, DAY) / 365.25) AS {alias}"
    return template.format(end=to_date, start=from_date, alias=var_name)

# Select-list fragments for the master query below. Both ages are whole years
# obtained from a day count divided by 365.25 (see return_yr_date_diff_sql);
# "age" is relative to CURRENT_DATE(), so it changes with the day the query runs.
age = return_yr_date_diff_sql("demo.DOB_formatted", "CURRENT_DATE()", "age")
age_at_diagnosis = return_yr_date_diff_sql("demo.DOB_formatted", 
                                           "diag.diagnosis_date", 
                                           "age_at_diagnosis")

# Captures the text from the start of census_ethnicity up to the first ":".
# When nothing is captured the row is labelled "Unknown".
ethnic_group_regex = "REGEXP_EXTRACT(demo.census_ethnicity, r'^(.+?):')"
ethnic_group = f"""
    CASE
        WHEN {ethnic_group_regex} IS NOT NULL THEN {ethnic_group_regex}
        ELSE "Unknown"
    END AS ethnic_group
"""

# Captures the text between a ":" and the following "-" in census_ethnicity.
# When nothing is captured the row is labelled "Other".
ethnic_subgroup_regex = "REGEXP_EXTRACT(demo.census_ethnicity, r':(.+?)-')" 
ethnic_subgroup = f"""
    CASE
        WHEN {ethnic_subgroup_regex} IS NOT NULL THEN {ethnic_subgroup_regex}
        ELSE "Other"
    END AS ethnic_subgroup
"""

# Maps the two numeric remapped_gender codes handled here to text labels;
# every other value (including NULL) becomes "Unknown".
sex = """
    CASE
        WHEN demo.remapped_gender = 45766034 THEN "Male"
        WHEN demo.remapped_gender = 45766035 THEN "Female"
        ELSE "Unknown"
    END AS sex
"""
# Backtick-quoted ids of the source tables read by the master query.
project = "yhcr-prd-phm-bia-core"
# NOTE(review): the dataset segment between the two dots is empty, so this is
# not a `project.dataset.table` id -- looks like the dataset name is missing.
# TODO confirm the intended dataset before running this cell.
srcode_table = f"`{project}..tbl_SRCode`"
demographics_table = f"`{project}.CY_STAGING_DATABASE.src_DemoGraphics_MASTER`"
lsoas_table = f"`{project}.CY_LOOKUPS.tbl_lsoa_boundaries`"
wards_table = f"`{project}.CY_LOOKUPS.tbl_lsoa_to_ward`"
# Master query:
#   * `diag` keeps one row per person_id: the earliest src_dateeventrecorded
#     among rows whose src_snomedcode is in the code list.
#   * demographics, LSOA and ward details are attached with LEFT JOINs, so
#     people without a match in those tables are kept (with NULLs).
# NOTE(review): if an LSOA maps to more than one ward row, the ward join would
# repeat that person -- verify against the lookup table.
sql = f"""
    WITH diag AS (
        SELECT person_id, MIN(src_dateeventrecorded) AS diagnosis_date
        FROM {srcode_table}
        WHERE src_snomedcode IN {sql_asd_snomed_list} 
        GROUP BY person_id
    )
    SELECT diag.*, {age}, {age_at_diagnosis}, {ethnic_group}, 
        {ethnic_subgroup}, {sex}, lsoas.lat_long, lsoas.lsoa_name, 
        wards.ward_name, wards.ward_code
    FROM diag
    LEFT JOIN {demographics_table} demo
    ON diag.person_id = demo.person_id
    LEFT JOIN {lsoas_table} lsoas
    ON lsoas.LSOA_code = demo.LSOA
    LEFT JOIN {wards_table} wards
    ON wards.LSOA_code = demo.LSOA
"""

# Store the result as the master table; run_sql_query writes destinations with
# WRITE_TRUNCATE, so an existing master table is overwritten.
asd_master_tab = f"{project_id}.CY_ASD_data.ASD_master_tab"
query = run_sql_query(sql=sql, destination=asd_master_tab)

In [None]:
def return_add_sub_group_sql(table, colname, condition=False, master_table=None):
    """Build SQL that adds a boolean membership column to the master table.

    The query collects the distinct ``person_id`` values found in ``table``
    (optionally restricted by ``condition``) and left-joins them onto the
    master table. Every master-table row is kept; the new column is ``true``
    when the person appears in the filtered ``table`` and ``false`` otherwise.

    Args:
        table: string, id of the table that defines the sub-group. It is
            interpolated into the SQL as given, so the caller decides whether
            to backtick-quote it.
        colname: string, name of the boolean column to add.
        condition: string (default: False), SQL boolean expression applied to
            ``table`` before collecting person ids. It is wrapped in
            parentheses so that expressions containing ``OR`` stay
            self-contained. Any falsy value means no filter.
        master_table: string (default: None), id of the table the column is
            added to. When None, the notebook-level ``asd_master_tab`` is
            used, which keeps existing three-argument calls working.

    Returns:
        string, the SQL query. Nothing is executed here.

    NOTE(review): the query selects ``asd.*`` plus ``colname``; if the master
    table already contains ``colname`` (e.g. the calling cell is re-run after
    its result was written back), the select list names that column twice --
    presumably BigQuery rejects that for a destination table. Rebuild the
    master table before re-running such a cell.
    """
    if master_table is None:
        # Backward-compatible default: resolve the notebook global at call
        # time, exactly as the function did before this parameter existed.
        master_table = asd_master_tab

    where_clause = f"WHERE ({condition})" if condition else ""

    sql = f"""
        WITH group_tab AS (
            SELECT person_id
            FROM {table}
            {where_clause}
            GROUP BY person_id
        )
        SELECT asd.*, IF(group_tab.person_id IS NULL, false, true) AS {colname}
        FROM {master_table} asd
        LEFT JOIN group_tab
        ON group_tab.person_id = asd.person_id
    """
    return sql

In [None]:
# Exclusions table; the fixed-term-exclusion cell reuses this id.
exclusions_tab = f"`{project}.CY_MYSPACE_SR.exclusions`"

# Flag people who have at least one exclusions row with a positive
# PermanentExclusionCount, then write the widened table back over the master
# table.
sql = return_add_sub_group_sql(
    table=exclusions_tab,
    colname="perm_exclusion",
    condition="PermanentExclusionCount > 0",
)
run_sql_query(sql=sql, destination=asd_master_tab)

In [None]:
# Flag people who have at least one exclusions row whose Category is "FIXD"
# or "LNCH", then write the widened table back over the master table.
sql = return_add_sub_group_sql(
    table=exclusions_tab,
    colname="fixed_term_exclusion",
    condition='Category = "FIXD" OR Category = "LNCH"',
)
run_sql_query(sql=sql, destination=asd_master_tab)

In [None]:
# Each step below adds one boolean column to the master table: true when the
# person has any row in the named children's-social-care table. The steps run
# in sequence because each one overwrites the master table with its result.

# CPP table -> has_protection_plan
cpp_tab = "yhcr-prd-phm-bia-core.CY_FDM_ChildrensSocialCare.CPP"
cpp_sql = return_add_sub_group_sql(table=cpp_tab, colname="has_protection_plan")
run_sql_query(sql=cpp_sql, destination=asd_master_tab)

# CiC table -> in_care
cic_tab = "yhcr-prd-phm-bia-core.CY_FDM_ChildrensSocialCare.CiC"
cic_sql = return_add_sub_group_sql(table=cic_tab, colname="in_care")
run_sql_query(sql=cic_sql, destination=asd_master_tab)

# CiNP table -> child_in_need
cinp_tab = "yhcr-prd-phm-bia-core.CY_FDM_ChildrensSocialCare.CiNP"
cinp_sql = return_add_sub_group_sql(table=cinp_tab, colname="child_in_need")
run_sql_query(sql=cinp_sql, destination=asd_master_tab)

In [None]:
# Source tables and the destination for the assessments extract.
factors_tab = f"{project_id}.CY_FDM_ChildrensSocialCare.tbl_FactorLookup"
assessments_tab = f"{project_id}.CY_FDM_ChildrensSocialCare.Assessments"
social_care_assessments_tab = f"{project_id}.CY_ASD_data.ASD_CSC_assessments"

# One row per assessment row that belongs to someone in the master table: the
# LEFT JOIN onto the master table followed by the IS NOT NULL filter keeps
# only matched people. Each row carries the master-table columns plus the
# Category/Subcategory looked up from the assessment's FactorID.
sql = f"""
    SELECT asd.*, factor.Category, factor.Subcategory, 
    FROM `{assessments_tab}` assess
    LEFT JOIN {factors_tab} factor
    ON factor.FactorID = assess.FactorID
    LEFT JOIN {asd_master_tab} asd
    ON assess.person_id = asd.person_id
    WHERE asd.person_id IS NOT NULL
"""
run_sql_query(sql=sql, destination=social_care_assessments_tab)

In [None]:
# EYFS profile columns used in the extract.
eyfs_cols = ["COMG01", "COMG02", "COMG03", "PHYG04", "PHYG05", "PSEG06",
             "PSEG07", "PSEG08", "LITG09", "LITG10", "MATG11", "MATG12",
             "UTWG13", "UTWG14", "UTWG15", "EXPG16", "EXPG17"]

# Integer version of every column, exposed as <col>_score.
eyfs_score_cols = [f"CAST({col} AS INT) AS {col}_score"
                   for col in eyfs_cols]
eyfs_score_cols_sql = ", ".join(eyfs_score_cols)

# Text label for every column, exposed as <col>_cat. Each fragment ends with
# a space: adjacent string literals are concatenated verbatim, and without
# the separator the SQL would read `"Emerging"WHEN ...` / `"Exceeding"ELSE`.
eyfs_cat_cols = [(f'CASE WHEN {col} = "1" THEN "Emerging" '
                  f'WHEN {col} = "2" THEN "Expected" '
                  f'WHEN {col} = "3" THEN "Exceeding" '
                  f'ELSE NULL END AS {col}_cat')
                 for col in eyfs_cols]
eyfs_cat_cols_sql = ", ".join(eyfs_cat_cols)

# WHERE clause keeping only rows in which every column is present and is not
# the value "A".
not_null_sql = ('WHERE ' +
                ' AND '.join([f'{col} IS NOT NULL' for col in eyfs_cols]) + ' AND ' +
                ' AND '.join([f'{col} != "A"' for col in eyfs_cols]))
# Source EYFSP table, the master table it is matched against, and the
# destination for the combined extract.
eyfsp_tab = "yhcr-prd-phm-bia-core.CY_FDM_DepartmentForEducation.src_EYFSP"
project_id = "yhcr-prd-phm-bia-core"
asd_master_tab = f"{project_id}.CY_ASD_data.ASD_master_tab"
asd_eyfs_tab = f"{project_id}.CY_ASD_data.EYFSP_ASD_tab"

# Every EYFSP row that passes the not_null_sql filter, with score and category
# versions of each EYFS column. The LEFT JOIN keeps rows for people who are
# not in the master table; asd_diagnosis records whether a match was found.
eyfs_asd_sql = f"""
    SELECT eyfs.person_id, {eyfs_score_cols_sql}, {eyfs_cat_cols_sql}, GLD, 
        IMD, IDACIRank, Gender, asd.age_at_diagnosis,
        IF(asd.person_id IS NULL, false, true) AS asd_diagnosis
    FROM `{eyfsp_tab}` eyfs
    LEFT JOIN {asd_master_tab} asd 
    ON asd.person_id = eyfs.person_id
    {not_null_sql}
"""
run_sql_query(sql=eyfs_asd_sql, destination=asd_eyfs_tab)

In [None]:
# Master table the KS4 rows are matched against, and the source KS4 table.
asd_master_tab = f"{project_id}.CY_ASD_data.ASD_master_tab"
ks4_tab = "yhcr-prd-phm-bia-core.CY_FDM_DepartmentForEducation.src_KS4_pupil"

# View over the KS4 rows that have both GCSEAC and GCSEAG present. It exposes
# the floored counts, "at least one" flags, a few pupil attributes, and
# asd_diagnosis (whether the person was found in the master table; the LEFT
# JOIN keeps unmatched pupils).
#
# CREATE OR REPLACE is used so that this cell can be re-run: a plain
# CREATE VIEW fails as soon as the view already exists, which would break a
# second "Restart & Run All". This mirrors the overwrite behaviour
# run_sql_query applies to destination tables.
ks4_view_sql = f"""
    CREATE OR REPLACE VIEW `{project_id}.CY_ASD_data.KS4_view` AS (
        SELECT ks4.person_id, FLOOR(GCSEAC) AS gcse_ac, GCSEAC > 0 AS one_ac,
            FLOOR(GCSEAG) AS gcse_ag, GCSEAG > 0 AS one_ag, SENActPlus,
            SENSchAction, FSM, LangNotEng,
            IF(asd.person_id IS NULL, false, true) AS asd_diagnosis
        FROM `{ks4_tab}` as ks4
        LEFT JOIN {asd_master_tab} asd
        ON asd.person_id = ks4.person_id
        WHERE GCSEAC IS NOT NULL AND GCSEAG IS NOT NULL
    )
"""
run_sql_query(ks4_view_sql)