In [None]:
import pandas as pd
import pandas_gbq
import google.auth
from arcgis.features import FeatureLayer

# Resolve Google credentials and the associated project id once, up front.
# Both names are read as module-level globals by get_data() when it calls
# pandas_gbq.read_gbq.
credentials, project = google.auth.default()

# Define Functions

In [None]:
def SQL_constructor(year, quarter, day_week, sql_query):
    """Fill the table placeholders in a SQL template with concrete table names.

    The template may contain the literal tokens ``trip_table``,
    ``segment_table``, ``geo_table`` and ``population_table``; every occurrence
    of each token is replaced by the fully qualified table name built from the
    arguments.

    Args:
        year: Year component of the table name (e.g. 2021).
        quarter: Quarter component of the table name (e.g. 'Q2').
        day_week: Day-of-week component used only by the trip table.
        sql_query: SQL template text containing the placeholder tokens.

    Returns:
        The SQL text with all placeholder tokens substituted.
    """
    # Insertion order of this dict is the substitution order.
    placeholder_tables = {
        'trip_table': f'replica-customer.cal_nev.cal_nev_{year}_{quarter}_{day_week}_trip',
        'segment_table': f'replica-customer.cal_nev.cal_nev_{year}_{quarter}_network_segments',
        'geo_table': 'replica-customer.Geos.trct_2020',
        'population_table': f'replica-customer.cal_nev.cal_nev_{year}_{quarter}_population',
    }
    resolved_query = sql_query
    for placeholder, table_name in placeholder_tables.items():
        resolved_query = resolved_query.replace(placeholder, table_name)
    return resolved_query
def get_data(year, quarter, day_week, sql_query):
    """Run a SQL template for one year/quarter/day and tag the result rows.

    The template is resolved with SQL_constructor, printed, and executed via
    pandas_gbq.read_gbq using the module-level ``project`` and ``credentials``.

    Args:
        year: Year passed to SQL_constructor and stored in the 'year' column.
        quarter: Quarter passed to SQL_constructor and stored in 'quarter'.
        day_week: Day of week passed to SQL_constructor and stored in 'day_week'.
        sql_query: SQL template text with table placeholders.

    Returns:
        The query result as a DataFrame with 'year', 'quarter' and 'day_week'
        columns set to the given arguments on every row.
    """
    resolved_sql = SQL_constructor(year, quarter, day_week, sql_query)
    # Echo the final SQL so the executed query is visible in the cell output.
    print(resolved_sql)
    result = pandas_gbq.read_gbq(resolved_sql, project_id=project, credentials=credentials)
    # Stamp the query parameters onto every row so frames can be stacked later.
    for column_name, value in (('year', year), ('quarter', quarter), ('day_week', day_week)):
        result[column_name] = value
    return result
# Gets data from the TRPA server
def get_fs_data(service_url):
    """Download a feature layer's attribute table as a DataFrame.

    Args:
        service_url: URL of the feature layer to query.

    Returns:
        A DataFrame with one row per returned feature, built from each
        feature's ``attributes`` mapping.
    """
    # query() is called with no arguments, i.e. with the library defaults.
    features = FeatureLayer(service_url).query().features
    attribute_rows = [feature.attributes for feature in features]
    return pd.DataFrame(attribute_rows)
def in_basin(x, tahoe_geom):
    """Label an ID as inside or outside the basin.

    Args:
        x: The ID to look up.
        tahoe_geom: DataFrame with a 'GEO_ID_FULL' column of basin IDs.

    Returns:
        'In Basin' if ``x`` is found among the 'GEO_ID_FULL' values,
        otherwise 'Out of Basin'.
    """
    basin_ids = tahoe_geom['GEO_ID_FULL'].values
    return 'In Basin' if x in basin_ids else 'Out of Basin'
def trip_type(x, y):
    """Classify a trip from the basin labels of its two ends.

    Args:
        x: Basin label of one end ('In Basin' or 'Out of Basin').
        y: Basin label of the other end ('In Basin' or 'Out of Basin').

    Returns:
        'Visit Tahoe' when exactly one end is 'In Basin' and the other is
        'Out of Basin', 'Internal' when both ends are 'In Basin', and
        'External' for every other combination.
    """
    # Logical `or` (not bitwise `|`) is the right operator for Python booleans:
    # it short-circuits and does not depend on parenthesisation to be correct.
    crosses_boundary = (
        (x == 'In Basin' and y == 'Out of Basin')
        or (x == 'Out of Basin' and y == 'In Basin')
    )
    if crosses_boundary:
        return 'Visit Tahoe'
    if x == 'In Basin' and y == 'In Basin':
        return 'Internal'
    return 'External'
def trip_purpose(x):
    """Collapse a travel purpose into one of two groups.

    Args:
        x: The travel purpose value.

    Returns:
        'RECREATION' when ``x`` equals 'RECREATION'; 'EVERYDAY TAHOE' for any
        other value.
    """
    return 'RECREATION' if x == 'RECREATION' else 'EVERYDAY TAHOE'
def trip_category(x, y):
    """Combine trip type and trip purpose into a single category label.

    Args:
        x: Trip type label (as produced by trip_type).
        y: Trip purpose label (as produced by trip_purpose).

    Returns:
        'External' or 'Visit Tahoe' when ``x`` is one of those labels (the
        purpose is ignored); otherwise 'RECREATION' when ``y`` equals
        'RECREATION', else 'EVERYDAY TAHOE'.
    """
    # These two trip types take precedence over the purpose.
    for passthrough_label in ('External', 'Visit Tahoe'):
        if x == passthrough_label:
            return passthrough_label
    return 'RECREATION' if y == 'RECREATION' else 'EVERYDAY TAHOE'

# Import SQL Queries

In [None]:
# Load the SQL templates. Reading inside `with` closes each file handle as soon
# as its text is loaded instead of leaving it open until garbage collection.
with open('Trip_Query_Param.sql', 'r') as trip_sql_file:
    Trip_SQL = trip_sql_file.read()
with open('Segment_Query_Param.sql', 'r') as segment_sql_file:
    Segment_SQL = segment_sql_file.read()

# Bring in Trip Counts

In [None]:
# NOTE: Quarters and Years are not read by the loop below (it iterates
# year_quarter_dict); they are kept in case other cells reference them.
Quarters = ['Q2','Q4']
Years = [2021, 2022]
day = ['saturday', 'thursday']
# Quarters to query for each year.
year_quarter_dict = {
    2021: ['Q2', 'Q4'],
    2022: ['Q4'],
    2023: ['Q2', 'Q4']
}

# Collect the non-empty result of every (year, quarter, day) query and stack
# them once at the end. Concatenating inside the loop re-copies all previous
# rows on every iteration and starts from an empty frame, which recent pandas
# versions warn about.
trip_frames = []
for year, quarters in year_quarter_dict.items():
    for quarter in quarters:
        for day_week in day:
            data = get_data(year, quarter, day_week, Trip_SQL)
            if len(data) > 0:
                trip_frames.append(data)

# ignore_index gives the stacked frame a unique 0..n-1 index rather than
# repeating each query's own row numbers. Fall back to an empty frame when
# every query came back empty (pd.concat raises on an empty list).
trip_data = pd.concat(trip_frames, ignore_index=True) if trip_frames else pd.DataFrame()


## Process Trip counts for grouping and in/out of basin fields

In [None]:
# Pull the Tahoe geometry attribute table and keep only the block-group rows.
# The resulting GEO_ID_FULL column is what in_basin() checks origin_bgrp,
# destination_bgrp and BLOCKGROUP against.
# NOTE(review): the original comment said "block groups 2020" but the filter
# keeps YEAR == 2010 - confirm which vintage matches the block-group IDs in the
# trip and segment queries.
tahoe_geom = get_fs_data('https://maps.trpa.org/server/rest/services/Demographics/MapServer/27')
is_target_block_group = (tahoe_geom['YEAR'] == 2010) & (tahoe_geom['GEOGRAPHY'] == 'Block Group')
# .copy() makes the filtered rows an independent frame, so the column
# assignment below does not write to a slice of the downloaded table
# (which raises SettingWithCopyWarning).
tahoe_geom = tahoe_geom.loc[is_target_block_group].copy()

# Drop the last four characters of the TRPAID column.
tahoe_geom['GEO_ID_FULL'] = tahoe_geom['TRPAID'].str[:-4]

In [None]:
# Modes that are collapsed into the single 'Car' group.
private_auto_list = ['CARPOOL', 'ON_DEMAND_AUTO', 'PRIVATE_AUTO']

# Label each ID column as 'In Basin' / 'Out of Basin'. Applying in_basin to the
# single column (instead of DataFrame.apply(axis=1)) avoids building a Series
# for every row and also works when trip_data has no rows.
for source_column, basin_column in (
    ('origin_bgrp', 'origin_basin'),
    ('destination_bgrp', 'destination_basin'),
    ('BLOCKGROUP', 'resident_basin'),
):
    trip_data[basin_column] = trip_data[source_column].apply(
        lambda geo_id: in_basin(geo_id, tahoe_geom)
    )

# Two-column classifications: walk the needed columns in lockstep.
trip_data['trip_type'] = [
    trip_type(origin, destination)
    for origin, destination in zip(trip_data['origin_basin'], trip_data['destination_basin'])
]
trip_data['trip_purpose'] = trip_data['travel_purpose'].apply(trip_purpose)
trip_data['trip_category'] = [
    trip_category(kind, purpose)
    for kind, purpose in zip(trip_data['trip_type'], trip_data['trip_purpose'])
]
trip_data['grouped_mode'] = trip_data['mode'].apply(lambda x: 'Car' if x in private_auto_list else x)
trip_data.to_csv('Trip_data.csv', index=False)

# Group trips

In [None]:
# Columns that define one output row of the grouped trip table.
grouping_variables = [
    'year', 'quarter', 'day_week', 'mode', 'travel_purpose',
    'origin_basin', 'destination_basin', 'resident_basin',
    'trip_type', 'trip_purpose', 'trip_category', 'grouped_mode',
]
# Measures that are summed within each group.
summed_columns = ['number_of_trips', 'total_distance_in_basin', 'total_distance_miles']

# NOTE(review): groupby drops rows whose grouping keys contain missing values
# by default - confirm that is acceptable for 'mode' / 'travel_purpose'.
trip_data_grouped = (
    trip_data
    .groupby(grouping_variables)
    .agg({column: 'sum' for column in summed_columns})
    .reset_index()
)
# NOTE(review): 0.000000621371 is 1/1000 of the metres-to-miles factor
# (0.000621371), which would be consistent with total_distance_in_basin being
# in millimetres - confirm the unit returned by the trip query.
trip_data_grouped['total_distance_in_basin_miles'] = (
    0.000000621371 * trip_data_grouped['total_distance_in_basin']
)
trip_data_grouped.to_csv('Trip_data_grouped.csv', index=False)

# Segments

In [None]:
# This cell rebinds `day` and `year_quarter_dict`, so from here on they hold
# the segment settings (2023 only) rather than the trip-count settings.
day = ['saturday', 'thursday']
year_quarter_dict = {2023: ['Q2', 'Q4']}

# One segment DataFrame per query, keyed by '<year>_<quarter>_<day_week>'.
segment_data = {}
for year, quarters in year_quarter_dict.items():
    for quarter in quarters:
        for day_week in day:
            segment_key = f'{year}_{quarter}_{day_week}'
            segment_data[segment_key] = get_data(year, quarter, day_week, Segment_SQL)


In [None]:
# For every segment frame, build a table of trips per osmid with one column per
# resident_basin label. Results are keyed by '<segment key>_pivot'.
resident_pivot = {}
for key, segment_frame in segment_data.items():
    # Add a resident_basin column ('In Basin' / 'Out of Basin') from BLOCKGROUP.
    # Applying in_basin to the single column avoids the per-row Series that
    # DataFrame.apply(axis=1) builds and also works on an empty frame.
    segment_frame['resident_basin'] = segment_frame['BLOCKGROUP'].apply(
        lambda geo_id: in_basin(geo_id, tahoe_geom)
    )
    trips_by_residency = (
        segment_frame
        .groupby(['osmid', 'resident_basin'])
        .agg({'number_of_trips': 'sum'})
        .reset_index()
    )
    # Spread the resident_basin labels into columns, one row per osmid.
    resident_pivot[f'{key}_pivot'] = (
        trips_by_residency
        .pivot(index='osmid', columns='resident_basin', values='number_of_trips')
        .reset_index()
    )


In [None]:
# Build a table of trips per osmid with one column per mode.
# The previous version of this cell was a copy of the resident-basin cell: it
# recomputed resident_pivot with identical content and left modeshare_pivot
# empty. It now fills modeshare_pivot and no longer touches resident_pivot.
# NOTE(review): this assumes the segment query returns a 'mode' column (as the
# trip query does) - confirm against Segment_Query_Param.sql. Frames without
# that column are skipped with a message rather than raising.
modeshare_pivot = {}
for key, segment_frame in segment_data.items():
    if 'mode' not in segment_frame.columns:
        print(f"Skipping mode-share pivot for {key}: segment data has no 'mode' column")
        continue
    trips_by_mode = (
        segment_frame
        .groupby(['osmid', 'mode'])
        .agg({'number_of_trips': 'sum'})
        .reset_index()
    )
    # Distinct key suffix so these tables cannot be confused with (or written
    # over) the resident-basin pivots.
    modeshare_pivot[f'{key}_modeshare_pivot'] = (
        trips_by_mode
        .pivot(index='osmid', columns='mode', values='number_of_trips')
        .reset_index()
    )


In [None]:
# Write every resident-basin pivot table to '<pivot name>.csv' in the working
# directory, without the DataFrame index.
for pivot_name, pivot_frame in resident_pivot.items():
    pivot_frame.to_csv(f'{pivot_name}.csv', index=False)