# RTP Update

## Setup

In [3]:
# import packages
import pandas as pd
import pathlib
from pathlib import Path
import os
import arcpy
from utils import *
import numpy as np
import pickle

# pandas options: enable copy-on-write, silence chained-assignment warnings,
# and show up to 999 columns/rows when displaying frames
pd.options.mode.copy_on_write = True
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999
pd.options.display.max_rows    = 999

# file geodatabase used as the on-disk workspace
# NOTE(review): absolute path on one user's desktop - will not resolve on other machines
workspace = r"C:\Users\mbindl\Desktop\Workspace.gdb"
# current working directory (absolute)
local_path = pathlib.Path().absolute()
# data folder lives in the PARENT of the current working directory (<parent>/data),
# not in a subfolder of it
data_dir = local_path.parents[0] / 'data'
# folder to save processed data (<parent>/data/processed_data)
out_dir  = local_path.parents[0] / 'data/processed_data'
# workspace gdb for anything that does not work in the in-memory workspace
# gdb = os.path.join(local_path,'Workspace.gdb')
gdb = workspace
# set the arcpy environment workspace to the in-memory workspace
arcpy.env.workspace = 'memory'
# # clear memory workspace
# arcpy.management.Delete('memory')

# allow geoprocessing outputs to overwrite existing datasets
arcpy.env.overwriteOutput = True
# Set spatial reference to NAD 1983 UTM Zone 10N (EPSG 26910)
sr = arcpy.SpatialReference(26910)

# get parcels from the database
# network path to the folder holding the .sde connection files
filePath = "F:/GIS/PARCELUPDATE/Workspace/"
# one connection file per enterprise geodatabase
sdeBase    = os.path.join(filePath, "Vector.sde")
sdeCollect = os.path.join(filePath, "Collection.sde")
sdeTabular = os.path.join(filePath, "Tabular.sde")
sdeEdit    = os.path.join(filePath, "Edit.sde")

# schema for the final output
# Each column is listed exactly once: selecting a DataFrame with a list that
# repeats a name returns that column twice, which breaks exports that need
# unique field names.
# NOTE(review): this list spells 'WITHIN_BONUS_UNIT_BNDRY' while the staging
# field list further down uses 'WITHIN_BONUSUNIT_BNDY' - confirm which spelling
# the source data actually uses.
final_schema = ['APN', 'Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt',
                'ZONING_ID', 'EXISTING_LANDUSE', 'COUNTY', 'JURISDICTION', 'OWNERSHIP_TYPE',
                'IPES_SCORE', 'VHR', 'BLOCK_GROUP', 'TAZ', 'RETIRED',
                'WITHIN_BONUS_UNIT_BNDRY', 'WITHIN_TRPA_BNDY',
                'MAX_RESIDENTIAL_UNITS', 'MAX_COMMERCIAL_FLOOR_AREA', 'MAX_TAU_UNITS',
                'PARCEL_ACRES', 'PARCEL_SQFT', 'SHAPE']

# Pickle file paths
# part 1 - spatial join categories, occupancy rates, and parcels
# (path only; nothing in this cell reads or writes the file)
parcel_pickle_part1    = data_dir / 'parcel_pickle1.pkl'

In [None]:
# get parcel data from Edit SDE
parcel_db = Path(sdeEdit) / "SDE.Parcel\\SDE.Parcel_History_Attributed"
# load the parcel history, then keep only the 2022 rows
sdf_units = pd.DataFrame.spatial.from_featureclass(parcel_db)
sdf_units = sdf_units.loc[sdf_units['YEAR'] == 2022]
sdf_units.spatial.sr = sr

# get parcel level data from Collection SDE
# vhr feature layer polygons
vhr_db = Path(sdeCollect) / "SDE.Parcel\\SDE.Parcel_VHR"
sdf_vhr = pd.DataFrame.spatial.from_featureclass(vhr_db)
sdf_vhr.spatial.sr = sr
# filter vhr layer to active status
sdf_vhr = sdf_vhr.loc[sdf_vhr['Status'] == 'Active']


# get parcel level data from LTinfo
# Every LTinfo web service shares the same URL shape; only the service name varies.
# NOTE(review): the API token is embedded in source - consider reading it from
# an environment variable instead.
ltinfo_json = "https://www.laketahoeinfo.org/WebServices/{}/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476"

dfIPES       = pd.read_json(ltinfo_json.format("GetParcelIPESScores"))
dfLCV_LTinfo = pd.read_json(ltinfo_json.format("GetParcelsByLandCapability"))
dfBankedDev  = pd.read_json(ltinfo_json.format("GetBankedDevelopmentRights"))
dfTransacted = pd.read_json(ltinfo_json.format("GetTransactedAndBankedDevelopmentRights"))
dfAllParcels = pd.read_json(ltinfo_json.format("GetAllParcels"))
# dfRetired came from the same GetAllParcels service as dfAllParcels, so the
# download is done once and copied rather than fetched a second time.
dfRetired    = dfAllParcels.copy()

# get use tables
# zoning data
# NOTE: get_conn is defined in a later cell of this notebook; unless
# `from utils import *` also provides it, that cell has to be run first.
sde_engine = get_conn('sde')
with sde_engine.begin() as conn:
    df_uses    = pd.read_sql("SELECT * FROM sde.SDE.PermissibleUses", conn)
    df_special = pd.read_sql("SELECT * FROM sde.SDE.Special_Designation", conn)

# permit records from the tabular database
tab_engine = get_conn('sde_tabular')
with tab_engine.begin() as conn:
    df_permit = pd.read_sql("SELECT * FROM sde.SDE.Accela_Record_Details", conn)


In [None]:
def get_conn(db):
    """Build a SQLAlchemy engine for one of the known SQL Server databases.

    Parameters
    ----------
    db : str
        Database name, case-insensitive. Recognised values are
        'sde_tabular', 'tahoebmpsde' and 'sde'.

    Returns
    -------
    sqlalchemy.engine.Engine or None
        An engine usable with ``pd.read_sql``; ``None`` when ``db`` is not
        one of the recognised names.

    Notes
    -----
    The user name and password are read from the ``DB_USER`` and
    ``DB_PASSWORD`` environment variables. If either is unset, the literal
    text ``None`` ends up in the connection string.
    """
    # which SQL Server instance hosts each database
    servers = {
        'sde_tabular': 'sql12',
        'tahoebmpsde': 'sql14',
        'sde':         'sql12',
    }
    # make it case insensitive
    db = db.lower()
    server = servers.get(db)
    # unknown database name -> no engine
    if server is None:
        return None

    # Imported here because the notebook-level sqlalchemy imports live in a
    # later cell; without this the function fails on a fresh kernel.
    from sqlalchemy import create_engine
    from sqlalchemy.engine import URL

    # Get database user and password from environment variables on machine running script
    db_user     = os.environ.get('DB_USER')
    db_password = os.environ.get('DB_PASSWORD')
    # driver is the ODBC driver for SQL Server
    driver      = 'ODBC Driver 17 for SQL Server'

    # make sql database connection with pyodbc
    connection_string = f"DRIVER={driver};SERVER={server};DATABASE={db};UID={db_user};PWD={db_password}"
    connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
    # engine to use in pd.read_sql
    return create_engine(connection_url)

In [None]:

# zone lookups built from the permissible-uses table
def get_mf_zones(df):
    """Return the rows of `df` whose Use_Type is 'Multiple Family Dwelling'.

    Only the Zoning_ID, Use_Type and Density columns are returned.
    """
    is_multi_family = df['Use_Type'] == 'Multiple Family Dwelling'
    return df.loc[is_multi_family, ['Zoning_ID', 'Use_Type', 'Density']]

def get_sf_zones(df):
    """Return the rows of `df` whose Use_Type is 'Single Family Dwelling'.

    Only the Zoning_ID, Use_Type and Density columns are returned.
    """
    single_family_rows = df[df['Use_Type'].eq('Single Family Dwelling')]
    return single_family_rows[['Zoning_ID', 'Use_Type', 'Density']]

def get_sf_only_zones(df):
    """Return Single Family Dwelling rows for zones with no Multiple Family Dwelling row.

    A Zoning_ID is dropped when it also appears among the rows returned by
    get_mf_zones. Columns returned: Zoning_ID, Use_Type, Density.
    """
    single_family = get_sf_zones(df)
    multi_family_ids = get_mf_zones(df)['Zoning_ID']
    also_multi_family = single_family['Zoning_ID'].isin(multi_family_ids)
    return single_family.loc[~also_multi_family, ['Zoning_ID', 'Use_Type', 'Density']]

def get_recieving_zones(df):
    """Return Single Family Dwelling rows whose Zoning_ID has no Multiple Family Dwelling row.

    Columns returned: Zoning_ID, Use_Type, Density.

    NOTE(review): this applies exactly the same filter as get_sf_only_zones -
    confirm that "receiving zone" is really meant to equal "single-family only".
    """
    sf_rows = get_sf_zones(df)
    mf_rows = get_mf_zones(df)
    keep = ~sf_rows['Zoning_ID'].isin(mf_rows['Zoning_ID'])
    receiving = sf_rows[keep]
    return receiving[['Zoning_ID', 'Use_Type', 'Density']]

def get_sending_zones(df):
    """Return Multiple Family Dwelling rows whose Zoning_ID has no Single Family Dwelling row.

    Columns returned: Zoning_ID, Use_Type, Density.

    NOTE(review): confirm that "sending zone" is meant to equal
    "multi-family allowed and single-family not listed".
    """
    mf_rows = get_mf_zones(df)
    sf_zone_ids = get_sf_zones(df)['Zoning_ID']
    also_single_family = mf_rows['Zoning_ID'].isin(sf_zone_ids)
    return mf_rows.loc[~also_single_family, ['Zoning_ID', 'Use_Type', 'Density']]


# build the zone lookup tables from the permissible uses table
# (the bare value_counts() calls are quick sanity checks; only the last
# expression of a cell is displayed)
dfMF = get_mf_zones(df_uses)
dfMF.Use_Type.value_counts()

dfSF = get_sf_zones(df_uses)
dfSF.Use_Type.value_counts()

dfSF_only = get_sf_only_zones(df_uses)
dfSF_only.Use_Type.value_counts()

# get_send_only / get_recieve_only are not defined anywhere; the helpers defined
# above are get_sending_zones / get_recieving_zones and both take the uses table
dfSend    = get_sending_zones(df_uses)
dfRecieve = get_recieving_zones(df_uses)

In [None]:
# parcel development layer polygons
# NOTE: this repeats, statement for statement, the parcel load at the top of the
# data-loading cell above; running it reloads the feature class and rebuilds sdf_units.
parcel_db = Path(sdeEdit) / "SDE.Parcel\\SDE.Parcel_History_Attributed"
# load the parcel history, then keep only the 2022 rows
sdf_units = pd.DataFrame.spatial.from_featureclass(parcel_db)
sdf_units = sdf_units.loc[sdf_units['YEAR'] == 2022]
# assign the notebook's spatial reference (sr) to the filtered frame
sdf_units.spatial.sr = sr

In [None]:
"""
ParcelTables_to_ParcelFeatures.py
Created: March 13th, 2020
Last Updated: June 14th, 2023
Mason Bindl, Tahoe Regional Planning Agency
Amy Fish, Tahoe Regional Planning Agency

This python script was developed to move data from 
Accela, LTinfo, and BMP databases to TRPA's dynamic Enterprise Geodatabase.
This ETL process updates parcel based feature classes for Development Rights, BMPs, LCVs, LCCs, 
Historic Parcels, Securities, Grading Exceptions, Deed Restrictions, and Soils Hydro Projects

This script uses Python 3.x and was designed to be used with 
the default ArcGIS Pro Python environment "C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/python.exe", with
no need for installing new libraries.

This script runs nightly at 10pm on Arc10 from scheduled task "ParcelETL"
"""
#--------------------------------------------------------------------------------------------------------#
# import packages and modules
# base packages
import os
import sys
import logging
from datetime import datetime
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
# ESRI packages
import arcpy
from arcgis.features import FeatureSet, GeoAccessor, GeoSeriesAccessor
# email packages
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# set overwrite to true
arcpy.env.overwriteOutput = True
# raw strings: "\G" and "\S" are invalid escape sequences in a normal string literal
arcpy.env.workspace = r"C:\GIS\Scratch.gdb"

# in memory output file path
wk_memory = "memory" + "\\"
# set workspace and sde connections
# NOTE: `workspace`, `filePath`, `sdeBase` and `sdeCollect` are also assigned in
# the notebook's setup cell; running this cell replaces those values.
working_folder = r"C:\GIS"
workspace      = r"C:\GIS\Scratch.gdb"

# path to connection files
filePath = "C:\\GIS\\DB_CONNECT"
# database file path
sdeBase = os.path.join(filePath, "Vector.sde")
sdeCollect = os.path.join(filePath, "Collection.sde")
# Feature dataset to unversion and register as version
fdata = sdeCollect + "\\sde_collection.SDE.Parcel"
# string to use in updateSDECollectFC function
sdeString  = fdata + "\\sde_collection.SDE."

# connect to bmp SQL database
# Credentials come from the DB_USER / DB_PASSWORD environment variables (the same
# ones get_conn reads) instead of being written into the notebook.
# NOTE(review): the password that used to be hard-coded here should be rotated,
# and DB_USER must be an account with access to tahoebmpsde.
db_user     = os.environ.get('DB_USER')
db_password = os.environ.get('DB_PASSWORD')
connection_string = (
    "DRIVER={ODBC Driver 17 for SQL Server};SERVER=sql14;DATABASE=tahoebmpsde;"
    f"UID={db_user};PWD={db_password}"
)
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
engine = create_engine(connection_url)

##--------------------------------------------------------------------------------------#
## EMAIL and LOG FILE SETTINGS ##
##--------------------------------------------------------------------------------------#
## LOGGING SETUP
# log file lives in the working folder and is overwritten on every run (filemode='w')
log_file_path = os.path.join(working_folder, "Parcel_Development_ETL_Log.log")
# setup basic logging configuration
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    filename=log_file_path,  # Set the log file path
                    filemode='w')
# Create a logger
logger = logging.getLogger(__name__)
# Log start message
# `datetime` here is the class (from datetime import datetime), so the call is
# datetime.now(); datetime.datetime.now() raises AttributeError
logger.info("Script Started: " + str(datetime.now()) + "\n")

## EMAIL SETUP
# file attached to every notification email: the run's log file
fileToSend = log_file_path
# email parameters read by send_mail (subject line, From and To addresses)
# NOTE(review): the sender is on trpa.org and the receiver on trpa.gov - confirm both are intended
subject = "Parcel Development ETL"
sender_email = "infosys@trpa.org"
# password = ''
receiver_email = "mbindl@trpa.gov"

#---------------------------------------------------------------------------------------#
## FUNCTIONS ##
#---------------------------------------------------------------------------------------#

# send email with attachments
def send_mail(body):
    """Email `body` as HTML with the log file attached.

    Subject, sender, receiver and the attachment path come from the
    module-level `subject`, `sender_email`, `receiver_email` and `fileToSend`.

    Parameters
    ----------
    body : str
        Text placed at the top of the HTML message, ahead of the sign-off.

    Notes
    -----
    Any exception raised while talking to the SMTP server is logged and
    swallowed. A failure to read the log file is not caught and propagates.
    """
    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = receiver_email

    msgText = MIMEText('%s<br><br>Cheers,<br>GIS Team' % (body), 'html')
    msg.attach(msgText)

    # read the log inside a context manager so the file handle is always closed
    with open(fileToSend) as log_file:
        attachment = MIMEText(log_file.read())
    attachment.add_header("Content-Disposition", "attachment", filename = os.path.basename(fileToSend))
    msg.attach(attachment)

    try:
        with smtplib.SMTP("mail.smtp2go.com", 25) as smtpObj:
            smtpObj.ehlo()
            smtpObj.starttls()
#             smtpObj.login(sender_email, password)
            smtpObj.sendmail(sender_email, receiver_email, msg.as_string())
    except Exception as e:
        logger.error(e)

# # update staging layers
def updateStagingLayer(name, df, fields):
    """Write the chosen columns of `df` to a feature class named `name`.

    The feature class is created under the module-level `workspace` path.

    Parameters
    ----------
    name : str
        Name of the output feature class.
    df : spatially enabled DataFrame
        Source frame; must contain every column listed in `fields`.
    fields : list of str
        Columns to carry over to the output.
    """
    # destination path of the staging feature class
    staging_fc = os.path.join(workspace, name)
    # work on a copy restricted to the requested columns
    subset = df[fields].copy()
    # spatial dataframe to feature class, keeping column names as they are
    subset.spatial.to_featureclass(staging_fc, sanitize_columns=False)
    # report the result to both the console and the log
    message = f"\nUpdated staging layer:{staging_fc}"
    print(message)
    logger.info(message)

# replaces features in outfc with exact same schema
def updateSDECollectFC(fcList):
    """Replace the rows of each SDE feature class with those of its staging copy.

    For every name in `fcList`, the staging feature class under `workspace`
    is read and the feature class at `sdeString + name` is truncated and then
    refilled row by row. Both must share the same schema.

    Parameters
    ----------
    fcList : list of str
        Feature class names present in both the staging workspace and SDE.
    """
    from time import strftime

    for fc_name in fcList:
        staging_fc = os.path.join(workspace, fc_name)
        target_fc = sdeString + fc_name

        # every field except the object id and the length/area fields is
        # copied; the geometry field is addressed through the SHAPE@ token
        described = arcpy.Describe(staging_fc)
        skipped = [described.OIDFieldName, described.lengthFieldName, described.areaFieldName]
        cursor_fields = []
        for fld in described.fields:
            if fld.name in skipped:
                continue
            cursor_fields.append('SHAPE@' if fld.name == 'Shape' else fld.name)

        # deletes all rows from the SDE feature class
        arcpy.TruncateTable_management(target_fc)
        logger.info(f"\nDeleted all records in: {target_fc}\n")
        logger.info("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))

        # insert rows from the staging feature class into the SDE feature class
        with arcpy.da.InsertCursor(target_fc, cursor_fields) as writer:
            with arcpy.da.SearchCursor(staging_fc, cursor_fields) as reader:
                for row_number, row in enumerate(reader, start=1):
                    writer.insertRow(row)
                    # progress message every 100 rows
                    if row_number % 100 == 0:
                        logger.info("Inserting record %d into %s SDE feature class" % (row_number, target_fc))
                logger.info(f"\nDone updating: {target_fc}")
            

#---------------------------------------------------------------------------------------#
## GET DATA
#---------------------------------------------------------------------------------------#

# start timer for the get data requests
# (`datetime` is the class imported with `from datetime import datetime`,
# so the call is datetime.now(), not datetime.datetime.now())
startTimer = datetime.now()

# get feature classes from enterprise geodatabase
# raw strings: "\s" is an invalid escape sequence in a normal string literal
bonusBoundary= os.path.join(sdeBase, r"sde.SDE.Planning\sde.SDE.Bonus_unit_boundary")
mfAllowed    = os.path.join(sdeBase, r"sde.SDE.Planning\sde.SDE.Multifamily_Allowed_Zone")
parcelMaster = os.path.join(sdeBase, "sde.SDE.Parcels\\sde.SDE.Parcel_Master")
# fdata already starts with the Collection.sde path, so it is joined on its own
# rather than relying on os.path.join discarding sdeCollect when given an
# absolute second argument
parcelIPES   = os.path.join(fdata, "sde_collection.SDE.Parcel_LTinfo_IPES")
parcelDeed   = os.path.join(fdata, "sde_collection.SDE.Parcel_LTinfo_DeedRestriction")

# create spatial dataframes from the parcel master, IPES and deed restriction feature classes
sdfParcels = pd.DataFrame.spatial.from_featureclass(parcelMaster)
sdfIPES    = pd.DataFrame.spatial.from_featureclass(parcelIPES)
sdfDeed    = pd.DataFrame.spatial.from_featureclass(parcelDeed)

# report how long it took to get the data
endTimer = datetime.now() - startTimer
print("\nTime it took to get the data: {}".format(endTimer))
logger.info("\nTime it took to get the data: {}".format(endTimer))


#---------------------------------------------------------------------------------------#
## TRANSFORM TO STAGING LAYERS
#---------------------------------------------------------------------------------------#
# join IPES, join Deed, MF Allowed Spatial Join, field calc of % allowed

try:
    #---------------------------------------------------------------------------------------#
    # CREATE STAGING LAYERS ##
    #---------------------------------------------------------------------------------------#
    # start timer for building the staging layer
    # (`datetime` is the class, so the call is datetime.now())
    startTimer = datetime.now()
    #---------------------------------------------------------------------------------------#

    # name of feature class
    name = "Parcel_Development"

    # List of DataFrames
    dfs = [sdfParcels, sdfIPES]

    # # Merge DataFrames horizontally
    # combined_df = pd.merge(dfs[0], dfs[1], on='APN')
    # for df in dfs[2:]:
    #     combined_df = pd.merge(combined_df, df, on='APN')

    # left join keeps every parcel, with or without an IPES record
    df = pd.merge(sdfParcels, sdfIPES, on='APN', how='left')
    # columns present in both frames get _x/_y suffixes from the merge;
    # keep the parcel-master (_x) version under the plain name
    df.rename(columns={"JURISDICTION_x": "JURISDICTION",
                       "OWNERSHIP_TYPE_x": 'OWNERSHIP_TYPE',
                       "EXISTING_LANDUSE_x": "EXISTING_LANDUSE",
                       "SHAPE_x": "SHAPE"
                       }, inplace=True)
    df['MF_ALLOWED'] = "No"
    # NOTE(review): a PARCEL_SQFT of 0 would produce inf/NaN here - confirm none exist
    df['PERCENT_COVERAGE_ALLOWED'] = (df.ESTIMATED_COVERAGE_ALLOWED/df.PARCEL_SQFT)*100

    # specify fields to keep
    # (a stray '' literal that used to sit before 'IPESScore' was removed; Python
    #  silently concatenated it with the next string, so the list is unchanged)
    fields = ['APN',
              'APO_ADDRESS',
              'PSTL_TOWN',
              'PSTL_STATE',
              'PSTL_ZIP5',
              'JURISDICTION',
              'COUNTY',
              'OWNERSHIP_TYPE',
              'COUNTY_LANDUSE_DESCRIPTION',
              'EXISTING_LANDUSE',
              'REGIONAL_LANDUSE',
              'AS_SUM',
              'TAX_SUM',
              'TAX_YEAR',
              'YEAR_BUILT',
              'UNITS',
              'BEDROOMS',
              'BATHROOMS',
              'BUILDING_SQFT',
              'ESTIMATED_COVERAGE_ALLOWED',
              'IMPERVIOUS_SURFACE_SQFT',
              'CATCHMENT',
              'PLAN_ID',
              'PLAN_NAME',
              'ZONING_ID',
              'ZONING_DESCRIPTION',
              'TOWN_CENTER',
              'LOCATION_TO_TOWNCENTER',
              'WITHIN_TRPA_BNDY',
              'LOCAL_PLAN_HYPERLINK',
              'DESIGN_GUIDELINES_HYPERLINK',
              'LTINFO_HYPERLINK',
              'PARCEL_ACRES',
              'PARCEL_SQFT',
              'WITHIN_BONUSUNIT_BNDY',
              'PERCENT_COVERAGE_ALLOWED',
              'MF_ALLOWED',
              'SF_ALLOWED',
              'SENDING_ZONE',
              'RECIEVING_ZONE',
              'DEED_RESTRICTION',
              'IPES_SCORE',
              'DENSITY_ALLOWED',
              'MAX_RESIDENTIAL_UNITS',
              'MAX_COMMERCIAL_FLOOR_AREA',
              'MAX_TAU_UNITS',
              # IPES Fields
              'IPESScore',
              'IPESScoreType',
              'BaseAllowableCoveragePercent',
              'IPESTotalAllowableCoverageSqFt',
              'ParcelHasDOAC',
              'HistoricOrImportedIpesScore',
              'CalculationDate',
              'FieldEvaluationDate',
              'RelativeErosionHazardScore',
              'RunoffPotentialScore',
              'AccessScore',
              'UtilityInSEZScore',
              'ConditionOfWatershedScore',
              'AbilityToRevegetateScore',
              'WaterQualityImprovementsScore',
              'ProximityToLakeScore',
              'LimitedIncentivePoints',
              'TotalParcelArea',
              'IPESBuildingSiteArea',
              'SEZLandArea',
              'SEZSetbackArea',
              'InternalNotes',
              'PublicNotes',
              # Deed fields
              # 'RecordingNumber',
              # 'RecordingDate',
              # 'Description',
              # 'DeedRestrictionStatus',
              # 'DeedRestrictionType',
              # 'ProjectAreaFileNumber',
              # 'ScoreSheetUrl',
              # 'Status',
              # 'ParcelNickname',
              'SHAPE']

    # update staging feature class from dataframe
    # (raises KeyError if any name in `fields` is missing from df)
    updateStagingLayer(name, df, fields)

    #---------------------------------------------------------------------------------------#
    # report how long it took to create the staging layer
    endTimer = datetime.now() - startTimer
    print("\nTime it took to create staging layers: {}".format(endTimer))
    #---------------------------------------------------------------------------------------#

    ##--------------------------------------------------------------------------------------------------------#
    ## BEGIN SDE UPDATES ##
    ##--------------------------------------------------------------------------------------------------------#

#     # disconnect all users
#     print("\nDisconnecting all users...")
#     arcpy.DisconnectUser(sdeCollect, "ALL")

#     # unregister the sde feature class as versioned
#     print ("\nUnregistering feature dataset as versioned...")
#     arcpy.UnregisterAsVersioned_management(fdata,"NO_KEEP_EDIT","COMPRESS_DEFAULT")
#     print ("\nFinished unregistering feature dataset as versioned.")

#     # #---------------------------------------------------------------------------------------#

#     # feature class list
#     fcs =["Parcel_Development"]

#     # function to update all collection SDE feature classes in list
#     updateSDECollectFC(fcs)

#     #---------------------------------------------------------------------------------------#
#     # report how long it took to get the data
#     endTimer = datetime.now() - startTimer
#     logger.info(f"\nTime it took to update Collection SDE feature classes: {endTimer}")
#     #---------------------------------------------------------------------------------------#

#     ##--------------------------------------------------------------------------------------------------------#
#     ## END OF UPDATES ##
#     ##--------------------------------------------------------------------------------------------------------#

#     # disconnect all users
#     print("\nDisconnecting all users...")
#     logger.info("\nDisconnecting all users...")
#     arcpy.DisconnectUser(sdeCollect, "ALL")

#     print("\nRegistering feature dataset as versioned...")
#     logger.info("\nRegistering feature dataset as versioned...")
#     # register SDE feature class as versioned
#     arcpy.RegisterAsVersioned_management(fdata, "NO_EDITS_TO_BASE")
#     print("\nFinished registering feature dataset as versioned.")
#     logger.info("\nFinished registering feature dataset as versioned.")

    # report how long it took to run this step
    runTime = datetime.now() - startTimer
    logger.info(f"\nTime it took to run this script: {runTime}")

    # send email with header based on try/except result
    # NOTE(review): with the SDE update above commented out, only the staging
    # layer is written - confirm this wording is still what recipients expect
    header = "SUCCESS - Parcel feature classes were updated."
    print('Sending email...')
    send_mail(header)

# catch any arcpy errors
except arcpy.ExecuteError:
    print(arcpy.GetMessages())
    # failures are logged at ERROR level so they stand out in the log
    logger.error(arcpy.GetMessages())
    # send email with header based on try/except result
    header = "ERROR - Arcpy Exception - Check Log"
    print('Sending email...')
    send_mail(header)

# catch every other error
except Exception as e:
    # print the exception itself; e.args[0] raises IndexError when args is empty
    print(e)
    # logger.exception records the message together with the traceback
    logger.exception(e)
    # send email with header based on try/except result
    header = "ERROR - System Error - Check Log"
    print('Sending email...')
    send_mail(header)

### Forecast Formatted Table

In [None]:
# Build and print a path to the forecast table under the RTP 2023 data folder.
# NOTE(review): the load cell that follows reads data_dir / "Forecasts_Table1.csv"
# rather than pathCSV - confirm which location is the intended source.
pathCSV = data_dir / "RegionalTransportationPlan/2023/data/Forecasts_Table1.csv"
print(pathCSV)

In [7]:
from great_tables import *
import pandas as pd

# Load the forecast summary, drop the free-text Notes column, and shorten the
# two change-column headers for display.
forecast = (
    pd.read_csv(data_dir / "Forecasts_Table1.csv")
    .drop(columns=['Notes'])
    .rename(columns={'Change by 2050': 'Change(#)',
                     'Percent Change': 'Change(%)'})
)

In [8]:
# Render the forecast summary as a formatted table and save it as an image:
# rows are labelled by Variable and grouped by Category, body cells are
# filled with aliceblue.
(
    GT(forecast)
    .tab_header(title="Table 1. Forecast Data Summary")
    .tab_spanner(
        label="",
        columns=['Category', 'Variable', 'Base Year 2022', 'Forecast 2050', 'Change(#)', 'Change(%)'],
    )
    .tab_stub(rowname_col='Variable', groupname_col='Category')
    .tab_style(style=style.fill(color="aliceblue"), locations=loc.body())
    .save("forecast.jpeg")
)


### Transit Stacked Bar

In [None]:
# get data for transit ridership
def get_data_transit(url=None):
    """Load monthly transit ridership and total it per route name and month.

    Parameters
    ----------
    url : str or path-like, optional
        Location of the transit monitoring CSV. Defaults to the LTinfo
        GetTransitMonitoringData web service. The CSV must provide the
        columns Month, RouteType, TransitOperator and MonthlyRidership.

    Returns
    -------
    pandas.DataFrame
        Columns Name ("<RouteType> - <operator abbreviation>"), Date
        ('YYYY-MM' text) and Ridership (summed MonthlyRidership), sorted by
        Date. Rows whose RouteType is Paratransit, Commuter or Seasonal
        Fixed Route are excluded.
    """
    if url is None:
        url = "https://www.laketahoeinfo.org/WebServices/GetTransitMonitoringData/CSV/e17aeb86-85e3-4260-83fd-a2b32501c476"

    dfTransit = pd.read_csv(url)
    # normalise the month to 'YYYY-MM' text
    dfTransit['Month'] = pd.to_datetime(dfTransit['Month']).dt.strftime('%Y-%m')

    # drop the route types that are NOT part of this chart
    excluded_route_types = ['Paratransit', 'Commuter', 'Seasonal Fixed Route']
    # .copy() so the column assignments below never write into a slice of
    # dfTransit, whatever the global pandas options are
    df = dfTransit.loc[~dfTransit['RouteType'].isin(excluded_route_types)].copy()

    # replace transit operator values with abbreviations
    df['TransitOperator'] = df['TransitOperator'].replace({
        'Tahoe Transportation District': "TTD",
        'Tahoe Truckee Area Regional Transit': "TART",
        'South Shore Transportation Management Association': "SSTMA",
    })
    # route name = route type + transit operator
    df['RouteName'] = df['RouteType'] + ' - ' + df['TransitOperator']

    # sum ridership per route name and month
    df = df.groupby(['RouteName', 'Month'])['MonthlyRidership'].sum().reset_index()
    # rename columns to Name, Date, Ridership
    df = df.rename(columns={'Month': 'Date', 'RouteName': 'Name', 'MonthlyRidership': 'Ridership'})
    # sort by Date
    return df.sort_values('Date')

# html/3.3.a_Transit_Ridership.html
def plot_transit(df):
    """Plot ridership over time, one series per route name, via `trendline`.

    `df` is expected to carry the Date, Ridership and Name columns used for
    the x axis, y axis and colour. The target file passed to `trendline` is
    html/3.3.a_Transit_Ridership.html.
    (`trendline` is not defined in this notebook - presumably it comes from
    `utils`; verify.)
    """
    # hover text: ridership on the first line, route name on the second
    hover_lines = [
        "<b>%{y:,.0f}</b> riders on",
        "<i>%{customdata[0]}</i> lines",
    ]
    # horizontal legend anchored to the right, just above the plot area
    legend_layout = dict(
        # title="Transit Ridership",
        orientation="h",
        entrywidth=120,
        yanchor="bottom",
        y=1.05,
        xanchor="right",
        x=0.95,
    )
    layout_overrides = dict(
        title="Transit Ridership",
        margin=dict(t=20),
        legend=legend_layout,
    )

    trendline(
        df,
        path_html="html/3.3.a_Transit_Ridership.html",
        div_id="3.3.a_Transit_Ridership",
        x="Date",
        y="Ridership",
        color="Name",
        color_sequence=["#023f64", "#7ebfb5", "#a48352", "#FC9A62"],
        sort="Date",
        orders=None,
        x_title="Date",
        y_title="Ridership",
        markers=True,
        hover_data=None,
        tickvals=None,
        ticktext=None,
        tickangle=None,
        hovermode="x unified",
        format=",.0f",
        custom_data=["Name"],
        hovertemplate="<br>".join(hover_lines) + "<extra></extra>",
        additional_formatting=layout_overrides,
    )