In [78]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Update Notes: 

### 12-20-2021

* change Date -> Data_Date

### 12-21-2021

* add Data_HourMinute column

## TODO
needs to change the FY related number to dynamic number

# Tip for quick search

* Needs attention: the place where needs update or better logic
* question to be answered: the place where things are still not clear
* Manual Check: Unit test where you can drill in to find the data that leads to the check results for a specific project and specific check
* TODO: things needs to be done
* bookmark: stop point from last visit


# Admin Notes:


1. The AMTool dataset is archived daily as csv files and used for the project book check. 
The csv files are located at: 
r'\\ct.dot.ca.gov\dfshq\DIROFC\Asset Management\4e Project Book\Tableau Dashboards\DataLake'

2. The excel input files are checked daily and archived with datestamp whenever it is modified.
The continuously updated excel input files are located at: r'\\ct.dot.ca.gov\dfshq\DIROFC\Asset Management\4e Project Book\Projectbook_WorkingFolder\excel'
The excel input file are archived at: r'\\ct.dot.ca.gov\dfshq\DIROFC\Asset Management\4e Project Book\Tableau Dashboards\Data_MiscInput'
To recover the archived excel file used in project book check for a target date, select the excel file with latest datestamp but is still earlier than the target date.

3. The check summary export action is logged daily. It can be used for daily monitoring. 
The file export log is located at: \\ct.dot.ca.gov\dfshq\DIROFC\Asset Management\4e Project Book\Projectbook_WorkingFolder\output_internal\log

4. The published data are at:

    * csv files for district asset manager: http://svgcshopp.dot.ca.gov/DataLake/ProjectBookCheck/
    * csv files for HQ AM: \\ct.dot.ca.gov\dfshq\DIROFC\Asset Management\4e Project Book\Projectbook_WorkingFolder\output_internal
    * tableau workbook with live data source: https://tableau.dot.ca.gov/#/site/AssetManagement/workbooks/1815/views


<a id='TableOfContents'></a>

# Table Of Contents

## Data Preprocessing

### [Global Constants](#GlobalConstants)


### [Load and cleanup source data](#Read_Data)

* [Counties](#Counties)
* [Programming_Summary](#Programming_Summary)
* [ProgrammingList](#ProgrammingList)
* [SHOPP_Raw_Data](#SHOPP_Raw_Data)
* [TenYrShopp_Perf_RawData](#TenYrShopp_Perf_RawData)


## Add fields to SHOPP raw data (calculate and join)
* [Calculated Fields](#AddDataColumns)
* [Join Tables](#DataJoining)


## [Export Data](#Export_Data)



## [Final Clean Up](#FinalCleanUp)


# Import common modules

In [79]:

from datetime import datetime
import os.path

# import requests
import pandas as pd

import numpy as np
import re

import shutil

In [80]:
from itertools import product

In [81]:
import time
start_time = time.time()

In [82]:
#show dataframe without skip column
pd.options.display.max_columns = 100

In [83]:
# from config_datasource import *
from projectbookcheck_utilityfunction import *
from constants import *

# Data clean process

* funding amount: remove dollar sign, 
* fill missing value, string, numerical, 
* remove leading single quote for string value
* strip off leading and trailing space 

* regulate column names




<a id='GlobalConstants'></a>
## Global Constants

In [84]:
TARGETDATE = datetime.today().strftime("%m-%d-%Y")
CURRENT_FY = fiscalyear (datetime.today())

<a id='Read_Data'></a>

# Read Data


In [85]:
if DATA_SOURCE_TYPE == 'csv':

    filename = 'TenYrShopp_PerfM_Raw_Data_'
    df_perf_raw_data = pd.read_csv(r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE))

    filename = 'Rawdata_Bridge_Worksheet_'
    df_brg_raw_data = pd.read_csv(r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE), skiprows = [0], header = 0)

    filename = 'Rawdata_Drainage_Worksheet_'
    df_drain_raw_data = pd.read_csv(r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE), 
                                    header = 0, 
#                                     keep_default_na=False,
#                                     na_values = ['',' ']
                                   )

    filename = 'Rawdata_TMS_Worksheet_'
    df_tms_raw_data = pd.read_csv(r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE), header = 0)

    filename = 'Rawdata_FishPassage_Worksheet_'
    df_fp_raw_data = pd.read_csv(r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE), header = 0)
    
    filename = 'projectbook_draft'
    filepath_draft_projectbook = r'{}\{}.csv'.format(r'http://svgcshopp.dot.ca.gov/DataLake/ProjectBookCheck/', filename)
    df_draft_pb = pd.read_csv(filepath_draft_projectbook, header = 0)
    
    
    filename = 'TenYrShopp_RawData_'
    path_to_file = r'{}\{}{}.csv'.format(DATALAKE_FOLDER, filename, TARGETDATE)
    t = os.path.getmtime(path_to_file)
    DATA_HHMM = int(datetime.fromtimestamp(t).strftime('%H%M'))
else:
    print('skip getting csv data.')

## Raw Data Bridge Worksheet


In [86]:
#rename columns 
#with manual edits

dict_rename_bridge_worksheet = {
 'ID': 'AMT_ID',
 'Bridge №': 'BridgeNo',
 'Work Type': 'WorkType',
 'Brdige / TunnelWork Description': 'WorkDescription',
 'Bridge /TunnelHealth Pre': 'Health Pre',
 'Bridge /TunnelHealth Post': 'Health Post',
 'BridgeScourPre': 'Scour_Pre',
 'BridgeScourPost': 'Scour_Post',
 'BridgeSeismicPre': 'Seismic_Pre',
 'BridgeSeismicPost': 'Seismic_Post',
 'BridgeGds MvmtPre': 'GdsMvmt_Pre',
 'BridgeGds MvmtPost': 'GdsMvmt_Post',
 'Exist(sf)': 'Deck_Exist(sf)',
 'Additional(sf)': 'Deck_Additional(sf)',
 'Y/N': 'Paint_Y/N',
 'Condition': 'Paint_Condition',
 'Paint Area(sf)': 'Paint Area(sf)',
 'Y/N.1': 'ElectricalMechanical_Y/N',
 'Condition.1': 'ElectricalMechanical_Condition',
 'Area(sf)': 'ElectricalMechanical_Area(sf)',
 'Y/N.2': 'ApproachSlab_Y/N',
 'Replaced(sf)': 'ApproachSlab_Replaced(sf)',
 'New(sf)': 'ApproachSlab_New(sf)',
 'Y/N.3': 'Rail_Y/N',
 'Good(lf)': 'Rail_Good(lf)',
 'Fair(lf)': 'Rail_Fair(lf)',
 'Poor(lf)': 'Rail_Poor(lf)',
 'Additonal(lf)': 'Rail_Additonal(lf)',
 'Post Good(lf)': 'Rail_Post Good(lf)',
 'Post Fair(lf)': 'Rail_Post Fair(lf)',
 'Post Poor(lf)': 'Rail_Post Poor(lf)',
 'Post New(lf)': 'Rail_Post New(lf)',
 'FishPassage(Y/N)': 'FishPassage(Y/N)',
}

df_brg_raw_data.rename(dict_rename_bridge_worksheet, axis = 1, inplace = True)

In [87]:
df_brg_raw_data.name = 'df_brg_raw_data'

In [88]:
df_brg_raw_data['Rail_Good(lf)'].fillna(0, inplace = True)
df_brg_raw_data['Rail_Fair(lf)'].fillna(0, inplace = True)
df_brg_raw_data['Rail_Poor(lf)'].fillna(0, inplace = True)

In [89]:
df_brg_raw_data['Rail_Total(lf)'] = (df_brg_raw_data['Rail_Good(lf)'] 
                                             + df_brg_raw_data[ 'Rail_Fair(lf)'] 
                                             + df_brg_raw_data['Rail_Poor(lf)'])

## Raw Data Drainage Worksheet


In [90]:
df_drain_raw_data.name = 'df_drain_raw_data'

dict_drain_rename = {
 'ID': 'AMT_ID',
 'Data Date':'Data Date_Drainage'
                               }
df_drain_raw_data.rename(dict_drain_rename, axis = 1, inplace = True)

df_drain_raw_data['EA'] = df_drain_raw_data['EA'].apply(remove_punction)
df_drain_raw_data['EFIS'] = df_drain_raw_data['EFIS'].apply(remove_punction)

df_drain_raw_data['SYSNO'] = df_drain_raw_data['SYSNO'].apply(remove_punction)
df_drain_raw_data['INETNO'] = df_drain_raw_data['INETNO'].apply(remove_punction)
df_drain_raw_data['OUTETNO'] = df_drain_raw_data['OUTETNO'].apply(remove_punction)

df_drain_raw_data['Data Date_Drainage'] = df_drain_raw_data['Data Date_Drainage'].apply(regulate_timestamp_format)

In [91]:
def calc_drain_unique_ID(df):
    if pd.isnull(df['SYSNO']) or pd.isnull(df['INETNO']) or pd.isnull(df['OUTETNO']):
        return None
    else:
        return (df['SYSNO'] + "_"+ df['INETNO'] + "_"+ df['OUTETNO'])
df_drain_raw_data['Unique Culvert ID'] = df_drain_raw_data.apply(calc_drain_unique_ID, axis = 1)

In [92]:
# df_drain_raw_data[df_drain_raw_data['AMT_ID']==22780]

Unnamed: 0,Program,District,AMT_ID,EA,EFIS,Project Location,Section,ActID,Activity Description,SYSNO,INETNO,OUTETNO,Inspected Date,Health Assessment,Diameter (in),Width (ft),Height (ft),Number of Barrels,Length (ft),Extension of Existing Culvert (LF/Barrel,Pre-Condition,Total Added/Reduced Length (LF),Pre-Condition Quantity for Perf Tab,New Quantities for Perf Tab,Total Quantity of Culvert (LF),Cleaned date,Non-Structural(last known condition),Fish Passage Priority List (Yes/No),Priority Identifier,Stream Name,Is the proposed treatment expected to remediate the fish passage priority barrier? (Yes/No/NA),Addressing Fish Passage not in the Priority List (Yes/No)?,Should Count toward Fish Passage not in the Priority List (Yes/No),Comment,HQ Notes,Last Saved,Saved by,Data Date_Drainage,Status,Unique Culvert ID
19573,SHOPP,5,22780,1N900,521000139,--/ (primary location),PRG,C01,Replace/Install Culverts (201.151),360010000997,360010000997002,360010000997001,12/18/19,80,0.0,14.0,12.0,1.0,225.96,,Good,,225.96,0.0,225.96,,,Yes,201855.0,Valencia Creek,,,,Fish Passage,,07/07/21 5:09PM,Darron Hill,06-28-2021,Active,360010000997_360010000997002_360010000997001
19574,SHOPP,5,22780,1N900,521000139,--/ (primary location),PRG,C01,Replace/Install Culverts (201.151),360010000997,360010000997003,360010000997002,12/18/19,80,0.0,14.0,12.0,1.0,95.0,,Good,,95.0,0.0,95.0,,,Yes,201855.0,Valencia Creek,,,,Fish Passage,,07/07/21 5:09PM,Darron Hill,06-28-2021,Active,360010000997_360010000997003_360010000997002
19575,SHOPP,5,22780,1N900,521000139,--/ (primary location),PRG,C01,Replace/Install Culverts (201.151),360010000997,360010000997004,360010000997003,12/18/19,80,0.0,14.0,12.0,1.0,90.0,,Good,,90.0,0.0,90.0,,,Yes,201855.0,Valencia Creek,,,,Fish Passage,,07/07/21 5:09PM,Darron Hill,06-28-2021,Active,360010000997_360010000997004_360010000997003
19576,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360010000997,360010000997002,360010000997001,12/18/19,80,0.0,14.0,12.0,1.0,225.96,,Good,,225.96,0.0,225.96,,,Yes,201855.0,Valencia Creek,No,No,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360010000997_360010000997002_360010000997001
19577,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360010000997,360010000997003,360010000997002,12/18/19,80,0.0,14.0,12.0,1.0,95.0,,Good,,95.0,0.0,95.0,,,Yes,201855.0,Valencia Creek,---,,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360010000997_360010000997003_360010000997002
19578,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360010000997,360010000997004,360010000997003,12/18/19,80,0.0,14.0,12.0,1.0,90.0,,Good,,90.0,0.0,90.0,,,Yes,201855.0,Valencia Creek,---,,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360010000997_360010000997004_360010000997003
19579,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360014000988,360014000988002,360014000988001,12/18/19,80,0.0,14.0,12.0,1.0,141.36,,Good,,141.36,0.0,141.36,,,Yes,201856.0,Valencia Creek,---,,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360014000988_360014000988002_360014000988001
19580,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360014000988,360014000988003,360014000988002,12/18/19,80,0.0,14.0,12.0,1.0,186.79,,Good,,186.79,0.0,186.79,,,Yes,201856.0,Valencia Creek,Yes,No,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360014000988_360014000988003_360014000988002
19581,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360014000988,360014000988004,360014000988003,12/18/19,80,0.0,14.0,12.0,1.0,75.0,,Good,,75.0,0.0,75.0,,,Yes,201856.0,Valencia Creek,---,No,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360014000988_360014000988004_360014000988003
19582,SHOPP,5,22780,1N900,521000139,SCR-001-9.8/10.0 (primary location),TYP,C01,Replace/Install Culverts (201.151),360016001000,360016001000004,360016001000002,08/24/09,80,24.0,0.0,0.0,1.0,62.98,,Good,,62.98,0.0,62.98,,,No,,,,,,Fish Passage,,12/09/21 11:07AM,Jimmy Walth,09-20-2021,Active,360016001000_360016001000004_360016001000002


In [93]:
df_drain_raw_data[df_drain_raw_data['AMT_ID']==22780]['Is the proposed treatment expected to remediate the fish passage priority barrier? (Yes/No/NA)'].unique()

array([nan, 'No', '---', 'Yes'], dtype=object)

## Raw Data TMS Worksheet

In [94]:
dict_TMS_rename = {
 'ID': 'AMT_ID',
 'Data Date':'Data Date_TMS'
                               }
df_tms_raw_data.rename(dict_TMS_rename, axis = 1, inplace = True)

df_tms_raw_data.name = 'df_tms_raw_data'

df_tms_raw_data['Data Date_TMS'] = df_tms_raw_data['Data Date_TMS'].apply(regulate_timestamp_format)

## Raw Data FP Worksheet

In [95]:
#rename columns
dict_rename = {
    'ID': 'AMT_ID',
    'Fish Passage Type(Priority List /Not Priority List)' : 'Fish Passage Type (Priority List / Not Priority List)',
    'Should countas addressingFish Passage(Yes/No)?': 'Should count as addressing Fish Passage (Yes/No)?'
              }

df_fp_raw_data = df_fp_raw_data.rename(dict_rename, axis = 1)

<a id='TenYrShopp_Perf_RawData'></a>
## TenYrShopp_Perf_RawData


In [96]:
#rename columns
dict_rename_perf_rawdata = {
    'ID': 'AMT_ID',
              }
df_perf_raw_data = df_perf_raw_data.rename(dict_rename_perf_rawdata, axis = 1)

In [97]:
cols_strip = ['EA','EFIS','PPNO']
for c in cols_strip :
    df_perf_raw_data[c] = df_perf_raw_data[c].str.strip("'")

In [98]:
#data clean 
#data type regulation

df_perf_raw_data['Quantity'] = df_perf_raw_data['Quantity'].fillna(0)
df_perf_raw_data['Assets in Good Cond'] = df_perf_raw_data['Assets in Good Cond'].fillna(0)
df_perf_raw_data['Assets in Fair Cond'] = df_perf_raw_data['Assets in Fair Cond'].fillna(0)
df_perf_raw_data['Assets in Poor Cond'] = df_perf_raw_data['Assets in Poor Cond'].fillna(0)
df_perf_raw_data['New Assets Added'] = df_perf_raw_data['New Assets Added'].fillna(0)

df_perf_raw_data['EFIS'] = pd.to_numeric(df_perf_raw_data['EFIS'], errors='coerce')


In [99]:
#data trimming
#row
df_perf_raw_data= df_perf_raw_data[df_perf_raw_data['District'] != 56]
#column
df_perf_raw_data.drop(['PID Cycle', 'TYP','ProjectedSHOPP Cycle','RequestedRTL FY','DistrictPriority'],
  axis='columns', inplace=True, errors='ignore')

In [100]:
df_perf_raw_data.name = 'df_perf_raw_data'

# Add columns to performance data

In [101]:
#include only the project in projectbook and active section

df_perf_raw_data_1 = pd.merge(df_perf_raw_data,
                              df_draft_pb[['AMT_ID', 'Section','Planning or Post-Planning', 'Advertised Year']], 
                how ='inner', 
                left_on = ['AMT_ID', 'Section'],
                right_on = ['AMT_ID', 'Section'], 
                             )

In [102]:
df_perf_raw_data_1['Post-Fair'].fillna(0, inplace = True)

df_perf_raw_data_1['F2G Achieved'] = df_perf_raw_data_1['Post-Fair'] - df_perf_raw_data_1['Assets in Fair Cond']

In [103]:
df_perf_raw_data_1['Post-Poor'].fillna(0, inplace = True)

df_perf_raw_data_1['P2G Achieved'] = df_perf_raw_data_1['Post-Poor'] - df_perf_raw_data_1['Assets in Poor Cond']

In [104]:
dict_rename_performance ={
    'New Assets Added':'New Achieved',
}

df_perf_raw_data_1 = df_perf_raw_data_1.rename(dict_rename_performance, axis = 1)


In [105]:
df_perf_raw_data_1['Performance Objective'].fillna('',inplace = True)

In [106]:
df_perf_raw_data_1['Concatenate ID+Objective'] = df_perf_raw_data_1['AMT_ID'].astype(str) + df_perf_raw_data_1['Performance Objective']

In [107]:
def ck_reviewed(df):
    if pd.isna(df['Review Date']):
        return 'No'
    else:
        return 'Yes'
        
df_perf_raw_data_1['Reviewed?'] = df_perf_raw_data_1.apply(ck_reviewed, axis = 1)   

In [108]:
# df_perf_raw_data_1.shape

## common functions

In [109]:
def calc_review_status(df, col_name, NA_msg = 'No relavent data for review'):
    
    # if there is active entry of "NO" or "New", no active entry of "YES" --> Needs Review
    # if there is active entry of "NO" or "New", and active entry of "YES" --> Partially Reviewed
    # if there is no active entry of "NO" or "New", and active entry of "YES" --> Review Complete
    # if there is no active entry of "NO" or "New", no active entry of "YES" --> No relavent data for review
        
    if ('No' in df[col_name]) : 
        if 'Yes' in df[col_name]:
            return 'Partially Reviewed'
        else:
            return 'Needs Review'
    elif 'Yes' in df[col_name]:
        return 'Review Complete'
    elif 'New' in df[col_name]:
        return 'All New'
    else:
        return NA_msg
        

In [110]:
#keep only projects within project book and active section

df_brg_raw_data_1 = pd.merge(df_brg_raw_data, df_draft_pb[['AMT_ID', 'Section']], 
                             how = 'inner', left_on =['AMT_ID', 'Section'],right_on =['AMT_ID', 'Section'],)

#keep only projects within project book and active section

df_tms_raw_data_1 = pd.merge(df_tms_raw_data, df_draft_pb[['AMT_ID', 'Section']], 
                             how = 'inner', left_on =['AMT_ID', 'Section'],right_on =['AMT_ID', 'Section'],)


#keep only projects within project book and active section

df_drain_raw_data_1 = pd.merge(df_drain_raw_data, df_draft_pb[['AMT_ID', 'Section']], 
                             how = 'inner', left_on =['AMT_ID', 'Section'],right_on =['AMT_ID', 'Section'],)


df_fp_raw_data_1= pd.merge(df_fp_raw_data, df_draft_pb[['AMT_ID', 'Section']], 
                             how = 'inner', left_on =['AMT_ID', 'Section'],right_on =['AMT_ID', 'Section'],)



In [111]:
# add Bridge WS Health Review Status column to performance raw data
def ck_brg_health_data(df):
    if pd.isna(df['Health Pre']):
        return "No Bridge Health"
    elif pd.isna(df['Health Post']):
        return 'No'
    else:
        return 'Yes'
        
df_brg_raw_data_1['Post-Condition for Bridge Health entered?']= df_brg_raw_data_1.apply(ck_brg_health_data, axis = 1)     

temp1 = df_brg_raw_data_1.groupby(['AMT_ID', 'Section'])['Post-Condition for Bridge Health entered?'].agg(['unique']).reset_index()

temp1['Bridge WS Health Review Status'] = temp1.apply(calc_review_status, args = ['unique'], axis = 1)
        
df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section','Bridge WS Health Review Status']], 
                              how = 'left', left_on = ['AMT_ID', 'Section'],  right_on = ['AMT_ID', 'Section'])

df_perf_raw_data_1['Bridge WS Health Review Status'].fillna('No Bridge Worksheet', inplace = True)

In [112]:
# df_perf_raw_data_1.groupby(['AMT_ID', 'Section'])['Post-Condition for Bridge Health entered?'].agg(['unique']).reset_index()


In [113]:
# df_perf_raw_data_1.shape

In [114]:
# temp1 = df_brg_raw_data_1.groupby(['AMT_ID', 'Section'])['Post-Condition for Bridge Health entered?'].agg(['unique']).reset_index()

# temp1['Bridge WS Health Review Status'] = temp1.apply(calc_review_status, args = ['unique'], axis = 1)


# temp1[temp1['AMT_ID'] ==  11281]


In [115]:
# add TMS WS Health Review Status column to performance raw data

def ck_tms_data(df):
    if pd.isna(df['Asset Post-Condition']):
        return 'No'
    elif df['Asset Post-Condition'] == 'New':
        return 'New'
    else:
        return 'Yes'
        
df_tms_raw_data_1['Post-Condition entered?']= df_tms_raw_data_1.apply(ck_tms_data, axis = 1)     

temp1 = df_tms_raw_data_1.groupby(['AMT_ID', 'Section','TMS Structural or Technology'])['Post-Condition entered?'].agg(['unique']).reset_index()

temp1['TMS WS Review Status'] = temp1.apply(calc_review_status, args = ['unique', 'No TMS Worksheet'], axis = 1)


In [116]:
temp1['Performance Objective'] = temp1.apply(lambda df: 'Transportation Management System Structures'  if 'Structures' in df['TMS Structural or Technology'] else np.nan, axis = 1)
temp1['TMS Structure Review Status'] = temp1.apply(lambda df: df['TMS WS Review Status']  if 'Structures' in df['TMS Structural or Technology'] else np.nan, axis = 1)
df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section','Performance Objective','TMS Structure Review Status']], 
                              how = 'left', left_on = ['AMT_ID', 'Section','Performance Objective'],  right_on = ['AMT_ID', 'Section','Performance Objective'])

In [117]:
temp1['Performance Objective'] = temp1.apply(lambda df: 'Transportation Management Systems' if 'Technology' in df['TMS Structural or Technology'] else np.nan, axis = 1)
temp1['TMS Technology Review Status'] = temp1.apply(lambda df: df['TMS WS Review Status']  if 'Technology' in df['TMS Structural or Technology'] else np.nan, axis = 1)
df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section','Performance Objective','TMS Technology Review Status']], 
                              how = 'left', left_on = ['AMT_ID', 'Section','Performance Objective'],  right_on = ['AMT_ID', 'Section','Performance Objective'])

In [118]:
def combine_tms_ws_reviews(df):
    '''
    combine review status of the two columns of the tms structure and tms technology
    '''
    if pd.isnull(df['TMS Technology Review Status']):
        return df['TMS Structure Review Status']
    else:
        return df['TMS Technology Review Status']
df_perf_raw_data_1['TMS WS Review Status'] = df_perf_raw_data_1.apply(combine_tms_ws_reviews, axis = 1)

In [119]:
# df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section','TMS Structural or Technology','TMS WS Review Status']], 
#                               how = 'left', left_on = ['AMT_ID', 'Section'],  right_on = ['AMT_ID', 'Section'])

df_perf_raw_data_1['TMS WS Review Status'].fillna('No TMS Worksheet', inplace = True)

In [120]:
# df_tms_raw_data_1.head()

In [121]:
# df_perf_raw_data_1['Performance Objective'].unique()

In [122]:
# AMT_ID = 19289
# df_perf_raw_data_1[(df_perf_raw_data_1['AMT_ID'] == AMT_ID)
#                    & (df_perf_raw_data_1['Performance Objective'].isin(['Transportation Management System', 'Transportation Management System Structures']))
# #                   & (df_perf_raw_data_1['TMS Structural or Technology'] == 'Technology')
                   
# #                    & (df_perf_raw_data_1['TMS Structural or Technology'] == 'Technology & Structures')
#                   ]

In [123]:
# temp1 = df_tms_raw_data_1.groupby(['AMT_ID', 'Section','TMS Structural or Technology'])['Post-Condition entered?'].agg(['unique']).reset_index()

# temp1['TMS WS Review Status'] = temp1.apply(calc_review_status, args = ['unique'], axis = 1)

# temp1[temp1['AMT_ID'] == AMT_ID]

In [124]:
# df_tms_raw_data_1[df_tms_raw_data_1['AMT_ID'] == AMT_ID]

In [125]:
def combine_review_status_for_same_performance_objective(df, ws_review_status_col, WS_missing_msg):
    
    if WS_missing_msg in df[ws_review_status_col] :
        return WS_missing_msg
    
    elif len(df['Reviewed?']) ==1 and ('No' in df['Reviewed?']):
        if 'Review Complete' not in df[ws_review_status_col]:
            return 'Needs Review'
        else: 
            return 'Partially Reviewed'
    else:    # df['Reviewed?'] = 'Yes'
        if 'Needs Review' not in df[ws_review_status_col]:
            return 'Review Complete'
        else:
            return 'Partially Reviewed'

In [126]:
# df_perf_raw_data_2 = df_perf_raw_data_1.copy()

In [127]:
# df_perf_raw_data_1 = df_perf_raw_data_2

In [128]:
performance_objective = 'Transportation Management Systems'
ws_review_status_col = 'TMS WS Review Status'
combined_ws_review_status_col = 'Combined TMS Technology Review Status'

temp1 = df_perf_raw_data_1[
    (df_perf_raw_data_1['Performance Objective'] == performance_objective) 
#     & (df_perf_raw_data_1['TMS Structural or Technology'] == 'Technology')                 
                          ].groupby(['AMT_ID', 'Section','Performance Objective'])['Reviewed?', ws_review_status_col ].agg(set).reset_index()

temp1[combined_ws_review_status_col] = temp1.apply(
    combine_review_status_for_same_performance_objective, 
    args = [ws_review_status_col,'No TMS Worksheet'], axis = 1)


df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section', 'Performance Objective',combined_ws_review_status_col]], 
                              how = 'left', left_on = ['AMT_ID', 'Section','Performance Objective'],  right_on = ['AMT_ID', 'Section','Performance Objective'])

  temp1 = df_perf_raw_data_1[


In [129]:
performance_objective = 'Transportation Management System Structures'
ws_review_status_col = 'TMS WS Review Status'
combined_ws_review_status_col = 'Combined TMS Structures Review Status'

temp1 = df_perf_raw_data_1[
    (df_perf_raw_data_1['Performance Objective'] == performance_objective)
#     &(df_perf_raw_data_1['TMS Structural or Technology'] == 'Technology & Structures')        
                          ].groupby(['AMT_ID', 'Section','Performance Objective'])['Reviewed?', ws_review_status_col ].agg(set).reset_index()

temp1[combined_ws_review_status_col] = temp1.apply(
    combine_review_status_for_same_performance_objective, 
    args = [ws_review_status_col,'No TMS Worksheet'], axis = 1)


df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section', 'Performance Objective',combined_ws_review_status_col]], 
                              how = 'left', left_on = ['AMT_ID', 'Section','Performance Objective'],  right_on = ['AMT_ID', 'Section','Performance Objective'])

  temp1 = df_perf_raw_data_1[


In [130]:
performance_objective = 'Bridge and Tunnel Health'
ws_review_status_col = 'Bridge WS Health Review Status'
combined_ws_review_status_col = 'Combined Bridge Health Review Status'

temp1 = df_perf_raw_data_1[df_perf_raw_data_1['Performance Objective'] == performance_objective].groupby(['AMT_ID', 'Section', 'Performance Objective'])['Reviewed?', ws_review_status_col ].agg(set).reset_index()

temp1[combined_ws_review_status_col] = temp1.apply(
    combine_review_status_for_same_performance_objective, 
    args = [ws_review_status_col, 'No Bridge Worksheet'], axis = 1)


df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section', combined_ws_review_status_col]], 
                              how = 'left', left_on = ['AMT_ID', 'Section'],  right_on = ['AMT_ID', 'Section'])

  temp1 = df_perf_raw_data_1[df_perf_raw_data_1['Performance Objective'] == performance_objective].groupby(['AMT_ID', 'Section', 'Performance Objective'])['Reviewed?', ws_review_status_col ].agg(set).reset_index()


In [131]:
#group performance objectives and calculate review status for groupable ACT Ids
#summarize review status for each group of [AMT_ID, Section, Combined ActID]

def group_target_act_id(df):
    '''
    CS, SLR, ADA, 
    '''
    if df['ActID'] in ['H05','H06','H08','H13','H21','H33']:
        return 'Complete Street'
    elif df['ActID'] in ['I19','I20']:
        return 'Sea Level Rise'
    elif df['ActID'] in ['F21','F22','F23','F24','F25','F26','F27','F28', 'F31', 'F34']:
        return 'ADA'    
    else:
        return 'Can not be combined'
    
df_perf_raw_data_1['Combined ActID'] = df_perf_raw_data_1.apply(group_target_act_id, axis = 1)     

temp1 = df_perf_raw_data_1.groupby(['AMT_ID', 'Section','Combined ActID'])['Reviewed?'].agg(['unique']).reset_index()

temp1['Combined ActID Review Status'] = temp1.apply(calc_review_status, args = ['unique'], axis = 1)   

temp1[temp1['Combined ActID']!= 'Can not be combined']['Combined ActID Review Status'] = 'Not Applicable'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp1[temp1['Combined ActID']!= 'Can not be combined']['Combined ActID Review Status'] = 'Not Applicable'


In [132]:
rename_dict = {
    'Combined ActID': 'Combined Performance Objective',
}

temp1 = temp1.rename(rename_dict, axis = 1)

In [133]:
def mark_combined_act_id(df):
    if df['Performance Objective'] in ["Complete Streets Fix Existing","Complete Streets Build New"]:
        return 'Complete Street'
        
    elif df['Performance Objective'] in ["ADA Pedestrian Infrastructure"]:
        return 'ADA'

    elif df['Performance Objective'] in ["Sea Level Rise"]:
        return 'Sea Level Rise'
    else:
        return 'Can not be combined'

    
df_perf_raw_data_1['Combined Performance Objective'] = df_perf_raw_data_1.apply(mark_combined_act_id, axis = 1)

In [134]:
df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp1[['AMT_ID', 'Section', 'Combined Performance Objective','Combined ActID Review Status']], 
                              how = 'left', left_on = ['AMT_ID', 'Section', 'Combined Performance Objective',],  
                              right_on = ['AMT_ID', 'Section', 'Combined Performance Objective',])

In [135]:
df_perf_raw_data_1 ['Combined ActID Review Status'].fillna('No Valid Act ID Data Available', inplace = True)

In [136]:
#summarize review status for each group of [AMT_ID, Section, Program Objective]

temp = df_perf_raw_data_1.groupby(['AMT_ID', 'Section','Performance Objective'])['Reviewed?'].agg(['unique']).reset_index()

temp['Performance Objective Review Status'] = temp.apply(calc_review_status, args = ['unique'], axis = 1)   

df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, temp[['AMT_ID', 'Section', 'Performance Objective','Performance Objective Review Status']], 
                              how = 'left', left_on = ['AMT_ID', 'Section', 'Performance Objective'],  
                              right_on = ['AMT_ID', 'Section', 'Performance Objective'])

In [137]:
def combine_review_status(df):
    if df['Combined Performance Objective'] == 'Can not be combined':
        return df['Performance Objective Review Status']
    else:
        return df['Combined ActID Review Status']

df_perf_raw_data_1['Review Status'] = df_perf_raw_data_1.apply(combine_review_status, axis = 1)

In [138]:
# df_perf_raw_data_1['Review Status'].value_counts()

## Fish Passage review status

In [139]:
def ck_priority_fp_review(df, FP1, FP2):
    '''
    check if the FP1 column to see if the activity for current data row is within FP priority list
    return 'Not in Priority list' if not in the priority list
    If it is in the priority list, check FP2 column, to see the data should be counted as priority list 
    '''
    if df[FP1] != 'Priority List':
        return "Not in Priority list"
    elif pd.isna(df[FP2]) : 
        return 'No'
    else:
        return 'Yes'

def ck_NONpriority_fp_review(df, FP1, FP2):
    if df[FP1] == 'Priority List':
        return "In Priority list"
    elif pd.isna(df[FP2]): 
        return 'No'
    else:
        return 'Yes'

In [140]:
rename_dict = {
    'Fish PassagePriority List(Yes/No)': 'Fish Passage Priority List (Yes/No)',
    'Is the proposedtreatmentexpected toremediate thefish passagepriority barrier?(Yes/No/NA)': 'Should Count toward Fish Passage Priority List (Yes/No)',
    'PriorityIdentifier': 'Priority Identifier',
    'AddressingFish Passagenot in the Priority List(Yes/No)': 'Addressing Fish Passage not in the Priority List (Yes/No)?',
}

df_brg_raw_data_1 = df_brg_raw_data_1.rename(rename_dict, axis = 1)


rename_dict = {
    'Is the proposed treatment expected to remediate the fish passage priority barrier? (Yes/No/NA)': 'Should Count toward Fish Passage Priority List (Yes/No)',
}

df_drain_raw_data_1 = df_drain_raw_data_1.rename(rename_dict, axis = 1)



rename_dict = {
    'Fish Passage Type (Priority List / Not Priority List)': 'Fish Passage Priority List (Yes/No)',
    'Should count as addressing Fish Passage (Yes/No)?': 'Should Count toward Fish Passage Priority List (Yes/No)',
    'Priority Identifieror FP Identification': 'Priority Identifier',    
}

df_fp_raw_data_1 = df_fp_raw_data_1.rename(rename_dict, axis = 1)


In [141]:
# df_brg_raw_data_1.columns

In [142]:
FP1 = 'Fish Passage Priority List (Yes/No)'
FP2 = 'Should Count toward Fish Passage Priority List (Yes/No)'

df_brg_raw_data_1['Fish Passage in the Priority List'] = df_brg_raw_data_1['Fish Passage Priority List (Yes/No)'].apply(lambda x: 'Yes' if x == 'Yes' else 'No')
df_brg_raw_data_1['FP in Priority List reviewed by FP program?'] = df_brg_raw_data_1.apply(ck_priority_fp_review, args = [FP1, FP2], axis = 1)  
df_brg_raw_data_1['Fish Passage NOT in the Priority List'] = df_brg_raw_data_1['Addressing Fish Passage not in the Priority List (Yes/No)?'].apply(lambda x: 'Yes' if x == 'Yes' else 'No')
df_brg_raw_data_1['FP NOT in Priority List reviewed by FP program?'] = df_brg_raw_data_1.apply(ck_NONpriority_fp_review, args = [FP1, FP2], axis = 1)     


df_drain_raw_data_1['Fish Passage in the Priority List'] = df_drain_raw_data_1['Fish Passage Priority List (Yes/No)'].apply(lambda x: 'Yes' if x == 'Yes' else 'No')
df_drain_raw_data_1['FP in Priority List reviewed by FP program?'] = df_drain_raw_data_1.apply(ck_priority_fp_review, args = [FP1, FP2], axis = 1)  
df_drain_raw_data_1['Fish Passage NOT in the Priority List'] = df_drain_raw_data_1['Addressing Fish Passage not in the Priority List (Yes/No)?'].apply(lambda x: 'Yes' if x == 'Yes' else 'No')
df_drain_raw_data_1['FP NOT in Priority List reviewed by FP program?'] = df_drain_raw_data_1.apply(ck_NONpriority_fp_review, args = [FP1, FP2], axis = 1)  


df_fp_raw_data_1['Fish Passage in the Priority List'] = df_fp_raw_data_1['Fish Passage Priority List (Yes/No)'].apply(lambda x: 'Yes' if x == 'Priority List' else 'No')
df_fp_raw_data_1['FP in Priority List reviewed by FP program?'] = df_fp_raw_data_1.apply(ck_priority_fp_review, args = [FP1, FP2], axis = 1)
df_fp_raw_data_1['Fish Passage NOT in the Priority List'] = df_fp_raw_data_1['Fish Passage Priority List (Yes/No)'].apply(lambda x: 'No' if x == 'Priority List' else 'Yes')
df_fp_raw_data_1['FP NOT in Priority List reviewed by FP program?'] = df_fp_raw_data_1.apply(ck_NONpriority_fp_review, args = [FP1, FP2], axis = 1)  

In [143]:
# df_drain_raw_data_1[df_drain_raw_data_1['AMT_ID']==22867]

In [144]:
target_cols = ['AMT_ID', 'Section','District','Priority Identifier','Should Count toward Fish Passage Priority List (Yes/No)']

In [145]:
temp = df_brg_raw_data_1[df_brg_raw_data_1['Fish Passage Priority List (Yes/No)'] == 'Yes'][target_cols]
temp['Worksheet'] = 'Bridge'

temp1 = df_drain_raw_data_1[df_drain_raw_data_1['Fish Passage Priority List (Yes/No)'] == 'Yes'][target_cols]
temp1['Worksheet'] = 'Drainage'
temp = temp.append(temp1)

temp2 = df_fp_raw_data_1[df_fp_raw_data_1['Fish Passage Priority List (Yes/No)'] == 'Priority List'][target_cols]
temp2['Worksheet'] = 'Fish Passage'
temp = temp.append(temp2)

In [146]:
temp['Priority Identifier'].fillna(0, inplace = True)
temp['Priority Identifier'] = temp['Priority Identifier'].astype(int)

In [147]:
#bookmark

temp[temp['AMT_ID']==22780]

Unnamed: 0,AMT_ID,Section,District,Priority Identifier,Should Count toward Fish Passage Priority List (Yes/No),Worksheet
6624,22780,TYP,5,201855,No,Drainage
6625,22780,TYP,5,201855,---,Drainage
6626,22780,TYP,5,201855,---,Drainage
6627,22780,TYP,5,201856,---,Drainage
6628,22780,TYP,5,201856,Yes,Drainage
6629,22780,TYP,5,201856,---,Drainage


In [148]:
temp['Should Count toward Fish Passage Priority List (Yes/No)'].fillna('Needs Review', inplace = True)

temp_group = temp.groupby(['Worksheet','AMT_ID', 'Section', 'District', 'Priority Identifier'])['Should Count toward Fish Passage Priority List (Yes/No)'].agg('value_counts').reset_index(name = 'Counts')

df_fp_list = temp_group.pivot(index=['Worksheet','AMT_ID', 'Section', 'District', 'Priority Identifier',], columns='Should Count toward Fish Passage Priority List (Yes/No)', values='Counts').reset_index()


In [149]:
if 'No' not in df_fp_list.columns:
    df_fp_list['No'] = 0

if 'Yes' not in df_fp_list.columns:
    df_fp_list['Yes'] = 0

In [150]:
df_fp_list.fillna(0, inplace = True)

df_fp_list['Grand Total'] = df_fp_list['Needs Review'] + df_fp_list['No'] + df_fp_list['Yes']

In [151]:
df_fp_list = pd.merge(df_fp_list, df_draft_pb[['AMT_ID','Section','EA_','Advertised Year',]],
                     how = 'left', left_on = ['AMT_ID','Section',], right_on = ['AMT_ID','Section',]
                     )

In [152]:
df_fp_list['RTL'] =  df_fp_list['Advertised Year'].apply(lambda x: int(x[-2:]) +2000)

In [153]:
rename_dict = {
    'EA_': 'EA',
    'Needs Review':'Should Count Towards FP Priority List: Needs Review',
    'No':'Should Count Towards FP Priority List: No',
    'Yes':'Should Count Towards FP Priority List: Yes',
    'Grand Total': 'Should Count Towards FP Priority List: Grand Total',
}

df_fp_list = df_fp_list.rename(rename_dict, axis = 1)

In [154]:
df_fp_list['EA'] = df_fp_list['EA'].apply(remove_punction)

In [155]:
df_fp_list= df_fp_list[['Worksheet', 'AMT_ID', 'Section', 'District', 'EA','RTL', 
                        'Priority Identifier',
       'Should Count Towards FP Priority List: Needs Review',
       'Should Count Towards FP Priority List: No',
       'Should Count Towards FP Priority List: Yes',
       'Should Count Towards FP Priority List: Grand Total', ]]

In [156]:
# df_fp_list

In [157]:
temp = df_fp_list.groupby(['Worksheet', 'AMT_ID', 'Section','District', 'EA','RTL',])[['Should Count Towards FP Priority List: Needs Review',
       'Should Count Towards FP Priority List: No',
       'Should Count Towards FP Priority List: Yes',
       'Should Count Towards FP Priority List: Grand Total', ]].sum().reset_index()

In [158]:
def calc_FP_review_status(df):
#     if df['Priority Identifier'] == 0: 
#         return 'No priority FP identified'
    
    if df['Should Count Towards FP Priority List: Needs Review'] == 0:
        return 'Review Complete'
    elif df['Should Count Towards FP Priority List: No'] + df['Should Count Towards FP Priority List: Yes'] == 0:
        return 'Needs Review'
    else:
        return 'Partially Reviewed'
temp['Priority FP Review Status'] = temp.apply(calc_FP_review_status, axis = 1)

In [159]:
def calc_fp_worksheet(df):
    if df['Performance Objective'] == 'Fish Passage':
        if df['Perf Activity Category'] == 'Sustainability/Climate Change':
            return 'Fish Passage'
        else:
            return df['Perf Activity Category']
    else:
        return 'NA'
    
df_perf_raw_data_1['Worksheet'] = df_perf_raw_data_1.apply(calc_fp_worksheet, axis = 1)

In [160]:
df_perf_raw_data_1 = pd.merge(df_perf_raw_data_1, 
                              temp[['AMT_ID','Section','Worksheet','Priority FP Review Status']], 
                              how = 'left', 
                              left_on = ['AMT_ID','Section','Worksheet'], 
                              right_on = ['AMT_ID','Section','Worksheet'])

In [161]:
# df_fp_list.groupby(['AMT_ID','Section','Worksheet'])['AMT_ID'].count()

In [162]:
df_perf_raw_data_1['Priority FP Review Status'].fillna('No priority FP identified', inplace = True)

In [163]:
def combine_review_status_columns(df):
    '''
    combine review status from different review status columns into one combined review status column
    '''
    AMT_ID = df['AMT_ID']
    Section = df['Section']
    
    if df['Performance Objective'] == 'Fish Passage':
        return df['Priority FP Review Status']
    
    elif df['Performance Objective'] == "Bridge and Tunnel Health":
        return df['Combined Bridge Health Review Status']
    
    elif df['Performance Objective'] == "Transportation Management Systems":
        return df['Combined TMS Technology Review Status']
    
    elif df['Performance Objective'] == "Transportation Management System Structures":
        return df['Combined TMS Structures Review Status']        
    
    else:
        return df['Review Status'] 
    
df_perf_raw_data_1['Combined Review Status'] = df_perf_raw_data_1.apply(combine_review_status_columns, axis = 1)   

In [164]:
# AMT_ID = 18672 
# Section = 'PRG'

# df_perf_raw_data_1[(df_perf_raw_data_1['AMT_ID'] == AMT_ID) & (df_perf_raw_data_1['Section'] == Section)
#                    & (df_perf_raw_data_1['Performance Objective'] == 'Complete Streets Fix Existing')
#                 ]

In [165]:
# AMT_ID = 18672 
# Section = 'PRG'

# df_perf_raw_data_1[(df_perf_raw_data_1['AMT_ID'] == AMT_ID) & (df_perf_raw_data_1['Section'] == Section)
#                    & (df_perf_raw_data_1['Combined ActID'] == "Complete Street")
#                 ]

In [166]:
def ck_HQ_review_complete(df):
    # if modified after review, return Needs Review
    if pd.notna(df['PerformanceChange Date After Review']):
        return 'Needs Re-review'
    else: #
        return df['Combined Review Status']
    
     
df_perf_raw_data_1['Is HQ Review Complete?'] = df_perf_raw_data_1.apply(ck_HQ_review_complete, axis = 1)   

df_perf_raw_data_1['Is HQ Review Complete?'].fillna('No Need for Review', inplace = True)

In [167]:
df_perf_raw_data_1['Date'] = TARGETDATE

df_perf_raw_data_1['PerformanceChange Date After Review'].fillna('NA', inplace = True)

In [168]:
# #DEBUG: Bridge Health
# # AMT_ID = 13550
# AMT_ID = 23253
# AMT_ID = 21974

# df_perf_raw_data_1[
#     (df_perf_raw_data_1['AMT_ID'] == AMT_ID)
#     & (df_perf_raw_data_1['Performance Objective'].isin(["Bridge and Tunnel Health"]))
# #     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
# ][['AMT_ID','Section','ActID','Review Date','Reviewed?','Worksheet','Combined ActID','Bridge WS Health Review Status','Is HQ Review Complete?']]

In [169]:
# df_brg_raw_data_1[df_brg_raw_data_1['AMT_ID'] == AMT_ID][['AMT_ID', 'Section','Health Pre','Health Post','Post-Condition for Bridge Health entered?']]

In [170]:
# #DEBUG: TMS Technology

# AMT_ID = 15955   # No TMS worksheet
# AMT_ID = 21663   # Partially Reviewed
# # AMT_ID = 19289  # Needs Review
# AMT_ID = 19939   # Partially Reviewed
# df_perf_raw_data_1[
#     (df_perf_raw_data_1['AMT_ID'] == AMT_ID)
#     & (df_perf_raw_data_1['Performance Objective']=="Transportation Management Systems")
# #     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
# ][['AMT_ID','Section','ActID','Review Date','Reviewed?','Worksheet','Performance Objective','TMS WS Review Status','Combined TMS Technology Review Status','Is HQ Review Complete?']]

In [171]:
# #DEBUG: TMS Structures
# AMT_ID = 19543   # Partially Reviewed

# df_perf_raw_data_1[
#     (df_perf_raw_data_1['AMT_ID'] == AMT_ID)
#     & (df_perf_raw_data_1['Performance Objective'].isin(["Transportation Management System Structures"]))
# #     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
# ][['AMT_ID','Section','ActID','Review Date','Reviewed?','Worksheet','Performance Objective','TMS WS Review Status','Combined TMS Technology Review Status','Combined TMS Structures Review Status','Is HQ Review Complete?']]

In [172]:
# df_tms_raw_data_1[(df_tms_raw_data_1['AMT_ID'] == AMT_ID)]
# [['AMT_ID','Section','TMS Structural or Technology','Post-Condition entered?']]

In [173]:
# #DEBUG: Complete Street

# AMT_ID = 20245
# df_perf_raw_data_1[
#     (df_perf_raw_data_1['AMT_ID'] == AMT_ID)
# #     & (df_perf_raw_data_1['Performance Objective'].isin(["Complete Streets Fix Existing","Complete Streets Build New"]))
# #     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
# ][['AMT_ID','Section','ActID','Review Date','Reviewed?','Worksheet',
#    'Combined ActID','Combined ActID Review Status','Performance Objective','Performance Objective Review Status','Review Status','Is HQ Review Complete?']]

In [174]:
#DEBUG Fish Passage

AMT_ID = 22780
# AMT_ID = 20275
df_perf_raw_data_1[
    (df_perf_raw_data_1['AMT_ID'] == AMT_ID)
    & (df_perf_raw_data_1['Performance Objective'] == 'Fish Passage')
#     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
][['AMT_ID','Section','ActID','Review Date','Reviewed?','Worksheet','Combined ActID','Priority FP Review Status']]

Unnamed: 0,AMT_ID,Section,ActID,Review Date,Reviewed?,Worksheet,Combined ActID,Priority FP Review Status
10125,22780,TYP,C17,,No,Drainage,Can not be combined,Review Complete


In [None]:
# df_fp_list[df_fp_list['AMT_ID'] == AMT_ID]

In [None]:
# df_perf_raw_data_1[
#     (df_perf_raw_data_1['AMT_ID'] == 20240)
#     & (df_perf_raw_data_1['Performance Objective'] == 'Complete Streets Fix Existing')
# #     & (df_perf_raw_data_1['Performance Objective'] == 'No Performance Objective in the SHSMP')
# ]


<a id='Export_Data'></a>

# Export Data


In [175]:
df_drain_raw_data_1['Should Count toward Fish Passage not in the Priority List (Yes/No)'].fillna('N/A', inplace = True)

In [176]:
df_brg_raw_data_1['Data_Date'] = TARGETDATE
df_drain_raw_data_1['Data_Date'] = TARGETDATE
df_fp_raw_data_1['Data_Date'] = TARGETDATE
df_fp_list['Data_Date'] = TARGETDATE

df_brg_raw_data_1['Data_HourMinute'] = DATA_HHMM
df_drain_raw_data_1['Data_HourMinute'] = DATA_HHMM
df_fp_raw_data_1['Data_HourMinute'] = DATA_HHMM
df_fp_list['Data_HourMinute'] = DATA_HHMM


export_data(df_brg_raw_data_1, 'bridge_worksheet', PROJECTBOOKCHECK_HTTPSEVER_FOLDER, LOG_FILE)

export_data(df_drain_raw_data_1, 'drainage_worksheet', PROJECTBOOKCHECK_HTTPSEVER_FOLDER, LOG_FILE)

export_data(df_fp_raw_data_1, 'fishpassage_worksheet', PROJECTBOOKCHECK_HTTPSEVER_FOLDER, LOG_FILE)

export_data(df_fp_list, 'fishpassage_list', PROJECTBOOKCHECK_HTTPSEVER_FOLDER, LOG_FILE)

processing table: 894it [00:00, 5959.63it/s]


Signing into AssetManagement at https://tableau.dot.ca.gov
Publishing bridge_worksheet.hyper to Sandbox_ProjectBookCheck_Automation...


processing table: 11234it [00:02, 5514.60it/s]


Signing into AssetManagement at https://tableau.dot.ca.gov
Publishing drainage_worksheet.hyper to Sandbox_ProjectBookCheck_Automation...


processing table: 5it [00:00, 1665.86it/s]


Signing into AssetManagement at https://tableau.dot.ca.gov
Publishing fishpassage_worksheet.hyper to Sandbox_ProjectBookCheck_Automation...


processing table: 26it [00:00, 8697.02it/s]


Signing into AssetManagement at https://tableau.dot.ca.gov
Publishing fishpassage_list.hyper to Sandbox_ProjectBookCheck_Automation...


In [177]:
def convert_quantity(df):
    '''
    get the numerical rows of the quanity, assign zero for non-numerical rows
    '''
    try: 
        return float(df['Quantity'])
    except:
        if df['Quantity'] == 'Yes':
            return 1
        else:
            return 0
df_perf_raw_data_1['Quantity_Number'] = df_perf_raw_data_1.apply(convert_quantity, axis = 1)

In [178]:
# for v in df_perf_raw_data_1['Quantity'].values:
#     try: 
#         _ = float(v)
#     except:
#         print(v)

In [179]:
# export performance raw data with review summary
df_perf_raw_data_1['Data_Date'] = TARGETDATE
df_perf_raw_data_1['Data_HourMinute'] = DATA_HHMM
out_cols = [
    'District', 'AMT_ID', 'EA', 'EFIS', 'PPNO', 'Location', 'County',
       'Route', 'BackPM', 'AheadPM', 'ProjectedRTL FY',
       'Main Activity Category', 'Section', 'ActID', 'Perf Activity Category',
       'Activity Detail', 'Performance Objective', 'Unit of Measurement',
       'Quantity_Number', 'Assets in Good Cond', 'Assets in Fair Cond',
       'Assets in Poor Cond', 'New Achieved', 'Comment', 'Guidance',
       'Last Saved', 'Saved By', 'Post-Good', 'Post-Fair', 'Post-Poor',
       'HQ ProgramReview - Agree with District?', 'HQ Comment', 'Review Date',
       'PerformanceChange Date After Review', 'Status','Concatenate ID+Objective',
       'Planning or Post-Planning', 'Advertised Year', 'F2G Achieved',
       'P2G Achieved', 
       'Is HQ Review Complete?', 'Data_Date','Data_HourMinute','Combined ActID','Combined Performance Objective']

export_data(df_perf_raw_data_1[out_cols], 'performance_review_summary', PROJECTBOOKCHECK_HTTPSEVER_FOLDER, LOG_FILE)


processing table: 485it [00:00, 4849.99it/s]

Table 'Extract' does not exist in extract performance_review_summary.hyper, creating.


processing table: 20383it [00:03, 5194.09it/s]


Signing into AssetManagement at https://tableau.dot.ca.gov
Publishing performance_review_summary.hyper to Sandbox_ProjectBookCheck_Automation...


In [180]:
# df_perf_raw_data_1['Activity Detail'].unique()

# df_perf_raw_data_1[
#     (df_perf_raw_data_1['Activity Detail'] == 'Abandon/Remove Culvert (201.151)')
#     & (df_perf_raw_data_1['ProjectedRTL FY'] == '2021/22')
#     & (df_perf_raw_data_1['Unit of Measurement'] == 'Each')
#                   ]['Quantity'].astype(float).sum()

# df_perf_raw_data_1['ProjectedRTL FY'].unique()

# df_perf_raw_data_1[
#     (df_perf_raw_data_1['Activity Detail'] == 'Abandon/Remove Culvert (201.151)')
#                   ]

# df_perf_raw_data_1[df_perf_raw_data_1['AMT_ID'] == 11358][['AMT_ID', 
#        'Main Activity Category', 'Section', 'ActID', 'Perf Activity Category',
#        'Activity Detail', 'Performance Objective','Unit of Measurement',
#        'Quantity','Assets in Good Cond', 'Assets in Fair Cond',
#        'Assets in Poor Cond', 'New Achieved']]


<a id='FinalCleanUp'></a>
## Final Clean Up

In [181]:

#clean up tableau publishing log file

import os
import glob
# get a recursive list of file paths that matches pattern
fileList = glob.glob('./*.log')
# Iterate over the list of filepaths & remove each file.
for filePath in fileList:
    try:
        os.remove(filePath)
    except OSError:
        print("Error while deleting file")


In [182]:
end_time =  time.time()
elapsed = end_time - start_time
print('time elapsed : {} seconds'.format(elapsed))

time elapsed : 3657.309934616089 seconds
