In [1]:
import plotly.express as px
import csv
import pandas as pd
import datetime
import plotly.io as pio

# Three-letter month labels in calendar order.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

# One 2019 CSV per month, named '<lowercased label>_19.csv', listed in the
# same order as `months`.
fnames = [label.lower() + '_19.csv' for label in months]


# Open all sunburst files
# for month in months:
#     file_save_name = path + 'sunburst_' + month + '.html'
#     fh = open(file_save_name, 'w')
#     fh.close()

# Open average fare and tip file
# file_save_name = path + 'bar_avg_fare_tip_per_month.html'
# fh = open(file_save_name, 'w')
# fh.close()

# Open frequency of ride call times
# file_save_name = path + 'histogram_num_pass_per_hour_per_weekday.html'
# fh = open(file_save_name, 'w')
# fh.close()

# Per-file aggregates: one entry is appended per file in `fnames`, in order.
dataframes_2019 = []
total_tips_2019 = []
total_fares_2019 = []
total_rides_2019 = []

# Column name -> list of per-ride values. Every list receives exactly one
# value per ride, so all columns stay the same length.
# The former scalar entries 'total_fare', 'total_tip' and 'count' were removed:
# nothing ever updated them, and pd.DataFrame broadcast them into three
# all-zero columns.
data = {
    'pickup': [],
    'dropoff': [],
    'passengers': [],
    'distance': [],
    'pu_loc': [],
    'do_loc': [],
    'fare': [],
    'tip': [],
    'hour': [],
    'weekday': [],
    'month': [],
    'year': [],
    'day': [],
}

for fname in fnames:
    count = 0
    total_fare = 0
    total_tip = 0
    # newline='' is what the csv module documentation asks for, so the reader
    # does its own newline handling (matters for quoted fields).
    with open(fname, newline='') as f:
        for row in csv.reader(f):
            # Skip blank lines (csv yields [] for them) and the header row.
            if not row or row[0] == 'tpep_pickup_datetime':
                continue

            time_obj = datetime.datetime.strptime(row[0], '%Y-%m-%d  %H:%M:%S')

            # Use the magnitude for both amounts. The stored tip is now the
            # same absolute value that goes into the monthly total, matching
            # how the fare was already handled.
            fare = abs(float(row[6]))
            tip = abs(float(row[7]))

            count += 1
            total_fare += fare
            total_tip += tip

            data['pickup'].append(row[0])
            data['dropoff'].append(row[1])
            # An empty passenger field is recorded as a single passenger.
            data['passengers'].append(int(row[2]) if row[2] != '' else 1)
            data['distance'].append(float(row[3]))
            data['pu_loc'].append(row[4])
            data['do_loc'].append(row[5])
            data['fare'].append(fare)
            data['tip'].append(tip)
            data['hour'].append(time_obj.hour)
            data['weekday'].append(time_obj.strftime('%A'))
            data['month'].append(time_obj.strftime('%b'))
            data['year'].append(time_obj.strftime('%Y'))
            # Kept as a zero-padded string ('01', '02', ...).
            data['day'].append(time_obj.strftime('%d'))

    total_rides_2019.append(count)
    total_fares_2019.append(total_fare)
    total_tips_2019.append(total_tip)

# One row per ride across all files.
df_2019 = pd.DataFrame(data)
        
# ==============================
# Average fare and tip per month
# ==============================

# Mean fare and mean tip per ride for each file/month, rounded to 2 decimals.
# A month with no rides yields 0.0 instead of raising ZeroDivisionError.
fares = [
    round(month_total / rides, 2) if rides else 0.0
    for month_total, rides in zip(total_fares_2019, total_rides_2019)
]
tips = [
    round(month_total / rides, 2) if rides else 0.0
    for month_total, rides in zip(total_tips_2019, total_rides_2019)
]

print('fares', fares)
print('tips', tips)

fig = px.bar(
    x=months,
    y=fares,
    title='Average fare stacked on average tip per month',
    labels={
        'x': 'Month',
        'y': 'Amount of money ($)',
    },
)
# The trace created by px.bar has no name and is hidden from the legend, so
# name it explicitly; otherwise the legend would list only the tip trace.
fig.update_traces(name='Fare amount', showlegend=True)
fig.add_bar(x=months, y=tips, name='Tip amount')
fig.update_layout(barmode='stack')
fig.show()

# file_save_name = path + 'bar_avg_fare_tip_per_month.html'
# pio.write_html(fig, file=file_save_name, auto_open=True, full_html=False)


# =====================================
# Number of passengers for each weekday
# =====================================

# Colour assigned to each weekday sector. '(?)' is the value Plotly Express
# uses for a sector whose rows do not share a single colour value.
color_map = {
    '(?)': 'black',
    'Sunday': 'coral',
    'Monday': 'orangered',
    'Tuesday': 'lightgoldenrodyellow',
    'Wednesday': 'lightseagreen',
    'Thursday': 'lightsteelblue',
    'Friday': 'indigo',
    'Saturday': 'mediumpurple',
}

# One sunburst per month: inner ring = weekday, outer ring = passenger count,
# sector size = sum of the 'passengers' column.
# `i` is kept because the commented-out export code below the loop body
# indexes months[i].
for i, month in enumerate(months):
    sunburst_title = 'Distribution of number of passengers per car ride, separated by day ({})'.format(month)
    df_2019_month = df_2019[df_2019['month'] == month]
    # color_discrete_map only takes effect when `color` names a column;
    # without color='weekday' the map above was silently ignored.
    fig = px.sunburst(
        df_2019_month,
        path=['weekday', 'passengers'],
        values='passengers',
        color='weekday',
        title=sunburst_title,
        color_discrete_map=color_map,
    )
    fig.show()

    # file_save_name = path + 'sunburst_' + months[i] + '.html'
    # pio.write_html(fig, file=file_save_name, auto_open=True, full_html=False)

# =======================================
# Frequency of ride call times in a month
# =======================================

# NOTE(review): `user_input` and `df_2019_jan_date` are not used by anything
# visible in this cell; kept in case later cells rely on them - confirm and
# remove if they are dead.
user_input = 1
df_2019_jan = df_2019[df_2019['month'] == months[0]]
df_2019_jan_date = df_2019_jan[df_2019_jan['day'] == '01']

# Histogram of pickup hour for January rides, one animation frame per weekday.
fig = px.histogram(
    df_2019_jan,
    x='hour',
    animation_frame='weekday',
    title='Frequency of ride call times in January, separated by weekday',
    labels={
        'hour': 'Hour of day',
        'y': 'Number of rides',
    },
)
# The 'y' entry in `labels` matches no column of a count histogram, so the
# y axis would keep its default "count" title; set the intended title directly.
fig.update_yaxes(title_text='Number of rides')
fig.show()

# file_save_name = path + 'histogram_num_pass_per_hour_per_weekday.html'
# pio.write_html(fig, file=file_save_name, auto_open=True, full_html=False)

fares [12.26, 12.67, 13.06, 13.21, 13.47, 14.67, 13.62, 13.7, 13.85, 13.66, 13.42, 13.63]
tips [1.83, 2.16, 2.22, 2.21, 2.26, 2.26, 2.18, 2.17, 2.29, 2.28, 2.25, 2.25]
