In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
sns.set_style("whitegrid")
sns.set_palette("GnBu_d")
import folium
from folium import plugins
from folium.plugins import HeatMap
from datetime import datetime
from datetime import timedelta
import math
import random 
import timeit
from haversine import haversine # conda install -c conda-forge haversine ''

In [2]:
brm = pd.read_csv("resources/bremen.csv")
FRB = pd.read_csv("resources/freiburg.csv")

In [3]:
brm["trip_duration"] = brm["trip_duration"].astype(str).str.extract("days (.*?)\.")
FRB["trip_duration"] = FRB["trip_duration"].astype(str).str.extract("days (.*?)\.")

Ab hier:

To work with the duration of the Trips we need to convert them to a useful type:

In [4]:
brm["trip_duration"] = brm["trip_duration"].apply(pd.Timedelta)
FRB["trip_duration"] = FRB["trip_duration"].apply(pd.Timedelta)

After the convertion we can sum the time daily driven:

In [5]:
brm_daily_driven = brm.groupby("day").agg({"trip_duration":"sum"})
FRB_daily_driven = FRB.groupby("day").agg({"trip_duration":"sum"})

For a more detailed aggregation we add a daytime to the variables, so we can calculate time driven by daytime. This also allows a better mapping of how weather effects demand.

In [6]:
brm["daytime"] = "null"
FRB["daytime"] = "null"

Define Daytimes: Morning: 6 - 11 | Noon: 11 - 16 | Evening: 16 - 21 | Night: 21 - 6

In [7]:
brm_morning = brm[(brm["time"] > "06:00:00") & (brm["time"] <= "11:00:00")] #saving rows where time is in defined range to a new df
brm_noon = brm[(brm["time"] > "11:00:00") & (brm["time"] <= "16:00:00")]
brm_evening = brm[(brm["time"] > "16:00:00") & (brm["time"] <= "21:00:00")]
brm_night = brm[(brm["time"] > "21:00:00")] + brm[(brm["time"] <= "06:00:00")]

In [8]:
FRB_morning = FRB[(FRB["time"] > "06:00:00") & (FRB["time"] <= "11:00:00")]
FRB_noon = FRB[(FRB["time"] > "11:00:00") & (FRB["time"] <= "16:00:00")]
FRB_evening = FRB[(FRB["time"] > "16:00:00") & (FRB["time"] <= "21:00:00")]
FRB_night = FRB[(FRB["time"] > "21:00:00")] + FRB[(FRB["time"] <= "06:00:00")]

adding corresponding values to column:

In [9]:
for i in brm_morning.index: # using indexes from lists above to cycle through the whole data
   brm.at[i , "daytime"] = 0 # morning refers to 0
for i in brm_noon.index:
   brm.at[i , "daytime"] = 1 # noon -> 1
for i in brm_evening.index:
   brm.at[i , "daytime"] = 2 # evening -> 2
for i in brm_night.index:
   brm.at[i , "daytime"] = 3 # night -> 3

In [10]:
for i in FRB_morning.index:
   FRB.at[i , "daytime"] = 0
for i in FRB_noon.index:
   FRB.at[i , "daytime"] = 1
for i in FRB_evening.index:
   FRB.at[i , "daytime"] = 2
for i in FRB_night.index:
   FRB.at[i , "daytime"] = 3

Summarizing data over day and daytime via aggregation of the trip durations taken.

In [11]:
brm_daytime_driven = brm.groupby(["day","daytime"]).agg({"trip_duration":"sum"})
brm_daytime_driven["trip_duration"] = brm_daytime_driven["trip_duration"].astype(str).str.extract("days (.*?)\.")

In [12]:
FRB_daytime_driven = FRB.groupby(["day","daytime"]).agg({"trip_duration":"sum"})
FRB_daytime_driven["trip_duration"] = FRB_daytime_driven["trip_duration"].astype(str).str.extract("days (.*?)\.")

In [13]:
# Anzeige von dem ganzen gedöns das ich ausgerechnet hab und ein Auschnitt von einem Tag aus Freiburg zur Überprüfung:
print(brm_daytime_driven)# falls jemand weiss wie man daytime als Spalten bekommt sagt bescheid 😄 (hab 1std versucht das hinzubekomme dann wars mir zu blöd 🤣)
print(FRB_daytime_driven)
print(FRB[FRB["day"] == "2019-05-07"])

trip_duration
day        daytime              
2019-01-20 0            04:21:00
           1            20:37:00
           2            08:41:00
           3            05:38:00
2019-01-21 0            16:45:00
...                          ...
2020-01-19 3            19:32:00
2020-01-20 0            21:32:00
           1            21:54:00
           2            17:04:00
           3            06:35:00

[1451 rows x 1 columns]
                   trip_duration
day        daytime              
2019-05-06 1            00:07:00
2019-05-07 0            00:07:00
           1            00:32:00
2019-05-08 0            00:03:00
           1            00:18:00
...                          ...
2020-01-19 3            02:26:00
2020-01-20 0            05:46:00
           1            22:31:00
           2            14:34:00
           3            12:07:00

[1008 rows x 1 columns]
          day      time  b_number      city trip_duration   orig_lat  \
1  2019-05-07  10:42:00     32560  frei

In [14]:
brm["hour"] = brm["time"].str[:-6]

In [15]:
brm_hourly_driven = brm.groupby(["day","hour"]).agg({"trip_duration":"sum"})

In [16]:
brm_hourly_driven.to_csv("generated/brm_hourly_driven.csv")