In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
sns.set_style("whitegrid")
sns.set_palette("GnBu_d")
import folium
from folium import plugins
from folium.plugins import HeatMap
from datetime import datetime
from datetime import timedelta
import math
import random 
import timeit
from haversine import haversine # conda install -c conda-forge haversine ''

In [4]:
# Load the raw trip tables for both cities (Bremen first, Freiburg second).
brm, frb = map(pd.read_csv, ("../resources/bremen.csv", "../resources/freiburg.csv"))

In [5]:
# Keep only the text between "days " and the first "." of every duration string,
# i.e. the clock part; the leading day count and any fractional seconds are dropped.
# NOTE(review): assumes the CSV stores durations like "0 days 00:12:34.000000000";
# a value without a "." does not match the pattern and becomes NaN — confirm against the data.
# The pattern is a raw string so "\." is a regex escape rather than an invalid Python
# string escape, and expand=False makes str.extract return a Series instead of a
# one-column DataFrame.
brm["trip_duration"] = brm["trip_duration"].astype(str).str.extract(r"days (.*?)\.", expand=False)
frb["trip_duration"] = frb["trip_duration"].astype(str).str.extract(r"days (.*?)\.", expand=False)

From here on:

To work with the trip durations, we need to convert them from strings to a proper timedelta type:

In [6]:
# Parse the duration strings into timedelta values in one vectorised call
# instead of constructing a pd.Timedelta per row; missing values become NaT.
brm["trip_duration"] = pd.to_timedelta(brm["trip_duration"])
frb["trip_duration"] = pd.to_timedelta(frb["trip_duration"])

After the conversion, we can sum the total time driven per day:

In [7]:
# Total trip duration per calendar day, one frame per city (Bremen, Freiburg).
brm_daily_driven, frb_daily_driven = (
    city_trips.groupby("day").agg(trip_duration=("trip_duration", "sum"))
    for city_trips in (brm, frb)
)

For a more detailed aggregation, we add a `daytime` column to the data so that we can calculate the time driven per time of day. This also allows a better mapping of how weather affects demand.

In [8]:
# Create the "daytime" column in both frames, pre-filled with the string placeholder "null".
for city_trips in (brm, frb):
    city_trips["daytime"] = "null"

Define daytimes (lower bound exclusive, upper bound inclusive): Morning: 06:00 - 11:00 | Noon: 11:00 - 16:00 | Evening: 16:00 - 21:00 | Night: 21:00 - 06:00

In [9]:
# Split the Bremen trips into one frame per daytime. Each range excludes its lower
# bound and includes its upper bound.
# NOTE(review): the comparisons are plain string comparisons, which presumably relies
# on "time" being zero-padded "HH:MM:SS" text — confirm against the CSV.
brm_morning = brm[(brm["time"] > "06:00:00") & (brm["time"] <= "11:00:00")]
brm_noon = brm[(brm["time"] > "11:00:00") & (brm["time"] <= "16:00:00")]
brm_evening = brm[(brm["time"] > "16:00:00") & (brm["time"] <= "21:00:00")]
# Night wraps around midnight, so it is the union of "after 21:00" and "up to 06:00".
# A single OR-mask keeps the real rows; adding the two slices with "+" would do
# index-aligned arithmetic and produce a frame holding only NaN values.
brm_night = brm[(brm["time"] > "21:00:00") | (brm["time"] <= "06:00:00")]

In [10]:
# Split the Freiburg trips into one frame per daytime. Each range excludes its lower
# bound and includes its upper bound.
# NOTE(review): the comparisons are plain string comparisons, which presumably relies
# on "time" being zero-padded "HH:MM:SS" text — confirm against the CSV.
frb_morning = frb[(frb["time"] > "06:00:00") & (frb["time"] <= "11:00:00")]
frb_noon = frb[(frb["time"] > "11:00:00") & (frb["time"] <= "16:00:00")]
frb_evening = frb[(frb["time"] > "16:00:00") & (frb["time"] <= "21:00:00")]
# Night wraps around midnight, so it is the union of "after 21:00" and "up to 06:00".
# A single OR-mask keeps the real rows; adding the two slices with "+" would do
# index-aligned arithmetic and produce a frame holding only NaN values.
frb_night = frb[(frb["time"] > "21:00:00") | (frb["time"] <= "06:00:00")]

Add the corresponding daytime codes (0 = morning, 1 = noon, 2 = evening, 3 = night) to the `daytime` column:

In [11]:
# Write the daytime code into every Bremen row that belongs to the matching slice.
# The slices keep the row labels of brm, so one label-based .loc assignment per
# daytime replaces the former row-by-row loops. Rows contained in none of the
# slices are left untouched.
brm.loc[brm_morning.index, "daytime"] = 0  # morning -> 0
brm.loc[brm_noon.index, "daytime"] = 1     # noon    -> 1
brm.loc[brm_evening.index, "daytime"] = 2  # evening -> 2
brm.loc[brm_night.index, "daytime"] = 3    # night   -> 3

In [12]:
# Write the daytime code into every Freiburg row that belongs to the matching slice.
# The slices keep the row labels of frb, so one label-based .loc assignment per
# daytime replaces the former row-by-row loops. Rows contained in none of the
# slices are left untouched.
frb.loc[frb_morning.index, "daytime"] = 0  # morning -> 0
frb.loc[frb_noon.index, "daytime"] = 1     # noon    -> 1
frb.loc[frb_evening.index, "daytime"] = 2  # evening -> 2
frb.loc[frb_night.index, "daytime"] = 3    # night   -> 3

Summarizing data over day and daytime via aggregation of the trip durations taken.

In [13]:
# Total trip duration per (day, daytime) for Bremen, stored as "HH:MM:SS" text.
brm_daytime_driven = brm.groupby(["day", "daytime"]).agg({"trip_duration": "sum"})

# Build the text from the timedelta components instead of cutting it out of the
# string representation: the former "days (.*?)\." extraction discarded the day
# count (so any total of 24 hours or more was truncated) and produced NaN whenever
# the representation carried no fractional seconds.
# Whole days are folded into the hour field, so it may exceed 23; fractional
# seconds are dropped, as before.
# NOTE(review): assumes "trip_duration" is a timedelta column at this point — confirm.
brm_duration_parts = brm_daytime_driven["trip_duration"].dt.components
brm_total_hours = brm_duration_parts["days"] * 24 + brm_duration_parts["hours"]
brm_daytime_driven["trip_duration"] = (
    brm_total_hours.astype(str).str.zfill(2)
    + ":" + brm_duration_parts["minutes"].astype(str).str.zfill(2)
    + ":" + brm_duration_parts["seconds"].astype(str).str.zfill(2)
)

In [14]:
# Total trip duration per (day, daytime) for Freiburg, stored as "HH:MM:SS" text.
frb_daytime_driven = frb.groupby(["day", "daytime"]).agg({"trip_duration": "sum"})

# Build the text from the timedelta components instead of cutting it out of the
# string representation: the former "days (.*?)\." extraction discarded the day
# count (so any total of 24 hours or more was truncated) and produced NaN whenever
# the representation carried no fractional seconds.
# Whole days are folded into the hour field, so it may exceed 23; fractional
# seconds are dropped, as before.
# NOTE(review): assumes "trip_duration" is a timedelta column at this point — confirm.
frb_duration_parts = frb_daytime_driven["trip_duration"].dt.components
frb_total_hours = frb_duration_parts["days"] * 24 + frb_duration_parts["hours"]
frb_daytime_driven["trip_duration"] = (
    frb_total_hours.astype(str).str.zfill(2)
    + ":" + frb_duration_parts["minutes"].astype(str).str.zfill(2)
    + ":" + frb_duration_parts["seconds"].astype(str).str.zfill(2)
)

In [15]:
# Display of everything computed above, plus an excerpt of a single day from Freiburg as a sanity check:
#print(brm_daytime_driven)# if anyone knows how to get daytime as columns, let me know (I tried for an hour and then gave up)
# Hint: brm_daytime_driven["trip_duration"].unstack("daytime") pivots the "daytime" index level into columns.
#print(frb_daytime_driven)
#print(frb[frb["day"] == "2019-05-07"])

In [16]:
# Derive the hour label by cutting the last six characters off the time string
# (presumably leaving "HH" of an "HH:MM:SS" value — verify against the data).
brm["hour"] = brm["time"].str.slice(stop=-6)

# Sum the trip durations per (day, hour) and persist the result for later use.
brm_hourly_driven = brm.groupby(["day", "hour"]).agg(trip_duration=("trip_duration", "sum"))
brm_hourly_driven.to_csv("../generated/brm_hourly_driven.csv")

In [17]:
# Derive the hour label by cutting the last six characters off the time string
# (presumably leaving "HH" of an "HH:MM:SS" value — verify against the data).
frb["hour"] = frb["time"].str.slice(stop=-6)

# Sum the trip durations per (day, hour) and persist the result for later use.
frb_hourly_driven = frb.groupby(["day", "hour"]).agg(trip_duration=("trip_duration", "sum"))
frb_hourly_driven.to_csv("../generated/frb_hourly_driven.csv")