In [78]:
# Install the packages this analysis needs, but only those not already
# available, so re-running the script does not reinstall everything each time.
# Wrapped in local() so the helper variables do not end up in the workspace.
local({
  required <- c("tidyverse", "lubridate", "ggplot2")
  is_available <- vapply(required, requireNamespace, logical(1), quietly = TRUE)
  missing <- required[!is_available]
  if (length(missing) > 0) {
    install.packages(missing)
  }
})

In [109]:
library(tidyverse)
library(lubridate)
library(ggplot2)

In [110]:
# Read the five monthly trip files (named 202101 through 202105), keeping one
# tibble per file so they can be inspected individually before being combined.
bike_202101 <- readr::read_csv(file = "../input/bike-data/Bikesharing/bike_sharing_202101.csv")
bike_202102 <- readr::read_csv(file = "../input/bike-data/Bikesharing/bike_sharing_202102.csv")
bike_202103 <- readr::read_csv(file = "../input/bike-data/Bikesharing/bike_sharing_202103.csv")
bike_202104 <- readr::read_csv(file = "../input/bike-data/Bikesharing/bike_sharing_202104.csv")
bike_202105 <- readr::read_csv(file = "../input/bike-data/Bikesharing/bike_sharing_202105.csv")

In [111]:
# Stack the monthly tables into a single data frame of all trips.
all_trips <- list(
  bike_202101,
  bike_202102,
  bike_202103,
  bike_202104,
  bike_202105
) %>%
  bind_rows()

In [112]:
# Drop the start/end coordinate columns, then relabel the rider types
# "Subscriber"/"Customer" as "member"/"casual" (other values are left as-is).
all_trips <- all_trips %>%
  select(-start_lat, -start_lng, -end_lat, -end_lng) %>%
  mutate(
    member_casual = recode(
      member_casual,
      "Subscriber" = "member",
      "Customer" = "casual"
    )
  )

In [113]:
# Derive calendar fields from each trip's start time: the date itself plus
# month, day of month, year and weekday name as formatted strings.
all_trips <- all_trips %>%
  mutate(
    date = as.Date(started_at),
    month = format(date, "%m"),
    day = format(date, "%d"),
    year = format(date, "%Y"),
    day_of_week = format(date, "%A")
  )

In [114]:
# Ride duration (end time minus start time), expressed in seconds.
# The unit is set explicitly: difftime()'s default, units = "auto", picks
# secs/mins/hours/days from the smallest difference in the data, so the scale
# of ride_length could otherwise change from one dataset to the next.
all_trips$ride_length <- difftime(
  all_trips$ended_at,
  all_trips$started_at,
  units = "secs"
)

In [117]:
# Convert ride_length to a plain numeric vector so it can be summarised.
# as.numeric() on a difftime honours `units`, so the values are in seconds
# whatever unit the difftime was stored in; this also avoids the lossy
# round trip through character, which discards the unit.
all_trips$ride_length <- as.numeric(all_trips$ride_length, units = "secs")
# Sanity check: displays TRUE when the conversion succeeded.
is.numeric(all_trips$ride_length)

In [118]:
# Build the cleaned data set: remove trips that start at "HQ QR" and trips
# with a negative ride_length.
# The mask is written to be NA-safe. With a plain `==` / `<` comparison, a
# missing start_station_name or ride_length makes the index NA, and indexing
# rows by NA inserts a row of all-NA values instead of keeping the real trip.
# Rows with a missing station name or missing ride_length are kept unchanged.
all_trips_v2 <- all_trips[
  !(all_trips$start_station_name %in% "HQ QR") &
    (is.na(all_trips$ride_length) | all_trips$ride_length >= 0),
]

In [121]:
# Descriptive statistics (min, quartiles, mean, max) for ride_length.
all_trips_v2 %>%
  pull(ride_length) %>%
  summary()

In [122]:
# Compare ride_length between rider types: mean, median, longest and shortest.
# Columns are named in the formula and the frame is passed through `data`, so
# the result columns are called `member_casual` / `ride_length` rather than
# carrying the data frame name as a prefix.
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = mean)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = median)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = max)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = min)

In [123]:
# Mean ride_length for each rider type on each day of the week.
# Using the formula + `data` form keeps the result's column names clean
# (`member_casual`, `day_of_week`, `ride_length`).
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = all_trips_v2,
  FUN = mean
)

In [124]:
# Turn day_of_week into an ordered factor running Sunday through Saturday so
# that grouped output follows calendar order instead of alphabetical order.
all_trips_v2$day_of_week <- factor(
  all_trips_v2$day_of_week,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  ),
  ordered = TRUE
)

In [125]:
# Mean ride_length for each rider type on each day of the week, re-run so the
# rows follow the current ordering of day_of_week.
# Using the formula + `data` form keeps the result's column names clean
# (`member_casual`, `day_of_week`, `ride_length`).
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = all_trips_v2,
  FUN = mean
)

In [126]:
# Analyze ridership by rider type and weekday: number of rides and mean
# ride_length for every (member_casual, weekday) combination.
all_trips_v2 %>%
  # Weekday label derived from the trip's start time.
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length),
    # Return an ungrouped tibble; this also silences summarise()'s
    # "has grouped output by" message.
    .groups = "drop"
  ) %>%
  arrange(member_casual, weekday)

In [127]:
# Visualize the number of rides per weekday, with one bar per rider type
# placed side by side.
# Only the ride count is computed because it is the only value plotted; the
# summary is returned ungrouped (.groups = "drop"), and no explicit sort is
# needed because the bar positions come from the weekday factor.
all_trips_v2 %>%
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(number_of_rides = n(), .groups = "drop") %>%
  ggplot(aes(x = weekday, y = number_of_rides, fill = member_casual)) +
  geom_col(position = "dodge") +
  labs(x = "Weekday", y = "Number of rides", fill = "Rider type")

In [128]:
# Visualize the mean ride_length per weekday, with one bar per rider type
# placed side by side.
# Only the mean is computed because it is the only value plotted; the summary
# is returned ungrouped (.groups = "drop"), and no explicit sort is needed
# because the bar positions come from the weekday factor.
# The y-axis is in whatever unit ride_length is stored in.
all_trips_v2 %>%
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(average_duration = mean(ride_length), .groups = "drop") %>%
  ggplot(aes(x = weekday, y = average_duration, fill = member_casual)) +
  geom_col(position = "dodge") +
  labs(x = "Weekday", y = "Average ride length", fill = "Rider type")

In [129]:
# Store the mean ride_length for every member_casual x day_of_week combination.
# NOTE(review): despite its name, `counts` holds means (FUN = mean), not ride
# counts. The call is left exactly as written because the result's column names
# are presumably derived from the formula terms (including the `all_trips_v2$`
# prefix) and may be referenced by later code -- confirm before renaming.
counts <- aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)

In [133]:
# List the objects currently defined in the workspace (names starting with a
# dot are excluded, which is the default).
ls(all.names = FALSE)