In [None]:
# all imports go here
%matplotlib inline
import pandas as pd
import matplotlib as mpl

In [None]:
# Load the viewing-activity export into a DataFrame.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='ViewingActivity.csv')

# (rows, columns) of the raw data
df.shape

In [None]:
# preview the first five rows of the raw data
df.head(n=5)

## Preparing the data

In [None]:
# Replace the CSV headers with shorter, code-friendly names.
# The assignment is positional: the list must contain exactly one name per
# column, in the order the columns appear in the file.
df.columns = [
    'name',
    'startTime',
    'duration',
    'attr',
    'title',
    'suppVideoType',
    'device',
    'bookmark',
    'latestBookmark',
    'country',
]

# confirm the new column names
df.head(n=5)

In [None]:
# Optional: exclude one profile from the analysis.
# To remove a user's rows, uncomment the line below and change User1 to that user's name.
# Note: str.contains does substring matching, so 'User1' would also match e.g. 'User10'.
# df = df[df['name'].str.contains('User1') == False]

# preview the first rows of the (possibly filtered) data
df.head()

In [None]:
# show the dtype of every column
df.dtypes

In [None]:
# Parse the startTime column into timezone-aware datetimes;
# utc=True makes the resulting values UTC-based.
df['startTime'] = pd.to_datetime(arg=df['startTime'], utc=True)

# startTime should now be listed with a datetime dtype
df.dtypes

In [None]:
# Convert startTime from UTC to local time in Europe/Berlin (CET/CEST):
#   1. move startTime into the index,
#   2. convert the index to the target timezone,
#   3. move it back out, so startTime is a regular (first) column again.
df = (
    df.set_index('startTime')
      .tz_convert('Europe/Berlin')
      .reset_index()
)

df.head()

In [None]:
# Parse the duration column into timedeltas so that durations
# can be compared and summed in the cells below.
df['duration'] = pd.to_timedelta(arg=df['duration'])

# duration should now be listed with a timedelta dtype
df.dtypes

In [None]:
# change show_name to the show you want to analyze
show_name = 'The Office (U.S.)'

# Keep only the rows whose title contains show_name.
# regex=False: show_name is matched as a literal substring, so characters
#              such as '(' and '.' are not interpreted as a pattern.
# na=False:    rows with a missing title count as "no match"; without it the
#              mask would contain NaN and the boolean indexing would raise.
show = df[df['title'].str.contains(show_name, regex=False, na=False)]

# number of matching rows and columns
show.shape

In [None]:
# Drop plays of one minute or less (presumably previews or accidental
# starts -- adjust the threshold if that assumption does not hold).
# An explicit Timedelta is used instead of comparing against a string, and
# .copy() gives `show` its own data so that the column assignments in the
# following cells do not raise SettingWithCopyWarning.
show = show[show['duration'] > pd.Timedelta(minutes=1)].copy()

# number of rows left after the filter
show.shape

In [None]:
# total watch time: the sum of the duration column over all remaining rows
duration = show['duration'].sum()

# report the total for the selected show
print('You watched', show_name, 'a duration of', duration, sep=' ')

# Creating Some Diagrams Visualizing the Data

In [None]:
# Add two helper columns derived from startTime:
#   weekday - English day name ('Monday' ... 'Sunday')
#   hour    - hour of day as an integer (0-23)
#
# dt.day_name() yields the names directly, replacing the previous
# "weekday number + Series.replace(..., inplace=True)" approach: an in-place
# replace on show['weekday'] is a chained assignment, which warns in recent
# pandas versions and has no effect under Copy-on-Write.
# assign() returns a new DataFrame, so no view of `df` is mutated.
show = show.assign(
    weekday=show['startTime'].dt.day_name(),
    hour=show['startTime'].dt.hour,
)

show.head()

## Visualize Episodes Watched by Weekday

In [None]:
# Calendar order for the x-axis.
weekdays = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]

# Turn weekday into an ordered categorical so that sorting follows the
# calendar week instead of alphabetical order.
show['weekday'] = pd.Categorical(show['weekday'], categories=weekdays, ordered=True)

# Count rows per weekday (value_counts orders by count), then reorder
# the result by weekday.
show_by_day = show['weekday'].value_counts().sort_index()

# Larger font; this changes matplotlib's global rcParams, so it also
# applies to every figure drawn afterwards.
mpl.rcParams['font.size'] = 22

show_by_day.plot(
    kind='bar',
    figsize=(30, 10),
    title='Episodes Watched By Day',
)

## Visualize Episodes Watched by Hour

In [None]:
# Turn hour into an ordered categorical covering every hour of the day (0-23),
# so hours without any plays still get a (zero-height) bar.
show['hour'] = pd.Categorical(show['hour'], categories=list(range(24)), ordered=True)

# Count rows per hour, then order the result by hour of day.
show_by_hour = show['hour'].value_counts().sort_index()

show_by_hour.plot(
    kind='bar',
    figsize=(30, 10),
    title='Episodes Watched By Hour',
)