## Import pandas library

In [2]:
import pandas as pd

## Read the data into a DataFrame

In [3]:
# The log is tab-separated and has no header row, so the column names are
# supplied explicitly instead of being read from the first line.
log_columns = ['datetime', 'user']
views = pd.read_csv('../data/feed-views.log', sep='\t', names=log_columns, header=None)
views

Unnamed: 0,datetime,user
0,2020-04-17 12:01:08.463179,artem
1,2020-04-17 12:01:23.743946,artem
2,2020-04-17 12:27:30.646665,artem
3,2020-04-17 12:35:44.884757,artem
4,2020-04-17 12:35:52.735016,artem
...,...,...
1071,2020-05-21 18:45:20.441142,valentina
1072,2020-05-21 23:03:06.457819,maxim
1073,2020-05-21 23:23:49.995349,pavel
1074,2020-05-21 23:49:22.386789,artem


## Convert datetime to datetime64[ns]

In [4]:
# Parse the raw timestamp strings once, then overwrite the column so the
# `.dt` accessor is available to the cells that follow.
parsed_timestamps = pd.to_datetime(views['datetime'])
views['datetime'] = parsed_timestamps

## Extract Year, Month, Day, Hour, Minute, and Second

In [5]:
# Each new column has the same name as the `.dt` attribute it is read from,
# so one loop covers all six components (columns are added in this order).
for component in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    views[component] = getattr(views['datetime'].dt, component)
views

Unnamed: 0,datetime,user,year,month,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22


## Create the daytime Column

In [6]:
# Bin edges are hours of the day. Because `right=False` is passed to pd.cut,
# every bin includes its left edge and excludes its right edge.
bins = [0, 4, 7, 11, 17, 20, 24]
labels = [
    'night',          # [0, 4)
    'early morning',  # [4, 7)
    'morning',        # [7, 11)
    'afternoon',      # [11, 17)
    'early evening',  # [17, 20)
    'evening',        # [20, 24)
]

views['daytime'] = pd.cut(views['hour'], bins, right=False, labels=labels)
views

Unnamed: 0,datetime,user,year,month,day,hour,minute,second,daytime
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8,afternoon
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23,afternoon
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30,afternoon
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44,afternoon
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20,early evening
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6,evening
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49,evening
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22,evening


## Set user as the Index

In [7]:
# Move `user` out of the columns and into the row index.
# Assignment replaces `inplace=True`, and the guard keeps the cell safe to
# re-run: once `user` is the index it is no longer a column, so a second
# unguarded call would raise a KeyError.
if 'user' in views.columns:
    views = views.set_index('user')
views

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening


## Count the Number of Elements

In [8]:
# DataFrame.count() returns, for every column, the number of non-NA cells.
# `user` is the index at this point, so it is not among the counted columns.
num_elements = views.count()
num_elements

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

## Count Elements in Each daytime Category

In [9]:
# Number of page views per daytime label; value_counts() orders the result
# from the most frequent label to the least frequent one.
daytime_counts = views['daytime'].value_counts()
daytime_counts

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: count, dtype: int64

## Sort Values by Hour, Minute, and Second

In [10]:
# Order rows by time of day only (the date is ignored): hour first, then
# minute and second as tie-breakers. `views` itself is left unsorted.
sort_keys = ['hour', 'minute', 'second']
sorted_views = views.sort_values(by=sort_keys)
sorted_views

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


## Calculate the minimum and maximum for the hour column and the mode for the daytime column

In [11]:
# Smallest and largest hour value seen anywhere in the log.
hour_column = views['hour']
min_hour, max_hour = hour_column.min(), hour_column.max()

# mode() returns a Series because ties are possible; keep its first entry.
daytime_mode = views['daytime'].mode()[0]

print(f"Minimum hour: {min_hour}\nMaximum hour: {max_hour}\nMost common daytime: {daytime_mode}")

Minimum hour: 0
Maximum hour: 23
Most common daytime: evening


## Calculate Maximum Hour for night and Minimum Hour for morning

In [12]:
# Largest hour among the rows labelled `night`.
max_hour_night = views.loc[views['daytime'] == 'night', 'hour'].max()
# Smallest hour among the rows labelled `morning`.
min_hour_morning = views.loc[views['daytime'] == 'morning', 'hour'].min()

print(f"Maximum hour for night: {max_hour_night}\nMinimum hour for morning: {min_hour_morning}")


Maximum hour for night: 3
Minimum hour for morning: 8


## Who visited the page at the maximum hour for night?

In [13]:
# The index holds user names, so idxmax() yields the user on the first row
# that reaches the largest `night` hour (one example, not every such user).
night_hours = views.loc[views['daytime'] == 'night', 'hour']
visited_at_max_hour_night = night_hours.idxmax()
visited_at_max_hour_night

'konstantin'

## Who visited the page at the minimum hour for morning?

In [14]:
# The index holds user names, so idxmin() yields the user on the first row
# that reaches the smallest `morning` hour (one example, not every such user).
morning_hours = views.loc[views['daytime'] == 'morning', 'hour']
visited_at_min_hour_morning = morning_hours.idxmin()
visited_at_min_hour_morning

'alexander'

## Show the 3 earliest hours in the morning

In [15]:
# Restrict to `morning` rows, take the three rows with the smallest hour,
# and keep only the hour column (still indexed by user).
morning_views = views[views['daytime'] == 'morning']
earliest_3_morning = morning_views.nsmallest(3, 'hour')['hour']
earliest_3_morning

user
alexander    8
alexander    8
artem        9
Name: hour, dtype: int32

## Show the 3 latest hours in the morning

In [16]:
# Restrict to `morning` rows, take the three rows with the largest hour,
# and keep only the hour column (still indexed by user).
morning_views = views[views['daytime'] == 'morning']
latest_3_morning = morning_views.nlargest(3, 'hour')['hour']
latest_3_morning

user
konstantin    10
maxim         10
konstantin    10
Name: hour, dtype: int32

## Get the basic statistics for the columns

In [17]:
# Summary statistics (count, mean, min, quartiles, max, std) for the datetime
# and numeric columns; the categorical `daytime` column is not included by a
# default describe() call. The quartile rows are reused for the IQR below.
stats = views.describe()
stats

Unnamed: 0,datetime,year,month,day,hour,minute,second
count,1076,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020-05-10 09:00:41.211420672,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
min,2020-04-17 12:01:08.463179,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020-05-10 01:13:49.857472,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020-05-11 22:48:35.302552832,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020-05-14 14:44:34.749530624,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020-05-22 10:36:14.662600,2020.0,5.0,30.0,23.0,59.0,59.0
std,,0.0,0.335557,4.906567,6.95549,17.689388,17.405506


## Calculate Interquartile Range (IQR) for hour

In [19]:
# IQR = Q3 - Q1, read from the quartile rows that describe() already computed
# for the `hour` column.
hour_stats = stats['hour']
q1, q3 = hour_stats['25%'], hour_stats['75%']
iqr = q3 - q1
iqr

np.float64(9.0)