In [0]:
# Install (or upgrade, -U) the PyDrive package through the shell; -q keeps
# pip's output quiet.
!pip install -U -q PyDrive

In [0]:
import pandas as pd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate the Colab user first, then give PyDrive the application-default
# credentials and build the `drive` client used by the download cell.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
!mkdir data

In [0]:
# Build a PyDrive file object for the given Drive file id and save its
# contents locally as data/feed-views.log.
file_id = '1kgByP3EZHL8xAm-oGaBpf0-fPdVIYRaY'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('data/feed-views.log')

In [0]:
# Load the tab-separated log, which has no header row: the first field is
# named 'datetime', the second 'user', and 'user' becomes the index.
views = pd.read_csv(
    'data/feed-views.log',
    sep='\t',
    header=None,
    names=['datetime', 'user'],
    index_col='user',
)

In [0]:
# Convert the 'datetime' column with pd.to_datetime so later cells can call
# datetime methods on its values.
views['datetime'] = pd.to_datetime(views['datetime'])

In [0]:
# Split the timestamp into separate integer columns.
# `cols` holds the new column names; `dt_fields` holds the matching attribute
# names on the pandas `.dt` accessor (same order).
cols = ['year', 'month', 'day', 'hour', 'min', 'sec']
dt_fields = ['year', 'month', 'day', 'hour', 'minute', 'second']

for col, field in zip(cols, dt_fields):
    # Vectorised extraction over the whole column instead of a Python call
    # per row. Newer pandas returns int32 from `.dt` fields, so cast
    # explicitly to keep the columns int64 on every version.
    views[col] = getattr(views['datetime'].dt, field).astype('int64')

In [0]:
# Bucket each hour into a named part of the day. With right=False the bins
# are left-closed: [0, 4), [4, 7), [7, 11), [11, 17), [17, 20), [20, 24).
labels = [
    'night',
    'early morning',
    'morning',
    'afternoon',
    'early evening',
    'evening',
]
hour_edges = [0, 4, 7, 11, 17, 20, 24]

views['daytime'] = pd.cut(
    views['hour'],
    bins=hour_edges,
    labels=labels,
    right=False,
    include_lowest=True,
)

In [10]:
# Preview the first rows, including the derived date-part and daytime columns.
views.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,min,sec,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon


In [11]:
# Show column dtypes, non-null counts and memory usage.
views.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1076 entries, artem to artem
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   datetime  1076 non-null   datetime64[ns]
 1   year      1076 non-null   int64         
 2   month     1076 non-null   int64         
 3   day       1076 non-null   int64         
 4   hour      1076 non-null   int64         
 5   min       1076 non-null   int64         
 6   sec       1076 non-null   int64         
 7   daytime   1076 non-null   category      
dtypes: category(1), datetime64[ns](1), int64(6)
memory usage: 68.5+ KB


In [12]:
# Number of non-null values in each column.
views.count()

datetime    1076
year        1076
month       1076
day         1076
hour        1076
min         1076
sec         1076
daytime     1076
dtype: int64

In [13]:
# How many views fall into each part of the day, most frequent first.
views['daytime'].value_counts()

evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: daytime, dtype: int64

In [14]:
# Order the views by time of day (hour, then minute, then second),
# ignoring the calendar date.
views.sort_values(['hour', 'min', 'sec'])

Unnamed: 0_level_0,datetime,year,month,day,hour,min,sec,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


In [15]:
# Latest hour that still falls in the 'night' bucket. Series.max() does the
# reduction inside pandas instead of iterating the Series element by element
# in Python; int() keeps the displayed result a plain Python int.
int(views.loc[views['daytime'] == 'night', 'hour'].max())

3

In [16]:
# Index label (the user) at which the hour of a 'night' view is largest.
views.loc[views['daytime'] == 'night', 'hour'].idxmax()

'konstantin'

In [17]:
# Earliest hour observed in the 'morning' bucket. Series.min() does the
# reduction inside pandas instead of iterating the Series element by element
# in Python; int() keeps the displayed result a plain Python int.
int(views.loc[views['daytime'] == 'morning', 'hour'].min())

8

In [18]:
# Index label (the user) at which the hour of a 'morning' view is smallest.
views.loc[views['daytime'] == 'morning', 'hour'].idxmin()

'alexander'

In [19]:
# Most frequent hour of the day across all views.
views['hour'].mode()

0    22
dtype: int64

In [20]:
# Most frequent part of the day across all views.
views['daytime'].mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): [night < early morning < morning < afternoon < early evening < evening]

In [21]:
# The three 'morning' views with the smallest hour values.
views.loc[views['daytime'] == 'morning'].nsmallest(n=3, columns='hour')

Unnamed: 0_level_0,datetime,year,month,day,hour,min,sec,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alexander,2020-05-15 08:16:03.918402,2020,5,15,8,16,3,morning
alexander,2020-05-15 08:35:01.471463,2020,5,15,8,35,1,morning
artem,2020-04-24 09:42:47.598208,2020,4,24,9,42,47,morning


In [22]:
# The three 'morning' views with the largest hour values.
views.loc[views['daytime'] == 'morning'].nlargest(n=3, columns='hour')

Unnamed: 0_level_0,datetime,year,month,day,hour,min,sec,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
konstantin,2020-04-18 10:53:52.623447,2020,4,18,10,53,52,morning
maxim,2020-04-18 10:56:55.833899,2020,4,18,10,56,55,morning
konstantin,2020-04-18 10:57:37.331258,2020,4,18,10,57,37,morning


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
views.describe()

Unnamed: 0,year,month,day,hour,min,sec
count,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
std,0.0,0.335557,4.906567,6.95549,17.689388,17.405506
min,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020.0,5.0,30.0,23.0,59.0,59.0


In [24]:
# Interquartile range of the viewing hour: Q3 - Q1.
# One quantile() call on the single column gives both quartiles, so the
# whole frame is not summarised twice just to read two numbers.
hour_quartiles = views['hour'].quantile([0.25, 0.75])
iqr = hour_quartiles[0.75] - hour_quartiles[0.25]
iqr

9.0