In [2]:
import pandas as pd

views = pd.read_csv('../feed-views.log', sep='\t')
views.columns = ['datetime', 'user']
views['datetime'] = pd.to_datetime(views['datetime'])

views['year'] = views['datetime'].dt.year
views['month'] = views['datetime'].dt.month
views['day'] = views['datetime'].dt.day
views['hour'] = views['datetime'].dt.hour
views['minute'] = views['datetime'].dt.minute
views['second'] = views['datetime'].dt.second

bins = [-1, 3.59, 6.59, 10.59, 16.59, 19.59, 23.59]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views['daytime'] = pd.cut(views['hour'] + views['minute']/60 + views['second']/3600, bins=bins, labels=labels)

num_elements = views.count()

time_of_day_counts = views['daytime'].value_counts()

views_sorted = views.sort_values(by=['hour', 'minute', 'second'])

max_hour_night = views[views['daytime'] == 'night']['hour'].max()

min_hour_morning = views[views['daytime'] == 'morning']['hour'].min()

example_night_user = views[views['daytime'] == 'night'].iloc[0]
example_morning_user = views[views['daytime'] == 'morning'].iloc[0]

mode_hour = views['hour'].mode()
mode_daytime = views['daytime'].mode()

earliest_morning = views[views['daytime'] == 'morning'].nsmallest(3, 'datetime')
print("3 самых утренних часа:")
print(earliest_morning[['hour', 'daytime']])

latest_morning = views[views['daytime'] == 'morning'].nlargest(3, 'datetime')
print("\n3 последние часа:")
print(latest_morning[['hour', 'daytime']])

statistics = views.describe()
print("\nОсновные статистические данные:")
print(statistics)

iqr = statistics.loc['75%']['hour'] - statistics.loc['25%']['hour']
print(f"\nМежквартильный разброс за час: {iqr}")

3 самых утренних часа:
     hour  daytime
87      9  morning
104    10  morning
126     9  morning

3 последние часа:
      hour  daytime
1039     9  morning
1038     9  morning
975     10  morning

Основные статистические данные:
                            datetime    year        month          day  \
count                           1075  1075.0  1075.000000  1075.000000   
mean   2020-05-10 09:31:19.697698048  2020.0     4.871628    13.549767   
min       2020-04-17 12:01:23.743946  2020.0     4.000000     1.000000   
25%    2020-05-10 01:13:52.574879488  2020.0     5.000000    11.000000   
50%    2020-05-11 22:48:40.637413888  2020.0     5.000000    13.000000   
75%    2020-05-14 14:44:35.500612608  2020.0     5.000000    15.000000   
max       2020-05-22 10:36:14.662600  2020.0     5.000000    30.000000   
std                              NaN     0.0     0.334659     4.907723   

              hour       minute       second  
count  1075.000000  1075.000000  1075.000000  
mean    