In [2]:
import pandas as pd

city_bike = pd.read_csv('data/citibike-tripdata.csv', sep=',')

In [4]:
cb_data = city_bike.copy()

In [6]:
cb_data['start station id'].isna().sum()

print(cb_data[['starttime', 'stoptime']].dtypes)



starttime    object
stoptime     object
dtype: object


In [10]:
most_popular_station = cb_data['start station id'].mode()[0]
print(int(most_popular_station))

most_popular_bike = cb_data['bikeid'].mode()[0]
print(int(most_popular_bike))

281
33887


In [11]:
# Считаем количество каждого типа клиентов
user_counts = cb_data['usertype'].value_counts()

# Находим долю преобладающего типа
dominant_share = user_counts.max() / user_counts.sum()

# Округляем до сотых
print(round(dominant_share, 2))


0.77


In [12]:
# Считаем количество поездок по полу
gender_counts = cb_data['gender'].value_counts()

# Находим максимальное значение
most_trips = gender_counts.max()

# Выводим результат
print(most_trips)


183582


In [13]:
start_unique = cb_data['start station id'].nunique()
end_unique = cb_data['end station id'].nunique()
print(start_unique, end_unique)


759 765


In [17]:
min_age = cb_data['age'].min()
print(min_age)


19


In [16]:
cb_data['age']

0         41
1         52
2         46
3         37
4         27
          ..
299995    31
299996    39
299997    30
299998    46
299999    55
Name: age, Length: 300000, dtype: int64

In [18]:
start_counts = cb_data['start station name'].value_counts()
least_used = start_counts.idxmin()
print(least_used)


Eastern Pkwy & Washington Ave


In [19]:
end_counts = cb_data['end station name'].value_counts()
most_used = end_counts.idxmax()
print(most_used)


West St & Chambers St


In [20]:
cb_data = city_bike.copy()

# Удаляем два столбца с идентификаторами
cb_data.drop(['start station id', 'end station id'], axis=1, inplace=True)

# Выводим количество оставшихся столбцов
print(cb_data.shape[1])


12


In [21]:
# Шаг 1: Добавляем столбец возраста
cb_data['age'] = 2018 - cb_data['birth year']

# Шаг 2: Удаляем столбец birth year
cb_data.drop('birth year', axis=1, inplace=True)

# Шаг 3: Фильтруем клиентов старше 60 лет
older_than_60 = cb_data[cb_data['age'] > 60]

# Шаг 4: Считаем количество таких поездок
print(len(older_than_60))


11837


In [22]:
display(cb_data)

Unnamed: 0,starttime,stoptime,start station name,start station latitude,start station longitude,end station name,end station latitude,end station longitude,bikeid,usertype,gender,age
0,2018-09-01 00:00:05.2690,2018-09-01 00:27:20.6340,MacDougal St & Washington Sq,40.732264,-73.998522,Clinton Ave & Myrtle Ave,40.693261,-73.968896,25577,Subscriber,1,38
1,2018-09-01 00:00:11.2810,2018-09-01 00:02:23.4810,Cadman Plaza West & Montague St,40.693830,-73.990539,Schermerhorn St & Court St,40.691029,-73.991834,34377,Subscriber,0,49
2,2018-09-01 00:00:20.6490,2018-09-01 00:55:58.5470,1 Ave & E 62 St,40.761227,-73.960940,Smith St & 3 St,40.678724,-73.995991,30496,Subscriber,1,43
3,2018-09-01 00:00:21.7460,2018-09-01 00:07:38.5830,St James Pl & Oliver St,40.713079,-73.998512,Park Pl & Church St,40.713342,-74.009355,28866,Subscriber,2,34
4,2018-09-01 00:00:27.3150,2018-09-01 02:21:25.3080,W 13 St & 6 Ave,40.736494,-73.997044,W 4 St & 7 Ave S,40.734011,-74.002939,20943,Customer,1,24
...,...,...,...,...,...,...,...,...,...,...,...,...
299995,2018-09-05 19:08:27.8460,2018-09-05 19:15:51.4940,W 26 St & 8 Ave,40.747348,-73.997236,W 37 St & 10 Ave,40.756604,-73.997901,19531,Subscriber,2,28
299996,2018-09-05 19:08:28.3700,2018-09-05 19:20:01.5080,Kent Ave & N 7 St,40.720368,-73.961651,Graham Ave & Conselyea St,40.715143,-73.944507,34020,Subscriber,1,36
299997,2018-09-05 19:08:27.5090,2018-09-05 19:13:40.5060,Gansevoort St & Hudson St,40.739448,-74.005070,W 18 St & 6 Ave,40.739713,-73.994564,33220,Subscriber,1,27
299998,2018-09-05 19:08:29.2300,2018-09-05 20:04:29.3220,W 11 St & 6 Ave,40.735324,-73.998004,Broadway & W 60 St,40.769155,-73.981918,34744,Subscriber,0,43


In [23]:
# Убедимся, что столбцы в datetime
cb_data['starttime'] = pd.to_datetime(cb_data['starttime'])
cb_data['stoptime'] = pd.to_datetime(cb_data['stoptime'])

# Вычисляем длительность в минутах
cb_data['trip duration'] = (cb_data['stoptime'] - cb_data['starttime']).dt.total_seconds() // 60

# Выводим длительность поездки под индексом 3
print(int(cb_data.loc[3, 'trip duration']))


7


In [25]:
# Создаём признак weekend — 1, если суббота (5) или воскресенье (6)
cb_data['weekend'] = cb_data['starttime'].dt.dayofweek.isin([5, 6]).astype(int)

# Подсчёт поездок, начинавшихся в выходные
num_weekend_trips = cb_data['weekend'].sum()

print(num_weekend_trips)


115135


In [26]:
# Получаем час начала поездки
hours = cb_data['starttime'].dt.hour

# Создаём функцию для определения времени суток
def get_time_of_day(hour):
    if 0 <= hour <= 6:
        return 'night'
    elif 6 < hour <= 12:
        return 'morning'
    elif 12 < hour <= 18:
        return 'day'
    else:  # 18 < hour <= 23
        return 'evening'

# Применяем функцию к каждому часу
cb_data['time_of_day'] = hours.apply(get_time_of_day)

# Считаем количество поездок для ночи и дня
count_day = (cb_data['time_of_day'] == 'day').sum()
count_night = (cb_data['time_of_day'] == 'night').sum()

# Вычисляем во сколько раз дневных поездок больше ночных
ratio = round(count_day / count_night)

print(ratio)

9
