In [None]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower-case) to the CSV file that load_data reads.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and matched
    case-insensitively; the user is re-prompted until the answer is valid.

    Returns:
        (str) city - lower-cased name of the city to analyze (a key of CITY_DATA)
        (str) month - title-cased name of the month to filter by, or "All" to apply no month filter
        (str) day - title-cased name of the day of week to filter by, or "All" to apply no day filter
    """
    valid_months = ('all', 'january', 'february', 'march', 'april', 'may', 'june')
    valid_days = ('all', 'monday', 'tuesday', 'wednesday', 'thursday',
                  'friday', 'saturday', 'sunday')

    def ask(first_prompt, retry_prompt, valid):
        """Prompt until the stripped, lower-cased answer is one of `valid`."""
        answer = input(first_prompt).strip().lower()
        while answer not in valid:
            answer = input(retry_prompt).strip().lower()
        return answer

    print('Hello! Let\'s explore some US bikeshare data!')

    # City is returned lower-cased because it is used as a CITY_DATA key.
    city = ask('Which city do you want to see?',
               'Not found, please enter (chicago, new york city, or washington) again:',
               CITY_DATA)

    # Month and day are returned title-cased ("January", "Monday", "All").
    month = ask('Which month do you want to see?',
                'Not found, please enter (all, january, february, ..., or june) again:',
                valid_months).title()

    day = ask('Which day do you want to see?',
              'Not found, please enter (all, monday, tuesday, ..., or sunday) again:',
              valid_days).title()

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The month and day filters are matched case-insensitively, so both
    "all"/"All" and "january"/"January" are accepted.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with added 'month' and 'day' columns holding the English month
             and weekday names derived from 'Start Time'
    """

    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month_name()
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() replaces it.
    df['day'] = df['Start Time'].dt.day_name()

    # Normalize to the title-cased form produced by month_name()/day_name().
    month_name = month.strip().title()
    day_name = day.strip().title()

    if month_name != 'All':
        df = df[df['month'] == month_name]
    if day_name != 'All':
        df = df[df['day'] == day_name]
    return df


def time_stats(df):
    """Print the most frequent month, weekday and start hour found in df.

    Reads the 'month', 'day' and 'Start Time' columns and finishes by
    printing how long the calculation took.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    began_at = time.time()

    # Most frequent month name.
    top_month = df['month'].mode()[0]
    print('The most common month is:', top_month, '\n')

    # Most frequent weekday name.
    top_day = df['day'].mode()[0]
    print('The most common day of week is:', top_day, '\n')

    # Most frequent hour of the trip start timestamp.
    start_hours = df['Start Time'].dt.hour
    top_hour = start_hours.mode()[0]
    print('The most common start hour is:', top_hour, '\n')

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common start/end pair, followed by the elapsed time. Reads the
    'Start Station' and 'End Station' columns and leaves df unmodified.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # Most commonly used start station.
    print('The most commonly used start station is:', df['Start Station'].mode()[0])

    # Most commonly used end station.
    print('\nThe most commonly used end station is:', df['End Station'].mode()[0])

    # Join both stations into one label and take its mode. The label lives in
    # a temporary Series instead of a new df column, so the caller's frame is
    # not mutated (and no SettingWithCopyWarning is raised on a filtered frame).
    trips = df['Start Station'] + ' --> ' + df['End Station']
    print('\nThe most frequent combination of start station and end station trip is:', trips.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Print the sum and the mean of the 'Trip Duration' column of df.

    Finishes by printing how long the calculation took.
    """

    print('\nCalculating Trip Duration...\n')
    began_at = time.time()

    # Total of all trip durations.
    total_duration = df['Trip Duration'].sum()
    print('The total travel time is:', total_duration)

    # Average trip duration.
    mean_duration = df['Trip Duration'].mean()
    print('\nThe mean travel time is:', mean_duration)

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each user type, the count of each gender (when df
    has a 'Gender' column) and the earliest, most recent and most common
    birth year (when df has a 'Birth Year' column), followed by the elapsed
    time. df is not modified.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Counts of user types; value_counts() is computed once and iterated by label.
    print('The user types are:')
    for user_type, count in df['User Type'].value_counts().items():
        print(user_type, ':', count)

    # Counts of gender, only when the column exists.
    if 'Gender' in df.columns:
        print('\nThe counts of gender are:')
        for gender, count in df['Gender'].value_counts().items():
            print(gender, ':', count)
    else:
        print('\nWashington has no gender data.')

    # Earliest, most recent, and most common year of birth.
    if 'Birth Year' in df.columns:
        # Birth years are plain numbers. pd.to_datetime would interpret them
        # as nanoseconds since the epoch and report 1970 for every row, so
        # they are handled numerically instead.
        birth_years = pd.to_numeric(df['Birth Year'], errors='coerce').dropna()
        if birth_years.empty:
            print('\nNo Birth Year values are available for this selection.')
        else:
            print('\nThe earliest, most recent, and most common year of birth is:',
                  int(birth_years.min()),
                  int(birth_years.max()),
                  int(birth_years.mode()[0]))
    else:
        print('\nWashington has no Birth Year data.')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def main():
    """Run the interactive session: ask for filters, load the data, print every report.

    The cycle repeats for as long as the user answers 'yes' (in any letter
    case) to the restart question.
    """
    keep_going = True
    while keep_going:
        filters = get_filters()
        df = load_data(*filters)

        # Print each statistics section in turn for the filtered data.
        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when run as a script, not when imported.
if __name__ == "__main__":
    main()


In [None]:
# Project 2 review note: the station combination is found by grouping with
# groupby and taking the most frequent (start, end) pair.
# NOTE(review): relies on a DataFrame named `df` already existing in scope - confirm.
trip_counts = df.groupby(['Start Station', 'End Station']).size()
top = trip_counts.idxmax()
start_station, end_station = top
print("The most frequent combination of start station and end station trip is {} to {}".format(start_station, end_station))