In [105]:
import pandas as pd
from datetime import datetime
import time

In [106]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [107]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (int/str) month - number of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - pandas DataFrame containing city data filtered by month and day
    """
    CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
    # load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df["Start Time"])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df["Start Time"].dt.month
    df['day_of_week'] = df["Start Time"].dt.weekday_name


    # filter by month if applicable
    if month != 'all':
        # filter by month to create the new dataframe
        df = df[df['month'] == month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df["day_of_week"] == day.title()]
    
    return df

In [6]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (int/str) month - number of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True :
        city = input("choose the city : \n 1 - chicago \n 2 - new york city \n 3- washington").lower()
        if city in CITY_DATA: 
            break;
        else : print("ERROR : invalid city name!!\n" + 40 * "-" )

    # TO DO: get user input for month (all, january, february, ... , june)
    while True :
        month = input("Enter the name of the  month (all, january, february, ... , june):").lower()
        if month == "all":  
            break
        else:
            try : 
                month = datetime.strptime(month, '%B').month
                break;
            except : 
                    print("ERROR : invalid month name!!\n" + 40 * "-" )
    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
    while True :
        day = input("Enter the name of the  week (all, monday, tuesday, ... sunday):").lower()
        if day == "all":  
            break
        else: 
            try : 
                datetime.strptime(day, '%A')
                break;
            except : 
                print("ERROR : invalid week day name!!\n" + 40 * "-" )

    print('-'*40)
    return city, month, day

In [7]:
city , month , week = get_filters()


Hello! Let's explore some US bikeshare data!
choose the city : 
 1 - chicago 
 2 - new york city 
 3- washingtonchicago
Enter the name of the  month (all, january, february, ... , june):all
Enter the name of the  week (all, monday, tuesday, ... sunday):amm
ERROR : invalid week day name!!
----------------------------------------
Enter the name of the  week (all, monday, tuesday, ... sunday):all
----------------------------------------


In [108]:
df = load_data("washington",3,"saturday")

In [111]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""
    print("\n\n"+ 100 * '-')
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    df["time_stats"]=  df['Start Time'].dt.month
    print("the most common month for travelling is           : %s "%df["time_stats"].mode()[0])
     # TO DO: display the most common day of week
    df["time_stats"]=  df['Start Time'].dt.day
    print("the most common day of the week for travelling is : %s "%df["time_stats"].mode()[0])
    # TO DO: display the most common start hour
    df["time_stats"]=  df['Start Time'].dt.hour
    print("the most common start hour for travelling is      : %s "%df["time_stats"].mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)

In [112]:
time_stats(df)



----------------------------------------------------------------------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month for travelling is           : 3 
the most common day of the week for travelling is : 25 
the most common start hour for travelling is      : 14 

This took 0.008783102035522461 seconds.
----------------------------------------------------------------------------------------------------


In [8]:
df["Start Station"].describe()

count                        2113
unique                        395
top       Streeter Dr & Grand Ave
freq                           31
Name: Start Station, dtype: object

In [103]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""
    print("\n\n"+ 100 * '-')
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # TO DO: display most commonly used start station
    print("the most commonly used start station         : %s "%df["Start Station"].mode()[0])


    # TO DO: display most commonly used end station
    print("the most commonly used end station           : %s "%df["End Station"].mode()[0])

    # TO DO: display most frequent combination of start station and end station trip
    print("\nthe most most frequent combination of start station and end station trip:")
    result = df[["Start Station","End Station"]].groupby(['Start Station', 'End Station']).size().nlargest(1)
    print("Start Station  : %s"%result.index[0][0])
    print("End   Station  : %s"%result.index[0][1])
    print("Count of trips : %s"%result.values[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)

In [113]:
station_stats(df)



----------------------------------------------------------------------------------------------------

Calculating The Most Popular Stations and Trip...

the most commonly used start station         : Jefferson Dr & 14th St SW 
the most commonly used end station           : Jefferson Dr & 14th St SW 

the most most frequent combination of start station and end station trip:
Start Station  : Jefferson Memorial
End   Station  : Jefferson Memorial
Count of trips : 21

This took 0.015615463256835938 seconds.
----------------------------------------


7

In [12]:
df.columns

Index(['Unnamed: 0', 'Start Time', 'End Time', 'Trip Duration',
       'Start Station', 'End Station', 'User Type', 'Gender', 'Birth Year',
       'month', 'day_of_week', 'time_stats'],
      dtype='object')

In [13]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""
    print("\n\n"+ 100 * '-')
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    print("the total travel time : %s "%sum(df["Trip Duration"]))

    # TO DO: display mean travel time
    print("the mean travel time  : %s "%df["Trip Duration"].mean())

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)


In [14]:
trip_duration_stats(df)


Calculating Trip Duration...

the total travel time : 1644923 
the mean travel time  : 778.4775201135826 

This took 0.0009741783142089844 seconds.
----------------------------------------


In [49]:
user = df["User Type"].value_counts()
for index,value in user.items(): 
    print(index,value)

Subscriber 1826
Customer 287


In [124]:
def user_stats(df):
    """Displays statistics on bikeshare users."""
    print("\n\n"+ 100 * '-')
    print('Calculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    userTypes = df["User Type"].value_counts()
    print("\nthe count of user types : %s different user Types "%len(userTypes))
    for index, value in userTypes.items():
        print("   -   user Type :  {:10s}   ---- count : {:d}".format(index,value))

    # TO DO: Display counts of gender
    try:
        gender = df["Gender"].value_counts()
        print("\nthe count of Gender  : %s different Gender Types "%len(userTypes))
        for index, value in gender.items():
            print("   -   Gender    :  {:10s}   ---- count : {:d}".format(index,value))

        # TO DO: Display earliest, most recent, and most common year of birth
        print("\nthe earliest    year of birth  : %s"%df["Birth Year"].min())
        print("the most recent year of birth  : %s"%df["Birth Year"].max())
        print("the most common year of birth  : %s"%df["Birth Year"].mode()[0])
    except : 
        print("\n\nERROR : can not calculate statics for Birth year and Gender for this city!!\n" )
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)


In [125]:
user_stats(df)



----------------------------------------------------------------------------------------------------
Calculating User Stats...


the count of user types : 2 different user Types 
   -   user Type :  Subscriber   ---- count : 3300
   -   user Type :  Customer     ---- count : 2644


ERROR : can not calculate statics for Birth year and Gender for this city!!


This took 0.0019526481628417969 seconds.
----------------------------------------------------------------------------------------------------


In [115]:
df.columns

Index(['Unnamed: 0', 'Start Time', 'End Time', 'Trip Duration',
       'Start Station', 'End Station', 'User Type', 'month', 'day_of_week',
       'time_stats'],
      dtype='object')