In [1]:
import pandas as pd
import sys
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt
import datetime
from sklearn.linear_model import LinearRegression
import seaborn as sns
sns.set(style="ticks")
%matplotlib inline

In [3]:
hubway_data = pd.read_csv('hubway_trips.csv')
hubway_data.head(7)

Unnamed: 0,seq_id,hubway_id,status,duration,start_date,strt_statn,end_date,end_statn,bike_nr,subsc_type,zip_code,birth_date,gender
0,1,8,Closed,9,7/28/2011 10:12:00,23.0,7/28/2011 10:12:00,23.0,B00468,Registered,'97217,1976.0,Male
1,2,9,Closed,220,7/28/2011 10:21:00,23.0,7/28/2011 10:25:00,23.0,B00554,Registered,'02215,1966.0,Male
2,3,10,Closed,56,7/28/2011 10:33:00,23.0,7/28/2011 10:34:00,23.0,B00456,Registered,'02108,1943.0,Male
3,4,11,Closed,64,7/28/2011 10:35:00,23.0,7/28/2011 10:36:00,23.0,B00554,Registered,'02116,1981.0,Female
4,5,12,Closed,12,7/28/2011 10:37:00,23.0,7/28/2011 10:37:00,23.0,B00554,Registered,'97214,1983.0,Female
5,6,13,Closed,19,7/28/2011 10:39:00,23.0,7/28/2011 10:39:00,23.0,B00456,Registered,'02021,1951.0,Male
6,7,14,Closed,24,7/28/2011 10:47:00,23.0,7/28/2011 10:47:00,23.0,B00554,Registered,'02140,1971.0,Female


In [4]:
hubway_stn_data = pd.read_csv('hubway_stations.csv')
hubway_stn_data.head(7)

Unnamed: 0,id,terminal,station,municipal,lat,lng,status
0,3,B32006,Colleges of the Fenway,Boston,42.340021,-71.100812,Existing
1,4,C32000,Tremont St. at Berkeley St.,Boston,42.345392,-71.069616,Existing
2,5,B32012,Northeastern U / North Parking Lot,Boston,42.341814,-71.090179,Existing
3,6,D32000,Cambridge St. at Joy St.,Boston,42.361285,-71.06514,Existing
4,7,A32000,Fan Pier,Boston,42.353412,-71.044624,Existing
5,8,A32001,Union Square - Brighton Ave. at Cambridge St.,Boston,42.353334,-71.137313,Existing
6,9,A32002,Agganis Arena - 925 Comm Ave.,Boston,42.351313,-71.116174,Existing


In [5]:
hubway_data.dtypes

seq_id          int64
hubway_id       int64
status         object
duration        int64
start_date     object
strt_statn    float64
end_date       object
end_statn     float64
bike_nr        object
subsc_type     object
zip_code       object
birth_date    float64
gender         object
dtype: object

#    Part1--- Find Missing values in hubway_trips.csv

In [6]:
hubway_data.isnull().sum()

seq_id              0
hubway_id           0
status              0
duration            0
start_date          0
strt_statn         14
end_date            0
end_statn          45
bike_nr           466
subsc_type          0
zip_code       472766
birth_date    1228381
gender         472611
dtype: int64

# Part 2--- Remove records in which strt_statn or end_statn are missing

In [7]:
indexNames = hubway_data[ (hubway_data['strt_statn'].isnull()) | (hubway_data['end_statn'].isnull()) ].index
hubway_data.drop(indexNames , inplace=True)
hubway_data.isnull().sum()  # to weather check the station are removed or not

seq_id              0
hubway_id           0
status              0
duration            0
start_date          0
strt_statn          0
end_date            0
end_statn           0
bike_nr           465
subsc_type          0
zip_code       472747
birth_date    1228358
gender         472592
dtype: int64

# Part 3--- Which bike is issued most of the time i.e bike_nr

In [8]:
hubway_data['bike_nr'].value_counts().idxmax()

'B00490'

# Part 4--- Which is the famous start station?

In [9]:
hubway_data['strt_statn'].value_counts().idxmax()

22.0

# Part 5--- Which is the famous end station?

In [10]:
hubway_data['end_statn'].value_counts().idxmax()

22.0

# Part 6--- How many male and female issued bikes?

In [11]:
hubway_data['gender'].value_counts().reset_index()

Unnamed: 0,index,gender
0,Male,834688
1,Female,271693


# Part 7--- What is the favorite check-out hour? 

In [12]:
hubway_data['start_date'].value_counts().idxmax()

'7/30/2013 17:18:00'

# Part 8--- Which is the famous start station?

In [13]:
hubway_data['strt_statn'].value_counts().idxmax()

22.0

# Part 9--- Find missing values in hubways_stations.csv

In [14]:
hubway_stn_data.isnull().sum()

id           0
terminal     0
station      0
municipal    0
lat          0
lng          0
status       0
dtype: int64

# Part 10--- Join hubways_stations.csv with relevant record in hubway_data.csv

In [15]:
merged_inner = pd.merge(left=hubway_data,right=hubway_stn_data, left_on='hubway_id', right_on='id',)
merged_inner.shape
merged_inner

Unnamed: 0,seq_id,hubway_id,status_x,duration,start_date,strt_statn,end_date,end_statn,bike_nr,subsc_type,zip_code,birth_date,gender,id,terminal,station,municipal,lat,lng,status_y
0,1,8,Closed,9,7/28/2011 10:12:00,23.0,7/28/2011 10:12:00,23.0,B00468,Registered,'97217,1976.0,Male,8,A32001,Union Square - Brighton Ave. at Cambridge St.,Boston,42.353334,-71.137313,Existing
1,2,9,Closed,220,7/28/2011 10:21:00,23.0,7/28/2011 10:25:00,23.0,B00554,Registered,'02215,1966.0,Male,9,A32002,Agganis Arena - 925 Comm Ave.,Boston,42.351313,-71.116174,Existing
2,3,10,Closed,56,7/28/2011 10:33:00,23.0,7/28/2011 10:34:00,23.0,B00456,Registered,'02108,1943.0,Male,10,A32003,B.U. Central - 725 Comm. Ave.,Boston,42.350075,-71.105884,Existing
3,4,11,Closed,64,7/28/2011 10:35:00,23.0,7/28/2011 10:36:00,23.0,B00554,Registered,'02116,1981.0,Female,11,A32004,Longwood Ave / Binney St,Boston,42.338629,-71.106500,Existing
4,5,12,Closed,12,7/28/2011 10:37:00,23.0,7/28/2011 10:37:00,23.0,B00554,Registered,'97214,1983.0,Female,12,B32002,Ruggles Station / Columbus Ave.,Boston,42.335911,-71.088496,Existing
5,6,13,Closed,19,7/28/2011 10:39:00,23.0,7/28/2011 10:39:00,23.0,B00456,Registered,'02021,1951.0,Male,13,C32002,Boston Medical Center - 721 Mass. Ave.,Boston,42.334057,-71.074030,Removed
6,7,14,Closed,24,7/28/2011 10:47:00,23.0,7/28/2011 10:47:00,23.0,B00554,Registered,'02140,1971.0,Female,14,B32003,HMS / HSPH - Ave. Louis Pasteur at Longwood Ave.,Boston,42.337171,-71.102797,Existing
7,8,15,Closed,7,7/28/2011 10:48:00,23.0,7/28/2011 10:48:00,23.0,B00554,Registered,'02140,1971.0,Female,15,A32005,Harvard Real Estate - Brighton Mills - 370 Wes...,Boston,42.361667,-71.138020,Existing
8,9,16,Closed,8,7/28/2011 11:01:00,23.0,7/28/2011 11:01:00,23.0,B00554,Registered,'97214,1983.0,Female,16,C32003,Back Bay / South End Station,Boston,42.347433,-71.076163,Existing
9,10,17,Closed,1108,7/28/2011 11:55:00,47.0,7/28/2011 12:13:00,40.0,B00550,Registered,'01867,1994.0,Male,17,A32006,Harvard University Housing - 111 Western Ave. ...,Boston,42.365074,-71.119581,Existing
