In [135]:
# Shell escape: ask nvidia-smi to list the GPUs visible to this runtime.
!nvidia-smi -L

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [136]:
# Mount Google Drive only when the Colab helper package is importable.
# Outside Colab there is nothing to mount, and the notebook falls back to
# local relative paths, so a missing package must not abort the run.
try:
    from google.colab import drive
except ImportError:
    pass
else:
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [137]:
import pandas as pd
import numpy as np

In [138]:

# Pick the project root: paths are relative to the working directory for a
# local kernel, and rooted in the mounted Drive folder when running on Colab.
if 'google.colab' not in str(get_ipython()):
  proj_dir = ""
else:
  from google.colab import drive
  # Drive itself is mounted by the earlier mount cell, not here.
  proj_dir = "/content/drive/MyDrive/ece884_project/"

Data fields

- id - a unique identifier for each trip
- vendor_id - a code indicating the provider associated with the trip record
- pickup_datetime - date and time when the meter was engaged
- dropoff_datetime - date and time when the meter was disengaged
- passenger_count - the number of passengers in the vehicle (driver entered value)
- pickup_longitude - the longitude where the meter was engaged
- pickup_latitude - the latitude where the meter was engaged
- dropoff_longitude - the longitude where the meter was disengaged
- dropoff_latitude - the latitude where the meter was disengaged 
- store_and_fwd_flag - indicates whether the trip record was held in vehicle memory before sending to the vendor because the vehicle did not have a connection to the server (Y = store and forward; N = not a store and forward trip)
- trip_duration - duration of the trip in seconds

In [139]:
# Load the raw trip records; low_memory=False makes pandas read the file in
# one pass instead of chunk by chunk.
df = pd.read_csv(
    f"{proj_dir}misc_data/taxi/taxi.csv",
    low_memory=False,
)

In [140]:
# Convert both timestamp columns from text to datetime64 using the explicit
# 'YYYY-MM-DD HH:MM:SS' layout.
df["pickup_datetime"] = pd.to_datetime(
    df["pickup_datetime"], format='%Y-%m-%d %H:%M:%S'
)
df["dropoff_datetime"] = pd.to_datetime(
    df["dropoff_datetime"], format='%Y-%m-%d %H:%M:%S'
)

In [141]:
def create_features(data, col):
    """Derive calendar and elapsed-time features from a datetime column.

    The input frame is only read: the features are assembled in a new frame,
    so calling this for several columns of the same frame neither pollutes it
    with scratch columns nor lets one call overwrite another's values.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``col``.
    col : str
        Name of a datetime64 column in ``data``.

    Returns
    -------
    pandas.DataFrame
        Frame on the same index as ``data`` whose columns are, in order,
        year, quarter, month, dayofweek, dayofyear, dayofmonth, weekofyear,
        hour, minute, second and horizon, each suffixed with ``_<col>``.
        ``weekofyear`` is the ISO calendar week; ``horizon`` is the number of
        seconds elapsed since the earliest value found in ``col``.
    """
    stamps = data[col]
    parts = stamps.dt

    # Seconds since the first timestamp in this column (0.0 for the earliest).
    elapsed_seconds = (stamps - stamps.min()) / np.timedelta64(1, 's')

    # Dict insertion order fixes the column order of the returned frame.
    return pd.DataFrame({
        f"year_{col}": parts.year,
        f"quarter_{col}": parts.quarter,
        f"month_{col}": parts.month,
        f"dayofweek_{col}": parts.dayofweek,
        f"dayofyear_{col}": parts.dayofyear,
        f"dayofmonth_{col}": parts.day,
        f"weekofyear_{col}": parts.isocalendar().week,
        f"hour_{col}": parts.hour,
        f"minute_{col}": parts.minute,
        f"second_{col}": parts.second,
        f"horizon_{col}": elapsed_seconds,
    })

In [144]:
# Assemble the modelling table column-wise: pickup-time features,
# dropoff-time features, then the raw trip attributes and the target.
taxi_data = pd.concat(
    [
        create_features(df, 'pickup_datetime'),
        create_features(df, 'dropoff_datetime'),
        df[[
            "passenger_count",
            "pickup_longitude",
            "pickup_latitude",
            "dropoff_longitude",
            "dropoff_latitude",
            "trip_duration",
        ]],
    ],
    axis=1,
)

In [146]:
# Display the assembled feature table as the cell output.
taxi_data

Unnamed: 0,year_pickup_datetime,quarter_pickup_datetime,month_pickup_datetime,dayofweek_pickup_datetime,dayofyear_pickup_datetime,dayofmonth_pickup_datetime,weekofyear_pickup_datetime,hour_pickup_datetime,minute_pickup_datetime,second_pickup_datetime,...,hour_dropoff_datetime,minute_dropoff_datetime,second_dropoff_datetime,horizon_dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,trip_duration
0,2016,1,3,0,74,14,11,17,24,55,...,17,32,30,6370139.0,1,-73.982155,40.767937,-73.964630,40.765602,455
1,2016,2,6,6,164,12,23,0,43,35,...,0,54,38,14086267.0,1,-73.980415,40.738564,-73.999481,40.731152,663
2,2016,1,1,1,19,19,3,11,35,24,...,12,10,48,1598837.0,1,-73.979027,40.763939,-74.005333,40.710087,2124
3,2016,2,4,2,97,6,14,19,32,31,...,19,39,40,8364969.0,1,-74.010040,40.719971,-74.012268,40.706718,429
4,2016,1,3,5,86,26,12,13,30,55,...,13,38,10,7392879.0,1,-73.973053,40.793209,-73.972923,40.782520,435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1458639,2016,2,4,4,99,8,14,13,31,4,...,13,44,2,8516431.0,4,-73.982201,40.745522,-73.994911,40.740170,778
1458640,2016,1,1,6,10,10,1,7,35,15,...,7,46,10,805359.0,1,-74.000946,40.747379,-73.970184,40.796547,655
1458641,2016,2,4,4,113,22,16,6,57,41,...,7,10,25,9702414.0,1,-73.959129,40.768799,-74.004433,40.707371,764
1458642,2016,1,1,1,5,5,1,15,56,26,...,16,2,39,403148.0,1,-73.982079,40.749062,-73.974632,40.757107,373


In [147]:
# Write the feature table to the cleaned-data folder; index=False leaves the
# row index out of the file.
taxi_data.to_csv(
    f"{proj_dir}data_clean/taxi.csv",
    index=False,
)