# In [2]:
#This script reads in the cleaned and merged data and attaches weather information, stored in a PostgreSQL database,
#to each date and time.
#It saves the final data in a csv file to be used in the demand_prediction.ipynb file.

import pandas as pd
import numpy as np
import psycopg2

#This function extracts the date (as a YYYY-MM-DD string) and the hour of day from a timestamp object.
def extract_date_time(input):
    """Split a timestamp into a date string and an hour of day.

    Args:
        input: An object exposing ``date()`` and ``hour`` (for example a
            ``datetime.datetime`` or a ``pandas.Timestamp``).

    Returns:
        A ``(date, hour)`` tuple where ``date`` is formatted as
        ``YYYY-MM-DD`` and ``hour`` is the ``hour`` attribute of ``input``.
    """
    day_text = input.date().strftime("%Y-%m-%d")
    return day_text, input.hour

#This function retrieves hourly weather information from the PostgreSQL database and extracts the date and hour bucket of each record.
def weather_history():
    """Load hourly weather records from PostgreSQL and add the join keys.

    Reads the ``time``, ``precipintensity`` and ``temperature`` columns of the
    ``weather_newyork`` table, derives a ``Date`` string and an hour-of-day
    ``Start_Time`` from every ``time`` value via ``extract_date_time``, and
    drops the original ``time`` column.

    Connection settings are taken from the module-level names ``DBNAME``,
    ``USER`` and ``PASSWORD``.

    Returns:
        A DataFrame with the columns ``precipintensity``, ``temperature``,
        ``Date`` and ``Start_Time``.
    """
    conn = psycopg2.connect(dbname=DBNAME, user=USER, password=PASSWORD)
    try:
        raw = pd.read_sql("SELECT time,precipintensity,temperature FROM weather_newyork;", conn)
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    # Build the two key columns separately so that an empty result set yields
    # empty columns instead of failing on tuple unpacking.
    stamps = [extract_date_time(ts) for ts in raw['time']]
    raw['Date'] = [day for day, _ in stamps]
    raw['Start_Time'] = [hour for _, hour in stamps]
    return raw.drop(columns=['time'])

#This function reads the merged station-level ride data, selects relevant rows according to
#the desired criteria, and adds weather data.
def final_processing(read_filename, save_filename, weather=None):
    """Attach hourly weather to the merged ride data and write it to CSV.

    Args:
        read_filename: Path of the merged station-level ride CSV. It must
            contain the ``Start_Time``, ``Date`` and ``Start_Station_Name``
            columns.
        save_filename: Path the final CSV is written to (index omitted).
        weather: Optional weather DataFrame with ``Start_Time`` and ``Date``
            key columns plus ``precipintensity`` and ``temperature``. When
            omitted, the module-level ``history`` frame is used.

    Raises:
        NameError: If ``weather`` is omitted and no module-level ``history``
            has been defined.
    """
    if weather is None:
        # Fall back to the frame created by the script entry point; reading a
        # module-level name does not need a ``global`` statement.
        weather = history

    raw = pd.read_csv(read_filename)

    # Optional row filters, currently disabled:
    #
    # select workdays
    # raw = raw[raw['Holiday']<1]
    #
    # select rows inside an hour window
    # NOTE(review): the bounds below keep hours 13-20, while the earlier label
    # on this filter said "peak hours: 6-10 am" -- confirm the intended window.
    # raw = raw[(raw['Start_Time']<21)&(raw['Start_Time']>12)]
    #
    # delete rows with 10 rides or fewer
    # raw = raw[raw['Count'] > 10]

    # Associate weather with each (hour, date) pair. ``DataFrame.join`` keeps
    # every ride row; rows without a matching weather record get NaN values.
    cols = ['Start_Time', 'Date']
    raw = raw.join(weather.set_index(cols), on=cols)

    # raw = raw.drop(columns=['Holiday'])

    # Drop unnecessary columns and rename the weather columns.
    raw = raw.drop(columns=['Date', 'Start_Station_Name'])
    raw = raw.rename(columns={'precipintensity': 'Precipitation', 'temperature': 'Temperature'})
    raw.to_csv(save_filename, index=False)
    
if __name__ == '__main__':

    # Load the weather records once; final_processing reads this
    # module-level ``history`` frame.
    history = weather_history()

    # Join the weather onto the merged ride data and write the final csv.
    final_processing(
        read_filename='demand_merged_data.csv',
        save_filename='demand_prediction_data.csv',
    )
    