In [1]:
# importing libraries
import pandas as pd
import numpy as np
pd.set_option('max_colwidth', 400)
from pytz import timezone
from datetime import datetime

In [2]:
# load cleaned_elec_data csv
# Use a forward slash in the path: the previous 'Resources\cleaned_elec_data.csv'
# relied on the invalid escape sequence '\c' and only resolved on Windows.
# Forward slashes work on every platform and match the other paths in this notebook.
electric_data_df = pd.read_csv('Resources/cleaned_elec_data.csv')
# preview every row except the last 20
electric_data_df.head(-20)

Unnamed: 0,Demand,Date/Time,City
0,7269.0,7/1/2018 1:00,nyc
1,690707.0,7/1/2018 2:00,nyc
2,60725.0,7/1/2018 3:00,nyc
3,6539.0,7/1/2018 4:00,nyc
4,6415.0,7/1/2018 5:00,nyc
...,...,...,...
164183,866.0,4/25/2020 0:00,seattle
164184,,4/25/2020 1:00,seattle
164185,,4/25/2020 2:00,seattle
164186,,4/25/2020 3:00,seattle


In [3]:
# re-write the Date/Time strings in "YYYY-MM-DD HH:MM:SS" form
parsed_timestamps = pd.to_datetime(electric_data_df["Date/Time"])
electric_data_df["Date/Time"] = parsed_timestamps.dt.strftime("%Y-%m-%d %H:%M:%S")
electric_data_df.head(-20)

Unnamed: 0,Demand,Date/Time,City
0,7269.0,2018-07-01 01:00:00,nyc
1,690707.0,2018-07-01 02:00:00,nyc
2,60725.0,2018-07-01 03:00:00,nyc
3,6539.0,2018-07-01 04:00:00,nyc
4,6415.0,2018-07-01 05:00:00,nyc
...,...,...,...
164183,866.0,2020-04-25 00:00:00,seattle
164184,,2020-04-25 01:00:00,seattle
164185,,2020-04-25 02:00:00,seattle
164186,,2020-04-25 03:00:00,seattle


In [4]:
# map each city label to the time zone name used for its local timestamps
city_timezones = dict([
    ('dallas', 'America/Chicago'),
    ('houston', 'America/Chicago'),
    ('la', 'America/Los_Angeles'),
    ('nyc', 'America/New_York'),
    ('philadelphia', 'America/New_York'),
    ('phoenix', 'America/Phoenix'),
    ('san antonio', 'America/Chicago'),
    ('san diego', 'America/Los_Angeles'),
    ('san jose', 'America/Los_Angeles'),
    ('seattle', 'America/Los_Angeles'),
])

In [5]:
#convert datetime to Central Time
def convert_to_central(dt_str, city):
    """Re-express a city's local timestamp in the 'America/Chicago' zone.

    Parameters
    ----------
    dt_str : str
        Timestamp text in '%Y-%m-%d %H:%M:%S' format, interpreted as local
        wall-clock time in `city`.
    city : str
        Key of `city_timezones`; a city missing from that mapping raises KeyError.

    Returns
    -------
    datetime
        Timezone-aware datetime converted to 'America/Chicago'.
    """
    source_zone = timezone(city_timezones[city])
    naive_local = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S')
    # NOTE(review): localize() is called without is_dst, so pytz's default
    # resolution applies to ambiguous / non-existent DST wall times -- confirm
    # that this is the intended handling.
    return source_zone.localize(naive_local).astimezone(timezone('America/Chicago'))

In [6]:
#add a 'Central Time' column built from each row's 'Date/Time' and 'City'
def _row_to_central(row):
    """Apply convert_to_central to one row of electric_data_df."""
    return convert_to_central(row['Date/Time'], row['City'])

electric_data_df['Central Time'] = electric_data_df.apply(_row_to_central, axis=1)
electric_data_df.head(-20)

Unnamed: 0,Demand,Date/Time,City,Central Time
0,7269.0,2018-07-01 01:00:00,nyc,2018-07-01 00:00:00-05:00
1,690707.0,2018-07-01 02:00:00,nyc,2018-07-01 01:00:00-05:00
2,60725.0,2018-07-01 03:00:00,nyc,2018-07-01 02:00:00-05:00
3,6539.0,2018-07-01 04:00:00,nyc,2018-07-01 03:00:00-05:00
4,6415.0,2018-07-01 05:00:00,nyc,2018-07-01 04:00:00-05:00
...,...,...,...,...
164183,866.0,2020-04-25 00:00:00,seattle,2020-04-25 02:00:00-05:00
164184,,2020-04-25 01:00:00,seattle,2020-04-25 03:00:00-05:00
164185,,2020-04-25 02:00:00,seattle,2020-04-25 04:00:00-05:00
164186,,2020-04-25 03:00:00,seattle,2020-04-25 05:00:00-05:00


In [7]:
#convert 'Central Time' column to Unix time (whole seconds since 1970-01-01 UTC)
# Subtract the epoch and floor-divide by one second instead of using
# astype('int64') // 10**9: the integer cast is only correct when the datetimes
# are stored with nanosecond resolution, whereas this form gives seconds for
# any internal resolution.
central_times_utc = pd.to_datetime(electric_data_df['Central Time'], utc=True)
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
electric_data_df['Unix Time'] = (central_times_utc - unix_epoch) // pd.Timedelta(seconds=1)
electric_data_df.head(-20)

Unnamed: 0,Demand,Date/Time,City,Central Time,Unix Time
0,7269.0,2018-07-01 01:00:00,nyc,2018-07-01 00:00:00-05:00,1530421200
1,690707.0,2018-07-01 02:00:00,nyc,2018-07-01 01:00:00-05:00,1530424800
2,60725.0,2018-07-01 03:00:00,nyc,2018-07-01 02:00:00-05:00,1530428400
3,6539.0,2018-07-01 04:00:00,nyc,2018-07-01 03:00:00-05:00,1530432000
4,6415.0,2018-07-01 05:00:00,nyc,2018-07-01 04:00:00-05:00,1530435600
...,...,...,...,...,...
164183,866.0,2020-04-25 00:00:00,seattle,2020-04-25 02:00:00-05:00,1587798000
164184,,2020-04-25 01:00:00,seattle,2020-04-25 03:00:00-05:00,1587801600
164185,,2020-04-25 02:00:00,seattle,2020-04-25 04:00:00-05:00,1587805200
164186,,2020-04-25 03:00:00,seattle,2020-04-25 05:00:00-05:00,1587808800


In [8]:
# remove the 'Central Time' column from electric_data_df in place
electric_data_df.drop(labels='Central Time', axis='columns', inplace=True)
electric_data_df.head(-20)

Unnamed: 0,Demand,Date/Time,City,Unix Time
0,7269.0,2018-07-01 01:00:00,nyc,1530421200
1,690707.0,2018-07-01 02:00:00,nyc,1530424800
2,60725.0,2018-07-01 03:00:00,nyc,1530428400
3,6539.0,2018-07-01 04:00:00,nyc,1530432000
4,6415.0,2018-07-01 05:00:00,nyc,1530435600
...,...,...,...,...
164183,866.0,2020-04-25 00:00:00,seattle,1587798000
164184,,2020-04-25 01:00:00,seattle,1587801600
164185,,2020-04-25 02:00:00,seattle,1587805200
164186,,2020-04-25 03:00:00,seattle,1587808800


In [9]:
# rename columns to lower-case, underscore-separated labels (in place)
snake_case_names = {
    "Demand": "demand",
    "Date/Time": "date_time",
    "City": "city",
    "Unix Time": "unix_time",
}
electric_data_df.rename(columns=snake_case_names, inplace=True)
electric_data_df.head(-20)

Unnamed: 0,demand,date_time,city,unix_time
0,7269.0,2018-07-01 01:00:00,nyc,1530421200
1,690707.0,2018-07-01 02:00:00,nyc,1530424800
2,60725.0,2018-07-01 03:00:00,nyc,1530428400
3,6539.0,2018-07-01 04:00:00,nyc,1530432000
4,6415.0,2018-07-01 05:00:00,nyc,1530435600
...,...,...,...,...
164183,866.0,2020-04-25 00:00:00,seattle,1587798000
164184,,2020-04-25 01:00:00,seattle,1587801600
164185,,2020-04-25 02:00:00,seattle,1587805200
164186,,2020-04-25 03:00:00,seattle,1587808800


In [10]:
# show the column labels of electric_data_df (sanity check after the rename above)
electric_data_df.columns

Index(['demand', 'date_time', 'city', 'unix_time'], dtype='object')

In [11]:
# keep only the dallas rows; the re-indexed frame on the last line is displayed
# but not stored, so dallas_df keeps the row labels of electric_data_df
dallas_df = electric_data_df.loc[electric_data_df["city"].eq("dallas")]
dallas_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,,2018-07-01 01:00:00,dallas,1530424800
1,,2018-07-01 02:00:00,dallas,1530428400
2,,2018-07-01 03:00:00,dallas,1530432000
3,,2018-07-01 04:00:00,dallas,1530435600
4,,2018-07-01 05:00:00,dallas,1530439200
...,...,...,...,...
16531,,2020-05-19 20:00:00,dallas,1589936400
16532,,2020-05-19 21:00:00,dallas,1589940000
16533,,2020-05-19 22:00:00,dallas,1589943600
16534,,2020-05-19 23:00:00,dallas,1589947200


In [12]:
# keep only the houston rows; the re-indexed frame on the last line is displayed
# but not stored, so houston_df keeps the row labels of electric_data_df
houston_df = electric_data_df.loc[electric_data_df["city"].eq("houston")]
houston_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,,2018-07-01 01:00:00,houston,1530424800
1,,2018-07-01 02:00:00,houston,1530428400
2,,2018-07-01 03:00:00,houston,1530432000
3,,2018-07-01 04:00:00,houston,1530435600
4,,2018-07-01 05:00:00,houston,1530439200
...,...,...,...,...
16531,,2020-05-19 20:00:00,houston,1589936400
16532,,2020-05-19 21:00:00,houston,1589940000
16533,,2020-05-19 22:00:00,houston,1589943600
16534,,2020-05-19 23:00:00,houston,1589947200


In [13]:
# keep only the la rows; the re-indexed frame on the last line is displayed
# but not stored, so la_df keeps the row labels of electric_data_df
la_df = electric_data_df.loc[electric_data_df["city"].eq("la")]
la_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,10681.0,2018-07-01 01:00:00,la,1530432000
1,101907.0,2018-07-01 02:00:00,la,1530435600
2,907076.0,2018-07-01 03:00:00,la,1530439200
3,9508.0,2018-07-01 04:00:00,la,1530442800
4,9431.0,2018-07-01 05:00:00,la,1530446400
...,...,...,...,...
16531,10893.0,2020-05-19 20:00:00,la,1589943600
16532,11263.0,2020-05-19 21:00:00,la,1589947200
16533,10952.0,2020-05-19 22:00:00,la,1589950800
16534,10338.0,2020-05-19 23:00:00,la,1589954400


In [14]:
# keep only the nyc rows; the re-indexed frame on the last line is displayed
# but not stored, so nyc_df keeps the row labels of electric_data_df
nyc_df = electric_data_df.loc[electric_data_df["city"].eq("nyc")]
nyc_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,7269.0,2018-07-01 01:00:00,nyc,1530421200
1,690707.0,2018-07-01 02:00:00,nyc,1530424800
2,60725.0,2018-07-01 03:00:00,nyc,1530428400
3,6539.0,2018-07-01 04:00:00,nyc,1530432000
4,6415.0,2018-07-01 05:00:00,nyc,1530435600
...,...,...,...,...
16531,46074.0,2020-05-19 20:00:00,nyc,1589932800
16532,40708.0,2020-05-19 21:00:00,nyc,1589936400
16533,46107.0,2020-05-19 22:00:00,nyc,1589940000
16534,4440.0,2020-05-19 23:00:00,nyc,1589943600


In [15]:
# keep only the philadelphia rows; the re-indexed frame on the last line is displayed
# but not stored, so philadelphia_df keeps the row labels of electric_data_df
philadelphia_df = electric_data_df.loc[electric_data_df["city"].eq("philadelphia")]
philadelphia_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,43907.0,2018-07-01 01:00:00,philadelphia,1530421200
1,4423.0,2018-07-01 02:00:00,philadelphia,1530424800
2,40743.0,2018-07-01 03:00:00,philadelphia,1530428400
3,5230.0,2018-07-01 04:00:00,philadelphia,1530432000
4,50752.0,2018-07-01 05:00:00,philadelphia,1530435600
...,...,...,...,...
16531,,2020-05-19 20:00:00,philadelphia,1589932800
16532,,2020-05-19 21:00:00,philadelphia,1589936400
16533,,2020-05-19 22:00:00,philadelphia,1589940000
16534,,2020-05-19 23:00:00,philadelphia,1589943600


In [16]:
# keep only the phoenix rows; the re-indexed frame on the last line is displayed
# but not stored, so phoenix_df keeps the row labels of electric_data_df
phoenix_df = electric_data_df.loc[electric_data_df["city"].eq("phoenix")]
phoenix_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,34907.0,2018-07-01 01:00:00,phoenix,1530432000
1,3256.0,2018-07-01 02:00:00,phoenix,1530435600
2,3065.0,2018-07-01 03:00:00,phoenix,1530439200
3,2929.0,2018-07-01 04:00:00,phoenix,1530442800
4,2833.0,2018-07-01 05:00:00,phoenix,1530446400
...,...,...,...,...
15955,,2020-04-25 20:00:00,phoenix,1587870000
15956,,2020-04-25 21:00:00,phoenix,1587873600
15957,,2020-04-25 22:00:00,phoenix,1587877200
15958,,2020-04-25 23:00:00,phoenix,1587880800


In [17]:
# keep only the san antonio rows; the re-indexed frame on the last line is displayed
# but not stored, so san_antonio_df keeps the row labels of electric_data_df
san_antonio_df = electric_data_df.loc[electric_data_df["city"].eq("san antonio")]
san_antonio_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,,2018-07-01 01:00:00,san antonio,1530424800
1,,2018-07-01 02:00:00,san antonio,1530428400
2,,2018-07-01 03:00:00,san antonio,1530432000
3,,2018-07-01 04:00:00,san antonio,1530435600
4,,2018-07-01 05:00:00,san antonio,1530439200
...,...,...,...,...
16531,,2020-05-19 20:00:00,san antonio,1589936400
16532,,2020-05-19 21:00:00,san antonio,1589940000
16533,,2020-05-19 22:00:00,san antonio,1589943600
16534,,2020-05-19 23:00:00,san antonio,1589947200


In [18]:
# keep only the san diego rows; the re-indexed frame on the last line is displayed
# but not stored, so san_diego_df keeps the row labels of electric_data_df
san_diego_df = electric_data_df.loc[electric_data_df["city"].eq("san diego")]
san_diego_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,2023.0,2018-07-01 01:00:00,san diego,1530432000
1,1896.0,2018-07-01 02:00:00,san diego,1530435600
2,18507.0,2018-07-01 03:00:00,san diego,1530439200
3,1825.0,2018-07-01 04:00:00,san diego,1530442800
4,10798.0,2018-07-01 05:00:00,san diego,1530446400
...,...,...,...,...
16531,2220.0,2020-05-19 20:00:00,san diego,1589943600
16532,23107.0,2020-05-19 21:00:00,san diego,1589947200
16533,22207.0,2020-05-19 22:00:00,san diego,1589950800
16534,2056.0,2020-05-19 23:00:00,san diego,1589954400


In [19]:
# keep only the san jose rows; the re-indexed frame on the last line is displayed
# but not stored, so san_jose_df keeps the row labels of electric_data_df
san_jose_df = electric_data_df.loc[electric_data_df["city"].eq("san jose")]
san_jose_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,12522.0,2018-07-01 01:00:00,san jose,1530432000
1,110745.0,2018-07-01 02:00:00,san jose,1530435600
2,11200.0,2018-07-01 03:00:00,san jose,1530439200
3,10822.0,2018-07-01 04:00:00,san jose,1530442800
4,10644.0,2018-07-01 05:00:00,san jose,1530446400
...,...,...,...,...
16531,115078.0,2020-05-19 20:00:00,san jose,1589943600
16532,110782.0,2020-05-19 21:00:00,san jose,1589947200
16533,11592.0,2020-05-19 22:00:00,san jose,1589950800
16534,11083.0,2020-05-19 23:00:00,san jose,1589954400


In [20]:
# keep only the seattle rows; the re-indexed frame on the last line is displayed
# but not stored, so seattle_df keeps the row labels of electric_data_df
seattle_df = electric_data_df.loc[electric_data_df["city"].eq("seattle")]
seattle_df.reset_index(drop=True)

Unnamed: 0,demand,date_time,city,unix_time
0,809.0,2018-07-01 01:00:00,seattle,1530432000
1,7079.0,2018-07-01 02:00:00,seattle,1530435600
2,753.0,2018-07-01 03:00:00,seattle,1530439200
3,748.0,2018-07-01 04:00:00,seattle,1530442800
4,745.0,2018-07-01 05:00:00,seattle,1530446400
...,...,...,...,...
15955,,2020-04-25 20:00:00,seattle,1587870000
15956,,2020-04-25 21:00:00,seattle,1587873600
15957,,2020-04-25 22:00:00,seattle,1587877200
15958,,2020-04-25 23:00:00,seattle,1587880800


In [21]:
# dst date/times, written in the same "YYYY-MM-DD HH:MM:SS" text form as the
# date_time column so they can be matched with isin()
dates_to_remove = [
    '2018-03-11 01:00:00',
    '2018-11-04 01:00:00',
    '2019-03-10 01:00:00',
    '2019-11-03 01:00:00',
    '2020-03-08 01:00:00',
]

In [22]:
# drop dst dates from cities, excluding phoenix
def _without_dst_rows(city_df):
    """Return city_df minus the rows whose date_time is listed in dates_to_remove, re-indexed from 0."""
    keep_mask = ~city_df['date_time'].isin(dates_to_remove)
    return city_df[keep_mask].reset_index(drop=True)

dallas_df = _without_dst_rows(dallas_df)
houston_df = _without_dst_rows(houston_df)
la_df = _without_dst_rows(la_df)
nyc_df = _without_dst_rows(nyc_df)
philadelphia_df = _without_dst_rows(philadelphia_df)
san_antonio_df = _without_dst_rows(san_antonio_df)
san_diego_df = _without_dst_rows(san_diego_df)
san_jose_df = _without_dst_rows(san_jose_df)
seattle_df = _without_dst_rows(seattle_df)

In [23]:
# dedup city dfs
def _first_row_per_timestamp(city_df):
    """Return city_df with repeated date_time values dropped (first occurrence kept), re-indexed from 0."""
    unique_rows = city_df.drop_duplicates(subset=['date_time'])
    return unique_rows.reset_index(drop=True)

dallas_df = _first_row_per_timestamp(dallas_df)
houston_df = _first_row_per_timestamp(houston_df)
la_df = _first_row_per_timestamp(la_df)
nyc_df = _first_row_per_timestamp(nyc_df)
philadelphia_df = _first_row_per_timestamp(philadelphia_df)
phoenix_df = _first_row_per_timestamp(phoenix_df)
san_antonio_df = _first_row_per_timestamp(san_antonio_df)
san_diego_df = _first_row_per_timestamp(san_diego_df)
san_jose_df = _first_row_per_timestamp(san_jose_df)
seattle_df = _first_row_per_timestamp(seattle_df)

In [24]:
# write each city frame to its own csv (row index not written)
city_csv_targets = {
    "Output/dallas.csv": dallas_df,
    "Output/houston.csv": houston_df,
    "Output/la.csv": la_df,
    "Output/nyc.csv": nyc_df,
    "Output/philadelphia.csv": philadelphia_df,
    "Output/phoenix.csv": phoenix_df,
    "Output/san_antonio.csv": san_antonio_df,
    "Output/san_diego.csv": san_diego_df,
    "Output/san_jose.csv": san_jose_df,
    "Output/seattle.csv": seattle_df,
}
for csv_path, city_frame in city_csv_targets.items():
    city_frame.to_csv(csv_path, index=False)

In [25]:
#read the dallas climate records from json and preview all but the last 20 rows
dallas_climate_source = 'Resources/dallas.json'
dallas_climate_df = pd.read_json(dallas_climate_source)
dallas_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530507600,Clear,clear-night,0.0000,0.00,89.56,91.71,65.31,0.45,1011.9,10.84,12.40,164.0,0.06,0.0,9.617,,,
1,1530511200,Clear,clear-night,0.0000,0.00,88.35,92.26,68.29,0.52,1012.7,9.94,9.94,179.0,0.04,0.0,9.817,,,
2,1530514800,Clear,clear-night,0.0000,0.00,87.05,91.45,69.26,0.56,1012.8,8.92,10.31,186.0,0.04,0.0,9.510,,,
3,1530518400,Clear,clear-night,0.0000,0.00,86.04,90.51,69.61,0.58,1012.7,8.47,8.47,189.0,0.18,0.0,9.617,,,
4,1530522000,Clear,clear-night,0.0000,0.00,84.80,89.74,70.68,0.63,1012.9,5.61,5.61,193.0,0.15,0.0,9.617,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590120000,Humid,clear-night,0.0043,0.03,77.70,79.27,73.61,0.87,1009.1,7.52,17.01,133.0,0.10,0.0,10.000,rain,301.3,
16550,1590123600,Humid,clear-night,0.0032,0.03,77.02,78.58,73.38,0.89,1009.2,8.09,18.12,145.0,0.07,0.0,10.000,rain,296.9,
16551,1590127200,Humid,clear-night,0.0011,0.04,76.09,77.71,73.44,0.92,1009.2,9.02,20.79,160.0,0.17,0.0,10.000,rain,294.3,
16552,1590130800,Humid and Partly Cloudy,partly-cloudy-night,0.0005,0.03,75.31,76.89,73.04,0.93,1008.4,10.61,26.87,161.0,0.45,0.0,10.000,rain,294.8,


In [26]:
#read the houston climate records from json and preview all but the last 20 rows
houston_climate_source = 'Resources/houston.json'
houston_climate_df = pd.read_json(houston_climate_source)
houston_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530507600,Humid,clear-night,0.0000,0.00,81.87,87.88,74.15,0.78,1015.6,5.45,9.31,191.0,0.01,0.0,8.479,,,
1,1530511200,Humid,clear-night,0.0000,0.00,81.29,86.90,74.17,0.79,1015.4,5.30,7.54,195.0,0.00,0.0,7.880,,,
2,1530514800,Humid,clear-night,0.0000,0.00,80.34,85.34,74.47,0.82,1014.9,5.49,5.49,218.0,0.00,0.0,8.751,,,
3,1530518400,Humid,clear-night,0.0000,0.00,79.75,84.06,74.09,0.83,1014.8,3.53,8.39,187.0,0.01,0.0,7.772,,,
4,1530522000,Humid,clear-night,0.0000,0.00,79.23,83.02,74.02,0.84,1015.0,4.25,4.25,210.0,0.05,0.0,7.772,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590120000,Humid,clear-night,0.0009,0.02,78.34,81.33,75.23,0.90,1011.3,7.99,18.56,144.0,0.31,0.0,10.000,rain,298.4,
16550,1590123600,Humid,clear-night,0.0000,0.00,77.60,79.42,75.38,0.93,1011.4,8.18,19.40,141.0,0.26,0.0,10.000,,298.7,
16551,1590127200,Humid,clear-night,0.0000,0.00,77.17,78.98,75.09,0.93,1010.7,8.83,20.91,148.0,0.24,0.0,10.000,,298.6,
16552,1590130800,Humid,clear-night,0.0003,0.03,76.50,78.28,74.71,0.94,1010.5,9.44,22.11,150.0,0.27,0.0,10.000,rain,297.7,


In [27]:
#read the la climate records from json and preview all but the last 20 rows
la_climate_source = 'Resources/la.json'
la_climate_df = pd.read_json(la_climate_source)
la_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone
0,1530428400,Clear,clear-night,0.0,0.0,65.45,65.45,58.64,0.79,1014.5,4.23,4.23,243.0,0.25,0.0,9.798,,
1,1530432000,Overcast,cloudy,0.0,0.0,65.16,65.16,58.62,0.79,1014.4,3.95,3.95,193.0,0.88,0.0,9.777,,
2,1530435600,Overcast,cloudy,0.0,0.0,64.58,64.58,58.23,0.80,1014.1,4.21,4.21,185.0,0.92,0.0,9.778,,
3,1530439200,Overcast,cloudy,0.0,0.0,64.46,64.46,57.87,0.79,1013.9,4.01,4.04,175.0,0.99,0.0,9.782,,
4,1530442800,Overcast,cloudy,0.0,0.0,64.19,64.19,57.96,0.80,1014.1,3.81,3.81,182.0,1.00,0.0,9.108,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590040800,Clear,clear-night,0.0,0.0,62.51,62.51,53.33,0.72,1015.2,2.21,2.39,119.0,0.00,0.0,10.000,,325.4
16550,1590044400,Clear,clear-night,0.0,0.0,61.35,61.35,53.41,0.75,1015.1,1.08,1.75,126.0,0.00,0.0,10.000,,325.6
16551,1590048000,Clear,clear-night,0.0,0.0,60.28,60.28,53.22,0.78,1015.0,1.16,2.51,134.0,0.00,0.0,10.000,,326.1
16552,1590051600,Clear,clear-night,0.0,0.0,59.41,59.41,52.84,0.79,1014.6,1.04,2.77,103.0,0.00,0.0,10.000,,326.3


In [28]:
#read the nyc climate records from json and preview all but the last 20 rows
nyc_climate_source = 'Resources/nyc.json'
nyc_climate_df = pd.read_json(nyc_climate_source)
nyc_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530504000,Clear,clear-night,0.0000,0.00,83.18,87.93,71.08,0.67,1017.0,3.27,3.27,242.0,0.03,0.0,9.784,,,
1,1530507600,Clear,clear-night,0.0000,0.00,82.55,86.45,69.88,0.66,1017.2,2.40,2.40,234.0,0.02,0.0,9.763,,,
2,1530511200,Clear,clear-night,0.0000,0.00,79.89,82.86,69.55,0.71,1017.4,3.64,3.64,256.0,0.02,0.0,9.876,,,
3,1530514800,Clear,clear-night,0.0000,0.00,79.07,81.70,69.37,0.72,1017.3,5.51,5.51,254.0,0.02,0.0,9.793,,,
4,1530518400,Clear,clear-night,0.0000,0.00,78.12,79.12,69.24,0.74,1017.2,1.95,2.90,255.0,0.02,0.0,9.799,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590116400,Clear,clear-night,0.0013,0.01,54.86,54.86,50.05,0.84,1025.6,4.74,12.65,182.0,0.03,0.0,10.000,rain,318.5,
16550,1590120000,Clear,clear-night,0.0007,0.01,54.17,54.17,49.73,0.85,1025.5,4.52,9.88,211.0,0.05,0.0,10.000,rain,318.9,
16551,1590123600,Clear,clear-night,0.0002,0.01,54.01,54.01,49.83,0.86,1025.3,3.26,6.51,227.0,0.07,0.0,10.000,rain,319.5,
16552,1590127200,Clear,clear-night,0.0000,0.00,54.06,54.06,49.86,0.86,1024.9,3.35,4.69,238.0,0.24,0.0,10.000,,320.1,


In [29]:
#read the philadelphia climate records from json and preview all but the last 20 rows
philadelphia_climate_source = 'Resources/philadelphia.json'
philadelphia_climate_df = pd.read_json(philadelphia_climate_source)
philadelphia_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530504000,Clear,clear-night,0.0000,0.00,80.27,83.79,70.79,0.73,1017.3,3.03,3.21,238.0,0.12,0.0,9.964,,,
1,1530507600,Clear,clear-night,0.0000,0.00,79.74,82.67,69.59,0.71,1017.2,2.39,2.39,226.0,0.05,0.0,9.705,,,
2,1530511200,Clear,clear-night,0.0000,0.00,78.14,79.19,69.70,0.75,1017.6,1.78,1.78,226.0,0.20,0.0,9.553,,,
3,1530514800,Clear,clear-night,0.0000,0.00,76.94,78.02,69.75,0.79,1017.6,0.95,1.21,192.0,0.12,0.0,9.401,,,
4,1530518400,Clear,clear-night,0.0000,0.00,76.07,77.20,69.96,0.81,1017.5,1.23,1.49,214.0,0.05,0.0,8.918,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590116400,Clear,clear-night,0.0018,0.02,59.13,59.13,51.07,0.75,1025.2,3.80,10.73,116.0,0.04,0.0,10.000,rain,326.7,
16550,1590120000,Clear,clear-night,0.0008,0.02,57.91,57.91,51.15,0.78,1025.1,3.62,8.36,108.0,0.07,0.0,10.000,rain,325.6,
16551,1590123600,Clear,clear-night,0.0000,0.00,56.78,56.78,51.21,0.82,1024.9,2.96,6.50,94.0,0.08,0.0,10.000,,324.5,
16552,1590127200,Drizzle,rain,0.0001,1.00,56.02,56.02,51.50,0.85,1024.3,2.47,6.14,96.0,0.08,0.0,10.000,rain,323.4,


In [30]:
#read the phoenix climate records from json and preview all but the last 20 rows
phoenix_climate_source = 'Resources/phoenix.json'
phoenix_climate_df = pd.read_json(phoenix_climate_source)
phoenix_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530428400,Clear,clear-night,0.0,0.0,86.82,86.82,34.57,0.16,1008.9,4.30,11.27,225.0,0.0,0.0,9.997,,,
1,1530432000,Clear,clear-night,0.0,0.0,83.37,83.37,36.45,0.19,1009.3,4.00,4.00,282.0,0.0,0.0,9.997,,,
2,1530435600,Clear,clear-night,0.0,0.0,82.22,82.22,34.88,0.18,1009.5,2.47,2.47,279.0,0.0,0.0,9.997,,,
3,1530439200,Clear,clear-night,0.0,0.0,80.34,80.34,33.96,0.19,1010.1,2.98,4.12,107.0,0.0,0.0,9.997,,,
4,1530442800,Clear,clear-night,0.0,0.0,79.34,79.34,31.52,0.18,1010.4,2.74,3.28,106.0,0.0,0.0,9.997,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590040800,Clear,clear-night,0.0,0.0,70.28,70.28,25.81,0.19,1011.3,3.01,3.01,43.0,0.0,0.0,10.000,,331.1,
16550,1590044400,Clear,clear-night,0.0,0.0,68.90,68.90,25.03,0.19,1011.5,3.51,3.51,41.0,0.0,0.0,10.000,,329.6,
16551,1590048000,Clear,clear-night,0.0,0.0,68.57,68.57,25.77,0.20,1011.5,4.04,4.04,25.0,0.0,0.0,10.000,,326.8,
16552,1590051600,Clear,clear-night,0.0,0.0,63.19,63.19,28.06,0.26,1011.5,4.22,4.22,19.0,0.0,0.0,10.000,,325.1,


In [31]:
#read the san antonio climate records from json and preview all but the last 20 rows
san_antonio_climate_source = 'Resources/san_antonio.json'
san_antonio_climate_df = pd.read_json(san_antonio_climate_source)
san_antonio_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530507600,Humid,clear-night,0.0000,0.00,81.09,86.15,73.38,0.77,1014.7,9.02,9.02,131.0,0.01,0.0,9.997,,,
1,1530511200,Humid,clear-night,0.0000,0.00,79.40,82.99,72.74,0.80,1014.7,9.18,9.18,131.0,0.02,0.0,9.976,,,
2,1530514800,Humid and Mostly Cloudy,partly-cloudy-night,0.0000,0.00,78.22,79.70,73.07,0.84,1014.3,11.20,11.20,141.0,0.75,0.0,9.976,,,
3,1530518400,Humid and Mostly Cloudy,partly-cloudy-night,0.0000,0.00,78.22,79.67,72.85,0.84,1014.6,8.01,8.01,162.0,0.75,0.0,9.976,,,
4,1530522000,Humid and Partly Cloudy,partly-cloudy-night,0.0000,0.00,77.08,78.47,72.18,0.85,1013.6,4.81,4.81,181.0,0.45,0.0,9.976,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590120000,Humid,clear-night,0.0016,0.01,81.08,85.98,73.08,0.77,1008.7,10.41,22.80,132.0,0.06,0.0,10.000,rain,293.5,
16550,1590123600,Humid,clear-night,0.0026,0.02,79.37,82.93,72.72,0.80,1008.7,10.42,23.74,125.0,0.05,0.0,10.000,rain,291.0,
16551,1590127200,Humid,clear-night,0.0018,0.03,77.80,79.28,73.00,0.85,1008.9,8.87,23.95,127.0,0.25,0.0,10.000,rain,289.4,
16552,1590130800,Humid and Mostly Cloudy,partly-cloudy-night,0.0020,0.03,78.33,79.90,73.72,0.86,1008.5,9.80,23.36,120.0,0.73,0.0,10.000,rain,289.3,


In [32]:
#read the san diego climate records from json and preview all but the last 20 rows
san_diego_climate_source = 'Resources/san_diego.json'
san_diego_climate_df = pd.read_json(san_diego_climate_source)
san_diego_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone
0,1530428400,Overcast,cloudy,0.0,0.0,61.71,61.71,55.47,0.80,1015.9,3.53,3.53,210.0,1.00,0.0,9.969,,
1,1530432000,Overcast,cloudy,0.0,0.0,62.26,62.26,55.50,0.79,1015.7,3.06,3.06,231.0,1.00,0.0,9.972,,
2,1530435600,Overcast,cloudy,0.0,0.0,62.27,62.27,55.52,0.79,1015.3,4.34,4.34,166.0,1.00,0.0,9.962,,
3,1530439200,Overcast,cloudy,0.0,0.0,62.13,62.13,55.29,0.78,1015.0,3.91,3.91,223.0,0.99,0.0,9.943,,
4,1530442800,Overcast,cloudy,0.0,0.0,61.52,61.52,55.84,0.82,1015.2,3.37,3.37,207.0,1.00,0.0,9.921,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590040800,Clear,clear-night,0.0,0.0,61.80,61.80,55.77,0.81,1015.7,2.80,3.47,299.0,0.02,0.0,10.000,,321.7
16550,1590044400,Clear,clear-night,0.0,0.0,61.14,61.14,55.76,0.82,1015.5,2.18,2.36,295.0,0.02,0.0,10.000,,321.2
16551,1590048000,Clear,clear-night,0.0,0.0,59.91,59.91,55.38,0.85,1015.4,2.33,2.33,319.0,0.01,0.0,10.000,,320.4
16552,1590051600,Clear,clear-night,0.0,0.0,58.99,58.99,55.18,0.87,1014.8,1.40,2.63,46.0,0.06,0.0,10.000,,319.9


In [33]:
#read the san jose climate records from json and preview all but the last 20 rows
san_jose_climate_source = 'Resources/san_jose.json'
san_jose_climate_df = pd.read_json(san_jose_climate_source)
san_jose_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,ozone,precipType
0,1530428400,Clear,clear-night,0.0,0.0,67.78,67.78,53.27,0.60,1012.8,6.36,8.58,141.0,0.17,0.0,9.988,,
1,1530432000,Clear,clear-night,0.0,0.0,66.09,66.09,53.14,0.63,1013.1,3.15,5.36,143.0,0.01,0.0,9.988,,
2,1530435600,Clear,clear-night,0.0,0.0,64.30,64.30,53.15,0.67,1013.0,4.74,5.84,117.0,0.00,0.0,9.997,,
3,1530439200,Clear,clear-night,0.0,0.0,63.42,63.42,53.22,0.69,1012.9,4.51,6.80,160.0,0.06,0.0,9.988,,
4,1530442800,Clear,clear-night,0.0,0.0,61.73,61.73,53.10,0.73,1013.0,3.48,5.91,121.0,0.08,0.0,9.978,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590040800,Clear,clear-night,0.0,0.0,54.39,54.39,48.10,0.79,1019.7,5.02,7.67,349.0,0.06,0.0,10.000,344.6,
16550,1590044400,Clear,clear-night,0.0,0.0,53.67,53.67,47.60,0.80,1019.4,4.82,6.61,334.0,0.06,0.0,10.000,342.2,
16551,1590048000,Clear,clear-night,0.0,0.0,52.85,52.85,47.39,0.82,1019.1,3.42,4.50,344.0,0.03,0.0,10.000,338.9,
16552,1590051600,Clear,clear-night,0.0,0.0,52.39,52.39,47.27,0.83,1018.7,1.71,3.36,313.0,0.04,0.0,10.000,336.4,


In [34]:
#read the seattle climate records from json and preview all but the last 20 rows
seattle_climate_source = 'Resources/seattle.json'
seattle_climate_df = pd.read_json(seattle_climate_source)
seattle_climate_df.head(-20)

Unnamed: 0,time,summary,icon,precipIntensity,precipProbability,temperature,apparentTemperature,dewPoint,humidity,pressure,windSpeed,windGust,windBearing,cloudCover,uvIndex,visibility,precipType,ozone,precipAccumulation
0,1530428400,Overcast,cloudy,0.0000,0.00,59.32,59.32,55.21,0.86,1022.1,4.81,4.81,207.0,1.00,0.0,8.395,,,
1,1530432000,Overcast,cloudy,0.0000,0.00,58.96,58.96,55.24,0.87,1022.0,5.02,5.02,201.0,1.00,0.0,3.775,,,
2,1530435600,Overcast,cloudy,0.0000,0.00,58.56,58.56,55.45,0.89,1022.1,4.20,4.20,199.0,1.00,0.0,3.560,,,
3,1530439200,Overcast,cloudy,0.0000,0.00,58.26,58.26,55.68,0.91,1022.1,3.66,3.66,195.0,1.00,0.0,3.482,,,
4,1530442800,Overcast,cloudy,0.0007,0.15,58.04,58.04,55.68,0.92,1022.0,4.19,4.19,190.0,1.00,0.0,3.351,rain,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16549,1590040800,Mostly Cloudy,partly-cloudy-night,0.0072,0.22,52.29,52.29,48.76,0.88,1017.2,5.29,9.14,140.0,0.74,0.0,10.000,rain,373.1,
16550,1590044400,Possible Drizzle,rain,0.0082,0.26,51.30,51.30,47.68,0.87,1017.2,5.57,9.44,144.0,0.76,0.0,7.567,rain,372.1,
16551,1590048000,Possible Drizzle,rain,0.0069,0.26,50.53,50.53,46.93,0.87,1017.1,5.57,9.17,136.0,0.73,0.0,10.000,rain,369.5,
16552,1590051600,Possible Light Rain,rain,0.0092,0.33,49.82,47.82,46.65,0.89,1017.0,5.33,9.24,136.0,0.86,0.0,9.952,rain,367.8,


In [35]:
# write each climate frame to its own csv (row index not written)
climate_csv_targets = {
    "Output/dallas_climate.csv": dallas_climate_df,
    "Output/houston_climate.csv": houston_climate_df,
    "Output/la_climate.csv": la_climate_df,
    "Output/nyc_climate.csv": nyc_climate_df,
    "Output/philadelphia_climate.csv": philadelphia_climate_df,
    "Output/phoenix_climate.csv": phoenix_climate_df,
    "Output/san_antonio_climate.csv": san_antonio_climate_df,
    "Output/san_diego_climate.csv": san_diego_climate_df,
    "Output/san_jose_climate.csv": san_jose_climate_df,
    "Output/seattle_climate.csv": seattle_climate_df,
}
for csv_path, climate_frame in climate_csv_targets.items():
    climate_frame.to_csv(csv_path, index=False)