In [1]:
import pandas as pd
import requests

In [2]:
# Ticker symbol whose rows are extracted from the NYSE closing-price table further down.
selected_ticker = 'XOM'

In [3]:
# Daily calendar spine: one row per calendar day from 2010-01-01 through 2021-02-28
# (both endpoints included), held in a single 'Date' column.
# The former `closed=None` argument is omitted on purpose: it was the default, and the
# `closed` keyword of `pd.date_range` was deprecated in pandas 1.4 and removed in 2.0
# (superseded by `inclusive`), where passing it raises a TypeError.
data = pd.DataFrame(pd.date_range(start='2010-01-01', end='2021-02-28'), columns=['Date'])
print(data.shape)
data.head(2)

(4077, 1)


Unnamed: 0,Date
0,2010-01-01
1,2010-01-02


In [4]:
# Input CSV locations, given relative to the notebook's working directory.
# The trailing notes record the keywords/filters associated with each file — presumably
# applied when the file was exported; confirm against the data source.
path_stock = '../full_dataset/stock_closing_nyse.csv'
path_sentiment_global = '../full_dataset/news-sentiment-index_global.csv' # keywords: exxon, oil, gas
path_sentiment_finance = '../full_dataset/news-sentiment-index_finance.csv' # keywords: exxon, oil, gas
path_stock_index = '../full_dataset/stock_index.csv'
path_price_commodity = '../full_dataset/price_commodity.csv' # filter: crude, gasoline, settle price
path_interest_rate = '../full_dataset/interest_rate.csv' # filter: monthly, forecasts excluded
path_inventory_outlook = '../full_dataset/inventory_outlook.csv' # filter: crude, US, monthly
path_jodi_demand = '../full_dataset/jodi_demand.csv' # filter: jodi, demand
path_mobility_apple = '../full_dataset/mobility_apple.csv' # filter: US, sub-region
path_mobility_google = '../full_dataset/mobility_google.csv' # filter: US


In [5]:
def _read_sentiment(csv_path, index_name):
    """Read a sentiment CSV and return its 'DateTime'/'Index' columns as 'Date'/`index_name`, sorted by date."""
    raw = pd.read_csv(csv_path, parse_dates=['DateTime'])
    renamed = raw[['DateTime', 'Index']].rename(columns={'DateTime': 'Date', 'Index': index_name})
    return renamed.sort_values('Date')


def _read_long(csv_path):
    """Read a CSV, parsing 'Date Value' as datetimes and renaming that column to 'Date'."""
    raw = pd.read_csv(csv_path, parse_dates=['Date Value'])
    return raw.rename(columns={'Date Value': 'Date'})


def _to_wide(long_frame, key, index_cols=None):
    """Pivot 'Value' into one column per distinct `key` value and flatten the index.

    `index_cols` defaults to ['Date']. pivot_table's default aggregation is used, so
    several rows landing in the same cell are combined rather than raising.
    """
    if index_cols is None:
        index_cols = ['Date']
    wide = long_frame.pivot_table(index=index_cols, columns=key, values='Value')
    return wide.reset_index()


# --- Closing price of the selected ticker, oldest first --------------------------------
stock_nyse = pd.read_csv(path_stock, parse_dates=['Date Value'])
ticker_rows = stock_nyse.loc[stock_nyse.Ticker == selected_ticker, ['Date Value', 'Value']]
stock = ticker_rows.rename(columns={'Value': 'stock_closing_usd', 'Date Value': 'Date'}).sort_values('Date')

# --- News sentiment indices -------------------------------------------------------------
sentiment_global = _read_sentiment(path_sentiment_global, 'sentiment_global_index')
sentiment_finance = _read_sentiment(path_sentiment_finance, 'sentiment_finance_index')

# --- Long tables pivoted to one column per series ---------------------------------------
stock_index = _to_wide(_read_long(path_stock_index), key='Description')

price_commodity = _to_wide(_read_long(path_price_commodity), key='Commodity And Exchange')

# 'Period Value' is kept as a second index level here and surfaces as a frequency column.
# NOTE(review): if one date carries more than one 'Period Value', that date appears on
# several rows of the result — confirm the export really is single-frequency.
interest_rate = _to_wide(
    _read_long(path_interest_rate).assign(
        indicator_region=lambda long: long['Indicator'] + '_' + long['Region']
    ),
    key='indicator_region',
    index_cols=['Date', 'Period Value'],
).rename(columns={'Period Value' : 'interest_rate_freq'})

inventory_outlook = _to_wide(
    _read_long(path_inventory_outlook).assign(
        sector_unit=lambda long: long['Sub-Sector Level 3'] + '_' + long['Unit']
    ),
    key='sector_unit',
)

jodi_demand = _to_wide(
    _read_long(path_jodi_demand).assign(
        indicator=lambda long: 'JODI_demand_' + long['Country']
    ),
    key='indicator',
    index_cols=['Date', 'Period Value'],
).rename(columns={'Period Value' : 'jodi_demand_freq'})

# NOTE(review): for the two mobility tables the column key is a '+' concatenation of
# several columns, so a row missing any one part ends up with a missing key — confirm
# whether such rows are expected to be absent from the pivoted result.
mobility_apple = _to_wide(
    _read_long(path_mobility_apple).assign(
        indicator=lambda long: long['Transportation Type'] + '_' + long['Country'] + '_' + long['Region']
    ),
    key='indicator',
)

mobility_google = _to_wide(
    _read_long(path_mobility_google).assign(
        indicator=lambda long: long['Indicator'] + '_' + long['Sub Region 1'] + '_' + long['Sub Region 2']
    ),
    key='indicator',
)

In [6]:
# merge data
# Left-join every source table onto the daily calendar, keyed on 'Date'. how='left' keeps
# every calendar date; dates that a source does not cover are filled with NaN.
#
# The chain deliberately restarts from the bare 'Date' column rather than from `data`
# itself, so re-running this cell yields the same frame. Merging onto an already-merged
# `data` would collide on column names and produce `_x`/`_y` suffixed duplicates.
# The merge order below fixes the column order of the result.
sources = [
    stock,
    sentiment_global,
    sentiment_finance,
    stock_index,
    price_commodity,
    interest_rate,
    inventory_outlook,
    jodi_demand,
    mobility_apple,
    mobility_google,
]

merged = data[['Date']]
for source in sources:
    merged = pd.merge(merged, source, on=['Date'], how='left')

data = merged
data

Unnamed: 0,Date,stock_closing_usd,sentiment_global_index,sentiment_finance_index,DOW JONES COMPOSITE AVERAGE,DOW JONES INDUSTRIAL AVERAGE,DOW JONES TRANSPORTATION AVERAGE,DOW JONES UTILITY AVERAGE,S&P 500,ICE BRENT CRUDE OIL FUTURES,...,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_SULLIVAN COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_TIOGA COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_TOMPKINS COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_ULSTER COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WARREN COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WASHINGTON COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WAYNE COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WESTCHESTER COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WYOMING COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_YATES COUNTY
0,2010-01-01,,,,,,,,,,...,,,,,,,,,,
1,2010-01-02,,,,,,,,,,...,,,,,,,,,,
2,2010-01-03,,,,,,,,,,...,,,,,,,,,,
3,2010-01-04,69.15,,,,1.82,,,,80.12,...,,,,,,,,,,
4,2010-01-05,69.42,,,,1.82,,,,80.59,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4072,2021-02-24,56.70,,,10547.40,31961.86,13630.55,825.65,3925.43,67.04,...,-25.0,-23.0,-44.0,-27.0,-24.0,-22.0,-21.0,-37.0,-23.0,-18.0
4073,2021-02-25,55.76,-0.3,-0.1,10363.89,31402.01,13372.26,814.39,3829.34,66.88,...,-24.0,-23.0,-43.0,-27.0,-22.0,-21.0,-18.0,-37.0,-20.0,-15.0
4074,2021-02-26,54.37,,-0.2,10231.46,30932.37,13331.27,795.61,3811.15,66.13,...,-25.0,-22.0,-37.0,-28.0,-21.0,-23.0,-17.0,-36.0,-21.0,-15.0
4075,2021-02-27,,,,,,,,,,...,-11.0,-4.0,-12.0,-16.0,-17.0,-11.0,-6.0,-16.0,-1.0,-11.0


In [7]:
# Write the merged frame to CSV without the row index.
# NOTE(review): the '../data' directory is assumed to exist already — confirm.
data.to_csv('../data/data_2010_2021.csv', index=False)

In [8]:
# Display the pivoted Google mobility table in ascending date order.
# The sorted result is only shown, not assigned, so `mobility_google` itself is unchanged.
mobility_google.sort_values('Date', ascending=True)

indicator,Date,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_ALBANY COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_ALLEGANY COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_BRONX COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_BROOME COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_CATTARAUGUS COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_CAYUGA COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_CHAUTAUQUA COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_CHEMUNG COUNTY,GROCERY AND PHARMACY PERCENT CHANGE FROM BASELINE_NEW YORK_CHENANGO COUNTY,...,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_SULLIVAN COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_TIOGA COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_TOMPKINS COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_ULSTER COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WARREN COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WASHINGTON COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WAYNE COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WESTCHESTER COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_WYOMING COUNTY,WORKPLACES PERCENT CHANGE FROM BASELINE_NEW YORK_YATES COUNTY
0,2020-02-15,-4.0,-11.0,-8.0,1.0,3.0,-4.0,-1.0,-4.0,13.0,...,3.0,,12.0,,,-1.0,-3.0,,-1.0,1.0
1,2020-02-16,-6.0,4.0,-4.0,-2.0,5.0,-1.0,9.0,2.0,8.0,...,2.0,,15.0,1.0,1.0,-1.0,-1.0,-3.0,6.0,-4.0
2,2020-02-17,2.0,-1.0,-8.0,1.0,-4.0,,-1.0,-11.0,6.0,...,-31.0,-22.0,-9.0,-31.0,-27.0,-25.0,-24.0,-46.0,-24.0,-26.0
3,2020-02-18,-7.0,1.0,-6.0,-1.0,-2.0,-8.0,-2.0,-9.0,-3.0,...,-5.0,-1.0,-9.0,,-15.0,-16.0,-14.0,-14.0,-14.0,-13.0
4,2020-02-19,-3.0,-4.0,-6.0,,-5.0,-2.0,-4.0,-3.0,-1.0,...,2.0,1.0,-9.0,3.0,-10.0,-11.0,-13.0,-12.0,-11.0,-14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,2021-03-03,-3.0,-3.0,-7.0,-7.0,27.0,-13.0,9.0,2.0,,...,-26.0,-25.0,-44.0,-27.0,-25.0,-22.0,-20.0,-37.0,-21.0,-18.0
383,2021-03-04,-6.0,-4.0,-7.0,-9.0,20.0,-19.0,,-4.0,-2.0,...,-24.0,-23.0,-42.0,-27.0,-24.0,-19.0,-18.0,-38.0,-21.0,-16.0
384,2021-03-05,-8.0,-9.0,-10.0,-10.0,17.0,-19.0,-4.0,-9.0,-10.0,...,-27.0,-23.0,-34.0,-27.0,-21.0,-23.0,-20.0,-37.0,-25.0,-18.0
385,2021-03-06,-7.0,-13.0,-13.0,-6.0,26.0,-15.0,-2.0,-6.0,5.0,...,-14.0,-7.0,-11.0,-12.0,-13.0,-11.0,-8.0,-15.0,-9.0,-8.0
