In [1]:
import os
import numpy as np
import pandas as pd
import io
import requests
import json
import prettytable
import matplotlib.pyplot as plt

%matplotlib inline

# Get data

**What data do we need?**
- GDP by states by industry: https://www.bea.gov/system/files/2020-01/qgdpstate0120_2.pdf
- Employment by state by industry:
    - To get the data, we probably need to download the employment data by industry from each state's website.
        - NY: https://labor.ny.gov/stats/2016-2026-Statewide-and-Regional-Long-Term-Industry-Projections.xlsx
        - NJ: https://www.nj.gov/labor/lpa/employ/indoccpj/20162026%20Industry%20Employment%20Projections%20Reports.xlsx
        - ...so the 2016-2026 employment projections are available on each state's website
        
- Labor Productivity by states by industry:
    - Private nonfarm productivity and costs by state and region: https://www.bls.gov/lpc/lpc-by-state-and-region.xlsx
    - https://www.bls.gov/opub/mlr/2019/article/bls-publishes-experimental-state-level-labor-productivity-measures.htm
        

## GDP by states by industry
https://www.bls.gov/developers/api_signature_v2.htm

#### Try API

In [None]:
# BLS API registration key. Read it from the BLS_API_KEY environment variable
# rather than committing the secret to the notebook; falls back to an empty
# string when the variable is not set.
# NOTE(review): the key that was previously hard-coded here should be rotated.
my_bls_api = os.environ.get("BLS_API_KEY", "")

In [48]:
# headers = {'Content-type': 'application/json'}
# data = json.dumps({"seriesid": ['IPUAN1111__L010','IPUAN1111__L011'],"startyear":"2000", "endyear":"2019", "registrationkey": my_bls_api})
# p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
# json_data = json.loads(p.text)
# for series in json_data['Results']['series']:
#     x=prettytable.PrettyTable(["series_id","year","period","value","footnotes"])
#     seriesId = series['seriesID']
#     for item in series['data']:
#         year = item['year']
#         period = item['period']
#         value = item['value']
#         footnotes=""
#         for footnote in item['footnotes']:
#             if footnote:
#                 footnotes = footnotes + footnote['text'] + ','
# #         if 'M01' <= period <= 'M12':
#         x.add_row([seriesId,year,period,value,footnotes[0:-1]])
#     output = open(seriesId + '.txt','w')
#     output.write (x.get_string())
#     output.close()

#### Reading from PDF


In [51]:
import tabula

#### 1. by states

In [67]:
# Parse page 7 of the locally stored PDF (data/qgdpstate0120_2.pdf, resolved
# against the current working directory). The following cell takes element 0
# of the result.
gdp_df = tabula.read_pdf(
    os.path.join(os.getcwd(), "data/qgdpstate0120_2.pdf"),
    pages=7,
)

In [69]:
# Keep the first table from the tabula result. The guard makes the cell safe
# to re-run: once gdp_df is already a DataFrame, indexing it with [0] would
# look up a column labelled 0 and fail.
if not isinstance(gdp_df, pd.DataFrame):
    gdp_df = gdp_df[0]

In [72]:
# Discard the first three rows by position (presumably header residue from
# the PDF extraction — TODO confirm) and keep every column.
gdp_df = gdp_df.iloc[3:, :]

In [76]:
# Keep only the first five columns, selected by position.
gdp_df = gdp_df.iloc[:, list(range(5))]

In [82]:
# The "Millions of dollars" column holds several space-separated values in a
# single string; split it into separate columns (labelled 0, 1, 2, ...) and
# append them to the right of the existing frame.
gdp_df = pd.concat(
    objs=[
        gdp_df,
        gdp_df["Millions of dollars"].str.split(pat=" ", expand=True),
    ],
    axis="columns",
)

In [87]:
# Remove the unsplit source column, "Unnamed: 3", and the split pieces
# labelled 5 through 8; only the columns that get renamed next remain.
gdp_df = gdp_df.drop(
    labels=["Millions of dollars", "Unnamed: 3", 5, 6, 7, 8],
    axis=1,
)

In [89]:
# Relabel the eight columns positionally: the area name, then "2018-1" ..
# "2018-4" and "2019-1" .. "2019-3" (year-quarter labels).
gdp_df.columns = (
    ["areas"]
    + [f"2018-{quarter}" for quarter in range(1, 5)]
    + [f"2019-{quarter}" for quarter in range(1, 4)]
)

In [93]:
# Renumber the rows from 0 and discard the old index (drop=True) so it is not
# kept as an extra column.
gdp_df = gdp_df.reset_index(drop=True)

In [95]:
# Preview the first five rows of the cleaned GDP-by-area table.
gdp_df.head(n=5)

Unnamed: 0,areas,2018-1,2018-2,2018-3,2018-4,2019-1,2019-2,2019-3
0,United States,20163159,20510177,20749752,20897804,21098827,21340267,21542540
1,New England,1070863,1084947,1096174,1101531,1118703,1130084,1142586
2,Connecticut,272854,273884,278295,277874,281659,284357,287560
3,Maine,63698,64746,65434,65545,66590,67138,67905
4,Massachusetts,558472,568220,573506,577754,586347,592588,599092


#### 2. by states by industry 2019 Q3

In [133]:
# Parse page 6 of the same locally stored PDF (data/qgdpstate0120_2.pdf,
# resolved against the current working directory). The following cell takes
# element 0 of the result.
gdp_by_state_by_inds = tabula.read_pdf(
    os.path.join(os.getcwd(), "data/qgdpstate0120_2.pdf"),
    pages=6,
)

In [134]:
# Keep the first table from the tabula result. The guard makes the cell safe
# to re-run: once the variable is already a DataFrame, indexing it with [0]
# would look up a column labelled 0 and fail.
if not isinstance(gdp_by_state_by_inds, pd.DataFrame):
    gdp_by_state_by_inds = gdp_by_state_by_inds[0]

In [135]:
# Preview the first five rows of the raw extracted table.
gdp_by_state_by_inds.head(n=5)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Seasonally adjusted at annual rates,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,,,,,,,,,,Percentage points,,,,,,
1,,Other services (except Real estate and governm...,Management of companies and enterprises,,,Administrative and remediation services suppor...,,,,"Arts, Educational services social assistance H...",,,Accomodation and food services,,enterprises),
2,United States,0.11 0.42,0.16,,,0.10,,,,0.08 0.19,0.02,,0.17,,,0.05 0.01
3,New England,0.09 0.56,0.20,,,0.12,,,,0.23 0.21,0.04,,0.17,,,0.05 -0.01
4,Connecticut,0.03 0.37,0.19,,,0.11,,,,0.27 0.17,0.04,,0.11,,,0.07 -0.04


In [136]:
# Drop the first two rows by position: in the preview they hold the
# "Percentage points" unit label and fragments of the industry headers, not
# data. Take an explicit copy so that the column assignments in the following
# cells write to an independent frame instead of a slice of the original
# (which would raise SettingWithCopyWarning).
gdp_by_state_by_inds = gdp_by_state_by_inds.iloc[2:].copy()

In [137]:
# "Unnamed: 1" carries two space-separated values per row (e.g. "0.11 0.42");
# split them and store each piece under its own industry column.
gdp_by_state_by_inds[
    [
        "Real estate and rental and leasing",
        "Professional, scientific, and technical services",
    ]
] = gdp_by_state_by_inds["Unnamed: 1"].str.split(pat=" ", expand=True)

In [138]:
# The "Seasonally adjusted at annual rates" column carries two space-separated
# values per row (e.g. "0.08 0.19"); split them and store each piece under its
# own industry column.
gdp_by_state_by_inds[
    [
        "Educational services",
        "Health care and social assistance",
    ]
] = gdp_by_state_by_inds["Seasonally adjusted at annual rates"].str.split(
    pat=" ", expand=True
)

In [139]:
# "Unnamed: 14" carries two space-separated values per row (e.g. "0.05 0.01");
# split them and store each piece under its own industry column.
gdp_by_state_by_inds[
    [
        "Other services (except government and government enterprises)",
        "Government and government enterprises",
    ]
] = gdp_by_state_by_inds["Unnamed: 14"].str.split(pat=" ", expand=True)

In [140]:
# Remove the three combined source columns whose values were split out into
# separate industry columns.
gdp_by_state_by_inds = gdp_by_state_by_inds.drop(
    labels=["Unnamed: 1", "Unnamed: 14", "Seasonally adjusted at annual rates"],
    axis=1,
)

In [141]:
# Drop every column that contains at least one missing value; this removes
# the empty placeholder columns produced by the PDF extraction.
gdp_by_state_by_inds = gdp_by_state_by_inds.dropna(axis="columns", how="any")

In [143]:
# Preview the first five rows after splitting and pruning columns.
gdp_by_state_by_inds.head(n=5)

Unnamed: 0.1,Unnamed: 0,Unnamed: 2,Unnamed: 5,Unnamed: 9,Unnamed: 11,Real estate and rental and leasing,"Professional, scientific, and technical services",Educational services,Health care and social assistance,Other services (except government and government enterprises),Government and government enterprises
2,United States,0.16,0.1,0.02,0.17,0.11,0.42,0.08,0.19,0.05,0.01
3,New England,0.2,0.12,0.04,0.17,0.09,0.56,0.23,0.21,0.05,-0.01
4,Connecticut,0.19,0.11,0.04,0.11,0.03,0.37,0.27,0.17,0.07,-0.04
5,Maine,0.27,0.15,0.05,0.33,0.07,0.26,0.06,0.15,0.05,-0.1
6,Massachusetts,0.2,0.1,0.03,0.16,0.13,0.72,0.24,0.22,0.05,0.0


In [146]:
# Maps the placeholder column labels left over from the PDF extraction to the
# area / industry names used as the final column headers.
name_dic = dict(
    [
        ("Unnamed: 0", "areas"),
        ("Unnamed: 2", "Management of companies and enterprises"),
        (
            "Unnamed: 5",
            "Administrative and support and waste management and remediation services",
        ),
        ("Unnamed: 9", "Arts, entertainment, and recreation"),
        ("Unnamed: 11", "Accomodation and food services"),
    ]
)

In [148]:
# Replace the placeholder column labels with the names from name_dic; columns
# not listed in the mapping keep their current label.
gdp_by_state_by_inds = gdp_by_state_by_inds.rename(mapper=name_dic, axis="columns")

In [149]:
# Show a bounded preview of the renamed table rather than dumping the whole
# frame into the notebook output.
gdp_by_state_by_inds.head(10)

Unnamed: 0,areas,Management of companies and enterprises,Administrative and support and waste management and remediation services,"Arts, entertainment, and recreation",Accomodation and food services,Real estate and rental and leasing,"Professional, scientific, and technical services",Educational services,Health care and social assistance,Other services (except government and government enterprises),Government and government enterprises
2,United States,0.16,0.1,0.02,0.17,0.11,0.42,0.08,0.19,0.05,0.01
3,New England,0.2,0.12,0.04,0.17,0.09,0.56,0.23,0.21,0.05,-0.01
4,Connecticut,0.19,0.11,0.04,0.11,0.03,0.37,0.27,0.17,0.07,-0.04
5,Maine,0.27,0.15,0.05,0.33,0.07,0.26,0.06,0.15,0.05,-0.1
6,Massachusetts,0.2,0.1,0.03,0.16,0.13,0.72,0.24,0.22,0.05,0.0
7,New Hampshire,0.12,0.2,0.04,0.12,0.08,0.57,0.15,0.22,0.04,0.06
8,Rhode Island,0.27,0.22,0.06,0.28,0.05,0.28,0.37,0.35,0.04,-0.03
9,Vermont,0.09,0.12,0.03,0.29,0.07,0.41,0.16,0.22,0.05,-0.02
10,Mideast,0.15,0.0,0.02,0.19,0.09,0.46,0.06,0.21,0.04,-0.07
11,Delaware,0.15,0.05,0.0,0.13,0.1,0.35,0.04,0.21,0.03,0.05
