In [2]:
# import gc
import pandas as pd
import numpy as np

import beaapi
from beaapi.beaapi_error import no_data_err_msg, multiple_err_msg

In [3]:
# Get key from unversioned file
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()
# Be careful with this value: USERID is passed back in the 'param' dict of many return values.
beakey = os.environ.get("beakey")
if not beakey:
    # Fail fast: every query in this notebook needs the key, and a missing key would
    # otherwise only surface later as an unrelated-looking API error.
    raise RuntimeError("BEA API key not found: set 'beakey' in the environment or in a .env file.")

In [4]:
def gen_year_list(start, l, n):
    """Return `n` strings, each a comma-separated run of `l` consecutive years.

    The first string starts at `start`; each following string starts where the
    previous one ended, so together they cover `n * l` consecutive years.
    """
    windows = []
    for window_idx in range(n):
        first_year = start + window_idx * l
        window_years = np.arange(l) + first_year
        windows.append(','.join(map(str, window_years)))
    return windows

## MNE

Note: I was looking at what directions were available for each type, but then Thomas sent me his manual list. We could check that they line up.

In [32]:
# MNE
# Lots of queries fail due to time-outs, so pull smaller windows of years (I manually checked
# that there's no data before the start date) and combine them later. This normally works, but
# failures still happen intermittently, so leave in the try/except.
# The reshaping of the large tables takes lots of memory, so skip the checks while we pull
# everything and check later.
do_checks = False
class_MNE = beaapi.get_parameter_values(beakey, 'MNE', 'Classification')
# Keyed by (Classification, DirectionOfInvestment, Year, 'DI' or 'AMNE').
MNE_tbls = {}


print("DI")
# Classification/direction pairs that are pulled in several year windows instead of Year='All'.
years_dict_di = {("CountryByIndustry", "Outward"): gen_year_list(1997, 12, 2),
                 ("CountryByIndustry", "Inward"): gen_year_list(1997, 12, 2),
                 ("StatebyCountryofUBO", "Outward"): gen_year_list(1997, 6, 4),
                 ("StatebyCountryofUBO", "Inward"): gen_year_list(1997, 6, 4)}
# Response message that is reported (not re-raised) when it comes back for a query.
state_direction_note = 'Note that for state-level data on U.S. affiliates, directionOfInvestment should be state'
for cl_MNE in class_MNE["key"]:
    # NOTE(review): the excluded names look like Classification+Direction concatenations;
    # confirm they match real Classification keys, otherwise this exclusion never triggers.
    directions = ["Outward", "Inward"] if cl_MNE not in ['StatebyCountryofUBOOutward','StatebyCountryofUBOInward'] else [] # could put more in here
    for direction in directions:
        years = years_dict_di.get((cl_MNE, direction), ['All'])
        for year in years:
            # The same classification/direction can be queried once per year window, so the
            # window is part of every diagnostic; otherwise a lost window cannot be identified.
            query_desc = "cl_MNE=" + cl_MNE + ". direction=" + direction + ". year=" + year
            try:
                bea_tbl = beaapi.get_data(beakey, "MNE", DirectionOfInvestment=direction,
                                          Classification=cl_MNE, Year=year, GetFootnotes='Yes',
                                          do_checks=do_checks)
                # Reshaping (to_wide_vars_in_cols / to_wide_vars_in_rows) is deferred to a later cell.
                MNE_tbls[(cl_MNE, direction, year, 'DI')] = bea_tbl
            except beaapi.BEAAPIFailure:
                print("query_fail_str: " + query_desc)
            except beaapi.BEAAPIResponseError as e:
                if e.args[0] == no_data_err_msg:
                    print("no data: " + query_desc)
                elif e.args[0] == state_direction_note:
                    print(query_desc + ". " + e.args[0])
                else:
                    # Unexpected response error: re-raise with the original traceback.
                    raise

print("AMNE")
# Classification/direction pairs that are pulled in several year windows instead of Year='All'.
years_dict_amne = {("StatebyCountryofUBO", "Outward"): gen_year_list(2009, 6, 2)}
# Response message that is reported (not re-raised) when it comes back for a query.
state_direction_note = 'Note that for state-level data on U.S. affiliates, directionOfInvestment should be state'
for cl_MNE in class_MNE["key"]:
    # NOTE(review): the listed names look like Classification+Direction concatenations;
    # confirm they match real Classification keys, otherwise the ['State'] branch never triggers.
    directions = ["Outward", "Inward", "State", "Parent"] if cl_MNE not in ['StatebyCountryofUBOOutward','StatebyCountryofUBOInwardAll', 'StatebyCountryofUBOParentAll'] else ['State'] # could put more in here
    for direction in directions:
        years = years_dict_amne.get((cl_MNE, direction), ['All'])
        for year in years:
            # The same classification/direction can be queried once per year window, so the
            # window is part of every diagnostic; otherwise a lost window cannot be identified.
            query_desc = "cl_MNE=" + cl_MNE + ". direction=" + direction + ". year=" + year
            try:
                bea_tbl = beaapi.get_data(beakey, "MNE", DirectionOfInvestment=direction,
                                          OwnershipLevel="1", NonBankAffiliatesOnly="0", Classification=cl_MNE, Year=year,
                                          GetFootnotes='Yes', do_checks=do_checks)
                # Reshaping (to_wide_vars_in_cols / to_wide_vars_in_rows) is deferred to a later cell.
                MNE_tbls[(cl_MNE, direction, year, 'AMNE')] = bea_tbl
            except beaapi.BEAAPIFailure:
                print("query_fail_str: " + query_desc)
            except beaapi.BEAAPIResponseError as e:
                # "No data" may arrive alone or as one of several messages bundled together.
                if e.args[0] == no_data_err_msg or (e.args[0] == multiple_err_msg and no_data_err_msg in e.messages):
                    print("no data: " + query_desc)
                elif e.args[0] == state_direction_note:
                    print(query_desc + ". " + e.args[0])
                else:
                    # Unexpected response error: re-raise with the original traceback.
                    raise

DI
no data: cl_MNE=CountryByDestination. direction=Outward
no data: cl_MNE=CountryByDestination. direction=Inward
no data: cl_MNE=CountrybyType. direction=Outward
no data: cl_MNE=CountryofUBO. direction=Outward
no data: cl_MNE=CountryofUBObyDestination. direction=Outward
no data: cl_MNE=CountryofUBObyDestination. direction=Inward
no data: cl_MNE=CountryofUBObyIndustry. direction=Outward
no data: cl_MNE=countryofUBObyType. direction=Outward
no data: cl_MNE=IndustryByCountry. direction=Outward
no data: cl_MNE=IndustryByCountry. direction=Inward
no data: cl_MNE=IndustrybyCountryofUBO. direction=Outward
no data: cl_MNE=IndustrybyCountryofUBO. direction=Inward
no data: cl_MNE=IndustryByDestination. direction=Outward
no data: cl_MNE=IndustryByDestination. direction=Inward
no data: cl_MNE=IndustrybyType. direction=Outward
no data: cl_MNE=IndustryofSales. direction=Outward
no data: cl_MNE=IndustryofSales. direction=Inward
no data: cl_MNE=IndustryofUSParent. direction=Outward
no data: cl_MNE=In

In [37]:
# For each query type (DI / AMNE), record which DirectionOfInvestment values ended up in
# MNE_tbls for each Classification, then write one CSV per query type.
cl_dir = {"DI": {}, "AMNE": {}}
for cl_MNE, direction, _, t in MNE_tbls.keys():
    cl_dir[t].setdefault(cl_MNE, set()).add(direction)

# Store sorted lists: set iteration order for strings can change between Python sessions,
# which would make the written CSVs differ from run to run for no real reason.
cl_dir = {t: {cl_MNE: sorted(dirs) for cl_MNE, dirs in by_class.items()}
          for t, by_class in cl_dir.items()}

for t, csv_path in (("DI", "../docs/tables/MNE_DI_classdir.csv"),
                    ("AMNE", "../docs/tables/MNE_AMNE_classdir.csv")):
    # Building the columns straight from the dict also works when a query type has no tables
    # (a header-only CSV is written instead of failing).
    pd.DataFrame({"Classification": list(cl_dir[t].keys()),
                  "DirectionOfInvestment": list(cl_dir[t].values())}).to_csv(csv_path, index=False)



In [5]:
# Consolidate tables: results pulled with Year='All' are kept as they are, while results
# pulled in several year windows are concatenated into a single table stored under Year='All'.
MNE_ATTR_NAMES = ('time_invariant_vars', 'time_invariant_keys',
                  'time_variant_vars', 'time_variant_keys', 'index_cols')

MNE_tbls2 = {}
to_consolidate = {}
for (cl_MNE, direction, year, mne_type), tbl in MNE_tbls.items():
    if year == 'All':
        MNE_tbls2[(cl_MNE, direction, year, mne_type)] = tbl
    else:
        to_consolidate.setdefault((cl_MNE, direction, mne_type), []).append(tbl)

for (cl_MNE, direction, mne_type), tbl_list in to_consolidate.items():
    combined = pd.concat(tbl_list, axis=0)
    # Reset the attrs, since the composites may not have them after the concat. Each composite
    # takes them from its own first window rather than from an unrelated table, and tables
    # that were not concatenated keep the attrs they came with.
    for attr_name in MNE_ATTR_NAMES:
        combined.attrs[attr_name] = tbl_list[0].attrs[attr_name]
    MNE_tbls2[(cl_MNE, direction, 'All', mne_type)] = combined

This next part requires lots of memory

In [None]:
# Might want to do these
#del(MNE_tbls)
#gc.collect()

# Run both reshaping helpers on every consolidated MNE table; the reshaped results are not kept.
for mne_tbl in MNE_tbls2.values():
    beaapi.to_wide_vars_in_cols(mne_tbl)
    beaapi.to_wide_vars_in_rows(mne_tbl)

## Others

In [6]:
# IntlServTrade
# Query the dataset once per value of a single parameter: first one country at a time,
# then one type of service at a time. Each result is run through both reshaping helpers.
for ist_param in ('AreaOrCountry', 'TypeOfService'):
    ist_values = beaapi.get_parameter_values(beakey, 'IntlServTrade', ist_param)
    for ist_value in ist_values["Key"]:
        bea_tbl = beaapi.get_data(beakey, "IntlServTrade", do_checks=True, **{ist_param: ist_value})

        beaapi.to_wide_vars_in_cols(bea_tbl)
        beaapi.to_wide_vars_in_rows(bea_tbl)

In [None]:
# IIP: query one TypeOfInvestment at a time, then one Year at a time, and run each result
# through both reshaping helpers.

# Specific TypeOfInvestment
toi_IIP = beaapi.get_parameter_values(beakey, 'IIP', 'TypeOfInvestment')
for toi in toi_IIP["Key"]:
    bea_tbl = beaapi.get_data(beakey, "IIP", TypeOfInvestment=toi, do_checks=True)

    beaapi.to_wide_vars_in_cols(bea_tbl)
    beaapi.to_wide_vars_in_rows(bea_tbl)

# Specific Year
year_IIP = beaapi.get_parameter_values(beakey, 'IIP', 'Year')
for year in year_IIP["Key"]:
    # Call get_data directly: wrapping it in a lambda handed the reshaping helpers a function
    # object instead of the returned table, and the query itself was never issued.
    bea_tbl = beaapi.get_data(beakey, "IIP", Year=year, do_checks=True)

    beaapi.to_wide_vars_in_cols(bea_tbl)
    beaapi.to_wide_vars_in_rows(bea_tbl)

In [None]:
# ITA: query one Indicator at a time, then one AreaOrCountry at a time, and run each result
# through both reshaping helpers.

# One indicator
idic_ITA = beaapi.get_parameter_values(beakey, 'ITA', 'Indicator')
# These don't report global sum, so ask for all countries individually
idic_ITA_noglobal = [
    'ExpGdsAutoEngAndEngParts',
    'ExpGdsOthAutoPartsAndAcc',
    'ExpGdsPassCars',
    'ExpGdsTrucksBusesSpecPurpVeh',
    'ImpGdsAutoEngAndEngParts',
    'ImpGdsOthAutoPartsAndAcc',
    'ImpGdsPassCars',
    'ImpGdsTrucksBusesSpecPurpVeh',
]
# pd.DataFrame({'Indicator':idic_ITA_noglobal}).to_csv("../docs/tables/idic_ITA_noglobal.csv")
for idic in idic_ITA["Key"]:
    if idic in idic_ITA_noglobal:
        bea_tbl = beaapi.get_data(beakey, "ITA", Indicator=idic, AreaOrCountry="All", do_checks=True)
    else:
        bea_tbl = beaapi.get_data(beakey, "ITA", Indicator=idic, do_checks=True)
    beaapi.to_wide_vars_in_cols(bea_tbl)
    beaapi.to_wide_vars_in_rows(bea_tbl)

# One Area
areas_ITA = beaapi.get_parameter_values(beakey, 'ITA', 'AreaOrCountry')
for area in areas_ITA["Key"]:
    # The "AllCountries" key is skipped; every other area is queried on its own.
    if area != "AllCountries":
        bea_tbl = beaapi.get_data(beakey, "ITA", AreaOrCountry=area, do_checks=True)

        beaapi.to_wide_vars_in_cols(bea_tbl)
        beaapi.to_wide_vars_in_rows(bea_tbl)

In [23]:
# Regional: for every table, query (1) a single LineCode for up to two GeoFips values and
# (2) all LineCodes for a single GeoFips value; each result goes through both reshaping helpers.
tbls_Reg = beaapi.get_parameter_values(beakey, 'Regional', 'TableName')

# 1 line code
for table_name in tbls_Reg["Key"]:
    line_code = beaapi.get_parameter_values_filtered(
        beakey, 'Regional', 'LineCode', TableName=table_name)['Key'][0]

    fips_keys = beaapi.get_parameter_values_filtered(
        beakey, 'Regional', 'GeoFips', TableName=table_name)['Key']
    # Use the first GeoFips key, joined with the second one when the table lists more than one.
    if fips_keys.shape[0] > 1:
        geo_fips = fips_keys[0] + "," + fips_keys[1]
    else:
        geo_fips = fips_keys[0]

    bea_tbl = beaapi.get_data(beakey, "Regional", TableName=table_name, LineCode=line_code,
                              Year="ALL", GeoFips=geo_fips, do_checks=True)

    beaapi.to_wide_vars_in_cols(bea_tbl)
    beaapi.to_wide_vars_in_rows(bea_tbl)

# 1 GeoFips
for table_name in tbls_Reg["Key"]:
    first_fips = beaapi.get_parameter_values_filtered(
        beakey, 'Regional', 'GeoFips', TableName=table_name)['Key'][0]
    bea_tbl = beaapi.get_data(beakey, 'Regional', LineCode="All", Year="ALL", GeoFips=first_fips,
                              TableName=table_name, do_checks=True)
    beaapi.to_wide_vars_in_cols(bea_tbl)
    beaapi.to_wide_vars_in_rows(bea_tbl)

  return op.get_result()


In [29]:
# NIPA: fetch every table listed for the dataset first, then run both reshaping helpers on each.
tbls_NIPA = beaapi.get_parameter_values(beakey, 'NIPA', 'TableName')

NIPA_tbls = []
for table_name in tbls_NIPA["TableName"]:
    NIPA_tbls.append(
        beaapi.get_data(beakey, 'NIPA', Frequency='A,Q,M', Year='X',
                        TableName=table_name, do_checks=True)
    )

for nipa_tbl in NIPA_tbls:
    beaapi.to_wide_vars_in_cols(nipa_tbl)
    beaapi.to_wide_vars_in_rows(nipa_tbl)


In [30]:

# NIUnderlyingDetail: fetch every table listed for the dataset first, then run both
# reshaping helpers on each.
tbls_NIUD = beaapi.get_parameter_values(beakey, 'NIUnderlyingDetail', 'TableName')

NIUD_tbls = []
for table_name in tbls_NIUD["TableName"]:
    NIUD_tbls.append(
        beaapi.get_data(beakey, 'NIUnderlyingDetail', Frequency='A,Q,M', Year='X',
                        TableName=table_name, do_checks=True)
    )

for niud_tbl in NIUD_tbls:
    beaapi.to_wide_vars_in_cols(niud_tbl)
    beaapi.to_wide_vars_in_rows(niud_tbl)


In [5]:
# FixedAssets: fetch every table listed for the dataset first, then run both reshaping
# helpers on each.
tbls_FA = beaapi.get_parameter_values(beakey, 'FixedAssets', 'TableName')

FA_tbls = []
for table_name in tbls_FA["TableName"]:
    FA_tbls.append(
        beaapi.get_data(beakey, 'FixedAssets', Year='X', TableName=table_name, do_checks=True)
    )

for fa_tbl in FA_tbls:
    beaapi.to_wide_vars_in_cols(fa_tbl)
    beaapi.to_wide_vars_in_rows(fa_tbl)

In [None]:
# Wait until they've fixed the issue with requesting multiple tables
#bea_tbl = lambda: beaapi.get_data(beakey, 'InputOutput', Year="ALL", do_checks=True)
#beaapi.to_wide_vars_in_cols(bea_tbl)
#x=beaapi.to_wide_vars_in_rows(bea_tbl)

In [None]:
# underlyingGDPbyIndustry: one request for all years, industries and tables at annual
# frequency, followed by both reshaping helpers.
bea_tbl = beaapi.get_data(
    beakey,
    'underlyingGDPbyIndustry',
    Year='ALL',
    Industry='ALL',
    tableID='ALL',
    Frequency="A",
    do_checks=True,
)
beaapi.to_wide_vars_in_cols(bea_tbl)
# Only the rows-oriented result is kept, in x.
x = beaapi.to_wide_vars_in_rows(bea_tbl)


In [7]:
# GDPbyIndustry: one request for all years, industries and tables at annual and quarterly
# frequency, followed by both reshaping helpers.
bea_tbl = beaapi.get_data(
    beakey,
    'GDPbyIndustry',
    Year='ALL',
    Industry='ALL',
    tableID='ALL',
    Frequency="A,Q",
    do_checks=True,
)
beaapi.to_wide_vars_in_cols(bea_tbl)
# Only the rows-oriented result is kept, in x.
x = beaapi.to_wide_vars_in_rows(bea_tbl)
