For now, only download the mapping-table rows that satisfy both of the following conditions (the same rule applies to IS and CF):
- Non-blank `Final account name` and
- Non-N/A `Lookup on vstock BS nonfin`

See https://docs.google.com/spreadsheets/d/1iiR5EZk3MVRvdNpcJJWF41Yj2yTtVgSQAmxWeNh0YTk/

In [128]:
import pandas as pd
import numpy as np
import json
import glob
import traceback
from functions.fad_crawl.helpers.processingData import simplifyText

In [129]:
# Worksheet names inside the mapping workbook, one per financial statement.
BS_sheet = "vstock+VAS BS"
IS_sheet = "vstock+VAS IS"
CF_D_sheet = "vstock+VAS CF Direct"
CF_IND_sheet = "vstock+VAS CF Indirect"

# Passing a list of sheet names makes read_excel return a dict of DataFrames
# keyed by sheet name, so each sheet is later accessed as mapping_all[<sheet>].
mapping_all = pd.read_excel(
    "functions/schema/mapping_table.xlsx",
    sheet_name=[BS_sheet, IS_sheet, CF_D_sheet, CF_IND_sheet],
)

In [130]:
### Keep only the mapping rows that can be used to build the JSON mapping
def _rows_with_mapping(sheet_name, lookup_column):
    """Return the rows of one mapping sheet that have both a final account
    name and a value in the given lookup column.
    """
    sheet = mapping_all[sheet_name]
    has_final_name = sheet['Final account name'].notnull()
    has_lookup_key = sheet[lookup_column].notnull()
    return sheet[has_final_name & has_lookup_key]


mapping_BS_filtered = _rows_with_mapping(BS_sheet, 'Lookup on vstock BS nonfin')
mapping_IS_filtered = _rows_with_mapping(IS_sheet, 'Lookup on vstock IS nonfin')
mapping_CF_D_filtered = _rows_with_mapping(CF_D_sheet, 'Lookup on vstock CF Direct nonfin')
mapping_CF_IND_filtered = _rows_with_mapping(CF_IND_sheet, 'Lookup on vstock CF Indirect nonfin')

In [131]:
### Create mapping dictionaries
# Each dict maps a lookup key (the sheet's "Lookup on vstock ..." column) to the
# final account name. If a lookup key occurs more than once, the last row wins.
mapping_BS_dict = dict(zip(mapping_BS_filtered['Lookup on vstock BS nonfin'], mapping_BS_filtered['Final account name']))
mapping_IS_dict = dict(zip(mapping_IS_filtered['Lookup on vstock IS nonfin'], mapping_IS_filtered['Final account name']))
mapping_CF_D_dict = dict(zip(mapping_CF_D_filtered['Lookup on vstock CF Direct nonfin'], mapping_CF_D_filtered['Final account name']))
mapping_CF_IND_dict = dict(zip(mapping_CF_IND_filtered['Lookup on vstock CF Indirect nonfin'], mapping_CF_IND_filtered['Final account name']))


### The mapping dict is constructed manually based on the structure of the lookup_dict_all_nonfin file
# Layout: report code -> report full name -> {lookup key: final account name}.
mapping_dict_all = {
    "CDKT": {
        "Balance Sheet": mapping_BS_dict,
    },
    "KQKD": {
        "Income Statement": mapping_IS_dict,
    },
    "LC": {
        "CashFlow Direct": mapping_CF_D_dict,
        "CashFlow Indirect": mapping_CF_IND_dict,
    },
}

# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of relying on the platform's locale default
# (which can raise UnicodeEncodeError, e.g. with cp1252 on Windows).
with open("functions/schema/mapping_dict_all_nonfin.json", "w", encoding="utf-8") as writefile:
    json.dump(mapping_dict_all, writefile, ensure_ascii=False, indent=4)

In [132]:
### Load lookup all accounts nonfin file
# The lookup file contains Vietnamese account names, so decode it explicitly as
# UTF-8 rather than with the platform's locale-dependent default encoding.
with open("functions/schema/lookup_dict_all_nonfin.json", "r", encoding="utf-8") as jsonfile:
    lookup = json.load(jsonfile)
# Bare expression: displays the loaded dict as the notebook cell output.
lookup

{'CDKT': {'Balance Sheet': {'2995': {'ID': 1,
    'ReportNormID': 2995,
    'Name': 'TÀI SẢN ',
    'NameEn': 'ASSETS',
    'NameMobile': 'TÀI SẢN ',
    'NameMobileEn': 'ASSETS',
    'CssStyle': 'MaxB',
    'Padding': 'Padding1',
    'ParentReportNormID': 2995,
    'ReportComponentName': 'Cân đối kế toán',
    'ReportComponentNameEn': 'Balance Sheet',
    'Unit': None,
    'UnitEn': None,
    'OrderType': None,
    'OrderingComponent': None,
    'RowNumber': None,
    'ReportComponentTypeID': None,
    'ChildTotal': 0,
    'Levels': 0,
    'Value1': None,
    'Value2': None,
    'Value3': None,
    'Value4': None,
    'Vl': None,
    'IsShowData': True},
   '3000': {'ID': 2,
    'ReportNormID': 3000,
    'Name': 'A. TÀI SẢN NGẮN HẠN',
    'NameEn': 'A. SHORT-TERM ASSETS',
    'NameMobile': 'A. TÀI SẢN NGẮN HẠN',
    'NameMobileEn': 'A. SHORT-TERM ASSETS',
    'CssStyle': 'LargeB',
    'Padding': 'Padding1',
    'ParentReportNormID': 2996,
    'ReportComponentName': 'Cân đối kế toán',


In [133]:
### Generate entry and return FAD account
def get_FAD_acc(report: str, report_fullname: str, d: dict, lookup_dict: dict, mapping_dict: dict) -> str:
    """Return the final FAD account name for one account of a financeInfo report.

    The mapping key is built as
    ``<NameEn>;<parent NameEn>;<Name>;<parent Name>``, where every part is
    first passed through ``simplifyText``. The parent account is looked up in
    ``lookup_dict`` by the account's ``ParentReportNormID``.

    Args:
        report: Report code used as the first-level key (e.g. "CDKT").
        report_fullname: Report name used as the second-level key
            (e.g. "Balance Sheet").
        d: One account entry; must provide 'NameEn', 'Name' and
            'ParentReportNormID'.
        lookup_dict: Nested dict ``report -> report_fullname -> str(norm id)
            -> account entry`` used to resolve the parent account.
        mapping_dict: Nested dict ``report -> report_fullname -> mapping key
            -> final account name``.

    Returns:
        The mapped final account name, or "N/A" when ``mapping_dict`` has no
        entry for the report or for the generated key.

    Raises:
        KeyError: If ``d`` lacks a required field or the parent account is not
            present in ``lookup_dict`` (these lookups are not guarded).
    """
    acc_n = simplifyText(d['NameEn'])
    acc_vi_n = simplifyText(d['Name'])
    # Resolve the parent entry once; IDs are stored as string keys in the lookup.
    parent = lookup_dict[report][report_fullname][str(d['ParentReportNormID'])]
    parent_n = simplifyText(parent['NameEn'])
    parent_vi_n = simplifyText(parent['Name'])
    try:
        return mapping_dict[report][report_fullname][f'{acc_n};{parent_n};{acc_vi_n};{parent_vi_n}']
    except KeyError:
        # Only a missing mapping means "not mapped"; anything else (including
        # KeyboardInterrupt) should not be silently turned into "N/A".
        return "N/A"

In [134]:
### Testing
# Smoke test: print the mapped FAD account for every account found in the first
# 20 files matching the CDKT annual pattern.
for file in glob.glob('functions/schemaData/financeInfo/*_*_*_CDKT_Annual_*.json')[:20]:
    # The path is split on "_"; index 3 lines up with the report-code slot of
    # the glob pattern. NOTE(review): index 1 is presumably the industry name —
    # confirm against the file-naming scheme.
    file_components = file.split("_")
    if file_components[1] == "Finance and Insurance":
        # The mapping here is the non-financial ("nonfin") one, so skip these files.
        continue
    report = file_components[3]
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(file, 'r', encoding='utf-8') as jsonfile:
        try:
            j = json.load(jsonfile)
            # j[1] is expected to be a dict whose first key is the report's full name.
            report_fullname = list(j[1].keys())[0]
            for account_content in j[1][report_fullname]:
                print(get_FAD_acc(report, report_fullname, account_content, lookup, mapping_dict_all))
        except Exception:
            # Best-effort run: report the failure and move on to the next file.
            print(traceback.format_exc())

N/A
current_assets
cash_cash_equiv
cash
cash_equiv
st_invmts
hld_trading_secs_st
pvsn_hld_trading_secs
hld_mature_invmts
current_ar
st_trade_ar
st_adv_suppliers
st_internal_ar
construction_cntrct_ar
st_loan_ar
other_st_ar
pvsn_doubt_st_ar
st_wait_resolution
inventories_all
inventories
pvsn_obsolete_inventories
other_current_assets_all
st_prepaid_exp
vat_deductible
from_state_tax_ar
gov_bonds_hld_resale
other_current_assets
noncurrent_assets
lt_ar
lt_trade_ar
lt_adv_suppliers
pd_capital_dependent
lt_internal_ar
lt_loan_ar
other_lt_ar
pvsn_doubt_lt_ar
fixed_assets
tgbl_fixed_assets
tgbl_fixed_assets_cost
tgbl_fixed_assets_depre
fin_leases
fin_leases_cost
fin_leases_cost_depre
intgbl_fixed_assets
intgbl_fixed_assets_cost
intgbl_fixed_assets_amor
invmts_properties
invmts_properties_cost
invmts_properties_depre
lt_assets_in_progress
lt_wip
construction_wip
lt_invmts
invmts_subs
invmts_assocs
invmts_other_entities
pvsn_lt_invmts
hld_mature_invmts_lt
other_lt_invmts
other_lt_assets_all
lt_pre