Explore and list the various attributes found in the company-facts responses from EDGAR, by accounting standard (currently set to `'us-gaap'`, but it could be set to `'ifrs-full'`, for example).

In [1]:
# import modules
import requests
import pandas as pd
import matplotlib
import time

from edgar_func import get_company_basics

In [2]:
# create request header (sent with every request made in this notebook)
headers = {'User-Agent': "email@address.com"}

# get all companies data
tickers = requests.get(
    "https://www.sec.gov/files/company_tickers.json",
    headers=headers,
    timeout=30,  # do not hang the kernel forever on a stalled connection
)
# fail with a clear HTTP error instead of an obscure JSON/KeyError further down
tickers.raise_for_status()
time.sleep(0.2)

# decode the JSON payload once and reuse it below
tickers_json = tickers.json()

# parse CIK // without leading zeros
cik_unprocessed = tickers_json['0']['cik_str']

# dictionary to dataframe (one row per top-level key of the payload)
data = pd.DataFrame.from_dict(tickers_json, orient='index')

# add leading zeros to CIK (10 characters wide, as interpolated into the
# CIK{...}.json request URL later on)
data['cik_str'] = data['cik_str'].astype(str).str.zfill(10)

In [3]:
# accounting standard whose attribute names are collected from each response
# (a key under 'facts'; e.g. 'ifrs-full' instead of 'us-gaap')
taxonomy = 'us-gaap'
# how many companies (leading rows of `data`) to query
n_companies = 50
# pause after every request, same value as used after the tickers request;
# NOTE(review): presumably needed to stay within the SEC request-rate limit - confirm
request_pause = 0.2

keys_df = pd.DataFrame()  # NOTE(review): not used by any cell visible here
keys_all = set()

# never index past the end of `data` if it has fewer rows than requested
for i in range(min(n_companies, len(data))):
    current = data.iloc[i]
    try:
        cik = current.cik_str

        # get company facts data
        companyFacts = requests.get(
            f'https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json',
            headers=headers,
            timeout=30,  # do not hang the kernel forever on a stalled connection
        )

        # attribute names reported by this company under the chosen standard
        keys = set(companyFacts.json()['facts'][taxonomy])

        keys_all.update(keys)

    except ValueError:
        # response body was not valid JSON
        print(f"JSON Decode Error for {current.ticker}")
    except KeyError:
        # response has no 'facts' entry, or no entry for the chosen standard
        print(f"KeyError for {current.ticker}")
    finally:
        # throttle whether or not the request yielded usable data
        time.sleep(request_pause)
    

KeyError for TSM
KeyError for NVO
JSON Decode Error for SPY
KeyError for AZN
KeyError for SAP
KeyError for SHEL
KeyError for NVS
KeyError for FMX
JSON Decode Error for QQQ


In [4]:
# dump every collected attribute name (set iteration order, i.e. unsorted)
print([*keys_all])

['SecuritiesBorrowedCollateralObligationToReturnSecurities', 'InvestmentIncomeNonoperating', 'ProceedsFromPaymentsToMinorityShareholders', 'StockRepurchaseProgramAuthorizedAmount1', 'TemporaryEquityAccretionToRedemptionValueAdjustment', 'HedgedLiabilityFairValueHedgeCumulativeIncreaseDecrease', 'IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest', 'ShortdurationInsuranceContractsLiabilityForUnpaidClaimsAndClaimAdjustmentExpenseOtherReconcilingItem', 'DebtSecuritiesHeldToMaturityAllowanceForCreditLossPurchasedWithCreditDeteriorationIncrease', 'PreferredStockIncludingAdditionalPaidInCapital', 'AdjustmentForAmortization', 'InterestIncomeExpenseNonoperatingNet', 'AssetsAverageOutstanding', 'GoodwillOtherChanges', 'DeferredTaxAssetsTaxDeferredExpenseCompensationAndBenefits', 'DerivativeInstrumentsLossReclassifiedFromAccumulatedOCIIntoIncomeEffectivePortion', 'ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsForfeit

In [5]:
# alphabetically ordered list of the collected attribute names
lst = sorted(keys_all)

print(lst)

['AcceleratedShareRepurchaseProgramAdjustment', 'AcceleratedShareRepurchasesFinalPricePaidPerShare', 'AcceleratedShareRepurchasesSettlementPaymentOrReceipt', 'AccountsAndFinancingReceivableAllowanceForCreditLoss', 'AccountsAndNotesReceivableNet', 'AccountsAndOtherReceivablesNetCurrent', 'AccountsNotesAndLoansReceivableNetCurrent', 'AccountsPayable', 'AccountsPayableAndAccruedLiabilities', 'AccountsPayableAndAccruedLiabilitiesCurrent', 'AccountsPayableAndAccruedLiabilitiesCurrentAndNoncurrent', 'AccountsPayableAndAccruedLiabilitiesFairValueDisclosure', 'AccountsPayableAndAccruedLiabilitiesNoncurrent', 'AccountsPayableAndOtherAccruedLiabilities', 'AccountsPayableAndOtherAccruedLiabilitiesCurrent', 'AccountsPayableCurrent', 'AccountsPayableOtherCurrent', 'AccountsPayableOtherCurrentAndNoncurrent', 'AccountsPayableRelatedPartiesCurrent', 'AccountsPayableTradeCurrent', 'AccountsReceivableAllowanceForCreditLossExcludingAccruedInterestNoncurrent', 'AccountsReceivableBilledForLongTermContracts

In [6]:
# one-column frame holding the sorted attribute names
df = pd.DataFrame({"attribute": lst})

In [7]:
# preview the first five rows
df.head(n=5)

Unnamed: 0,attribute
0,AcceleratedShareRepurchaseProgramAdjustment
1,AcceleratedShareRepurchasesFinalPricePaidPerShare
2,AcceleratedShareRepurchasesSettlementPaymentOr...
3,AccountsAndFinancingReceivableAllowanceForCred...
4,AccountsAndNotesReceivableNet


In [8]:
# persist the attribute list; the integer row index is written as the first,
# unnamed column (pandas default, spelled out here)
df.to_csv('us_gaap_attributes.csv', index=True)