# Download Quandl data to apply Defensive Investor's criteria

In [103]:
%load_ext autoreload
%autoreload 2

import sys
import pandas as pd
import numpy as np
import quandl
import os
import warnings
from datetime import datetime
from tqdm import tqdm_notebook as tqdm
import pandas_datareader as pdr
import matplotlib.pyplot as plt
from graham import *

# Silence every Python warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Read the Quandl API key from the environment; os.getenv returns None when the variable is unset.
# NOTE(review): the variable name is spelled 'QUANDL_APY_KEY' (APY, not API) — confirm it matches
# the name actually exported in the environment before renaming it.
quandl.ApiConfig.api_key = os.getenv('QUANDL_APY_KEY')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## DJIA 30 companies

In [17]:
# Ticker symbols used for the DJIA download below.
# NOTE(review): get_djia_symbols is not defined in this notebook; presumably it is provided by
# the `from graham import *` star import — confirm.
list_djia = get_djia_symbols()

In [31]:
# Destination CSV for the DJIA fundamentals, under ./data_defensive.
fname = os.path.join(os.getcwd(), 'data_defensive', 'djia.csv')
# to_csv does not create missing parent directories, so make sure the folder exists first.
os.makedirs(os.path.dirname(fname) or '.', exist_ok=True)

# Columns requested from SHARADAR/SF1 for every ticker.
columns = ['ticker', 'calendardate', 'datekey', 'revenueusd', 'currentratio', 'eps', 'dps', 'bvps', 'price']
# `dimension` and `ticker` are passed to the API as row filters, `qopts` restricts the returned
# columns, and paginate=True lets the client keep requesting pages instead of only the first one.
# NOTE(review): 'ARY' is presumably Sharadar's as-reported annual dimension — confirm in the SF1 docs.
df = quandl.get_table('SHARADAR/SF1', dimension='ARY', ticker=list_djia, qopts={"columns": columns},
                      paginate=True)
# Order rows by company and then by fiscal period; assignment instead of inplace=True keeps the
# statement chainable and makes the data lineage explicit.
df = df.sort_values(['ticker', 'calendardate'])
# Persist without the row index (after sorting it is just the arbitrary API order).
df.to_csv(fname, index=False)

## S&P 500 companies

In [60]:
# Ticker symbols used for the S&P 500 download below.
# NOTE(review): get_sp500_symbols is not defined in this notebook; presumably it is provided by
# the `from graham import *` star import — confirm.
list_sp500 = get_sp500_symbols()
# Cell result: how many symbols were returned.
len(list_sp500)

505

In [62]:
# Destination CSV for the S&P 500 fundamentals, under ./data_defensive in the current working directory.
fname = os.path.join(os.getcwd(), 'data_defensive', 'sp500.csv')


def batch(iterable, n=1):
    """Split a sized, sliceable sequence into consecutive chunks.

    Args:
        iterable: Any object supporting ``len()`` and slicing (list, tuple, str, ...).
        n: Maximum number of items per chunk.

    Yields:
        Slices of ``iterable`` in order, each holding ``n`` items except possibly
        the last one, which holds whatever remains.
    """
    size = len(iterable)
    for start in range(0, size, n):
        stop = start + n
        # Clamp the final chunk to the end of the sequence.
        yield iterable[start:stop if stop < size else size]

        
# Columns requested from SHARADAR/SF1; identical for every batch, so defined once outside the loop.
columns = ['ticker', 'calendardate', 'datekey', 'revenueusd', 'currentratio', 'eps', 'dps', 'bvps', 'price']

# to_csv does not create missing parent directories; create the folder before the downloads
# start so the final write cannot fail on a missing directory after all requests have run.
os.makedirs(os.path.dirname(fname) or '.', exist_ok=True)

list_dfs = []
# Request the symbols 200 at a time; enumerate supplies the 1-based batch number for the log line.
for i, x in enumerate(batch(list_sp500, 200), start=1):
    print(f'Batch {i}: from {x[0]} to {x[-1]}')
    # paginate=True lets the client keep requesting pages instead of only the first one.
    df = quandl.get_table('SHARADAR/SF1', dimension='ARY', ticker=x, qopts={"columns": columns},
                          paginate=True)
    # Order each batch by company and fiscal period before collecting it.
    df = df.sort_values(['ticker', 'calendardate'])
    list_dfs.append(df)

# Stack the per-batch frames row-wise and persist them as one CSV (row index dropped).
final_df = pd.concat(list_dfs, axis=0)
final_df.to_csv(fname, index=False)

Batch 1: from MMM to FLIR
Batch 2: from FLS to REGN
Batch 3: from RF to ZTS


In [63]:
# Sanity check: number of distinct tickers actually present in the downloaded data.
# In the saved run this shows 495 while 505 symbols were requested, so some symbols returned no
# rows. NOTE(review): the cause is not investigated in this notebook.
final_df['ticker'].nunique()

495

## All Quandl SHARADAR/SF1 companies

In [104]:
# Fetch the ticker metadata table, filtered to entries whose `table` field is 'SF1';
# paginate=True lets the client keep requesting pages instead of only the first one.
sf1_df = quandl.get_table('SHARADAR/TICKERS', table='SF1', paginate=True)
# Preview the first rows.
sf1_df.head()

Unnamed: 0_level_0,table,permaticker,ticker,name,exchange,isdelisted,category,cusips,siccode,sicsector,...,currency,location,lastupdated,firstadded,firstpricedate,lastpricedate,firstquarter,lastquarter,secfilings,companysite
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,SF1,196290,A,Agilent Technologies Inc,NYSE,N,Domestic,00846U101,3826.0,Manufacturing,...,USD,California U.S.A,2018-12-20,2014-09-26,1999-11-18,2019-01-28,1997-06-30,2018-09-30,https://www.sec.gov/cgi-bin/browse-edgar?actio...,http://www.agilent.com
1,SF1,124392,AA,Alcoa Corp,NYSE,N,Domestic,013872106,3350.0,Manufacturing,...,USD,New York U.S.A,2018-11-02,2016-11-01,2016-11-01,2019-01-28,2014-12-31,2018-09-30,https://www.sec.gov/cgi-bin/browse-edgar?actio...,http://www.alcoa.com
2,SF1,122827,AAAB,Admiralty Bancorp Inc,NASDAQ,Y,Domestic,007231103,6022.0,Finance Insurance And Real Estate,...,USD,Florida U.S.A,2018-10-16,2017-09-09,1998-09-28,2003-01-29,1997-09-30,2002-09-30,https://www.sec.gov/cgi-bin/browse-edgar?actio...,
3,SF1,120538,AAAGY,Altana Aktiengesellschaft,NYSE,Y,ADR,02143N103,2834.0,Manufacturing,...,EUR,Jordan,2018-02-13,2018-02-13,2002-05-22,2010-08-12,2000-12-31,2005-12-31,https://www.sec.gov/cgi-bin/browse-edgar?actio...,
4,SF1,155760,AAAP,Advanced Accelerator Applications SA,NASDAQ,Y,ADR,00790T100,2834.0,Manufacturing,...,EUR,France,2017-07-17,2016-05-19,2015-11-11,2018-02-09,2013-12-31,2016-12-31,https://www.sec.gov/cgi-bin/browse-edgar?actio...,


In [105]:
# Checking survivorship bias: how many of the SF1 entries are still listed?
in_sf1 = sf1_df['table'] == 'SF1'
still_listed = sf1_df['isdelisted'] == 'N'
alive_count = sf1_df[in_sf1 & still_listed].shape[0]
total_count = sf1_df[in_sf1].shape[0]
print(
    f"{alive_count:,} "
    f"companies alive from {total_count:,} total included, table SF1"
)

5,260 companies alive from 14,121 total included, table SF1


In [106]:
# Destination CSV for the full SF1 universe, under ./data_defensive.
fname = os.path.join(os.getcwd(), 'data_defensive', 'sf1.csv')
# to_csv does not create missing parent directories; create the folder before the downloads
# start so the final write cannot fail on a missing directory after all requests have run.
os.makedirs(os.path.dirname(fname) or '.', exist_ok=True)

# Columns requested from SHARADAR/SF1; identical for every batch, so defined once outside the loop.
columns = ['ticker', 'calendardate', 'datekey', 'revenueusd', 'currentratio', 'eps', 'dps', 'bvps', 'price']

# Every ticker listed in the metadata frame, in the order the API returned them.
list_sf1 = sf1_df['ticker'].tolist()
list_dfs = []
# Request the tickers 200 at a time; enumerate supplies the 1-based batch number for the log line.
for i, x in enumerate(batch(list_sf1, 200), start=1):
    print(f'Batch {i}: from {x[0]} to {x[-1]}')
    # paginate=True lets the client keep requesting pages instead of only the first one.
    df = quandl.get_table('SHARADAR/SF1', dimension='ARY', ticker=x, qopts={"columns": columns},
                          paginate=True)
    # Order each batch by company and fiscal period before collecting it.
    df = df.sort_values(['ticker', 'calendardate'])
    list_dfs.append(df)

# Stack the per-batch frames row-wise and persist them as one CSV (row index dropped).
final_df = pd.concat(list_dfs, axis=0)
final_df.to_csv(fname, index=False)

Batch 1: from A to ACY
Batch 2: from AD to AHL
Batch 3: from AHL1 to AMCX
Batch 4: from AMD to APMC
Batch 5: from APN to ASML
Batch 6: from ASN to AVTM
Batch 7: from AVV to BDI
Batch 8: from BDIC to BL
Batch 9: from BL1 to BRLS
Batch 10: from BRN to CAGZ
Batch 11: from CAH to CCGM
Batch 12: from CCH to CFSG
Batch 13: from CFSI to CLA
Batch 14: from CLAC to CNET1
Batch 15: from CNF to CPP
Batch 16: from CPPL to CTB
Batch 17: from CTBC to CYM
Batch 18: from CYMI to DHR
Batch 19: from DHSM1 to DSIT
Batch 20: from DSKE to EDMC
Batch 21: from EDNT to ENHT
Batch 22: from ENIA to ET1
Batch 23: from ETEC to FCAP
Batch 24: from FCAU to FLD1
Batch 25: from FLDM to FRO1
Batch 26: from FRP to GBSE
Batch 27: from GBSN to GLMD
Batch 28: from GLNG to GSHD
Batch 29: from GSIC to HCOR1
Batch 30: from HCOW to HOLX
Batch 31: from HOMB to IAAI
Batch 32: from IAC to IHT
Batch 33: from IICR to INTS
Batch 34: from INTT to IVZ
Batch 35: from IWA to KBR
Batch 36: from KBSF to KTN1
Batch 37: from KTO to LIA
Bat

In [107]:
# Preview only the first rows: the combined frame holds every downloaded ticker, so displaying
# it whole adds bulk to the notebook without adding information.
final_df.head(10)

Unnamed: 0_level_0,ticker,calendardate,datekey,revenueusd,currentratio,eps,dps,bvps,price
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1694,A,1999-12-31,2000-01-25,8.331000e+09,2.105,1.35,0.000,8.900,67.690
1693,A,2000-12-31,2001-01-17,1.077300e+10,2.050,1.68,0.000,11.648,61.940
1691,A,2001-12-31,2002-02-01,8.396000e+09,2.397,0.38,0.000,12.275,29.610
1692,A,2001-12-31,2002-01-22,8.396000e+09,2.397,0.38,0.000,12.275,27.160
1690,A,2002-12-31,2002-12-20,6.010000e+09,2.238,-2.22,0.000,9.951,18.000
1689,A,2003-12-31,2003-12-22,6.056000e+09,2.040,-4.35,0.000,5.933,28.370
1688,A,2004-12-31,2004-12-21,7.181000e+09,2.446,0.72,0.000,7.344,23.820
1687,A,2005-12-31,2006-01-17,5.139000e+09,2.297,0.66,0.000,8.162,33.760
1686,A,2006-12-31,2006-12-22,4.973000e+09,2.573,7.67,0.000,8.898,33.720
1685,A,2007-12-31,2007-12-21,5.420000e+09,2.207,1.62,0.000,8.601,37.190


In [83]:
# Looks like the average number of downloaded rows per ticker.
# NOTE(review): 14121 matches the SF1 total printed by the survival-bias check; 116720 is
# presumably the row count of final_df from an earlier run — confirm, or compute both values
# from the live objects instead of hard-coding them.
116720 / 14121

8.265703562070675

In [94]:
import random

# Spot check: pick one SF1 ticker at random. No seed is set, so the pick changes on every run.
random_company = random.choice(list_sf1)
print(random_company)
# NOTE(review): get_data is not defined in this notebook; presumably it is provided by the
# `from graham import *` star import and returns that ticker's rows — confirm.
df_random_company = get_data(random_company)
# Cell result: display the rows returned for the chosen ticker.
df_random_company

MSG
Retrieving from file


Unnamed: 0,calendardate,datekey,revenueusd,currentratio,eps,dps,bvps,price
69778,2014-12-31,2015-03-27,913615000.0,0.288,,0.0,,80.73
69781,2015-12-31,2015-08-21,1071551000.0,0.316,,0.0,,72.03
69782,2016-12-31,2016-08-19,1115311000.0,2.609,-3.12,0.0,105.65,184.19
69783,2017-12-31,2017-08-17,1318452000.0,1.902,-3.05,0.0,102.218,208.15
69784,2018-12-31,2018-08-17,1559095000.0,1.849,5.99,0.0,107.083,298.17
