# Download Daily Prices, PE & PB data from all companies passing Defensive Investor Criteria

In [1]:
%load_ext autoreload
%autoreload 2

import sys
import pandas as pd
import numpy as np
import quandl
import os
import warnings
from datetime import datetime
from tqdm import tqdm_notebook as tqdm
import pandas_datareader as pdr
import matplotlib.pyplot as plt
from collections import defaultdict
from graham import *

warnings.filterwarnings('ignore')
quandl.ApiConfig.api_key = os.getenv('QUANDL_APY_KEY')

## List of ALL companies passing criteria
This just loads companies passing the criteria, saved in a pickle file by the end of [Evolution of companies meeting Defensive Investor criteria notebook](http://localhost:8888/notebooks/Evolution%20of%20companies%20meeting%20Defensive%20Investor%20criteria.ipynb). 

This is a very time consuming process:
- It takes aprox. **1'45"/date** to test the whole Sharadar common stocks' database of 14,000 companies
- This means, for past 10 years, to test all 14,000 companies on a monthly basis (12 x 120) **takes aprox 3h30min**

In [2]:
import pickle

def load_obj(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)
    
fname = os.path.join(os.getcwd(), 'data_defensive', 'defensive2_companies_20081231_20181231.pkl')
dic = load_obj(fname)

In [3]:
all_companies_passing_in_period = set()
for key, value in dic.items():
    print(f'{key:%Y-%m-%d}: {len(value)} companies pass criteria')
    for el in value:
        all_companies_passing_in_period.add(el)
        
all_companies_passing_in_period = list(all_companies_passing_in_period)
print(f'All period: {len(all_companies_passing_in_period)} companies pass criteria')

2008-12-31: 30 companies pass criteria
2009-01-31: 30 companies pass criteria
2009-02-28: 38 companies pass criteria
2009-03-31: 37 companies pass criteria
2009-04-30: 37 companies pass criteria
2009-05-31: 37 companies pass criteria
2009-06-30: 37 companies pass criteria
2009-07-31: 38 companies pass criteria
2009-08-31: 36 companies pass criteria
2009-09-30: 35 companies pass criteria
2009-10-31: 35 companies pass criteria
2009-11-30: 34 companies pass criteria
2009-12-31: 35 companies pass criteria
2010-01-31: 35 companies pass criteria
2010-02-28: 33 companies pass criteria
2010-03-31: 34 companies pass criteria
2010-04-30: 34 companies pass criteria
2010-05-31: 34 companies pass criteria
2010-06-30: 34 companies pass criteria
2010-07-31: 33 companies pass criteria
2010-08-31: 33 companies pass criteria
2010-09-30: 34 companies pass criteria
2010-10-31: 33 companies pass criteria
2010-11-30: 33 companies pass criteria
2010-12-31: 32 companies pass criteria
2011-01-31: 32 companies 

## Retrieve the data from web and save it on disc

In [4]:
def batch_download_price_pepb(list_companies, filename, start_date, end_date, batch_size=20):
    def batch(iterable, n=1):
        l = len(iterable)
        for ndx in range(0, l, n):
            yield iterable[ndx:min(ndx + n, l)]

    list_dfs = []
    i = 1
    for b in batch(list_companies, batch_size):
        print(f'Batch {i}: from {b[0]} to {b[-1]}')
        df1 = quandl.get_table('SHARADAR/SEP', ticker=b, date={'gte': start_date, 'lte': end_date}, paginate=True,
                            qopts={"columns":['ticker', 'date', 'open', 'high', 'low', 'close', 'volume']})

        df2 = quandl.get_table('SHARADAR/DAILY', ticker=b, date={'gte': start_date, 'lte': end_date}, paginate=True,
                                qopts={"columns":['ticker', 'date', 'pe', 'pb']})
        result = pd.merge(df1, df2, on=['ticker', 'date'])
        result.sort_values(['ticker', 'date'], inplace=True)

        list_dfs.append(result)
        i += 1

    final_df = pd.concat(list_dfs, axis=0)
    final_df.to_csv(filename)
    return final_df

In [8]:
dr = pd.date_range('2008-12-31', '2018-12-31', freq='M')
fname = os.path.join(os.getcwd(), 'data_defensive', 'defensive2_pricedata_20081231_20181231.csv')
df = batch_download_price_pepb(all_companies_passing_in_period, fname, start_date=dr[0], end_date=dr[-1])
df

Batch 1: from SEB to HRS
Batch 2: from FAST to UVV
Batch 3: from CW to HFC
Batch 4: from MCO to HRL
Batch 5: from LB to JWN
Batch 6: from EXPD to VZ


Unnamed: 0,ticker,date,open,high,low,close,volume,pe,pb
48463,ADM,2008-12-31,28.77,29.010,28.60,28.83,4250200.0,7.7,1.4
48462,ADM,2009-01-02,28.93,29.230,28.52,29.19,5808500.0,7.8,1.4
48461,ADM,2009-01-05,28.92,29.500,28.61,29.08,6677000.0,7.7,1.4
48460,ADM,2009-01-06,29.26,29.260,28.20,28.29,13002500.0,7.5,1.3
48459,ADM,2009-01-07,28.05,28.710,27.87,28.34,13265500.0,7.5,1.3
48458,ADM,2009-01-08,28.13,28.310,27.57,27.76,7361200.0,7.4,1.3
48457,ADM,2009-01-09,27.77,28.170,27.67,27.96,7054900.0,7.4,1.3
48456,ADM,2009-01-12,27.50,28.000,26.74,26.90,7366300.0,7.2,1.3
48455,ADM,2009-01-13,26.84,28.080,26.53,27.88,8891600.0,7.4,1.3
48454,ADM,2009-01-14,26.27,26.390,24.08,24.25,14894100.0,6.5,1.1


In [9]:
df['ticker'].nunique()

113