# Download Daily Prices, PE & PB data from all companies passing Defensive Investor Criteria

In [3]:
%load_ext autoreload
%autoreload 2

import sys
import pandas as pd
import numpy as np
import quandl
import os
import warnings
from datetime import datetime
from tqdm import tqdm_notebook as tqdm
import pandas_datareader as pdr
import matplotlib.pyplot as plt
from collections import defaultdict
from graham import *

warnings.filterwarnings('ignore')
quandl.ApiConfig.api_key = os.getenv('QUANDL_APY_KEY')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## List of ALL companies passing criteria
This just loads companies passing the criteria, saved in a pickle file by the end of [Evolution of companies meeting Defensive Investor criteria notebook](http://localhost:8888/notebooks/Evolution%20of%20companies%20meeting%20Defensive%20Investor%20criteria.ipynb). 

This is a very time consuming process:
- It takes aprox. **1'45"/date** to test the whole Sharadar common stocks' database of 14,000 companies
- This means, for past 10 years, to test all 14,000 companies on a monthly basis (12 x 120) **takes aprox 3h30min**

In [23]:
import pickle

def load_obj(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)
    
fname = os.path.join(os.getcwd(), 'data_defensive', 'defensive1_companies_20081231_20181231.pkl')
dic = load_obj(fname)

In [24]:
all_companies_passing_in_period = set()
for key, value in dic.items():
    print(f'{key:%Y-%m-%d}: {len(value)} companies pass criteria')
    for el in value:
        all_companies_passing_in_period.add(el)
        
all_companies_passing_in_period = list(all_companies_passing_in_period)
print(f'All period: {len(all_companies_passing_in_period)} companies pass criteria')

2008-12-31: 63 companies pass criteria
2009-01-31: 63 companies pass criteria
2009-02-28: 73 companies pass criteria
2009-03-31: 73 companies pass criteria
2009-04-30: 73 companies pass criteria
2009-05-31: 74 companies pass criteria
2009-06-30: 75 companies pass criteria
2009-07-31: 76 companies pass criteria
2009-08-31: 73 companies pass criteria
2009-09-30: 74 companies pass criteria
2009-10-31: 73 companies pass criteria
2009-11-30: 74 companies pass criteria
2009-12-31: 75 companies pass criteria
2010-01-31: 75 companies pass criteria
2010-02-28: 79 companies pass criteria
2010-03-31: 78 companies pass criteria
2010-04-30: 77 companies pass criteria
2010-05-31: 77 companies pass criteria
2010-06-30: 76 companies pass criteria
2010-07-31: 75 companies pass criteria
2010-08-31: 75 companies pass criteria
2010-09-30: 74 companies pass criteria
2010-10-31: 74 companies pass criteria
2010-11-30: 75 companies pass criteria
2010-12-31: 73 companies pass criteria
2011-01-31: 73 companies 

## Retrieve the data from web and save it on disc

In [25]:
def batch_download_price_pepb(list_companies, filename, start_date, end_date, batch_size=20):
    def batch(iterable, n=1):
        l = len(iterable)
        for ndx in range(0, l, n):
            yield iterable[ndx:min(ndx + n, l)]

    list_dfs = []
    i = 1
    for b in batch(list_companies, batch_size):
        print(f'Batch {i}: from {b[0]} to {b[-1]}')
        df1 = quandl.get_table('SHARADAR/SEP', ticker=b, date={'gte': start_date, 'lte': end_date}, paginate=True,
                            qopts={"columns":['ticker', 'date', 'open', 'high', 'low', 'close', 'volume']})

        df2 = quandl.get_table('SHARADAR/DAILY', ticker=b, date={'gte': start_date, 'lte': end_date}, paginate=True,
                                qopts={"columns":['ticker', 'date', 'pe', 'pb']})
        result = pd.merge(df1, df2, on=['ticker', 'date'])
        result.sort_values(['ticker', 'date'], inplace=True)

        list_dfs.append(result)
        i += 1

    final_df = pd.concat(list_dfs, axis=0)
    final_df.to_csv(filename)
    return final_df

In [26]:
fname = os.path.join(os.getcwd(), 'data_defensive', 'defensive1_pricedata_20081231_20181231.csv')
df = batch_download_price_pepb(all_companies_passing_in_period, fname, start_date=dr[0], end_date=dr[-1])
df

Batch 1: from SIAL to CLC
Batch 2: from BMY to BRC
Batch 3: from SGC to MGEE
Batch 4: from WERN to CAG
Batch 5: from TWIN to UFPI
Batch 6: from THO to WSO
Batch 7: from VGR to LANC
Batch 8: from BCR to PCP
Batch 9: from SEIC to BDX
Batch 10: from BEN to ODC


Unnamed: 0,ticker,date,open,high,low,close,volume,pe,pb
44539,ACO,2008-12-31,21.41,21.700,20.780,20.95,166814.0,11.4,1.7
44538,ACO,2009-01-02,21.12,22.570,20.860,22.27,142691.0,12.1,1.8
44537,ACO,2009-01-05,22.88,22.940,22.080,22.31,141967.0,12.2,1.8
44536,ACO,2009-01-06,22.50,24.240,22.450,23.45,178156.0,12.8,1.9
44535,ACO,2009-01-07,23.00,23.270,21.580,21.86,104642.0,11.9,1.7
44534,ACO,2009-01-08,21.78,22.740,21.680,22.73,122623.0,12.4,1.8
44533,ACO,2009-01-09,22.57,22.570,20.500,20.59,149614.0,11.2,1.6
44532,ACO,2009-01-12,20.50,20.530,19.110,19.37,193444.0,10.5,1.5
44531,ACO,2009-01-13,19.31,20.100,19.160,19.88,169703.0,10.8,1.6
44530,ACO,2009-01-14,19.59,20.060,18.370,18.57,195881.0,10.1,1.5


In [27]:
df['ticker'].nunique()

194