# Import working libraries

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import pandas_datareader as pdr

In [4]:
import datetime as dt

In [5]:
import matplotlib.pyplot as plt

In [6]:
import bs4 as bs
import pickle
import requests

In [7]:
import unicodedata as un 

# Import Data

## Import Dow Jones index data

In [8]:
# Price history for the DJIA symbol over a fixed date window.
startdate, enddate = dt.datetime(2015, 1, 1), dt.datetime(2020, 12, 21)

# The reader is given a list of symbols, here a single one.
tickers = ["DJIA"]
data = pdr.get_data_yahoo(tickers, start=startdate, end=enddate)
data

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,DJIA,DJIA,DJIA,DJIA,DJIA,DJIA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2015-01-02,17832.990234,17832.990234,18008.300781,17697.250000,17837.289062,2708700000
2015-01-05,17501.650391,17501.650391,17820.919922,17446.789062,17820.919922,3799120000
2015-01-06,17371.640625,17371.640625,17638.919922,17236.769531,17504.179688,4460110000
2015-01-07,17584.519531,17584.519531,17660.080078,17396.099609,17396.099609,3805480000
2015-01-08,17907.869141,17907.869141,17956.839844,17591.970703,17591.970703,3934010000
...,...,...,...,...,...,...
2020-12-15,30199.310547,30199.310547,30243.259766,29894.949219,29919.089844,4360280000
2020-12-16,30154.539062,30154.539062,30236.029297,30080.109375,30191.380859,4056950000
2020-12-17,30303.369141,30303.369141,30323.779297,30216.000000,30216.000000,4184930000
2020-12-18,30179.050781,30179.050781,30343.589844,30029.439453,30314.259766,7068340000


## Import Dow Jones companies

In [9]:
# get unformatted dow company tickers
def get_dow_tickers(timeout=10):
    """Scrape the raw ticker cells of the Dow Jones components table on Wikipedia.

    The text of the third ``<td>`` of every table row after the first is
    returned exactly as it appears in the page, i.e. still carrying any
    exchange prefix, non-breaking space and trailing newline. Cleaning is
    left to the caller.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of str
        Raw, unformatted ticker strings in table order.

    Raises
    ------
    requests.exceptions.RequestException
        If the request times out or the server answers with an error status.
    ValueError
        If no table with class ``wikitable sortable`` is found in the page.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average#Components',
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError(
            "Could not find a 'wikitable sortable' table in the page; "
            "the page layout may have changed."
        )

    tickers = []
    # [1:] skips the first row, which holds no <td> ticker cell to read.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Rows with fewer than three <td> cells cannot carry a ticker in the
        # expected column; skip them rather than raising IndexError.
        if len(cells) > 2:
            tickers.append(cells[2].text)
    return tickers

In [10]:
# Display the raw scraped strings: they still carry exchange prefixes,
# non-breaking spaces (\xa0) and trailing newlines that need cleaning.
get_dow_tickers()

['NYSE:\xa0MMM\n',
 'NYSE:\xa0AXP\n',
 'AMGN\n',
 'AAPL\n',
 'NYSE:\xa0BA\n',
 'NYSE:\xa0CAT\n',
 'NYSE:\xa0CVX\n',
 'CSCO\n',
 'NYSE:\xa0KO\n',
 'NYSE:\xa0DOW\n',
 'NYSE:\xa0GS\n',
 'NYSE:\xa0HD\n',
 'NYSE:\xa0HON\n',
 'NYSE:\xa0IBM\n',
 'INTC\n',
 'NYSE:\xa0JNJ\n',
 'NYSE:\xa0JPM\n',
 'NYSE:\xa0MCD\n',
 'NYSE:\xa0MRK\n',
 'MSFT\n',
 'NYSE:\xa0NKE\n',
 'NYSE:\xa0PG\n',
 'NYSE:\xa0CRM\n',
 'NYSE:\xa0TRV\n',
 'NYSE:\xa0UNH\n',
 'NYSE:\xa0VZ\n',
 'NYSE:\xa0V\n',
 'WBA\n',
 'NYSE:\xa0WMT\n',
 'NYSE:\xa0DIS\n']

In [11]:
# format tickers to usable format
def save_dow_tickers(raw_tickers=None, path="dowtickers.pickle"):
    """Clean the scraped ticker strings and pickle the resulting list.

    Each raw string is Unicode-normalised (NFKD, which turns the
    non-breaking space into a plain space), stripped of surrounding
    whitespace, and reduced to the part after the last colon so that an
    exchange prefix such as ``"NYSE:"`` is dropped.

    Parameters
    ----------
    raw_tickers : iterable of str, optional
        Raw ticker strings to clean. When omitted, they are fetched once
        with ``get_dow_tickers()``.
    path : str, optional
        File the cleaned list is pickled to.

    Returns
    -------
    list of str
        Cleaned ticker symbols, in input order.
    """
    if raw_tickers is None:
        # Scrape exactly once; the result was previously fetched twice.
        raw_tickers = get_dow_tickers()

    ticks = []
    for element in raw_tickers:
        cleaned = un.normalize("NFKD", element).strip()
        # Keep only what follows the last ':' ("NYSE: MMM" -> "MMM");
        # strings without a prefix pass through unchanged.
        symbol = cleaned.rsplit(":", 1)[-1].strip()
        if symbol:  # ignore blank cells so no empty symbol is stored
            ticks.append(symbol)

    # Write the file once, after the list is complete (also when it is empty).
    with open(path, "wb") as f:
        pickle.dump(ticks, f)
    return ticks
# Build the cleaned ticker list, write it to dowtickers.pickle, and display it.
save_dow_tickers()

['MMM',
 'AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CVX',
 'CSCO',
 'KO',
 'DOW',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'MCD',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'CRM',
 'TRV',
 'UNH',
 'VZ',
 'V',
 'WBA',
 'WMT',
 'DIS']

In [12]:
# Date window for the component download.
startdate = dt.datetime(year=2015, month=1, day=1)
enddate = dt.datetime(year=2020, month=12, day=21)

# save_dow_tickers() returns the cleaned symbols (and writes dowtickers.pickle).
tickers = save_dow_tickers()
data_all = pdr.get_data_yahoo(
    tickers,
    start=startdate,
    end=enddate,
)
data_all

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Symbols,MMM,AXP,AMGN,AAPL,BA,CAT,CVX,CSCO,KO,DOW,...,NKE,PG,CRM,TRV,UNH,VZ,V,WBA,WMT,DIS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-01-02,138.201538,84.234779,136.289856,24.898552,112.786011,75.253960,86.833397,22.780064,34.617867,,...,4985800.0,7251400.0,2796400.0,1270800.0,3060900.0,11421200.0,8389600.0,3938500.0,4501800.0,5865400.0
2015-01-05,135.084717,82.007118,134.670273,24.197117,112.004875,71.281593,83.362534,22.326277,34.617867,,...,6889200.0,8626100.0,3688200.0,1728700.0,4679000.0,18964500.0,12751200.0,4767900.0,6979000.0,7789400.0
2015-01-06,133.644241,80.259384,130.331604,24.199400,110.685638,70.822929,83.323952,22.318022,34.880741,,...,7576000.0,7791200.0,3174900.0,2350900.0,3468300.0,22950100.0,11070000.0,4881600.0,8205100.0,6793100.0
2015-01-07,134.612991,82.012276,134.883408,24.538729,112.404106,71.920425,83.254547,22.524290,35.316135,,...,7256000.0,5986600.0,4284300.0,1558200.0,3225800.0,20793600.0,9346800.0,5672100.0,8498400.0,6589500.0
2015-01-08,137.839325,83.174782,134.397507,25.481558,114.391670,72.657578,85.159653,22.697554,35.743309,,...,5978200.0,6823300.0,3756600.0,1941200.0,5346100.0,17617500.0,10443200.0,4083900.0,12713600.0,7579400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-15,174.679993,118.683838,230.529999,127.879997,229.500000,181.740005,89.370003,44.360241,53.840000,54.130001,...,7642600.0,5398200.0,10775800.0,975600.0,2616900.0,15877200.0,9799000.0,5461000.0,10631500.0,18817200.0
2020-12-16,175.410004,118.584190,228.440002,127.809998,225.869995,179.500000,88.690002,44.360241,53.060001,53.830002,...,6573400.0,5382500.0,8821400.0,1253300.0,2365600.0,17165900.0,7504400.0,7870700.0,8550300.0,11105800.0
2020-12-17,176.649994,118.564262,231.240005,128.699997,221.240005,179.169998,88.410004,44.459435,53.270000,53.830002,...,8727000.0,7405300.0,9946900.0,1428800.0,2525700.0,12095500.0,6517200.0,5612700.0,10226500.0,9004600.0
2020-12-18,176.419998,117.099396,228.490005,126.660004,219.750000,180.960007,87.190002,45.074448,53.740002,55.660000,...,17970800.0,19988800.0,11845400.0,3347100.0,8742700.0,31909400.0,13298200.0,11942400.0,13794700.0,21172100.0
