In [1]:
from capiq_excel import download_data 
import pandas as pd 
import numpy as np 
import requests 
from bs4 import BeautifulSoup
import plotly.graph_objects as go

url = '''https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'''

ind_feats = pd.read_excel('independent_features.xlsx') 
ind_feats.head()

Unnamed: 0,feature_name,feature_code,Unnamed: 2
0,net income,IQ_NI_CF,
1,total revenue,IQ_TOTAL_REV,
2,accounts receivable,IQ_AR,
3,Gross Profit,IQ_GP,
4,Total Current Assets,IQ_TOTAL_CA,


In [2]:
html = requests.get(url = url) 
soup = BeautifulSoup(html.content, 'html.parser')
table = soup.find_all('table')[0]

headers = [] 
for i in table.find_all('th'): 
    ## 
    
    headers.append(i.getText()) 
headers

['Symbol\n',
 'Security',
 'SEC filings',
 'GICS Sector',
 'GICS Sub-Industry',
 'Headquarters Location',
 'Date first added',
 'CIK',
 'Founded\n']

In [3]:
trs = [] 

for tr in table.find_all('tr'): 
    ## Extract all the table rows 
    
    
    elems = [] 
    for elem in tr.find_all('td'): 
        ## Extract elems from each row
        
        elems.append(elem.getText()) 
    
    trs.append(elems)

trs = trs[1:]

In [4]:
sp500_names = pd.DataFrame(trs, columns = headers)
sp500_names.head()

Unnamed: 0,Symbol\n,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded\n
0,MMM\n,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",1976-08-09,66740,1902\n
1,ABT\n,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888\n
2,ABBV\n,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)\n
3,ABMD\n,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981\n
4,ACN\n,Accenture,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989\n


In [5]:
sp500_names.columns = ['symbol', 'security', 'sec_filings', 'gics_sector', 
                'gics_sub_industry', 'headquarters_loc', 'date_added', 
                'cik', 'founded']

sp500_names.symbol = sp500_names.symbol.str.replace('\n', '')




sp500_names.head()

Unnamed: 0,symbol,security,sec_filings,gics_sector,gics_sub_industry,headquarters_loc,date_added,cik,founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",1976-08-09,66740,1902\n
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888\n
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)\n
3,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981\n
4,ACN,Accenture,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989\n


In [6]:
sp500_names.symbol.to_list()

['MMM',
 'ABT',
 'ABBV',
 'ABMD',
 'ACN',
 'ATVI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALXN',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BKR',
 'BLL',
 'BAC',
 'BK',
 'BAX',
 'BDX',
 'BRK.B',
 'BBY',
 'BIO',
 'BIIB',
 'BLK',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BF.B',
 'CHRW',
 'COG',
 'CDNS',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'CNC',
 'CNP',
 'CERN',
 'CF',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA',
 'CMA',
 'CAG',
 'COP',
 'ED'

In [7]:
ind_feats.feature_code.to_list()[4:7]

['IQ_TOTAL_CA', 'IQ_TOTAL_ASSETS', 'IQ_AP']

In [None]:



download_data(sp500_names.symbol.to_list(), ## Ticker Symbol Names
              ind_feats.feature_code.to_list(), ## Independent Features
              freq = 'Q', 
              num_periods = 4 * 10
) 




Creating XLSX files with commands to get ids
Populating XLSX files for ids
Completed 4/5 (80%) Estimated finish: 2021-03-02 23:12:08.795972