In [1]:
# Names of the upstream tasks whose products this notebook consumes as inputs.
upstream = [
    'fetch_all_securities_list',
    'fetch_snp500_securities_list',
]

In [2]:
# Parameters
# NOTE(review): this cell looks like it is injected by the pipeline runner at
# execution time (the paths are absolute and machine-specific) — confirm before
# editing any value by hand.

# URL template for the SEC submissions endpoint; {cik_str} is filled in per company.
sec_base_url = "https://data.sec.gov/submissions/CIK{cik_str}.json"
# Fraction of the master securities list to sample (used as DataFrame.sample(frac=...)).
sample_pct = 1
# When True, only the S&P 500 securities are fetched instead of the master list.
snp_filter = False
# Per upstream task: path of its notebook ("nb") and of its CSV output ("data").
upstream = {
    "fetch_snp500_securities_list": {
        "nb": "/Users/aiujdm2/market_watch/output/notebooks/fetch_snp500_securities_list.ipynb",
        "data": "/Users/aiujdm2/market_watch/output/data/raw/snp500_securities_list.csv",
    },
    "fetch_all_securities_list": {
        "nb": "/Users/aiujdm2/market_watch/output/notebooks/fetch_all_securities_list.ipynb",
        "data": "/Users/aiujdm2/market_watch/output/data/raw/all_securities_list.csv",
    },
}
# Presumably the outputs of this task: notebook ("nb") and CSV ("data") — TODO confirm.
product = {
    "nb": "/Users/aiujdm2/market_watch/output/notebooks/fetch_securities.ipynb",
    "data": "/Users/aiujdm2/market_watch/output/data/raw/securities.csv",
}


In [3]:
import pandas as pd
import json
import requests
import time 
from pathlib import Path
from tqdm import tqdm
from src.utils import preprocess_text

Get the list of publicly traded companies, their tickers and the associated CIK IDs. We will use the CIK ID to pull data from the SEC's [DATA APIS](https://www.sec.gov/edgar/sec-api-documentation).

In [4]:
# Load both upstream security lists; the first CSV column is used as the index.
all_list_path = upstream["fetch_all_securities_list"]["data"]
snp500_list_path = upstream["fetch_snp500_securities_list"]["data"]
traded_company_df = pd.read_csv(all_list_path, index_col=0)
snp500_company_df = pd.read_csv(snp500_list_path, index_col=0)

# The SEC data API expects the CIK as a 10-character string, left-padded with zeros.
traded_company_df = traded_company_df.assign(
    # keep the CIK exactly as read from the CSV before cik_str is padded
    cik=lambda frame: frame["cik_str"],
    cik_str=lambda frame: frame["cik_str"].astype(str).str.zfill(10),
)
snp500_company_df = snp500_company_df.assign(
    cik_str=lambda frame: frame["CIK"].astype(str).str.zfill(10),
)

Download the dataset for publicly traded companies, including metadata such as previous names. See the [DEVELOPER FAQ](https://www.sec.gov/os/webmaster-faq#developers) for more information about the rate limit and API call requirements.

In [5]:
# snp_filter selects which CIKs get fetched: only the S&P 500 securities, or the
# master list of all publicly traded securities.
if snp_filter:
    traded_companies = snp500_company_df["cik_str"]
else:
    # sample_pct speeds up development by selecting only a fraction of the
    # 12,227 master securities. Note that sample() draws at random, so even
    # sample_pct = 1 returns every row but in shuffled order.
    traded_companies = traded_company_df["cik_str"].sample(frac=sample_pct)

Let's fetch the SEC submission data. This API has a limit of 10 requests per second, so it will take about 1 hour to finish.

In [6]:
# The SEC data API requires an explicit User-Agent identifying the caller (an
# email address here); otherwise the request fails (a 404 was observed).
headers = {
  'User-Agent': 'vgaurav@umich.edu'
}

df_lst = []
# (cik_str, error) pairs for every company whose download or JSON decoding failed
failed_ciks = []
counter = 0
# Reuse one session so that all requests share the same connection pool.
session = requests.Session()

for cik_str in tqdm(traded_companies):
    url = sec_base_url.format(cik_str=cik_str)
    # pd.read_json(url, storage_options={'User-Agent': 'vgaurav@umich.edu'}) would be
    # shorter, but it does not work with the nested JSON format of the payload.
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = session.get(url, headers=headers, timeout=30)
        # Surface HTTP errors here instead of failing later while parsing an error page.
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Record the failure and keep going so one bad CIK does not discard
        # everything downloaded so far.
        failed_ciks.append((cik_str, repr(exc)))
    else:
        df_lst.append(pd.json_normalize(data))
    counter += 1
    # The SEC API allows 10 requests per second, so pause after every 10th request.
    if counter % 10 == 0:
        time.sleep(1)

if failed_ciks:
    print(f"{len(failed_ciks)} of {counter} SEC requests failed; first failures: {failed_ciks[:5]}")

# pd.concat raises on an empty list, so fall back to an empty frame.
securities_df = pd.concat(df_lst) if df_lst else pd.DataFrame()

  0%|                                                                                                                                                                                        | 0/9213 [00:00<?, ?it/s]

  0%|                                                                                                                                                                                | 1/9213 [00:00<31:10,  4.92it/s]

  0%|                                                                                                                                                                                | 2/9213 [00:00<24:34,  6.25it/s]

  0%|                                                                                                                                                                                | 4/9213 [00:00<21:04,  7.28it/s]

  0%|                                                                                                                                                                                | 5/9213 [00:00<21:02,  7.29it/s]

  0%|                                                                                                                                                                                | 6/9213 [00:00<19:49,  7.74it/s]

  0%|▏                                                                                                                                                                               | 7/9213 [00:01<24:31,  6.25it/s]

  0%|▏                                                                                                                                                                               | 8/9213 [00:01<23:42,  6.47it/s]

  0%|▏                                                                                                                                                                               | 9/9213 [00:01<22:56,  6.69it/s]

  0%|▏                                                                                                                                                                            | 10/9213 [00:02<1:09:36,  2.20it/s]

  0%|▏                                                                                                                                                                              | 12/9213 [00:02<43:48,  3.50it/s]

  0%|▎                                                                                                                                                                              | 14/9213 [00:02<32:58,  4.65it/s]

  0%|▎                                                                                                                                                                              | 16/9213 [00:03<25:43,  5.96it/s]

  0%|▎                                                                                                                                                                              | 17/9213 [00:03<24:54,  6.15it/s]

  0%|▎                                                                                                                                                                              | 18/9213 [00:03<23:09,  6.62it/s]

  0%|▎                                                                                                                                                                              | 19/9213 [00:03<22:46,  6.73it/s]

  0%|▍                                                                                                                                                                            | 20/9213 [00:04<1:00:25,  2.54it/s]

  0%|▍                                                                                                                                                                              | 22/9213 [00:04<41:15,  3.71it/s]

  0%|▍                                                                                                                                                                              | 24/9213 [00:04<31:18,  4.89it/s]

  0%|▍                                                                                                                                                                              | 25/9213 [00:05<29:03,  5.27it/s]

  0%|▌                                                                                                                                                                              | 27/9213 [00:05<25:03,  6.11it/s]

  0%|▌                                                                                                                                                                              | 29/9213 [00:05<20:51,  7.34it/s]

  0%|▌                                                                                                                                                                              | 30/9213 [00:06<49:28,  3.09it/s]

  0%|▌                                                                                                                                                                              | 31/9213 [00:06<45:56,  3.33it/s]

  0%|▌                                                                                                                                                                              | 32/9213 [00:06<40:48,  3.75it/s]

  0%|▋                                                                                                                                                                              | 33/9213 [00:07<39:09,  3.91it/s]

  0%|▋                                                                                                                                                                              | 34/9213 [00:07<41:17,  3.70it/s]

  0%|▋                                                                                                                                                                              | 36/9213 [00:07<28:36,  5.35it/s]

  0%|▋                                                                                                                                                                              | 37/9213 [00:07<28:49,  5.31it/s]

  0%|▋                                                                                                                                                                              | 39/9213 [00:08<26:30,  5.77it/s]

  0%|▊                                                                                                                                                                              | 40/9213 [00:09<58:53,  2.60it/s]

  0%|▊                                                                                                                                                                              | 41/9213 [00:09<56:14,  2.72it/s]

  0%|▊                                                                                                                                                                              | 42/9213 [00:09<49:39,  3.08it/s]

  0%|▊                                                                                                                                                                              | 43/9213 [00:09<44:51,  3.41it/s]

  0%|▊                                                                                                                                                                              | 44/9213 [00:10<38:40,  3.95it/s]

  0%|▊                                                                                                                                                                              | 45/9213 [00:10<39:34,  3.86it/s]

  0%|▊                                                                                                                                                                              | 46/9213 [00:10<41:11,  3.71it/s]

  1%|▉                                                                                                                                                                              | 47/9213 [00:10<35:57,  4.25it/s]

  1%|▉                                                                                                                                                                              | 49/9213 [00:11<28:31,  5.35it/s]

  1%|▉                                                                                                                                                                            | 50/9213 [00:12<1:03:02,  2.42it/s]

  1%|▉                                                                                                                                                                              | 51/9213 [00:12<54:00,  2.83it/s]

  1%|▉                                                                                                                                                                              | 52/9213 [00:12<46:28,  3.29it/s]

  1%|█                                                                                                                                                                              | 53/9213 [00:12<40:29,  3.77it/s]

  1%|█                                                                                                                                                                              | 54/9213 [00:12<34:31,  4.42it/s]

  1%|█                                                                                                                                                                              | 55/9213 [00:12<29:00,  5.26it/s]

  1%|█                                                                                                                                                                              | 56/9213 [00:13<26:56,  5.66it/s]

  1%|█                                                                                                                                                                              | 57/9213 [00:13<26:02,  5.86it/s]

  1%|█                                                                                                                                                                              | 58/9213 [00:13<23:39,  6.45it/s]

  1%|█                                                                                                                                                                              | 59/9213 [00:13<22:31,  6.77it/s]

  1%|█▏                                                                                                                                                                           | 60/9213 [00:14<1:06:01,  2.31it/s]

  1%|█▏                                                                                                                                                                             | 61/9213 [00:14<51:39,  2.95it/s]

  1%|█▏                                                                                                                                                                             | 63/9213 [00:14<34:11,  4.46it/s]

  1%|█▏                                                                                                                                                                             | 64/9213 [00:15<29:57,  5.09it/s]

  1%|█▏                                                                                                                                                                             | 65/9213 [00:15<27:08,  5.62it/s]

  1%|█▎                                                                                                                                                                             | 66/9213 [00:15<26:47,  5.69it/s]

  1%|█▎                                                                                                                                                                             | 68/9213 [00:15<20:58,  7.27it/s]

  1%|█▎                                                                                                                                                                             | 69/9213 [00:15<21:39,  7.04it/s]

  1%|█▎                                                                                                                                                                             | 70/9213 [00:16<58:14,  2.62it/s]

  1%|█▎                                                                                                                                                                             | 71/9213 [00:16<46:56,  3.25it/s]

  1%|█▍                                                                                                                                                                             | 73/9213 [00:17<31:56,  4.77it/s]

  1%|█▍                                                                                                                                                                             | 75/9213 [00:17<26:00,  5.86it/s]

  1%|█▍                                                                                                                                                                             | 76/9213 [00:17<25:36,  5.95it/s]

  1%|█▍                                                                                                                                                                             | 77/9213 [00:17<25:05,  6.07it/s]

  1%|█▍                                                                                                                                                                             | 78/9213 [00:17<23:35,  6.45it/s]

  1%|█▌                                                                                                                                                                             | 79/9213 [00:17<22:49,  6.67it/s]

  1%|█▌                                                                                                                                                                           | 80/9213 [00:18<1:01:51,  2.46it/s]

  1%|█▌                                                                                                                                                                             | 81/9213 [00:19<49:41,  3.06it/s]

  1%|█▌                                                                                                                                                                             | 82/9213 [00:19<40:31,  3.76it/s]

  1%|█▌                                                                                                                                                                             | 83/9213 [00:19<34:48,  4.37it/s]

  1%|█▌                                                                                                                                                                             | 85/9213 [00:19<25:30,  5.97it/s]

  1%|█▋                                                                                                                                                                             | 87/9213 [00:19<20:50,  7.30it/s]

  1%|█▋                                                                                                                                                                             | 88/9213 [00:19<19:49,  7.67it/s]

  1%|█▋                                                                                                                                                                             | 90/9213 [00:20<47:08,  3.23it/s]

  1%|█▋                                                                                                                                                                             | 91/9213 [00:21<41:31,  3.66it/s]

  1%|█▋                                                                                                                                                                             | 92/9213 [00:21<36:55,  4.12it/s]

  1%|█▊                                                                                                                                                                             | 93/9213 [00:21<32:15,  4.71it/s]

  1%|█▊                                                                                                                                                                             | 95/9213 [00:21<26:30,  5.73it/s]

  1%|█▊                                                                                                                                                                             | 97/9213 [00:21<21:25,  7.09it/s]

  1%|█▉                                                                                                                                                                             | 99/9213 [00:21<20:24,  7.44it/s]

  1%|█▉                                                                                                                                                                            | 100/9213 [00:23<50:13,  3.02it/s]

  1%|█▉                                                                                                                                                                            | 101/9213 [00:23<44:09,  3.44it/s]

  1%|█▉                                                                                                                                                                            | 102/9213 [00:23<38:18,  3.96it/s]

  1%|█▉                                                                                                                                                                            | 103/9213 [00:23<40:39,  3.73it/s]

  1%|█▉                                                                                                                                                                            | 104/9213 [00:23<39:43,  3.82it/s]

  1%|█▉                                                                                                                                                                            | 105/9213 [00:24<33:46,  4.49it/s]

  1%|██                                                                                                                                                                            | 106/9213 [00:24<29:46,  5.10it/s]

  1%|██                                                                                                                                                                            | 108/9213 [00:24<22:41,  6.69it/s]

  1%|██                                                                                                                                                                            | 110/9213 [00:25<50:23,  3.01it/s]

  1%|██                                                                                                                                                                            | 111/9213 [00:25<45:21,  3.34it/s]

  1%|██▏                                                                                                                                                                           | 113/9213 [00:26<34:14,  4.43it/s]

  1%|██▏                                                                                                                                                                           | 115/9213 [00:26<27:30,  5.51it/s]

  1%|██▏                                                                                                                                                                           | 116/9213 [00:26<25:01,  6.06it/s]

  1%|██▏                                                                                                                                                                           | 117/9213 [00:26<32:18,  4.69it/s]

  1%|██▏                                                                                                                                                                           | 118/9213 [00:26<31:23,  4.83it/s]

  1%|██▎                                                                                                                                                                           | 120/9213 [00:28<58:07,  2.61it/s]

  1%|██▎                                                                                                                                                                           | 121/9213 [00:28<50:17,  3.01it/s]

  1%|██▎                                                                                                                                                                           | 122/9213 [00:28<41:56,  3.61it/s]

  1%|██▎                                                                                                                                                                           | 124/9213 [00:28<29:13,  5.18it/s]

  1%|██▍                                                                                                                                                                           | 126/9213 [00:29<34:15,  4.42it/s]

  1%|██▍                                                                                                                                                                           | 127/9213 [00:29<39:56,  3.79it/s]

  1%|██▍                                                                                                                                                                           | 129/9213 [00:29<31:01,  4.88it/s]

  1%|██▍                                                                                                                                                                           | 130/9213 [00:30<59:03,  2.56it/s]

  1%|██▍                                                                                                                                                                           | 131/9213 [00:31<51:44,  2.93it/s]

  1%|██▍                                                                                                                                                                           | 132/9213 [00:31<43:16,  3.50it/s]

  1%|██▌                                                                                                                                                                           | 134/9213 [00:31<32:07,  4.71it/s]

  1%|██▌                                                                                                                                                                           | 136/9213 [00:31<25:20,  5.97it/s]

  1%|██▌                                                                                                                                                                           | 137/9213 [00:31<24:00,  6.30it/s]

  1%|██▌                                                                                                                                                                           | 138/9213 [00:31<22:21,  6.76it/s]

  2%|██▋                                                                                                                                                                           | 139/9213 [00:31<23:28,  6.44it/s]

  2%|██▌                                                                                                                                                                         | 140/9213 [00:33<1:02:50,  2.41it/s]

  2%|██▋                                                                                                                                                                         | 141/9213 [00:33<1:12:08,  2.10it/s]

  2%|██▋                                                                                                                                                                         | 142/9213 [00:34<1:17:43,  1.95it/s]

  2%|██▋                                                                                                                                                                         | 143/9213 [00:34<1:04:19,  2.35it/s]

  2%|██▋                                                                                                                                                                           | 144/9213 [00:34<51:09,  2.95it/s]

  2%|██▋                                                                                                                                                                           | 145/9213 [00:34<42:52,  3.53it/s]

  2%|██▊                                                                                                                                                                           | 146/9213 [00:35<41:27,  3.64it/s]

  2%|██▊                                                                                                                                                                           | 147/9213 [00:35<38:35,  3.92it/s]

  2%|██▊                                                                                                                                                                           | 148/9213 [00:35<31:36,  4.78it/s]

  2%|██▊                                                                                                                                                                           | 149/9213 [00:35<35:25,  4.26it/s]

  2%|██▊                                                                                                                                                                         | 150/9213 [00:36<1:16:20,  1.98it/s]

  2%|██▊                                                                                                                                                                         | 151/9213 [00:36<1:00:17,  2.50it/s]

  2%|██▉                                                                                                                                                                           | 153/9213 [00:37<39:31,  3.82it/s]

  2%|██▉                                                                                                                                                                           | 154/9213 [00:37<33:32,  4.50it/s]

  2%|██▉                                                                                                                                                                           | 155/9213 [00:37<33:32,  4.50it/s]

  2%|██▉                                                                                                                                                                           | 156/9213 [00:37<28:52,  5.23it/s]

  2%|██▉                                                                                                                                                                           | 157/9213 [00:37<26:14,  5.75it/s]

  2%|███                                                                                                                                                                           | 159/9213 [00:38<23:55,  6.31it/s]

  2%|███                                                                                                                                                                           | 160/9213 [00:39<59:16,  2.55it/s]

  2%|███                                                                                                                                                                           | 162/9213 [00:39<42:10,  3.58it/s]

  2%|███                                                                                                                                                                           | 163/9213 [00:39<36:55,  4.09it/s]

  2%|███                                                                                                                                                                           | 165/9213 [00:39<28:48,  5.23it/s]

  2%|███▏                                                                                                                                                                          | 167/9213 [00:39<22:41,  6.65it/s]

  2%|███▏                                                                                                                                                                          | 168/9213 [00:40<21:58,  6.86it/s]

  2%|███▏                                                                                                                                                                          | 169/9213 [00:40<22:22,  6.74it/s]

  2%|███▏                                                                                                                                                                          | 170/9213 [00:41<57:04,  2.64it/s]

  2%|███▏                                                                                                                                                                          | 172/9213 [00:41<40:02,  3.76it/s]

  2%|███▎                                                                                                                                                                          | 174/9213 [00:41<30:18,  4.97it/s]

  2%|███▎                                                                                                                                                                          | 176/9213 [00:41<24:28,  6.16it/s]

  2%|███▎                                                                                                                                                                          | 177/9213 [00:41<23:08,  6.51it/s]

  2%|███▎                                                                                                                                                                          | 178/9213 [00:42<21:39,  6.95it/s]

  2%|███▍                                                                                                                                                                          | 180/9213 [00:43<46:53,  3.21it/s]

  2%|███▍                                                                                                                                                                          | 181/9213 [00:43<44:23,  3.39it/s]

  2%|███▍                                                                                                                                                                          | 182/9213 [00:43<45:37,  3.30it/s]

  2%|███▍                                                                                                                                                                          | 183/9213 [00:43<40:55,  3.68it/s]

  2%|███▍                                                                                                                                                                          | 184/9213 [00:44<43:14,  3.48it/s]

  2%|███▍                                                                                                                                                                          | 185/9213 [00:44<36:18,  4.14it/s]

  2%|███▌                                                                                                                                                                          | 186/9213 [00:44<31:47,  4.73it/s]

  2%|███▌                                                                                                                                                                          | 187/9213 [00:44<27:36,  5.45it/s]

  2%|███▌                                                                                                                                                                          | 188/9213 [00:44<24:30,  6.14it/s]

  2%|███▌                                                                                                                                                                          | 189/9213 [00:44<22:16,  6.75it/s]

  2%|███▌                                                                                                                                                                        | 190/9213 [00:45<1:05:13,  2.31it/s]

  2%|███▌                                                                                                                                                                          | 191/9213 [00:46<51:27,  2.92it/s]

  2%|███▋                                                                                                                                                                          | 193/9213 [00:46<33:55,  4.43it/s]

  2%|███▋                                                                                                                                                                          | 194/9213 [00:46<33:01,  4.55it/s]

  2%|███▋                                                                                                                                                                          | 196/9213 [00:46<24:07,  6.23it/s]

  2%|███▋                                                                                                                                                                          | 198/9213 [00:47<26:16,  5.72it/s]

  2%|███▊                                                                                                                                                                          | 199/9213 [00:47<26:20,  5.70it/s]

  2%|███▊                                                                                                                                                                          | 200/9213 [00:48<58:15,  2.58it/s]

  2%|███▊                                                                                                                                                                          | 201/9213 [00:48<47:54,  3.14it/s]

  2%|███▊                                                                                                                                                                          | 203/9213 [00:48<32:29,  4.62it/s]

  2%|███▊                                                                                                                                                                          | 204/9213 [00:48<31:33,  4.76it/s]

  2%|███▊                                                                                                                                                                          | 205/9213 [00:48<27:37,  5.44it/s]

  2%|███▉                                                                                                                                                                          | 207/9213 [00:49<21:51,  6.87it/s]

  2%|███▉                                                                                                                                                                          | 209/9213 [00:49<18:09,  8.27it/s]

  2%|███▉                                                                                                                                                                          | 211/9213 [00:50<44:16,  3.39it/s]

  2%|████                                                                                                                                                                          | 213/9213 [00:50<34:33,  4.34it/s]

  2%|████                                                                                                                                                                          | 215/9213 [00:50<27:54,  5.37it/s]

  2%|████                                                                                                                                                                          | 216/9213 [00:50<26:35,  5.64it/s]

  2%|████                                                                                                                                                                          | 218/9213 [00:51<23:45,  6.31it/s]

  2%|████▏                                                                                                                                                                         | 219/9213 [00:51<24:45,  6.05it/s]

  2%|████▏                                                                                                                                                                         | 220/9213 [00:52<54:55,  2.73it/s]

  2%|████▏                                                                                                                                                                         | 221/9213 [00:52<46:55,  3.19it/s]

  2%|████▏                                                                                                                                                                         | 222/9213 [00:52<39:38,  3.78it/s]

  2%|████▏                                                                                                                                                                         | 223/9213 [00:52<38:17,  3.91it/s]

  2%|████▏                                                                                                                                                                         | 224/9213 [00:53<33:32,  4.47it/s]

  2%|████▎                                                                                                                                                                         | 226/9213 [00:53<26:57,  5.56it/s]

  2%|████▎                                                                                                                                                                         | 227/9213 [00:53<34:01,  4.40it/s]

  2%|████▎                                                                                                                                                                         | 228/9213 [00:53<30:20,  4.94it/s]

  2%|████▎                                                                                                                                                                         | 230/9213 [00:55<55:30,  2.70it/s]

  3%|████▎                                                                                                                                                                         | 231/9213 [00:55<46:12,  3.24it/s]

  3%|████▍                                                                                                                                                                         | 232/9213 [00:55<41:54,  3.57it/s]

  3%|████▍                                                                                                                                                                         | 234/9213 [00:55<31:45,  4.71it/s]

  3%|████▍                                                                                                                                                                         | 235/9213 [00:55<29:11,  5.13it/s]

  3%|████▍                                                                                                                                                                         | 236/9213 [00:55<27:49,  5.38it/s]

  3%|████▍                                                                                                                                                                         | 237/9213 [00:56<25:49,  5.79it/s]

  3%|████▍                                                                                                                                                                         | 238/9213 [00:56<24:43,  6.05it/s]

  3%|████▌                                                                                                                                                                         | 240/9213 [00:57<52:12,  2.86it/s]

  3%|████▌                                                                                                                                                                         | 241/9213 [00:57<45:49,  3.26it/s]

  3%|████▌                                                                                                                                                                         | 242/9213 [00:57<38:30,  3.88it/s]

  3%|████▌                                                                                                                                                                         | 243/9213 [00:57<32:52,  4.55it/s]

  3%|████▌                                                                                                                                                                         | 244/9213 [00:57<30:48,  4.85it/s]

  3%|████▋                                                                                                                                                                         | 245/9213 [00:58<38:04,  3.93it/s]

  3%|████▋                                                                                                                                                                         | 247/9213 [00:58<28:32,  5.23it/s]

  3%|████▋                                                                                                                                                                         | 248/9213 [00:58<25:30,  5.86it/s]

  3%|████▋                                                                                                                                                                         | 249/9213 [00:58<23:32,  6.34it/s]

  3%|████▋                                                                                                                                                                       | 250/9213 [00:59<1:04:05,  2.33it/s]

  3%|████▋                                                                                                                                                                         | 251/9213 [01:00<51:15,  2.91it/s]

  3%|████▊                                                                                                                                                                         | 252/9213 [01:00<42:34,  3.51it/s]

  3%|████▊                                                                                                                                                                         | 253/9213 [01:00<46:00,  3.25it/s]

  3%|████▊                                                                                                                                                                         | 255/9213 [01:00<32:10,  4.64it/s]

  3%|████▊                                                                                                                                                                         | 256/9213 [01:00<28:14,  5.29it/s]

  3%|████▊                                                                                                                                                                         | 258/9213 [01:01<21:22,  6.98it/s]

  3%|████▉                                                                                                                                                                         | 259/9213 [01:01<20:38,  7.23it/s]

  3%|████▉                                                                                                                                                                         | 260/9213 [01:02<55:26,  2.69it/s]

  3%|████▊                                                                                                                                                                       | 261/9213 [01:03<1:15:29,  1.98it/s]

  3%|████▉                                                                                                                                                                       | 262/9213 [01:03<1:23:56,  1.78it/s]

  3%|████▉                                                                                                                                                                       | 263/9213 [01:04<1:17:25,  1.93it/s]

  3%|████▉                                                                                                                                                                       | 264/9213 [01:04<1:04:45,  2.30it/s]

  3%|█████                                                                                                                                                                         | 265/9213 [01:04<52:31,  2.84it/s]

  3%|█████                                                                                                                                                                         | 266/9213 [01:04<43:53,  3.40it/s]

  3%|█████                                                                                                                                                                         | 267/9213 [01:04<36:41,  4.06it/s]

  3%|█████                                                                                                                                                                         | 268/9213 [01:05<31:23,  4.75it/s]

  3%|█████                                                                                                                                                                         | 269/9213 [01:05<27:38,  5.39it/s]

  3%|█████                                                                                                                                                                       | 270/9213 [01:06<1:07:32,  2.21it/s]

  3%|█████                                                                                                                                                                       | 271/9213 [01:07<1:53:52,  1.31it/s]

  3%|█████                                                                                                                                                                       | 272/9213 [01:08<1:37:50,  1.52it/s]

  3%|█████                                                                                                                                                                       | 273/9213 [01:08<1:31:27,  1.63it/s]

  3%|█████                                                                                                                                                                       | 274/9213 [01:09<1:22:38,  1.80it/s]

  3%|█████▏                                                                                                                                                                      | 275/9213 [01:09<1:20:19,  1.85it/s]

  3%|█████▏                                                                                                                                                                      | 276/9213 [01:09<1:14:35,  2.00it/s]

  3%|█████▏                                                                                                                                                                      | 277/9213 [01:10<1:16:13,  1.95it/s]

  3%|█████▏                                                                                                                                                                      | 278/9213 [01:10<1:10:35,  2.11it/s]

  3%|█████▏                                                                                                                                                                      | 279/9213 [01:11<1:21:26,  1.83it/s]

  3%|█████▏                                                                                                                                                                      | 280/9213 [01:13<2:04:31,  1.20it/s]

  3%|█████▏                                                                                                                                                                      | 281/9213 [01:13<1:33:57,  1.58it/s]

  3%|█████▎                                                                                                                                                                      | 282/9213 [01:13<1:32:09,  1.62it/s]

  3%|█████▎                                                                                                                                                                      | 283/9213 [01:14<1:27:56,  1.69it/s]

  3%|█████▎                                                                                                                                                                      | 284/9213 [01:14<1:21:45,  1.82it/s]

  3%|█████▎                                                                                                                                                                      | 285/9213 [01:15<1:21:59,  1.81it/s]

  3%|█████▎                                                                                                                                                                      | 286/9213 [01:15<1:20:14,  1.85it/s]

  3%|█████▎                                                                                                                                                                      | 287/9213 [01:16<1:01:58,  2.40it/s]

  3%|█████▍                                                                                                                                                                        | 288/9213 [01:16<48:13,  3.08it/s]

  3%|█████▍                                                                                                                                                                      | 290/9213 [01:17<1:06:24,  2.24it/s]

  3%|█████▍                                                                                                                                                                        | 291/9213 [01:17<54:51,  2.71it/s]

  3%|█████▌                                                                                                                                                                        | 292/9213 [01:17<45:45,  3.25it/s]

  3%|█████▌                                                                                                                                                                        | 293/9213 [01:17<37:33,  3.96it/s]

  3%|█████▌                                                                                                                                                                        | 294/9213 [01:17<34:30,  4.31it/s]

  3%|█████▌                                                                                                                                                                        | 296/9213 [01:18<26:03,  5.70it/s]

  3%|█████▌                                                                                                                                                                        | 297/9213 [01:18<23:38,  6.28it/s]

  3%|█████▋                                                                                                                                                                        | 298/9213 [01:18<27:57,  5.32it/s]

  3%|█████▋                                                                                                                                                                        | 299/9213 [01:18<24:34,  6.05it/s]

  3%|█████▌                                                                                                                                                                      | 300/9213 [01:19<1:04:31,  2.30it/s]

  3%|█████▋                                                                                                                                                                        | 301/9213 [01:19<55:24,  2.68it/s]

  3%|█████▋                                                                                                                                                                      | 302/9213 [01:20<1:00:16,  2.46it/s]

  3%|█████▋                                                                                                                                                                        | 303/9213 [01:20<54:42,  2.71it/s]

  3%|█████▊                                                                                                                                                                        | 305/9213 [01:20<38:45,  3.83it/s]

  3%|█████▊                                                                                                                                                                        | 306/9213 [01:21<36:13,  4.10it/s]

  3%|█████▊                                                                                                                                                                        | 307/9213 [01:21<34:17,  4.33it/s]

  3%|█████▊                                                                                                                                                                        | 308/9213 [01:21<30:55,  4.80it/s]

  3%|█████▊                                                                                                                                                                        | 309/9213 [01:21<28:46,  5.16it/s]

  3%|█████▊                                                                                                                                                                      | 310/9213 [01:22<1:06:45,  2.22it/s]

  3%|█████▊                                                                                                                                                                        | 311/9213 [01:22<53:17,  2.78it/s]

  3%|█████▉                                                                                                                                                                        | 312/9213 [01:23<44:55,  3.30it/s]

  3%|█████▉                                                                                                                                                                        | 313/9213 [01:23<38:14,  3.88it/s]

  3%|█████▉                                                                                                                                                                        | 314/9213 [01:23<40:33,  3.66it/s]

  3%|█████▉                                                                                                                                                                        | 315/9213 [01:23<37:26,  3.96it/s]

  3%|█████▉                                                                                                                                                                        | 316/9213 [01:23<35:49,  4.14it/s]

  3%|██████                                                                                                                                                                        | 318/9213 [01:24<26:17,  5.64it/s]

  3%|██████                                                                                                                                                                        | 320/9213 [01:25<50:34,  2.93it/s]

  3%|██████                                                                                                                                                                        | 321/9213 [01:25<46:40,  3.18it/s]

  3%|██████                                                                                                                                                                        | 322/9213 [01:25<40:29,  3.66it/s]

  4%|██████                                                                                                                                                                        | 323/9213 [01:25<34:06,  4.34it/s]

  4%|██████                                                                                                                                                                        | 324/9213 [01:25<30:38,  4.84it/s]

  4%|██████▏                                                                                                                                                                       | 325/9213 [01:26<29:49,  4.97it/s]

  4%|██████▏                                                                                                                                                                       | 326/9213 [01:26<28:29,  5.20it/s]

  4%|██████▏                                                                                                                                                                       | 327/9213 [01:26<24:52,  5.95it/s]

  4%|██████▏                                                                                                                                                                       | 328/9213 [01:26<22:33,  6.56it/s]

  4%|██████▏                                                                                                                                                                       | 329/9213 [01:26<20:44,  7.14it/s]

  4%|██████▏                                                                                                                                                                     | 330/9213 [01:27<1:02:03,  2.39it/s]

  4%|██████▏                                                                                                                                                                     | 331/9213 [01:28<1:06:44,  2.22it/s]

  4%|██████▏                                                                                                                                                                     | 332/9213 [01:28<1:04:31,  2.29it/s]

  4%|██████▎                                                                                                                                                                       | 333/9213 [01:28<58:22,  2.54it/s]

  4%|██████▎                                                                                                                                                                       | 334/9213 [01:29<59:12,  2.50it/s]

  4%|██████▎                                                                                                                                                                       | 335/9213 [01:29<46:14,  3.20it/s]

  4%|██████▎                                                                                                                                                                       | 336/9213 [01:29<41:09,  3.59it/s]

  4%|██████▎                                                                                                                                                                       | 337/9213 [01:30<46:58,  3.15it/s]

  4%|██████▍                                                                                                                                                                       | 338/9213 [01:30<58:25,  2.53it/s]

  4%|██████▍                                                                                                                                                                       | 339/9213 [01:30<56:17,  2.63it/s]

  4%|██████▎                                                                                                                                                                     | 340/9213 [01:32<1:38:03,  1.51it/s]

  4%|██████▎                                                                                                                                                                     | 341/9213 [01:32<1:16:16,  1.94it/s]

  4%|██████▍                                                                                                                                                                     | 342/9213 [01:32<1:08:48,  2.15it/s]

  4%|██████▍                                                                                                                                                                       | 343/9213 [01:32<54:23,  2.72it/s]

  4%|██████▍                                                                                                                                                                       | 344/9213 [01:33<43:07,  3.43it/s]

  4%|██████▌                                                                                                                                                                       | 345/9213 [01:33<34:50,  4.24it/s]

  4%|██████▌                                                                                                                                                                       | 346/9213 [01:33<29:27,  5.02it/s]

  4%|██████▌                                                                                                                                                                       | 347/9213 [01:33<40:29,  3.65it/s]

  4%|██████▌                                                                                                                                                                       | 348/9213 [01:33<33:43,  4.38it/s]

  4%|██████▌                                                                                                                                                                       | 349/9213 [01:34<31:47,  4.65it/s]

  4%|██████▌                                                                                                                                                                     | 350/9213 [01:35<1:29:33,  1.65it/s]

  4%|██████▌                                                                                                                                                                     | 351/9213 [01:36<1:26:07,  1.71it/s]

  4%|██████▌                                                                                                                                                                     | 352/9213 [01:36<1:18:29,  1.88it/s]

  4%|██████▌                                                                                                                                                                     | 353/9213 [01:36<1:13:16,  2.02it/s]

  4%|██████▋                                                                                                                                                                       | 354/9213 [01:37<55:55,  2.64it/s]

  4%|██████▋                                                                                                                                                                       | 355/9213 [01:37<45:13,  3.26it/s]

  4%|██████▋                                                                                                                                                                       | 356/9213 [01:37<40:09,  3.68it/s]

  4%|██████▋                                                                                                                                                                       | 357/9213 [01:37<49:32,  2.98it/s]

  4%|██████▊                                                                                                                                                                       | 358/9213 [01:38<43:10,  3.42it/s]

  4%|██████▊                                                                                                                                                                       | 359/9213 [01:38<36:53,  4.00it/s]

  4%|██████▋                                                                                                                                                                     | 360/9213 [01:39<1:16:53,  1.92it/s]

  4%|██████▋                                                                                                                                                                     | 361/9213 [01:39<1:18:06,  1.89it/s]

  4%|██████▊                                                                                                                                                                       | 362/9213 [01:39<59:26,  2.48it/s]

  4%|██████▊                                                                                                                                                                       | 363/9213 [01:40<49:06,  3.00it/s]

  4%|██████▊                                                                                                                                                                       | 364/9213 [01:40<41:04,  3.59it/s]

  4%|██████▉                                                                                                                                                                       | 365/9213 [01:40<35:27,  4.16it/s]

  4%|██████▉                                                                                                                                                                       | 366/9213 [01:40<31:38,  4.66it/s]

  4%|██████▉                                                                                                                                                                       | 368/9213 [01:40<25:07,  5.87it/s]

  4%|██████▉                                                                                                                                                                       | 369/9213 [01:41<24:23,  6.04it/s]

  4%|██████▉                                                                                                                                                                     | 370/9213 [01:42<1:03:40,  2.31it/s]

  4%|███████                                                                                                                                                                       | 372/9213 [01:42<43:27,  3.39it/s]

  4%|███████                                                                                                                                                                       | 374/9213 [01:42<34:08,  4.32it/s]

  4%|███████                                                                                                                                                                       | 375/9213 [01:42<31:05,  4.74it/s]

  4%|███████                                                                                                                                                                       | 376/9213 [01:43<31:30,  4.67it/s]

  4%|███████                                                                                                                                                                       | 377/9213 [01:43<27:40,  5.32it/s]

  4%|███████▏                                                                                                                                                                      | 378/9213 [01:43<24:38,  5.98it/s]

  4%|███████▏                                                                                                                                                                      | 379/9213 [01:43<23:04,  6.38it/s]

  4%|███████                                                                                                                                                                     | 380/9213 [01:44<1:03:58,  2.30it/s]

  4%|███████▏                                                                                                                                                                      | 381/9213 [01:44<50:42,  2.90it/s]

  4%|███████▏                                                                                                                                                                      | 383/9213 [01:44<35:32,  4.14it/s]

  4%|███████▎                                                                                                                                                                      | 384/9213 [01:44<30:39,  4.80it/s]

  4%|███████▎                                                                                                                                                                      | 385/9213 [01:45<27:19,  5.39it/s]

  4%|███████▎                                                                                                                                                                      | 386/9213 [01:45<27:47,  5.29it/s]

  4%|███████▎                                                                                                                                                                      | 387/9213 [01:45<27:44,  5.30it/s]

  4%|███████▎                                                                                                                                                                      | 388/9213 [01:45<24:44,  5.95it/s]

  4%|███████▎                                                                                                                                                                      | 389/9213 [01:45<24:37,  5.97it/s]

  4%|███████▎                                                                                                                                                                    | 390/9213 [01:46<1:03:45,  2.31it/s]

  4%|███████▎                                                                                                                                                                    | 391/9213 [01:47<1:07:17,  2.19it/s]

  4%|███████▍                                                                                                                                                                      | 392/9213 [01:47<56:26,  2.60it/s]

  4%|███████▍                                                                                                                                                                      | 394/9213 [01:48<47:01,  3.13it/s]

  4%|███████▍                                                                                                                                                                      | 395/9213 [01:48<47:07,  3.12it/s]

  4%|███████▍                                                                                                                                                                      | 396/9213 [01:48<41:20,  3.55it/s]

  4%|███████▍                                                                                                                                                                      | 397/9213 [01:48<37:52,  3.88it/s]

  4%|███████▌                                                                                                                                                                      | 398/9213 [01:49<37:39,  3.90it/s]

  4%|███████▌                                                                                                                                                                      | 399/9213 [01:49<35:26,  4.14it/s]

  4%|███████▍                                                                                                                                                                    | 400/9213 [01:50<1:16:40,  1.92it/s]

  4%|███████▍                                                                                                                                                                    | 401/9213 [01:50<1:01:41,  2.38it/s]

  4%|███████▌                                                                                                                                                                      | 402/9213 [01:50<52:15,  2.81it/s]

  4%|███████▌                                                                                                                                                                      | 403/9213 [01:50<41:33,  3.53it/s]

  4%|███████▋                                                                                                                                                                      | 404/9213 [01:51<36:40,  4.00it/s]

  4%|███████▋                                                                                                                                                                      | 406/9213 [01:51<25:50,  5.68it/s]

  4%|███████▋                                                                                                                                                                      | 407/9213 [01:51<23:21,  6.28it/s]

  4%|███████▋                                                                                                                                                                      | 408/9213 [01:51<25:02,  5.86it/s]

  4%|███████▋                                                                                                                                                                      | 410/9213 [01:52<58:29,  2.51it/s]

  4%|███████▊                                                                                                                                                                      | 411/9213 [01:53<56:06,  2.61it/s]

  4%|███████▊                                                                                                                                                                      | 412/9213 [01:53<53:19,  2.75it/s]

  4%|███████▊                                                                                                                                                                      | 413/9213 [01:53<51:03,  2.87it/s]

  4%|███████▊                                                                                                                                                                      | 414/9213 [01:54<45:01,  3.26it/s]

  5%|███████▊                                                                                                                                                                      | 415/9213 [01:54<45:27,  3.23it/s]

  5%|███████▊                                                                                                                                                                      | 416/9213 [01:54<49:34,  2.96it/s]

  5%|███████▉                                                                                                                                                                      | 417/9213 [01:54<41:40,  3.52it/s]

  5%|███████▉                                                                                                                                                                      | 418/9213 [01:55<34:50,  4.21it/s]

  5%|███████▉                                                                                                                                                                      | 419/9213 [01:55<39:05,  3.75it/s]

  5%|███████▊                                                                                                                                                                    | 420/9213 [01:56<1:19:42,  1.84it/s]

  5%|███████▊                                                                                                                                                                    | 421/9213 [01:57<1:15:06,  1.95it/s]

  5%|███████▉                                                                                                                                                                    | 422/9213 [01:57<1:10:35,  2.08it/s]

  5%|███████▉                                                                                                                                                                    | 423/9213 [01:57<1:07:22,  2.17it/s]

  5%|███████▉                                                                                                                                                                    | 424/9213 [01:58<1:13:10,  2.00it/s]

  5%|███████▉                                                                                                                                                                    | 425/9213 [01:58<1:05:39,  2.23it/s]

  5%|███████▉                                                                                                                                                                    | 426/9213 [01:59<1:08:28,  2.14it/s]

  5%|███████▉                                                                                                                                                                    | 427/9213 [01:59<1:01:25,  2.38it/s]

  5%|████████                                                                                                                                                                      | 428/9213 [02:00<58:39,  2.50it/s]

  5%|████████                                                                                                                                                                      | 429/9213 [02:00<57:04,  2.57it/s]

  5%|████████                                                                                                                                                                    | 430/9213 [02:02<1:55:22,  1.27it/s]

  5%|████████                                                                                                                                                                    | 431/9213 [02:02<1:32:34,  1.58it/s]

  5%|████████                                                                                                                                                                    | 432/9213 [02:02<1:16:16,  1.92it/s]

In [None]:
# Cast the `cik` column to int, then print the frame's schema summary.
# NOTE(review): presumably done so the dtype matches the `cik` key used by the
# merge further down — confirm against traded_company_df's dtype.
securities_df['cik'] = securities_df['cik'].astype(int)
securities_df.info()

In [None]:
# Print column names, dtypes and non-null counts of the traded-company frame.
traded_company_df.info()

In [None]:
# Inner join on `cik`: only securities present in BOTH frames survive.
merged_df = pd.merge(securities_df, traded_company_df, how='inner', on='cik')
# Disabled step, kept for reference:
# merged_df = merged_df.dropna(subset=['tickers'])
merged_df.info()

In [None]:
# Print column names, dtypes and non-null counts of the S&P 500 frame.
snp500_company_df.info()

In [None]:
# Enrich every merged security with S&P 500 attributes. The left join keeps
# rows that have no S&P 500 match; their S&P 500 fields stay empty.
# NOTE(review): if snp500_company_df contains several rows for the same CIK,
# the join emits one output row per match — confirm that this is intended.
merged_securities_df = (
    merged_df
    .merge(snp500_company_df, how='left', left_on='cik', right_on='CIK')
    # Keep only the columns that go into the output file, in output order.
    [[
        'cik',
        'ticker',
        'exchanges',
        'ein',
        'name',
        'formerNames',
        'Security',
        'GICS Sector',
        'GICS Sub-Industry',
        'sic',
        'sicDescription',
        'Headquarters Location',
    ]]
    # Normalise the remaining source column names to snake_case.
    .rename(columns={
        'name': 'full_name',
        'formerNames': 'former_names',
        'Security': 'short_name',
        'GICS Sector': 'gics_sector',
        'GICS Sub-Industry': 'gics_sub_industry',
        'sicDescription': 'sic_description',
        'Headquarters Location': 'headquarters_location',
    })
)
merged_securities_df.info()

In [None]:
# Take the CSV destination from the `product` mapping and report how many
# rows are about to be written.
output_file_path = product["data"]
print(merged_securities_df.shape[0])

In [None]:
# Write the merged securities table to CSV (the index is written too, which is
# the pandas default). The destination directory is created first because
# to_csv raises OSError when the parent directory does not exist.
Path(output_file_path).parent.mkdir(parents=True, exist_ok=True)
merged_securities_df.to_csv(output_file_path)
print(f"Saved file {output_file_path}")

In [None]:
# Preview up to 10 random rows. The sample size is capped at the frame length
# because DataFrame.sample raises ValueError when asked for more rows than
# exist (sampling without replacement).
merged_securities_df.sample(n=min(10, len(merged_securities_df)))

In [None]:
# Drop the notebook's references to the intermediate objects so their memory
# can be reclaimed.
# NOTE(review): merged_df is not released here — confirm whether that is
# intentional.
del (
    securities_df,
    traded_company_df,
    traded_companies,
    snp500_company_df,
    merged_securities_df,
)